Source code for pymend.docstring_parser.google

"""Google-style docstring parsing."""

import inspect
import re
from collections import OrderedDict
from collections.abc import Mapping, Sequence
from enum import IntEnum
from typing import NamedTuple, Optional

from .common import (
    EXAMPLES_KEYWORDS,
    PARAM_KEYWORDS,
    RAISES_KEYWORDS,
    RETURNS_KEYWORDS,
    YIELDS_KEYWORDS,
    Docstring,
    DocstringExample,
    DocstringMeta,
    DocstringParam,
    DocstringRaises,
    DocstringReturns,
    DocstringStyle,
    DocstringYields,
    KeyReturnDict,
    MainSections,
    ParseError,
    RenderingStyle,
    append_description,
    split_description,
)


class SectionType(IntEnum):
    """Types of sections.

    The type decides how ``GoogleParser.parse`` turns the body of a section
    into metadata entries:

    * ``SINGULAR`` bodies are dedented and stored as one single entry.
    * ``MULTIPLE`` bodies are split into one entry per line that starts at the
      indentation of the first body line.
    * ``SINGULAR_OR_MULTIPLE`` bodies are first parsed like ``MULTIPLE``; if
      that raises a ``ParseError`` the whole body is treated as one entry.
    """

    # Whole section body forms one entry.
    SINGULAR = 0
    """For sections like examples."""

    # Section body is a list of "name: description" style entries.
    MULTIPLE = 1
    """For sections like params."""

    # Either of the two layouts above is accepted.
    SINGULAR_OR_MULTIPLE = 2
    """For sections like returns or yields."""
class Section(NamedTuple):
    """A docstring section.

    Describes one section heading the Google parser should recognize and how
    its body has to be interpreted.
    """

    # Heading text as it appears in the docstring, e.g. "Args" or "Returns".
    title: str
    # Normalized section key shared by all aliases of a section,
    # e.g. "param" for both "Args" and "Parameters".
    key: str
    # How the body of the section is split into entries.
    type_info: SectionType
# Matches the "name (type)" part in front of the colon of an entry.
# Group 1 is the (lazily matched) text before the parentheses, group 2 the
# text inside the parentheses with surrounding whitespace stripped.
GOOGLE_TYPED_ARG_REGEX = re.compile(r"\s*(.+?)\s*\(\s*(.*[^\s]+)\s*\)")
# Extracts a default value from a description of the form
# "<text>. Defaults to <value>.". Group 1 is the value.
# ``.`` does not match newlines here, so when used with ``match`` the whole
# phrase has to be on the first line of the description.
GOOGLE_ARG_DESC_REGEX = re.compile(r".*\. Defaults to (.+)\.")
# Decides whether a piece of text looks like the start of a
# "<spec>: <description>" entry of a multi-entry section.
MULTIPLE_PATTERN = re.compile(
    # Match anything that has leading whitespace and then contiguous non-whitespace
    # (non colon) character followed by a colon.
    # somecontiguoustype: some description
    r"(\s*[^:\s]+:)"
    # Match anything that has some contiguous text, then something in parens,
    # immediately followed by a colon.
    r"|(\s*[^:\s]+\s+\(.+\):)"
    # Allow whitespace if we have a closing ] before the colon, optionally with a )
    # some var name (list[int, int]): some description
    r"|([^:]*\]:.*)"
    # Allow for arbitrary chaining of pipe character for type annotations int | str
    # Where the individual types are allowed to have spaces as long as they start
    # and end without one ([^\s|][^\|]*[^\s|])
    r"|(\s*[^\s|][^\|]*[^\s|](\s*\|\s*[^\s|][^\|]*[^\s|])+:)"
)
# Section headings recognized when no custom list is passed to the parser.
# Several titles share one key (e.g. "Args" and "Parameters" -> "param"), which
# makes them aliases of the same logical section.
DEFAULT_SECTIONS = [
    Section("Arguments", "param", SectionType.MULTIPLE),
    Section("Args", "param", SectionType.MULTIPLE),
    Section("Parameters", "param", SectionType.MULTIPLE),
    Section("Params", "param", SectionType.MULTIPLE),
    Section("Raises", "raises", SectionType.MULTIPLE),
    Section("Exceptions", "raises", SectionType.MULTIPLE),
    Section("Except", "raises", SectionType.MULTIPLE),
    Section("Attributes", "attribute", SectionType.MULTIPLE),
    Section("Generics", "generics", SectionType.MULTIPLE),
    Section("Example", "examples", SectionType.SINGULAR),
    Section("Examples", "examples", SectionType.SINGULAR),
    Section("Returns", "returns", SectionType.SINGULAR_OR_MULTIPLE),
    Section("Yields", "yields", SectionType.SINGULAR_OR_MULTIPLE),
]
class GoogleParser:
    """Parser for Google-style docstrings.

    A parser instance knows a set of section titles (see ``DEFAULT_SECTIONS``)
    and can be reused for any number of docstrings: the per-docstring state
    (``section_titles``) is reset at the start of every ``parse`` call.
    """

    def __init__(
        self, sections: Optional[list[Section]] = None, *, title_colon: bool = True
    ) -> None:
        """Set up sections.

        Parameters
        ----------
        sections : Optional[list[Section]]
            Recognized sections. ``None`` or an empty list selects
            ``DEFAULT_SECTIONS``.
        title_colon : bool
            Require colon after section title. (Default value = True)
        """
        if not sections:
            sections = DEFAULT_SECTIONS
        self.sections = {s.title: s for s in sections}
        # Maps section keys to the actually used titles.
        self.section_titles: dict[str, str] = {}
        self.title_colon = title_colon
        self._setup()

    def canonical_titles(self) -> KeyReturnDict[str, str]:
        """Map known section titles to the titles used in the parsed docstring.

        Every registered title whose section key was encountered during the
        most recent ``parse`` call is mapped to the title that was actually
        written in that docstring. Titles of sections that did not occur are
        not part of the mapping.

        Returns
        -------
        KeyReturnDict[str, str]
            Mapping from registered title to the title used in the docstring.
        """
        return KeyReturnDict(
            {
                title: self.section_titles[key]
                for title in self.sections
                if (key := self.sections[title].key) in self.section_titles
            }
        )

    def _setup(self) -> None:
        """Set up parser with the colon type and title regex."""
        colon = ":" if self.title_colon else ""
        # Titles are literal text, not patterns: escape them so that custom
        # titles containing regex metacharacters (e.g. "C++ notes") neither
        # break compilation nor match unintended lines.
        alternatives = "|".join(f"({re.escape(t)})" for t in self.sections)
        self.titles_re = re.compile(
            "^(" + alternatives + ")" + colon + "[ \t\r\f\v]*$",
            flags=re.MULTILINE,
        )

    @staticmethod
    def _build_single_meta(section: Section, desc: str) -> DocstringMeta:
        """Build docstring element for single line sections.

        Parameters
        ----------
        section : Section
            The section that is being processed.
        desc : str
            docstring element text

        Returns
        -------
        DocstringMeta
            Docstring meta wrapper.

        Raises
        ------
        ParseError
            If the section represents a parameter section.
            In that case we would not expect to be in the single line function.
        """
        if section.key in RETURNS_KEYWORDS:
            return DocstringReturns(
                args=[section.key],
                description=desc,
                type_name=None,
                is_generator=False,
            )
        if section.key in YIELDS_KEYWORDS:
            return DocstringYields(
                args=[section.key],
                description=desc,
                type_name=None,
                is_generator=True,
            )
        if section.key in RAISES_KEYWORDS:
            return DocstringRaises(args=[section.key], description=desc, type_name=None)
        if section.key in EXAMPLES_KEYWORDS:
            return DocstringExample(args=[section.key], snippet=None, description=desc)
        if section.key in PARAM_KEYWORDS:
            # A parameter entry without a "name:" part carries no usable info.
            msg = "Expected parameter name."
            raise ParseError(msg)
        return DocstringMeta(args=[section.key], description=desc)

    def _prepare_multi_meta(self, section: Section, text: str) -> tuple[str, str]:
        """Check text for consistency and split into before and desc.

        Parameters
        ----------
        section : Section
            The section that is being processed.
        text : str
            docstring element text

        Returns
        -------
        before : str
            The part before the colon.
        desc : str
            The description of the element.

        Raises
        ------
        ParseError
            If the text did not match the multi pattern regex.
        ParseError
            If there is no colon in the text.
        """
        if not MULTIPLE_PATTERN.match(text):
            msg = (
                "Could not match multi pattern to split "
                f"chunk part {text!r} for section {section.title}."
            )
            raise ParseError(msg)
        if ":" not in text:
            msg = f"Expected a colon in {text!r} for title {section.title}."
            raise ParseError(msg)

        # Split spec and description
        before, desc = text.split(":", 1)
        if desc:
            # Drop the single separating space after the colon, if present.
            desc = desc[1:] if desc[0] == " " else desc
            if "\n" in desc:
                # Continuation lines carry the docstring indentation; dedent
                # them independently of the first line.
                first_line, rest = desc.split("\n", 1)
                desc = first_line + "\n" + inspect.cleandoc(rest)
            desc = desc.strip("\n")
        return before, desc

    def _build_multi_meta(self, section: Section, text: str) -> DocstringMeta:
        """Build docstring element for multiline section.

        Parameters
        ----------
        section : Section
            The section that is being processed.
        text : str
            Text of a single entry of the section ("spec: description").

        Returns
        -------
        DocstringMeta
            docstring meta element

        Raises
        ------
        ParseError
            If the text lacks a colon ':' or does not look like an entry.
        """
        before, desc = self._prepare_multi_meta(section, text)

        if section.key in PARAM_KEYWORDS:
            match = GOOGLE_TYPED_ARG_REGEX.match(before)
            if match:
                arg_name, type_name = match.group(1, 2)
                # Optional parameters are marked either as "type, optional"
                # or in the compact form "type?".
                if type_name.endswith(", optional"):
                    is_optional = True
                    type_name = type_name.removesuffix(", optional")
                elif type_name.endswith("?"):
                    is_optional = True
                    type_name = type_name.removesuffix("?")
                else:
                    is_optional = False
            else:
                # No type given, so optionality is unknown as well.
                arg_name, type_name = before, None
                is_optional = None

            match = GOOGLE_ARG_DESC_REGEX.match(desc)
            default = match.group(1) if match else None

            return DocstringParam(
                args=[section.key, before],
                description=desc,
                arg_name=arg_name,
                type_name=type_name,
                is_optional=is_optional,
                default=default,
            )
        if section.key in RETURNS_KEYWORDS | YIELDS_KEYWORDS:
            match = GOOGLE_TYPED_ARG_REGEX.match(before)
            if match:
                arg_name, type_name = match.group(1, 2)
            else:
                # Without parentheses the text before the colon is the type.
                arg_name, type_name = None, before
            if section.key in RETURNS_KEYWORDS:
                return DocstringReturns(
                    args=[section.key, arg_name or type_name],
                    description=desc,
                    return_name=arg_name,
                    type_name=type_name,
                    is_generator=False,
                )
            return DocstringYields(
                args=[section.key, arg_name or type_name],
                description=desc,
                yield_name=arg_name,
                type_name=type_name,
                is_generator=True,
            )
        if section.key in RAISES_KEYWORDS:
            return DocstringRaises(
                args=[section.key, before], description=desc, type_name=before
            )
        return DocstringMeta(args=[section.key, before], description=desc)

    def add_section(self, section: Section) -> None:
        """Add or replace a section.

        Parameters
        ----------
        section : Section
            The new section.
        """
        self.sections[section.title] = section
        # The title regex depends on the set of known titles.
        self._setup()

    def _split_sections(self, meta_chunk: str) -> Mapping[str, str]:
        """Split the chunk into sections as determined by the titles.

        As a side effect the title used for each encountered section key is
        recorded in ``self.section_titles``.

        Parameters
        ----------
        meta_chunk : str
            Part of the docstring NOT holding the description.

        Returns
        -------
        Mapping[str, str]
            Mapping between section title and
            part of the docstring that deals with it.

        Raises
        ------
        ParseError
            If multiple titles are found for the same section.
        """
        chunks: dict[str, str] = OrderedDict()

        matches = list(self.titles_re.finditer(meta_chunk))
        if not matches:
            return chunks

        # The body of a section runs from the end of its title line up to the
        # start of the next title line (or the end of the text for the last).
        body_ends = [following.start() for following in matches[1:]]
        body_ends.append(len(meta_chunk))

        for title_match, end in zip(matches, body_ends):
            title = title_match.group(1)
            if title not in self.sections:
                continue
            key = self.sections[title].key
            if key in self.section_titles:
                msg = (
                    "Duplicated titles for identical section:"
                    f" {title} and {self.section_titles[key]}"
                )
                raise ParseError(msg)
            self.section_titles[key] = title
            # Clear Any Unknown Meta
            # Ref: https://github.com/rr-/docstring_parser/issues/29
            meta_details = meta_chunk[title_match.end() : end]
            unknown_meta = re.search(r"\n\S", meta_details)
            if unknown_meta is not None:
                meta_details = meta_details[: unknown_meta.start()]

            chunks[title] = meta_details.strip("\n")
        return chunks

    def _determine_indent(self, chunk: str) -> str:
        """Determine indent.

        Parameters
        ----------
        chunk : str
            Chunk to determine the indent for.

        Returns
        -------
        str
            String representing the indent.

        Raises
        ------
        ParseError
            If no indent could be determined.
        """
        # NOTE: the pattern can match the empty string, so in practice a match
        # is always found; the error branch is kept as a safety net.
        indent_match = re.search(r"^\s*", chunk)
        if not indent_match:
            msg = f"Can't infer indent from '{chunk}'"
            raise ParseError(msg)
        return indent_match.group()

    def _get_chunks(self, text: str) -> tuple[str, str]:
        """Split docstring into description and meta part.

        Parameters
        ----------
        text : str
            Docstring text to split.

        Returns
        -------
        tuple[str, str]
            Docstring representing the description and the rest.
            Everything before the first recognized section title is the
            description; the rest is empty if no title was found.
        """
        if match := self.titles_re.search(text):
            return text[: match.start()], text[match.start() :]
        return text, ""

    def _get_multi_chunk_splits(
        self, chunk: str, title: str, indent: str
    ) -> list[tuple[int, int]]:
        """Get the starting and ending position for each element of a multi chunk.

        Parameters
        ----------
        chunk : str
            Full chunk to split.
        title : str
            Title of the section represented by the chunk.
        indent : str
            Indent before each element of the chunk.

        Returns
        -------
        list[tuple[int, int]]
            List of all start and end positions of each element of the chunk.

        Raises
        ------
        ParseError
            If no entry could be found with the expected indent.
        """
        # Split based on lines which have exactly that indent
        c_matches = list(re.finditer(rf"^{indent}(?=\S)", chunk, flags=re.MULTILINE))
        if not c_matches:
            msg = (
                f'No specification for "{title}": "{chunk}"'
                " Maybe check your indentation?"
            )
            raise ParseError(msg)
        c_splits = [
            (c_cur.end(), c_next.start())
            for c_cur, c_next in zip(c_matches, c_matches[1:])
        ]
        c_splits.append((c_matches[-1].end(), len(chunk)))
        return c_splits

    def parse(self, text: Optional[str]) -> Docstring:
        """Parse the Google-style docstring into its components.

        Parameters
        ----------
        text : Optional[str]
            docstring text

        Returns
        -------
        Docstring
            parsed docstring

        Raises
        ------
        ParseError
            If no specification could be found for a title, chunk pair.
        ParseError
            If the same section occurs more than once in the docstring or an
            entry of a section could not be parsed.
        """
        # The recorded titles belong to a single docstring. Without this reset
        # a reused parser would report "Duplicated titles" as soon as a second
        # docstring contains a section the first one already had.
        self.section_titles = {}

        ret = Docstring(style=DocstringStyle.GOOGLE)
        if not text:
            return ret

        # Clean according to PEP-0257
        text = inspect.cleandoc(text)

        desc_chunk, meta_chunk = self._get_chunks(text)

        # Break description into short and long parts
        split_description(ret, desc_chunk)

        # Split by sections determined by titles
        chunks = self._split_sections(meta_chunk)
        if not chunks:
            return ret

        # Add elements from each chunk
        for title, chunk in chunks.items():
            # Determine indent
            indent = self._determine_indent(chunk)
            section = self.sections[title]
            # Check for singular elements
            if section.type_info == SectionType.SINGULAR:
                part = inspect.cleandoc(chunk)
                ret.meta.append(self._build_single_meta(section, part))
                continue

            # Split based on lines which have exactly that indent
            c_splits = self._get_multi_chunk_splits(chunk, title, indent)
            if section.type_info == SectionType.MULTIPLE:
                for start, end in c_splits:
                    part = chunk[start:end].strip("\n")
                    ret.meta.append(self._build_multi_meta(section, part))
            else:  # SectionType.SINGULAR_OR_MULTIPLE
                # Try to handle it as a multiple section with multiple entries
                try:
                    metas = [
                        self._build_multi_meta(section, chunk[start:end].strip("\n"))
                        for start, end in c_splits
                    ]
                # Fall back to a singular entry for multi or single section
                except ParseError:
                    part = inspect.cleandoc(chunk)
                    if MULTIPLE_PATTERN.match(part):
                        ret.meta.append(self._build_multi_meta(section, part))
                    else:
                        ret.meta.append(self._build_single_meta(section, part))
                else:
                    ret.meta.extend(metas)
        ret.section_titles = self.canonical_titles()
        return ret
def parse(text: Optional[str]) -> Docstring:
    """Parse a Google-style docstring with a default-configured parser.

    A fresh ``GoogleParser`` with the default sections is created for every
    call, so no state is shared between calls.

    Parameters
    ----------
    text : Optional[str]
        docstring text

    Returns
    -------
    Docstring
        parsed docstring
    """
    parser = GoogleParser()
    return parser.parse(text)
def compose(  # noqa: PLR0915
    docstring: Docstring,
    rendering_style: RenderingStyle = RenderingStyle.COMPACT,
    indent: str = " ",
) -> str:
    """Render a parsed docstring into docstring text.

    Sections are emitted in the fixed order Args, Attributes, Generics,
    Returns, Yields, Raises, followed by all remaining meta entries in the
    order in which they are stored. Trailing blank lines are removed.

    Parameters
    ----------
    docstring : Docstring
        parsed docstring representation
    rendering_style : RenderingStyle
        the style to render docstrings (Default value = RenderingStyle.COMPACT)
    indent : str
        the characters used as indentation in the docstring string
        (Default value = ' ')

    Returns
    -------
    str
        docstring text
    """
    # NOTE(review): the default for ``indent`` is reproduced exactly as found;
    # whitespace inside the literal may have been collapsed upstream - confirm
    # against the project's intended default indentation.

    # Reuse the section titles of the parsed docstring only if it was written
    # in Google style; otherwise fall back to the names passed to the lookup
    # (presumably KeyReturnDict returns the key itself when it is missing -
    # verify against its definition).
    titles: KeyReturnDict[str, str] = (
        docstring.section_titles
        if docstring.style == DocstringStyle.GOOGLE
        else KeyReturnDict()
    )
    parts: list[str] = []

    def process_one(one: MainSections) -> None:
        """Build the output text for one entry in a section.

        Parameters
        ----------
        one : MainSections
            Docstring for which to build the raw text.
        """
        head = ""

        if isinstance(one, DocstringParam):
            head += one.arg_name or ""
        elif isinstance(one, DocstringReturns):
            head += one.return_name or ""
        elif isinstance(one, DocstringYields):
            head += one.yield_name or ""

        if isinstance(one, DocstringParam) and one.is_optional:
            optional = (
                "?" if rendering_style == RenderingStyle.COMPACT else ", optional"
            )
        else:
            optional = ""

        # "name (type):" / "type:" / "name:" depending on what is known.
        if one.type_name and head:
            head += f" ({one.type_name}{optional}):"
        elif one.type_name:
            head += f"{one.type_name}{optional}:"
        elif head:
            head += ":"
        if head:
            head = indent + head

        if one.description and rendering_style == RenderingStyle.EXPANDED:
            # Expanded: the description starts on its own line below the head.
            body = f"\n{indent}{indent}".join([head, *one.description.splitlines()])
            parts.append(body)
        elif one.description:
            # Compact: the first description line shares the line of the head.
            (first, *rest) = one.description.splitlines()
            body = f"\n{indent}{indent}".join(
                [(f"{head} {first}" if head else f"{indent}{first}"), *rest]
            )
            parts.append(body)
        else:
            parts.append(head)

    def process_sect(name: str, args: Sequence[MainSections]) -> None:
        """Build the output for a docstring section.

        Parameters
        ----------
        name : str
            Section for which to build the output.
        args : Sequence[MainSections]
            List of individual elements of that section.
        """
        name = titles[name]
        if args:
            parts.append(f"{name}:")
            for arg in args:
                process_one(arg)
            parts.append("")

    append_description(docstring, parts)

    process_sect("Args", [p for p in docstring.params or [] if p.args[0] == "param"])

    process_sect(
        "Attributes",
        [p for p in docstring.params or [] if p.args[0] == "attribute"],
    )

    process_sect(
        "Generics",
        [p for p in docstring.params or [] if p.args[0] == "generics"],
    )

    process_sect(
        "Returns",
        docstring.many_returns,
    )

    process_sect("Yields", docstring.many_yields)

    process_sect("Raises", docstring.raises or [])

    if docstring.returns and not docstring.many_returns:
        # Fallback for a return entry that is not part of ``many_returns``.
        # Render it as a regular Google section: "<Title>:" heading, the
        # entry, and a separating blank line (no numpydoc-style underline).
        ret = docstring.returns
        # presumably the entry exposes ``is_generator``; default to a plain
        # "Returns" section if it does not - verify against DocstringReturns.
        fallback_title = "Yields" if getattr(ret, "is_generator", False) else "Returns"
        parts.append(f"{titles[fallback_title]}:")
        process_one(ret)
        parts.append("")

    for meta in docstring.meta:
        if isinstance(
            meta, (DocstringParam, DocstringReturns, DocstringRaises, DocstringYields)
        ):
            continue  # Already handled
        parts.append(f'{titles[meta.args[0].replace("_", "").title()]}:')
        if meta.description:
            lines = [(indent + line).rstrip() for line in meta.description.splitlines()]
            parts.append("\n".join(lines))
        parts.append("")

    # Drop trailing blank lines left over from the section separators.
    while parts and not parts[-1]:
        parts.pop()

    return "\n".join(parts)