Source code for pymend.docstring_parser.epydoc

"""Epydoc-style docstring parsing.

.. seealso:: http://epydoc.sourceforge.net/manual-fields.html
"""

import inspect
import re
from typing import NamedTuple, Optional

from .common import (
    Docstring,
    DocstringMeta,
    DocstringParam,
    DocstringRaises,
    DocstringReturns,
    DocstringStyle,
    DocstringYields,
    ParseError,
    RenderingStyle,
    append_description,
    clean_str,
    split_description,
)


class SectionPattern(NamedTuple):
    """Bundle of compiled regular expressions, one per kind of section.

    The fields are positional-compatible with :class:`SectionMatch`, which
    holds the result of searching each of these patterns in a chunk.
    """

    # Pattern used to recognise parameter-related fields.
    param: re.Pattern[str]
    # Pattern used to recognise exception fields.
    raises: re.Pattern[str]
    # Pattern used to recognise return/yield fields.
    returns: re.Pattern[str]
    # Fallback pattern for any other (generic) field.
    meta: re.Pattern[str]
class SectionMatch(NamedTuple):
    """Search results for one chunk, one entry per kind of section.

    Every field is either the match object produced by the corresponding
    pattern or ``None`` when that pattern did not match.
    """

    # Result for the parameter pattern.
    param: Optional[re.Match[str]]
    # Result for the exception pattern.
    raises: Optional[re.Match[str]]
    # Result for the return/yield pattern.
    returns: Optional[re.Match[str]]
    # Result for the generic fallback pattern.
    meta: Optional[re.Match[str]]
def _get_matches_for_chunk(chunk: str, patterns: SectionPattern) -> SectionMatch:
    """Search the chunk with every section pattern.

    Parameters
    ----------
    chunk : str
        Text to search.
    patterns : SectionPattern
        The patterns to apply, one per kind of section.

    Returns
    -------
    SectionMatch
        For each pattern, the result of ``re.search`` on the chunk
        (``None`` where the pattern did not match).
    """
    # The searches are independent of each other; every pattern is always
    # tried so the caller can decide which result takes precedence.
    param_hit = re.search(patterns.param, chunk)
    raises_hit = re.search(patterns.raises, chunk)
    returns_hit = re.search(patterns.returns, chunk)
    meta_hit = re.search(patterns.meta, chunk)
    return SectionMatch(
        param=param_hit,
        raises=raises_hit,
        returns=returns_hit,
        meta=meta_hit,
    )
class StreamToken(NamedTuple):
    """A single tokenized field of the docstring's meta section."""

    # Category of the field: "param", "raise", "return", "yield" or "meta".
    base: str
    # The keyword that was written after the "@" (first regex group).
    key: str
    # Arguments that followed the keyword (may be empty).
    args: list[str]
    # Description text that followed the field's colon.
    desc: str
def _tokenize(
    meta_chunk: str,
    patterns: SectionPattern,
) -> list[StreamToken]:
    """Return the tokenized stream according to the regex patterns.

    Parameters
    ----------
    meta_chunk : str
        Chunk to tokenize.
    patterns : SectionPattern
        Collection of patterns for different sections.

    Returns
    -------
    list[StreamToken]
        One token per ``@`` field, each made of
        base: one of 'param', 'raise', 'return', 'yield', 'meta'
        key: the keyword captured by the matching pattern
        args: arguments that followed the keyword
        desc: description text after the matched field header

    Raises
    ------
    ParseError
        If none of the patterns match against the chunk.
    ParseError
        If we match a section in the general meta case that should have
        already been matched in a specific section.
    """
    stream: list[StreamToken] = []
    # A chunk starts at an "@" at the beginning of a line and extends up to
    # (but not including) the next such "@", or to the end of the text.
    for chunk_match in re.finditer(
        r"(^@.*?)(?=^@|\Z)", meta_chunk, flags=re.DOTALL | re.MULTILINE
    ):
        chunk = chunk_match.group(0)
        if not chunk:
            continue

        matches = _get_matches_for_chunk(chunk, patterns)
        # Specific sections take precedence over the generic meta pattern.
        match = matches.param or matches.raises or matches.returns or matches.meta
        if not match:
            msg = f'Error parsing meta information near "{chunk}".'
            raise ParseError(msg)

        # Group 1 is the keyword in every one of the patterns.
        key: str = match.group(1)
        if matches.param:
            base = "param"
            args = [match.group(2).strip()]
        elif matches.raises:
            base = "raise"
            # The exception name is optional for this kind of field.
            args = [] if match.group(2) is None else [match.group(2).strip()]
        elif matches.returns:
            base = "return" if key in ("return", "rtype") else "yield"
            args = []
        else:
            base = "meta"
            token = clean_str(match.group(2).strip())
            args = [] if token is None else re.split(r"\s+", token)

            # Make sure we didn't match some existing keyword in an incorrect
            # way here:
            # NOTE(review): "raise" is not part of this set, so a "raise"
            # field with several names is accepted as generic meta - confirm
            # that this is intended.
            if key in {
                "param",
                "keyword",
                "type",
                "return",
                "rtype",
                "yield",
                "ytype",
            }:
                msg = f'Error parsing meta information near "{chunk}".'
                raise ParseError(msg)

        desc = chunk[match.end() :].strip()
        if "\n" in desc:
            # Only the continuation lines carry indentation worth cleaning.
            first_line, rest = desc.split("\n", 1)
            desc = first_line + "\n" + inspect.cleandoc(rest)
        stream.append(StreamToken(base, key, args, desc))
    return stream


def _combine_params(stream: list[StreamToken]) -> dict[str, dict[str, Optional[str]]]:
    """Group the list of tokens into sections based on section and information.

    Parameters
    ----------
    stream : list[StreamToken]
        List of tokens to group into dict.

    Returns
    -------
    dict[str, dict[str, Optional[str]]]
        Dictionary grouping parsed param sections by section
        (param name, "return", "yield") and the information they
        represent ("type_name", "description"). If the same piece of
        information occurs more than once, the last occurrence wins.
    """
    params: dict[str, dict[str, Optional[str]]] = {}
    for base, key, args, desc in stream:
        if base not in ("param", "return", "yield"):
            continue  # nothing to do
        arg_name = args[0] if base == "param" else base
        info = params.setdefault(arg_name, {})
        # Keys containing "type" ("type", "rtype", "ytype") carry type
        # information, everything else is a description.
        info_key = "type_name" if "type" in key else "description"
        info[info_key] = desc
    return params


def _add_meta_information(
    stream: list[StreamToken],
    params: dict[str, dict[str, Optional[str]]],
    ret: Docstring,
) -> None:
    """Add the meta information into the docstring instance.

    Parameters
    ----------
    stream : list[StreamToken]
        Stream of tokens of the string.
    params : dict[str, dict[str, Optional[str]]]
        Grouped information about each section.
    ret : Docstring
        Docstring instance to add the information to.
        Entries are appended to ``ret.meta`` in stream order.

    Raises
    ------
    ParseError
        If an unexpected section is encountered.
    """
    # Tracks which params / "return" / "yield" already produced an entry so
    # that the separate type and description tokens result in a single item.
    is_done: dict[str, bool] = {}
    for token in stream:
        if token.base == "param" and not is_done.get(token.args[0], False):
            (arg_name,) = token.args
            info = params[arg_name]
            type_name = info.get("type_name")
            if type_name and type_name.endswith("?"):
                # A trailing "?" on the type marks the parameter as optional.
                is_optional = True
                type_name = type_name[:-1]
            else:
                is_optional = False

            description = info.get("description")
            # Look for the default in the merged description rather than in
            # the current token's text: when the type field comes before the
            # description field, the current token only holds the type and
            # the default would never be found.
            match = re.match(
                r".*defaults to (.+)", description or "", flags=re.DOTALL
            )
            default = match[1].rstrip(".") if match else None

            meta_item = DocstringParam(
                args=[token.key, arg_name],
                description=description,
                arg_name=arg_name,
                type_name=type_name,
                is_optional=is_optional,
                default=default,
            )
            is_done[arg_name] = True
        elif token.base == "return" and not is_done.get("return", False):
            info = params["return"]
            meta_item = DocstringReturns(
                args=[token.key],
                description=info.get("description"),
                type_name=info.get("type_name"),
                is_generator=False,
            )
            is_done["return"] = True
        elif token.base == "yield" and not is_done.get("yield", False):
            info = params["yield"]
            meta_item = DocstringYields(
                args=[token.key],
                description=info.get("description"),
                type_name=info.get("type_name"),
                is_generator=True,
            )
            is_done["yield"] = True
        elif token.base == "raise":
            (type_name,) = token.args or (None,)
            meta_item = DocstringRaises(
                args=[token.key, *token.args],
                description=token.desc,
                type_name=type_name,
            )
        elif token.base == "meta":
            meta_item = DocstringMeta(
                args=[token.key, *token.args],
                description=token.desc,
            )
        else:
            # Either a further token of an already handled param/return/yield
            # (skipped silently) or a base this function does not know.
            arg_key = token.args[0] if token.args else token.base
            if not is_done.get(arg_key, False):
                msg = (
                    "Error building meta information. "
                    f"Encountered unexpected section {arg_key}."
                )
                raise ParseError(msg)
            continue  # don't append

        ret.meta.append(meta_item)
def parse(text: Optional[str]) -> Docstring:
    """Parse the epydoc-style docstring into its components.

    Parameters
    ----------
    text : Optional[str]
        docstring to parse

    Returns
    -------
    Docstring
        parsed docstring. For empty or missing text this is an otherwise
        unpopulated docstring of epydoc style.

    Raises
    ------
    ParseError
        Propagated from tokenizing or assembling the meta section, e.g. if
        a field does not start with a recognised header.
    """
    ret = Docstring(style=DocstringStyle.EPYDOC)
    if not text:
        return ret

    text = inspect.cleandoc(text)
    # Everything before the first "@" at the start of a line is description,
    # the remainder is the meta section.
    if match := re.search("^@", text, flags=re.MULTILINE):
        desc_chunk = text[: match.start()]
        meta_chunk = text[match.start() :]
    else:
        desc_chunk = text
        meta_chunk = ""

    split_description(ret, desc_chunk)

    # The patterns are anchored to the "@" that opens each field. They are
    # applied with ``re.search``, so without the anchor a keyword appearing
    # inside the description (e.g. "@return: dict of type info: ...") would
    # be taken for the field header and the field would be misclassified.
    # Identifiers use "A-Za-z": the range "A-z" would also admit the
    # punctuation characters that sit between "Z" and "a".
    name = r"[_A-Za-z][_A-Za-z0-9]*"
    patterns = SectionPattern(
        param=re.compile(rf"^@(param|keyword|type)(\s+{name}\??):"),
        raises=re.compile(rf"^@(raise)(\s+{name}\??)?:"),
        returns=re.compile(r"^@(return|rtype|yield|ytype):"),
        # Generic keys need at least two characters.
        meta=re.compile(rf"^@([_A-Za-z][_A-Za-z0-9]+)((\s+{name}\??)*):"),
    )

    # tokenize
    stream = _tokenize(meta_chunk, patterns)

    # Combine type_name, arg_name, and description information
    params = _combine_params(stream)

    _add_meta_information(stream, params, ret)

    return ret
def compose(
    docstring: Docstring,
    rendering_style: RenderingStyle = RenderingStyle.COMPACT,
    indent: str = " ",
) -> str:
    """Render a parsed docstring into docstring text.

    Parameters
    ----------
    docstring : Docstring
        parsed docstring representation
    rendering_style : RenderingStyle
        the style to render docstrings
        (Default value = RenderingStyle.COMPACT)
    indent : str
        the characters used as indentation in the
        docstring string (Default value = ' ')

    Returns
    -------
    str
        docstring text
    """

    def process_desc(desc: Optional[str], *, is_type: bool) -> str:
        """Format the text that follows a field's colon.

        Parameters
        ----------
        desc : Optional[str]
            Description to process
        is_type : bool
            Whether the description represents type information.

        Returns
        -------
        str
            Empty string for a missing description. Otherwise the text with
            its first line either on a new, indented line or directly after
            a single space, and all further lines indented.
        """
        if not desc:
            return ""

        first, *rest = desc.splitlines()
        # EXPANDED always breaks after the colon, CLEAN only does so for
        # non-type text; any other style keeps the first line inline.
        starts_on_new_line = rendering_style == RenderingStyle.EXPANDED or (
            rendering_style == RenderingStyle.CLEAN and not is_type
        )
        head = "\n" + indent + first if starts_on_new_line else f" {first}"
        return "\n".join([head, *(indent + line for line in rest)])

    parts: list[str] = []
    append_description(docstring, parts)

    for meta in docstring.meta:
        if isinstance(meta, DocstringParam):
            # The type line (if any) comes before the description line, and
            # the description line is emitted unconditionally.
            if meta.type_name:
                type_name = f"{meta.type_name}?" if meta.is_optional else meta.type_name
                parts.append(
                    f"@type {meta.arg_name}:" + process_desc(type_name, is_type=True)
                )
            parts.append(
                f"@param {meta.arg_name}:"
                f"{process_desc(meta.description, is_type=False)}"
            )
        elif isinstance(meta, (DocstringReturns, DocstringYields)):
            if isinstance(meta, DocstringYields):
                arg_key, type_key = "yield", "ytype"
            else:
                arg_key, type_key = "return", "rtype"
            # Each of the two lines is only written when it has content.
            if meta.type_name:
                parts.append(
                    f"@{type_key}:{process_desc(meta.type_name, is_type=True)}"
                )
            if meta.description:
                parts.append(
                    f"@{arg_key}:{process_desc(meta.description, is_type=False)}"
                )
        elif isinstance(meta, DocstringRaises):
            header = f"@raise {meta.type_name}:" if meta.type_name else "@raise:"
            parts.append(header + process_desc(meta.description, is_type=False))
        else:
            # Generic entries are rebuilt from their raw arguments.
            header = f'@{" ".join(meta.args)}:'
            parts.append(header + process_desc(meta.description, is_type=False))

    return "\n".join(parts)