# Source code for pymend.pymend
"""Module for general management of writing docstrings of multiple files."""
import ast
import platform
import sys
import tempfile
import traceback
from dataclasses import dataclass
from pathlib import Path
from typing import NamedTuple
from click import echo
import pymend.docstring_parser as dsp
from .file_parser import AstAnalyzer
from .output import diff
from .report import Changed
from .types import ElementDocstring, FixerSettings
# Package metadata. ``__version__`` is embedded in the header line of every
# patch generated by this module.
__author__ = "J-E. Nitschke"
__copyright__ = "Copyright 2012-2021 A. Daouzli"
__licence__ = "MIT"
__version__ = "1.1.0"
__maintainer__ = "J-E. Nitschke"
[docs]
@dataclass
class FileContentRepresentation:
    """Hold the content of a file both line by line and as a single string.

    Attributes
    ----------
    lst : list[str]
        The content split into individual lines.
    lines : str
        The same content as one contiguous string.
    """

    # Despite the names: `lst` is the list of lines, `lines` is the full text.
    lst: list[str]
    lines: str
[docs]
class Styles(NamedTuple):
    """Pair of docstring styles used when converting a file.

    Attributes
    ----------
    input_style : dsp.DocstringStyle
        Style the existing docstrings are parsed with.
    output_style : dsp.DocstringStyle
        Style the rewritten docstrings are produced in.
    """

    input_style: dsp.DocstringStyle
    output_style: dsp.DocstringStyle
[docs]
class PyComment:
    """Manage the docstrings of a single python file.

    It is used to parse and rewrite in a Pythonic way all the
    functions', methods' and classes' docstrings.
    The changes can then be provided as a patch or written back to the file.
    """
def __init__(
    self,
    input_file: Path,
    *,
    fixer_settings: FixerSettings,
    output_style: dsp.DocstringStyle = dsp.DocstringStyle.NUMPYDOC,
    input_style: dsp.DocstringStyle = dsp.DocstringStyle.AUTO,
    proceed_directly: bool = True,
) -> None:
    r"""Store the configuration and load the content of the file to process.

    Parameters
    ----------
    input_file : Path
        Path of the file whose docstrings should be processed.
        It is read as UTF-8 text.
    fixer_settings : FixerSettings
        Settings for which fixes should be performed.
    output_style : dsp.DocstringStyle
        Output style to use for docstring.
        (Default value = dsp.DocstringStyle.NUMPYDOC)
    input_style : dsp.DocstringStyle
        Input docstring style.
        Auto means that the style is detected automatically. Can cause issues
        when styles are mixed in examples or descriptions.
        (Default value = dsp.DocstringStyle.AUTO)
    proceed_directly : bool
        Whether the file should be parsed directly with the call of
        the constructor. (Default value = True)
    """
    self.input_file = input_file
    self.style = Styles(input_style=input_style, output_style=output_style)
    # Keep the content both as one string and split into lines
    # (line endings are preserved in the list form).
    source_text = self.input_file.read_text(encoding="utf-8")
    self._input = FileContentRepresentation(
        lst=source_text.splitlines(keepends=True), lines=source_text
    )
    self._output = FileContentRepresentation(lst=[], lines="")
    self.settings = fixer_settings
    # Names of the elements whose docstring was modified.
    self._changed = []
    # Parsed elements of the file; filled in by `_parse`.
    self.docs_list = []
    # Becomes True once the output has been computed and verified.
    self.fixed = False
    if proceed_directly:
        self.proceed()
[docs]
def proceed(self) -> None:
"""Parse file and generates/converts the docstrings."""
self._parse()
self._compute_before_after()
def _parse(self) -> list[ElementDocstring]:
    """Analyze the input content and collect its documentable elements.

    The result is also stored in `self.docs_list`.

    Returns
    -------
    list[ElementDocstring]
        Elements reported by the ast analyzer, ordered by their `lines`
        attribute.
    """
    analyzer = AstAnalyzer(self._input.lines, settings=self.settings)
    elements = list(analyzer.parse_from_ast())
    elements.sort(key=lambda element: element.lines)
    self.docs_list = elements
    return elements
def _compute_before_after(self) -> tuple[list[str], list[str], list[str]]:
r"""Compute the before and after and assert equality and stability.
Make sure that pymend is idempotent.
Make sure that the original and final Ast's are the same (except for docstring.)
Returns
-------
tuple[list[str], list[str], list[str]]
Tuple of before, after, changed,
"""
list_from, list_to, list_changed = self._get_changes()
self._output.lst = list_to
self._output.lines = "".join(list_to)
self._changed = list_changed
self.assert_equality(self._input.lines, self._output.lines)
self.assert_stability(list_from, list_to)
self.fixed = True
return list_from, list_to, list_changed
def _get_changes(self) -> tuple[list[str], list[str], list[str]]:
r"""Compute the list of lines before and after the proposed docstring changes.
Elements of the list already contain '\n' at the end.
Returns
-------
list_from : list[str]
Original file as list of lines.
list_to : list[str]
Modified content as list of lines.
list_changed : list[str]
List of names of elements that were changed.
Raises
------
ValueError
If the endline of a docstring was parsed as None.
"""
# Handle case of empty file here.
list_from = self._input.lst or [""]
list_to: list[str] = []
list_changed: list[str] = []
last = 0
# Loop over all found docstrings and replace the lines where they used to
# (or ought to) be with the new docstring.
for e in self.docs_list:
start, end = e.lines
if end is None:
log = self.dump_to_file(
"INTERNAL ERROR: End of docstring is None."
" Not sure what to do with this yet.",
"Original file:.\n",
"".join(list_from),
"Problematic element:\n",
repr(e),
)
msg = (
"INTERNAL ERROR: End of docstring is None."
" Not sure what to do with this yet."
" Please report a bug on"
" https://github.com/JanEricNitschke/pymend/issues."
f" This diff might be helpful: {log}"
)
raise ValueError(msg)
# e.line are line number starting at one.
# We are now using them to index into a list starting at 0.
start, end = start - 1, end - 1
# Grab output docstring and add quotes, indentation and modifiers
in_docstring = e.docstring
# Do not need to worry about start being out of range
# if there was a docstring then it points to that.
# If there wasnt then there should still be at least one line
# after the function/class definition. Otherwise that would
# already have raised an error earlier.
old_line = list_from[start]
leading_whitespace = old_line[: -len(old_line.lstrip())]
trailing_comment = (
self._get_trailing_comment(list_from[end]) if e.had_docstring else ""
)
out_docstring = self._finalizes(
docstring=e.output_docstring(
output_style=self.style.output_style,
input_style=self.style.input_style,
settings=self.settings,
),
indentation=leading_whitespace,
modifier=e.modifier,
trailing=trailing_comment,
)
# Check if the docstring changed and if so, add it to the list of changed
# We can not directly compare with the original out_docstring
# because that is missing indentation.
# And it is easiest to add the quotes, modifiers, trailings
# in one go with the indentation. So for this comparison we have to
# strip them away again.
if (
in_docstring
!= out_docstring.strip()[
3 + len(e.modifier) : -(3 + len(trailing_comment))
]
):
list_changed.append(e.name)
# Add all the unchanged things between last and current docstring
list_to.extend(list_from[last:start])
# Add the new docstring
list_to.extend(out_docstring.splitlines(keepends=True))
# If there was no old docstring then we need to make sure we
# do not remove the content that was originally on the first line
# of element.
if not in_docstring:
list_to.append(old_line)
last = end + 1
# Add the rest of the file.
if last < len(list_from):
list_to.extend(list_from[last:])
return list_from, list_to, list_changed
def _get_trailing_comment(self, line: str) -> str:
"""Grab any trailing comment that was potentially at the last line.
Parameters
----------
line : str
The last line of the docstring.
Returns
-------
str
The trailing comment
"""
# This might need some work in the future if there are both
# types in the same line.
line = line.strip()
closing_quotes = max(line.rfind('"""'), line.rfind("'''"))
if closing_quotes == -1:
return ""
return line[closing_quotes + 3 :]
def _finalizes(
self,
*,
docstring: str,
quotes: str = '"""',
indentation: str = " ",
modifier: str = "",
trailing: str = "",
) -> str:
r"""Add quotes, indentation and modifiers to the docstring.
Parameters
----------
docstring : str
The raw docstring to complete.
quotes : str
Quotes to use for the docstring. (Default value = '\"\"\"')
indentation : str
How much to indent the docstring lines (Default value = ' ')
modifier : str
Modifier to put before the opening triple quotes.
Any combination of ("r", "f", "u") (Default value = '')
trailing : str
Any trailing comment was after the original docstring but on
the same line. (Default value = '')
Returns
-------
str
The properly indented docstring, wrapped in triple quotes
and preceded by the desired modifier.
"""
if not docstring:
return ""
split = f"{modifier}{quotes}{docstring}".splitlines()
# One line docstring get the quotes on the same line
if len(split) > 1:
split.append(quotes)
# Multi-line get them on the next
else:
split[0] += quotes
for index, line in enumerate(split):
if line.strip():
split[index] = indentation + line
return "\n".join(split) + trailing + "\n"
[docs]
def assert_stability(self, src: list[str], dst: list[str]) -> None:
"""Assert that running pymend on its own output does not change anything.
Parameters
----------
src : list[str]
List of lines from the input file.
dst : list[str]
List of lines that pymend produced.
Raises
------
AssertionError
If a second run of pymend produces a different output than the first.
"""
# pylint: disable=protected-access
comment = self.__copy_from_output()
comment._parse() # noqa: SLF001
before, after, changed = comment._get_changes() # noqa: SLF001
if changed or not (dst == before and dst == after):
log = self.dump_to_file(
"INTERNAL ERROR: PyMend produced different "
"docstrings on the second pass.\n"
"Changed:\n",
"\n".join(changed),
"".join(diff(src, dst, "source", "first pass")),
"".join(diff(dst, after, "first pass", "second pass")),
)
msg = (
"INTERNAL ERROR:"
" PyMend produced different docstrings on the second pass."
" Please report a bug on"
" https://github.com/JanEricNitschke/pymend/issues."
f" This diff might be helpful: {log}"
)
raise AssertionError(msg)
[docs]
def assert_equality(self, src_lines: str, dst_lines: str) -> None:
"""Assert that running pymend does not change functional ast.
Done by comparing the asts for the original and produced outputs
while ignoring the docstrings themselves.
Parameters
----------
src_lines : str
Lines from the input file.
dst_lines : str
Lines that pymend produced.
Raises
------
AssertionError
If the content of the input file could not be parsed into an ast.
AssertionError
If the output from pymend could not be parsed into an ast.
AssertionError
If the output from pymend produces a different (reduced) ast
than the input.
"""
try:
src_ast = ast.parse(src_lines)
except Exception as exc:
msg = f"Failed to parse source file AST: {exc}\n"
raise AssertionError(msg) from exc
try:
dst_ast = ast.parse(dst_lines)
except Exception as exc: # noqa: BLE001
log = self.dump_to_file(
"INTERNAL ERROR: PyMend produced invalid code:\n",
"".join(traceback.format_tb(exc.__traceback__)),
dst_lines,
)
msg = (
f"INTERNAL ERROR: PyMend produced invalid code: {exc}. "
"Please report a bug on"
" https://github.com/JanEricNitschke/pymend/issues."
f" This invalid output might be helpful: {log}"
)
raise AssertionError(msg) from None
src_ast_list = self._stringify_ast(src_ast)
dst_ast_list = self._stringify_ast(dst_ast)
if src_ast_list != dst_ast_list:
log = self.dump_to_file(
"INTERNAL ERROR: PyMend produced code "
"that is not equivalent to the source\n",
"".join(diff(src_ast_list, dst_ast_list, "src", "dst")),
)
msg = (
"INTERNAL ERROR: PyMend produced code that is not equivalent to the"
" source. Please report a bug on "
"https://github.com/JanEricNitschke/pymend/issues."
f" This diff might be helpful: {log}"
)
raise AssertionError(msg) from None
def __copy_from_output(self) -> "PyComment":
    """Create a new PyComment whose input is the output of this instance.

    The constructor is bypassed, so no file is read and only the
    attributes assigned here are available on the copy.

    Returns
    -------
    'PyComment'
        New instance sharing the settings and styles of this one, with
        this instance's output as its input and an empty output.
    """
    # pylint: disable=protected-access
    clone = PyComment.__new__(PyComment)
    clone.settings = self.settings
    clone.style = self.style
    clone.docs_list = []
    # Copy the list so that the two instances do not share it.
    clone._input = FileContentRepresentation(  # noqa: SLF001
        list(self._output.lst), self._output.lines
    )
    clone._output = FileContentRepresentation([], "")  # noqa: SLF001
    return clone
def _strip_ast(self, ast_node: ast.AST) -> None:
"""Remove all docstrings from the ast.
Parameters
----------
ast_node : ast.AST
Node representing the full ast.
"""
for node in ast.walk(ast_node):
# let's work only on functions & classes definitions
if not isinstance(
node, (ast.FunctionDef, ast.ClassDef, ast.AsyncFunctionDef, ast.Module)
):
continue
if not node.body:
continue
if not isinstance(first_element := node.body[0], ast.Expr):
continue
if not isinstance(docnode := first_element.value, ast.Constant):
continue
if not isinstance(docnode.value, str):
continue
node.body = node.body[1:]
def _stringify_ast(self, node: ast.AST) -> list[str]:
"""Turn ast into string representation with all docstrings removed.
Parameters
----------
node : ast.AST
Node to turn into a reduced string representation.
Returns
-------
list[str]
List of lines making up the reduced string representation.
"""
self._strip_ast(node)
return ast.dump(node, indent=1).splitlines(keepends=True)
[docs]
def dump_to_file(self, *output: str, ensure_final_newline: bool = True) -> str:
    """Write all given strings to a new temporary log file.

    The file is not deleted automatically.

    Parameters
    ----------
    *output : str
        List of strings to dump into the output.
    ensure_final_newline : bool
        Whether to make sure that every dumped string
        ends in a new line. (Default value = True)

    Returns
    -------
    str
        Path to the produced temp file.
    """
    with tempfile.NamedTemporaryFile(
        mode="w", prefix="blk_", suffix=".log", delete=False, encoding="utf8"
    ) as handle:
        for chunk in output:
            handle.write(chunk)
            # Empty chunks never get a newline appended.
            needs_newline = ensure_final_newline and chunk and chunk[-1] != "\n"
            if needs_newline:
                handle.write("\n")
    return handle.name
def _docstring_diff(self) -> list[str]:
    """Compute the diff between the original and the rewritten file content.

    The two sides are labelled with the input file path prefixed by
    ``a/`` and ``b/`` respectively.

    Returns
    -------
    list[str]
        The resulting diff
    """
    before_label = f"a/{self.input_file}"
    after_label = f"b/{self.input_file}"
    return diff(self._input.lst, self._output.lst, before_label, after_label)
[docs]
def output_patch(self) -> Changed:
    """Emit the changes as a patch if there are any.

    The patch is written to stdout if the name of the input file is "-"
    and to a patch file otherwise. Nothing is written when no element
    changed.

    Returns
    -------
    Changed
        Whether there were any changes.
    """
    if not self.fixed:
        self.proceed()
    if not self._changed:
        return Changed.NO
    patch_lines = self._get_patch_lines()
    if self.input_file.name == "-":
        sys.stdout.writelines(patch_lines)
    else:
        self._write_patch_file(patch_lines)
    return Changed.YES
[docs]
def output_fix(self) -> Changed:
    """Emit the fixed file content.

    The content is written to stdout if the name of the input file is "-".
    Otherwise the input file is overwritten, but only if its content
    actually changed.

    Returns
    -------
    Changed
        Whether there were any changes.

    Raises
    ------
    AssertionError
        If the list of changed elements disagrees with whether the file
        content changed (changes reported for identical content or
        differing content without any reported change).
    """
    if not self.fixed:
        self.proceed()
    content_unchanged = self._input.lines == self._output.lines
    nothing_reported = len(self._changed) == 0
    # Both views of "did anything change" have to agree.
    if content_unchanged != nothing_reported:
        log = self.dump_to_file(
            "INTERNAL ERROR: "
            "Elements having changed does not line up with list of changed "
            "elements.\n",
            "List of changed elements:\n",
            "\n".join(self._changed),
            "Diff\n",
            "".join(self._docstring_diff()),
        )
        msg = (
            "INTERNAL ERROR: "
            "Elements having changed does not line up with list of changed"
            " elements."
            " Please report a bug on"
            " https://github.com/JanEricNitschke/pymend/issues."
            f" This invalid output might be helpful: {log}"
        )
        raise AssertionError(msg)
    if self.input_file.name == "-":
        sys.stdout.writelines(self._output.lst)
    elif not content_unchanged:
        plural = "s" if len(self._changed) > 1 else ""
        names = ", ".join(self._changed)
        echo(
            f"Modified docstrings of element{plural} "
            f"({names}) in file {self.input_file}."
        )
        self._overwrite_source_file()
    return Changed.YES if self._changed else Changed.NO
def _get_patch_lines(self) -> list[str]:
    r"""Return the lines of the patch for the input file.

    The patch consists of a header naming the Pymend version followed by
    the diff between the original and the rewritten content.

    Returns
    -------
    list[str]
        the diff as a list of \n terminated lines
    """
    patch = [f"# Patch generated by Pymend v{__version__}\n\n"]
    patch.extend(self._docstring_diff())
    return patch
def _write_patch_file(self, lines_to_write: list[str]) -> None:
r"""Write lines_to_write to a the file called patch_file.
Parameters
----------
lines_to_write : list[str]
lines to write to the file - they should be \n terminated
"""
# Change this if pathlib ever gets a `append_suffix` method
# To Path(self.input_file).append_suffix(".patch")
file_path = Path(self.input_file)
base_name = file_path.name
patch_name = f"{base_name}.patch"
patch_path = Path(patch_name)
directory_parts = file_path.parent.parts
length = len(directory_parts)
index = 1
while patch_path.exists() and index <= length:
patch_name = "_".join(
[*directory_parts[length - index :], f"{base_name}.patch"]
)
patch_path = Path(patch_name)
index += 1
with patch_path.open("w", encoding="utf-8") as file:
file.writelines(lines_to_write)
def _overwrite_source_file(self) -> None:
r"""Overwrite the file with line_to_write.
Parameters
----------
lines_to_write : list[str]
lines to write to the file - they should be \n terminated
"""
tmp_filename = Path(f"{self.input_file}.writing")
ok = False
try:
with tmp_filename.open("w", encoding="utf-8") as file:
file.writelines(self._output.lines)
ok = True
finally:
if ok:
if platform.system() == "Windows":
self._windows_rename(tmp_filename)
else:
tmp_filename.rename(self.input_file)
else:
tmp_filename.unlink()
def _windows_rename(self, tmp_filename: Path) -> None:
"""Workaround the fact that os.rename raises an OSError on Windows.
Parameters
----------
tmp_filename : Path
The file to rename
"""
input_file = Path(self.input_file)
if input_file.is_file():
input_file.unlink()
tmp_filename.rename(input_file)
[docs]
def report_issues(self) -> tuple[int, str]:
"""Produce a report of all found issues with the docstrings in the file.
Returns
-------
tuple[int, str]
The number of elements that had issues as well as
a string representation of those.
"""
issues: list[str] = []
for elem in self.docs_list:
n_issues, report = elem.report_issues()
if n_issues:
issues.append(report)
if not issues:
return 0, ""
report = (
f"{'*'*50}\nThe following issues were found in file {self.input_file}:\n"
+ "\n".join(issues)
)
return len(issues), report