Skip to content

NumpyDocParser

Bases: AbstractDocumentationParser

Parses documentation in the NumpyDoc format. See https://numpydoc.readthedocs.io/en/latest/format.html for more information.

This class is not thread-safe. Each thread should create its own instance.

Source code in library_analyzer/processing/api/documentation_parsing/_NumpyDocParser.py
class NumpyDocParser(AbstractDocumentationParser):
    """
    Parses documentation in the NumpyDoc format. See https://numpydoc.readthedocs.io/en/latest/format.html for more
    information.

    This class is not thread-safe. Each thread should create its own instance.
    """

    def __init__(self):
        # Single-entry cache: the node whose docstring was parsed last, and the result of parsing it. For constructors
        # the cached node is the enclosing class, since that is where the parsed docstring comes from.
        self.__cached_function_node: Optional[astroid.FunctionDef] = None
        self.__cached_numpydoc_string: Optional[NumpyDocString] = None

    def get_class_documentation(
        self, class_node: astroid.ClassDef
    ) -> ClassDocumentation:
        """
        Returns the documentation of the given class: the description extracted from its parsed docstring and the
        full docstring itself. The parse result is not cached.
        """

        docstring = get_full_docstring(class_node)

        return ClassDocumentation(
            description=_get_description(NumpyDocString(docstring)),
            full_docstring=docstring,
        )

    def get_function_documentation(
        self, function_node: astroid.FunctionDef
    ) -> FunctionDocumentation:
        """
        Returns the documentation of the given function: the description extracted from its parsed docstring and the
        full docstring itself. The parsed docstring is cached for subsequent calls with the same function node.
        """

        docstring = get_full_docstring(function_node)

        return FunctionDocumentation(
            description=_get_description(
                self.__get_cached_function_numpydoc_string(function_node, docstring)
            ),
            full_docstring=docstring,
        )

    def get_parameter_documentation(
        self,
        function_node: astroid.FunctionDef,
        parameter_name: str,
        parameter_assigned_by: ParameterAssignment,
    ) -> ParameterDocumentation:
        """
        Returns the documentation of the parameter with the given name and assignment kind.

        The "Parameters" section of the relevant docstring is searched for matching entries. If several entries match,
        the last one is used. If none matches, a ParameterDocumentation with empty type, default value, and
        description is returned.
        """

        # For constructors (__init__ functions) the parameters are described on the class
        if function_node.name == "__init__" and isinstance(
            function_node.parent, astroid.ClassDef
        ):
            documented_node = function_node.parent
        else:
            documented_node = function_node
        docstring = get_full_docstring(documented_node)

        # Find matching parameter docstrings. The cache is keyed on the node the docstring was taken from. Keying it
        # on the __init__ node would return the parse of __init__'s own docstring whenever get_function_documentation
        # was called for that node first, and the parameters described on the class would not be found.
        function_numpydoc = self.__get_cached_function_numpydoc_string(
            documented_node, docstring
        )
        all_parameters_numpydoc: list[
            numpydoc.docscrape.Parameter
        ] = function_numpydoc.get("Parameters", [])
        matching_parameters_numpydoc = [
            it
            for it in all_parameters_numpydoc
            if _is_matching_parameter_numpydoc(
                it, parameter_name, parameter_assigned_by
            )
        ]

        if not matching_parameters_numpydoc:
            return ParameterDocumentation(type="", default_value="", description="")

        # If a parameter is described more than once, the last description wins
        last_parameter_numpydoc = matching_parameters_numpydoc[-1]
        type_, default_value = _get_type_and_default_value(last_parameter_numpydoc)
        return ParameterDocumentation(
            type=type_,
            default_value=default_value,
            # Strip trailing whitespace from each description line before joining
            description="\n".join(
                [line.rstrip() for line in last_parameter_numpydoc.desc]
            ),
        )

    def __get_cached_function_numpydoc_string(
        self, function_node: astroid.FunctionDef, docstring: str
    ) -> NumpyDocString:
        """
        Returns the NumpyDocString for the given node. It is only recomputed when the node differs from the previous
        one that was passed to this function. This avoids reparsing the docstring for the function itself and all of
        its parameters.

        The node is compared by identity and serves as the only cache key, so callers must pass the node the docstring
        was taken from (for constructor parameters that is the enclosing class rather than the __init__ function).

        On Lars's system this caused a significant performance improvement: Previously, 8.382s were spent inside the
        function get_parameter_documentation when parsing sklearn. Afterwards, it was only 2.113s.
        """

        if self.__cached_function_node is not function_node:
            # Parse before touching the cache: if parsing raises, the previous entry stays consistent instead of
            # pairing the new node with the old parse result.
            numpydoc_string = NumpyDocString(docstring)
            self.__cached_function_node = function_node
            self.__cached_numpydoc_string = numpydoc_string

        return self.__cached_numpydoc_string

__cached_function_node: Optional[astroid.FunctionDef] = None instance-attribute

__cached_numpydoc_string: Optional[NumpyDocString] = None instance-attribute

__get_cached_function_numpydoc_string(function_node, docstring)

Returns the NumpyDocString for the given function node. It is only recomputed when the function node differs from the previous one that was passed to this function. This avoids reparsing the docstring for the function itself and all of its parameters.

On Lars's system this caused a significant performance improvement: Previously, 8.382s were spent inside the function get_parameter_documentation when parsing sklearn. Afterwards, it was only 2.113s.

Source code in library_analyzer/processing/api/documentation_parsing/_NumpyDocParser.py
def __get_cached_function_numpydoc_string(
    self, function_node: astroid.FunctionDef, docstring: str
) -> NumpyDocString:
    """
    Returns the NumpyDocString for the given function node. It is only recomputed when the function node differs
    from the previous one that was passed to this function. This avoids reparsing the docstring for the function
    itself and all of its parameters.

    The node is compared by identity and is the only cache key: the docstring argument is ignored on a cache hit.

    On Lars's system this caused a significant performance improvement: Previously, 8.382s were spent inside the
    function get_parameter_documentation when parsing sklearn. Afterwards, it was only 2.113s.
    """

    if self.__cached_function_node is not function_node:
        # Parse before touching the cache: if parsing raises, the previous entry stays consistent instead of
        # pairing the new node with the old parse result.
        numpydoc_string = NumpyDocString(docstring)
        self.__cached_function_node = function_node
        self.__cached_numpydoc_string = numpydoc_string

    return self.__cached_numpydoc_string

__init__()

Source code in library_analyzer/processing/api/documentation_parsing/_NumpyDocParser.py
def __init__(self):
    """Creates a parser whose single-entry docstring cache is empty."""

    # Parsed docstring of the most recently handled function node ...
    self.__cached_numpydoc_string: Optional[NumpyDocString] = None
    # ... and the node it belongs to; both are None until the first docstring is parsed.
    self.__cached_function_node: Optional[astroid.FunctionDef] = None

get_class_documentation(class_node)

Source code in library_analyzer/processing/api/documentation_parsing/_NumpyDocParser.py
def get_class_documentation(
    self, class_node: astroid.ClassDef
) -> ClassDocumentation:
    """
    Returns the documentation of the given class: the description extracted from its parsed docstring and the
    full docstring itself. The parse result is not cached.
    """

    full_docstring = get_full_docstring(class_node)
    parsed_docstring = NumpyDocString(full_docstring)
    class_description = _get_description(parsed_docstring)

    return ClassDocumentation(
        description=class_description,
        full_docstring=full_docstring,
    )

get_function_documentation(function_node)

Source code in library_analyzer/processing/api/documentation_parsing/_NumpyDocParser.py
def get_function_documentation(
    self, function_node: astroid.FunctionDef
) -> FunctionDocumentation:
    """
    Returns the documentation of the given function: the description extracted from its parsed docstring and the
    full docstring itself. The parsed docstring is obtained through the per-node cache.
    """

    full_docstring = get_full_docstring(function_node)
    parsed_docstring = self.__get_cached_function_numpydoc_string(
        function_node, full_docstring
    )
    function_description = _get_description(parsed_docstring)

    return FunctionDocumentation(
        description=function_description,
        full_docstring=full_docstring,
    )

get_parameter_documentation(function_node, parameter_name, parameter_assigned_by)

Source code in library_analyzer/processing/api/documentation_parsing/_NumpyDocParser.py
def get_parameter_documentation(
    self,
    function_node: astroid.FunctionDef,
    parameter_name: str,
    parameter_assigned_by: ParameterAssignment,
) -> ParameterDocumentation:
    """
    Returns the documentation of the parameter with the given name and assignment kind.

    The "Parameters" section of the relevant docstring is searched for matching entries. If several entries match,
    the last one is used. If none matches, a ParameterDocumentation with empty type, default value, and
    description is returned.
    """

    # For constructors (__init__ functions) the parameters are described on the class
    if function_node.name == "__init__" and isinstance(
        function_node.parent, astroid.ClassDef
    ):
        documented_node = function_node.parent
    else:
        documented_node = function_node
    docstring = get_full_docstring(documented_node)

    # Find matching parameter docstrings. The cache is keyed on the node the docstring was taken from. Keying it
    # on the __init__ node would return the parse of __init__'s own docstring whenever get_function_documentation
    # was called for that node first, and the parameters described on the class would not be found.
    function_numpydoc = self.__get_cached_function_numpydoc_string(
        documented_node, docstring
    )
    all_parameters_numpydoc: list[
        numpydoc.docscrape.Parameter
    ] = function_numpydoc.get("Parameters", [])
    matching_parameters_numpydoc = [
        it
        for it in all_parameters_numpydoc
        if _is_matching_parameter_numpydoc(
            it, parameter_name, parameter_assigned_by
        )
    ]

    if not matching_parameters_numpydoc:
        return ParameterDocumentation(type="", default_value="", description="")

    # If a parameter is described more than once, the last description wins
    last_parameter_numpydoc = matching_parameters_numpydoc[-1]
    type_, default_value = _get_type_and_default_value(last_parameter_numpydoc)
    return ParameterDocumentation(
        type=type_,
        default_value=default_value,
        # Strip trailing whitespace from each description line before joining
        description="\n".join(
            [line.rstrip() for line in last_parameter_numpydoc.desc]
        ),
    )