Skip to content

extract_boundary

Extract valid BoundaryTypes.

Extract valid BoundaryTypes described by predefined rules.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| description | str | Description string of the parameter to be examined. | required |
| type_string | str | Type string of the parameter to be examined. | required |

Returns:

| Type | Description |
| --- | --- |
| set[BoundaryType] | A set containing valid BoundaryTypes. |

Source code in src/library_analyzer/processing/api/_extract_boundary_values.py
def extract_boundary(description: str, type_string: str) -> set[BoundaryType]:
    """Collect the BoundaryTypes found for a single parameter.

    Both the type string and the description are preprocessed, parsed and run
    through the rule matcher. Which of the resulting matches are turned into
    boundaries depends on how many "BOUNDARY_TYPE" matches the type string yields.

    Parameters
    ----------
    description
        Description string of the parameter to be examined.

    type_string
        Type string of the parameter to be examined.

    Returns
    -------
    set[BoundaryType]
        Whatever the internal BoundaryList reports via ``get_boundaries()``.
    """
    collected = BoundaryList()

    # Matches of the type string, each as a (rule label, matched span) pair.
    parsed_type = _nlp(_preprocess_docstring(type_string))
    labelled_type_matches = [
        (_nlp.vocab.strings[rule_id], parsed_type[first:last]) for rule_id, first, last in _matcher(parsed_type)
    ]

    # Matches of the description, gathered sentence by sentence in document order.
    parsed_description = _nlp(_preprocess_docstring(description))
    labelled_desc_matches = [
        (_nlp.vocab.strings[rule_id], sentence[first:last])
        for sentence in parsed_description.sents
        for rule_id, first, last in _matcher(sentence)
    ]

    # Without any match in the type string there is nothing to evaluate.
    if not labelled_type_matches:
        return collected.get_boundaries()

    # Possible numeric data types that may be used with the parameter to be examined.
    numeric_types = [span.text for label, span in labelled_type_matches if label == "BOUNDARY_TYPE"]
    # Restrictions of the type such as non-negative.
    restrictions = [pair for pair in labelled_type_matches if pair[0] != "BOUNDARY_TYPE"]

    if len(numeric_types) == 1:
        # Exactly one type: the boundary type comes from the type string, the value
        # range from a single type-string restriction or, failing that, the description.
        boundary_type_text = numeric_types[0]
        chosen_label = ""
        chosen_span: Span | None = None

        if len(restrictions) == 1:
            chosen_label, chosen_span = restrictions[0]
        elif labelled_desc_matches:
            # The first interval match wins; otherwise use the first description match.
            chosen_label, chosen_span = next(
                (pair for pair in labelled_desc_matches if pair[0] == "BOUNDARY_INTERVAL"),
                labelled_desc_matches[0],
            )

            # A leading type match in the description overrides the type text and
            # the second description match is taken as the actual boundary.
            if chosen_label == "BOUNDARY_TYPE" and len(labelled_desc_matches) > 1:
                boundary_type_text = chosen_span.text
                chosen_label, chosen_span = labelled_desc_matches[1]

        collected.add_boundary(chosen_label, boundary_type_text, chosen_span)

    elif len(numeric_types) > 1:
        # Several types: analyze the type string itself if it holds a
        # "BOUNDARY_TYPE_REL_VAL" match, otherwise analyze the description matches.
        has_type_rel_val = any(label == "BOUNDARY_TYPE_REL_VAL" for label, _ in labelled_type_matches)
        _analyze_matches(labelled_type_matches if has_type_rel_val else labelled_desc_matches, collected)

    return collected.get_boundaries()