Skip to content

extract_valid_literals

Extract all valid literals.

Parameters:

Name Type Description Default
description str

Description string of the parameter to be examined.

required
type_string str

Type string of the parameter to be examined.

required

Returns:

Type Description
set[str]

Set of extracted literals.

Source code in src/library_analyzer/processing/api/_extract_valid_values.py
def extract_valid_literals(description: str, type_string: str) -> set[str]:
    """Extract all valid literals.

    Parameters
    ----------
    description
        Description string of the parameter to be examined.
    type_string
        Type string of the parameter to be examined.

    Returns
    -------
    set[str]
        Set of extracted literals. Contains the placeholder ``"unlistable_str"`` when the type string
        has an ``ENUM_STR`` match (other than ``"of str"``) but nothing besides ``"None"``, ``"True"``
        or ``"False"`` was extracted.

    Notes
    -----
    The module-level ``_extracted`` list is cleared at the start of each call and is filtered in place
    before the result set is built, so it reflects the last call's (unfiltered-by-set) literals.
    """
    _extracted.clear()

    nlp = MATCHER_CONFIG.get_nlp()
    descr_matcher = MATCHER_CONFIG.get_descr_matcher()
    type_matcher = MATCHER_CONFIG.get_type_matcher()

    type_match_labels = []

    none_and_bool = {"False", "None", "True"}
    # Characters that disqualify a quoted, non-alphabetic literal.
    forbidden_symbols = frozenset("!§$%&/=?*~")

    description = _preprocess_docstring(description)
    # Collapse all whitespace runs to single spaces before tokenizing.
    desc_doc = nlp.make_doc(" ".join(description.split()))

    type_string = _preprocess_docstring(type_string, is_type_string=True)
    type_doc = nlp.make_doc(type_string)

    # The return value is discarded on purpose.
    # NOTE(review): presumably the matcher's callbacks append their hits to `_extracted` -- confirm
    # against the MATCHER_CONFIG setup.
    descr_matcher(desc_doc)

    type_matches = type_matcher(type_doc)
    type_matches = _nlp_matches_to_readable_matches(type_matches, nlp, type_doc)

    if type_matches:
        type_match_labels = [match_label for match_label, _ in type_matches]

        if "ENUM_BOOL" in type_match_labels:
            _extracted.append("True")
            _extracted.append("False")

        # Single values are only taken when no curly-brace enumeration matched as well.
        if "ENUM_TYPE_CURLY" not in type_match_labels:
            for match_label, match_span in type_matches:
                if match_label == "ENUM_TYPE_SINGLE_VALS":
                    # Normalize runs of single quotes / backticks to one double quote.
                    _extracted.append(re.sub(r"['`]+", '"', match_span.text))

    has_enum_bool = "ENUM_BOOL" in type_match_labels

    def _is_valid(val: str) -> bool:
        # Boolean literals only count when the type string declares a bool.
        if val in ("True", "False") and not has_enum_bool:
            return False
        # Quoted literals whose content is not purely alphabetic are rejected if they contain
        # any forbidden symbol. `startswith` (instead of `val[0]`) is safe for empty strings.
        if val.startswith('"'):
            inner = val[1:-1]
            if not inner.isalpha() and any(c in forbidden_symbols for c in inner):
                return False
        return True

    # Filter via slice assignment: the shared list object is kept, but it is never mutated while
    # being iterated (removing during iteration skips the element following each removal).
    _extracted[:] = [val for val in _extracted if _is_valid(val)]

    extracted_set = set(_extracted)

    is_enum_str = any(
        label == "ENUM_STR" and match_span.text != "of str" for label, match_span in type_matches
    )

    # A string enum was announced but no concrete string values could be listed.
    if is_enum_str and not extracted_set.difference(none_and_bool):
        extracted_set.add("unlistable_str")

    return extracted_set