def extract_valid_literals(description: str, type_string: str) -> set[str]:
    """Extract all valid literals.

    The module-level ``_extracted`` list is used as the working buffer: it is
    cleared at the start of every call, filled while the matchers run, filtered
    in place and finally returned as a set.

    Parameters
    ----------
    description
        Description string of the parameter to be examined.
    type_string
        Type string of the parameter to be examined.

    Returns
    -------
    set[str]
        Set of extracted literals. ``"unlistable_str"`` is added when the type
        string yields an ``ENUM_STR`` match (other than ``"of str"``) but no
        literal apart from ``"True"``, ``"False"`` and ``"None"`` was found.
    """
    _extracted.clear()

    nlp = MATCHER_CONFIG.get_nlp()
    descr_matcher = MATCHER_CONFIG.get_descr_matcher()
    type_matcher = MATCHER_CONFIG.get_type_matcher()

    none_and_bool = {"False", "None", "True"}
    # Characters that disqualify a quoted literal. "\u00a7" is the section sign;
    # it is written as an escape so that a file re-encoding cannot corrupt it.
    forbidden_chars = frozenset("!\u00a7$%&/=?*~")

    description = _preprocess_docstring(description)
    # Collapse all whitespace runs to single spaces before tokenizing.
    desc_doc = nlp.make_doc(" ".join(description.split()))

    type_string = _preprocess_docstring(type_string, is_type_string=True)
    type_doc = nlp.make_doc(type_string)

    # The return value is discarded: literals from the description can only
    # reach `_extracted` through side effects of the matcher (presumably its
    # on-match callbacks -- confirm against the matcher configuration).
    descr_matcher(desc_doc)
    type_matches = _nlp_matches_to_readable_matches(type_matcher(type_doc), nlp, type_doc)

    type_match_labels = [label for label, _ in type_matches]
    has_bool_enum = "ENUM_BOOL" in type_match_labels

    if has_bool_enum:
        _extracted.extend(("True", "False"))

    # Single values are only taken over when no curly-brace enum was matched.
    if "ENUM_TYPE_CURLY" not in type_match_labels:
        for label, span in type_matches:
            if label == "ENUM_TYPE_SINGLE_VALS":
                # Normalize single quotes and backticks to double quotes.
                _extracted.append(re.sub(r"['`]+", '"', span.text))

    def is_valid(val: str) -> bool:
        # Boolean literals are only valid if the type string announced a bool.
        if not has_bool_enum and val in ("True", "False"):
            return False
        # Quoted strings must not contain any forbidden character.
        # `startswith` is also safe for an empty string, unlike `val[0]`.
        return not (val.startswith('"') and any(c in forbidden_chars for c in val[1:-1]))

    # Filter in a single pass instead of calling `remove` while iterating, which
    # skipped the element following every removed value. Slice assignment keeps
    # the module-level list object (and thus its final state) intact.
    _extracted[:] = [val for val in _extracted if is_valid(val)]

    extracted_set = set(_extracted)

    is_enum_str = any(label == "ENUM_STR" and span.text != "of str" for label, span in type_matches)

    # A string enum without any concrete string literal cannot be listed.
    if is_enum_str and not extracted_set.difference(none_and_bool):
        extracted_set.add("unlistable_str")

    return extracted_set