import re
# Compiled pattern for the standalone, upper-case token "CUI".
# re.VERBOSE makes the newlines inside the triple-quoted pattern insignificant,
# so the effective expression is just \bCUI\b. No IGNORECASE flag is set, so
# matching is case sensitive.
regex = re.compile(
    r"""
\bCUI\b
""",
    flags=re.UNICODE | re.DOTALL | re.VERBOSE | re.MULTILINE,
)
# Sample text the pattern is run against, built from adjacent string literals
# that Python concatenates into one string. It holds a /** ... **/ header
# describing the pattern, a copy of the pattern, and two labelled lists of
# example lines ("POSITIVE CASES" / "NEGATIVE CASES").
# The labels are only text: nothing in this script checks that a line does or
# does not match. The header prose itself contains the standalone word "CUI",
# so it contributes matches as well.
test_str = ("/**\n"
"Contributors: Charles Staich\n"
"Match Confidence: Low\n"
"Language: Regular Expressions (regex)\n"
"Language Version: \n"
"Flags:\n"
"Purpose: Match CUI Basic and CUI Specified. Note the importance of evaluating multiple possible labels, such that CUI Specified may also be considered if this pattern matches. CUI Specified requires *at least* the same protections as CUI Basic (citation needed).\n"
"Assumptions: CUI Basic and CUI Specified documents contain the phrase 'CUI'\n"
"Plain English Pattern Narrative: \n"
" Case sensitive, and does not match 'cui'.\n"
" Indiscriminately matches any single instance of the word* 'CUI' without any additional qualifiers, context, or markings necessary (Low Confidence)\n"
"Capturing Group(s): None\n"
"Definitions:\n"
" Word: A string of alphanumeric characters surrounded by word boundaries (\\b).\n"
"Comments:\n"
" Do not use this in Production without heavy testing.\n"
" I realize now why the community has taken to using the word cooey. I'm certain there have been many collective hours spent reviewing false positive matches they created through filing standards, policy, communications, and documentation.\n"
"**/\n\n"
"// PATTERN:\n"
"\\bCUI\\b\n\n"
"// POSITIVE CASES:\n"
"CUI\n"
"asdf CUI jkl;\n"
"This document does not contain CUI.\n"
"This portal is not permitted to be used for transferring CUI.\n"
"No CUI allowed.\n"
"===CUI===\n"
"(CUI)\n"
"***CUI***\n"
"^CUI^\n"
"!CUI!\n"
"~CUI~\n"
"CUI//SP-TEST\n"
"CUI//TEST\n\n\n"
"// NEGATIVE CASES:\n"
"cui\n"
"0CUI-\n"
"ACUI*\n"
"-CUI2\n"
"CUi CuI cUI\n"
"AAACUIAAA\n"
"Cooey\n"
"Circuit\n"
"CIRCUIT\n"
"CUISP\n")
# Scan the sample text and print one line per match with its 1-based ordinal,
# its start/end offsets in test_str, and the matched text.
matches = regex.finditer(test_str)
for match_num, match in enumerate(matches, start=1):
    print(f"Match {match_num} was found at {match.start()}-{match.end()}: {match.group()}")
    # Report each capturing group of the match as well. match.groups() is
    # empty when the pattern defines no groups, so this inner loop then
    # prints nothing.
    for group_num, group in enumerate(match.groups(), start=1):
        print(f"Group {group_num} found at {match.start(group_num)}-{match.end(group_num)}: {group}")
# Please keep in mind that these code samples are automatically generated and
# are not guaranteed to work. If you find any syntax errors, feel free to
# submit a bug report. For a full regex reference for Python, please visit:
# https://docs.python.org/3/library/re.html