import re
# Pattern for HCPCS Level II codes: one letter, four digits, and an optional
# modifier (a hyphen followed by two alphanumeric characters).
# re.MULTILINE makes ^ and $ anchor at every line of the sample text, so each
# line is checked on its own. re.ASCII limits \d to 0-9; without it a str
# pattern would also accept non-ASCII Unicode digits, which the letter classes
# ([a-zA-Z], [a-zA-Z0-9]) already reject.
regex = re.compile(
    r"^[a-zA-Z]\d{4}(-[a-zA-Z0-9]{2})?$",
    flags=re.MULTILINE | re.ASCII,
)

# Sample input: explanatory "#" lines (which never match because they do not
# start with a letter), followed by codes that should match and codes that
# should be rejected.
test_str = ("# The CMS, who maintain these codes, describe the format as:-\n"
            "# \"HCPCS Level II codes (also known as alpha-numeric codes) consist of a single alphabetical letter followed by 4 numeric digits.\"\n"
            "# (https://www.cms.gov/medicare/coding-billing/healthcare-common-procedure-system)\n"
            "# NOTE: This regex will not identify HCPCS Level I codes (AKA CPT codes)\n"
            "# Also, some sources say the single letter ranges only from A-V (this regex follows A-Z)\n"
            "# It's not clear if the first letter must be capitalized. This regex assumes not.\n"
            "# This version of the regex matches optional modifiers (ie a hyphen then 2 characters after the code)\n"
            "# (https://www.aapc.com/resources/what-are-medical-coding-modifiers)\n"
            "# A separate regex has been created on regex101 that ignores modifiers\n\n"
            "# Genuine code patterns\n"
            "E8015\n"
            "A6410\n"
            "C5278\n"
            "G2102\n"
            "M1221\n"
            "V2623\n"
            "V2756\n"
            "c5278\n"
            "g2102\n"
            "m1221\n"
            "v2623\n"
            "v2756\n"
            "G2102-23\n"
            "E8015-OK\n"
            "V2756-L5\n"
            "V2756-5Q\n\n"
            "# Should fail\n"
            "EE8015\n"
            "6410\n"
            "E641O\n"
            "[6410\n"
            "_6510\n"
            "g212\n"
            "C52789\n"
            "G2101-\n"
            "G2101-2\n"
            "G2101-K\n"
            "G2101-OKK\n"
            "G2101-123\n"
            "G2101-!!\n"
            "G2101-!1\n"
            "G2101-()")


def describe_match(match_num: int, match: "re.Match[str]") -> "list[str]":
    """Return the report lines for one match and each of its capture groups.

    Args:
        match_num: 1-based position of the match in the overall report.
        match: A match object produced by a compiled pattern.

    Returns:
        One line describing the whole match, followed by one line per capture
        group defined in the pattern.
    """
    lines = [
        f"Match {match_num} was found at {match.start()}-{match.end()}: {match.group()}"
    ]
    for group_num, group in enumerate(match.groups(), start=1):
        if group is None:
            # An optional group that took no part in the match has no span:
            # start()/end() return -1, so reporting "-1--1: None" would be
            # misleading. Say so explicitly instead.
            lines.append(f"Group {group_num} did not participate in the match")
        else:
            lines.append(
                f"Group {group_num} found at "
                f"{match.start(group_num)}-{match.end(group_num)}: {group}"
            )
    return lines


# Report every line of the sample text that is a well-formed code.
matches = regex.finditer(test_str)
for match_num, match in enumerate(matches, start=1):
    for line in describe_match(match_num, match):
        print(line)
# Please keep in mind that these code samples are automatically generated and are not guaranteed to work.
# If you find any syntax errors, feel free to submit a bug report.
# For a full regex reference for Python, please visit: https://docs.python.org/3/library/re.html