import re
# Demo script: find Bible references ("Gen 1:1", "2nd Kings 1", "Ruth 99-150")
# in a block of text and print every match together with its capture groups.
#
# The pattern is assembled from small named fragments so each part can be read
# and maintained on its own. The fragments are concatenated without any
# separators, so the compiled pattern is one ordinary (non-verbose) regex.

# Ordinal prefixes for numbered books ("1 Samuel", "2nd Kings", "Third John").
# The separating space is written as an escaped space, "\ ".
_FIRST = r"(?:1st|1|First)\ "
_SECOND = r"(?:2nd|2|Second)\ "
_THIRD = r"(?:3rd|3|Third)\ "

# Book-name alternatives. The regex engine tries them left to right, so the
# order below is significant and must be preserved when adding entries.
_BOOK_NAMES = (
    r"Gen(?:esis)?",
    r"Exo(?:dus)?",
    r"Lev(?:iticus)?",
    r"Num(?:bers)?",
    r"Deut(?:eronomy)?",
    r"Josh(?:ua)?",
    r"Judg(?:es)?",
    r"Ru(?:th)?",
    _FIRST + r"Sam(?:uel)?",
    _SECOND + r"Sam(?:uel)?",
    _FIRST + r"Kings",
    _SECOND + r"Kings",
    _FIRST + r"Chr(?:onicles|on)?",
    _SECOND + r"Chr(?:onicles|on)?",
    r"Ezr(?:a)?",
    r"Neh(?:emiah)?",
    r"Est(?:her)?",
    r"Job",
    r"Psa(?:lms?)?",
    r"Prov(?:erbs)?",
    r"Eccles(?:iastes)?",
    r"Songs",
    r"(?:Song\ of\ )?Solomon",
    r"Isa(?:iah)?",
    r"Jer(?:emiah)?",
    r"Lam(?:entations)?",
    r"Eze(?:kiel)?",
    r"Dan(?:iel)?",
    r"Hos(?:ea)?",
    r"Joel",
    r"Am(?:os)?",
    r"Obad(?:iah)?",
    r"Jon(?:ah)?",
    r"Micah",
    r"Na(?:hum)?",
    r"Hab(?:akkuk)?",
    r"Zeph(?:aniah)?",
    r"Hagg(?:ai)?",
    r"Zech(?:ariah)?",
    r"Mal(?:achi)?",
    r"Matt(?:hew)?",
    r"Mark",
    r"Luke",
    r"John",
    r"Acts",
    r"Rom(?:ans)?",
    _FIRST + r"Cor(?:inthians)?",
    _SECOND + r"Cor(?:inthians)?",
    r"Gal(?:atians)?",
    r"Eph(?:esians)?",
    r"Phil(?:ippians)?",
    r"Col(?:ossians)?",
    _FIRST + r"Thess(?:alonians)?",
    _SECOND + r"Thess(?:alonians)?",
    _FIRST + r"Tim(?:othy)?",
    # Accept the "Tim" abbreviation for the second epistle too, matching the
    # first epistle; previously only the full "Timothy" was recognised here.
    _SECOND + r"Tim(?:othy)?",
    r"Tit(?:us)?",
    r"Philem(?:on)?",
    r"Heb(?:rews)?",
    r"James",
    _FIRST + r"Peter",
    _SECOND + r"Peter",
    _FIRST + r"John",
    _SECOND + r"John",
    _THIRD + r"John",
    r"Jud(?:e)?",
    r"Rev(?:elation)?",
)

# A whole number in the range 1-150 (chapter) or 1-176 (verse).
_CHAPTER_NUMBER = r"\b(?:150|1[0-4]\d|[1-9]\d|[1-9])\b"
_VERSE_NUMBER = r"\b(?:17[0-6]|1[0-6]\d|[1-9]\d|[1-9])\b"

# A single chapter or a "first-last" chapter range.
_CHAPTER = (
    r"(?P<chapter>"
    + _CHAPTER_NUMBER
    # Refuse a chapter range that is directly followed by ":" (e.g. "1-3:4").
    + r"(?!\s*-\s*\d+\b:)"
    + r"(?:\s*-\s*" + _CHAPTER_NUMBER + r")?"
    # Refuse a colon that is directly followed by a comma (e.g. "5:,4").
    + r"(?!\s*:\s*,)"
    + r")"
)

# One or more verses or verse ranges, optionally comma-separated.
_VERSES = (
    r"(?P<verse>(?:"
    + r"(?:\s*,\s*)?"
    + _VERSE_NUMBER
    + r"(?:\s*-\s*" + _VERSE_NUMBER + r")?"
    + r")+)"
)

# Full reference: optional "(", book name, chapter, optional ":verses", optional ")".
# re.MULTILINE has no effect here (the pattern has no ^ or $ anchors); it is
# kept so that regex.flags stays the same as before.
regex = re.compile(
    r"\(?\b(?P<book_name>" + "|".join(_BOOK_NAMES) + r")\b\s?"
    + _CHAPTER
    + r"(?:\s*:\s*(?!\s*,)" + _VERSES + r")?"
    + r"\)?",
    flags=re.MULTILINE | re.IGNORECASE,
)

# Sample input: references that are expected to match, followed by malformed
# references that are expected to be rejected.
test_str = ("SHOULD MATCH:\n"
            "# Single chapters only:\n"
            " 2nd Kings 1 2 Kings 1 Second Kings 1\n"
            " Genesis 1 (Genesis 1) Gen 1 (Gen 1)\n"
            " matthew 4\n"
            " 1 chr 1 2nd Chron 3 1st Chronicles 8\n\n"
            "# Single chapter with single verse:\n"
            " Gen 1:1 Genesis 1:1 (Gen 1:1)\n\n"
            "# Single chapter with range of verses:\n"
            " Ezra 1:1-3 Hos 150:175-176\n\n"
            "# Single chapter with comma-separated verses:\n"
            " Daniel 5:6,7 Daniel 1:2,3,4\n\n"
            "# Range of chapters:\n"
            " Genesis 1-5 Ruth 99-150\n\n\n"
            "SHOULD NOT MATCH:\n"
            "# Verse only\n"
            " Job :3\n\n"
            "# Range of chapters WITH verse:\n"
            " Psalms 1-3:4\n\n"
            "# Range of chapters WITH range of verses:\n"
            " Proverbs 33-44:55-66\n\n"
            "# Chapter with comma before verse:\n"
            " Solomon 5:,4\n")

matches = regex.finditer(test_str)

for match_num, match in enumerate(matches, start=1):
    print(f"Match {match_num} was found at {match.start()}-{match.end()}: {match.group()}")

    # Groups are numbered 1=book_name, 2=chapter, 3=verse. A group that did
    # not take part in the match (a reference without verses) is reported as
    # None with start/end positions of -1.
    for group_num, group in enumerate(match.groups(), start=1):
        print(f"Group {group_num} found at {match.start(group_num)}-{match.end(group_num)}: {group}")
# Please keep in mind that these code samples are automatically generated and
# are not guaranteed to work. If you find any syntax errors, feel free to
# submit a bug report. For a full regex reference for Python, please visit:
# https://docs.python.org/3/library/re.html