use strict;
use warnings;

# Demonstration script: scans a sample text for Bible references such as
# "Gen 1:1", "2nd Kings 1", "Ezra 1:1-3" or "(Genesis 1)" and prints every
# match found.

# Sample text. The first half lists references the pattern is expected to
# accept, the second half lists malformed references it is expected to reject.
my $str = 'SHOULD MATCH:
# Single chapters only:
2nd Kings 1 2 Kings 1 Second Kings 1
Genesis 1 (Genesis 1) Gen 1 (Gen 1)
matthew 4
1 chr 1 2nd Chron 3 1st Chronicles 8
# Single chapter with single verse:
Gen 1:1 Genesis 1:1 (Gen 1:1)
# Single chapter with range of verses:
Ezra 1:1-3 Hos 150:175-176
# Single chapter with comma-separated verses:
Daniel 5:6,7 Daniel 1:2,3,4
# Range of chapters:
Genesis 1-5 Ruth 99-150
SHOULD NOT MATCH:
# Verse only
Job :3
# Range of chapters WITH verse:
Psalms 1-3:4
# Range of chapters WITH range of verses:
Proverbs 33-44:55-66
# Chapter with comma before verse:
Solomon 5:,4
';

# Reusable pattern fragments. They are kept as plain strings (not qr//
# objects) on purpose: an interpolated qr// carries its own flag set and would
# NOT inherit the outer /i, which would make the ordinal words case-sensitive.
my $ORD_1   = '(?:1st|1|First)';
my $ORD_2   = '(?:2nd|2|Second)';
my $ORD_3   = '(?:3rd|3|Third)';
my $CHAPTER = '(?:[1-9]\d?|1[0-4]\d|150)';        # 1 .. 150
my $VERSE   = '(?:[1-9]\d?|1[0-6]\d|17[0-6])';    # 1 .. 176

# Capture groups:
#   1 - the whole reference without the optional surrounding parentheses
#   2 - book name or abbreviation
#   3 - chapter, or chapter range ("1-5")
#   4 - verse list (single verses and verse ranges, comma separated), if any
# Flags: /x for layout, /i because book names are matched case-insensitively,
# /p so that ${^MATCH} and friends are populated.
# NOTE: inside the pattern, comments must not contain the delimiter or sigils.
my $regex = qr/
    \(?                                         # optional opening parenthesis
    (                                           # group 1: bare reference
        \b
        (                                       # group 2: book
              Gen(?:esis)? | Exo(?:dus)? | Lev(?:iticus)? | Num(?:bers)?
            | Deut(?:eronomy)? | Josh(?:ua)? | Judg(?:es)? | Ru(?:th)?
            | $ORD_1\ Sam(?:uel)? | $ORD_2\ Sam(?:uel)?
            | $ORD_1\ Kings | $ORD_2\ Kings
            | $ORD_1\ Chr(?:onicles|on)? | $ORD_2\ Chr(?:onicles|on)?
            | Ezr(?:a)? | Neh(?:emiah)? | Est(?:her)? | Job | Psa(?:lms?)?
            | Prov(?:erbs)? | Eccles(?:iastes)? | Songs
            | (?:Song\ of\ )?Solomon
            | Isa(?:iah)? | Jer(?:emiah)? | Lam(?:entations)? | Eze(?:kiel)?
            | Dan(?:iel)? | Hos(?:ea)? | Joel | Am(?:os)? | Obad(?:iah)?
            | Jon(?:ah)? | Micah | Na(?:hum)? | Hab(?:akkuk)?
            | Zeph(?:aniah)? | Hagg(?:ai)? | Zech(?:ariah)? | Mal(?:achi)?
            | Matt(?:hew)? | Mark | Luke | John | Acts | Rom(?:ans)?
            | $ORD_1\ Cor(?:inthians)? | $ORD_2\ Cor(?:inthians)?
            | Gal(?:atians)? | Eph(?:esians)? | Phil(?:ippians)?
            | Col(?:ossians)?
            | $ORD_1\ Thess(?:alonians)? | $ORD_2\ Thess(?:alonians)?
            | $ORD_1\ Tim(?:othy)? | $ORD_2\ Tim(?:othy)?
            | Tit(?:us)? | Philem(?:on)? | Heb(?:rews)? | James
            | $ORD_1\ Peter | $ORD_2\ Peter
            | $ORD_1\ John | $ORD_2\ John | $ORD_3\ John
            | Jud(?:e)? | Rev(?:elation)?
        )
        \b \s?
        (                                       # group 3: chapter or range
            \b $CHAPTER \b
            (?! \s* - \s* \d+ \b : )            # no chapter range with verse
            (?: \s* - \s* \b $CHAPTER \b )?     # optional end of chapter range
            (?! \s* : \s* , )                   # no comma right after colon
        )
        (?:
            \s* : \s* (?! \s* , )
            (                                   # group 4: verse list
                (?:
                    (?: \s* , \s* )?
                    \b $VERSE \b
                    (?: \s* - \s* \b $VERSE \b )?   # optional verse range
                )+
            )
        )?
    )
    \)?                                         # optional closing parenthesis
/xip;

# A /g match in scalar context resumes where the previous one stopped, so the
# loop walks through every reference instead of reporting only the first one.
while ( $str =~ /$regex/g ) {
    print "Whole match is ${^MATCH} and its start/end positions can be obtained via \$-[0] and \$+[0]\n";
    # print "Capture Group 1 is $1 and its start/end positions can be obtained via \$-[1] and \$+[1]\n";
    # print "Capture Group 2 is $2 ... and so on\n";
}
# ${^POSTMATCH} and ${^PREMATCH} are also available with the use of '/p'
# Named capture groups can be called via $+{name}
# Please keep in mind that these code samples are automatically generated and
# are not guaranteed to work. If you find any syntax errors, feel free to
# submit a bug report. For a full regex reference for Perl, please visit:
# http://perldoc.perl.org/perlre.html