use strict;
use warnings;
use utf8;
# Sample input for the name-parsing pattern: one personal name per line.
# A non-interpolating heredoc keeps apostrophes literal without escaping;
# chomp drops the heredoc's final newline so the string ends on the last name.
chomp(my $str = <<'END_NAMES');
John Doe
Mary Jane Smith
Jean-Luc van der Berg, Sr.
Maria de la Cruz
Mohamed bin Ahmed al-Farsi
Anna-Nicole Johnson Jr.
Carlos Alberto dos Santos
Sophie von und zu Rhein
Li Na
O'Connor Fitzpatrick III
Yusuf ibn Abdullah
Emma-Louise O'Reilly
Juan Carlos de la Torre
Fatima bint Mohammed
Hans-Christian Andersen
Elena di Matteo
Abdul-Rahman al-Hashimi
Isabella della Rovere
Jean-Pierre Dupont, Jr
Sara van den Berg
John Q. Public
Dr. Jane Doe, PhD
Prof. John Smith, MD
Mr. James Walter Thomas Jr.
Ms. Emily Davis, CPA
Dr. Michael David Jacob Brown, DDS
Mrs. Sarah Wilson, RN
Rev. Thomas Anderson, DD
Mr. Adams
T'Challa
ǃXóõ ǁKaru
ǃHõã ǂNuka
J. K. Rowling
John Jacob Jingleheimer Schmidt
END_NAMES
# Pattern that splits one personal name per line into named captures.
# Flags: /m makes ^ and $ anchor at each line, /i ignores case, /p keeps
# ${^PREMATCH}, ${^MATCH} and ${^POSTMATCH} available after a match.
#
# Two top-level alternatives:
#   1. A line consisting of a single token (no whitespace and none of . , ? ; :)
#      is captured as `name`.
#   2. Otherwise, in order, every part optional:
#        honorific    - mr/mrs/ms/dr/prof/rev/hon with optional trailing period,
#                       or miss/sir/dame/lord/lady, followed by whitespace
#        given_name   - lazily matched; contains first_name and an optional,
#                       lazily matched middle_name (one or more tokens)
#        family_name  - lazily matched; an optional "<particle> token-" prefix
#                       (hyphenated first half), then an optional particle
#                       (van der, de la, bin, al, von und zu, ...) and a token
#        suffix       - Sr/Snr/Jr/Jnr (period allowed on Sr and Jr) or a run of
#                       the letters I, V, X, after optional comma and whitespace
#        post_nominal - a comma and everything after it up to end of line
#
# NOTE(review): the particle alternation is spelled out twice (before and after
# the hyphen) and the two uses are not identical: the first is followed by a
# single \s and a token class that also excludes '!', the second by \s+ and a
# class without '!'. 'ter' also appears twice in each list. Confirm whether
# these differences are intentional before editing either copy.
my $regex = qr/^(?<name>[^.,?;:\s]+)$|^(?:(?<honorific>(?:mrs?|ms|dr|prof|rev|hon)\.?|miss|sir|dame|lord|lady)\s+)?(?:(?<given_name>(?<first_name>[^.,?;:\s]+\.?)(?:\s+(?<middle_name>(?:[^.,?;:\s]+\.?)(?:\s+[^.,?;:\s]+\.?)*?))??)\s+)??(?:(?<family_name>(?:(?:(?:(?:a|ab|af|ap|abu|aït|al|ālam|at|ath|aust|austre|bar|bath|bat|ben|bin|ibn|bet|bint|da|das|de la|degli|del|dele|della|der|di|dos|du|e|el|fetch|vetch|fitz|i|ka|kil|gil|la|le|lille|lu|m'|mac|mc|mck|mhic|mic|mala|mellom|myljom|na|ณ|ned|nedre|neder|ngā|nic|ní|nin|nord|norr|ny|o|ó|ua|uí|opp|upp|öfver|ost|öst|öster|øst|østre|över|øvste|øvre|øver|öz|pour|putra|putera|putri|puteri|setia|setya|stor|söder|sør|sønder|syd|søndre|syndre|søre|te|ter|ter|tre|van|van de|van den|van der|van het|van 't|väst|väster|verch|erch|vest|vestre|vesle|vetle|von|war|zu|von und zu)\s)?[^.,!?;:\s]+)-)?(?:(?:a|ab|af|ap|abu|aït|al|ālam|at|ath|aust|austre|bar|bath|bat|ben|bin|ibn|bet|bint|da|das|de la|degli|del|dele|della|der|di|dos|du|e|el|fetch|vetch|fitz|i|ka|kil|gil|la|le|lille|lu|m'|mac|mc|mck|mhic|mic|mala|mellom|myljom|na|ณ|ned|nedre|neder|ngā|nic|ní|nin|nord|norr|ny|o|ó|ua|uí|opp|upp|öfver|ost|öst|öster|øst|østre|över|øvste|øvre|øver|öz|pour|putra|putera|putri|puteri|setia|setya|stor|söder|sør|sønder|syd|søndre|syndre|søre|te|ter|ter|tre|van|van de|van den|van der|van het|van 't|väst|väster|verch|erch|vest|vestre|vesle|vetle|von|war|zu|von und zu)\s+)?[^.,?;:\s]+)??(?:,?\s+(?<suffix>Sr\.?|Snr|Jr\.?|Jnr|[IVX]+))?)?(?<post_nominal>,.*)?$/mip;
# Report every line of $str that $regex matches.
#
# A /g match in scalar context returns one match per evaluation and resumes
# from pos($str) on the next one, so it has to be driven by `while`; with `if`
# only the first name was ever reported and the rest of the sample was ignored.
#
# The sample contains non-ASCII names, which `use utf8` turns into character
# strings; give STDOUT an encoding layer so they print as UTF-8 without
# "Wide character" warnings.
binmode STDOUT, ':encoding(UTF-8)';

while ( $str =~ /$regex/g ) {
    print "Whole match is ${^MATCH} and its start/end positions can be obtained via \$-[0] and \$+[0]\n";
    # print "Capture Group 1 is $1 and its start/end positions can be obtained via \$-[1] and \$+[1]\n";
    # print "Capture Group 2 is $2 ... and so on\n";
}

# ${^PREMATCH} and ${^POSTMATCH} are also available because the pattern was
# compiled with /p.
# Named capture groups from the most recent match are read via %+, e.g.
# $+{family_name}; a group that did not take part in the match is not defined.
# Please keep in mind that these code samples are automatically generated and
# are not guaranteed to work. If you find any syntax errors, feel free to
# submit a bug report. For a full regex reference for Perl, please visit:
# http://perldoc.perl.org/perlre.html