use strict;
use warnings;

# Sample input: a small sitemap document containing five <url> elements whose
# child elements appear in varying order. Kept in single quotes so nothing in
# it is interpolated.
my $str = '<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<url>
<lastmod>2005-01-01</lastmod>
<changefreq>monthly</changefreq>
<priority>0.8</priority>
<loc>http://www.example.com/</loc>
</url>
<url>
<loc>http://www.example.com/catalog?item=12&desc=vacation_hawaii</loc>
<changefreq>weekly</changefreq>
</url>
<url>
<loc>http://www.example.com/catalog?item=73&desc=vacation_new_zealand</loc>
<lastmod>2004-12-23</lastmod>
<changefreq>weekly</changefreq>
</url>
<url>
<loc>http://www.example.com/catalog?item=74&desc=vacation_newfoundland</loc>
<lastmod>2004-12-23T18:00:15+00:00</lastmod>
<priority>0.3</priority>
</url>
<url>
<loc>http://www.example.com/catalog?item=83&desc=vacation_usa</loc>
<lastmod>2004-11-23</lastmod>
</url>
</urlset>';

# Matches one complete <url>...</url> element.
#
# Layout of the pattern:
#   * <url>, then up to three optional metadata children
#     (<lastmod>, <changefreq> or <priority>, in any order),
#   * the mandatory <loc> child,
#   * up to three more optional metadata children, then </url>.
#
# Named captures: uri (text of <loc>), mod (<lastmod>), freq (<changefreq>)
# and prio (<priority>, a 0 or 1 optionally followed by '.' and 1-2 digits).
# The names mod/freq/prio are declared twice (before and after <loc>);
# $+{name} yields the leftmost group of that name that took part in the match.
# [^<]+ is greedy, so a captured value keeps any whitespace that precedes the
# closing tag.
#
# The (?>...) groups are atomic: once one has matched, the engine does not
# backtrack into it.
#
# Flags: x = whitespace in the pattern is insignificant,
#        u = Unicode semantics,
#        p = keep ${^PREMATCH}, ${^MATCH} and ${^POSTMATCH} available.
my $regex = qr~<url>\s*
(?>
(?>
(?><lastmod>\s*(?<mod>[^<]+)\s*</lastmod>)
|
(?><changefreq>\s*(?<freq>\w+)\s*</changefreq>)
|
(?><priority>\s*(?<prio>[01](?>\.\d{1,2})?)\s*</priority>)
)\s*
){0,3}\s*
<loc>\s*
(?<uri>[^<]+)\s*
</loc>\s*
(?>
(?>
(?><lastmod>\s*(?<mod>[^<]+)\s*</lastmod>)
|
(?><changefreq>\s*(?<freq>\w+)\s*</changefreq>)
|
(?><priority>\s*(?<prio>[01](?>\.\d{1,2})?)\s*</priority>)
)\s*
){0,3}\s*
</url>~xup;

# In scalar context m//g finds only the next match per evaluation, resuming at
# pos($str). Looping with `while` therefore reports every <url> element; a
# single `if` would report the first one only and leave pos($str) set.
while ( $str =~ /$regex/g ) {
    print "Whole match is ${^MATCH} and its start/end positions can be obtained via \$-[0] and \$+[0]\n";
    # print "Capture Group 1 is $1 and its start/end positions can be obtained via \$-[1] and \$+[1]\n";
    # print "Capture Group 2 is $2 ... and so on\n";
}

# ${^POSTMATCH} and ${^PREMATCH} are also available with the use of '/p'
# Named capture groups can be called via $+{name}, e.g. $+{uri} or $+{mod}
# Please keep in mind that these code samples are automatically generated and
# are not guaranteed to work. If you find any syntax errors, feel free to
# submit a bug report. For a full regex reference for Perl, please visit:
# http://perldoc.perl.org/perlre.html