# Matches one optional child of a sitemap <url> element (<lastmod>,
# <changefreq> or <priority>) plus any whitespace that follows it.
# The text captures are lazy so that padding before the closing tag is
# consumed by the trailing \s* instead of ending up inside the capture.
optional_child = /
  (?>
    (?><lastmod>\s*(?<mod>[^<]+?)\s*<\/lastmod>)
    |
    (?><changefreq>\s*(?<freq>\w+)\s*<\/changefreq>)
    |
    (?><priority>\s*(?<prio>[01](?>\.\d{1,2})?)\s*<\/priority>)
  )\s*
/x

# Matches a whole <url>...</url> element: a mandatory <loc> with up to three
# optional children before it and up to three after it.
# The names mod, freq and prio are each defined twice (once per side of
# <loc>), so String#scan yields seven captures per element, in this order:
#   mod, freq, prio (children before <loc>), uri, mod, freq, prio (after).
# Children that are absent on a given side are nil.
re = /
  <url>\s*
  (?>#{optional_child}){0,3}\s*
  <loc>\s*(?<uri>[^<]+?)\s*<\/loc>\s*
  (?>#{optional_child}){0,3}\s*
  <\/url>
/x

# Sample sitemap used to exercise the pattern. It is scanned as plain text,
# not parsed as XML. The heredoc is single-quoted (no interpolation) and
# chomped so that the string ends right after </urlset>.
str = <<~'XML'.chomp
  <?xml version="1.0" encoding="UTF-8"?>
  <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
  <url>
  <lastmod>2005-01-01</lastmod>
  <changefreq>monthly</changefreq>
  <priority>0.8</priority>
  <loc>http://www.example.com/</loc>
  </url>
  <url>
  <loc>http://www.example.com/catalog?item=12&desc=vacation_hawaii</loc>
  <changefreq>weekly</changefreq>
  </url>
  <url>
  <loc>http://www.example.com/catalog?item=73&desc=vacation_new_zealand</loc>
  <lastmod>2004-12-23</lastmod>
  <changefreq>weekly</changefreq>
  </url>
  <url>
  <loc>http://www.example.com/catalog?item=74&desc=vacation_newfoundland</loc>
  <lastmod>2004-12-23T18:00:15+00:00</lastmod>
  <priority>0.3</priority>
  </url>
  <url>
  <loc>http://www.example.com/catalog?item=83&desc=vacation_usa</loc>
  <lastmod>2004-11-23</lastmod>
  </url>
  </urlset>
XML

# Print the match result: one line per <url> element, showing the array of
# seven captures described above.
str.scan(re) { |captures| puts captures.inspect }
# Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for Ruby, please visit: http://ruby-doc.org/core-2.2.0/Regexp.html