use strict;
# Sample text that the pattern further down is matched against: a
# newline-separated mix of URLs, bare host names, IPv4-style addresses,
# DOI strings and "ark:" identifiers.
# The literal is single-quoted, so nothing in it is interpolated; the only
# escapes are \' (a literal single quote) and \\ (a literal backslash).
my $str = 'https://www.example.com
http://www.example.com
www.example.com
example.com
http://blog.example.com
http://www.example.com/product
http://www.example.com/products?id=1&page=2
http://www.example.com#up
http://255.255.255.255
255.255.255.255
255.255.255.255/test
http://invalid.com/perl.cgi?key= | http://web-site.com/cgi-bin/perl.cgi?key1=value1&key2
http://www.site.com:8008
http://www.site.com:8008 10.1016.12.31/naTUrE.S0735-1097(98)2000/12/31/34:7-7 http://myrepo.example.org/ark:/12345/bcd987
http://n2t.net/ark:/12345/bcd987
http://texashistory.unt.edu/ark:/67531/metapth346793
http://example.org/ark:/12025/654xz321/s3/f8.05v.tiff
https://doi.org/10.3886/ICPSR06849 10.3886/ICPSR06849 https://www.icpsr.umich.edu/icpsrweb/NACJD/studies/6849/version/1
doi.org/10.1175/1520-0485(2002)032<0870:CT>2.0.CO;2
ark: 12025/654xz321/s3/f8.05v.tiff
self.crossref_dois = (
\'10.2310/JIM.0b013e31820bab4c\',
\'10.1007/978-3-642-28108-2_19\',
\'10.1016/S0735-1097(98)00347-7\',
)
self.hard_dois = (
\'10.1175/1520-0485(2002)032<0870:CT>2.0.CO;2\',
\'10.1002/(SICI)1522-2594(199911)42:5<952::AID-MRM16>3.0.CO;2-S\',
\'10.1579/0044-7447(2006)35\\[89:RDUICP\\]2.0.CO;2\',
)
self.currently_not_supported = (
\'10.1007.10/978-3-642-28108-2_19\',
\'10.1000.10/123456\',
\'10.1016.12.31/nature.S0735-1097(98)2000/12/31/34:7-7\',
)
self.crossref_dois = (
\'doi.org/10.2310/JIM.0b013e31820bab4c\',
\'doi.org/10.1007/978-3-642-28108-2_19\',
\'doi.org/10.1016/S0735-1097(98)00347-7\',
)
self.hard_dois = (
\'doi.org/10.1175/1520-0485(2002)032<0870:CT>2.0.CO;2\',
\'doi.org/10.1002/(SICI)1522-2594(199911)42:5<952::AID-MRM16>3.0.CO;2-S\',
\'doi.org/10.1579/0044-7447(2006)35\\[89:RDUICP\\]2.0.CO;2\',
)
self.currently_not_supported = (
\'doi.org/10.1007.10/978-3-642-28108-2_19\',
\'doi.org/10.1000.10/123456\',
\'doi.org/10.1016.12.31/nature.S0735-1097(98)2000/12/31/34:7-7\',';
# Pattern for "ark:" identifiers.
#   Group 1: the "ark:" label with an optional "/" on either side and an
#            optional single trailing space.
#   Group 2: the run of URL-style characters that follows (may be empty).
# qr{...} is used because the pattern contains literal "/" characters; with
# qr/.../ the first of them would end the literal early and the script would
# not compile. "@" is backslash-escaped so it is always a literal character
# and is never taken as the start of an array to interpolate.
my $regex = qr{(/?ark:/? ?)([-a-zA-Z0-9\@:%_\+.~#?&//=]*)}mp;

# The "if" evaluates the match once, so only the first match in $str is
# reported; switch to a "while" loop to visit every match.
if ( $str =~ /$regex/g ) {
    print "Whole match is ${^MATCH} and its start/end positions can be obtained via \$-[0] and \$+[0]\n";
    # print "Capture Group 1 is $1 and its start/end positions can be obtained via \$-[1] and \$+[1]\n";
    # print "Capture Group 2 is $2 ... and so on\n";
}
# ${^POSTMATCH} and ${^PREMATCH} are also available with the use of '/p'
# Named capture groups can be called via $+{name}
# Please keep in mind that these code samples are automatically generated and
# are not guaranteed to work. If you find any syntax errors, feel free to
# submit a bug report. For a full regex reference for Perl, please visit:
# http://perldoc.perl.org/perlre.html