use strict;
use warnings;

# Sample input: one candidate URL per line.  Only http, https and ftp
# schemes are accepted by $regex below, so several lines are expected
# to produce no match at all.
my $str = 'https://thechildrenareourfuture.org/#SomeRandomAnchorGoesHere
http://user:pass@example.com:8080/omega:33
https://www.google.com:65535/
https://www.google.com:42069/
https://www.google.com:9876/
https://www.google.com:59999/
https://www.google.com:10000/
https://www.google.com:1234/
https://www.google.com:53/
https://www.google.com:123/
https://www.google.com:0/
ftp://username:password@example.com:21/path/here/to/file.tar.gz
http://google.com?redirect=https%3A%2F%2Flocalhost%3A8080&var2=somethingelse
https://john.smith@github.com/UserGroup/repo.git
https://mattermost.com:8065/team/messages/@john.smith
https://punycode.xn--j6w193g
smtp://somesite.com/myFile.php
SMTP://SOMESITE.COM/MYFILE.PHP
unknown://somesite.com/fiLeNuM12345.php
//SomeSite.ru/foO.php
://SomeOtherSite.oRg/baRR.php
https://_sub1.sub2.sub3.s-u_b4.domain.com
http://username:password@subdomain.domain.co.uk/path/to/file.html#Anchor1?key1=value1&key2=value2';

# URL pattern: scheme, optional user[:password]@, host, optional port
# (0-65535), optional path, optional #anchor, optional ?key=value and
# any number of &key=value pairs.  It contains no capturing groups.
#
# Brace delimiters are used because the pattern itself contains "/";
# with qr/.../ the first "/" of "://" would end the pattern early.
# Every "{" inside the pattern is a balanced quantifier, so qr{...} is safe.
#
# Flags: x = whitespace and comments in the pattern are ignored,
#        i = case-insensitive, p = enable ${^MATCH} and friends,
#        m = kept from the original (the pattern uses no ^ or $ anchors).
my $regex = qr{
(?#First, match the protocol)
(?:https?|ftp)://
(?#Next, check for optional username and/or password)
(?#Note: The following two char classes are functionally equivalent)
(?:[\x21-\x39\x3b-\x3f\x41-\x7e]+(?::[!-9;-?A-~]+)?@)?
(?#Next, let's match the domain [with support for Punycode ])
(?:xn--[0-9a-z]+|[0-9A-Za-z_-]+\.)*(?:xn--[0-9a-z]+|[0-9A-Za-z-]+)\.(?:xn--[0-9a-z]+|[0-9A-Za-z]{2,10})
(?#Let's match on optional port)
(?::(?:6553[0-5]|655[0-2]\d|65[0-4]\d{2}|6[0-4]\d{3}|[1-5]\d{4}|[1-9]\d{1,3}|\d))?
(?#Next, let's match on the path)
(?:/[\x21\x22\x24\x25\x27-\x2e\x30-\x3b\x3e\x40-\x5b\x5d-\x7e]*)*
(?#Next, let's match on an anchor)
(?:\#[\x21\x22\x24\x25\x27-\x2e\x30-\x3b\x3e\x40-\x5b\x5d-\x7e]*)?
(?#Last, but not least, we match on URI params)
(?:\?[\x21\x22\x24\x25\x27-\x2e\x30-\x3b\x40-\x5b\x5d-\x7e]+=[\x21\x22\x24\x25\x27-\x2e\x30-\x3b\x40-\x5b\x5d-\x7e]*)?
(?#Additional params)
(?:&[\x21\x22\x24\x25\x27-\x2e\x30-\x3b\x40-\x5b\x5d-\x7e]+=[\x21\x22\x24\x25\x27-\x2e\x30-\x3b\x40-\x5b\x5d-\x7e]*)*
}mxip;

# Walk every match in the sample.  In scalar context /g resumes from the
# previous match position, so the loop ends once no further URL is found.
while ( $str =~ /$regex/gp ) {
    print "Whole match is ${^MATCH} and its start/end positions can be obtained via \$-[0] and \$+[0]\n";
    # print "Capture Group 1 is $1 and its start/end positions can be obtained via \$-[1] and \$+[1]\n";
    # print "Capture Group 2 is $2 ... and so on\n";
}
# ${^POSTMATCH} and ${^PREMATCH} are also available with the use of '/p'
# Named capture groups can be called via $+{name}
# Please keep in mind that these code samples are automatically generated and
# are not guaranteed to work. If you find any syntax errors, feel free to
# submit a bug report. For a full regex reference for Perl, please visit:
# http://perldoc.perl.org/perlre.html