use strict;
# Sample input for the host/URL regex below: one candidate per line, grouped
# under plain-text section headings ("Valid ...", "Invalid ...").
# The headings and the trailing "// ..." remarks are part of the string data,
# not Perl comments. The literal is single-quoted, so nothing inside it
# (e.g. the "@" in user:password@host) is interpolated.
my $str = 'Valid localhost addresses:
localhost
user:password@localhost:80
localhost:8080/path/to/page.html
Valid IPv4 addresses:
10.10.0.1
192.168.0.1
192.168.0.1:8888
user:password@192.168.0.1:8888
user:password@192.168.0.1
Valid IPv6 addresses: // regex does not check for that, check here: http://vernon.mauery.com/content/projects/linux/ipv6_regex
100::
100::ffff:ffff:ffff:ffff
::ffff:0.0.0.0
64:ff9b::0.0.0.0
ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff
fe80::
2001:2f:ffff:ffff:ffff:ffff:ffff:ffff
2001:db8::
::1
Valid domains: // all should work
www.test.com
www1.test.com
www1.test.com:8080
test.com
test-test.com
test.com/path/to/page?var=1&var2=2
test.com:80
user:password@test.com
user:password@test-test.com
user:password@test.com/path/to/page
user:password@test.com/path/to/page?var=1&var2=2
user:password@test.com:80
user:password@test.com:80/path/to/page?var=1&var2=2
Maximum-Length-Of-63-Characters-For-A-Subdomain-Completely-Used.com
user:password@Maximum-Length-Of-63-Characters-For-A-Subdomain-Completely-Used.com
user:password@Maximum-Length-Of-63-Characters-For-A-Subdomain-Completely-Used.com:8080
Maximum-Length-Of-63-Characters-For-A-Subdomain-Completely-Used.Maximum-Length-Of-63-Characters-For-A-Subdomain-Completely-Used.Maximum-Length-Of-63-Characters-For-A-Subdomain-Completely-Used.MaximumLengthOf63CharactersForATopLevelDomainWithoutTheDashes
Maximum-Length-Of-63-Characters-For-A-Subdomain-Completely-Used.Maximum-Length-Of-63-Characters-For-A-Subdomain-Completely-Used.Maximum-Length-Of-63-Characters-For-A-Subdomain-Completely-Used.MaximumLengthOf63CharactersForATopLevelDomainWithoutTheDashes:8888
user:password@Maximum-Length-Of-63-Characters-For-A-Subdomain-Completely-Used.Maximum-Length-Of-63-Characters-For-A-Subdomain-Completely-Used.Maximum-Length-Of-63-Characters-For-A-Subdomain-Completely-Used.MaximumLengthOf63CharactersForATopLevelDomainWithoutTheDashes:8888
a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a // Maximum number of 127 suddomains, seperated by 126 dots
Invalid domains: // should not work
justLongGibberishBLABLABLA
againButThisTimeWithNumbers12345675678797
ShouldNOTWorkBecauseOfTheDash-.com
UrlLenghtIsValidButThisSubdomainIsTooLongMaximumForASubdomainIs63Characters.com
This-Url-Has-Valid-Subdomains.But-The-Url-Length-In-General-Is-Too-Long.This-Means-Longer-Than-253-Characters-For-The-Complete-Url.Including-The-Top-Level-Domain-Itself.So-This-Line-Shouldnt-Be-Catched-By-The-Regex.Its-Exactly-One-Character-Too-Long1.com';
# Precompiled pattern that recognises "[user:password@]host[:port]" at the
# start of a line. Components, left to right:
#   (?:^(\w{1,255}):(.{1,255})@|^)
#       Line start, optionally followed by "user:password@".
#       Group 1 = user (1-255 word chars), group 2 = password (1-255 chars).
#   (?=\S{0,253}(?:$|:| ))
#       Lookahead applied to the dotted-name branch only: some run of 0-253
#       non-whitespace characters starting here must be followed directly by
#       end of line, a ":" or a space.
#   ((?:[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?\.)+(?:[a-z0-9]{1,63}))
#       Group 3 = dotted name: one or more labels of 1-63 characters that
#       start and end with an alphanumeric (hyphens allowed inside), each
#       followed by ".", then a final label of 1-63 alphanumerics.
#   |localhost
#       Alternative host: the literal "localhost" (not captured).
#   (:\d{1,5})?
#       Group 4 = optional port, including the leading ":" (1-5 digits).
# Flags: /m lets ^ and $ match at every line boundary, /i ignores case,
# /p keeps the matched text available via ${^PREMATCH}, ${^MATCH}, ${^POSTMATCH}.
my $regex = qr/(?:^(\w{1,255}):(.{1,255})@|^)(?:(?:(?=\S{0,253}(?:$|:| ))((?:[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?\.)+(?:[a-z0-9]{1,63})))|localhost)(:\d{1,5})?/mip;
# Report every match of $regex in $str, not only the first one.
# In scalar context m//g resumes from pos($str) where the previous match
# ended and returns false once no further match exists, so a `while` loop
# visits each matching line in turn. A plain `if` would evaluate the match
# a single time and silently ignore all remaining candidates.
while ( $str =~ /$regex/g ) {
    print "Whole match is ${^MATCH} and its start/end positions can be obtained via \$-[0] and \$+[0]\n";
    # print "Capture Group 1 is $1 and its start/end positions can be obtained via \$-[1] and \$+[1]\n";
    # print "Capture Group 2 is $2 ... and so on\n";
}
# ${^POSTMATCH} and ${^PREMATCH} are also available with the use of '/p'
# Named capture groups can be called via $+{name}
# Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for Perl, please visit: http://perldoc.perl.org/perlre.html