use strict;
# Test fixture for the domain-name regex compiled further down in this file.
# It is one single-quoted, multi-line string, so the only escapes Perl
# processes are \\ (one literal backslash) and \' (one literal quote); every
# doubled backslash below is therefore a single backslash at run time.
# The text holds a list labelled "valid domains", a list labelled "invalid
# domains", some punctuation/number noise lines, and HELP/EXPLAIN notes that
# describe the pattern piece by piece. The notes are plain data: they are
# matched against like any other line and are never compiled as a regex.
# NOTE(review): the EXPLAIN notes say {253} for the total-length check while
# the compiled pattern uses {254} - confirm which one is intended.
my $str = '# TEST
## List of valid domains:
www.google.com
google.com
mkyong123.com
mkyong-info.com
sub.mkyong.com
sub.mkyong-info.com
mkyong.com.au
g.co
a.12E
mkyong.t.t.co
xn--stackoverflow.com
stackoverflow.xn--com
xn--d1ai6ai.xn--p1ai
stackoverflow.co.uk
a.xn--wgbh1c
1.2.3.4.com
x.XN--VERMGENSBERATUNG-PWB
abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijk.com
www.abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijk.com
abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcde.abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijk.abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijk.abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijk.com
## List of invalid domains:
http://google.com
http://www.google.com
dot.
space com
under_score.com
underscore.c_om
-dash.com
dash-.com
sub.-dash.com
sub-.dash.com
-.com
-com
.com
com
mx.gmail.com.
mkyong.t.t.c
mkyong,com
mkyong.com/users
a.123
x.XN--VERMGENSBERATUNG-PWBB
abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijkk.com
www.abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijkk.co.uk
abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcde.abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijk.abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijk.abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijk.comm
0123456789 +-.,!@#$%^&*();\\\\/|<>\\"\\\'
12345 -98.7 3.141 .6180 9,000 +42
HELP
https://github.com/c-hive/guides/blob/57cbbf72f1152214a160f9be8a57e2c4dc868cfd/etc/regex.md#domains
# allowed chars
[\\w\\d\\.\\-]
EXPLAIN
# once upon a time...
^
# doesn\'t contain underscore (this is so that we can use \\w alter on)
(?![\\w\\d\\.\\-]*?_[\\w\\d\\.\\-]*?)
# doesn\'t contain dash at the beginning of a label
(?!(?:[\\d\\w]+?\\.)?\\-[\\w\\d\\.\\-]*?)
# doesn\'t contain dash at the end of a label
(?![\\w\\d]+?\\-\\.(?:[\\d\\w\\.\\-]+?))
# starts with a non-limit char
(?=[\\w\\d\\])
# contains at least 1 dot
(?=[\\w\\d\\.\\-]*?\\.+[\\w\\d\\.\\-]*?)
# not longer than 253 chars
(?![\\w\\d\\.\\-]{253})
# doesn\'t contain a label longer than 63 char
(?!(?:\\.?[\\w\\d\\-\\.]*?[\\w\\d\\-]{64,}\\.)+?)
# allowed chars
[\\w\\d\\.\\-]+?
# TLD is not digit-only
(?<![\\w\\d\\-\\.]*?\\.[\\d]+?)
# TLD is at least 2 characters
(?<=[\\w\\d\\-]{2,})
# TLD is at most 24 characters
(?<![\\w\\d\\-]{25})
# the end
&';
# Domain-name validator, matched one line at a time (/m) against $str.
#
# The generated pattern ended with two unbounded lookbehinds (a lazy star and
# an open-ended {2,}). Perl only supports lookbehind of bounded length, so the
# pattern failed to compile and the script never ran. Both checks are
# expressed here in forms Perl accepts without changing what they test:
#   * "TLD is not digit-only" is a negative lookahead taken from the start of
#     the line; the body only ever matches a whole line of allowed characters,
#     so "line ends with dot + digits" is the same condition.
#   * "TLD has at least 2 characters" only needs to inspect the last two
#     characters, so a fixed-width lookbehind of exactly 2 is equivalent.
# All other assertions are copied verbatim; /x is added purely for layout.
my $regex = qr{
    ^                                           # start of a line
    (?!.*?_.*?)                                 # no underscore anywhere on the line
    (?!(?:[\d\w]+?\.)?\-[\w\d\.\-]*?)           # first or second label does not start with a dash
    (?![\w\d]+?\-\.(?:[\d\w\.\-]+?))            # first label does not end with a dash
    (?=[\w\d])                                  # first character is a word character
    (?=[\w\d\.\-]*?\.+[\w\d\.\-]*?)             # at least one dot
    (?![\w\d\.\-]{254})                         # fewer than 254 allowed characters in total
    (?!(?:\.?[\w\d\-\.]*?[\w\d\-]{64,}\.)+?)    # no dot-terminated label longer than 63 characters
    (?![\w\d\.\-]*\.\d+$)                       # TLD is not digit-only
    [\w\d\.\-]+?                                # the name itself: allowed characters only
    (?<=[\w\d\-]{2})                            # TLD is at least 2 characters
    (?<![\w\d\-]{25})                           # TLD is at most 24 characters
    $                                           # end of the line
}mpx;

# In scalar context /g finds a single match per evaluation, so this reports
# only the first matching line of $str (use a while loop to visit them all).
if ( $str =~ /$regex/g ) {
    print "Whole match is ${^MATCH} and its start/end positions can be obtained via \$-[0] and \$+[0]\n";
    # print "Capture Group 1 is $1 and its start/end positions can be obtained via \$-[1] and \$+[1]\n";
    # print "Capture Group 2 is $2 ... and so on\n";
}

# ${^POSTMATCH} and ${^PREMATCH} are also available with the use of '/p'
# Named capture groups can be called via $+{name}
# Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for Perl, please visit: http://perldoc.perl.org/perlre.html