use strict;
my $str = 'Lines which should be partially or fully matched:
http:www.google.com/
http//www.google.com/
http://www.google.com/
://www.google.com/
www.google.com/
www.google.com:8000
www.google.com/?key=value
github.io
www.google.com/abc/def/ijk#123
www.google.com/abc/def/ijk?v=123123123
www.google.com/abc/def/watch?v=1231231231
https://www.youtube.com/channel/UCgeu2xe0XRscaKyvBt3WgmQ
http://www.google.com/?key=value
http://www.youtube.com/
However, it should skip (do not match lines) which contain youtube video ID):
http://www.youtube.com/watch?v=B5Gj78s6H7w&feature=youtu.be
https://www.youtube.com/embed/y19EaW2X7ac
music.youtube.com/embed/y19EaW2X7ac
https://www.youtube.com/watch?v=B5Gj78s6H7w&feature=youtu.be
https://www.youtube.com/watch?feature=youtu.be&v=B5Gj78s6H7w
https://www.youtu.be/B5Gj78s6H7w&feature=youtu.be
https://www.youtu.be/B5Gj78s6H7w
';
my $regex = qr/(?i)(?(DEFINE)(?<proto>(?:https?:)?\/\/)(?<port>:[0-9]{2,5})(?<tld>(?:a(?:[cd]|e(?:ro)?|[fgil-oqr]|s(?:ia)?|[tuwxz])|b(?:[abd-h]|iz?|[jl-oq-tvwyz])|c(?:at?|[cdf-ik-n]|o(?:m|op)?|[ru-z])|d[ejkmoz]|e[ceghr-u]|f[i-kmor]|g[abd-il-np-uwy]|h[kmnrtu]|i(?:[delm]|n(?:fo|t)?|[oq-t])|j(?:[em]|o(?:bs)?|p)|k[eg-imnprwyz]|l[a-cikr-vy]|m(?:[ac-hk]|lc?|[mn]|o(?:bi)?|[p-t]|u(?:seum)?|[v-z])|n(?:a(?:me)?|c|et?|[fgilopruz])|o(?:m|rg)|p(?:[ae-hk-n]|ost|ro?|[stwy])|qa|r[eosuw]|s(?:[a-eg-or]|t(?:udio)?|[uvx-z])|t(?:[cd]|el|[f-hj-p]|r(?:avel)?|[tvwz])|u[agkmsyz]|v[aceginu]|w[fs]|y[et]|z[amw]))(?<path>(\/(?:[a-z0-9+%-]\.?)+)*\/?)(?<query>\?[a-z+&$_.-][a-z0-9;:@&%=+\/.-]*)(?<hash>\#[a-z_.-][a-z0-9+$%_.-]*)(?<subdomain>[a-z0-9\-\.]+\.)(?<yt_domain>(?:www\.)?(?:youtube\.com|youtu\.be)\/)(?<yt_hash>(?:[\w-]{10,12})+)(?<yt_video>(?&proto)?(?&yt_domain)+(?:watch)?(?:\/embed\/|\?v=)+(?&yt_hash)+))((?&yt_video).*(*SKIP)(*FAIL)|(?&proto)?(?&subdomain)(?&tld)(?&port)?(?&path)?(?&query)?(?&hash)?)/p;
if ( $str =~ /$regex/g ) {
print "Whole match is ${^MATCH} and its start/end positions can be obtained via \$-[0] and \$+[0]\n";
# print "Capture Group 1 is $1 and its start/end positions can be obtained via \$-[1] and \$+[1]\n";
# print "Capture Group 2 is $2 ... and so on\n";
}
# ${^POSTMATCH} and ${^PREMATCH} are also available with the use of '/p'
# Named capture groups can be called via $+{name}
Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for Perl, please visit: http://perldoc.perl.org/perlre.html