use strict;
use warnings;

# Demonstration script: parse request ("->") and response ("<-") log records
# out of a multi-line sample string using a single regex with named captures.

# Sample log data, one record per line. Single-quoted so that the braces,
# brackets and pipes inside the query string are taken literally.
my $str = '14/May/2021:09:37:39 +0000 [715153] -> GET /content/ams/healthcheck/regent.html HTTP/1.1
14/May/2021:09:37:39 +0000 [2585977] <- 200 text/html;charset=UTF-8 6ms
14/May/2021:10:37:39 +0100 [1502141] -> GET /libs/granite/csrf/token.json HTTP/1.1
14/May/2021:09:37:39 +0000 [715152] <- 200 text/html;charset=UTF-8 6ms
14/May/2021:11:52:22 +0000 [11097977] -> GET /content/onehub_nfz/ru/ru/model-overview/caddy.html?tc=oa-[B]_[Caddy]_[All]_[None]_[BMM]_[RU]_[Eval]_Yandex_Search_AON_Paid_Search-Yandex-cpc-Search-banner&kw={PHRASE}&utm_source=yandex&utm_medium=cpc&utm_campaign=[B]_[Caddy]_[All]_[None]_[BMM]_[RU]_[Eval]_Yandex_Search_AON_Paid_Search&utm_term={PHRASE}&utm_content=s_{SRC}|cid_45783621|gid_{GBID}|aid_8728578416|pid_{PHRASE_EXPORT_ID}|rid_{PARAM126}|p_{POS}|pty_{PTYPE} HTTP/1.1
';

# One record per line (/m anchors ^ and $ at line boundaries).
#
# Named captures always attempted:
#   request_date, request_offset, request_id, request_inout
# Request records additionally set:
#   request_method (optional), request_path, request_protocol
# Response records additionally set:
#   request_code, request_filetype, request_duration
#
# Notes on the pattern:
#   * Fields are separated with \h+ (horizontal whitespace) so that a field
#     separator can never swallow a newline and glue two records together.
#   * The HTTP method is a real alternation; a bracketed class such as
#     [GET|POST] would accept any mix of those letters and the pipe.
#   * The offset accepts an optional leading sign, so -0500 parses as well.
#   * The id stops at the first closing bracket instead of scanning greedily
#     to the last one on the line.
#   * The path character set is carried over unchanged from the original
#     pattern: two leading characters that are not semicolons, followed by
#     one or more characters from the listed URL set.
#   * The record ends at end of line, so a final record without a trailing
#     newline is still recognised.
my $regex = qr{
    ^ (?<request_date> [\w/:]+ )
    \h+ (?<request_offset> [+-]? \d+ )
    \h+ \[ (?<request_id> [^\]]+ ) \]
    \h+ (?<request_inout> [-<>]+ )
    \h+
    (?:
        (?: (?<request_method> GET|POST|HEAD|PUT|DELETE|CONNECT|OPTIONS|TRACE|PATCH ) \h+ )?
        (?<request_path> [^;][^;][a-zA-Z0-9\/\_\-\.\=\@\:\%\+\~\#\?\&\{\}\[\]\|]+ )
        \h+ (?<request_protocol> \S+ )
      |
        (?<request_code> \d+ )
        \h+ (?<request_filetype> .+? )
        \h+ (?<request_duration> \w+ )
    )
    \h* $
}mxp;

# In scalar context m//g resumes after the previous match, so a while loop
# visits every record in the sample; an if would stop after the first one.
while ( $str =~ /$regex/g ) {
    print "Whole match is ${^MATCH} and its start/end positions can be obtained via \$-[0] and \$+[0]\n";
    # print "Capture Group 1 is $1 and its start/end positions can be obtained via \$-[1] and \$+[1]\n";
    # print "Capture Group 2 is $2 ... and so on\n";
}

# ${^POSTMATCH} and ${^PREMATCH} are also available with the use of '/p'
# Named capture groups can be called via $+{name}

# Please keep in mind that these code samples are automatically generated and
# are not guaranteed to work. If you find any syntax errors, feel free to
# submit a bug report. For a full regex reference for Perl, please visit:
# http://perldoc.perl.org/perlre.html