use strict;
use warnings;
use utf8;    # the sample text and the pattern contain literal UTF-8 characters

# Demonstrates a pattern that recognises hyphen/dash-like characters, either
# written literally or as an HTML character reference (hexadecimal, decimal
# or named). The trailing ';' of a reference is optional.

# A matched literal dash may be a wide character, so encode output explicitly.
binmode( STDOUT, ':encoding(UTF-8)' )
    or die "Cannot set UTF-8 encoding on STDOUT: $!";

# Sample subject text; its last line is a verbatim copy of the pattern.
my $str = 'This captures an entity even if it lacks the \';\', which is commonly encountered in the wild.
kbdash - - - - -; -
dash ‐ ‐ ‐ ‐ ‐ ‐; ‐
hyphen ‑ ‐ ‑ ‑ ‑ ‑; ‑
figure ‒ ‒ ‒ ‒ ‒; ‒
em – – – – – –; –
en — — — — — —; —
horbar ― ― ― ― ― ―; ―
minus − − − − − −; −
hybull ⁃ ⁃ ⁃ ⁃ ⁃ ⁃; ⁃
fe58 ﹘ ﹘ ﹘ ﹘ ﹘; ﹘
fe63 ﹣ ﹣ ﹣ ﹣ ﹣; ﹣
ff0d - - - - -; -
(?:([-‐‑‒–—―−⁃﹘﹣-])|(?:&(?:(?:#x(2d|201[0-5]|2212|2043|fe58|fe63|ff0d))|(?:#(45|820[89]|821[0123]|8722|8259|65112|65123|65293))|(hyphen|[nm]?dash|hybull|horbar|minus));?))';

# Capture groups:
#   $1  a literal dash character
#   $2  hex digits of a '&#x...' reference (lowercase only; there is no /i)
#   $3  decimal digits of a '&#...' reference
#   $4  the name of a named entity
# Do not add /x to this pattern: the literal '#' would start a comment.
my $regex = qr/(?:([-‐‑‒–—―−⁃﹘﹣-])|(?:&(?:(?:#x(2d|201[0-5]|2212|2043|fe58|fe63|ff0d))|(?:#(45|820[89]|821[0123]|8722|8259|65112|65123|65293))|(hyphen|[nm]?dash|hybull|horbar|minus));?))/mup;

# Only the first match is reported, so no /g: in scalar context /g would
# still stop at the first match but leave pos($str) set as a side effect.
# /p is repeated here so that ${^MATCH} is populated on older perls too.
if ( $str =~ /$regex/p ) {
    print "Whole match is ${^MATCH} and its start/end positions can be obtained via \$-[0] and \$+[0]\n";
    # print "Capture Group 1 is $1 and its start/end positions can be obtained via \$-[1] and \$+[1]\n";
    # print "Capture Group 2 is $2 ... and so on\n";
}

# ${^POSTMATCH} and ${^PREMATCH} are also available with the use of '/p'
# Named capture groups can be called via $+{name}
# Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for Perl, please visit: http://perldoc.perl.org/perlre.html