use strict;
use warnings;
use utf8;
# Subject text scanned by the pattern below. It starts with the rules the
# pattern is meant to enforce, followed by sample tokens grouped under
# "MUST MATCH" and "MUST NOT MATCH". The rule descriptions themselves contain
# `#{...}` examples, so some matches come from that part of the text as well.
my $str = '- Must start with `#{`, not captured.
- Must end with `}`, not captured.
- Named capture group \'inner\' within the brackets.
- Must only contain periods, dashes, underscores, unicode letters, and 0-9 digits.
- Must contain at least two unicode letters, or at least one unicode letter and digit.
- Cannot begin or end with a period. Ex.`#{.foo.}`not matched.
- Cannot start with a digit. Ex. `#{42_digitAtStart}` is not matched.
- No periods before unicode chars, Ex. `#{_.foo}` is not matched.
- Two periods cannot be next to one another. Ex. `#{foo.bar}` is OK, but `#{foo..bar}`, `#{.foo}`, and `#{foo.}` are not.
- cannot contain any mix of more than 4 dashes or underscores next to one another. Eg. `#{foo____}` and `#{-_-_foo-bar_-_-}` are OK, but `#{--___ foo}` and `#{bar-_-_--} are not.
MUST MATCH
#{alpha} #{beta} #{charlie}
#{ìñtérnâtiônàlizàtïon}
#{مرحبا} #{こんにちは} #{мир}
#{-dashes-}
#{_under_scores_}
#{dots.and-dashes-and_under_scores}
#{some_ASCII_Digits_12345}
#{a2ndPositionDigit}
#{test.foo.bar.baz}
#{outer_must_not_match#{#{only_deepest_must_match}}}
#{____fourUnderscoresAndDashesTogether}
#{fourDashes-_--}
#{a2} // One letter and one digit
#{period._} // Note period BEFORE letters is not allowed
MUST NOT MATCH
#{} // Empty
#{a} #{-} #{_} // Single characters
#{42_digitAtStart}
#{0123456789} // Only digits
#{.periodAtStart}
#{periodAtEnd.}
#{moreThanOnePeriod..Together}
#{a.singleStartingCharacterFollowedByPeriod}
#{#non-Dash_Underscore_Period.Symbols}
#{---} // Only Dashes
#{-_-} // Only dashes and underscores
#{_-_}
#{-_-_-moreThanFourModifiers}
#{_.dotBeforeCharacters}
#{-_-_-moreThanFourModifiers}
#{modifiers-----five}
#{modifiers______six}
#{modifiers-------seven}
#{modifiers_-_-_-_-eight}
#{moreThanFourModifiers-_-_-}
#{last\\} // escape backslashes
#{\\first}
#{mid\\dle}';

# Pattern for a `#{...}` token; the text between the braces is captured as
# the named group 'inner' (which is also group 1).
#
#   #\{                  literal opener
#   (?![.\d])            the first inner character may not be a period or digit
#   (?=(?<inner>...))\1  capture inside a lookahead, then consume it with a
#                        backreference, so the engine never backtracks into
#                        the captured text once the lookahead has succeeded
#   inside 'inner', one or more repetitions of:
#     [\-_]{0,4}         up to four leading dashes/underscores
#     (?<![\-_]{5})      fail if the five preceding characters are all
#                        dashes/underscores
#     [\p{L}\d]{2,}      at least two letters/digits
#     [\.\-_]?           at most one separator (period, dash or underscore)
#     [\-_]{0,3}         up to three further dashes/underscores
#   (?<!\.)\}            literal closer, not directly preceded by a period
#
# Flags: /u selects Unicode rules, /p makes ${^MATCH} and friends available.
# NOTE(review): under Unicode rules \d also accepts decimal digits outside
# 0-9, while the rules above mention only 0-9 -- confirm whether /a or [0-9]
# is wanted.
my $regex = qr/#\{(?![.\d])(?=(?<inner>(?:[\-_]{0,4}(?<![\-_]{5})[\p{L}\d]{2,}[\.\-_]?[\-_]{0,3})+))\1(?<!\.)\}/up;

# Matched text can contain non-ASCII characters; give STDOUT an encoding
# layer so printing them does not produce "Wide character" warnings.
binmode STDOUT, ':encoding(UTF-8)';

# A scalar-context //g match resumes where the previous one stopped, so the
# loop visits every token in $str in turn rather than only the first one.
my $match_count = 0;
while ( $str =~ /$regex/g ) {
    $match_count++;
    print "Whole match is ${^MATCH} and its start/end positions can be obtained via \$-[0] and \$+[0]\n";
    # print "Capture Group 1 is $1 and its start/end positions can be obtained via \$-[1] and \$+[1]\n";
    # print "Capture Group 2 is $2 ... and so on\n";
}
print "No match found\n" if $match_count == 0;
# ${^POSTMATCH} and ${^PREMATCH} are also available with the use of '/p'
# Named capture groups can be called via $+{name}
# Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for Perl, please visit: http://perldoc.perl.org/perlre.html