use strict;
# Multi-line sample text that the pattern below is matched against.  It mixes
# quoted string literals, backslash escapes, trailing "#" comments and a few
# deliberately malformed lines (for example an unterminated quote).
# q{} is used so apostrophes need no escaping; "\\" still yields one backslash.
my $str = q{print("Hello, world!") # this is a comment.
print("The quick \\\\brown\\\\\\ndog\\rfox\\tjumps\\t over #the# /lazy/ \\"dog\\"###") # and here's where the real comment starts
print("you can use Alt+0009 to enter a tab character!")
# The above line had no comments, so the <comment> group didn't participate in the match.
print("indented code") # with comment
print("never indent code like this, but it still matches anyway")# comment lacking preceding whitespace >:)
print("space indented code")
print("Missing end quote
print("unescaped \\")
print("implicitly" " joined## " "string " "lit#erals") # a line can contain multiple string literals
print("the quick " # line 1
"brown fox " # line 2
"jumps over " # line 3
"the lazy dog") # line 4
print("The quick brown", animal, "jumps over the", adjective, animal2, sep=" ") "
print('Robert'); DROP TABLE `Students`; -- ')};
# Pattern for one line of source text: optional indentation, a statement made
# of code interleaved with quoted string literals, optional whitespace, an
# optional trailing "#" comment and an optional line ending.
#
# Flags: /x lets the pattern be laid out with comments (every literal space is
# inside a bracketed class and every hash is escaped, so matching is
# unaffected), /m makes the anchors work per line, /p keeps the match
# variables available.
#
# Perl spells a named-group condition as (?(<NAME>)yes|no).  The bare
# (?(NAME)yes|no) form accepted by PCRE is rejected by Perl with
# "Unknown switch condition", so the angle brackets are required here.
my $regex = qr{
    ^
    (?P<indentation> (?P<sp_or_tab> [ \t] )*+ )        # leading spaces or tabs
    (?P<stmt>
        (?P<non_str_lit> (?: (?!\#) [^\"'\r\n] )*? )   # code outside quotes, never a hash
        (?:
            (?P<str_lit>
                (?P<begin_quote> (?P<single_quote> ' ) | \" )
                (?:
                    (?(<single_quote>) \" | ' )        # the other quote character is plain text
                  | \\ [\"']                           # backslash followed by a quote
                  | \\ [^\r\n ]                        # backslash followed by another character
                  | [^\"'\\\r\n]                       # any ordinary character
                )*?
                (?P=begin_quote)                       # literal closes with the quote that opened it
            )
            (?&non_str_lit)                            # more code after the literal
        )*?
    )
    (?P<whitespace> (?&sp_or_tab)*+ )                  # gap before the comment or line end
    (?P<comment> (?: \# ) [^\r\n]*+ )?                 # optional trailing comment
    (?P<line_ending> \r | \n | \r\n )?
    $
}xmp;
# Report every line of $str that the pattern matches.  In scalar context
# m//g resumes from pos($str) on each evaluation, so a while loop visits each
# match in turn; a plain "if" would stop after the first match only.
while ( $str =~ /$regex/g ) {
    # ${^MATCH} holds the text of the current match (pattern compiled with /p).
    print "Whole match is ${^MATCH} and its start/end positions can be obtained via \$-[0] and \$+[0]\n";
    # print "Capture Group 1 is $1 and its start/end positions can be obtained via \$-[1] and \$+[1]\n";
    # print "Capture Group 2 is $2 ... and so on\n";
}
# ${^POSTMATCH} and ${^PREMATCH} are also available with the use of '/p'
# Named capture groups can be called via $+{name}
# Please keep in mind that these code samples are automatically generated and
# are not guaranteed to work. If you find any syntax errors, feel free to
# submit a bug report. For a full regex reference for Perl, please visit:
# http://perldoc.perl.org/perlre.html