use strict;
use warnings;

# Sample markup used to exercise the tokenizer regex below. It mixes an XML
# declaration, comments, self-closing and paired elements, attributes with
# single and double quotes, plain text and CDATA sections.
my $str = '<?xml version=\'1.0\' encoding=\'shift_jis\'?>
<!-- foo -->
<foo>bar</foo>
Baz
<foo asdf="foo" />
<CharacterInfo defaultUid=\'\'>
<Character
name=\'??????\'
uid=\'511111\'
weapon=\'??\'
HR=\'7\' GR=\'0\' lastLogin=\'1645561498\'
sex=\'M\' />
<Character name =\'Potatoe/>sss\' uid= \'511111\' weapon = \'??\' HR=\'7\' GR="0" lastLogin=\'1645561498\' sex=\'M\' />
</CharacterInfo>
Plain text
<hr/>
Multi
line
text
<br />
<!--
This is a comment!
<foo />
-->
<foo
bar/>
<baz asdf="2"></baz>
<asd foo="asd\\"q\'we" / >
<b>Hello, <i>World</i>!</b>
<Character
name=\'??????\'
uid=\'511111\'
weapon=\'??\'
HR=\'7\' GR=\'0\' lastLogin=\'1645561498\'
sex=\'M\' />
<Character name =\'Potatoe/>sss\' uid= \'511111\' weapon = \'??\' HR=\'7\' GR="0" lastLogin=\'1645561498\' sex=\'M\' />
<![CDATA[]]>
<![CDATA[Foobar<>]]>
<![CDATA[
asdasdasd
adsd
sd
sd
]]>
';

# One token of markup per match. Alternatives are tried in the order written.
# Flags: /s lets "." cross newlines, /x allows this layout, /p keeps the
# match variables available; /m is retained from the original pattern.
# Name classes are spelled out as A-Z, a-z and underscore: the shorthand
# range "A-z" would also accept the punctuation that sits between "Z" and "a"
# in ASCII (square brackets, backslash, caret, backtick).
my $regex = qr{
    (?:
        # CDATA section; payload captured lazily up to the first "]]>".
        <!\[CDATA\[ (?<cdata> .*? ) \]\]>
      |
        # XML declaration or processing instruction.
        (?<xml> <\? .*? \?> )
      |
        # Comment; body captured lazily up to the first "-->".
        <!-- (?<comment> .*? ) -->
      |
        # Element: tag name ...
        <\s* (?<tag> [A-Za-z_] [A-Za-z0-9_.:-]* )
        # ... zero or more attributes, each with an optional quoted value.
        (?:
            \s+ (?: [A-Za-z_] [A-Za-z0-9_.:-]* ) \s*
            (?:
                = \s* (?<quote> ["'] )
                # Value: backslash-escaped quotes or any non-quote character.
                ( (?: \\\k<quote> | (?! \k<quote> ) . )* )
                ( \k<quote> )
                \s*
            )?
        )*
        \s*
        (?:
            # Self-closing form.
            / \s* >
          |
            # Paired form. NOTE: innerHTML is greedy, so it extends to the
            # last closing tag of the same name found in the input.
            > (?<innerHTML> .* )? < \s* / \s* \k<tag> \s* >
        )
      |
        # Anything else up to the next "<" (may be empty).
        (?<text> [^<]* )
    )
}msxp;

# In scalar context a /g match finds one match per evaluation, so this
# reports only the first token of the input.
if ( $str =~ /$regex/g ) {
    print "Whole match is ${^MATCH} and its start/end positions can be obtained via \$-[0] and \$+[0]\n";
    # print "Capture Group 1 is $1 and its start/end positions can be obtained via \$-[1] and \$+[1]\n";
    # print "Capture Group 2 is $2 ... and so on\n";
}

# ${^PREMATCH} and ${^POSTMATCH} are also available because the pattern uses /p.
# Named capture groups can be read via $+{name}, e.g. $+{tag} or $+{cdata}.
# Please keep in mind that these code samples are automatically generated and are not guaranteed to work.
# If you find any syntax errors, feel free to submit a bug report.
# For a full regex reference for Perl, please visit: http://perldoc.perl.org/perlre.html