import re
# Matches one PHP code island: from an opening "<?php" tag up to the closing
# "?>" tag, or to the end of the input when the tag is never closed.
#
# Between the tags the pattern repeatedly tries, in this order:
#   1. a run of characters other than ", ', /, < and ?;
#   2. a double-quoted string (a backslash escapes the next character);
#   3. a /* ... */ block comment;
#   4. a "#" or "//" line comment with at least one character, through the
#      end of the line;
#   5. "<<<" followed by a nowdoc ('ID') or heredoc (ID) header line, then
#      lines up to one holding the same identifier (optionally followed by ";");
#   6. as a last resort, any single character that does not begin "?>".
# A "?>" inside constructs 2-5 therefore does not terminate the match.
# Single-quoted strings are NOT recognised: that alternative is switched off
# by the "#" comments inside the verbose pattern.
#
# Capture groups: 1 = nowdoc identifier, 2 = heredoc identifier.
#
# The block-comment alternative uses the unrolled form
# "/* non-stars, stars, (non-slash-non-star, non-stars, stars)*, /".
# The earlier "(?:[^*]|\*[^/])*" loop consumed stars in pairs, so a comment
# closed by an even run of stars (e.g. "/* x **/") was either missed or ran
# on to a later "*/", moving the end of the match.
#
# NOTE(review): "#" is not excluded from the character class of alternative 1,
# so a "#" is normally consumed by that run before alternative 4 is tried.
regex = re.compile(r"""
(?:
<\?php
(?:
[^"'\/<\?]+
|
(?:
"(?:[^\\"]|\\[\s\S])*"
|
# '(?:[^\\']|\\[\s\S])*'
#|
\/\*[^*]*\*+(?:[^\/*][^*]*\*+)*\/
|
(?:\#|\/\/).+(?:\n|$)
|
(?:
<<<\s*
(?:
(?:'([a-zA-Z0-9_]+)'\s*?\n)(?:[^\n]*\n)*?\s*\1\s*?;?\s*?(?:\n|$)
|
(?:([a-zA-Z0-9_]+)\s*?\n)(?:[^\n]*\n)*?\s*\2\s*?;?\s*?(?:\n|$)
)
)
)
|
(?:[^?]|\?[^>])
)*
(?:\?>|$)
)
""", flags=re.VERBOSE | re.IGNORECASE)
# Sample subject text for the regex: an HTML comment followed by a single
# "<?php ... ?>" block. The PHP code contains "//" line comments, two heredocs
# (PREG and PREG2) whose bodies themselves contain "<!--" and "?>" sequences,
# single- and double-quoted strings, and a "/* */" block comment.
test_str = ("<!--\n"
" The PHP masterpage\n"
"-->\n"
"<?php\n"
"$matches = array();\n"
"// separates \n"
"$re = <<<PREG\n"
" /^\n"
" \n"
" (?:\\\\s*(?:<!--.*?-->))*\n"
" <\\\\?php\\\\s*include\\\\s*(\\\\()?\\\\s*([\\\\'\\\\\"])(?:(?!\\\\2).)*(?-i)Master\\\\.php(?i)\\\\2(?(-2)\\\\))\\\\s*;\\\\s*\\\\?>\\\\s*\n"
" (?:\\\\s*(?:<!--.*?-->))*\n"
" <master\\\\s*src=([\\\\'\\\\\\\"])(?<master_url>.+?)\\\\3\\\\/>(?<content>[\\s\\S]*)\n"
" $\n"
" /ix\n"
"PREG;\n\n\n"
"$re = <<<PREG2\n"
" /^\n"
" \n"
" (?:\\\\s*(?:<!--.*?-->))*\n"
" <\\\\?php\\\\s*include\\\\s*(\\\\()?\\\\s*([\\\\'\\\\\"])(?:(?!\\\\2).)*(?-i)Master\\\\.php(?i)\\\\2(?(-2)\\\\))\\\\s*;\\\\s*\\\\?>\\\\s*\n"
" (?:\\\\s*(?:<!--.*?-->))*\n"
" <master\\\\s*src=([\\\\'\\\\\\\"])(?<master_url>.+?)\\\\3\\\\/>(?<content>[\\s\\S]*)\n"
" $\n"
" /ix\n"
"PREG2;\n\n"
"$raw = file_get_contents(filter_input(INPUT_SERVER, 'SCRIPT_FILENAME'));\n"
"preg_match($re, $raw, $matches);\n\n"
"$master = file_get_contents($matches[\"master_url\"]);\n"
"$child = $matches[\"content\"];\n\n"
"$child_txt = eval2str($child);\n\n"
"$master_txt = eval2str($master);\n\n"
"$placeholder_xml = new DOMDocument();\n"
"@$placeholder_xml->loadHTML($master_txt, LIBXML_NOWARNING);\n\n"
"$child_xml = new DOMDocument();\n"
"@$child_xml->loadHTML($child_txt, LIBXML_NOWARNING);\n\n"
"$child_elems = $child_xml->getElementsByTagName(\"content\");\n\n\n"
"foreach($child_elems as $c)\n"
"{\n"
" $name = $c->getAttribute(\"name\");\n"
" $child_map[$name] = $placeholder_xml->importNode($c, true);\n"
"}\n\n"
"$placeholder_elems = $placeholder_xml->getElementsByTagName(\"placeholder\");\n"
"while($placeholder_elems->length > 0)\n"
"{\n"
" $oldnode = $placeholder_elems->item(0);\n"
" $name = $oldnode->getAttribute(\"name\");\n"
" \n"
" if(!isset($child_map[$name]))\n"
" {\n"
" $newnode = $placeholder_xml->createElement(\"content\");\n"
" $attr = $placeholder_xml->createAttribute(\"name\");\n"
" $attr->value = $name;\n"
" $newnode->appendChild($attr);\n"
" $child_map[$name] = $newnode;\n"
" }\n"
" $newnode = $child_map[$name];\n"
" \n"
" $parent = $oldnode->parentNode;\n"
" \n"
" $parent->replaceChild($newnode, $oldnode);\n"
" $placeholder_elems = $placeholder_xml->getElementsByTagName(\"placeholder\"); // \n"
"}\n\n"
"die($placeholder_xml->saveHTML()); // to end execution of the remaining code in the calling file\n"
"/*\n\n"
"*/\n"
"function eval2str($code)\n"
"{\n"
" ob_start();\n"
" eval(\"?>$code\");\n"
" return ob_get_clean();\n"
"}\n"
"?>")
# Report every match of ``regex`` in ``test_str``, followed by the span and
# text of each of its capture groups.
matches = regex.finditer(test_str)

for match_num, match in enumerate(matches, start=1):
    print(f"Match {match_num} was found at {match.start()}-{match.end()}: {match.group()}")

    for group_num, group in enumerate(match.groups(), start=1):
        if group is None:
            # A group that took no part in the match has span (-1, -1);
            # say so instead of printing a misleading "-1--1" range.
            print(f"Group {group_num} did not participate in the match")
            continue
        print(f"Group {group_num} found at {match.start(group_num)}-{match.end(group_num)}: {group}")
# Please keep in mind that these code samples are automatically generated and
# are not guaranteed to work. If you find any syntax errors, feel free to
# submit a bug report. For a full regex reference for Python, please visit:
# https://docs.python.org/3/library/re.html