"""Close unterminated ``<li>`` elements in an HTML fragment with a regex.

The pattern was originally written for PCRE: it used ``(?(DEFINE)...)``,
``(?<name>...)`` groups and ``(?&name)`` subroutine calls, none of which
Python's ``re`` module accepts, plus possessive quantifiers / atomic groups
that need Python 3.11+.  It is rewritten here in portable ``re`` syntax:
the two "subroutines" are plain string fragments spliced into the pattern.
"""
import re

# A tag name: a letter followed by letters/digits, ending at ">", whitespace
# or "/".  The trailing lookahead makes the match unambiguous, so no
# possessive quantifier is needed.
_TAG_NAME = r"[a-z][a-z\d]*(?=[>\s/])"

# Everything between the tag name and the closing ">": unquoted characters
# or complete quoted strings (which may themselves contain ">").
# One character per iteration instead of a nested "+" keeps backtracking
# linear without relying on possessive quantifiers.
_TAG_ATTR = r"""(?:[^>"']|"[^"]*"|'[^']*')*"""

regex = re.compile(
    rf"""
    # An opening <li ...> (optionally written <li/>) or a closing </ul>.
    < (?P<tag> li | /ul ) /? (?=[>\s/]) (?P<attrs> {_TAG_ATTR} ) >

    # The content that follows, as long as it holds no <li>/<ul> tag.
    (?P<body> (?:
        [^<]                                                       # plain text
      | < /? (?! (?:li|ul) (?=[>\s/]) ) {_TAG_NAME} {_TAG_ATTR} >  # any tag except <li> and <ul>
      | < (?! /? {_TAG_NAME} )                                     # a "<" that does not start a tag
    )* )

    # Only match when the next thing is a new <li or a closing </ul,
    # i.e. the current item was never closed.
    (?= < (?:li|/ul) (?=[>\s/]) )
    """,
    flags=re.DOTALL | re.IGNORECASE | re.VERBOSE | re.UNICODE,
)

test_str = ("<ul>\n"
            " <li fhgfhfg ghgfhfghhfgh= ghghg=\"...\" a = 'ghghg'>dfdsfsd\n"
            " <ul>\n"
            " <li>111</li>\n"
            " <li/>2222\n"
            " <li>mmmmm</li>\n"
            " </ul>\n"
            " <li>dfdsfdsfsdf <b class=\"...\"><br/>fgdfgd<i></b><!-- dfdf--><li>\n"
            " <li>\n"
            " <li>dsfsdfsdfdsf\n"
            "</ul>\n\n")

# Python spells group references \g<name> (not $N).  The optional "/" of a
# self-closing <li/> is not captured, so it is dropped from the rebuilt tag.
# The trailing \r is expanded by re.sub to a carriage return.
subst = r"<\g<tag>\g<attrs>>\g<body></LI>\r"

result = regex.sub(subst, test_str)

if result:
    print(result)
# Please keep in mind that these code samples are automatically generated and
# are not guaranteed to work. If you find any syntax errors, feel free to
# submit a bug report. For a full regex reference for Python, please visit:
# https://docs.python.org/3/library/re.html