# coding=utf8
# the above tag defines encoding for this document and is for Python 2.x compatibility
import re
# Wrap every non-empty line of ``test_str`` in <p>...</p> unless the line
# starts with an opening or closing block-level tag (h1-h6, ul, li, script,
# blockquote).
#
# Pattern groups: 1-4 live inside the leading negative lookahead that rejects
# lines starting with one of those tags; group 5 captures the whole line and is
# the only group used by the substitution.  The trailing negative lookahead
# sits directly before ``$`` (where the next character can only be a newline or
# end of input), so it never rejects a line; it is kept unchanged from the
# generated pattern.
regex = r"(?i)^(?!((<\/|<)(h[1-6]|ul|li|script|blockquote)(\s.+)*>))(.+)(?!(<(\/\3)>))$"

test_str = (
    "<h2>Lorem ipsum dolor sit amet, consectetur adipiscing elit</h2>\n\n"
    "Vivamus vel tempor turpis, <strong>non rutrum quam</strong>. Suspendisse ac rhoncus felis, eget porta lectus. \n\n"
    "Nam vulputate sapien risus, vel vehicula mi volutpat sed. \n\n"
    "<ul>\n"
    "<li>Sed feugiat nibh at nisl eleifend scelerisque.</li>\n"
    "<li>Aliquam non maximus ipsum. Aliquam erat volutpat.</li>\n"
    "</ul>\n\n"
    "<h3>Praesent eget diam sit amet leo vehicula sagittis at quis tortor</h3>\n\n"
    "Nunc nec sem ac nunc tincidunt aliquam ut nec dolor. Nulla facilisi.\n\n"
    "<img alt=\"xxxx\" src=\"xxx.png\" class=\"xxxxx\"/> \n\n"
    "<caption><i>ccccc</i></caption> \n\n"
    "<img alt=\"xxxx\" src=\"xxx.png\" class=\"xxxxx\"/> \n\n"
    "<i>ccccc</i>\n\n"
    "<strong>Nunc nec sem ac nunc tincidunt aliquam ut nec dolor. Nulla facilisi.</strong>\n\n"
    "Vivamus vel tempor turpis, <strong>non rutrum quam</strong>. Suspendisse ac rhoncus felis, eget porta lectus.\n\n"
    "Nunc nec\n"
    "Nunc nec\n"
    "Nunc nec\n\n"
    "<blockquote class=\"twitter-tweet\" data-lang=\"en\"><p lang=\"en\" dir=\"ltr\">Vivamus vel tempor turpis <a href=\"\"></a></p>— xxxxx (@xxxxxx) <a href=\"https://twitter.com/xxxx/status/x?ref_src=x\">February 15, 2019</a></blockquote>\n\n"
    "<script async src=\"\" charset=\"utf-8\"></script>"
)

# Python's replacement templates use \N or \g<N> for group references; the
# "$5" form emitted by the generator is PCRE/JavaScript syntax and would be
# inserted literally.  \g<5> is used because it cannot be misread as part of
# a longer group number.
subst = r"<p>\g<5></p>"

# ``count=0`` replaces every match; change it to limit the number of
# replacements.  ``count`` and ``flags`` are passed by keyword because passing
# them positionally is deprecated since Python 3.13.  MULTILINE makes ^ and $
# anchor at every line boundary instead of only at the ends of the string.
result = re.sub(regex, subst, test_str, count=0, flags=re.MULTILINE)

if result:
    print(result)
# Note: for Python 2.7 compatibility, use ur"" to prefix the regex and u"" to prefix the test string and substitution.
# Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for Python, please visit: https://docs.python.org/3/library/re.html