import re
# Strip every HTML tag except anchors (<a ...> and </a>) from a sample
# document and print what is left.
#
# Pattern breakdown:
#   <                   opening angle bracket of a tag
#   (?!/?a[\s/>])       reject the match when the tag is exactly "a" or "/a",
#                       i.e. the name is followed by whitespace, "/" or ">".
#                       A bare (?!a|/a) would also spare <abbr>, <article>,
#                       <address>, ... because it only looks at the first
#                       letter of the tag name.
#   [^>]*               the rest of the tag up to the closing bracket
#                       (captured in group 1 together with the tag name)
#   >\s*                closing bracket plus any whitespace that follows it
#
# re.IGNORECASE keeps upper-case anchors (<A HREF=...>) too. re.MULTILINE was
# dropped: it only changes the meaning of ^ and $, which the pattern does not
# use.
regex = re.compile(r"<((?!/?a[\s/>])[^>]*)>\s*", flags=re.IGNORECASE)

# Sample input. NOTE: the doubled escapes are intentional data, not typos:
# \\\" yields a literal backslash followed by a double quote, and \\t yields a
# literal backslash followed by "t" (not a tab character).
test_str = ("<html>\n"
    "<head>\n"
    "<meta http-equiv=\\\"Content-Type\\\" content=\\\"text/html; charset=utf-8\\\">\n"
    "<meta content=\\\"text/html; charset=us-ascii\\\">\n"
    "<meta name=\\\"ProgId\\\" content=\\\"Word.Document\\\">\n"
    "<meta name=\\\"Generator\\\" content=\\\"Microsoft Word 15\\\">\n"
    "<meta name=\\\"Originator\\\" content=\\\"Microsoft Word 15\\\">\n"
    "<style>\n"
    "<!--\n"
    "@font-face\n"
    "\\t{font-family:\\\"Cambria Math\\\"}\n"
    "@font-face\n"
    "\\t{font-family:Calibri}\n"
    "p.MsoNormal, li.MsoNormal, div.MsoNormal\n"
    "\\t{margin:0cm;\n"
    "\\tmargin-bottom:.0001pt;\n"
    "\\tfont-size:11.0pt;\n"
    "\\tfont-family:\\\"Calibri\\\",sans-serif}\n"
    "a:link, span.MsoHyperlink\n"
    "\\t{color:#0563C1;\n"
    "\\ttext-decoration:underline}\n"
    "a:visited, span.MsoHyperlinkFollowed\n"
    "\\t{color:#954F72;\n"
    "\\ttext-decoration:underline}\n"
    "p.msonormal0, li.msonormal0, div.msonormal0\n"
    "\\t{margin-right:0cm;\n"
    "\\tmargin-left:0cm;\n"
    "\\tfont-size:11.0pt;\n"
    "\\tfont-family:\\\"Calibri\\\",sans-serif}\n"
    "span.EmailStyle18\n"
    "\\t{font-family:\\\"Calibri\\\",sans-serif}\n"
    ".MsoChpDefault\n"
    "\\t{font-size:10.0pt;\n"
    "\\tfont-family:\\\"Calibri\\\",sans-serif}\n"
    "@page WordSection1\n"
    "\\t{margin:72.0pt 72.0pt 72.0pt 72.0pt}\n"
    "div.WordSection1\n"
    "\\t{}\n"
    "-->\n"
    "</style>\n"
    "</head>\n"
    "<body lang=\\\"EN-GB\\\" link=\\\"#0563C1\\\" vlink=\\\"#954F72\\\" style=\\\"\\\">\n"
    "<div class=\\\"WordSection1\\\">\n"
    "<p class=\\\"MsoNormal\\\">I NEED TO HAVE THIS STRING</p>\n"
    "<p class=\\\"MsoNormal\\\"> </p>\n"
    "<p class=\\\"MsoNormal\\\"><span style=\\\"\\\">AND I NEED THE FOLLOWING ANCHOR\n"
    "<a href=\\\"google.com\\\">\n"
    " THIS ONE</a>.</span></p>\n"
    "<p class=\\\"MsoNormal\\\"> </p>\n"
    "</div>\n"
    "</body>\n"
    "</html>\n")

# Every matched tag (and the whitespace after it) is replaced by nothing.
subst = ""

# The <!-- ... --> block inside <style> contains no ">" before its own closing
# "-->", so the whole embedded stylesheet is consumed as a single "tag".
result = regex.sub(subst, test_str)

# Only print when something other than tags/whitespace survived.
if result:
    print(result)
# Please keep in mind that these code samples are automatically generated and
# are not guaranteed to work. If you find any syntax errors, feel free to
# submit a bug report. For a full regex reference for Python, please visit:
# https://docs.python.org/3/library/re.html