import re
# Strip HTML markup from a sample document and print the remaining text.
#
# The pattern has two alternatives, tried in this order:
#   1. a <script> or <style> element together with everything up to its
#      matching end tag, so embedded JavaScript/CSS is removed and does not
#      leak into the output as "text";
#   2. any other single start or end tag.
#
# re.IGNORECASE is used because HTML tag names are case-insensitive
# (<SCRIPT>...</script> is a valid element). The back-reference \1 honours the
# flag as well. `\s*` before the closing `>` accepts end tags written as
# `</script >`. re.MULTILINE was dropped: the pattern has no ^/$ anchors, so
# the flag had no effect.
regex = re.compile(
    r"<(script|style)[^>]*>[\s\S]*?</\1\s*>|</?[^>]+>",
    flags=re.IGNORECASE,
)

# Sample input: nested <div> blocks followed by inline <script> and <style>.
test_str = ("<div class=\"my_class\">\n"
            " <div class=\"copy\">\n"
            " © 2018 Texto\n"
            " </div>\n"
            " </div>\n\n"
            "<div class=\"my_class\">\n"
            " <h1>¿como estas?</h1>\n"
            " <div class=\"copy\">\n"
            " © 2018 Texto\n"
            " </div>\n"
            "<script>\n"
            "function hola(){\n\n"
            "}\n"
            "</script>\n"
            "<style>\n"
            ".red{\n"
            " background-color: red;\n"
            "}\n"
            "</style>\n\n")

# Every match is replaced by the empty string, i.e. deleted.
subst = ""

result = regex.sub(subst, test_str)

# Only print when something other than markup was left over.
if result:
    print(result)
# Note: this code sample was automatically generated and is not guaranteed to
# work. If you find any syntax errors, feel free to submit a bug report.
# For a full regex reference for Python, see:
# https://docs.python.org/3/library/re.html