import re
# Pattern that strips XML namespace decorations from markup text. It removes:
#   * a "prefix:" that directly follows "<", "</" or a single space
#     (element prefixes and attribute prefixes), and
#   * whole ` xmlns="..."` / ` xmlns:prefix="..."` declarations, including
#     the leading space.
# Python's `re` only accepts fixed-width look-behinds, so the "<" and "</"
# cases are written as separate assertions; a single `(?<=<|<\/)` raises
# "look-behind requires fixed-width pattern" at compile time.
# NOTE: this is purely textual matching, so a "word:" that follows a space
# inside element text is removed as well.
regex = re.compile(r"((?<=<)|(?<=<\/)|(?<= ))[A-Za-z0-9]+:| xmlns(:[A-Za-z0-9]+)?=\".*?\"")

# Sample document used to exercise the pattern.
test_str = ("<?xml version=\"1.0\" encoding=\"utf-16\"?>\n"
            "<ArrayOfInserts xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xmlns:xsd=\"http://www.w3.org/2001/XMLSchema\">\n"
            " <insert>\n"
            " <offer xmlns=\"http://schema.peters.com/doc_353/1/Types\">0174587</offer>\n"
            " <type2 xmlns=\"http://schema.peters.com/doc_353/1/Types\">014717</type2>\n"
            " <supplier xmlns=\"http://schema.peters.com/doc_353/1/Types\">019172</supplier>\n"
            " <id_frame xmlns=\"http://schema.peters.com/doc_353/1/Types\" />\n"
            " <type3 xmlns=\"http://schema.peters.com/doc_353/1/Types\">\n"
            " <type2 />\n"
            " <main>false</main>\n"
            " </type3>\n"
            " <status xmlns=\"http://schema.peters.com/doc_353/1/Types\">Some state</status>\n"
            " </insert>\n"
            "</ArrayOfInserts>")

# Every match is replaced with the empty string, i.e. deleted.
subst = ""

result = regex.sub(subst, test_str)

# Print only when something is left after the substitution.
if result:
    print(result)
# Please keep in mind that these code samples are automatically generated and
# are not guaranteed to work. If you find any syntax errors, feel free to
# submit a bug report. For a full regex reference for Python, please visit:
# https://docs.python.org/3/library/re.html