import re
# Demo script: run a tag/attribute pattern over a set of sample HTML-like
# lines and print every match together with the span of each capture group.
#
# Pattern layout:
#   (?:<\s*[\w-]+)*   optional tag opener(s): "<", optional whitespace, a name
#   (?: \s ([\w-]+)   group 1: attribute name, preceded by one whitespace char
#       (?: \s*=\s*   optional "=value" part, either
#           "(...)"   group 2: double-quoted value (may contain \" escapes)
#         | '(...)'   group 3: single-quoted value (may contain \' escapes)
#       )*
#   )*?
#
# NOTE(review): the final quantifier is lazy (`*?`) and nothing follows it in
# the pattern, so the attribute part always matches zero repetitions. As
# written, every match is either a tag opener such as "<tag" or an empty
# string, and groups 1-3 never participate. Confirm whether a greedy `*` (or a
# trailing anchor such as `\s*/?>`) was intended before relying on the groups.
regex = re.compile(r"(?:<\s*[\w-]+)*(?:\s([\w-]+)(?:\s*=\s*(?:(?:\"((?:(?:\s|\S)*?[^\\])*?)\")|(?:'((?:(?:\s|\S)*?[^\\])*?)')))*)*?", flags=re.MULTILINE)

# Sample input: one tag per line, covering bare tags, whitespace after "<",
# valueless attributes, quoted values, and escaped quotes inside values.
test_str = ("<tag></tag>\n"
            "< tag></ tag>\n"
            "<tag></ tag>\n"
            "<tag attribute></ tag >\n"
            "<tag attribute1 attribute-2 attribute_3 /></tag>\n"
            "<tag attribute=\"value\" attribute= \"value\"></ tag>\n"
            "<tag attribute></tag>\n"
            "< tag attribute attribute></tag>\n"
            "< tag attribute=\"value\" attribute=\"val\\\"ue\" attribute='val\\'ue'></tag>\n"
            "<tag attribute></tag>\n"
            "<tag attribute attribute></tag>\n"
            "<tag attribute=\"value\" attribute=\"value\"></tag>\n"
            "< tag attribute=\"value\" attribute=\"value\"></tag>\n"
            "< tag attribute=\"value\" attribute =\"value\" attribute></tag>")

# finditer yields non-overlapping matches lazily, including empty ones.
matches = regex.finditer(test_str)

for match_num, match in enumerate(matches, start=1):
    print(f"Match {match_num} was found at {match.start()}-{match.end()}: {match.group()}")

    # Groups are numbered from 1; a group that did not take part in the match
    # is reported by `re` as None with start/end of -1.
    for group_num, group in enumerate(match.groups(), start=1):
        print(f"Group {group_num} found at {match.start(group_num)}-{match.end(group_num)}: {group}")
# Please keep in mind that these code samples are automatically generated and
# are not guaranteed to work. If you find any syntax errors, feel free to
# submit a bug report. For a full regex reference for Python, please visit:
# https://docs.python.org/3/library/re.html