import re
# Pattern for a block of raw HTML that starts at the beginning of the text or
# after a blank line: either an element with a matching closing tag, an HTML
# comment, or a lone (self-closing or unclosed) tag.
#
# The pattern was originally written in PCRE syntax, which Python's `re`
# module rejects.  It is translated here construct by construct:
#   * (?<name>...)       -> (?P<name>...)
#   * \g{tag_name}       -> (?P=tag_name)
#   * \z                 -> \Z  (end of string only)
#   * [[:blank:]\h], \v  -> explicit character classes built from the
#                           code points listed below
#   * (?&tag_attr) / (?(DEFINE)...) -> the attribute sub-pattern is inlined,
#                           since `re` has no subroutine calls
#   * (?<quote>["'])(.*?)\g{quote} -> (?:".*?"|'.*?'), which matches the same
#                           text without needing a per-copy named group
# `re` forbids duplicate group names, so the groups of the lone-tag
# alternative are called `lone_tag_name` / `lone_tag_attributes`.

# Code points matched by PCRE's \h (this also covers [:blank:], i.e. tab/space).
_HSPACE = r"\t\x20\xa0\u1680\u180e\u2000-\u200a\u202f\u205f\u3000"
# Code points matched by PCRE's \v.
_VSPACE = r"\n\x0b\x0c\r\x85\u2028\u2029"

# Optional run of horizontal whitespace (used inside attributes).
_HS = rf"[{_HSPACE}]*"
# Optional run of any whitespace, horizontal or vertical (used around tags).
_WS = rf"[{_HSPACE}{_VSPACE}]*"

# One attribute: name=value with an unquoted value, or name="value" /
# name='value'.  The unquoted branch was written `[^\"\'[:blank]\h]+` in the
# source; the missing colon closed the class early, so it is rendered here
# with the evidently intended meaning "anything but quotes or horizontal
# whitespace".
_TAG_ATTR = rf"""
(?:
    (?: {_HS} [\w\-]+ {_HS} = [^\"\'{_HSPACE}]+ {_HS} )
  |
    (?: {_HS} [\w\-]+ {_HS} = {_HS} (?: ".*?" | '.*?' ) {_HS} )
)
"""

# NOTE(review): tag names require at least two characters
# ([a-zA-Z0-9][\w\-]+), so one-letter tags such as <p> or <a> never match.
# Kept as in the source pattern -- confirm whether that is intended.
regex = re.compile(
    rf"""
    # Anchor: start of text (plus leading newlines) or a blank line.
    (?:(?<=\n\n)|\A\n*|\n{{2,}})
    [ ]{{0,3}}
    (?P<tag_all>
        (?:
            # 1. Element with a closing tag of the same name.
            (?:
                < {_WS}
                (?P<tag_name>[a-zA-Z0-9][\w\-]+)
                (?P<tag_attributes>{_TAG_ATTR})*
                {_WS} >
                (?P<html_content>.*?)
                (?P<tag_close> < {_WS} / (?P=tag_name) {_WS} > )
            )
          |
            # 2. HTML comment.
            (?: <!-- {_WS} (?P<html_comment>.*?) {_WS} --> )
          |
            # 3. Lone tag, optionally self-closing.
            (?:
                < {_WS}
                (?P<lone_tag_name>[a-zA-Z0-9][\w\-]+)
                (?P<lone_tag_attributes>{_TAG_ATTR})*
                {_WS} /? {_WS} >
            )
        )
        # The block must end at the end of the text, or at a line break that
        # is NOT followed by another tag (otherwise the lazy html_content is
        # forced to extend so adjacent HTML lines form one block).
        (?P<html_after>
            \Z
          |
            [ ]*\n{{1,2}}
            (?!
                {_WS}
                (?:
                    (?: < {_WS} [a-zA-Z0-9][\w\-]+ (?:{_TAG_ATTR})* {_WS} > )
                  |
                    (?: < {_WS} /? {_WS} [\w\-]+ (?:{_TAG_ATTR})* {_WS} /? {_WS} > )
                )
            )
        )
    )
    """,
    flags=re.VERBOSE | re.DOTALL | re.MULTILINE,
)
# Sample input: two <abbr> elements on consecutive lines.  The first line
# ends with a space before the line break, and each title attribute value
# starts with a backtick.
test_str = (
    '<abbr title="`first backtick!">SB</abbr> \n'
    '<abbr title="`second backtick!">SB</abbr>'
)
# Report every match of `regex` in `test_str`, followed by the span and text
# of each of its capture groups.  For a group that did not take part in the
# match, match.start()/match.end() return -1 and the group value is None.
matches = regex.finditer(test_str)
for match_num, match in enumerate(matches, start=1):
    print(f"Match {match_num} was found at {match.start()}-{match.end()}: {match.group()}")
    # The nested loop must sit inside the outer one: it reads the current
    # `match` object for each group's position.
    for group_num, group in enumerate(match.groups(), start=1):
        print(f"Group {group_num} found at {match.start(group_num)}-{match.end(group_num)}: {group}")
# Please keep in mind that these code samples are automatically generated and are not guaranteed to work.
# If you find any syntax errors, feel free to submit a bug report.
# For a full regex reference for Python, please visit: https://docs.python.org/3/library/re.html