import re
# Extract the text that sits directly between the end of a tag and the start
# of a closing tag, e.g. "Header" in "<h2>Header</h2>".
#
# Python's `re` only accepts fixed-width look-behind, so a look-behind such as
# `(?<=<.+.>)` fails at compile time with
# "look-behind requires fixed-width pattern". This pattern keeps the same
# intent with a one-character look-behind instead:
#   (?<=>)      the text must start right after the ">" that ends a tag
#   ([^<>\n]+)  group 1: a non-empty, single-line run containing no tag
#               delimiters, so a match can never swallow a nested tag
#   (?=</)      the text must be followed by the start of a closing tag
# Because both anchors are look-arounds, the whole match equals group 1.
# NOTE: this is a sample-grade heuristic, not an HTML parser.
regex = re.compile(r"(?<=>)([^<>\n]+)(?=</)")

# Sample HTML document the pattern is exercised against.
test_str = ("<h1>HTML Ipsum Presents</h1>\n\n"
    "<p><strong>Pellentesque habitant morbi tristique</strong> senectus et netus et malesuada fames ac turpis egestas. Vestibulum tortor quam, feugiat vitae, ultricies eget, tempor sit amet, ante. Donec eu libero sit amet quam egestas semper. <em>Aenean ultricies mi vitae est.</em> Mauris placerat eleifend leo. Quisque sit amet est et sapien ullamcorper pharetra. Vestibulum erat wisi, condimentum sed, <code>commodo vitae</code>, ornare sit amet, wisi. Aenean fermentum, elit eget tincidunt condimentum, eros ipsum rutrum orci, sagittis tempus lacus enim ac dui. <a href=\"#\">Donec non enim</a> in turpis pulvinar facilisis. Ut felis.</p>\n\n"
    "<h2>Header Level 2</h2>\n\n"
    "<ol>\n"
    " <li>Lorem ipsum dolor sit amet, consectetuer adipiscing elit.</li>\n"
    " <li>Aliquam tincidunt mauris eu risus.</li>\n"
    "</ol>\n\n"
    "<blockquote><p>Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vivamus magna. Cras in mi at felis aliquet congue. Ut a est eget ligula molestie gravida. Curabitur massa. Donec eleifend, libero at sagittis mollis, tellus est malesuada tellus, at luctus turpis elit sit amet quam. Vivamus pretium ornare est.</p></blockquote>\n\n"
    "<h3>Header Level 3</h3>\n\n"
    "<ul>\n"
    " <li>Lorem ipsum dolor sit amet, consectetuer adipiscing elit.</li>\n"
    " <li>Aliquam tincidunt mauris eu risus.</li>\n"
    "</ul>")

# finditer() yields match objects lazily, in document order.
matches = regex.finditer(test_str)

# Report every match with its span, then each capture group with its own span.
# Numbering starts at 1 so that group numbers line up with match.start(n).
for match_num, match in enumerate(matches, start=1):
    print(f"Match {match_num} was found at {match.start()}-{match.end()}: {match.group()}")
    for group_num, group in enumerate(match.groups(), start=1):
        print(f"Group {group_num} found at {match.start(group_num)}-{match.end(group_num)}: {group}")
# Please keep in mind that these code samples are automatically generated and
# are not guaranteed to work. If you find any syntax errors, feel free to
# submit a bug report. For a full regex reference for Python, please visit:
# https://docs.python.org/3/library/re.html