import re
# Rewrite each HTML anchor line of ``test_str`` as a <navpoint> element whose
# label is the link text and whose content src is the link's href.

# One anchor per line (MULTILINE makes ^/$ match at line boundaries):
#   group 1 - value of the href attribute
#   group 2 - link text between the end of the opening <a ...> tag and the
#             next tag
# The literal ">" after the attribute list is required; without it the
# closing ">" of the opening tag ends up inside group 2.
regex = re.compile(r'^<a[^>]*href="([^"]+)"[^>]*>([^<]+).*$', flags=re.MULTILINE)

test_str = (
    "<a href=\"final/main.html\">Multimedia Implementation</a><br/>\n"
    "<a href=\"final/toc.html\">Table of Contents</a><br/>\n"
    "<a href=\"final/pref01.html\">About the Author</a><br/>\n"
    "<a href=\"final/pref02.html\">About the Technical Reviewers</a><br/>\n"
    "<a href=\"final/pref03.html\">Acknowledgments</a><br/>\n"
    "<a href=\"final/part01.html\">Part I: Introduction and Overview</a><br/>\n"
    "<a href=\"final/ch01.html\">Chapter 1. Technical Overview</a><br/>"
)

# Replacement template. Python's re.sub uses \g<N> (or \N) for group
# references, not the $N syntax of other regex flavours. The raw-string "\n"
# sequences are expanded to real newlines by re.sub itself.
subst = (
    r'<navpoint id="n" playOrder="">\n'
    r'<navLabel><text>\g<2></text></navLabel>\n'
    r'<content src="\g<1>" />\n'
    r'</navpoint>\n'
)

# The pattern does not consume the newline that ends each input line, so each
# generated element is followed by the original line break as well.
result = regex.sub(subst, test_str)

if result:
    print(result)
# Please keep in mind that these code samples are automatically generated and
# are not guaranteed to work. If you find any syntax errors, feel free to
# submit a bug report. For a full regex reference for Python, please visit:
# https://docs.python.org/3/library/re.html