import re
# Matches Markdown-style link reference definitions such as
#     [id]: /url/ "Title"
#     [id]: <http://example.com> (Title)
# while deliberately rejecting footnote definitions (``[^id]: ...``).
#
# The pattern is written in Python ``re`` syntax:
#   * named groups use ``(?P<name>...)`` (PCRE's ``(?<name>...)`` is rejected),
#   * every group name is unique, so each branch of an alternation has its own
#     name (``link_url_angle`` / ``link_url_bare`` and
#     ``link_title_quoted`` / ``link_title_paren``),
#   * the closing quote is matched with ``(?P=link_title_quote)``.
# The effective URL is ``link_url_angle or link_url_bare``; the effective title
# is ``link_title_quoted or link_title_paren``.
regex = re.compile(r"""
    ^[ ]{0,3}                           # Leading space up to 3
    (?P<link_all>
        (?<!\\)\[                       # ID within brackets
        (?!\^)                          # Make sure this is not confused with a footnote
        (?P<link_id>.+?)
        (?<!\\)\]
        [ \t]*                          # Possibly with some space before the colon
        :
        [ \t]*\n?[ \t]*                 # Optional space after the colon, optionally a line break
        (?:
            <(?P<link_url_angle>[^>]+)> # link within <>
            |
            (?P<link_url_bare>\S+)      # or link without <>
        )
        (?:
            (?:
                [ \t]+                  # Either some spaces or tabs
                |
                [ \t]*\n[ \t]*          # or a new line surrounded by 0 or more spaces or tabs
            )
            (?:
                (?P<link_title_quote>['"])      # Title is surrounded by double or single quotes
                (?P<link_title_quoted>.+?)
                (?P=link_title_quote)           # make sure the enclosing marks balance
                |                               # or
                \((?P<link_title_paren>[^)]+)\) # by parentheses
            )
        )?
        [ \t]*                          # Possibly ending with some trailing spaces or tabs
    )
    (?:\n+|\Z)                          # terminated by a new line or end of input
""", flags=re.MULTILINE | re.VERBOSE)

# Sample input: two link definitions followed by a footnote definition, which
# the pattern must not match.
test_str = ("[1]: /url/ \"Title\"\n\n"
            "[refid]: /path/to/something (Title)\n\n"
            "[^block]:\n"
            " Paragraph.\n\n"
            " * List item\n\n"
            " > Blockquote\n\n"
            " Code block\n")

matches = regex.finditer(test_str)

for match_num, match in enumerate(matches, start=1):
    print(f"Match {match_num} was found at {match.start()}-{match.end()}: {match.group()}")
    for group_num, group in enumerate(match.groups(), start=1):
        if group is None:
            # Branches of an alternation that did not participate have no
            # span (start/end would be -1), so there is nothing to report.
            continue
        print(f"Group {group_num} found at {match.start(group_num)}-{match.end(group_num)}: {group}")
# Please keep in mind that these code samples are automatically generated and
# are not guaranteed to work. If you find any syntax errors, feel free to
# submit a bug report. For a full regex reference for Python, please visit:
# https://docs.python.org/3/library/re.html