import re
# Pattern that finds a dash-like character written either literally or as an
# HTML character reference.  The trailing ';' of a reference is optional.
#
# Capture groups (exactly one participates in any given match):
#   1 - a literal dash-like character
#   2 - the hex digits of a "&#x...;" numeric reference (lowercase only)
#   3 - the decimal digits of a "&#...;" numeric reference
#   4 - the name of a named reference such as "&mdash;"
#
# The literal class is spelled with \u escapes rather than raw characters so
# that it cannot be silently altered by re-encoding or Unicode normalisation
# of this file: ASCII '-', U+2010..U+2015, U+2212, U+2043, U+FE58, U+FE63
# and U+FF0D (the same code points the numeric branches below enumerate).
regex = re.compile(
    r"(?:"
    r"([\-\u2010-\u2015\u2212\u2043\ufe58\ufe63\uff0d])"
    r"|(?:&(?:"
    r"(?:#x(2d|201[0-5]|2212|2043|fe58|fe63|ff0d))"
    r"|(?:#(45|820[89]|821[0123]|8722|8259|65112|65123|65293))"
    r"|(hyphen|[nm]?dash|hybull|horbar|minus)"
    r");?)"
    r")",
    flags=re.MULTILINE | re.UNICODE,
)

# Sample text, one row per dash kind, followed by the pattern text itself.
# NOTE(review): the rows contain only literal characters, although the first
# sentence talks about entities -- presumably the entity references were
# decoded when this sample was copied; confirm against the original sample.
test_str = (
    "This captures an entity even if it lacks the ';', which is commonly encountered in the wild.\n\n"
    "kbdash - - - - -; -\n"
    "dash ‐ ‐ ‐ ‐ ‐ ‐; ‐\n"
    "hyphen ‑ ‐ ‑ ‑ ‑ ‑; ‑\n"
    "figure ‒ ‒ ‒ ‒ ‒; ‒\n"
    "em – – – – – –; –\n"
    "en — — — — — —; —\n"
    "horbar ― ― ― ― ― ―; ―\n"
    "minus − − − − − −; −\n"
    "hybull ⁃ ⁃ ⁃ ⁃ ⁃ ⁃; ⁃\n"
    "fe58 ﹘ ﹘ ﹘ ﹘ ﹘; ﹘\n"
    "fe63 ﹣ ﹣ ﹣ ﹣ ﹣; ﹣\n"
    "ff0d - - - - -; -\n\n"
    "(?:([-‐‑‒–—―−⁃﹘﹣-])|(?:&(?:(?:#x(2d|201[0-5]|2212|2043|fe58|fe63|ff0d))|(?:#(45|820[89]|821[0123]|8722|8259|65112|65123|65293))|(hyphen|[nm]?dash|hybull|horbar|minus));?))"
)

# Report every match and, beneath it, the capture group that produced it.
matches = regex.finditer(test_str)
for match_num, match in enumerate(matches, start=1):
    print(f"Match {match_num} was found at {match.start()}-{match.end()}: {match.group()}")
    for group_num, group in enumerate(match.groups(), start=1):
        if group is None:
            # A group that took no part in the match has span (-1, -1);
            # printing it as "found at -1--1: None" would be misleading.
            continue
        print(f"Group {group_num} found at {match.start(group_num)}-{match.end(group_num)}: {group}")
# Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for Python, please visit: https://docs.python.org/3/library/re.html