import re
# Demo script: scan a pharmacokinetics paragraph for parameter names
# (AUC, Cmax, Tmax) and dose quantities, then print every match together
# with the span and value of each named group.
#
# The pattern is one alternation of five named groups, written as adjacent
# raw-string literals (one per alternative) that Python concatenates into
# a single pattern string.
regex = re.compile(
    # Literal parameter names; "max" carries HTML <sub> markup in the text.
    r"(?P<AUC>AUC)"
    r"|(?P<Cmax>C<sub>max<\/sub>)"
    r"|(?P<Tmax>T<sub>max<\/sub>)"
    # A number, optional whitespace, and a unit ending in "g" (mg, μg, kg...).
    # The negative lookahead rejects the match when the unit is followed by
    # any character other than whitespace or ".", or by a "." that is itself
    # followed by non-whitespace -- so "ng/mL" and "ng.h/mL" are not doses.
    r"|(?P<dose>\d+\s*[a-zA-Zμ]*g(?!(?:[^\s\.]|\.\S)))"
    # Same shape, for a "<unit>g/<unit>g" ratio such as "mg/kg".
    r"|(?P<dose_normalized>\d+\s*[a-zA-Zμ]*g\/[a-zA-Zμ]*g(?!(?:[^\s\.]|\.\S)))",
    # MULTILINE only changes "^" and "$"; the pattern has neither, so the
    # flag has no effect here. Kept to match the generated sample.
    flags=re.MULTILINE,
)

# Sample input: two paragraphs separated by a blank line.
test_str = ("The median T<sub>max</sub> of asciminib following oral administration is 2.5 hours.[L38995] At a dose of 80mg once daily, the steady-state C<sub>max</sub> and AUC<sub>tau</sub> were 1781 ng/mL and 15112 ng.h/mL, respectively. At a dose of 40mg twice daily, the steady-state C<sub>max</sub> and AUC<sub>tau</sub> were 793 ng/mL and 5262 ng.h/mL, respectively. At a dose of 200mg twice daily (for treatment of T315I mutants), the steady-state C<sub>max</sub> and AUC<sub>tau</sub> were 5642 ng/mL and 37547 ng.h/mL, respectively.[L38995]\n\n"
            "As compared to the fasted state, the co-administration of asciminib with a high-fat meal decreased the AUC and C<sub>max</sub> by 62% and 68%, respectively, and its co-administration with a low-fat meal decreased the AUC and C<sub>max</sub> by 30% and 35%, respectively.[L38995]")

# finditer returns a one-shot iterator; the loop below consumes it.
matches = regex.finditer(test_str)

for match_num, match in enumerate(matches, start=1):
    print(f"Match {match_num} was found at {match.start()}-{match.end()}: {match.group()}")
    # Only one alternative participates in each match; the remaining groups
    # are reported with position -1 and value None.
    for group_num, group in enumerate(match.groups(), start=1):
        print(f"Group {group_num} found at {match.start(group_num)}-{match.end(group_num)}: {group}")
# Note: this code sample was automatically generated and is not guaranteed to work. If you find any syntax errors, please submit a bug report. For a full regex reference for Python, see: https://docs.python.org/3/library/re.html