import re
# Split every line of a botanical name list into two named groups:
#   species - an uppercase letter followed by one or more characters that are
#             not an uppercase letter, "(" or a newline. It therefore stops at
#             the first capitalised word or opening parenthesis after the
#             genus initial. Whitespace separating the name from the author
#             stays inside this group.
#   author  - the remainder of the line; empty when the line has no author.
# "\n" is excluded from the negated class because a negated class otherwise
# matches newlines, which let a name spill onto the following line whenever
# that line did not begin with an uppercase letter or "(".
# The "(?<!^)" look-behind is kept from the original pattern: it asserts the
# author group never begins at the very start of a line.
regex = re.compile(
    r"^(?P<species>[A-Z][^A-Z(\n]+)(?P<author>(?<!^).*)$",
    flags=re.MULTILINE,
)

# Sample input: one name per line, with and without an author citation,
# including infraspecific ranks ("var.", "subsp.") and parenthesised authors.
test_str = (
    "Dalbergia acutifoliolata Mendonca & Sousa\n"
    "Dalbergia adami Berhaut\n"
    "Dalbergia afzeliana G.Don\n"
    "Dalbergia agudeloi J.Linares & M. Sousa\n"
    "Dalbergia albiflora Hutch. & Dalziel\n"
    "Dalbergia altissima Baker f.\n"
    "Dalbergia amazonica (Radlk.) Ducke\n"
    "Dalbergia amerimmon L. ex B.D.Jacks\n"
    "Dalbergia andapensis Bosser & R.Rabev.\n"
    "Dalbergia arbutifolia Baker\n"
    "Dalbergia arbutifolia aberrans Polhill\n"
    "Dalbergia armata E.Mey.\n"
    "Dalbergia assamica Benth.\n"
    "Dalbergia aurea Bosser & R.Rabev.\n"
    "Dalbergia baronii Baker\n"
    "Dalbergia bathiei R.Vig.\n"
    "Dalbergia benthamii\n"
    "Dalbergia berteroi\n"
    "Dalbergia pseudo-sissoo Miq.\n"
    "Dalbergia ovata var. glomeriflora (Kurz) Thoth.\n"
    "Dalbergia albiflora subsp. albiflora"
)

# finditer yields the matches lazily, in order of appearance in test_str.
matches = regex.finditer(test_str)

for match_num, match in enumerate(matches, start=1):
    # Report the whole match with its character offsets in test_str.
    print(f"Match {match_num} was found at {match.start()}-{match.end()}: {match.group()}")

    # Groups are numbered from 1; group 0 is the whole match printed above.
    for group_num, group in enumerate(match.groups(), start=1):
        print(f"Group {group_num} found at {match.start(group_num)}-{match.end(group_num)}: {group}")
# Note: this code sample was automatically generated and is not guaranteed to
# work. If you find any syntax errors, feel free to submit a bug report.
# For a full regex reference for Python, please visit:
# https://docs.python.org/3/library/re.html