import re
# Pattern for feature lines shaped like
#   <id> <source> Transmembrane <start> <end> ...
# Group 1: the first whitespace-delimited token on the line (e.g. "O75581").
# Group 2: the literal feature type "Transmembrane".
# Groups 3 and 4: the two word tokens that follow the feature type.
# The column separator accepts runs of tabs or spaces: the sample data below
# is space-separated, so requiring a literal "\t" would match nothing.
# re.MULTILINE lets "^" anchor at the start of every line of the input.
regex = re.compile(
    r"^(\S+).*?(Transmembrane)[ \t]+(\w*)[ \t]+(\w*)",
    flags=re.MULTILINE,
)

# Sample input: one feature record per line, records separated by blank lines.
test_str = (
    "O75581 UniProtKB Transmembrane 1371 1393 . . . Note=Helical;Ontology_term=ECO:0000255;evidence=ECO:0000255 \n\n"
    "O75581 UniProtKB Topological domain 1394 1613 . . . Note=Cytoplasmic;Ontology_term=ECO:0000255;evidence=ECO:0000255 \n\n"
    "O75581 UniProtKB Repeat 63 106 . . . Note=LDL-receptor class B 1 \n\n"
    "P13688 UniProtKB Transmembrane 429 452 . . . Note=Helical;Ontology_term=ECO:0000255;evidence=ECO:0000255 \n\n"
    "P13688 UniProtKB Topological domain 453 526 . . . Note=Cytoplasmic;Ontology_term=ECO:0000255;evidence=ECO:0000255 \n\n"
    "P13688 UniProtKB Domain 35 142 . . . Note=Ig-like V-type;Ontology_term=ECO:0000250;evidence=ECO:0000250|UniProtKB:P31997 \n\n"
    "P19022 UniProtKB Transmembrane 725 745 . . . Note=Helical;Ontology_term=ECO:0000255;evidence=ECO:0000255 \n\n"
)

# finditer returns a one-shot iterator; the loop below consumes it.
matches = regex.finditer(test_str)

for match_num, match in enumerate(matches, start=1):
    # Report the span and text of the whole match ...
    print(f"Match {match_num} was found at {match.start()}-{match.end()}: {match.group()}")
    # ... followed by the span and text of each capture group (1-based).
    for group_num, group in enumerate(match.groups(), start=1):
        print(f"Group {group_num} found at {match.start(group_num)}-{match.end(group_num)}: {group}")
# Please keep in mind that these code samples are automatically generated and
# are not guaranteed to work. If you find any syntax errors, feel free to
# submit a bug report. For a full regex reference for Python, please visit:
# https://docs.python.org/3/library/re.html