# coding=utf8
# the above tag defines encoding for this document and is for Python 2.x compatibility
import re
regex = r"^(\S+).*?(Transmembrane)\t(\w*)\t(\w*)"
test_str = ("O75581 UniProtKB Transmembrane 1371 1393 . . . Note=Helical;Ontology_term=ECO:0000255;evidence=ECO:0000255 \n\n"
"O75581 UniProtKB Topological domain 1394 1613 . . . Note=Cytoplasmic;Ontology_term=ECO:0000255;evidence=ECO:0000255 \n\n"
"O75581 UniProtKB Repeat 63 106 . . . Note=LDL-receptor class B 1 \n\n"
"P13688 UniProtKB Transmembrane 429 452 . . . Note=Helical;Ontology_term=ECO:0000255;evidence=ECO:0000255 \n\n"
"P13688 UniProtKB Topological domain 453 526 . . . Note=Cytoplasmic;Ontology_term=ECO:0000255;evidence=ECO:0000255 \n\n"
"P13688 UniProtKB Domain 35 142 . . . Note=Ig-like V-type;Ontology_term=ECO:0000250;evidence=ECO:0000250|UniProtKB:P31997 \n\n"
"P19022 UniProtKB Transmembrane 725 745 . . . Note=Helical;Ontology_term=ECO:0000255;evidence=ECO:0000255 \n\n")
matches = re.finditer(regex, test_str, re.MULTILINE)
for matchNum, match in enumerate(matches, start=1):
print ("Match {matchNum} was found at {start}-{end}: {match}".format(matchNum = matchNum, start = match.start(), end = match.end(), match = match.group()))
for groupNum in range(0, len(match.groups())):
groupNum = groupNum + 1
print ("Group {groupNum} found at {start}-{end}: {group}".format(groupNum = groupNum, start = match.start(groupNum), end = match.end(groupNum), group = match.group(groupNum)))
# Note: for Python 2.7 compatibility, use ur"" to prefix the regex and u"" to prefix the test string and substitution.
Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for Python, please visit: https://docs.python.org/3/library/re.html