import re
# Pattern for one "SRR<run>.<read> <sequence>" record. Capture groups:
#   1: digits following the literal "SRR" prefix
#   2: the literal dot separator
#   3: digits following the dot
#   4: the whitespace run between the identifier and the sequence
#   5: the run of uppercase letters that follows
# NOTE: re.MULTILINE only changes how ^ and $ behave; the pattern has neither
# anchor, so the flag has no effect here and is kept as generated.
regex = re.compile(r'SRR([0-9]+)([\.])([0-9]+)([\s]+)([A-Z]+)', flags=re.MULTILINE)

# Sample input: a header line followed by index / sequence_id / cdr3_aa rows.
test_str = (" sequence_id cdr3_aa\n"
            "0 SRR11610498.829751 AKDLGPDAGYTVFNDYFDN\n"
            "1 SRR11610498.853725 AKDLGPDAGYTVFNDYFDN\n"
            "2 SRR11610498.856922 AKDLGPDAGYTVFNHYFDN\n"
            "3 SRR11610498.861591 AKDLGPDAGYTVFNDYFDN\n"
            "4 SRR11610498.909314 AKDLGPDAGYTVFNDYFDN\n"
            "5 SRR11610498.915768 AKDLGPDAGYTVFNDYFDN\n"
            "6 SRR11610498.922245 ARGGLVVVDSFDY\n"
            "7 SRR11610498.1017490 ASDLSYASSWLHYFDV\n"
            "8 SRR11610498.1022079 ARLRFDNGALYFDY\n"
            "9 SRR11610498.1037667 ASSAPPSGFNWFDP\n"
            "10 SRR11610498.1111232 AKGPQIVATTTDNLEV\n"
            "11 SRR11610498.1183596 AKDLGPDAGYTVFNDYFDN \n\n\n\n")

# finditer yields match objects lazily, in the order they occur in test_str.
matches = regex.finditer(test_str)

# Report every match (numbered from 1) with its span, then each of its
# capture groups with the group's own span inside test_str.
for match_num, match in enumerate(matches, start=1):
    print(f"Match {match_num} was found at {match.start()}-{match.end()}: {match.group()}")
    for group_num, group in enumerate(match.groups(), start=1):
        print(f"Group {group_num} found at {match.start(group_num)}-{match.end(group_num)}: {group}")
# Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for Python, please visit: https://docs.python.org/3/library/re.html