# coding=utf8
# the above tag defines encoding for this document and is for Python 2.x compatibility
import re
# Lazily match one report section: from a "Query #" header up to and including
# the first "Alignments" keyword, or up to the end of the input (\Z) when that
# keyword never appears. The inline (?s) flag lets "." span newlines. Matching
# is case-sensitive, so the lowercase "alignments" in the sample's
# "Sequences producing significant alignments:" line does not end the match.
regex = r"(?s)Query #.*?(?:(?:Alignments)|\Z)"

# Sample input: one query header, its table of hits, and the start of the
# "Alignments:" section.
test_str = ("Query #1: 020.1-Bni_its1_2019_envio1set1 Query ID: lcl|Query_25247 Length: 286\n\n"
    "Sequences producing significant alignments:\n"
    " Scientific Common Max Total query E Per. Acc. \n"
    "Description Name Name Taxid Score Score cover Value Ident Len Accession \n"
    "Manihot esculenta mitochondrion, complete genome Manihot escu... cassava 3983 182 182 53% 2e-41 88.46 682840 NC_045136.1 \n"
    "Manihot esculenta mitochondrion, complete genome Manihot escu... cassava 3983 182 182 53% 2e-41 88.46 682840 MK176513.1 \n"
    "Asparagus officinalis cultivar Atlas mitochondrion, complete... Asparagus of... garden aspar... 4686 124 124 26% 4e-24 96.05 492062 NC_053642.1 \n"
    "Capsicum annuum var. glabriusculum bio-material USDA:PI 593546... Capsicum ann... NA 165789 89.8 149 27% 1e-13 100.00 497487 MN196478.1 \n"
    "Capsicum annuum cultivar Jeju mitochondrion, complete genome Capsicum annuum NA 4072 89.8 149 27% 1e-13 100.00 511530 KJ865410.1 \n"
    "Capsicum annuum cultivar CMS line FS4401 mitochondrion, comple... Capsicum annuum NA 4072 89.8 149 27% 1e-13 100.00 507452 KJ865409.1 \n"
    "Capsicum annuum clone CaSSR_42 microsatellite sequence Capsicum annuum NA 4072 89.8 89.8 17% 1e-13 98.04 668 KF824256.1 \n"
    "mitochondrion genome of Capsicum chinense chromosome 1, comple... Capsicum chi... NA 80379 89.8 149 29% 1e-13 98.04 307629 MZ736638.1 \n"
    "Dolichandrone spathacea chromosome DS1 mitochondrion, complete... Dolichandron... NA 241844 71.3 71.3 14% 5e-08 97.56 610698 MW432179.1 \n"
    "Dolichandrone cauda-felina mitochondrion, complete genome Markhamia ca... NA 2871431 71.3 71.3 14% 5e-08 97.56 837447 MW432178.1 \n"
    "Haematoxylum brasiletto mitochondrion, complete genome Haematoxylum... NA 191923 60.2 60.2 11% 1e-04 100.00 631094 NC_045040.1 \n"
    "Haematoxylum brasiletto mitochondrion, complete genome Haematoxylum... NA 191923 60.2 60.2 11% 1e-04 100.00 631094 MN017229.1 \n"
    "Leucaena trichandra mitochondrion, complete genome Leucaena tri... NA 190760 60.2 60.2 11% 1e-04 100.00 722009 NC_039738.1 \n"
    "Leucaena trichandra mitochondrion, complete genome Leucaena tri... NA 190760 60.2 60.2 11% 1e-04 100.00 722009 MH717173.1 \n"
    "Gleditsia sinensis mitochondrion, complete genome Gleditsia si... NA 66096 60.2 60.2 11% 1e-04 100.00 594121 MT921986.1 \n\n\n"
    "Alignments:\n\n"
    ">Manihot esculenta mitochondrion, complete genome\n"
    "Sequence ID: NC_045136.1 Length: 682840\n"
    "Range 1: 329883 to 330034\n")

# re.DOTALL duplicates the inline (?s) flag in the pattern; it is harmless and
# keeps the call correct if the inline flag is ever dropped from the pattern.
matches = re.finditer(regex, test_str, re.DOTALL)

# Report every match, numbered from 1, with its span and matched text.
for matchNum, match in enumerate(matches, start=1):
    print("Match {matchNum} was found at {start}-{end}: {match}".format(matchNum = matchNum, start = match.start(), end = match.end(), match = match.group()))

    # Group 0 is the whole match (already printed above), so numbered groups
    # start at 1. The pattern above contains only non-capturing groups, so
    # this loop does not run unless capturing groups are added to it.
    for groupNum in range(1, len(match.groups()) + 1):
        print("Group {groupNum} found at {start}-{end}: {group}".format(groupNum = groupNum, start = match.start(groupNum), end = match.end(groupNum), group = match.group(groupNum)))
# Note: for Python 2.7 compatibility, use ur"" to prefix the regex and u"" to prefix the test string and substitution.
# Please keep in mind that these code samples are automatically generated and
# are not guaranteed to work. If you find any syntax errors, feel free to
# submit a bug report. For a full regex reference for Python, please visit:
# https://docs.python.org/3/library/re.html