import re
# Pattern under test, read left to right:
#   \b       - a word boundary at the current position
#   (?<=\w)  - the character just before this position must be a word character
#   \s{20}   - then consume 20 whitespace characters (newlines count as whitespace)
# re.MULTILINE only changes how ^ and $ behave; this pattern uses neither, so
# the flag is kept purely to mirror the original settings.
regex = re.compile(
    r"\b(?<=\w)\s{20}",
    re.MULTILINE,
)
# Sample text the pattern is run against: one record per line, each made of a
# label, a single space, and a column built from the characters A/C/G/T/?/-.
# The backslash right after the opening quotes suppresses the leading newline,
# so the text starts with the first record; every record, including the last,
# is newline-terminated.
# NOTE(review): label and data are separated by one space only; any wider
# padding may have been collapsed when this sample was pasted - confirm.
test_str = """\
Ateles_geoffroyi GATGAGTGTGGCAAGGCCCAAGCGGAAGTGCCG--GTTGCT
Ateles_hybridus GATGAGTGTGGCAAGGCCCAAGCGGAAGTGCCG--GTTGCT
Ateles_fusciceps GATGAGTGTGGCAAGGCCCAAGCGGAAGTGC??????????
Ateles_chamek GATGAGTGTGGCAAGGCCCA?????????????????????
Ateles_paniscus ?????????????????????????????????????????
Brachyteles_arachnoides GATGAGTGTGGCAAGGCCCAAGCGGAAGT????????????
Brachyteles_hypoxanthus GATGAGTGTGGCAAG??????????????????????????
Lagothrix_cana GATGAGTGTGGCAAGGCCCAAGCGGAAGTGCCG--GTTGCT
Lagothrix_lagotricha GATGAGTGTGGCAAGGCCCAAGCGGAAGTGCCG--GTTGCT
Alouatta_belzebul ?????????????????????????????????????????
Alouatta_caraya ?????????????????????????????????????????
Alouatta_sara GATGAATGTGGCAAGGCCCAAGCAGAAGTGCCG--GTTGCT
Alouatta_palliata GATGAATGTGGCAAGGCCCAAGCAGAAGTGCCG--GTTGCT
Callicebus_brunneus ?????????????????????????????????????????
Callicebus_moloch GATGAGTGTGGCAAGGCCCAAGCAGAAGTGCCG--G?????
Callicebus_cupreus GATGAGTGTGGCAAGGCCCAAGCAGAAGTGCCG--GTTGCT
Callicebus_caligatus GATGAGTGTGGCAAGGCCCAAGCAGAAGTGCCG--GTTGCT
Callicebus_donacophilus GATGAGTGTGGCAAGGCCCAAGCAGAAGTGCCG--GTTGCT
Callicebus_coimbrai GATGAGTGTGGCAAGGCCCAAGCGGAAGTGCCG--GTTGCT
Callicebus_personatus GATGAGTGTGGCAAGGCCCAAGCGGAA??????????????
Callicebus_nigrifrons GATGAGTGTGGCAAGGCCCAAGCGGAAGTGCCG--GTTGCT
Cacajao_melanocephalus GATGAGTGTGGCAAAGCCCA?????????????????????
Cacajao_calvus GATGAGTGTGGCAAGGCCCAAGCGGAAGTGCCG--GTTGCT
Chiropotes_israelita GATGAGTGTGGCAAGGCCCAAGCGGAAGTGCCG--GTTGCT
Chiropotes_satanas_chiro ?????????????????????????????????????????
Pithecia_irrorata GATGAGTGTGGCAAGGCCCAAGCGGAAGTGCCG--G?????
Pithecia_pithecia GATGAGTGTGGCAAGGCCCAAGCGGAAGTGCCG--GTTGCT
Tarsius_bancanus ?????????????????????????????????????????
Tarsius_syrichta ?????????????????????????????????????????
Lepilemur_ankaranensis AACGAGTGTGGCAAGGCCCAGGCGGAAGTTCCG--GTTGCT
Lepilemur_septentrionali AACGAGTGTGGCAAGGCCCAGGCGGAAGTTCCG--GTTGCT
Lepilemur_dorsalis AACGAGTGTGGCAAGGCCCAGGCGGAAGTTCCG--GTTGCT
Lepilemur_ruficaudatus AACGAGTGTGGCAAGGCCCAGGCGGAAGTTCCG--GTTGCT
Lepilemur_jamesi AACGAGTGTGGCAAGGCCCAGG???????????????????
Microcebus_murinus_subsp AACGAGTGTGGCAAGGCCCAGGCGGAAGTTCCG--GTTGCT
Microcebus_murinus_subsp ?????????????????????????????????????????
Mirza_zaza AACGAGTGTGGCAAGGCCCAGGCAGAAGTTCCG--GTTGCT
Cheirogaleus_medius ?????????????????????????????????????????
Propithecus_verreauxi_co ?????????????????????????????????????????
Propithecus_verreauxi AACGAGTGTGGCAAGGCCCAGGCGGAAGTTCCG--GTTGCT
Propithecus_tattersalli AACGAGTGTGGCAAGGCCCAGGCTGAAGTTCCG--GTTGCT
Propithecus_diadema AACGAGTGTGGCAAGGCCCAGGCGGAAGTTCCG--GTTGCT
Propithecus_edwardsi AACGAGTGTGGCAAGGCCCAGGCG?????????????????
Avahi_laniger AACGAGTGTGGCAAGGCCCAGGCGGAAGTTCCG--GTTGCT
Eulemur_rufus AACGAGTGTGGCAAGGCCCAGGCGGAAGTTCCG--GTTGCT
Eulemur_collaris AACGAGTGTGGCAAGGCCCAGGCGGAAGTTCCG--GTTGCT
Eulemur_fulvus AACGAGTGTGGCAAGGCCCAGGCGGAAGTTCCG--GTTGCT
Eulemur_albifrons AACGAGTGTGGCAAGGCCCAGGCGGAAGTTCCG--GTTGCT
Eulemur_sanfordi AACGAGTGTGGCAAGGCCCAGGCGGAAGTTCCG--GTTGCT
Eulemur_mongoz AACGAGTGTGGCAAGGCCCAGGCGGAAGTTCCG--GTTGCT
Eulemur_macaco AACGAGTGTGGCAAGGCCCAGGCGGAAGTTCCG--GTTGCT
Eulemur_macaco_flavifron AACGAGTGTGGCAAGGCCCAGGCGGAAGTTCCG--GTTGCT
Eulemur_coronatus AACGAGTGTGGCAAGGCCCAGGCGGAAGTTCCG--GTTGCT
Eulemur_rubriventer AACGAGTGTGGCAAGGCCCAGGCGGAAGTTCCG--GTTGCT
Hapalemur_occidentalis AACGAGTGTGGCAAGGCCCAGGCGGAAGTTCCG--GTTGCT
Hapalemur_griseus AACGAGTGTGGCAAGGCCCAGGCGGAAGTTCCG--GTTGCT
Lemur_catta AACGAGTGTGGCAAGGCCCAGGCGGAAGTTCCG--GTTGCT
Varecia_variegata_varieg AACGAGTGTGGCAAGGCCCAGGCAGAAGTTCCG--GTTGCT
Varecia_variegata_rubra AACGAGTGTGGCAAGGCCCAGGCAGAAGTTCCG--GTTGCT
Daubentonia_madagascarie ?????????????????????????????????????????
Nycticebus_bengalensis AACGAGTGTGGCAAGGCCCAGGCAGAAGTTCCG--GTTGCT
Nycticebus_coucang AACGAGTGTGGCAAGGCCCAGGCAGAAGTTCCG--GTTGCT
Nycticebus_pygmaeus AACGAGTATGGCAAGGCCCAGGCAGAAGTTCCG--GTTGCT
Loris_tardigradus AACGAGTGTGGCAAGGCCCAGGCAGAAGTTCCG--GTTGCT
Arctocebus_calabarensis AACGAGTGTGGCAAGGCCCAGGCAGAAGTTCCG--GTTGCT
Perodicticus_potto AACGAGTGTGGCAAGGCCCAGGCAGAAGTTCCG--GTTGCT
Otolemur_garnetti AACGAGTGTGGCAAGGCCCAGGCAGAAGTTCCG--GTTGCT
Otolemur_crassicaudatus AACGAGTGTGGCAAGGCCCAGGCAGAAGTTCCG--GTTGCT
Galago_moholi ?????????????????????????????????????????
Galago_senegalensis ?????????????????????????????????????????
Galago_thomasi AACGAGTGTGGCAAGGCCCAGGCG?????????????????
Galeopterus_variegatus GATGAGTGTGGCAAGGCCCAAGCAGAAGTTCCG--G?????
Cynocephalus_volans GATGAGTGTGGCAAGGCCCAAGCAGAAGTCCCG--GTTGCT
Tupaia_glis ?????????????????????????????????????????
Tupaia_minor ?????????????????????????????????????????
rabbit_rabbit_rabbit ?????????????????????????????????????????
"""
# Report every non-overlapping match of the compiled pattern in the sample
# text. finditer() yields match objects lazily, scanning left to right.
matches = regex.finditer(test_str)

for match_num, match in enumerate(matches, start=1):
    # Offsets are 0-based indices into test_str; end() is exclusive.
    print(f"Match {match_num} was found at {match.start()}-{match.end()}: {match.group()}")

    # Also report each capture group of the current match. This loop has to be
    # nested because it reads `match`; when the pattern defines no capture
    # groups, match.groups() is empty and nothing is printed here.
    for group_num, group in enumerate(match.groups(), start=1):
        print(f"Group {group_num} found at {match.start(group_num)}-{match.end(group_num)}: {group}")
# Please keep in mind that these code samples are automatically generated and are not guaranteed to work.
# If you find any syntax errors, feel free to submit a bug report.
# For a full regex reference for Python, please visit: https://docs.python.org/3/library/re.html