import re
regex = re.compile(r"^[^\t]*+\t[^\t]*+\t.{3}\|(NODE[^\t]*+)\t[^\n]*+\r?\n(?=[\s\S]*^[^\t]*\t[^\t]*\t.{3}\|\1\t)", flags=re.MULTILINE | re.DOTALL)
# Sample input: a header row, a dashed rule, and 42 result rows of 12 columns
# each.  Several NODE identifiers in the third column occur on more than one
# row (for example NODE_19028 appears three times).
# NOTE(review): the columns here appear to be separated by single spaces,
# whereas the pattern in `regex` requires tab separators, so as written no row
# would match -- confirm whether tabs were lost when this sample was pasted.
test_str = "\n".join(
    [
        "QUERY LEN_Q HIT LEN_H HSPS EXPCT FR_ALQ FR_IDQ LN_ALQ GAPS_QH STRND_Q STRND_H",
        "----- ----- --- ----- ---- ----- ------ ------ ------ ------- ------- -------",
        "ERR#####.4121.2 108 lcl|NODE_95175_length_258_cov_1.5665_ID_2124081_gene1_1_258_- 258 1 2.0e-51 1.00 1.00 108 0 1 -1",
        "ERR#####.14326.2 108 lcl|NODE_92640_length_260_cov_1.03415_ID_2119011_gene1_1_260_+ 260 1 2.0e-51 1.00 1.00 108 0 1 1",
        "ERR#####.16528.1 108 lcl|NODE_95524_length_258_cov_2.07389_ID_2124779_gene1_1_256_- 256 1 2.0e-51 1.00 1.00 108 0 1 1",
        "ERR#####.25520.2 108 lcl|NODE_36003_length_314_cov_1.22394_ID_2005737_gene1_1_314_- 314 1 5.0e-51 0.99 1.00 107 0 1 1",
        "ERR#####.44384.1 108 lcl|NODE_50866_length_294_cov_1.08368_ID_2035463_gene1_30_293_- 264 1 9.0e-34 0.93 0.92 100 0 1 -1",
        "ERR#####.56115.1 108 lcl|NODE_18974_length_370_cov_1.24762_ID_1971679_gene1_252_370_- 119 1 2.0e-51 1.00 1.00 108 0 1 1",
        "ERR#####.87252.2 107 lcl|NODE_120774_length_241_cov_2.72043_ID_2175279_gene1_1_241_+ 241 1 5.0e-51 1.00 1.00 107 0 1 -1",
        "ERR#####.90593.1 108 lcl|NODE_67870_length_279_cov_0.946429_ID_2069471_gene1_136_279_- 144 1 2.0e-51 1.00 1.00 108 0 1 1",
        "ERR#####.106181.2 108 lcl|NODE_58897_length_286_cov_0.917749_ID_2051525_gene1_1_286_- 286 1 2.0e-51 1.00 1.00 108 0 1 1",
        "ERR#####.117289.1 108 lcl|NODE_71638_length_276_cov_0.959276_ID_2077007_gene1_1_276_+ 276 1 2.0e-51 1.00 1.00 108 0 1 -1",
        "ERR#####.117289.2 108 lcl|NODE_71638_length_276_cov_0.959276_ID_2077007_gene1_1_276_+ 276 1 2.0e-51 1.00 1.00 108 0 1 1",
        "ERR#####.123681.1 108 lcl|NODE_19028_length_369_cov_1.6879_ID_1971787_gene1_1_369_- 369 1 2.0e-51 1.00 1.00 108 0 1 1",
        "ERR#####.128398.2 108 lcl|NODE_49072_length_295_cov_0.883333_ID_2031875_gene1_1_295_+ 295 1 2.0e-51 1.00 1.00 108 0 1 1",
        "ERR#####.143329.1 107 lcl|NODE_16024_length_385_cov_1.78788_ID_1965779_gene1_1_385_+ 385 1 3.0e-49 1.00 0.99 107 0 1 1",
        "ERR#####.146612.2 108 lcl|NODE_157162_length_220_cov_0.848485_ID_2248055_gene1_1_220_+ 220 1 3.0e-19 0.46 1.00 50 0 1 -1",
        "ERR#####.153442.1 108 lcl|NODE_3372_length_555_cov_1.68_ID_1940475_gene1_1_479_+ 479 1 2.0e-51 1.00 1.00 108 0 1 -1",
        "ERR#####.164193.1 108 lcl|NODE_19028_length_369_cov_1.6879_ID_1971787_gene1_1_369_- 369 1 2.0e-51 1.00 1.00 108 0 1 1",
        "ERR#####.164193.2 108 lcl|NODE_19028_length_369_cov_1.6879_ID_1971787_gene1_1_369_- 369 1 2.0e-51 1.00 1.00 108 0 1 -1",
        "ERR#####.170699.1 108 lcl|NODE_172294_length_212_cov_1.34395_ID_2278319_gene1_1_212_+ 212 1 2.0e-51 1.00 1.00 108 0 1 1",
        "ERR#####.175913.1 108 lcl|NODE_133624_length_233_cov_1.19101_ID_2200979_gene1_1_233_+ 233 1 2.0e-51 1.00 1.00 108 0 1 1",
        "ERR#####.175913.2 108 lcl|NODE_133624_length_233_cov_1.19101_ID_2200979_gene1_1_233_+ 233 1 2.0e-51 1.00 1.00 108 0 1 -1",
        "ERR#####.200777.1 107 lcl|NODE_95377_length_258_cov_1.52217_ID_2124485_gene1_1_258_- 258 1 5.0e-51 1.00 1.00 107 0 1 1",
        "ERR#####.207997.2 108 lcl|NODE_92640_length_260_cov_1.03415_ID_2119011_gene1_1_260_+ 260 1 2.0e-51 1.00 1.00 108 0 1 -1",
        "ERR#####.210681.1 103 lcl|NODE_148763_length_225_cov_1.19412_ID_2231257_gene1_1_225_+ 225 1 9.0e-49 1.00 1.00 103 0 1 -1",
        "ERR#####.210681.2 105 lcl|NODE_148763_length_225_cov_1.19412_ID_2231257_gene1_1_225_+ 225 1 7.0e-50 1.00 1.00 105 0 1 1",
        "ERR#####.223511.1 108 lcl|NODE_134257_length_233_cov_0.893258_ID_2202245_gene1_3_233_+ 231 1 2.0e-51 1.00 1.00 108 0 1 1",
        "ERR#####.223511.2 108 lcl|NODE_134257_length_233_cov_0.893258_ID_2202245_gene1_3_233_+ 231 1 1.0e-18 0.45 1.00 49 0 1 -1",
        "ERR#####.242071.2 108 lcl|NODE_46692_length_298_cov_0.790123_ID_2027115_gene2_142_298_- 157 1 2.0e-51 1.00 1.00 108 0 1 1",
        "ERR#####.264004.2 108 lcl|NODE_59741_length_286_cov_1.4329_ID_2053213_gene1_1_286_- 286 1 2.0e-51 1.00 1.00 108 0 1 -1",
        "ERR#####.289356.2 100 lcl|NODE_20869_length_361_cov_1.50327_ID_1975469_gene1_1_361_- 361 1 4.0e-47 1.00 1.00 100 0 1 -1",
        "ERR#####.289566.1 108 lcl|NODE_72809_length_275_cov_0.963636_ID_2079349_gene1_1_275_- 275 1 2.0e-51 1.00 1.00 108 0 1 1",
        "ERR#####.297577.2 108 lcl|NODE_59273_length_286_cov_0.848485_ID_2052277_gene1_1_286_+ 286 1 3.0e-44 0.88 1.00 95 0 1 -1",
        "ERR#####.301879.2 108 lcl|NODE_166289_length_215_cov_1.2375_ID_2266309_gene1_1_215_+ 215 1 2.0e-51 1.00 1.00 108 0 1 -1",
        "ERR#####.307296.1 105 lcl|NODE_181368_length_207_cov_1.09868_ID_2296467_gene1_1_207_- 207 1 7.0e-50 1.00 1.00 105 0 1 -1",
        "ERR#####.316831.1 102 lcl|NODE_30424_length_327_cov_1.16176_ID_1994579_gene1_1_327_- 327 1 3.0e-48 1.00 1.00 102 0 1 1",
        "ERR#####.332122.1 96 lcl|NODE_39832_length_307_cov_1.1627_ID_2013395_gene1_1_121_+ 121 1 6.0e-45 1.00 1.00 96 0 1 -1",
        "ERR#####.332122.2 96 lcl|NODE_39832_length_307_cov_1.1627_ID_2013395_gene1_1_121_+ 121 1 6.0e-45 1.00 1.00 96 0 1 1",
        "ERR#####.337878.2 108 lcl|NODE_129859_length_235_cov_1.73889_ID_2193449_gene1_1_235_+ 235 1 2.0e-51 1.00 1.00 108 0 1 1",
        "ERR#####.376174.2 108 lcl|NODE_10478_length_425_cov_1.36216_ID_1954687_gene1_65_425_- 361 1 3.0e-38 0.78 1.00 84 0 1 1",
        "ERR#####.411768.2 106 lcl|NODE_143684_length_227_cov_1.22093_ID_2221099_gene1_1_227_+ 227 1 2.0e-50 1.00 1.00 106 0 1 -1",
        "ERR#####.432459.1 108 lcl|NODE_50114_length_294_cov_1.33054_ID_2033959_gene1_40_294_- 255 1 7.0e-30 0.64 1.00 69 0 1 -1",
        "ERR#####.432459.2 108 lcl|NODE_50114_length_294_cov_1.33054_ID_2033959_gene1_40_294_- 255 1 2.0e-51 1.00 1.00 108 0 1 1",
    ]
)
# Report every match of `regex` in `test_str`, in order of appearance and
# numbered from 1: first the span and text of the whole match, then the span
# and text of each of its capture groups.
matches = regex.finditer(test_str)
for match_num, match in enumerate(matches, start=1):
    print(f"Match {match_num} was found at {match.start()}-{match.end()}: {match.group()}")
    # Groups are numbered from 1 so the same number can be handed straight to
    # match.start() / match.end().
    for group_num, group in enumerate(match.groups(), start=1):
        print(f"Group {group_num} found at {match.start(group_num)}-{match.end(group_num)}: {group}")
# Note: these code samples are automatically generated and are not guaranteed
# to work. If you find any syntax errors, please submit a bug report.
# For a full regex reference for Python, see:
# https://docs.python.org/3/library/re.html