import re
# Collapse a doubled word ("abcabc" -> "abc") found in the fourth or a later
# whitespace-separated field of each line.
#
# The generated pattern used PCRE's \K (reset match start) and an atomic
# group (?>...). Python's `re` module has no \K at all and only accepts
# atomic groups from 3.11 on, so the original failed in re.compile().
# Equivalent formulation for `re`:
#   group 1 - the leading fields (at least three, as few as possible), which
#             \K used to drop from the match; captured so they can be put back
#   group 2 - the first half of the doubled word
#   \2      - the repeated second half, the only text that is actually removed
#   (?!\S)  - the doubled word must end here (whitespace or end of input)
# A plain non-capturing group is enough: \S+ and [^\S\n]+ match disjoint
# character sets, so backtracking cannot find a different field split.
regex = re.compile(
    r"^((?:\S+[^\S\n]+){3,}?)(\S+?)\2(?!\S)",
    flags=re.MULTILINE,
)

test_str = ("00101 blah 0000202 thisisasentencethisisasentence 99929\n"
"00102 blah 0000202 thisisasentenc1thisisasentenc1 999292")

# Re-emit the prefix and the first half; the duplicated half is dropped.
subst = r"\1\2"

# The pattern is anchored with ^ under re.MULTILINE, so at most one doubled
# word is collapsed per line.
result = regex.sub(subst, test_str)

if result:
    print(result)
# Please keep in mind that these code samples are automatically generated and
# are not guaranteed to work. If you find any syntax errors, feel free to
# submit a bug report. For a full regex reference for Python, please visit:
# https://docs.python.org/3/library/re.html