import re
# Demo script: collapse each run of decimal-only lines together with the
# integer code that follows it down to just that integer code.
#
# Pattern breakdown:
#   ((\d+\.\d+)\s+)+  one or more decimal numbers, each followed by whitespace
#                     (the whitespace may include newlines, so a run can span
#                     several lines)
#   (\d+)             group 3: the integer code right after the decimal run
#   \s?               at most one whitespace character after the code; it is
#                     part of the match, so the substitution drops it
#   (?=\w+)           the code must be followed by a word character
#
# NOTE: the pattern has no ^ or $ anchors, so re.MULTILINE has no effect here.
regex = re.compile(r"((\d+\.\d+)\s+)+(\d+)\s?(?=\w+)", flags=re.MULTILINE)

# Sample input. The triple quotes at both ends are literal characters of the
# sample text, not Python string delimiters.
test_str = ("\"\"\"BEAVER COUNTY 001 0000 \n"
            "1010 BEAVER \n"
            "2010 BEAVER COUNTY SCH DIST \n"
            "0.008504 \n"
            "0.008508 \n"
            "4010 COUNTY SPECIAL SERVICE DIST NO.1 <---capture this 4010\n"
            "4040 BEAVER COUNTY \n"
            "8005 GREENVILLE SOLAR\n"
            "0.004258 \n"
            "0.008348 \n"
            "...(more decimals)\n"
            "0.008238 \n"
            "4060 SPECIAL SERVICE DISTRICT NO 7 <---capture this 4060\n"
            "\"\"\"")

# Replace every match with the contents of group 3 (the integer code).
subst = "\\3"

# With the sample above, the decimal lines directly preceding 4010 and 4060
# are removed. Because the optional whitespace after the code is consumed as
# well, the output reads "4010COUNTY ..." and "4060SPECIAL ...". The decimals
# before "...(more decimals)" stay, since no integer code follows them.
result = regex.sub(subst, test_str)

if result:
    # The body must be indented under the `if`; without the indent the script
    # fails with an IndentationError before anything runs.
    print(result)
# Please keep in mind that these code samples are automatically generated and
# are not guaranteed to work. If you find any syntax errors, feel free to
# submit a bug report. For a full regex reference for Python, please visit:
# https://docs.python.org/3/library/re.html