import re
# Matches a number immediately followed (optionally via one space or an HTML
# entity such as "&nbsp;") by a unit or an inline MathJax expression, so the
# separator can be normalized to a single space.
#
# Python's ``re`` spells named groups ``(?P<name>...)``; the PCRE-style
# ``(?<name>...)`` form is rejected by ``re.compile``.
regex = re.compile(
    # Start of line or a word boundary, so matching never begins mid-number.
    r"(?:^|\b)"
    # Plain digits or comma-grouped thousands, with an optional decimal part.
    r"(?P<number>(?:\d+|\d{1,3}(?:,\d{3})+)(?:\.\d+)?)"
    # The separator being replaced: nothing, one space, or an HTML entity.
    r"(?P<replace> ?|&[^;]{2,7};)"
    r"(?P<dimensions>"
    # Inline MathJax, e.g. "$m^2$" (single line, no nested "$").
    r"(?P<mathjax>\$[^\n\$]+\$)"
    r"|(?P<unit>"
    # Optional metric prefix + unit symbol; "(?!as)" keeps the English word
    # "as" from being read as atto-seconds (e.g. "2016 as a new method").
    r"(?P<si>(?!as)(?:[QRYZEPTGMkhdcmµnpfazyrq]|da)?"
    r"(?:mol|cd|Hz|Pa|Wb|lm|lx|Bq|Gy|Sv|kat|[mgsAKNJWCVFΩSTHL]))"
    r"|(?P<imperial>mil|P|in|ft|yd|mi|le|ftm|nmi|oz|lbs?|t|tsp|tbsp|c|fl oz"
    r"|pt|qt|gal|(?:sq|cu) ?(?:in|ft|yd|mi)|ft-lbf?|lbf?-ft|deg|Btu|psi|Ly)"
    r"|(?P<common>mph|kph|rad|sr)"
    # Byte sizes with optional decimal/binary prefix, e.g. "B", "MB", "GiB".
    r"|(?P<binary>(?:(?:[KMGTPEZY])i?)?B)"
    r"))"
    # The unit must be followed by a boundary, end of line, a space or
    # punctuation, so that e.g. the "M" in "4M words" is not taken as a unit
    # prefix glued to the following text.
    r"(?=\b|$| |[.,:;()\[\]{}=/*+—–-])",
    flags=re.MULTILINE,
)

test_str = (
    "I have a 30 $m^2$ pumpernickle\n"
    "More precisely it's 29.87$m^2$.\n"
    "Traveling over 4,000 km, to where once there was\n"
    "...in the year 2016 as a new method of...\n"
    "He crashed at 50mph\n"
    "They collided at speeds of over 200m/s\n"
    "The reading is 2kHz.\n"
    "The range is 20kHz–1MHz (1,000.3 kHz).\n"
    "Just 4M words at 30MB."
)

# Python replacement templates reference named groups as ``\g<name>``; the
# ``$<name>`` form would be emitted as literal text.
subst = r"\g<number> \g<dimensions>"

result = regex.sub(subst, test_str)

if result:
    print(result)
# NOTE: This code sample was automatically generated and is not guaranteed to
# work. If you find any syntax errors, feel free to submit a bug report.
# For a full regex reference for Python, please visit:
# https://docs.python.org/3/library/re.html