# coding=utf8
# the above tag defines encoding for this document and is for Python 2.x compatibility
import re
# Pattern for integers written with well-formed thousands separators
# (e.g. 1, 12, 123, 1,234, 12,345,678):
#   (?<!\d,)      - the match may not start right after "<digit>,", so a
#                   tail such as "345,678" inside "12,345,678" is rejected
#   (?<!\d)       - the match may not start in the middle of a digit run
#   \d{1,3}       - leading group of one to three digits
#   (?:,\d{3})*   - any number of ",ddd" groups
#   (?!,?\d)      - the match may not be followed by a digit or ",<digit>",
#                   which rejects malformed input like 1,23,456 or 12567
regex = r"(?<!\d,)(?<!\d)\d{1,3}(?:,\d{3})*(?!,?\d)"

# Sample text containing both well-formed and malformed numbers.
test_str = ("I need help finding a regex rule that should search in a large string/text and match numbers that have the format: 12,345,678 or 1,234,567 or 12,345 or 1,234.\n\n"
    "for example: for 12,345,678 it should match 12,345,678 and not 345,678 or 45,678 or anything similar\n\n"
    "I looked in: Regex help needed to match numbers but the answers either match 1 in 1,23,456 (should not at all because 1,23,456 is not a number) or match 23,456 in 12,23,456 (should not match at all)\n\n"
    "In creating a regex rule to match the correct format number, I tried first creating the rule of what it should not match(i.e., not 1,23,456), then I tried creating the rule of what it should match. The last rule I created matches in most cases, but not in all.\n\n"
    "MATCH:\n\n"
    "1 12 123 1,234 12,345 123,456 1,234,567 12,\n\n"
    "NO MATCH:\n\n"
    "1,24,567 1,234,5 1,2 1,2,567 12567 ")

# re.MULTILINE only changes how ^ and $ behave; the pattern uses neither,
# so the flag is kept purely for parity with the generated sample.
matches = re.finditer(regex, test_str, re.MULTILINE)

# Report every match with its 1-based ordinal and its span in test_str.
for matchNum, match in enumerate(matches, start=1):
    print("Match {matchNum} was found at {start}-{end}: {match}".format(matchNum=matchNum, start=match.start(), end=match.end(), match=match.group()))

    # Report each capture group of the current match (groups are numbered
    # from 1). The pattern above has only a non-capturing group, so this
    # loop does nothing unless capture groups are added to the regex.
    for groupNum in range(1, len(match.groups()) + 1):
        print("Group {groupNum} found at {start}-{end}: {group}".format(groupNum=groupNum, start=match.start(groupNum), end=match.end(groupNum), group=match.group(groupNum)))
# Note: for Python 2.7 compatibility, use ur"" to prefix the regex and u"" to prefix the test string and substitution.
# Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for Python, please visit: https://docs.python.org/3/library/re.html