# coding=utf8
# the above tag defines encoding for this document and is for Python 2.x compatibility
import re
# Pattern for a "<number>件" count expression, with guards that reject some
# contexts. Reading left to right:
#   * four negative lookbehinds: the match may not start directly after
#     "検索結果:", directly after "次の", directly after a digit (so a match
#     cannot begin in the middle of a digit run), or directly after a digit
#     followed by a thousands separator;
#   * one leading numeral (ASCII digit or kanji numeral), or a run of 京;
#   * an optional separator;
#   * ".+", one more numeral, ".+" again, and finally the literal 件.
# NOTE(review): "." matches any character except a newline, so the text
# between the first numeral and 件 is not restricted to numerals, and at
# least four characters must precede 件 (a bare two- or three-digit count
# such as "10件" cannot match). This happens to satisfy every case listed
# in test_str below -- confirm it is intended before reusing the pattern.
# NOTE(review): the character classes list the ASCII digit range and the
# ASCII comma twice; presumably one of each was originally the full-width
# form -- confirm against the real input data.
regex = r"(?<!検索結果:)(?<!次の)(?<![0-90-9])(?<![0-90-9][,,、])(?:[〇一二三四五六七八九十百千万億兆0-90-9]|京+)[,,、]?.+[〇一二三四五六七八九十百千万億兆京0-90-9].+件"

# Sample input: lines that should match (number + 件 only) followed by
# lines that should not match at all.
test_str = ("Issue: with the original regex pattern, everything mathes (numbers + 件), but those that shouldn't partially match as you see below. \n\n\n"
    "******************************************\n"
    "*THESE BELOW SHOULD MATCH (just the numbers + 件)*\n\n"
    "販売実績100万件\n"
    "販売実績100万件\n"
    "販売実績1,000件\n"
    "販売実績1,000件\n"
    "販売実績1,000,000件です\n"
    "100,000件\n"
    "5000件\n\n"
    "******************************************\n\n"
    "*BELOW PATTERNS SHOULD NOT MATCH*\n\n"
    "(☓)検索結果:10件\n"
    "(x)検索結果:500件\n"
    "(x)次の100件\n"
    "(☓)10件表示\n"
    "(☓)10件を表示\n"
    "(☓)10件の表示\n"
    "(x)全件\n\n"
    "(x)検索結果:13953952件 <--- problem\n"
    "(x)検索結果:100,000,000件 <--- problem\n"
    "(x)検索結果:5000件\n"
    "(x)次の5,000件\n"
    "(x)次の5,0000件 <--- problem\n"
    "(x)次の1000件\n"
    "(x)次の5万件")

# finditer returns a lazy iterator; it is consumed by the loop below.
matches = re.finditer(regex, test_str, re.MULTILINE | re.UNICODE)

# Report every match with its 1-based ordinal and character offsets.
for matchNum, match in enumerate(matches, start=1):
    print("Match {matchNum} was found at {start}-{end}: {match}".format(matchNum = matchNum, start = match.start(), end = match.end(), match = match.group()))

    # Capture groups are numbered from 1 (group 0 is the whole match).
    # The pattern above has no capturing groups, so this loop currently
    # prints nothing; it is kept so the report stays correct if groups
    # are added to the pattern later.
    for groupNum in range(1, len(match.groups()) + 1):
        print("Group {groupNum} found at {start}-{end}: {group}".format(groupNum = groupNum, start = match.start(groupNum), end = match.end(groupNum), group = match.group(groupNum)))
# Note: for Python 2.7 compatibility, use ur"" to prefix the regex and u"" to prefix the test string and substitution.
# Please keep in mind that these code samples are automatically generated and are not guaranteed to work.
# If you find any syntax errors, feel free to submit a bug report.
# For a full regex reference for Python, please visit: https://docs.python.org/3/library/re.html