# coding=utf8
# the above tag defines encoding for this document and is for Python 2.x compatibility
import re
# Demo script: find "<digits>% ... percentage" phrases and print each match.
#
# Pattern, read left to right:
#   \d+%              one or more digits followed by a percent sign
#   [^.]              exactly one character that is not a literal dot
#   (?:(?!original\b)[^\d\r\n])*
#                     zero or more characters that are not a digit, CR or LF;
#                     the run stops at any position where the text "original"
#                     followed by a word boundary begins
#   \bpercentage\b    the whole word "percentage"
# Because the middle run can never step over "original", a match is only
# possible when "percentage" is reached without crossing that word.
regex = r"\d+%[^.](?:(?!original\b)[^\d\r\n])*\bpercentage\b"

# Sample text; each paragraph carries the expectation it was written to check.
test_str = ("In some cases, 10% of the sales starts with the original percentage --> my regex match with this string but I don't want to because it contains the word \"original\"\n\n"
    "The 10% of the sales starts with a certain percentage --> my regex match with this string, it's okay because it doesn't containt the word \"original\"\n\n"
    "The 10% of the original\n"
    "percentage of the sale is higher--> my regex doesn't match with this string, and it's okay because it containts the word \"original\" (maybe because the new line starts with percentage?)\n\n"
    "The 10% of the original sale\n"
    "is the percentage of that --> my regex match with this string but I don't want to because it contains the word \"original\"\n\n"
    "10% \"whatever\" percentage\n\n"
    "10% original percentage")

# re.MULTILINE only changes how ^ and $ behave; the pattern uses neither, so
# the flag does not affect the result and is kept to preserve the original call.
matches = re.finditer(regex, test_str, re.MULTILINE)

for matchNum, match in enumerate(matches, start=1):
    print("Match {matchNum} was found at {start}-{end}: {match}".format(matchNum = matchNum, start = match.start(), end = match.end(), match = match.group()))

    # Group 0 (the whole match) is printed above; capture groups are numbered
    # from 1. The current pattern has no capture groups, so this loop is a
    # no-op until one is added to the regex.
    for groupNum in range(1, len(match.groups()) + 1):
        print("Group {groupNum} found at {start}-{end}: {group}".format(groupNum = groupNum, start = match.start(groupNum), end = match.end(groupNum), group = match.group(groupNum)))
# Note: for Python 2.7 compatibility, use ur"" to prefix the regex and u"" to prefix the test string and substitution.
# Please keep in mind that these code samples are automatically generated and are not guaranteed to work.
# If you find any syntax errors, feel free to submit a bug report.
# For a full regex reference for Python, please visit: https://docs.python.org/3/library/re.html