# coding=utf8
# the above tag defines encoding for this document and is for Python 2.x compatibility
import re
regex = r"(?i)(?=(((\()?(red|gold|silver|king)( pack)?(\))?)|((70|83|84|100|120)(s|'s)?)))((((\()?(red|gold|silver|king)( pack)?(\))? )?((70|83|84|100|120)(s|'s)? )?)(\bbox\b))"
test_str = ("Python does not support conditionals using lookaround, even though Python does support lookaround outside conditionals. Instead of a conditional like (?(?=regex)then|else), you can alternate two opposite lookarounds: (?=regex)then|(?!regex)else.\n\n"
"https://www.regular-expressions.info/lookaround.html\n"
"https://www.regular-expressions.info/conditional.html\n\n"
"LOOKAHEAD to see if EITHER pattern1 or pattern2 exists\n"
"pattern1 = ((\\()?(red|gold|silver|king)( pack)?(\\))?)\n"
"pattern2 = ((70|83|84|100|120)(s|'s)?)\n"
"IF one of the patterns exists in the string, THEN look for \n"
"pattern1 box\n"
"pattern2 box\n"
"pattern1 pattern2 box\n"
" \n"
"IS NOT cases - things that I do NOT want to match\n"
"box\n"
" box\n"
"Marlboro Box - this is a perfect example of a NOT case - I do NOT want to match this string\n"
"OR maybe I do ... ARGH - NO I do not want to match BOX!!\n"
"moving box - example that I NEVER want to match ... ARGHHH\n"
"junk 's matchbox car\n"
"matchboxracer\n"
"boxer - we NEVER think about the NOT cases ... lol mitch and I fought about this for years. recently he meantioned the NEED for NOT\n\n"
"IS cases - aka - things that I WANT TO MATCH\n\n"
"Marlboro 100's Box\n"
"Marlboro Gold Pack 100's Box\n"
"Marlboro Special Blend (Gold Pack) 100's Box\n"
"Marlboro Silver Pack Box\n"
"Marlboro Special Blend (Red Pack) 100s Box\n"
"Pall Mall RED 100 BOX\n"
"Marlboro Special Blend (Gold Pack) Box\n\n\n\n\n")
matches = re.finditer(regex, test_str, re.MULTILINE)
for matchNum, match in enumerate(matches, start=1):
print ("Match {matchNum} was found at {start}-{end}: {match}".format(matchNum = matchNum, start = match.start(), end = match.end(), match = match.group()))
for groupNum in range(0, len(match.groups())):
groupNum = groupNum + 1
print ("Group {groupNum} found at {start}-{end}: {group}".format(groupNum = groupNum, start = match.start(groupNum), end = match.end(groupNum), group = match.group(groupNum)))
# Note: for Python 2.7 compatibility, use ur"" to prefix the regex and u"" to prefix the test string and substitution.
# Please keep in mind that these code samples are automatically generated and
# are not guaranteed to work. If you find any syntax errors, feel free to
# submit a bug report. For a full regex reference for Python, please visit:
# https://docs.python.org/3/library/re.html