# coding=utf8
# the above tag defines encoding for this document and is for Python 2.x compatibility
import re
# Pattern for English number phrases from "zero" up to the trillions, with an
# optional spoken decimal part ("zero point nine five").
#
# Python's `re` module has no (?(DEFINE)...) blocks and no (?&name) subroutine
# calls, so the grammar is assembled from small string fragments instead.
# Every fragment is wrapped in its own non-capturing group, which makes it safe
# to embed inside a larger fragment and leaves the final pattern without any
# capturing groups.

# one|two|three|four|five|six|seven|eight|nine (prefix-factored)
_ONE_TO_9 = r"(?:f(?:ive|our)|s(?:even|ix)|t(?:hree|wo)|(?:ni|o)ne|eight)"

# ten|eleven|twelve|thirteen|...|nineteen (prefix-factored)
_TEN_TO_19 = (r"(?:(?:(?:s(?:even|ix)|f(?:our|if)|nine)te|e(?:ighte|lev))en|"
              r"t(?:(?:hirte)?en|welve))")

# twenty|thirty|forty|fifty|sixty|seventy|eighty|ninety (prefix-factored)
_TWO_DIGIT_PREFIX = r"(?:(?:s(?:even|ix)|t(?:hir|wen)|f(?:if|or)|eigh|nine)ty)"

# "forty", "forty-two", "forty two", "twelve", "seven".
# The longer alternatives come first so "sixty"/"sixteen" are tried before "six".
_ONE_TO_99 = "(?:{tens}(?:[- ]{unit})?|{teens}|{unit})".format(
    tens=_TWO_DIGIT_PREFIX, teens=_TEN_TO_19, unit=_ONE_TO_9)

# "<1-9> hundred", optionally followed by "[and ]<1-99>", or a bare <1-99>.
_ONE_TO_999 = "(?:{unit}[ ]hundred(?:[ ](?:and[ ])?{below})?|{below})".format(
    unit=_ONE_TO_9, below=_ONE_TO_99)

# Each scale word wraps everything below it:
#   <1-999> <scale>[ <anything smaller>]   or just   <anything smaller>
# After the loop the fragment covers one .. 999 trillion 999 billion ...
_ONE_TO_TRILLIONS = _ONE_TO_999
for _scale in ("thousand", "million", "billion", "trillion"):
    _ONE_TO_TRILLIONS = "(?:{head}[ ]{scale}(?:[ ]{below})?|{below})".format(
        head=_ONE_TO_999, scale=_scale, below=_ONE_TO_TRILLIONS)

# Whole-number part: "zero" or any positive number phrase.
_BIGNUMBER = "(?:zero|{positive})".format(positive=_ONE_TO_TRILLIONS)

# Decimal part: "point" followed by one or more single spoken digits.
_ZERO_TO_9 = "(?:{unit}|zero)".format(unit=_ONE_TO_9)
_DECIMALS = "(?:point(?:[ ]{digit})+)".format(digit=_ZERO_TO_9)

# Anchored on both sides; pass re.MULTILINE to test each line of a text
# separately.
regex = "^{number}(?:[ ]{decimals})?$".format(
    number=_BIGNUMBER, decimals=_DECIMALS)
# Sample input: one candidate phrase per line. "7even" is deliberately not a
# valid number phrase.
test_str = ("one trillion\n"
            "seven hundred twenty two\n"
            "7even\n"
            "zero point nine five\n"
            "nine hundred ninety nine thousand two hundred thirteen")

# re.MULTILINE lets ^ and $ anchor at every line of test_str, so each line is
# tested on its own.
matches = re.finditer(regex, test_str, re.MULTILINE)

for match_num, match in enumerate(matches, start=1):
    print("Match {match_num} was found at {start}-{end}: {match}".format(
        match_num=match_num,
        start=match.start(),
        end=match.end(),
        match=match.group()))

    # Capturing groups are numbered from 1; group 0 is the whole match that
    # was reported above.
    for group_num in range(1, len(match.groups()) + 1):
        print("Group {group_num} found at {start}-{end}: {group}".format(
            group_num=group_num,
            start=match.start(group_num),
            end=match.end(group_num),
            group=match.group(group_num)))
# Note: for Python 2.7 compatibility, use ur"" to prefix the regex and u"" to prefix the test string and substitution.
# Please keep in mind that these code samples are automatically generated and
# are not guaranteed to work. If you find any syntax errors, feel free to
# submit a bug report. For a full regex reference for Python, please visit:
# https://docs.python.org/3/library/re.html