# coding=utf8
# the above tag defines encoding for this document and is for Python 2.x compatibility
import re
# Pattern: an optional label followed by a dotted number that ends a line.
# Capture groups, numbered by opening parenthesis:
#   1 - the entire match
#   2 - zero or more letters, then zero or more "." / ":" characters
#   3 - the full run of numeric pieces, anchored by "$"
#   4 - the last repetition of "digits, one dot, digits" (both digit runs
#       are optional)
# The letter class is lowercase-only; the re.finditer call further down
# passes re.IGNORECASE (uppercase labels match too) and re.MULTILINE
# ("$" matches at every line end).
# NOTE(review): "\s*" also consumes newlines, so a label on one line can pair
# with a number on the next; and because the digits are optional, a lone "."
# at the end of a line satisfies group 4 -- confirm both are intended.
regex = (
    r"("                 # group 1 opens
    r"([a-z]*[.:]*)"     # group 2: label and trailing punctuation
    r"\s*"               # optional whitespace between label and number
    r"((\d*[.]\d*)+$)"   # groups 3 and 4: dotted number at end of line
    r")"                 # group 1 closes
)
# Sample input for the pattern: adjacent string literals that Python joins
# into a single str at compile time. The text appears to be OCR output of
# several store receipts (misreads such as "BOULEUARD" and "INUOICE" are part
# of the data and must not be corrected). Lines such as
# 'Grocery_receipts_001.docx.txt,"' look like CSV row starts (file name, then
# a quoted text field) -- presumably the data was pasted from a CSV export;
# verify against the original source if that matters.
test_str = ("\"LOWE’S HOME CENTERS, INC.\n"
"5770 READ BOULEUARD NEW ORLEANS, LA 70127 (504) 613-1800\n"
"- SALE - SALES 4: S2690BC1 1295021 07-17-10\n"
"296458 COMM SERIES QUANTUM 3 BUR 149.00\n"
"SUBTOTAL: 149.00\n"
"TAX: 13.41\n"
"INUOICE 18327 TOTAL: 162.41\n"
"MERCH/GIFT CARDS : 162.41\n"
"MERCH/GIFT CARD 7334 AUTHCODE 005450 BEGIN BAL TRANSACTION AMT ENDING BAL 216.91 162.41 54.50\n"
"STORE: 2690 TERMINAL: 18 07/17/10 09:13:09\n"
"OF ITEMS PURCHASED: 1\"\n"
"Grocery_receipts_001.docx.txt,\"Walmart x'c\n"
"Save money. Live better. •\n"
"(813) 932-0562 Manager COLLEEN BRICKEY 8885 N FLORIDA AVE TAMPA FL 33604\n"
"ST# 5221 OP#\n"
" 00001061 TE# 06\n"
" TR# 05332\n"
" BREAD\n"
" 007225003712 F\n"
" 2.88\n"
" N\n"
" BREAD\n"
" 007225003712 F\n"
" 2.88\n"
" N\n"
" GV PNT BUTTR\n"
" 007874237003 F\n"
" 3.84\n"
" N\n"
" GV PNT BUTTR\n"
" 007874237003 F\n"
" 3.84\n"
" N\n"
" GV PNT BUTTR\n"
" 007874237003 F\n"
" 3.84\n"
" N\n"
" GV PNT BUTTR\n"
" 007874237003 F\n"
" 3.84\n"
" N\n"
" GV PARM 160Z\n"
" 007874201510 F\n"
" 4.98\n"
" 0\n"
" GV CHNK CHKN\n"
" 007874206784 F\n"
" 1.98\n"
" N\n"
" GV CHNK CHKN\n"
" 007874206784 F\n"
" 1.98\n"
" N\n"
" 12 CT NITRIL\n"
" 073191913822\n"
" 2.78\n"
" X\n"
" FOLGERS\n"
" 002550000377 F\n"
" 10.48\n"
" N\n"
" SC TUIST UP\n"
" 007874222682 F\n"
" 0.84\n"
" X\n"
" EGGS\n"
" 060538871459 F\n"
" 1.88\n"
" 0\n"
" \n\n"
" SUBTOTAL\n"
" 46.04\n"
" \n\n"
" TAX\n"
" 1 7.000 X\n"
" 0.26\n"
" \n\n"
" \n\n"
" TOTAL\n"
" 46.30\n"
" \n\n"
" \n\n"
" DEBIT TEND\n"
" 46.30\n"
" \n\n"
" \n\n"
" CHANGE DUE\n"
" 0.00\n"
" \n\n"
" \n\n"
"EFT DEBIT PAY FROM PRIMARY\n"
"ACCOUNT : 5259\n"
"1. TOTAL PURCHASE\n"
"PAYMENT DECLINED DEBIT NOT AVAILABLE 11/06/11 02:21:54\n"
"EFT DEBIT PAY FROM PRIMARY\n"
"ACCOUNT : 5259\n"
"1. TOTAL PURCHASE REF # 131000195280\n"
"NETWORK ID. 0071 APPR CODE 297664 11/06/11 02:22:54\n\n"
"ft ITEMS SOLD 13\n"
"TC# 0432 2121 1542 2401 9590\n"
" \n\n"
"Lagawag Is back for Electronics, Togs, and Jewelrg. 10/17/11-12/16/11 11/06/11 02:22:59\"\n"
"traderjoes-receipt1.docx.txt,\"TRADER JOE’S\n\n\n\n\n\n\n\n\n\n\n"
" \n\n"
"“V.\n"
"\\L3P\n"
"2-49 T 0.10 T\n"
"OPEN 8:00AM TO 10:00PM DAILY\n"
"* LOWFAT 1% MILK - HALF GALLON ^GROCERY NON TAXABLE\n"
"1. 6 0.59 ^GROCERY NON TAXABLE\n"
"2. 10.19 GROCERY NON TAXABLE\n"
"^ MISSISSIPPI MUD 32 OZ CROCK\n"
"* CRY\n"
"“• TJ!S VEGGIE STIX £ BAGEL MINI WHEAT 4 PK\n"
"* CHICKEN CH2 YMEIN STIR FRY MIX\n"
"* PIZZA ORGANIC SICILIA?? STYLE R « TJ'S CHEESE & GARLIC CROUTONS\n"
"* LA GRANJA SYRAH LES CAVES JOSEPH BORDEAUX\n"
"* CHICKEN ORANGE MANDARIN W/SCE\n"
"* DARK CHOCOLATE ROCKY ROAD SQUA\n"
" \n\n"
"* RESTAURANT-STYLE WHITE CHIPS ~ SLICED TURKEY ROASTED o CRACKERS MULTIGRAIN ^ SALSA CHIPOTLE HOT -FRESH PK BOYSENBERRY PRESERVES\n"
"* CHUNKY SALTED PEANUT BUTTER\n"
"* TJ SLICED JACK W/PEPPERS «* SORRENTO SALAD TJ'S\n"
"* CHOC BAR FLD'DRKSPEC « CHOC BAR FLD DRKSPEC\n"
"$66.87\n"
"$0.91\n"
"$67.78\n"
"$67.78\n"
"SUBTOTAL STATE TAX 1 TOTAL DEBIT\n"
"PURCHASE SWIPED AUTH# 402241 INVOICE #: 5876\n"
",199,,00,\n"
"02-09-2012 06:45PM REFERENCE #: 293536\n\n\n"
"ITEMS 33 S, Christian\n"
"02-09-2012 06:43PM 0199 04 0098 5876\n"
"THANK YOU FOR SHOPPING AT TRADER JOE'S www.traderjoes.com\"\n"
"transaction-1.docx.txt,\"The Pharmacy America Trusts • Since 1901\"\"\n"
"I'm MARILYN. Thank you for allowing me to serve you today,\n"
"502 10 9032 05988 027\n"
"RFN# 0598-8279-0320-1003-2820\n"
"F SMPLY SLN ALL4.25Z 1A 7.00 SALE\n"
"F DULC0LAX 8.30Z 1A 10.00 SALE\n"
"DOVE TRTMN M0IST8Z 1A 4.00 SALE\n"
"MFG COUPON 1 1.50-MFGC\n"
"MFG COUPON 1 4.00-MFGC\n"
"MFG COUPON 1 5.00-MFGC\n"
"SUBTOTAL 10.50\n"
"A-6.85% SALES TAX TOTAL\n"
"CASH 12.00\n"
"CHANGE .06\n"
"HAG ADVERTISED SAVINGS: 7.97\n"
"MFG COUPON SAVINGS: 10.50\n"
"YOUR TOTAL SAVINGS: 18.47\n")
# Scan the sample text and report every match together with its capture groups.
# re.IGNORECASE lets the lowercase letter class match uppercase labels;
# re.MULTILINE makes "$" match at the end of every line, not only at the end
# of the whole string.
matches = re.finditer(regex, test_str, re.IGNORECASE | re.MULTILINE)

for matchNum, match in enumerate(matches, start=1):
    # Whole-match summary: 1-based ordinal, character offsets, matched text.
    print("Match {matchNum} was found at {start}-{end}: {match}".format(matchNum = matchNum, start = match.start(), end = match.end(), match = match.group()))

    # Capture groups are numbered from 1 (group 0 is the whole match), so
    # iterate 1..N directly instead of shifting a 0-based index by hand.
    for groupNum in range(1, len(match.groups()) + 1):
        print("Group {groupNum} found at {start}-{end}: {group}".format(groupNum = groupNum, start = match.start(groupNum), end = match.end(groupNum), group = match.group(groupNum)))
# Note: for Python 2.7 compatibility, use ur"" to prefix the regex and u"" to prefix the test string and substitution.
# Please keep in mind that these code samples are automatically generated and are not guaranteed to work.
# If you find any syntax errors, feel free to submit a bug report.
# For a full regex reference for Python, please visit: https://docs.python.org/3/library/re.html