# coding=utf8
# the above tag defines encoding for this document and is for Python 2.x compatibility
import re
# Pattern for a weight/volume quantity such as "10lb", "2.5lbs", "20 LBS" or
# "3.55KG" embedded in a product description.
#
# Python's `re` module only accepts fixed-width look-behinds.  The original
# single look-behind `(?<!\d|-|\/|to|\$)` mixed one-character alternatives with
# the two-character "to" and therefore failed to compile ("look-behind requires
# fixed-width pattern").  It is split below into a one-character class and a
# separate check for "to"; each look-behind is now fixed width.
regex = (
    r"(?<![\d\-/$])"                     # not directly after a digit, '-', '/' or '$'
    r"(?<!to)"                           # not directly after "to"
    r"(?<!\$(\d)\.)"                     # not after "$<digit>."; group 1 is kept so group numbers stay 1-4
    r"([1-9][0-9]{0,2}(\.\d{0,2})?\s?)"  # group 2: 1-3 digit number, optional decimals (group 3), optional whitespace
    r"(lbs|lb|LBS|LB|KG|kg|G|g|L|l+)"    # group 4: the unit
    r"(?!\d|\w)"                         # the unit must not be followed by a word character
)

# Sample product descriptions, one per line, used to exercise the pattern.
test_str = ("10lb Potato bag\n"
            "Apples Gld Delicious 3lb\n"
            "Grandfer Rocha Pear2.5lbs\n"
            "CHIN.MAND.5LB\n"
            "Batata Veg (10LB)\n"
            "SIG 170LB99 Legging SzB 1ea\n"
            "ARJO CDA1450035 SupBlk 400LB1ea\n"
            "AHC EB205L \n"
            "DRI 10220-1 Bariatric500lb 1 ea\n"
            "TILDA SONA MASOORI 20 LBS\n"
            "OVAL ROASTER 9 - 12 LBS\n"
            "ORGANIC TRIPACK FRUIT 1.2 KG\n"
            "KGF5KT2404 PEPLUM T, PURPLE\n"
            "AIR 1773LB-XL 15-20 KnXL 1ea\n"
            "PLUM BAG 1KG US\n"
            "NEXT XTRA 100G\n"
            "SUDOCREM 60G\n"
            "EGGPLANT GRILLED IN OIL (3KG)\n"
            "CORONATION GRAPE 2L CA\n"
            "MEAT DEAL $11.00 KG\n"
            "LIVE LOBSTER- 2.00-3.00 LB-MSC\n"
            "PC MAGIC GROW 20-20-20 3.55KG\n"
            "KGF7W3409 KGVELCROWB,GREY\n"
            "TZATZIKI 500G\n"
            "2+1 LEAD MIX")

# finditer returns a lazy iterator; it is consumed once by the loop below.
matches = re.finditer(regex, test_str, re.MULTILINE)

for matchNum, match in enumerate(matches, start=1):
    # Report the whole match with its span in test_str.
    print ("Match {matchNum} was found at {start}-{end}: {match}".format(matchNum = matchNum, start = match.start(), end = match.end(), match = match.group()))

    # Report every capture group (numbered from 1).  A group that did not
    # participate in the match is reported by `re` with span -1--1 and value None.
    for groupNum in range(1, len(match.groups()) + 1):
        print ("Group {groupNum} found at {start}-{end}: {group}".format(groupNum = groupNum, start = match.start(groupNum), end = match.end(groupNum), group = match.group(groupNum)))
# Note: for Python 2.7 compatibility, use ur"" to prefix the regex and u"" to prefix the test string and substitution.
# Please keep in mind that these code samples are automatically generated and are not guaranteed to work.
# If you find any syntax errors, feel free to submit a bug report.
# For a full regex reference for Python, please visit: https://docs.python.org/3/library/re.html