# coding=utf8
# the above tag defines encoding for this document and is for Python 2.x compatibility
import re
# Pattern whose alternatives each flag a line (or fragment of a line):
#   ^\W                  line starts with a non-word character
#   ^\d                  line starts with a digit
#   ^[A-Z ]+$            line consists solely of uppercase letters and spaces
#   ^[tT]able.+          line starts with "table"/"Table" followed by more text
#   ^[Ff]igure.+         line starts with "figure"/"Figure" followed by more text
#   ^[fF][aA][Xx].+      line starts with "fax" (any letter case) followed by more text
#   ^[Ee]mail.+|^EMAIL.+ line starts with "email"/"Email"/"EMAIL" followed by more text
#   \.[\w]+\.            word characters enclosed between two dots (e.g. ".2.")
#   \d[\+\-\\\*\/]\d     digit, one of + - \ * /, digit
#   [\(\)\+\-\\\*\/] [\(\)\+\-\\\*\/]
#                        two of ( ) + - \ * / separated by a single space
# The pattern contains no capturing groups (the parentheses only appear
# escaped inside character classes).
regex = r"^\W|^\d|^[A-Z ]+$|^[tT]able.+|^[Ff]igure.+|^[fF][aA][Xx].+|^[Ee]mail.+|^EMAIL.+|\.[\w]+\.|\d[\+\-\\\*\/]\d|[\(\)\+\-\\\*\/] [\(\)\+\-\\\*\/]"

# Sample text the pattern is exercised against, one candidate per line.
test_str = ("AL SJ \n"
            "alias56 45 5\n"
            "23 \n\n"
            "table 3\n"
            "I love my mom (YES)\n"
            "(123)\n\n"
            "EMAIL\n"
            "email :\n"
            "Fig.2.3.\n"
            "a.b.b.\n\n"
            "Model ES2/50/600 ALD Vacuum Technology will be operated in December 2007\n\n"
            "International Price Ex) \\1,000,000\n\n"
            "(1985) An almost ideal demand system for visitor expenditures, Journal of Transport Economics and Policy, Vol.19, No.2, 161-171\n\n\n"
            "Then he gave advice to Morgenstern not to pay too much respect to the Austrian tradition and to go on with his own belief of the impossibility of forecasting\n\n"
            "Head Office & Plant: 3-20, 1-chome, Higashisuna, Koto-ku, Tokyo 136-0074\n\n"
            "micro-amanlysis of ther fh ehif wnf . ejf-e weew e$ kgejg ^%&%^ ekjwe843759843 iy3ty dfgmdbkjjkdg73 dg gh\n\n"
            "This study applies the Repeated Discrete Choice Model (hereafter RDC) as one of (.\n\n"
            ".")

# re.MULTILINE makes ^ and $ anchor at every line boundary inside test_str,
# not only at the very start and end of the whole string.
matches = re.finditer(regex, test_str, re.MULTILINE)

# Report every match with its 1-based ordinal and its span in test_str.
for matchNum, match in enumerate(matches, start=1):
    print ("Match {matchNum} was found at {start}-{end}: {match}".format(matchNum = matchNum, start = match.start(), end = match.end(), match = match.group()))

    # Capturing groups are numbered from 1. With the current pattern there
    # are none, so this loop only produces output if groups are added.
    for groupNum in range(1, len(match.groups()) + 1):
        print ("Group {groupNum} found at {start}-{end}: {group}".format(groupNum = groupNum, start = match.start(groupNum), end = match.end(groupNum), group = match.group(groupNum)))

# Note: for Python 2.7 compatibility, use ur"" to prefix the regex and u"" to prefix the test string and substitution.
# Please keep in mind that these code samples are automatically generated and
# are not guaranteed to work. If you find any syntax errors, feel free to
# submit a bug report. For a full regex reference for Python, please visit:
# https://docs.python.org/3/library/re.html