# coding=utf8
# the above tag defines encoding for this document and is for Python 2.x compatibility
import re
# Character class matching the ASCII apostrophe and the Unicode characters
# commonly used in its place (modifier letters, combining marks, single
# quotation marks, primes, ...).  The \uXXXX escapes are left for the `re`
# module to interpret, hence the raw string.
regex = (
    r"["
    r"\u0027\u02B9\u02BB\u02BC\u02BE\u02C8\u02EE\u0301\u0313\u0315"
    r"\u055A\u05F3\u07F4\u07F5\u1FBF\u2018\u2019\u2032\uA78C\uFF07"
    r"]"
)

# One sample per line, in the form "<char> - <code point> <Unicode name>".
# The first character of every line must be the code point named on that
# line, so each alternative of `regex` is exercised exactly once.
test_str = (
    "' - 0027 APOSTROPHE\n"
    "ʹ - 02B9 MODIFIER LETTER PRIME\n"
    "ʻ - 02BB MODIFIER LETTER TURNED COMMA\n"
    "ʼ - 02BC MODIFIER LETTER APOSTROPHE\n"
    "ʾ - 02BE MODIFIER LETTER RIGHT HALF RING\n"
    "ˈ - 02C8 MODIFIER LETTER VERTICAL LINE\n"
    "ˮ - 02EE MODIFIER LETTER DOUBLE APOSTROPHE\n"
    # Combining marks are written as escapes: typed literally they attach to
    # the opening quote and are easily lost or overlooked when editing.
    "\u0301 - 0301 COMBINING ACUTE ACCENT\n"
    "\u0313 - 0313 COMBINING COMMA ABOVE\n"
    "\u0315 - 0315 COMBINING COMMA ABOVE RIGHT\n"
    "՚ - 055A ARMENIAN APOSTROPHE\n"
    "׳ - 05F3 HEBREW PUNCTUATION GERESH\n"
    "ߴ - 07F4 NKO HIGH TONE APOSTROPHE\n"
    "ߵ - 07F5 NKO LOW TONE APOSTROPHE\n"
    "᾿ - 1FBF GREEK PSILI\n"
    "‘ - 2018 LEFT SINGLE QUOTATION MARK\n"
    "’ - 2019 RIGHT SINGLE QUOTATION MARK\n"
    "′ - 2032 PRIME\n"
    "ꞌ - A78C LATIN SMALL LETTER SALTILLO\n"
    # Escaped so the sample really is U+FF07 and cannot be mistaken for (or
    # silently replaced by) the ASCII apostrophe U+0027.
    "\uFF07 - FF07 FULLWIDTH APOSTROPHE"
)

# re.MULTILINE only changes the meaning of ^ and $; the pattern uses neither,
# so the flag is kept purely to mirror the generated sample.
matches = re.finditer(regex, test_str, re.MULTILINE)

# Report every match with its 1-based index, span and matched text.
for matchNum, match in enumerate(matches, start=1):
    print("Match {matchNum} was found at {start}-{end}: {match}".format(
        matchNum=matchNum,
        start=match.start(),
        end=match.end(),
        match=match.group(),
    ))

    # Capture groups are numbered from 1 (group 0 is the whole match).  The
    # current pattern defines no groups, so this loop body does not run; it
    # is kept so the report stays complete if groups are added to `regex`.
    for groupNum in range(1, len(match.groups()) + 1):
        print("Group {groupNum} found at {start}-{end}: {group}".format(
            groupNum=groupNum,
            start=match.start(groupNum),
            end=match.end(groupNum),
            group=match.group(groupNum),
        ))
# Note: for Python 2.7 compatibility, use ur"" to prefix the regex and u"" to prefix the test string and substitution.
# Please keep in mind that these code samples are automatically generated and
# are not guaranteed to work. If you find any syntax errors, feel free to
# submit a bug report. For a full regex reference for Python, please visit:
# https://docs.python.org/3/library/re.html