# coding=utf8
# the above tag defines encoding for this document and is for Python 2.x compatibility
import re
# Pattern that finds "orphan" left single quotes (‘): ones that have no
# closing right single quote (’) later on the same line.
#
#   ‘                    the candidate left single quote
#   (?! ... )            ...rejected if the following can be matched after it:
#     (?:“[^”\n]*”       a complete “...” double-quoted run on the same line,
#       |[^‘”\n])*       or any character except ‘, ” and newline, repeated;
#     ’(?![a-z])         then a ’ that is NOT directly followed by a letter
#                        (a ’ followed by a letter is treated as an apostrophe,
#                        e.g. an elision or contraction, not a closing quote).
#
# The pattern contains only non-capturing and lookahead groups, so every match
# is exactly the single ‘ character and there are no numbered groups.
regex = r"‘(?!(?:“[^”\n]*”|[^‘”\n])*’(?![a-z]))"

# Sample text: one scenario per line, mixing double quotes, matched ‘...’
# pairs, orphan ‘ marks, and ’ used as an apostrophe.
test_str = ("“double-quote,” and some text\n"
    "Some text and “double-quote” and an ‘orphan\n"
    "“double-quote,” some text and ‘matched l/rsquo’\n"
    "Some text and “double-quote” and an ‘orphan and an ’elided word\n"
    "“double-quote,” ‘matched l/rsquo’ and an ‘orphan and ’elided word\n"
    "Some text, “double-quote,” more text, an ‘orphan and a contract’n\n"
    "“double-quote,” some text, and another “double quote.”\n"
    "“double-quote,” some text, an ‘orphan and “another double-quote”\n"
    "“double-quote,” a ‘matched l/rsquo with an “embedded double-quote” in it.’\n"
    "“double-quote,” a ‘matched l/rsquo with an “embedded double-quote” in it’ and an ‘orphan.\n"
    "“double-quote,” some text, an ‘orphan, a contract’n, and “another double-quote”\n"
    "“double-quote,” some text, another “double-quote,” an ‘orphan and a contract’n\n"
    "“double-quote” and “another double-quote” and an ‘orphan and ‘another lsquo\n"
    "“double-quote” and some text and an ‘orphan and ‘a matched l/rsquo’\n"
    "Some text and “double-quote” and an ‘orphan and ‘a matched l/rsquo with a contract’n’\n"
    "some text and an ‘orphan\n"
    "some text and an ‘orphan and an ’elided word\n"
    "some text and an ‘orphan and a contract’n\n"
    "some text ‘orphan, another ‘lsquo, and more text\n"
    "some text ‘orphan, a contract’n, another ‘lsquo and ’elided word and more text\n"
    "some text, an ‘orphan and a “double-quote with ‘matching l/rsquo’” and more text\n\n"
    "Some text, “double quote, with an ‘orphan inside.”\n"
    "Some text, “double quote, with a ‘matched l/rsquo inside.’”\n"
    "Some text, “double quote, with an ‘orphan and contract’n inside.”\n"
    "Some text, “double quote, with an ‘orphan and ’elided word inside.”\n"
    "Some text, “double quote, with an ‘orphan and ‘matched l/rsquo inside.’”\n"
    "“‘Orphan immediately following double-quote.”\n"
    "“‘Matching l/rsquo immediately inside double-quote.’”\n"
    "“‘Orphan immediately following double-quote with ‘matched l/rsquo’ inside.”\n"
    "“Double quote,” another “double quote with an ‘orphan and ‘matched l/rsquo’”\n"
    "“double quote,” another “double quote with an ‘orphan and contract’n and ‘matched l/rsquo’ and more text”")

# IGNORECASE makes the [a-z] class in the pattern accept upper-case letters as
# well. MULTILINE only changes ^ and $, which this pattern does not use; the
# flag is kept so the call stays exactly as it was.
matches = re.finditer(regex, test_str, re.MULTILINE | re.IGNORECASE)

# Report every match with its 1-based ordinal and its span in test_str.
for matchNum, match in enumerate(matches, start=1):
    print ("Match {matchNum} was found at {start}-{end}: {match}".format(matchNum = matchNum, start = match.start(), end = match.end(), match = match.group()))

    # Group 0 is the whole match (already printed above); capture groups are
    # numbered from 1. The current pattern defines none, so this loop prints
    # nothing unless capture groups are added to `regex`.
    for groupNum in range(1, len(match.groups()) + 1):
        print ("Group {groupNum} found at {start}-{end}: {group}".format(groupNum = groupNum, start = match.start(groupNum), end = match.end(groupNum), group = match.group(groupNum)))
# Note: for Python 2.7 compatibility, use ur"" to prefix the regex and u"" to prefix the test string and substitution.
# Please keep in mind that these code samples are automatically generated and
# are not guaranteed to work. If you find any syntax errors, feel free to
# submit a bug report. For a full regex reference for Python, please visit:
# https://docs.python.org/3/library/re.html