import re
# Pattern for a quoted passage made of at least five whitespace-separated words.
#   [“\"]                      opening curly or straight double quote
#   [A-Za-z0-9.-][A-Za-z,:’]*  one word: a letter, digit, '.' or '-' followed by
#                              any run of letters, ',', ':' or '’'
#   (?:\s+ word){4,}           four or more further words
#   [”\"]                      closing curly or straight double quote
# Group 1 captures the passage without the surrounding quotes.
# NOTE: re.MULTILINE only affects '^' and '$'; the pattern contains neither, so
# the flag does not change what is matched. It is kept to leave the compiled
# pattern unchanged.
regex = re.compile(r"[“\"]([A-Za-z0-9.-][A-Za-z,:’]*(?:\s+[A-Za-z0-9.-][A-Za-z,:’]*){4,})[”\"]", flags=re.MULTILINE)

# Sample text: contains a one-word quote (too short to match) and a long quote.
test_str = "Attorney General William Barr said the volume of information compromised was “staggering” and the largest breach in U.S. history.“This theft not only caused significant financial damage to Equifax but invaded the privacy of many, millions of Americans and imposed substantial costs and burdens on them as they had to take measures to protect themselves from identity theft,” said Mr. Barr."

# finditer yields the non-overlapping matches lazily, left to right.
matches = regex.finditer(test_str)

for match_num, match in enumerate(matches, start=1):
    # Report the span and text of the whole match (quotes included).
    print(f"Match {match_num} was found at {match.start()}-{match.end()}: {match.group()}")

    # Report every capture group of this match; group numbers start at 1
    # because group 0 is the whole match printed above.
    for group_num, group in enumerate(match.groups(), start=1):
        print(f"Group {group_num} found at {match.start(group_num)}-{match.end(group_num)}: {group}")
# Please keep in mind that these code samples are automatically generated and
# are not guaranteed to work. If you find any syntax errors, feel free to
# submit a bug report. For a full regex reference for Python, please visit:
# https://docs.python.org/3/library/re.html