# coding=utf8
# the above tag defines encoding for this document and is for Python 2.x compatibility
import re
# Pattern that extracts the From / To / Subject fields, plus an optional
# References field, from a header block. Python's `re` module requires the
# (?P<name>...) spelling for named groups; the PCRE-style (?<name>...) form
# is rejected with re.error when the pattern is compiled.
regex = r"From:\s+(?P<from>.*?)\s+To:\s+(?P<to>.*?)\s+Subject:\s+(?P<subject>.*?)(?:\s+References: (?P<ref>.*?))?$"

# Sample input: a preamble line, four header blocks (some with field values
# wrapped over several lines, one without References), and trailing paragraphs.
test_str = ("Text of variable length spread across several lines Serialization-type text separated by colons (eg ABC:DEF:GHI) A date\n\n"
    "From: One line of text To: One or more lines Subject: One or more lines References: One or more lines\n\n"
    "From: One line of text To: One \n"
    "or \n"
    "more \n"
    "lines Subject: One or more lines References: One or more lines\n\n"
    "From: One line of text To: One or more lines Subject: No references.\n\n"
    "From: One line of text To: One or more lines Subject: One \n"
    "or \n"
    "more \n"
    "lines \n"
    "References: One or more lines\n\n"
    "Paragraph 1 Title: A paragraph\n\n"
    "Paragraph 2 Title: Another paragraph")

# MULTILINE makes `$` match at the end of every line; DOTALL lets `.` cross
# newlines so a field value may span several lines.
matches = re.finditer(regex, test_str, re.MULTILINE | re.DOTALL)

for matchNum, match in enumerate(matches, start=1):
    print ("Match {matchNum} was found at {start}-{end}: {match}".format(matchNum = matchNum, start = match.start(), end = match.end(), match = match.group()))

    # Capture groups are numbered from 1; group 0 is the whole match, which
    # was already reported above.
    for groupNum in range(1, len(match.groups()) + 1):
        # An optional group that did not take part in the match reports
        # start/end of -1 and a value of None.
        print ("Group {groupNum} found at {start}-{end}: {group}".format(groupNum = groupNum, start = match.start(groupNum), end = match.end(groupNum), group = match.group(groupNum)))

# Note: for Python 2.7 compatibility, use ur"" to prefix the regex and u"" to prefix the test string and substitution.
# Please keep in mind that these code samples are automatically generated and
# are not guaranteed to work. If you find any syntax errors, feel free to
# submit a bug report. For a full regex reference for Python, please visit:
# https://docs.python.org/3/library/re.html