# coding=utf8
# the above tag defines encoding for this document and is for Python 2.x compatibility
import re
# Pattern for a "blank" line: a line that is empty or holds only spaces/tabs.
# It relies on re.MULTILINE so that ^ and $ anchor at every line boundary
# instead of only at the start and end of the whole string.
regex = r"^[ \t]*$"

# Sample input: runs of text lines ("sentences") separated by blank lines.
test_str = ("This is the\n"
    "first sentence.\n\n"
    "This is a really long sentence\n"
    "and it just keeps going across many\n"
    "rows there will not necessarily be \n"
    "punctuation\n"
    "or consistency in word length\n"
    "the only difference in ending sentence\n"
    "is the next row will be blank\n\n"
    "here would be the third sentence\n"
    "as \n"
    "you see\n"
    "the blanks between rows of data \n"
    "help define what a sentence is\n\n"
    "this would be sentence 4\n"
    "i want to pull data\n"
    "from text file\n"
    "as such (in sentences) \n"
    "where sentences are defined with\n"
    "blank records in between\n\n"
    "this would be sentence 5 since blank row above it\n"
    "and continues but ends because blank row(s) below it")

# finditer returns a lazy iterator of match objects; it is consumed by the
# loop below and is empty afterwards.
matches = re.finditer(regex, test_str, re.MULTILINE)

# Report every blank line found, numbering matches from 1.
for match_num, match in enumerate(matches, start=1):
    print("Match {matchNum} was found at {start}-{end}: {match}".format(
        matchNum=match_num,
        start=match.start(),
        end=match.end(),
        match=match.group()))

    # Capture groups are numbered from 1 (group 0 is the whole match). The
    # current pattern defines no groups, so this loop only produces output
    # if capturing parentheses are added to `regex`.
    for group_num in range(1, len(match.groups()) + 1):
        print("Group {groupNum} found at {start}-{end}: {group}".format(
            groupNum=group_num,
            start=match.start(group_num),
            end=match.end(group_num),
            group=match.group(group_num)))
# Note: for Python 2.7 compatibility, use ur"" to prefix the regex and u"" to prefix the test string and substitution.
# Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for Python, please visit: https://docs.python.org/3/library/re.html