import re
# Demo script: find indented "+ VAR name = value" / "+ CONST name = value"
# declarations in a text, including their indented continuation lines, and
# print the span and text of every match.
#
# Pattern walkthrough:
#   (?m)                         "^" anchors at the start of every line
#   ^(?: +|\t+)                  the line is indented with spaces OR tabs
#   \+ *(?:VAR|CONST) *\w+ *=.*  "+", the keyword, a name, "=", rest of line
#   (?:\n^(?: +|\t+)[^+\s].*)*   zero or more continuation lines: indented,
#                                and the first non-blank character is not "+"
#
# Python's ``re`` module has no ``\R`` escape (compiling it raises
# ``re.error: bad escape \R``), so the line break is written as ``\n``; a
# preceding "\r" is still consumed by ``.*`` because "." only excludes "\n".
# The atomic group ``(?>...)`` only exists on Python 3.11+, so a plain
# non-capturing group is used instead. The result is the same here, because
# the following ``[^+\s]`` can never match a space or tab given back by
# backtracking.
regex = re.compile(r"(?m)^(?: +|\t+)\+ *(?:VAR|CONST) *\w+ *=.*(?:\n^(?: +|\t+)[^+\s].*)*")

# Sample input covering single-line, multi-line and non-matching declarations.
test_str = (
    " + VAR name1 = var indented by two spaces and the first nonspace character is '+'\n"
    " + VAR name2 = var indented by 2x2 spaces\n\n"
    " + VAR name3 = var indented by one \\t\n"
    " + VAR name4 = the next line with \"name5\" is not valid. missing the = character, should not be matched\n"
    " + VAR name5\n"
    " + CONST name6 = the type could be VAR or CONST\n\n"
    " + VAR multi1 = multiline value where the continuation lines\n"
    " are indented (starts with two spaces or one tab) and NOT followed by the '+'\n\n"
    " + VAR multi1 = multiline value\n"
    " indented\n\n"
    " + VAR multi1 = multiline value\n"
    " indented ok too\n\n\n"
    " + VAR single = this is single line\n"
    " + because this line even if it is indented, the first nonspace character is '+'\n\n"
    " + VAR multi2 = multiline\n"
    " could be\n"
    " indented\n"
    " any way\n"
    " and any number of times\n"
    " until the first non-indented line\n\n"
    "the following should NOT match\n\n"
    "+ VAR some = sould not be matched, because the line isn't indented\n"
    " + VAR some = sould not be matched, because the line isn't indented at least with TWO spaces or one tab\n"
    " + SOME name = value not matched because the SOME isn't VAR or CONST"
)

# finditer() returns a one-shot iterator; it is consumed by the loop below.
matches = regex.finditer(test_str)

for match_num, match in enumerate(matches, start=1):
    print(f"Match {match_num} was found at {match.start()}-{match.end()}: {match.group()}")
    # The pattern has no capturing groups, so this loop prints nothing as
    # written; it starts reporting once capturing groups are added.
    for group_num, group in enumerate(match.groups(), start=1):
        print(f"Group {group_num} found at {match.start(group_num)}-{match.end(group_num)}: {group}")
# Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for Python, please visit: https://docs.python.org/3/library/re.html