import re
# Pattern locating PDF objects whose dictionary declares "/Type /Page".
# Group 1 captures the object number from the "<n> 0 obj" header line.
regex = re.compile(
    # Object header at the start of a line, e.g. "4 0 obj".
    r"^(\d+) 0 obj"
    # Lazily consume characters, but never step onto the next object header
    # line, so the search stays inside the current object.
    r"(?:(?!^\d+ 0 obj$).)*?"
    # "/Page" must be followed by whitespace, which excludes "/Pages".
    r"\/Type\s*\/Page\s"
    # Continue up to the first "endobj" that ends a line.
    r".*?endobj$",
    flags=re.MULTILINE | re.DOTALL,
)
# Sample input: the beginning of a small PDF file, one list entry per text
# line. Empty entries are the blank lines that separate objects; the final
# line carries no trailing newline.
test_str = "\n".join(
    (
        "%PDF-1.3",
        "%¦¦¦¦",
        "",
        "1 0 obj",
        "<<",
        "/Type /Catalog /AcroForm << /Fields [12 0 R 13 0 R] /NeedAppearances false /SigFlags 3 /Version /1.7 /Pages 3 0 R /Names << >> /ViewerPreferences << /Direction /L2R >> /PageLayout /SinglePage /PageMode /UseNone /OpenAction [0 0 R /FitH null] /DR << /Font << /F1 14 0 R >> >> /DA (/F1 0 Tf 0 g) /Q 0 >> /Perms << /DocMDP 11 0 R >>",
        "/Outlines 2 0 R",
        "/Pages 3 0 R",
        ">>",
        "endobj",
        "",
        "2 0 obj",
        "<<",
        "/Type /Outlines",
        "/Count 0",
        ">>",
        "endobj",
        "",
        "3 0 obj",
        "<<",
        "/Type /Pages",
        "/Count 2",
        "/Kids [ 4 0 R 6 0 R ]",
        ">>",
        "endobj",
        "",
        "4 0 obj",
        "<<",
        "/Type /Page",
        "/Parent 3 0 R",
        "/Resources <<",
        "/Font <<",
        "/F1 9 0 R",
        ">>",
        "/ProcSet 8 0 R",
        ">>",
        "/MediaBox [0 0 612.0000 792.0000]",
        "/Contents 5 0 R",
        ">>",
        "endobj",
        "",
        "5 0 obj",
        "<< /Length 1074 >>",
        "stream",
        "2 J",
        "BT",
        "0 0 0 rg",
        "/F1 0027 Tf",
        "57.3750 722.2800 Td",
        "( A Simple PDF File ) Tj",
        "ET",
        "BT",
        "/F1 0010 Tf",
    )
)
# Scan the sample text and report each match, then each of its capture
# groups, together with the character offsets where they were found.
matches = regex.finditer(test_str)
for match_num, match in enumerate(matches, start=1):
    print(f"Match {match_num} was found at {match.start()}-{match.end()}: {match.group()}")
    # Group numbering starts at 1; group 0 is the whole match printed above.
    for group_num, group in enumerate(match.groups(), start=1):
        print(f"Group {group_num} found at {match.start(group_num)}-{match.end(group_num)}: {group}")
# Please keep in mind that these code samples are automatically generated and
# are not guaranteed to work. If you find any syntax errors, feel free to
# submit a bug report. For a full regex reference for Python, please visit:
# https://docs.python.org/3/library/re.html