import re
# Extract the SQL text that follows "statement: " in PostgreSQL-style log
# output. A statement may span several physical lines, so the capture runs
# (lazily, with DOTALL) up to the start of the next log record or the end of
# the input. The captured text keeps its trailing newline, if any.
#
# The terminating lookahead is anchored to the beginning of a line (MULTILINE
# "^") and requires a full "YYYY-MM-DD HH:MM:SS " prefix. Looking for a bare
# date anywhere would cut a statement short whenever the SQL itself contains a
# date literal (see the INSERT in the sample below). "\Z" is used rather than
# "$" because under MULTILINE "$" matches at every line end, which would stop
# the lazy capture after the first line of a multi-line statement.
regex = re.compile(
    r"(?<=statement: )"
    r"([ _\-|0-9,:;\.=A-Za-z\(\)\"\n\t'].+?)"
    r"(?=^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2} |\Z)",
    flags=re.DOTALL | re.MULTILINE,
)

# Sample log excerpt used to exercise the pattern.
test_str = ("2015-05-20 02:27:41 GMT 555bbcfd.3685 cmsmgt001 LOG: statement: COMMIT\n"
            "2015-05-20 02:27:41 GMT 555bcdb3.3908 cmsmgt001 LOG: statement: BEGIN\n"
            "2015-05-20 02:27:41 GMT 555bcdb3.3908 cmsmgt001 LOG: statement: SET search_path TO pC35126D4,public;\n"
            "2015-05-20 02:27:41 GMT 555bcdb3.3908 cmsmgt001 LOG: statement: SELECT cms_images.id, cms_images.url, cms_images.uploaded_filename, cms_images.size, cms_images.width, cms_images.height, cms_images.ext, cms_images.data, cms_images.hash_sha1, cms_images._created, cms_images._modified, cms_images._hash \n"
            "FROM cms_images \n"
            "WHERE cms_images.id = 912148443466212857204723 \n"
            "LIMIT 1\n"
            "2015-05-20 02:27:41 GMT 555bcdb3.3908 cmsmgt001 LOG: statement: ROLLBACK\n"
            "2015-05-20 02:27:41 GMT 555bbb9f.364b cmsmgt001 LOG: statement: BEGIN\n"
            "2015-05-20 02:27:41 GMT 555bbb9f.364b cmsmgt001 LOG: statement: SET search_path TO pC35126D4,public;\n"
            "2015-05-20 02:27:41 GMT 555bbb9f.364b cmsmgt001 LOG: statement: SELECT cms_images.id, cms_images.url, cms_images.uploaded_filename, cms_images.size, cms_images.width, cms_images.height, cms_images.ext, cms_images.data, cms_images.hash_sha1, cms_images._created, cms_images._modified, cms_images._hash \n"
            "FROM cms_images \n"
            "WHERE cms_images.hash_sha1 = '035f3fac7dd057bac10f2203ecb8ea6a97d2116c' \n"
            "LIMIT 1\n"
            "2015-05-20 02:27:41 GMT 555bbb9f.364b cmsmgt001 LOG: statement: ROLLBACK\n"
            "2015-05-20 02:27:41 GMT 555bc0dd.372c cmsmgt001 LOG: statement: BEGIN\n"
            "2015-05-20 02:27:41 GMT 555bc0dd.372c cmsmgt001 LOG: statement: SET search_path TO pC35126D4,public;\n"
            "2015-05-20 02:27:41 GMT 555bc0dd.372c cmsmgt001 LOG: statement: SELECT cms_images.id, cms_images.url, cms_images.uploaded_filename, cms_images.size, cms_images.width, cms_images.height, cms_images.ext, cms_images.data, cms_images.hash_sha1, cms_images._created, cms_images._modified, cms_images._hash \n"
            "FROM cms_images \n"
            "WHERE cms_images.hash_sha1 = '035f3fac7dd057bac10f2203ecb8ea6a97d2116c' \n"
            "LIMIT 1\n"
            "2015-05-20 02:27:41 GMT 555bc0dd.372c cmsmgt001 LOG: statement: ROLLBACK\n"
            "2015-05-20 02:27:41 GMT 555bbc90.3665 cmsmgt001 LOG: statement: BEGIN\n"
            "2015-05-20 02:27:41 GMT 555bbc90.3665 cmsmgt001 LOG: statement: SET search_path TO pC35126D4,public;\n"
            "2015-05-20 02:27:41 GMT 555bbc90.3665 cmsmgt001 LOG: statement: SELECT cms_celebrities.id, cms_celebrities.vendor, cms_celebrities.tms_person_id, cms_celebrities.birth_year, cms_celebrities.first_name, cms_celebrities.last_name, cms_celebrities.display_name, cms_celebrities.image_id, cms_celebrities._created, cms_celebrities._modified, cms_celebrities._hash \n"
            "FROM cms_celebrities \n"
            "WHERE cms_celebrities.tms_person_id = '47088' \n"
            "LIMIT 1\n"
            "2015-05-20 02:27:41 GMT 555bbc90.3665 cmsmgt001 LOG: statement: ROLLBACK\n"
            "2015-05-20 02:27:41 GMT 555bbcc8.3670 cmsmgt001 LOG: statement: BEGIN\n"
            "2015-05-20 02:27:41 GMT 555bbcc8.3670 cmsmgt001 LOG: statement: SET search_path TO pC35126D4,public;\n"
            "2015-05-20 02:27:41 GMT 555bbcc8.3670 cmsmgt001 LOG: statement: select nextval('\"cms_celebrities_id_seq\"')\n"
            "2015-05-20 02:27:41 GMT 555bbcc8.3670 cmsmgt001 LOG: statement: INSERT INTO cms_celebrities (id, vendor, tms_person_id, birth_year, first_name, last_name, display_name, image_id, _created, _modified, _hash) VALUES (1571, 'gracenote', '47088', 1925, 'Monique', 'Van Vooren', 'Monique Van Vooren', 1701, '2015-05-20T02:27:41.768803'::timestamp, '2015-05-20T02:27:41.768803'::timestamp, '2015-05-20 02:27:41.768803|7111|0.7293680991519143')\n"
            "2015-05-20 02:27:41 GMT 555bbcc8.3670 cmsmgt001 LOG: statement: COMMIT\n"
            "2015-05-20 02:27:41 GMT 555bbd8b.3698 cmsmgt001 LOG: statement: BEGIN\n"
            "2015-05-20 02:27:41 GMT 555bbd8b.3698 cmsmgt001 LOG: statement: SET search_path TO pC35126D4,public;\n"
            "2015-05-20 02:27:41 GMT 555bbd8b.3698 cmsmgt001 LOG: statement: SELECT cms_asset_celebrities.id, cms_asset_celebrities.asset_id, cms_asset_celebrities.celebrity_id, cms_asset_celebrities.role, cms_asset_celebrities.order_pos, cms_asset_celebrities._created, cms_asset_celebrities._modified, cms_asset_celebrities._hash \n"
            "FROM cms_asset_celebrities \n"
            "WHERE celebrity_id=1571 \n"
            "LIMIT 1")

# Report every match and each of its capture groups with their character
# offsets into test_str.
matches = regex.finditer(test_str)
for match_num, match in enumerate(matches, start=1):
    print(f"Match {match_num} was found at {match.start()}-{match.end()}: {match.group()}")
    for group_num, group in enumerate(match.groups(), start=1):
        print(f"Group {group_num} found at {match.start(group_num)}-{match.end(group_num)}: {group}")
# Note: this code sample was automatically generated and is not guaranteed to work. If you find any syntax errors, please submit a bug report. For a full regex reference for Python, see https://docs.python.org/3/library/re.html