import re
# Pattern locating the JavaScript ``var preferences = { ... };`` object literal.
#   group 1 -> the opening ``var preferences = {``
#   group 2 -> the closing ``};``
# ``[\s\S]`` matches any character including newlines, so no DOTALL flag is
# needed. The quantifier is lazy (``*?``) so the match stops at the FIRST
# ``};`` after the opening; a greedy ``*`` would run on to the last ``};`` in
# the subject and swallow any unrelated statements in between.
# The braces are escaped to make it explicit that they are literals, and no
# MULTILINE flag is passed because the pattern contains neither ``^`` nor ``$``.
regex = re.compile(r"(var preferences = \{)[\s\S]*?(\};)")
# Sample HTML document used as the search subject. Its inline <script> holds
# the ``var preferences = { ... };`` object literal.
# The lines are joined with "\n"; there is no newline after the final </html>.
test_str = "\n".join([
    "<!DOCTYPE html>",
    "<html>",
    " <head>",
    " <meta http-equiv='x-ua-compatible' content='IE=edge,chrome=1' />",
    ' <meta name="robots" content="nofollow" />',
    ' <meta name="robots" content="noindex" />',
    ' <meta name="robots" content="noarchive" />',
    ' <meta http-equiv="Cache-Control" content="private, no-cache, no-store, must-revalidate, max-age=0" />',
    ' <meta http-equiv="Pragma" content="no-cache" />',
    ' <meta http-equiv="Expires" content="0" />',
    ' <meta http-equiv="Expires" content="Tue, 01 Jan 1980 1:00:00 GMT" />',
    " <meta charset='UTF-8' />",
    " <title>BLANK - FILL WITH CLIENT TITLE</title>",
    " <link href='css/bpdev.css' rel='stylesheet' type='text/css' />",
    " <script src='https://code.jquery.com/jquery-1.11.1.min.js'></script>",
    # The backslash in ``<\/script>`` is part of the page text, hence ``\\``.
    " <script>!window.jQuery && document.write('<script src=\"common/js/jquery/jquery-1.11.1.min.js\"><\\/script>');</script>",
    " <link rel='shortcut icon' href='images/favicon.ico' type='image/x-icon' />",
    ' <style id="antiClickjack">body{display:none !important;}</style>',
    " </head>",
    " <body id='userTools'>",
    " <div id='notificationContainer' data-automation='notification_window'></div>",
    " <div id='overlay' class='modalOverlay' data-automation='modal_overlay'></div>",
    " <div id='dialog' class='modal' data-automation='modal_window'></div>",
    " <div id='container'>",
    " <div id='header' data-automation='header'></div>",
    " <div id='loader' data-automation='content_loader'></div>",
    " <div id='content' data-automation='content_area'></div>",
    " </div>",
    " <div id='copyFooter' class='copyright' data-automation='copyright_footer'></div>",
    " <script src='common/js/gateway.framework-min.js' type='text/javascript'></script>",
    " <script src='common/js/gateway.app-min.js' type='text/javascript'></script>",
    " <script type='text/javascript'>",
    " if (self === top) {",
    " var antiClickjack = document.getElementById('antiClickjack');",
    " antiClickjack.parentNode.removeChild(antiClickjack);",
    " } else {",
    " top.location = self.location;",
    " }",
    " (function($) {",
    " var preferences = {",
    " logo : 'images/logo.png',",
    " logoWidth : 250,",
    " logoHeight : 38,",
    " logoAlt : '',",
    " logoLink : '',",
    " logoutURL : '../../pkmslogout'",
    " };",
    " Gateway.startGateway(preferences, true);",
    " }(jQuery));",
    " </script>",
    " </body>",
    "</html>",
])
# Report every match of ``regex`` in ``test_str``, followed by each of its
# capture groups, with the character offsets at which they were found.
matches = regex.finditer(test_str)
for match_num, match in enumerate(matches, start=1):
    # Offsets and text of the whole match (group 0).
    print(f"Match {match_num} was found at {match.start()}-{match.end()}: {match.group()}")
    # Capture groups are numbered from 1, hence ``start=1``; this loop is nested
    # because it reports the groups of the match printed just above.
    for group_num, group in enumerate(match.groups(), start=1):
        print(f"Group {group_num} found at {match.start(group_num)}-{match.end(group_num)}: {group}")
# Please keep in mind that these code samples are automatically generated and
# are not guaranteed to work. If you find any syntax errors, feel free to
# submit a bug report. For a full regex reference for Python, please visit:
# https://docs.python.org/3/library/re.html