# coding=utf8
# the above tag defines encoding for this document and is for Python 2.x compatibility
import re
# Pattern that locates the product-description paragraph:
#   1. optional leading whitespace, then a <div id="descriptionAndDetails...;
#   2. lazily skip ahead to the first <div id="productDescription...;
#   3. lazily skip ahead to the first <p>;
#   4. capture (group 1) from that <p> up to, and including, the whitespace
#      that immediately precedes the next </div>.
# [\s\S] is used instead of "." so the lazy runs can cross newlines without
# needing re.DOTALL.
# The capture group previously held two adjacent, identical lazy quantifiers
# ([\s\S]*?[\s\S]*?).  The second one cannot change what is matched, but it
# multiplies the backtracking work when no closing </div> follows, so a single
# quantifier is used here.
regex = r"\s*<div\sid=\"descriptionAndDetails[\s\S]*?<div\sid=\"productDescription[\s\S]*?(<p>[\s\S]*?\s*)<\/div>"
# Sample HTML that the pattern is run against.  It is written as a sequence of
# adjacent string literals inside parentheses, which Python joins into one
# string; every space and "\n" below is therefore part of the test input.
# Structure of the markup: an outer <div id="descriptionAndDetails"> wrapping
# two <div id="productDescription_feature_div"> elements, an <h2> heading, the
# <div id="productDescription"> holding the <p> description text, and then an
# inline <style> sheet and a <script> block before the closing </div> tags.
test_str = ("<div id=\"descriptionAndDetails\" class=\"a-section a-spacing-extra-large\">\n"
" <div id=\"productDescription_feature_div\" class=\"feature\" data-feature-name=\"productDescription\" data-cel-widget=\"productDescription_feature_div\">\n"
" \n"
" \n\n\n\n\n"
" \n\n"
" \n"
" \n"
" \n"
" \n"
" \n"
" \n"
" \n"
" \n"
" \n\n\n\n"
" \n"
" <div id=\"productDescription_feature_div\" data-feature-name=\"productDescription\" data-template-name=\"productDescription\" class=\"a-row feature\" data-cel-widget=\"productDescription_feature_div\">\n"
" \n"
" \n"
" \n"
" <div class=\"a-divider a-divider-section\"><div class=\"a-divider-inner\"></div></div>\n"
" \n"
" \n"
" <h2 class=\"default\">\n"
" Product description\n"
" \n"
" </h2>\n"
" \n"
" \n"
" \n\n"
" \n"
" \n"
" \n"
" \n"
" \n\n\n\n\n\n"
" <div id=\"productDescription\" class=\"a-section a-spacing-small\">\n"
" \n\n\n\n\n\n"
" \n"
" \n"
" \n\n\n"
" \n"
" \n"
" \n"
" \n"
" \n\n"
" \n"
" \n"
" \n"
" \n"
" \n"
" <!-- show up to 2 reviews by default --> \n"
" \n"
" \n"
" \n"
" \n"
" <p>Router bits ideal for cutting mortises Each of these router bits is constructed with M2 high speed steel to run cooler - Visit Harbor Freight Tools For More Information.\n"
" \n"
" </p>\n"
" \n"
" \n"
" \n"
" \n"
" \n"
" \n"
" \n"
" \n"
" \n"
" \n"
" </div>\n\n"
" \n"
" <style type=\"text/css\">\n"
"#productDescription {\n"
" color: #333333;\n"
" word-wrap: break-word;\n"
" font-size: small;\n"
" line-height: initial;\n"
" margin: 0.5em 0px 0em 25px;\n"
"}\n\n"
"#productDescription_feature_div > h2.default {\n"
" color: #CC6600;\n"
" font-size: medium;\n"
" margin: 0 0 0.25em;\n"
"}\n\n"
"#productDescription_feature_div > h2.books {\n"
" color:#333 !important;\n"
" font-size:21px !important;\n"
" line-height: 1.3;\n"
" padding-bottom: 4px;\n"
" font-weight: normal;\n"
" margin: 0px;\n"
"}\n\n"
"#productDescription_feature_div > h2.softlines {\n"
" color:#333 !important; \n"
" font-size:21px !important;\n"
" line-height: 1.3;\n"
" padding-bottom: 4px;\n"
" font-weight: bold;\n"
" margin: 0px;\n"
"}\n"
"#productDescription > p, #productDescription > div, #productDescription > table {\n"
" margin: 0 0 1em 0;\n"
"}\n\n"
"#productDescription p {\n"
" margin: 0em 0 1em 1em;\n"
"}\n\n"
"#productDescription h3 {\n"
" font-weight: normal;\n"
" color: #333333;\n"
" font-size: 1.23em;\n"
" clear: left;\n"
" margin: 0.75em 0px 0.375em -15px;\n"
"}\n\n"
"#productDescription table {\n"
" border-collapse: inherit !important;\n"
" margin-bottom: 0;\n"
"}\n\n"
"#productDescription table img {\n"
" max-width: inherit !important;\n"
"}\n\n"
"#productDescription table td {\n"
" font-size: small;\n"
" vertical-align: inherit !important;\n"
"}\n\n"
"#productDescription ul li {\n"
" margin: 0 0 0 20px;\n"
"}\n\n"
"#productDescription ul li ul {\n"
" list-style-type: disc !important;\n"
" margin-left: 20px !important;\n"
"}\n\n"
"#productDescription ul ul li {\n"
" list-style-type: disc !important;\n"
" margin-left: 20px !important;\n"
"}\n\n"
"#productDescription > ul ul li {\n"
" list-style-type: disc !important;\n"
"} \n\n\n"
"#productDescription ul li ul li {\n"
" margin: 0 0 0 20px;\n"
"}\n\n"
"#productDescription .aplus p {\n"
" margin: 0 0 1em 0;\n"
"}\n\n"
"#productDescription small {\n"
" font-size: smaller;\n"
"}\n\n"
"#productDescription.prodDescWidth {\n"
" max-width: 1000px\n"
"}\n\n"
"</style>\n\n"
"<!-- Used to set table width because AUI is overriding the width attribute of the tables coming in description -->\n"
"<script type=\"text/javascript\">\n"
"P.when('jQuery').execute(function($){\n"
" $(\"#productDescription table\").each(function() {\n"
" var width = $(this).attr('width');\n"
" if (width) width += 'px';\n"
" else width = 'auto';\n"
" $(this).css('width', width);\n\n"
" var padding = $(this).attr('cellpadding');\n"
" if (padding) padding += 'px';\n"
" else padding = '0px';\n"
" $(this).css('padding', padding);\n"
" });\n"
"});\n"
"</script>\n\n"
" \n\n\n\n"
" \n"
" \n"
" \n"
" \n"
" </div>\n\n\n"
" \n"
" \n"
" \n"
" \n"
" \n"
" \n"
" \n"
" \n"
" \n"
" \n"
" \n"
" \n"
" \n\n"
" \n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n"
" </div>\n"
"</div>")
# Scan test_str for every non-overlapping match of the pattern and print, for
# each one, the span and text of the whole match followed by the span and text
# of each capture group.
# re.MULTILINE is kept as generated; it only changes how ^ and $ anchor.
matches = re.finditer(regex, test_str, re.MULTILINE)
for matchNum, match in enumerate(matches, start=1):
    print("Match {matchNum} was found at {start}-{end}: {match}".format(
        matchNum=matchNum,
        start=match.start(),
        end=match.end(),
        match=match.group(),
    ))
    # Capture groups are numbered from 1 (group 0 is the whole match printed
    # above), so iterate 1..N directly instead of shifting a 0-based index.
    for groupNum in range(1, len(match.groups()) + 1):
        print("Group {groupNum} found at {start}-{end}: {group}".format(
            groupNum=groupNum,
            start=match.start(groupNum),
            end=match.end(groupNum),
            group=match.group(groupNum),
        ))
# Note: for Python 2.7 compatibility, use ur"" to prefix the regex and u"" to prefix the test string and substitution.
# Please keep in mind that these code samples are automatically generated and are not guaranteed to work.
# If you find any syntax errors, feel free to submit a bug report.
# For a full regex reference for Python, please visit: https://docs.python.org/3/library/re.html