# coding=utf8
# the above tag defines encoding for this document and is for Python 2.x compatibility
import re
# Demo script: locate the product "feature bullets" <ul> inside a captured
# HTML fragment and print every match together with its capture groups.

# Pattern walk-through:
#   1. an outer <div ...> whose tag text mentions "featurebullets_feature_div"
#   2. an inner <div ...> whose tag text mentions "feature-bullets"
#   3. capture group 1: the first <ul> ... </ul> inside that inner div
#   4. the two closing </div> tags that follow
# [\s\S]*? is used as a lazy "any character, including newlines" matcher.
regex = r"<div\s[\s\S]*?featurebullets_feature_div[\s\S]*?>\s*<div\s[\s\S]*?feature-bullets[\s\S]*?>\s*(<ul[\s\S]*?<\/ul>)[\s\S]*?<\/div>[\s\S]*?<\/div>"

# Sample HTML the pattern is exercised against. It is built from adjacent
# string literals; every double quote that belongs to the HTML itself must be
# backslash-escaped, including the JSON-like quotes in the data-defects value.
test_str = (
    "<div id=\"featurebullets_feature_div\" class=\"feature\" data-feature-name=\"featurebullets\" data-cel-widget=\"featurebullets_feature_div\">\n"
    " \n"
    " \n\n\n\n\n"
    " \n\n"
    " \n"
    " \n"
    " \n"
    " \n"
    " \n"
    " \n"
    " \n"
    " \n"
    " \n\n\n\n"
    "<div id=\"feature-bullets\" class=\"a-section a-spacing-medium a-spacing-top-small\">\n\n\n\n\n\n\n\n\n"
    " \n"
    " \n"
    " <ul class=\"a-unordered-list a-vertical a-spacing-none\">\n"
    " \n"
    " \n"
    " <li><span class=\"a-list-item\"> \n"
    " 100% Washed Twill\n"
    " \n"
    " </span></li>\n"
    " \n"
    " <li><span class=\"a-list-item\"> \n"
    " Imported\n"
    " \n"
    " </span></li>\n"
    " \n"
    " <li><span class=\"a-list-item\"> \n"
    " Rubber sole\n"
    " \n"
    " </span></li>\n"
    " \n"
    " <li><span class=\"a-list-item\"> \n"
    " Shaft measures approximately Low-Top\" from arch\n"
    " \n"
    " </span></li>\n"
    " \n"
    " <li><span class=\"a-list-item\"> \n"
    " Heel measures approximately 0.75\"\n"
    " \n"
    " </span></li>\n"
    " \n"
    " <li><span class=\"a-list-item\"> \n"
    " Platform measures approximately 0.5\"\n"
    " \n"
    " </span></li>\n"
    " \n"
    " <li><span class=\"a-list-item\"> \n"
    " Slip-on canvas sneaker featuring laceless vamp and small logo tags at tongue and topline\n"
    " \n"
    " </span></li>\n"
    " \n"
    " </ul>\n"
    " <!-- Loading EDP related metadata -->\n"
    " \n"
    " \n"
    " \n\n"
    " \n\n\n\n\n"
    "<span class=\"edp-feature-declaration\" data-edp-feature-name=\"featurebullets\" data-edp-asin=\"B00NNA39IE\" data-data-hash=\"3757568849\" data-defects=\"[{\"id\":\"defect-mismatch-info\",\"value\":\"Different from product\"},{\"id\":\"defect-missing-information\",\"value\":\"Missing information\"},{\"id\":\"defect-unessential-info\",\"value\":\"Unimportant information\"},{\"id\":\"defect-other-productinfo-issue\",\"value\":\"Other\"}]\" data-metadata=\"CATALOG\" data-feature-container-id=\"\" data-custom-event-handler=\"\" data-display-name=\"Bullet Points\" data-edit-data-state=\"featureBulletsEDPEditData\" data-position=\"\" data-resolver=\"CQResolver\"></span>\n"
    " \n"
    " \n"
    " \n"
    " \n"
    " \n"
    " \n\n"
    " \n\n"
    " \n\n"
    " \n"
    "</div>\n\n"
    " \n"
    " \n"
    " \n"
    " \n"
    " \n"
    " \n"
    " \n"
    " \n"
    " \n"
    " \n"
    " \n"
    " \n"
    " \n\n"
    " \n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n"
    " </div>"
)

# The pattern contains no ^ or $ anchors, so re.MULTILINE would not change
# what it matches; the flag is therefore omitted.
matches = re.finditer(regex, test_str)

for matchNum, match in enumerate(matches, start=1):
    # Report the span and text of the whole match.
    print("Match {matchNum} was found at {start}-{end}: {match}".format(
        matchNum=matchNum,
        start=match.start(),
        end=match.end(),
        match=match.group(),
    ))

    # Capture groups are numbered from 1; group 0 is the whole match and has
    # already been printed above.
    for groupNum in range(1, len(match.groups()) + 1):
        print("Group {groupNum} found at {start}-{end}: {group}".format(
            groupNum=groupNum,
            start=match.start(groupNum),
            end=match.end(groupNum),
            group=match.group(groupNum),
        ))

# Note: for Python 2.7 compatibility, use ur"" to prefix the regex and u"" to
# prefix the test string and substitution.
#
# Please keep in mind that these code samples are automatically generated and
# are not guaranteed to work. If you find any syntax errors, feel free to
# submit a bug report. For a full regex reference for Python, please visit:
# https://docs.python.org/3/library/re.html