import re
# Demo script: pull the link text out of every `<td class="name">` cell of an
# HTML crew table and print each match together with its capture groups.
#
# Pattern walk-through:
#   <td class="name">\n\s+   the opening cell tag, a newline, then indentation
#   <a[^>]+>                 the anchor's opening tag; `[^>]` also matches
#                            newlines, so attributes may wrap across lines
#   (.*?)                    group 1: the link text, matched lazily so it
#                            stops at the FIRST `</a>` on the line instead of
#                            running on to the last one
#   (?=</a>)                 lookahead: `</a>` must follow but is not consumed,
#                            so it stays out of the overall match
regex = re.compile(r'<td class="name">\n\s+<a[^>]+>(.*?)(?=</a>)')

# Sample input: a header followed by a two-row crew table.
test_str = ("<header class=\"ipl-header\">\n"
            " <div class=\"ipl-header__content\">\n"
            " <h4 name=\"producers\" id=\"producers\" class=\"ipl-header__content ipl-list-title\">\n"
            " Produced by\n"
            " </h4>\n"
            " </div>\n"
            " <a class=\"ipl-header__edit-link\" href=\"https://contribute.imdb.com/updates?update=tt2527336:producers\">Edit</a>\n"
            "</header>\n"
            "<table class=\"subpage_data spFirst crew_list\">\n"
            " <tbody>\n"
            " <tr class=\"even\">\n"
            " <td class=\"name\">\n"
            " <a href=\"/name/nm0009190/?ref_=tt_rv\"\n"
            " >J.J. Abrams</a>\n"
            " </td>\n"
            " <td>...</td>\n"
            " <td>executive producer</td>\n"
            " </tr>\n"
            " <tr class=\"odd\">\n"
            " <td class=\"name\">\n"
            " <a href=\"/name/nm0027297/?ref_=tt_rv\"\n"
            " >Pippa Anderson</a>\n"
            " </td>\n"
            " <td>...</td>\n"
            " <td>co-producer</td>\n"
            " </tr>\n"
            " </tbody>\n"
            "</table>")

# finditer yields match objects lazily, in left-to-right order; the iterator
# is exhausted once the loop below finishes.
matches = regex.finditer(test_str)

for match_num, match in enumerate(matches, start=1):
    # Whole match: span in test_str plus the matched text.
    print(f"Match {match_num} was found at {match.start()}-{match.end()}: {match.group()}")

    # Capture groups are numbered from 1 (group 0 is the whole match).
    for group_num, group in enumerate(match.groups(), start=1):
        print(f"Group {group_num} found at {match.start(group_num)}-{match.end(group_num)}: {group}")
# Please keep in mind that these code samples are automatically generated and
# are not guaranteed to work. If you find any syntax errors, feel free to
# submit a bug report. For a full regex reference for Python, please visit:
# https://docs.python.org/3/library/re.html