import re
# Extract the text fragments found inside <td> cells of an HTML snippet.
#
# The pattern this sample was generated from relied on the PCRE anchor ``\G``
# ("continue exactly where the previous match ended").  Python's built-in
# ``re`` module does not support ``\G`` and refuses to compile it, so the
# continuation is emulated below: a match either starts at a ``<td ...>``
# opening tag (``regex``), or is anchored at the end of the previous match
# via ``Pattern.match(text, pos)`` (``_continuation``).

# Opening tag of a table cell.
_CELL_OPEN = r"<td[^<]*>"
# One text fragment: optional leading tag, text that does not start with
# whitespace (captured as group 1), optional trailing tag.
_FRAGMENT = r"(?:<[^<]+>)?(?!\s+)([^<]*)(?:<[^<]+>)?"

# First fragment of a cell (replaces the ``<td[^<]*>`` alternative).
regex = re.compile(r"(?s)" + _CELL_OPEN + _FRAGMENT)
# Follow-up fragment (replaces the ``\G(?!^)`` alternative).
_continuation = re.compile(r"(?s)" + _FRAGMENT)

test_str = ("<table class=\"fiche_table_caracter\"><tbody>\n"
            "<tr>\n"
            " <td class=\"caracteristique\"><strong>Design</strong></td>\n"
            " <td>Classique (full tactile)</td>\n"
            "</tr>\n\n"
            "<tr>\n"
            " <td class=\"caracteristique\"><strong>Système d'exploitation (OS)</strong></td>\n"
            " <td>iOS</td>\n"
            "</tr>\n"
            "<tr>\n"
            " <td class=\"caracteristique\"><strong>Ecran</strong></td>\n"
            " <td>4,7'' (1334 x 750 pixels)<br />16 millions de couleurs</td>\n"
            "</tr>\n"
            "<tr>\n"
            " <td class=\"caracteristique\"><strong>Mémoire interne</strong></td>\n"
            " <td>128 Go, 1 Go RAM</td>\n"
            "</tr>\n"
            "<tr>\n"
            " <td class=\"caracteristique\"><strong>Appareil photo</strong></td>\n"
            " <td>8 mégapixels</td>\n"
            "</tr>\n"
            "</tbody>\n"
            "</table>")


def iter_matches(text: str):
    """Yield successive match objects for the text fragments of <td> cells.

    Each yielded match either begins at a ``<td ...>`` opening tag or starts
    exactly where the previous match ended.  Group 1 holds the fragment text;
    it may be empty (for example when the match only consumes a closing
    ``</td>`` tag).  Match positions are absolute offsets into *text*.

    Args:
        text: The HTML snippet to scan.

    Yields:
        ``re.Match`` objects in left-to-right order.
    """
    match = regex.search(text)
    while match is not None:
        yield match
        pos = match.end()
        follow_up = _continuation.match(text, pos)
        # Only chain on a follow-up that consumes input; a zero-width
        # follow-up would never advance ``pos`` and loop forever.
        if follow_up is not None and follow_up.end() > pos:
            match = follow_up
        else:
            # Chain broken: look for the next cell opening instead.
            match = regex.search(text, pos)


matches = iter_matches(test_str)
for match_num, match in enumerate(matches, start=1):
    print(f"Match {match_num} was found at {match.start()}-{match.end()}: {match.group()}")
    for group_num, group in enumerate(match.groups(), start=1):
        print(f"Group {group_num} found at {match.start(group_num)}-{match.end(group_num)}: {group}")
# Please keep in mind that these code samples are automatically generated and
# are not guaranteed to work. If you find any syntax errors, feel free to
# submit a bug report. For a full regex reference for Python, please visit:
# https://docs.python.org/3/library/re.html