import re
# Matches an <a> element whose href ends in ".pdf?min".
#   group 1 -> the href value
#   group 2 -> the link text (may span several lines, hence DOTALL)
# The attribute scans use negated character classes ([^>] / [^"]) so a match
# cannot start at an earlier, unrelated <a> tag and run across tag or
# attribute boundaries, and the dot before "pdf" is escaped so that it only
# matches a literal ".".
regex = re.compile(
    r'<a\s[^>]*?href="([^"]+?\.pdf\?min)"[^>]*>(.*?)</a>',
    flags=re.DOTALL,
)

# Sample HTML fragment: a table row containing four PDF links, the last of
# which has link text broken over two lines.
test_str = (
    '<table cellspacing="0" cellpadding="0" width="100%" border="0">\n'
    ' <tbody>\n'
    ' <tr>\n'
    ' <td width="25%"><a href="http://www.test.com/myfile.pdf?min" target="_blank">Menus 18 €</a></div></td>\n'
    ' <td width="25%"><a href="http://www.test.com/myfile.pdf?min" target="_blank">Menus 24 et 26 €</a></div></td>\n'
    ' <td width="25%"><a href="http://www.test.com/myfile.pdf?min" target="_blank">Menus 30 et 37 € </a></div></td>\n'
    ' <td width="25%"><a href="http://www.test.com/myfile.pdf?min">La Carte détaillée <br>\n'
    ' (Entrées - Viandes - Poissons)</a></td>\n'
    ' </tr>\n'
    ' </tbody>\n'
    '</table>'
)

# Replacement text substituted for every matched <a>...</a> element.
subst = "ok"

# Replace each matching link (opening tag, text and closing tag) with `subst`.
result = regex.sub(subst, test_str)

# Only print when the substitution produced a non-empty string.
if result:
    print(result)
# Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for Python, please visit: https://docs.python.org/3/library/re.html