import re
# Rewrite <a> tags whose href points at a .jpg/.jpeg/.png file into
# fancybox gallery links that wrap an <img> thumbnail.  All other attributes
# of the original tag are kept; the original link text is dropped.
#
# The href value may be double-quoted, single-quoted or unquoted.  Python's
# ``re`` module has no branch-reset group ``(?|...)``, so each quoting style
# gets its own named group; exactly one of the three participates in a match.
regex = re.compile(r"""
    <a
    \s+
    # Attributes that precede href, each followed by whitespace.
    (?P<before>(?:[^\s<>'"=]+(?:=(?:"[^"]*"|'[^']*'|[^\s'">]+))?\s+)*)
    href=(?:
        "(?P<href_dq>[^"]*\.(?:jpe?g|png))"
      | '(?P<href_sq>[^']*\.(?:jpe?g|png))'
      | (?P<href_bare>[^\s'">]*\.(?:jpe?g|png))(?=[>\s])
    )
    # Attributes that follow href, each preceded by whitespace.
    (?P<after>(?:\s+[^\s<>'"=]+(?:=(?:"[^"]*"|'[^']*'|[^\s'">]+))?)*)
    \s*>
    (?P<body>.*?)
    </a>
""", flags=re.MULTILINE | re.IGNORECASE | re.DOTALL | re.VERBOSE)

test_str = ("<p>С этой формы приходят заявки <a href=\"https://site.com/lack_tech.php\">https://site.com/lack_tech.php</a></p>\n"
            "<p>Или что ты имеешь ввиду?</p>\n"
            "<div class=\"attachment_files_message\"> \n"
            " <p>Прикреплённые файлы:</p>\n"
            " <a href=\"http://site.com/public/uploads/kylticket/2670/Screenshot_1.png\" target=\"_blank\">Screenshot_1.png</a>\n"
            "<a href=http://site.com/public/uploads/kylticket/2670/Screenshot_1.png target=\"_blank\">Screenshot_1.png</a>\n"
            " <a\n"
            " target=\"_blank\"\n"
            " href=\"http://site.com/public/uploads/kylticket/2670/Screenshot_1.png\" >Screenshot_1.png</a>\n"
            "</div>")

# Python replacement templates use \g<name> (or \N), not $N.  The three href
# groups are simply concatenated: the two alternatives that did not take part
# in the match expand to an empty string (Python 3.5+).
subst = (
    r'<a \g<before>\g<after> data-fancybox="gallery" '
    r'href="\g<href_dq>\g<href_sq>\g<href_bare>">'
    r'<img src="\g<href_dq>\g<href_sq>\g<href_bare>" alt="" class="tmp_class"></a>'
)

result = regex.sub(subst, test_str)

if result:
    print(result)
# Please keep in mind that these code samples are automatically generated and
# are not guaranteed to work. If you find any syntax errors, feel free to
# submit a bug report. For a full regex reference for Python, please visit:
# https://docs.python.org/3/library/re.html