import re
# Demo: reduce "<title> [<isbn>] by <author> (<date ...>).pdf" file names to
# "<author> (<date ...>).pdf" with a single multi-line substitution.
#
# Pattern pieces (re.MULTILINE lets ^ anchor at the start of every line):
#   ^!*      any number of leading "!" markers, matched but not captured
#   title    lazy run of text that must start with an ASCII letter or digit
#   isbn     optional 10-digit number, separated from the title by whitespace
#   author   text following " by ", starting with an uppercase letter or a
#            digit, and ending just before the whitespace that precedes "("
regex = re.compile(r"^!*(?P<title>[A-Za-z0-9].+?)(?:\s+(?P<isbn>[0-9]{10}))?\s+by\s+(?P<author>[A-Z0-9][^(]+)(?=\s\()", flags=re.MULTILINE)

# Sample input: one file name per line, with and without "!" prefixes, with
# and without an ISBN, and one whose author field starts with a digit.
test_str = ("!!Mixed Fortunes - An Economic History of China Russia and the West 0198703635 by Vladimir Popov (Jun 17, 2014 4_1).pdf\n"
            "!Mixed Fortunes - An Economic History of China Russia and the West 0198703635 by Vladimir Popov (Jun 17, 2014 4_1).pdf\n"
            "Mixed Fortunes - An Economic History of China Russia and the West 0198703635 by Vladimir Popov (Jun 17, 2014 4_1).pdf\n"
            "!!Mixed Fortunes - An Economic History of China Russia and the West by Vladimir Popov (Jun 17, 2014 4_1).pdf\n"
            "!!Mixed Fortunes - An Economic History of China Russia and the West by 1 Vladimir Popov (Jun 17, 2014 4_1).pdf")

# Group 3 is the named group "author" (title=1, isbn=2, author=3), so every
# match is replaced by the author alone; the unmatched " (...).pdf" tail of
# each line is left in place.
subst = "\\3"

result = regex.sub(subst, test_str)

if result:
    # The original had this call unindented, which is an IndentationError.
    print(result)
# Please keep in mind that these code samples are automatically generated and
# are not guaranteed to work. If you find any syntax errors, feel free to
# submit a bug report. For a full regex reference for Python, please visit:
# https://docs.python.org/3/library/re.html