import re
# Pattern for book-description lines shaped like
#     [series] author - title (year, publisher, ISBN)
# The pattern text is split across adjacent raw-string literals purely for
# readability; the pieces concatenate into a single expression and the group
# numbering is unchanged.
#
# Case-insensitivity is requested through ``re.IGNORECASE`` instead of an
# inline ``(?i)`` in the middle of the expression: Python always treated a
# mid-pattern ``(?i)`` as a global flag (deprecated since 3.6) and rejects it
# outright from Python 3.11 on, so the flag argument keeps the same matching
# behaviour while letting the pattern compile on current interpreters.
regex = re.compile(
    # Optional leading "[series]" / "(series)" tag.  The "[" inside the
    # character class is escaped to avoid the "possible nested set" warning.
    r"^([\[(](?P<series>[^_-]+?)[])])?"
    # Author: shortest text up to a whitespace-preceded dash separator.
    r"[\s]*?(?P<author>(?! [-]).*?)([\s]+?-[\s]*)"
    # Title: shortest run of characters that are not "(".
    r"(?P<title>[^\(]*?)"
    # Parenthesised comment block, optionally starting with a 4-digit year
    # (first digit 1 or 2) that must be followed by "," or ")".
    r"((?P<comments>[\(](?:[\s]*(((?P<published>[12][\d]{3,3})\s*)((?=([,\)]))))?)?"
    # Optional pre-publisher / publisher text; a publisher needs at least one
    # ASCII letter and must not be directly followed by an ISBN-10-like tail.
    r"((?P<prepublisher>[\w]*?[,-]?)"
    r"([,]?[\s]*((?P<publisher>[-|~'&%$#\"!\w\s]*(([a-zA-Z]+)[-|~'&%$#\"!\w\s]*))"
    r"(?!(([\d-]{11}-?([x]|[\d])[\s]*[\)])))))?"
    # Optional ISBN, with or without an "ISBN-10:" / "ISBN-13:" tag.  When a
    # tag is present it selects which layout (10 or 13) is accepted.
    r"(?P<isbn>([,]?[\s]*(?P<isbntag>ISBN[-]?1(?:(?P<is10>0)|3)?[:\s])?\s*"
    r"(?(isbntag)(?(is10)"
    r"((?=[\d-]{11}-[\d|x][\s]*[\)])(?P<isbn10>\d{1,5}[ -]\d{1,7}[ -]\d{1,6}[ -](?:\d|x)))"
    r"|((?:(?=[\d-]{17}[\s]*[\)])(?P<isbn13>97(?:8|9)[ -]\d{1,5}[ -]\d{1,7}[ -]\d{1,6}[ -]\d))))"
    r"|(?:(?=(([\d-]{11}-([\d]|[x]))[\s]*[\)]))(?P<ISBN10>\d{1,5}([ -])\d{1,7}[ -]\d{1,6}[ -](?:\d|x))"
    r"|(?:(?=[\d-]{17}[\s]*[\)])(?P<ISBN13>97(?:8|9)[ -]\d{1,5}[ -]\d{1,7}[ -]\d{1,6}[ -]\d)))))?)"
    # The publisher/ISBN section must end right before "," or ")".
    r"(?=([\s]*[,\)])))?"
    # Whatever is left inside the parentheses is captured as a catch-all.
    r"(?P<tooManyCommasOrOnlyDigits>.*)[\s]*[\)]))*?$",
    flags=re.MULTILINE | re.IGNORECASE,
)

# Sample input: one candidate description per line, including deliberately
# malformed entries (see the wording of the individual titles).
test_str = ("\n\n"
    "[serie ] author, author - this is it (1234, publish ,ISBN-10: 345-801-028-x )\n"
    "[serie ] author, author - this is it (1234, publish ,ISBN-10: 345-801-028-x )\n\n"
    "[series ] author - this is it (1234, 345-801-028-8)\n"
    "[series ] author - this is it (1234, 345-801-028-x)\n\n"
    "[series ] author - this it (1234,ddddd 345-801-028-8)\n"
    "[series ] author - this is it (1234, 345-801-028-8)\n\n"
    "[series ] author - this is bad isbn becomes publisher (9234, 345-801-028-x)\n\n"
    "[series is messed up ]author - this is just date and isbn (1234, 978-801-028-060-7)\n"
    "[series ]author - this is just date and invalid isbn (1234, 978-801-028-060-x)\n"
    "[series ]author - this is invalid date (123, 1234erw978-8888801-028-060-7)\n"
    "[series ]author - this is isbn-13 (978-801-028-060-7)\n"
    "[series ]author - this is isbn-10 (78-801-0260-7)\n"
    "[series ]author - not enough dashes (78-80150260-7)\n"
    "[series ]author - this is it (12343-p978-801-0260-7)\n\n"
    "[series ]author - this is it (19,mix textandnumbers123,978-801-028-060-7)\n\n"
    "[series ]author - this is no date but valid isbn in pub name (1234-lk-jkj978-801-028-060-7)\n"
    "[series ]author - this is it (1234,lkjkj978-801-028-060-7)\n"
    "[series ]author - this is it (123,4lkjkj,978-801-028-060-7)\n\n"
    "[series ]author - this is it (1234,jhkh987'&%'%$#$%&ljh,lkjnkjnlkjhljh 5545t , 978-801-028-060-7)\n"
    "(,1234)\n\n"
    "[series gets messed up ]author - this is missing date but has comma (,ISBN-13: 978-801-028-060-7)\n"
    "[series ]author - this is it (1234,ISBN-10: 978-801-006-0)\n"
    "[series ]author - this is it (1234,ISBN-13: 978-801-028-060-7)\n"
    "[series ]author - this is it (,ISBN-10: 978-801-006-x)\n"
    "[series ]author - this is no date (ISBN-10: 978-801-006-x)\n\n\n"
    "[series ]author - this is dateonly (2020)\n")

# Report every match together with the span and text of each capture group.
# Groups that did not participate in a match print as -1--1: None.
matches = regex.finditer(test_str)
for match_num, match in enumerate(matches, start=1):
    print(f"Match {match_num} was found at {match.start()}-{match.end()}: {match.group()}")
    for group_num, group in enumerate(match.groups(), start=1):
        print(f"Group {group_num} found at {match.start(group_num)}-{match.end(group_num)}: {group}")
# Please keep in mind that these code samples are automatically generated and
# are not guaranteed to work. If you find any syntax errors, feel free to
# submit a bug report. For a full regex reference for Python, please visit:
# https://docs.python.org/3/library/re.html