use strict;
use warnings;

# Sample input: one book description per line, in the rough shape
#   [series] author - title (year, publisher, ISBN)
# with many deliberately malformed variants to exercise the pattern below.
# The literal is single-quoted, so only \' is an escape; `$`, `#`, `%` and
# `&` are taken literally.
my $str = '
[serie ] author, author - this is it (1234, publish ,ISBN-10: 345-801-028-x )
[serie ] author, author - this is it (1234, publish ,ISBN-10: 345-801-028-x )
[series ] author - this is it (1234, 345-801-028-8)
[series ] author - this is it (1234, 345-801-028-x)
[series ] author - this it (1234,ddddd 345-801-028-8)
[series ] author - this is it (1234, 345-801-028-8)
[series ] author - this is bad isbn becomes publisher (9234, 345-801-028-x)
[series is messed up ]author - this is just date and isbn (1234, 978-801-028-060-7)
[series ]author - this is just date and invalid isbn (1234, 978-801-028-060-x)
[series ]author - this is invalid date (123, 1234erw978-8888801-028-060-7)
[series ]author - this is isbn-13 (978-801-028-060-7)
[series ]author - this is isbn-10 (78-801-0260-7)
[series ]author - not enough dashes (78-80150260-7)
[series ]author - this is it (12343-p978-801-0260-7)
[series ]author - this is it (19,mix textandnumbers123,978-801-028-060-7)
[series ]author - this is no date but valid isbn in pub name (1234-lk-jkj978-801-028-060-7)
[series ]author - this is it (1234,lkjkj978-801-028-060-7)
[series ]author - this is it (123,4lkjkj,978-801-028-060-7)
[series ]author - this is it (1234,jhkh987\'&%\'%$#$%&ljh,lkjnkjnlkjhljh 5545t , 978-801-028-060-7)
(,1234)
[series gets messed up ]author - this is missing date but has comma (,ISBN-13: 978-801-028-060-7)
[series ]author - this is it (1234,ISBN-10: 978-801-006-0)
[series ]author - this is it (1234,ISBN-13: 978-801-028-060-7)
[series ]author - this is it (,ISBN-10: 978-801-006-x)
[series ]author - this is no date (ISBN-10: 978-801-006-x)
[series ]author - this is dateonly (2020)
';

# Pattern for one description line. Named captures, in order of appearance:
#   series, author, title, comments (the whole parenthesised tail),
#   published, prepublisher, publisher, isbn, isbntag, is10,
#   isbn10 / isbn13 (used when an "ISBN-10:" / "ISBN-13:" tag was seen),
#   ISBN10 / ISBN13 (used when no tag was seen), tooManyCommasOrOnlyDigits.
# Flags: /m lets ^ and $ anchor at every line of $str; /p enables ${^MATCH},
# ${^PREMATCH} and ${^POSTMATCH}.
#
# NOTE: the `$` inside the two publisher character classes is written `\$`.
# Unescaped, Perl interpolates `$#` as the long-removed special variable $#
# before the regex engine ever sees the class: that is a fatal error from
# Perl 5.30 on, and on older perls it silently drops `$` and `#` from the class.
my $regex = qr/^([[(](?P<series>[^_-]+?)[])])?[\s]*?(?P<author>(?! [-]).*?)([\s]+?-[\s]*)(?P<title>[^\(]*?)((?P<comments>[\(](?:[\s]*(((?P<published>[12][\d]{3,3})\s*)((?=([,\)]))))?)?((?P<prepublisher>[\w]*?[,-]?)([,]?[\s]*((?P<publisher>[-|~'&%\$#"!\w\s]*(([a-zA-Z]+)[-|~'&%\$#"!\w\s]*))(?!(([\d-]{11}-?([x]|[\d])[\s]*[\)])))))?(?P<isbn>([,]?[\s]*(?P<isbntag>(?i)ISBN[-]?1(?:(?P<is10>0)|3)?[:\s])?\s*(?(isbntag)(?(is10)((?=[\d-]{11}-[\d|x][\s]*[\)])(?P<isbn10>\d{1,5}[ -]\d{1,7}[ -]\d{1,6}[ -](?:\d|x)))|((?:(?=[\d-]{17}[\s]*[\)])(?P<isbn13>97(?:8|9)[ -]\d{1,5}[ -]\d{1,7}[ -]\d{1,6}[ -]\d))))|(?:(?=(([\d-]{11}-([\d]|[x]))[\s]*[\)]))(?P<ISBN10>\d{1,5}([ -])\d{1,7}[ -]\d{1,6}[ -](?:\d|x))|(?:(?=[\d-]{17}[\s]*[\)])(?P<ISBN13>97(?:8|9)[ -]\d{1,5}[ -]\d{1,7}[ -]\d{1,6}[ -]\d)))))?)(?=([\s]*[,\)])))?(?P<tooManyCommasOrOnlyDigits>.*)[\s]*[\)]))*?$/mp;

# Iterate over every match. A scalar-context /g match resumes from pos($str)
# each time, so `while` visits each matching line in turn; an `if` would stop
# after the first match and leave pos($str) set.
while ( $str =~ /$regex/g ) {
    print "Whole match is ${^MATCH} and its start/end positions can be obtained via \$-[0] and \$+[0]\n";
    # print "Capture Group 1 is $1 and its start/end positions can be obtained via \$-[1] and \$+[1]\n";
    # print "Capture Group 2 is $2 ... and so on\n";
}
# ${^POSTMATCH} and ${^PREMATCH} are also available with the use of '/p'
# Named capture groups can be read via $+{name}, e.g. $+{author} or $+{title}
# Please keep in mind that these code samples are automatically generated and are not guaranteed to work.
# If you find any syntax errors, feel free to submit a bug report.
# For a full regex reference for Perl, please visit: http://perldoc.perl.org/perlre.html