# frozen_string_literal: true

# Demo script: scans a block of sample lines shaped like
#   [series] author - title (published, publisher, ISBN)
# with one large pattern and prints the captures of every match.
#
# The pattern was written for a PCRE-style engine and is ported here to
# Ruby (Onigmo) syntax:
#   * named groups are spelled (?<name>...) instead of (?P<name>...);
#   * conditionals on a named group are spelled (?(<name>)yes|no);
#   * a literal "[" or "]" inside a bracket expression is escaped, because
#     Ruby treats an unescaped "[" there as the start of a nested set;
#   * the trailing /m flag is dropped: in Ruby, ^ and $ are always line
#     anchors, and /m would instead let "." match newlines.
#
# Named groups, in order of definition: series, author, title, comments,
# published, prepublisher, publisher, isbn, isbntag, is10, isbn10, isbn13,
# ISBN10, ISBN13, tooManyCommasOrOnlyDigits.
# isbntag/is10 drive the conditional: when an "ISBN..." tag was matched the
# lower-case isbn10/isbn13 groups are tried, otherwise ISBN10/ISBN13.
re = /^([\[(](?<series>[^_-]+?)[\])])?[\s]*?(?<author>(?! [-]).*?)([\s]+?-[\s]*)(?<title>[^\(]*?)((?<comments>[\(](?:[\s]*(((?<published>[12][\d]{3,3})\s*)((?=([,\)]))))?)?((?<prepublisher>[\w]*?[,-]?)([,]?[\s]*((?<publisher>[-|~'&%$#"!\w\s]*(([a-zA-Z]+)[-|~'&%$#"!\w\s]*))(?!(([\d-]{11}-?([x]|[\d])[\s]*[\)])))))?(?<isbn>([,]?[\s]*(?<isbntag>(?i)ISBN[-]?1(?:(?<is10>0)|3)?[:\s])?\s*(?(<isbntag>)(?(<is10>)((?=[\d-]{11}-[\d|x][\s]*[\)])(?<isbn10>\d{1,5}[ -]\d{1,7}[ -]\d{1,6}[ -](?:\d|x)))|((?:(?=[\d-]{17}[\s]*[\)])(?<isbn13>97(?:8|9)[ -]\d{1,5}[ -]\d{1,7}[ -]\d{1,6}[ -]\d))))|(?:(?=(([\d-]{11}-([\d]|[x]))[\s]*[\)]))(?<ISBN10>\d{1,5}([ -])\d{1,7}[ -]\d{1,6}[ -](?:\d|x))|(?:(?=[\d-]{17}[\s]*[\)])(?<ISBN13>97(?:8|9)[ -]\d{1,5}[ -]\d{1,7}[ -]\d{1,6}[ -]\d)))))?)(?=([\s]*[,\)])))?(?<tooManyCommasOrOnlyDigits>.*)[\s]*[\)]))*?$/

# Sample input, one candidate per line. Kept byte-for-byte (including the
# leading newline), since the pattern's whitespace handling can span lines.
str = '
[serie ] author, author - this is it (1234, publish ,ISBN-10: 345-801-028-x )
[serie ] author, author - this is it (1234, publish ,ISBN-10: 345-801-028-x )
[series ] author - this is it (1234, 345-801-028-8)
[series ] author - this is it (1234, 345-801-028-x)
[series ] author - this it (1234,ddddd 345-801-028-8)
[series ] author - this is it (1234, 345-801-028-8)
[series ] author - this is bad isbn becomes publisher (9234, 345-801-028-x)
[series is messed up ]author - this is just date and isbn (1234, 978-801-028-060-7)
[series ]author - this is just date and invalid isbn (1234, 978-801-028-060-x)
[series ]author - this is invalid date (123, 1234erw978-8888801-028-060-7)
[series ]author - this is isbn-13 (978-801-028-060-7)
[series ]author - this is isbn-10 (78-801-0260-7)
[series ]author - not enough dashes (78-80150260-7)
[series ]author - this is it (12343-p978-801-0260-7)
[series ]author - this is it (19,mix textandnumbers123,978-801-028-060-7)
[series ]author - this is no date but valid isbn in pub name (1234-lk-jkj978-801-028-060-7)
[series ]author - this is it (1234,lkjkj978-801-028-060-7)
[series ]author - this is it (123,4lkjkj,978-801-028-060-7)
[series ]author - this is it (1234,jhkh987\'&%\'%$#$%&ljh,lkjnkjnlkjhljh 5545t , 978-801-028-060-7)
(,1234)
[series gets messed up ]author - this is missing date but has comma (,ISBN-13: 978-801-028-060-7)
[series ]author - this is it (1234,ISBN-10: 978-801-006-0)
[series ]author - this is it (1234,ISBN-13: 978-801-028-060-7)
[series ]author - this is it (,ISBN-10: 978-801-006-x)
[series ]author - this is no date (ISBN-10: 978-801-006-x)
[series ]author - this is dateonly (2020)
'

# Print the match result.
# Because the pattern uses named groups, Ruby does not capture the unnamed
# parentheses, so each yielded array holds only the named captures
# (nil for groups that did not participate in the match).
str.scan(re) { |captures| puts captures.to_s }
# Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for Ruby, please visit: http://ruby-doc.org/core-2.2.0/Regexp.html