re = /<!ENTITY\s[a-z0-9]*\s"(&[a-zA-Z0-9]+;){4,}">(?:.*?<!ENTITY\s[a-z0-9]*\s"(&[a-zA-Z0-9]+;){4,}">){4,}/i
str = '<!DOCTYPE data [
<!ENTITY a0 "dos" >
<!ENTITY a1 "&a0;&a0;&a0;&a0;">
<!ENTITY a2 "&a1;&a1;&a1;&a1;&a1;&a1;">
<!ENTITY a1 "&a2;&a2;&a2;&a2;&a2;&a2;">
test
<!ENTITY a1 "&a2;&a2;&a2;&ertertert;&a2;&a2;">
<!ENTITY a1 "&a2;&a2;&a2;&ertertert;&a2;&a2;">
<!ENTITY a1 "&a2;&a2;&a2;&a2;&a2;&a2;">
d
dd
<html abc>
a
<!ENTITY a2 "&a3;&a3;&a3;&a3;&a3;">
<!ENTITY a1 "&a0;&a0;&a0;&a0;&d5;">
]>
<data>&a2;</data>'
# Print the match result
str.scan(re) do |match|
puts match.to_s
end
Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for Ruby, please visit: http://ruby-doc.org/core-2.2.0/Regexp.html