#include <StringConstants.au3> ; to declare the Constants of StringRegExp
#include <Array.au3> ; UDF needed for _ArrayDisplay and _ArrayConcatenate
; Pattern used to validate an XHTML-like document.
; Flags: (?x) extended/verbose (whitespace and "#" comments inside the pattern
; are ignored, which is why every line ends with @CRLF), (?i) case-insensitive,
; (?s) dot matches newlines.
; The (?(DEFINE)...) group only declares named subpatterns (XHTML, HTML, content,
; tag, attrs, string, comment, keyword, tagname, DOCTYPE); the actual match is the
; final line, which anchors a single (?&XHTML) call between ^ and $.
; NOTE: a literal double quote inside an AutoIt double-quoted string must be
; written as "" (see the "string" and "keyword" subpatterns below).
Local $sRegex = "(?xis)(?(DEFINE)" & @CRLF & _
" (?<XHTML>" & @CRLF & _
" (?&comment)*\s*" & @CRLF & _
" (?&DOCTYPE)" & @CRLF & _
" (?&comment)*\s*" & @CRLF & _
" (?&HTML)" & @CRLF & _
" (?&comment)*\s*" & @CRLF & _
" )" & @CRLF & _
"" & @CRLF & _
" # HTML element" & @CRLF & _
" (?<HTML>" & @CRLF & _
" <html(?&attrs)?>(?&content)<\/html>" & @CRLF & _
" )" & @CRLF & _
"" & @CRLF & _
" # Match content" & @CRLF & _
" (?<content>" & @CRLF & _
" \s*" & @CRLF & _
" (?:" & @CRLF & _
" ((?&tag) | [^<>]+)\s*" & @CRLF & _
" )*" & @CRLF & _
" )" & @CRLF & _
"" & @CRLF & _
" # General tag" & @CRLF & _
" (?<tag>" & @CRLF & _
" <((?&tagname))(?&attrs)?\s*(?:" & @CRLF & _
" \/>|" & @CRLF & _
" >\s*(?&content)\s*" & @CRLF & _
" <\/\g'-1'>" & @CRLF & _
" )|(?&comment)" & @CRLF & _
" )" & @CRLF & _
"" & @CRLF & _
" # Attributes" & @CRLF & _
" (?<attrs>\s+" & @CRLF & _
" # The name" & @CRLF & _
" (?&keyword) (" & @CRLF & _
" \s*=\s*" & @CRLF & _
" (?:" & @CRLF & _
" (?&keyword)|(?&string)" & @CRLF & _
" )" & @CRLF & _
" )?" & @CRLF & _
" (?&attrs)?" & @CRLF & _
" )" & @CRLF & _
"" & @CRLF & _
" (?<string>" & @CRLF & _
" ""(?:\\.|.)+?""|" & @CRLF & _
" '(?:\\.|.)+?'" & @CRLF & _
" )" & @CRLF & _
"" & @CRLF & _
" (?<comment>" & @CRLF & _
" \s*<!--(.+?)-->\s*" & @CRLF & _
" )" & @CRLF & _
"" & @CRLF & _
" # Match keyword" & @CRLF & _
" (?<keyword>[^\s\/>""'=]+)" & @CRLF & _
" # Match tag name" & @CRLF & _
" (?<tagname>(?!xml)[A-Za-z_][A-Za-z\d_.-]*)" & @CRLF & _
"" & @CRLF & _
" # DOCTYPE expression" & @CRLF & _
" (?<DOCTYPE>" & @CRLF & _
" <!doctype\s+x?html\s*(public\s*(?&string))?(\s+(?&string))*>" & @CRLF & _
" )" & @CRLF & _
")" & @CRLF & _
"" & @CRLF & _
"^\s*(?&XHTML)\s*$"
; Sample document the pattern is tested against: a leading comment, a DOCTYPE
; declaration with PUBLIC identifier and DTD URL, then an <html> element.
; Every literal double quote in the markup is doubled ("") because the text is
; held in AutoIt double-quoted strings; single quotes need no escaping there.
Local $sString = "<!-- test -->" & @CRLF & _
"<!DOCTYPE html " & @CRLF & _
" PUBLIC ""-//W3C//DTD XHTML 1.0 Frameset//EN""" & @CRLF & _
" ""http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd"">" & @CRLF & _
" <html goat:style=""''inva'lid css I know"">textttt<head></head><body><div class=""onfoodstamps""><div class=""upper"">foo<p>ayyy</p>bar</div>baz</div><br/></head></html>"
; Run the pattern over the sample text. With $STR_REGEXPARRAYGLOBALFULLMATCH the
; result is an array of arrays: one inner array (full match + groups) per match.
Local $aArray = StringRegExp($sString, $sRegex, $STR_REGEXPARRAYGLOBALFULLMATCH)
; Capture the status right away: any later function call overwrites @error/@extended.
Local $iError = @error, $iErrorOffset = @extended
; Flat, one-dimensional list that receives every inner array in order.
Local $aFullArray[0]
If $iError = 2 Then
	; Bad pattern: @extended holds the offset of the error inside the pattern.
	ConsoleWriteError("StringRegExp: bad pattern at offset " & $iErrorOffset & @CRLF)
ElseIf $iError Then
	; No match at all: there is nothing to flatten or display.
	ConsoleWriteError("StringRegExp: no match found" & @CRLF)
Else
	Local $aMatch
	For $i = 0 To UBound($aArray) - 1
		; Copy the inner array to a plain variable before handing it to the UDF.
		$aMatch = $aArray[$i]
		_ArrayConcatenate($aFullArray, $aMatch)
	Next
EndIf
$aArray = $aFullArray
; Present the entire match result (skipped when nothing matched)
If UBound($aArray) > 0 Then _ArrayDisplay($aArray, "Result")
; Please keep in mind that these code samples are automatically generated and are
; not guaranteed to work. If you find any syntax errors, feel free to submit a bug
; report. For a full regex reference for AutoIt, please visit:
; https://www.autoitscript.com/autoit3/docs/functions/StringRegExp.htm