#include <StringConstants.au3> ; to declare the Constants of StringRegExp
#include <Array.au3> ; UDF needed for _ArrayDisplay and _ArrayConcatenate
; $sRegex - PCRE pattern intended to match chemical formulae, written as a
; multi-line string (lines joined with @CRLF) that carries its own inline
; "#" comments; the leading "(?mXx)" sets inline options (x = extended mode,
; which is what allows the whitespace and "#" comments inside the pattern).
; NOTE(review): the "X" inline option is PCRE-specific - confirm that the
; regex engine in the targeted AutoIt version accepts it.
;
; Structure of the pattern, top to bottom:
;   * a negative look-behind that also defines the named group
;     "formula_character" ([\w\-+]); the same group is called again as a
;     subroutine in the final negative look-ahead, so a match may neither be
;     preceded nor followed by such a character;
;   * a look-ahead demanding at least 3 characters from the allowed set;
;   * the named group "unit": one or more repetitions of either
;       1. an element component: optional "isotope" and "species" numbers
;          (plain or wrapped in <sup>/<sub> tags), an atomic symbol, an
;          optional "sub_count", an optional "sup_charge", and an optional
;          connectivity symbol;
;       2. "[" unit "]" with an optional charge (recursive call to "unit");
;       3. "(" unit ")" followed by a count and an optional charge
;          (recursive call to "unit").
; Named groups (isotope, species, count, charge, sub_count, sup_charge, unit)
; are reused through (?&name) subroutine calls rather than being repeated.
Local $sRegex = "(?mXx)(?<!(?<formula_character>[\w\-+])) # must start at a word/formula boundary" & @CRLF & _
"(?=[\w\-+@=<>()[\]\/.]{3}) # minimum number of characters" & @CRLF & _
"(?<unit> # a molecular unit" & @CRLF & _
" (?: #multiple instances of:" & @CRLF & _
" (?: # option 1: a single element component, eg. 35Cl3-" & @CRLF & _
" (?: # optional mass number as preceeding superscript" & @CRLF & _
" (?<isotope>[1-9]\d{0,2})" & @CRLF & _
" | (?:<sup>)(?&isotope)(?:<\/sup>)" & @CRLF & _
" )? " & @CRLF & _
"" & @CRLF & _
" (?: # optional atomic number as preceeding subscript" & @CRLF & _
" (?<species>[1-9]\d{0,2})" & @CRLF & _
" | (?:<sub>)(?&species)(?:<\/sub>)" & @CRLF & _
" )?" & @CRLF & _
"" & @CRLF & _
" (?: #atomic symbol" & @CRLF & _
" A(?:c|g|l|m|r|s|t|u)" & @CRLF & _
" | B(?:a|e|h|i|k|r)?" & @CRLF & _
" | C(?:a|d|e|f|l|m|n|o|r|s|u)?" & @CRLF & _
" | D(?:b|s|y)" & @CRLF & _
" | E(?:r|s|u)" & @CRLF & _
" | F(?:e|l|m|r)?" & @CRLF & _
" | G(?:a|d|e)" & @CRLF & _
" | H(?:e|f|g|o|s)?" & @CRLF & _
" | I(?:n|r)?" & @CRLF & _
" | Kr?" & @CRLF & _
" | L(?:a|i|r|u|v)" & @CRLF & _
" | M(?:c|d|g|n|o|t)" & @CRLF & _
" | N(?:a|b|d|e|h|i|o|p)?" & @CRLF & _
" | O(?:g|s)?" & @CRLF & _
" | P(?:a|b|d|m|o|r|t|u)?" & @CRLF & _
" | R(?:a|b|e|f|g|h|n|u)" & @CRLF & _
" | S(?:b|c|e|g|i|m|n|r)?" & @CRLF & _
" | T(?:a|b|c|e|h|i|l|m|s)" & @CRLF & _
" | U" & @CRLF & _
" | V" & @CRLF & _
" | W" & @CRLF & _
" | Xe" & @CRLF & _
" | Yb?" & @CRLF & _
" | Z(?:n|r)" & @CRLF & _
" )" & @CRLF & _
"" & @CRLF & _
" (?<sub_count> # optional proportion as subscript matching any positive number or 'n'" & @CRLF & _
" (?<count>\d+\.\d+|[1-9]\d*|n)" & @CRLF & _
" | (?:<sub>)(?&count)(?:<\/sub>)" & @CRLF & _
" )?" & @CRLF & _
"" & @CRLF & _
" (?<sup_charge> # optional ionic charge as superscript" & @CRLF & _
" (?<charge>\d*[+\-])" & @CRLF & _
" | (?:<sup>)(?&charge)(?:<\/sup>)" & @CRLF & _
" )?" & @CRLF & _
"" & @CRLF & _
" (?::{1,3}|[@\-=])? # optional symbol describing connectivity to the next element component " & @CRLF & _
" )" & @CRLF & _
"" & @CRLF & _
" | # option 2: an ion" & @CRLF & _
" \[(?&unit)](?&sup_charge)? # square brackets containing a molecular unit with optional charge" & @CRLF & _
"" & @CRLF & _
" | # option 3: a repeating unit" & @CRLF & _
" \((?&unit)\)(?&sub_count)(?&sup_charge)? # parentheses containing a molecular unit with count and optional charge" & @CRLF & _
" " & @CRLF & _
" )+" & @CRLF & _
")" & @CRLF & _
"(?!(?&formula_character)) # must end at a word/formula boundary"
; $sString - sample subject text the pattern is run against, one candidate per
; line (lines joined with @CRLF, with a few empty lines in between).
; The first part lists formulae in plain and <sub>/<sup>-tagged notation,
; followed by a sentence embedding "H2O" and a mixed-case token.
; The trailing entries each carry a short note ("case sensitive",
; "zero count", "invalid isotope mass", ...) naming the rule they are
; presumably meant to violate - verify against the actual match output.
; NOTE(review): "Fe0.95O" appears twice in the list; looks unintentional.
Local $sString = "BrI" & @CRLF & _
"CCl4" & @CRLF & _
"CH3I" & @CRLF & _
"C2H5Br" & @CRLF & _
"H2O4S" & @CRLF & _
"Al2(SO4)3" & @CRLF & _
"[[ClO2]+[ClO4]-]" & @CRLF & _
"[SO4]2-" & @CRLF & _
"CB4.2" & @CRLF & _
"CBn" & @CRLF & _
"CaCl2" & @CRLF & _
"(CH3)3CH" & @CRLF & _
"[Co(NH3)6]3+Cl3-" & @CRLF & _
"[Co(NH<sub>3</sub>)<sub>6</sub>]<sup>3+</sup>Cl<sub>3</sub><sup>-</sup>" & @CRLF & _
"C<sub>4</sub>H<sub>10</sub>" & @CRLF & _
"[As@Ni12As20]3-" & @CRLF & _
"[As@Ni<sub>12</sub>As<sub>20</sub>]<sup>3-</sup>" & @CRLF & _
"CH3-CH2-OH" & @CRLF & _
"SO<sub>4</sub><sup>2-</sup>" & @CRLF & _
"Fe0.95O" & @CRLF & _
"[32PO4]3-" & @CRLF & _
"18O16O" & @CRLF & _
"Fe0.95O" & @CRLF & _
"<sup>16</sup><sub>8</sub>O<sub>2</sub>" & @CRLF & _
"My secret chemical is H2O don't tell" & @CRLF & _
"" & @CRLF & _
"UNiCoRn-PoOP" & @CRLF & _
"" & @CRLF & _
"" & @CRLF & _
"h2o case sensitive" & @CRLF & _
"fireH2O not at word break" & @CRLF & _
"()3- no atomic symbol" & @CRLF & _
"NOt ReAL AToMiC SYMbOLs" & @CRLF & _
"SO-4 inverted ion and count" & @CRLF & _
"Fe0O zero count" & @CRLF & _
"0H2 invalid isotope mass" & @CRLF & _
"100010H invalid atomic number" & @CRLF & _
"9999999H mass or atomic number too large" & @CRLF & _
""
; Run the pattern over the sample text. With $STR_REGEXPARRAYGLOBALFULLMATCH
; StringRegExp returns an array of arrays: one inner array per match, holding
; the full match followed by its capture groups.
Local $aArray = StringRegExp($sString, $sRegex, $STR_REGEXPARRAYGLOBALFULLMATCH)
; Capture the status right away: any later function call resets @error/@extended.
Local $iRegexError = @error, $iRegexOffset = @extended

; Flattened result: every inner match array appended one after another.
Local $aFullArray[0]

If $iRegexError = 2 Then
	; Bad pattern: @extended holds the offset of the error inside the pattern.
	ConsoleWrite("! StringRegExp: invalid pattern, error at offset " & $iRegexOffset & @CRLF)
ElseIf $iRegexError <> 0 Then
	; @error = 1: the pattern is valid but nothing in the subject matched.
	ConsoleWrite("- StringRegExp: no matches found" & @CRLF)
Else
	Local $aMatch
	For $i = 0 To UBound($aArray) - 1
		; Copy the inner array into a real variable before handing it to the
		; ByRef parameter of _ArrayConcatenate.
		$aMatch = $aArray[$i]
		If IsArray($aMatch) Then _ArrayConcatenate($aFullArray, $aMatch)
	Next
EndIf

; Keep the original contract: $aArray ends up as the flat (possibly empty) list.
$aArray = $aFullArray

; Present the entire match result (skipped when there is nothing to show).
If UBound($aArray) > 0 Then _ArrayDisplay($aArray, "Result")
; Please keep in mind that these code samples are automatically generated and are not guaranteed to work.
; If you find any syntax errors, feel free to submit a bug report.
; For a full regex reference for AutoIt, please visit: https://www.autoitscript.com/autoit3/docs/functions/StringRegExp.htm