// Pattern that recognises chemical formulae (e.g. "CCl4", "[SO4]2-", "(CH3)3CH")
// embedded in free text. A formula is a sequence of element components, bracketed
// ions and parenthesised repeating units; see the inline "#" comments for details.
//
// Delimiter: "~" is used instead of "/" because the pattern text, including its
// "#" comments, contains "/" characters. PHP ends the pattern at the first
// unescaped delimiter character, even when it sits inside an extended-mode
// comment, so a "/" delimiter would cut the pattern short on the first line.
//
// Modifiers: x = ignore unescaped whitespace and allow "#" comments,
//            X = PCRE_EXTRA, m = PCRE_MULTILINE.
$re = '~(?<!(?<formula_character>[\w\-+])) # must start at a word/formula boundary
(?=[\w\-+@=<>()[\]\/.]{3}) # minimum number of characters
(?<unit> # a molecular unit
(?: #multiple instances of:
(?: # option 1: a single element component, eg. 35Cl3-
(?: # optional mass number as preceeding superscript
(?<isotope>[1-9]\d{0,2})
| (?:<sup>)(?&isotope)(?:<\/sup>)
)?
(?: # optional atomic number as preceeding subscript
(?<species>[1-9]\d{0,2})
| (?:<sub>)(?&species)(?:<\/sub>)
)?
(?: #atomic symbol
A(?:c|g|l|m|r|s|t|u)
| B(?:a|e|h|i|k|r)?
| C(?:a|d|e|f|l|m|n|o|r|s|u)?
| D(?:b|s|y)
| E(?:r|s|u)
| F(?:e|l|m|r)?
| G(?:a|d|e)
| H(?:e|f|g|o|s)?
| I(?:n|r)?
| Kr?
| L(?:a|i|r|u|v)
| M(?:c|d|g|n|o|t)
| N(?:a|b|d|e|h|i|o|p)?
| O(?:g|s)?
| P(?:a|b|d|m|o|r|t|u)?
| R(?:a|b|e|f|g|h|n|u)
| S(?:b|c|e|g|i|m|n|r)?
| T(?:a|b|c|e|h|i|l|m|s)
| U
| V
| W
| Xe
| Yb?
| Z(?:n|r)
)
(?<sub_count> # optional proportion as subscript matching any positive number or \'n\'
(?<count>\d+\.\d+|[1-9]\d*|n)
| (?:<sub>)(?&count)(?:<\/sub>)
)?
(?<sup_charge> # optional ionic charge as superscript
(?<charge>\d*[+\-])
| (?:<sup>)(?&charge)(?:<\/sup>)
)?
(?::{1,3}|[@\-=])? # optional symbol describing connectivity to the next element component
)
| # option 2: an ion
\[(?&unit)](?&sup_charge)? # square brackets containing a molecular unit with optional charge
| # option 3: a repeating unit
\((?&unit)\)(?&sub_count)(?&sup_charge)? # parentheses containing a molecular unit with count and optional charge
)+
)
(?!(?&formula_character)) # must end at a word/formula boundary~mXx';
// Sample subject, one candidate per line. The trailing lines that carry a short
// explanation (e.g. "zero count") appear to be intended as negative cases.
$str = 'BrI
CCl4
CH3I
C2H5Br
H2O4S
Al2(SO4)3
[[ClO2]+[ClO4]-]
[SO4]2-
CB4.2
CBn
CaCl2
(CH3)3CH
[Co(NH3)6]3+Cl3-
[Co(NH<sub>3</sub>)<sub>6</sub>]<sup>3+</sup>Cl<sub>3</sub><sup>-</sup>
C<sub>4</sub>H<sub>10</sub>
[As@Ni12As20]3-
[As@Ni<sub>12</sub>As<sub>20</sub>]<sup>3-</sup>
CH3-CH2-OH
SO<sub>4</sub><sup>2-</sup>
Fe0.95O
[32PO4]3-
18O16O
Fe0.95O
<sup>16</sup><sub>8</sub>O<sub>2</sub>
My secret chemical is H2O don\'t tell
UNiCoRn-PoOP
h2o case sensitive
fireH2O not at word break
()3- no atomic symbol
NOt ReAL AToMiC SYMbOLs
SO-4 inverted ion and count
Fe0O zero count
0H2 invalid isotope mass
100010H invalid atomic number
9999999H mass or atomic number too large
';
// PREG_SET_ORDER groups the result per match: $matches[i][0] is the i-th full
// match and $matches[i]['unit'] etc. are its named groups.
$matchCount = preg_match_all($re, $str, $matches, PREG_SET_ORDER, 0);
if ($matchCount === false) {
    // preg_match_all() returns false on failure; report the PCRE error code
    // rather than silently dumping an empty or partial result.
    trigger_error('preg_match_all() failed with PCRE error code ' . preg_last_error(), E_USER_WARNING);
}
// Print the entire match result
var_dump($matches);
// Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for PHP, please visit: https://www.php.net/manual/en/ref.pcre.php