// Single carbon atom: uppercase "C", or lowercase "c" (for rings).
// No lookahead/lookbehind is used here; instead the "ignored" run below is
// allowed to consume every other element that contains a "C" or "c".
const CARBON = '[cC]';

// One ignored item: a non-carbon element or a syntax character.
// The alternatives are tried in the order listed.
const IGNORED_ITEM = [
  String.raw`[-+.=#$@()\[\]/\\:%0-9]`, // Ignored syntax chars.
  'C[laroudsnemf]', // Cx elements: Cl, Ca, Cr, Co, Cu, etc.
  '[STMA]c', // Xc elements: Sc, Tc, Mc and Ac.
  '[HBNOFPSKVYIWU]', // Any single-letter element, but not C.
  '[ABD-Z][abd-z]', // 2-char elements (not very precise but short).
].join('|');

/**
 * Builds the pattern source for a run of zero or more ignored items that is
 * never backtracked into (the equivalent of a possessive `*+` quantifier).
 *
 * JavaScript has no possessive quantifiers, so the run is captured inside a
 * lookahead (which is atomic) and then consumed with a backreference. This
 * is what makes "Sc" or "Tc" stay swallowed as one element, so that their
 * "c" can never be re-read as a carbon atom.
 *
 * @param {string} groupName - Unique capture-group name for this run.
 * @returns {string} Regular-expression source text.
 */
const ignoredRun = (groupName) =>
  `(?=(?<${groupName}>(?:${IGNORED_ITEM})*))\\k<${groupName}>`;

// A whole line (`m` flag) made of exactly four carbon atoms, each one
// surrounded by ignored elements and chars. The `g` flag lets callers
// iterate over every matching line.
//
// The pattern is assembled with the RegExp constructor because JavaScript
// regex literals cannot span several lines, have no extended ("x") mode for
// inline comments, and do not support (?(DEFINE)...) or \g<name> calls.
const regex = new RegExp(
  `^(?:${ignoredRun('before')}${CARBON}${ignoredRun('after')}){4}$`,
  'gm'
);
const str = `[C]-[C](-[O])-[O]
[O]-[C]-[C]=[O]
[C]-[C](-[O])=[O]
[O]=[C]-[C]=[O]
[C]=[C]-[O]-[O]
[O]-[C]=[C]-[O]
[C]=[C](-[O])-[O]
[O]-[C]=[C]=[O]
[O]=[C]=[C]=[O]
[C]#[C]-[O]-[O]
[O]-[C]#[C]-[O]
[C]-[C]-[O]-[Ca]
[O]-[C]-[C]-[OH]
[C]-[C](-[O])-[OH]
[O]=[C]-[C]-[OH]
[C]-[C](=[O])-[OH]
[C]=[C]-[O]-[OH]
[O]-[C]=[C]-[OH]
[C]=[C](-[O])-[OH]
[O]=[C]=[C]-[OH]
[C]#[C]-[O]-[OH]
[O]-[C]#[C]-[OH]
[OH]-[C]-[C]-[OH]
[C]-[C](-[OH])-[OH]
[OH]-[C]=[C]-[OH]
[C]=[C](-[Ca])-[OH]
[OH]-[C]#[C]-[OH]
[C]-[CH](-[C])-[Sc]
[C]-[CH](-[C])-[C]
[C]-[CH]-[C]-[CH]
[C]-[CH](-[C])-[CH]
[C]-[CH]-[C]=[CH]
[C]-[CH]-[C]#[CH]
[C]-[CH]=[C]-[CH]
[C]=[CH]-[C]-[CH]
[C]-[CH]=[C]=[CH]
[C]=[CH]-[C]=[CH]
[C]=[CH]-[C]#[CH]
[C]-[CH]-[C]-[CH2]
[C]-[CH](-[C])-[CH2]
[C]-[CH]-[C]=[CH2]
[C]-[CH]=[C]-[CH2]
`;
// When `regex` is shared (defined globally), uncomment the next line so the
// scan starts from the beginning of `str`:
// regex.lastIndex = 0;
let m = regex.exec(str);
while (m !== null) {
  // A zero-width match leaves `lastIndex` at the match position; advance it
  // by one so the next `exec` call makes progress instead of looping forever.
  if (regex.lastIndex === m.index) {
    regex.lastIndex += 1;
  }
  // Print every entry of the match array: index 0 is the full match, the
  // following indexes are the capture groups.
  for (const [groupIndex, match] of m.entries()) {
    console.log(`Found match, group ${groupIndex}: ${match}`);
  }
  m = regex.exec(str);
}
// Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for JavaScript, please visit: https://developer.mozilla.org/en/docs/Web/JavaScript/Guide/Regular_Expressions