// Finds HTML character references that lost their leading `&`
// (for example `quot;` instead of `&quot;`, or `#8217;` instead of `&#8217;`).
//
//   (?<!&)                   the candidate must NOT be preceded by `&`
//   \b(?:nbsp|quot|divide)   a known entity name starting at a word boundary
//   #x?[0-9a-f]+             a numeric reference; the optional `x` also covers
//                            the hexadecimal form (`#x27;`)
//   ;                        the terminating semicolon
//
// Flags: `g` finds every occurrence, `i` ignores case. The `m` flag is not
// used because the pattern contains no `^` / `$` anchors.
const regex = /(?<!&)(?:\b(?:nbsp|quot|divide)|#x?[0-9a-f]+);/gi;

// Sample input the pattern is exercised against.
const str = `I have a huge HTML with several special chars, in the forms or "�.
Faulty HEX: #82173333;
Some of them are wrong, because they lack the initial &.
I would like to search for such wrong spacial chars. I know that I can search all the right special chars by means of the following regex:
\\&(?:[a-z]+|#x?\\d+);\\
But I'd need a regex useful to search the wrong ones (without the initial &). Can you help me? Thanks in advance
Edit:
As suggested, I post an example. My HTML cointains the following statement:
<![CDATA[<nolink>blablabla blablabla</nolink>]]>nbsp;
where we have 2 special HTML character:
divide;
÷
quot;
I'm interested in finding the second item, because it is wrong (laking the initial &).
So the output of the requested regex should be: quot;`;

// `matchAll` iterates over an internal clone of the regex, so `regex.lastIndex`
// is left untouched and no manual guard against zero-width matches is needed.
for (const match of str.matchAll(regex)) {
  // Index 0 is the full match; any capture groups would follow at 1, 2, ...
  match.forEach((text, groupIndex) => {
    console.log(`Found match, group ${groupIndex}: ${text}`);
  });
}
// Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for JavaScript, please visit: https://developer.mozilla.org/en/docs/Web/JavaScript/Guide/Regular_Expressions