// Matches parenthesised citations that end in a four-digit year, e.g.
// "(Smith 2010)" or "(Smith 2010; Jones 2001)", optionally preceded by one or
// more capitalised author words and an "et al." (e.g. "Cooper et al. (2015)").
//   group 1: capitalised author words in front of the parenthesis (undefined if absent)
//   group 2: the text inside the parentheses, ending in the year
// The `u` flag is required: without it `\p{Lu}` is not a Unicode property
// escape but the literal text "p{Lu}", so group 1 could never match.
const regex = /(?:\b(\p{Lu}\w*(?:\s+\p{Lu}\w*)*)(?:\s+et\s+al\.)?)?\s*\(([^()]*\d{4})\)/gmu;
// Equivalent form using the RegExp constructor:
// new RegExp('(?:\\b(\\p{Lu}\\w*(?:\\s+\\p{Lu}\\w*)*)(?:\\s+et\\s+al\\.)?)?\\s*\\(([^()]*\\d{4})\\)', 'gmu')
const str = `This is a test. I only want to select the (cites) in parenthesis. I do not want it to return words in parenthesis that do not have years attached, such as abbreviations (abbr). For example, citing (Smith 2010) is something I would want to be returned. I would also want multiple citations returned separately such as (Smith 2010; Jones 2001; Brown 2020). I would also want Cooper et al. (2015) returned as Cooper 2015, and not just 2015.`;

// A global regex keeps its scan position in `lastIndex`; start from the
// beginning in case this regex object was used before.
regex.lastIndex = 0;

let m;
while ((m = regex.exec(str)) !== null) {
  // Guard against infinite loops on zero-width matches: if the match did not
  // advance the scan position, advance it manually.
  if (m.index === regex.lastIndex) {
    regex.lastIndex++;
  }

  // `m[0]` is the whole match; `m[1]` and `m[2]` are the capture groups.
  // Groups that did not participate in the match are printed as `undefined`.
  m.forEach((match, groupIndex) => {
    console.log(`Found match, group ${groupIndex}: ${match}`);
  });
}
// Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for JavaScript, please visit: https://developer.mozilla.org/en/docs/Web/JavaScript/Guide/Regular_Expressions