use strict;
use warnings;

# Sample HTML fixture: a mix of <script> blocks that mention "googletag"
# and unrelated <script> blocks / markup that must NOT be part of any match.
my $str = '<div>
<p>foo</p>
<script type=\'text/javascript\'>another script goes here</script>
</div>
<script type=\'text/javascript\'>
var googletag = googletag || {};
googletag.cmd = googletag.cmd || [];
(function() {
var gads = document.createElement(\'script\');
gads.async = true;
gads.type = \'text/javascript\';
var useSSL = \'https:\' == document.location.protocol;
gads.src = (useSSL ? \'https:\' : \'http:\') +
\'//www.googletagservices.com/tag/js/gpt.js\';
var node = document.getElementsByTagName(\'script\')[0];
node.parentNode.insertBefore(gads, node);
})();
</script>
<div>
<p>bar</p>
<script type=\'text/javascript\'>yet another script goes here</script>
</div>
<script type=\'text/javascript\'>
googletag.cmd.push(function() {
googletag.defineSlot(\'/6176201/Opera_120x600\', [120, 600], \'div-gpt-ad-6176201-opera_120x600\').addService(googletag.pubads());
googletag.defineSlot(\'/6176201/Opera_300x250\', [300, 250], \'div-gpt-ad-6176201-opera_300x250\').addService(googletag.pubads());
googletag.defineSlot(\'/6176201/Opera_728x90\', [728, 90], \'div-gpt-ad-6176201-opera_728x90\').addService(googletag.pubads());
googletag.defineSlot(\'/6176201/Contextuals_Operaweb\', [557, 30], \'div-gpt-ad-6176201-contextuals_operaweb\').addService(googletag.pubads());
googletag.pubads().collapseEmptyDivs();
googletag.pubads().enableSingleRequest();
googletag.enableServices();
});
</script>
<script type=\'text/javascript\'>
googletag.cmd.push(function() { googletag.display(\'div-gpt-ad-6176201-opera_728x90\'); });
</script>
<div></div><script type=\'text/javascript\'>this script must stay too</script><a></a>
<script type=\'text/javascript\'>
googletag.cmd.push(function() { googletag.display(\'div-gpt-ad-6176201-opera_120x600\'); });
</script>';

# Matches exactly ONE <script type='text/javascript'>...</script> element whose
# body mentions "googletag".
#
# The body is consumed with a "tempered dot": (?: (?! </?script ) . ) matches a
# single character only when no opening/closing script tag starts at that
# position.  This keeps a match from running past its own </script> into
# neighbouring scripts, and it avoids nested quantifiers over possibly-empty
# subpatterns, which backtrack heavily.
#
# Flags: /x free-spacing, /i case-insensitive, /s lets "." match newlines,
#        /p makes ${^MATCH}, ${^PREMATCH} and ${^POSTMATCH} available.
my $regex = qr{
    <script \s+ type='text/javascript' \s* >   # opening tag
    (?: (?! </?script ) . )*?                  # body up to the first "googletag"
    googletag
    (?: (?! </?script ) . )*                   # rest of this script's body
    </script>                                  # its own closing tag
}xisp;

# Scalar-context /g resumes after the previous match on each iteration, so the
# loop reports every googletag script instead of only the first one.
while ( $str =~ /$regex/g ) {
    print "Whole match is ${^MATCH} and its start/end positions can be obtained via \$-[0] and \$+[0]\n";
}

# The pattern defines no capture groups; if groups are added, they are
# available as $1, $2, ... with offsets in $-[N] / $+[N].
# ${^POSTMATCH} and ${^PREMATCH} are also available with the use of '/p'
# Named capture groups can be called via $+{name}
# Please keep in mind that these code samples are automatically generated and
# are not guaranteed to work. If you find any syntax errors, feel free to
# submit a bug report. For a full regex reference for Perl, please visit:
# http://perldoc.perl.org/perlre.html