use strict;
use warnings;

# Sample script: apply a precompiled regex to a fixed test string and print
# the text of the whole match.
#
# NOTE: this code sample was automatically generated and is not guaranteed to
# work. If you find any syntax errors, feel free to submit a bug report. For a
# full regex reference for Perl, please visit:
# http://perldoc.perl.org/perlre.html

# Test subject: a single line (no embedded newlines) containing several
# '>'-separated records.
my $str = '/a> ref|NP_010829.1| Irc4p [Saccharomyces cerevisiae S288c] >gi|74676333|sp|Q03036.1|IRC4_YEAST RecName: Full=Uncharacterized protein IRC4; AltName: Full=Increased recombination centers protein 4 >gi|1165295|gb|AAB64982.1| Ydr540cp [Saccharomyces cerevisiae] >gi|51012753|gb|AAT92670.1| YDR540C [Saccharomyces cerevisiae] >gi|151942499|gb|EDN60855.1| conserved protein [Saccharomyces cerevisiae YJM789] >gi|190404545|gb|EDV07812.1| conserved hypothetical protein [Saccharomyces cerevisiae RM11-1a] >gi|259145774|emb|CAY79038.1| Irc4p [Saccharomyces cerevisiae EC1118] >gi|285811545|tpg|DAA12369.1| TPA: Irc4p [Saccharomyces cerevisiae S288c] >gi|323309617|gb|EGA62826.1| Irc4p [Saccharomyces cerevisiae FostersO] >gi|323338091|gb|EGA79326.1| Irc4p [Saccharomyces cerevisiae Vin13] >gi|365766295|gb|EHN07794.1| Irc4p [Saccharomyces cerevisiae x Saccharomyces kudriavzevii VIN7] >gi|392300658|gb|EIW11749.1| Irc4p [Saccharomyces cerevisiae CEN.PK113-7D] >gi|584366859|gb|EWG86852.1| Irc4p [Saccharomyces cerevisiae R008] >gi|584372222|gb|EWG92158.1| Irc4p [Saccharomyces cerevisiae P301] >gi|584376691|gb|EWG96547.1| Irc4p [Saccharomyces cerevisiae R103] >gi|584477456|gb|EWH19199.1| Irc4p [Saccharomyces cerevisiae P283]';

# Group 1 lazily captures everything before the first '>' on a line; the
# trailing '.*$' then consumes the rest of that line.
#   /m  lets ^ and $ anchor at line boundaries.
#   /p  preserves the matched text so ${^MATCH}, ${^PREMATCH} and
#       ${^POSTMATCH} are available after a successful match.
my $regex = qr/^(.+?)>.*$/mp;

# A single boolean test: match against the compiled pattern directly. No /g
# here -- in scalar context /g would only leave pos($str) set as a side
# effect, which nothing in this script relies on.
if ( $str =~ $regex ) {
    print "Whole match is ${^MATCH} and its start/end positions can be obtained via \$-[0] and \$+[0]\n";
    # print "Capture Group 1 is $1 and its start/end positions can be obtained via \$-[1] and \$+[1]\n";
    # print "Capture Group 2 is $2 ... and so on\n";
}

# ${^POSTMATCH} and ${^PREMATCH} are also available with the use of '/p'
# Named capture groups can be called via $+{name}