import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Demonstrates extracting the "NN.N% identity" figures from a captured GLSEARCH
 * report with {@code java.util.regex}.
 *
 * <p>{@link #main(String[])} scans the embedded report text and prints every match
 * followed by each of its capture groups.
 */
public class Example {

    /** One or more digits/dots followed by {@code %} (group 1), then the word " identity". */
    private static final String IDENTITY_REGEX = "([\\d\\.]+%) identity";

    /** Captured GLSEARCH output that serves as the search subject. */
    private static final String REPORT = "# /data/spine/bin/external/fasta/bin/glsearch36 -b 2 -m 9i -q /tmp/temp.seq /data/spine/databases/UniProt/sprot.compact_headers.distinct.mouse\n"
            + "GLSEARCH performs a global-query/local-library search\n"
            + " version 36.3.6 Nov, 2013(preload9)\n"
            + "Query: /tmp/temp.seq\n"
            + " 1>>>seq - 299 aa\n"
            + "Library: /data/spine/databases/UniProt/sprot.compact_headers.distinct.mouse\n"
            + " 9277236 residues in 16315 sequences\n\n"
            + "Statistics: Unscaled normal statistics: mu= -50.7059 var=2892.0306 Ztrim: 0\n"
            + " statistics sampled from 13365 (13367) to 13365 sequences\n"
            + "Algorithm: Global/Local affine Needleman-Wunsch (2007) (6.0 April 2007)\n"
            + "Parameters: BL50 matrix (15:-5), open/ext: -12/-2\n"
            + " Scan time: 11.560\n\n"
            + "The best scores are: n-w bits E(16315) %_id %_sim alen\n"
            + "SP|P61375|LHX5_MOUSE ( 402) 1361 66.3 5.7e-148 0.685 0.868 302\n"
            + "SP|P50481|LHX3_MOUSE ( 400) 331 30.8 1e-08 0.319 0.524 307\n\n"
            + ">>>seq, 299 aa vs /data/spine/databases/UniProt/sprot.compact_headers.distinct.mouse library\n\n"
            + ">>SP|P61375|LHX5_MOUSE (402 aa)\n"
            + " n-w opt: 1361 Z-score: 312.5 bits: 66.3 E(16315): 5.7e-148\n"
            + "global/local score: 1361; 68.5% identity (86.8% similar) in 302 aa overlap (1-299:107-400)\n\n"
            + " 10 20 30\n"
            + "seq YIDENKFVCKEDYLSNSSVAKENSLHSATT\n"
            + " :::::::::.::::.::. ::.::.:...\n"
            + "SP|P61 VRKARSKVFHLNCFTCMVCNKQLSTGEELYVIDENKFVCKDDYLSSSSL-KEGSLNSVSS\n"
            + " 80 90 100 110 120 130 \n\n"
            + " 40 50 60 70 80 90\n"
            + "seq GSDPSLSPDSQDPSQDDAKDSESANVSDKEGGSNENDDQNLGAKRRGPRTTIKAKQLETL\n"
            + " .: ::::: ::: ::: :...... :::: ..:::..:: :.:::::::::::::::::\n"
            + "SP|P61 CTDRSLSPDLQDPLQDDPKETDNSTSSDKETANNENEEQNSGTKRRGPRTTIKAKQLETL\n"
            + " 140 150 160 170 180 190 \n\n"
            + " 100 110 120 130 140 150\n"
            + "seq KAAFAATPKPTRHIREQLAQETGLNMRVIQVWFQNRRSKERRMKQLSALGARRHAFFRSP\n"
            + " ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::\n"
            + "SP|P61 KAAFAATPKPTRHIREQLAQETGLNMRVIQVWFQNRRSKERRMKQLSALGARRHAFFRSP\n"
            + " 200 210 220 230 240 250 \n\n"
            + " 160 170 180 190 200 210\n"
            + "seq RRMRPLVDRLEPGELIPNGPFSFYGDYQSEYYGPGGNYDFFPQGPPSSQAQTPVDLPFVP\n"
            + " :::::: ::. .:.. . :...::::::.::.:::::::: .:::: :::.:.: :. \n"
            + "SP|P61 RRMRPLGGRLDESEMLGSTPYTYYGDYQSDYYAPGGNYDFFAHGPPS-QAQSPADSSFLA\n"
            + " 260 270 280 290 300 310 \n\n"
            + " 220 230 240 250 260 \n"
            + "seq SSGPSGTPLGGLDHPLPGHHPSSEAQRFTDILAHPPGDSPSPEPSLPGPLHSMSAEVF--\n"
            + " .:::..::::.:. :: : : ... ::::...:: :.:::::.::: :: : .::: \n"
            + "SP|P61 ASGPGSTPLGALEPPLAGPH-GADNPRFTDMISHP--DTPSPEPGLPGALHPMPGEVFSG\n"
            + " 320 330 340 350 360 370 \n\n"
            + " 270 280 290 \n"
            + "seq GPSPPFSSLSVNGGASYGNHLSHP-PEMNEAA \n"
            + " :::::: ..: ..:.. :::: ::.:::: \n"
            + "SP|P61 GPSPPFP---MSGTSGYSGPLSHPNPELNEAAVW\n"
            + " 380 390 400 \n\n"
            + ">>SP|P50481|LHX3_MOUSE (400 aa)\n"
            + " n-w opt: 331 Z-score: 121.0 bits: 30.8 E(16315): 1e-08\n"
            + "global/local score: 331; 31.9% identity (52.4% similar) in 307 aa overlap (1-299:99-365)\n\n"
            + " 10 20 \n"
            + "seq YIDENKFVCK-EDYLSN-SSVAKENSLHSA\n"
            + " : .. : . .:.. . : .. \n"
            + "SP|P50 AERCFSRGESVYCKDDFFKRFGTKCAACQLGIPPTQVVRRAQDFVYHLHCFACVVCKRQL\n"
            + " 70 80 90 100 110 120 \n\n"
            + " 30 40 50 60 70 80 \n"
            + "seq TTGSDPSLSPDSQDPSQDDAKDSESANVSDKEGGSNENDDQNLGAKRRGPRTTIKAKQLE\n"
            + " .::.. : ::. . : :.:. . :. ::: ::::: :::::\n"
            + "SP|P50 ATGDEFYLMEDSRLVCK---ADYETAKQREAEAT----------AKR--PRTTITAKQLE\n"
            + " 130 140 150 160 170 \n\n"
            + " 90 100 110 120 130 140 \n"
            + "seq TLKAAFAATPKPTRHIREQLAQETGLNMRVIQVWFQNRRSKERRMKQLSALGARRHAFFR\n"
            + " :::.:. ..:::.::.::::..::::.:::.::::::::.::.:.:. .: : .::\n"
            + "SP|P50 TLKSAYNTSPKPARHVREQLSSETGLDMRVVQVWFQNRRAKEKRLKK-DAGRQRWGQYFR\n"
            + " 180 190 200 210 220 230 \n\n"
            + " 150 160 170 180 190 200 \n"
            + "seq SPRRMRPLVDRLEPGELIPNGPFSFYGDYQSEYYGPGGNYDFFPQGPPSSQAQTPVDLPF\n"
            + " . .: : : .: .: : .. . :: . :.. .\n"
            + "SP|P50 NMKRSRG----------------SSKSDKDSIQEGQDSDAEVSFTDEPSMADMGPANGLY\n"
            + " 240 250 260 270 \n\n"
            + " 210 220 230 240 250 260 \n"
            + "seq VPSSGPS---GTPLGGLDHPLPGHHPSSEAQRFTDILAHPP-GDSPSPEP--SLPGPLHS\n"
            + " . :. : :.::: : . ... .. : : ::: ::::: \n"
            + "SP|P50 SSLGEPAPALGRPVGGLGSFTLDHGGLTGPEQYRELRPGSPYGIPPSPAAPQSLPGPQPL\n"
            + " 280 290 300 310 320 330 \n\n"
            + " 270 280 290 \n"
            + "seq MSAEVFGPSPPFSSLSVNGGASYGNHLSHPPEMNEAA \n"
            + " .:. :. : ..::. .. :. :: : : \n"
            + "SP|P50 LSSLVY----PDTNLSLVPSGPPGG----PPPMRVLAGNGPSSDLSTESSSGYPDFPASP\n"
            + " 340 350 360 370 380 \n\n"
            + "SP|P50 ASWLDEVDHAQF\n"
            + " 390 400\n\n\n"
            + ">>>///\n\n"
            + "299 residues in 1 query sequences\n"
            + "9277236 residues in 16315 library sequences\n"
            + " Tcomplib [36.3.6 Nov, 2013(preload9)] (8 proc in memory [0G])\n"
            + " start: Thu Sep 3 20:44:48 2015 done: Thu Sep 3 20:44:55 2015\n"
            + " Total Scan time: 11.560 Total Display time: 0.010\n\n"
            + "Function used was GLSEARCH [36.3.6 Nov, 2013(preload9)]";

    /**
     * Compiles the identity regex, runs it over the embedded report and prints each hit
     * to standard output.
     *
     * @param args command-line arguments; not read
     */
    public static void main(String[] args) {
        final int flags = Pattern.CASE_INSENSITIVE | Pattern.MULTILINE;
        final Matcher hits = Pattern.compile(IDENTITY_REGEX, flags).matcher(REPORT);
        while (hits.find()) {
            printMatch(hits);
        }
    }

    /** Prints the full text of the current match, then one line per capture group. */
    private static void printMatch(Matcher hit) {
        System.out.println("Full match: " + hit.group(0));
        final int groupTotal = hit.groupCount();
        int groupIndex = 1;
        while (groupIndex <= groupTotal) {
            System.out.println("Group " + groupIndex + ": " + hit.group(groupIndex));
            groupIndex++;
        }
    }
}
// Please keep in mind that these code samples are automatically generated and are not
// guaranteed to work. If you find any syntax errors, feel free to submit a bug report.
// For a full regex reference for Java, please visit:
// https://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html