import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Demonstrates extracting a few fields from UniProtKB flat-file entries with
 * {@link java.util.regex}.
 *
 * <p>The sample text holds several entries, each running from an {@code ID} line to a
 * terminating {@code //} line. Every entry is matched on its own first and the field
 * pattern is then applied to that entry only, so a lazy scan for an optional feature can
 * never run past the entry terminator and pick up data from a following entry.
 */
public class Example {

    /** Matches one complete entry: from its {@code ID} line through the closing {@code //} line. */
    private static final Pattern ENTRY = Pattern.compile("(?ms)^ID\\s.*?^//$");

    /**
     * Field pattern, applied to the text of a single entry.
     *
     * <ul>
     *   <li>group 1 - first accession on the {@code AC} line</li>
     *   <li>group 2 - NCBI taxonomy id from the {@code OX} line</li>
     *   <li>group 3 - first {@code FT VAR_SEQ} feature up to and including its
     *       {@code /FTId=...} terminator; {@code null} when the entry has none</li>
     *   <li>group 4 - the sequence block (whitespace and line breaks preserved)</li>
     * </ul>
     *
     * <p>Java has no {@code (?#...)} inline-comment syntax, so the optional part is a real
     * optional non-capturing group wrapping the capture. Sequence lines are recognised by
     * leading blanks ({@code [ \t]+}) rather than exactly five whitespace characters, so
     * both the canonical indentation and a whitespace-normalised copy are accepted.
     */
    private static final Pattern FIELDS = Pattern.compile(
            "(?ms)^AC\\s+(\\w+);"
            + ".*?^OX\\s+NCBI_TaxID=(\\d+)"
            + "(?:.*?(^FT\\s+VAR_SEQ.*?/FTId=\\w+\\.))?"
            + ".*?^[ \\t]+(.*?)^//");

    /**
     * Prints, for every entry in the embedded sample, the full entry text followed by each
     * capture group of {@link #FIELDS}. An absent optional group is printed as {@code null}.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        final String string = "ID TTC29_HUMAN Reviewed; 475 AA.\n"
            + "AC Q8NA56; A4GU95; Q9BXB6;\n"
            + "DT 10-JUL-2007, integrated into UniProtKB/Swiss-Prot.\n"
            + "DT 10-JUL-2007, sequence version 2.\n"
            + "DT 15-FEB-2017, entry version 114.\n"
            + "DE RecName: Full=Tetratricopeptide repeat protein 29;\n"
            + "DE Short=TPR repeat protein 29;\n"
            + "DE AltName: Full=Protein TBPP2A;\n"
            + "DE AltName: Full=Testis development protein NYD-SP14;\n"
            + "GN Name=TTC29;\n"
            + "OS Homo sapiens (Human).\n"
            + "OC Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi;\n"
            + "OC Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini;\n"
            + "OC Catarrhini; Hominidae; Homo.\n"
            + "OX NCBI_TaxID=9606;\n"
            + "RN [1]\n"
            + "RP NUCLEOTIDE SEQUENCE [MRNA] (ISOFORM 1), AND VARIANTS TYR-140 AND\n"
            + "RP THR-276.\n"
            + "RC TISSUE=Testis;\n"
            + "RA Sha J.H.;\n"
            + "RT \"Cloning and identification of a novel gene related development gene\n"
            + "RT NYD-SP14.\";\n"
            + "RL Submitted (FEB-2001) to the EMBL/GenBank/DDBJ databases.\n"
            + "RN [2]\n"
            + "RP NUCLEOTIDE SEQUENCE [MRNA] (ISOFORM 1), AND VARIANTS TYR-140 AND\n"
            + "RP THR-276.\n"
            + "RC TISSUE=Mammary cancer;\n"
            + "RA Li J.M., Cheng J., Wang Q.;\n"
            + "RL Submitted (FEB-2007) to the EMBL/GenBank/DDBJ databases.\n"
            + "RN [3]\n"
            + "RP NUCLEOTIDE SEQUENCE [LARGE SCALE MRNA] (ISOFORM 2).\n"
            + "RC TISSUE=Testis;\n"
            + "RX PubMed=14702039; DOI=10.1038/ng1285;\n"
            + "RA Ota T., Suzuki Y., Nishikawa T., Otsuki T., Sugiyama T., Irie R.,\n"
            + "RA Wakamatsu A., Hayashi K., Sato H., Nagai K., Kimura K., Makita H.,\n"
            + "RA Sekine M., Obayashi M., Nishi T., Shibahara T., Tanaka T., Ishii S.,\n"
            + "RA Yamamoto J., Saito K., Kawai Y., Isono Y., Nakamura Y., Nagahari K.,\n"
            + "RA Murakami K., Yasuda T., Iwayanagi T., Wagatsuma M., Shiratori A.,\n"
            + "RA Sudo H., Hosoiri T., Kaku Y., Kodaira H., Kondo H., Sugawara M.,\n"
            + "RA Takahashi M., Kanda K., Yokoi T., Furuya T., Kikkawa E., Omura Y.,\n"
            + "RA Abe K., Kamihara K., Katsuta N., Sato K., Tanikawa M., Yamazaki M.,\n"
            + "RA Ninomiya K., Ishibashi T., Yamashita H., Murakawa K., Fujimori K.,\n"
            + "RA Tanai H., Kimata M., Watanabe M., Hiraoka S., Chiba Y., Ishida S.,\n"
            + "RA Ono Y., Takiguchi S., Watanabe S., Yosida M., Hotuta T., Kusano J.,\n"
            + "RA Kanehori K., Takahashi-Fujii A., Hara H., Tanase T.-O., Nomura Y.,\n"
            + "RA Togiya S., Komai F., Hara R., Takeuchi K., Arita M., Imose N.,\n"
            + "RA Musashino K., Yuuki H., Oshima A., Sasaki N., Aotsuka S.,\n"
            + "RA Yoshikawa Y., Matsunawa H., Ichihara T., Shiohata N., Sano S.,\n"
            + "RA Moriya S., Momiyama H., Satoh N., Takami S., Terashima Y., Suzuki O.,\n"
            + "RA Nakagawa S., Senoh A., Mizoguchi H., Goto Y., Shimizu F., Wakebe H.,\n"
            + "RA Hishigaki H., Watanabe T., Sugiyama A., Takemoto M., Kawakami B.,\n"
            + "RA Yamazaki M., Watanabe K., Kumagai A., Itakura S., Fukuzumi Y.,\n"
            + "RA Fujimori Y., Komiyama M., Tashiro H., Tanigami A., Fujiwara T.,\n"
            + "RA Ono T., Yamada K., Fujii Y., Ozaki K., Hirao M., Ohmori Y.,\n"
            + "RA Kawabata A., Hikiji T., Kobatake N., Inagaki H., Ikema Y., Okamoto S.,\n"
            + "RA Okitani R., Kawakami T., Noguchi S., Itoh T., Shigeta K., Senba T.,\n"
            + "RA Matsumura K., Nakajima Y., Mizuno T., Morinaga M., Sasaki M.,\n"
            + "RA Togashi T., Oyama M., Hata H., Watanabe M., Komatsu T.,\n"
            + "RA Mizushima-Sugano J., Satoh T., Shirai Y., Takahashi Y., Nakagawa K.,\n"
            + "RA Okumura K., Nagase T., Nomura N., Kikuchi H., Masuho Y., Yamashita R.,\n"
            + "RA Nakai K., Yada T., Nakamura Y., Ohara O., Isogai T., Sugano S.;\n"
            + "RT \"Complete sequencing and characterization of 21,243 full-length human\n"
            + "RT cDNAs.\";\n"
            + "RL Nat. Genet. 36:40-45(2004).\n"
            + "RN [4]\n"
            + "RP NUCLEOTIDE SEQUENCE [LARGE SCALE GENOMIC DNA].\n"
            + "RX PubMed=15815621; DOI=10.1038/nature03466;\n"
            + "RA Hillier L.W., Graves T.A., Fulton R.S., Fulton L.A., Pepin K.H.,\n"
            + "RA Minx P., Wagner-McPherson C., Layman D., Wylie K., Sekhon M.,\n"
            + "RA Becker M.C., Fewell G.A., Delehaunty K.D., Miner T.L., Nash W.E.,\n"
            + "RA Kremitzki C., Oddy L., Du H., Sun H., Bradshaw-Cordum H., Ali J.,\n"
            + "RA Carter J., Cordes M., Harris A., Isak A., van Brunt A., Nguyen C.,\n"
            + "RA Du F., Courtney L., Kalicki J., Ozersky P., Abbott S., Armstrong J.,\n"
            + "RA Belter E.A., Caruso L., Cedroni M., Cotton M., Davidson T., Desai A.,\n"
            + "RA Elliott G., Erb T., Fronick C., Gaige T., Haakenson W., Haglund K.,\n"
            + "RA Holmes A., Harkins R., Kim K., Kruchowski S.S., Strong C.M.,\n"
            + "RA Grewal N., Goyea E., Hou S., Levy A., Martinka S., Mead K.,\n"
            + "RA McLellan M.D., Meyer R., Randall-Maher J., Tomlinson C.,\n"
            + "RA Dauphin-Kohlberg S., Kozlowicz-Reilly A., Shah N.,\n"
            + "RA Swearengen-Shahid S., Snider J., Strong J.T., Thompson J., Yoakum M.,\n"
            + "RA Leonard S., Pearman C., Trani L., Radionenko M., Waligorski J.E.,\n"
            + "RA Wang C., Rock S.M., Tin-Wollam A.-M., Maupin R., Latreille P.,\n"
            + "RA Wendl M.C., Yang S.-P., Pohl C., Wallis J.W., Spieth J., Bieri T.A.,\n"
            + "RA Berkowicz N., Nelson J.O., Osborne J., Ding L., Meyer R., Sabo A.,\n"
            + "RA Shotland Y., Sinha P., Wohldmann P.E., Cook L.L., Hickenbotham M.T.,\n"
            + "RA Eldred J., Williams D., Jones T.A., She X., Ciccarelli F.D.,\n"
            + "RA Izaurralde E., Taylor J., Schmutz J., Myers R.M., Cox D.R., Huang X.,\n"
            + "RA McPherson J.D., Mardis E.R., Clifton S.W., Warren W.C.,\n"
            + "RA Chinwalla A.T., Eddy S.R., Marra M.A., Ovcharenko I., Furey T.S.,\n"
            + "RA Miller W., Eichler E.E., Bork P., Suyama M., Torrents D.,\n"
            + "RA Waterston R.H., Wilson R.K.;\n"
            + "RT \"Generation and annotation of the DNA sequences of human chromosomes 2\n"
            + "RT and 4.\";\n"
            + "RL Nature 434:724-731(2005).\n"
            + "CC -!- ALTERNATIVE PRODUCTS:\n"
            + "CC Event=Alternative splicing; Named isoforms=2;\n"
            + "CC Name=1;\n"
            + "CC IsoId=Q8NA56-1; Sequence=Displayed;\n"
            + "CC Name=2;\n"
            + "CC IsoId=Q8NA56-2; Sequence=VSP_026638;\n"
            + "CC Note=No experimental confirmation available.;\n"
            + "CC -!- SEQUENCE CAUTION:\n"
            + "CC Sequence=AAK29064.1; Type=Frameshift; Positions=467; Evidence={ECO:0000305};\n"
            + "CC -----------------------------------------------------------------------\n"
            + "CC Copyrighted by the UniProt Consortium, see http://www.uniprot.org/terms\n"
            + "CC Distributed under the Creative Commons Attribution-NoDerivs License\n"
            + "CC -----------------------------------------------------------------------\n"
            + "DR EMBL; AF345910; AAK29064.1; ALT_FRAME; mRNA.\n"
            + "DR EMBL; EF432564; ABO31099.1; -; mRNA.\n"
            + "DR EMBL; AK093145; BAC04072.1; -; mRNA.\n"
            + "DR EMBL; AC092435; -; NOT_ANNOTATED_CDS; Genomic_DNA.\n"
            + "DR EMBL; AC097497; -; NOT_ANNOTATED_CDS; Genomic_DNA.\n"
            + "DR EMBL; AC093887; -; NOT_ANNOTATED_CDS; Genomic_DNA.\n"
            + "DR CCDS; CCDS47141.1; -. [Q8NA56-1]\n"
            + "DR RefSeq; NP_001287690.1; NM_001300761.2.\n"
            + "DR RefSeq; NP_001304735.1; NM_001317806.1.\n"
            + "DR RefSeq; NP_114162.2; NM_031956.3. [Q8NA56-1]\n"
            + "DR UniGene; Hs.378893; -.\n"
            + "DR ProteinModelPortal; Q8NA56; -.\n"
            + "DR SMR; Q8NA56; -.\n"
            + "DR BioGrid; 123805; 16.\n"
            + "DR STRING; 9606.ENSP00000316740; -.\n"
            + "DR iPTMnet; Q8NA56; -.\n"
            + "DR PhosphoSitePlus; Q8NA56; -.\n"
            + "DR BioMuta; TTC29; -.\n"
            + "DR DMDM; 152112335; -.\n"
            + "DR PaxDb; Q8NA56; -.\n"
            + "DR PeptideAtlas; Q8NA56; -.\n"
            + "DR PRIDE; Q8NA56; -.\n"
            + "DR Ensembl; ENST00000325106; ENSP00000316740; ENSG00000137473. [Q8NA56-1]\n"
            + "DR GeneID; 83894; -.\n"
            + "DR KEGG; hsa:83894; -.\n"
            + "DR UCSC; uc003ikw.5; human. [Q8NA56-1]\n"
            + "DR CTD; 83894; -.\n"
            + "DR DisGeNET; 83894; -.\n"
            + "DR GeneCards; TTC29; -.\n"
            + "DR HGNC; HGNC:29936; TTC29.\n"
            + "DR HPA; HPA037006; -.\n"
            + "DR HPA; HPA061473; -.\n"
            + "DR neXtProt; NX_Q8NA56; -.\n"
            + "DR OpenTargets; ENSG00000137473; -.\n"
            + "DR PharmGKB; PA145147799; -.\n"
            + "DR eggNOG; ENOG410IFVK; Eukaryota.\n"
            + "DR eggNOG; ENOG410XPVA; LUCA.\n"
            + "DR GeneTree; ENSGT00390000008611; -.\n"
            + "DR HOGENOM; HOG000067965; -.\n"
            + "DR HOVERGEN; HBG108611; -.\n"
            + "DR InParanoid; Q8NA56; -.\n"
            + "DR PhylomeDB; Q8NA56; -.\n"
            + "DR TreeFam; TF328344; -.\n"
            + "DR ChiTaRS; TTC29; human.\n"
            + "DR GenomeRNAi; 83894; -.\n"
            + "DR PRO; PR:Q8NA56; -.\n"
            + "DR Proteomes; UP000005640; Chromosome 4.\n"
            + "DR Bgee; ENSG00000137473; -.\n"
            + "DR CleanEx; HS_TTC29; -.\n"
            + "DR ExpressionAtlas; Q8NA56; baseline and differential.\n"
            + "DR Genevisible; Q8NA56; HS.\n"
            + "DR Gene3D; 1.25.40.10; -; 2.\n"
            + "DR InterPro; IPR013026; TPR-contain_dom.\n"
            + "DR InterPro; IPR011990; TPR-like_helical_dom.\n"
            + "DR InterPro; IPR019734; TPR_repeat.\n"
            + "DR SMART; SM00028; TPR; 4.\n"
            + "DR SUPFAM; SSF48452; SSF48452; 1.\n"
            + "DR PROSITE; PS50293; TPR_REGION; 2.\n"
            + "PE 2: Evidence at transcript level;\n"
            + "KW Alternative splicing; Complete proteome; Polymorphism;\n"
            + "KW Reference proteome; Repeat; TPR repeat.\n"
            + "FT CHAIN 1 475 Tetratricopeptide repeat protein 29.\n"
            + "FT /FTId=PRO_0000294435.\n"
            + "FT REPEAT 182 215 TPR 1.\n"
            + "FT REPEAT 234 267 TPR 2.\n"
            + "FT REPEAT 274 307 TPR 3.\n"
            + "FT REPEAT 314 347 TPR 4.\n"
            + "FT REPEAT 354 387 TPR 5.\n"
            + "FT VAR_SEQ 1 1 M -> MIPMFTVTLEDSGTLWKSLHSSSESE (in\n"
            + "FT isoform 2).\n"
            + "FT {ECO:0000303|PubMed:14702039}.\n"
            + "FT /FTId=VSP_026638.\n"
            + "FT VARIANT 94 94 L -> P (in dbSNP:rs35123039).\n"
            + "FT /FTId=VAR_033179.\n"
            + "FT VARIANT 140 140 H -> Y (in dbSNP:rs17610219).\n"
            + "FT {ECO:0000269|Ref.1, ECO:0000269|Ref.2}.\n"
            + "FT /FTId=VAR_033180.\n"
            + "FT VARIANT 276 276 A -> T (in dbSNP:rs10013280).\n"
            + "FT {ECO:0000269|Ref.1, ECO:0000269|Ref.2}.\n"
            + "FT /FTId=VAR_033181.\n"
            + "FT CONFLICT 239 239 L -> F (in Ref. 3; BAC04072).\n"
            + "FT {ECO:0000305}.\n"
            + "SQ SEQUENCE 475 AA; 55082 MW; 09BF33E42330C53A CRC64;\n"
            + " MTTLPPLPMT RPKLTALARQ KLPCSSRKIP RSQLIKEKDD IDHYLEVNFK GLSKEEVAAY\n"
            + " RNSYKKNICV DMLRDGYHKS FTELFALMER WDALREAARV RSLFWLQKPL EEQPDKLDYL\n"
            + " YHYLTRAEDA ERKESFEDVH NNLYALACYF NNSEDKWVRN HFYERCFKIA QLIKIDCGKK\n"
            + " EAEAHMHMGL LYEEDGQLLE AAEHYEAFHQ LTQGRIWKDE TGRSLNLLAC ESLLRTYRLL\n"
            + " SDKMLENKEY KQAIKILIKA SEIAKEGSDK KMEAEASYYL GLAHLAAEEY ETALTVLDTY\n"
            + " CKISTDLDDD LSLGRGYEAI AKVLQSQGEM TEAIKYLKKF VKIARNNFQS LDLVRASTML\n"
            + " GDIYNEKGYY NKASECFQQA FDTTVELMSM PLMDETKVHY GIAKAHQMML TVNNYIESAD\n"
            + " LTSLNYLLSW KESRGNIEPD PVTEEFRGST VEAVSQNSER LEELSRFPGD QKNET\n"
            + "//\n"
            + "ID 2NPD_NEUCR Reviewed; 378 AA.\n"
            + "AC Q01284; Q7RV78;\n"
            + "DT 01-NOV-1997, integrated into UniProtKB/Swiss-Prot.\n"
            + "DT 01-NOV-1996, sequence version 1.\n"
            + "DT 15-FEB-2017, entry version 106.\n"
            + "DE RecName: Full=Nitronate monooxygenase;\n"
            + "DE EC=1.13.12.16;\n"
            + "DE AltName: Full=2-nitropropane dioxygenase;\n"
            + "DE Short=2-NPD;\n"
            + "DE AltName: Full=Nitroalkane oxidase;\n"
            + "DE Flags: Precursor;\n"
            + "GN Name=ncd-2; ORFNames=G17A4.200, NCU03949;\n"
            + "OS Neurospora crassa (strain ATCC 24698 / 74-OR23-1A / CBS 708.71 / DSM\n"
            + "OS 1257 / FGSC 987).\n"
            + "OC Eukaryota; Fungi; Dikarya; Ascomycota; Pezizomycotina;\n"
            + "OC Sordariomycetes; Sordariomycetidae; Sordariales; Sordariaceae;\n"
            + "OC Neurospora.\n"
            + "OX NCBI_TaxID=367110;\n"
            + "RN [1]\n"
            + "RP NUCLEOTIDE SEQUENCE [MRNA], CATALYTIC ACTIVITY, SUBSTRATE SPECIFICITY,\n"
            + "RP COFACTOR, BIOPHYSICOCHEMICAL PROPERTIES, SUBUNIT, AND REACTION\n"
            + "RP MECHANISM.\n"
            + "RC STRAIN=ATCC 10337 / FGSC 1758 / NBRC 6067 / IMI 53239;\n"
            + "RX PubMed=9501443;\n"
            + "RA Gorlatova N., Tchorzewski M., Kurihara T., Soda K., Esaki N.;\n"
            + "RT \"Purification, characterization, and mechanism of a flavin\n"
            + "RT mononucleotide-dependent 2-nitropropane dioxygenase from Neurospora\n"
            + "RT crassa.\";\n"
            + "RL Appl. Environ. Microbiol. 64:1029-1033(1998).\n"
            + "RN [2]\n"
            + "RP NUCLEOTIDE SEQUENCE [LARGE SCALE GENOMIC DNA].\n"
            + "RC STRAIN=ATCC 24698 / 74-OR23-1A / CBS 708.71 / DSM 1257 / FGSC 987;\n"
            + "RX PubMed=12655011; DOI=10.1093/nar/gkg293;\n"
            + "RA Mannhaupt G., Montrone C., Haase D., Mewes H.-W., Aign V.,\n"
            + "RA Hoheisel J.D., Fartmann B., Nyakatura G., Kempken F., Maier J.,\n"
            + "RA Schulte U.;\n"
            + "RT \"What's in the genome of a filamentous fungus? Analysis of the\n"
            + "RT Neurospora genome sequence.\";\n"
            + "RL Nucleic Acids Res. 31:1944-1954(2003).\n"
            + "RN [3]\n"
            + "RP NUCLEOTIDE SEQUENCE [LARGE SCALE GENOMIC DNA].\n"
            + "RC STRAIN=ATCC 24698 / 74-OR23-1A / CBS 708.71 / DSM 1257 / FGSC 987;\n"
            + "RX PubMed=12712197; DOI=10.1038/nature01554;\n"
            + "RA Galagan J.E., Calvo S.E., Borkovich K.A., Selker E.U., Read N.D.,\n"
            + "RA Jaffe D.B., FitzHugh W., Ma L.-J., Smirnov S., Purcell S., Rehman B.,\n"
            + "RA Elkins T., Engels R., Wang S., Nielsen C.B., Butler J., Endrizzi M.,\n"
            + "RA Qui D., Ianakiev P., Bell-Pedersen D., Nelson M.A.,\n"
            + "RA Werner-Washburne M., Selitrennikoff C.P., Kinsey J.A., Braun E.L.,\n"
            + "RA Zelter A., Schulte U., Kothe G.O., Jedd G., Mewes H.-W., Staben C.,\n"
            + "RA Marcotte E., Greenberg D., Roy A., Foley K., Naylor J.,\n"
            + "RA Stange-Thomann N., Barrett R., Gnerre S., Kamal M., Kamvysselis M.,\n"
            + "RA Mauceli E.W., Bielke C., Rudd S., Frishman D., Krystofova S.,\n"
            + "RA Rasmussen C., Metzenberg R.L., Perkins D.D., Kroken S., Cogoni C.,\n"
            + "RA Macino G., Catcheside D.E.A., Li W., Pratt R.J., Osmani S.A.,\n"
            + "RA DeSouza C.P.C., Glass N.L., Orbach M.J., Berglund J.A., Voelker R.,\n"
            + "RA Yarden O., Plamann M., Seiler S., Dunlap J.C., Radford A., Aramayo R.,\n"
            + "RA Natvig D.O., Alex L.A., Mannhaupt G., Ebbole D.J., Freitag M.,\n"
            + "RA Paulsen I., Sachs M.S., Lander E.S., Nusbaum C., Birren B.W.;\n"
            + "RT \"The genome sequence of the filamentous fungus Neurospora crassa.\";\n"
            + "RL Nature 422:859-868(2003).\n"
            + "RN [4]\n"
            + "RP COFACTOR, SUBSTRATE SPECIFICITY, AND REACTION MECHANISM.\n"
            + "RX PubMed=19577534; DOI=10.1016/j.abb.2009.06.018;\n"
            + "RA Gadda G., Francis K.;\n"
            + "RT \"Nitronate monooxygenase, a model for anionic flavin semiquinone\n"
            + "RT intermediates in oxidative catalysis.\";\n"
            + "RL Arch. Biochem. Biophys. 493:53-61(2010).\n"
            + "CC -!- FUNCTION: Catalyzes the oxidation of alkyl nitronates to produce\n"
            + "CC the corresponding carbonyl compounds and nitrites. Anionic forms\n"
            + "CC of nitroalkanes are much better substrates than are neutral forms.\n"
            + "CC -!- CATALYTIC ACTIVITY: Ethylnitronate + O(2) = acetaldehyde + nitrite\n"
            + "CC + other products. {ECO:0000269|PubMed:9501443}.\n"
            + "CC -!- COFACTOR:\n"
            + "CC Name=FMN; Xref=ChEBI:CHEBI:58210;\n"
            + "CC Evidence={ECO:0000269|PubMed:19577534,\n"
            + "CC ECO:0000269|PubMed:9501443};\n"
            + "CC Note=Binds 1 FMN per subunit. {ECO:0000269|PubMed:19577534,\n"
            + "CC ECO:0000269|PubMed:9501443};\n"
            + "CC -!- BIOPHYSICOCHEMICAL PROPERTIES:\n"
            + "CC Kinetic parameters:\n"
            + "CC KM=3.1 mM for 2-nitropropane {ECO:0000269|PubMed:9501443};\n"
            + "CC KM=6 mM for nitroethane {ECO:0000269|PubMed:9501443};\n"
            + "CC KM=8.3 mM for 1-nitropropane {ECO:0000269|PubMed:9501443};\n"
            + "CC -!- SUBUNIT: Homodimer. {ECO:0000269|PubMed:9501443}.\n"
            + "CC -!- SIMILARITY: Belongs to the nitronate monooxygenase family.\n"
            + "CC {ECO:0000305}.\n"
            + "CC -----------------------------------------------------------------------\n"
            + "CC Copyrighted by the UniProt Consortium, see http://www.uniprot.org/terms\n"
            + "CC Distributed under the Creative Commons Attribution-NoDerivs License\n"
            + "CC -----------------------------------------------------------------------\n"
            + "DR EMBL; U22530; AAA64218.1; -; mRNA.\n"
            + "DR EMBL; BX908812; CAF06155.1; -; Genomic_DNA.\n"
            + "DR EMBL; CM002241; EAA28352.1; -; Genomic_DNA.\n"
            + "DR PIR; T46693; T46693.\n"
            + "DR RefSeq; XP_957588.1; XM_952495.3.\n"
            + "DR ProteinModelPortal; Q01284; -.\n"
            + "DR EnsemblFungi; EAA28352; EAA28352; NCU03949.\n"
            + "DR GeneID; 3873678; -.\n"
            + "DR KEGG; ncr:NCU03949; -.\n"
            + "DR EuPathDB; FungiDB:NCU03949; -.\n"
            + "DR HOGENOM; HOG000123285; -.\n"
            + "DR InParanoid; Q01284; -.\n"
            + "DR KO; K00459; -.\n"
            + "DR OMA; VDAGGHQ; -.\n"
            + "DR OrthoDB; EOG092651WG; -.\n"
            + "DR BioCyc; MetaCyc:MONOMER-302; -.\n"
            + "DR BRENDA; 1.13.11.32; 3627.\n"
            + "DR BRENDA; 1.13.12.16; 3627.\n"
            + "DR SABIO-RK; Q01284; -.\n"
            + "DR Proteomes; UP000001805; Chromosome 5, Linkage Group VI.\n"
            + "DR GO; GO:0005634; C:nucleus; IBA:GO_Central.\n"
            + "DR GO; GO:0018580; F:nitronate monooxygenase activity; IEA:UniProtKB-EC.\n"
            + "DR GO; GO:0003700; F:transcription factor activity, sequence-specific DNA binding; IBA:GO_Central.\n"
            + "DR GO; GO:0009410; P:response to xenobiotic stimulus; IBA:GO_Central.\n"
            + "DR Gene3D; 3.20.20.70; -; 1.\n"
            + "DR InterPro; IPR013785; Aldolase_TIM.\n"
            + "DR InterPro; IPR004136; NMO.\n"
            + "DR Pfam; PF03060; NMO; 1.\n"
            + "PE 1: Evidence at protein level;\n"
            + "KW Complete proteome; Flavoprotein; FMN; Monooxygenase; Oxidoreductase;\n"
            + "KW Reference proteome.\n"
            + "FT PROPEP 1 15 {ECO:0000255}.\n"
            + "FT /FTId=PRO_0000020575.\n"
            + "FT CHAIN 16 378 Nitronate monooxygenase.\n"
            + "FT /FTId=PRO_0000020576.\n"
            + "FT NP_BIND 37 39 FMN. {ECO:0000250}.\n"
            + "FT NP_BIND 229 231 FMN. {ECO:0000250}.\n"
            + "FT NP_BIND 252 253 FMN. {ECO:0000250}.\n"
            + "FT ACT_SITE 196 196 Proton acceptor. {ECO:0000255}.\n"
            + "FT BINDING 196 196 Substrate. {ECO:0000250}.\n"
            + "SQ SEQUENCE 378 AA; 39916 MW; E453EB43FD23E441 CRC64;\n"
            + " MHFPGHSSKK EESAQAALTK LNSWFPTTKN PVIISAPMYL IANGTLAAEV SKAGGIGFVA\n"
            + " GGSDFRPGSS HLTALSTELA SARSRLGLTD RPLTPLPGIG VGLILTHTIS VPYVTDTVLP\n"
            + " ILIEHSPQAV WLFANDPDFE ASSEPGAKGT AKQIIEALHA SGFVVFFQVG TVKDARKAAA\n"
            + " DGADVIVAQG IDAGGHQLAT GSGIVSLVPE VRDMLDREFK EREVVVVAAG GVADGRGVVG\n"
            + " ALGLGAEGVV LGTRFTVAVE ASTPEFRRKV ILETNDGGLN TVKSHFHDQI NCNTIWHNVY\n"
            + " DGRAVRNASY DDHAAGVPFE ENHKKFKEAA SSGDNSRAVT WSGTAVGLIK DQRPAGDIVR\n"
            + " ELREEAKERI KKIQAFAA\n"
            + "//\n"
            + "ID 5NTD_LUTLO Reviewed; 572 AA.\n"
            + "AC Q9XZ43;\n"
            + "DT 11-JAN-2001, integrated into UniProtKB/Swiss-Prot.\n"
            + "DT 01-NOV-1999, sequence version 1.\n"
            + "DT 12-APR-2017, entry version 83.\n"
            + "DE RecName: Full=Protein 5NUC;\n"
            + "DE Includes:\n"
            + "DE RecName: Full=UDP-sugar hydrolase;\n"
            + "DE EC=3.6.1.45;\n"
            + "DE AltName: Full=UDP-sugar diphosphatase;\n"
            + "DE AltName: Full=UDP-sugar pyrophosphatase;\n"
            + "DE Includes:\n"
            + "DE RecName: Full=5'-nucleotidase;\n"
            + "DE Short=5'-NT;\n"
            + "DE EC=3.1.3.5;\n"
            + "DE Flags: Precursor;\n"
            + "GN Name=5NUC;\n"
            + "OS Lutzomyia longipalpis (Sand fly).\n"
            + "OC Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta;\n"
            + "OC Pterygota; Neoptera; Holometabola; Diptera; Nematocera; Psychodoidea;\n"
            + "OC Psychodidae; Lutzomyia; Lutzomyia.\n"
            + "OX NCBI_TaxID=7200;\n"
            + "RN [1]\n"
            + "RP NUCLEOTIDE SEQUENCE [MRNA].\n"
            + "RC STRAIN=Jacobina; TISSUE=Salivary gland;\n"
            + "RX PubMed=10611354; DOI=10.1073/pnas.96.26.15155;\n"
            + "RA Charlab R., Valenzuela J.G., Rowton E.D., Ribeiro J.M.;\n"
            + "RT \"Toward an understanding of the biochemical and pharmacological\n"
            + "RT complexity of the saliva of a hematophagous sand fly Lutzomyia\n"
            + "RT longipalpis.\";\n"
            + "RL Proc. Natl. Acad. Sci. U.S.A. 96:15155-15160(1999).\n"
            + "RN [2]\n"
            + "RP CHARACTERIZATION.\n"
            + "RC TISSUE=Salivary gland;\n"
            + "RX PubMed=10727894; DOI=10.1016/S0965-1748(99)00123-X;\n"
            + "RA Ribeiro J.M.C., Rowton E.D., Charlab R.;\n"
            + "RT \"The salivary 5'-nucleotidase/phosphodiesterase of the hematophagus\n"
            + "RT sand fly, Lutzomyia longipalpis.\";\n"
            + "RL Insect Biochem. Mol. Biol. 30:279-285(2000).\n"
            + "RN [3]\n"
            + "RP ERRATUM.\n"
            + "RA Ribeiro J.M.C., Rowton E.D., Charlab R.;\n"
            + "RL Insect Biochem. Mol. Biol. 30:609-609(2000).\n"
            + "CC -!- FUNCTION: Degradation of external UDP-glucose to uridine\n"
            + "CC monophosphate and glucose-1-phosphate, which can then be used by\n"
            + "CC the cell. {ECO:0000250}.\n"
            + "CC -!- CATALYTIC ACTIVITY: UDP-sugar + H(2)O = UMP + alpha-D-aldose 1-\n"
            + "CC phosphate.\n"
            + "CC -!- CATALYTIC ACTIVITY: A 5'-ribonucleotide + H(2)O = a ribonucleoside\n"
            + "CC + phosphate.\n"
            + "CC -!- COFACTOR:\n"
            + "CC Name=Zn(2+); Xref=ChEBI:CHEBI:29105; Evidence={ECO:0000250};\n"
            + "CC -!- SIMILARITY: Belongs to the 5'-nucleotidase family. {ECO:0000305}.\n"
            + "DR EMBL; AF132510; AAD32190.1; -; mRNA.\n"
            + "DR ProteinModelPortal; Q9XZ43; -.\n"
            + "DR SMR; Q9XZ43; -.\n"
            + "DR Proteomes; UP000092461; Unassembled WGS sequence.\n"
            + "DR GO; GO:0008253; F:5'-nucleotidase activity; IEA:UniProtKB-EC.\n"
            + "DR GO; GO:0046872; F:metal ion binding; IEA:UniProtKB-KW.\n"
            + "DR GO; GO:0000166; F:nucleotide binding; IEA:UniProtKB-KW.\n"
            + "DR GO; GO:0008768; F:UDP-sugar diphosphatase activity; IEA:UniProtKB-EC.\n"
            + "DR GO; GO:0009166; P:nucleotide catabolic process; IEA:InterPro.\n"
            + "DR Gene3D; 3.60.21.10; -; 1.\n"
            + "DR Gene3D; 3.90.780.10; -; 1.\n"
            + "DR InterPro; IPR008334; 5'-Nucleotdase_C.\n"
            + "DR InterPro; IPR006146; 5'-Nucleotdase_CS.\n"
            + "DR InterPro; IPR006179; 5_nucleotidase/apyrase.\n"
            + "DR InterPro; IPR004843; Calcineurin-like_PHP_ApaH.\n"
            + "DR InterPro; IPR029052; Metallo-depent_PP-like.\n"
            + "DR PANTHER; PTHR11575; PTHR11575; 1.\n"
            + "DR Pfam; PF02872; 5_nucleotid_C; 1.\n"
            + "DR Pfam; PF00149; Metallophos; 1.\n"
            + "DR PRINTS; PR01607; APYRASEFAMLY.\n"
            + "DR SUPFAM; SSF55816; SSF55816; 1.\n"
            + "DR SUPFAM; SSF56300; SSF56300; 1.\n"
            + "DR PROSITE; PS00785; 5_NUCLEOTIDASE_1; 1.\n"
            + "DR PROSITE; PS00786; 5_NUCLEOTIDASE_2; 1.\n"
            + "PE 1: Evidence at protein level;\n"
            + "KW Complete proteome; Disulfide bond; Glycoprotein; Hydrolase;\n"
            + "KW Metal-binding; Nucleotide-binding; Reference proteome; Signal; Zinc.\n"
            + "FT SIGNAL 1 25 {ECO:0000255}.\n"
            + "FT CHAIN 26 572 Protein 5NUC.\n"
            + "FT /FTId=PRO_0000000023.\n"
            + "FT REGION 512 518 Substrate binding. {ECO:0000250}.\n"
            + "FT METAL 39 39 Zinc 1. {ECO:0000250}.\n"
            + "FT METAL 41 41 Zinc 1. {ECO:0000250}.\n"
            + "FT METAL 93 93 Zinc 1. {ECO:0000250}.\n"
            + "FT METAL 93 93 Zinc 2. {ECO:0000250}.\n"
            + "FT METAL 125 125 Zinc 2. {ECO:0000250}.\n"
            + "FT METAL 227 227 Zinc 2. {ECO:0000250}.\n"
            + "FT METAL 250 250 Zinc 2. {ECO:0000250}.\n"
            + "FT BINDING 361 361 Substrate. {ECO:0000250}.\n"
            + "FT BINDING 399 399 Substrate. {ECO:0000250}.\n"
            + "FT BINDING 404 404 Substrate. {ECO:0000250}.\n"
            + "FT BINDING 427 427 Substrate. {ECO:0000250}.\n"
            + "FT SITE 126 126 Transition state stabilizer.\n"
            + "FT {ECO:0000250}.\n"
            + "FT SITE 129 129 Transition state stabilizer.\n"
            + "FT {ECO:0000250}.\n"
            + "FT CARBOHYD 82 82 N-linked (GlcNAc...). {ECO:0000255}.\n"
            + "FT CARBOHYD 454 454 N-linked (GlcNAc...). {ECO:0000255}.\n"
            + "FT CARBOHYD 490 490 N-linked (GlcNAc...). {ECO:0000255}.\n"
            + "FT DISULFID 54 64 {ECO:0000250}.\n"
            + "FT DISULFID 360 365 {ECO:0000250}.\n"
            + "FT DISULFID 488 491 {ECO:0000250}.\n"
            + "**\n"
            + "** ################# INTERNAL SECTION ##################\n"
            + "**EV ECO:0000250; -; XXX; 01-JAN-1900.\n"
            + "**EV ECO:0000255; -; XXX; 01-JAN-1900.\n"
            + "**EV ECO:0000305; -; XXX; 01-JAN-1900.\n"
            + "**ZB JSG, 08-MAR-2006; RAB, 07-MAR-2017;\n"
            + "SQ SEQUENCE 572 AA; 63354 MW; 69A652338C04536D CRC64;\n"
            + " MLFFLNFFVL VFSIELALLT ASAAAEDGSY EIIILHTNDM HARFDQTNAG SNKCQEKDKI\n"
            + " ASKCYGGFAR VSTMVKKFRE ENGSSVLFLN AGDTYTGTPW FTLYKETIAT EMMNILRPDA\n"
            + " ASLGNHEFDK GVEGLVPFLN GVTFPILTAN LDTSQEPTMT NAKNLKRSMI FTVSGHRVGV\n"
            + " IGYLTPDTKF LSDVGKVNFI PEVEAINTEA QRLKKEENAE IIIVVGHSGL IKDREIAEKC\n"
            + " PLVDIIVGGH SHTFLYTGSQ PDREVPVDVY PVVVTQSSGK KVPIVQAYCF TKYLGYFKVT\n"
            + " INGKGNVVGW TGQPILLNNN IPQDQEVLTA LEKYRERVEN YGNRVIGVSR VILNGGHTEC\n"
            + " RFHECNMGNL ITDAFVYANV ISTPMSTNAW TDASVVLYQS GGIRAPIDPR TAAGSITRLE\n"
            + " LDNVLPFGNA LYVVKVPGNV LRKALEHSVH RYSNTSGWGE FPQVSGLKIR FNVNEEIGKR\n"
            + " VKSVKVLCSN CSQPEYQPLR NKKTYNVIMD SFMKDGGDGY SMFKPLKIIK TLPLGDIETV\n"
            + " EAYIEKMGPI FPAVEGRITV LGGLQKSDED WH\n"
            + "//\n"
            + "ID 14311_ARATH Reviewed; 252 AA.\n"
            + "AC Q9S9Z8; A0JQ87; F4HWN0; Q0WL19;\n"
            + "DT 05-DEC-2001, integrated into UniProtKB/Swiss-Prot.\n"
            + "DT 16-NOV-2011, sequence version 2.\n"
            + "DT 15-MAR-2017, entry version 98.\n"
            + "DE RecName: Full=14-3-3-like protein GF14 omicron;\n"
            + "DE AltName: Full=General regulatory factor 11;\n"
            + "GN Name=GRF11; OrderedLocusNames=At1g34760; ORFNames=F21H2.3;\n"
            + "OS Arabidopsis thaliana (Mouse-ear cress).\n"
            + "OC Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta;\n"
            + "OC Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae;\n"
            + "OC Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Camelineae;\n"
            + "OC Arabidopsis.\n"
            + "OX NCBI_TaxID=3702;\n"
            + "RN [1]\n"
            + "RP NUCLEOTIDE SEQUENCE [MRNA] (ISOFORM 2).\n"
            + "RC TISSUE=Leaf;\n"
            + "RA Alsterfjord M., Rosenquist M., Larsson C., Sommarin M.;\n"
            + "RT \"Novel 14-3-3 isoforms in Arabidopsis thaliana.\";\n"
            + "RL Submitted (NOV-2000) to the EMBL/GenBank/DDBJ databases.\n"
            + "RN [2]\n"
            + "RP NUCLEOTIDE SEQUENCE [LARGE SCALE GENOMIC DNA].\n"
            + "RC STRAIN=cv. Columbia;\n"
            + "RX PubMed=11130712; DOI=10.1038/35048500;\n"
            + "RA Theologis A., Ecker J.R., Palm C.J., Federspiel N.A., Kaul S.,\n"
            + "RA White O., Alonso J., Altafi H., Araujo R., Bowman C.L., Brooks S.Y.,\n"
            + "RA Buehler E., Chan A., Chao Q., Chen H., Cheuk R.F., Chin C.W.,\n"
            + "RA Chung M.K., Conn L., Conway A.B., Conway A.R., Creasy T.H., Dewar K.,\n"
            + "RA Dunn P., Etgu P., Feldblyum T.V., Feng J.-D., Fong B., Fujii C.Y.,\n"
            + "RA Gill J.E., Goldsmith A.D., Haas B., Hansen N.F., Hughes B., Huizar L.,\n"
            + "RA Hunter J.L., Jenkins J., Johnson-Hopson C., Khan S., Khaykin E.,\n"
            + "RA Kim C.J., Koo H.L., Kremenetskaia I., Kurtz D.B., Kwan A., Lam B.,\n"
            + "RA Langin-Hooper S., Lee A., Lee J.M., Lenz C.A., Li J.H., Li Y.-P.,\n"
            + "RA Lin X., Liu S.X., Liu Z.A., Luros J.S., Maiti R., Marziali A.,\n"
            + "RA Militscher J., Miranda M., Nguyen M., Nierman W.C., Osborne B.I.,\n"
            + "RA Pai G., Peterson J., Pham P.K., Rizzo M., Rooney T., Rowley D.,\n"
            + "RA Sakano H., Salzberg S.L., Schwartz J.R., Shinn P., Southwick A.M.,\n"
            + "RA Sun H., Tallon L.J., Tambunga G., Toriumi M.J., Town C.D.,\n"
            + "RA Utterback T., Van Aken S., Vaysberg M., Vysotskaia V.S., Walker M.,\n"
            + "RA Wu D., Yu G., Fraser C.M., Venter J.C., Davis R.W.;\n"
            + "RT \"Sequence and analysis of chromosome 1 of the plant Arabidopsis\n"
            + "RT thaliana.\";\n"
            + "RL Nature 408:816-820(2000).\n"
            + "RN [3]\n"
            + "RP GENOME REANNOTATION.\n"
            + "RC STRAIN=cv. Columbia;\n"
            + "RG The Arabidopsis Information Resource (TAIR);\n"
            + "RL Submitted (APR-2011) to the EMBL/GenBank/DDBJ databases.\n"
            + "RN [4]\n"
            + "RP NUCLEOTIDE SEQUENCE [LARGE SCALE MRNA] (ISOFORM 1).\n"
            + "RC STRAIN=cv. Columbia;\n"
            + "RA Totoki Y., Seki M., Ishida J., Nakajima M., Enju A., Kamiya A.,\n"
            + "RA Narusaka M., Shin-i T., Nakagawa M., Sakamoto N., Oishi K., Kohara Y.,\n"
            + "RA Kobayashi M., Toyoda A., Sakaki Y., Sakurai T., Iida K., Akiyama K.,\n"
            + "RA Satou M., Toyoda T., Konagaya A., Carninci P., Kawai J.,\n"
            + "RA Hayashizaki Y., Shinozaki K.;\n"
            + "RT \"Large-scale analysis of RIKEN Arabidopsis full-length (RAFL) cDNAs.\";\n"
            + "RL Submitted (JUL-2006) to the EMBL/GenBank/DDBJ databases.\n"
            + "RN [5]\n"
            + "RP NUCLEOTIDE SEQUENCE [LARGE SCALE MRNA] (ISOFORM 3).\n"
            + "RC STRAIN=cv. Columbia;\n"
            + "RA Bautista V.R., Kim C.J., Chen H., Quinitio C., Ecker J.R.;\n"
            + "RT \"Arabidopsis ORF Clones.\";\n"
            + "RL Submitted (NOV-2006) to the EMBL/GenBank/DDBJ databases.\n"
            + "CC -!- FUNCTION: Is associated with a DNA binding complex that binds to\n"
            + "CC the G box, a well-characterized cis-acting DNA regulatory element\n"
            + "CC found in plant genes. {ECO:0000250}.\n"
            + "CC -!- ALTERNATIVE PRODUCTS:\n"
            + "CC Event=Alternative splicing; Named isoforms=3;\n"
            + "CC Comment=A number of isoforms are produced. According to EST\n"
            + "CC sequences.;\n"
            + "CC Name=1;\n"
            + "CC IsoId=Q9S9Z8-1; Sequence=Displayed;\n"
            + "CC Name=2;\n"
            + "CC IsoId=Q9S9Z8-2; Sequence=VSP_042043, VSP_042044;\n"
            + "CC Name=3;\n"
            + "CC IsoId=Q9S9Z8-3; Sequence=VSP_042045;\n"
            + "CC -!- SIMILARITY: Belongs to the 14-3-3 family. {ECO:0000305}.\n"
            + "DR EMBL; AF323920; AAG47840.1; -; mRNA.\n"
            + "DR EMBL; AC007894; AAD46005.1; -; Genomic_DNA.\n"
            + "DR EMBL; CP002684; AEE31735.1; -; Genomic_DNA.\n"
            + "DR EMBL; CP002684; AEE31736.1; -; Genomic_DNA.\n"
            + "DR EMBL; AK230390; BAF02188.1; -; mRNA.\n"
            + "DR EMBL; BT029457; ABK59686.1; -; mRNA.\n"
            + "DR RefSeq; NP_001077649.1; NM_001084180.2. [Q9S9Z8-1]\n"
            + "DR RefSeq; NP_564451.2; NM_103196.4. [Q9S9Z8-3]\n"
            + "DR UniGene; At.11487; -.\n"
            + "DR ProteinModelPortal; Q9S9Z8; -.\n"
            + "DR SMR; Q9S9Z8; -.\n"
            + "DR BioGrid; 25612; 2.\n"
            + "DR IntAct; Q9S9Z8; 1.\n"
            + "DR STRING; 3702.AT1G34760.1; -.\n"
            + "DR iPTMnet; Q9S9Z8; -.\n"
            + "DR PaxDb; Q9S9Z8; -.\n"
            + "DR PRIDE; Q9S9Z8; -.\n"
            + "DR EnsemblPlants; AT1G34760.2; AT1G34760.2; AT1G34760. [Q9S9Z8-1]\n"
            + "DR GeneID; 840380; -.\n"
            + "DR Gramene; AT1G34760.2; AT1G34760.2; AT1G34760.\n"
            + "DR KEGG; ath:AT1G34760; -.\n"
            + "DR Araport; AT1G34760; -.\n"
            + "DR TAIR; locus:2008381; AT1G34760.\n"
            + "DR eggNOG; KOG0841; Eukaryota.\n"
            + "DR eggNOG; COG5040; LUCA.\n"
            + "DR HOGENOM; HOG000240379; -.\n"
            + "DR InParanoid; Q9S9Z8; -.\n"
            + "DR OMA; NEQAERY; -.\n"
            + "DR Reactome; R-ATH-1445148; Translocation of GLUT4 to the plasma membrane.\n"
            + "DR Reactome; R-ATH-3371453; Regulation of HSF1-mediated heat shock response.\n"
            + "DR Reactome; R-ATH-3371511; HSF1 activation.\n"
            + "DR PRO; PR:Q9S9Z8; -.\n"
            + "DR Proteomes; UP000006548; Chromosome 1.\n"
            + "DR Genevisible; Q9S9Z8; AT.\n"
            + "DR GO; GO:0051117; F:ATPase binding; IDA:TAIR.\n"
            + "DR Gene3D; 1.20.190.20; -; 1.\n"
            + "DR InterPro; IPR000308; 14-3-3.\n"
            + "DR InterPro; IPR023409; 14-3-3_CS.\n"
            + "DR InterPro; IPR023410; 14-3-3_domain.\n"
            + "DR PANTHER; PTHR18860; PTHR18860; 1.\n"
            + "DR Pfam; PF00244; 14-3-3; 1.\n"
            + "DR PIRSF; PIRSF000868; 14-3-3; 1.\n"
            + "DR PRINTS; PR00305; 1433ZETA.\n"
            + "DR SMART; SM00101; 14_3_3; 1.\n"
            + "DR SUPFAM; SSF48445; SSF48445; 1.\n"
            + "DR PROSITE; PS00796; 1433_1; 1.\n"
            + "DR PROSITE; PS00797; 1433_2; 1.\n"
            + "PE 2: Evidence at transcript level;\n"
            + "KW Alternative splicing; Complete proteome; Reference proteome.\n"
            + "FT CHAIN 1 252 14-3-3-like protein GF14 omicron.\n"
            + "FT /FTId=PRO_0000058673.\n"
            + "FT VAR_SEQ 241 241 E -> K (in isoform 2).\n"
            + "FT {ECO:0000303|Ref.1}.\n"
            + "FT /FTId=VSP_042043.\n"
            + "FT VAR_SEQ 242 252 Missing (in isoform 2).\n"
            + "FT {ECO:0000303|Ref.1}.\n"
            + "FT /FTId=VSP_042044.\n"
            + "FT VAR_SEQ 252 252 N -> VNKI (in isoform 3).\n"
            + "FT {ECO:0000303|Ref.5}.\n"
            + "FT /FTId=VSP_042045.\n"
            + "FT CONFLICT 128 128 D -> G (in Ref. 4; BAF02188).\n"
            + "FT {ECO:0000305}.\n"
            + "**\n"
            + "** ################# INTERNAL SECTION ##################\n"
            + "**DR Araport-CDS; AT1G34760.1; Araport11; -. [Q9S9Z8-3]\n"
            + "**DR Araport-CDS; AT1G34760.2; Araport11; -. [Q9S9Z8-1]\n"
            + "**EV ECO:0000250; -; XXX; 01-JAN-1900.\n"
            + "**EV ECO:0000303; Ref.1; XXX; 01-JAN-1900.\n"
            + "**EV ECO:0000303; Ref.5; XXX; 01-JAN-1900.\n"
            + "**EV ECO:0000305; -; XXX; 01-JAN-1900.\n"
            + "**YY According to Araport, the gene encoding this protein is alternatively spliced.\n"
            + "**ZB MIT, 11-OCT-2011;\n"
            + "SQ SEQUENCE 252 AA; 28781 MW; 852335FF39915461 CRC64;\n"
            + " MENERAKQVY LAKLNEQAER YDEMVEAMKK VAALDVELTI EERNLLSVGY KNVIGARRAS\n"
            + " WRILSSIEQK EESKGNEQNA KRIKDYRTKV EEELSKICYD ILAVIDKHLV PFATSGESTV\n"
            + " FYYKMKGDYF RYLAEFKSGA DREEAADLSL KAYEAATSSA STELSTTHPI RLGLALNFSV\n"
            + " FYYEILNSPE RACHLAKRAF DEAIAELDSL NEDSYKDSTL IMQLLRDNLT LWTSDLEEGG\n"
            + " EQSKGHNQQD EN\n"
            + "//\n"
            + "ID W0TYI6_HUMAN Unreviewed; 154 AA.\n"
            + "AC W0TYI6;\n"
            + "DT 19-MAR-2014, integrated into UniProtKB/TrEMBL.\n"
            + "DT 19-MAR-2014, sequence version 1.\n"
            + "DT 15-FEB-2017, entry version 23.\n"
            + "DE SubName: Full=H2B histone family, member M {ECO:0000313|EMBL:CAA97844.2};\n"
            + "GN Name=H2BFM {ECO:0000313|EMBL:CAA97844.2};\n"
            + "GN ORFNames=LL0XNC01-240C2.2-001 {ECO:0000313|EMBL:CAA97844.2};\n"
            + "OS Homo sapiens (Human).\n"
            + "OC Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi;\n"
            + "OC Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini;\n"
            + "OC Catarrhini; Hominidae; Homo.\n"
            + "OX NCBI_TaxID=9606 {ECO:0000313|EMBL:CAA97844.2};\n"
            + "RN [1] {ECO:0000313|EMBL:CAA97844.2}\n"
            + "RP NUCLEOTIDE SEQUENCE.\n"
            + "RA Hunt A.;\n"
            + "RL Submitted (JAN-2009) to the EMBL/GenBank/DDBJ databases.\n"
            + "CC -!- FUNCTION: Core component of nucleosome. Nucleosomes wrap and\n"
            + "CC compact DNA into chromatin, limiting DNA accessibility to the\n"
            + "CC cellular machineries which require DNA as a template. Histones\n"
            + "CC thereby play a central role in transcription regulation, DNA\n"
            + "CC repair, DNA replication and chromosomal stability. DNA\n"
            + "CC accessibility is regulated via a complex set of post-translational\n"
            + "CC modifications of histones, also called histone code, and\n"
            + "CC nucleosome remodeling. {ECO:0000256|SAAS:SAAS00295035}.\n"
            + "CC -!- SUBUNIT: The nucleosome is a histone octamer containing two\n"
            + "CC molecules each of H2A, H2B, H3 and H4 assembled in one H3-H4\n"
            + "CC heterotetramer and two H2A-H2B heterodimers. The octamer wraps\n"
            + "CC approximately 147 bp of DNA. {ECO:0000256|SAAS:SAAS00565646}.\n"
            + "CC -!- SUBCELLULAR LOCATION: Chromosome {ECO:0000256|SAAS:SAAS00680978}.\n"
            + "CC -!- SUBCELLULAR LOCATION: Nucleus {ECO:0000256|SAAS:SAAS00593244}.\n"
            + "CC -!- SIMILARITY: Belongs to the histone H2B family.\n"
            + "CC {ECO:0000256|SAAS:SAAS00689654}.\n"
            + "CC -----------------------------------------------------------------------\n"
            + "CC Copyrighted by the UniProt Consortium, see http://www.uniprot.org/terms\n"
            + "CC Distributed under the Creative Commons Attribution-NoDerivs License\n"
            + "CC -----------------------------------------------------------------------\n"
            + "DR EMBL; Z73497; CAA97844.2; -; Genomic_DNA.\n"
            + "DR RefSeq; NP_001157888.1; NM_001164416.1.\n"
            + "DR RefSeq; XP_006724703.1; XM_006724640.2.\n"
            + "DR RefSeq; XP_011529224.1; XM_011530922.2.\n"
            + "DR UniGene; Hs.376474; -.\n"
            + "DR ProteinModelPortal; W0TYI6; -.\n"
            + "DR SMR; W0TYI6; -.\n"
            + "DR STRING; 9606.ENSP00000347119; -.\n"
            + "DR PaxDb; W0TYI6; -.\n"
            + "DR GeneID; 286436; -.\n"
            + "DR KEGG; hsa:286436; -.\n"
            + "DR UCSC; uc004els.2; human.\n"
            + "DR CTD; 286436; -.\n"
            + "DR eggNOG; KOG1744; Eukaryota.\n"
            + "DR eggNOG; ENOG4111NV5; LUCA.\n"
            + "DR KO; K11252; -.\n"
            + "DR OMA; PKEANSM; -.\n"
            + "DR OrthoDB; EOG091G0WZB; -.\n"
            + "DR GenomeRNAi; 286436; -.\n"
            + "DR ExpressionAtlas; W0TYI6; baseline and differential.\n"
            + "DR GO; GO:0000786; C:nucleosome; IEA:UniProtKB-KW.\n"
            + "DR GO; GO:0005634; C:nucleus; IEA:UniProtKB-SubCell.\n"
            + "DR GO; GO:0003677; F:DNA binding; IEA:UniProtKB-KW.\n"
            + "DR Gene3D; 1.10.20.10; -; 1.\n"
            + "DR InterPro; IPR009072; Histone-fold.\n"
            + "DR InterPro; IPR007125; Histone_H2A/H2B/H3.\n"
            + "DR InterPro; IPR000558; Histone_H2B.\n"
            + "DR PANTHER; PTHR23428; PTHR23428; 1.\n"
            + "DR Pfam; PF00125; Histone; 1.\n"
            + "DR PRINTS; PR00621; HISTONEH2B.\n"
            + "DR SMART; SM00427; H2B; 1.\n"
            + "DR SUPFAM; SSF47113; SSF47113; 1.\n"
            + "PE 3: Inferred from homology;\n"
            + "KW Chromosome {ECO:0000256|SAAS:SAAS00454795};\n"
            + "KW DNA-binding {ECO:0000256|SAAS:SAAS00454795};\n"
            + "KW Nucleosome core {ECO:0000256|SAAS:SAAS00454795};\n"
            + "KW Nucleus {ECO:0000256|SAAS:SAAS00486878}.\n"
            + "FT DOMAIN 1 123 Histone. {ECO:0000259|Pfam:PF00125}.\n"
            + "SQ SEQUENCE 154 AA; 17001 MW; 700C131F5F5818B0 CRC64;\n"
            + " MAAASAMAEA SSETTSEEGQ SIQEPKEANS TKAQKQKRRG CRGSRRRHAN RRGDSFGDSF\n"
            + " TPYFPRVLKQ VHQGLSLSQE AVSVMDSMIH DILDRIATEA GQLAHYTKRV TITSRDIQMA\n"
            + " VRLLLPGKMG KLAEAQGTNA ALRTSLCAIW QQRK\n"
            + "//";

        final Matcher entries = ENTRY.matcher(string);
        while (entries.find()) {
            final String entry = entries.group();
            // Field extraction is confined to this entry, so the optional VAR_SEQ group
            // cannot be satisfied by a feature that belongs to a later entry.
            final Matcher fields = FIELDS.matcher(entry);
            if (!fields.find()) {
                continue; // entry has no AC/OX/sequence lines in the expected order
            }
            System.out.println("Full match: " + entry);
            for (int i = 1; i <= fields.groupCount(); i++) {
                // group(i) is null for an optional group that did not participate.
                System.out.println("Group " + i + ": " + fields.group(i));
            }
        }
    }
}
// Please keep in mind that these code samples are automatically generated and are not
// guaranteed to work. If you find any syntax errors, feel free to submit a bug report.
// For a full regex reference for Java, please visit:
// https://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html