Regular Expressions 101

Save & Share

Flavor

  • PCRE2 (PHP >=7.3)
  • PCRE (PHP <7.3)
  • ECMAScript (JavaScript)
  • Python
  • Golang
  • Java 8
  • .NET 7.0 (C#)
  • Rust
  • Regex Flavor Guide

Function

  • Match
  • Substitution
  • List
  • Unit Tests

Tools

Sponsors
There are currently no sponsors. Become a sponsor today!
An explanation of your regex will be automatically generated as you type.
Detailed match information will be displayed here automatically.
  • All Tokens
  • Common Tokens
  • General Tokens
  • Anchors
  • Meta Sequences
  • Quantifiers
  • Group Constructs
  • Character Classes
  • Flags/Modifiers
  • Substitution
  • A single character of: a, b or c
    [abc]
  • A character except: a, b or c
    [^abc]
  • A character in the range: a-z
    [a-z]
  • A character not in the range: a-z
    [^a-z]
  • A character in the range: a-z or A-Z
    [a-zA-Z]
  • Any single character (except a line break, unless the s/dotall flag is set)
    .
  • Alternate - match either a or b
    a|b
  • Any whitespace character
    \s
  • Any non-whitespace character
    \S
  • Any digit
    \d
  • Any non-digit
    \D
  • Any word character (letter, digit or underscore)
    \w
  • Any non-word character
    \W
  • Non-capturing group
    (?:...)
  • Capturing group
    (...)
  • Zero or one of a
    a?
  • Zero or more of a
    a*
  • One or more of a
    a+
  • Exactly 3 of a
    a{3}
  • 3 or more of a
    a{3,}
  • Between 3 and 6 of a
    a{3,6}
  • Start of string (or start of a line when the m/multiline flag is set)
    ^
  • End of string (or end of a line when the m/multiline flag is set)
    $
  • A word boundary
    \b
  • Non-word boundary
    \B

Regular Expression
No Match

r"
"
gx

Test String

Code Generator

Generated Code

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Runs a multi-line regular expression over a few UniProtKB flat-file entries and prints,
 * for every match, the full match followed by each capturing group.
 *
 * <p>Capturing groups of {@link #ENTRY_PATTERN}:
 * <ol>
 *   <li>the first accession on the {@code AC} line,</li>
 *   <li>the numeric taxonomy id on the {@code OX} line,</li>
 *   <li>the text between a line starting with five whitespace characters and the
 *       terminating {@code //}.</li>
 * </ol>
 */
public class Example {

    /**
     * Entry-matching expression.
     *
     * <p>The expression was originally written for a flavor that accepts embedded comments
     * ({@code (?#...)}). {@code java.util.regex} does not support that construct and rejects
     * it with a {@code PatternSyntaxException}, so the comment now lives here instead:
     *
     * <pre>
     *   how to optionally capture group ^FT\s+VAR_SEQ.*?\/FTId=\w+\.
     * </pre>
     *
     * <p>The comment used to sit between two lazy {@code .*?} runs. With the comment gone the
     * two runs are adjacent and match exactly what a single {@code .*?} matches, so they are
     * collapsed into one; this avoids quadratic backtracking when an attempt fails.
     *
     * <p>NOTE(review): {@code ^\s{5}} needs five whitespace characters at the start of a
     * sequence line, while the sequence lines in {@link #SAMPLE_ENTRIES} begin with a single
     * space. Confirm that the sample's column spacing survived copy/paste; as written, the
     * sample may produce no match.
     */
    private static final String ENTRY_REGEX =
            "(?ms)ID\\s+.*?^AC\\s+(\\w+);.*?^OX\\s+NCBI_TaxID=(\\d+).*?^\\s{5}(.*?)//";

    /** Compiled once; {@code COMMENTS} mirrors the {@code x} flag of the original expression. */
    private static final Pattern ENTRY_PATTERN = Pattern.compile(ENTRY_REGEX, Pattern.COMMENTS);

    /** Sample input: five flat-file entries, each terminated by a {@code //} line. */
    private static final String SAMPLE_ENTRIES =
            "ID TTC29_HUMAN Reviewed; 475 AA.\n"
            + "AC Q8NA56; A4GU95; Q9BXB6;\n"
            + "DT 10-JUL-2007, integrated into UniProtKB/Swiss-Prot.\n"
            + "DT 10-JUL-2007, sequence version 2.\n"
            + "DT 15-FEB-2017, entry version 114.\n"
            + "DE RecName: Full=Tetratricopeptide repeat protein 29;\n"
            + "DE Short=TPR repeat protein 29;\n"
            + "DE AltName: Full=Protein TBPP2A;\n"
            + "DE AltName: Full=Testis development protein NYD-SP14;\n"
            + "GN Name=TTC29;\n"
            + "OS Homo sapiens (Human).\n"
            + "OC Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi;\n"
            + "OC Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini;\n"
            + "OC Catarrhini; Hominidae; Homo.\n"
            + "OX NCBI_TaxID=9606;\n"
            + "RN [1]\n"
            + "RP NUCLEOTIDE SEQUENCE [MRNA] (ISOFORM 1), AND VARIANTS TYR-140 AND\n"
            + "RP THR-276.\n"
            + "RC TISSUE=Testis;\n"
            + "RA Sha J.H.;\n"
            + "RT \"Cloning and identification of a novel gene related development gene\n"
            + "RT NYD-SP14.\";\n"
            + "RL Submitted (FEB-2001) to the EMBL/GenBank/DDBJ databases.\n"
            + "RN [2]\n"
            + "RP NUCLEOTIDE SEQUENCE [MRNA] (ISOFORM 1), AND VARIANTS TYR-140 AND\n"
            + "RP THR-276.\n"
            + "RC TISSUE=Mammary cancer;\n"
            + "RA Li J.M., Cheng J., Wang Q.;\n"
            + "RL Submitted (FEB-2007) to the EMBL/GenBank/DDBJ databases.\n"
            + "RN [3]\n"
            + "RP NUCLEOTIDE SEQUENCE [LARGE SCALE MRNA] (ISOFORM 2).\n"
            + "RC TISSUE=Testis;\n"
            + "RX PubMed=14702039; DOI=10.1038/ng1285;\n"
            + "RA Ota T., Suzuki Y., Nishikawa T., Otsuki T., Sugiyama T., Irie R.,\n"
            + "RA Wakamatsu A., Hayashi K., Sato H., Nagai K., Kimura K., Makita H.,\n"
            + "RA Sekine M., Obayashi M., Nishi T., Shibahara T., Tanaka T., Ishii S.,\n"
            + "RA Yamamoto J., Saito K., Kawai Y., Isono Y., Nakamura Y., Nagahari K.,\n"
            + "RA Murakami K., Yasuda T., Iwayanagi T., Wagatsuma M., Shiratori A.,\n"
            + "RA Sudo H., Hosoiri T., Kaku Y., Kodaira H., Kondo H., Sugawara M.,\n"
            + "RA Takahashi M., Kanda K., Yokoi T., Furuya T., Kikkawa E., Omura Y.,\n"
            + "RA Abe K., Kamihara K., Katsuta N., Sato K., Tanikawa M., Yamazaki M.,\n"
            + "RA Ninomiya K., Ishibashi T., Yamashita H., Murakawa K., Fujimori K.,\n"
            + "RA Tanai H., Kimata M., Watanabe M., Hiraoka S., Chiba Y., Ishida S.,\n"
            + "RA Ono Y., Takiguchi S., Watanabe S., Yosida M., Hotuta T., Kusano J.,\n"
            + "RA Kanehori K., Takahashi-Fujii A., Hara H., Tanase T.-O., Nomura Y.,\n"
            + "RA Togiya S., Komai F., Hara R., Takeuchi K., Arita M., Imose N.,\n"
            + "RA Musashino K., Yuuki H., Oshima A., Sasaki N., Aotsuka S.,\n"
            + "RA Yoshikawa Y., Matsunawa H., Ichihara T., Shiohata N., Sano S.,\n"
            + "RA Moriya S., Momiyama H., Satoh N., Takami S., Terashima Y., Suzuki O.,\n"
            + "RA Nakagawa S., Senoh A., Mizoguchi H., Goto Y., Shimizu F., Wakebe H.,\n"
            + "RA Hishigaki H., Watanabe T., Sugiyama A., Takemoto M., Kawakami B.,\n"
            + "RA Yamazaki M., Watanabe K., Kumagai A., Itakura S., Fukuzumi Y.,\n"
            + "RA Fujimori Y., Komiyama M., Tashiro H., Tanigami A., Fujiwara T.,\n"
            + "RA Ono T., Yamada K., Fujii Y., Ozaki K., Hirao M., Ohmori Y.,\n"
            + "RA Kawabata A., Hikiji T., Kobatake N., Inagaki H., Ikema Y., Okamoto S.,\n"
            + "RA Okitani R., Kawakami T., Noguchi S., Itoh T., Shigeta K., Senba T.,\n"
            + "RA Matsumura K., Nakajima Y., Mizuno T., Morinaga M., Sasaki M.,\n"
            + "RA Togashi T., Oyama M., Hata H., Watanabe M., Komatsu T.,\n"
            + "RA Mizushima-Sugano J., Satoh T., Shirai Y., Takahashi Y., Nakagawa K.,\n"
            + "RA Okumura K., Nagase T., Nomura N., Kikuchi H., Masuho Y., Yamashita R.,\n"
            + "RA Nakai K., Yada T., Nakamura Y., Ohara O., Isogai T., Sugano S.;\n"
            + "RT \"Complete sequencing and characterization of 21,243 full-length human\n"
            + "RT cDNAs.\";\n"
            + "RL Nat. Genet. 36:40-45(2004).\n"
            + "RN [4]\n"
            + "RP NUCLEOTIDE SEQUENCE [LARGE SCALE GENOMIC DNA].\n"
            + "RX PubMed=15815621; DOI=10.1038/nature03466;\n"
            + "RA Hillier L.W., Graves T.A., Fulton R.S., Fulton L.A., Pepin K.H.,\n"
            + "RA Minx P., Wagner-McPherson C., Layman D., Wylie K., Sekhon M.,\n"
            + "RA Becker M.C., Fewell G.A., Delehaunty K.D., Miner T.L., Nash W.E.,\n"
            + "RA Kremitzki C., Oddy L., Du H., Sun H., Bradshaw-Cordum H., Ali J.,\n"
            + "RA Carter J., Cordes M., Harris A., Isak A., van Brunt A., Nguyen C.,\n"
            + "RA Du F., Courtney L., Kalicki J., Ozersky P., Abbott S., Armstrong J.,\n"
            + "RA Belter E.A., Caruso L., Cedroni M., Cotton M., Davidson T., Desai A.,\n"
            + "RA Elliott G., Erb T., Fronick C., Gaige T., Haakenson W., Haglund K.,\n"
            + "RA Holmes A., Harkins R., Kim K., Kruchowski S.S., Strong C.M.,\n"
            + "RA Grewal N., Goyea E., Hou S., Levy A., Martinka S., Mead K.,\n"
            + "RA McLellan M.D., Meyer R., Randall-Maher J., Tomlinson C.,\n"
            + "RA Dauphin-Kohlberg S., Kozlowicz-Reilly A., Shah N.,\n"
            + "RA Swearengen-Shahid S., Snider J., Strong J.T., Thompson J., Yoakum M.,\n"
            + "RA Leonard S., Pearman C., Trani L., Radionenko M., Waligorski J.E.,\n"
            + "RA Wang C., Rock S.M., Tin-Wollam A.-M., Maupin R., Latreille P.,\n"
            + "RA Wendl M.C., Yang S.-P., Pohl C., Wallis J.W., Spieth J., Bieri T.A.,\n"
            + "RA Berkowicz N., Nelson J.O., Osborne J., Ding L., Meyer R., Sabo A.,\n"
            + "RA Shotland Y., Sinha P., Wohldmann P.E., Cook L.L., Hickenbotham M.T.,\n"
            + "RA Eldred J., Williams D., Jones T.A., She X., Ciccarelli F.D.,\n"
            + "RA Izaurralde E., Taylor J., Schmutz J., Myers R.M., Cox D.R., Huang X.,\n"
            + "RA McPherson J.D., Mardis E.R., Clifton S.W., Warren W.C.,\n"
            + "RA Chinwalla A.T., Eddy S.R., Marra M.A., Ovcharenko I., Furey T.S.,\n"
            + "RA Miller W., Eichler E.E., Bork P., Suyama M., Torrents D.,\n"
            + "RA Waterston R.H., Wilson R.K.;\n"
            + "RT \"Generation and annotation of the DNA sequences of human chromosomes 2\n"
            + "RT and 4.\";\n"
            + "RL Nature 434:724-731(2005).\n"
            + "CC -!- ALTERNATIVE PRODUCTS:\n"
            + "CC Event=Alternative splicing; Named isoforms=2;\n"
            + "CC Name=1;\n"
            + "CC IsoId=Q8NA56-1; Sequence=Displayed;\n"
            + "CC Name=2;\n"
            + "CC IsoId=Q8NA56-2; Sequence=VSP_026638;\n"
            + "CC Note=No experimental confirmation available.;\n"
            + "CC -!- SEQUENCE CAUTION:\n"
            + "CC Sequence=AAK29064.1; Type=Frameshift; Positions=467; Evidence={ECO:0000305};\n"
            + "CC -----------------------------------------------------------------------\n"
            + "CC Copyrighted by the UniProt Consortium, see http://www.uniprot.org/terms\n"
            + "CC Distributed under the Creative Commons Attribution-NoDerivs License\n"
            + "CC -----------------------------------------------------------------------\n"
            + "DR EMBL; AF345910; AAK29064.1; ALT_FRAME; mRNA.\n"
            + "DR EMBL; EF432564; ABO31099.1; -; mRNA.\n"
            + "DR EMBL; AK093145; BAC04072.1; -; mRNA.\n"
            + "DR EMBL; AC092435; -; NOT_ANNOTATED_CDS; Genomic_DNA.\n"
            + "DR EMBL; AC097497; -; NOT_ANNOTATED_CDS; Genomic_DNA.\n"
            + "DR EMBL; AC093887; -; NOT_ANNOTATED_CDS; Genomic_DNA.\n"
            + "DR CCDS; CCDS47141.1; -. [Q8NA56-1]\n"
            + "DR RefSeq; NP_001287690.1; NM_001300761.2.\n"
            + "DR RefSeq; NP_001304735.1; NM_001317806.1.\n"
            + "DR RefSeq; NP_114162.2; NM_031956.3. [Q8NA56-1]\n"
            + "DR UniGene; Hs.378893; -.\n"
            + "DR ProteinModelPortal; Q8NA56; -.\n"
            + "DR SMR; Q8NA56; -.\n"
            + "DR BioGrid; 123805; 16.\n"
            + "DR STRING; 9606.ENSP00000316740; -.\n"
            + "DR iPTMnet; Q8NA56; -.\n"
            + "DR PhosphoSitePlus; Q8NA56; -.\n"
            + "DR BioMuta; TTC29; -.\n"
            + "DR DMDM; 152112335; -.\n"
            + "DR PaxDb; Q8NA56; -.\n"
            + "DR PeptideAtlas; Q8NA56; -.\n"
            + "DR PRIDE; Q8NA56; -.\n"
            + "DR Ensembl; ENST00000325106; ENSP00000316740; ENSG00000137473. [Q8NA56-1]\n"
            + "DR GeneID; 83894; -.\n"
            + "DR KEGG; hsa:83894; -.\n"
            + "DR UCSC; uc003ikw.5; human. [Q8NA56-1]\n"
            + "DR CTD; 83894; -.\n"
            + "DR DisGeNET; 83894; -.\n"
            + "DR GeneCards; TTC29; -.\n"
            + "DR HGNC; HGNC:29936; TTC29.\n"
            + "DR HPA; HPA037006; -.\n"
            + "DR HPA; HPA061473; -.\n"
            + "DR neXtProt; NX_Q8NA56; -.\n"
            + "DR OpenTargets; ENSG00000137473; -.\n"
            + "DR PharmGKB; PA145147799; -.\n"
            + "DR eggNOG; ENOG410IFVK; Eukaryota.\n"
            + "DR eggNOG; ENOG410XPVA; LUCA.\n"
            + "DR GeneTree; ENSGT00390000008611; -.\n"
            + "DR HOGENOM; HOG000067965; -.\n"
            + "DR HOVERGEN; HBG108611; -.\n"
            + "DR InParanoid; Q8NA56; -.\n"
            + "DR PhylomeDB; Q8NA56; -.\n"
            + "DR TreeFam; TF328344; -.\n"
            + "DR ChiTaRS; TTC29; human.\n"
            + "DR GenomeRNAi; 83894; -.\n"
            + "DR PRO; PR:Q8NA56; -.\n"
            + "DR Proteomes; UP000005640; Chromosome 4.\n"
            + "DR Bgee; ENSG00000137473; -.\n"
            + "DR CleanEx; HS_TTC29; -.\n"
            + "DR ExpressionAtlas; Q8NA56; baseline and differential.\n"
            + "DR Genevisible; Q8NA56; HS.\n"
            + "DR Gene3D; 1.25.40.10; -; 2.\n"
            + "DR InterPro; IPR013026; TPR-contain_dom.\n"
            + "DR InterPro; IPR011990; TPR-like_helical_dom.\n"
            + "DR InterPro; IPR019734; TPR_repeat.\n"
            + "DR SMART; SM00028; TPR; 4.\n"
            + "DR SUPFAM; SSF48452; SSF48452; 1.\n"
            + "DR PROSITE; PS50293; TPR_REGION; 2.\n"
            + "PE 2: Evidence at transcript level;\n"
            + "KW Alternative splicing; Complete proteome; Polymorphism;\n"
            + "KW Reference proteome; Repeat; TPR repeat.\n"
            + "FT CHAIN 1 475 Tetratricopeptide repeat protein 29.\n"
            + "FT /FTId=PRO_0000294435.\n"
            + "FT REPEAT 182 215 TPR 1.\n"
            + "FT REPEAT 234 267 TPR 2.\n"
            + "FT REPEAT 274 307 TPR 3.\n"
            + "FT REPEAT 314 347 TPR 4.\n"
            + "FT REPEAT 354 387 TPR 5.\n"
            + "FT VAR_SEQ 1 1 M -> MIPMFTVTLEDSGTLWKSLHSSSESE (in\n"
            + "FT isoform 2).\n"
            + "FT {ECO:0000303|PubMed:14702039}.\n"
            + "FT /FTId=VSP_026638.\n"
            + "FT VARIANT 94 94 L -> P (in dbSNP:rs35123039).\n"
            + "FT /FTId=VAR_033179.\n"
            + "FT VARIANT 140 140 H -> Y (in dbSNP:rs17610219).\n"
            + "FT {ECO:0000269|Ref.1, ECO:0000269|Ref.2}.\n"
            + "FT /FTId=VAR_033180.\n"
            + "FT VARIANT 276 276 A -> T (in dbSNP:rs10013280).\n"
            + "FT {ECO:0000269|Ref.1, ECO:0000269|Ref.2}.\n"
            + "FT /FTId=VAR_033181.\n"
            + "FT CONFLICT 239 239 L -> F (in Ref. 3; BAC04072).\n"
            + "FT {ECO:0000305}.\n"
            + "SQ SEQUENCE 475 AA; 55082 MW; 09BF33E42330C53A CRC64;\n"
            + " MTTLPPLPMT RPKLTALARQ KLPCSSRKIP RSQLIKEKDD IDHYLEVNFK GLSKEEVAAY\n"
            + " RNSYKKNICV DMLRDGYHKS FTELFALMER WDALREAARV RSLFWLQKPL EEQPDKLDYL\n"
            + " YHYLTRAEDA ERKESFEDVH NNLYALACYF NNSEDKWVRN HFYERCFKIA QLIKIDCGKK\n"
            + " EAEAHMHMGL LYEEDGQLLE AAEHYEAFHQ LTQGRIWKDE TGRSLNLLAC ESLLRTYRLL\n"
            + " SDKMLENKEY KQAIKILIKA SEIAKEGSDK KMEAEASYYL GLAHLAAEEY ETALTVLDTY\n"
            + " CKISTDLDDD LSLGRGYEAI AKVLQSQGEM TEAIKYLKKF VKIARNNFQS LDLVRASTML\n"
            + " GDIYNEKGYY NKASECFQQA FDTTVELMSM PLMDETKVHY GIAKAHQMML TVNNYIESAD\n"
            + " LTSLNYLLSW KESRGNIEPD PVTEEFRGST VEAVSQNSER LEELSRFPGD QKNET\n"
            + "//\n"
            + "ID 2NPD_NEUCR Reviewed; 378 AA.\n"
            + "AC Q01284; Q7RV78;\n"
            + "DT 01-NOV-1997, integrated into UniProtKB/Swiss-Prot.\n"
            + "DT 01-NOV-1996, sequence version 1.\n"
            + "DT 15-FEB-2017, entry version 106.\n"
            + "DE RecName: Full=Nitronate monooxygenase;\n"
            + "DE EC=1.13.12.16;\n"
            + "DE AltName: Full=2-nitropropane dioxygenase;\n"
            + "DE Short=2-NPD;\n"
            + "DE AltName: Full=Nitroalkane oxidase;\n"
            + "DE Flags: Precursor;\n"
            + "GN Name=ncd-2; ORFNames=G17A4.200, NCU03949;\n"
            + "OS Neurospora crassa (strain ATCC 24698 / 74-OR23-1A / CBS 708.71 / DSM\n"
            + "OS 1257 / FGSC 987).\n"
            + "OC Eukaryota; Fungi; Dikarya; Ascomycota; Pezizomycotina;\n"
            + "OC Sordariomycetes; Sordariomycetidae; Sordariales; Sordariaceae;\n"
            + "OC Neurospora.\n"
            + "OX NCBI_TaxID=367110;\n"
            + "RN [1]\n"
            + "RP NUCLEOTIDE SEQUENCE [MRNA], CATALYTIC ACTIVITY, SUBSTRATE SPECIFICITY,\n"
            + "RP COFACTOR, BIOPHYSICOCHEMICAL PROPERTIES, SUBUNIT, AND REACTION\n"
            + "RP MECHANISM.\n"
            + "RC STRAIN=ATCC 10337 / FGSC 1758 / NBRC 6067 / IMI 53239;\n"
            + "RX PubMed=9501443;\n"
            + "RA Gorlatova N., Tchorzewski M., Kurihara T., Soda K., Esaki N.;\n"
            + "RT \"Purification, characterization, and mechanism of a flavin\n"
            + "RT mononucleotide-dependent 2-nitropropane dioxygenase from Neurospora\n"
            + "RT crassa.\";\n"
            + "RL Appl. Environ. Microbiol. 64:1029-1033(1998).\n"
            + "RN [2]\n"
            + "RP NUCLEOTIDE SEQUENCE [LARGE SCALE GENOMIC DNA].\n"
            + "RC STRAIN=ATCC 24698 / 74-OR23-1A / CBS 708.71 / DSM 1257 / FGSC 987;\n"
            + "RX PubMed=12655011; DOI=10.1093/nar/gkg293;\n"
            + "RA Mannhaupt G., Montrone C., Haase D., Mewes H.-W., Aign V.,\n"
            + "RA Hoheisel J.D., Fartmann B., Nyakatura G., Kempken F., Maier J.,\n"
            + "RA Schulte U.;\n"
            + "RT \"What's in the genome of a filamentous fungus? Analysis of the\n"
            + "RT Neurospora genome sequence.\";\n"
            + "RL Nucleic Acids Res. 31:1944-1954(2003).\n"
            + "RN [3]\n"
            + "RP NUCLEOTIDE SEQUENCE [LARGE SCALE GENOMIC DNA].\n"
            + "RC STRAIN=ATCC 24698 / 74-OR23-1A / CBS 708.71 / DSM 1257 / FGSC 987;\n"
            + "RX PubMed=12712197; DOI=10.1038/nature01554;\n"
            + "RA Galagan J.E., Calvo S.E., Borkovich K.A., Selker E.U., Read N.D.,\n"
            + "RA Jaffe D.B., FitzHugh W., Ma L.-J., Smirnov S., Purcell S., Rehman B.,\n"
            + "RA Elkins T., Engels R., Wang S., Nielsen C.B., Butler J., Endrizzi M.,\n"
            + "RA Qui D., Ianakiev P., Bell-Pedersen D., Nelson M.A.,\n"
            + "RA Werner-Washburne M., Selitrennikoff C.P., Kinsey J.A., Braun E.L.,\n"
            + "RA Zelter A., Schulte U., Kothe G.O., Jedd G., Mewes H.-W., Staben C.,\n"
            + "RA Marcotte E., Greenberg D., Roy A., Foley K., Naylor J.,\n"
            + "RA Stange-Thomann N., Barrett R., Gnerre S., Kamal M., Kamvysselis M.,\n"
            + "RA Mauceli E.W., Bielke C., Rudd S., Frishman D., Krystofova S.,\n"
            + "RA Rasmussen C., Metzenberg R.L., Perkins D.D., Kroken S., Cogoni C.,\n"
            + "RA Macino G., Catcheside D.E.A., Li W., Pratt R.J., Osmani S.A.,\n"
            + "RA DeSouza C.P.C., Glass N.L., Orbach M.J., Berglund J.A., Voelker R.,\n"
            + "RA Yarden O., Plamann M., Seiler S., Dunlap J.C., Radford A., Aramayo R.,\n"
            + "RA Natvig D.O., Alex L.A., Mannhaupt G., Ebbole D.J., Freitag M.,\n"
            + "RA Paulsen I., Sachs M.S., Lander E.S., Nusbaum C., Birren B.W.;\n"
            + "RT \"The genome sequence of the filamentous fungus Neurospora crassa.\";\n"
            + "RL Nature 422:859-868(2003).\n"
            + "RN [4]\n"
            + "RP COFACTOR, SUBSTRATE SPECIFICITY, AND REACTION MECHANISM.\n"
            + "RX PubMed=19577534; DOI=10.1016/j.abb.2009.06.018;\n"
            + "RA Gadda G., Francis K.;\n"
            + "RT \"Nitronate monooxygenase, a model for anionic flavin semiquinone\n"
            + "RT intermediates in oxidative catalysis.\";\n"
            + "RL Arch. Biochem. Biophys. 493:53-61(2010).\n"
            + "CC -!- FUNCTION: Catalyzes the oxidation of alkyl nitronates to produce\n"
            + "CC the corresponding carbonyl compounds and nitrites. Anionic forms\n"
            + "CC of nitroalkanes are much better substrates than are neutral forms.\n"
            + "CC -!- CATALYTIC ACTIVITY: Ethylnitronate + O(2) = acetaldehyde + nitrite\n"
            + "CC + other products. {ECO:0000269|PubMed:9501443}.\n"
            + "CC -!- COFACTOR:\n"
            + "CC Name=FMN; Xref=ChEBI:CHEBI:58210;\n"
            + "CC Evidence={ECO:0000269|PubMed:19577534,\n"
            + "CC ECO:0000269|PubMed:9501443};\n"
            + "CC Note=Binds 1 FMN per subunit. {ECO:0000269|PubMed:19577534,\n"
            + "CC ECO:0000269|PubMed:9501443};\n"
            + "CC -!- BIOPHYSICOCHEMICAL PROPERTIES:\n"
            + "CC Kinetic parameters:\n"
            + "CC KM=3.1 mM for 2-nitropropane {ECO:0000269|PubMed:9501443};\n"
            + "CC KM=6 mM for nitroethane {ECO:0000269|PubMed:9501443};\n"
            + "CC KM=8.3 mM for 1-nitropropane {ECO:0000269|PubMed:9501443};\n"
            + "CC -!- SUBUNIT: Homodimer. {ECO:0000269|PubMed:9501443}.\n"
            + "CC -!- SIMILARITY: Belongs to the nitronate monooxygenase family.\n"
            + "CC {ECO:0000305}.\n"
            + "CC -----------------------------------------------------------------------\n"
            + "CC Copyrighted by the UniProt Consortium, see http://www.uniprot.org/terms\n"
            + "CC Distributed under the Creative Commons Attribution-NoDerivs License\n"
            + "CC -----------------------------------------------------------------------\n"
            + "DR EMBL; U22530; AAA64218.1; -; mRNA.\n"
            + "DR EMBL; BX908812; CAF06155.1; -; Genomic_DNA.\n"
            + "DR EMBL; CM002241; EAA28352.1; -; Genomic_DNA.\n"
            + "DR PIR; T46693; T46693.\n"
            + "DR RefSeq; XP_957588.1; XM_952495.3.\n"
            + "DR ProteinModelPortal; Q01284; -.\n"
            + "DR EnsemblFungi; EAA28352; EAA28352; NCU03949.\n"
            + "DR GeneID; 3873678; -.\n"
            + "DR KEGG; ncr:NCU03949; -.\n"
            + "DR EuPathDB; FungiDB:NCU03949; -.\n"
            + "DR HOGENOM; HOG000123285; -.\n"
            + "DR InParanoid; Q01284; -.\n"
            + "DR KO; K00459; -.\n"
            + "DR OMA; VDAGGHQ; -.\n"
            + "DR OrthoDB; EOG092651WG; -.\n"
            + "DR BioCyc; MetaCyc:MONOMER-302; -.\n"
            + "DR BRENDA; 1.13.11.32; 3627.\n"
            + "DR BRENDA; 1.13.12.16; 3627.\n"
            + "DR SABIO-RK; Q01284; -.\n"
            + "DR Proteomes; UP000001805; Chromosome 5, Linkage Group VI.\n"
            + "DR GO; GO:0005634; C:nucleus; IBA:GO_Central.\n"
            + "DR GO; GO:0018580; F:nitronate monooxygenase activity; IEA:UniProtKB-EC.\n"
            + "DR GO; GO:0003700; F:transcription factor activity, sequence-specific DNA binding; IBA:GO_Central.\n"
            + "DR GO; GO:0009410; P:response to xenobiotic stimulus; IBA:GO_Central.\n"
            + "DR Gene3D; 3.20.20.70; -; 1.\n"
            + "DR InterPro; IPR013785; Aldolase_TIM.\n"
            + "DR InterPro; IPR004136; NMO.\n"
            + "DR Pfam; PF03060; NMO; 1.\n"
            + "PE 1: Evidence at protein level;\n"
            + "KW Complete proteome; Flavoprotein; FMN; Monooxygenase; Oxidoreductase;\n"
            + "KW Reference proteome.\n"
            + "FT PROPEP 1 15 {ECO:0000255}.\n"
            + "FT /FTId=PRO_0000020575.\n"
            + "FT CHAIN 16 378 Nitronate monooxygenase.\n"
            + "FT /FTId=PRO_0000020576.\n"
            + "FT NP_BIND 37 39 FMN. {ECO:0000250}.\n"
            + "FT NP_BIND 229 231 FMN. {ECO:0000250}.\n"
            + "FT NP_BIND 252 253 FMN. {ECO:0000250}.\n"
            + "FT ACT_SITE 196 196 Proton acceptor. {ECO:0000255}.\n"
            + "FT BINDING 196 196 Substrate. {ECO:0000250}.\n"
            + "SQ SEQUENCE 378 AA; 39916 MW; E453EB43FD23E441 CRC64;\n"
            + " MHFPGHSSKK EESAQAALTK LNSWFPTTKN PVIISAPMYL IANGTLAAEV SKAGGIGFVA\n"
            + " GGSDFRPGSS HLTALSTELA SARSRLGLTD RPLTPLPGIG VGLILTHTIS VPYVTDTVLP\n"
            + " ILIEHSPQAV WLFANDPDFE ASSEPGAKGT AKQIIEALHA SGFVVFFQVG TVKDARKAAA\n"
            + " DGADVIVAQG IDAGGHQLAT GSGIVSLVPE VRDMLDREFK EREVVVVAAG GVADGRGVVG\n"
            + " ALGLGAEGVV LGTRFTVAVE ASTPEFRRKV ILETNDGGLN TVKSHFHDQI NCNTIWHNVY\n"
            + " DGRAVRNASY DDHAAGVPFE ENHKKFKEAA SSGDNSRAVT WSGTAVGLIK DQRPAGDIVR\n"
            + " ELREEAKERI KKIQAFAA\n"
            + "//\n"
            + "ID 5NTD_LUTLO Reviewed; 572 AA.\n"
            + "AC Q9XZ43;\n"
            + "DT 11-JAN-2001, integrated into UniProtKB/Swiss-Prot.\n"
            + "DT 01-NOV-1999, sequence version 1.\n"
            + "DT 12-APR-2017, entry version 83.\n"
            + "DE RecName: Full=Protein 5NUC;\n"
            + "DE Includes:\n"
            + "DE RecName: Full=UDP-sugar hydrolase;\n"
            + "DE EC=3.6.1.45;\n"
            + "DE AltName: Full=UDP-sugar diphosphatase;\n"
            + "DE AltName: Full=UDP-sugar pyrophosphatase;\n"
            + "DE Includes:\n"
            + "DE RecName: Full=5'-nucleotidase;\n"
            + "DE Short=5'-NT;\n"
            + "DE EC=3.1.3.5;\n"
            + "DE Flags: Precursor;\n"
            + "GN Name=5NUC;\n"
            + "OS Lutzomyia longipalpis (Sand fly).\n"
            + "OC Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta;\n"
            + "OC Pterygota; Neoptera; Holometabola; Diptera; Nematocera; Psychodoidea;\n"
            + "OC Psychodidae; Lutzomyia; Lutzomyia.\n"
            + "OX NCBI_TaxID=7200;\n"
            + "RN [1]\n"
            + "RP NUCLEOTIDE SEQUENCE [MRNA].\n"
            + "RC STRAIN=Jacobina; TISSUE=Salivary gland;\n"
            + "RX PubMed=10611354; DOI=10.1073/pnas.96.26.15155;\n"
            + "RA Charlab R., Valenzuela J.G., Rowton E.D., Ribeiro J.M.;\n"
            + "RT \"Toward an understanding of the biochemical and pharmacological\n"
            + "RT complexity of the saliva of a hematophagous sand fly Lutzomyia\n"
            + "RT longipalpis.\";\n"
            + "RL Proc. Natl. Acad. Sci. U.S.A. 96:15155-15160(1999).\n"
            + "RN [2]\n"
            + "RP CHARACTERIZATION.\n"
            + "RC TISSUE=Salivary gland;\n"
            + "RX PubMed=10727894; DOI=10.1016/S0965-1748(99)00123-X;\n"
            + "RA Ribeiro J.M.C., Rowton E.D., Charlab R.;\n"
            + "RT \"The salivary 5'-nucleotidase/phosphodiesterase of the hematophagus\n"
            + "RT sand fly, Lutzomyia longipalpis.\";\n"
            + "RL Insect Biochem. Mol. Biol. 30:279-285(2000).\n"
            + "RN [3]\n"
            + "RP ERRATUM.\n"
            + "RA Ribeiro J.M.C., Rowton E.D., Charlab R.;\n"
            + "RL Insect Biochem. Mol. Biol. 30:609-609(2000).\n"
            + "CC -!- FUNCTION: Degradation of external UDP-glucose to uridine\n"
            + "CC monophosphate and glucose-1-phosphate, which can then be used by\n"
            + "CC the cell. {ECO:0000250}.\n"
            + "CC -!- CATALYTIC ACTIVITY: UDP-sugar + H(2)O = UMP + alpha-D-aldose 1-\n"
            + "CC phosphate.\n"
            + "CC -!- CATALYTIC ACTIVITY: A 5'-ribonucleotide + H(2)O = a ribonucleoside\n"
            + "CC + phosphate.\n"
            + "CC -!- COFACTOR:\n"
            + "CC Name=Zn(2+); Xref=ChEBI:CHEBI:29105; Evidence={ECO:0000250};\n"
            + "CC -!- SIMILARITY: Belongs to the 5'-nucleotidase family. {ECO:0000305}.\n"
            + "DR EMBL; AF132510; AAD32190.1; -; mRNA.\n"
            + "DR ProteinModelPortal; Q9XZ43; -.\n"
            + "DR SMR; Q9XZ43; -.\n"
            + "DR Proteomes; UP000092461; Unassembled WGS sequence.\n"
            + "DR GO; GO:0008253; F:5'-nucleotidase activity; IEA:UniProtKB-EC.\n"
            + "DR GO; GO:0046872; F:metal ion binding; IEA:UniProtKB-KW.\n"
            + "DR GO; GO:0000166; F:nucleotide binding; IEA:UniProtKB-KW.\n"
            + "DR GO; GO:0008768; F:UDP-sugar diphosphatase activity; IEA:UniProtKB-EC.\n"
            + "DR GO; GO:0009166; P:nucleotide catabolic process; IEA:InterPro.\n"
            + "DR Gene3D; 3.60.21.10; -; 1.\n"
            + "DR Gene3D; 3.90.780.10; -; 1.\n"
            + "DR InterPro; IPR008334; 5'-Nucleotdase_C.\n"
            + "DR InterPro; IPR006146; 5'-Nucleotdase_CS.\n"
            + "DR InterPro; IPR006179; 5_nucleotidase/apyrase.\n"
            + "DR InterPro; IPR004843; Calcineurin-like_PHP_ApaH.\n"
            + "DR InterPro; IPR029052; Metallo-depent_PP-like.\n"
            + "DR PANTHER; PTHR11575; PTHR11575; 1.\n"
            + "DR Pfam; PF02872; 5_nucleotid_C; 1.\n"
            + "DR Pfam; PF00149; Metallophos; 1.\n"
            + "DR PRINTS; PR01607; APYRASEFAMLY.\n"
            + "DR SUPFAM; SSF55816; SSF55816; 1.\n"
            + "DR SUPFAM; SSF56300; SSF56300; 1.\n"
            + "DR PROSITE; PS00785; 5_NUCLEOTIDASE_1; 1.\n"
            + "DR PROSITE; PS00786; 5_NUCLEOTIDASE_2; 1.\n"
            + "PE 1: Evidence at protein level;\n"
            + "KW Complete proteome; Disulfide bond; Glycoprotein; Hydrolase;\n"
            + "KW Metal-binding; Nucleotide-binding; Reference proteome; Signal; Zinc.\n"
            + "FT SIGNAL 1 25 {ECO:0000255}.\n"
            + "FT CHAIN 26 572 Protein 5NUC.\n"
            + "FT /FTId=PRO_0000000023.\n"
            + "FT REGION 512 518 Substrate binding. {ECO:0000250}.\n"
            + "FT METAL 39 39 Zinc 1. {ECO:0000250}.\n"
            + "FT METAL 41 41 Zinc 1. {ECO:0000250}.\n"
            + "FT METAL 93 93 Zinc 1. {ECO:0000250}.\n"
            + "FT METAL 93 93 Zinc 2. {ECO:0000250}.\n"
            + "FT METAL 125 125 Zinc 2. {ECO:0000250}.\n"
            + "FT METAL 227 227 Zinc 2. {ECO:0000250}.\n"
            + "FT METAL 250 250 Zinc 2. {ECO:0000250}.\n"
            + "FT BINDING 361 361 Substrate. {ECO:0000250}.\n"
            + "FT BINDING 399 399 Substrate. {ECO:0000250}.\n"
            + "FT BINDING 404 404 Substrate. {ECO:0000250}.\n"
            + "FT BINDING 427 427 Substrate. {ECO:0000250}.\n"
            + "FT SITE 126 126 Transition state stabilizer.\n"
            + "FT {ECO:0000250}.\n"
            + "FT SITE 129 129 Transition state stabilizer.\n"
            + "FT {ECO:0000250}.\n"
            + "FT CARBOHYD 82 82 N-linked (GlcNAc...). {ECO:0000255}.\n"
            + "FT CARBOHYD 454 454 N-linked (GlcNAc...). {ECO:0000255}.\n"
            + "FT CARBOHYD 490 490 N-linked (GlcNAc...). {ECO:0000255}.\n"
            + "FT DISULFID 54 64 {ECO:0000250}.\n"
            + "FT DISULFID 360 365 {ECO:0000250}.\n"
            + "FT DISULFID 488 491 {ECO:0000250}.\n"
            + "**\n"
            + "** ################# INTERNAL SECTION ##################\n"
            + "**EV ECO:0000250; -; XXX; 01-JAN-1900.\n"
            + "**EV ECO:0000255; -; XXX; 01-JAN-1900.\n"
            + "**EV ECO:0000305; -; XXX; 01-JAN-1900.\n"
            + "**ZB JSG, 08-MAR-2006; RAB, 07-MAR-2017;\n"
            + "SQ SEQUENCE 572 AA; 63354 MW; 69A652338C04536D CRC64;\n"
            + " MLFFLNFFVL VFSIELALLT ASAAAEDGSY EIIILHTNDM HARFDQTNAG SNKCQEKDKI\n"
            + " ASKCYGGFAR VSTMVKKFRE ENGSSVLFLN AGDTYTGTPW FTLYKETIAT EMMNILRPDA\n"
            + " ASLGNHEFDK GVEGLVPFLN GVTFPILTAN LDTSQEPTMT NAKNLKRSMI FTVSGHRVGV\n"
            + " IGYLTPDTKF LSDVGKVNFI PEVEAINTEA QRLKKEENAE IIIVVGHSGL IKDREIAEKC\n"
            + " PLVDIIVGGH SHTFLYTGSQ PDREVPVDVY PVVVTQSSGK KVPIVQAYCF TKYLGYFKVT\n"
            + " INGKGNVVGW TGQPILLNNN IPQDQEVLTA LEKYRERVEN YGNRVIGVSR VILNGGHTEC\n"
            + " RFHECNMGNL ITDAFVYANV ISTPMSTNAW TDASVVLYQS GGIRAPIDPR TAAGSITRLE\n"
            + " LDNVLPFGNA LYVVKVPGNV LRKALEHSVH RYSNTSGWGE FPQVSGLKIR FNVNEEIGKR\n"
            + " VKSVKVLCSN CSQPEYQPLR NKKTYNVIMD SFMKDGGDGY SMFKPLKIIK TLPLGDIETV\n"
            + " EAYIEKMGPI FPAVEGRITV LGGLQKSDED WH\n"
            + "//\n"
            + "ID 14311_ARATH Reviewed; 252 AA.\n"
            + "AC Q9S9Z8; A0JQ87; F4HWN0; Q0WL19;\n"
            + "DT 05-DEC-2001, integrated into UniProtKB/Swiss-Prot.\n"
            + "DT 16-NOV-2011, sequence version 2.\n"
            + "DT 15-MAR-2017, entry version 98.\n"
            + "DE RecName: Full=14-3-3-like protein GF14 omicron;\n"
            + "DE AltName: Full=General regulatory factor 11;\n"
            + "GN Name=GRF11; OrderedLocusNames=At1g34760; ORFNames=F21H2.3;\n"
            + "OS Arabidopsis thaliana (Mouse-ear cress).\n"
            + "OC Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta;\n"
            + "OC Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae;\n"
            + "OC Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Camelineae;\n"
            + "OC Arabidopsis.\n"
            + "OX NCBI_TaxID=3702;\n"
            + "RN [1]\n"
            + "RP NUCLEOTIDE SEQUENCE [MRNA] (ISOFORM 2).\n"
            + "RC TISSUE=Leaf;\n"
            + "RA Alsterfjord M., Rosenquist M., Larsson C., Sommarin M.;\n"
            + "RT \"Novel 14-3-3 isoforms in Arabidopsis thaliana.\";\n"
            + "RL Submitted (NOV-2000) to the EMBL/GenBank/DDBJ databases.\n"
            + "RN [2]\n"
            + "RP NUCLEOTIDE SEQUENCE [LARGE SCALE GENOMIC DNA].\n"
            + "RC STRAIN=cv. Columbia;\n"
            + "RX PubMed=11130712; DOI=10.1038/35048500;\n"
            + "RA Theologis A., Ecker J.R., Palm C.J., Federspiel N.A., Kaul S.,\n"
            + "RA White O., Alonso J., Altafi H., Araujo R., Bowman C.L., Brooks S.Y.,\n"
            + "RA Buehler E., Chan A., Chao Q., Chen H., Cheuk R.F., Chin C.W.,\n"
            + "RA Chung M.K., Conn L., Conway A.B., Conway A.R., Creasy T.H., Dewar K.,\n"
            + "RA Dunn P., Etgu P., Feldblyum T.V., Feng J.-D., Fong B., Fujii C.Y.,\n"
            + "RA Gill J.E., Goldsmith A.D., Haas B., Hansen N.F., Hughes B., Huizar L.,\n"
            + "RA Hunter J.L., Jenkins J., Johnson-Hopson C., Khan S., Khaykin E.,\n"
            + "RA Kim C.J., Koo H.L., Kremenetskaia I., Kurtz D.B., Kwan A., Lam B.,\n"
            + "RA Langin-Hooper S., Lee A., Lee J.M., Lenz C.A., Li J.H., Li Y.-P.,\n"
            + "RA Lin X., Liu S.X., Liu Z.A., Luros J.S., Maiti R., Marziali A.,\n"
            + "RA Militscher J., Miranda M., Nguyen M., Nierman W.C., Osborne B.I.,\n"
            + "RA Pai G., Peterson J., Pham P.K., Rizzo M., Rooney T., Rowley D.,\n"
            + "RA Sakano H., Salzberg S.L., Schwartz J.R., Shinn P., Southwick A.M.,\n"
            + "RA Sun H., Tallon L.J., Tambunga G., Toriumi M.J., Town C.D.,\n"
            + "RA Utterback T., Van Aken S., Vaysberg M., Vysotskaia V.S., Walker M.,\n"
            + "RA Wu D., Yu G., Fraser C.M., Venter J.C., Davis R.W.;\n"
            + "RT \"Sequence and analysis of chromosome 1 of the plant Arabidopsis\n"
            + "RT thaliana.\";\n"
            + "RL Nature 408:816-820(2000).\n"
            + "RN [3]\n"
            + "RP GENOME REANNOTATION.\n"
            + "RC STRAIN=cv. Columbia;\n"
            + "RG The Arabidopsis Information Resource (TAIR);\n"
            + "RL Submitted (APR-2011) to the EMBL/GenBank/DDBJ databases.\n"
            + "RN [4]\n"
            + "RP NUCLEOTIDE SEQUENCE [LARGE SCALE MRNA] (ISOFORM 1).\n"
            + "RC STRAIN=cv. Columbia;\n"
            + "RA Totoki Y., Seki M., Ishida J., Nakajima M., Enju A., Kamiya A.,\n"
            + "RA Narusaka M., Shin-i T., Nakagawa M., Sakamoto N., Oishi K., Kohara Y.,\n"
            + "RA Kobayashi M., Toyoda A., Sakaki Y., Sakurai T., Iida K., Akiyama K.,\n"
            + "RA Satou M., Toyoda T., Konagaya A., Carninci P., Kawai J.,\n"
            + "RA Hayashizaki Y., Shinozaki K.;\n"
            + "RT \"Large-scale analysis of RIKEN Arabidopsis full-length (RAFL) cDNAs.\";\n"
            + "RL Submitted (JUL-2006) to the EMBL/GenBank/DDBJ databases.\n"
            + "RN [5]\n"
            + "RP NUCLEOTIDE SEQUENCE [LARGE SCALE MRNA] (ISOFORM 3).\n"
            + "RC STRAIN=cv. Columbia;\n"
            + "RA Bautista V.R., Kim C.J., Chen H., Quinitio C., Ecker J.R.;\n"
            + "RT \"Arabidopsis ORF Clones.\";\n"
            + "RL Submitted (NOV-2006) to the EMBL/GenBank/DDBJ databases.\n"
            + "CC -!- FUNCTION: Is associated with a DNA binding complex that binds to\n"
            + "CC the G box, a well-characterized cis-acting DNA regulatory element\n"
            + "CC found in plant genes. {ECO:0000250}.\n"
            + "CC -!- ALTERNATIVE PRODUCTS:\n"
            + "CC Event=Alternative splicing; Named isoforms=3;\n"
            + "CC Comment=A number of isoforms are produced. According to EST\n"
            + "CC sequences.;\n"
            + "CC Name=1;\n"
            + "CC IsoId=Q9S9Z8-1; Sequence=Displayed;\n"
            + "CC Name=2;\n"
            + "CC IsoId=Q9S9Z8-2; Sequence=VSP_042043, VSP_042044;\n"
            + "CC Name=3;\n"
            + "CC IsoId=Q9S9Z8-3; Sequence=VSP_042045;\n"
            + "CC -!- SIMILARITY: Belongs to the 14-3-3 family. {ECO:0000305}.\n"
            + "DR EMBL; AF323920; AAG47840.1; -; mRNA.\n"
            + "DR EMBL; AC007894; AAD46005.1; -; Genomic_DNA.\n"
            + "DR EMBL; CP002684; AEE31735.1; -; Genomic_DNA.\n"
            + "DR EMBL; CP002684; AEE31736.1; -; Genomic_DNA.\n"
            + "DR EMBL; AK230390; BAF02188.1; -; mRNA.\n"
            + "DR EMBL; BT029457; ABK59686.1; -; mRNA.\n"
            + "DR RefSeq; NP_001077649.1; NM_001084180.2. [Q9S9Z8-1]\n"
            + "DR RefSeq; NP_564451.2; NM_103196.4. [Q9S9Z8-3]\n"
            + "DR UniGene; At.11487; -.\n"
            + "DR ProteinModelPortal; Q9S9Z8; -.\n"
            + "DR SMR; Q9S9Z8; -.\n"
            + "DR BioGrid; 25612; 2.\n"
            + "DR IntAct; Q9S9Z8; 1.\n"
            + "DR STRING; 3702.AT1G34760.1; -.\n"
            + "DR iPTMnet; Q9S9Z8; -.\n"
            + "DR PaxDb; Q9S9Z8; -.\n"
            + "DR PRIDE; Q9S9Z8; -.\n"
            + "DR EnsemblPlants; AT1G34760.2; AT1G34760.2; AT1G34760. [Q9S9Z8-1]\n"
            + "DR GeneID; 840380; -.\n"
            + "DR Gramene; AT1G34760.2; AT1G34760.2; AT1G34760.\n"
            + "DR KEGG; ath:AT1G34760; -.\n"
            + "DR Araport; AT1G34760; -.\n"
            + "DR TAIR; locus:2008381; AT1G34760.\n"
            + "DR eggNOG; KOG0841; Eukaryota.\n"
            + "DR eggNOG; COG5040; LUCA.\n"
            + "DR HOGENOM; HOG000240379; -.\n"
            + "DR InParanoid; Q9S9Z8; -.\n"
            + "DR OMA; NEQAERY; -.\n"
            + "DR Reactome; R-ATH-1445148; Translocation of GLUT4 to the plasma membrane.\n"
            + "DR Reactome; R-ATH-3371453; Regulation of HSF1-mediated heat shock response.\n"
            + "DR Reactome; R-ATH-3371511; HSF1 activation.\n"
            + "DR PRO; PR:Q9S9Z8; -.\n"
            + "DR Proteomes; UP000006548; Chromosome 1.\n"
            + "DR Genevisible; Q9S9Z8; AT.\n"
            + "DR GO; GO:0051117; F:ATPase binding; IDA:TAIR.\n"
            + "DR Gene3D; 1.20.190.20; -; 1.\n"
            + "DR InterPro; IPR000308; 14-3-3.\n"
            + "DR InterPro; IPR023409; 14-3-3_CS.\n"
            + "DR InterPro; IPR023410; 14-3-3_domain.\n"
            + "DR PANTHER; PTHR18860; PTHR18860; 1.\n"
            + "DR Pfam; PF00244; 14-3-3; 1.\n"
            + "DR PIRSF; PIRSF000868; 14-3-3; 1.\n"
            + "DR PRINTS; PR00305; 1433ZETA.\n"
            + "DR SMART; SM00101; 14_3_3; 1.\n"
            + "DR SUPFAM; SSF48445; SSF48445; 1.\n"
            + "DR PROSITE; PS00796; 1433_1; 1.\n"
            + "DR PROSITE; PS00797; 1433_2; 1.\n"
            + "PE 2: Evidence at transcript level;\n"
            + "KW Alternative splicing; Complete proteome; Reference proteome.\n"
            + "FT CHAIN 1 252 14-3-3-like protein GF14 omicron.\n"
            + "FT /FTId=PRO_0000058673.\n"
            + "FT VAR_SEQ 241 241 E -> K (in isoform 2).\n"
            + "FT {ECO:0000303|Ref.1}.\n"
            + "FT /FTId=VSP_042043.\n"
            + "FT VAR_SEQ 242 252 Missing (in isoform 2).\n"
            + "FT {ECO:0000303|Ref.1}.\n"
            + "FT /FTId=VSP_042044.\n"
            + "FT VAR_SEQ 252 252 N -> VNKI (in isoform 3).\n"
            + "FT {ECO:0000303|Ref.5}.\n"
            + "FT /FTId=VSP_042045.\n"
            + "FT CONFLICT 128 128 D -> G (in Ref. 4; BAF02188).\n"
            + "FT {ECO:0000305}.\n"
            + "**\n"
            + "** ################# INTERNAL SECTION ##################\n"
            + "**DR Araport-CDS; AT1G34760.1; Araport11; -. [Q9S9Z8-3]\n"
            + "**DR Araport-CDS; AT1G34760.2; Araport11; -. [Q9S9Z8-1]\n"
            + "**EV ECO:0000250; -; XXX; 01-JAN-1900.\n"
            + "**EV ECO:0000303; Ref.1; XXX; 01-JAN-1900.\n"
            + "**EV ECO:0000303; Ref.5; XXX; 01-JAN-1900.\n"
            + "**EV ECO:0000305; -; XXX; 01-JAN-1900.\n"
            + "**YY According to Araport, the gene encoding this protein is alternatively spliced.\n"
            + "**ZB MIT, 11-OCT-2011;\n"
            + "SQ SEQUENCE 252 AA; 28781 MW; 852335FF39915461 CRC64;\n"
            + " MENERAKQVY LAKLNEQAER YDEMVEAMKK VAALDVELTI EERNLLSVGY KNVIGARRAS\n"
            + " WRILSSIEQK EESKGNEQNA KRIKDYRTKV EEELSKICYD ILAVIDKHLV PFATSGESTV\n"
            + " FYYKMKGDYF RYLAEFKSGA DREEAADLSL KAYEAATSSA STELSTTHPI RLGLALNFSV\n"
            + " FYYEILNSPE RACHLAKRAF DEAIAELDSL NEDSYKDSTL IMQLLRDNLT LWTSDLEEGG\n"
            + " EQSKGHNQQD EN\n"
            + "//\n"
            + "ID W0TYI6_HUMAN Unreviewed; 154 AA.\n"
            + "AC W0TYI6;\n"
            + "DT 19-MAR-2014, integrated into UniProtKB/TrEMBL.\n"
            + "DT 19-MAR-2014, sequence version 1.\n"
            + "DT 15-FEB-2017, entry version 23.\n"
            + "DE SubName: Full=H2B histone family, member M {ECO:0000313|EMBL:CAA97844.2};\n"
            + "GN Name=H2BFM {ECO:0000313|EMBL:CAA97844.2};\n"
            + "GN ORFNames=LL0XNC01-240C2.2-001 {ECO:0000313|EMBL:CAA97844.2};\n"
            + "OS Homo sapiens (Human).\n"
            + "OC Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi;\n"
            + "OC Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini;\n"
            + "OC Catarrhini; Hominidae; Homo.\n"
            + "OX NCBI_TaxID=9606 {ECO:0000313|EMBL:CAA97844.2};\n"
            + "RN [1] {ECO:0000313|EMBL:CAA97844.2}\n"
            + "RP NUCLEOTIDE SEQUENCE.\n"
            + "RA Hunt A.;\n"
            + "RL Submitted (JAN-2009) to the EMBL/GenBank/DDBJ databases.\n"
            + "CC -!- FUNCTION: Core component of nucleosome. Nucleosomes wrap and\n"
            + "CC compact DNA into chromatin, limiting DNA accessibility to the\n"
            + "CC cellular machineries which require DNA as a template. Histones\n"
            + "CC thereby play a central role in transcription regulation, DNA\n"
            + "CC repair, DNA replication and chromosomal stability. DNA\n"
            + "CC accessibility is regulated via a complex set of post-translational\n"
            + "CC modifications of histones, also called histone code, and\n"
            + "CC nucleosome remodeling. {ECO:0000256|SAAS:SAAS00295035}.\n"
            + "CC -!- SUBUNIT: The nucleosome is a histone octamer containing two\n"
            + "CC molecules each of H2A, H2B, H3 and H4 assembled in one H3-H4\n"
            + "CC heterotetramer and two H2A-H2B heterodimers. The octamer wraps\n"
            + "CC approximately 147 bp of DNA. {ECO:0000256|SAAS:SAAS00565646}.\n"
            + "CC -!- SUBCELLULAR LOCATION: Chromosome {ECO:0000256|SAAS:SAAS00680978}.\n"
            + "CC -!- SUBCELLULAR LOCATION: Nucleus {ECO:0000256|SAAS:SAAS00593244}.\n"
            + "CC -!- SIMILARITY: Belongs to the histone H2B family.\n"
            + "CC {ECO:0000256|SAAS:SAAS00689654}.\n"
            + "CC -----------------------------------------------------------------------\n"
            + "CC Copyrighted by the UniProt Consortium, see http://www.uniprot.org/terms\n"
            + "CC Distributed under the Creative Commons Attribution-NoDerivs License\n"
            + "CC -----------------------------------------------------------------------\n"
            + "DR EMBL; Z73497; CAA97844.2; -; Genomic_DNA.\n"
            + "DR RefSeq; NP_001157888.1; NM_001164416.1.\n"
            + "DR RefSeq; XP_006724703.1; XM_006724640.2.\n"
            + "DR RefSeq; XP_011529224.1; XM_011530922.2.\n"
            + "DR UniGene; Hs.376474; -.\n"
            + "DR ProteinModelPortal; W0TYI6; -.\n"
            + "DR SMR; W0TYI6; -.\n"
            + "DR STRING; 9606.ENSP00000347119; -.\n"
            + "DR PaxDb; W0TYI6; -.\n"
            + "DR GeneID; 286436; -.\n"
            + "DR KEGG; hsa:286436; -.\n"
            + "DR UCSC; uc004els.2; human.\n"
            + "DR CTD; 286436; -.\n"
            + "DR eggNOG; KOG1744; Eukaryota.\n"
            + "DR eggNOG; ENOG4111NV5; LUCA.\n"
            + "DR KO; K11252; -.\n"
            + "DR OMA; PKEANSM; -.\n"
            + "DR OrthoDB; EOG091G0WZB; -.\n"
            + "DR GenomeRNAi; 286436; -.\n"
            + "DR ExpressionAtlas; W0TYI6; baseline and differential.\n"
            + "DR GO; GO:0000786; C:nucleosome; IEA:UniProtKB-KW.\n"
            + "DR GO; GO:0005634; C:nucleus; IEA:UniProtKB-SubCell.\n"
            + "DR GO; GO:0003677; F:DNA binding; IEA:UniProtKB-KW.\n"
            + "DR Gene3D; 1.10.20.10; -; 1.\n"
            + "DR InterPro; IPR009072; Histone-fold.\n"
            + "DR InterPro; IPR007125; Histone_H2A/H2B/H3.\n"
            + "DR InterPro; IPR000558; Histone_H2B.\n"
            + "DR PANTHER; PTHR23428; PTHR23428; 1.\n"
            + "DR Pfam; PF00125; Histone; 1.\n"
            + "DR PRINTS; PR00621; HISTONEH2B.\n"
            + "DR SMART; SM00427; H2B; 1.\n"
            + "DR SUPFAM; SSF47113; SSF47113; 1.\n"
            + "PE 3: Inferred from homology;\n"
            + "KW Chromosome {ECO:0000256|SAAS:SAAS00454795};\n"
            + "KW DNA-binding {ECO:0000256|SAAS:SAAS00454795};\n"
            + "KW Nucleosome core {ECO:0000256|SAAS:SAAS00454795};\n"
            + "KW Nucleus {ECO:0000256|SAAS:SAAS00486878}.\n"
            + "FT DOMAIN 1 123 Histone. {ECO:0000259|Pfam:PF00125}.\n"
            + "SQ SEQUENCE 154 AA; 17001 MW; 700C131F5F5818B0 CRC64;\n"
            + " MAAASAMAEA SSETTSEEGQ SIQEPKEANS TKAQKQKRRG CRGSRRRHAN RRGDSFGDSF\n"
            + " TPYFPRVLKQ VHQGLSLSQE AVSVMDSMIH DILDRIATEA GQLAHYTKRV TITSRDIQMA\n"
            + " VRLLLPGKMG KLAEAQGTNA ALRTSLCAIW QQRK\n"
            + "//";

    /**
     * Scans {@link #SAMPLE_ENTRIES} and prints every match with its capturing groups.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        final Matcher matcher = ENTRY_PATTERN.matcher(SAMPLE_ENTRIES);

        while (matcher.find()) {
            System.out.println("Full match: " + matcher.group(0));
            for (int i = 1; i <= matcher.groupCount(); i++) {
                System.out.println("Group " + i + ": " + matcher.group(i));
            }
        }
    }
}

Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for Java, please visit: https://docs.oracle.com/javase/8/docs/api/java/util/regex/Pattern.html