#include <StringConstants.au3> ; to declare the Constants of StringRegExp
#include <Array.au3> ; UDF needed for _ArrayDisplay and _ArrayConcatenate
; Pattern that pulls three fields out of each UniProtKB flat-file record held in $sString.
;
; Flags:
;   (?x)  extended mode - literal whitespace in the pattern is ignored.
;   (?m)  ^ matches at the start of every line, not only at the start of the string.
;   (?s)  . also matches line breaks, so each lazy ".*?" can skip over whole lines.
;
; Capture groups, in order:
;   1. ^AC\s+(\w+);           first word after "AC" at a line start, up to the ";"
;                             (e.g. Q8NA56 in the sample data below).
;   2. ^OX\s+NCBI_TaxID=(\d+) the digits following "NCBI_TaxID=" on an "OX" line.
;   3. ^\s{5}(.*?)//          everything from the first line start that is followed by five
;                             whitespace characters, up to (not including) the next "//".
;                             Line breaks and blanks inside the block are kept as-is.
;
; The "(?#...)" part is an inline regex comment: it matches nothing and captures nothing.
; It is an open note about optionally capturing the "/FTId=..." of an "FT VAR_SEQ" feature.
; NOTE(review): turning that note into a real optional group would add a fourth capture and
; shift what callers receive - check every consumer of the match array before doing so.
;
; NOTE(review): nothing in the pattern stops a lazy ".*?" from running past a "//" record
; terminator, so a record lacking an AC/OX line or a sequence block could presumably borrow
; the field from the following record - confirm the input always carries all three.
; NOTE(review): "\s{5}" requires five whitespace characters after the line start; verify
; that the sequence lines in the real input keep that indentation.
Local $sRegex = "(?x)(?ms)ID\s+.*?^AC\s+(\w+);.*?^OX\s+NCBI_TaxID=(\d+).*?(?#how to optionally capture group ^FT\s+VAR_SEQ.*?\/FTId=\w+\. ).*?^\s{5}(.*?)//"
Local $sString = "ID TTC29_HUMAN Reviewed; 475 AA." & @CRLF & _
"AC Q8NA56; A4GU95; Q9BXB6;" & @CRLF & _
"DT 10-JUL-2007, integrated into UniProtKB/Swiss-Prot." & @CRLF & _
"DT 10-JUL-2007, sequence version 2." & @CRLF & _
"DT 15-FEB-2017, entry version 114." & @CRLF & _
"DE RecName: Full=Tetratricopeptide repeat protein 29;" & @CRLF & _
"DE Short=TPR repeat protein 29;" & @CRLF & _
"DE AltName: Full=Protein TBPP2A;" & @CRLF & _
"DE AltName: Full=Testis development protein NYD-SP14;" & @CRLF & _
"GN Name=TTC29;" & @CRLF & _
"OS Homo sapiens (Human)." & @CRLF & _
"OC Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi;" & @CRLF & _
"OC Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini;" & @CRLF & _
"OC Catarrhini; Hominidae; Homo." & @CRLF & _
"OX NCBI_TaxID=9606;" & @CRLF & _
"RN [1]" & @CRLF & _
"RP NUCLEOTIDE SEQUENCE [MRNA] (ISOFORM 1), AND VARIANTS TYR-140 AND" & @CRLF & _
"RP THR-276." & @CRLF & _
"RC TISSUE=Testis;" & @CRLF & _
"RA Sha J.H.;" & @CRLF & _
"RT "Cloning and identification of a novel gene related development gene" & @CRLF & _
"RT NYD-SP14.";" & @CRLF & _
"RL Submitted (FEB-2001) to the EMBL/GenBank/DDBJ databases." & @CRLF & _
"RN [2]" & @CRLF & _
"RP NUCLEOTIDE SEQUENCE [MRNA] (ISOFORM 1), AND VARIANTS TYR-140 AND" & @CRLF & _
"RP THR-276." & @CRLF & _
"RC TISSUE=Mammary cancer;" & @CRLF & _
"RA Li J.M., Cheng J., Wang Q.;" & @CRLF & _
"RL Submitted (FEB-2007) to the EMBL/GenBank/DDBJ databases." & @CRLF & _
"RN [3]" & @CRLF & _
"RP NUCLEOTIDE SEQUENCE [LARGE SCALE MRNA] (ISOFORM 2)." & @CRLF & _
"RC TISSUE=Testis;" & @CRLF & _
"RX PubMed=14702039; DOI=10.1038/ng1285;" & @CRLF & _
"RA Ota T., Suzuki Y., Nishikawa T., Otsuki T., Sugiyama T., Irie R.," & @CRLF & _
"RA Wakamatsu A., Hayashi K., Sato H., Nagai K., Kimura K., Makita H.," & @CRLF & _
"RA Sekine M., Obayashi M., Nishi T., Shibahara T., Tanaka T., Ishii S.," & @CRLF & _
"RA Yamamoto J., Saito K., Kawai Y., Isono Y., Nakamura Y., Nagahari K.," & @CRLF & _
"RA Murakami K., Yasuda T., Iwayanagi T., Wagatsuma M., Shiratori A.," & @CRLF & _
"RA Sudo H., Hosoiri T., Kaku Y., Kodaira H., Kondo H., Sugawara M.," & @CRLF & _
"RA Takahashi M., Kanda K., Yokoi T., Furuya T., Kikkawa E., Omura Y.," & @CRLF & _
"RA Abe K., Kamihara K., Katsuta N., Sato K., Tanikawa M., Yamazaki M.," & @CRLF & _
"RA Ninomiya K., Ishibashi T., Yamashita H., Murakawa K., Fujimori K.," & @CRLF & _
"RA Tanai H., Kimata M., Watanabe M., Hiraoka S., Chiba Y., Ishida S.," & @CRLF & _
"RA Ono Y., Takiguchi S., Watanabe S., Yosida M., Hotuta T., Kusano J.," & @CRLF & _
"RA Kanehori K., Takahashi-Fujii A., Hara H., Tanase T.-O., Nomura Y.," & @CRLF & _
"RA Togiya S., Komai F., Hara R., Takeuchi K., Arita M., Imose N.," & @CRLF & _
"RA Musashino K., Yuuki H., Oshima A., Sasaki N., Aotsuka S.," & @CRLF & _
"RA Yoshikawa Y., Matsunawa H., Ichihara T., Shiohata N., Sano S.," & @CRLF & _
"RA Moriya S., Momiyama H., Satoh N., Takami S., Terashima Y., Suzuki O.," & @CRLF & _
"RA Nakagawa S., Senoh A., Mizoguchi H., Goto Y., Shimizu F., Wakebe H.," & @CRLF & _
"RA Hishigaki H., Watanabe T., Sugiyama A., Takemoto M., Kawakami B.," & @CRLF & _
"RA Yamazaki M., Watanabe K., Kumagai A., Itakura S., Fukuzumi Y.," & @CRLF & _
"RA Fujimori Y., Komiyama M., Tashiro H., Tanigami A., Fujiwara T.," & @CRLF & _
"RA Ono T., Yamada K., Fujii Y., Ozaki K., Hirao M., Ohmori Y.," & @CRLF & _
"RA Kawabata A., Hikiji T., Kobatake N., Inagaki H., Ikema Y., Okamoto S.," & @CRLF & _
"RA Okitani R., Kawakami T., Noguchi S., Itoh T., Shigeta K., Senba T.," & @CRLF & _
"RA Matsumura K., Nakajima Y., Mizuno T., Morinaga M., Sasaki M.," & @CRLF & _
"RA Togashi T., Oyama M., Hata H., Watanabe M., Komatsu T.," & @CRLF & _
"RA Mizushima-Sugano J., Satoh T., Shirai Y., Takahashi Y., Nakagawa K.," & @CRLF & _
"RA Okumura K., Nagase T., Nomura N., Kikuchi H., Masuho Y., Yamashita R.," & @CRLF & _
"RA Nakai K., Yada T., Nakamura Y., Ohara O., Isogai T., Sugano S.;" & @CRLF & _
"RT "Complete sequencing and characterization of 21,243 full-length human" & @CRLF & _
"RT cDNAs.";" & @CRLF & _
"RL Nat. Genet. 36:40-45(2004)." & @CRLF & _
"RN [4]" & @CRLF & _
"RP NUCLEOTIDE SEQUENCE [LARGE SCALE GENOMIC DNA]." & @CRLF & _
"RX PubMed=15815621; DOI=10.1038/nature03466;" & @CRLF & _
"RA Hillier L.W., Graves T.A., Fulton R.S., Fulton L.A., Pepin K.H.," & @CRLF & _
"RA Minx P., Wagner-McPherson C., Layman D., Wylie K., Sekhon M.," & @CRLF & _
"RA Becker M.C., Fewell G.A., Delehaunty K.D., Miner T.L., Nash W.E.," & @CRLF & _
"RA Kremitzki C., Oddy L., Du H., Sun H., Bradshaw-Cordum H., Ali J.," & @CRLF & _
"RA Carter J., Cordes M., Harris A., Isak A., van Brunt A., Nguyen C.," & @CRLF & _
"RA Du F., Courtney L., Kalicki J., Ozersky P., Abbott S., Armstrong J.," & @CRLF & _
"RA Belter E.A., Caruso L., Cedroni M., Cotton M., Davidson T., Desai A.," & @CRLF & _
"RA Elliott G., Erb T., Fronick C., Gaige T., Haakenson W., Haglund K.," & @CRLF & _
"RA Holmes A., Harkins R., Kim K., Kruchowski S.S., Strong C.M.," & @CRLF & _
"RA Grewal N., Goyea E., Hou S., Levy A., Martinka S., Mead K.," & @CRLF & _
"RA McLellan M.D., Meyer R., Randall-Maher J., Tomlinson C.," & @CRLF & _
"RA Dauphin-Kohlberg S., Kozlowicz-Reilly A., Shah N.," & @CRLF & _
"RA Swearengen-Shahid S., Snider J., Strong J.T., Thompson J., Yoakum M.," & @CRLF & _
"RA Leonard S., Pearman C., Trani L., Radionenko M., Waligorski J.E.," & @CRLF & _
"RA Wang C., Rock S.M., Tin-Wollam A.-M., Maupin R., Latreille P.," & @CRLF & _
"RA Wendl M.C., Yang S.-P., Pohl C., Wallis J.W., Spieth J., Bieri T.A.," & @CRLF & _
"RA Berkowicz N., Nelson J.O., Osborne J., Ding L., Meyer R., Sabo A.," & @CRLF & _
"RA Shotland Y., Sinha P., Wohldmann P.E., Cook L.L., Hickenbotham M.T.," & @CRLF & _
"RA Eldred J., Williams D., Jones T.A., She X., Ciccarelli F.D.," & @CRLF & _
"RA Izaurralde E., Taylor J., Schmutz J., Myers R.M., Cox D.R., Huang X.," & @CRLF & _
"RA McPherson J.D., Mardis E.R., Clifton S.W., Warren W.C.," & @CRLF & _
"RA Chinwalla A.T., Eddy S.R., Marra M.A., Ovcharenko I., Furey T.S.," & @CRLF & _
"RA Miller W., Eichler E.E., Bork P., Suyama M., Torrents D.," & @CRLF & _
"RA Waterston R.H., Wilson R.K.;" & @CRLF & _
"RT "Generation and annotation of the DNA sequences of human chromosomes 2" & @CRLF & _
"RT and 4.";" & @CRLF & _
"RL Nature 434:724-731(2005)." & @CRLF & _
"CC -!- ALTERNATIVE PRODUCTS:" & @CRLF & _
"CC Event=Alternative splicing; Named isoforms=2;" & @CRLF & _
"CC Name=1;" & @CRLF & _
"CC IsoId=Q8NA56-1; Sequence=Displayed;" & @CRLF & _
"CC Name=2;" & @CRLF & _
"CC IsoId=Q8NA56-2; Sequence=VSP_026638;" & @CRLF & _
"CC Note=No experimental confirmation available.;" & @CRLF & _
"CC -!- SEQUENCE CAUTION:" & @CRLF & _
"CC Sequence=AAK29064.1; Type=Frameshift; Positions=467; Evidence={ECO:0000305};" & @CRLF & _
"CC -----------------------------------------------------------------------" & @CRLF & _
"CC Copyrighted by the UniProt Consortium, see http://www.uniprot.org/terms" & @CRLF & _
"CC Distributed under the Creative Commons Attribution-NoDerivs License" & @CRLF & _
"CC -----------------------------------------------------------------------" & @CRLF & _
"DR EMBL; AF345910; AAK29064.1; ALT_FRAME; mRNA." & @CRLF & _
"DR EMBL; EF432564; ABO31099.1; -; mRNA." & @CRLF & _
"DR EMBL; AK093145; BAC04072.1; -; mRNA." & @CRLF & _
"DR EMBL; AC092435; -; NOT_ANNOTATED_CDS; Genomic_DNA." & @CRLF & _
"DR EMBL; AC097497; -; NOT_ANNOTATED_CDS; Genomic_DNA." & @CRLF & _
"DR EMBL; AC093887; -; NOT_ANNOTATED_CDS; Genomic_DNA." & @CRLF & _
"DR CCDS; CCDS47141.1; -. [Q8NA56-1]" & @CRLF & _
"DR RefSeq; NP_001287690.1; NM_001300761.2." & @CRLF & _
"DR RefSeq; NP_001304735.1; NM_001317806.1." & @CRLF & _
"DR RefSeq; NP_114162.2; NM_031956.3. [Q8NA56-1]" & @CRLF & _
"DR UniGene; Hs.378893; -." & @CRLF & _
"DR ProteinModelPortal; Q8NA56; -." & @CRLF & _
"DR SMR; Q8NA56; -." & @CRLF & _
"DR BioGrid; 123805; 16." & @CRLF & _
"DR STRING; 9606.ENSP00000316740; -." & @CRLF & _
"DR iPTMnet; Q8NA56; -." & @CRLF & _
"DR PhosphoSitePlus; Q8NA56; -." & @CRLF & _
"DR BioMuta; TTC29; -." & @CRLF & _
"DR DMDM; 152112335; -." & @CRLF & _
"DR PaxDb; Q8NA56; -." & @CRLF & _
"DR PeptideAtlas; Q8NA56; -." & @CRLF & _
"DR PRIDE; Q8NA56; -." & @CRLF & _
"DR Ensembl; ENST00000325106; ENSP00000316740; ENSG00000137473. [Q8NA56-1]" & @CRLF & _
"DR GeneID; 83894; -." & @CRLF & _
"DR KEGG; hsa:83894; -." & @CRLF & _
"DR UCSC; uc003ikw.5; human. [Q8NA56-1]" & @CRLF & _
"DR CTD; 83894; -." & @CRLF & _
"DR DisGeNET; 83894; -." & @CRLF & _
"DR GeneCards; TTC29; -." & @CRLF & _
"DR HGNC; HGNC:29936; TTC29." & @CRLF & _
"DR HPA; HPA037006; -." & @CRLF & _
"DR HPA; HPA061473; -." & @CRLF & _
"DR neXtProt; NX_Q8NA56; -." & @CRLF & _
"DR OpenTargets; ENSG00000137473; -." & @CRLF & _
"DR PharmGKB; PA145147799; -." & @CRLF & _
"DR eggNOG; ENOG410IFVK; Eukaryota." & @CRLF & _
"DR eggNOG; ENOG410XPVA; LUCA." & @CRLF & _
"DR GeneTree; ENSGT00390000008611; -." & @CRLF & _
"DR HOGENOM; HOG000067965; -." & @CRLF & _
"DR HOVERGEN; HBG108611; -." & @CRLF & _
"DR InParanoid; Q8NA56; -." & @CRLF & _
"DR PhylomeDB; Q8NA56; -." & @CRLF & _
"DR TreeFam; TF328344; -." & @CRLF & _
"DR ChiTaRS; TTC29; human." & @CRLF & _
"DR GenomeRNAi; 83894; -." & @CRLF & _
"DR PRO; PR:Q8NA56; -." & @CRLF & _
"DR Proteomes; UP000005640; Chromosome 4." & @CRLF & _
"DR Bgee; ENSG00000137473; -." & @CRLF & _
"DR CleanEx; HS_TTC29; -." & @CRLF & _
"DR ExpressionAtlas; Q8NA56; baseline and differential." & @CRLF & _
"DR Genevisible; Q8NA56; HS." & @CRLF & _
"DR Gene3D; 1.25.40.10; -; 2." & @CRLF & _
"DR InterPro; IPR013026; TPR-contain_dom." & @CRLF & _
"DR InterPro; IPR011990; TPR-like_helical_dom." & @CRLF & _
"DR InterPro; IPR019734; TPR_repeat." & @CRLF & _
"DR SMART; SM00028; TPR; 4." & @CRLF & _
"DR SUPFAM; SSF48452; SSF48452; 1." & @CRLF & _
"DR PROSITE; PS50293; TPR_REGION; 2." & @CRLF & _
"PE 2: Evidence at transcript level;" & @CRLF & _
"KW Alternative splicing; Complete proteome; Polymorphism;" & @CRLF & _
"KW Reference proteome; Repeat; TPR repeat." & @CRLF & _
"FT CHAIN 1 475 Tetratricopeptide repeat protein 29." & @CRLF & _
"FT /FTId=PRO_0000294435." & @CRLF & _
"FT REPEAT 182 215 TPR 1." & @CRLF & _
"FT REPEAT 234 267 TPR 2." & @CRLF & _
"FT REPEAT 274 307 TPR 3." & @CRLF & _
"FT REPEAT 314 347 TPR 4." & @CRLF & _
"FT REPEAT 354 387 TPR 5." & @CRLF & _
"FT VAR_SEQ 1 1 M -> MIPMFTVTLEDSGTLWKSLHSSSESE (in" & @CRLF & _
"FT isoform 2)." & @CRLF & _
"FT {ECO:0000303|PubMed:14702039}." & @CRLF & _
"FT /FTId=VSP_026638." & @CRLF & _
"FT VARIANT 94 94 L -> P (in dbSNP:rs35123039)." & @CRLF & _
"FT /FTId=VAR_033179." & @CRLF & _
"FT VARIANT 140 140 H -> Y (in dbSNP:rs17610219)." & @CRLF & _
"FT {ECO:0000269|Ref.1, ECO:0000269|Ref.2}." & @CRLF & _
"FT /FTId=VAR_033180." & @CRLF & _
"FT VARIANT 276 276 A -> T (in dbSNP:rs10013280)." & @CRLF & _
"FT {ECO:0000269|Ref.1, ECO:0000269|Ref.2}." & @CRLF & _
"FT /FTId=VAR_033181." & @CRLF & _
"FT CONFLICT 239 239 L -> F (in Ref. 3; BAC04072)." & @CRLF & _
"FT {ECO:0000305}." & @CRLF & _
"SQ SEQUENCE 475 AA; 55082 MW; 09BF33E42330C53A CRC64;" & @CRLF & _
" MTTLPPLPMT RPKLTALARQ KLPCSSRKIP RSQLIKEKDD IDHYLEVNFK GLSKEEVAAY" & @CRLF & _
" RNSYKKNICV DMLRDGYHKS FTELFALMER WDALREAARV RSLFWLQKPL EEQPDKLDYL" & @CRLF & _
" YHYLTRAEDA ERKESFEDVH NNLYALACYF NNSEDKWVRN HFYERCFKIA QLIKIDCGKK" & @CRLF & _
" EAEAHMHMGL LYEEDGQLLE AAEHYEAFHQ LTQGRIWKDE TGRSLNLLAC ESLLRTYRLL" & @CRLF & _
" SDKMLENKEY KQAIKILIKA SEIAKEGSDK KMEAEASYYL GLAHLAAEEY ETALTVLDTY" & @CRLF & _
" CKISTDLDDD LSLGRGYEAI AKVLQSQGEM TEAIKYLKKF VKIARNNFQS LDLVRASTML" & @CRLF & _
" GDIYNEKGYY NKASECFQQA FDTTVELMSM PLMDETKVHY GIAKAHQMML TVNNYIESAD" & @CRLF & _
" LTSLNYLLSW KESRGNIEPD PVTEEFRGST VEAVSQNSER LEELSRFPGD QKNET" & @CRLF & _
"//" & @CRLF & _
"ID 2NPD_NEUCR Reviewed; 378 AA." & @CRLF & _
"AC Q01284; Q7RV78;" & @CRLF & _
"DT 01-NOV-1997, integrated into UniProtKB/Swiss-Prot." & @CRLF & _
"DT 01-NOV-1996, sequence version 1." & @CRLF & _
"DT 15-FEB-2017, entry version 106." & @CRLF & _
"DE RecName: Full=Nitronate monooxygenase;" & @CRLF & _
"DE EC=1.13.12.16;" & @CRLF & _
"DE AltName: Full=2-nitropropane dioxygenase;" & @CRLF & _
"DE Short=2-NPD;" & @CRLF & _
"DE AltName: Full=Nitroalkane oxidase;" & @CRLF & _
"DE Flags: Precursor;" & @CRLF & _
"GN Name=ncd-2; ORFNames=G17A4.200, NCU03949;" & @CRLF & _
"OS Neurospora crassa (strain ATCC 24698 / 74-OR23-1A / CBS 708.71 / DSM" & @CRLF & _
"OS 1257 / FGSC 987)." & @CRLF & _
"OC Eukaryota; Fungi; Dikarya; Ascomycota; Pezizomycotina;" & @CRLF & _
"OC Sordariomycetes; Sordariomycetidae; Sordariales; Sordariaceae;" & @CRLF & _
"OC Neurospora." & @CRLF & _
"OX NCBI_TaxID=367110;" & @CRLF & _
"RN [1]" & @CRLF & _
"RP NUCLEOTIDE SEQUENCE [MRNA], CATALYTIC ACTIVITY, SUBSTRATE SPECIFICITY," & @CRLF & _
"RP COFACTOR, BIOPHYSICOCHEMICAL PROPERTIES, SUBUNIT, AND REACTION" & @CRLF & _
"RP MECHANISM." & @CRLF & _
"RC STRAIN=ATCC 10337 / FGSC 1758 / NBRC 6067 / IMI 53239;" & @CRLF & _
"RX PubMed=9501443;" & @CRLF & _
"RA Gorlatova N., Tchorzewski M., Kurihara T., Soda K., Esaki N.;" & @CRLF & _
"RT "Purification, characterization, and mechanism of a flavin" & @CRLF & _
"RT mononucleotide-dependent 2-nitropropane dioxygenase from Neurospora" & @CRLF & _
"RT crassa.";" & @CRLF & _
"RL Appl. Environ. Microbiol. 64:1029-1033(1998)." & @CRLF & _
"RN [2]" & @CRLF & _
"RP NUCLEOTIDE SEQUENCE [LARGE SCALE GENOMIC DNA]." & @CRLF & _
"RC STRAIN=ATCC 24698 / 74-OR23-1A / CBS 708.71 / DSM 1257 / FGSC 987;" & @CRLF & _
"RX PubMed=12655011; DOI=10.1093/nar/gkg293;" & @CRLF & _
"RA Mannhaupt G., Montrone C., Haase D., Mewes H.-W., Aign V.," & @CRLF & _
"RA Hoheisel J.D., Fartmann B., Nyakatura G., Kempken F., Maier J.," & @CRLF & _
"RA Schulte U.;" & @CRLF & _
"RT "What's in the genome of a filamentous fungus? Analysis of the" & @CRLF & _
"RT Neurospora genome sequence.";" & @CRLF & _
"RL Nucleic Acids Res. 31:1944-1954(2003)." & @CRLF & _
"RN [3]" & @CRLF & _
"RP NUCLEOTIDE SEQUENCE [LARGE SCALE GENOMIC DNA]." & @CRLF & _
"RC STRAIN=ATCC 24698 / 74-OR23-1A / CBS 708.71 / DSM 1257 / FGSC 987;" & @CRLF & _
"RX PubMed=12712197; DOI=10.1038/nature01554;" & @CRLF & _
"RA Galagan J.E., Calvo S.E., Borkovich K.A., Selker E.U., Read N.D.," & @CRLF & _
"RA Jaffe D.B., FitzHugh W., Ma L.-J., Smirnov S., Purcell S., Rehman B.," & @CRLF & _
"RA Elkins T., Engels R., Wang S., Nielsen C.B., Butler J., Endrizzi M.," & @CRLF & _
"RA Qui D., Ianakiev P., Bell-Pedersen D., Nelson M.A.," & @CRLF & _
"RA Werner-Washburne M., Selitrennikoff C.P., Kinsey J.A., Braun E.L.," & @CRLF & _
"RA Zelter A., Schulte U., Kothe G.O., Jedd G., Mewes H.-W., Staben C.," & @CRLF & _
"RA Marcotte E., Greenberg D., Roy A., Foley K., Naylor J.," & @CRLF & _
"RA Stange-Thomann N., Barrett R., Gnerre S., Kamal M., Kamvysselis M.," & @CRLF & _
"RA Mauceli E.W., Bielke C., Rudd S., Frishman D., Krystofova S.," & @CRLF & _
"RA Rasmussen C., Metzenberg R.L., Perkins D.D., Kroken S., Cogoni C.," & @CRLF & _
"RA Macino G., Catcheside D.E.A., Li W., Pratt R.J., Osmani S.A.," & @CRLF & _
"RA DeSouza C.P.C., Glass N.L., Orbach M.J., Berglund J.A., Voelker R.," & @CRLF & _
"RA Yarden O., Plamann M., Seiler S., Dunlap J.C., Radford A., Aramayo R.," & @CRLF & _
"RA Natvig D.O., Alex L.A., Mannhaupt G., Ebbole D.J., Freitag M.," & @CRLF & _
"RA Paulsen I., Sachs M.S., Lander E.S., Nusbaum C., Birren B.W.;" & @CRLF & _
"RT "The genome sequence of the filamentous fungus Neurospora crassa.";" & @CRLF & _
"RL Nature 422:859-868(2003)." & @CRLF & _
"RN [4]" & @CRLF & _
"RP COFACTOR, SUBSTRATE SPECIFICITY, AND REACTION MECHANISM." & @CRLF & _
"RX PubMed=19577534; DOI=10.1016/j.abb.2009.06.018;" & @CRLF & _
"RA Gadda G., Francis K.;" & @CRLF & _
"RT "Nitronate monooxygenase, a model for anionic flavin semiquinone" & @CRLF & _
"RT intermediates in oxidative catalysis.";" & @CRLF & _
"RL Arch. Biochem. Biophys. 493:53-61(2010)." & @CRLF & _
"CC -!- FUNCTION: Catalyzes the oxidation of alkyl nitronates to produce" & @CRLF & _
"CC the corresponding carbonyl compounds and nitrites. Anionic forms" & @CRLF & _
"CC of nitroalkanes are much better substrates than are neutral forms." & @CRLF & _
"CC -!- CATALYTIC ACTIVITY: Ethylnitronate + O(2) = acetaldehyde + nitrite" & @CRLF & _
"CC + other products. {ECO:0000269|PubMed:9501443}." & @CRLF & _
"CC -!- COFACTOR:" & @CRLF & _
"CC Name=FMN; Xref=ChEBI:CHEBI:58210;" & @CRLF & _
"CC Evidence={ECO:0000269|PubMed:19577534," & @CRLF & _
"CC ECO:0000269|PubMed:9501443};" & @CRLF & _
"CC Note=Binds 1 FMN per subunit. {ECO:0000269|PubMed:19577534," & @CRLF & _
"CC ECO:0000269|PubMed:9501443};" & @CRLF & _
"CC -!- BIOPHYSICOCHEMICAL PROPERTIES:" & @CRLF & _
"CC Kinetic parameters:" & @CRLF & _
"CC KM=3.1 mM for 2-nitropropane {ECO:0000269|PubMed:9501443};" & @CRLF & _
"CC KM=6 mM for nitroethane {ECO:0000269|PubMed:9501443};" & @CRLF & _
"CC KM=8.3 mM for 1-nitropropane {ECO:0000269|PubMed:9501443};" & @CRLF & _
"CC -!- SUBUNIT: Homodimer. {ECO:0000269|PubMed:9501443}." & @CRLF & _
"CC -!- SIMILARITY: Belongs to the nitronate monooxygenase family." & @CRLF & _
"CC {ECO:0000305}." & @CRLF & _
"CC -----------------------------------------------------------------------" & @CRLF & _
"CC Copyrighted by the UniProt Consortium, see http://www.uniprot.org/terms" & @CRLF & _
"CC Distributed under the Creative Commons Attribution-NoDerivs License" & @CRLF & _
"CC -----------------------------------------------------------------------" & @CRLF & _
"DR EMBL; U22530; AAA64218.1; -; mRNA." & @CRLF & _
"DR EMBL; BX908812; CAF06155.1; -; Genomic_DNA." & @CRLF & _
"DR EMBL; CM002241; EAA28352.1; -; Genomic_DNA." & @CRLF & _
"DR PIR; T46693; T46693." & @CRLF & _
"DR RefSeq; XP_957588.1; XM_952495.3." & @CRLF & _
"DR ProteinModelPortal; Q01284; -." & @CRLF & _
"DR EnsemblFungi; EAA28352; EAA28352; NCU03949." & @CRLF & _
"DR GeneID; 3873678; -." & @CRLF & _
"DR KEGG; ncr:NCU03949; -." & @CRLF & _
"DR EuPathDB; FungiDB:NCU03949; -." & @CRLF & _
"DR HOGENOM; HOG000123285; -." & @CRLF & _
"DR InParanoid; Q01284; -." & @CRLF & _
"DR KO; K00459; -." & @CRLF & _
"DR OMA; VDAGGHQ; -." & @CRLF & _
"DR OrthoDB; EOG092651WG; -." & @CRLF & _
"DR BioCyc; MetaCyc:MONOMER-302; -." & @CRLF & _
"DR BRENDA; 1.13.11.32; 3627." & @CRLF & _
"DR BRENDA; 1.13.12.16; 3627." & @CRLF & _
"DR SABIO-RK; Q01284; -." & @CRLF & _
"DR Proteomes; UP000001805; Chromosome 5, Linkage Group VI." & @CRLF & _
"DR GO; GO:0005634; C:nucleus; IBA:GO_Central." & @CRLF & _
"DR GO; GO:0018580; F:nitronate monooxygenase activity; IEA:UniProtKB-EC." & @CRLF & _
"DR GO; GO:0003700; F:transcription factor activity, sequence-specific DNA binding; IBA:GO_Central." & @CRLF & _
"DR GO; GO:0009410; P:response to xenobiotic stimulus; IBA:GO_Central." & @CRLF & _
"DR Gene3D; 3.20.20.70; -; 1." & @CRLF & _
"DR InterPro; IPR013785; Aldolase_TIM." & @CRLF & _
"DR InterPro; IPR004136; NMO." & @CRLF & _
"DR Pfam; PF03060; NMO; 1." & @CRLF & _
"PE 1: Evidence at protein level;" & @CRLF & _
"KW Complete proteome; Flavoprotein; FMN; Monooxygenase; Oxidoreductase;" & @CRLF & _
"KW Reference proteome." & @CRLF & _
"FT PROPEP 1 15 {ECO:0000255}." & @CRLF & _
"FT /FTId=PRO_0000020575." & @CRLF & _
"FT CHAIN 16 378 Nitronate monooxygenase." & @CRLF & _
"FT /FTId=PRO_0000020576." & @CRLF & _
"FT NP_BIND 37 39 FMN. {ECO:0000250}." & @CRLF & _
"FT NP_BIND 229 231 FMN. {ECO:0000250}." & @CRLF & _
"FT NP_BIND 252 253 FMN. {ECO:0000250}." & @CRLF & _
"FT ACT_SITE 196 196 Proton acceptor. {ECO:0000255}." & @CRLF & _
"FT BINDING 196 196 Substrate. {ECO:0000250}." & @CRLF & _
"SQ SEQUENCE 378 AA; 39916 MW; E453EB43FD23E441 CRC64;" & @CRLF & _
" MHFPGHSSKK EESAQAALTK LNSWFPTTKN PVIISAPMYL IANGTLAAEV SKAGGIGFVA" & @CRLF & _
" GGSDFRPGSS HLTALSTELA SARSRLGLTD RPLTPLPGIG VGLILTHTIS VPYVTDTVLP" & @CRLF & _
" ILIEHSPQAV WLFANDPDFE ASSEPGAKGT AKQIIEALHA SGFVVFFQVG TVKDARKAAA" & @CRLF & _
" DGADVIVAQG IDAGGHQLAT GSGIVSLVPE VRDMLDREFK EREVVVVAAG GVADGRGVVG" & @CRLF & _
" ALGLGAEGVV LGTRFTVAVE ASTPEFRRKV ILETNDGGLN TVKSHFHDQI NCNTIWHNVY" & @CRLF & _
" DGRAVRNASY DDHAAGVPFE ENHKKFKEAA SSGDNSRAVT WSGTAVGLIK DQRPAGDIVR" & @CRLF & _
" ELREEAKERI KKIQAFAA" & @CRLF & _
"//" & @CRLF & _
"ID 5NTD_LUTLO Reviewed; 572 AA." & @CRLF & _
"AC Q9XZ43;" & @CRLF & _
"DT 11-JAN-2001, integrated into UniProtKB/Swiss-Prot." & @CRLF & _
"DT 01-NOV-1999, sequence version 1." & @CRLF & _
"DT 12-APR-2017, entry version 83." & @CRLF & _
"DE RecName: Full=Protein 5NUC;" & @CRLF & _
"DE Includes:" & @CRLF & _
"DE RecName: Full=UDP-sugar hydrolase;" & @CRLF & _
"DE EC=3.6.1.45;" & @CRLF & _
"DE AltName: Full=UDP-sugar diphosphatase;" & @CRLF & _
"DE AltName: Full=UDP-sugar pyrophosphatase;" & @CRLF & _
"DE Includes:" & @CRLF & _
"DE RecName: Full=5'-nucleotidase;" & @CRLF & _
"DE Short=5'-NT;" & @CRLF & _
"DE EC=3.1.3.5;" & @CRLF & _
"DE Flags: Precursor;" & @CRLF & _
"GN Name=5NUC;" & @CRLF & _
"OS Lutzomyia longipalpis (Sand fly)." & @CRLF & _
"OC Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta;" & @CRLF & _
"OC Pterygota; Neoptera; Holometabola; Diptera; Nematocera; Psychodoidea;" & @CRLF & _
"OC Psychodidae; Lutzomyia; Lutzomyia." & @CRLF & _
"OX NCBI_TaxID=7200;" & @CRLF & _
"RN [1]" & @CRLF & _
"RP NUCLEOTIDE SEQUENCE [MRNA]." & @CRLF & _
"RC STRAIN=Jacobina; TISSUE=Salivary gland;" & @CRLF & _
"RX PubMed=10611354; DOI=10.1073/pnas.96.26.15155;" & @CRLF & _
"RA Charlab R., Valenzuela J.G., Rowton E.D., Ribeiro J.M.;" & @CRLF & _
"RT "Toward an understanding of the biochemical and pharmacological" & @CRLF & _
"RT complexity of the saliva of a hematophagous sand fly Lutzomyia" & @CRLF & _
"RT longipalpis.";" & @CRLF & _
"RL Proc. Natl. Acad. Sci. U.S.A. 96:15155-15160(1999)." & @CRLF & _
"RN [2]" & @CRLF & _
"RP CHARACTERIZATION." & @CRLF & _
"RC TISSUE=Salivary gland;" & @CRLF & _
"RX PubMed=10727894; DOI=10.1016/S0965-1748(99)00123-X;" & @CRLF & _
"RA Ribeiro J.M.C., Rowton E.D., Charlab R.;" & @CRLF & _
"RT "The salivary 5'-nucleotidase/phosphodiesterase of the hematophagus" & @CRLF & _
"RT sand fly, Lutzomyia longipalpis.";" & @CRLF & _
"RL Insect Biochem. Mol. Biol. 30:279-285(2000)." & @CRLF & _
"RN [3]" & @CRLF & _
"RP ERRATUM." & @CRLF & _
"RA Ribeiro J.M.C., Rowton E.D., Charlab R.;" & @CRLF & _
"RL Insect Biochem. Mol. Biol. 30:609-609(2000)." & @CRLF & _
"CC -!- FUNCTION: Degradation of external UDP-glucose to uridine" & @CRLF & _
"CC monophosphate and glucose-1-phosphate, which can then be used by" & @CRLF & _
"CC the cell. {ECO:0000250}." & @CRLF & _
"CC -!- CATALYTIC ACTIVITY: UDP-sugar + H(2)O = UMP + alpha-D-aldose 1-" & @CRLF & _
"CC phosphate." & @CRLF & _
"CC -!- CATALYTIC ACTIVITY: A 5'-ribonucleotide + H(2)O = a ribonucleoside" & @CRLF & _
"CC + phosphate." & @CRLF & _
"CC -!- COFACTOR:" & @CRLF & _
"CC Name=Zn(2+); Xref=ChEBI:CHEBI:29105; Evidence={ECO:0000250};" & @CRLF & _
"CC -!- SIMILARITY: Belongs to the 5'-nucleotidase family. {ECO:0000305}." & @CRLF & _
"DR EMBL; AF132510; AAD32190.1; -; mRNA." & @CRLF & _
"DR ProteinModelPortal; Q9XZ43; -." & @CRLF & _
"DR SMR; Q9XZ43; -." & @CRLF & _
"DR Proteomes; UP000092461; Unassembled WGS sequence." & @CRLF & _
"DR GO; GO:0008253; F:5'-nucleotidase activity; IEA:UniProtKB-EC." & @CRLF & _
"DR GO; GO:0046872; F:metal ion binding; IEA:UniProtKB-KW." & @CRLF & _
"DR GO; GO:0000166; F:nucleotide binding; IEA:UniProtKB-KW." & @CRLF & _
"DR GO; GO:0008768; F:UDP-sugar diphosphatase activity; IEA:UniProtKB-EC." & @CRLF & _
"DR GO; GO:0009166; P:nucleotide catabolic process; IEA:InterPro." & @CRLF & _
"DR Gene3D; 3.60.21.10; -; 1." & @CRLF & _
"DR Gene3D; 3.90.780.10; -; 1." & @CRLF & _
"DR InterPro; IPR008334; 5'-Nucleotdase_C." & @CRLF & _
"DR InterPro; IPR006146; 5'-Nucleotdase_CS." & @CRLF & _
"DR InterPro; IPR006179; 5_nucleotidase/apyrase." & @CRLF & _
"DR InterPro; IPR004843; Calcineurin-like_PHP_ApaH." & @CRLF & _
"DR InterPro; IPR029052; Metallo-depent_PP-like." & @CRLF & _
"DR PANTHER; PTHR11575; PTHR11575; 1." & @CRLF & _
"DR Pfam; PF02872; 5_nucleotid_C; 1." & @CRLF & _
"DR Pfam; PF00149; Metallophos; 1." & @CRLF & _
"DR PRINTS; PR01607; APYRASEFAMLY." & @CRLF & _
"DR SUPFAM; SSF55816; SSF55816; 1." & @CRLF & _
"DR SUPFAM; SSF56300; SSF56300; 1." & @CRLF & _
"DR PROSITE; PS00785; 5_NUCLEOTIDASE_1; 1." & @CRLF & _
"DR PROSITE; PS00786; 5_NUCLEOTIDASE_2; 1." & @CRLF & _
"PE 1: Evidence at protein level;" & @CRLF & _
"KW Complete proteome; Disulfide bond; Glycoprotein; Hydrolase;" & @CRLF & _
"KW Metal-binding; Nucleotide-binding; Reference proteome; Signal; Zinc." & @CRLF & _
"FT SIGNAL 1 25 {ECO:0000255}." & @CRLF & _
"FT CHAIN 26 572 Protein 5NUC." & @CRLF & _
"FT /FTId=PRO_0000000023." & @CRLF & _
"FT REGION 512 518 Substrate binding. {ECO:0000250}." & @CRLF & _
"FT METAL 39 39 Zinc 1. {ECO:0000250}." & @CRLF & _
"FT METAL 41 41 Zinc 1. {ECO:0000250}." & @CRLF & _
"FT METAL 93 93 Zinc 1. {ECO:0000250}." & @CRLF & _
"FT METAL 93 93 Zinc 2. {ECO:0000250}." & @CRLF & _
"FT METAL 125 125 Zinc 2. {ECO:0000250}." & @CRLF & _
"FT METAL 227 227 Zinc 2. {ECO:0000250}." & @CRLF & _
"FT METAL 250 250 Zinc 2. {ECO:0000250}." & @CRLF & _
"FT BINDING 361 361 Substrate. {ECO:0000250}." & @CRLF & _
"FT BINDING 399 399 Substrate. {ECO:0000250}." & @CRLF & _
"FT BINDING 404 404 Substrate. {ECO:0000250}." & @CRLF & _
"FT BINDING 427 427 Substrate. {ECO:0000250}." & @CRLF & _
"FT SITE 126 126 Transition state stabilizer." & @CRLF & _
"FT {ECO:0000250}." & @CRLF & _
"FT SITE 129 129 Transition state stabilizer." & @CRLF & _
"FT {ECO:0000250}." & @CRLF & _
"FT CARBOHYD 82 82 N-linked (GlcNAc...). {ECO:0000255}." & @CRLF & _
"FT CARBOHYD 454 454 N-linked (GlcNAc...). {ECO:0000255}." & @CRLF & _
"FT CARBOHYD 490 490 N-linked (GlcNAc...). {ECO:0000255}." & @CRLF & _
"FT DISULFID 54 64 {ECO:0000250}." & @CRLF & _
"FT DISULFID 360 365 {ECO:0000250}." & @CRLF & _
"FT DISULFID 488 491 {ECO:0000250}." & @CRLF & _
"**" & @CRLF & _
"** ################# INTERNAL SECTION ##################" & @CRLF & _
"**EV ECO:0000250; -; XXX; 01-JAN-1900." & @CRLF & _
"**EV ECO:0000255; -; XXX; 01-JAN-1900." & @CRLF & _
"**EV ECO:0000305; -; XXX; 01-JAN-1900." & @CRLF & _
"**ZB JSG, 08-MAR-2006; RAB, 07-MAR-2017;" & @CRLF & _
"SQ SEQUENCE 572 AA; 63354 MW; 69A652338C04536D CRC64;" & @CRLF & _
" MLFFLNFFVL VFSIELALLT ASAAAEDGSY EIIILHTNDM HARFDQTNAG SNKCQEKDKI" & @CRLF & _
" ASKCYGGFAR VSTMVKKFRE ENGSSVLFLN AGDTYTGTPW FTLYKETIAT EMMNILRPDA" & @CRLF & _
" ASLGNHEFDK GVEGLVPFLN GVTFPILTAN LDTSQEPTMT NAKNLKRSMI FTVSGHRVGV" & @CRLF & _
" IGYLTPDTKF LSDVGKVNFI PEVEAINTEA QRLKKEENAE IIIVVGHSGL IKDREIAEKC" & @CRLF & _
" PLVDIIVGGH SHTFLYTGSQ PDREVPVDVY PVVVTQSSGK KVPIVQAYCF TKYLGYFKVT" & @CRLF & _
" INGKGNVVGW TGQPILLNNN IPQDQEVLTA LEKYRERVEN YGNRVIGVSR VILNGGHTEC" & @CRLF & _
" RFHECNMGNL ITDAFVYANV ISTPMSTNAW TDASVVLYQS GGIRAPIDPR TAAGSITRLE" & @CRLF & _
" LDNVLPFGNA LYVVKVPGNV LRKALEHSVH RYSNTSGWGE FPQVSGLKIR FNVNEEIGKR" & @CRLF & _
" VKSVKVLCSN CSQPEYQPLR NKKTYNVIMD SFMKDGGDGY SMFKPLKIIK TLPLGDIETV" & @CRLF & _
" EAYIEKMGPI FPAVEGRITV LGGLQKSDED WH" & @CRLF & _
"//" & @CRLF & _
"ID 14311_ARATH Reviewed; 252 AA." & @CRLF & _
"AC Q9S9Z8; A0JQ87; F4HWN0; Q0WL19;" & @CRLF & _
"DT 05-DEC-2001, integrated into UniProtKB/Swiss-Prot." & @CRLF & _
"DT 16-NOV-2011, sequence version 2." & @CRLF & _
"DT 15-MAR-2017, entry version 98." & @CRLF & _
"DE RecName: Full=14-3-3-like protein GF14 omicron;" & @CRLF & _
"DE AltName: Full=General regulatory factor 11;" & @CRLF & _
"GN Name=GRF11; OrderedLocusNames=At1g34760; ORFNames=F21H2.3;" & @CRLF & _
"OS Arabidopsis thaliana (Mouse-ear cress)." & @CRLF & _
"OC Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta;" & @CRLF & _
"OC Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae;" & @CRLF & _
"OC Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Camelineae;" & @CRLF & _
"OC Arabidopsis." & @CRLF & _
"OX NCBI_TaxID=3702;" & @CRLF & _
"RN [1]" & @CRLF & _
"RP NUCLEOTIDE SEQUENCE [MRNA] (ISOFORM 2)." & @CRLF & _
"RC TISSUE=Leaf;" & @CRLF & _
"RA Alsterfjord M., Rosenquist M., Larsson C., Sommarin M.;" & @CRLF & _
"RT "Novel 14-3-3 isoforms in Arabidopsis thaliana.";" & @CRLF & _
"RL Submitted (NOV-2000) to the EMBL/GenBank/DDBJ databases." & @CRLF & _
"RN [2]" & @CRLF & _
"RP NUCLEOTIDE SEQUENCE [LARGE SCALE GENOMIC DNA]." & @CRLF & _
"RC STRAIN=cv. Columbia;" & @CRLF & _
"RX PubMed=11130712; DOI=10.1038/35048500;" & @CRLF & _
"RA Theologis A., Ecker J.R., Palm C.J., Federspiel N.A., Kaul S.," & @CRLF & _
"RA White O., Alonso J., Altafi H., Araujo R., Bowman C.L., Brooks S.Y.," & @CRLF & _
"RA Buehler E., Chan A., Chao Q., Chen H., Cheuk R.F., Chin C.W.," & @CRLF & _
"RA Chung M.K., Conn L., Conway A.B., Conway A.R., Creasy T.H., Dewar K.," & @CRLF & _
"RA Dunn P., Etgu P., Feldblyum T.V., Feng J.-D., Fong B., Fujii C.Y.," & @CRLF & _
"RA Gill J.E., Goldsmith A.D., Haas B., Hansen N.F., Hughes B., Huizar L.," & @CRLF & _
"RA Hunter J.L., Jenkins J., Johnson-Hopson C., Khan S., Khaykin E.," & @CRLF & _
"RA Kim C.J., Koo H.L., Kremenetskaia I., Kurtz D.B., Kwan A., Lam B.," & @CRLF & _
"RA Langin-Hooper S., Lee A., Lee J.M., Lenz C.A., Li J.H., Li Y.-P.," & @CRLF & _
"RA Lin X., Liu S.X., Liu Z.A., Luros J.S., Maiti R., Marziali A.," & @CRLF & _
"RA Militscher J., Miranda M., Nguyen M., Nierman W.C., Osborne B.I.," & @CRLF & _
"RA Pai G., Peterson J., Pham P.K., Rizzo M., Rooney T., Rowley D.," & @CRLF & _
"RA Sakano H., Salzberg S.L., Schwartz J.R., Shinn P., Southwick A.M.," & @CRLF & _
"RA Sun H., Tallon L.J., Tambunga G., Toriumi M.J., Town C.D.," & @CRLF & _
"RA Utterback T., Van Aken S., Vaysberg M., Vysotskaia V.S., Walker M.," & @CRLF & _
"RA Wu D., Yu G., Fraser C.M., Venter J.C., Davis R.W.;" & @CRLF & _
"RT "Sequence and analysis of chromosome 1 of the plant Arabidopsis" & @CRLF & _
"RT thaliana.";" & @CRLF & _
"RL Nature 408:816-820(2000)." & @CRLF & _
"RN [3]" & @CRLF & _
"RP GENOME REANNOTATION." & @CRLF & _
"RC STRAIN=cv. Columbia;" & @CRLF & _
"RG The Arabidopsis Information Resource (TAIR);" & @CRLF & _
"RL Submitted (APR-2011) to the EMBL/GenBank/DDBJ databases." & @CRLF & _
"RN [4]" & @CRLF & _
"RP NUCLEOTIDE SEQUENCE [LARGE SCALE MRNA] (ISOFORM 1)." & @CRLF & _
"RC STRAIN=cv. Columbia;" & @CRLF & _
"RA Totoki Y., Seki M., Ishida J., Nakajima M., Enju A., Kamiya A.," & @CRLF & _
"RA Narusaka M., Shin-i T., Nakagawa M., Sakamoto N., Oishi K., Kohara Y.," & @CRLF & _
"RA Kobayashi M., Toyoda A., Sakaki Y., Sakurai T., Iida K., Akiyama K.," & @CRLF & _
"RA Satou M., Toyoda T., Konagaya A., Carninci P., Kawai J.," & @CRLF & _
"RA Hayashizaki Y., Shinozaki K.;" & @CRLF & _
"RT "Large-scale analysis of RIKEN Arabidopsis full-length (RAFL) cDNAs.";" & @CRLF & _
"RL Submitted (JUL-2006) to the EMBL/GenBank/DDBJ databases." & @CRLF & _
"RN [5]" & @CRLF & _
"RP NUCLEOTIDE SEQUENCE [LARGE SCALE MRNA] (ISOFORM 3)." & @CRLF & _
"RC STRAIN=cv. Columbia;" & @CRLF & _
"RA Bautista V.R., Kim C.J., Chen H., Quinitio C., Ecker J.R.;" & @CRLF & _
"RT "Arabidopsis ORF Clones.";" & @CRLF & _
"RL Submitted (NOV-2006) to the EMBL/GenBank/DDBJ databases." & @CRLF & _
"CC -!- FUNCTION: Is associated with a DNA binding complex that binds to" & @CRLF & _
"CC the G box, a well-characterized cis-acting DNA regulatory element" & @CRLF & _
"CC found in plant genes. {ECO:0000250}." & @CRLF & _
"CC -!- ALTERNATIVE PRODUCTS:" & @CRLF & _
"CC Event=Alternative splicing; Named isoforms=3;" & @CRLF & _
"CC Comment=A number of isoforms are produced. According to EST" & @CRLF & _
"CC sequences.;" & @CRLF & _
"CC Name=1;" & @CRLF & _
"CC IsoId=Q9S9Z8-1; Sequence=Displayed;" & @CRLF & _
"CC Name=2;" & @CRLF & _
"CC IsoId=Q9S9Z8-2; Sequence=VSP_042043, VSP_042044;" & @CRLF & _
"CC Name=3;" & @CRLF & _
"CC IsoId=Q9S9Z8-3; Sequence=VSP_042045;" & @CRLF & _
"CC -!- SIMILARITY: Belongs to the 14-3-3 family. {ECO:0000305}." & @CRLF & _
"DR EMBL; AF323920; AAG47840.1; -; mRNA." & @CRLF & _
"DR EMBL; AC007894; AAD46005.1; -; Genomic_DNA." & @CRLF & _
"DR EMBL; CP002684; AEE31735.1; -; Genomic_DNA." & @CRLF & _
"DR EMBL; CP002684; AEE31736.1; -; Genomic_DNA." & @CRLF & _
"DR EMBL; AK230390; BAF02188.1; -; mRNA." & @CRLF & _
"DR EMBL; BT029457; ABK59686.1; -; mRNA." & @CRLF & _
"DR RefSeq; NP_001077649.1; NM_001084180.2. [Q9S9Z8-1]" & @CRLF & _
"DR RefSeq; NP_564451.2; NM_103196.4. [Q9S9Z8-3]" & @CRLF & _
"DR UniGene; At.11487; -." & @CRLF & _
"DR ProteinModelPortal; Q9S9Z8; -." & @CRLF & _
"DR SMR; Q9S9Z8; -." & @CRLF & _
"DR BioGrid; 25612; 2." & @CRLF & _
"DR IntAct; Q9S9Z8; 1." & @CRLF & _
"DR STRING; 3702.AT1G34760.1; -." & @CRLF & _
"DR iPTMnet; Q9S9Z8; -." & @CRLF & _
"DR PaxDb; Q9S9Z8; -." & @CRLF & _
"DR PRIDE; Q9S9Z8; -." & @CRLF & _
"DR EnsemblPlants; AT1G34760.2; AT1G34760.2; AT1G34760. [Q9S9Z8-1]" & @CRLF & _
"DR GeneID; 840380; -." & @CRLF & _
"DR Gramene; AT1G34760.2; AT1G34760.2; AT1G34760." & @CRLF & _
"DR KEGG; ath:AT1G34760; -." & @CRLF & _
"DR Araport; AT1G34760; -." & @CRLF & _
"DR TAIR; locus:2008381; AT1G34760." & @CRLF & _
"DR eggNOG; KOG0841; Eukaryota." & @CRLF & _
"DR eggNOG; COG5040; LUCA." & @CRLF & _
"DR HOGENOM; HOG000240379; -." & @CRLF & _
"DR InParanoid; Q9S9Z8; -." & @CRLF & _
"DR OMA; NEQAERY; -." & @CRLF & _
"DR Reactome; R-ATH-1445148; Translocation of GLUT4 to the plasma membrane." & @CRLF & _
"DR Reactome; R-ATH-3371453; Regulation of HSF1-mediated heat shock response." & @CRLF & _
"DR Reactome; R-ATH-3371511; HSF1 activation." & @CRLF & _
"DR PRO; PR:Q9S9Z8; -." & @CRLF & _
"DR Proteomes; UP000006548; Chromosome 1." & @CRLF & _
"DR Genevisible; Q9S9Z8; AT." & @CRLF & _
"DR GO; GO:0051117; F:ATPase binding; IDA:TAIR." & @CRLF & _
"DR Gene3D; 1.20.190.20; -; 1." & @CRLF & _
"DR InterPro; IPR000308; 14-3-3." & @CRLF & _
"DR InterPro; IPR023409; 14-3-3_CS." & @CRLF & _
"DR InterPro; IPR023410; 14-3-3_domain." & @CRLF & _
"DR PANTHER; PTHR18860; PTHR18860; 1." & @CRLF & _
"DR Pfam; PF00244; 14-3-3; 1." & @CRLF & _
"DR PIRSF; PIRSF000868; 14-3-3; 1." & @CRLF & _
"DR PRINTS; PR00305; 1433ZETA." & @CRLF & _
"DR SMART; SM00101; 14_3_3; 1." & @CRLF & _
"DR SUPFAM; SSF48445; SSF48445; 1." & @CRLF & _
"DR PROSITE; PS00796; 1433_1; 1." & @CRLF & _
"DR PROSITE; PS00797; 1433_2; 1." & @CRLF & _
"PE 2: Evidence at transcript level;" & @CRLF & _
"KW Alternative splicing; Complete proteome; Reference proteome." & @CRLF & _
"FT CHAIN 1 252 14-3-3-like protein GF14 omicron." & @CRLF & _
"FT /FTId=PRO_0000058673." & @CRLF & _
"FT VAR_SEQ 241 241 E -> K (in isoform 2)." & @CRLF & _
"FT {ECO:0000303|Ref.1}." & @CRLF & _
"FT /FTId=VSP_042043." & @CRLF & _
"FT VAR_SEQ 242 252 Missing (in isoform 2)." & @CRLF & _
"FT {ECO:0000303|Ref.1}." & @CRLF & _
"FT /FTId=VSP_042044." & @CRLF & _
"FT VAR_SEQ 252 252 N -> VNKI (in isoform 3)." & @CRLF & _
"FT {ECO:0000303|Ref.5}." & @CRLF & _
"FT /FTId=VSP_042045." & @CRLF & _
"FT CONFLICT 128 128 D -> G (in Ref. 4; BAF02188)." & @CRLF & _
"FT {ECO:0000305}." & @CRLF & _
"**" & @CRLF & _
"** ################# INTERNAL SECTION ##################" & @CRLF & _
"**DR Araport-CDS; AT1G34760.1; Araport11; -. [Q9S9Z8-3]" & @CRLF & _
"**DR Araport-CDS; AT1G34760.2; Araport11; -. [Q9S9Z8-1]" & @CRLF & _
"**EV ECO:0000250; -; XXX; 01-JAN-1900." & @CRLF & _
"**EV ECO:0000303; Ref.1; XXX; 01-JAN-1900." & @CRLF & _
"**EV ECO:0000303; Ref.5; XXX; 01-JAN-1900." & @CRLF & _
"**EV ECO:0000305; -; XXX; 01-JAN-1900." & @CRLF & _
"**YY According to Araport, the gene encoding this protein is alternatively spliced." & @CRLF & _
"**ZB MIT, 11-OCT-2011;" & @CRLF & _
"SQ SEQUENCE 252 AA; 28781 MW; 852335FF39915461 CRC64;" & @CRLF & _
" MENERAKQVY LAKLNEQAER YDEMVEAMKK VAALDVELTI EERNLLSVGY KNVIGARRAS" & @CRLF & _
" WRILSSIEQK EESKGNEQNA KRIKDYRTKV EEELSKICYD ILAVIDKHLV PFATSGESTV" & @CRLF & _
" FYYKMKGDYF RYLAEFKSGA DREEAADLSL KAYEAATSSA STELSTTHPI RLGLALNFSV" & @CRLF & _
" FYYEILNSPE RACHLAKRAF DEAIAELDSL NEDSYKDSTL IMQLLRDNLT LWTSDLEEGG" & @CRLF & _
" EQSKGHNQQD EN" & @CRLF & _
"//" & @CRLF & _
"ID W0TYI6_HUMAN Unreviewed; 154 AA." & @CRLF & _
"AC W0TYI6;" & @CRLF & _
"DT 19-MAR-2014, integrated into UniProtKB/TrEMBL." & @CRLF & _
"DT 19-MAR-2014, sequence version 1." & @CRLF & _
"DT 15-FEB-2017, entry version 23." & @CRLF & _
"DE SubName: Full=H2B histone family, member M {ECO:0000313|EMBL:CAA97844.2};" & @CRLF & _
"GN Name=H2BFM {ECO:0000313|EMBL:CAA97844.2};" & @CRLF & _
"GN ORFNames=LL0XNC01-240C2.2-001 {ECO:0000313|EMBL:CAA97844.2};" & @CRLF & _
"OS Homo sapiens (Human)." & @CRLF & _
"OC Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi;" & @CRLF & _
"OC Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini;" & @CRLF & _
"OC Catarrhini; Hominidae; Homo." & @CRLF & _
"OX NCBI_TaxID=9606 {ECO:0000313|EMBL:CAA97844.2};" & @CRLF & _
"RN [1] {ECO:0000313|EMBL:CAA97844.2}" & @CRLF & _
"RP NUCLEOTIDE SEQUENCE." & @CRLF & _
"RA Hunt A.;" & @CRLF & _
"RL Submitted (JAN-2009) to the EMBL/GenBank/DDBJ databases." & @CRLF & _
"CC -!- FUNCTION: Core component of nucleosome. Nucleosomes wrap and" & @CRLF & _
"CC compact DNA into chromatin, limiting DNA accessibility to the" & @CRLF & _
"CC cellular machineries which require DNA as a template. Histones" & @CRLF & _
"CC thereby play a central role in transcription regulation, DNA" & @CRLF & _
"CC repair, DNA replication and chromosomal stability. DNA" & @CRLF & _
"CC accessibility is regulated via a complex set of post-translational" & @CRLF & _
"CC modifications of histones, also called histone code, and" & @CRLF & _
"CC nucleosome remodeling. {ECO:0000256|SAAS:SAAS00295035}." & @CRLF & _
"CC -!- SUBUNIT: The nucleosome is a histone octamer containing two" & @CRLF & _
"CC molecules each of H2A, H2B, H3 and H4 assembled in one H3-H4" & @CRLF & _
"CC heterotetramer and two H2A-H2B heterodimers. The octamer wraps" & @CRLF & _
"CC approximately 147 bp of DNA. {ECO:0000256|SAAS:SAAS00565646}." & @CRLF & _
"CC -!- SUBCELLULAR LOCATION: Chromosome {ECO:0000256|SAAS:SAAS00680978}." & @CRLF & _
"CC -!- SUBCELLULAR LOCATION: Nucleus {ECO:0000256|SAAS:SAAS00593244}." & @CRLF & _
"CC -!- SIMILARITY: Belongs to the histone H2B family." & @CRLF & _
"CC {ECO:0000256|SAAS:SAAS00689654}." & @CRLF & _
"CC -----------------------------------------------------------------------" & @CRLF & _
"CC Copyrighted by the UniProt Consortium, see http://www.uniprot.org/terms" & @CRLF & _
"CC Distributed under the Creative Commons Attribution-NoDerivs License" & @CRLF & _
"CC -----------------------------------------------------------------------" & @CRLF & _
"DR EMBL; Z73497; CAA97844.2; -; Genomic_DNA." & @CRLF & _
"DR RefSeq; NP_001157888.1; NM_001164416.1." & @CRLF & _
"DR RefSeq; XP_006724703.1; XM_006724640.2." & @CRLF & _
"DR RefSeq; XP_011529224.1; XM_011530922.2." & @CRLF & _
"DR UniGene; Hs.376474; -." & @CRLF & _
"DR ProteinModelPortal; W0TYI6; -." & @CRLF & _
"DR SMR; W0TYI6; -." & @CRLF & _
"DR STRING; 9606.ENSP00000347119; -." & @CRLF & _
"DR PaxDb; W0TYI6; -." & @CRLF & _
"DR GeneID; 286436; -." & @CRLF & _
"DR KEGG; hsa:286436; -." & @CRLF & _
"DR UCSC; uc004els.2; human." & @CRLF & _
"DR CTD; 286436; -." & @CRLF & _
"DR eggNOG; KOG1744; Eukaryota." & @CRLF & _
"DR eggNOG; ENOG4111NV5; LUCA." & @CRLF & _
"DR KO; K11252; -." & @CRLF & _
"DR OMA; PKEANSM; -." & @CRLF & _
"DR OrthoDB; EOG091G0WZB; -." & @CRLF & _
"DR GenomeRNAi; 286436; -." & @CRLF & _
"DR ExpressionAtlas; W0TYI6; baseline and differential." & @CRLF & _
"DR GO; GO:0000786; C:nucleosome; IEA:UniProtKB-KW." & @CRLF & _
"DR GO; GO:0005634; C:nucleus; IEA:UniProtKB-SubCell." & @CRLF & _
"DR GO; GO:0003677; F:DNA binding; IEA:UniProtKB-KW." & @CRLF & _
"DR Gene3D; 1.10.20.10; -; 1." & @CRLF & _
"DR InterPro; IPR009072; Histone-fold." & @CRLF & _
"DR InterPro; IPR007125; Histone_H2A/H2B/H3." & @CRLF & _
"DR InterPro; IPR000558; Histone_H2B." & @CRLF & _
"DR PANTHER; PTHR23428; PTHR23428; 1." & @CRLF & _
"DR Pfam; PF00125; Histone; 1." & @CRLF & _
"DR PRINTS; PR00621; HISTONEH2B." & @CRLF & _
"DR SMART; SM00427; H2B; 1." & @CRLF & _
"DR SUPFAM; SSF47113; SSF47113; 1." & @CRLF & _
"PE 3: Inferred from homology;" & @CRLF & _
"KW Chromosome {ECO:0000256|SAAS:SAAS00454795};" & @CRLF & _
"KW DNA-binding {ECO:0000256|SAAS:SAAS00454795};" & @CRLF & _
"KW Nucleosome core {ECO:0000256|SAAS:SAAS00454795};" & @CRLF & _
"KW Nucleus {ECO:0000256|SAAS:SAAS00486878}." & @CRLF & _
"FT DOMAIN 1 123 Histone. {ECO:0000259|Pfam:PF00125}." & @CRLF & _
"SQ SEQUENCE 154 AA; 17001 MW; 700C131F5F5818B0 CRC64;" & @CRLF & _
" MAAASAMAEA SSETTSEEGQ SIQEPKEANS TKAQKQKRRG CRGSRRRHAN RRGDSFGDSF" & @CRLF & _
" TPYFPRVLKQ VHQGLSLSQE AVSVMDSMIH DILDRIATEA GQLAHYTKRV TITSRDIQMA" & @CRLF & _
" VRLLLPGKMG KLAEAQGTNA ALRTSLCAIW QQRK" & @CRLF & _
"//"
; Run the pattern over the whole input. With $STR_REGEXPARRAYGLOBALFULLMATCH,
; StringRegExp returns an array of arrays: one inner array per match, each
; holding the full match followed by its capture groups.
Local $aArray = StringRegExp($sString, $sRegex, $STR_REGEXPARRAYGLOBALFULLMATCH)
; Capture the status right away: later function calls overwrite @error/@extended.
Local $iRegexError = @error, $iRegexExtended = @extended
Local $aFullArray[0]
If $iRegexError Then
    ; Report the failure instead of silently showing an empty result.
    ; Per the StringRegExp documentation: @error 1 = no matches,
    ; 2 = bad pattern (@extended = offset of the error in the pattern).
    ConsoleWriteError("StringRegExp failed: @error=" & $iRegexError & ", @extended=" & $iRegexExtended & @CRLF)
Else
    ; Flatten the per-match arrays into a single 1D array for display.
    For $i = 0 To UBound($aArray) - 1
        _ArrayConcatenate($aFullArray, $aArray[$i])
    Next
EndIf
$aArray = $aFullArray
; Present the entire match result (empty when the regex failed or matched nothing)
_ArrayDisplay($aArray, "Result")
; Please keep in mind that these code samples are automatically generated and are not guaranteed to work.
; If you find any syntax errors, feel free to submit a bug report.
; For a full regex reference for AutoIt, please visit: https://www.autoitscript.com/autoit3/docs/functions/StringRegExp.htm