// Pattern matching one UniProtKB flat-file entry, from its "ID" line to the
// terminating "//".
//
// Capture groups:
//   1 - the first accession on the AC line (word characters before the first ";")
//   2 - the numeric NCBI taxonomy id from the OX line
//   3 - everything from the first line that starts with five whitespace
//       characters up to (not including) the "//" terminator
//
// Inline flags (?ms): "^" matches at every line start and "." also matches
// newlines, so the lazy ".*?" runs can skip over whole lines. The trailing "x"
// modifier makes PCRE ignore literal whitespace inside the pattern.
//
// The two terminator slashes are escaped on purpose: "/" is the pattern
// delimiter, so an unescaped "//" closed the pattern early and PHP rejected
// the remainder with "Unknown modifier '/'", making every match call fail.
//
// The (?#...) group is an inline regex comment (an open question about
// optionally capturing the VAR_SEQ feature); it does not affect matching.
$re = '/(?ms)ID\s+.*?^AC\s+(\w+);.*?^OX\s+NCBI_TaxID=(\d+).*?(?#how to optionally capture group ^FT\s+VAR_SEQ.*?\/FTId=\w+\. ).*?^\s{5}(.*?)\/\//x';
$str = 'ID TTC29_HUMAN Reviewed; 475 AA.
AC Q8NA56; A4GU95; Q9BXB6;
DT 10-JUL-2007, integrated into UniProtKB/Swiss-Prot.
DT 10-JUL-2007, sequence version 2.
DT 15-FEB-2017, entry version 114.
DE RecName: Full=Tetratricopeptide repeat protein 29;
DE Short=TPR repeat protein 29;
DE AltName: Full=Protein TBPP2A;
DE AltName: Full=Testis development protein NYD-SP14;
GN Name=TTC29;
OS Homo sapiens (Human).
OC Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi;
OC Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini;
OC Catarrhini; Hominidae; Homo.
OX NCBI_TaxID=9606;
RN [1]
RP NUCLEOTIDE SEQUENCE [MRNA] (ISOFORM 1), AND VARIANTS TYR-140 AND
RP THR-276.
RC TISSUE=Testis;
RA Sha J.H.;
RT "Cloning and identification of a novel gene related development gene
RT NYD-SP14.";
RL Submitted (FEB-2001) to the EMBL/GenBank/DDBJ databases.
RN [2]
RP NUCLEOTIDE SEQUENCE [MRNA] (ISOFORM 1), AND VARIANTS TYR-140 AND
RP THR-276.
RC TISSUE=Mammary cancer;
RA Li J.M., Cheng J., Wang Q.;
RL Submitted (FEB-2007) to the EMBL/GenBank/DDBJ databases.
RN [3]
RP NUCLEOTIDE SEQUENCE [LARGE SCALE MRNA] (ISOFORM 2).
RC TISSUE=Testis;
RX PubMed=14702039; DOI=10.1038/ng1285;
RA Ota T., Suzuki Y., Nishikawa T., Otsuki T., Sugiyama T., Irie R.,
RA Wakamatsu A., Hayashi K., Sato H., Nagai K., Kimura K., Makita H.,
RA Sekine M., Obayashi M., Nishi T., Shibahara T., Tanaka T., Ishii S.,
RA Yamamoto J., Saito K., Kawai Y., Isono Y., Nakamura Y., Nagahari K.,
RA Murakami K., Yasuda T., Iwayanagi T., Wagatsuma M., Shiratori A.,
RA Sudo H., Hosoiri T., Kaku Y., Kodaira H., Kondo H., Sugawara M.,
RA Takahashi M., Kanda K., Yokoi T., Furuya T., Kikkawa E., Omura Y.,
RA Abe K., Kamihara K., Katsuta N., Sato K., Tanikawa M., Yamazaki M.,
RA Ninomiya K., Ishibashi T., Yamashita H., Murakawa K., Fujimori K.,
RA Tanai H., Kimata M., Watanabe M., Hiraoka S., Chiba Y., Ishida S.,
RA Ono Y., Takiguchi S., Watanabe S., Yosida M., Hotuta T., Kusano J.,
RA Kanehori K., Takahashi-Fujii A., Hara H., Tanase T.-O., Nomura Y.,
RA Togiya S., Komai F., Hara R., Takeuchi K., Arita M., Imose N.,
RA Musashino K., Yuuki H., Oshima A., Sasaki N., Aotsuka S.,
RA Yoshikawa Y., Matsunawa H., Ichihara T., Shiohata N., Sano S.,
RA Moriya S., Momiyama H., Satoh N., Takami S., Terashima Y., Suzuki O.,
RA Nakagawa S., Senoh A., Mizoguchi H., Goto Y., Shimizu F., Wakebe H.,
RA Hishigaki H., Watanabe T., Sugiyama A., Takemoto M., Kawakami B.,
RA Yamazaki M., Watanabe K., Kumagai A., Itakura S., Fukuzumi Y.,
RA Fujimori Y., Komiyama M., Tashiro H., Tanigami A., Fujiwara T.,
RA Ono T., Yamada K., Fujii Y., Ozaki K., Hirao M., Ohmori Y.,
RA Kawabata A., Hikiji T., Kobatake N., Inagaki H., Ikema Y., Okamoto S.,
RA Okitani R., Kawakami T., Noguchi S., Itoh T., Shigeta K., Senba T.,
RA Matsumura K., Nakajima Y., Mizuno T., Morinaga M., Sasaki M.,
RA Togashi T., Oyama M., Hata H., Watanabe M., Komatsu T.,
RA Mizushima-Sugano J., Satoh T., Shirai Y., Takahashi Y., Nakagawa K.,
RA Okumura K., Nagase T., Nomura N., Kikuchi H., Masuho Y., Yamashita R.,
RA Nakai K., Yada T., Nakamura Y., Ohara O., Isogai T., Sugano S.;
RT "Complete sequencing and characterization of 21,243 full-length human
RT cDNAs.";
RL Nat. Genet. 36:40-45(2004).
RN [4]
RP NUCLEOTIDE SEQUENCE [LARGE SCALE GENOMIC DNA].
RX PubMed=15815621; DOI=10.1038/nature03466;
RA Hillier L.W., Graves T.A., Fulton R.S., Fulton L.A., Pepin K.H.,
RA Minx P., Wagner-McPherson C., Layman D., Wylie K., Sekhon M.,
RA Becker M.C., Fewell G.A., Delehaunty K.D., Miner T.L., Nash W.E.,
RA Kremitzki C., Oddy L., Du H., Sun H., Bradshaw-Cordum H., Ali J.,
RA Carter J., Cordes M., Harris A., Isak A., van Brunt A., Nguyen C.,
RA Du F., Courtney L., Kalicki J., Ozersky P., Abbott S., Armstrong J.,
RA Belter E.A., Caruso L., Cedroni M., Cotton M., Davidson T., Desai A.,
RA Elliott G., Erb T., Fronick C., Gaige T., Haakenson W., Haglund K.,
RA Holmes A., Harkins R., Kim K., Kruchowski S.S., Strong C.M.,
RA Grewal N., Goyea E., Hou S., Levy A., Martinka S., Mead K.,
RA McLellan M.D., Meyer R., Randall-Maher J., Tomlinson C.,
RA Dauphin-Kohlberg S., Kozlowicz-Reilly A., Shah N.,
RA Swearengen-Shahid S., Snider J., Strong J.T., Thompson J., Yoakum M.,
RA Leonard S., Pearman C., Trani L., Radionenko M., Waligorski J.E.,
RA Wang C., Rock S.M., Tin-Wollam A.-M., Maupin R., Latreille P.,
RA Wendl M.C., Yang S.-P., Pohl C., Wallis J.W., Spieth J., Bieri T.A.,
RA Berkowicz N., Nelson J.O., Osborne J., Ding L., Meyer R., Sabo A.,
RA Shotland Y., Sinha P., Wohldmann P.E., Cook L.L., Hickenbotham M.T.,
RA Eldred J., Williams D., Jones T.A., She X., Ciccarelli F.D.,
RA Izaurralde E., Taylor J., Schmutz J., Myers R.M., Cox D.R., Huang X.,
RA McPherson J.D., Mardis E.R., Clifton S.W., Warren W.C.,
RA Chinwalla A.T., Eddy S.R., Marra M.A., Ovcharenko I., Furey T.S.,
RA Miller W., Eichler E.E., Bork P., Suyama M., Torrents D.,
RA Waterston R.H., Wilson R.K.;
RT "Generation and annotation of the DNA sequences of human chromosomes 2
RT and 4.";
RL Nature 434:724-731(2005).
CC -!- ALTERNATIVE PRODUCTS:
CC Event=Alternative splicing; Named isoforms=2;
CC Name=1;
CC IsoId=Q8NA56-1; Sequence=Displayed;
CC Name=2;
CC IsoId=Q8NA56-2; Sequence=VSP_026638;
CC Note=No experimental confirmation available.;
CC -!- SEQUENCE CAUTION:
CC Sequence=AAK29064.1; Type=Frameshift; Positions=467; Evidence={ECO:0000305};
CC -----------------------------------------------------------------------
CC Copyrighted by the UniProt Consortium, see http://www.uniprot.org/terms
CC Distributed under the Creative Commons Attribution-NoDerivs License
CC -----------------------------------------------------------------------
DR EMBL; AF345910; AAK29064.1; ALT_FRAME; mRNA.
DR EMBL; EF432564; ABO31099.1; -; mRNA.
DR EMBL; AK093145; BAC04072.1; -; mRNA.
DR EMBL; AC092435; -; NOT_ANNOTATED_CDS; Genomic_DNA.
DR EMBL; AC097497; -; NOT_ANNOTATED_CDS; Genomic_DNA.
DR EMBL; AC093887; -; NOT_ANNOTATED_CDS; Genomic_DNA.
DR CCDS; CCDS47141.1; -. [Q8NA56-1]
DR RefSeq; NP_001287690.1; NM_001300761.2.
DR RefSeq; NP_001304735.1; NM_001317806.1.
DR RefSeq; NP_114162.2; NM_031956.3. [Q8NA56-1]
DR UniGene; Hs.378893; -.
DR ProteinModelPortal; Q8NA56; -.
DR SMR; Q8NA56; -.
DR BioGrid; 123805; 16.
DR STRING; 9606.ENSP00000316740; -.
DR iPTMnet; Q8NA56; -.
DR PhosphoSitePlus; Q8NA56; -.
DR BioMuta; TTC29; -.
DR DMDM; 152112335; -.
DR PaxDb; Q8NA56; -.
DR PeptideAtlas; Q8NA56; -.
DR PRIDE; Q8NA56; -.
DR Ensembl; ENST00000325106; ENSP00000316740; ENSG00000137473. [Q8NA56-1]
DR GeneID; 83894; -.
DR KEGG; hsa:83894; -.
DR UCSC; uc003ikw.5; human. [Q8NA56-1]
DR CTD; 83894; -.
DR DisGeNET; 83894; -.
DR GeneCards; TTC29; -.
DR HGNC; HGNC:29936; TTC29.
DR HPA; HPA037006; -.
DR HPA; HPA061473; -.
DR neXtProt; NX_Q8NA56; -.
DR OpenTargets; ENSG00000137473; -.
DR PharmGKB; PA145147799; -.
DR eggNOG; ENOG410IFVK; Eukaryota.
DR eggNOG; ENOG410XPVA; LUCA.
DR GeneTree; ENSGT00390000008611; -.
DR HOGENOM; HOG000067965; -.
DR HOVERGEN; HBG108611; -.
DR InParanoid; Q8NA56; -.
DR PhylomeDB; Q8NA56; -.
DR TreeFam; TF328344; -.
DR ChiTaRS; TTC29; human.
DR GenomeRNAi; 83894; -.
DR PRO; PR:Q8NA56; -.
DR Proteomes; UP000005640; Chromosome 4.
DR Bgee; ENSG00000137473; -.
DR CleanEx; HS_TTC29; -.
DR ExpressionAtlas; Q8NA56; baseline and differential.
DR Genevisible; Q8NA56; HS.
DR Gene3D; 1.25.40.10; -; 2.
DR InterPro; IPR013026; TPR-contain_dom.
DR InterPro; IPR011990; TPR-like_helical_dom.
DR InterPro; IPR019734; TPR_repeat.
DR SMART; SM00028; TPR; 4.
DR SUPFAM; SSF48452; SSF48452; 1.
DR PROSITE; PS50293; TPR_REGION; 2.
PE 2: Evidence at transcript level;
KW Alternative splicing; Complete proteome; Polymorphism;
KW Reference proteome; Repeat; TPR repeat.
FT CHAIN 1 475 Tetratricopeptide repeat protein 29.
FT /FTId=PRO_0000294435.
FT REPEAT 182 215 TPR 1.
FT REPEAT 234 267 TPR 2.
FT REPEAT 274 307 TPR 3.
FT REPEAT 314 347 TPR 4.
FT REPEAT 354 387 TPR 5.
FT VAR_SEQ 1 1 M -> MIPMFTVTLEDSGTLWKSLHSSSESE (in
FT isoform 2).
FT {ECO:0000303|PubMed:14702039}.
FT /FTId=VSP_026638.
FT VARIANT 94 94 L -> P (in dbSNP:rs35123039).
FT /FTId=VAR_033179.
FT VARIANT 140 140 H -> Y (in dbSNP:rs17610219).
FT {ECO:0000269|Ref.1, ECO:0000269|Ref.2}.
FT /FTId=VAR_033180.
FT VARIANT 276 276 A -> T (in dbSNP:rs10013280).
FT {ECO:0000269|Ref.1, ECO:0000269|Ref.2}.
FT /FTId=VAR_033181.
FT CONFLICT 239 239 L -> F (in Ref. 3; BAC04072).
FT {ECO:0000305}.
SQ SEQUENCE 475 AA; 55082 MW; 09BF33E42330C53A CRC64;
MTTLPPLPMT RPKLTALARQ KLPCSSRKIP RSQLIKEKDD IDHYLEVNFK GLSKEEVAAY
RNSYKKNICV DMLRDGYHKS FTELFALMER WDALREAARV RSLFWLQKPL EEQPDKLDYL
YHYLTRAEDA ERKESFEDVH NNLYALACYF NNSEDKWVRN HFYERCFKIA QLIKIDCGKK
EAEAHMHMGL LYEEDGQLLE AAEHYEAFHQ LTQGRIWKDE TGRSLNLLAC ESLLRTYRLL
SDKMLENKEY KQAIKILIKA SEIAKEGSDK KMEAEASYYL GLAHLAAEEY ETALTVLDTY
CKISTDLDDD LSLGRGYEAI AKVLQSQGEM TEAIKYLKKF VKIARNNFQS LDLVRASTML
GDIYNEKGYY NKASECFQQA FDTTVELMSM PLMDETKVHY GIAKAHQMML TVNNYIESAD
LTSLNYLLSW KESRGNIEPD PVTEEFRGST VEAVSQNSER LEELSRFPGD QKNET
//
ID 2NPD_NEUCR Reviewed; 378 AA.
AC Q01284; Q7RV78;
DT 01-NOV-1997, integrated into UniProtKB/Swiss-Prot.
DT 01-NOV-1996, sequence version 1.
DT 15-FEB-2017, entry version 106.
DE RecName: Full=Nitronate monooxygenase;
DE EC=1.13.12.16;
DE AltName: Full=2-nitropropane dioxygenase;
DE Short=2-NPD;
DE AltName: Full=Nitroalkane oxidase;
DE Flags: Precursor;
GN Name=ncd-2; ORFNames=G17A4.200, NCU03949;
OS Neurospora crassa (strain ATCC 24698 / 74-OR23-1A / CBS 708.71 / DSM
OS 1257 / FGSC 987).
OC Eukaryota; Fungi; Dikarya; Ascomycota; Pezizomycotina;
OC Sordariomycetes; Sordariomycetidae; Sordariales; Sordariaceae;
OC Neurospora.
OX NCBI_TaxID=367110;
RN [1]
RP NUCLEOTIDE SEQUENCE [MRNA], CATALYTIC ACTIVITY, SUBSTRATE SPECIFICITY,
RP COFACTOR, BIOPHYSICOCHEMICAL PROPERTIES, SUBUNIT, AND REACTION
RP MECHANISM.
RC STRAIN=ATCC 10337 / FGSC 1758 / NBRC 6067 / IMI 53239;
RX PubMed=9501443;
RA Gorlatova N., Tchorzewski M., Kurihara T., Soda K., Esaki N.;
RT "Purification, characterization, and mechanism of a flavin
RT mononucleotide-dependent 2-nitropropane dioxygenase from Neurospora
RT crassa.";
RL Appl. Environ. Microbiol. 64:1029-1033(1998).
RN [2]
RP NUCLEOTIDE SEQUENCE [LARGE SCALE GENOMIC DNA].
RC STRAIN=ATCC 24698 / 74-OR23-1A / CBS 708.71 / DSM 1257 / FGSC 987;
RX PubMed=12655011; DOI=10.1093/nar/gkg293;
RA Mannhaupt G., Montrone C., Haase D., Mewes H.-W., Aign V.,
RA Hoheisel J.D., Fartmann B., Nyakatura G., Kempken F., Maier J.,
RA Schulte U.;
RT "What\'s in the genome of a filamentous fungus? Analysis of the
RT Neurospora genome sequence.";
RL Nucleic Acids Res. 31:1944-1954(2003).
RN [3]
RP NUCLEOTIDE SEQUENCE [LARGE SCALE GENOMIC DNA].
RC STRAIN=ATCC 24698 / 74-OR23-1A / CBS 708.71 / DSM 1257 / FGSC 987;
RX PubMed=12712197; DOI=10.1038/nature01554;
RA Galagan J.E., Calvo S.E., Borkovich K.A., Selker E.U., Read N.D.,
RA Jaffe D.B., FitzHugh W., Ma L.-J., Smirnov S., Purcell S., Rehman B.,
RA Elkins T., Engels R., Wang S., Nielsen C.B., Butler J., Endrizzi M.,
RA Qui D., Ianakiev P., Bell-Pedersen D., Nelson M.A.,
RA Werner-Washburne M., Selitrennikoff C.P., Kinsey J.A., Braun E.L.,
RA Zelter A., Schulte U., Kothe G.O., Jedd G., Mewes H.-W., Staben C.,
RA Marcotte E., Greenberg D., Roy A., Foley K., Naylor J.,
RA Stange-Thomann N., Barrett R., Gnerre S., Kamal M., Kamvysselis M.,
RA Mauceli E.W., Bielke C., Rudd S., Frishman D., Krystofova S.,
RA Rasmussen C., Metzenberg R.L., Perkins D.D., Kroken S., Cogoni C.,
RA Macino G., Catcheside D.E.A., Li W., Pratt R.J., Osmani S.A.,
RA DeSouza C.P.C., Glass N.L., Orbach M.J., Berglund J.A., Voelker R.,
RA Yarden O., Plamann M., Seiler S., Dunlap J.C., Radford A., Aramayo R.,
RA Natvig D.O., Alex L.A., Mannhaupt G., Ebbole D.J., Freitag M.,
RA Paulsen I., Sachs M.S., Lander E.S., Nusbaum C., Birren B.W.;
RT "The genome sequence of the filamentous fungus Neurospora crassa.";
RL Nature 422:859-868(2003).
RN [4]
RP COFACTOR, SUBSTRATE SPECIFICITY, AND REACTION MECHANISM.
RX PubMed=19577534; DOI=10.1016/j.abb.2009.06.018;
RA Gadda G., Francis K.;
RT "Nitronate monooxygenase, a model for anionic flavin semiquinone
RT intermediates in oxidative catalysis.";
RL Arch. Biochem. Biophys. 493:53-61(2010).
CC -!- FUNCTION: Catalyzes the oxidation of alkyl nitronates to produce
CC the corresponding carbonyl compounds and nitrites. Anionic forms
CC of nitroalkanes are much better substrates than are neutral forms.
CC -!- CATALYTIC ACTIVITY: Ethylnitronate + O(2) = acetaldehyde + nitrite
CC + other products. {ECO:0000269|PubMed:9501443}.
CC -!- COFACTOR:
CC Name=FMN; Xref=ChEBI:CHEBI:58210;
CC Evidence={ECO:0000269|PubMed:19577534,
CC ECO:0000269|PubMed:9501443};
CC Note=Binds 1 FMN per subunit. {ECO:0000269|PubMed:19577534,
CC ECO:0000269|PubMed:9501443};
CC -!- BIOPHYSICOCHEMICAL PROPERTIES:
CC Kinetic parameters:
CC KM=3.1 mM for 2-nitropropane {ECO:0000269|PubMed:9501443};
CC KM=6 mM for nitroethane {ECO:0000269|PubMed:9501443};
CC KM=8.3 mM for 1-nitropropane {ECO:0000269|PubMed:9501443};
CC -!- SUBUNIT: Homodimer. {ECO:0000269|PubMed:9501443}.
CC -!- SIMILARITY: Belongs to the nitronate monooxygenase family.
CC {ECO:0000305}.
CC -----------------------------------------------------------------------
CC Copyrighted by the UniProt Consortium, see http://www.uniprot.org/terms
CC Distributed under the Creative Commons Attribution-NoDerivs License
CC -----------------------------------------------------------------------
DR EMBL; U22530; AAA64218.1; -; mRNA.
DR EMBL; BX908812; CAF06155.1; -; Genomic_DNA.
DR EMBL; CM002241; EAA28352.1; -; Genomic_DNA.
DR PIR; T46693; T46693.
DR RefSeq; XP_957588.1; XM_952495.3.
DR ProteinModelPortal; Q01284; -.
DR EnsemblFungi; EAA28352; EAA28352; NCU03949.
DR GeneID; 3873678; -.
DR KEGG; ncr:NCU03949; -.
DR EuPathDB; FungiDB:NCU03949; -.
DR HOGENOM; HOG000123285; -.
DR InParanoid; Q01284; -.
DR KO; K00459; -.
DR OMA; VDAGGHQ; -.
DR OrthoDB; EOG092651WG; -.
DR BioCyc; MetaCyc:MONOMER-302; -.
DR BRENDA; 1.13.11.32; 3627.
DR BRENDA; 1.13.12.16; 3627.
DR SABIO-RK; Q01284; -.
DR Proteomes; UP000001805; Chromosome 5, Linkage Group VI.
DR GO; GO:0005634; C:nucleus; IBA:GO_Central.
DR GO; GO:0018580; F:nitronate monooxygenase activity; IEA:UniProtKB-EC.
DR GO; GO:0003700; F:transcription factor activity, sequence-specific DNA binding; IBA:GO_Central.
DR GO; GO:0009410; P:response to xenobiotic stimulus; IBA:GO_Central.
DR Gene3D; 3.20.20.70; -; 1.
DR InterPro; IPR013785; Aldolase_TIM.
DR InterPro; IPR004136; NMO.
DR Pfam; PF03060; NMO; 1.
PE 1: Evidence at protein level;
KW Complete proteome; Flavoprotein; FMN; Monooxygenase; Oxidoreductase;
KW Reference proteome.
FT PROPEP 1 15 {ECO:0000255}.
FT /FTId=PRO_0000020575.
FT CHAIN 16 378 Nitronate monooxygenase.
FT /FTId=PRO_0000020576.
FT NP_BIND 37 39 FMN. {ECO:0000250}.
FT NP_BIND 229 231 FMN. {ECO:0000250}.
FT NP_BIND 252 253 FMN. {ECO:0000250}.
FT ACT_SITE 196 196 Proton acceptor. {ECO:0000255}.
FT BINDING 196 196 Substrate. {ECO:0000250}.
SQ SEQUENCE 378 AA; 39916 MW; E453EB43FD23E441 CRC64;
MHFPGHSSKK EESAQAALTK LNSWFPTTKN PVIISAPMYL IANGTLAAEV SKAGGIGFVA
GGSDFRPGSS HLTALSTELA SARSRLGLTD RPLTPLPGIG VGLILTHTIS VPYVTDTVLP
ILIEHSPQAV WLFANDPDFE ASSEPGAKGT AKQIIEALHA SGFVVFFQVG TVKDARKAAA
DGADVIVAQG IDAGGHQLAT GSGIVSLVPE VRDMLDREFK EREVVVVAAG GVADGRGVVG
ALGLGAEGVV LGTRFTVAVE ASTPEFRRKV ILETNDGGLN TVKSHFHDQI NCNTIWHNVY
DGRAVRNASY DDHAAGVPFE ENHKKFKEAA SSGDNSRAVT WSGTAVGLIK DQRPAGDIVR
ELREEAKERI KKIQAFAA
//
ID 5NTD_LUTLO Reviewed; 572 AA.
AC Q9XZ43;
DT 11-JAN-2001, integrated into UniProtKB/Swiss-Prot.
DT 01-NOV-1999, sequence version 1.
DT 12-APR-2017, entry version 83.
DE RecName: Full=Protein 5NUC;
DE Includes:
DE RecName: Full=UDP-sugar hydrolase;
DE EC=3.6.1.45;
DE AltName: Full=UDP-sugar diphosphatase;
DE AltName: Full=UDP-sugar pyrophosphatase;
DE Includes:
DE RecName: Full=5\'-nucleotidase;
DE Short=5\'-NT;
DE EC=3.1.3.5;
DE Flags: Precursor;
GN Name=5NUC;
OS Lutzomyia longipalpis (Sand fly).
OC Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta;
OC Pterygota; Neoptera; Holometabola; Diptera; Nematocera; Psychodoidea;
OC Psychodidae; Lutzomyia; Lutzomyia.
OX NCBI_TaxID=7200;
RN [1]
RP NUCLEOTIDE SEQUENCE [MRNA].
RC STRAIN=Jacobina; TISSUE=Salivary gland;
RX PubMed=10611354; DOI=10.1073/pnas.96.26.15155;
RA Charlab R., Valenzuela J.G., Rowton E.D., Ribeiro J.M.;
RT "Toward an understanding of the biochemical and pharmacological
RT complexity of the saliva of a hematophagous sand fly Lutzomyia
RT longipalpis.";
RL Proc. Natl. Acad. Sci. U.S.A. 96:15155-15160(1999).
RN [2]
RP CHARACTERIZATION.
RC TISSUE=Salivary gland;
RX PubMed=10727894; DOI=10.1016/S0965-1748(99)00123-X;
RA Ribeiro J.M.C., Rowton E.D., Charlab R.;
RT "The salivary 5\'-nucleotidase/phosphodiesterase of the hematophagus
RT sand fly, Lutzomyia longipalpis.";
RL Insect Biochem. Mol. Biol. 30:279-285(2000).
RN [3]
RP ERRATUM.
RA Ribeiro J.M.C., Rowton E.D., Charlab R.;
RL Insect Biochem. Mol. Biol. 30:609-609(2000).
CC -!- FUNCTION: Degradation of external UDP-glucose to uridine
CC monophosphate and glucose-1-phosphate, which can then be used by
CC the cell. {ECO:0000250}.
CC -!- CATALYTIC ACTIVITY: UDP-sugar + H(2)O = UMP + alpha-D-aldose 1-
CC phosphate.
CC -!- CATALYTIC ACTIVITY: A 5\'-ribonucleotide + H(2)O = a ribonucleoside
CC + phosphate.
CC -!- COFACTOR:
CC Name=Zn(2+); Xref=ChEBI:CHEBI:29105; Evidence={ECO:0000250};
CC -!- SIMILARITY: Belongs to the 5\'-nucleotidase family. {ECO:0000305}.
DR EMBL; AF132510; AAD32190.1; -; mRNA.
DR ProteinModelPortal; Q9XZ43; -.
DR SMR; Q9XZ43; -.
DR Proteomes; UP000092461; Unassembled WGS sequence.
DR GO; GO:0008253; F:5\'-nucleotidase activity; IEA:UniProtKB-EC.
DR GO; GO:0046872; F:metal ion binding; IEA:UniProtKB-KW.
DR GO; GO:0000166; F:nucleotide binding; IEA:UniProtKB-KW.
DR GO; GO:0008768; F:UDP-sugar diphosphatase activity; IEA:UniProtKB-EC.
DR GO; GO:0009166; P:nucleotide catabolic process; IEA:InterPro.
DR Gene3D; 3.60.21.10; -; 1.
DR Gene3D; 3.90.780.10; -; 1.
DR InterPro; IPR008334; 5\'-Nucleotdase_C.
DR InterPro; IPR006146; 5\'-Nucleotdase_CS.
DR InterPro; IPR006179; 5_nucleotidase/apyrase.
DR InterPro; IPR004843; Calcineurin-like_PHP_ApaH.
DR InterPro; IPR029052; Metallo-depent_PP-like.
DR PANTHER; PTHR11575; PTHR11575; 1.
DR Pfam; PF02872; 5_nucleotid_C; 1.
DR Pfam; PF00149; Metallophos; 1.
DR PRINTS; PR01607; APYRASEFAMLY.
DR SUPFAM; SSF55816; SSF55816; 1.
DR SUPFAM; SSF56300; SSF56300; 1.
DR PROSITE; PS00785; 5_NUCLEOTIDASE_1; 1.
DR PROSITE; PS00786; 5_NUCLEOTIDASE_2; 1.
PE 1: Evidence at protein level;
KW Complete proteome; Disulfide bond; Glycoprotein; Hydrolase;
KW Metal-binding; Nucleotide-binding; Reference proteome; Signal; Zinc.
FT SIGNAL 1 25 {ECO:0000255}.
FT CHAIN 26 572 Protein 5NUC.
FT /FTId=PRO_0000000023.
FT REGION 512 518 Substrate binding. {ECO:0000250}.
FT METAL 39 39 Zinc 1. {ECO:0000250}.
FT METAL 41 41 Zinc 1. {ECO:0000250}.
FT METAL 93 93 Zinc 1. {ECO:0000250}.
FT METAL 93 93 Zinc 2. {ECO:0000250}.
FT METAL 125 125 Zinc 2. {ECO:0000250}.
FT METAL 227 227 Zinc 2. {ECO:0000250}.
FT METAL 250 250 Zinc 2. {ECO:0000250}.
FT BINDING 361 361 Substrate. {ECO:0000250}.
FT BINDING 399 399 Substrate. {ECO:0000250}.
FT BINDING 404 404 Substrate. {ECO:0000250}.
FT BINDING 427 427 Substrate. {ECO:0000250}.
FT SITE 126 126 Transition state stabilizer.
FT {ECO:0000250}.
FT SITE 129 129 Transition state stabilizer.
FT {ECO:0000250}.
FT CARBOHYD 82 82 N-linked (GlcNAc...). {ECO:0000255}.
FT CARBOHYD 454 454 N-linked (GlcNAc...). {ECO:0000255}.
FT CARBOHYD 490 490 N-linked (GlcNAc...). {ECO:0000255}.
FT DISULFID 54 64 {ECO:0000250}.
FT DISULFID 360 365 {ECO:0000250}.
FT DISULFID 488 491 {ECO:0000250}.
**
** ################# INTERNAL SECTION ##################
**EV ECO:0000250; -; XXX; 01-JAN-1900.
**EV ECO:0000255; -; XXX; 01-JAN-1900.
**EV ECO:0000305; -; XXX; 01-JAN-1900.
**ZB JSG, 08-MAR-2006; RAB, 07-MAR-2017;
SQ SEQUENCE 572 AA; 63354 MW; 69A652338C04536D CRC64;
MLFFLNFFVL VFSIELALLT ASAAAEDGSY EIIILHTNDM HARFDQTNAG SNKCQEKDKI
ASKCYGGFAR VSTMVKKFRE ENGSSVLFLN AGDTYTGTPW FTLYKETIAT EMMNILRPDA
ASLGNHEFDK GVEGLVPFLN GVTFPILTAN LDTSQEPTMT NAKNLKRSMI FTVSGHRVGV
IGYLTPDTKF LSDVGKVNFI PEVEAINTEA QRLKKEENAE IIIVVGHSGL IKDREIAEKC
PLVDIIVGGH SHTFLYTGSQ PDREVPVDVY PVVVTQSSGK KVPIVQAYCF TKYLGYFKVT
INGKGNVVGW TGQPILLNNN IPQDQEVLTA LEKYRERVEN YGNRVIGVSR VILNGGHTEC
RFHECNMGNL ITDAFVYANV ISTPMSTNAW TDASVVLYQS GGIRAPIDPR TAAGSITRLE
LDNVLPFGNA LYVVKVPGNV LRKALEHSVH RYSNTSGWGE FPQVSGLKIR FNVNEEIGKR
VKSVKVLCSN CSQPEYQPLR NKKTYNVIMD SFMKDGGDGY SMFKPLKIIK TLPLGDIETV
EAYIEKMGPI FPAVEGRITV LGGLQKSDED WH
//
ID 14311_ARATH Reviewed; 252 AA.
AC Q9S9Z8; A0JQ87; F4HWN0; Q0WL19;
DT 05-DEC-2001, integrated into UniProtKB/Swiss-Prot.
DT 16-NOV-2011, sequence version 2.
DT 15-MAR-2017, entry version 98.
DE RecName: Full=14-3-3-like protein GF14 omicron;
DE AltName: Full=General regulatory factor 11;
GN Name=GRF11; OrderedLocusNames=At1g34760; ORFNames=F21H2.3;
OS Arabidopsis thaliana (Mouse-ear cress).
OC Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta;
OC Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae;
OC Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Camelineae;
OC Arabidopsis.
OX NCBI_TaxID=3702;
RN [1]
RP NUCLEOTIDE SEQUENCE [MRNA] (ISOFORM 2).
RC TISSUE=Leaf;
RA Alsterfjord M., Rosenquist M., Larsson C., Sommarin M.;
RT "Novel 14-3-3 isoforms in Arabidopsis thaliana.";
RL Submitted (NOV-2000) to the EMBL/GenBank/DDBJ databases.
RN [2]
RP NUCLEOTIDE SEQUENCE [LARGE SCALE GENOMIC DNA].
RC STRAIN=cv. Columbia;
RX PubMed=11130712; DOI=10.1038/35048500;
RA Theologis A., Ecker J.R., Palm C.J., Federspiel N.A., Kaul S.,
RA White O., Alonso J., Altafi H., Araujo R., Bowman C.L., Brooks S.Y.,
RA Buehler E., Chan A., Chao Q., Chen H., Cheuk R.F., Chin C.W.,
RA Chung M.K., Conn L., Conway A.B., Conway A.R., Creasy T.H., Dewar K.,
RA Dunn P., Etgu P., Feldblyum T.V., Feng J.-D., Fong B., Fujii C.Y.,
RA Gill J.E., Goldsmith A.D., Haas B., Hansen N.F., Hughes B., Huizar L.,
RA Hunter J.L., Jenkins J., Johnson-Hopson C., Khan S., Khaykin E.,
RA Kim C.J., Koo H.L., Kremenetskaia I., Kurtz D.B., Kwan A., Lam B.,
RA Langin-Hooper S., Lee A., Lee J.M., Lenz C.A., Li J.H., Li Y.-P.,
RA Lin X., Liu S.X., Liu Z.A., Luros J.S., Maiti R., Marziali A.,
RA Militscher J., Miranda M., Nguyen M., Nierman W.C., Osborne B.I.,
RA Pai G., Peterson J., Pham P.K., Rizzo M., Rooney T., Rowley D.,
RA Sakano H., Salzberg S.L., Schwartz J.R., Shinn P., Southwick A.M.,
RA Sun H., Tallon L.J., Tambunga G., Toriumi M.J., Town C.D.,
RA Utterback T., Van Aken S., Vaysberg M., Vysotskaia V.S., Walker M.,
RA Wu D., Yu G., Fraser C.M., Venter J.C., Davis R.W.;
RT "Sequence and analysis of chromosome 1 of the plant Arabidopsis
RT thaliana.";
RL Nature 408:816-820(2000).
RN [3]
RP GENOME REANNOTATION.
RC STRAIN=cv. Columbia;
RG The Arabidopsis Information Resource (TAIR);
RL Submitted (APR-2011) to the EMBL/GenBank/DDBJ databases.
RN [4]
RP NUCLEOTIDE SEQUENCE [LARGE SCALE MRNA] (ISOFORM 1).
RC STRAIN=cv. Columbia;
RA Totoki Y., Seki M., Ishida J., Nakajima M., Enju A., Kamiya A.,
RA Narusaka M., Shin-i T., Nakagawa M., Sakamoto N., Oishi K., Kohara Y.,
RA Kobayashi M., Toyoda A., Sakaki Y., Sakurai T., Iida K., Akiyama K.,
RA Satou M., Toyoda T., Konagaya A., Carninci P., Kawai J.,
RA Hayashizaki Y., Shinozaki K.;
RT "Large-scale analysis of RIKEN Arabidopsis full-length (RAFL) cDNAs.";
RL Submitted (JUL-2006) to the EMBL/GenBank/DDBJ databases.
RN [5]
RP NUCLEOTIDE SEQUENCE [LARGE SCALE MRNA] (ISOFORM 3).
RC STRAIN=cv. Columbia;
RA Bautista V.R., Kim C.J., Chen H., Quinitio C., Ecker J.R.;
RT "Arabidopsis ORF Clones.";
RL Submitted (NOV-2006) to the EMBL/GenBank/DDBJ databases.
CC -!- FUNCTION: Is associated with a DNA binding complex that binds to
CC the G box, a well-characterized cis-acting DNA regulatory element
CC found in plant genes. {ECO:0000250}.
CC -!- ALTERNATIVE PRODUCTS:
CC Event=Alternative splicing; Named isoforms=3;
CC Comment=A number of isoforms are produced. According to EST
CC sequences.;
CC Name=1;
CC IsoId=Q9S9Z8-1; Sequence=Displayed;
CC Name=2;
CC IsoId=Q9S9Z8-2; Sequence=VSP_042043, VSP_042044;
CC Name=3;
CC IsoId=Q9S9Z8-3; Sequence=VSP_042045;
CC -!- SIMILARITY: Belongs to the 14-3-3 family. {ECO:0000305}.
DR EMBL; AF323920; AAG47840.1; -; mRNA.
DR EMBL; AC007894; AAD46005.1; -; Genomic_DNA.
DR EMBL; CP002684; AEE31735.1; -; Genomic_DNA.
DR EMBL; CP002684; AEE31736.1; -; Genomic_DNA.
DR EMBL; AK230390; BAF02188.1; -; mRNA.
DR EMBL; BT029457; ABK59686.1; -; mRNA.
DR RefSeq; NP_001077649.1; NM_001084180.2. [Q9S9Z8-1]
DR RefSeq; NP_564451.2; NM_103196.4. [Q9S9Z8-3]
DR UniGene; At.11487; -.
DR ProteinModelPortal; Q9S9Z8; -.
DR SMR; Q9S9Z8; -.
DR BioGrid; 25612; 2.
DR IntAct; Q9S9Z8; 1.
DR STRING; 3702.AT1G34760.1; -.
DR iPTMnet; Q9S9Z8; -.
DR PaxDb; Q9S9Z8; -.
DR PRIDE; Q9S9Z8; -.
DR EnsemblPlants; AT1G34760.2; AT1G34760.2; AT1G34760. [Q9S9Z8-1]
DR GeneID; 840380; -.
DR Gramene; AT1G34760.2; AT1G34760.2; AT1G34760.
DR KEGG; ath:AT1G34760; -.
DR Araport; AT1G34760; -.
DR TAIR; locus:2008381; AT1G34760.
DR eggNOG; KOG0841; Eukaryota.
DR eggNOG; COG5040; LUCA.
DR HOGENOM; HOG000240379; -.
DR InParanoid; Q9S9Z8; -.
DR OMA; NEQAERY; -.
DR Reactome; R-ATH-1445148; Translocation of GLUT4 to the plasma membrane.
DR Reactome; R-ATH-3371453; Regulation of HSF1-mediated heat shock response.
DR Reactome; R-ATH-3371511; HSF1 activation.
DR PRO; PR:Q9S9Z8; -.
DR Proteomes; UP000006548; Chromosome 1.
DR Genevisible; Q9S9Z8; AT.
DR GO; GO:0051117; F:ATPase binding; IDA:TAIR.
DR Gene3D; 1.20.190.20; -; 1.
DR InterPro; IPR000308; 14-3-3.
DR InterPro; IPR023409; 14-3-3_CS.
DR InterPro; IPR023410; 14-3-3_domain.
DR PANTHER; PTHR18860; PTHR18860; 1.
DR Pfam; PF00244; 14-3-3; 1.
DR PIRSF; PIRSF000868; 14-3-3; 1.
DR PRINTS; PR00305; 1433ZETA.
DR SMART; SM00101; 14_3_3; 1.
DR SUPFAM; SSF48445; SSF48445; 1.
DR PROSITE; PS00796; 1433_1; 1.
DR PROSITE; PS00797; 1433_2; 1.
PE 2: Evidence at transcript level;
KW Alternative splicing; Complete proteome; Reference proteome.
FT CHAIN 1 252 14-3-3-like protein GF14 omicron.
FT /FTId=PRO_0000058673.
FT VAR_SEQ 241 241 E -> K (in isoform 2).
FT {ECO:0000303|Ref.1}.
FT /FTId=VSP_042043.
FT VAR_SEQ 242 252 Missing (in isoform 2).
FT {ECO:0000303|Ref.1}.
FT /FTId=VSP_042044.
FT VAR_SEQ 252 252 N -> VNKI (in isoform 3).
FT {ECO:0000303|Ref.5}.
FT /FTId=VSP_042045.
FT CONFLICT 128 128 D -> G (in Ref. 4; BAF02188).
FT {ECO:0000305}.
**
** ################# INTERNAL SECTION ##################
**DR Araport-CDS; AT1G34760.1; Araport11; -. [Q9S9Z8-3]
**DR Araport-CDS; AT1G34760.2; Araport11; -. [Q9S9Z8-1]
**EV ECO:0000250; -; XXX; 01-JAN-1900.
**EV ECO:0000303; Ref.1; XXX; 01-JAN-1900.
**EV ECO:0000303; Ref.5; XXX; 01-JAN-1900.
**EV ECO:0000305; -; XXX; 01-JAN-1900.
**YY According to Araport, the gene encoding this protein is alternatively spliced.
**ZB MIT, 11-OCT-2011;
SQ SEQUENCE 252 AA; 28781 MW; 852335FF39915461 CRC64;
MENERAKQVY LAKLNEQAER YDEMVEAMKK VAALDVELTI EERNLLSVGY KNVIGARRAS
WRILSSIEQK EESKGNEQNA KRIKDYRTKV EEELSKICYD ILAVIDKHLV PFATSGESTV
FYYKMKGDYF RYLAEFKSGA DREEAADLSL KAYEAATSSA STELSTTHPI RLGLALNFSV
FYYEILNSPE RACHLAKRAF DEAIAELDSL NEDSYKDSTL IMQLLRDNLT LWTSDLEEGG
EQSKGHNQQD EN
//
ID W0TYI6_HUMAN Unreviewed; 154 AA.
AC W0TYI6;
DT 19-MAR-2014, integrated into UniProtKB/TrEMBL.
DT 19-MAR-2014, sequence version 1.
DT 15-FEB-2017, entry version 23.
DE SubName: Full=H2B histone family, member M {ECO:0000313|EMBL:CAA97844.2};
GN Name=H2BFM {ECO:0000313|EMBL:CAA97844.2};
GN ORFNames=LL0XNC01-240C2.2-001 {ECO:0000313|EMBL:CAA97844.2};
OS Homo sapiens (Human).
OC Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi;
OC Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini;
OC Catarrhini; Hominidae; Homo.
OX NCBI_TaxID=9606 {ECO:0000313|EMBL:CAA97844.2};
RN [1] {ECO:0000313|EMBL:CAA97844.2}
RP NUCLEOTIDE SEQUENCE.
RA Hunt A.;
RL Submitted (JAN-2009) to the EMBL/GenBank/DDBJ databases.
CC -!- FUNCTION: Core component of nucleosome. Nucleosomes wrap and
CC compact DNA into chromatin, limiting DNA accessibility to the
CC cellular machineries which require DNA as a template. Histones
CC thereby play a central role in transcription regulation, DNA
CC repair, DNA replication and chromosomal stability. DNA
CC accessibility is regulated via a complex set of post-translational
CC modifications of histones, also called histone code, and
CC nucleosome remodeling. {ECO:0000256|SAAS:SAAS00295035}.
CC -!- SUBUNIT: The nucleosome is a histone octamer containing two
CC molecules each of H2A, H2B, H3 and H4 assembled in one H3-H4
CC heterotetramer and two H2A-H2B heterodimers. The octamer wraps
CC approximately 147 bp of DNA. {ECO:0000256|SAAS:SAAS00565646}.
CC -!- SUBCELLULAR LOCATION: Chromosome {ECO:0000256|SAAS:SAAS00680978}.
CC -!- SUBCELLULAR LOCATION: Nucleus {ECO:0000256|SAAS:SAAS00593244}.
CC -!- SIMILARITY: Belongs to the histone H2B family.
CC {ECO:0000256|SAAS:SAAS00689654}.
CC -----------------------------------------------------------------------
CC Copyrighted by the UniProt Consortium, see http://www.uniprot.org/terms
CC Distributed under the Creative Commons Attribution-NoDerivs License
CC -----------------------------------------------------------------------
DR EMBL; Z73497; CAA97844.2; -; Genomic_DNA.
DR RefSeq; NP_001157888.1; NM_001164416.1.
DR RefSeq; XP_006724703.1; XM_006724640.2.
DR RefSeq; XP_011529224.1; XM_011530922.2.
DR UniGene; Hs.376474; -.
DR ProteinModelPortal; W0TYI6; -.
DR SMR; W0TYI6; -.
DR STRING; 9606.ENSP00000347119; -.
DR PaxDb; W0TYI6; -.
DR GeneID; 286436; -.
DR KEGG; hsa:286436; -.
DR UCSC; uc004els.2; human.
DR CTD; 286436; -.
DR eggNOG; KOG1744; Eukaryota.
DR eggNOG; ENOG4111NV5; LUCA.
DR KO; K11252; -.
DR OMA; PKEANSM; -.
DR OrthoDB; EOG091G0WZB; -.
DR GenomeRNAi; 286436; -.
DR ExpressionAtlas; W0TYI6; baseline and differential.
DR GO; GO:0000786; C:nucleosome; IEA:UniProtKB-KW.
DR GO; GO:0005634; C:nucleus; IEA:UniProtKB-SubCell.
DR GO; GO:0003677; F:DNA binding; IEA:UniProtKB-KW.
DR Gene3D; 1.10.20.10; -; 1.
DR InterPro; IPR009072; Histone-fold.
DR InterPro; IPR007125; Histone_H2A/H2B/H3.
DR InterPro; IPR000558; Histone_H2B.
DR PANTHER; PTHR23428; PTHR23428; 1.
DR Pfam; PF00125; Histone; 1.
DR PRINTS; PR00621; HISTONEH2B.
DR SMART; SM00427; H2B; 1.
DR SUPFAM; SSF47113; SSF47113; 1.
PE 3: Inferred from homology;
KW Chromosome {ECO:0000256|SAAS:SAAS00454795};
KW DNA-binding {ECO:0000256|SAAS:SAAS00454795};
KW Nucleosome core {ECO:0000256|SAAS:SAAS00454795};
KW Nucleus {ECO:0000256|SAAS:SAAS00486878}.
FT DOMAIN 1 123 Histone. {ECO:0000259|Pfam:PF00125}.
SQ SEQUENCE 154 AA; 17001 MW; 700C131F5F5818B0 CRC64;
MAAASAMAEA SSETTSEEGQ SIQEPKEANS TKAQKQKRRG CRGSRRRHAN RRGDSFGDSF
TPYFPRVLKQ VHQGLSLSQE AVSVMDSMIH DILDRIATEA GQLAHYTKRV TITSRDIQMA
VRLLLPGKMG KLAEAQGTNA ALRTSLCAIW QQRK
//';
// Run the pattern over the whole sample text. PREG_SET_ORDER groups the
// results per match: $matches[i][0] is the full match and $matches[i][n] is
// capture group n of that match. The search starts at the default offset.
preg_match_all(
    $re,
    $str,
    $matches,
    PREG_SET_ORDER
);

// Dump the complete result structure for inspection.
var_dump($matches);
// Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for PHP, please visit: http://php.net/manual/en/ref.pcre.php