{"id":167,"date":"2017-11-16T02:19:49","date_gmt":"2017-11-15T18:19:49","guid":{"rendered":"http:\/\/www.wuchangsong.com\/?p=167"},"modified":"2017-12-09T18:44:04","modified_gmt":"2017-12-09T10:44:04","slug":"go%e5%8a%9f%e8%83%bd%e5%af%8c%e9%9b%86","status":"publish","type":"post","link":"http:\/\/www.wuchangsong.com\/?p=167","title":{"rendered":"GO\u529f\u80fd\u5bcc\u96c6"},"content":{"rendered":"<p>\u524d\u51e0\u5929\u5b9e\u9a8c\u5ba4\u4e00\u4e2a\u5e08\u5144\u7ed9\u6211\u4e00\u4e2a\u8d28\u8c31\u7ed3\u679c\uff0c\u8ba9\u5e2e\u5fd9\u505a\u4e0bgo\u7684\u529f\u80fd\u5bcc\u96c6\uff0c\u6570\u636e\u683c\u5f0f\u5927\u6982\u662f\u8fd9\u6837\u7684\uff1a<img decoding=\"async\" loading=\"lazy\" class=\"alignnone size-full wp-image-168\" src=\"http:\/\/www.wuchangsong.com\/wp-content\/uploads\/2017\/11\/QQ\u56fe\u724720171116012014.png\" alt=\"\" width=\"777\" height=\"114\" \/><\/p>\n<p>\u7531\u4e8e\u4e4b\u524d\u505ago\u548ckegg\u65f6\u90fd\u662f\u8dd1\u6d41\u7a0b\uff0c\u50cf\u8fd9\u79cd\u9488\u5bf9\u6027\u7684go\u5bcc\u96c6\u8fd8\u6ca1\u505a\u8fc7\uff0c\u8bf4\u5230\u5e95\uff0c\u8fd8\u662f\u7531\u4e8e\u81ea\u5df1\u624b\u4e0a\u7f3a\u5c11\u6570\u636e\uff0c\u6ca1\u6709\u5c5e\u4e8e\u81ea\u5df1\u7684\u9879\u76ee\uff0c\u5f88\u591a\u7ec6\u8282\u6027\u7684\u95ee\u9898\u90fd\u6ca1\u6709\u7ecf\u5386\u8fc7\u3002\u4f46\u8fd9\u4e0d\u59a8\u788d\u54b1\u4e00\u9897\u6c42\u77e5\u7684\u5fc3\uff0c\u6211\u4eec\u90fd\u662f\u5728\u5b66\u4e60\u4e2d\u6210\u957f\u3002\u7531\u4e8e\u6ca1\u4e8b\u7684\u65f6\u5019\u901b\u8bba\u575b\u901b\u7684\u6bd4\u8f83\u9891\u7e41\uff0c\u77e5\u9053\u6570\u636e\u7684\u7b2c\u4e8c\u5217\u662fUniProt\u6570\u636e\u5e93\u7684accession\uff0c\u7136\u540e\u8be5\u600e\u4e48\u529e\u5462\uff1f\u4f5c\u4e3a\u751f\u4fe1\u4eba\uff0cGoogle\u662f\u5c11\u4e0d\u4e86\u7684\uff0c\u770b\u5230Google\u7ed3\u679c\uff0c\u77ac\u95f4\u660e\u4e86\u3002\u6839\u636eGoogle\u7684\u6307\u5f15\u6211\u4ece\u7f51\u4e0a\u4e0b\u8f7d\u4e86UniProt\u6570\u636e\u5e93\u91cc\u7684idmapping.tb.gz\u6587\u4ef6\uff08wget
 -c -t 10000 ftp:\/\/ftp.pir.georgetown.edu\/databases\/idmapping\/idmapping.tb.gz\uff09\uff0c\u5927\u698218G\u5de6\u53f3\uff0c\u6570\u636e\u7ed3\u6784\u5982\u4e0b\uff1a<\/p>\n<p><img decoding=\"async\" loading=\"lazy\" class=\"alignnone size-full wp-image-169\" src=\"http:\/\/www.wuchangsong.com\/wp-content\/uploads\/2017\/11\/QQ\u56fe\u724720171116015323.png\" alt=\"\" width=\"1329\" height=\"105\" \/><\/p>\n<p>\u4e00\u5171\u670922\u5217\uff0c\u4f9d\u6b21\u5206\u522b\u662f\uff1aUniProtKB accession\uff0cUniProtKB ID\uff0cEntrezGene\uff0cRefSeq\uff0cNCBI GI number\uff0cPDB\uff0cPfam\uff0cGO\uff0cPIRSF\uff0cIPI\uff0cUniRef100\uff0cUniRef90\uff0cUniRef50\uff0cUniParc\uff0cPIR-PSD accession\uff0cNCBI taxonomy\uff0cMIM\uff0cUniGene\uff0cEnsembl\uff0cPubMed ID\uff0cEMBL\/GenBank\/DDBJ\uff0cEMBL protein_id\uff1b\u8fd9\u5c31\u6709\u610f\u601d\u4e86\uff0c\u6570\u636e\u7684\u7b2c\u516b\u5217\u5c31\u662f\u6211\u4eec\u60f3\u8981\u7684go\u4fe1\u606f\u3002\u66f4\u6709\u610f\u601d\u7684\u662f\uff0c\u6709\u4e86\u8fd9\u4e2a\u6570\u636e\u5e93\u4fe1\u606f\uff0c\u6211\u4eec\u5c31\u53ef\u4ee5\u6839\u636e\u4e0d\u540c\u6570\u636e\u5e93\u7684\u6ce8\u91ca\u4fe1\u606f\u505ago\u5bcc\u96c6\u5566\uff01<\/p>\n<p>\u4e0b\u9762\u8981\u505a\u7684\u662f\u5199\u4e00\u4e2a\u811a\u672c\uff0c\u6839\u636e\u5e08\u5144\u7ed9\u7684\u7ed3\u679c\u8c03\u51fa\u5bf9\u5e94\u7684go\u53f7\uff0c\u5bf9\u4e8e\u4f1a\u7f16\u7a0b\u7684\u4eba\u6765\u8bf4\uff0c\u8fd9\u70b9\u81ea\u7136\u4e0d\u5728\u8bdd\u4e0b\uff0c\u4ee3\u7801\u5982\u4e0b\uff1a<\/p>\n<pre>import sys\r\n\r\nUSAGE = \"\\nusage: python %s idmapping.tb.gz blastout outputfile outputfile2\\n\" % sys.argv[0]\r\n\r\nif len(sys.argv) != 5:\r\n    print USAGE\r\n    sys.exit()\r\n\r\ndef parseIDmapping(filename):\r\n    UniProt_GO = {}\r\n    with open(filename, 'r') as f:\r\n        for line in f:\r\n            lsplit = line.rstrip().split(\"\\t\")\r\n            if lsplit[7]:\r\n                UniProt_GO[lsplit[0]] = lsplit[7]\r\n    return UniProt_GO\r\n\r\ndef 
parseBlastOut(filename):\r\n    tab_res = []\r\n    with open(filename, 'r') as f:\r\n        for line in f:\r\n            lsplit = line.strip('\\n').split('\\t')\r\n            tab_res.append(lsplit[0])\r\n    return tab_res\r\n\r\n\r\nUniProtKB_GO = parseIDmapping(sys.argv[1])\r\nBlastOut = parseBlastOut(sys.argv[2])\r\nOUT = open(sys.argv[3], 'w')\r\nOUT1 = open(sys.argv[4], 'w')\r\n\r\nfor i in BlastOut:\r\n    if i in UniProtKB_GO.keys():\r\n        print i \r\n        go = UniProtKB_GO[i]\r\n        print go\r\n        OUT.write(i+\"\\t\"+go+\"\\n\")\r\n    else:\r\n        OUT1.write(i+\"\\n\")\r\n   \r\nOUT.close()\r\nOUT1.close()<\/pre>\n<p>\u5f97\u5230\u7684\u7ed3\u679c\u662f\u8fd9\u6837\u5b50\uff1a<br \/>\n<img decoding=\"async\" loading=\"lazy\" class=\"alignnone size-full wp-image-170\" src=\"http:\/\/www.wuchangsong.com\/wp-content\/uploads\/2017\/11\/QQ\u56fe\u724720171116020533.png\" alt=\"\" width=\"700\" height=\"131\" \/><br \/>\n\u7531\u4e8e\u4f7f\u7528\u8f6f\u4ef6\u7684\u5173\u7cfb\uff0c\u8fd9\u79cd\u683c\u5f0f\u8c8c\u4f3c\u8fd8\u4e0d\u80fd\u8fbe\u5230\u8981\u6c42\uff0c\u518d\u5199\u4e00\u811a\u672c\u8f6c\u6362\u4e00\u4e0b\uff1a<\/p>\n<pre>import re\r\n\r\nfile1 = open(r\"C:\\\\Users\\\\wuchangsong\\\\Desktop\\\\11.txt\")\r\nout_file1 = open(r\"C:\\\\Users\\\\wuchangsong\\\\Desktop\\\\12.txt\", \"w\")\r\n\r\nfor line1 in file1:\r\n    info1 = re.sub('; ','\\t',line1)\r\n    out_file1.write(info1)\r\n\r\n\r\nfile1.close()\r\nout_file1.close()<\/pre>\n<p>\u7ed3\u679c\u957f\u8fd9\u6837\uff1a<img decoding=\"async\" loading=\"lazy\" class=\"alignnone size-full wp-image-171\" src=\"http:\/\/www.wuchangsong.com\/wp-content\/uploads\/2017\/11\/QQ\u56fe\u724720171116021125.png\" alt=\"\" width=\"763\" height=\"135\" \/><br \/>\n\u6700\u7ec8\u7ed3\u679c\uff1a<img decoding=\"async\" loading=\"lazy\" class=\"alignnone size-full wp-image-172\" src=\"http:\/\/www.wuchangsong.com\/wp-content\/uploads\/2017\/11\/QQ\u56fe\u724720171116021513.png\" alt=\"\" 
width=\"1194\" height=\"611\" \/><br \/>\n\u6ee1\u6ee1\u7684\u6210\u5c31\u611f\u6709\u4e48\u6709^_^\uff01<\/p>\n<p>\u7248\u6743\u58f0\u660e\uff1a\u672c\u6587\u4e3a\u535a\u4e3b\u539f\u521b\u6587\u7ae0\uff0c\u672a\u7ecf\u535a\u4e3b\u5141\u8bb8\u4e0d\u5f97\u8f6c\u8f7d\u3002<\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u524d\u51e0\u5929\u5b9e\u9a8c\u5ba4\u4e00\u4e2a\u5e08\u5144\u7ed9\u6211\u4e00\u4e2a\u8d28\u8c31\u7ed3\u679c\uff0c\u8ba9\u5e2e\u5fd9\u505a\u4e0bgo\u7684\u529f\u80fd\u5bcc\u96c6\uff0c\u6570\u636e\u683c\u5f0f\u5927\u6982\u662f\u8fd9\u6837\u7684\uff1a \u7531\u4e8e\u4e4b\u524d\u505ago\u548ckegg\u65f6\u90fd\u662f\u8dd1\u6d41\u7a0b\uff0c\u50cf\u8fd9\u79cd\u9488\u5bf9\u6027\u7684go\u5bcc\u96c6\u8fd8\u6ca1\u505a\u8fc7\uff0c\u8bf4\u5230\u5e95\uff0c\u8fd8\u662f\u7531\u4e8e\u81ea\u5df1\u624b\u4e0a\u7f3a\u5c11\u6570\u636e\uff0c\u6ca1\u6709\u5c5e\u4e8e\u81ea\u5df1\u7684\u9879\u76ee\uff0c\u5f88\u591a\u7ec6\u8282\u6027\u7684\u95ee\u9898\u90fd\u6ca1\u6709\u7ecf\u5386\u8fc7\u3002\u4f46\u8fd9\u4e0d\u59a8\u788d\u54b1\u4e00\u9897\u6c42\u77e5\u7684\u5fc3\uff0c\u6211\u4eec\u90fd\u662f\u5728\u5b66\u4e60\u4e2d\u6210\u957f\u3002\u7531\u4e8e\u6ca1\u4e8b\u7684\u65f6\u5019\u901b\u8bba\u575b\u901b\u7684\u6bd4\u8f83\u9891\u7e41\uff0c\u77e5\u9053\u6570\u636e\u7684\u7b2c\u4e8c\u5217\u662fUniProt\u6570\u636e\u5e93\u7684accession\uff0c\u7136\u540e\u8be5\u600e\u4e48\u529e\u5462\uff1f\u4f5c\u4e3a\u751f\u4fe1\u4eba\uff0cGoogle\u662f\u5c11\u4e0d\u4e86\u7684\uff0c\u770b\u5230Google\u7ed3\u679c\uff0c\u77ac\u95f4\u660e\u4e86\u3002\u6839\u636eGoogle\u7684\u6307\u5f15\u6211\u4ece\u7f51\u4e0a\u4e0b\u8f7d\u4e86UniProt\u6570\u636e\u5e93\u91cc\u7684idmapping.tb.gz\u6587\u4ef6\uff08wget -c -t 10000 ftp:\/\/ftp.pir.georgetown.edu\/databases\/idmapping\/idmapping.tb.gz\uff09\uff0c\u5927\u698218G\u5de6\u53f3\uff0c\u6570\u636e\u7ed3\u6784\u5982\u4e0b\uff1a \u4e00\u5171\u670922\u5217\uff0c\u4f9d\u6b21\u5206\u522b\u662f\uff1aUniProtKB accession\uff0cUniProtKB ID\uff0cEntrezGene\uff0cRefSeq\uff0cNCBI 
GI number\uff0cPDB\uff0cPfam\uff0cGO\uff0cPIRSF\uff0cIPI\uff0cUniRef100\uff0cUniRef90\uff0cUniRef50\uff0cUniParc\uff0cPIR-PSD accession\uff0cNCBI taxonomy\uff0cMIM\uff0cUniGene\uff0cEnsembl\uff0cPubMed ID\uff0cEMBL\/GenBank\/DDBJ\uff0cEMBL protein_id\uff1b\u8fd9\u5c31\u6709\u610f\u601d\u4e86\uff0c\u6570\u636e\u7684\u7b2c\u516b\u5217\u5c31\u662f\u6211\u4eec\u60f3\u8981\u7684go\u4fe1\u606f\u3002\u66f4\u6709\u610f\u601d\u7684\u662f\uff0c\u6709\u4e86\u8fd9\u4e2a\u6570\u636e\u5e93\u4fe1\u606f\uff0c\u6211\u4eec\u5c31\u53ef\u4ee5\u6839\u636e\u4e0d\u540c\u6570\u636e\u5e93\u7684\u6ce8\u91ca\u4fe1\u606f\u505ago\u5bcc\u96c6\u5566\uff01 \u4e0b\u9762\u8981\u505a\u7684\u662f\u5199\u4e00\u4e2a\u811a\u672c\uff0c\u6839\u636e\u5e08\u5144\u7ed9\u7684\u7ed3\u679c\u8c03\u51fa\u5bf9\u5e94\u7684go\u53f7\uff0c\u5bf9\u4e8e\u4f1a\u7f16\u7a0b\u7684\u4eba\u6765\u8bf4\uff0c\u8fd9\u70b9\u81ea\u7136\u4e0d\u5728\u8bdd\u4e0b\uff0c\u4ee3\u7801\u5982\u4e0b\uff1a import sys USAGE = &#8220;\\nusage: python %s idmapping.tb.gz blastout outputfile outputfile2\\n&#8221; % sys.argv[0] if len(sys.argv) != 5: print USAGE sys.exit() def parseIDmapping(filename): UniProt_GO = {} with open(filename, &#8216;r&#8217;) as f: for line in f: lsplit = line.rstrip().split(&#8220;\\t&#8221;) if lsplit[7]: 
[&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[4,2],"tags":[],"_links":{"self":[{"href":"http:\/\/www.wuchangsong.com\/index.php?rest_route=\/wp\/v2\/posts\/167"}],"collection":[{"href":"http:\/\/www.wuchangsong.com\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"http:\/\/www.wuchangsong.com\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"http:\/\/www.wuchangsong.com\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"http:\/\/www.wuchangsong.com\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=167"}],"version-history":[{"count":4,"href":"http:\/\/www.wuchangsong.com\/index.php?rest_route=\/wp\/v2\/posts\/167\/revisions"}],"predecessor-version":[{"id":188,"href":"http:\/\/www.wuchangsong.com\/index.php?rest_route=\/wp\/v2\/posts\/167\/revisions\/188"}],"wp:attachment":[{"href":"http:\/\/www.wuchangsong.com\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=167"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"http:\/\/www.wuchangsong.com\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=167"},{"taxonomy":"post_tag","embeddable":true,"href":"http:\/\/www.wuchangsong.com\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=167"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}