python .py -i db.fasta -I blastresult.tab -o selected.txt -O filtered.txt
blastresult.tab是根据P值sort后的文件
from __future__ import division import re import sys, getopt import operator from Bio import SeqIO from Bio.Seq import Seq from Bio.SeqRecord import SeqRecord from Bio.Alphabet import generic_nucleotide opts, args = getopt.getopt(sys.argv[1:], "hI:i:o:O:") input_info1= "" input_info2= "" out_file1 = "" out_file2 = "" for op, value in opts: if op =="-o": #ARG_pattern_MIN_7030.fasta or ARG_pattern_MAX_7030.fasta out_file1 = open(value, "w") filename_pro = str (value) name_item = filename_pro.strip().split(".") filename = name_item[0] elif op == "-i": # All_ARGcontig_remove_S_nr.fasta input_info1 = open(value, "r") elif op == "-I": input_info2 = open(value, "r") elif op == "-O": out_file2 = open(value, "w") filename_pro = str (value) name_item = filename_pro.strip().split(".") filename = name_item[0] dict1 = {} dict2 = {} for record in SeqIO.parse(input_info1,"fasta"): h = len(record.seq) dict1[record.id]=h dict2[record.id]=record.description for line1 in input_info2: info1 = line1.strip("\n").split("\t") a = float(info1[2]) b = float(info1[3]) c = b/float(dict1[info1[1]]) if a >= 80.0 and c >= 0.8: out_file1.write(str(dict1[info1[1]])+'\t'+str(b)+'\t'+dict2[info1[1]]+'\t'+line1) else: out_file2.write(str(dict1[info1[1]])+'\t'+str(b)+'\t'+dict2[info1[1]]+'\t'+line1) input_info1.close() input_info2.close() out_file1.close() out_file2.close()
Leave a Reply