python .py -i db.fasta -I blastresult.tab -o selected.txt -O filtered.txt
blastresult.tab是根据P值sort后的文件
from __future__ import division
import re
import sys, getopt
import operator
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio.Alphabet import generic_nucleotide
opts, args = getopt.getopt(sys.argv[1:], "hI:i:o:O:")
input_info1= ""
input_info2= ""
out_file1 = ""
out_file2 = ""
for op, value in opts:
if op =="-o": #ARG_pattern_MIN_7030.fasta or ARG_pattern_MAX_7030.fasta
out_file1 = open(value, "w")
filename_pro = str (value)
name_item = filename_pro.strip().split(".")
filename = name_item[0]
elif op == "-i": # All_ARGcontig_remove_S_nr.fasta
input_info1 = open(value, "r")
elif op == "-I":
input_info2 = open(value, "r")
elif op == "-O":
out_file2 = open(value, "w")
filename_pro = str (value)
name_item = filename_pro.strip().split(".")
filename = name_item[0]
dict1 = {}
dict2 = {}
for record in SeqIO.parse(input_info1,"fasta"):
h = len(record.seq)
dict1[record.id]=h
dict2[record.id]=record.description
for line1 in input_info2:
info1 = line1.strip("\n").split("\t")
a = float(info1[2])
b = float(info1[3])
c = b/float(dict1[info1[1]])
if a >= 80.0 and c >= 0.8:
out_file1.write(str(dict1[info1[1]])+'\t'+str(b)+'\t'+dict2[info1[1]]+'\t'+line1)
else:
out_file2.write(str(dict1[info1[1]])+'\t'+str(b)+'\t'+dict2[info1[1]]+'\t'+line1)
input_info1.close()
input_info2.close()
out_file1.close()
out_file2.close()
Leave a Reply