下载需要的脚本和安装Python模块
wget https://github.com/irusri/Extract-intron-from-gff3/archive/master.zip unzip master.zip rm master.zip && cd Extract-intron-from-gff3-master/scripts/ sudo chmod 755 * pip install misopy pip install gffutils
获取exon、intron的gff文件并提取DNA序列
python /opt/biosoft/Extract-intron-from-gff3-master/scripts/extract_intron_gff3_from_gff3.py out.gff3 out_intron.gff3 ##结果文件out_intron.gff3_introns.gff3 awk '/intron\t/{print}' out_intron.gff3_introns.gff3 | sort -k 1,1 -k4,2n > processed_intron.gff3 awk '/exon\t/{print}' out_intron.gff3_introns.gff3 | sort -k 1,1 -k4,2n > processed_exon.gff3 perl /opt/biosoft/Extract-intron-from-gff3-master/scripts/extract_seq_from_gff3.pl -d out.tmp/genome.fasta - processed_intron.gff3 > output_intron.fa perl /opt/biosoft/Extract-intron-from-gff3-master/scripts/extract_seq_from_gff3.pl -d out.tmp/genome.fasta - processed_exon.gff3 > output_exon.fa ##对序列重命名 genome_seq_clear.pl output_intron.fa --seq_prefix intron --min_length 1 --line_length -1 > output_intron_rename.fa genome_seq_clear.pl output_exon.fa --seq_prefix exon --min_length 1 --line_length -1 > output_exon_rename.fa ##获得每条序列的长度信息 samtools faidx output_intron_rename.fa cut -f 1,2 output_intron_rename.fa.fai > output_intron_length.txt samtools faidx output_exon_rename.fa cut -f 1,2 output_exon_rename.fa.fai > output_exon_length.txt ##R画图 library(ggplot2) library(patchwork) data <- read.table("output_gene_length.txt",header=T,check.names=F) p1 <- ggplot(data,aes(length))+ geom_line(stat="density",color="red")+theme_classic()+theme(axis.title = element_text(size=24),axis.text = element_text(size = 22,color = "black")) p1
Leave a Reply