短序列的质控可以使用 Trimmomatic,这里不多做介绍;质控后得到的 clean data 可用于下面的分析。
一、比对和甲基化位点提取(Bismark)
Bismark安装及使用
# Bismark bisulfite-seq workflow: genome preparation, alignment,
# deduplication, methylation extraction and reporting.
#
# Run from the directory holding the reference genome ("./" is passed as the
# genome folder everywhere). Clean paired-end reads are read from
# ../<sample>.1.fastq and ../<sample>.2.fastq; sample names come from
# ../samples.txt. Each stage writes one command per input into a *.list file
# and hands that file to ParaFly, which runs the listed commands in parallel
# (-CPU sets how many run at once).

# Installation (either clone the repository or use the conda package).
git clone https://github.com/FelixKrueger/Bismark.git
#conda install -c bioconda bismark

# Location of the Bismark executables used by every step below.
bismark_dir=/opt/biosoft/Bismark-0.23.0

# Read the sample names once (one per line); blank lines are dropped so they
# cannot produce commands with an empty sample name.
mapfile -t samples < <(grep -v '^[[:space:]]*$' ../samples.txt)

# Genome preparation
"${bismark_dir}/bismark_genome_preparation" --parallel 40 --verbose ./

# Bismark alignment
# NOTE(review): every bismark job is itself started with --parallel 40 and
# ParaFly runs 4 of them at once -- confirm the machine can take that load.
for sample in "${samples[@]}"; do
  printf '%s\n' "${bismark_dir}/bismark --parallel 40 --genome ./ --phred33-quals -1 ../${sample}.1.fastq -2 ../${sample}.2.fastq"
done > bismark.list
ParaFly -c bismark.list -CPU 4

# Deduplicate the paired-end alignments produced above.
for sample in "${samples[@]}"; do
  printf '%s\n' "${bismark_dir}/deduplicate_bismark --bam ${sample}.1_bismark_bt2_pe.bam"
done > deduplicate_bismark.list
ParaFly -c deduplicate_bismark.list -CPU 9

# bismark_methylation_extractor: extract methylation information from every
# deduplicated BAM in the current directory.
for bam in *deduplicated.bam; do
  # An unmatched glob stays literal; skip it instead of queueing a bogus job.
  [[ -e "${bam}" ]] || continue
  printf '%s\n' "${bismark_dir}/bismark_methylation_extractor ${bam} -p --genome ./ --gzip --bedGraph --cytosine_report --CX --buffer_size 20G --output ./"
done > methylation_extractor.list
ParaFly -c methylation_extractor.list -CPU 9

# Reports; both tools are run without arguments in the output directory.
"${bismark_dir}/bismark2report"
"${bismark_dir}/bismark2summary"

# coverage2cytosine: per-sample cytosine report from the coverage file, with
# --merge_CpG enabled.
for sample in "${samples[@]}"; do
  printf '%s\n' "${bismark_dir}/coverage2cytosine ${sample}.1_bismark_bt2_pe.deduplicated.bismark.cov.gz --merge_CpG --genome ./ -o ${sample}.CpG.output"
done > coverage2cytosine.list
ParaFly -c coverage2cytosine.list -CPU 9

# bam2nuc: first the genome-only composition, then one run per deduplicated BAM.
"${bismark_dir}/bam2nuc" --genome_folder ./ --genomic_composition_only
for sample in "${samples[@]}"; do
  printf '%s\n' "${bismark_dir}/bam2nuc --genome_folder ./ ${sample}.1_bismark_bt2_pe.deduplicated.bam"
done > bam2nuc.list
ParaFly -c bam2nuc.list -CPU 9

# Build a bedGraph from the CpG_* extractor output files; the glob is left
# unquoted on purpose so that it expands to every matching file.
"${bismark_dir}/bismark2bedGraph" -o buffer_CpG.bedGraph --buffer 5G CpG_*
Leave a Reply