六、Deeptools可视化
#deeptools安装conda install deeptools gtf2bed < ~/gc_genome/17.Geta/1.geta/out.gtf > genome.bed for i in `cat ../samples.txt`; do echo "bamCoverage -p 5 --normalizeUsing RPKM -b $i.last.bam -o $i.last.bw"; done > bam2bw.list ParaFly -c bam2bw.list -CPU 9 computeMatrix reference-point --referencePoint TSS -p 15 -b 10000 -a 10000 -R ../genome.bed -S ../*bw --skipZeros -o matrix1_test_TSS.gz --outFileSortedRegions regions1_test_genes.bed plotHeatmap -m matrix1_test_TSS.gz -out test_Heatmap.pdf --plotFileFormat pdf --dpi 720 plotProfile -m matrix1_test_TSS.gz -out test_Profile.pdf --plotFileFormat pdf --perGroup --dpi 720 computeMatrix scale-regions -p 150 -b 10000 -a 10000 -R ../genome.bed -S ../*bw --skipZeros -o matrix1_test_body.gz --outFileSortedRegions regions1_test_body.bed plotHeatmap -m matrix1_test_TSS.gz -out test_Heatmap.pdf --plotFileFormat pdf --dpi 720 plotProfile -m matrix1_test_TSS.gz -out test_Profile.pdf --plotFileFormat pdf --perGroup --dpi 720
七、重复样品处理(至少两个生物学重复)
IDR是通过比较一对经过排序的regions/peaks 的列表,然后计算反映其重复性的值,同时得到了merge后的一致性的peaks。
for i in `cat ../../samples.txt`; do echo "sort -k8,8nr ../$i.peaks_peaks.narrowPeak > $i.peaks.narrowPeak"; done > peaks_sort.list ParaFly -c peaks_sort.list -CPU 9
八、peaks注释(非模式物种)
使用GenomicFeatures包的函数制作TxDb对象:
#makeTxDbFromUCSC: 通过UCSC在线制作TxDb #makeTxDbFromBiomart: 通过ensembl在线制作TxDb #makeTxDbFromGRanges:通过GRanges对象制作TxDb #makeTxDbFromGFF:通过解析GFF文件制作TxDb gc_TxDB <- makeTxDbFromGFF("genome.gff3") library(clusterProfiler) library("ChIPseeker") library(ChIPpeakAnno) library(GenomicFeatures) library("ggupset") library("ggimage") gc_TxDB <- makeTxDbFromGFF("out.gff3") promoter <- getPromoters(TxDb=gc_TxDB, upstream=3000, downstream=3000) files = list(A_summits = "A.peaks_summits.bed", B_summits = "B.peaks_summits.bed", C_summits = "C.peaks_summits.bed", D_summits = "D.peaks_summits.bed") peakAnno <- annotatePeak(files[[1]], tssRegion=c(-3000, 3000), TxDb=gc_TxDB) plotAnnoPie(peakAnno) #tiff("peakAnno.tiff") #plotAnnoPie(peakAnno) #dev.off() 此方法保存的图片标注有颜色模块 plotAnnoBar(peakAnno) vennpie(peakAnno) peakAnnoList <- lapply(files, annotatePeak, TxDb=gc_TxDB,tssRegion=c(-3000, 3000)) plotAnnoBar(peakAnnoList) #tagHeatmap来画热图 peak <- readPeakFile(files[[3]]) tagMatrix <- getTagMatrix(peak, windows=promoter) tagHeatmap(tagMatrix, xlim=c(-3000, 3000), color="red") peakHeatmap(files, TxDb=gc_TxDB, upstream=3000, downstream=3000, color=rainbow(length(files))) #以谱图的形式来展示结合的强度 plotAvgProf2(files[[3]], TxDb=gc_TxDB, upstream=3000, downstream=3000, xlab="Genomic Region (5'->3')", ylab = "Read Count Frequency") tagMatrixList <- lapply(files, getTagMatrix, windows=promoter) plotAvgProf(tagMatrixList, xlim=c(-3000, 3000)) #注释基因 peakAnnoList <- lapply(files, annotatePeak, TxDb=gc_TxDB,tssRegion=c(-3000, 3000),addFlankGeneInfo=TRUE, flankDistance=5000) genes = lapply(peakAnnoList, function(i) as.data.frame(i)$geneId) vennplot(genes) save(peakAnnoList,file="peakAnnolist.rda") write.table(as.data.frame(peakAnnoList$mulvscon),file="Nanog.PeakAnno",sep='\t',quote = F)
九、差异分析
DiffBind安装
conda install libv8 conda install -c conda-forge librsvg ##R version 4.0.3 (2020-10-10) BiocManager::install("systemPipeR") BiocManager::install("DiffBind") library(DiffBind) setwd("D:/Experiment_data/wcs/ATAC/") tamoxifen <- dba(sampleSheet="sheet_ATAC.CSV") tamoxifen <- dba.count(tamoxifen, bUseSummarizeOverlaps=TRUE) plot(tamoxifen) tamoxifen <- dba.contrast(tamoxifen, categories = DBA_FACTOR, minMembers = 3) tamoxifen <- dba.analyze(tamoxifen, method = DBA_DESEQ2) #heatmap hmap <- colorRampPalette(c("green", "black", "red"))(n = 13) dba.plotHeatmap(tamoxifen, correlations=FALSE, scale="row", colScheme = hmap) #MA plots dba.plotMA(tamoxifen, contrast = 2) #火山图 dba.plotVolcano(tamoxifen) tamoxifen.DB <- dba.report(tamoxifen, method=DBA_DESEQ2, contrast = 1, th=1) #write.csv(tamoxifen.DB,"diffbind.csv") out <- as.data.frame(tamoxifen.DB) write.table(out, file="tamoxifen_DB.csv", sep=",", quote=F, col.names = NA) #保存不同样本的表达矩阵 out <- as.data.frame(tamoxifen[["binding"]]) write.table(out, file="deseq2_count.xls", sep="\t", quote=F, row.names=F, col.names=F) #以bed格式保存显著性的差异结果 out <- as.data.frame(tamoxifen.DB) deseq.bed <- out[ which(out$FDR < 0.05), c("seqnames", "start", "end", "strand", "Fold")] write.table(deseq.bed, file="results/contrast_1_deseq2_sig.bed", sep="\t", quote=F, row.names=F, col.names=F)
Leave a Reply