输入表达矩阵数据文件
library(ggplot2) library(gmodels) inname = "COS_deseq_counts_normalized.txt" outname = "COS_PCA.png" group <- factor(c(rep("COS0",3),rep("COS1",3),rep("COS3",3),rep("COS6",3),rep("COS12",3)), levels = c("COS0", "COS1","COS3","COS6","COS12")) ## step 1: 数据的读取和处理 # read the expr data expr <- read.table(inname, header=T, row.names=1) # transpose the data data <- t(expr) ## step2:PCA分析 # do PCA data.pca <- fast.prcomp(data) ## step3: PCA结果解析 # fetch the proportion of PC1 and PC2 # 一般情况下PC1 + PC2 > 70% 二维PCA散点图才有效 a <- summary(data.pca) tmp <- a[4]$importance pro1 <- as.numeric(sprintf("%.3f",tmp[2,1]))*100 pro2 <- as.numeric(sprintf("%.3f",tmp[2,2]))*100 # 将成分矩阵转换为数据框 pc = as.data.frame(a$x) # 给pc的数据框添加名称列和分组列(用来画图) pc$group = group pc$names = rownames(pc) ## step 4: 绘图 # draw PCA plot figure xlab=paste("PC1(",pro1,"%)",sep="") ylab=paste("PC2(",pro2,"%)",sep="") pca=ggplot(pc,aes(PC1,PC2)) + geom_point(size=3,aes(shape=group,color=group)) + geom_text(aes(label=names),size=4)+labs(x=xlab,y=ylab,title="PCA") + geom_hline(yintercept=0,linetype=4,color="grey") + geom_vline(xintercept=0,linetype=4,color="grey") + theme_bw() # 保存结果 ggsave(outname,pca,width=10,height=8)
Leave a Reply