PCA图绘制

输入表达矩阵数据文件

library(ggplot2)
library(gmodels)
inname = "COS_deseq_counts_normalized.txt"
outname = "COS_PCA.png"
group <- factor(c(rep("COS0",3),rep("COS1",3),rep("COS3",3),rep("COS6",3),rep("COS12",3)), levels = c("COS0", "COS1","COS3","COS6","COS12"))
## step 1: 数据的读取和处理
# read the expr data
expr <- read.table(inname, header=T, row.names=1)
# transpose the data
data <- t(expr)
## step2:PCA分析
# do PCA 
data.pca <- fast.prcomp(data)
## step3: PCA结果解析
# fetch the proportion of PC1 and PC2
# 一般情况下PC1 + PC2 > 70% 二维PCA散点图才有效
a <- summary(data.pca)
tmp <- a[4]$importance
pro1 <- as.numeric(sprintf("%.3f",tmp[2,1]))*100
pro2 <- as.numeric(sprintf("%.3f",tmp[2,2]))*100

# 将成分矩阵转换为数据框
pc = as.data.frame(a$x)

# 给pc的数据框添加名称列和分组列(用来画图)
pc$group = group
pc$names = rownames(pc)

## step 4: 绘图
# draw PCA plot figure
xlab=paste("PC1(",pro1,"%)",sep="") 
ylab=paste("PC2(",pro2,"%)",sep="")
pca=ggplot(pc,aes(PC1,PC2)) + 
geom_point(size=3,aes(shape=group,color=group)) + 
geom_text(aes(label=names),size=4)+labs(x=xlab,y=ylab,title="PCA") + 
geom_hline(yintercept=0,linetype=4,color="grey") + 
geom_vline(xintercept=0,linetype=4,color="grey") + 
theme_bw()

# 保存结果
ggsave(outname,pca,width=10,height=8)

Posted

in

by

Tags:

Comments

Leave a Reply

Your email address will not be published. Required fields are marked *