
cd /home/train/Liuping/raw_data
mkdir ProstateCancer_methylation
cd ProstateCancer_methylation/
#合并下载好的数据
nohup ./Merge.pl metadata.cart.2018-08-01.json > /home/train/Liuping/raw_data/TCGA_Methylation_ProstateCancer/methylayion_rawdata/out_methylation 2>&1 &
#注意记录下normal‘和 tumor的个数
# normal count: 50
# tumor count: 503
mkdir ../result
cp geneMethy.txt ../result/
cd ../result/
#差异甲基化
source("http://bioconductor.org/biocLite.R")
biocLite("limma")
library(limma)
setwd("D:/TCGA_methylation")
inputFile <- "geneMethy.txt"
normalNum=50
tumorNum=503
fdrFilter=0.05
logFCfilter=1
outTab <- data.frame()
grade <- c(rep(1,normalNum),rep(2,tumorNum))
Type <- c(rep("Normal",normalNum),rep("Tumor",tumorNum))
rt <- read.table(inputFile,sep="\t",header=T,check.names=F)
rt <- as.matrix(rt)
rownames(rt) <- rt[,1]
exp <- rt[,2:ncol(rt)] #行名可被直接移植
dimnames <- list(rownames(exp),colnames(exp))
data <- matrix(as.numeric(as.matrix(exp)),nrow=nrow(exp),dimnames=dimnames)
data <- avereps(data)
data <- data[rowMeans(data)>0,]
data <- normalizeBetweenArrays(data)
normalData <- cbind(id=row.names(data),data) #为了让id号写入文件
write.table(normalData,file="normalizeMethy.txt",sep="\t",row.names=F,quote=F)
for (i in row.names(data)){
rt <- rbind(expression=data[i,],grade=grade)
rt <- as.matrix(t(rt))
wilcoxTest <- wilcox.test(expression ~ grade, data=rt)
normalGeneMeans <- mean(data[i,1:normalNum])
tumorGeneMeans <- mean(data[i,(normalNum+1):ncol(data)])
logFC <- log2(tumorGeneMeans) - log2(normalGeneMeans)
pvalue <- wilcoxTest$p.value
normalMed <- median(data[i,1:normalNum])
tumorMed <- median(data[i,(normalNum+1):ncol(data)])
diffMed <- tumorMed - normalMed
if((( logFC>0) & ( diffMed >0)) | (( logFC < 0 ) & ( diffMed <0 ))){
outTab <- rbind(outTab,cbind(gene=i,normalMean=normalGeneMeans,TumorMean=tumorGeneMeans,logFC=logFC,pValue=pvalue))
}}
pValue <- outTab[,"pValue"]
fdr <- p.adjust(as.numeric(as.vector(pValue)),method="fdr")
outTab <- cbind(outTab,fdr=fdr)
write.table(outTab,file="allGene.xls",sep="\t",row.names=F,quote=F)
outDiff <- outTab[(abs(as.numeric(as.vector(outTab$logFC))) > logFCfilter & as.numeric(as.vector(outTab$fdr)) < fdrFilter),]
write.table(outDiff,file="diff.xls",sep="\t",row.names=F,quote=F)
heatmap <- data[as.vector(outDiff[,1]),]
heatmap <- cbind(id=row.names(heatmap),heatmap)
write.table(heatmap,file="heatmap.txt",sep="\t",row.names=F,quote=F)
#pheatmap图
install.packages("pheatmap")
setwd("D:/TCGA_methylation/pheatmap")
library(pheatmap)
rt <- read.table("heatmap.txt",sep="\t",header=T,row.names=1,check.names=F)
Type <- c(rep("normal",50),rep("tumor",503))
names(Type) <- colnames(rt)
Type <- as.data.frame(Type)
tiff(file="heatmap.tiff",width=45,height=70,units="cm",compression="lzw",bg="white",res=300)
pheatmap(rt,annotation=Type,color=colorRampPalette(c("green","black","red"))(50),cluster_cols=F,fontsize_row=5,fontsize_col=4)
dev.off()
#甲基化与转录组数据融合
setwd("D:/TCGA_methylation/MethyGeneMerge")
methy <- read.table(file="normalizeMethy.txt",row.names=1,header=T,sep="\t",check.names=F)
rna <- read.table(file="normalizeExp.txt",row.name=1,header=T,sep="\t",check.names=F)
colnames(methy) <- gsub("(.*?)-(.*?)-(.*?)-(.*?)-.*","\\1-\\2-\\3-\\4",colnames(methy))
colnames(rna) <- gsub("(.*?)-(.*?)-(.*?)-(.*?)-.*","\\1-\\2-\\3-\\4",colnames(rna))
sameSample <- intersect(colnames(methy),colnames(rna))
rownames(methy) <- paste(rownames(methy),"methy",sep="|")
rownames(rna) <- paste(rownames(rna),"exp",sep="|")
merge <- rbind(id=sameSample,methy[,sameSample],rna[,sameSample])
write.table(merge,file="merge.txt",sep="\t",quote=F,col.names=F)
#基因甲基化与转录组数据相关关系
setwd("D:/TCGA_methylation/methycor")
inputFile <- "merge.txt"
methyGene <- "NCAPD3|methy"
expGene <- "NCAPD3|exp"
rt <- read.table(inputFile,sep="\t",header=T,check.names=F,row.names=1)
i <- rt[methyGene,]
j <- rt[expGene,]
x <- as.numeric(i)
y <- log2(as.numeric(j)+1)
corT <- cor.test(x,y)
methyGeneName <- unlist(strsplit(methyGene,"\\|"))[[1]]
expGeneName <- unlist(strsplit(expGene,"\\|"))[[1]]
z <- lm(y~x)
estimate <- corT$estimate
cor <- round(estimate,3)
pvalue <- corT$p.value #以小数的方式保留位数
pval <- signif(pvalue,4) #以科学计数法的方式保留位数
pval <- format(pval,scientific=T)
outTiff <- "cor.tiff"
tiff(file=outTiff,width=15,height=15,units="cm",compression="lzw",bg="white",res=300)
plot(x,y,type="p",pch=16,main=paste("Cor=",cor,"(p-value=",pval,")",sep=""),xlab=paste(methyGeneName,"methylation"),ylab=paste(expGeneName,"expression"))
lines(x,fitted(z),col=2)
dev.off()
#甲基化位点提取
nohup ./posget.pl metadata.cart.2018-08-01.json NCAPD3 > /home/train/Liuping/raw_data/TCGA_Methylation_ProstateCancer/NCAPD3_Methy_location.log 2>&1 &
#[1] 40635
#甲基化位点与gene表达合并
setwd("D:/TCGA_methylation/posmethygene")
methyFile="posMethy.txt"
expFile="normalizeExp.txt"
methy <- read.table(methyFile,row.names=1,header=T,sep="\t",check.names=F)
rna <- read.table(expFile,row.names=1,header=T,sep="\t",check.names=F)
colnames(methy) <- gsub("(.*?)-(.*?)-(.*?)-(.*?)-.*","\\1-\\2-\\3-\\4",colnames(methy))
colnames(rna) <- gsub("(.*?)-(.*?)-(.*?)-(.*?)-.*","\\1-\\2-\\3-\\4",colnames(rna))
sameSample <- intersect(colnames(methy),colnames(rna))
rownames(methy) <- paste(rownames(methy),"methy",sep="|")
rownames(rna) <- paste(rownames(rna),"exp",sep="|")
merge <- rbind(id=sameSample,methy[,sameSample],rna[,sameSample])
#甲基化位点与gene表达相关
setwd("D:/TCGA_methylation/cor_posmethygene")
inputFile <- "merge.txt"
posmethy <- "cg14934141|methy"
expGene <- "NCAPD3|exp"
rt <- read.table(inputFile,sep="\t",header=T,check.names=F,row.names=1)
i <- rt[posmethy,]
j <- rt[expGene,]
x <- as.numeric(i)
y <- log2(as.numeric(j)+1)
corT <- cor.test(x,y)
posmethyid <- unlist(strsplit(posmethy,"\\|"))[[1]]
expgenename <- unlist(strsplit(expGene,"\\|"))[[1]]
z <- lm(y~x)
cor <- corT$estimate
cor <- round(cor,3)
pvalue <- corT$p.value
pval <- signif(pvalue,4)
pval <- format(pval,scientific=T)
outTiff <- "posCor.tiff"
tiff(file=outTiff,width=15,height=15,units="cm",compression="lzw",bg="white",res=300)
plot(x,y,type="p",pch=16,main=paste("Cor=",cor,"(p-value=",pval,")",sep=""),xlab=paste(posmethyid,"methylation"),ylab=paste(expgenename,"expression"))
lines(x,fitted(z),col=2)
dev.off()
合并的脚本,类似转录本的合并脚本,需要自己根据需要自行修改
欢迎添加下面这个群提问: 