library(R.utils)
library(hash)
library(data.table)
library(jsonlite)
# Collect every downloaded file into one flat directory, data_in_one/.
#
# raw_data/ is scanned one level deep: each sub-directory found there has its
# direct contents copied into data_in_one/.
# NOTE(review): the working directory is machine-specific; adjust before use.
setwd("C:/Users/daizao/Desktop/practise/test")

# Re-running the script must not warn just because the target already exists.
dir.create("data_in_one", showWarnings = FALSE)

for (entry in list.files("raw_data/")) {
  entry_dir <- file.path("raw_data", entry)

  # Plain files sitting directly in raw_data/ have no contents to list; skip
  # them instead of handing a bogus "raw_data/<file>/" path to file.copy().
  if (!dir.exists(entry_dir)) {
    next
  }

  entry_files <- list.files(entry_dir, full.names = TRUE)
  if (length(entry_files) > 0) {
    file.copy(entry_files, "data_in_one/")
  }
}
# Parse the cart metadata and build `h`, a lookup from each file name to the
# entity_submitter_id of its associated entity.
dz <- fromJSON("metadata.cart.2019-07-19.json")

temp_filename <- as.character(dz$file_name)

submitter_ids <- lapply(
  dz$associated_entities,
  function(entity) entity$entity_submitter_id
)
temp_tcgaid <- as.character(submitter_ids)

h <- hash(temp_filename, temp_tcgaid)
# Build `exp`, the expression matrix: one row per gene (first column of the
# first file) and one column per file registered in `h`.
#
# Columns follow the order of keys(h) and are labelled with the id that `h`
# stores for each file name. Values come from the second column of each file
# and are aligned to the gene ids of the first file; a gene that is absent
# from a later file is left as NA.
# Note: `exp` masks base::exp; the name is kept because the rest of the
# script refers to it.

# Read one per-sample table from data_in_one/ and drop its last six rows.
read_sample_table <- function(file_name) {
  sample_table <- fread(paste("data_in_one", file_name, sep = "/"))
  # NOTE(review): presumably the trailing rows are summary lines appended by
  # the quantification tool -- confirm that exactly six should be removed.
  keep_n <- max(nrow(sample_table) - 6L, 0L)
  sample_table[seq_len(keep_n), ]
}

stopifnot(length(dz$file_name) > 0)

sample_files <- keys(h)
sample_ids <- vapply(
  sample_files,
  function(key) h[[key]],
  character(1),
  USE.NAMES = FALSE
)

# The first file defines the gene set and the row order of the matrix.
first_table <- read_sample_table(dz$file_name[1])
gene_ids <- as.character(first_table[[1]])

# Start from a logical NA matrix so the storage type follows the data
# (integer counts stay integer rather than being promoted to double).
exp <- matrix(
  NA,
  nrow = length(gene_ids),
  ncol = length(sample_ids),
  dimnames = list(gene_ids, sample_ids)
)

for (i in seq_along(dz$file_name)) {
  file_name <- dz$file_name[i]
  sample_table <- if (i == 1L) first_table else read_sample_table(file_name)

  # Locate the column by file name (position in keys(h)) rather than by
  # sample id, so two files sharing an id do not overwrite each other.
  column <- match(file_name, sample_files)

  # One vectorised lookup per file replaces a per-gene hash query.
  gene_rows <- match(gene_ids, as.character(sample_table[[1]]))
  exp[, column] <- sample_table[[2]][gene_rows]
}
# Arrange the samples with normal samples first, tumor samples second, and
# assemble `total_sort_sample`: a leading gene-id column followed by one
# column per sample.
#
# A sample is classed by the fourth "-"-separated field of its column name:
# a field starting with "1" means normal, anything else (including a missing
# field) means tumor. Either group may be empty, and single-sample groups are
# kept as columns instead of collapsing to a vector.
barcode_fields <- strsplit(colnames(exp), "-", fixed = TRUE)
sample_code <- vapply(
  barcode_fields,
  function(fields) fields[4],
  character(1)
)
is_normal <- grepl("^1", sample_code)

normal_name <- colnames(exp)[is_normal]
tumor_name <- colnames(exp)[!is_normal]

# Rows are sorted by gene id, matching what merge(..., by = "row.names")
# produced previously.
column_order <- c(which(is_normal), which(!is_normal))
row_order <- order(rownames(exp))
sorted_exp <- exp[row_order, column_order, drop = FALSE]

sorted_gene_ids <- rownames(sorted_exp)
rownames(sorted_exp) <- NULL

# check.names = FALSE keeps the "-" characters in the sample ids intact.
total_sort_sample <- data.frame(
  Row.names = sorted_gene_ids,
  sorted_exp,
  check.names = FALSE,
  stringsAsFactors = FALSE
)
# Label the leading column "id" and write the table as a tab-separated file
# without row names or quoting.
# The header is set directly instead of being rebuilt through a comma-joined
# string, which would break on any column name containing a comma.
colnames(total_sort_sample)[1] <- "id"
write.table(
  total_sort_sample,
  file = "RNAmatrix.txt",
  sep = "\t",
  row.names = FALSE,
  quote = FALSE
)
# Note: because of limitations inherent to R itself, this script runs more slowly than the equivalent Perl script.