Installing Trinity 2.8.5 from source (Trinity-2.8.5 源码安装)

# Install Trinity 2.8.5 from source.
# Unpack the release tarball under /opt/biosoft/ and build inside the extracted tree.
tar -zxf ~/software/trinityrnaseq-Trinity-v2.8.5.tar.gz -C /opt/biosoft/
cd /opt/biosoft/trinityrnaseq-Trinity-v2.8.5/
# Load the build toolchain (Java 1.8, Python 2.7.11, CMake 3.4.3, GCC 4.9.3).
source ~/.bashrc.java.1.8
source ~/.bashrc_python-2.7.11
source ~/.bashrc_cmake-3.4.3
source ~/.bashrc_gcc-4.9.3
make
make plugins
# Record the Trinity environment in its own rc file so it can be sourced on demand.
echo 'export TRINITY_HOME=/opt/biosoft/trinityrnaseq-Trinity-v2.8.5/' >> ~/.bashrc.trinity-2.8.5
echo 'export PATH=$TRINITY_HOME:$PATH' >> ~/.bashrc.trinity-2.8.5
# Write one `source` line per echo. A single-quoted multi-line echo with trailing
# backslashes stores the backslashes literally; when the rc file is later sourced,
# bash joins those lines into ONE `source` command with stray arguments, so only
# the first file would actually be loaded.
echo 'source ~/.bashrc.java.1.8' >> ~/.bashrc.trinity-2.8.5
echo 'source ~/.bashrc_python-2.7.11' >> ~/.bashrc.trinity-2.8.5
# NOTE(review): the original listed the python rc file twice; the duplicate was
# presumably meant to be the GCC runtime used for the build -- confirm.
echo 'source ~/.bashrc_gcc-4.9.3' >> ~/.bashrc.trinity-2.8.5
source ~/.bashrc.trinity-2.8.5
# Move to the bundled test assembly; it is run once the dependencies below are installed.
cd sample_data/test_Trinity_Assembly/
# A samtools version newer than 1.3 is required.
# Install samtools 1.9 (htslib 1.9 is built first).
        # Install htslib: unpack under /opt/biosoft/ and build inside the source tree.
        tar -zxf ~/software/htslib-1.9.tar.gz -C /opt/biosoft/
        cd /opt/biosoft/htslib-1.9/
        # Update autoconf so that `autoreconf` is available: the tarball is unpacked
        # in the current directory, installed to /opt/sysoft/autoconf-2.69/, and the
        # unpacked source directory is removed afterwards.
            tar -zxf ~/software/autoconf-2.69.tar.gz
            cd autoconf-2.69/
            ./configure --prefix=/opt/sysoft/autoconf-2.69/
            make -j 8
            make install
            cd ..
            rm -rf autoconf-2.69/
            echo 'export PATH=/opt/sysoft/autoconf-2.69/bin/:$PATH' >> ~/.bashrc_autoconf_2.6.9
        # Put the freshly installed autoconf on PATH, then regenerate htslib's configure script.
        source ~/.bashrc_autoconf_2.6.9
        autoreconf
        # xz-devel is installed before configuring (presumably for htslib's lzma support -- confirm).
        sudo yum install xz-devel
        # The install prefix is the htslib source directory itself.
        ./configure --prefix=/opt/biosoft/htslib-1.9/
        make -j 8
        make install
        echo 'export PATH=$PATH:/opt/biosoft/htslib-1.9/bin/' >> ~/.bashrc      
        source ~/.bashrc
    # Build samtools 1.9 the same way, installing into its own source directory.
    tar -zxf ~/software/samtools-1.9.tar.gz -C /opt/biosoft/
    cd /opt/biosoft/samtools-1.9
    autoreconf
    ./configure --prefix=/opt/biosoft/samtools-1.9/
    make -j 8
    make install
    # samtools gets a dedicated rc file, loaded immediately for the current shell.
    echo 'export PATH=/opt/biosoft/samtools-1.9/bin/:$PATH' >> ~/.bashrc_samtools-1.9
source ~/.bashrc_samtools-1.9
    # Install jellyfish 2.3.0: build in a scratch copy under $HOME, install to /opt/biosoft/.
    cd
    tar -zxf ~/software/jellyfish-2.3.0.tar.gz
    cd jellyfish-2.3.0/
    # The two commands below are kept disabled; the release tarball is configured directly.
    #autoreconf -B --prepend-include=/opt/sysoft/autoconf-2.69/share/
    #automake --add-missing
    source ~/.bashrc_gcc-4.9.3 
    ./configure --prefix=/opt/biosoft/jellyfish-2.3.0
    make -j 8
    make install
    # jellyfish is added to the Trinity rc file rather than to ~/.bashrc.
    echo 'export PATH=/opt/biosoft/jellyfish-2.3.0/bin/:$PATH' >> ~/.bashrc.trinity-2.8.5
    # Remove the scratch source directory once installed.
    cd .. && rm -rf jellyfish-2.3.0/
    # Install salmon from the prebuilt Linux x86_64 binary release (no compilation).
    # NOTE(review): the PATH entry assumes the tarball unpacks to
    # salmon-latest_linux_x86_64/ -- confirm. It is written to ~/.bashrc, so it only
    # takes effect after ~/.bashrc is sourced again.
    tar -zxf ~/software/salmon_0.14.2_linux_x86_64.tar.gz -C /opt/biosoft/
    echo 'export PATH=/opt/biosoft/salmon-latest_linux_x86_64/bin/:$PATH' >> ~/.bashrc
    # Install bowtie2 2.3.5 as a static build.
    tar -zxf ~/software/bowtie2-2.3.5.tar.gz -C /opt/biosoft/
    cd /opt/biosoft/bowtie2-2.3.5/
    # `make static-libs` fetches TBB from GitHub; using a pre-downloaded TBB tarball
    # is recommended because the download often fails.
    make static-libs && make STATIC_BUILD=1
    # If the first attempt failed on the TBB download (GitHub network interruption),
    # stage the tarball by hand inside .tmp/ ...
    cd .tmp/
    # `cp` needs an explicit destination; copy into the current directory (.tmp/).
    cp ~/software/tbb-2019_U4.tar.gz . && tar -zxf tbb-2019_U4.tar.gz
    # ... then go back to the bowtie2 root: the Makefile to patch and the make
    # targets live there, not in .tmp/.
    # NOTE(review): assumes the download line being patched is in the top-level
    # Makefile -- confirm against the bowtie2 source tree.
    cd ..
    # Replace the "download + untar + cd" step for TBB with a plain `cd` into the
    # directory unpacked above.
    perl -i -e 'while(<>){$_ =~ s/\$\$DL\ https:\/\/github.com\/01org\/tbb\/archive\/2019_U4.tar.gz\ \&\&\ tar\ xzf\ 2019\_U4.tar.gz\ \&\&\ cd\ tbb\-2019_U4;/cd tbb-2019_U4;/g;print $_}' Makefile
    make static-libs && make STATIC_BUILD=1
    echo 'export PATH=/opt/biosoft/bowtie2-2.3.5/:$PATH' >> ~/.bashrc_bowtie_tbb
    # Load the tools installed above into the current shell before testing:
    # bowtie2 from its own rc file, salmon from ~/.bashrc (where its PATH was appended).
    source ~/.bashrc_bowtie_tbb
    source ~/.bashrc
source ~/.bashrc.trinity-2.8.5
# runMe.sh lives in Trinity's bundled test directory, not in the bowtie2 tree.
cd "$TRINITY_HOME/sample_data/test_Trinity_Assembly/"
./runMe.sh
发表在 Linux, Perl | 留下评论

Installing fastp from source (fastp 源码安装)

# Install fastp (read cleaning / QC) from source.
# Build with GCC 4.9.3.
source ~/.bashrc_gcc-4.9.3
    # Install zlib 1.2.8 to /opt/sysoft/ and export its include/lib locations
    # through a dedicated rc file (~/.bashrc_zlib-1.2.8).
    # NOTE: the tarball path is relative, so run this from the directory holding zlib-1.2.8.tar.gz.
        tar -zxf zlib-1.2.8.tar.gz
        cd zlib-1.2.8/
        ./configure --prefix=/opt/sysoft/zlib-1.2.8
        make -j 16
        make install
        echo 'export CPLUS_INCLUDE_PATH=/opt/sysoft/zlib-1.2.8/include/:$CPLUS_INCLUDE_PATH' >> ~/.bashrc_zlib-1.2.8
        echo 'export C_INCLUDE_PATH=/opt/sysoft/zlib-1.2.8/include/:$C_INCLUDE_PATH' >> ~/.bashrc_zlib-1.2.8
        echo 'export LIBRARY_PATH=/opt/sysoft/zlib-1.2.8/lib/:$LIBRARY_PATH' >> ~/.bashrc_zlib-1.2.8
        echo 'export LD_LIBRARY_PATH=/opt/sysoft/zlib-1.2.8/lib/:$LD_LIBRARY_PATH' >> ~/.bashrc_zlib-1.2.8
        echo 'export PKG_CONFIG_PATH=/opt/sysoft/zlib-1.2.8/lib/pkgconfig/:$PKG_CONFIG_PATH' >> ~/.bashrc_zlib-1.2.8
        source ~/.bashrc_zlib-1.2.8

# Unpack fastp under /opt/biosoft/ and keep a backup of the Makefile before patching it.
tar -zxf /home/train/software/fastp-0.20.0.tar.gz -C /opt/biosoft/
cd /opt/biosoft/fastp-0.20.0/
cp Makefile Makefile.bak

# If the build fails with link errors such as the following:
#fastqreader.cpp:32: undefined reference to gzoffset' ./obj/peprocessor.o: In function PairEndProcessor::initOutput()':
#peprocessor.cpp:35: undefined reference to gzbuffer' peprocessor.cpp:38: undefined reference to gzbuffer'

# ... rewrite the `LIBS := -lz -lpthread` line of the Makefile in place so that the
# zlib 1.2.8 directories installed above (and /usr/local) are added as -L search paths.
perl -i -e 'while(<>){$_ =~ s/LIBS\ \:\=\ \-lz\ \-lpthread/LIBS\ \:\=\ \-L\/usr\/local\/lib\ \-L\/usr\/local\/include\ \-L\/opt\/sysoft\/zlib\-1\.2\.8\/include\ \-L\/opt\/sysoft\/zlib\-1\.2\.8\/lib\ \-lz\ \-lpthread/g;print $_;}' Makefile  
#source ~/.bashrc_gcc-4.9.3
make
# fastp gets its own rc file, which also pulls in the GCC and zlib rc files.
echo 'export PATH=$PATH:/opt/biosoft/fastp-0.20.0/' >> ~/.bashrc_fastp
echo 'source ~/.bashrc_gcc-4.9.3' >> ~/.bashrc_fastp
echo 'source ~/.bashrc_zlib-1.2.8' >> ~/.bashrc_fastp
source ~/.bashrc_fastp
发表在 Linux, NGS_analysis | 留下评论

Predicting trans-acting lncRNA target mRNAs from sequence (根据序列预测lncRNA靶基因(trans方式))

# Install RIsearch 2.1: unpack into its own directory, rebuild, and add bin/ to PATH.
# NOTE: the tarball path is relative, so run this from the directory holding RIsearch-2.1.tar.gz.
mkdir /opt/biosoft/RIsearch-2.1
tar -zxf RIsearch-2.1.tar.gz -C /opt/biosoft/RIsearch-2.1/
cd /opt/biosoft/RIsearch-2.1/RIsearch-2.1/
./rebuild.sh
echo 'export PATH=$PATH:/opt/biosoft/RIsearch-2.1/RIsearch-2.1/bin/' >> ~/.bashrc
source ~/.bashrc

# Test with the bundled test suite: the first command takes RHOC.fa and writes
# RHOC.pksuf; the following commands query mirnas.fa against that file with
# progressively more options (-e -15, -p, -s 7).
cd test_suite/
risearch2.x -c RHOC.fa -o RHOC.pksuf
risearch2.x -q mirnas.fa -i RHOC.pksuf
risearch2.x -q mirnas.fa -i RHOC.pksuf -e -15
risearch2.x -q mirnas.fa -i RHOC.pksuf -e -15 -p
risearch2.x -q mirnas.fa -i RHOC.pksuf -s 7 -e -15 -p

# Keep result rows whose 8th tab-separated column is below -20, sort them numerically
# on that column only (-k 8,8n; a bare -k 8n would extend the key to end of line),
# and page through the output with `less` (the original `les` was a typo).
zcat result.gz | perl -e 'while(<>){chomp;my @arr=split(/\t/,$_);if($arr[7] < -20){print $_."\n"}}' | sort -t $'\t' -k 8,8n | less
发表在 Linux, Perl | 留下评论

GISTIC2.0 安装(GISTIC2.0 Install)

Ubuntu install

# Install GISTIC 2.0.23: unpack the release into its own directory.
# `mkdir -p` keeps the step re-runnable if the directory already exists.
mkdir -p /opt/biosoft/GISTIC_2_0_23
tar -xf GISTIC_2_0_23.tar.gz -C /opt/biosoft/GISTIC_2_0_23/

# Install the MATLAB Compiler Runtime (the CentOS procedure below is the recommended reference).
mkdir -p /opt/sysoft/MATLAB
# MCR_Installer/ is part of the extracted GISTIC tree, so enter it by absolute path
# (the CentOS section does the same via `cd /opt/biosoft/GISTIC_2_0_23/` first).
cd /opt/biosoft/GISTIC_2_0_23/MCR_Installer/
unzip MCRInstaller.zip
./install -mode silent -agreeToLicense yes -destinationFolder /opt/sysoft/MATLAB/
echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/sysoft/MATLAB/v83/bin/glnxa64/' >> ~/.bashrc
echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/sysoft/MATLAB/v83/runtime/glnxa64/' >> ~/.bashrc
# Quote the pattern so the shell cannot expand it against files in the current
# directory; apt-get does the matching itself.
sudo apt-get install 'libncurses5*'
echo 'export PATH=$PATH:/opt/sysoft/MATLAB/v83/' >> ~/.bashrc
source ~/.bashrc
# Expose the runtime as MATLAB_Compiler_Runtime/v83 inside the GISTIC tree.
# The target directory must exist for `ln -s SRC DIR/` to create the link inside it.
mkdir -p /opt/biosoft/GISTIC_2_0_23/MATLAB_Compiler_Runtime/
ln -s /opt/sysoft/MATLAB/v83 /opt/biosoft/GISTIC_2_0_23/MATLAB_Compiler_Runtime/

# Test with the bundled example.
cd /opt/biosoft/GISTIC_2_0_23
./run_gistic_example

CentOS install

# Install GISTIC 2.0.23: unpack the release into its own directory.
# NOTE: the tarball path is relative, so run this from the directory holding GISTIC_2_0_23.tar.gz.
mkdir /opt/biosoft/GISTIC_2_0_23
tar -xf GISTIC_2_0_23.tar.gz -C /opt/biosoft/GISTIC_2_0_23/

# Install the MATLAB Compiler Runtime directly into the GISTIC tree.
cd /opt/biosoft/GISTIC_2_0_23/
cd MCR_Installer/
unzip MCRInstaller.zip
./install -mode silent -agreeToLicense yes -destinationFolder /opt/biosoft/GISTIC_2_0_23/MATLAB_Compiler_Runtime/ # note: X11 must be enabled
# Make the v83 runtime libraries and X11 app-defaults visible to future shells.
echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/biosoft/GISTIC_2_0_23/MATLAB_Compiler_Runtime/v83/runtime/glnxa64:/opt/biosoft/GISTIC_2_0_23/MATLAB_Compiler_Runtime/v83/bin/glnxa64:/opt/biosoft/GISTIC_2_0_23/MATLAB_Compiler_Runtime/v83/sys/os/glnxa64/' >> ~/.bashrc
echo 'export XAPPLRESDIR=$XAPPLRESDIR:/opt/biosoft/GISTIC_2_0_23/MATLAB_Compiler_Runtime/v83/X11/app-defaults' >> ~/.bashrc
source ~/.bashrc

# Test with the bundled example.
cd /opt/biosoft/GISTIC_2_0_23
./run_gistic_example 

# If the run complains about missing ncurses dependencies, install the ncurses libraries:
#sudo yum install ncurses-base ncurses-devel # to build from source instead, see ftp://ftp.gnu.org/gnu/ncurses/ 
发表在 Linux, TCGA | 3条评论

构建自定义注释包(make annotation package)

# Build a custom OrgDb annotation package, using Arabidopsis thaliana (TAIR
# files) as the example.
library(RSQLite)
library(AnnotationForge)
options(stringsAsFactors = FALSE)

setwd("C:/Users/daizao/Desktop/do_R_annotation/TAIR_DATA_20190711/TAIR_DATA_20190711")

# ---- helpers ---------------------------------------------------------------

# TRUE for identifiers that start with "AT" followed by at least one digit.
is_at_locus <- function(x) {
  grepl("^AT\\d+", x)
}

# Drop everything from the first "." onwards (e.g. a ".1" gene-model suffix),
# leaving the part used as GID.
strip_model_suffix <- function(x) {
  sub("\\..*$", "", x)
}

# Replace NA and the literal string "NULL" with an empty string.
blank_missing <- function(x) {
  x[is.na(x) | x == "NULL"] <- ""
  x
}

# Keep only rows whose GID is in `ids`, grouped in the order of `ids` and
# otherwise in their original order. Vectorised replacement for growing a
# data frame with rbind() inside a loop; row content and order are the same.
keep_shared_gids <- function(df, ids) {
  df <- df[df$GID %in% ids, , drop = FALSE]
  df[order(match(df$GID, ids)), , drop = FALSE]
}

# ---- GO annotations --------------------------------------------------------

# The raw TAIR file had malformed lines fixed by hand and was saved as
# ATH_GO_GOSLIM_new_dz.txt.
test <- read.table("ATH_GO_GOSLIM_new_dz.txt", sep = "\t", header = FALSE,
                   check.names = FALSE, quote = "", stringsAsFactors = FALSE)
nrow(test)
# Only columns 1, 6 and 10 are kept: the table passed as `goTable` ends up
# with exactly GID, GO and EVIDENCE, so column 8 (recoded to an ONTOLOGY
# column and then dropped again in the previous version) is not needed.
go_df <- test[, c(1, 6, 10)]
colnames(go_df) <- c("GID", "GO", "EVIDENCE")
go_df <- go_df[is_at_locus(go_df$GID), ]

# ---- publications ----------------------------------------------------------

pub_df <- read.table("Locus_Published_20170630.txt", sep = "\t", header = FALSE,
                     quote = "", stringsAsFactors = FALSE)
colnames(pub_df) <- c("name", "reference_id", "pubmed_id", "publication_year")
pub_df$pubmed_id <- blank_missing(pub_df$pubmed_id)
pub_df <- pub_df[is_at_locus(pub_df$name), ]
# GID is the name without its suffix; the full name is kept as GENEID.
pub_df <- cbind(GID = strip_model_suffix(pub_df$name), pub_df[, 1:4])
colnames(pub_df) <- c("GID", "GENEID", "REFID", "PMID", "PUBYEAR")

# ---- gene symbols ----------------------------------------------------------

symbol_df <- read.table("gene_aliases_20170630.txt", sep = "\t", header = FALSE,
                        quote = "", stringsAsFactors = FALSE,
                        check.names = FALSE, comment.char = "")
symbol_df <- symbol_df[is_at_locus(symbol_df[, 1]), ]
colnames(symbol_df) <- c("GID", "SYMBOL", "SYMBOL_NAME")

# ---- functional descriptions -----------------------------------------------

func_df <- read.table("Araport11_functional_descriptions_20170630.txt",
                      sep = "\t", quote = "", header = FALSE,
                      stringsAsFactors = FALSE, check.names = FALSE,
                      comment.char = "")
func_df <- func_df[is_at_locus(func_df[, 1]), ]
func_df <- cbind(GID = strip_model_suffix(func_df[, 1]), func_df[, 1:5])
colnames(func_df) <- c("GID", "TXID", "GENE_MODEL_TYPE", "SHORT_DESCRIPTION",
                       "CURATED_DESCRIPTION", "DESCRIPTION")
func_df$SHORT_DESCRIPTION <- blank_missing(func_df$SHORT_DESCRIPTION)
func_df$DESCRIPTION <- gsub("\\(source\\:Araport11\\)", "", func_df$DESCRIPTION)
func_df <- func_df[order(func_df$GID), ]

# ---- de-duplicate ----------------------------------------------------------

# Remove duplicated rows from every table before building the package.
go_df <- go_df[!duplicated(go_df), ]
pub_df <- pub_df[!duplicated(pub_df), ]
symbol_df <- symbol_df[!duplicated(symbol_df), ]
func_df <- func_df[!duplicated(func_df), ]

# ---- restrict every table to the shared GIDs -------------------------------

# The GID column must agree across tables, so only GIDs present in all four
# tables are kept.
all_jiaoji <- Reduce(intersect,
                     list(go_df$GID, pub_df$GID, symbol_df$GID, func_df$GID))

new_go_df <- keep_shared_gids(go_df, all_jiaoji)
new_pub_df <- keep_shared_gids(pub_df, all_jiaoji)
new_symbol_df <- keep_shared_gids(symbol_df, all_jiaoji)
new_func_df <- keep_shared_gids(func_df, all_jiaoji)

# ---- build the package -----------------------------------------------------

# The package source is written into the current working directory.
file_path <- file.path(getwd())
makeOrgPackage(go = new_go_df,
               pub_info = new_pub_df,
               symbol_info = new_symbol_df,
               function_info = new_func_df,
               version = "0.1",
               maintainer = "daizao <dzbioinformatics@qq.com>",
               author = "daizao <dzbioinformatics@qq.com>",
               outputDir = file_path,
               tax_id = "3702",
               genus = "At",
               species = "tair10",
               goTable = "go"
)


# Test: install the generated package from source and run a GO enrichment
# against it.
install.packages("./org.Atair10.eg.db", repos = NULL,
                 type = "source")
library(org.Atair10.eg.db)
library(clusterProfiler)
library(org.Hs.eg.db)
library(hash)
org <- org.Atair10.eg.db
# head() returns at most 100 IDs and never pads with NA when fewer exist.
# `pvalueCutoff` is spelled out in full rather than relying on partial
# argument matching of `pvalue`.
result <- enrichGO(gene = head(all_jiaoji, 100), OrgDb = org, keyType = "GID",
                   minGSSize = 1, pvalueCutoff = 1)
# Map the enriched gene IDs (stored "/"-separated) from GID to SYMBOL.
bb <- bitr(unlist(strsplit(result$geneID, "\\/")), fromType = "GID",
           toType = "SYMBOL", OrgDb = org)
# Fill a hash with one key/value pair per row of bb (column 1 -> column 2).
# seq_len() makes the loop a no-op when bb has no rows.
h <- hash()
for (i in seq_len(nrow(bb))) {
  .set(h, keys = bb[i, 1], values = bb[i, 2])
}
dz <- as.data.frame(result)
DT::datatable(dz)
DT::datatable(bb)

其实更好的一种注释方式是:CellMarker enrichment analysis (CellMarker 富集分析)
参考链接:Bioconductor的注释包太旧怎么办?自己做呀

发表在 R | 2条评论

R语言ifelse

# Input file download: https://zenodo.org/record/2530282#.XYPNfmkzZ9B (Arabidopsis)
pub_df <- read.table("Locus_Published_20170630.txt", sep = "\t", header = FALSE,
                     quote = "", stringsAsFactors = FALSE)
colnames(pub_df) <- c("name", "reference_id", "pubmed_id", "publication_year")

# Approach 1: row-by-row loop. Element-wise loops like this run very slowly in
# R and are shown for comparison only. Each row is updated in place; is.na()
# is tested first so the string comparison never sees an NA inside if().
for (i in seq_len(nrow(pub_df))) {
  pmid <- pub_df[i, 3]
  if (is.na(pmid) || pmid == "NULL") {
    pub_df[i, 3] <- ""
  }
}

# Approach 2: vectorised and fast. Usage: ifelse(test, yes, no).
# NA and the literal string "NULL" both become "". is.null() is not needed:
# it returns a single FALSE for any column vector.
pmid_col <- pub_df[, 3]
pub_df[, 3] <- ifelse(is.na(pmid_col) | pmid_col == "NULL", "", pmid_col)
发表在 R | 留下评论