十年的生信 | 勿忘初心，砥砺前行

AR_Chip-seq数据分析

发表于2018年8月12日由daizao

# Quality control of the raw reads with FastQC.
# FastQC refuses to run when the -o directory is missing, so create it first
# (-p keeps a re-run from failing when it already exists).
cd /home/train/Liuping/raw_data/fastq_file
mkdir -p /home/train/Liuping/raw_data/fastq_file/fastqc_raw
fastqc -t 32 -o /home/train/Liuping/raw_data/fastq_file/fastqc_raw/ /home/train/Liuping/raw_data/fastq_file/*.fastq
# Adapter/quality trimming with Trimmomatic in single-end mode.
# One loop handles every raw sample (SRR1206250 .. SRR1206269, the set used by
# the peak-calling step later on); the "??" glob matches only the raw
# <run>.fastq files, never the *.cut.fastq outputs of a previous run.
# ILLUMINACLIP needs the path of an adapter FASTA file, not a bare name.
# NOTE(review): assumes the adapters/ directory shipped with Trimmomatic-0.36 — confirm the path.
adapters=/opt/biosoft/Trimmomatic-0.36/adapters/TruSeq3-SE.fa
for fq in SRR12062??.fastq; do
  [[ -e "$fq" ]] || continue   # glob matched nothing
  java -jar /opt/biosoft/Trimmomatic-0.36/trimmomatic-0.36.jar SE -threads 32 -phred33 \
    "$fq" "${fq%.fastq}.cut.fastq" \
    ILLUMINACLIP:"${adapters}":2:30:10 LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36
done > log.trim 2>&1   # Trimmomatic reports on stderr, so capture both streams
# N-removal step using the local removeN tool copied from another project
# (invoked as: removeN <input.fastq> <output.fastq>; its filtering rule is not
# visible in these notes).
# Each SRRxxxx.cut.fastq produces SRRxxxx.unknowNu.fastq.
cp /home/train/gc_reovirus_data/gc_fastq/removeN ./
for fq in SRR12062*.cut.fastq; do
  [[ -e "$fq" ]] || continue   # glob matched nothing
  /home/train/Liuping/raw_data/fastq_file/removeN "$fq" "${fq%%.*}.unknowNu.fastq"
done > log.delN
# Discard low-quality reads with FASTX-Toolkit's fastq_quality_filter:
#   -Q33   quality scores use the Phred+33 offset
#   -q 20  minimum quality score to keep
#   -p 70  minimum percentage of bases that must reach that score
# Each SRRxxxx.unknowNu.fastq produces SRRxxxx.clean.fq.
for fq in SRR12062*.unknowNu.fastq; do
  [[ -e "$fq" ]] || continue   # glob matched nothing
  fastq_quality_filter -Q33 -q 20 -p 70 -i "$fq" -o "${fq%%.*}.clean.fq"
done
# Re-run QC on the cleaned reads to check whether data quality improved.
# Create the report directory at the same absolute path FastQC writes to,
# so the step does not depend on the current directory and can be re-run.
mkdir -p /home/train/Liuping/raw_data/fastq_file/fastqc_clean
fastqc -t 32 -o /home/train/Liuping/raw_data/fastq_file/fastqc_clean/ /home/train/Liuping/raw_data/fastq_file/SRR12062*.clean.fq

# Build the Bowtie2 indexes (both hg19 and hg38 are used as reference genomes).
# Copy each genome FASTA next to the reads, then index it there.
cp /home/train/Liuping/human_genome/hg19_genome/hg19.fa /home/train/Liuping/raw_data/fastq_file/
cp /home/train/Liuping/human_genome/hg38_genome/hg38.fa /home/train/Liuping/raw_data/fastq_file/
cd /home/train/Liuping/raw_data/fastq_file
for genome in hg19 hg38; do
  bowtie2-build "${genome}.fa" "${genome}"
done
# Bowtie2 alignment against hg19 and hg38, run in parallel through ParaFly.
# Prerequisite: the files bowtie2_hg19 and bowtie2_hg38 must already exist with
# one command prefix per cleaned FASTQ, in the same (sorted) order as the glob
# below; they are not created in these notes.
# NOTE(review): presumably lines like "bowtie2 -x <index> -U <reads>" — confirm before running.
for genome in hg19 hg38; do
  # Output argument for each sample: "-S <run>_<genome>.sam"
  for fq in SRR12062*.clean.fq; do
    [[ -e "$fq" ]] || continue   # glob matched nothing
    printf -- '-S %s_%s.sam\n' "${fq%%.*}" "${genome}"
  done > "bowtie2_${genome}_1"
  paste -d " " "bowtie2_${genome}" "bowtie2_${genome}_1" > "bowtie2_${genome}_xin"
done
# Join both command lists into one job file (the original used `les`, which is
# not a standard command; cat does the intended concatenation).
cat bowtie2_hg19_xin bowtie2_hg38_xin > bowtie2_align
chmod u+x bowtie2_align
# Sequential alternative:
#nohup ./bowtie2_align > log_bowtie2 2>&1 &
nohup ParaFly -c bowtie2_align -CPU 16 > log_bowtie2 2>&1 &

# Keep uniquely aligned reads and convert SAM to BAM.
# Records carrying an "XS:i:" tag (Bowtie2 found a second alignment) are
# dropped by grep -v; everything else is piped into samtools.
# Run this only after the background alignment job has finished, otherwise
# the *.sam files are missing or incomplete.
# One command line per SAM file is written to uniqsam_run for ParaFly.
for sam in *.sam; do
  [[ -e "$sam" ]] || continue   # glob matched nothing
  printf 'grep -v "XS:i:" %s | samtools view -bS - > %s.bam\n' "$sam" "${sam%%.*}"
done > uniqsam_run
chmod u+x uniqsam_run
nohup ParaFly -c uniqsam_run -CPU 16 > log_uniq_bam 2>&1 &


# Peak calling (hg19, treatment vs. control).
# Author's note: differential peaks include both the peaks that go up and the
# peaks that go down under stimulation; swapping the two groups gives the other
# direction. That reverse run was forgotten at first and is done at the very end.
mkdir result_call_peaks
mkdir /home/train/Liuping/raw_data/fastq_file/result_call_peaks/hg19_diff
cd /home/train/Liuping/raw_data/fastq_file

# BAM files passed to -t (treatment) and -c (control), in the original order.
treatment_bams=(
  SRR1206269_hg19.bam SRR1206268_hg19.bam SRR1206267_hg19.bam SRR1206266_hg19.bam
  SRR1206259_hg19.bam SRR1206265_hg19.bam SRR1206263_hg19.bam SRR1206262_hg19.bam
  SRR1206261_hg19.bam SRR1206258_hg19.bam SRR1206255_hg19.bam SRR1206253_hg19.bam
  SRR1206251_hg19.bam
)
control_bams=(
  SRR1206264_hg19.bam SRR1206260_hg19.bam SRR1206257_hg19.bam SRR1206256_hg19.bam
  SRR1206254_hg19.bam SRR1206252_hg19.bam SRR1206250_hg19.bam
)

# An earlier attempt used macs14 with the same treatment/control split and
# "-p 1e-5 -w --call-subpeaks"; the macs2 call below replaced it.
nohup macs2 callpeak -t "${treatment_bams[@]}" -c "${control_bams[@]}" -f BAM -g hs -n result_call_peaks/hg19_diff/hg19_diff_prostate -B -q 0.01 > log_macs_hg19_diff 2>&1 &

# The remaining peak-calling runs are slow and numerous, so they are listed in
# a command file and executed in parallel by ParaFly.
# The command file hg19and38_tumorandnomal_withouthg19diff is prepared
# separately; its contents are not part of these notes.
mkdir result_call_peaks/hg19_tumor result_call_peaks/hg19_normal
mkdir result_call_peaks/hg38_diff result_call_peaks/hg38_tumor result_call_peaks/hg38_normal
nohup ParaFly -c hg19and38_tumorandnomal_withouthg19diff -CPU 16 > log_hg19and38_macs_withouthg19diff 2>&1 &

# Render the MACS2 model plot for the hg19 diff run and open it.
# The R script is copied into the reads directory and executed from there.
# NOTE(review): presumably the script writes its PDF to a path relative to the
# directory macs2 was started from, which is why it is run here — confirm.
cd /home/train/Liuping/raw_data/fastq_file/
cp /home/train/Liuping/raw_data/fastq_file/result_call_peaks/hg19_diff/hg19_diff_prostate_model.r ./
Rscript hg19_diff_prostate_model.r
evince result_call_peaks/hg19_diff/hg19_diff_prostate_model.pdf

# Install the HOMER data packages used below (organism + both genomes).
cd /home/train/Liuping/raw_data/fastq_file/result_call_peaks
perl /opt/biosoft/HOMER_v4.9/configureHomer.pl -install human-o
perl /opt/biosoft/HOMER_v4.9/configureHomer.pl -install hg19
perl /opt/biosoft/HOMER_v4.9/configureHomer.pl -install hg38
# If the download is slow, run it in the background instead:
#nohup perl /opt/biosoft/HOMER_v4.9/configureHomer.pl -install hg19 >log_homer_hg19 2>&1 &

# Convert the MACS2 peak table into the 5-column peak file given to HOMER:
#   peak name, chromosome, start, end, strand (always "+").
# Header handling no longer depends on a hard-coded line count: "#" comment
# lines, blank lines and the "chr start end ..." column header are skipped.
cd /home/train/Liuping/raw_data/fastq_file/result_call_peaks/hg19_diff
awk -F '\t' -v OFS='\t' '!/^#/ && NF >= 10 && $1 != "chr" { print $10, $1, $2, $3, "+" }' \
  hg19_diff_prostate_peaks.xls > hg19_homer.bed
# Motif discovery on 200 bp regions with repeat masking (-p <n> enables multithreading).
findMotifsGenome.pl hg19_homer.bed hg19 hg19_diff_motif_Dir -size 200 -mask
# Background variant:
#nohup findMotifsGenome.pl hg19_homer.bed hg19 hg19_diff_motif_Dir -size 200 -mask > log_findMotif 2>&1 &
# For the other peak sets: build the HOMER peak file first, then run findMotifsGenome.pl on it.
# HOMER peak file for the hg19 normal peak set (same 5-column layout:
# peak name, chromosome, start, end, "+"). Comment lines, blank lines and the
# column header are skipped instead of relying on a fixed header length.
cd /home/train/Liuping/raw_data/fastq_file/result_call_peaks/hg19_normal/
awk -F '\t' -v OFS='\t' '!/^#/ && NF >= 10 && $1 != "chr" { print $10, $1, $2, $3, "+" }' \
  hg19_normal_prostate_peaks.xls > hg19_normal_homer.bed
# Motif discovery for the remaining peak sets, in parallel.
# NOTE(review): launched from hg19_normal/ exactly as in the original notes; the
# command file is not shown, so confirm where it lives and which paths it uses.
nohup ParaFly -c findMotif_19and38_without_19diff -CPU 5 > log_findmitf_19_38_without19diff 2>&1 &

# Peak annotation. The paths below are relative to result_call_peaks, so
# return there first (the original notes skipped this cd).
cd /home/train/Liuping/raw_data/fastq_file/result_call_peaks
annotatePeaks.pl hg19_diff/hg19_homer.bed hg19 > hg19_diff/peakAnn.xls 2> hg19_diff/annLog.txt
nohup ParaFly -c annopeaks -CPU 5 > log_annomotif_19_38_without19diff 2>&1 &
# Compare the annotated gene symbols (column 16 of peakAnn.xls) between the
# hg38 and hg19 runs. Run after the background annotation jobs have finished.
# sort -u de-duplicates correctly; plain `uniq` on unsorted input only removes
# adjacent repeats, so the original uniq-then-sort order left duplicates behind.
cd /home/train/Liuping/raw_data/fastq_file/result_call_peaks
awk -F '\t' '{ print $16 }' hg38_diff/peakAnn.xls | sort -u > a_sort_uniq_38
awk -F '\t' '{ print $16 }' hg19_diff/peakAnn.xls | sort -u > b_sort_uniq_19
# Author's earlier finding: the coding genes were all identical.
diff a_sort_uniq_38 b_sort_uniq_19 | less


# Reverse comparison: peaks higher in normal relative to tumor (i.e. lower in
# tumor relative to normal), for both genome builds.
cd /home/train/Liuping/raw_data/fastq_file/result_call_peaks
mkdir hg19_diff_normal
mkdir hg38_diff_normal
cd ..
# The macs2 command lines are kept in macs2_callpeak_normal_diff (not shown here).
nohup ParaFly -c macs2_callpeak_normal_diff -CPU 16 > log_macs2_callpeak_normal_diff 2>&1 &

# Run the steps below only after the background macs2 job above has finished.
# Build the 5-column HOMER peak file (name, chr, start, end, "+"); comment
# lines, blank lines and the column header are skipped rather than counted.
cd /home/train/Liuping/raw_data/fastq_file/result_call_peaks/hg19_diff_normal
awk -F '\t' -v OFS='\t' '!/^#/ && NF >= 10 && $1 != "chr" { print $10, $1, $2, $3, "+" }' \
  hg19_diff_normal_prostate_peaks.xls > hg19_diff_normal_prostate_homer.bed
cd /home/train/Liuping/raw_data/fastq_file/result_call_peaks
nohup ParaFly -c findMotif_normal_diff_19and38 -CPU 2 > log_indMotif_normal_diff_19and38 2>&1 &
annotatePeaks.pl hg19_diff_normal/hg19_diff_normal_prostate_homer.bed hg19 > hg19_diff_normal/peakAnn.xls 2>hg19_diff_normal/annLog.txt

发表在 NGS_analysis | 留下评论

草鱼转录组数据分析部分流程

发表于2018年8月12日由daizao

硕士时候做的分析，整理一些放到博客，有问题可以发邮件咨询我

# Build the Bowtie2 index of the grass carp genome (index prefix: genome).
cd /home/train/gc_reovirus_data/gc_database
bowtie2-build C_idella_female_scaffolds.fasta.v1 genome

# Raw-read QC. gc_fastq sits next to gc_database, not inside it, so use the
# absolute path; FastQC needs its output directory to exist.
cd /home/train/gc_reovirus_data/gc_fastq
mkdir -p /home/train/gc_reovirus_data/gc_spleen_fastqc
fastqc -t 7 -o /home/train/gc_reovirus_data/gc_spleen_fastqc SRR1045893.fastq SRR1045894.fastq SRR1045895.fastq SRR1045896.fastq SRR1045897.fastq SRR1045898.fastq SRR1045899.fastq
# The reports are in /home/train/gc_reovirus_data/gc_spleen_fastqc; the steps
# below keep working in gc_fastq, where the reads and ./removeN live.

# The cleaning steps are shown for SRR1045893; the other samples
# (SRR1045894 .. SRR1045899) get the same treatment.
# Adapter/quality trimming.
java -jar /opt/biosoft/Trimmomatic-0.33/trimmomatic-0.33.jar SE -threads 31 -phred33 SRR1045893.fastq SRR1045893.cut.fastq ILLUMINACLIP:/opt/biosoft/Trimmomatic-0.33/adapters/TruSeq3-SE.fa:2:30:10 LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36
# N-removal step (local tool: removeN <input> <output>).
./removeN SRR1045893.cut.fastq SRR1045893.unknowNu.fq
# Low-quality read filter. -Q33 declares Phred+33 encoded quality scores;
# -q 20 -p 70 keeps reads with at least 70% of bases at quality >= 20.
fastq_quality_filter -Q33 -q 20 -p 70 -i SRR1045893.unknowNu.fq -o SRR1045893.clean.fq

# QC again on the cleaned reads (each file listed once).
mkdir -p /home/train/gc_reovirus_data/gc_clean_fq
fastqc -t 31 -o /home/train/gc_reovirus_data/gc_clean_fq SRR1045893.clean.fq SRR1045894.clean.fq SRR1045895.clean.fq SRR1045896.clean.fq SRR1045897.clean.fq SRR1045898.clean.fq SRR1045899.clean.fq
# TopHat alignment (shown for SRR1045893; change -o and the read file for each
# of the other samples).
# Run from gc_database: the later cuffdiff/cufflinks steps expect
# mappingout93/ there, and gc.final.gtf and the genome index are referenced
# from that directory as well. The cleaned reads are taken from gc_fastq.
cd /home/train/gc_reovirus_data/gc_database
tophat -o mappingout93 -N 5 --read-edit-dist 5 -r 102 --mate-std-dev 20 -p 31 -a 5 -i 20 -I 4000 --min-segment-intron 20 --max-segment-intron 4000 --min-coverage-intron 20 --max-coverage-intron 4000 --coverage-search --microexon-search -G gc.final.gtf --library-type fr-unstranded /home/train/gc_reovirus_data/gc_database/genome /home/train/gc_reovirus_data/gc_fastq/SRR1045893.clean.fq

# Post-alignment analysis of the mapping result.
cd mappingout93
# Keep the SAM header plus alignments with MAPQ == 50 (used here as the
# marker of uniquely mapped reads), then convert back to BAM.
samtools view -h accepted_hits.bam | awk '$1~/^@/||$5==50{print $0}' | samtools view -bhS - > SRRgc93.unique.bam
samtools index SRRgc93.unique.bam
# Gene-body coverage and junction saturation reports, output prefix "93".
geneBody_coverage.py -i SRRgc93.unique.bam -r /home/gcfinal/gc.final.bed -o 93
junction_saturation.py -i SRRgc93.unique.bam -r /home/gcfinal/gc.final.bed -o 93
# Differential expression with cuffdiff across the seven samples.
cd /home/train/gc_reovirus_data/gc_database/
cuffdiff -p 31 -b C_idella_female_scaffolds.fasta.v1 -L gc93,gc94,gc95,gc96,gc97,gc98,gc99 -o diff --library-type fr-unstranded -u gc.final.gtf mappingout93/accepted_hits.bam mappingout94/accepted_hits.bam mappingout95/accepted_hits.bam mappingout96/accepted_hits.bam mappingout97/accepted_hits.bam mappingout98/accepted_hits.bam mappingout99/accepted_hits.bam
cd diff/
parsing_cuffdiff_out.pl gene_exp.diff DEG

# Expression quantification with htseq-count (shown for sample 93).
# mappingout93 is a sibling of diff/, so use its full path rather than a
# relative cd from inside diff/.
cd /home/train/gc_reovirus_data/gc_database/mappingout93
# -q is a stand-alone "quiet" flag; it must be separated from the BAM file name.
htseq-count --stranded=no --format=bam --order=pos --idattr=gene_id --mode=intersection-nonempty -q SRRgc93.unique.bam /home/gcfinal/gc.final.gtf > SRR93.htseq_count.xls
# Cufflinks (shown for sample 93).
cd /home/train/gc_reovirus_data/gc_database/
# Quantify against the reference annotation only (-G), using the unique alignments.
cufflinks -o cufflinks93 -p 31 --library-type fr-unstranded -G /home/gcfinal/gc.final.gtf /home/train/gc_reovirus_data/gc_database/mappingout93/SRRgc93.unique.bam
# Reference-guided assembly (-g) to pick up new transcripts.
cufflinks -p 31 -u -g gc.final.gtf -b C_idella_female_scaffolds.fasta.v1 -o cufflinks93_nweRNA mappingout93/accepted_hits.bam

# Merge the per-sample assemblies.
ls cufflinks9*_nweRNA/transcripts.gtf > assembly_GTF_list.txt
cuffmerge -g gc.final.gtf -s C_idella_female_scaffolds.fasta.v1 -p 31 -o cuffmerge_out assembly_GTF_list.txt

# Compare the merged GTF with the reference annotation.
cuffcompare -s C_idella_female_scaffolds.fasta.v1 -r gc.final.gtf -R -o compareGTF cuffmerge_out/merged.gtf
# Extract class code "u" transcripts. The file name must match the
# -o prefix above exactly (compareGTF, not compareGtf).
grep "class_code \"u\"" compareGTF.combined.gtf >novel.gtf
gtf_to_fasta novel.gtf C_idella_female_scaffolds.fasta.v1 novel.fa

# Sort by sequence length; sequences longer than 200 bp are then picked out
# by hand in UE (61 bp per line: keep those with more than 3 lines plus 17
# extra bases).
seqkit sort -l novel.fa > test

# Differential expression and normalised expression on the merged annotation.
cuffdiff -o test_diff -p 31 -L gc93,gc94,gc95,gc96,gc97,gc98,gc99 --library-type fr-unstranded -u cuffmerge_out/merged.gtf mappingout93/accepted_hits.bam mappingout94/accepted_hits.bam mappingout95/accepted_hits.bam mappingout96/accepted_hits.bam mappingout97/accepted_hits.bam mappingout98/accepted_hits.bam mappingout99/accepted_hits.bam
cuffnorm -o cuffnorm/ -p 31 -L gc93,gc94,gc95,gc96,gc97,gc98,gc99 cuffmerge_out/merged.gtf mappingout93/accepted_hits.bam mappingout94/accepted_hits.bam mappingout95/accepted_hits.bam mappingout96/accepted_hits.bam mappingout97/accepted_hits.bam mappingout98/accepted_hits.bam mappingout99/accepted_hits.bam

文章名：
Transcriptome data analysis of grass carp (Ctenopharyngodon idella) infected by reovirus provides insights into two immune-related genes

发表在 NGS_analysis | 留下评论

Endnote定制自己的输出文献格式

发表于2018年8月10日由daizao

此处以Development and Comparative Immunology杂志的参考文献为例

第一步：

第二步:

第三步：定制自己的输出要求

第四步：

第五步：

第六步：

第七步：

第八步：

第九步：

第十步：

第十一步：

第十二步：

第十三步：点击关闭，根据提示进行保存并重命名

最终的插入格式如下：

发表在 Endnote | 留下评论

GTF文件转GFF文件

发表于2018年8月5日由daizao

程序来自：https://metacpan.org/pod/distribution/GBrowse/bin/gtf2gff3.pl

/opt/biosoft/GBrowse-2.54/bin/gtf2gff3.pl --cfg gtf2gff3.cfg hg19_genesymbol.gtf > test.gff3
#注意运行pl程序前，先添加一行：
use Config::Std;
#然后再执行
#cfg文件样例：


###################################################################################################
#This config file allows the user to customize the gtf2gff3
#converter.

[INPUT_FEATURE_MAP]
#Use INPUT_FEATURE_MAP to map your GTF feature types (column 3 in GTF) to valid SO types.
#Don't edit the SO tags below.
#Mapping must be many to one.  That means that exon_this and exon_that could both
#map to the SO exon tag, but exon_this could not map to multiple SO tags.

#GTF Tag                  #SO Tag
gene                      = gene
mRNA                      = mRNA
exon                      = exon
five_prime_utr            = five_prime_utr
start_codon               = start_codon
CDS                       = CDS
stop_codon                = stop_codon
three_prime_utr           = three_prime_utr
3UTR                      = three_prime_utr
3'-UTR                    = three_prime_UTR
5UTR                      = five_prime_utr
5'-UTR                    = five_prime_UTR
ARS                       = ARS
binding_site              = binding_site
BLASTN_HIT                = nucleotide_match
CDS_motif                 = nucleotide_motif
CDS_parts                 = mRNA_region
centromere                = centromere
chromosome                = chromosome
conflict                  = conflict
Contig                    = contig
insertion                 = insertion
intron                    = intron
LTR                       = long_terminal_repeat
misc_feature              = sequence_feature
misc_RNA                  = transcript
nc_primary_transcript     = nc_primary_transcript
ncRNA                     = ncRNA
nucleotide_match          = nucleotide_match
polyA_signal              = polyA_signal_sequence
polyA_site                = polyA_site
promoter                  = promoter
pseudogene                = pseudogene
real_mRNA                 = mRNA
region                    = region
repeat_family             = repeat_family
repeat_region             = repeat_region
# SO terms are written with underscores; a value containing a space is not a valid SO type.
repeat_unit               = repeat_region
rep_origin                = origin_of_replication
rRNA                      = rRNA
snoRNA                    = snoRNA
snRNA                     = snRNA
source                    = sequence_feature
telomere                  = telomere
transcript_region         = transcript_region
transposable_element      = transposable_element
transposable_element_gene = transposable_element
tRNA                      = tRNA

[GTF_ATTRB_MAP]
#Maps attribute keys to keys used internally in the code.
#Don't edit the code tags.
#Note that the gene_id and transcript_id tags tell the script
#who the parents of a feature are.

#Code Tag    #GTF Tag
gene_id    = gene_id
gene_name  = gene_name
trnsc_id   = transcript_id
trnsc_name = transcript_name
id         = ID
parent     = Parent
name       = Name

[GFF3_ATTRB_MAP]
#Maps tags used internally to output GFF3 attribute tags.
#Also, when LIMIT_ATTRB is set to 1 only these tags will be
#Output to the GFF3 attributes column.

#Code Tag  #GFF3 Tag
PARENT   = Parent
ID       = ID
NAME     = Name

[MISC]
# Limit the attribute tags printed to only those in the GFF3_ATTRB_MAP
LIMIT_ATTRB     = 0 
#A perl regexp that splits the attributes column into separate attributes.
ATTRB_DELIMITER = \s*;\s*
#A perl regexp that captures the tag value pairs.
ATTRB_REGEX     = ^\s*(\S+)\s+(\"[^\"]*\")\s*$
#If CDSs are annotated in the GTF file, are the start codons already included (1=yes 0=no)
START_IN_CDS    = 1
#If CDSs are annotated in the GTF file, are the stop codons already included (1=yes 0=no)
STOP_IN_CDS     = 0
###################################################################################################


#报错解释
DIAGNOSTICS
ERROR: Missing or non-standard attributes: parse_attributes
A line in the GTF file did not have any attributes, or its attributes column was unparsable.

ERROR: Non-transcript gene feature not supported. Please contact the author for support: build_gene
This warning indicates that a line was skipped because it contained a non-transcript gene feature, and the code is not currently equipped to handle this type of feature. This probably isn't too hard to add, so contact me if you get this error and would like to have these features supported.

ERROR: Must have at least exons or CDSs to build a transcript: build_trnsc
Some feature had a transcript_id and yet there were no exons or CDSs associated with that transcript_id so the script failed to build a transcript.

ERROR: seq_id conflict: validate_and_finish_trnsc
Found two features within the same transcript that didn't share the same seq_id.

ERROR: source conflict: validate_and_finish_trnsc
Found two features within the same transcript that didn't share the same source.

ERROR: type conflict: validate_and_finish_trnsc
Found two features within the same transcript that were expected to share the same type and yet they didn't.

ERROR: strand conflict: validate_and_finish_trnsc
Found two features within the same transcript that didn't share the same strand.

ERROR: seq_id conflict: validate_and_build_gene
Found two features within the same gene that didn't share the same seq_id.

ERROR: source conflict: validate_and_build_gene
Found two features within the same gene that didn't share the same source.

ERROR: strand conflict: validate_and_build_gene
Found two features within the same gene that didn't share the same strand.

ERROR: gene_id conflict: validate_and_build_gene
Found two features within the same gene that didn't share the same gene_id.

FATAL: Can't open GTF file: file_name for reading.
Unable to open the GTF file for reading.

FATAL: Need exons or CDSs to build transcripts: process_start
A start_codon feature was annotated and yet there were no exons or CDSs associated with that transcript_id so the script failed.

FATAL: Untested code in process_start. Contact the aurthor for support.
The script is written to infer a start codon based on the presence of a 5' UTR, but we had no example GTF of this type when we wrote the code, so we killed process rather than run untested code. Contact the author for support.

FATAL: Invalid feature set: process_start
We tried to consider all possible ways of inferring a start codon or inferring a non-coding gene, and yet we've failed. Your combination of gene features doesn't make sense to us. You should never get this error, and if you do, we'd really like to see the GTF file that generated it. Please contact the author for support.

FATAL: Need exons or CDSs to build transcripts: process_stop
A stop_codon feature was annotated and yet there were no exons or CDSs associated with that transcript_id so the script failed.

FATAL: Untested code in process_stop. Contact the aurthor for support.
The script is written to infer a stop codon based on the presence of a 3' UTR, but we had no example GTF of this type when we wrote the code, so we killed process rather than run untested code. Contact the author for support.

FATAL: Invalid feature set: process_stop
We tried to consider all possible ways of inferring a stop codon or inferring a non-coding gene, and yet we've failed. Your combination of gene features doesn't make sense to us. You should never get this error, and if you do, we'd really like to see the GTF file that generated it. Please contact the author for support.

FATAL: Invalid feature set: process_exon_CDS_UTR
We tried to consider all possible ways of inferring exons, CDSs and UTRs and yet we've failed. Your combination of gene features doesn't make sense to us. You really should never get this error, and if you do, we'd really like to see the GTF file that generated it. Please contact the author for support.

FATAL: Array reference required: sort_features.
A user shouldn't be able to trigger this error. It almost certainly indicates a software bug. Please contact the author.

FATAL: Can't determine strand in: sort_feature_types.
This may indicate that your GTF file does not indicate the strand for features that require it. It may also indicate a software bug. Please contact the author.

FATAL: Hash reference required: sort_feature_types.
A user shouldn't be able to trigger this error. It almost certainly indicates a software bug. Please contact the author.

FATAL: Invalid value passed to strand: strand.
This may indicate that your GTF file does not indicate the strand for features that require it. Consider using the DEFAULT_STRAND parameter in the config file. It may also indicate a software bug. Please contact the author.

CONFIGURATION AND ENVIRONMENT
A configuration file is provided with this script. The script will look for that configuration file in ./gtf2gff3.cfg, ~/gtf2gff3.cfg or /etc/gtf2gff3.cfg in that order. If the configuration file is not found in one of those locations and one is not provided via the --cfg flag it will try to choose some sane defaults, but you really should provide the configuration file. See the supplied configuration file itself as well as the README that came with this package for format and details about the configuration file.

源程序代码

发表在 Perl | 留下评论

mysql_ERROR 1045 (28000): Access denied for user ‘xxx’@’xxx’解决

发表于2018年8月3日由daizao

#1 -> su - root    
#1 -> /etc/init.d/mysqld stop
#1 -> mysqld_safe --skip-grant-tables &
#2 -> mysql -uroot -p密码 -h127.0.0.1 
#或者
#2 -> mysql -uroot -p密码 -h127.0.0.1 --protocol=tcp
#1和2任选一种方式登陆
>use mysql;
>select host,user from mysql.user;
#删除host为%和127.0.0.1的用户
#>DROP USER 'train'@'%';
#>DROP USER 'train'@'127.0.0.1';
>CREATE USER 'train'@'localhost' IDENTIFIED BY '123456'; #注意：添加新用户时要写成 'train'@'localhost' 这样的完整形式，不能只写 train
>flush privileges;
>quit;
#1 -> jobs
#1 -> service mysqld stop
#1 -> kill %1
#1 -> service mysqld start
mysql -u用户名 -p密码

成功解决，因为mysql默认登陆方式为localhost但是一旦出现%的host，mysql会默认用%作为host的方式登陆如root@%，所以无法登陆。

发表在 Linux | 留下评论

CentOS6_IGV_安装

发表于2018年8月2日由daizao

# Install IGV 2.3.57 on CentOS 6.
# Prepare the per-user genome directory and drop the hg18 genome into it.
mkdir -p ~/igv/genomes/
cp ~/software/hg18.genome ~/igv/genomes/
# Unpack IGV under /opt/biosoft and append its directory to PATH in ~/.bashrc.
unzip ~/software/IGV_2.3.57.zip -d /opt/biosoft/
printf '%s\n' 'PATH=$PATH:/opt/biosoft/IGV_2.3.57/' >> ~/.bashrc
# Reload the shell configuration so igv.sh is found, then start IGV.
. ~/.bashrc
igv.sh
#若出现报错：GLib-GIO-ERROR **: Settings schema 'org.gnome.system.proxy' is not installed
vim /opt/biosoft/IGV_2.3.57/igv.sh
#则添加如下内容
-Dhttp.proxyHost=myproxy.domain \
-Dhttps.proxyHost=myproxy.domain \
-Dftp.proxyHost=myproxy.domain \