Perl多线程编程

下面的模板可用于多种Perl编程场景,让任务并行执行

#!/usr/bin/perl -w
#
# Template for running work in parallel with the core `threads` module.
# `程序`, `参数...` and the `......` body below are placeholders: replace
# them with a real sub name, its arguments and its implementation before
# running -- the template does not compile as written.
use strict;
use warnings;
use threads;

# Incremented once per created thread; not read anywhere else in this template.
my $j=0;
# Loop variable for the reaping pass below.
my $thread;

# Empty condition: Perl treats this as an endless loop.
# NOTE(review): nothing in the template ever leaves this loop -- a real
# script presumably needs an exit condition (e.g. a work queue running dry).
while(){
    # Top up: start new threads while fewer than 4 are currently listed.
    while(scalar (threads -> list())<4){
        $j++;
        # NOTE(review): `\&程序` does not match the placeholder sub
        # `自己的程序` declared below; both must name the same sub.
        threads -> create(\&程序,参数...); 
    }
    # Reap: join every listed thread that is joinable, which removes it
    # from the list so the loop above can start a replacement.
    for $thread(threads -> list(threads::all)){
        if ($thread -> is_joinable()){
            $thread -> join();
        }
    }
}

# Placeholder for the worker sub executed by each thread.
sub 自己的程序{
    ......
}

参考的网址: 1、Perl 中的线程 2、Perl多线程 3、perldoc:threads

发表在 Perl | 留下评论

dbGaP申请注意事项

不仅SAM每年需要renew,dbGaP每年也需要renew:登录dbGaP,填写research progress即可完成renew

关于TCGA和dbGaP申请,请加群讨论

发表在 TCGA | 留下评论

根据分组计算平均值(包含NA值)

数据结构如下图。已知Year和Month列有重复,目的是根据Year和Month分组,计算Temperature的平均值(Temperature中存在NA值,求平均值时需要先去除NA值再计算均值)

perl语言版本如下

#!/usr/bin/perl -w
# Group the rows of a tab-separated table by Year and Month and write the
# mean Temperature of every group to average.txt.
#
# Usage:  perl SCRIPT inputfile
#
# Input:  tab-separated text whose first line is a header. Year, Month and
#         Temperature are taken from the 0-based columns 10, 11 and 14.
# Output: average.txt with the columns Year, Month and Temperature, sorted
#         numerically by year and then by month. 'NA' readings are ignored;
#         a group that holds nothing but 'NA' is reported as 'NA'.
#
# The table is aggregated in a single pass, so no intermediate file is
# written and nothing has to be deleted afterwards.
use strict;
use warnings;

my $usage=<<USAGE;
Usage:
    perl $0 inputfile
USAGE
if(@ARGV==0){die $usage};

# 0-based positions of the columns of interest in the input table.
my $YEAR_COL  = 10;
my $MONTH_COL = 11;
my $TEMP_COL  = 14;

my $file=$ARGV[0];

# $temps_for{$year}{$month} = [ non-NA temperature readings, in file order ]
my %temps_for;

open(my $in, '<', $file) or die "Cannot open $file: $!";
while (my $line = <$in>) {
    next if $. == 1;    # skip the header line
    chomp $line;
    my @arr = split /\t/, $line;
    my ($year, $month, $temp) = @arr[$YEAR_COL, $MONTH_COL, $TEMP_COL];

    # Rows too short to carry both grouping keys cannot be assigned to a group.
    next if !defined $year || !defined $month;

    # Register the group even when the reading itself is missing, so that an
    # all-NA group still shows up in the output.
    $temps_for{$year}{$month} ||= [];
    push @{ $temps_for{$year}{$month} }, $temp
        if defined $temp && $temp ne 'NA';
}
close($in);

open(my $out, '>', 'average.txt') or die "Cannot write average.txt: $!";
print {$out} "Year\tMonth\tTemperature\n" if %temps_for;
for my $year (sort { $a <=> $b } keys %temps_for) {
    for my $month (sort { $a <=> $b } keys %{ $temps_for{$year} }) {
        my @values = @{ $temps_for{$year}{$month} };
        # Never hand an empty list to average(): there is nothing to divide by.
        my $average = @values ? average(@values) : 'NA';
        print {$out} join("\t", $year, $month, $average), "\n";
    }
}
# Buffered write errors only surface at close, so check it.
close($out) or die "Cannot close average.txt: $!";

# Compute the arithmetic mean of a list of readings.
#
# Arguments: a flat list of numbers; entries that are undefined or equal to
#            the string 'NA' are treated as missing and ignored.
# Returns:   the mean of the remaining values, or the string 'NA' when no
#            usable value is left (empty list or only missing entries), so
#            the caller never triggers a division by zero.
sub average{
    my @num=@_;
    my @valid = grep { defined $_ && $_ ne 'NA' } @num;
    return 'NA' if !@valid;

    my $total = 0;
    $total += $_ for @valid;
    return $total / scalar @valid;
}

R语言for循环版本如下

# Mean Temperature per (Year, Month) group, computed with explicit loops.
# `data` must already hold the columns Year, Month and Temperature.
# The result `a` has the same three column names as `dz_test`, one row per
# Year/Month combination that actually occurs in the data.
dz_test <- data[, c("Year", "Month", "Temperature")]

# Collect one small data frame per group and bind them once at the end;
# calling rbind() inside the loop would copy the growing result every time.
rows <- list()
for (i in unique(dz_test$Year)) {
    for (j in unique(dz_test$Month)) {
        # which() drops NA comparisons instead of turning them into NA rows.
        temps <- dz_test$Temperature[which(dz_test$Year == i & dz_test$Month == j)]
        # Skip combinations that never occur; mean() of nothing would be NaN.
        if (length(temps) == 0) next
        rows[[length(rows) + 1]] <- data.frame(
            Year = i,
            Month = j,
            Temperature = mean(temps, na.rm = TRUE)
        )
    }
}

# With no groups at all, fall back to an empty frame with the right columns.
a <- if (length(rows) > 0) do.call(rbind, rows) else dz_test[0, ]
names(a) <- names(dz_test)
a

最简单也比较快的方法是使用R语言的tidyverse包

library(tidyverse)

# Load the table as a plain data.frame rather than a data.table.
data <- data.table::fread("Temperature.txt", data.table = FALSE)

# One row per (Year, Month) group; NA temperatures are dropped before averaging.
results1 <- summarise(
    group_by(data, Year, Month),
    Mean = mean(Temperature, na.rm = TRUE)
)

得到如下图的结果

发表在 Perl, R | 留下评论

R语言长宽数据转换

长数据如下图:

宽数据如下图:

library(magrittr)
library(tidyr)
library(reshape2)

# ---- Wide -> long ----
# `ddd` is the wide input table, defined elsewhere. Its row names become the
# `year` column (assumed to be years, with one column per month -- confirm
# against the data).

# Using tidyr::gather(): every column except `year` is stacked into the
# key/value pair month / tempretaure, then the rows are ordered by year.
test <- as.data.frame(ddd)
test$year <- rownames(test)
test1 <- gather(test, key = "month", value = "tempretaure", -year)
test1 <- test1[order(test1$year), ]
write.table(test1, file = "chang_data.txt", sep = "\t", quote = FALSE,
            col.names = TRUE, row.names = FALSE)

# Using reshape2::melt(): same reshaping as above.
test2 <- melt(test, id.vars = c("year"), variable.name = "month",
              value.name = "tempretaure")
test2 <- test2[order(test2$year), ]

# ---- Long -> wide ----

# Using tidyr::spread(): one column per month value.
kuan_data_1 <- spread(test2, month, tempretaure)

# Using reshape2::dcast(): the formula names the column `month` directly
# instead of `test2$month`, so it no longer depends on the data frame being
# reachable under the global name `test2`.
kuan_data_2 <- dcast(test2, year ~ month, value.var = "tempretaure")
发表在 R | 留下评论

perl语言长宽数据转换(长 -> 宽)

长数据如下图

perl代码如下

#!/usr/bin/perl -w
# Convert a long (tidy) table into a wide table.
#
# Usage:  perl SCRIPT inputfile
#
# Input:  tab-separated text whose first line is a header; each following
#         line holds three fields: row key (year), column key (month), value.
# Output: kuan_data.txt. The first line lists the month keys (sorted
#         numerically) without a label for the year column; every following
#         line holds a year (sorted as a string) and its value for each
#         month. Year/month combinations missing from the input are written
#         as 'NA', so all rows have the same number of columns.
use strict;
use warnings;
use Data::Dumper;

my $usage=<<USAGE;
Usage:
    perl $0 inputfile
USAGE
if(@ARGV==0){die $usage};

my $file=$ARGV[0];

my %value_for;     # $value_for{$year}{$month} = value
my %seen_month;    # every month key seen for any year

open(my $in, '<', $file) or die "Cannot open $file: $!";
while (my $line = <$in>) {
    next if $. == 1;    # skip the header line
    chomp $line;
    my @arr = split /\t/, $line;
    next if @arr < 2;   # blank or truncated line: no year/month pair
    $value_for{ $arr[0] }{ $arr[1] } = $arr[2];
    $seen_month{ $arr[1] } = 1;
}
close($in);

# Build the column list from ALL years, not from a single one, so that
# years with different month sets still line up under the same header.
my @months = sort { $a <=> $b } keys %seen_month;

open(my $out, '>', 'kuan_data.txt') or die "Cannot write kuan_data.txt: $!";
if (@months) {
    print {$out} join("\t", @months), "\n";
    for my $year (sort { $a cmp $b } keys %value_for) {
        my @cells = map {
            defined $value_for{$year}{$_} ? $value_for{$year}{$_} : 'NA'
        } @months;
        # join() terminates every row correctly, whatever the column count.
        print {$out} join("\t", $year, @cells), "\n";
    }
}
# Buffered write errors only surface at close, so check it.
close($out) or die "Cannot close kuan_data.txt: $!";

得到最后的宽数据,如下图

发表在 Perl | 留下评论

perl语言长宽数据转换(宽 -> 长 )

首先,输入的文件类似如下图。为了转换为长数据,编写如下perl代码

#!/usr/bin/perl -w
# Convert a wide table into a long (tidy) table.
#
# Usage:  perl SCRIPT inputfile
#
# Input:  tab-separated text. The first line lists the column keys (months)
#         without a label for the first column; every following line holds
#         a row key (year) followed by one value per month.
# Output: tidy_data.txt with one "year<TAB>month<TAB>value" line per cell,
#         in input order (rows first, then columns). No header is written.
#
# The file is processed in a single pass, so no intermediate file is
# created and nothing has to be deleted afterwards.
use strict;
use warnings;
use Data::Dumper;

my $usage=<<USAGE;
Usage:
    perl $0 inputfile
USAGE
if(@ARGV==0){die $usage};

my $file=$ARGV[0];
my @month=();

open(my $in, '<', $file) or die "Cannot open $file: $!";
open(my $out, '>', 'tidy_data.txt') or die "Cannot write tidy_data.txt: $!";

while (my $line = <$in>) {
    chomp $line;
    if ($. == 1) {
        # Header line: the month keys, in column order.
        @month = split /\t/, $line;
        next;
    }
    next if $line =~ /^\s*$/;    # ignore blank lines

    # Limit -1 keeps trailing empty cells so columns stay aligned.
    my ($year, @values) = split /\t/, $line, -1;
    for my $j (0 .. $#month) {
        # A row shorter than the header yields empty cells for the rest.
        my $value = defined $values[$j] ? $values[$j] : '';
        print {$out} join("\t", $year, $month[$j], $value), "\n";
    }
}

close($in);
# Buffered write errors only surface at close, so check it.
close($out) or die "Cannot close tidy_data.txt: $!";

便可实现转换,结果如下图

发表在 Perl | 留下评论