Difference between revisions of "Seqcomposition.pl"
From Organic Design wiki
m |
|||
| Line 1: | Line 1: | ||
| − | <perl> | + | <source lang="perl"> |
#!/usr/bin/perl -w | #!/usr/bin/perl -w | ||
| Line 95: | Line 95: | ||
} | } | ||
| − | </ | + | </source> |
[[Category:PERL]] | [[Category:PERL]] | ||
Latest revision as of 16:29, 20 April 2018
#!/usr/bin/perl -w
use strict;
use Data::Dumper;
# =============================================================================
# open input pileup file and determine number of sequences in alignment
# =============================================================================
my $debug = 0;
my $handle;
my $dir = ""; # what's that? -its redundant code
my $file = "2002sequences1.msf";
open($handle, "<$dir$file") or die ("Could not open $dir$file $!\n");
my %alignments;
# read data line by line
while(<$handle>) {
if(m/Name: (\w+?)\s/) {
$alignments{$1} = undef;
}
# Populating %alignments values for each alignment key
if(my($name, $seq) = m/^(\w+)\s+(.+)/) { # carrot ^ is extremely important in RegEx
$seq =~ s/ //g;
if(exists $alignments{$name}) {
$alignments{$name} .= $seq;
}
}
}
close($handle);
# Checking hash capture ok
if($debug) {
print Dumper(\%alignments);
}
# Put hash values into baseition structure
my @base;
foreach (keys %alignments) {
my@elements = split(//, $alignments{$_});
for(my$i=0; $i<=$#elements; $i++) {
$base[$i] .= $elements[$i];
}
}
if($debug) {
print Dumper(\@base);
}
my @printorder = ("A%","T%","G%","C%","N%","Total");
{
local $"="\t";
print "Base position\t@printorder\n";
}
my @basecalc;
for(my$i=0; $i<=$#base; $i++) {
$base[$i] =~ tr/atcgn/ATCGN/;
#print $base[$i];
my %basecount = (
A =>0,
C =>0,
T =>0,
G =>0,
N =>0,
Total=>0,
);
while($base[$i] =~ m/([ATCGN])/g) {
$basecount{Total}++; # increment the total number of bases
$basecount{$1}++; # increment the actual bases
}
foreach( keys %basecount ) {
if(m/(\w)/){
$basecount{$1."%"} = $basecount{$1}/$basecount{Total};
}
}
if($debug) {
print Dumper(\%basecount);
}
# Printing output
print $i+1,"\t"; # printing position
foreach(@printorder) {
print "$basecount{$_}\t";
}
print "\n";
}



