Seqcomposition.pl

From Organic Design wiki
Revision as of 12:52, 8 December 2011 by Nad (talk | contribs) (Category:PERL)

<perl>

  1. !/usr/bin/perl -w

use strict; use Data::Dumper;

  1. =============================================================================
  2. open input pileup file and determine number of sequences in alignment
  3. =============================================================================

my $debug = 0; my $handle; my $dir = ""; # what's that? -its redundant code my $file = "2002sequences1.msf"; open($handle, "<$dir$file") or die ("Could not open $dir$file $!\n");

my %alignments;

  1. read data line by line

while(<$handle>) {

 if(m/Name: (\w+?)\s/) {
   $alignments{$1} = undef;
 }
 
 # Populating %alignments values for each alignment key
 if(my($name, $seq) = m/^(\w+)\s+(.+)/) {  # carrot ^ is extremely important in RegEx
   $seq =~ s/ //g;
   if(exists $alignments{$name}) {
     $alignments{$name} .= $seq;
   }
 }  

}

close($handle);

  1. Checking hash capture ok

if($debug) {

 print Dumper(\%alignments);

}

  1. Put hash values into baseition structure

my @base;


foreach (keys %alignments) {

 my@elements = split(//, $alignments{$_});
 for(my$i=0; $i<=$#elements; $i++) {   
   $base[$i] .= $elements[$i];
 }

}

if($debug) {

 print Dumper(\@base);

}

my @printorder = ("A%","T%","G%","C%","N%","Total"); {

 local $"="\t";
 print "Base position\t@printorder\n";  

}

my @basecalc; for(my$i=0; $i<=$#base; $i++) {

 $base[$i] =~ tr/atcgn/ATCGN/;
 #print $base[$i];
 my %basecount = (
                   A    =>0,
                   C    =>0,
                   T    =>0,
                   G    =>0,
                   N    =>0,
                   Total=>0,
                  );
 while($base[$i] =~ m/([ATCGN])/g) {
  $basecount{Total}++; # increment the total number of bases
  $basecount{$1}++;    # increment the actual bases
 }
 foreach( keys %basecount ) {
   if(m/(\w)/){
     $basecount{$1."%"} = $basecount{$1}/$basecount{Total};
   }
 }
  if($debug) {
     print Dumper(\%basecount);
 }
 
 # Printing output
 print $i+1,"\t";             # printing position
 foreach(@printorder) {
   print "$basecount{$_}\t";
 }
 print "\n";

} </perl>