#!/usr/bin/perl -w
use strict;
# countTags_GB.pl
# how many tags like "/host" are in a GenBank format file?
#usage: perl countTags_GB.pl inputFile
# Main program: tally every GenBank qualifier tag (for example /host) found
# in the input file named on the command line, print the tallies to the
# screen, and optionally write the same report to a file chosen by the user.
my $inputFile = $ARGV[0];
defined $inputFile
    or die "\n usage: perl countTags_GB.pl inputFile \n";
chomp $inputFile;

# Three-argument open with a lexical handle: the file name is passed as data
# and is never parsed as part of the open mode.
open(my $inputFh, '<', $inputFile) or die "\n can't open file: $! \n";

my %tagHash;
while (my $line = <$inputFh>) {
    # A tag is a slash followed by letters, digits, '_', '.' or whitespace,
    # immediately followed by ="  Only the first match on a line is counted.
    if ($line =~ /(\/[A-Za-z0-9_.\s]+)="/ ) {
        $tagHash{$1} += 1;
    }
} #close while
close $inputFh;

# Report from most frequent to least frequent; sortedPrint orders the keys.
print "Tag Frequency\n";
sortedPrint (%tagHash);

print "\nDo you want to print results to file? y or n ";
my $response = <STDIN>;
$response = '' unless defined $response;    # end of input is treated as "no"
chomp $response;

if ($response eq 'y') {
    print "\nEnter output file name: ";
    my $outputFile = <STDIN>;
    defined $outputFile
        or die "\n no output file name given \n";
    chomp $outputFile;

    open(my $outputFh, '>', $outputFile) or die "\n can't open file: $! \n";

    # sortedPrint writes to the currently selected handle, so point the
    # default output at the file for the duration of the report only.
    my $previousFh = select($outputFh);
    print "Tag Frequency\n";
    sortedPrint (%tagHash);
    select($previousFh);

    # Buffered write errors only surface at close, so check it.
    close($outputFh) or die "\n can't close file: $! \n";
}
# sortedPrint(%hash)
# Prints one " key: value" line per hash entry to the currently selected
# output handle. Entries are ordered by numeric value, largest first; entries
# with equal values are ordered by key in ascending string order.
# The hash is passed flattened as a key/value list.
sub sortedPrint {
    my %count_for = @_;

    # Comparator: descending by count, then ascending by tag name on ties.
    my $by_count_then_tag = sub {
        $count_for{$b} <=> $count_for{$a} || $a cmp $b
    };

    for my $tag (sort $by_count_then_tag keys %count_for) {
        print " $tag: $count_for{$tag}\n";
    }
}