#!/usr/bin/perl -w use strict; # countHostsAndSources.pl # how many genbank types of Hosts and Isolation Sources are in my Bacteroidetes inputs? #usage: perl countHostsAndSources.pl inputFile #for each line of text in $_: 1)preprocess, 2)count, 3)store counts my $inputFile = $ARGV[0]; chomp $inputFile; open (INPUT, "< $inputFile") || die "\n can't open file: $! \n"; my %hostHash; my %isolationSourceHash; while () { if ($_ =~ /host="([A-Za-z0-9_.\s]+)"/) { $hostHash{$1} += 1; } if ($_ =~ /isolation_source="([A-Za-z0-9_.\s]+)"/) { $isolationSourceHash{$1} += 1; } } #close while #want to print from most frequent to least frequent; thus need to sort keys to both hashes print "GenBank Host Frequency\n"; sortedPrint (%hostHash); print "\nGenBank Isolation Source Frequency\n"; sortedPrint (%isolationSourceHash); print "\nDo you want to print results to file? y or n "; chomp ( my $response = ); if ($response eq 'y') { print "\nEnter output file name: "; chomp (my $outputFile = ); open (OUTPUT, "> $outputFile") || die "\n can't open file: $! \n"; #my $hashRefs = [\%hostHash, \%isolationSourceHash]; select (OUTPUT); print "GenBank Host Frequency\n"; sortedPrint (%hostHash); print "\nGenBank Isolation Source Frequency\n"; sortedPrint (%isolationSourceHash); #foreach my $hash(@$hashRefs) { # my %derefHash = %$hash; # sortedPrint (%derefHash); #} # foreach my $hash (@$hashRefs) { # my %derefHash = %$hash; # my @sortedKeys = sort keys %derefHash; # foreach my $key (@sortedKeys) { #foreach my $key (keys %hash) { # to do without sorting # print OUTPUT "$key: $derefHash{$key}\n"; # } # } } close INPUT; close OUTPUT; sub sortedPrint { my %hash = @_; my @sortedKeys = sort { $hash{$b} <=> $hash{$a} #high to low value sort of keys or $a cmp $b #if tied, alphabetical sort of keys } keys %hash; foreach my $key (@sortedKeys) { print " $key: $hash{$key}\n"; } }