# Word-type frequency counter.
#
# Reads the file(s) whose names are given on the command line (or STDIN when
# none are given) and prints each word type with its frequency, one
# "type count" pair per line, after lower-casing the text and eliminating
# punctuation, digits and stop words.
use strict;
use warnings;

# NOTE(review): the stop-word file name was lost from the original source
# (the open() call was truncated in transit). 'stop_words.txt' is a
# placeholder -- confirm the real path before relying on this script.
my $STOP_WORDS_FILE = 'stop_words.txt';

# Load the stop-word list: one word per line, stored as hash keys so that
# membership tests in the main loop are constant time.
my %stop_words;
open(my $stop_fh, '<', $STOP_WORDS_FILE)
    or die "Cannot open stop-word file '$STOP_WORDS_FILE': $!\n";
while (my $stop_word = <$stop_fh>) {
    chomp $stop_word;
    $stop_word =~ tr/\r//d;    # tolerate CRLF line endings in the list
    $stop_words{$stop_word} = 1;
}
close($stop_fh);

# Count every remaining token of every input line.
my %count_of;
while (my $line = <>) {
    chomp $line;
    $line =~ tr/\r//d;         # drop carriage returns from CRLF input
    $line =~ tr/A-Z/a-z/;      # ASCII lower-casing, as in the original

    # Delete punctuation and digits in place. Hyphens and apostrophes are
    # removed without inserting a space, so "well-known" becomes "wellknown".
    $line =~ s/[,.;:%\[\]()\\'"0-9-]//g;

    # split ' ' skips leading/trailing whitespace and collapses runs of
    # blanks and tabs, so no explicit trimming is needed and tab-separated
    # words stay separate tokens.
    for my $token (split ' ', $line) {
        next if exists $stop_words{$token};
        $count_of{$token}++;
    }
}

# Emit "type count" pairs; sorted so the output is reproducible between runs.
for my $type (sort keys %count_of) {
    print $type, " ", $count_of{$type}, "\n";
}