Skip to content

Instantly share code, notes, and snippets.

@zaerl
Created May 13, 2025 11:38
A more modern cw.pl
#!/usr/bin/perl
use strict;
use warnings;
use feature 'say';
# Shared state: filled in by process_file() and read by the display_* subs.
my %word_frequency;     # lower-cased word => number of occurrences
my %paragraph_stats;    # line length => { occurrence, file, line }

# Driver: every command-line argument is treated as a file to scan.
if (!@ARGV) {
    die "Usage: $0 file1 file2 ...\n";
}
process_file($_) for @ARGV;

# Report the word counts first, then the longest lines.
display_word_frequencies();
display_paragraph_stats();
# Subroutines
# Scan one file and update the file-level %word_frequency and
# %paragraph_stats hashes.
#
# Parameters:
#   $filename - path of the file to read.
#
# For every non-blank line, %paragraph_stats is keyed by the line's length
# as returned by length() (so the line terminator, when present, is part of
# the number). Each entry records how many lines had that length, plus the
# file name and line number of the most recent such line.
#
# Every line is also split into words, which are counted case-insensitively
# in %word_frequency.
#
# Dies if the file cannot be opened. Has no meaningful return value.
sub process_file {
    my $filename = shift;

    open(my $fh, '<', $filename) or die "Could not open '$filename': $!\n";

    # Word separators: whitespace plus ' \ { } . , ; : ? ! ` [ ]
    # Compiled once here rather than on every line.
    my $separator = qr/[\s'\\{}.,;:?!`\[\]]/;

    my $line_number = 1;
    while (my $line = <$fh>) {
        # A line is blank when nothing but its terminator is left. Testing
        # this explicitly (instead of "length > 1") keeps a one-character
        # final line that has no trailing newline, and skips blank lines
        # that end in CRLF.
        my $is_blank = $line =~ /\A\r?\n?\z/;

        unless ($is_blank) {
            my $length = length($line);
            $paragraph_stats{$length}{occurrence}++;
            # Only the most recent line of this length is remembered.
            $paragraph_stats{$length}{file} = $filename;
            $paragraph_stats{$length}{line} = $line_number;
        }

        # Adjacent separators produce empty fields; drop them before counting.
        foreach my $word (grep { length } split($separator, $line)) {
            $word_frequency{ lc($word) }++;
        }

        $line_number++;
    }

    close $fh;
    return;
}
# Print every counted word grouped by how often it occurs, most frequent
# group first.
#
# Each group is one output line of the form "COUNT: word word ...". At most
# nine words are listed per group; when a group holds ten or more words the
# tenth slot is replaced by "more..." and the rest of the group is not shown.
# Words with the same count are listed alphabetically, so repeated runs over
# the same input produce the same report.
#
# Reads the file-level %word_frequency hash. Takes no arguments, has no
# meaningful return value, and prints nothing when no words were counted.
sub display_word_frequencies {
    # With no words there is no first group to print; bail out instead of
    # looking up $sorted_words[0] in an empty list (undef key, warnings and
    # a bogus ": " line).
    return unless %word_frequency;

    # Highest count first; alphabetical order breaks ties deterministically.
    my @sorted_words = sort {
        $word_frequency{$b} <=> $word_frequency{$a} || $a cmp $b
    } keys %word_frequency;

    my $current_count = $word_frequency{ $sorted_words[0] };
    print "$current_count: ";
    my $display_count = 0;

    foreach my $word (@sorted_words) {
        # A different count means a new group: start a fresh output line.
        if ($word_frequency{$word} != $current_count) {
            $current_count = $word_frequency{$word};
            $display_count = 0;
            print "\n$current_count: ";
        }

        # The tenth word of a group is replaced by the marker; anything
        # after it in the same group is silently skipped.
        if (++$display_count == 10) {
            print "more...";
        } elsif ($display_count < 10) {
            print "$word ";
        }
    }

    print "\n";
    return;
}
# Print the "LONGEST PARAGRAPHS" report from the file-level %paragraph_stats
# hash.
#
# Lengths are listed from largest to smallest, one row per distinct length:
# "LENGTH OCCURRENCES FILE LINE". After ten rows a single "more..." line is
# printed and the listing stops. Takes no arguments.
sub display_paragraph_stats {
    say "\nLONGEST PARAGRAPHS\n";

    # Keys are distinct lengths, so a descending numeric sort is unambiguous.
    my @lengths_desc = sort { $b <=> $a } keys %paragraph_stats;

    for my $rank (0 .. $#lengths_desc) {
        # Only the ten largest lengths get a row of their own.
        if ($rank >= 10) {
            say "more...";
            last;
        }

        my $length = $lengths_desc[$rank];
        my $entry  = $paragraph_stats{$length};
        say "$length $entry->{occurrence} $entry->{file} $entry->{line}";
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment