#!/usr/local/bin/perl -w # $Header: /home/r/rjk/words/RCS/words.pl,v 1.8 2003/06/04 03:27:50 rjk Exp $ use strict; use File::Basename; use Getopt::Std; use vars qw($opt_w); getopts('w:') and @ARGV >= 2 or die "usage: words.pl [-w ] ", " [ ...]\n"; my $dir = dirname($0); my $wordlist = $opt_w || "$dir/wordlist"; my $wordidx = "$wordlist.idx"; my $minlen = shift @ARGV; # minimum word length if ($minlen =~ /\D/) { die "$0: must be a whole number\n"; } open(DICT, $wordlist) or # open word list die "Unable to open $wordlist: $!\n"; my($idx, %idx); if (!-e $wordidx) { warn "Unable to locate $wordidx.\n"; } elsif (-M $wordlist < -M $wordidx) { warn "$wordlist is newer than $wordidx.\n"; } elsif (! open(IDX, $wordidx)) { warn "Unable to open $wordidx: $!\n"; } else { $idx = 1; # set index flag while() { my($letter, $offset) = split; # load letter/offset pairs $idx{$letter} = $offset; } } if (! $idx) { warn "Proceeding without index.\n"; $idx{0} = 0; } $| = 1; my $letters; foreach $letters (@ARGV) { # for each letter sequence my $words = 0; print "-- $letters --\n"; my %letters; $letters = lc $letters; # convert to lowercase $letters =~ tr/a-z//cd; # strip non-letter characters foreach (split(//, $letters)) { # store letter counts $letters{$_}++; } "\0" =~ /[^$letters]/; # cache regex with successful match my($letter, $word); IDX: foreach $letter ($idx ? sort keys %letters : 0) { # for each letter in # sequence if index loaded # (0) otherwise seek(DICT, $idx{$letter}, 0); # seek to words beginning with letter WORD: while (defined($word = )) { # for each word in list next IDX if ($idx and substr($word, 0, 1) ne $letter); # next letter index if index loaded # and done with current letter chomp($word); next WORD if (length($word) < $minlen); # verify length next WORD if ($word =~ //); # verify letters used, # using cached regex # comments also skipped here my %word; foreach (split(//, $word)) { # verify letter counts $word{$_}++; next WORD if ($word{$_} > $letters{$_}); } print "$word\n"; # success - print word $words++; } # WORD: while (defined($word = )) } # IDX: foreach $letter (sort keys %idx) print "$words\n\n"; } # foreach $letters (ARGV) __END__ =pod =head1 NAME B -- find words which can be made from a string of letters =head1 SYNOPSIS B [B<-w> I] I I [I ...] =head1 DESCRIPTION B prints all the uncapitalized words, of at least the minimum length, that can be made from a string of letters. Each letter can appear in a word once for each time it appears in the string. Several strings of letters can be given; a separate list of words will be printed for each string. A string may contain non-letter characters, which will be ignored. For efficiency, B uses an index of the wordlist file, generated by the L program. The index file should have the same name as the wordlist file, with .idx appended. B will output a warning and proceed without the index if the index file is not found, if the index file is older than the wordlist file, or if the index file cannot be opened. =head2 OPTIONS B accepts the following options: =over 4 =item B<-w> I By default, B looks for a word-file named 'wordlist' in the same directory as the executable. Use the B<-w> option to specify the path to an alternate word list. =back =head1 FILES =over 4 =item F The list of words, found with the executable. For a comprehensive word list, the author recommends the ENABLE word list, with more than 172,000 words, which can be found at http://personal.riverusers.com/~thegrendel/software.html =item F An index of the list of words, found with the wordlist file. The index improves efficiency by allowing B to skip large sections of the wordlist file. The index file is generated by L. =back =head1 SEE ALSO L =head1 BUGS B has no known bugs. =head1 AUTHOR B was written by Ronald J Kimball, I. =head1 COPYRIGHT and LICENSE This program is copyright 2000 by Ronald J Kimball. This program is free and open software. You may use, modify, or distribute this program (and any modified variants) in any way you wish, provided you do not restrict others from doing the same. =cut