#!/usr/local/bin/perl -w # block.pl # Ronald J Kimball, rjk-perl(at)tamias.net use strict; # data file format: # wordstresspronunciation # stress is a sequence of 0s, 1s, and 2s, one per syllable # 0: no stress; 1: primary stress; 2: secondary stress # pronounciation is based on IPA, minus stress marks and slashes # and with two-character vowel sounds remapped to one character # example: chipmunk 10 tSIpm@Nk my $datafile = shift @ARGV || 'mywords.dat'; # begin! open(DATAF, $datafile) or die "Can't open $datafile: $!\n"; my %rhymes; my @block; my $entry; while (defined($entry = )) { if (not $. % 5000) { warn "$.\n"; } chomp($entry); # my($pron) = (split /\t/, $entry)[-1]; my $last = last_syllable($entry); $rhymes{$last} .= "$entry\n"; } warn "rhyming groups: ", scalar(keys %rhymes), "\n"; my($last, $words); while (($last, $words) = each %rhymes) { if (($words =~ tr/\n//) == 1) { push @block, $words; } } print sort @block; exit; BEGIN { # vowels: IPA, with two-character vowel sounds remapped to one: # eI -> e, aI -> !, Oi -> ~, AU -> ^, oU -> o, [@] -> [, (@) -> ( my @vowels = qw/& @ A e - E i I ! ~ ^ O o u U y Y [ (/; # regex to match the last syllable in a word # one vowel sound followed by any number of non-vowels my $syll_re = '[' . join('', map quotemeta, @vowels) . ']' . '[^' . join('', map quotemeta, @vowels) . ']*$'; # last_syllable # extract the last syllable from the pronunciation of a word # each argument is an entry from the data file # returns a list # dies if the last syllable cannot be determined sub last_syllable { my(@entry) = @_; my @syll; foreach (@entry) { if (/($syll_re)/o) { push @syll, $1; } else { die "Can't match last syllable in '$_'\n"; # push @syll, undef; } } return wantarray ? @syll : $syll[0]; } # last_syllable } __END__