#!/usr/local/bin/perl -w # -*- cperl-indent-level: 2 -*- # sonnet.pl # Ronald J Kimball, rjk-perl@tamias.net # $Header: /usr/home/rjk/perlpoetry/RCS/sonnet.pl,v 2.5 2007/02/23 05:15:14 rjk Exp $ use strict; use vars qw/$VERSION/; $VERSION = '1.3'; # $Revision: 2.5 $ use Getopt::Long; my $VERBOSE = my $DUMP = 0; my $alliteration; my %styles = ( # name => # [ 'rhyming scheme', [] ], english => [ 'ABAB CDCD EFEF GG', [13, 14] ], italian => [ 'ABBA ABBA CDE CDE', [2, 3, 6, 7, 10, 13] ], italian2 => [ 'ABBA ABBA CDC DCD', [2, 3, 6, 7, 10, 13] ], ); my $style = 'english'; # default style my $scheme; my @indent; my $datafile = 'word_pron.dat'; my $mem; my $strict_rhymes; my $alternate_strict; my $noblock; my $frequency; my $end_frequency; my $help; options(); if ($help) { usage(); exit 0; } if ($strict_rhymes and $] < 5.005) { $alternate_strict = 1; } if ($style eq '?') { print "Sonnet styles:\n"; my $key; foreach $key (sort keys %styles) { printf " %-12s $styles{$key}[0]\n", "\u$key"; } exit; } if (not $styles{lc $style}) { die "'$style' is not a recognized style. Use -s for a list of styles.\n"; } $style = lc $style; # fetch scheme and indent for chosen style $scheme = $styles{$style}[0]; @indent = @ {$styles{$style}[1]}; my @rhymes; $scheme =~ tr/A-J /0-9/d; # convert to digits, remove spaces for (split //, $scheme) { # count rhyming words in each group if (defined $rhymes[$_]) { $rhymes[$_]++; } else { $rhymes[$_] = 0; } } # data file format: # wordstressrhymablefrequencypronunciation # stress is a sequence of 0s, 1s, and 2s, one per syllable # 0: no stress; 1: primary stress; 2: secondary stress # rhymable is either y or n [optional] # y: word is rhymable; n: word is not rhymable (based on available words) # frequency is a number representing the word's frequency [optional] # preceded by a plus, to distinguish from the stress # example: +23 # pronounciation is based on IPA, minus stress marks and slashes # and with multi-character vowel sounds remapped to one character # example: chipmunk 10 tSIpm@Nk my @vowels; BEGIN { # vowels: IPA, with multi-character vowel sounds remapped to one: # eI -> e, aI -> !, Oi -> ~, AU -> ^, oU -> o, [@] -> [, (@) -> ( @vowels = qw/& @ A e - E i I ! ~ ^ O o u U y Y [ (/; } # begin! open(DATAF, $datafile) or die "Can't open $datafile: $!\n"; my @data; if ($mem) { chomp(@data = ); close(DATAF); } # list of regexes to match line-ending words my @stress = ('\t(?:[02]*1(?:[012]{2})*|(?![02]*1)0*2(?:[02]{2})*)\t') x @rhymes; # choose the first word for each rhyming group my @first = choose_entries({ rhyme => $noblock, frequency => $end_frequency, alliteration => $alliteration, }, \@stress); foreach (@first) { if (not defined $_) { die "Too few words to choose from!\n"; } } print join("\n", @first), "\n\n" if $VERBOSE; my @second; # choose the rhyming words for each rhyming group my %seen; while (1) { my @temp; my $more; my $i = 0; # check for more rhyming words in each group foreach (@rhymes) { if ($_ > 0 and $_--) { push @temp, $first[$i]; $more = 1; } else { push @temp, ''; } $i++; } last unless $more; # find another set of rhyming words my @temp2 = find_rhymes({ frequency => $end_frequency, alliteration => $alliteration, strict_rhymes => $strict_rhymes, alternate_strict => $alternate_strict, }, \@temp, \@stress, \@second, \%seen); # check for non-rhyming words { my $no_rhyme; for (my $i=0; $i<=$#temp2; ++$i) { $temp[$i] or next; if (defined $temp2[$i]) { push @{$second[$i]}, $temp2[$i]; $seen{$temp2[$i]}++; } else { warn "Poet's block! No rhyme for $temp[$i]\n"; $no_rhyme = 1; } } exit 1 if $no_rhyme; } } if ($VERBOSE) { foreach (@second) { print join("\n", @$_), "\n" } print "\n"; } # order the words according to the rhyming scheme my @ends; foreach (split //, $scheme) { if ($first[$_]) { push @ends, $first[$_]; $first[$_] = ''; } elsif (@{$second[$_]}) { push @ends, shift @{$second[$_]}; } else { die "Ran out of words!\n"; # should not happen } } # complete the lines of the sonnet my @sonnet = fill_lines({ frequency => $frequency, alliteration => $alliteration, }, @ends); # make sure all lines were filled # extract just the words from the full entries # capitalize the first word in each line foreach my $line (@sonnet) { foreach my $entry (@$line) { if (not defined $entry) { die "No words to choose from for a certain length.\n"; } $entry = (split /\t/, $entry, 2)[0]; } $line->[0] = ucfirst $line->[0]; } # indent the specified lines foreach (@indent) { if ($_ > @sonnet) { die "Line number $_ for indent out of range.\n"; } unshift @{$sonnet[$_-1]}, ' '; } # print the sonnet foreach (@sonnet) { print "@{$_}\n"; } print "\n"; # done! exit; # choose_entries # choose entries at random from the data file # first argument is a reference to a hash of options # rhyme: choose words which are marked as having rhymes # frequency: choose words based on frequency # remaining arguments: # 0 arguments: # chooses one word, returns a scalar # 1 argument: # argument is a ref to an array of regexes # chooses one word per regex, returns a list # >1 arguments: # each argument is a ref to an array of regexes # chooses one word per regex, returns a list of lists sub choose_entries { my($opts, @stress) = @_; my($rhyme, $frequency, $alliteration) = @{$opts}{qw/rhyme frequency alliteration/}; if (not $mem) { # seek to beginning of data file seek(DATAF, 0, 0) or die "Can't seek in $datafile: $!\n"; } if (not @stress) { # no arguments; just choose one entry @stress = [ undef ]; } my @entry; my @count; foreach (@stress) { push @entry, [(undef) x @$_]; } # generate a loop to choose the entries # avoid recompiling the regexes my $eval = $mem ? <<' EOT' : <<' EOT'; for (@data) { EOT while () { EOT if (defined $alliteration) { # code to skip words that don't fit the alliteration $eval .= <<" EOT"; next unless /^$alliteration/i; EOT } if ($rhyme) { # code to skip unrhymable words $eval .= <<" EOT"; next if \$_ =~ /\\tn\\t/; EOT } # generate one if block per regex for (my $i=0; $i<=$#stress; ++$i) { for (my $j=0; $j<=$#{$stress[$i]}; ++$j) { # code to match stress regex if (defined $stress[$i][$j]) { $eval .= <<" EOT"; if (\$_ =~ /\$stress[$i][$j]/o) { EOT } else { $eval .= <<" EOT"; if (1) { EOT } if ($frequency) { # code to choose by frequency $eval .= <<" EOT"; \$count[$i][$j] += \$f = (/\\t\\+(\\d+)\\t/ && \$1) || 1; if (rand \$count[$i][$j] < \$f) { EOT } else { # code to choose without frequency $eval .= <<" EOT"; \$count[$i][$j]++; if (not int rand \$count[$i][$j]) { EOT } # code to save chosen word $eval .= <<" EOT"; \$entry[$i][$j] = \$_; } } EOT ; } } $eval .= <<' EOT'; } EOT print "# choose_entries\n", $eval, "\n" if $DUMP; # do it! my $f; eval $eval; die $@ if $@; if (not $mem) { for (@entry) { local $^W; # any undefs in @entry will be caught elsewhere chomp(@$_); } } if (@stress == 1) { # flatten if only one list of words return @{$entry[0]}; } else { return @entry; } } # choose_entries BEGIN { # regex to match any non-consonant symbol; tab allows beginning of word my $no_consonant_re = '[' . join('', map quotemeta, @vowels) . '\t]'; # find_rhymes # choose rhyming entries at random from the data file # first argument is a reference to a hash of options # frequency: choose words based on frequency # second argument is a ref to an array of entries # third optional argument is a ref to an array of regexes # fourth optional argument is a ref to an array of rhyming entries # fifth optional argument is a ref to a hash of skip words # chooses one word for each entry/regex pair, returns a list # (rhymes are determined only by the last syllable of each word) sub find_rhymes { my($opts) = shift; my($frequency, $alliteration, $strict_rhymes, $alternate_strict) = @{$opts}{qw/frequency alliteration strict_rhymes alternate_strict/}; my @entry = @{ $_[0] }; my @stress; my @rhymed; my $skip = {}; my @count; my @match = (undef) x @entry; if (ref $_[1]) { @stress = @{ $_[1] }; } if (ref $_[2]) { @rhymed = @{ $_[2] }; } if (ref $_[3]) { $skip = $_[3]; } # extract the last syllable from each word # create a list of regexes for matching rhyming words my @rhyme_re; for (my $i=0; $i<=$#entry; ++$i) { my(@syllables) = last_syllable($entry[$i]); if ($syllables[0]) { if ($strict_rhymes) { # match the same vowel & consonant block at the end # but only if not preceded by any of the same # penultimate consonant blocks push @syllables, last_syllable(@{ $rhymed[$i] || [] }); if (not $alternate_strict) { # preferred method of doing strict rhyme # uses look-behind assertion; requires 5.005 push @rhyme_re, join('', ( map { $_ ? "(?[0]\E)" : () } @syllables ), quotemeta $syllables[0][1] ); } else { # an alternate method of doing strict rhyme # requires reversing the entry, which makes this approach slower # works with pre 5.005 push @rhyme_re, join('', quotemeta(reverse $syllables[0][1]), '(?!(?:', join('|', map { $_ ? quotemeta(reverse $_->[0]) : () } @syllables ), ')', $no_consonant_re, ')' ); } } else { # just match the same vowel & consonant block at the end push @rhyme_re, quotemeta $syllables[0][1]; } } else { push @rhyme_re, undef; } } if (not $mem) { seek(DATAF, 0, 0) or die "Can't seek in $datafile: $!\n"; } my $eval = $mem ? <<' EOT' : <<' EOT'; for (@data) { EOT while () { chomp; EOT if (defined $alliteration) { # code to skip words that don't fit the alliteration $eval .= <<" EOT"; next unless /^$alliteration/i; EOT } if ($strict_rhymes and $alternate_strict) { # code to store the entry in reverse for the regex match $eval .= <<' EOT'; my $reversed = reverse $_; EOT } # generate one if block per entry for (my $i=0; $i<=$#entry; ++$i) { # skip if no entry or unable to find last syllable # (would be no entry if there weren't enough acceptable words # in the first round; for example, alliteration with 'q') if (not $entry[$i] or not defined $rhyme_re[$i]) { $match[$i] = undef; next; } # code to open a block for next # and make sure word is unused so far $eval .= <<" EOT"; { if (\$_ ne \$entry[$i] and not exists \$skip->{\$_}) { EOT if (not $strict_rhymes or not $alternate_strict) { # regular regex match $eval .= <<" EOT"; next unless /\$rhyme_re[$i]\$/o; EOT } else { # alternate strict method requires reversing the entry $eval .= <<" EOT"; next unless \$reversed =~ /^\$rhyme_re[$i]/o; EOT } if (defined $stress[$i]) { # code to match the stress regex $eval .= <<" EOT"; next unless /\$stress[$i]/o; EOT } if ($frequency) { # code to choose by frequency $eval .= <<" EOT"; \$count[$i] += \$f = (/\\t\\+(\\d+)\\t/ && \$1) || 1; if (rand \$count[$i] < \$f) { EOT } else { # code to choose without frequency $eval .= <<" EOT"; \$count[$i]++; if (not int rand \$count[$i]) { EOT } # code to save chosen word $eval .= <<" EOT"; \$match[$i] = \$_; } } } EOT } $eval .= <<' EOT'; } EOT print "# find_rhymes\n", $eval, "\n" if $DUMP; # do it! my $f; eval $eval; die $@ if $@; if (not $mem) { foreach (@match) { chomp $_ if defined $_; } } return @match; } # find_rhymes } BEGIN { # regexes to match iambic words from 1 to 5 syllables my @stress_re = ( # regexes to match words beginning with an unstressed syllable [ qw/ 0 \t[02]\t \t[02][12]\t \t[02][12][02]\t \t(?:[02][12][02][12]|0[012]0[012])\t \t(?:[02][12][02][12][02]|0[012]0[012]0)\t / ], # regexes to match words beginning with a stressed syllable [ qw/ 0 \t[12]\t \t[12][02]\t \t(?:[12][02][12]|[012]0[012])\t \t(?:[12][02][12][02]|[012]0[012]0)\t \t(?:[12][02][12][02][12]|[012]0[012]0[012])\t / ] ); # fill_lines # choose entries at random to fill each line to 10 syllables # first argument is a reference to a hash of options # passed unchanged to choose_entries() # each remaining argument is an entry which ends a line # chooses entries to fill each line, returns a list of lists # includes original entries in return value sub fill_lines { my($opts, @entries) = @_; my @searches; my $entry; foreach $entry (@entries) { my($word, $syllables) = split /\t/, $entry; # syllable position to fill to my $max = 10 - length $syllables; # current syllable position my $pos = 1; my @search; while ($pos <= $max) { # choose number of syllables in next word # distribution: 11 222 33 4 5 my $syl = int(rand 3) + int(rand 3); $syl ||= 5; if ($syl + $pos > $max) { $syl = $max - $pos + 1; } # save appropriate regex for syllable count, position # (odd: unstressed first; even: stressed first) push @search, $stress_re[not $pos & 1][$syl]; $pos += $syl; } # save regexes for current line push @searches, [ @search ]; } # choose all the entries in one pass my @lines = choose_entries($opts, @searches); # include original entries at the ends of the lines for (my $i=0; $i<=$#lines; ++$i) { print join("\n", @{$lines[$i]}), "\n\n" if $VERBOSE; push @{$lines[$i]}, $entries[$i]; } return @lines; } # fill_lines } BEGIN { # regex to match the last syllable in a word # one vowel sound followed by any number of non-vowels my $syll_re = '([^' . join('', "\t", map quotemeta, @vowels) . ']*)' . '([' . join('', map quotemeta, @vowels) . ']' . '[^' . join('', "\n", map quotemeta, @vowels) . ']*)$'; # last_syllable # extract the last syllable from the pronunciation of a word # each argument is an entry from the data file # for each entry, returns an array ref containing the penultimate # consonant block and the ultimate vowel & consonant block # example: chipmunk 10 tSipm@Nk => ['pm', '@nk'] # dies if the last syllable cannot be determined sub last_syllable { my(@entry) = @_; my @syll; foreach (@entry) { if (not defined $_ or not length $_) { push @syll, ''; next; } if (/$syll_re/o) { push @syll, [$1, $2]; } else { die "Can't match last syllable in '$_'\n"; # push @syll, undef; } } return @syll; } # last_syllable } sub options { my %optctl = ( 'verbose' => \$VERBOSE, 'dump' => \$DUMP, 'memory' => \$mem, 'wordlist' => \$datafile, 'alliteration' => \$alliteration, 'rhymes' => \$noblock, 'strict' => \$strict_rhymes, 'Style' => \$style, 'frequency' => \$frequency, 'Frequency' => \$end_frequency, 'help' => \$help, ); Getopt::Long::Configure('no_ignore_case'); GetOptions( \%optctl, 'verbose', 'dump', 'memory', 'wordlist=s', 'alliteration=s', 'rhymes', 'strict', 'Style:s', 'frequency', 'Frequency', 'help|?', ) or (usage(), exit 1); if ($style eq '') { $style = '?'; # no style; show styles help } if (defined $alliteration) { if ($alliteration =~ /[^A-Za-z]/) { die "Argument to --alliteration must be a letter.\n"; } # untaint ($alliteration) = $alliteration =~ /([A-Za-z]+)/; } } sub usage { print <] [--alliteration ] [--rhymes] [--strict] [--Style []] [--frequency] [--Frequency] [--verbose] [--dump] [--memory] --wordfile, -w Specify an alternate data file --alliteration, -a Specify a letter for alliteration Only words beginning with that letter will be used --rhymes, -r Skip words that don't have rhymes (Data file must include the rhymes/no-rhymes column) --strict, -s Require strict rhymes e.g. Don't rhyme 'new' and 'knew' --Style, -S Specify an alternate style With no argument, prints a list of styles --frequency, -f Choose internal words based on word frequency --Frequency, -F Choose ending words based on word frequency (Data file must include frequency column) --verbose, -v Verbose mode; print chosen entries --dump, -d Dump mode; dump generated code --memory, -m Read date file into memory for faster execution EOT } __END__ =pod =head1 NAME B -- create sonnets with correct meter and rhyme =head1 SYNOPSIS B S<[B<-w> I]> S<[B<-a> I]> [B<-r>] [B<-s>] S<[B<-S> I