#!/usr/bin/env python3
"""Generate rhyme listings from a word list with one word per line.

Recovered from the git patch "Rhyme generation script"
(commit 2d80179a3b29b40d45d6d652714d1d1dee7ae14d), which adds this script
as gen_rhymes.py.

Usage: gen_rhymes.py DICTIONARY_FILE
"""

import sys
from pprint import pp  # noqa: F401  (unused here; retained from the original)

# We're taking advantage of the fact that toki pona pronunciation is very
# orderly. We can determine rhymes entirely lexicographically.
VOWELS = ["a", "e", "i", "o", "u"]


def matching_substring_r(str1, str2):
    """Return the longest common suffix of ``str1`` and ``str2``.

    Returns the empty string when either input is empty or when the final
    characters differ.
    """
    match_len = 0
    for left, right in zip(reversed(str1), reversed(str2)):
        if left != right:
            break
        match_len += 1
    # Slice from the front: ``str1[-0:]`` would return the whole string.
    return str1[len(str1) - match_len:]


def shared_rhyme_class(word, other):
    """Return the rhyme class two words share, or ``None`` if they don't rhyme.

    The rhyme class is the common suffix of the two words with any leading
    consonants removed. A result shorter than two characters does not count
    as a rhyme.
    """
    suffix = matching_substring_r(word, other)
    while len(suffix) > 1 and suffix[0] not in VOWELS:
        suffix = suffix[1:]
    return suffix if len(suffix) > 1 else None


def parse_words(lines):
    """Return the words found in ``lines``, in first-seen order.

    Surrounding whitespace is stripped. Blank lines are skipped so they do
    not show up as an empty "word", and repeated words are kept only once so
    that a word is never listed twice as a rhyme of another word.
    """
    stripped = (line.strip() for line in lines)
    return list(dict.fromkeys(word for word in stripped if word))


def find_rhymes(words):
    """Compare every pair of words and collect the rhymes.

    Returns a ``(rhymes, rhyme_classes)`` tuple:

    * ``rhymes`` maps each word to the list of other words it rhymes with
      (an empty list when it rhymes with nothing).
    * ``rhyme_classes`` maps each discovered rhyme class to the list of
      words that belong to it.
    """
    rhymes = {word: [] for word in words}
    rhyme_classes = {}

    for word in words:
        for candidate in words:
            if candidate == word:
                continue

            rhyme_class = shared_rhyme_class(word, candidate)
            if rhyme_class is None:
                continue

            rhymes[word].append(candidate)

            members = rhyme_classes.setdefault(rhyme_class, [])
            if word not in members:
                members.append(word)

    return rhymes, rhyme_classes


def main(argv=None):
    """Run the script and return a process exit status.

    ``argv`` defaults to ``sys.argv``; it must contain the program name
    followed by exactly one dictionary file path. Returns 1 (after printing
    a usage message to stderr) when the argument count is wrong, otherwise 0.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) != 2:
        # Stop here: continuing without a valid path would only fail later
        # with an unhelpful error (or silently ignore extra arguments).
        print(
            "Please provide a single dictionary file to work from.",
            file=sys.stderr,
        )
        return 1

    with open(argv[1], encoding="utf-8") as dictionary:
        words = parse_words(dictionary)

    print(f"Read {len(words)} words.")

    rhymes, rhyme_classes = find_rhymes(words)

    print(f"Discovered {len(rhyme_classes)} rhyme classes.")
    print("Rhyme classes: " + ", ".join(rhyme_classes))

    print("\nRhymes")
    for word, matches in rhymes.items():
        if matches:
            print(word + ": " + ", ".join(matches))

    unrhymed = [word for word, matches in rhymes.items() if not matches]
    print("\nUnrhymed words: " + ", ".join(unrhymed))
    return 0


if __name__ == "__main__":
    sys.exit(main())