Comments

2025-03-06 17:01:57 -06:00 · 2025-03-06 17:01:57 -06:00 · f4101c76e7
parent 99f2f17dfa
commit f4101c76e7
1 changed files with 14 additions and 1 deletions
--- a/gen_rhymes.py
+++ b/gen_rhymes.py
@ -4,7 +4,7 @@ import sys as trans
 from pprint import pp

 # We're taking advantage of the fact that toki pona pronunciation is very
-# orderly. We can determine rhymes entirely lexicographically.
+# orderly. We can determine rhymes entirely lexically.
 VOWELS = ["a", "e", "i", "o", "u"]

 words = []
@ -12,6 +12,8 @@ rhyme_classes = {}
 rhymes = {}

 def matching_substring_r(str1, str2):
+    """Return the longest matching substring of two strings which touches the
+    end of both."""
    shortest_len = min(len(str1), len(str2))
    if shortest_len == 0:
        return ""
@ -26,9 +28,12 @@ def matching_substring_r(str1, str2):
        current_index -= 1
    return current_match

+# Need a dict file to work from
 if not len(trans.argv) == 2:
    print("Please provide a single dictionary file to work from.")
+    trans.exit(1)

+# Read words, one per line
 with open(trans.argv[1]) as dictionary:
    for line in dictionary.readlines():
        word = line.rstrip("\n")
@ -40,21 +45,29 @@ for word in words:
    rhymes[word] = []

    for possible_rhyme in words:
+        # lili doesn't rhyme with lili
        if word == possible_rhyme:
            continue
+
+        # find the rhyme class represented by this relationship: the
+        # shared ending of the two words, with leading non-vowels trimmed
+        # off (trimming stops once a single character is left)
        rhyme_class = matching_substring_r(word, possible_rhyme)
        while len(rhyme_class) > 1 and\
                rhyme_class[0] not in VOWELS:
                    rhyme_class = rhyme_class[1:]

+        # a one-character rhyme class (lili / monsi share only "i") is too weak; skip it
        if not len(rhyme_class) > 1:
            continue

+        # we know that these two rhyme
        rhymes[word].append(possible_rhyme)

        if rhyme_class not in rhyme_classes:
            rhyme_classes[rhyme_class] = []

+        # we add only this word; the other will be added in its own outer loop
        if word not in rhyme_classes[rhyme_class]:
            rhyme_classes[rhyme_class].append(word)