Rhyme generation script

2025-03-06 16:48:42 -06:00 · 2025-03-06 16:48:42 -06:00 · 2d80179a3b
parent da94dd9c10
commit 2d80179a3b
1 changed files with 69 additions and 0 deletions
--- a/gen_rhymes.py
+++ b/gen_rhymes.py
@ -0,0 +1,69 @@
+#!/usr/bin/env python3
+
+import sys as trans
+from pprint import pp
+
+# We're taking advantage of the fact that toki pona pronunciation is very
+# orderly, so we can determine rhymes entirely from how words are spelled.
+VOWELS = ("a", "e", "i", "o", "u")  # immutable: only used for `in` checks
+
+words = []  # dictionary entries, in the order they were read
+rhyme_classes = {}  # shared ending (vowel-initial) -> words rhyming on it
+rhymes = {}  # word -> every other word it rhymes with
+
+def matching_substring_r(str1, str2):
+    """Return the longest suffix shared by str1 and str2.
+
+    The strings are compared character by character from their last
+    positions towards the front, stopping at the first mismatch or when
+    the shorter string runs out. Returns "" when nothing matches.
+    """
+    shared = 0
+    for tail1, tail2 in zip(reversed(str1), reversed(str2)):
+        if tail1 != tail2:
+            break
+        shared += 1
+    # A slice starting at -0 would be the whole string, so guard zero.
+    return str1[-shared:] if shared else ""
+
+if len(trans.argv) != 2:
+    # trans is the sys module; exit rather than fall through to open().
+    trans.exit("Please provide a single dictionary file to work from.")
+
+# One word per line; only the newline is stripped, so blank lines are kept.
+with open(trans.argv[1]) as dictionary:
+    words.extend(line.rstrip("\n") for line in dictionary)
+
+print("Read " + str(len(words)) + " words.")
+
+# Compare every ordered pair of distinct words; a pair rhymes when its shared
+# ending, cut back to start at a vowel, is still longer than one letter.
+for word in words:
+    rhymes[word] = []
+
+    for candidate in words:
+        if candidate == word:
+            continue
+        # Drop leading non-vowels from the shared ending, keeping one letter.
+        ending = matching_substring_r(word, candidate)
+        while len(ending) > 1 and ending[0] not in VOWELS:
+            ending = ending[1:]
+
+        if len(ending) <= 1:
+            continue
+
+        rhymes[word].append(candidate)
+
+        members = rhyme_classes.setdefault(ending, [])
+        if word not in members:
+            members.append(word)
+
+# Report the rhyme classes, each word's rhymes, then the words left over.
+print("Discovered " + str(len(rhyme_classes)) + " rhyme classes.")
+print("Rhyme classes: " + ", ".join(rhyme_classes))
+print("\nRhymes")
+for word, partners in rhymes.items():
+    if partners:
+        print(word + ": " + ", ".join(partners))
+print("\nUnrhymed words: " + ", ".join(w for w in rhymes if not rhymes[w]))
+