Rhyme generation script
This commit is contained in:
parent
da94dd9c10
commit
2d80179a3b
|
@ -0,0 +1,69 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import sys as trans
|
||||
from pprint import pp
|
||||
|
||||
# We're taking advantage of the fact that toki pona pronunciation is very
|
||||
# orderly. We can determine rhymes entirely lexicographically.
|
||||
# Letters that may begin a rhyme class; leading consonants are trimmed off a
# shared word ending until one of these is reached.
VOWELS = ["a", "e", "i", "o", "u"]

# Every word read from the dictionary file, in file order.
words = []
# Maps a rhyme class (shared ending that starts on a vowel) to the list of
# words that belong to it.
rhyme_classes = {}
# Maps each word to the list of other words it rhymes with.
rhymes = {}
|
||||
|
||||
def matching_substring_r(str1, str2):
    """Return the longest common suffix of ``str1`` and ``str2``.

    The two strings are compared character by character from the right;
    the scan stops at the first mismatch or when the shorter string is
    exhausted.

    Args:
        str1: First string.
        str2: Second string.

    Returns:
        The trailing substring shared by both strings, or ``""`` when they
        do not end in the same character or either one is empty.
    """
    match_len = 0
    for char1, char2 in zip(reversed(str1), reversed(str2)):
        if char1 != char2:
            break
        match_len += 1

    # Slice from an explicit start index: ``str1[-0:]`` would return the
    # whole string rather than the empty suffix.
    return str1[len(str1) - match_len:]
|
||||
|
||||
# Exactly one command-line argument is expected: the dictionary file path.
if len(trans.argv) != 2:
    print("Please provide a single dictionary file to work from.")
    # Stop here: continuing would either raise IndexError on argv[1] (no
    # argument given) or silently ignore the extra arguments.
    trans.exit(1)

# The dictionary holds one word per line. Surrounding whitespace (including
# a stray "\r" from CRLF files) is dropped and blank lines are skipped so
# that they are not counted or reported as words.
with open(trans.argv[1]) as dictionary:
    for line in dictionary:
        word = line.strip()
        if word:
            words.append(word)

print("Read " + str(len(words)) + " words.")

# Compare every word against every other word.
for word in words:
    rhymes[word] = []

    for possible_rhyme in words:
        if word == possible_rhyme:
            continue

        # Start from the longest shared ending, then trim leading
        # consonants so the rhyme class begins on a vowel.
        rhyme_class = matching_substring_r(word, possible_rhyme)
        while len(rhyme_class) > 1 and rhyme_class[0] not in VOWELS:
            rhyme_class = rhyme_class[1:]

        # Endings of one letter or less do not count as a rhyme.
        if len(rhyme_class) <= 1:
            continue

        rhymes[word].append(possible_rhyme)

        # Record the word under its rhyme class once, however many
        # partners it shares that class with.
        members = rhyme_classes.setdefault(rhyme_class, [])
        if word not in members:
            members.append(word)

print("Discovered " + str(len(rhyme_classes)) + " rhyme classes.")
print("Rhyme classes: " + ", ".join(rhyme_classes))

print("\nRhymes")
for word, matches in rhymes.items():
    if matches:
        print(word + ": " + ", ".join(matches))

unrhymed = [word for word, matches in rhymes.items() if not matches]
print("\nUnrhymed words: " + ", ".join(unrhymed))
|
||||
|
Loading…
Reference in New Issue