92 lines
2.8 KiB
Python
92 lines
2.8 KiB
Python
#!/usr/bin/env python3
|
|
|
|
import sys as trans
|
|
from pprint import pp
|
|
|
|
# We're taking advantage of the fact that toki pona pronunciation is very
# orderly. We can determine rhymes entirely orthographically.
VOWELS = ["a", "e", "i", "o", "u"]

# Every word read from the dictionary file, in file order.
words = []
# Maps a rhyme class (a shared word ending that starts with a vowel) to the
# list of words that belong to it.
rhyme_classes = {}
# Maps each word to the list of other words it rhymes with.
rhymes = {}
|
|
|
|
def matching_substring_r(str1, str2):
    """Return the longest matching substring of two strings which touches the
    end of both.

    The two strings are compared character by character starting from their
    last characters and moving towards the front; the comparison stops at the
    first mismatch or when the shorter string is exhausted.

    Returns the shared ending, or "" when the strings have no common ending
    (including when either string is empty).
    """
    matched = 0
    for char1, char2 in zip(reversed(str1), reversed(str2)):
        if char1 != char2:
            break
        matched += 1

    # Slice from an explicit start index: str1[-0:] would be the whole string
    # rather than the empty string when nothing matched.
    return str1[len(str1) - matched:]
|
|
|
|
# Need a dict file to work from
if len(trans.argv) != 2:
    # Passing a string to exit() prints it to stderr and exits with status 1,
    # so the usage error does not get mixed into the normal report on stdout.
    trans.exit("Please provide a single dictionary file to work from.")

# Read words, one per line
seen_words = set(words)
with open(trans.argv[1], encoding="utf-8") as dictionary:
    for line in dictionary:
        # strip() also removes "\r" and trailing spaces, which would otherwise
        # become part of the word and break the suffix comparison
        word = line.strip()

        # blank lines are not words, and a repeated entry would make the same
        # rhyme show up more than once
        if not word or word in seen_words:
            continue

        seen_words.add(word)
        words.append(word)

print(f"Read {len(words)} words.")

# Compare every word against every other word
for word in words:
    rhymes[word] = []

    for possible_rhyme in words:
        # lili doesn't rhyme with lili
        if word == possible_rhyme:
            continue

        # find the rhyme class represented by this relationship,
        # which is a nonempty substring touching the end of the words
        # which starts with a vowel
        rhyme_class = matching_substring_r(word, possible_rhyme)
        while len(rhyme_class) > 1 and rhyme_class[0] not in VOWELS:
            rhyme_class = rhyme_class[1:]

        # i don't care that lili rhymes with monsi, that's too much
        if len(rhyme_class) <= 1:
            continue

        # we know that these two rhyme
        rhymes[word].append(possible_rhyme)

        # we add only this word; the other will be added in its own outer loop
        class_members = rhyme_classes.setdefault(rhyme_class, [])
        if word not in class_members:
            class_members.append(word)

# Sort everything so the report is stable from run to run
rhyme_classes_list = sorted(rhyme_classes)
rhymes_list = sorted(rhymes)
unrhymed_words = sorted(w for w in rhymes if not rhymes[w])

print(f"Discovered {len(rhyme_classes_list)} rhyme classes.")
print("Rhyme classes: " + ", ".join(rhyme_classes_list))
print("\nRhymes")
for word in rhymes_list:
    rhymes_for_word_list = rhymes[word]
    rhymes_for_word_list.sort()
    # words without any rhyme are reported separately below
    if rhymes_for_word_list:
        print(word + ": " + ", ".join(rhymes_for_word_list))

print("\nUnrhymed words: " + ", ".join(unrhymed_words))
|
|
|