#!/usr/bin/env python3
"""Find rhyming toki pona words in a dictionary file.

Usage: pass the path of a dictionary file (one word per line) as the single
command-line argument. The script prints the rhyme classes it discovered,
the rhymes of every word, and the words that rhyme with nothing.
"""
import sys as trans
from pprint import pp  # noqa: F401  (not used by the script itself)

# We're taking advantage of the fact that toki pona pronunciation is very
# orderly. We can determine rhymes entirely orthographically.

VOWELS = ["a", "e", "i", "o", "u"]

# Module-level result containers; main() fills them in.
words = []
rhyme_classes = {}
rhymes = {}


def matching_substring_r(str1, str2):
    """Return the longest matching substring of two strings which touches
    the end of both.

    Returns "" when the strings do not end in the same character (or when
    either string is empty).
    """
    matched = 0
    # Walk both strings from the end; zip stops at the shorter one.
    for char1, char2 in zip(reversed(str1), reversed(str2)):
        if char1 != char2:
            break
        matched += 1
    # Slice from the front-based index: str1[-0:] would be the whole string.
    return str1[len(str1) - matched:]


def parse_words(lines):
    """Return the distinct, non-blank words found in *lines*, in order.

    Surrounding whitespace is stripped so that a stray trailing space cannot
    defeat suffix matching; blank lines are ignored so that the empty string
    is never treated as a word; repeated words are kept only once so that no
    word is listed twice as somebody's rhyme.
    """
    stripped = (line.strip() for line in lines)
    # dict.fromkeys de-duplicates while preserving first-seen order.
    return list(dict.fromkeys(word for word in stripped if word))


def read_words(path):
    """Read the dictionary file at *path* (one word per line).

    Raises OSError if the file cannot be opened.
    """
    with open(path, encoding="utf-8") as dictionary:
        return parse_words(dictionary)


def rhyme_class_of(word, possible_rhyme):
    """Return the rhyme class shared by two words, or None if they don't rhyme.

    The rhyme class is the common ending of both words, trimmed so that it
    starts with a vowel. It must be longer than a single character: sharing
    only a final vowel (lili / monsi) does not count as rhyming.
    """
    common_ending = matching_substring_r(word, possible_rhyme)
    for index, char in enumerate(common_ending):
        if char in VOWELS:
            rhyme_class = common_ending[index:]
            return rhyme_class if len(rhyme_class) > 1 else None
    # No vowel at all in the common ending (this includes the empty ending).
    return None


def find_rhymes(word_list):
    """Compute the rhymes among the words of *word_list*.

    Returns a pair ``(rhymes_by_word, words_by_class)``:

    * ``rhymes_by_word`` maps every word to the list of words it rhymes with
      (empty when it rhymes with nothing);
    * ``words_by_class`` maps every rhyme class to the list of words that
      belong to it.
    """
    rhymes_by_word = {word: [] for word in word_list}
    words_by_class = {}
    for word in word_list:
        for possible_rhyme in word_list:
            # lili doesn't rhyme with lili
            if word == possible_rhyme:
                continue
            rhyme_class = rhyme_class_of(word, possible_rhyme)
            if rhyme_class is None:
                continue
            rhymes_by_word[word].append(possible_rhyme)
            # We add only this word; the other one is added when the outer
            # loop reaches it.
            members = words_by_class.setdefault(rhyme_class, [])
            if word not in members:
                members.append(word)
    return rhymes_by_word, words_by_class


def main(argv):
    """Run the rhyme finder; *argv* is the full argument vector.

    Returns the process exit status: 1 when the dictionary file argument is
    missing or there are extra arguments, 0 otherwise.
    """
    # Need a dict file to work from
    if len(argv) != 2:
        print("Please provide a single dictionary file to work from.",
              file=trans.stderr)
        return 1

    words[:] = read_words(argv[1])
    print("Read " + str(len(words)) + " words.")

    found_rhymes, found_classes = find_rhymes(words)
    rhymes.clear()
    rhymes.update(found_rhymes)
    rhyme_classes.clear()
    rhyme_classes.update(found_classes)

    rhyme_classes_list = sorted(rhyme_classes)
    unrhymed_words = sorted(word for word in rhymes if not rhymes[word])

    print("Discovered " + str(len(rhyme_classes_list)) + " rhyme classes.")
    print("Rhyme classes: " + ", ".join(rhyme_classes_list))

    print("\nRhymes")
    for word in sorted(rhymes):
        rhymes_for_word_list = rhymes[word]
        rhymes_for_word_list.sort()
        if rhymes_for_word_list:
            print(word + ": " + ", ".join(rhymes_for_word_list))

    print("\nUnrhymed words: " + ", ".join(unrhymed_words))
    return 0


if __name__ == "__main__":
    trans.exit(main(trans.argv))