// Program to find ambiguous words using "T9" phone entry method.

// Users of this class will only use the encipher and decipher functions.
// Both of them lazily call initialize[] on first use, which loads the
// wordlists named in that function.
class T9
{
   // Becomes true once initialize[] has filled phonedict and dict.
   class var initialized = false;

   // dict is a dictionary of words, keyed by their numeric values; values
   // are an array of words that this number maps to.
   class var dict = new dict

   // Dictionary of mappings from (lowercase) letters to digit strings.
   class var phonedict = new dict

   // Builds the letter-to-digit table, then reads every wordlist file and
   // indexes its words in dict.  Sets initialized to true when finished.
   class initialize[] :=
   {
      phonedict@"a" = "2"
      phonedict@"b" = "2"
      phonedict@"c" = "2"
      phonedict@"d" = "3"
      phonedict@"e" = "3"
      phonedict@"f" = "3"
      phonedict@"g" = "4"
      phonedict@"h" = "4"
      phonedict@"i" = "4"
      phonedict@"j" = "5"
      phonedict@"k" = "5"
      phonedict@"l" = "5"
      phonedict@"m" = "6"
      phonedict@"n" = "6"
      phonedict@"o" = "6"
      phonedict@"p" = "7"
      phonedict@"q" = "7"
      phonedict@"r" = "7"
      phonedict@"s" = "7"
      phonedict@"t" = "8"
      phonedict@"u" = "8"
      phonedict@"v" = "8"
      phonedict@"w" = "9"
      phonedict@"x" = "9"
      phonedict@"y" = "9"
      phonedict@"z" = "9"

      // Effectively, this is a "set" of the words we've seen.  It is shared
      // across all wordlist files so a word is only indexed once.
      // TODO:  need to implement efficient sets.
      usedDict = new dict

      // Paths to several Moby wordlists (not included)
      // The wordlist files are part of the Moby wordlist project, available at:
      //   http://icon.shef.ac.uk/Moby/
      files = ["file://///home/eliasen/prog/mobydict/mwords/singlewords.txt",
               "file://///home/eliasen/prog/mobydict/mwords/compoundwords.txt",
               "file://///home/eliasen/prog/mobydict/mwords/names.txt",
               "file://///home/eliasen/prog/mobydict/mwords/places.txt"]

      // Load in the wordfiles and convert words to their numeric values
      for file = files
         numeric[read[file], usedDict]   // Read in wordlists

      initialized = true
   }

   //orig = read["file:///c:/prog/frink/rabbitorig.txt"]

   // Decipher a string.  Every run of digits in the string is replaced by
   // the result of decipherNum for that run; all other text is left alone.
   // Returns the resulting string.
   class decipher[string] :=
   {
      if (! initialized)
         initialize[];

      string =~ %s/(\d+)/decipherNum[$1]/gse
   }

   //println[orig]

   // Function to turn a text into its numeric representations
   // and insert each word mapping into a dictionary.
   //
   // string:   whitespace-separated text containing the words to index.
   // usedDict: dictionary of already-indexed words (keyed by the lowercased
   //           word); it is updated as new words are added.
   class numeric[string, usedDict] :=
   {
      words = split[%r/\s+/s, string]    // Split into words
      for word = words
      {
         if word =~ %r/[^A-Za-z]/        // Discard non-alphabetic words.
            next

         if word =~ %r/[A-Z]{2,}/        // Two or more uppercase letters?  Skip 'em.
            next

         capWord = lowercase[word]       // Make all keys lowercase

         if (usedDict@capWord != undef)  // Already seen it?
            next

         usedDict@capWord = true         // Mark word as used

         // Make numeric value.  This could be done more concisely if we
         // had a map[] function.
         ret = capWord
         ret =~ %s/([a-z])/phonedict@$1/ges

         // println["$capWord -> $ret"]

         // See if number exists already.  Note that the word is stored with
         // its original capitalization, not the lowercased key.
         if (dict@ret != undef)
         {
            dict@ret.push[word]          // Already exists
            // println[dict@ret]         // Comment in to see hash collisions
         } else
            dict@ret = [word]            // Doesn't exist, create a one-element array
      }
   }

   // Function to do enciphering, for reference.  Note that it's a *whole*
   // lot easier to encipher than decipher.
   // Lowercases str and replaces each letter with its phone-keypad digit;
   // non-letter characters pass through unchanged.  Returns the new string.
   class encipher[str] :=
   {
      if !initialized
         initialize[];

      lc[str] =~ %s/([A-Za-z])/phonedict@$1/ges
   }

   // Function to decipher a single number to its probable word(s)
   // This should be treated as a private function.
   //
   // num: a string of digits.
   // Returns num itself when no word maps to it, the word when exactly one
   // word maps to it, or the whole array of candidate words otherwise.
   class decipherNum[num] :=
   {
      rev = dict@num

      // Compare against undef explicitly (as numeric does) rather than
      // relying on how an array value is treated as a boolean.
      if (rev == undef)
         return num              // Word not found, print numeric value

      if (length[rev] == 1)
         return rev@0            // Only one word; print without brackets

      return rev                 // More than one mapping, print array
   }
}