Created
March 2, 2012 06:02
-
-
Save sumchattering/1956110 to your computer and use it in GitHub Desktop.
Python Word Chainer
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# !/usr/bin/env python | |
# encoding: utf-8 | |
# Written by Sumeru Chatterjee | |
import sys | |
import pdb | |
import os | |
import pprint | |
characters = ['a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z'] | |
def main(): | |
sys.setrecursionlimit(2000) | |
usage = '''\nWord Chainer. | |
\nFinds if a possible chain of operations exists between two words in a list | |
\nUsage\n-------\n%s <path to words file>'''%sys.argv[0] | |
if len(sys.argv)<2: | |
print usage | |
sys.exit(0) | |
wordfile = sys.argv[1]; | |
try: | |
words = open(wordfile).read().splitlines() | |
chaingraph = WordChainGraph(words) | |
except IOError as e: | |
print 'There was a problem reading the file '+wordfile | |
sys.exit(0) | |
while(1): | |
inputwords = raw_input('\nEnter two words seperated by space :').split(' ') | |
if not len(inputwords)==2 or not inputwords[0] or not inputwords[1]: | |
print "Invalid Input" | |
continue | |
first = inputwords[0].lower() | |
second = inputwords[1].lower() | |
if not chaingraph.hasWord(first): | |
print "Word %s does not exist in list"%first | |
continue | |
if not chaingraph.hasWord(second): | |
print "Word %s does not exist in list"%second | |
continue | |
wordchain = chaingraph.findChainBetweenWords(first,second) | |
if len(wordchain): | |
print "The chain between the words is %s"%(",".join(wordchain)) | |
else: | |
print "There is no chain between the given words" | |
class WordChainGraph(object): | |
def __init__(self,words): | |
self.initialize(words) | |
def __str__(self): | |
return "\n\nNodes -> \n" + str(self.nodeMap) + "\nEdges ->\n" + pprint.pformat(self.edgeMap) | |
def __repr__(self): | |
return self.__str__() | |
def hasWord(self,word): | |
return (word in self.nodeMap) | |
def findChainBetweenWords(self,word1,word2): | |
#Get the corresponding nodes | |
startNode = self.nodeMap[word1] | |
endNode = self.nodeMap[word2] | |
wordchain = [] | |
#see if there is a path between them | |
path = self.findPathBetweenNodes(startNode,endNode) | |
if path: | |
for node in path: | |
wordchain.append(node.word) | |
return wordchain | |
def findPathBetweenNodes(self,start, end, path=[]): | |
todo = [[start, [start]]] | |
while 0 < len(todo): | |
(node, path) = todo.pop(0) | |
for next_node in self.edgeMap[node]: | |
if next_node in path: | |
continue | |
elif next_node == end: | |
return path + [next_node] | |
else: | |
todo.append([next_node, path + [next_node]]) | |
def initialize(self,words): | |
print "Processing words...." | |
total = len(words); | |
current = 1 | |
#Create the Nodes and the NodeMap which is a mapping between words and their corresponding node objects | |
self.nodeMap = {} | |
self.edgeMap = {} | |
for word in words: | |
lowerword = word.lower() | |
if lowerword in self.nodeMap: | |
continue; | |
else: | |
node = WordNode(lowerword) | |
self.nodeMap[lowerword] = node | |
#Now create the edgeMap which is a mapping between a node object and a list of all the nodes it is connected to | |
self.edgeMap = {} | |
for word in words: | |
#Show status | |
progress = (float(current)/float(total))*100; | |
sys.stdout.write("\r%f%%"%progress) | |
sys.stdout.flush() | |
current = current+1; | |
lowerword = word.lower() | |
currentNode = self.nodeMap[lowerword] | |
connectedNodes = set() | |
#check all words that can be created by insertions | |
for character in characters: | |
for index in range(0,len(lowerword)+1): | |
#Get a possible word from insertion | |
wordlist = list(lowerword) | |
wordlist.insert(index,character) | |
newword = "".join(wordlist) | |
if(newword!=lowerword) and (newword in self.nodeMap): | |
#get the new node | |
node = self.nodeMap[newword] | |
#add the node to the list of nodes for the current node | |
connectedNodes.add(node) | |
#add the current node to the list of nodes for the node | |
if(node in self.edgeMap): | |
self.edgeMap[node].add(currentNode) | |
else: | |
self.edgeMap[node] = set([currentNode]) | |
#If the index is after the end then dont try replacing | |
if(index==len(lowerword)): | |
continue | |
#get a possible word from replacement | |
wordlist = list(lowerword) | |
wordlist[index] = character | |
newword = "".join(wordlist) | |
if (newword!=lowerword) and (newword in self.nodeMap): | |
#get the new node | |
node = self.nodeMap[newword] | |
#add the node to the list of nodes for the current node | |
connectedNodes.add(node) | |
#add the current node to the list of nodes for the node | |
if(node in self.edgeMap): | |
self.edgeMap[node].add(currentNode) | |
else: | |
self.edgeMap[node]=set([currentNode]) | |
if(currentNode in self.edgeMap): | |
self.edgeMap[currentNode].update(connectedNodes) | |
else: | |
self.edgeMap[currentNode]=connectedNodes | |
class WordNode(object): | |
def __init__(self,word): | |
self.word = word | |
def __str__(self): | |
return "("+self.word+")" | |
def __repr__(self): | |
return self.__str__() | |
if __name__ == "__main__": | |
sys.exit(main()) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment