sumchattering · March 2, 2012 06:02
diff --git a/wordchain.py b/wordchain.py
 # !/usr/bin/env python
 # encoding: utf-8
 # Written by Sumeru Chatterjee

 import sys
 import pdb
 import os
 import pprint

 characters = ['a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z']

 def main():
 		
 		sys.setrecursionlimit(2000)
 		
 		usage = '''\nWord Chainer.
 		
 		\nFinds if a possible chain of operations exists between two words in a list
 		\nUsage\n-------\n%s  <path to words file>'''%sys.argv[0]

 		if len(sys.argv)<2:
 			print usage
 			sys.exit(0)
 		
 		wordfile = sys.argv[1];
 		try:
 			words = open(wordfile).read().splitlines()	
 			chaingraph = WordChainGraph(words)	
 		except IOError as e:
 			print 'There was a problem reading the file '+wordfile
 			sys.exit(0)
 		
 		while(1): 
 			
 			inputwords = raw_input('\nEnter two words seperated by space :').split(' ')
 			if not len(inputwords)==2 or not inputwords[0] or not inputwords[1]:
 				print "Invalid Input"
 				continue
 				
 			first = inputwords[0].lower()
 			second = inputwords[1].lower()	
 				
 			if not chaingraph.hasWord(first):
 				print "Word %s does not exist in list"%first
 				continue
 				
 			if not chaingraph.hasWord(second):
 				print "Word %s does not exist in list"%second
 				continue
 			
 			wordchain = chaingraph.findChainBetweenWords(first,second)
 			
 			if len(wordchain):
 				print "The chain between the words is %s"%(",".join(wordchain))
 			else: 
 				print "There is no chain between the given words"
 							

 class WordChainGraph(object):
 		
 		def __init__(self,words):
 			self.initialize(words)
 		def __str__(self):
 			return "\n\nNodes -> \n" + str(self.nodeMap) + "\nEdges ->\n" + pprint.pformat(self.edgeMap)
 		def __repr__(self):
 			return self.__str__()
 			
 		def hasWord(self,word):
 			return (word in self.nodeMap)
 			
 		def findChainBetweenWords(self,word1,word2):
 			
 			#Get the corresponding nodes  
 			startNode = self.nodeMap[word1]
 			endNode = self.nodeMap[word2]
 			
 			wordchain = []
 			#see if there is a path between them
 			path = self.findPathBetweenNodes(startNode,endNode)
 			if path:
 				for node in path:
 					wordchain.append(node.word)
 			return wordchain
 			
 		def findPathBetweenNodes(self,start, end, path=[]):
 			
 			todo = [[start, [start]]]
 			
 			while 0 < len(todo):
 				(node, path) = todo.pop(0)
 				for next_node in self.edgeMap[node]:
 					if next_node in path:
 						continue
 					elif next_node == end:
 						return path + [next_node]
 					else:
 						todo.append([next_node, path + [next_node]])
 						
 		def initialize(self,words):
 			
 			print "Processing words...."
 			total = len(words);
 			current = 1
 			
 			#Create the Nodes and the NodeMap which is a mapping between words and their corresponding node objects
 			self.nodeMap = {}
 			self.edgeMap = {}
 			for word in words:
 				lowerword = word.lower()
 				if lowerword in self.nodeMap:
 					continue;
 				else:
 					node = WordNode(lowerword)
 					self.nodeMap[lowerword] = node
 			
 			#Now create the edgeMap which is a mapping between a node object and a list of all the nodes it is connected to
 			self.edgeMap = {}
 			for word in words:
 				
 				#Show status
 				progress = (float(current)/float(total))*100;
 				sys.stdout.write("\r%f%%"%progress)    
 				sys.stdout.flush()
 				current = current+1;
 				
 				lowerword = word.lower()
 				currentNode = self.nodeMap[lowerword]
 				connectedNodes = set()

 				#check all words that can be created by insertions
 				for character in characters:
 					for index in range(0,len(lowerword)+1):
 						
 						#Get a possible word from insertion
 						wordlist = list(lowerword)
 						wordlist.insert(index,character)
 						newword = "".join(wordlist)	
 						
 						if(newword!=lowerword) and (newword in self.nodeMap):
 							#get the new node
 							node = self.nodeMap[newword]
 							
 							#add the node to the list of nodes for the current node
 							connectedNodes.add(node)
 		
 							#add the current node to the list of nodes for the node
 							if(node in self.edgeMap):
 								self.edgeMap[node].add(currentNode)
 							else:
 								self.edgeMap[node] = set([currentNode])
 						
 						#If the index is after the end then dont try replacing
 						if(index==len(lowerword)):
 							continue
 							
 						#get a possible word from replacement
 						wordlist = list(lowerword)
 						wordlist[index] = character
 						newword = "".join(wordlist)
 						
 						if (newword!=lowerword) and (newword in self.nodeMap):
 							#get the new node
 							node = self.nodeMap[newword]
 							
 							#add the node to the list of nodes for the current node
 							connectedNodes.add(node)
 							
 							#add the current node to the list of nodes for the node
 							if(node in self.edgeMap):
 								self.edgeMap[node].add(currentNode)
 							else:
 								self.edgeMap[node]=set([currentNode])	
 								
 				
 				if(currentNode in self.edgeMap):
 					self.edgeMap[currentNode].update(connectedNodes)
 				else:
 					self.edgeMap[currentNode]=connectedNodes	
 				
 				
 		
 class WordNode(object):
 		def __init__(self,word):
 			self.word = word		
 		def __str__(self):
 			return "("+self.word+")"
 		def __repr__(self):
 			return self.__str__()
 			
 			
 if __name__ == "__main__":
 		sys.exit(main())
	# !/usr/bin/env python
	# encoding: utf-8
	# Written by Sumeru Chatterjee

	import sys
	import pdb
	import os
	import pprint

	characters = ['a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z']

	def main():

	sys.setrecursionlimit(2000)

	usage = '''\nWord Chainer.

	\nFinds if a possible chain of operations exists between two words in a list
	\nUsage\n-------\n%s <path to words file>'''%sys.argv[0]

	if len(sys.argv)<2:
	print usage
	sys.exit(0)

	wordfile = sys.argv[1];
	try:
	words = open(wordfile).read().splitlines()
	chaingraph = WordChainGraph(words)
	except IOError as e:
	print 'There was a problem reading the file '+wordfile
	sys.exit(0)

	while(1):

	inputwords = raw_input('\nEnter two words seperated by space :').split(' ')
	if not len(inputwords)==2 or not inputwords[0] or not inputwords[1]:
	print "Invalid Input"
	continue

	first = inputwords[0].lower()
	second = inputwords[1].lower()

	if not chaingraph.hasWord(first):
	print "Word %s does not exist in list"%first
	continue

	if not chaingraph.hasWord(second):
	print "Word %s does not exist in list"%second
	continue

	wordchain = chaingraph.findChainBetweenWords(first,second)

	if len(wordchain):
	print "The chain between the words is %s"%(",".join(wordchain))
	else:
	print "There is no chain between the given words"


	class WordChainGraph(object):

	def __init__(self,words):
	self.initialize(words)
	def __str__(self):
	return "\n\nNodes -> \n" + str(self.nodeMap) + "\nEdges ->\n" + pprint.pformat(self.edgeMap)
	def __repr__(self):
	return self.__str__()

	def hasWord(self,word):
	return (word in self.nodeMap)

	def findChainBetweenWords(self,word1,word2):

	#Get the corresponding nodes
	startNode = self.nodeMap[word1]
	endNode = self.nodeMap[word2]

	wordchain = []
	#see if there is a path between them
	path = self.findPathBetweenNodes(startNode,endNode)
	if path:
	for node in path:
	wordchain.append(node.word)
	return wordchain

	def findPathBetweenNodes(self,start, end, path=[]):

	todo = [[start, [start]]]

	while 0 < len(todo):
	(node, path) = todo.pop(0)
	for next_node in self.edgeMap[node]:
	if next_node in path:
	continue
	elif next_node == end:
	return path + [next_node]
	else:
	todo.append([next_node, path + [next_node]])

	def initialize(self,words):

	print "Processing words...."
	total = len(words);
	current = 1

	#Create the Nodes and the NodeMap which is a mapping between words and their corresponding node objects
	self.nodeMap = {}
	self.edgeMap = {}
	for word in words:
	lowerword = word.lower()
	if lowerword in self.nodeMap:
	continue;
	else:
	node = WordNode(lowerword)
	self.nodeMap[lowerword] = node

	#Now create the edgeMap which is a mapping between a node object and a list of all the nodes it is connected to
	self.edgeMap = {}
	for word in words:

	#Show status
	progress = (float(current)/float(total))*100;
	sys.stdout.write("\r%f%%"%progress)
	sys.stdout.flush()
	current = current+1;

	lowerword = word.lower()
	currentNode = self.nodeMap[lowerword]
	connectedNodes = set()

	#check all words that can be created by insertions
	for character in characters:
	for index in range(0,len(lowerword)+1):

	#Get a possible word from insertion
	wordlist = list(lowerword)
	wordlist.insert(index,character)
	newword = "".join(wordlist)

	if(newword!=lowerword) and (newword in self.nodeMap):
	#get the new node
	node = self.nodeMap[newword]

	#add the node to the list of nodes for the current node
	connectedNodes.add(node)

	#add the current node to the list of nodes for the node
	if(node in self.edgeMap):
	self.edgeMap[node].add(currentNode)
	else:
	self.edgeMap[node] = set([currentNode])

	#If the index is after the end then dont try replacing
	if(index==len(lowerword)):
	continue

	#get a possible word from replacement
	wordlist = list(lowerword)
	wordlist[index] = character
	newword = "".join(wordlist)

	if (newword!=lowerword) and (newword in self.nodeMap):
	#get the new node
	node = self.nodeMap[newword]

	#add the node to the list of nodes for the current node
	connectedNodes.add(node)

	#add the current node to the list of nodes for the node
	if(node in self.edgeMap):
	self.edgeMap[node].add(currentNode)
	else:
	self.edgeMap[node]=set([currentNode])


	if(currentNode in self.edgeMap):
	self.edgeMap[currentNode].update(connectedNodes)
	else:
	self.edgeMap[currentNode]=connectedNodes



	class WordNode(object):
	def __init__(self,word):
	self.word = word
	def __str__(self):
	return "("+self.word+")"
	def __repr__(self):
	return self.__str__()


	if __name__ == "__main__":
	sys.exit(main())