scastillo · February 24, 2023 16:30 · scastillo · Feb 24, 2023
diff --git a/search.py b/search.py
 """

 In this example, we're using the spacy library to load a pre-trained word embedding model (en_core_web_md), which includes word vectors for many common English words. We define a list of documents to search through, and a search function that takes a query string and computes the cosine similarity between the query vector and each document vector. Finally, we test the search function with a query string and print the most similar document. Note that this is a simple example, and in a real-world application you may want to use a more sophisticated search algorithm or pre-process the documents differently to achieve better results.
 """

 import numpy as np
 import spacy

 # Load the spaCy pipeline once, at import time. search() reads this
 # module-level `nlp` object to obtain the vector of every word it embeds.
 # NOTE(review): assumes the "en_core_web_md" model package is already
 # installed in the environment -- confirm against the setup instructions.
 nlp = spacy.load("en_core_web_md")

 # Sample corpus for the demo: the candidate texts that the query further
 # down in this script is ranked against.
 documents = [
    "The quick brown fox jumps over the lazy dog.",
    "A stitch in time saves nine.",
    "The early bird catches the worm.",
    "An apple a day keeps the doctor away.",
    "Actions speak louder than words."
 ]

 # Define a search function that takes a query string and returns the index of the most similar document
 def search(query, documents):
    """Return the index of the document most similar to ``query``.

    Each text is lower-cased, split on whitespace, and represented by the
    mean of the per-word vectors produced by the module-level ``nlp``
    pipeline. Documents are then ranked by cosine similarity to the query.

    Args:
        query: Query string; must contain at least one word.
        documents: Non-empty iterable of document strings.

    Returns:
        The position of the best-matching document, as returned by
        ``np.argmax`` (the first index wins ties). Documents whose
        similarity is undefined are never preferred over a real match.

    Raises:
        ValueError: If ``documents`` is empty or ``query`` has no words.
    """
    docs = list(documents)
    if not docs:
        raise ValueError("documents must not be empty")

    def embed(text):
        # Mean of the per-word vectors, or None when the text has no words
        # (np.mean over an empty list would produce NaN instead of a vector).
        words = text.lower().split()
        if not words:
            return None
        return np.mean([nlp(word).vector for word in words], axis=0)

    query_vec = embed(query)
    if query_vec is None:
        raise ValueError("query must contain at least one word")

    # Word-less documents get an all-zero vector so the matrix stays
    # rectangular; their zero norm is masked out below.
    blank = np.zeros_like(query_vec)
    document_vecs = np.asarray(
        [blank if vec is None else vec for vec in map(embed, docs)]
    )

    dots = document_vecs @ query_vec
    norms = np.linalg.norm(document_vecs, axis=1) * np.linalg.norm(query_vec)

    # A zero norm (e.g. only words without an embedding -- assumed to come
    # back as zero vectors) makes cosine similarity undefined. Dividing
    # anyway yields NaN, which np.argmax would pick as the "best" match, so
    # those entries are left at -inf instead.
    similarities = np.full(dots.shape, -np.inf)
    np.divide(dots, norms, out=similarities, where=norms > 0)

    return np.argmax(similarities)

 # Demo run: the query reuses the words of the first sample document in a
 # different order, so that document is the expected best match.
 query = "The lazy dog jumps over the quick brown fox."
 # search() returns a position in `documents`, not the text itself, so the
 # matching string is looked up separately before printing.
 most_similar_doc_index = search(query, documents)
 most_similar_doc = documents[most_similar_doc_index]
 print("Most similar document to query:", most_similar_doc)
	"""

	In this example, we're using the spacy library to load a pre-trained word embedding model (en_core_web_md), which includes word vectors for many common English words. We define a list of documents to search through, and a search function that takes a query string and computes the cosine similarity between the query vector and each document vector. Finally, we test the search function with a query string and print the most similar document. Note that this is a simple example, and in a real-world application you may want to use a more sophisticated search algorithm or pre-process the documents differently to achieve better results.
	"""

	import numpy as np
	import spacy

	# Load the spaCy pipeline once, at import time. search() reads this
	# module-level `nlp` object to obtain the vector of every word it embeds.
	# NOTE(review): assumes the "en_core_web_md" model package is already
	# installed in the environment -- confirm against the setup instructions.
	nlp = spacy.load("en_core_web_md")

	# Sample corpus for the demo: the candidate texts that the query further
	# down in this script is ranked against.
	documents = [
	"The quick brown fox jumps over the lazy dog.",
	"A stitch in time saves nine.",
	"The early bird catches the worm.",
	"An apple a day keeps the doctor away.",
	"Actions speak louder than words."
	]

	# Define a search function that takes a query string and returns the index of the most similar document
	def search(query, documents):
	    """Return the index of the document most similar to ``query``.

	    Each text is lower-cased, split on whitespace, and represented by the
	    mean of the per-word vectors produced by the module-level ``nlp``
	    pipeline. Documents are then ranked by cosine similarity to the query.

	    Args:
	        query: Query string; must contain at least one word.
	        documents: Non-empty iterable of document strings.

	    Returns:
	        The position of the best-matching document, as returned by
	        ``np.argmax`` (the first index wins ties). Documents whose
	        similarity is undefined are never preferred over a real match.

	    Raises:
	        ValueError: If ``documents`` is empty or ``query`` has no words.
	    """
	    docs = list(documents)
	    if not docs:
	        raise ValueError("documents must not be empty")

	    def embed(text):
	        # Mean of the per-word vectors, or None when the text has no words
	        # (np.mean over an empty list would produce NaN instead of a vector).
	        words = text.lower().split()
	        if not words:
	            return None
	        return np.mean([nlp(word).vector for word in words], axis=0)

	    query_vec = embed(query)
	    if query_vec is None:
	        raise ValueError("query must contain at least one word")

	    # Word-less documents get an all-zero vector so the matrix stays
	    # rectangular; their zero norm is masked out below.
	    blank = np.zeros_like(query_vec)
	    document_vecs = np.asarray(
	        [blank if vec is None else vec for vec in map(embed, docs)]
	    )

	    dots = document_vecs @ query_vec
	    norms = np.linalg.norm(document_vecs, axis=1) * np.linalg.norm(query_vec)

	    # A zero norm (e.g. only words without an embedding -- assumed to come
	    # back as zero vectors) makes cosine similarity undefined. Dividing
	    # anyway yields NaN, which np.argmax would pick as the "best" match, so
	    # those entries are left at -inf instead.
	    similarities = np.full(dots.shape, -np.inf)
	    np.divide(dots, norms, out=similarities, where=norms > 0)

	    return np.argmax(similarities)

	# Demo run: the query reuses the words of the first sample document in a
	# different order, so that document is the expected best match.
	query = "The lazy dog jumps over the quick brown fox."
	# search() returns a position in `documents`, not the text itself, so the
	# matching string is looked up separately before printing.
	most_similar_doc_index = search(query, documents)
	most_similar_doc = documents[most_similar_doc_index]
	print("Most similar document to query:", most_similar_doc)