Skip to content

Instantly share code, notes, and snippets.

@wenqiglantz
Last active June 26, 2023 19:52
Show Gist options
  • Save wenqiglantz/aa3e58380eaeb0ee6d838122f1da0a40 to your computer and use it in GitHub Desktop.
Save wenqiglantz/aa3e58380eaeb0ee6d838122f1da0a40 to your computer and use it in GitHub Desktop.
def load_index(directory_path, persist_dir="./storage"):
    """Load (or build) a vector index for a directory and refresh it.

    The documents under ``directory_path`` are read first. The function then
    tries to load a previously persisted index from ``persist_dir``; if the
    storage files are missing, a new index is built from the documents and
    persisted. In both cases the index is afterwards refreshed against the
    freshly read documents and persisted again.

    Args:
        directory_path: Directory handed to ``SimpleDirectoryReader``.
            Documents are read with ``filename_as_id=True`` so that the
            refresh step can match them against what the index already holds.
        persist_dir: Directory the index is loaded from and persisted to.
            Defaults to ``"./storage"``, the location the function always
            used before this parameter existed.

    Returns:
        The loaded or newly created index, after refresh.
    """
    documents = SimpleDirectoryReader(
        directory_path, filename_as_id=True
    ).load_data()
    print(f"loaded documents with {len(documents)} pages")

    try:
        # Rebuild the storage context and try to load the index from it.
        # Only these two calls are expected to raise FileNotFoundError.
        storage_context = StorageContext.from_defaults(persist_dir=persist_dir)
        index = load_index_from_storage(storage_context)
    except FileNotFoundError:
        # Nothing persisted yet: create a new index and write it to disk.
        logging.info("Index not found. Creating a new one...")
        index = GPTVectorStoreIndex.from_documents(documents)
        index.storage_context.persist(persist_dir=persist_dir)
        logging.info("New index created and persisted to storage.")
    else:
        logging.info("Index loaded from storage.")

    # Check the documents against the index; update_kwargs asks for replaced
    # documents to be deleted from the docstore as well.
    refreshed_docs = index.refresh_ref_docs(
        documents,
        update_kwargs={"delete_kwargs": {'delete_from_docstore': True}},
    )
    print(refreshed_docs)
    # NOTE(review): summing assumes one truthy flag per inserted/refreshed
    # document -- confirm against the refresh_ref_docs return contract.
    print('Number of newly inserted/refreshed docs: ', sum(refreshed_docs))

    # Persist to the same directory the index is loaded from, so the next
    # call finds the refreshed state.
    index.storage_context.persist(persist_dir=persist_dir)
    logging.info("Index refreshed and persisted to storage.")

    return index
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment