Last active
June 26, 2023 19:52
-
-
Save wenqiglantz/aa3e58380eaeb0ee6d838122f1da0a40 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def load_index(directory_path, persist_dir="./storage"):
    """Load the persisted vector index (or build one) and sync it with the documents.

    The documents under ``directory_path`` are always read first. The index is
    then loaded from ``persist_dir``; if nothing is persisted there yet, a new
    index is built from the documents and persisted. In both cases the index
    is refreshed against the freshly read documents and persisted again.

    Args:
        directory_path: Directory whose files are read as source documents.
        persist_dir: Directory the index is loaded from and persisted to.
            Defaults to ``"./storage"``, the location the function previously
            used implicitly, so existing callers are unaffected.

    Returns:
        The loaded or newly created index, refreshed against the documents.
    """
    # filename_as_id=True derives document IDs from file names; the refresh
    # step below relies on IDs to match documents already in the index.
    documents = SimpleDirectoryReader(directory_path, filename_as_id=True).load_data()
    print(f"loaded documents with {len(documents)} pages")

    try:
        # Rebuild the storage context from the persisted files and load the index.
        storage_context = StorageContext.from_defaults(persist_dir=persist_dir)
        index = load_index_from_storage(storage_context)
        logging.info("Index loaded from storage.")
    except FileNotFoundError:
        # Nothing persisted yet: build the index from scratch and save it to the
        # same directory we will try to load from next time.
        logging.info("Index not found. Creating a new one...")
        index = GPTVectorStoreIndex.from_documents(documents)
        index.storage_context.persist(persist_dir=persist_dir)
        logging.info("New index created and persisted to storage.")

    # Insert new documents and update changed ones; the superseded versions are
    # also removed from the docstore.
    refreshed_docs = index.refresh_ref_docs(
        documents,
        update_kwargs={"delete_kwargs": {"delete_from_docstore": True}},
    )
    print(refreshed_docs)
    # The result is summed as a count of inserted/refreshed documents.
    print('Number of newly inserted/refreshed docs: ', sum(refreshed_docs))

    index.storage_context.persist(persist_dir=persist_dir)
    logging.info("Index refreshed and persisted to storage.")

    return index
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment