Last active
June 8, 2023 09:48
-
-
Save ashbuilds/3f821cb6c32947c0023047108a816208 to your computer and use it in GitHub Desktop.
The NotionPageAI class enables integration between Notion pages and AI-powered functionalities. It provides methods for generating embeddings, storing them in a vectorDB, and performing conversational question-answering based on indexed Notion pages.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import { Client as NotionClient } from '@notionhq/client'; | |
import { createClient, SupabaseClient } from '@supabase/supabase-js'; | |
import { ConversationalRetrievalQAChain } from 'langchain/chains'; | |
import { OpenAIEmbeddings } from 'langchain/embeddings/openai'; | |
import { OpenAIChat } from 'langchain/llms/openai'; | |
import { BufferMemory } from 'langchain/memory'; | |
import { ContextualCompressionRetriever } from 'langchain/retrievers/contextual_compression'; | |
import { LLMChainExtractor } from 'langchain/retrievers/document_compressors/chain_extract'; | |
import { ChainValues, LLMResult } from 'langchain/schema'; | |
import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter'; | |
import { SupabaseLibArgs, SupabaseVectorStore } from 'langchain/vectorstores/supabase'; | |
import { NextFetchEvent, NextRequest, NextResponse } from 'next/server'; | |
import { NotionToMarkdown } from 'notion-to-md'; | |
/** OpenAI model name passed to `OpenAIEmbeddings` when embedding page chunks and queries. */
const GPT_EMBEDDING_MODEL = 'text-embedding-ada-002';
/** OpenAI chat model name passed to `OpenAIChat` for compression and question answering. */
const GPT_CHAT_MODEL = 'gpt-3.5-turbo';
/** Name of the Supabase table that stores the embedded documents. */
const PG_VECTOR_DB_TABLE = 'documents';
/** Name of the Postgres function handed to the vector store as its `queryName`. */
const PG_FUNC_QUERY_NAME = 'match_documents';
/**
 * The `NotionPageAI` class enables integration between Notion pages and AI-powered functionalities.
 * It provides methods for generating embeddings, storing them in a vectorDB, and performing conversational
 * question-answering based on indexed Notion pages.
 */
class NotionPageAI {
  // Instances of various clients and utilities
  notionClient: NotionClient;
  supabaseClient: SupabaseClient;
  chatGPTModel: OpenAIChat;
  openAIEmbeddings: OpenAIEmbeddings;
  dbConfig: SupabaseLibArgs;
  baseCompressor: LLMChainExtractor;
  notionToMarkdown: NotionToMarkdown;

  /**
   * Constructs a new instance of the `NotionPageAI` class.
   * Initializes the clients, models, and utilities required for integration.
   *
   * Reads `NOTION_API_KEY`, `NEXT_PUBLIC_SUPABASE_URL` and `SUPABASE_PRIVATE_KEY` from `process.env`.
   * @throws An error if either Supabase environment variable is missing.
   */
  constructor() {
    // Initialize Notion client with the provided API key - please refer https://developers.notion.com/docs/create-a-notion-integration
    this.notionClient = new NotionClient({ auth: process.env.NOTION_API_KEY });

    // Initialize NotionToMarkdown converter
    this.notionToMarkdown = new NotionToMarkdown({ notionClient: this.notionClient });

    // Fail fast with a descriptive message when the Supabase credentials are not configured.
    // The variables are read with static property access on purpose: bundlers that inline
    // environment variables at build time cannot resolve dynamic `process.env[name]` lookups.
    const supabaseUrl = process.env.NEXT_PUBLIC_SUPABASE_URL;
    if (!supabaseUrl) {
      throw new Error('Missing environment variable: NEXT_PUBLIC_SUPABASE_URL');
    }
    const supabaseKey = process.env.SUPABASE_PRIVATE_KEY;
    if (!supabaseKey) {
      throw new Error('Missing environment variable: SUPABASE_PRIVATE_KEY');
    }

    // Initialize Supabase client with the provided URL and private key
    this.supabaseClient = createClient(supabaseUrl, supabaseKey);

    // Initialize OpenAI ChatGPT model with a given configuration and optional logging callbacks
    this.chatGPTModel = new OpenAIChat({
      modelName: GPT_CHAT_MODEL,
      temperature: 0,
      callbacks: [
        // Logging callbacks for better visibility during language model requests
        {
          handleLLMStart: async (_llm, prompts: string[]) => {
            console.log(JSON.stringify(prompts, null, 2));
          },
          handleLLMEnd: async (output: LLMResult) => {
            console.log(JSON.stringify(output, null, 2));
          },
          handleLLMError: async (err: Error) => {
            console.error(err);
          },
        },
      ],
    });

    // Initialize OpenAI Embeddings client
    this.openAIEmbeddings = new OpenAIEmbeddings({ modelName: GPT_EMBEDDING_MODEL });

    // Configure Supabase vectorDB - please refer https://supabase.com/docs/guides/ai/langchain
    this.dbConfig = {
      client: this.supabaseClient,
      tableName: PG_VECTOR_DB_TABLE,
      queryName: PG_FUNC_QUERY_NAME,
    };

    // Initialize the base compressor for contextual retrieval
    this.baseCompressor = LLMChainExtractor.fromLLM(this.chatGPTModel);
  }

  /**
   * Checks if embeddings for a given pageId already exist in the supabase vectorDB table.
   * @param pageId - The ID of the Notion page.
   * @returns A boolean indicating whether the embeddings exist or not.
   * @throws An error if the Supabase query itself fails, so that a failed lookup is never
   * mistaken for "not indexed yet".
   */
  async checkIfIndexExist({ pageId }: { pageId: string }): Promise<boolean> {
    // Only existence matters: fetch a single lightweight column of at most one row
    // instead of every column (including the embedding vectors) of every matching row.
    const { data, error } = await this.supabaseClient
      .from(PG_VECTOR_DB_TABLE)
      .select('metadata')
      .eq('metadata->>pageId', pageId)
      .limit(1);

    if (error) {
      throw new Error(`Failed to check the index for page ${pageId}: ${error.message}`);
    }

    return (data?.length ?? 0) > 0;
  }

  /**
   * Generates embeddings for a given pageId of a Notion page and stores them in the supabase vectorDB.
   * This method should be run once for a pageId before performing queries.
   * @param pageId - The ID of the Notion page.
   * @param chunkSize - The size of the text chunks for processing. Default: 1000.
   * @returns A success message if indexing is completed successfully. If storing the embeddings
   * fails, the failure is logged and its message is returned instead of being thrown.
   * @throws An error if the pageId is missing, is already indexed, or the page produced no markdown.
   */
  async createNotionIndex({
    pageId,
    chunkSize = 1000,
  }: {
    pageId: string;
    chunkSize?: number;
  }): Promise<string> {
    if (!pageId) {
      throw new Error('Page ID not found!');
    }

    // Check if embeddings for the pageId already exist
    if (await this.checkIfIndexExist({ pageId })) {
      throw new Error('Index already exists!');
    }

    // Convert the Notion page to Markdown
    const markdownBlocks = await this.notionToMarkdown.pageToMarkdown(pageId);
    const { parent: markdownString } = this.notionToMarkdown.toMarkdownString(markdownBlocks);

    // NOTE(review): `parent` appears to be absent when the page has no convertible content;
    // guard here so the splitter is never handed a non-string value.
    if (typeof markdownString !== 'string' || markdownString.trim() === '') {
      throw new Error(`No content found on the page with ID: ${pageId}`);
    }

    // Split the Markdown text into chunks for processing
    const textSplitter = new RecursiveCharacterTextSplitter({ chunkSize });
    const docs = await textSplitter.splitText(markdownString);

    try {
      // Store the embeddings in the vectorDB using SupabaseVectorStore
      await SupabaseVectorStore.fromTexts(docs, { pageId }, this.openAIEmbeddings, this.dbConfig);
      return `Successfully indexed ${docs.length} documents from the page with ID: ${pageId}`;
    } catch (e: unknown) {
      // Existing contract: storage failures are reported through the returned string.
      // Log them as well so they are not silently lost.
      console.error(e);
      return e instanceof Error ? e.message : String(e);
    }
  }

  /**
   * Retrieves a conversational QA chain based on the indexed Notion pages filtered by pageId.
   * A new `BufferMemory` is created on every call, so each returned chain starts with an empty history.
   * @param pageId - The ID of the Notion page.
   * @returns A conversational QA chain for performing query operations.
   */
  async getNotionChain({ pageId }: { pageId: string }): Promise<ConversationalRetrievalQAChain> {
    // Create a SupabaseVectorStore from an existing index for the specified pageId
    const vectorStore = await SupabaseVectorStore.fromExistingIndex(this.openAIEmbeddings, {
      ...this.dbConfig,
      filter: { pageId },
    });

    // Create a ContextualCompressionRetriever for improved document similarity searches
    const retriever = new ContextualCompressionRetriever({
      baseCompressor: this.baseCompressor,
      baseRetriever: vectorStore.asRetriever(),
    });

    // Create a conversational QA chain from the ChatGPT model and retriever
    return ConversationalRetrievalQAChain.fromLLM(this.chatGPTModel, retriever, {
      memory: new BufferMemory({ memoryKey: 'chat_history' }),
      qaChainOptions: { type: 'map_reduce' },
    });
  }

  /**
   * Queries an indexed Notion page with a given question.
   * @param pageId - The ID of the Notion page.
   * @param query - The question to be answered.
   * @returns The answer generated by the QA chain.
   * @throws An error if the pageId is missing, or if the query is missing or contains only whitespace.
   */
  async queryNotionPage({ pageId, query }: { pageId: string; query: string }): Promise<ChainValues> {
    if (!pageId) {
      throw new Error('Page ID not found!');
    }

    // Sanitize the question and replace newlines with spaces.
    // A global regex is used instead of `replaceAll`, which requires an ES2021 runtime/lib.
    const sanitizedQuestion = (query ?? '').trim().replace(/\r?\n/g, ' ');
    if (!sanitizedQuestion) {
      throw new Error('Query not found!');
    }

    // Get the conversational QA chain for the specified pageId
    const notionChain = await this.getNotionChain({ pageId });

    // Perform the query and return the answer
    return notionChain.call({
      question: sanitizedQuestion,
    });
  }
}
/**
 * Route segment configuration: sets this route's `runtime` option to `'edge'`.
 */
export const config = {
  runtime: 'edge',
};
/**
 * Example route handler showing how to use `NotionPageAI`.
 *
 * Reads the `pageId` and `query` search parameters from the request URL and answers the
 * query against the already-indexed Notion page.
 * @param request - The incoming request; only its URL is read.
 * @param context - The fetch event supplied by the runtime (currently unused).
 * @returns A JSON response with the chain result, a 400 response when a required search
 * parameter is missing, or a 500 response when the query fails.
 */
export default async function handler(
  request: NextRequest,
  context: NextFetchEvent,
): Promise<NextResponse> {
  const { searchParams } = new URL(request.url);
  const pageId = searchParams.get('pageId');
  const query = searchParams.get('query');

  // `searchParams.get` returns null for absent parameters; reject early with a client error
  // instead of letting the failure surface as an unhandled exception.
  if (!pageId) {
    return NextResponse.json({ error: 'Missing required search parameter: pageId' }, { status: 400 });
  }

  try {
    /**
     * Initialize all clients
     */
    const notionAI = new NotionPageAI();

    /**
     * 1st - Create a new index for notion pageId
     */
    // const responseIndex = await notionAI.createNotionIndex({ pageId });
    // return NextResponse.json({ text: responseIndex });

    /**
     * 2nd - Ask anything by querying indexed page
     */
    if (!query) {
      return NextResponse.json({ error: 'Missing required search parameter: query' }, { status: 400 });
    }
    const responseQuery = await notionAI.queryNotionPage({ pageId, query });
    return NextResponse.json(responseQuery);
  } catch (e: unknown) {
    // Log the details server-side, but do not leak internal error messages to the client.
    console.error(e);
    return NextResponse.json({ error: 'Internal Server Error' }, { status: 500 });
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment