The NotionPageAI class enables integration between Notion pages and AI-powered functionalities. It provides methods for generating embeddings, storing them in a vectorDB, and performing conversational question-answering based on indexed Notion pages.
import { Client as NotionClient } from '@notionhq/client';
import { createClient, SupabaseClient } from '@supabase/supabase-js';
import { ConversationalRetrievalQAChain } from 'langchain/chains';
import { OpenAIEmbeddings } from 'langchain/embeddings/openai';
import { OpenAIChat } from 'langchain/llms/openai';
import { BufferMemory } from 'langchain/memory';
import { ContextualCompressionRetriever } from 'langchain/retrievers/contextual_compression';
import { LLMChainExtractor } from 'langchain/retrievers/document_compressors/chain_extract';
import { ChainValues, LLMResult } from 'langchain/schema';
import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
import { SupabaseLibArgs, SupabaseVectorStore } from 'langchain/vectorstores/supabase';
import { NextFetchEvent, NextRequest, NextResponse } from 'next/server';
import { NotionToMarkdown } from 'notion-to-md';
const GPT_EMBEDDING_MODEL = 'text-embedding-ada-002';
const GPT_CHAT_MODEL = 'gpt-3.5-turbo';
const PG_VECTOR_DB_TABLE = 'documents';
const PG_FUNC_QUERY_NAME = 'match_documents';
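// NOTE: the table and function names above are assumed to follow the standard LangChain +
// Supabase vector store setup (a `documents` table with `content`, `metadata`, and `embedding`
// columns, plus a `match_documents` similarity-search function) - see
// https://supabase.com/docs/guides/ai/langchain. Adjust them if your schema differs.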
/**
 * The `NotionPageAI` class enables integration between Notion pages and AI-powered functionalities.
 * It provides methods for generating embeddings, storing them in a vectorDB, and performing conversational
 * question-answering based on indexed Notion pages.
 */
class NotionPageAI {
  // Instances of the various clients and utilities used throughout the class
  notionClient: NotionClient;
  supabaseClient: SupabaseClient;
  chatGPTModel: OpenAIChat;
  openAIEmbeddings: OpenAIEmbeddings;
  dbConfig: SupabaseLibArgs;
  baseCompressor: LLMChainExtractor;
  notionToMarkdown: NotionToMarkdown;

  /**
   * Constructs a new instance of the `NotionPageAI` class and initializes the
   * clients, models, and utilities required for the integration.
   */
  constructor() {
    // Initialize the Notion client with the provided API key - see https://developers.notion.com/docs/create-a-notion-integration
    this.notionClient = new NotionClient({ auth: process.env.NOTION_API_KEY });

    // Initialize the Notion-to-Markdown converter
    this.notionToMarkdown = new NotionToMarkdown({ notionClient: this.notionClient });

    // Initialize the Supabase client with the provided URL and private key
    this.supabaseClient = createClient(
      process.env.NEXT_PUBLIC_SUPABASE_URL!,
      process.env.SUPABASE_PRIVATE_KEY!,
    );

    // Initialize the OpenAI chat model with the given configuration and optional logging callbacks
    this.chatGPTModel = new OpenAIChat({
      modelName: GPT_CHAT_MODEL,
      temperature: 0,
      callbacks: [
        // Logging callbacks for better visibility during language model requests
        {
          handleLLMStart: async (llm, prompts: string[]) => {
            console.log(JSON.stringify(prompts, null, 2));
          },
          handleLLMEnd: async (output: LLMResult) => {
            console.log(JSON.stringify(output, null, 2));
          },
          handleLLMError: async (err: Error) => {
            console.error(err);
          },
        },
      ],
    });

    // Initialize the OpenAI embeddings client
    this.openAIEmbeddings = new OpenAIEmbeddings({ modelName: GPT_EMBEDDING_MODEL });

    // Configure the Supabase vectorDB - see https://supabase.com/docs/guides/ai/langchain
    this.dbConfig = {
      client: this.supabaseClient,
      tableName: PG_VECTOR_DB_TABLE,
      queryName: PG_FUNC_QUERY_NAME,
    };

    // Initialize the base compressor used for contextual compression during retrieval
    this.baseCompressor = LLMChainExtractor.fromLLM(this.chatGPTModel);
  }
  /**
   * Checks whether embeddings for a given pageId already exist in the Supabase vectorDB table.
   * @param pageId - The ID of the Notion page.
   * @returns A boolean indicating whether the embeddings exist.
   */
  async checkIfIndexExist({ pageId }: { pageId: string }): Promise<boolean> {
    // Query the table for rows whose metadata contains the specified pageId
    const embeddings = await this.supabaseClient
      .from(PG_VECTOR_DB_TABLE)
      .select()
      .eq('metadata->>pageId', pageId);
    return (embeddings.data?.length ?? 0) > 0;
  }
  /**
   * Generates embeddings for a given Notion page and stores them in the Supabase vectorDB.
   * This method should be run once for a pageId before performing queries against it.
   * @param pageId - The ID of the Notion page.
   * @param chunkSize - The size of the text chunks used for processing. Default: 1000.
   * @returns A success message if indexing completes successfully.
   * @throws An error if the pageId is already indexed.
   */
  async createNotionIndex({ pageId, chunkSize = 1000 }: { pageId: string; chunkSize?: number }): Promise<string> {
    // Check if embeddings for the pageId already exist
    if (await this.checkIfIndexExist({ pageId })) {
      throw new Error('Index already exists!');
    }

    // Convert the Notion page to Markdown
    const markdownBlocks = await this.notionToMarkdown.pageToMarkdown(pageId);
    const { parent: markdownString } = this.notionToMarkdown.toMarkdownString(markdownBlocks);

    // Split the Markdown text into chunks for processing
    const textSplitter = new RecursiveCharacterTextSplitter({ chunkSize });
    const docs = await textSplitter.splitText(markdownString);

    try {
      // Generate embeddings for each chunk and store them in the vectorDB via SupabaseVectorStore
      await SupabaseVectorStore.fromTexts(docs, { pageId }, this.openAIEmbeddings, this.dbConfig);
      return `Successfully indexed ${docs.length} documents from the page with ID: ${pageId}`;
    } catch (e: any) {
      return e.message;
    }
  }
  /**
   * Builds a conversational QA chain over the indexed Notion content, filtered by pageId.
   * @param pageId - The ID of the Notion page.
   * @returns A conversational QA chain for performing query operations.
   */
  async getNotionChain({ pageId }: { pageId: string }): Promise<ConversationalRetrievalQAChain> {
    // Create a SupabaseVectorStore from the existing index, filtered to the specified pageId
    const vectorStore = await SupabaseVectorStore.fromExistingIndex(this.openAIEmbeddings, {
      ...this.dbConfig,
      filter: { pageId },
    });

    // Wrap the base retriever in a ContextualCompressionRetriever so retrieved documents
    // are compressed down to the passages relevant to the question
    const retriever = new ContextualCompressionRetriever({
      baseCompressor: this.baseCompressor,
      baseRetriever: vectorStore.asRetriever(),
    });

    // Create a conversational QA chain from the chat model and retriever
    return ConversationalRetrievalQAChain.fromLLM(this.chatGPTModel, retriever, {
      memory: new BufferMemory({ memoryKey: 'chat_history' }),
      qaChainOptions: { type: 'map_reduce' },
    });
  }
  /**
   * Queries an indexed Notion page with a given question.
   * @param pageId - The ID of the Notion page.
   * @param query - The question to be answered.
   * @returns The answer generated by the QA chain.
   * @throws An error if the query is missing.
   */
  async queryNotionPage({ pageId, query }: { pageId: string; query: string }): Promise<ChainValues> {
    if (!query) {
      throw new Error('Query not found!');
    }

    // Get the conversational QA chain for the specified pageId
    const notionChain = await this.getNotionChain({ pageId });

    // Sanitize the question by replacing newlines with spaces
    const sanitizedQuestion = query.trim().replaceAll('\n', ' ');

    // Run the chain and return the answer
    return notionChain.call({
      question: sanitizedQuestion,
    });
  }
}
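// Example usage (a minimal sketch; assumes NOTION_API_KEY, NEXT_PUBLIC_SUPABASE_URL and
// SUPABASE_PRIVATE_KEY are set, and that the Supabase table/function above exist):
//
//   const notionAI = new NotionPageAI();
//   await notionAI.createNotionIndex({ pageId: '<notion-page-id>' }); // run once per page
//   const answer = await notionAI.queryNotionPage({
//     pageId: '<notion-page-id>',
//     query: 'Summarize this page in two sentences',
//   });
//   console.log(answer);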
/**
 * Edge-function handler showing how to use the class above.
 */
export const config = {
  runtime: 'edge',
};
export default async function handler(request: NextRequest, context: NextFetchEvent) {
  const { searchParams } = new URL(request.url);
  const pageId = searchParams.get('pageId');
  const query = searchParams.get('query');

  // Guard against missing query parameters for the query flow below
  if (!pageId || !query) {
    return NextResponse.json({ error: 'Both pageId and query are required' }, { status: 400 });
  }

  /**
   * Initialize all clients
   */
  const notionAI = new NotionPageAI();

  /**
   * 1st - Create a new index for the Notion pageId
   */
  // const responseIndex = await notionAI.createNotionIndex({ pageId });
  // return NextResponse.json({ text: responseIndex });

  /**
   * 2nd - Ask anything by querying the indexed page
   */
  const responseQuery = await notionAI.queryNotionPage({ pageId, query });
  return NextResponse.json(responseQuery);
}
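// Example request (hypothetical route path - adjust to wherever this file lives in your
// Next.js API routes):
//
//   GET /api/notion-ai?pageId=<notion-page-id>&query=What%20is%20this%20page%20about%3F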