ashbuilds · June 8, 2023 09:48
diff --git a/NotionQAClient.ts b/NotionQAClient.ts
 import { Client as NotionClient } from '@notionhq/client';
 import { createClient, SupabaseClient } from '@supabase/supabase-js';

 import { ConversationalRetrievalQAChain } from 'langchain/chains';
 import { OpenAIEmbeddings } from 'langchain/embeddings/openai';
 import { OpenAIChat } from 'langchain/llms/openai';
 import { BufferMemory } from 'langchain/memory';
 import { ContextualCompressionRetriever } from 'langchain/retrievers/contextual_compression';
 import { LLMChainExtractor } from 'langchain/retrievers/document_compressors/chain_extract';
 import { ChainValues, LLMResult } from 'langchain/schema';
 import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
 import { SupabaseLibArgs, SupabaseVectorStore } from 'langchain/vectorstores/supabase';

 import { NextFetchEvent, NextRequest, NextResponse } from 'next/server';
 import { NotionToMarkdown } from 'notion-to-md';

 // OpenAI model name handed to `OpenAIEmbeddings` when embedding page chunks and queries.
 const GPT_EMBEDDING_MODEL = 'text-embedding-ada-002';
 // OpenAI model name handed to `OpenAIChat` for answering and for contextual compression.
 const GPT_CHAT_MODEL = 'gpt-3.5-turbo';
 // Supabase table used both by the vector store and by the "is this page indexed?" lookup.
 const PG_VECTOR_DB_TABLE = 'documents';
 // Value passed as `queryName` in the vector store config.
 // NOTE(review): presumably the Postgres similarity-search function created per the Supabase/LangChain guide — confirm.
 const PG_FUNC_QUERY_NAME = 'match_documents';

 /**
 * `NotionPageAI` connects Notion pages to LLM-powered question answering.
 *
 * It converts a Notion page to Markdown, splits the text into chunks, stores the chunk embeddings
 * in a Supabase vector table tagged with the page ID, and answers questions through a
 * conversational retrieval QA chain whose retriever is filtered to that page ID.
 */
 class NotionPageAI {
  // Notion API client used to read page content.
  notionClient: NotionClient;
  // Supabase client used for the vector table.
  supabaseClient: SupabaseClient;
  // Chat model shared by the QA chain and the contextual compressor.
  chatGPTModel: OpenAIChat;
  // Embedding client used when indexing and when querying.
  openAIEmbeddings: OpenAIEmbeddings;
  // Client, table name and query name handed to `SupabaseVectorStore`.
  dbConfig: SupabaseLibArgs;
  // Compressor plugged into the `ContextualCompressionRetriever`.
  baseCompressor: LLMChainExtractor;
  // Notion -> Markdown converter.
  notionToMarkdown: NotionToMarkdown;

  /**
   * Creates every client, model and utility the class needs.
   *
   * Reads `NOTION_API_KEY`, `NEXT_PUBLIC_SUPABASE_URL` and `SUPABASE_PRIVATE_KEY` from the environment.
   *
   * @throws An error naming the missing variables when the Supabase URL or key is not configured.
   */
  constructor() {
    // Notion integration setup: https://developers.notion.com/docs/create-a-notion-integration
    // The key is not validated here because only the indexing path talks to Notion.
    this.notionClient = new NotionClient({ auth: process.env.NOTION_API_KEY });
    this.notionToMarkdown = new NotionToMarkdown({ notionClient: this.notionClient });

    // Keep the static `process.env.X` form and fail early with an actionable message
    // instead of passing `undefined` into `createClient`.
    const supabaseUrl = process.env.NEXT_PUBLIC_SUPABASE_URL;
    const supabaseKey = process.env.SUPABASE_PRIVATE_KEY;
    if (!supabaseUrl || !supabaseKey) {
      throw new Error(
        'Missing Supabase configuration: set NEXT_PUBLIC_SUPABASE_URL and SUPABASE_PRIVATE_KEY.',
      );
    }
    this.supabaseClient = createClient(supabaseUrl, supabaseKey);

    // Deterministic chat model (temperature 0) with callbacks that log every prompt, result and error.
    this.chatGPTModel = new OpenAIChat({
      modelName: GPT_CHAT_MODEL,
      temperature: 0,
      callbacks: [
        {
          handleLLMStart: async (llm, prompts: string[]) => {
            console.log(JSON.stringify(prompts, null, 2));
          },
          handleLLMEnd: async (output: LLMResult) => {
            console.log(JSON.stringify(output, null, 2));
          },
          handleLLMError: async (err: Error) => {
            console.error(err);
          },
        },
      ],
    });

    this.openAIEmbeddings = new OpenAIEmbeddings({ modelName: GPT_EMBEDDING_MODEL });

    // Supabase vector store setup: https://supabase.com/docs/guides/ai/langchain
    this.dbConfig = {
      client: this.supabaseClient,
      tableName: PG_VECTOR_DB_TABLE,
      queryName: PG_FUNC_QUERY_NAME,
    };

    this.baseCompressor = LLMChainExtractor.fromLLM(this.chatGPTModel);
  }

  /**
   * Checks whether the vector table already holds at least one row for the given page.
   *
   * @param pageId - The ID of the Notion page.
   * @returns `true` when a matching row exists, `false` otherwise.
   * @throws An error when the Supabase lookup itself fails, so a failed lookup is never
   *   mistaken for "not indexed yet".
   */
  async checkIfIndexExist({ pageId }: { pageId: string }): Promise<boolean> {
    // NOTE(review): PostgREST JSON-path filters are usually written without spaces
    // (`metadata->>pageId`) — confirm this spelling matches rows as intended.
    const { data, error } = await this.supabaseClient
      .from(PG_VECTOR_DB_TABLE)
      .select()
      .eq('metadata ->> pageId', pageId)
      // One row is enough to answer the question; avoid downloading every stored row.
      .limit(1);

    if (error) {
      throw new Error(`Failed to check the index for page ${pageId}: ${error.message}`);
    }

    return (data?.length ?? 0) > 0;
  }

  /**
   * Generates embeddings for a Notion page and stores them in the Supabase vector table.
   * Run this once per page before querying it.
   *
   * @param pageId - The ID of the Notion page.
   * @param chunkSize - The size of the text chunks for processing. Default: 1000.
   * @returns A success message when indexing completes. When the page yields no text, or when
   *   storing the embeddings fails, the returned string describes that outcome instead
   *   (storage failures are also logged).
   * @throws An error if the pageId is missing or already indexed.
   */
  async createNotionIndex({
    pageId,
    chunkSize = 1000,
  }: {
    pageId: string;
    chunkSize?: number;
  }): Promise<string> {
    if (!pageId) {
      throw new Error('Page ID not found!');
    }

    if (await this.checkIfIndexExist({ pageId })) {
      throw new Error('Index already exists!');
    }

    // Convert the Notion page to a single Markdown string.
    const markdownBlocks = await this.notionToMarkdown.pageToMarkdown(pageId);
    const { parent: markdownString } = this.notionToMarkdown.toMarkdownString(markdownBlocks);

    // Guard against a page that produced no Markdown at all before splitting.
    const textSplitter = new RecursiveCharacterTextSplitter({ chunkSize });
    const docs = await textSplitter.splitText(markdownString ?? '');
    if (docs.length === 0) {
      return `No content found on the page with ID: ${pageId}; nothing was indexed.`;
    }

    try {
      // Every chunk is stored with `{ pageId }` as metadata so queries can filter by page.
      await SupabaseVectorStore.fromTexts(docs, { pageId }, this.openAIEmbeddings, this.dbConfig);

      return `Successfully indexed ${docs.length} documents from the page with ID: ${pageId}`;
    } catch (e) {
      // Callers receive the failure as text (existing contract); log it so it is not lost.
      console.error(e);
      return e instanceof Error ? e.message : String(e);
    }
  }

  /**
   * Builds a conversational QA chain whose retriever only sees rows tagged with `pageId`.
   *
   * A new `BufferMemory` is created on every call, so chat history is not shared between chains.
   *
   * @param pageId - The ID of the Notion page.
   * @returns A conversational QA chain for performing query operations.
   */
  async getNotionChain({ pageId }: { pageId: string }): Promise<ConversationalRetrievalQAChain> {
    const vectorStore = await SupabaseVectorStore.fromExistingIndex(this.openAIEmbeddings, {
      ...this.dbConfig,
      filter: { pageId },
    });

    // Wrap the vector-store retriever with the LLM-based compressor.
    const retriever = new ContextualCompressionRetriever({
      baseCompressor: this.baseCompressor,
      baseRetriever: vectorStore.asRetriever(),
    });

    return ConversationalRetrievalQAChain.fromLLM(this.chatGPTModel, retriever, {
      memory: new BufferMemory({ memoryKey: 'chat_history' }),
      qaChainOptions: { type: 'map_reduce' },
    });
  }

  /**
   * Queries an indexed Notion page with a given question.
   *
   * @param pageId - The ID of the Notion page.
   * @param query - The question to be answered.
   * @returns The values produced by the QA chain.
   * @throws An error if the query is missing or blank, or if the pageId is missing.
   */
  async queryNotionPage({
    pageId,
    query,
  }: {
    pageId: string;
    query: string;
  }): Promise<ChainValues> {
    // Trim first so a whitespace-only query is rejected rather than sent as an empty question.
    // A regex replace handles both `\n` and `\r\n` and does not need ES2021 `replaceAll`.
    const sanitizedQuestion =
      typeof query === 'string' ? query.trim().replace(/\r?\n/g, ' ') : '';
    if (!sanitizedQuestion) {
      throw new Error('Query not found!');
    }

    if (!pageId) {
      throw new Error('Page ID not found!');
    }

    const notionChain = await this.getNotionChain({ pageId });

    return notionChain.call({
      question: sanitizedQuestion,
    });
  }
 }

 /**
 * Edge-function-specific section: everything from here on shows how to use the class above
 * from a Next.js route.
 *
 * `config` sets `runtime` to `'edge'` for this route.
 * NOTE(review): Next.js is understood to read this export statically at build time, so keep it
 * a plain object literal — confirm against the Next.js version in use.
 */
 export const config = {
  runtime: 'edge',
 };

 /**
 * Example edge route: answers `query` against the already-indexed Notion page `pageId`.
 *
 * Both values are read from the URL search parameters.
 *
 * @param request - Incoming request; only its URL is read.
 * @param context - Unused; kept so the handler signature stays unchanged.
 * @returns The chain output as JSON, a 400 JSON error when a parameter is missing,
 *   or a 500 JSON error when answering fails.
 */
 export default async function handler(request: NextRequest, context: NextFetchEvent) {
  const { searchParams } = new URL(request.url);
  const pageId = searchParams.get('pageId');
  const query = searchParams.get('query');

  // Reject bad input up front instead of letting `null` travel into the clients.
  if (!pageId) {
    return NextResponse.json(
      { error: 'Missing required search parameter "pageId".' },
      { status: 400 },
    );
  }

  try {
    /**
     * Initialize all clients
     */
    const notionAI = new NotionPageAI();

    /**
     * 1st - Create a new index for notion pageId
     */
    // const responseIndex = await notionAI.createNotionIndex({ pageId });
    // return NextResponse.json({ text: responseIndex });

    /**
     * 2nd - Ask anything by querying indexed page
     */
    if (!query || !query.trim()) {
      return NextResponse.json(
        { error: 'Missing required search parameter "query".' },
        { status: 400 },
      );
    }

    const responseQuery = await notionAI.queryNotionPage({ pageId, query });

    return NextResponse.json(responseQuery);
  } catch (e) {
    // Log the details server-side and return a generic message so internals are not exposed.
    console.error(e);
    return NextResponse.json({ error: 'Failed to answer the query.' }, { status: 500 });
  }
 }
	import { Client as NotionClient } from '@notionhq/client';
	import { createClient, SupabaseClient } from '@supabase/supabase-js';

	import { ConversationalRetrievalQAChain } from 'langchain/chains';
	import { OpenAIEmbeddings } from 'langchain/embeddings/openai';
	import { OpenAIChat } from 'langchain/llms/openai';
	import { BufferMemory } from 'langchain/memory';
	import { ContextualCompressionRetriever } from 'langchain/retrievers/contextual_compression';
	import { LLMChainExtractor } from 'langchain/retrievers/document_compressors/chain_extract';
	import { ChainValues, LLMResult } from 'langchain/schema';
	import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
	import { SupabaseLibArgs, SupabaseVectorStore } from 'langchain/vectorstores/supabase';

	import { NextFetchEvent, NextRequest, NextResponse } from 'next/server';
	import { NotionToMarkdown } from 'notion-to-md';

	// OpenAI model name handed to `OpenAIEmbeddings` when embedding page chunks and queries.
	const GPT_EMBEDDING_MODEL = 'text-embedding-ada-002';
	// OpenAI model name handed to `OpenAIChat` for answering and for contextual compression.
	const GPT_CHAT_MODEL = 'gpt-3.5-turbo';
	// Supabase table used both by the vector store and by the "is this page indexed?" lookup.
	const PG_VECTOR_DB_TABLE = 'documents';
	// Value passed as `queryName` in the vector store config.
	// NOTE(review): presumably the Postgres similarity-search function created per the Supabase/LangChain guide — confirm.
	const PG_FUNC_QUERY_NAME = 'match_documents';

	/**
	 * `NotionPageAI` connects Notion pages to LLM-powered question answering.
	 *
	 * It converts a Notion page to Markdown, splits the text into chunks, stores the chunk embeddings
	 * in a Supabase vector table tagged with the page ID, and answers questions through a
	 * conversational retrieval QA chain whose retriever is filtered to that page ID.
	 */
	class NotionPageAI {
	  // Notion API client used to read page content.
	  notionClient: NotionClient;
	  // Supabase client used for the vector table.
	  supabaseClient: SupabaseClient;
	  // Chat model shared by the QA chain and the contextual compressor.
	  chatGPTModel: OpenAIChat;
	  // Embedding client used when indexing and when querying.
	  openAIEmbeddings: OpenAIEmbeddings;
	  // Client, table name and query name handed to `SupabaseVectorStore`.
	  dbConfig: SupabaseLibArgs;
	  // Compressor plugged into the `ContextualCompressionRetriever`.
	  baseCompressor: LLMChainExtractor;
	  // Notion -> Markdown converter.
	  notionToMarkdown: NotionToMarkdown;

	  /**
	   * Creates every client, model and utility the class needs.
	   *
	   * Reads `NOTION_API_KEY`, `NEXT_PUBLIC_SUPABASE_URL` and `SUPABASE_PRIVATE_KEY` from the environment.
	   *
	   * @throws An error naming the missing variables when the Supabase URL or key is not configured.
	   */
	  constructor() {
	    // Notion integration setup: https://developers.notion.com/docs/create-a-notion-integration
	    // The key is not validated here because only the indexing path talks to Notion.
	    this.notionClient = new NotionClient({ auth: process.env.NOTION_API_KEY });
	    this.notionToMarkdown = new NotionToMarkdown({ notionClient: this.notionClient });

	    // Keep the static `process.env.X` form and fail early with an actionable message
	    // instead of passing `undefined` into `createClient`.
	    const supabaseUrl = process.env.NEXT_PUBLIC_SUPABASE_URL;
	    const supabaseKey = process.env.SUPABASE_PRIVATE_KEY;
	    if (!supabaseUrl || !supabaseKey) {
	      throw new Error(
	        'Missing Supabase configuration: set NEXT_PUBLIC_SUPABASE_URL and SUPABASE_PRIVATE_KEY.',
	      );
	    }
	    this.supabaseClient = createClient(supabaseUrl, supabaseKey);

	    // Deterministic chat model (temperature 0) with callbacks that log every prompt, result and error.
	    this.chatGPTModel = new OpenAIChat({
	      modelName: GPT_CHAT_MODEL,
	      temperature: 0,
	      callbacks: [
	        {
	          handleLLMStart: async (llm, prompts: string[]) => {
	            console.log(JSON.stringify(prompts, null, 2));
	          },
	          handleLLMEnd: async (output: LLMResult) => {
	            console.log(JSON.stringify(output, null, 2));
	          },
	          handleLLMError: async (err: Error) => {
	            console.error(err);
	          },
	        },
	      ],
	    });

	    this.openAIEmbeddings = new OpenAIEmbeddings({ modelName: GPT_EMBEDDING_MODEL });

	    // Supabase vector store setup: https://supabase.com/docs/guides/ai/langchain
	    this.dbConfig = {
	      client: this.supabaseClient,
	      tableName: PG_VECTOR_DB_TABLE,
	      queryName: PG_FUNC_QUERY_NAME,
	    };

	    this.baseCompressor = LLMChainExtractor.fromLLM(this.chatGPTModel);
	  }

	  /**
	   * Checks whether the vector table already holds at least one row for the given page.
	   *
	   * @param pageId - The ID of the Notion page.
	   * @returns `true` when a matching row exists, `false` otherwise.
	   * @throws An error when the Supabase lookup itself fails, so a failed lookup is never
	   *   mistaken for "not indexed yet".
	   */
	  async checkIfIndexExist({ pageId }: { pageId: string }): Promise<boolean> {
	    // NOTE(review): PostgREST JSON-path filters are usually written without spaces
	    // (`metadata->>pageId`) — confirm this spelling matches rows as intended.
	    const { data, error } = await this.supabaseClient
	      .from(PG_VECTOR_DB_TABLE)
	      .select()
	      .eq('metadata ->> pageId', pageId)
	      // One row is enough to answer the question; avoid downloading every stored row.
	      .limit(1);

	    if (error) {
	      throw new Error(`Failed to check the index for page ${pageId}: ${error.message}`);
	    }

	    return (data?.length ?? 0) > 0;
	  }

	  /**
	   * Generates embeddings for a Notion page and stores them in the Supabase vector table.
	   * Run this once per page before querying it.
	   *
	   * @param pageId - The ID of the Notion page.
	   * @param chunkSize - The size of the text chunks for processing. Default: 1000.
	   * @returns A success message when indexing completes. When the page yields no text, or when
	   *   storing the embeddings fails, the returned string describes that outcome instead
	   *   (storage failures are also logged).
	   * @throws An error if the pageId is missing or already indexed.
	   */
	  async createNotionIndex({
	    pageId,
	    chunkSize = 1000,
	  }: {
	    pageId: string;
	    chunkSize?: number;
	  }): Promise<string> {
	    if (!pageId) {
	      throw new Error('Page ID not found!');
	    }

	    if (await this.checkIfIndexExist({ pageId })) {
	      throw new Error('Index already exists!');
	    }

	    // Convert the Notion page to a single Markdown string.
	    const markdownBlocks = await this.notionToMarkdown.pageToMarkdown(pageId);
	    const { parent: markdownString } = this.notionToMarkdown.toMarkdownString(markdownBlocks);

	    // Guard against a page that produced no Markdown at all before splitting.
	    const textSplitter = new RecursiveCharacterTextSplitter({ chunkSize });
	    const docs = await textSplitter.splitText(markdownString ?? '');
	    if (docs.length === 0) {
	      return `No content found on the page with ID: ${pageId}; nothing was indexed.`;
	    }

	    try {
	      // Every chunk is stored with `{ pageId }` as metadata so queries can filter by page.
	      await SupabaseVectorStore.fromTexts(docs, { pageId }, this.openAIEmbeddings, this.dbConfig);

	      return `Successfully indexed ${docs.length} documents from the page with ID: ${pageId}`;
	    } catch (e) {
	      // Callers receive the failure as text (existing contract); log it so it is not lost.
	      console.error(e);
	      return e instanceof Error ? e.message : String(e);
	    }
	  }

	  /**
	   * Builds a conversational QA chain whose retriever only sees rows tagged with `pageId`.
	   *
	   * A new `BufferMemory` is created on every call, so chat history is not shared between chains.
	   *
	   * @param pageId - The ID of the Notion page.
	   * @returns A conversational QA chain for performing query operations.
	   */
	  async getNotionChain({ pageId }: { pageId: string }): Promise<ConversationalRetrievalQAChain> {
	    const vectorStore = await SupabaseVectorStore.fromExistingIndex(this.openAIEmbeddings, {
	      ...this.dbConfig,
	      filter: { pageId },
	    });

	    // Wrap the vector-store retriever with the LLM-based compressor.
	    const retriever = new ContextualCompressionRetriever({
	      baseCompressor: this.baseCompressor,
	      baseRetriever: vectorStore.asRetriever(),
	    });

	    return ConversationalRetrievalQAChain.fromLLM(this.chatGPTModel, retriever, {
	      memory: new BufferMemory({ memoryKey: 'chat_history' }),
	      qaChainOptions: { type: 'map_reduce' },
	    });
	  }

	  /**
	   * Queries an indexed Notion page with a given question.
	   *
	   * @param pageId - The ID of the Notion page.
	   * @param query - The question to be answered.
	   * @returns The values produced by the QA chain.
	   * @throws An error if the query is missing or blank, or if the pageId is missing.
	   */
	  async queryNotionPage({
	    pageId,
	    query,
	  }: {
	    pageId: string;
	    query: string;
	  }): Promise<ChainValues> {
	    // Trim first so a whitespace-only query is rejected rather than sent as an empty question.
	    // A regex replace handles both `\n` and `\r\n` and does not need ES2021 `replaceAll`.
	    const sanitizedQuestion =
	      typeof query === 'string' ? query.trim().replace(/\r?\n/g, ' ') : '';
	    if (!sanitizedQuestion) {
	      throw new Error('Query not found!');
	    }

	    if (!pageId) {
	      throw new Error('Page ID not found!');
	    }

	    const notionChain = await this.getNotionChain({ pageId });

	    return notionChain.call({
	      question: sanitizedQuestion,
	    });
	  }
	}

	/**
	* Edge-function-specific section: everything from here on shows how to use the class above
	* from a Next.js route.
	*
	* `config` sets `runtime` to `'edge'` for this route.
	* NOTE(review): Next.js is understood to read this export statically at build time, so keep it
	* a plain object literal — confirm against the Next.js version in use.
	*/
	export const config = {
	runtime: 'edge',
	};

	/**
	 * Example edge route: answers `query` against the already-indexed Notion page `pageId`.
	 *
	 * Both values are read from the URL search parameters.
	 *
	 * @param request - Incoming request; only its URL is read.
	 * @param context - Unused; kept so the handler signature stays unchanged.
	 * @returns The chain output as JSON, a 400 JSON error when a parameter is missing,
	 *   or a 500 JSON error when answering fails.
	 */
	export default async function handler(request: NextRequest, context: NextFetchEvent) {
	  const { searchParams } = new URL(request.url);
	  const pageId = searchParams.get('pageId');
	  const query = searchParams.get('query');

	  // Reject bad input up front instead of letting `null` travel into the clients.
	  if (!pageId) {
	    return NextResponse.json(
	      { error: 'Missing required search parameter "pageId".' },
	      { status: 400 },
	    );
	  }

	  try {
	    /**
	     * Initialize all clients
	     */
	    const notionAI = new NotionPageAI();

	    /**
	     * 1st - Create a new index for notion pageId
	     */
	    // const responseIndex = await notionAI.createNotionIndex({ pageId });
	    // return NextResponse.json({ text: responseIndex });

	    /**
	     * 2nd - Ask anything by querying indexed page
	     */
	    if (!query || !query.trim()) {
	      return NextResponse.json(
	        { error: 'Missing required search parameter "query".' },
	        { status: 400 },
	      );
	    }

	    const responseQuery = await notionAI.queryNotionPage({ pageId, query });

	    return NextResponse.json(responseQuery);
	  } catch (e) {
	    // Log the details server-side and return a generic message so internals are not exposed.
	    console.error(e);
	    return NextResponse.json({ error: 'Failed to answer the query.' }, { status: 500 });
	  }
	}