suryakun · July 7, 2024 17:56
diff --git a/gistfile1.txt b/gistfile1.txt
 import 'dotenv/config'
 import 'pdf-parse'
 import { z } from 'zod'
 import { MemoryVectorStore } from 'langchain/vectorstores/memory';
 import { createStuffDocumentsChain } from 'langchain/chains/combine_documents';
 import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
 import { PDFLoader } from '@langchain/community/document_loaders/fs/pdf';
 import { createRetrievalChain } from 'langchain/chains/retrieval';
 import { ChatPromptTemplate } from "@langchain/core/prompts";
 import { ChatGoogleGenerativeAI } from "@langchain/google-genai";
 import { StructuredOutputParser } from 'langchain/output_parsers';
 import { GoogleGenerativeAIEmbeddings } from '@langchain/google-genai';

 // Ollama endpoint and model name. Neither constant is referenced anywhere else
 // in the visible script, which talks to Google Generative AI instead.
 // NOTE(review): Ollama's stock port is usually 11434 — confirm 11435 is intended.
 const ollamaBaseUrl = 'http://localhost:11435';
 const ollamaModel = 'llama3';

 // Prints the value of GOOGLE_APPLICATION_CREDENTIALS at startup (`undefined`
 // when the variable is not set) — presumably a sanity check that the
 // environment loaded; confirm it is still wanted.
 console.log(process.env['GOOGLE_APPLICATION_CREDENTIALS']);

 /**
  * Script entry point: pulls structured applicant data out of a CV stored as a PDF.
  *
  * Steps: load './profile_linkedin.pdf', split it into overlapping chunks, index
  * the chunks in a MemoryVectorStore, then run a retrieval chain whose answer is
  * parsed against a zod schema and printed to stdout as JSON.
  *
  * The leading `void` keeps this expression from being parsed as a call on the
  * previous statement when that statement has no terminating semicolon, and marks
  * the promise as deliberately not awaited. Failures are reported by the `.catch`
  * handler at the bottom instead of surfacing as an unhandled rejection.
  */
 void (async function () {
     // Chat model that generates the answer.
     const llm = new ChatGoogleGenerativeAI({
         model: 'gemini-pro',
         maxOutputTokens: 2048,
     });

     const textSplitter = new RecursiveCharacterTextSplitter({
         chunkSize: 1000,
         chunkOverlap: 200,
     });

     // Load the CV and cut it into overlapping chunks.
     const loader = new PDFLoader('./profile_linkedin.pdf');
     const docs = await loader.load();
     const splits = await textSplitter.splitDocuments(docs);

     // Index the chunks so the chain can retrieve the relevant ones.
     const vectorStore = await MemoryVectorStore.fromDocuments(
         splits,
         new GoogleGenerativeAIEmbeddings(),
     );
     const retriever = vectorStore.asRetriever();

     const systemTemplate = 'You are the HR recruitment officer, you need to collect the data from applicant CV';

     // Optional string field that falls back to '' when the model omits it.
     const textField = (description) =>
         z.string().describe(description).optional().transform((val) => val ?? '');

     const schema = z.object({
         name: textField('name of applicant'),
         phone: textField('phone number of applicant'),
         experiences: z.array(z.object({
             company: textField('company name'),
             time: textField('work time year start and year end'),
             title: textField('applicant job title'),
             // Unlike the fields above, a missing or null jobDescription is passed
             // through as-is rather than being converted to ''.
             jobDescription: z.string().describe('detail of the job, just put empty string if you have no answer').optional().nullable().or(z.literal('')),
         }))
             .describe('Detail of applicant experience')
             // Must be optional for the `?? []` fallback to be reachable; without it
             // a CV with no experience section fails validation.
             .optional()
             .transform((arr) => arr ?? []),
     });

     const parser = StructuredOutputParser.fromZodSchema(schema);

     // `{context}` is not passed to invoke() below; it is expected to be filled in
     // by the retrieval chain from the retrieved chunks.
     const prompt = ChatPromptTemplate.fromMessages([
         ['system', systemTemplate],
         ['human', '{input} {context} {format_instructions}'],
     ]);

     const questionAnswerChain = await createStuffDocumentsChain({ llm, prompt, outputParser: parser });

     const ragChain = await createRetrievalChain({
         retriever,
         combineDocsChain: questionAnswerChain,
     });

     const results = await ragChain.invoke({
         input: 'what the data that you get? make the answer as text without markdown tag',
         format_instructions: parser.getFormatInstructions(),
     });

     console.log(JSON.stringify(results.answer));
 })().catch((err) => {
     // Report the failure once and exit non-zero.
     console.error('Failed to extract applicant data from the CV:', err);
     process.exitCode = 1;
 });
	import 'dotenv/config'
	import 'pdf-parse'
	import { z } from 'zod'
	import { MemoryVectorStore } from 'langchain/vectorstores/memory';
	import { createStuffDocumentsChain } from 'langchain/chains/combine_documents';
	import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
	import { PDFLoader } from '@langchain/community/document_loaders/fs/pdf';
	import { createRetrievalChain } from 'langchain/chains/retrieval';
	import { ChatPromptTemplate } from "@langchain/core/prompts";
	import { ChatGoogleGenerativeAI } from "@langchain/google-genai";
	import { StructuredOutputParser } from 'langchain/output_parsers';
	import { GoogleGenerativeAIEmbeddings } from '@langchain/google-genai';

	// Ollama endpoint and model name. Neither constant is referenced anywhere else
	// in the visible script, which talks to Google Generative AI instead.
	// NOTE(review): Ollama's stock port is usually 11434 — confirm 11435 is intended.
	const ollamaBaseUrl = 'http://localhost:11435';
	const ollamaModel = 'llama3';

	// Prints the value of GOOGLE_APPLICATION_CREDENTIALS at startup (`undefined`
	// when the variable is not set) — presumably a sanity check that the
	// environment loaded; confirm it is still wanted.
	console.log(process.env['GOOGLE_APPLICATION_CREDENTIALS']);

	/**
	 * Script entry point: pulls structured applicant data out of a CV stored as a PDF.
	 *
	 * Steps: load './profile_linkedin.pdf', split it into overlapping chunks, index
	 * the chunks in a MemoryVectorStore, then run a retrieval chain whose answer is
	 * parsed against a zod schema and printed to stdout as JSON.
	 *
	 * The leading `void` keeps this expression from being parsed as a call on the
	 * previous statement when that statement has no terminating semicolon, and marks
	 * the promise as deliberately not awaited. Failures are reported by the `.catch`
	 * handler at the bottom instead of surfacing as an unhandled rejection.
	 */
	void (async function () {
		// Chat model that generates the answer.
		const llm = new ChatGoogleGenerativeAI({
			model: 'gemini-pro',
			maxOutputTokens: 2048,
		});

		const textSplitter = new RecursiveCharacterTextSplitter({
			chunkSize: 1000,
			chunkOverlap: 200,
		});

		// Load the CV and cut it into overlapping chunks.
		const loader = new PDFLoader('./profile_linkedin.pdf');
		const docs = await loader.load();
		const splits = await textSplitter.splitDocuments(docs);

		// Index the chunks so the chain can retrieve the relevant ones.
		const vectorStore = await MemoryVectorStore.fromDocuments(
			splits,
			new GoogleGenerativeAIEmbeddings(),
		);
		const retriever = vectorStore.asRetriever();

		const systemTemplate = 'You are the HR recruitment officer, you need to collect the data from applicant CV';

		// Optional string field that falls back to '' when the model omits it.
		const textField = (description) =>
			z.string().describe(description).optional().transform((val) => val ?? '');

		const schema = z.object({
			name: textField('name of applicant'),
			phone: textField('phone number of applicant'),
			experiences: z.array(z.object({
				company: textField('company name'),
				time: textField('work time year start and year end'),
				title: textField('applicant job title'),
				// Unlike the fields above, a missing or null jobDescription is passed
				// through as-is rather than being converted to ''.
				jobDescription: z.string().describe('detail of the job, just put empty string if you have no answer').optional().nullable().or(z.literal('')),
			}))
				.describe('Detail of applicant experience')
				// Must be optional for the `?? []` fallback to be reachable; without it
				// a CV with no experience section fails validation.
				.optional()
				.transform((arr) => arr ?? []),
		});

		const parser = StructuredOutputParser.fromZodSchema(schema);

		// `{context}` is not passed to invoke() below; it is expected to be filled in
		// by the retrieval chain from the retrieved chunks.
		const prompt = ChatPromptTemplate.fromMessages([
			['system', systemTemplate],
			['human', '{input} {context} {format_instructions}'],
		]);

		const questionAnswerChain = await createStuffDocumentsChain({ llm, prompt, outputParser: parser });

		const ragChain = await createRetrievalChain({
			retriever,
			combineDocsChain: questionAnswerChain,
		});

		const results = await ragChain.invoke({
			input: 'what the data that you get? make the answer as text without markdown tag',
			format_instructions: parser.getFormatInstructions(),
		});

		console.log(JSON.stringify(results.answer));
	})().catch((err) => {
		// Report the failure once and exit non-zero.
		console.error('Failed to extract applicant data from the CV:', err);
		process.exitCode = 1;
	});