
@shreyaskarnik
Created January 20, 2024 02:19
This gist demonstrates how to use LangChain tools with Functionary via llama_cpp for local inference on macOS.
# This example script showcases how to use llama_cpp to run inference
# and use langchain tools
# please install langchain first: pip install langchain
# this example uses https://github.com/MeetKai/functionary/pull/93 as the basis to demonstrate use of langchain tools.
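# Note (added for completeness, assuming a typical local setup): besides langchain,
# the imports below also need llama-cpp-python, transformers, huggingface_hub,
# termcolor, and pydantic, plus the functionary code from the PR branch above
# (e.g. cloned and on PYTHONPATH); exact install steps may differ per environment.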
import asyncio
import json
from typing import List
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
from pydantic import Field
from termcolor import colored
from transformers import AutoTokenizer
from langchain.tools import MoveFileTool, format_tool_to_openai_function
from functionary.prompt_template import get_prompt_template_from_tokenizer


class FunctionaryAPI:
    def __init__(self):
        # Model repository on the Hugging Face model hub
        model_repo = "meetkai/functionary-small-v2.2-GGUF"
        # File to download
        file_name = "functionary-small-v2.2.q4_0.gguf"
        # Download the GGUF weights (other quantizations are available at
        # https://huggingface.co/meetkai/functionary-7b-v2-GGUF/tree/main)
        local_file_path = hf_hub_download(repo_id=model_repo, filename=file_name)
        self.llm = Llama(model_path=local_file_path, n_ctx=4096, n_gpu_layers=-1)
        # Create the tokenizer from HF.
        # The tokenizer bundled with llama_cpp is not compatible with the HF tokenizer
        # the model was trained with (likely because new tokens were added),
        # so we use the HuggingFace tokenizer instead.
        self.tokenizer = AutoTokenizer.from_pretrained(
            model_repo,
            legacy=True,
        )
        # prompt_template will be used for creating the prompt
        self.prompt_template = get_prompt_template_from_tokenizer(self.tokenizer)

    async def create(
        self,
        messages: List = Field(default_factory=list),
        tools: List = Field(default_factory=list),
        model="functionary-small-v2.2",  # ignored parameter
    ):
        """Creates a model response for the given chat conversation.
        Matches OpenAI's `chat.create()` function."""
        # Create the prompt to use for inference
        prompt_str = self.prompt_template.get_prompt_from_messages(
            messages + [{"role": "assistant"}], tools
        )
        token_ids = self.tokenizer.encode(prompt_str)
        gen_tokens = []
        # Get the list of stop tokens
        stop_token_ids = [
            self.tokenizer.encode(token)[-1]
            for token in self.prompt_template.get_stop_tokens_for_generation()
        ]
        # Use generate() (instead of __call__) so we can pass in a list of token ids
        for token_id in self.llm.generate(token_ids, temp=0):
            if token_id in stop_token_ids:
                break
            gen_tokens.append(token_id)
        llm_output = self.tokenizer.decode(gen_tokens)
        # Parse the assistant message (content and/or tool calls) from llm_output
        response = self.prompt_template.parse_assistant_response(
            llm_output, tool_choice="auto"
        )
        return response


def tool_result(tool_call_id, content):
    # Helper (not from any library): build an OpenAI-style tool-result message
    # for an executed tool call.
    return {"role": "tool", "tool_call_id": tool_call_id, "content": content}


async def main():
    functionary = FunctionaryAPI()
    # Provide some space after the llama_cpp logs
    print("\n\n")
    messages = []
    user_message = {
        "role": "user",
        "content": "I need to move file /FULLPATH/foo to /FULLPATH/bar",
    }
    print(colored(f"User: {user_message['content']}", "light_cyan", attrs=["bold"]))
    messages.append(user_message)
    tools = [MoveFileTool()]
    functions = []
    tool_registry = {}
    for tool in tools:
        functions.append({"type": "function", **format_tool_to_openai_function(tool)})
        # There is probably a better way to do this, but to keep the example simple,
        # track a name -> tool mapping so tool calls can be dispatched by name
        tool_registry[tool.name] = tool
    print(colored("Tools: ", "dark_grey"))
    print(colored(json.dumps(functions, indent=2), "dark_grey"))
    response = await functionary.create(messages=messages, tools=functions)
    messages.append(response)
    if response.get("content") is not None:
        print(
            colored(
                f"Assistant: {response['content']}", "light_magenta", attrs=["bold"]
            )
        )
    if response.get("tool_calls") is not None:
        print()
        for tool in response["tool_calls"]:
            requested_function = tool["function"]
            args = json.loads(requested_function["arguments"])
            tool_input = {"tool_input": args}
            result = tool_registry[requested_function["name"]].run(**tool_input)
            print(
                colored(
                    f" 𝑓 {requested_function['name']}({requested_function['arguments']})",
                    "green",
                ),
                " -> ",
                colored(str(result), "light_green"),
            )
            tool_call_response = tool_result(tool["id"], content=str(result))
            # OpenAI does not require the name field, but it is required for
            # functionary's tool result. See https://github.com/openai/openai-python/issues/1078
            tool_call_response["name"] = requested_function["name"]
            messages.append(tool_call_response)
        print()
        # Run inference again after running the tools
        response = await functionary.create(messages=messages, tools=functions)
        print(
            colored(
                f"Assistant: {response['content']}", "light_magenta", attrs=["bold"]
            )
        )
        messages.append(response)


if __name__ == "__main__":
    asyncio.run(main())
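For reference, here is a minimal sketch of the parsed response that main() consumes, inferred from how the dict is indexed above (the exact keys come from functionary's parse_assistant_response; the id and argument field names shown here are illustrative only):

response = {
    "role": "assistant",
    "content": None,
    "tool_calls": [
        {
            "id": "call_0",  # illustrative id
            "type": "function",
            "function": {
                "name": "move_file",
                "arguments": '{"source_path": "/FULLPATH/foo", "destination_path": "/FULLPATH/bar"}',
            },
        }
    ],
}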