This gist demonstrates how to use LangChain tools with Functionary and llama_cpp for local inference on macOS.
# This example script showcases how to use llama_cpp to run inference
# and how to use langchain tools.
# Please install the dependencies first:
#   pip install langchain llama-cpp-python huggingface_hub transformers termcolor
# functionary comes from https://github.com/MeetKai/functionary (e.g. clone it and add it to PYTHONPATH).
# This example uses https://github.com/MeetKai/functionary/pull/93 as the basis to demonstrate the use of langchain tools.
import asyncio
import json
from typing import List, Optional

from huggingface_hub import hf_hub_download
from llama_cpp import Llama
from termcolor import colored
from transformers import AutoTokenizer

from langchain.tools import MoveFileTool, format_tool_to_openai_function

from functionary.prompt_template import get_prompt_template_from_tokenizer

class FunctionaryAPI:
    def __init__(self):
        # Model repository on the Hugging Face model hub
        model_repo = "meetkai/functionary-small-v2.2-GGUF"
        # File to download
        file_name = "functionary-small-v2.2.q4_0.gguf"
        # Download the file
        local_file_path = hf_hub_download(repo_id=model_repo, filename=file_name)
        # You can download gguf files from https://huggingface.co/meetkai/functionary-7b-v2-GGUF/tree/main
        self.llm = Llama(model_path=local_file_path, n_ctx=4096, n_gpu_layers=-1)
        # Create the tokenizer from HF.
        # We found that the tokenizer from llama_cpp is not compatible with the tokenizer
        # from HF that we trained, possibly because new tokens were added to the original
        # tokenizer, so we use the tokenizer from Hugging Face instead.
        self.tokenizer = AutoTokenizer.from_pretrained(
            model_repo,
            legacy=True,
        )
        # prompt_template will be used for creating the prompt
        self.prompt_template = get_prompt_template_from_tokenizer(self.tokenizer)
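        # Illustrative check (hypothetical, safe to delete): compare how llama_cpp's
        # built-in tokenizer and the HF tokenizer encode one of the added stop tokens;
        # differing id lists are exactly the incompatibility described above.
        # stop = self.prompt_template.get_stop_tokens_for_generation()[0]
        # print(self.llm.tokenize(stop.encode("utf-8"), add_bos=False))
        # print(self.tokenizer.encode(stop, add_special_tokens=False))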

    async def create(
        self,
        messages: Optional[List] = None,
        tools: Optional[List] = None,
        model="functionary-small-v2.2",  # ignored parameter
    ):
        """Creates a model response for the given chat conversation.
        Matches OpenAI's `chat.create()` function."""
        messages = messages or []
        tools = tools or []
        # Create the prompt to use for inference
        prompt_str = self.prompt_template.get_prompt_from_messages(
            messages + [{"role": "assistant"}], tools
        )
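        # The extra {"role": "assistant"} entry above makes the template close the
        # prompt with an assistant header, so the model generates the assistant's turn.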
        token_ids = self.tokenizer.encode(prompt_str)
        gen_tokens = []
        # Get the list of stop token ids
        stop_token_ids = [
            self.tokenizer.encode(token)[-1]
            for token in self.prompt_template.get_stop_tokens_for_generation()
        ]
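        # encode() may prepend special tokens such as BOS, so the last id of the
        # encoding is the stop token's own id.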
        # We use the generate function (instead of __call__) so we can pass in a list of token ids
        for token_id in self.llm.generate(token_ids, temp=0):
            if token_id in stop_token_ids:
                break
            gen_tokens.append(token_id)
        llm_output = self.tokenizer.decode(gen_tokens)
        # Parse the assistant message from llm_output
        response = self.prompt_template.parse_assistant_response(
            llm_output, tool_choice="auto"
        )
        return response
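
# Example (hypothetical, not executed here): create() returns an OpenAI-style
# assistant message dict, e.g.
#   response = await FunctionaryAPI().create(
#       messages=[{"role": "user", "content": "hello"}], tools=[]
#   )
#   # -> {"role": "assistant", "content": "..."}, or one carrying a "tool_calls" list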

async def main():
    functionary = FunctionaryAPI()
    # Provide some space after the llama_cpp logs
    print("\n\n")
    messages = []
    user_message = {
        "role": "user",
        "content": "I need to move file /FULLPATH/foo to /FULLPATH/bar",
    }
    print(colored(f"User: {user_message['content']}", "light_cyan", attrs=["bold"]))
    messages.append(user_message)
    tools = [MoveFileTool()]
    functions = []
    tool_registry = {}
    for tool in tools:
        functions.append({"type": "function", **format_tool_to_openai_function(tool)})
        # I am sure there is a better way to do this, but to get a working example, I am doing this
        tool_registry[tool.name] = tool
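    # Equivalent one-liner for building the registry: {t.name: t for t in tools}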
print(colored("Tools: ", "dark_grey")) | |
print(colored(json.dumps(functions, indent=2), "dark_grey")) | |
response = await functionary.create(messages=messages, tools=functions) | |
messages.append(response) | |
if response.get("content") is not None: | |
print( | |
colored( | |
f"Assistant: {response['content']}", "light_magenta", attrs=["bold"] | |
) | |
) | |
if response.get("tool_calls") is not None: | |
print() | |
for tool in response["tool_calls"]: | |
requested_function = tool["function"] | |
args = json.loads(requested_function["arguments"]) | |
tool_input = {"tool_input": args} | |
result = tool_registry[requested_function["name"]].run(**tool_input) | |
print( | |
colored( | |
f" 𝑓 {requested_function['name']}({requested_function['arguments']})", | |
"green", | |
), | |
" -> ", | |
colored(str(result), "light_green"), | |
) | |
            # Build the tool-result message by hand (OpenAI-style "tool" role message)
            tool_call_response = {"role": "tool", "tool_call_id": tool["id"], "content": str(result)}
            # OpenAI does not require the name field, but it is required for functionary's tool result. See https://github.com/openai/openai-python/issues/1078
            tool_call_response["name"] = requested_function["name"]
            messages.append(tool_call_response)
        print()
        # Run inference again after running the tools
        response = await functionary.create(messages=messages, tools=functions)
        print(
            colored(
                f"Assistant: {response['content']}", "light_magenta", attrs=["bold"]
            )
        )
        messages.append(response)

if __name__ == "__main__":
    asyncio.run(main())
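The same registry pattern extends to other LangChain tools. A minimal sketch, assuming the imports above (ReadFileTool is another stock file-management tool shipped with langchain; the rest is illustrative, not part of the gist):

    from langchain.tools import MoveFileTool, ReadFileTool, format_tool_to_openai_function

    tools = [MoveFileTool(), ReadFileTool()]
    functions = [{"type": "function", **format_tool_to_openai_function(t)} for t in tools]
    tool_registry = {t.name: t for t in tools}
    # functions is passed to FunctionaryAPI.create(..., tools=functions); tool_registry
    # then resolves each returned tool_call name back to the tool instance to run.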