Last active
February 9, 2025 07:05
-
-
Save dat-boris/65403dda02469763bea5dfc72ad10fcb to your computer and use it in GitHub Desktop.
A brief study to compare effectiveness of how to prompt for structured output
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
So, you can give instruction to structured output (SO) in 3 format: | |
1. Include in desciription in the field of SO | |
2. Include the JSON schema in prompt. | |
3. Include it directly in prompt in Natural langauge | |
If we are given conflicting instructions between prompt and structured output, | |
which one will win? This will tell which of the above is the most powerful! | |
It turns out that: | |
3 > 2 > 1 | |
i.e. using Natural language, to specific Structured output is the best. | |
This could be a OpenAI model specific result, as they might have intentionally | |
prioritize system prompt to improve steerabiltiy and to avoid prompt overwriting. | |
""" | |
import json | |
import os | |
from pprint import pprint | |
from typing import List | |
from pydantic import BaseModel, Field | |
from openai import OpenAI | |
MODEL = "gpt-4o-mini" | |
BASE_URL = os.getenv("OPENAI_BASE_URL") | |
# For talking to local ollama server | |
# Note that desciription was not respected at all | |
# MODEL = "deepseek-v2:16b" | |
# BASE_URL = "http://localhost:11434/v1" | |
client = OpenAI( | |
api_key=os.getenv("OPENAI_API_KEY"), | |
base_url=BASE_URL, | |
) | |
class CalculateOutput(BaseModel):
    # NOTE: the field descriptions double as instructions to the model. The
    # "1+1" in X's description is the baseline instruction that the various
    # prompts below deliberately contradict (with 2+2 and 3+3).
    Explanation: str = Field(description="Explanation of the step")
    X: str = Field(description="What is the answer to x = 1+1?")
    # Try this if you want to see how likely that it sticks to SO!
    # X_is_one_plus_one: str
prompt_direct = """ | |
Provide the structure output. | |
""" | |
prompt_with_direct_prompt = """ | |
Provide the structure output. | |
Calculate what is X=3+3. | |
""".format( | |
json_schema=CalculateOutput.schema_json(indent=2).replace("1+1", "2+2") | |
) | |
prompt_with_json_schema = """ | |
Provide the structure output. | |
Here is the JSON schema that we expected from output: | |
{json_schema} | |
""".format( | |
json_schema=CalculateOutput.schema_json(indent=2).replace("1+1", "2+2") | |
) | |
prompt_with_json_schema_and_direct = """ | |
Here is the JSON schema that we expected from output: | |
{json_schema} | |
Calculate what is X=3+3. | |
""".format( | |
json_schema=CalculateOutput.schema_json(indent=2).replace("1+1", "2+2") | |
) | |
# def get_completion(messages, model="gpt-4o-mini", temperature=0): | |
# response = client.chat.completions.create( | |
# model=model, messages=messages, temperature=temperature | |
# ) | |
# return response.choices[0].message.content | |
def get_structured_output_completion(
    messages,
    model=MODEL,
    temperature=0,
    response_format=CalculateOutput,
):
    """Send *messages* through the structured-output (parse) endpoint.

    Returns the parsed pydantic object (``response_format`` instance) taken
    from the first choice of the completion.
    """
    completion = client.beta.chat.completions.parse(
        model=model,
        messages=messages,
        temperature=temperature,
        response_format=response_format,
    )
    first_choice = completion.choices[0]
    return first_choice.message.parsed
if __name__ == "__main__": | |
messages = [ | |
{ | |
"role": "system", | |
# This ouptut 1+1=2, so SO desciption is obeyed. | |
# "content": prompt_direct, | |
# This output 2+2=4, so schema in prompt overrides the prompt | |
# "content": prompt_with_json_schema, | |
# The following output 3+3=6, so direct prompting trumps the schema | |
# "content": prompt_with_direct_prompt, | |
"content": prompt_with_json_schema_and_direct, | |
}, | |
] | |
print(json.dumps(messages, indent=4)) | |
result = get_structured_output_completion(messages, response_format=CalculateOutput) | |
print( | |
"Output will tell which instruction wins (1+1=SO desc, 2+2=In prompt SO Desc, 3+3=prompt):" | |
) | |
pprint(result.dict()) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment