CursorRules for BAML
Created November 26, 2024
<Overview>
BAML (Basically, A Made-Up Language) is a domain-specific language for building LLM prompts as functions.
You can build an agentic workflow with BAML.
</Overview>
<Schema>
// Define output schemas using classes
class MyObject {
  // Optional string fields use ?
  // @description is optional, but if you include it, it goes after the field.
  name string? @description("The name of the object")
  // Arrays of primitives
  // Arrays cannot be optional.
  tags string[]
  // Enums must be declared separately and are optional
  status MyEnum?
  // Union types
  type "success" | "error"
  // Primitive types
  count int
  enabled bool
  score float
  // Nested objects
  nested MyObject2
  // Image type
  myImg image
  {#// Checks and assertions. Uses jinja syntax inside the parentheses.
  // For a single property use one @
  bar int @assert(between_0_and_10, {{ "{{ this > 0 and this < 10 }}" }}) // this = MyObject.bar value
  quux string
  // Assertions for multiple fields use @@ and go at the bottom of the class. Uses jinja syntax inside the parentheses.
  // Do NOT add descriptions after the assertion.
  @@assert(length_limit, {{ "{{ this.quux|length < this.baz }}" }})#}
}

// Enums are declared separately
enum MyEnum {
  PENDING
  ACTIVE @description("Item is currently active")
  COMPLETE
}
// Comments use double slashes
// Recursive types and inline definitions are not supported
</Schema>
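Putting the schema rules above together, a minimal worked example might look like this (the class, enum, and field names are illustrative, not part of any real schema):

```baml
class Receipt {
  // Optional fields use ?; @description goes after the field
  vendor string? @description("Merchant name as printed on the receipt")
  // Arrays cannot be optional
  items string[]
  total float
  // Enums are declared separately and used as optional fields
  status ReceiptStatus?
}

enum ReceiptStatus {
  PAID
  UNPAID @description("No payment recorded yet")
}
```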
<Functions>
// Functions define inputs, outputs and prompts
// Function names are always PascalCase
function MyFunction(input: MyObject) -> string {
  client "openai/gpt-4o"
  // Prompt with jinja syntax inside here, with double curly braces for variables.
  // Make sure to include \{\{ ctx.output_format \}\} in the prompt, which prints the output schema instructions so the LLM returns the output in the correct format (json or string, etc.). DO NOT write the output schema manually.
  prompt #"
  "#
}
<LLMClients>
You can use any of the following:
- openai/gpt-4o
- openai/gpt-4o-mini
- anthropic/claude-3-5-sonnet-latest (note the "3-5")
- anthropic/claude-3-5-haiku-latest
</LLMClients>
<Prompt>
When writing the prompt:
1. Make sure to include the input in the prompt (even if it's an image) using {{ "{{ input }}" }}
2. Make sure to include {{ "{{ ctx.output_format }}" }} in the prompt so the LLM knows how to format the output.
3. You do not need to specify to "answer in JSON format". Write only a brief instruction in the prompt, plus any task-specific things to keep in mind.
4. Write a {{ "{{ _.role(\"user\") }}" }} tag to indicate where the user's inputs start. So if there's a conversation you can write
#"{{ "{{ _.role(\"user\") }}" }} {{ "{{ some-variable }}" }}"#

DO NOT REPEAT output schema fields in the prompt. They are included with {{ "{{ ctx.output_format }}" }}.
```baml
class TweetAnalysis {
  mainTopic string @description("The primary topic or subject matter of the tweet")
  isSpam bool @description("Whether the tweet appears to be spam")
}

function ClassifyTweets(tweets: string[]) -> TweetAnalysis[] {
  client "openai/gpt-4o-mini"
  prompt #"
    Analyze each of the following tweets and classify them:

    {{ "{{ _.role(\"user\") }}" }} {{ "{{ tweets }}" }}

    {{ "{{ ctx.output_format }}" }}
  "#
}
```
</Prompt>
</Functions>
<Usage>
You can use BAML in python, typescript, and other languages.
```python
from baml_client import b  # this client is autogenerated
from baml_client.types import WeatherAPI

def main():
    # In python, BAML functions are synchronous.
    weather_info = b.UseTool("What's the weather like in San Francisco?")
    print(weather_info)
    assert isinstance(weather_info, WeatherAPI)
    print(f"City: {weather_info.city}")
    print(f"Time of Day: {weather_info.timeOfDay}")

if __name__ == '__main__':
    main()
```
```typescript
import { b } from './baml_client' // this client is autogenerated
import { WeatherAPI } from './baml_client/types'
import assert from 'assert'

const main = async () => {
  const weatherInfo = await b.UseTool("What's the weather like in San Francisco?")
  console.log(weatherInfo)
  assert(weatherInfo instanceof WeatherAPI)
  console.log(`City: ${weatherInfo.city}`)
  console.log(`Time of Day: ${weatherInfo.timeOfDay}`)
}

main()
```
</Usage>
Do NOT use numbers as confidence scores if you need them. Prefer an enum with descriptions, or literals like "high", "medium", "low".
Don't add confidence levels to extraction schemas.
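Following that guidance, a confidence field can be modeled as an enum rather than a number. This is a sketch; the enum, class, and field names are illustrative:

```baml
enum Confidence {
  HIGH @description("Stated explicitly in the text")
  MEDIUM @description("Supported by indirect evidence")
  LOW @description("A guess based on weak signals")
}

class Claim {
  text string
  confidence Confidence
}
```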
Don't use LLM functions to "validate" any other output. {#You should use @assert for that on each field in the output type. Search the docs for "assert" to see how to use it.#}
Dedent all declarations.
Note that the types exported by BAML are pydantic classes in python, and interfaces in TypeScript, except for primitive types.