Created
August 22, 2025 23:58
-
-
Save 0xatm/d5ffa454b5b3aad436b61ad352505d59 to your computer and use it in GitHub Desktop.
Injecting metadata directly into Pydantic fields, using the `FieldInfo.metadata` attribute
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from typing import Optional, Dict, List | |
| from pydantic import ( | |
| BaseModel, | |
| Field, | |
| create_model, | |
| ) | |
| from pydantic.fields import FieldInfo | |
# Existing evaluation config models


class ListMatchConfig(BaseModel):
    # Options attached to a field's evaluation metadata through
    # FieldEvalMetadata.list_match. The code that consumes these options is
    # not part of this file, so the notes below are hedged.

    # presumably the minimum similarity for two list items to count as a
    # match -- TODO confirm against the comparator implementation
    min_pair_similarity: float = 1.0

    # presumably whether item order is ignored when matching -- TODO confirm
    allow_reorder: bool = True

    # presumably whether duplicate items are collapsed first -- TODO confirm
    dedupe: bool = True
class FieldEvalMetadata(BaseModel):
    # Per-field evaluation settings. One instance is attached to each entity
    # field under the "evaluation_metadata" key of the field's metadata list.

    # Name of the comparison strategy; this file uses "exact", "short_text",
    # "long_text", "numeric" and "list_strings".
    comparator: str = "exact"

    # presumably the field's relative importance when scores are combined
    # -- TODO confirm against the scoring code
    weight: float = 1.0

    # presumably the minimum score for the field to pass -- TODO confirm
    threshold: float = 1.0

    # Extra options for list-valued fields; None when not applicable.
    list_match: Optional[ListMatchConfig] = None

    # Optional normalizer name; how it is resolved is not shown in this file.
    normalizer: Optional[str] = None
# Inject metadata into an individual Pydantic Field
# Pydantic Field() actually returns a FieldInfo object
def inject_field_with_metadata(
    field: FieldInfo,
    metadata: FieldEvalMetadata,
) -> FieldInfo:
    """Attach evaluation metadata to ``field`` in place and return it.

    Args:
        field: The ``FieldInfo`` to annotate. Its ``metadata`` list is
            mutated; no copy is made.
        metadata: The evaluation settings to attach.

    Returns:
        The same ``field`` object, now carrying one extra metadata entry of
        the form ``{"evaluation_metadata": metadata}``.
    """
    entry = {"evaluation_metadata": metadata}
    # Extend the existing list object so callers that hold a reference to
    # this FieldInfo observe the new entry.
    field.metadata.extend([entry])
    return field
# Create a new Pydantic model class definition with injected metadata, based on
# an existing entity class and an evaluation spec that provides metadata for
# each field
def inject_entity_with_metadata(
    entity_class: type[BaseModel],
    evaluation_spec: BaseModel,
) -> type[BaseModel]:
    """Build a subclass of ``entity_class`` whose fields carry evaluation metadata.

    A new model named ``<EntityName>WithMetadata`` is created with
    ``entity_class`` as its base. For every field of that model, the
    attribute of the same name is read from ``evaluation_spec`` and attached
    to the field via ``inject_field_with_metadata``.

    Args:
        entity_class: The model class to derive from. It is not modified.
        evaluation_spec: Object exposing one attribute per entity field, each
            holding that field's ``FieldEvalMetadata``.

    Returns:
        The newly created model class.

    Raises:
        ValueError: If ``evaluation_spec`` has no attribute for some field.
    """
    class_copy: type[BaseModel] = create_model(
        f"{entity_class.__name__}WithMetadata",
        __base__=entity_class,
    )

    # Iterate the live field mapping: injection works by mutating the
    # FieldInfo objects the new class actually holds, so a copied dict would
    # add nothing.
    for key, field in class_copy.model_fields.items():
        if not hasattr(evaluation_spec, key):
            raise ValueError(
                f"Metadata for field '{key}' not found in entity metadata."
            )
        metadata: FieldEvalMetadata = getattr(evaluation_spec, key)

        # Give the subclass field its own metadata list before appending.
        # An inherited FieldInfo may be a shallow copy of the base class's,
        # in which case both share one list object and an in-place append
        # would leak evaluation metadata into ``entity_class`` (and pile up
        # on repeated calls).
        # NOTE(review): assumes the subclass holds FieldInfo objects distinct
        # from the base class's -- confirm for the Pydantic version in use.
        field.metadata = list(field.metadata)
        inject_field_with_metadata(field, metadata)

    return class_copy
# Example usage


class PricedListing(BaseModel):
    """Sample entity for testing purposes."""

    # All four fields are required (declared with `...` as the default).
    name: str = Field(
        ...,
        description="Name of the sample entity",
    )
    description: str = Field(
        ...,
        description="Description of the sample entity",
    )
    price: float = Field(
        ...,
        description="Price of the sample entity",
    )
    features: List[str] = Field(
        ...,
        description="List of features of the sample entity",
    )
class SampleEntityEvaluationMetadata(BaseModel):
    # Evaluation spec for PricedListing: one attribute per entity field, named
    # after the field it configures, each defaulting to that field's settings.
    # NOTE(review): inject_entity_with_metadata looks fields up with
    # hasattr/getattr, so any object exposing these attributes works as a
    # spec; a plain dict would not, as written.
    name: FieldEvalMetadata = FieldEvalMetadata(
        comparator="short_text",
        weight=2.0,
        threshold=0.9,
    )
    description: FieldEvalMetadata = FieldEvalMetadata(
        comparator="long_text",
        weight=2.0,
        threshold=0.85,
    )
    price: FieldEvalMetadata = FieldEvalMetadata(
        comparator="numeric",
        weight=3.0,
        threshold=1.0,
    )
    # The list-valued field additionally carries list-matching options.
    features: FieldEvalMetadata = FieldEvalMetadata(
        comparator="list_strings",
        weight=2.0,
        threshold=0.9,
        list_match=ListMatchConfig(
            min_pair_similarity=0.8,
            allow_reorder=True,
            dedupe=True,
        ),
    )
# Derive a new Pydantic model class from PricedListing with the evaluation
# metadata from the spec attached to each of its fields.
SampleAnnotatedEntity = inject_entity_with_metadata(
    entity_class=PricedListing,
    evaluation_spec=SampleEntityEvaluationMetadata(),
)

# Each field's metadata list now holds its evaluation settings.
print(SampleAnnotatedEntity.model_fields["name"].metadata)
# [{'evaluation_metadata': FieldEvalMetadata(comparator='short_text', weight=2.0, threshold=0.9, list_match=None, normalizer=None)}]

# A plain (un-annotated) entity instance.
my_listing = PricedListing(
    name="Cozy Cottage",
    description="A cozy cottage with a beautiful garden.",
    price=250000.0,
    features=["garden", "fireplace", "2 bedrooms"],
)

# Convert to the annotated version by round-tripping through a dict.
my_annotated_listing = SampleAnnotatedEntity.model_validate(
    my_listing.model_dump()
)

# The serialized output has the same fields as my_listing; the difference is
# that per-field metadata is available on the annotated class.
print(my_annotated_listing.model_dump_json(indent=2))
# {
#   "name": "Cozy Cottage",
#   "description": "A cozy cottage with a beautiful garden.",
#   "price": 250000.0,
#   "features": [
#     "garden",
#     "fireplace",
#     "2 bedrooms"
#   ]
# }
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment