Created
August 22, 2025 23:59
-
-
Save 0xatm/d56c936777101c6b7c5a0a23a40621c4 to your computer and use it in GitHub Desktop.
Defining per-field evaluation metadata in spec models matched against entity models (unfortunately this breaks static type checking, since the metadata fields are not declared parameters).
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from typing import Optional, Dict, Any, List | |
| from typing_extensions import Self | |
| from pydantic import ( | |
| BaseModel, | |
| ConfigDict, | |
| Field, | |
| model_validator, | |
| ) | |
| # Existing evaluation config models | |
class ListMatchConfig(BaseModel):
    """Settings controlling how list-valued fields are matched."""

    # Minimum similarity required for two list items to count as a pair.
    min_pair_similarity: float = 1.0
    # Whether list items may be matched out of order.
    allow_reorder: bool = True
    # Whether duplicate items are collapsed before matching.
    dedupe: bool = True
class FieldEvalMetadata(BaseModel):
    """Per-field evaluation settings attached as annotation metadata."""

    # Name of the comparison strategy (e.g. "exact", "fuzzy", "numeric", "list").
    comparator: str = "exact"
    # Relative weight of this field in the overall evaluation.
    weight: float = 1.0
    # Score cutoff for the comparison.
    threshold: float = 1.0
    # List-matching settings; presumably only meaningful for list comparators.
    list_match: Optional[ListMatchConfig] = None
    # Optional named normalizer applied before comparison — semantics defined elsewhere.
    normalizer: Optional[str] = None
| # Provide evaluation spec | |
| # Dynamically validate that provided metadata matches entity fields | |
class EvaluationMetadataSpec(BaseModel):
    """Evaluation metadata spec dynamically validated against an entity model.

    Extra keyword arguments (enabled via ``extra="allow"``) carry one
    ``FieldEvalMetadata`` per entity field; two post-validators enforce that
    every extra value has the right type and that the extra field names match
    the ``mapped_to`` model's declared fields exactly, in both directions.
    """

    model_config = ConfigDict(extra="allow")

    # The entity model this spec describes; its declared field names define
    # the required set of metadata entries.
    mapped_to: type[BaseModel]

    @model_validator(mode="after")
    def assert_same_extra_field_types(self, exclude: str = "mapped_to") -> Self:
        """Ensure every extra field value is a ``FieldEvalMetadata`` instance.

        Raises:
            ValueError: if any extra field holds a value of another type.
        """
        if not self.model_extra:
            return self
        data: Dict[str, Any] = self.model_extra
        # Ensure type of all data values is FieldEvalMetadata
        if not all(
            isinstance(v, FieldEvalMetadata) for k, v in data.items() if k != exclude
        ):
            raise ValueError(
                "All fields in metadata spec must be of type FieldEvalMetadata."
            )
        return self

    @model_validator(mode="after")
    def assert_all_extra_fields_match(self, exclude: str = "mapped_to") -> Self:
        """Ensure extra fields and the entity's declared fields correspond 1:1.

        BUG FIX (consistency): the ``exclude`` default was a stale
        ``"corresponding_entity"`` — the declared field (and the sibling
        validator) use ``"mapped_to"``. Behavior is unchanged because the
        declared ``mapped_to`` field never appears in ``model_extra``, but the
        defaults now agree.

        Raises:
            ValueError: if either side has fields the other lacks.
        """
        if not self.model_extra:
            return self
        data: Dict[str, Any] = self.model_extra
        entity_fields = set(self.mapped_to.model_fields.keys())
        metadata_fields = set(data.keys()) - {exclude}
        missing_in_entity = metadata_fields - entity_fields
        if missing_in_entity:
            raise ValueError(
                f"Fields {missing_in_entity} in metadata spec do not exist in entity."
            )
        missing_in_metadata = entity_fields - metadata_fields
        if missing_in_metadata:
            raise ValueError(
                f"Fields {missing_in_metadata} in entity do not exist in metadata spec."
            )
        return self
| # Example usage | |
class PricedListing(BaseModel):
    """Sample entity used to exercise the evaluation spec."""

    name: str = Field(..., description="Name of the sample entity")
    description: str = Field(..., description="Description of the sample entity")
    price: float = Field(..., description="Price of the sample entity")
    features: List[str] = Field(
        ..., description="List of features of the sample entity"
    )
# Build a spec for PricedListing: one FieldEvalMetadata per entity field.
# NOTE: static type checkers reject these keyword args because they are not
# explicitly declared parameters on EvaluationMetadataSpec.
priced_listing_evaluation_spec = EvaluationMetadataSpec(
    mapped_to=PricedListing,
    name=FieldEvalMetadata(comparator="exact", weight=1.0, threshold=1.0),
    description=FieldEvalMetadata(comparator="fuzzy", weight=0.5, threshold=0.8),
    price=FieldEvalMetadata(comparator="numeric", weight=1.0, threshold=0.95),
    features=FieldEvalMetadata(
        comparator="list",
        weight=0.7,
        threshold=0.9,
        list_match=ListMatchConfig(
            min_pair_similarity=0.8,
            allow_reorder=True,
            dedupe=True,
        ),
    ),
)
# Extra fields are reachable as attributes thanks to extra="allow"; dump the
# metadata recorded for the "name" field.
name_metadata = priced_listing_evaluation_spec.name
print(name_metadata.model_dump_json(indent=2))
# Expected output:
# {
#   "comparator": "exact",
#   "weight": 1.0,
#   "threshold": 1.0,
#   "list_match": null,
#   "normalizer": null
# }
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment