Created
June 18, 2024 08:43
-
-
Save nrbnlulu/e983ab23bed5806cff5bb8ba97434d6d to your computer and use it in GitHub Desktop.
Msgspec vs Pydantic v2
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json
import re
import timeit
from contextlib import contextmanager
from dataclasses import dataclass
from datetime import datetime
from typing import Annotated, Any, Callable, Iterator, TypedDict

import mimesis
import msgspec
import pydantic
from pydantic.annotated_handlers import GetJsonSchemaHandler
from pydantic.json_schema import JsonSchemaValue
from pydantic.type_adapter import TypeAdapter
from pydantic_core import core_schema
# Shared mimesis generator; create_user_msgspec() draws every fake field from it.
provider = mimesis.Generic()
class Email(str):
    """String subtype that tags a value as an e-mail address.

    Constructing an ``Email`` directly performs no validation; call
    ``validate_email`` to obtain a checked instance.
    """

    # Empty slots suppress the per-instance ``__dict__``.
    __slots__ = ()
# Compiled once at import time; matched with ``fullmatch`` so no anchors are needed.
_EMAIL_PATTERN = re.compile(r"[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+")


def validate_email(email: str) -> "Email":
    """Check *email* against a simple address pattern and wrap it as ``Email``.

    The whole string must match.  ``fullmatch`` is used instead of
    ``match`` with a trailing ``$`` because ``$`` also matches just before a
    final newline, which would let ``"user@example.com\\n"`` through.

    Args:
        email: Candidate address.

    Returns:
        The same text as an ``Email`` instance.

    Raises:
        ValueError: If *email* does not match the pattern.
    """
    if _EMAIL_PATTERN.fullmatch(email) is None:
        raise ValueError("Invalid email")
    return Email(email)
def enc_hook(obj: Any) -> Any:  # noqa: ANN401
    """msgspec ``enc_hook`` that encodes ``Email`` values as plain strings.

    Args:
        obj: An object the encoder handed to the hook.

    Returns:
        ``str(obj)`` when *obj* is an ``Email``.

    Raises:
        NotImplementedError: For every other type.  Falling through would
            return ``None`` and hide the unsupported object behind a
            ``null`` in the output.
    """
    if isinstance(obj, Email):
        return str(obj)
    raise NotImplementedError(
        f"Objects of type {type(obj).__name__} are not supported"
    )
def dec_hook(type_: type, val: object) -> Any:  # noqa: ANN401
    """msgspec ``dec_hook`` that rebuilds ``Email`` fields from decoded strings.

    Args:
        type_: The annotated type being decoded.
        val: The already-decoded value for that field.

    Returns:
        A validated ``Email`` when *type_* is ``Email`` and *val* is a string.

    Raises:
        NotImplementedError: If *type_* is anything other than ``Email``.
        TypeError: If an ``Email`` field holds a non-string value.
        ValueError: If the string is not a valid address (raised by
            ``validate_email``).
    """
    if type_ is not Email:
        # Report unsupported types explicitly instead of returning None.
        raise NotImplementedError(f"Objects of type {type_} are not supported")
    if not isinstance(val, str):
        raise TypeError(f"Expected `str`, got `{type(val).__name__}`")
    return validate_email(val)
class MsgSpecUser(msgspec.Struct):
    """msgspec model of a user record; the msgspec side of the benchmark.

    Field names and types mirror ``PydanticUser`` so both libraries process
    the same payload.
    """

    id: str
    username: str
    password: str
    # Custom type: encoded through ``enc_hook`` and decoded through ``dec_hook``.
    email: Email
    blog: str
    first_name: str
    last_name: str
    is_active: bool
    is_staff: bool
    is_superuser: bool
    date_joined: datetime
    last_login: datetime
    # Optional nested user, which makes the structure recursive.
    friend: "MsgSpecUser | None"
def create_user_msgspec() -> MsgSpecUser:
    """Build one ``MsgSpecUser`` filled with generated data.

    Every field is drawn from the module-level mimesis ``provider``.  Whenever
    ``provider.development.boolean()`` returns true the user also receives a
    freshly generated ``friend``; that friend is built by a recursive call, so
    friends can be chained to arbitrary depth.

    Returns:
        A new ``MsgSpecUser``; ``friend`` is ``None`` when the boolean draw is
        false.
    """
    return MsgSpecUser(
        id=provider.person.identifier(),
        username=provider.person.username(),
        password=provider.person.password(),
        # Run the generated address through the same check the decoders apply.
        email=validate_email(provider.person.email()),
        blog=provider.internet.url(),
        first_name=provider.person.name(),
        last_name=provider.person.last_name(),
        is_active=provider.development.boolean(),
        is_staff=provider.development.boolean(),
        is_superuser=provider.development.boolean(),
        date_joined=provider.datetime.datetime(),
        last_login=provider.datetime.datetime(),
        friend=create_user_msgspec() if provider.development.boolean() else None,
    )
# Benchmark fixture: 10,000 generated top-level users (each may carry nested
# friends) and their JSON encoding, which both libraries decode later on.
_data = [create_user_msgspec() for _ in range(10000)]
msgspec_encoder = msgspec.json.Encoder(enc_hook=enc_hook)
data_raw = msgspec_encoder.encode(_data)
class _PydanticEmailAnnot:
    """Annotation class that tells pydantic how to validate and serialize ``Email``."""

    @classmethod
    def __get_pydantic_core_schema__(
        cls,
        _source_type: Any,
        _handler: Callable[[Any], core_schema.CoreSchema],
    ) -> core_schema.CoreSchema:
        """Return the pydantic-core schema for ``Email`` values.

        The schema behaves as follows:

        * JSON input must be a string and is then passed to ``validate_email``.
        * Python input that is already an ``Email`` instance is accepted as is;
          anything else goes through the same string + ``validate_email`` chain.
        * Serialization returns the instance itself.
        """
        from_str_schema = core_schema.chain_schema(
            [
                core_schema.str_schema(),
                core_schema.no_info_plain_validator_function(validate_email),
            ]
        )
        return core_schema.json_or_python_schema(
            json_schema=from_str_schema,
            python_schema=core_schema.union_schema(
                [
                    # check if it's an instance first before doing any further work
                    core_schema.is_instance_schema(Email),
                    from_str_schema,
                ]
            ),
            serialization=core_schema.plain_serializer_function_ser_schema(
                lambda instance: instance
            ),
        )

    @classmethod
    def __get_pydantic_json_schema__(
        cls, _core_schema: core_schema.CoreSchema, handler: GetJsonSchemaHandler
    ) -> JsonSchemaValue:
        """Describe ``Email`` in generated JSON Schema as a string."""
        # Validation and serialization both use strings, so advertise the
        # same JSON Schema that a plain `str` field would get.
        return handler(core_schema.str_schema())
# `Annotated` alias used as the field annotation on pydantic models: the
# declared type is `Email`, and `_PydanticEmailAnnot` supplies its schema.
PydanticEmail = Annotated[Email, _PydanticEmailAnnot]
class PydanticUser(pydantic.BaseModel):
    """Pydantic model of a user record; the pydantic side of the benchmark.

    Field names and types mirror ``MsgSpecUser`` so both libraries process
    the same payload.
    """

    id: str
    username: str
    password: str
    # Custom type validated through the ``PydanticEmail`` annotation.
    email: PydanticEmail
    blog: str
    first_name: str
    last_name: str
    is_active: bool
    is_staff: bool
    is_superuser: bool
    date_joined: datetime
    last_login: datetime
    # Optional nested user, which makes the model recursive.
    friend: "PydanticUser | None"
@dataclass | |
class TimeitResult: | |
task: str | |
seconds: float | None = None | |
@contextmanager
def time_it(task: str) -> Iterator[TimeitResult]:
    """Time the body of a ``with`` block and report the duration.

    Args:
        task: Label used in the printed message and stored on the result.

    Yields:
        A ``TimeitResult`` whose ``seconds`` is ``None`` inside the block and
        is set to the elapsed time once the block finishes normally.  If the
        block raises, nothing is printed and ``seconds`` stays ``None``.
    """
    start = timeit.default_timer()
    res = TimeitResult(task=task)
    yield res
    # Measure once so the printed and the stored value are the same number.
    elapsed = timeit.default_timer() - start
    print(f"{task} took {elapsed:.6f} seconds")
    res.seconds = elapsed
def match_precentage(pydantic: float, msgspec: float) -> str:
    """Describe which library was faster and by what percentage.

    The percentage is the slower time's excess relative to the faster time,
    e.g. 1.0 s versus 2.0 s is reported as "faster by 100.0%".

    Args:
        pydantic: Seconds taken by pydantic.  (Inside this function the name
            shadows the ``pydantic`` module.)
        msgspec: Seconds taken by msgspec.  (Likewise shadows the module.)

    Returns:
        A one-line human-readable summary.
    """
    if pydantic == msgspec:
        return "Pydantic and MsgSpec took the same time"
    if pydantic < msgspec:
        winner, fast, slow = "Pydantic", pydantic, msgspec
    else:
        winner, fast, slow = "MsgSpec", msgspec, pydantic
    if fast <= 0:
        # A zero reading would make the ratio divide by zero.
        return f"{winner} is faster (its run was too short to measure a percentage)"
    return f"{winner} is faster by {(slow - fast) / fast * 100:.1f}%"
# ------------ decode ------------
# Both libraries parse the same JSON payload into their own user model.
msgspec_decoder = msgspec.json.Decoder(list[MsgSpecUser], dec_hook=dec_hook)
with time_it("msgspec_decode") as msgspec_res:
    msgspec_data = msgspec_decoder.decode(data_raw)

users_ta = TypeAdapter(list[PydanticUser])
with time_it("pydantic_decode") as pydantic_res:
    pydantic_data = users_ta.validate_json(data_raw)

print(f"DECODE: {match_precentage(pydantic_res.seconds, msgspec_res.seconds)}")
# ------------ encode ------------
# Each library serializes the objects it produced in the decode step.
with time_it("msgspec_encode") as msgspec_res:
    msgspec_data_raw = msgspec_encoder.encode(msgspec_data)

with time_it("pydantic_encode") as pydantic_res:
    pydantic_data_raw = users_ta.dump_json(pydantic_data)

print(f"ENCODE: {match_precentage(pydantic_res.seconds, msgspec_res.seconds)}")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
results