Skip to content

Instantly share code, notes, and snippets.

@deanm0000
Last active June 18, 2025 10:55
Show Gist options
  • Save deanm0000/4858676d7c49da435129a2e9114f9f66 to your computer and use it in GitHub Desktop.
Save deanm0000/4858676d7c49da435129a2e9114f9f66 to your computer and use it in GitHub Desktop.
store uuid in polars
import polars as pl
from uuid import UUID, uuid4
# 2**127: subtracting this from a UUID's unsigned 128-bit integer value
# re-centres it around zero so that it fits in a signed 128-bit integer.
CONSTANT = 1 << 127
def uuid_to_signed(uuid: UUID | str | int) -> int:
    """Return a UUID's integer value shifted down by ``CONSTANT``.

    Args:
        uuid: A ``UUID`` instance, a string accepted by the ``UUID``
            constructor, or an integer that is used as-is.

    Returns:
        The integer value of the UUID minus ``CONSTANT``.
    """
    # Strings are parsed first; anything that is (now) a UUID is reduced to
    # its integer value, while plain integers pass straight through.
    parsed = UUID(uuid) if isinstance(uuid, str) else uuid
    unsigned = parsed.int if isinstance(parsed, UUID) else parsed
    return unsigned - CONSTANT
def signed_to_uuid(uuid_int: int) -> UUID:
    """Rebuild a ``UUID`` from an integer that was shifted down by ``CONSTANT``.

    Args:
        uuid_int: The shifted integer, as produced by ``uuid_to_signed``.

    Returns:
        The ``UUID`` whose integer value is ``uuid_int + CONSTANT``.
    """
    return UUID(int=uuid_int + CONSTANT)
def uuid_to_list(uuid: UUID | str) -> list[int]:
if isinstance(uuid, str):
uuid = UUID(uuid)
return list(uuid.int.to_bytes(16, "little"))
def list_to_uuid(uuid_int: list[int]) -> UUID:
    """Rebuild a ``UUID`` from its byte values given least significant first.

    Args:
        uuid_int: The byte values of the UUID's integer value in
            little-endian order, as produced by ``uuid_to_list``.

    Returns:
        The ``UUID`` whose integer value those bytes encode.
    """
    value = int.from_bytes(uuid_int, byteorder="little")
    return UUID(int=value)
def uuid_to_binary(uuid: UUID | str) -> bytes:
if isinstance(uuid, str):
uuid = UUID(uuid)
return uuid.int.to_bytes(16, "little")
def binary_to_uuid(uuid_bytes: bytes) -> UUID:
    """Rebuild a ``UUID`` from its little-endian byte encoding.

    Args:
        uuid_bytes: The UUID's integer value as little-endian bytes, as
            produced by ``uuid_to_binary``.

    Returns:
        The ``UUID`` whose integer value those bytes encode.
    """
    value = int.from_bytes(uuid_bytes, byteorder="little")
    return UUID(int=value)
# Demo: store the same random UUIDs under three encodings, check that each
# encoding round-trips, then print the estimated size of each column.
n = 100_000
uuid_list = [uuid4() for _ in range(n)]

df = pl.DataFrame(
    [
        # "id": fixed-width array of 16 unsigned bytes per row.
        pl.Series("id", list(map(uuid_to_list, uuid_list)), pl.Array(pl.UInt8, 16)),
        # "id2": one shifted integer per row; dtype is left to polars' inference.
        pl.Series("id2", list(map(uuid_to_signed, uuid_list))),
        # "id3": one 16-byte bytes object per row; dtype is left to polars' inference.
        pl.Series("id3", list(map(uuid_to_binary, uuid_list))),
    ]
)

# Every encoding must decode back to exactly the UUIDs that went in.
assert uuid_list == list(map(list_to_uuid, df["id"]))
assert uuid_list == list(map(signed_to_uuid, df["id2"]))
assert uuid_list == list(map(binary_to_uuid, df["id3"]))

# Estimated size of each column on its own, requested in the "mb" unit.
for _column in ("id", "id2", "id3"):
    print(pl.DataFrame(df[_column]).estimated_size("mb"))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment