Skip to content

Instantly share code, notes, and snippets.

@jpivarski
Last active October 28, 2024 20:12
Show Gist options
  • Save jpivarski/98d704d1ce992e2eb154e719fa688c60 to your computer and use it in GitHub Desktop.
Save jpivarski/98d704d1ce992e2eb154e719fa688c60 to your computer and use it in GitHub Desktop.
Draft of lazy layouts in Awkward v2
from typing import Callable
import awkward as ak
class LazyMixin:
    """Mixin that postpones building a layout node until it is first needed.

    Intended to be listed *before* a concrete content class, e.g.
    ``class LazyX(LazyMixin, ak.contents.X)``.  The mixin stores the expected
    length, form and backend up front, and calls the user-supplied
    ``materialize`` callable the first time :attr:`materialized` is read.
    The result is validated against the stored expectations and cached only
    if every check passes.

    NOTE(review): ``self._init`` is not defined here; it is expected to come
    from the content class this mixin is combined with -- confirm.
    """

    # The annotations are quoted so the private ``ak._nplikes`` / ``ak._backends``
    # paths they name are not resolved when the class is defined.
    def __init__(
        self,
        materialize: "Callable[[], ak.contents.Content]",
        length: "ak._nplikes.shape.ShapeItem",
        form: "ak.forms.Form",
        backend: "ak._backends.backend.Backend",
    ):
        """Record the expectations for the layout that ``materialize`` will build.

        Args:
            materialize: zero-argument callable returning the real layout node.
            length: length the materialized node must have.
            form: form the materialized node must have (``form_key`` is not
                compared; parameters are).
            backend: backend the materialized node must have.
        """
        self._materialize = materialize
        self._length = length
        self._form = form
        self._backend = backend
        self._init(form._parameters, backend)
        self._materialized = None

    @property
    def materialized(self):
        """The real layout node, built on first access and cached afterwards.

        Raises:
            TypeError: if the materialized node's class is not one that this
                lazy node is an instance of.
            ValueError: if its length, form or backend differs from what was
                promised at construction time.
        """
        if self._materialized is None:
            candidate = self._materialize()
            # Validate *before* caching: if a check fails, nothing is stored,
            # so a later access re-runs ``materialize`` and the checks instead
            # of silently handing back the rejected node.
            self._lazy_validate(candidate)
            self._materialized = candidate
        return self._materialized

    def _lazy_content_class(self):
        """Return the first class in this object's MRO that is not a LazyMixin subclass."""
        # ``object`` terminates every MRO and is not a LazyMixin subclass, so
        # the generator always yields at least one class.
        return next(
            klass for klass in type(self).__mro__ if not issubclass(klass, LazyMixin)
        )

    def _lazy_validate(self, candidate):
        """Raise unless ``candidate`` matches the stored class, length, form and backend."""
        prefix = (
            f"when the lazy array with form_key {self._form.form_key!r} "
            "was materialized, it had"
        )

        if not isinstance(self, type(candidate)):
            raise TypeError(
                f"{prefix} class type {type(candidate).__name__}, "
                f"rather than {self._lazy_content_class().__name__}"
            )

        if candidate.length != self._length:
            raise ValueError(
                f"{prefix} length {candidate.length}, "
                f"rather than the expected {self._length}"
            )

        if not candidate.form.is_equal_to(
            self._form, all_parameters=True, form_key=False
        ):
            pad = "    "

            def indented(form):
                # Indent every line of the form, including the first one.
                return pad + str(form).replace("\n", "\n" + pad)

            raise ValueError(
                f"{prefix} form\n"
                f"{indented(candidate.form)}\n"
                "rather than the expected\n"
                f"{indented(self._form)}"
            )

        if candidate.backend != self._backend:
            raise ValueError(
                f"{prefix} backend {candidate.backend!r}, "
                f"rather than the expected {self._backend!r}"
            )

    # every Content has length and form; both are answered without materializing
    @property
    def length(self):
        """The length given at construction time."""
        return self._length

    @property
    def form(self):
        """The form given at construction time."""
        return self._form
class LazyNumpyArray(LazyMixin, ak.contents.NumpyArray):
    """Lazy variant of ``ak.contents.NumpyArray``.

    ``dtype``, ``inner_shape`` and ``shape`` are answered from the stored form
    and length; only the (private) ``_data`` attribute reads ``materialized``.
    """

    # --- metadata: computed from the form and length, no materialization ---

    @property
    def dtype(self):
        """The dtype corresponding to the form's primitive."""
        primitive = self._form.primitive
        return ak.types.primitive_to_dtype(primitive)

    @property
    def inner_shape(self):
        """The form's ``inner_shape``."""
        form = self._form
        return form.inner_shape

    @property
    def shape(self):
        """The length followed by the inner shape."""
        leading = (self.length,)
        return leading + self.inner_shape

    # --- buffer: reading the (private) _data invokes materialization ---

    @property
    def _data(self):
        """The ``_data`` of the materialized array."""
        concrete = self.materialized
        return concrete._data
class LazyListOffsetArray(LazyMixin, ak.contents.ListOffsetArray):
    """Lazy variant of ``ak.contents.ListOffsetArray``.

    Both (private) attributes below are read from ``materialized``, so touching
    either one invokes materialization.
    """

    @property
    def _content(self):
        """The ``_content`` of the materialized array."""
        concrete = self.materialized
        return concrete._content

    @property
    def _offsets(self):
        """The ``_offsets`` of the materialized array."""
        concrete = self.materialized
        return concrete._offsets
>>> def run():
...     print("run")
...     return ak.from_iter([[1.1, 2.2, 3.3], [], [4.4, 5.5]], highlevel=False)
... 
>>> form = ak.forms.ListOffsetForm("i64", ak.forms.NumpyForm("float64"))
>>> layout = LazyListOffsetArray(run, 3, form, ak._backends.numpy.NumpyBackend.instance())
>>> layout.length
3
>>> layout.parameters
{}
>>> print(layout.form.type)
var * float64
>>> layout[0]
run
<NumpyArray dtype='float64' len='3'>[1.1 2.2 3.3]</NumpyArray>
>>> layout[1]
<NumpyArray dtype='float64' len='0'>[]</NumpyArray>
>>> layout[2]
<NumpyArray dtype='float64' len='2'>[4.4 5.5]</NumpyArray>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment