>>> def run():
...     print("run")
...     return ak.from_iter([[1.1, 2.2, 3.3], [], [4.4, 5.5]], highlevel=False)
...
>>> form = ak.forms.ListOffsetForm("i64", ak.forms.NumpyForm("float64"))
>>> layout = LazyListOffsetArray(run, 3, form, ak._backends.numpy.NumpyBackend.instance())
>>> layout.length
3
>>> layout.parameters
{}
>>> print(layout.form.type)
var * float64
>>> layout[0]
run
<NumpyArray dtype='float64' len='3'>[1.1 2.2 3.3]</NumpyArray>
>>> layout[1]
<NumpyArray dtype='float64' len='0'>[]</NumpyArray>
>>> layout[2]
<NumpyArray dtype='float64' len='2'>[4.4 5.5]</NumpyArray>
Last active
October 28, 2024 20:12
-
-
Save jpivarski/98d704d1ce992e2eb154e719fa688c60 to your computer and use it in GitHub Desktop.
Draft of lazy layouts in Awkward v2
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from typing import Callable | |
import awkward as ak | |
class LazyMixin:
    """Mixin that postpones building a layout until its buffers are needed.

    The mixin stores a zero-argument ``materialize`` callable together with
    the length, form, and backend that the eventual layout is promised to
    have.  ``length`` and ``form`` are answered from those stored values;
    the callable is only invoked through the ``materialized`` property.

    ``_init`` is not defined here: it is expected to be supplied by the
    ``ak.contents`` class that this mixin is combined with (the mixin is
    listed first in the bases of ``LazyNumpyArray`` and
    ``LazyListOffsetArray``).
    """

    def __init__(
        self,
        materialize: Callable[[], ak.contents.Content],
        length: ak._nplikes.shape.ShapeItem,
        form: ak.forms.Form,
        backend: ak._backends.backend.Backend,
    ):
        """Record the promise; nothing is materialized here.

        Args:
            materialize: Zero-argument callable that produces the real layout.
            length: Length the materialized layout must have.
            form: Form the materialized layout must have (``form_key`` is
                not compared).
            backend: Backend the materialized layout must have.
        """
        self._materialize = materialize
        self._length = length
        self._form = form
        self._backend = backend
        self._init(form._parameters, backend)
        self._materialized = None

    @property
    def materialized(self):
        """The real layout, produced on first access and cached afterwards.

        The result of ``materialize()`` is checked against the promised
        class, length, form, and backend.  It is cached only after every
        check has passed, so a failed materialization is never handed out by
        a later access; the callable is simply tried again.

        Raises:
            TypeError: If this object is not an instance of the materialized
                layout's class.
            ValueError: If the length, form, or backend differs from what
                was promised.
        """
        if self._materialized is not None:
            return self._materialized

        # Validate a local first: assigning to self._materialized before the
        # checks would leave an invalid layout in the cache when one of them
        # raises.
        candidate = self._materialize()

        if not isinstance(self, type(candidate)):
            # Name the eager Content class this lazy class stands in for: the
            # first class in the MRO that is a Content but not itself lazy.
            # Unlike a fixed MRO index, this also works for subclasses of the
            # lazy classes.
            expected_class = next(
                (
                    cls
                    for cls in type(self).__mro__
                    if issubclass(cls, ak.contents.Content)
                    and not issubclass(cls, LazyMixin)
                ),
                type(self),
            )
            raise TypeError(
                f"when the lazy array with form_key {self._form.form_key!r} was materialized, it had class type {type(candidate).__name__}, rather than {expected_class.__name__}"
            )

        if candidate.length != self._length:
            raise ValueError(
                f"when the lazy array with form_key {self._form.form_key!r} was materialized, it had length {candidate.length}, rather than the expected {self._length}"
            )

        if not candidate.form.is_equal_to(
            self._form, all_parameters=True, form_key=False
        ):
            # Continuation lines of each multi-line form string get a leading
            # space so the two forms stand out from the surrounding message.
            found_text = str(candidate.form).replace("\n", "\n ")
            expected_text = str(self._form).replace("\n", "\n ")
            raise ValueError(
                f"when the lazy array with form_key {self._form.form_key!r} was materialized, it had form\n"
                f"{found_text}\n"
                "rather than the expected\n"
                f"{expected_text}"
            )

        if candidate.backend != self._backend:
            raise ValueError(
                f"when the lazy array with form_key {self._form.form_key!r} was materialized, it had backend {candidate.backend!r}, rather than the expected {self._backend!r}"
            )

        self._materialized = candidate
        return candidate

    # every Content has length and form
    @property
    def length(self):
        """Promised length, returned from the stored value."""
        return self._length

    @property
    def form(self):
        """Promised form, returned from the stored value."""
        return self._form
class LazyNumpyArray(LazyMixin, ak.contents.NumpyArray):
    """NumpyArray layout whose data buffer is obtained through ``materialized``.

    ``shape``, ``inner_shape``, and ``dtype`` are computed from the stored
    length and form only, so reading them does not touch ``materialized``.
    """

    @property
    def _data(self):
        """Data buffer of the materialized layout (invokes materialization)."""
        eager = self.materialized
        return eager._data

    @property
    def inner_shape(self):
        """Inner shape as recorded on the form."""
        form = self._form
        return form.inner_shape

    @property
    def shape(self):
        """Full shape: the length followed by ``inner_shape``."""
        leading = (self.length,)
        return leading + self.inner_shape

    @property
    def dtype(self):
        """dtype corresponding to the form's primitive."""
        primitive = self._form.primitive
        return ak.types.primitive_to_dtype(primitive)
class LazyListOffsetArray(LazyMixin, ak.contents.ListOffsetArray):
    """ListOffsetArray layout whose offsets and content come from ``materialized``.

    Reading either private attribute below invokes materialization.
    """

    @property
    def _content(self):
        """Content of the materialized layout."""
        eager = self.materialized
        return eager._content

    @property
    def _offsets(self):
        """Offsets of the materialized layout."""
        eager = self.materialized
        return eager._offsets
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment