Skip to content

Instantly share code, notes, and snippets.

@vthorey
Created March 16, 2017 16:11
Show Gist options
  • Save vthorey/c07b8fc0c17024e773c55f3dec53044e to your computer and use it in GitHub Desktop.
Save vthorey/c07b8fc0c17024e773c55f3dec53044e to your computer and use it in GitHub Desktop.
Create hdf5 file
import h5py
import numpy as np
# Synthetic arrays used as payload for the demo file written below.
# Four independent 500000-element integer ramps (0 .. 499999), one per array.
n_eeg_samples = 500000
eeg1, raw1, eeg2, raw2 = (np.arange(n_eeg_samples) for _ in range(4))

# 500 random integers drawn from the half-open range [0, 5).
hypnogram = np.random.randint(low=0, high=5, size=500)

# Shorter integer ramp (0 .. 15367).
ecg = np.arange(15368)

# 3-D array of shape (2, 3, 4) holding the values 0 .. 23 in row-major order.
cool_tensor = np.arange(24).reshape(2, 3, 4)
# Create h5 file
# Mode "w" creates the file (truncating any existing one). The context manager
# closes the file when the block exits, including on error.
with h5py.File("h5_test.h5", "w") as f:
    # Root: attributes attached to the root group "/".
    root = f["/"]
    root.attrs["start_time"] = "8h35"
    root.attrs["type"] = 2
    root.attrs["hey"] = "yo"
    root.attrs["device"] = "dreem"

    # EEG: one parent group with an "fs" attribute and one sub-group per
    # channel. Each channel holds a "raw" and a "filtered" dataset, both
    # gzip-compressed.
    group_eeg = f.create_group("eeg")
    group_eeg.attrs["fs"] = 250
    channels = (
        ("channel1", "Fp1", raw1, eeg1),
        ("channel2", "Fp2", raw2, eeg2),
    )
    for channel_name, location, raw, filtered in channels:
        # A relative name creates the child under group_eeg, giving the same
        # "/eeg/<channel_name>" path as an absolute name would.
        group_channel = group_eeg.create_group(channel_name)
        group_channel.attrs["location"] = location
        group_channel.create_dataset("raw", data=raw, compression="gzip")
        dataset = group_channel.create_dataset(
            "filtered", data=filtered, compression="gzip"
        )
        dataset.attrs['filter_HP'] = 20

    # ECG
    group_ecg = f.create_group("ecg")
    group_ecg.attrs["subsampling"] = 3
    group_ecg.create_dataset("ecg", data=ecg, compression="gzip")

    # hypnogram: stored uncompressed directly under the root group.
    dataset = f.create_dataset("hypnogram", data=hypnogram)
    # NOTE(review): "8h85" is not a valid clock time (the root start_time is
    # "8h35"); confirm the intended value before relying on it.
    dataset.attrs["start_time"] = "8h85"
    dataset.attrs["scorer"] = "Philip J. fry"

    # cool_matrix: N-dimensional arrays are stored as-is.
    f.create_dataset("tensor", data=cool_tensor)
# Read h5 file
# Open read-only inside a context manager so the file handle is closed once
# the reads are done (the bare h5py.File(...) call left it open).
with h5py.File("h5_test.h5", "r") as f:
    # Slicing a dataset returns the selected elements as a NumPy array.
    # The results are printed so they are visible when run as a script.
    print(f["/eeg/channel1/filtered"][:16])
    print(f["/hypnogram"][-10:])
    print(f["/tensor"][-1:, 2, 1])
""" Note on chunked storage
>>> dset = f.create_dataset("chunked", (1000, 1000), chunks=(100, 50))
the data in dset[0:100,0:50] will be stored together in the file,
as will the data points in range dset[400:500, 100:150].
"""
""" Note on Modification
Once some data is written into an h5 file it should not be modified.
New data can be added to a h5 file (e.g. new group, new dataset etc.)
New data can be APPENDED to a dataset on any axis ONLY IF
maxshape has been set to None on that axis.
One should resize the dataset (dset.resize) before appending any data on any axis.
http://docs.h5py.org/en/latest/high/dataset.html#resizable-datasets
>>> dset = f.create_dataset("unlimited", (10, 10), maxshape=(None, 10))
dset[0:10, 0:10] = np.ones((10, 10))
dset.resize((40, 10))
dset[10:40, 0:10] = np.ones((30, 10))
"""
@vthorey
Copy link
Author

vthorey commented Mar 16, 2017

HDFView to browse through a h5file

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment