Last active
February 2, 2024 21:22
-
-
Save ankona/d6978d432ce29a993fbea76bb0ae926c to your computer and use it in GitHub Desktop.
Simple method of resampling an input using diff tracking
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pathlib | |
import numpy as np | |
import pandas as pd | |
import matplotlib.pyplot as plt | |
file_name = pathlib.Path("generated.csv") | |
def generate(min_value: float, max_value: float, normal_dy: float, num_timesteps: int, axis) -> None:
    """Write a synthetic random-walk series to ``file_name`` and plot it.

    The first sample is drawn uniformly from ``[min_value, max_value)``. Each
    of the following ``num_timesteps`` samples adds a normally distributed
    step scaled by ``normal_dy``; about 1% of the steps are doubled to
    imitate spikes. The series is written one value per line, read back with
    pandas, and drawn on ``axis``.

    Args:
        min_value: Lower bound of the starting value.
        max_value: Upper bound of the starting value.
        normal_dy: Scale applied to each standard-normal step.
        num_timesteps: Number of steps written after the starting value.
        axis: Object providing ``set_title``, ``set_ylabel``, ``set_xlabel``
            and ``plot`` (a matplotlib axes in the script entry point).
    """
    # A uniform draw keeps the starting point inside the requested range;
    # a standard-normal draw could land far outside it.
    start_fraction = np.random.rand()
    last_value = min_value + start_fraction * (max_value - min_value)

    with open(file_name, "w", encoding="utf-8") as out_fp:
        out_fp.write(f"{last_value}\n")
        for _ in range(num_timesteps):
            difference = np.random.randn() * normal_dy
            # The spike decision needs a uniform [0, 1) draw so that the
            # comparison really is a 1% probability (a normal draw is below
            # 0.01 roughly half of the time).
            if np.random.rand() < 0.01:
                # let's amp up the signal
                difference *= 2
            last_value += difference
            out_fp.write(f"{last_value}\n")

    # Read back only after the file has been closed so all rows are flushed.
    df = pd.read_csv(file_name, header=None, names=["memory"])
    print(df.head())

    axis.set_title("original faux data")
    axis.set_ylabel("memory consumption")
    axis.set_xlabel("time")
    axis.plot(df, linewidth=2, color="magenta", label="original")
def generate_sin(xmin: float, xmax: float, axis, num_samples: int = 100_000) -> None:
    """Write one sine curve to ``file_name`` and plot it.

    ``sin(x)`` is evaluated at ``num_samples`` evenly spaced points between
    ``xmin`` and ``xmax`` (both inclusive), written one value per line, read
    back with pandas, and drawn on ``axis``.

    Args:
        xmin: First x value.
        xmax: Last x value.
        axis: Object providing ``set_title``, ``set_ylabel``, ``set_xlabel``
            and ``plot`` (a matplotlib axes in the script entry point).
        num_samples: Number of points to generate; defaults to the 100000
            points this function always produced before.
    """
    x = np.linspace(xmin, xmax, num_samples)
    y = np.sin(x)

    with open(file_name, "w", encoding="utf-8") as out_fp:
        # One batched call instead of a write per sample.
        out_fp.writelines(f"{yval}\n" for yval in y)

    df = pd.read_csv(file_name, header=None, names=["memory"])
    print(df.head())

    axis.set_title(f"original faux data, sz={df.shape[0]}")
    axis.set_ylabel("memory consumption")
    axis.set_xlabel("time")
    axis.plot(df, linewidth=2, color="magenta", label="original")
def downsample(factor: float, axis) -> None:
    """Resample the series in ``file_name`` by diff tracking and plot it.

    A sample is kept only when it differs from the last kept sample by more
    than ``(max - min) / factor``. The kept samples are written as
    ``row,value`` lines to ``<stem>_resampled_<factor><suffix>`` next to
    ``file_name``, read back with pandas, and drawn on ``axis``.

    The first sample is always kept as the baseline, so the output is never
    empty and the plotted curve starts where the original does.

    Args:
        factor: Divisor of the value range that sets the change threshold;
            larger factors keep more samples. Must be positive.
        axis: Object providing ``set_title``, ``set_ylabel``, ``set_xlabel``
            and ``plot`` (a matplotlib axes in the script entry point).

    Raises:
        ValueError: If ``factor`` is not positive.
    """
    if factor <= 0:
        raise ValueError(f"factor must be positive, got {factor}")

    df = pd.read_csv(file_name, header=None, names=["memory"])
    # Work on a plain array: per-row ``df.iloc`` lookups are far slower.
    values = df["memory"].to_numpy()
    threshold = (values.max() - values.min()) / factor
    value = values[0]

    # Build the name from stem and suffix so dots elsewhere in the path are
    # left alone.
    resampled_file = file_name.with_name(
        f"{file_name.stem}_resampled_{factor}{file_name.suffix}"
    )
    with open(resampled_file, "w", encoding="utf-8") as out_fp:
        # Baseline sample; every later row is a change relative to it.
        out_fp.write(f"0,{value}\n")
        for row, curr_value in enumerate(values):
            # Compare the signed values: comparing magnitudes would hide a
            # swing across zero (e.g. -0.5 -> 0.5).
            if abs(curr_value - value) > threshold:
                out_fp.write(f"{row},{curr_value}\n")
                value = curr_value

    # The file has no header row; without header=None the first kept sample
    # would be consumed as column names.
    df2 = pd.read_csv(
        resampled_file, header=None, names=["time", "memory"], index_col="time"
    )

    axis.set_title(f"RESAMPLED f={factor},sz={df2.shape[0]}")
    axis.set_ylabel("memory consumption")
    axis.set_xlabel("time")
    axis.plot(df2, linewidth=1, alpha=0.8, color="cyan", label="resampled")
if __name__ == "__main__":
    # 3x3 grid: the first panel shows the source signal, the remaining eight
    # show it resampled with increasing factors.
    fig, grid = plt.subplots(3, 3)
    fig.set_figwidth(12)
    fig.set_figheight(12)

    # Row-major flattening gives the panels in reading order.
    source_axis, *resampled_axes = grid.flatten()

    generate(1000, 1500, 10, 100_000, source_axis)
    # generate_sin(-10, 10, source_axis)

    factors = (5, 7, 10, 20, 50, 100, 150, 200)
    for panel, factor in zip(resampled_axes, factors):
        downsample(factor, panel)

    # Save the figure next to the data file, with a .png suffix.
    image_path = pathlib.Path(file_name).with_suffix(".png")
    plt.savefig(image_path)
Author
ankona
commented
Feb 2, 2024
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment