ankona · February 2, 2024 21:22 · ankona · Feb 2, 2024 · ankona · Feb 2, 2024
diff --git a/gen_and_resample.py b/gen_and_resample.py
 import pathlib
 import numpy as np
 import pandas as pd
 import matplotlib.pyplot as plt


 # CSV file shared by the functions below: generate()/generate_sin() write the
 # series to it, and downsample() reads it back and derives its output name from it.
 file_name = pathlib.Path("generated.csv")


 def generate(min_value: float, max_value: float, normal_dy: float, num_timesteps: int, axis) -> None:
    """Write a random-walk series to ``file_name`` and plot it on ``axis``.

    The walk starts at a uniformly drawn point in ``[min_value, max_value)``
    and then takes ``num_timesteps`` normally distributed steps scaled by
    ``normal_dy``. Roughly 1% of the steps are doubled to act as spikes.
    One value is written per line (``num_timesteps + 1`` lines in total);
    the file is then read back with pandas, its head is printed, and the
    series is plotted.

    Args:
        min_value: Lower bound of the starting value.
        max_value: Upper bound of the starting value.
        normal_dy: Scale applied to each standard-normal step.
        num_timesteps: Number of steps taken after the starting value.
        axis: Plot target exposing ``set_title``, ``set_ylabel``,
            ``set_xlabel`` and ``plot`` (e.g. a matplotlib ``Axes``).
    """
    # Uniform (not normal) draw: a normal sample scaled by the range could
    # place the starting point well outside [min_value, max_value].
    last_value = min_value + np.random.rand() * (max_value - min_value)

    with open(file_name, "w", encoding="utf-8") as out_fp:
        out_fp.write(f"{last_value}\n")

        for _ in range(num_timesteps):
            difference = np.random.randn() * normal_dy

            # rand() is uniform on [0, 1), so this fires for about 1% of the
            # steps; a standard-normal draw would be below 0.01 about half
            # the time.
            if np.random.rand() < 0.01:
                # let's amp up the signal
                difference *= 2

            last_value += difference
            out_fp.write(f"{last_value}\n")

    df = pd.read_csv(file_name, header=None, names=["memory"])
    print(df.head())

    axis.set_title("original faux data")
    axis.set_ylabel("memory consumption")
    axis.set_xlabel("time")
    axis.plot(df, linewidth=2, color="magenta", label="original")


 def generate_sin(xmin: float, xmax: float, axis) -> None:
    """Write a sampled sine wave to ``file_name`` and plot it on ``axis``.

    ``sin(x)`` is evaluated at 100000 evenly spaced points between ``xmin``
    and ``xmax`` and written one value per line. The file is then read back
    with pandas, its head is printed, and the series is plotted.

    Args:
        xmin: First x value of the sampled interval.
        xmax: Last x value of the sampled interval.
        axis: Plot target exposing ``set_title``, ``set_ylabel``,
            ``set_xlabel`` and ``plot`` (e.g. a matplotlib ``Axes``).
    """
    sine_values = np.sin(np.linspace(xmin, xmax, 100000))

    with open(str(file_name), "w", encoding="utf-8") as out_fp:
        out_fp.writelines(f"{value}\n" for value in sine_values)

    # Round-trip through the CSV so the plot shows exactly what was stored.
    frame = pd.read_csv(str(file_name), header=None, names=["memory"])
    print(frame.head())

    sample_count = frame.shape[0]
    axis.set_title(f"original faux data, sz={sample_count}")
    axis.set_ylabel("memory consumption")
    axis.set_xlabel("time")
    axis.plot(frame, linewidth=2, color="magenta", label="original")



 def downsample(factor: float, axis) -> None:
    """Threshold-downsample the series in ``file_name`` and plot the result.

    A sample is kept when it differs from the most recently kept sample
    (initially the first sample) by more than ``(max - min) / factor``.
    Kept samples are written as ``row,value`` lines to a sibling file named
    ``<stem>_resampled_<factor><suffix>`` and plotted against their original
    row numbers.

    Args:
        factor: Divisor applied to the series' value range to obtain the
            threshold; larger values give a smaller threshold and keep
            more samples.
        axis: Plot target exposing ``set_title``, ``set_ylabel``,
            ``set_xlabel`` and ``plot`` (e.g. a matplotlib ``Axes``).
    """
    df = pd.read_csv(file_name, header=None, names=["memory"])
    memory = df["memory"]
    threshold = (memory.max() - memory.min()) / factor

    # Plain array iteration avoids a per-row ``iloc`` lookup.
    samples = memory.to_numpy()

    kept_rows = []
    kept_values = []
    value = samples[0]
    for row, curr_value in enumerate(samples):
        # Compare the signed values: comparing magnitudes would treat a
        # swing from -x to +x as no change at all.
        if abs(curr_value - value) > threshold:
            kept_rows.append(row)
            kept_values.append(curr_value)
            value = curr_value

    # Rename only the final path component, so dots elsewhere in the path
    # are left untouched.
    resampled_file = file_name.with_name(
        f"{file_name.stem}_resampled_{factor}{file_name.suffix}"
    )
    with open(resampled_file, "w", encoding="utf-8") as out_fp:
        out_fp.writelines(
            f"{row},{kept}\n" for row, kept in zip(kept_rows, kept_values)
        )

    # Build the frame from the kept samples directly: re-reading the
    # header-less file with default settings would consume the first kept
    # sample as column names and fail outright when nothing was kept.
    df2 = pd.DataFrame({"memory": kept_values}, index=kept_rows)

    axis.set_title(f"RESAMPLED f={factor},sz={df2.shape[0]}")
    axis.set_ylabel("memory consumption")
    axis.set_xlabel("time")
    axis.plot(df2, linewidth=1, alpha=0.8, color="cyan", label="resampled")
    
 if __name__ == "__main__":
    # 3x3 grid: the first panel shows the generated series, the remaining
    # eight show it downsampled with increasing factors.
    fig, axis_rows = plt.subplots(3, 3)
    fig.set_figwidth(12)
    fig.set_figheight(12)

    panels = [panel for axis_row in axis_rows for panel in axis_row]
    source_axis, *resample_axes = panels

    generate(1000, 1500, 10, 100_000, source_axis)
    # Alternative input signal: generate_sin(-10, 10, source_axis)

    factors = (5, 7, 10, 20, 50, 100, 150, 200)
    for panel, factor in zip(resample_axes, factors):
        downsample(factor, panel)

    # The figure is saved next to the CSV, with a .png suffix.
    plt.savefig(pathlib.Path(file_name).with_suffix(".png"))
	import pathlib
	import numpy as np
	import pandas as pd
	import matplotlib.pyplot as plt


	# CSV file shared by the functions below: generate()/generate_sin() write the
	# series to it, and downsample() reads it back and derives its output name from it.
	file_name = pathlib.Path("generated.csv")


	def generate(min_value: float, max_value: float, normal_dy: float, num_timesteps: int, axis) -> None:
	    """Write a random-walk series to ``file_name`` and plot it on ``axis``.

	    The walk starts at a uniformly drawn point in ``[min_value, max_value)``
	    and then takes ``num_timesteps`` normally distributed steps scaled by
	    ``normal_dy``. Roughly 1% of the steps are doubled to act as spikes.
	    One value is written per line (``num_timesteps + 1`` lines in total);
	    the file is then read back with pandas, its head is printed, and the
	    series is plotted.

	    Args:
	        min_value: Lower bound of the starting value.
	        max_value: Upper bound of the starting value.
	        normal_dy: Scale applied to each standard-normal step.
	        num_timesteps: Number of steps taken after the starting value.
	        axis: Plot target exposing ``set_title``, ``set_ylabel``,
	            ``set_xlabel`` and ``plot`` (e.g. a matplotlib ``Axes``).
	    """
	    # Uniform (not normal) draw: a normal sample scaled by the range could
	    # place the starting point well outside [min_value, max_value].
	    last_value = min_value + np.random.rand() * (max_value - min_value)

	    with open(file_name, "w", encoding="utf-8") as out_fp:
	        out_fp.write(f"{last_value}\n")

	        for _ in range(num_timesteps):
	            difference = np.random.randn() * normal_dy

	            # rand() is uniform on [0, 1), so this fires for about 1% of the
	            # steps; a standard-normal draw would be below 0.01 about half
	            # the time.
	            if np.random.rand() < 0.01:
	                # let's amp up the signal
	                difference *= 2

	            last_value += difference
	            out_fp.write(f"{last_value}\n")

	    df = pd.read_csv(file_name, header=None, names=["memory"])
	    print(df.head())

	    axis.set_title("original faux data")
	    axis.set_ylabel("memory consumption")
	    axis.set_xlabel("time")
	    axis.plot(df, linewidth=2, color="magenta", label="original")


	def generate_sin(xmin: float, xmax: float, axis) -> None:
	    """Write a sampled sine wave to ``file_name`` and plot it on ``axis``.

	    ``sin(x)`` is evaluated at 100000 evenly spaced points between ``xmin``
	    and ``xmax`` and written one value per line. The file is then read back
	    with pandas, its head is printed, and the series is plotted.

	    Args:
	        xmin: First x value of the sampled interval.
	        xmax: Last x value of the sampled interval.
	        axis: Plot target exposing ``set_title``, ``set_ylabel``,
	            ``set_xlabel`` and ``plot`` (e.g. a matplotlib ``Axes``).
	    """
	    sine_values = np.sin(np.linspace(xmin, xmax, 100000))

	    with open(str(file_name), "w", encoding="utf-8") as out_fp:
	        out_fp.writelines(f"{value}\n" for value in sine_values)

	    # Round-trip through the CSV so the plot shows exactly what was stored.
	    frame = pd.read_csv(str(file_name), header=None, names=["memory"])
	    print(frame.head())

	    sample_count = frame.shape[0]
	    axis.set_title(f"original faux data, sz={sample_count}")
	    axis.set_ylabel("memory consumption")
	    axis.set_xlabel("time")
	    axis.plot(frame, linewidth=2, color="magenta", label="original")



	def downsample(factor: float, axis) -> None:
	    """Threshold-downsample the series in ``file_name`` and plot the result.

	    A sample is kept when it differs from the most recently kept sample
	    (initially the first sample) by more than ``(max - min) / factor``.
	    Kept samples are written as ``row,value`` lines to a sibling file named
	    ``<stem>_resampled_<factor><suffix>`` and plotted against their original
	    row numbers.

	    Args:
	        factor: Divisor applied to the series' value range to obtain the
	            threshold; larger values give a smaller threshold and keep
	            more samples.
	        axis: Plot target exposing ``set_title``, ``set_ylabel``,
	            ``set_xlabel`` and ``plot`` (e.g. a matplotlib ``Axes``).
	    """
	    df = pd.read_csv(file_name, header=None, names=["memory"])
	    memory = df["memory"]
	    threshold = (memory.max() - memory.min()) / factor

	    # Plain array iteration avoids a per-row ``iloc`` lookup.
	    samples = memory.to_numpy()

	    kept_rows = []
	    kept_values = []
	    value = samples[0]
	    for row, curr_value in enumerate(samples):
	        # Compare the signed values: comparing magnitudes would treat a
	        # swing from -x to +x as no change at all.
	        if abs(curr_value - value) > threshold:
	            kept_rows.append(row)
	            kept_values.append(curr_value)
	            value = curr_value

	    # Rename only the final path component, so dots elsewhere in the path
	    # are left untouched.
	    resampled_file = file_name.with_name(
	        f"{file_name.stem}_resampled_{factor}{file_name.suffix}"
	    )
	    with open(resampled_file, "w", encoding="utf-8") as out_fp:
	        out_fp.writelines(
	            f"{row},{kept}\n" for row, kept in zip(kept_rows, kept_values)
	        )

	    # Build the frame from the kept samples directly: re-reading the
	    # header-less file with default settings would consume the first kept
	    # sample as column names and fail outright when nothing was kept.
	    df2 = pd.DataFrame({"memory": kept_values}, index=kept_rows)

	    axis.set_title(f"RESAMPLED f={factor},sz={df2.shape[0]}")
	    axis.set_ylabel("memory consumption")
	    axis.set_xlabel("time")
	    axis.plot(df2, linewidth=1, alpha=0.8, color="cyan", label="resampled")

	if __name__ == "__main__":
	    # 3x3 grid: the first panel shows the generated series, the remaining
	    # eight show it downsampled with increasing factors.
	    fig, axis_rows = plt.subplots(3, 3)
	    fig.set_figwidth(12)
	    fig.set_figheight(12)

	    panels = [panel for axis_row in axis_rows for panel in axis_row]
	    source_axis, *resample_axes = panels

	    generate(1000, 1500, 10, 100_000, source_axis)
	    # Alternative input signal: generate_sin(-10, 10, source_axis)

	    factors = (5, 7, 10, 20, 50, 100, 150, 200)
	    for panel, factor in zip(resample_axes, factors):
	        downsample(factor, panel)

	    # The figure is saved next to the CSV, with a .png suffix.
	    plt.savefig(pathlib.Path(file_name).with_suffix(".png"))