Skip to content

Instantly share code, notes, and snippets.

@snk4tr
Last active March 1, 2019 14:51
Show Gist options
  • Save snk4tr/f047cd20e882198eea55fb74a9e2927b to your computer and use it in GitHub Desktop.
Save snk4tr/f047cd20e882198eea55fb74a9e2927b to your computer and use it in GitHub Desktop.
Benchmark of the PyFunctional library in comparison with a procedural approach when applied to map/reduce on a Pandas DataFrame
from itertools import combinations, product
from time import perf_counter, time
from typing import Callable

import numpy as np
import pandas as pd
from functional import seq, pseq
# NOTE(review): star import kept as-is; `select` and `aggregate` used by
# bench_pipe are not defined in this file and presumably come from here.
from pipe import *
# Shared benchmark input: the integers 0..9999, one output row per value.
dates = [*range(10_000)]
def bench_funcional(method: Callable) -> pd.DataFrame:
    """Time a PyFunctional-style map/reduce that builds a single DataFrame.

    Every value of the module-level ``dates`` is mapped to a one-row,
    ten-column frame filled with that value, the frames are folded pairwise
    with ``pd.concat`` and the outcome is converted via ``to_pandas``.
    The elapsed time is printed, labelled with the class name of ``method``.

    Args:
        method: Callable invoked with ``dates`` whose result supports
            ``.map(...).reduce(...).to_pandas(...)``; the script passes
            ``seq`` and ``pseq``.

    Returns:
        The frame produced by the pipeline.
    """
    # perf_counter is a monotonic, high-resolution clock intended for
    # measuring durations; the wall clock from time() can jump if the system
    # time is adjusted while the benchmark runs.
    started = perf_counter()
    # The column list is rebuilt per row on purpose so the measured workload
    # stays the same as in the other benchmarks.
    result = (
        method(dates)
        .map(lambda d: pd.DataFrame(np.ones((1, 10)) * d, columns=list('ABCDEFGHIJ')))
        .reduce(lambda x, y: pd.concat([x, y], ignore_index=True))
        .to_pandas(columns=list('ABCDEFGHIJ'))
    )
    print(f'Method {method.__class__.__name__} took {round(perf_counter() - started, 3)} sec')
    return result
def bench_dummy() -> pd.DataFrame:
    """Time the plain-loop baseline that appends one row per value.

    Starts from an empty ten-column frame and, for every value of the
    module-level ``dates``, concatenates a one-row frame filled with that
    value. The elapsed time is printed with the label ``Dummy``.

    Returns:
        The accumulated frame.
    """
    # perf_counter is a monotonic, high-resolution clock intended for
    # measuring durations; the wall clock from time() can jump if the system
    # time is adjusted while the benchmark runs.
    started = perf_counter()
    result = pd.DataFrame(columns=list('ABCDEFGHIJ'))
    for i in dates:
        new_r = pd.DataFrame(np.ones((1, 10)) * i, columns=list('ABCDEFGHIJ'))
        # NOTE(review): row-by-row concat is quadratic, but it appears to be
        # the intended workload (it mirrors the pairwise reduce used by the
        # other benchmarks), so it is left unchanged.
        result = pd.concat([result, new_r], ignore_index=True)
    print(f'Dummy took {round(perf_counter() - started, 3)} sec')
    return result
def bench_pipe() -> pd.DataFrame:
    """Time the same map/reduce expressed with ``|`` pipe operators.

    The module-level ``dates`` is piped through ``select`` (one-row,
    ten-column frame per value) and ``aggregate`` (pairwise ``pd.concat``).
    The elapsed time is printed with the label ``Pipe``.

    Returns:
        The value produced by the pipeline.
    """
    # perf_counter is a monotonic, high-resolution clock intended for
    # measuring durations; the wall clock from time() can jump if the system
    # time is adjusted while the benchmark runs.
    started = perf_counter()
    result = (
        dates
        | select(lambda d: pd.DataFrame(np.ones((1, 10)) * d, columns=list('ABCDEFGHIJ')))
        | aggregate(lambda x, y: pd.concat([x, y], ignore_index=True))
    )
    print(f'Pipe took {round(perf_counter() - started, 3)} sec')
    return result
# Run every benchmark once; each call prints its own timing line.
results = [
    bench_funcional(seq),
    bench_funcional(pseq),
    bench_dummy(),
    bench_pipe(),
]

# Sanity check: all approaches must yield the same frame. Each unordered pair
# is compared once; comparing a result with itself or repeating a pair in the
# opposite order adds no information.
for (i, first), (j, second) in combinations(enumerate(results), 2):
    assert first.equals(second), f'benchmark results {i} and {j} differ'
@snk4tr
Copy link
Author

snk4tr commented Mar 1, 2019

My results:

>>> Method Stream took 10.144 sec
>>> Method ParallelStream took 8.947 sec
>>> Dummy took 10.135 sec
>>> Pipe took 10.113 sec

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment