Skip to content

Instantly share code, notes, and snippets.

@gubenkoved
Last active November 8, 2021 10:58
Show Gist options
  • Save gubenkoved/7cd3f0cb36da56c219ff049e4518a4bd to your computer and use it in GitHub Desktop.
Save gubenkoved/7cd3f0cb36da56c219ff049e4518a4bd to your computer and use it in GitHub Desktop.
Experiments with memory overhead modeling of the Dynamic Array
import json
import subprocess
import psutil
import random
# uses cpp test program to gather the real data
def run(alpha, size):
# protocol is as follows:
# args:
# -s SIZE -- amount of items in the dynamic array
# -a GROWTH_RATE -- growth factor
# -t TYPE -- dyn array implementation:
# 1 - custom, tweakable growth rate
# 2 - std::vector
#
# when program is ready to start it sends "ready!" to the STDOUT, and waits
# for "go" in the STDIN -- it gives time to measure "before" numbers
# when program ran to completion, it sends "finished!" to the STDOUT, and waits
# for "exit" giving us a time to measure it's process memory after test is done
args = ['./cpp-tester/simulator.exe', '-s', str(size), '-a', '%0.3f' % alpha]
print('run with args: %s' % args)
proc = subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
def wait_for(expected):
while True:
line = proc.stdout.readline()
line = line.decode('ascii').strip()
if line == expected:
break
proc_info = psutil.Process(proc.pid)
print(' waiting for ready...')
wait_for('ready!')
before = proc_info.memory_full_info()
print(' sending go...')
proc.stdin.write('go\n'.encode('ascii'))
proc.stdin.flush()
print(' waiting for finished...')
wait_for('finished!')
after = proc_info.memory_full_info()
# send "exit" to the process
print(' sending exit...')
proc.stdin.write('exit\n'.encode('ascii'))
proc.stdin.flush()
print(' waiting for termination...')
retcode = proc.wait(timeout=10)
assert retcode == 0
return {
'rss': after.rss - before.rss,
'uss': after.uss - before.uss,
'vms': after.vms - before.vms,
'size': size
}
def main():
data = []
alpha = 1.1
simulations_per_alpha = 100
max_size = 10 ** 8
while alpha <= 2.0:
for _ in range(simulations_per_alpha):
size = random.randint(0, max_size)
result = run(alpha, size)
useful_size = size * 4 # we store ints
overhead_uss_factor = (result['uss'] - useful_size) / useful_size
overhead_rss_factor = (result['rss'] - useful_size) / useful_size
overhead_vms_factor = (result['vms'] - useful_size) / useful_size
data.append({
'alpha': alpha,
'size': size,
'result': result,
'overhead_uss_factor': overhead_uss_factor,
'overhead_rss_factor': overhead_rss_factor,
'overhead_vms_factor': overhead_vms_factor,
})
alpha += 0.01
with open('driver.json', 'w') as f:
json.dump(data, f)
if __name__ == '__main__':
main()
import random
import json
import os
import os.path
def main():
min_size = 10 ** 2
max_size = 10 ** 8
init_array_size = 1
simulations_per_alpha = 10 ** 4
data = []
random.seed(a=os.urandom(16))
alpha = 1.1
while alpha < 2:
alpha += 0.001
overhead_factors = []
for _ in range(simulations_per_alpha):
# useful size
size = random.randint(min_size, max_size)
# now calculate what the size of the underlying array would be
arr_size = init_array_size
while arr_size < size:
arr_size *= alpha
overhead_size = arr_size - size
overhead_factor = overhead_size / size
overhead_factors.append(overhead_factor)
avg_overhead_factor = sum(overhead_factors) / len(overhead_factors)
print('beta(%0.3f) = %0.5f' % (alpha, avg_overhead_factor))
data.append({
'alpha': alpha,
'beta': avg_overhead_factor,
})
with open('emulator.json', 'w') as f:
json.dump(data, f)
if __name__ == '__main__':
main()
import scipy.integrate as integrate
import math
import json
def beta(alpha):
max_size = 10 ** 8
integral_sum, err = integrate.quad(
lambda n: (math.pow(alpha, math.ceil(math.log(n, alpha))) - n) / n, 1, max_size)
result = integral_sum / max_size
print('beta(%0.3f) = %0.5f' % (alpha, result))
return result
def main():
data = []
alpha = 1.1
while alpha < 16:
data.append({'alpha': alpha, 'beta': beta(alpha)})
alpha += 0.001
with open('model.json', 'w+') as f:
json.dump(data, f)
if __name__ == '__main__':
main()
#include <algorithm>
#include <iostream>
#include <cassert>
#include <random>
#include <string>
template<typename T> class MyDynArray
{
long long _capacity;
double _growthRate;
T* _data;
long long _idx;
public:
MyDynArray(double growthRate = 2.0, long long initCapacity = 4)
{
_growthRate = growthRate;
_capacity = initCapacity;
_data = new T[initCapacity];
_idx = 0; // free idx
}
~MyDynArray()
{
// std::cout << "free data buffer of size " << _capacity << std::endl;
delete _data;
}
long long capacity()
{
return _capacity;
}
void push_back(T item)
{
if (_idx >= _capacity)
{
// resize!
long long newCapacity = _capacity * _growthRate;
if (newCapacity == _capacity)
newCapacity += 1;
// std::cout << "resizing from " << _capacity << " to " << newCapacity << std::endl;
T* newData = new T[newCapacity];
for (long long idx = 0; idx < _capacity; idx += 1)
newData[idx] = _data[idx];
delete _data; // free original array
_capacity = newCapacity;
_data = newData;
}
assert(_idx < _capacity);
_data[_idx] = item;
_idx += 1;
}
};
std::string getCmdOption(
char ** begin, char ** end, const std::string & option,
const std::string & default_val)
{
char ** itr = std::find(begin, end, option);
if (itr != end && ++itr != end)
return *itr;
return default_val;
}
void pauseAtExit()
{
std::cout << "finished!" << std::endl;
while (true)
{
std::cout << "waiting for \"exit\" in STDIN to exit..." << std::endl;
std::string x;
std::cin >> x;
if (x == "exit")
break;
}
std::cout << "done!" << std::endl;
}
void waitForGoAhead()
{
std::cout << "ready!" << std::endl;
while (true)
{
std::cout << "waiting for \"go\" in STDIN to start..." << std::endl;
std::string x;
std::cin >> x;
if (x == "go")
break;
}
}
int main(int argc, char **argv)
{
long long size = std::stoi(getCmdOption(argv, argv + argc, "-s", "1000"));
double growthRate = std::stod(getCmdOption(argv, argv + argc, "-a", "2"));
int type = std::stoi(getCmdOption(argv, argv + argc, "-t", "1"));
int capacity;
// let driver collect initial stats
waitForGoAhead();
if (type == 1)
{
MyDynArray<int> a(growthRate);
for (int k = 0; k < size; k += 1)
a.push_back(k);
pauseAtExit();
} else if (type == 2) // standard vector
{
std::vector<int> a;
for (int k = 0; k < size; k += 1)
a.push_back(k);
pauseAtExit();
} else
{
std::cout << "invalid type " << type << std::endl;
return 1;
}
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment