gubenkoved · November 8, 2021 10:58
diff --git a/driver.py b/driver.py
 import json
 import subprocess
 import psutil
 import random

 # uses cpp test program to gather the real data


 def run(alpha, size):
    # protocol is as follows:
    # args:
    #   -s SIZE -- amount of items in the dynamic array
    #   -a GROWTH_RATE -- growth factor
    #   -t TYPE -- dyn array implementation:
    #      1 - custom, tweakable growth rate
    #      2 - std::vector
    #
    # when program is ready to start it sends "ready!" to the STDOUT, and waits
    #   for "go" in the STDIN -- it gives time to measure "before" numbers
    # when program ran to completion, it sends "finished!" to the STDOUT, and waits
    #   for "exit" giving us a time to measure it's process memory after test is done
    args = ['./cpp-tester/simulator.exe', '-s', str(size), '-a', '%0.3f' % alpha]
    print('run with args: %s' % args)
    proc = subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE)

    def wait_for(expected):
        while True:
            line = proc.stdout.readline()
            line = line.decode('ascii').strip()
            if line == expected:
                break

    proc_info = psutil.Process(proc.pid)

    print('  waiting for ready...')
    wait_for('ready!')

    before = proc_info.memory_full_info()

    print('  sending go...')
    proc.stdin.write('go\n'.encode('ascii'))
    proc.stdin.flush()

    print('  waiting for finished...')
    wait_for('finished!')

    after = proc_info.memory_full_info()

    # send "exit" to the process
    print('  sending exit...')
    proc.stdin.write('exit\n'.encode('ascii'))
    proc.stdin.flush()

    print('  waiting for termination...')
    retcode = proc.wait(timeout=10)

    assert retcode == 0

    return {
        'rss': after.rss - before.rss,
        'uss': after.uss - before.uss,
        'vms': after.vms - before.vms,
        'size': size
    }


 def main():
    data = []
    alpha = 1.1
    simulations_per_alpha = 100
    max_size = 10 ** 8

    while alpha <= 2.0:
        for _ in range(simulations_per_alpha):
            size = random.randint(0, max_size)
            result = run(alpha, size)
            useful_size = size * 4  # we store ints
            overhead_uss_factor = (result['uss'] - useful_size) / useful_size
            overhead_rss_factor = (result['rss'] - useful_size) / useful_size
            overhead_vms_factor = (result['vms'] - useful_size) / useful_size
            data.append({
                'alpha': alpha,
                'size': size,
                'result': result,
                'overhead_uss_factor': overhead_uss_factor,
                'overhead_rss_factor': overhead_rss_factor,
                'overhead_vms_factor': overhead_vms_factor,
            })

        alpha += 0.01

    with open('driver.json', 'w') as f:
        json.dump(data, f)


 if __name__ == '__main__':
    main()
diff --git a/emulator.py b/emulator.py
 import random
 import json
 import os
 import os.path


 def main():
    min_size = 10 ** 2
    max_size = 10 ** 8
    init_array_size = 1
    simulations_per_alpha = 10 ** 4
    data = []

    random.seed(a=os.urandom(16))

    alpha = 1.1
    while alpha < 2:
        alpha += 0.001
        overhead_factors = []

        for _ in range(simulations_per_alpha):
            # useful size
            size = random.randint(min_size, max_size)

            # now calculate what the size of the underlying array would be
            arr_size = init_array_size

            while arr_size < size:
                arr_size *= alpha

            overhead_size = arr_size - size
            overhead_factor = overhead_size / size
            overhead_factors.append(overhead_factor)

        avg_overhead_factor = sum(overhead_factors) / len(overhead_factors)

        print('beta(%0.3f) = %0.5f' % (alpha, avg_overhead_factor))

        data.append({
            'alpha': alpha,
            'beta': avg_overhead_factor,
        })

    with open('emulator.json', 'w') as f:
        json.dump(data, f)


 if __name__ == '__main__':
    main()
diff --git a/model.py b/model.py
 import scipy.integrate as integrate
 import math
 import json


 def beta(alpha):
    max_size = 10 ** 8
    integral_sum, err = integrate.quad(
        lambda n: (math.pow(alpha, math.ceil(math.log(n, alpha))) - n) / n, 1, max_size)
    result = integral_sum / max_size
    print('beta(%0.3f) = %0.5f' % (alpha, result))
    return result


 def main():
    data = []
    alpha = 1.1
    while alpha < 16:
        data.append({'alpha': alpha, 'beta': beta(alpha)})
        alpha += 0.001

    with open('model.json', 'w+') as f:
        json.dump(data, f)


 if __name__ == '__main__':
    main()
diff --git a/simulator.cpp b/simulator.cpp
 #include <algorithm>
 #include <iostream>
 #include <cassert>
 #include <random>
 #include <string>


 template<typename T> class MyDynArray
 {
    long long _capacity;
    double _growthRate;
    T* _data;
    long long _idx;

 public:
    MyDynArray(double growthRate = 2.0, long long initCapacity = 4)
    {
        _growthRate = growthRate;
        _capacity = initCapacity;
        _data = new T[initCapacity];
        _idx = 0;  // free idx
    }

    ~MyDynArray()
    {
        // std::cout << "free data buffer of size " << _capacity << std::endl;
        delete _data;
    }

    long long capacity()
    {
        return _capacity;
    }

    void push_back(T item)
    {
        if (_idx >= _capacity)
        {
            // resize!
            long long newCapacity = _capacity * _growthRate;

            if (newCapacity == _capacity)
                newCapacity += 1;

            // std::cout << "resizing from " << _capacity << " to " << newCapacity << std::endl;

            T* newData = new T[newCapacity];

            for (long long idx = 0; idx < _capacity; idx += 1)
                newData[idx] = _data[idx];

            delete _data; // free original array
            _capacity = newCapacity;
            _data = newData;
        }

        assert(_idx < _capacity);
        _data[_idx] = item;
        _idx += 1;
    }
 };

 std::string getCmdOption(
    char ** begin, char ** end, const std::string & option,
    const std::string & default_val)
 {
    char ** itr = std::find(begin, end, option);
    if (itr != end && ++itr != end)
        return *itr;
    return default_val;
 }

 void pauseAtExit()
 {
    std::cout << "finished!" << std::endl;

    while (true)
    {
        std::cout << "waiting for \"exit\" in STDIN to exit..." << std::endl;

        std::string x;
        std::cin >> x;

        if (x == "exit")
            break;
    }

    std::cout << "done!" << std::endl;
 }

 void waitForGoAhead()
 {
    std::cout << "ready!" << std::endl;

    while (true)
    {
        std::cout << "waiting for \"go\" in STDIN to start..." << std::endl;

        std::string x;
        std::cin >> x;

        if (x == "go")
            break;
    }
 }

 int main(int argc, char **argv)
 {
    long long size = std::stoi(getCmdOption(argv, argv + argc, "-s", "1000"));
    double growthRate = std::stod(getCmdOption(argv, argv + argc, "-a", "2"));
    int type = std::stoi(getCmdOption(argv, argv + argc, "-t", "1"));

    int capacity;

    // let driver collect initial stats
    waitForGoAhead();

    if (type == 1)
    {
        MyDynArray<int> a(growthRate);
        for (int k = 0; k < size; k += 1)
            a.push_back(k);

        pauseAtExit();
    } else if (type == 2)  // standard vector
    {
        std::vector<int> a;
        for (int k = 0; k < size; k += 1)
            a.push_back(k);

        pauseAtExit();
    } else
    {
        std::cout << "invalid type " << type << std::endl;
        return 1;
    }

    return 0;
 }
	import json
	import subprocess
	import psutil
	import random

	# uses cpp test program to gather the real data


	def run(alpha, size):
	# protocol is as follows:
	# args:
	# -s SIZE -- amount of items in the dynamic array
	# -a GROWTH_RATE -- growth factor
	# -t TYPE -- dyn array implementation:
	# 1 - custom, tweakable growth rate
	# 2 - std::vector
	#
	# when program is ready to start it sends "ready!" to the STDOUT, and waits
	# for "go" in the STDIN -- it gives time to measure "before" numbers
	# when program ran to completion, it sends "finished!" to the STDOUT, and waits
	# for "exit" giving us a time to measure it's process memory after test is done
	args = ['./cpp-tester/simulator.exe', '-s', str(size), '-a', '%0.3f' % alpha]
	print('run with args: %s' % args)
	proc = subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE)

	def wait_for(expected):
	while True:
	line = proc.stdout.readline()
	line = line.decode('ascii').strip()
	if line == expected:
	break

	proc_info = psutil.Process(proc.pid)

	print(' waiting for ready...')
	wait_for('ready!')

	before = proc_info.memory_full_info()

	print(' sending go...')
	proc.stdin.write('go\n'.encode('ascii'))
	proc.stdin.flush()

	print(' waiting for finished...')
	wait_for('finished!')

	after = proc_info.memory_full_info()

	# send "exit" to the process
	print(' sending exit...')
	proc.stdin.write('exit\n'.encode('ascii'))
	proc.stdin.flush()

	print(' waiting for termination...')
	retcode = proc.wait(timeout=10)

	assert retcode == 0

	return {
	'rss': after.rss - before.rss,
	'uss': after.uss - before.uss,
	'vms': after.vms - before.vms,
	'size': size
	}


	def main():
	data = []
	alpha = 1.1
	simulations_per_alpha = 100
	max_size = 10 ** 8

	while alpha <= 2.0:
	for _ in range(simulations_per_alpha):
	size = random.randint(0, max_size)
	result = run(alpha, size)
	useful_size = size * 4 # we store ints
	overhead_uss_factor = (result['uss'] - useful_size) / useful_size
	overhead_rss_factor = (result['rss'] - useful_size) / useful_size
	overhead_vms_factor = (result['vms'] - useful_size) / useful_size
	data.append({
	'alpha': alpha,
	'size': size,
	'result': result,
	'overhead_uss_factor': overhead_uss_factor,
	'overhead_rss_factor': overhead_rss_factor,
	'overhead_vms_factor': overhead_vms_factor,
	})

	alpha += 0.01

	with open('driver.json', 'w') as f:
	json.dump(data, f)


	if __name__ == '__main__':
	main()
	import random
	import json
	import os
	import os.path


	def main():
	min_size = 10 ** 2
	max_size = 10 ** 8
	init_array_size = 1
	simulations_per_alpha = 10 ** 4
	data = []

	random.seed(a=os.urandom(16))

	alpha = 1.1
	while alpha < 2:
	alpha += 0.001
	overhead_factors = []

	for _ in range(simulations_per_alpha):
	# useful size
	size = random.randint(min_size, max_size)

	# now calculate what the size of the underlying array would be
	arr_size = init_array_size

	while arr_size < size:
	arr_size *= alpha

	overhead_size = arr_size - size
	overhead_factor = overhead_size / size
	overhead_factors.append(overhead_factor)

	avg_overhead_factor = sum(overhead_factors) / len(overhead_factors)

	print('beta(%0.3f) = %0.5f' % (alpha, avg_overhead_factor))

	data.append({
	'alpha': alpha,
	'beta': avg_overhead_factor,
	})

	with open('emulator.json', 'w') as f:
	json.dump(data, f)


	if __name__ == '__main__':
	main()
	import scipy.integrate as integrate
	import math
	import json


	def beta(alpha):
	max_size = 10 ** 8
	integral_sum, err = integrate.quad(
	lambda n: (math.pow(alpha, math.ceil(math.log(n, alpha))) - n) / n, 1, max_size)
	result = integral_sum / max_size
	print('beta(%0.3f) = %0.5f' % (alpha, result))
	return result


	def main():
	data = []
	alpha = 1.1
	while alpha < 16:
	data.append({'alpha': alpha, 'beta': beta(alpha)})
	alpha += 0.001

	with open('model.json', 'w+') as f:
	json.dump(data, f)


	if __name__ == '__main__':
	main()
	#include <algorithm>
	#include <iostream>
	#include <cassert>
	#include <random>
	#include <string>


	template<typename T> class MyDynArray
	{
	long long _capacity;
	double _growthRate;
	T* _data;
	long long _idx;

	public:
	MyDynArray(double growthRate = 2.0, long long initCapacity = 4)
	{
	_growthRate = growthRate;
	_capacity = initCapacity;
	_data = new T[initCapacity];
	_idx = 0; // free idx
	}

	~MyDynArray()
	{
	// std::cout << "free data buffer of size " << _capacity << std::endl;
	delete _data;
	}

	long long capacity()
	{
	return _capacity;
	}

	void push_back(T item)
	{
	if (_idx >= _capacity)
	{
	// resize!
	long long newCapacity = _capacity * _growthRate;

	if (newCapacity == _capacity)
	newCapacity += 1;

	// std::cout << "resizing from " << _capacity << " to " << newCapacity << std::endl;

	T* newData = new T[newCapacity];

	for (long long idx = 0; idx < _capacity; idx += 1)
	newData[idx] = _data[idx];

	delete _data; // free original array
	_capacity = newCapacity;
	_data = newData;
	}

	assert(_idx < _capacity);
	_data[_idx] = item;
	_idx += 1;
	}
	};

	std::string getCmdOption(
	char begin, char end, const std::string & option,
	const std::string & default_val)
	{
	char ** itr = std::find(begin, end, option);
	if (itr != end && ++itr != end)
	return *itr;
	return default_val;
	}

	void pauseAtExit()
	{
	std::cout << "finished!" << std::endl;

	while (true)
	{
	std::cout << "waiting for \"exit\" in STDIN to exit..." << std::endl;

	std::string x;
	std::cin >> x;

	if (x == "exit")
	break;
	}

	std::cout << "done!" << std::endl;
	}

	void waitForGoAhead()
	{
	std::cout << "ready!" << std::endl;

	while (true)
	{
	std::cout << "waiting for \"go\" in STDIN to start..." << std::endl;

	std::string x;
	std::cin >> x;

	if (x == "go")
	break;
	}
	}

	int main(int argc, char **argv)
	{
	long long size = std::stoi(getCmdOption(argv, argv + argc, "-s", "1000"));
	double growthRate = std::stod(getCmdOption(argv, argv + argc, "-a", "2"));
	int type = std::stoi(getCmdOption(argv, argv + argc, "-t", "1"));

	int capacity;

	// let driver collect initial stats
	waitForGoAhead();

	if (type == 1)
	{
	MyDynArray<int> a(growthRate);
	for (int k = 0; k < size; k += 1)
	a.push_back(k);

	pauseAtExit();
	} else if (type == 2) // standard vector
	{
	std::vector<int> a;
	for (int k = 0; k < size; k += 1)
	a.push_back(k);

	pauseAtExit();
	} else
	{
	std::cout << "invalid type " << type << std::endl;
	return 1;
	}

	return 0;
	}