chris124567 · April 18, 2022 17:29 · chris124567 · Apr 17, 2022
diff --git a/benchmark.cpp b/benchmark.cpp
 #include <benchmark/benchmark.h>

 #include <eigen3/Eigen/Dense>
 #include <iostream>

 using Eigen::MatrixXi;

 static MatrixXi naive_multiply(const MatrixXi &A, const MatrixXi &B) {
    // result is an n x o matrix
    MatrixXi C = MatrixXi::Zero(A.rows(), B.cols());

    for (int i = 0; i < A.rows(); i++) {
        for (int j = 0; j < B.cols(); j++) {
            // calculate dot product of ith row of A and jth column of B
            int sum = 0;
            for (int k = 0; k < A.cols(); k++) {
                sum += A(i, k) * B(k, j);
            }
            // assign entry in result matrix
            C(i, j) = sum;
        }
    }

    return C;
 }

 static MatrixXi strassen_multiply(const Eigen::MatrixXi &A, const Eigen::MatrixXi &B) {
    const int N = A.rows();
    if (N <= 4) return naive_multiply(A, B);

    const auto &a = A.block(0, 0, N / 2, N / 2);
    const auto &b = A.block(N / 2, N / 2, N / 2, N / 2);
    const auto &c = B.block(0, 0, N / 2, N / 2);
    const auto &d = B.block(N / 2, N / 2, N / 2, N / 2);
    const auto &e = A.block(N / 2, 0, N / 2, N / 2);
    const auto &f = B.block(0, N / 2, N / 2, N / 2);
    const auto &g = B.block(N / 2, 0, N / 2, N / 2);
    const auto &h = A.block(0, N / 2, N / 2, N / 2);

    const auto &M1 = strassen_multiply(a + b, c + d);
    const auto &M2 = strassen_multiply(e + b, c);
    const auto &M3 = strassen_multiply(a, f - d);
    const auto &M4 = strassen_multiply(b, g - c);
    const auto &M5 = strassen_multiply(a + h, d);
    const auto &M6 = strassen_multiply(e - a, c + f);
    const auto &M7 = strassen_multiply(h - b, g + d);

    MatrixXi C(N, N);
    C << M1 + M4 - M5 + M7, M3 + M5,
        M2 + M4, M1 - M2 + M3 + M6;
    return C;
 }

 static void naive_multiply_benchmark(int N, benchmark::State &state) {
    const MatrixXi &A = MatrixXi::Random(N, N);
    const MatrixXi &B = MatrixXi::Random(N, N);
    for (auto _ : state) {
        naive_multiply(A, B);
    }
 }

 static void strassen_multiply_benchmark(int N, benchmark::State &state) {
    const MatrixXi &A = MatrixXi::Random(N, N);
    const MatrixXi &B = MatrixXi::Random(N, N);
    for (auto _ : state) {
        strassen_multiply(A, B);
    }
 }

 static void BM_Naive_2x2(benchmark::State &state) {
    naive_multiply_benchmark(2, state);
 }

 static void BM_Strassen_2x2(benchmark::State &state) {
    strassen_multiply_benchmark(2, state);
 }

 static void BM_Naive_4x4(benchmark::State &state) {
    naive_multiply_benchmark(4, state);
 }

 static void BM_Strassen_4x4(benchmark::State &state) {
    strassen_multiply_benchmark(4, state);
 }

 static void BM_Naive_8x8(benchmark::State &state) {
    naive_multiply_benchmark(8, state);
 }

 static void BM_Strassen_8x8(benchmark::State &state) {
    strassen_multiply_benchmark(8, state);
 }

 static void BM_Naive_16x16(benchmark::State &state) {
    naive_multiply_benchmark(16, state);
 }

 static void BM_Strassen_16x16(benchmark::State &state) {
    strassen_multiply_benchmark(16, state);
 }

 static void BM_Naive_32x32(benchmark::State &state) {
    naive_multiply_benchmark(32, state);
 }

 static void BM_Strassen_32x32(benchmark::State &state) {
    strassen_multiply_benchmark(32, state);
 }

 static void BM_Naive_64x64(benchmark::State &state) {
    naive_multiply_benchmark(64, state);
 }

 static void BM_Strassen_64x64(benchmark::State &state) {
    strassen_multiply_benchmark(64, state);
 }

 static void BM_Naive_128x128(benchmark::State &state) {
    naive_multiply_benchmark(128, state);
 }

 static void BM_Strassen_128x128(benchmark::State &state) {
    strassen_multiply_benchmark(128, state);
 }

 static void BM_Naive_256x256(benchmark::State &state) {
    naive_multiply_benchmark(256, state);
 }

 static void BM_Strassen_256x256(benchmark::State &state) {
    strassen_multiply_benchmark(256, state);
 }

 static void BM_Naive_512x512(benchmark::State &state) {
    naive_multiply_benchmark(512, state);
 }

 static void BM_Strassen_512x512(benchmark::State &state) {
    strassen_multiply_benchmark(512, state);
 }

 static void BM_Naive_1024x1024(benchmark::State &state) {
    naive_multiply_benchmark(1024, state);
 }

 static void BM_Strassen_1024x1024(benchmark::State &state) {
    strassen_multiply_benchmark(1024, state);
 }

 BENCHMARK(BM_Naive_2x2);
 BENCHMARK(BM_Strassen_2x2);

 BENCHMARK(BM_Naive_4x4);
 BENCHMARK(BM_Strassen_4x4);

 BENCHMARK(BM_Naive_8x8);
 BENCHMARK(BM_Strassen_8x8);

 BENCHMARK(BM_Naive_16x16);
 BENCHMARK(BM_Strassen_16x16);

 BENCHMARK(BM_Naive_32x32);
 BENCHMARK(BM_Strassen_32x32);

 BENCHMARK(BM_Naive_64x64);
 BENCHMARK(BM_Strassen_64x64);

 BENCHMARK(BM_Naive_128x128);
 BENCHMARK(BM_Strassen_128x128);

 BENCHMARK(BM_Naive_256x256);
 BENCHMARK(BM_Strassen_256x256);

 BENCHMARK(BM_Naive_512x512);
 BENCHMARK(BM_Strassen_512x512);

 BENCHMARK(BM_Naive_1024x1024);
 BENCHMARK(BM_Strassen_1024x1024);

 BENCHMARK_MAIN();
diff --git a/results.txt b/results.txt
 2022-04-17T14:29:32-04:00
 Running ./benchmark
 Run on (4 X 1600 MHz CPU s)
 CPU Caches:
  L1 Data 32 KiB (x4)
  L1 Instruction 32 KiB (x4)
  L2 Unified 256 KiB (x4)
  L3 Unified 6144 KiB (x1)
 Load Average: 2.29, 1.83, 1.74
 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
 ----------------------------------------------------------------
 Benchmark                      Time             CPU   Iterations
 ----------------------------------------------------------------
 BM_Naive_2x2                4452 ns         4452 ns       157289
 BM_Strassen_2x2             4471 ns         4471 ns       156630
 BM_Naive_4x4               25475 ns        25475 ns        27481
 BM_Strassen_4x4            25493 ns        25493 ns        27462
 BM_Naive_8x8              181657 ns       181656 ns         3852
 BM_Strassen_8x8           224344 ns       223743 ns         3130
 BM_Naive_16x16           1382803 ns      1382779 ns          506
 BM_Strassen_16x16        1685322 ns      1684031 ns          417
 BM_Naive_32x32          10813924 ns     10813774 ns           65
 BM_Strassen_32x32       12131297 ns     12131102 ns           58
 BM_Naive_64x64          85585594 ns     85581655 ns            8
 BM_Strassen_64x64       86165807 ns     86162451 ns            8
 BM_Naive_128x128       680977415 ns    680966594 ns            1
 BM_Strassen_128x128    608205666 ns    608195294 ns            1
 BM_Naive_256x256      5450353167 ns   5448782901 ns            1
 BM_Strassen_256x256   4310790179 ns   4310753972 ns            1
 BM_Naive_512x512      43662225555 ns   43639166429 ns            1
 BM_Strassen_512x512   30088482924 ns   30063769568 ns            1
 BM_Naive_1024x1024    359372792062 ns   359182019060 ns            1
 BM_Strassen_1024x1024 210963958482 ns   210924174384 ns            1
	#include <benchmark/benchmark.h>

	#include <eigen3/Eigen/Dense>
	#include <iostream>

	using Eigen::MatrixXi;

	static MatrixXi naive_multiply(const MatrixXi &A, const MatrixXi &B) {
	// result is an n x o matrix
	MatrixXi C = MatrixXi::Zero(A.rows(), B.cols());

	for (int i = 0; i < A.rows(); i++) {
	for (int j = 0; j < B.cols(); j++) {
	// calculate dot product of ith row of A and jth column of B
	int sum = 0;
	for (int k = 0; k < A.cols(); k++) {
	sum += A(i, k) * B(k, j);
	}
	// assign entry in result matrix
	C(i, j) = sum;
	}
	}

	return C;
	}

	static MatrixXi strassen_multiply(const Eigen::MatrixXi &A, const Eigen::MatrixXi &B) {
	const int N = A.rows();
	if (N <= 4) return naive_multiply(A, B);

	const auto &a = A.block(0, 0, N / 2, N / 2);
	const auto &b = A.block(N / 2, N / 2, N / 2, N / 2);
	const auto &c = B.block(0, 0, N / 2, N / 2);
	const auto &d = B.block(N / 2, N / 2, N / 2, N / 2);
	const auto &e = A.block(N / 2, 0, N / 2, N / 2);
	const auto &f = B.block(0, N / 2, N / 2, N / 2);
	const auto &g = B.block(N / 2, 0, N / 2, N / 2);
	const auto &h = A.block(0, N / 2, N / 2, N / 2);

	const auto &M1 = strassen_multiply(a + b, c + d);
	const auto &M2 = strassen_multiply(e + b, c);
	const auto &M3 = strassen_multiply(a, f - d);
	const auto &M4 = strassen_multiply(b, g - c);
	const auto &M5 = strassen_multiply(a + h, d);
	const auto &M6 = strassen_multiply(e - a, c + f);
	const auto &M7 = strassen_multiply(h - b, g + d);

	MatrixXi C(N, N);
	C << M1 + M4 - M5 + M7, M3 + M5,
	M2 + M4, M1 - M2 + M3 + M6;
	return C;
	}

	static void naive_multiply_benchmark(int N, benchmark::State &state) {
	const MatrixXi &A = MatrixXi::Random(N, N);
	const MatrixXi &B = MatrixXi::Random(N, N);
	for (auto _ : state) {
	naive_multiply(A, B);
	}
	}

	static void strassen_multiply_benchmark(int N, benchmark::State &state) {
	const MatrixXi &A = MatrixXi::Random(N, N);
	const MatrixXi &B = MatrixXi::Random(N, N);
	for (auto _ : state) {
	strassen_multiply(A, B);
	}
	}

	static void BM_Naive_2x2(benchmark::State &state) {
	naive_multiply_benchmark(2, state);
	}

	static void BM_Strassen_2x2(benchmark::State &state) {
	strassen_multiply_benchmark(2, state);
	}

	static void BM_Naive_4x4(benchmark::State &state) {
	naive_multiply_benchmark(4, state);
	}

	static void BM_Strassen_4x4(benchmark::State &state) {
	strassen_multiply_benchmark(4, state);
	}

	static void BM_Naive_8x8(benchmark::State &state) {
	naive_multiply_benchmark(8, state);
	}

	static void BM_Strassen_8x8(benchmark::State &state) {
	strassen_multiply_benchmark(8, state);
	}

	static void BM_Naive_16x16(benchmark::State &state) {
	naive_multiply_benchmark(16, state);
	}

	static void BM_Strassen_16x16(benchmark::State &state) {
	strassen_multiply_benchmark(16, state);
	}

	static void BM_Naive_32x32(benchmark::State &state) {
	naive_multiply_benchmark(32, state);
	}

	static void BM_Strassen_32x32(benchmark::State &state) {
	strassen_multiply_benchmark(32, state);
	}

	static void BM_Naive_64x64(benchmark::State &state) {
	naive_multiply_benchmark(64, state);
	}

	static void BM_Strassen_64x64(benchmark::State &state) {
	strassen_multiply_benchmark(64, state);
	}

	static void BM_Naive_128x128(benchmark::State &state) {
	naive_multiply_benchmark(128, state);
	}

	static void BM_Strassen_128x128(benchmark::State &state) {
	strassen_multiply_benchmark(128, state);
	}

	static void BM_Naive_256x256(benchmark::State &state) {
	naive_multiply_benchmark(256, state);
	}

	static void BM_Strassen_256x256(benchmark::State &state) {
	strassen_multiply_benchmark(256, state);
	}

	static void BM_Naive_512x512(benchmark::State &state) {
	naive_multiply_benchmark(512, state);
	}

	static void BM_Strassen_512x512(benchmark::State &state) {
	strassen_multiply_benchmark(512, state);
	}

	static void BM_Naive_1024x1024(benchmark::State &state) {
	naive_multiply_benchmark(1024, state);
	}

	static void BM_Strassen_1024x1024(benchmark::State &state) {
	strassen_multiply_benchmark(1024, state);
	}

	BENCHMARK(BM_Naive_2x2);
	BENCHMARK(BM_Strassen_2x2);

	BENCHMARK(BM_Naive_4x4);
	BENCHMARK(BM_Strassen_4x4);

	BENCHMARK(BM_Naive_8x8);
	BENCHMARK(BM_Strassen_8x8);

	BENCHMARK(BM_Naive_16x16);
	BENCHMARK(BM_Strassen_16x16);

	BENCHMARK(BM_Naive_32x32);
	BENCHMARK(BM_Strassen_32x32);

	BENCHMARK(BM_Naive_64x64);
	BENCHMARK(BM_Strassen_64x64);

	BENCHMARK(BM_Naive_128x128);
	BENCHMARK(BM_Strassen_128x128);

	BENCHMARK(BM_Naive_256x256);
	BENCHMARK(BM_Strassen_256x256);

	BENCHMARK(BM_Naive_512x512);
	BENCHMARK(BM_Strassen_512x512);

	BENCHMARK(BM_Naive_1024x1024);
	BENCHMARK(BM_Strassen_1024x1024);

	BENCHMARK_MAIN();
	2022-04-17T14:29:32-04:00
	Running ./benchmark
	Run on (4 X 1600 MHz CPU s)
	CPU Caches:
	L1 Data 32 KiB (x4)
	L1 Instruction 32 KiB (x4)
	L2 Unified 256 KiB (x4)
	L3 Unified 6144 KiB (x1)
	Load Average: 2.29, 1.83, 1.74
	*WARNING* CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
	----------------------------------------------------------------
	Benchmark Time CPU Iterations
	----------------------------------------------------------------
	BM_Naive_2x2 4452 ns 4452 ns 157289
	BM_Strassen_2x2 4471 ns 4471 ns 156630
	BM_Naive_4x4 25475 ns 25475 ns 27481
	BM_Strassen_4x4 25493 ns 25493 ns 27462
	BM_Naive_8x8 181657 ns 181656 ns 3852
	BM_Strassen_8x8 224344 ns 223743 ns 3130
	BM_Naive_16x16 1382803 ns 1382779 ns 506
	BM_Strassen_16x16 1685322 ns 1684031 ns 417
	BM_Naive_32x32 10813924 ns 10813774 ns 65
	BM_Strassen_32x32 12131297 ns 12131102 ns 58
	BM_Naive_64x64 85585594 ns 85581655 ns 8
	BM_Strassen_64x64 86165807 ns 86162451 ns 8
	BM_Naive_128x128 680977415 ns 680966594 ns 1
	BM_Strassen_128x128 608205666 ns 608195294 ns 1
	BM_Naive_256x256 5450353167 ns 5448782901 ns 1
	BM_Strassen_256x256 4310790179 ns 4310753972 ns 1
	BM_Naive_512x512 43662225555 ns 43639166429 ns 1
	BM_Strassen_512x512 30088482924 ns 30063769568 ns 1
	BM_Naive_1024x1024 359372792062 ns 359182019060 ns 1
	BM_Strassen_1024x1024 210963958482 ns 210924174384 ns 1