Last active
April 18, 2022 17:29
-
-
Save chris124567/c45d46fdf4d922389641cc9f591ae577 to your computer and use it in GitHub Desktop.
MATH214 Final Project: Code to Benchmark various Matrix Multiplication Algorithms
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <benchmark/benchmark.h> | |
#include <eigen3/Eigen/Dense> | |
#include <iostream> | |
using Eigen::MatrixXi; | |
static MatrixXi naive_multiply(const MatrixXi &A, const MatrixXi &B) { | |
// result is an n x o matrix | |
MatrixXi C = MatrixXi::Zero(A.rows(), B.cols()); | |
for (int i = 0; i < A.rows(); i++) { | |
for (int j = 0; j < B.cols(); j++) { | |
// calculate dot product of ith row of A and jth column of B | |
int sum = 0; | |
for (int k = 0; k < A.cols(); k++) { | |
sum += A(i, k) * B(k, j); | |
} | |
// assign entry in result matrix | |
C(i, j) = sum; | |
} | |
} | |
return C; | |
} | |
static MatrixXi strassen_multiply(const Eigen::MatrixXi &A, const Eigen::MatrixXi &B) { | |
const int N = A.rows(); | |
if (N <= 4) return naive_multiply(A, B); | |
const auto &a = A.block(0, 0, N / 2, N / 2); | |
const auto &b = A.block(N / 2, N / 2, N / 2, N / 2); | |
const auto &c = B.block(0, 0, N / 2, N / 2); | |
const auto &d = B.block(N / 2, N / 2, N / 2, N / 2); | |
const auto &e = A.block(N / 2, 0, N / 2, N / 2); | |
const auto &f = B.block(0, N / 2, N / 2, N / 2); | |
const auto &g = B.block(N / 2, 0, N / 2, N / 2); | |
const auto &h = A.block(0, N / 2, N / 2, N / 2); | |
const auto &M1 = strassen_multiply(a + b, c + d); | |
const auto &M2 = strassen_multiply(e + b, c); | |
const auto &M3 = strassen_multiply(a, f - d); | |
const auto &M4 = strassen_multiply(b, g - c); | |
const auto &M5 = strassen_multiply(a + h, d); | |
const auto &M6 = strassen_multiply(e - a, c + f); | |
const auto &M7 = strassen_multiply(h - b, g + d); | |
MatrixXi C(N, N); | |
C << M1 + M4 - M5 + M7, M3 + M5, | |
M2 + M4, M1 - M2 + M3 + M6; | |
return C; | |
} | |
static void naive_multiply_benchmark(int N, benchmark::State &state) { | |
const MatrixXi &A = MatrixXi::Random(N, N); | |
const MatrixXi &B = MatrixXi::Random(N, N); | |
for (auto _ : state) { | |
naive_multiply(A, B); | |
} | |
} | |
static void strassen_multiply_benchmark(int N, benchmark::State &state) { | |
const MatrixXi &A = MatrixXi::Random(N, N); | |
const MatrixXi &B = MatrixXi::Random(N, N); | |
for (auto _ : state) { | |
strassen_multiply(A, B); | |
} | |
} | |
static void BM_Naive_2x2(benchmark::State &state) { | |
naive_multiply_benchmark(2, state); | |
} | |
static void BM_Strassen_2x2(benchmark::State &state) { | |
strassen_multiply_benchmark(2, state); | |
} | |
static void BM_Naive_4x4(benchmark::State &state) { | |
naive_multiply_benchmark(4, state); | |
} | |
static void BM_Strassen_4x4(benchmark::State &state) { | |
strassen_multiply_benchmark(4, state); | |
} | |
static void BM_Naive_8x8(benchmark::State &state) { | |
naive_multiply_benchmark(8, state); | |
} | |
static void BM_Strassen_8x8(benchmark::State &state) { | |
strassen_multiply_benchmark(8, state); | |
} | |
static void BM_Naive_16x16(benchmark::State &state) { | |
naive_multiply_benchmark(16, state); | |
} | |
static void BM_Strassen_16x16(benchmark::State &state) { | |
strassen_multiply_benchmark(16, state); | |
} | |
static void BM_Naive_32x32(benchmark::State &state) { | |
naive_multiply_benchmark(32, state); | |
} | |
static void BM_Strassen_32x32(benchmark::State &state) { | |
strassen_multiply_benchmark(32, state); | |
} | |
static void BM_Naive_64x64(benchmark::State &state) { | |
naive_multiply_benchmark(64, state); | |
} | |
static void BM_Strassen_64x64(benchmark::State &state) { | |
strassen_multiply_benchmark(64, state); | |
} | |
static void BM_Naive_128x128(benchmark::State &state) { | |
naive_multiply_benchmark(128, state); | |
} | |
static void BM_Strassen_128x128(benchmark::State &state) { | |
strassen_multiply_benchmark(128, state); | |
} | |
static void BM_Naive_256x256(benchmark::State &state) { | |
naive_multiply_benchmark(256, state); | |
} | |
static void BM_Strassen_256x256(benchmark::State &state) { | |
strassen_multiply_benchmark(256, state); | |
} | |
static void BM_Naive_512x512(benchmark::State &state) { | |
naive_multiply_benchmark(512, state); | |
} | |
static void BM_Strassen_512x512(benchmark::State &state) { | |
strassen_multiply_benchmark(512, state); | |
} | |
static void BM_Naive_1024x1024(benchmark::State &state) { | |
naive_multiply_benchmark(1024, state); | |
} | |
static void BM_Strassen_1024x1024(benchmark::State &state) { | |
strassen_multiply_benchmark(1024, state); | |
} | |
BENCHMARK(BM_Naive_2x2); | |
BENCHMARK(BM_Strassen_2x2); | |
BENCHMARK(BM_Naive_4x4); | |
BENCHMARK(BM_Strassen_4x4); | |
BENCHMARK(BM_Naive_8x8); | |
BENCHMARK(BM_Strassen_8x8); | |
BENCHMARK(BM_Naive_16x16); | |
BENCHMARK(BM_Strassen_16x16); | |
BENCHMARK(BM_Naive_32x32); | |
BENCHMARK(BM_Strassen_32x32); | |
BENCHMARK(BM_Naive_64x64); | |
BENCHMARK(BM_Strassen_64x64); | |
BENCHMARK(BM_Naive_128x128); | |
BENCHMARK(BM_Strassen_128x128); | |
BENCHMARK(BM_Naive_256x256); | |
BENCHMARK(BM_Strassen_256x256); | |
BENCHMARK(BM_Naive_512x512); | |
BENCHMARK(BM_Strassen_512x512); | |
BENCHMARK(BM_Naive_1024x1024); | |
BENCHMARK(BM_Strassen_1024x1024); | |
BENCHMARK_MAIN(); |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
2022-04-17T14:29:32-04:00 | |
Running ./benchmark | |
Run on (4 X 1600 MHz CPU s) | |
CPU Caches: | |
L1 Data 32 KiB (x4) | |
L1 Instruction 32 KiB (x4) | |
L2 Unified 256 KiB (x4) | |
L3 Unified 6144 KiB (x1) | |
Load Average: 2.29, 1.83, 1.74 | |
***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. | |
---------------------------------------------------------------- | |
Benchmark Time CPU Iterations | |
---------------------------------------------------------------- | |
BM_Naive_2x2 4452 ns 4452 ns 157289 | |
BM_Strassen_2x2 4471 ns 4471 ns 156630 | |
BM_Naive_4x4 25475 ns 25475 ns 27481 | |
BM_Strassen_4x4 25493 ns 25493 ns 27462 | |
BM_Naive_8x8 181657 ns 181656 ns 3852 | |
BM_Strassen_8x8 224344 ns 223743 ns 3130 | |
BM_Naive_16x16 1382803 ns 1382779 ns 506 | |
BM_Strassen_16x16 1685322 ns 1684031 ns 417 | |
BM_Naive_32x32 10813924 ns 10813774 ns 65 | |
BM_Strassen_32x32 12131297 ns 12131102 ns 58 | |
BM_Naive_64x64 85585594 ns 85581655 ns 8 | |
BM_Strassen_64x64 86165807 ns 86162451 ns 8 | |
BM_Naive_128x128 680977415 ns 680966594 ns 1 | |
BM_Strassen_128x128 608205666 ns 608195294 ns 1 | |
BM_Naive_256x256 5450353167 ns 5448782901 ns 1 | |
BM_Strassen_256x256 4310790179 ns 4310753972 ns 1 | |
BM_Naive_512x512 43662225555 ns 43639166429 ns 1 | |
BM_Strassen_512x512 30088482924 ns 30063769568 ns 1 | |
BM_Naive_1024x1024 359372792062 ns 359182019060 ns 1 | |
BM_Strassen_1024x1024 210963958482 ns 210924174384 ns 1 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
README
Introduction
This gist contains code implementing two matrix multiplication algorithms (the naive algorithm and Strassen's algorithm) and code benchmarking them using Google's Benchmark framework for C++.
Compile
Compile with:
g++ benchmark.cpp -o benchmark -lbenchmark -lpthread
Run with:
./benchmark