This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/ggml/CMakeLists.txt b/ggml/CMakeLists.txt | |
index 9a4ee49..137e3ea 100644 | |
--- a/ggml/CMakeLists.txt | |
+++ b/ggml/CMakeLists.txt | |
@@ -337,6 +337,11 @@ set(GGML_INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR} CACHE PATH "Location of | |
set(GGML_LIB_INSTALL_DIR ${CMAKE_INSTALL_LIBDIR} CACHE PATH "Location of library files") | |
set(GGML_BIN_INSTALL_DIR ${CMAKE_INSTALL_BINDIR} CACHE PATH "Location of binary files") | |
+foreach(lib "ggml" "ggml-base") | |
+ target_link_libraries(${lib} PUBLIC numa) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[root@lnd-jm02 iscsi-hardlock]# ./test-lse | |
Counters: 17171 48463 25182 21011 24232 24657 20559 15806 22230 42319 32158 16342 33570 29741 26467 19637 24702 26923 | |
Counters: 32542 38140 21447 21856 25708 21327 13062 23763 24566 35316 36268 24727 28855 23722 34471 28493 17219 13948 | |
Counters: 39521 29413 29365 26218 24139 10212 34962 16706 6899 30642 54875 30239 29795 22390 13964 29797 14388 21804 | |
Counters: 39066 24223 10731 19933 41360 19116 17735 38524 24549 34395 22350 21955 28910 27429 33503 20678 24180 16701 | |
Counters: 45116 30269 18649 32161 14482 25578 34041 29304 38479 20491 30742 29352 26589 17246 22371 23332 9264 18094 | |
Counters: 35308 41495 16516 28869 27017 19990 22366 28377 18060 13942 29316 25233 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
From 2eee837216e575abd9e48a30c65161a42ad59117 Mon Sep 17 00:00:00 2001 | |
From: Yibo Cai <[email protected]> | |
Date: Wed, 21 Aug 2024 06:40:48 -0400 | |
Subject: [PATCH 2/2] optimize varint with lookup table | |
--- | |
src/google/protobuf/io/coded_stream.h | 20 ++++++++++++-------- | |
1 file changed, 12 insertions(+), 8 deletions(-) | |
diff --git a/src/google/protobuf/io/coded_stream.h b/src/google/protobuf/io/coded_stream.h |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
From a5239aaf894334f4b4c331f1d40245f5f64a2cd4 Mon Sep 17 00:00:00 2001 | |
From: Yibo Cai <[email protected]> | |
Date: Wed, 21 Aug 2024 06:40:29 -0400 | |
Subject: [PATCH 1/2] add string list and map benchmarks | |
--- | |
benchmarks/benchmark.cc | 65 +++++++++++++++++++++++++++++++++++++ | |
benchmarks/descriptor.proto | 8 +++++ | |
2 files changed, 73 insertions(+) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/cmake/set_arch_flags.cmake b/cmake/set_arch_flags.cmake | |
index 538ddfe..81c40e4 100644 | |
--- a/cmake/set_arch_flags.cmake | |
+++ b/cmake/set_arch_flags.cmake | |
@@ -2,8 +2,8 @@ function(set_arch_flags target arch) | |
message(STATUS "Setting architecture flags for ${arch}") | |
if(arch MATCHES "x86_64") | |
target_compile_options(${target} PRIVATE -mavx2 -mpclmul -mbmi -mlzcnt) | |
- elseif(arch MATCHES "arm") | |
- target_compile_options(${target} PRIVATE -march=armv8-a) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/cmake/set_arch_flags.cmake b/cmake/set_arch_flags.cmake | |
index 538ddfe..6dc7754 100644 | |
--- a/cmake/set_arch_flags.cmake | |
+++ b/cmake/set_arch_flags.cmake | |
@@ -2,8 +2,8 @@ function(set_arch_flags target arch) | |
message(STATUS "Setting architecture flags for ${arch}") | |
if(arch MATCHES "x86_64") | |
target_compile_options(${target} PRIVATE -mavx2 -mpclmul -mbmi -mlzcnt) | |
- elseif(arch MATCHES "arm") | |
- target_compile_options(${target} PRIVATE -march=armv8-a) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/src/cpu/aarch64/jit_uni_reorder.cpp b/src/cpu/aarch64/jit_uni_reorder.cpp | |
index 5c7c6d5..dc55c69 100644 | |
--- a/src/cpu/aarch64/jit_uni_reorder.cpp | |
+++ b/src/cpu/aarch64/jit_uni_reorder.cpp | |
@@ -2680,6 +2680,55 @@ status_t jit_uni_reorder_t::pd_t::create(reorder_pd_t **reorder_pd, | |
return safe_ptr_assign(*reorder_pd, _pd.release()); | |
} | |
+#define MY_REORDER | |
+ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
################################################################## | |
# profile.py | |
################################################################## | |
import tensorflow as tf | |
import timeit | |
import os | |
n_threads = int(os.getenv('OMP_NUM_THREADS')) | |
if n_threads < 1 or n_threads > 999: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Bluewhale memory latency vs. bandwidth | |
====================================== | |
max bw = 574*8*32 MB/s = 147GB/s | |
bw = 0% | |
------- | |
$ numactl -m0 -N0 /usr/lib/lmbench/bin/lat_mem_rd -P 1 512 4096 | |
"stride=4096 | |
0.00391 1.429 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// tested with g++-10.5, probably okay for other versions as | |
// the code is quite simple, check assembly to make sure | |
// g++ -std=c++11 -O3 -pthread -static bw-test.cc -o bw-test | |
// XXX: it costs about half minute to compile this file | |
#include <cstdlib> | |
#include <iostream> | |
#include <thread> | |
#include <vector> |
NewerOlder