Created
May 6, 2024 08:55
-
-
Save huseinzol05/ff59996034604d17c1e53074e9adc03f to your computer and use it in GitHub Desktop.
Simple matmul comparison
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"id": "b8432016", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"'2.3.0+cu121'" | |
] | |
}, | |
"execution_count": 1, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"import torch\n", | |
"\n", | |
"torch.__version__" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"id": "acbdd506", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"/home/husein/.local/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", | |
" from .autonotebook import tqdm as notebook_tqdm\n" | |
] | |
} | |
], | |
"source": [ | |
"import time\n", | |
"from hqq.core.quantize import HQQLinear\n", | |
"\n", | |
"dtype = torch.float16\n", | |
"device = 'cuda'" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"id": "45874ba9", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"x = torch.randn((1, 32, 768), dtype = dtype, device = device)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"id": "ab03d293", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"linear = torch.nn.Linear(768, 32000, dtype = dtype, device = device)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"id": "047fc44a", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 5.09 ms, sys: 0 ns, total: 5.09 ms\n", | |
"Wall time: 3.85 ms\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/plain": [ | |
"0.0038194656372070312" | |
] | |
}, | |
"execution_count": 9, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"%%time\n", | |
"\n", | |
"before = time.time()\n", | |
"for _ in range(10):\n", | |
" linear(x)\n", | |
"time.time() - before" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"id": "b19d626c", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"8 0.0006330013275146484\n", | |
"4 0.0009520053863525391\n", | |
"2 0.0011856555938720703\n", | |
"1 0.001649618148803711\n" | |
] | |
} | |
], | |
"source": [ | |
"from hqq.core.quantize import *\n", | |
"from hqq.core.quantize import HQQLinear\n", | |
"\n", | |
"ints = [8, 4, 2, 1]\n", | |
"\n", | |
"for i in ints:\n", | |
" quant = BaseQuantizeConfig(nbits=i, \n", | |
" group_size=64,\n", | |
" quant_zero=False,\n", | |
" quant_scale=False,\n", | |
" axis=0,\n", | |
" offload_meta=False)\n", | |
" out_module = HQQLinear(\n", | |
" linear,\n", | |
" quant,\n", | |
" compute_dtype=dtype,\n", | |
" device=device,\n", | |
" )\n", | |
" out_module(x)\n", | |
" before = time.time()\n", | |
" for _ in range(10):\n", | |
" out_module(x)\n", | |
" print(i, time.time() - before)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "bd451b00", | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "python3.10", | |
"language": "python", | |
"name": "python3.10" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.10.14" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment