Last active
June 26, 2022 06:54
-
-
Save kaushikcfd/cd7668ac6b68315d588be52c13eb87fd to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"id": "dea29ba5", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import loopy as lp\n", | |
"from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_2" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"id": "9c739f94", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Old shape: (19, 19)\n", | |
"New shape: (100,)\n", | |
"#define lid(N) ((int) get_local_id(N))\n", | |
"#define gid(N) ((int) get_group_id(N))\n", | |
"#if __OPENCL_C_VERSION__ < 120\n", | |
"#pragma OPENCL EXTENSION cl_khr_fp64: enable\n", | |
"#endif\n", | |
"\n", | |
"__kernel void __attribute__ ((reqd_work_group_size(1, 1, 1))) loopy_kernel(__global double const *__restrict__ a, __global double *__restrict__ out)\n", | |
"{\n", | |
" double tmp_reindexed[100];\n", | |
"\n", | |
" for (int j = 0; j <= 9; ++j)\n", | |
" for (int i = 0; i <= 9; ++i)\n", | |
" {\n", | |
" tmp_reindexed[((-2 + 2 * i >= 0 && -2 + 2 * j >= 0) ? 5 * 2 * i + (2 * j) / 2 : ((2 * j == 0 && -2 + 2 * i >= 0) ? 5 * 2 * i : ((2 * i == 0 && -2 + 2 * j >= 0) ? (2 * j) / 2 : 0)))] = a[10 * i + j];\n", | |
" out[10 * i + j] = tmp_reindexed[((-2 + 2 * i >= 0 && -2 + 2 * j >= 0) ? 5 * 2 * i + (2 * j) / 2 : ((2 * j == 0 && -2 + 2 * i >= 0) ? 5 * 2 * i : ((2 * i == 0 && -2 + 2 * j >= 0) ? (2 * j) / 2 : 0)))] * tmp_reindexed[((-2 + 2 * i >= 0 && -2 + 2 * j >= 0) ? 5 * 2 * i + (2 * j) / 2 : ((2 * j == 0 && -2 + 2 * i >= 0) ? 5 * 2 * i : ((2 * i == 0 && -2 + 2 * j >= 0) ? (2 * j) / 2 : 0)))];\n", | |
" }\n", | |
"}\n" | |
] | |
} | |
], | |
"source": [ | |
"# Kernel over 0 <= i, j < 10 whose temporary tmp is written only at the even indices (2*i, 2*j).\n", | |
"domain = \"{[i, j]: 0<=i,j<10}\"\n", | |
"instructions = \"\"\"\n", | |
" <> tmp[2*i, 2*j] = a[i, j]\n", | |
" out[i, j] = tmp[2*i, 2*j] ** 2\n", | |
" \"\"\"\n", | |
"tunit_untyped = lp.make_kernel(domain, instructions)\n", | |
"tunit_typed = lp.add_dtypes(tunit_untyped, {\"a\": \"float64\"})\n", | |
"\n", | |
"# Shape of tmp before reindexing.\n", | |
"entry_knl = tunit_typed.default_entrypoint\n", | |
"print(\"Old shape:\", entry_knl.temporary_variables[\"tmp\"].shape)\n", | |
"\n", | |
"# Reindex tmp; the result is looked up below under the name tmp_reindexed.\n", | |
"knl = lp.reindex_using_sehgir_loechner_scheme(entry_knl, \"tmp\")\n", | |
"print(\"New shape:\", knl.temporary_variables[\"tmp_reindexed\"].shape)\n", | |
"\n", | |
"# Put the transformed kernel back into tunit and print the generated device code.\n", | |
"tunit = tunit_typed.with_kernel(knl)\n", | |
"print(lp.generate_code_v2(tunit).device_code())" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"id": "2803e962", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"#define lid(N) ((int) get_local_id(N))\n", | |
"#define gid(N) ((int) get_group_id(N))\n", | |
"#if __OPENCL_C_VERSION__ < 120\n", | |
"#pragma OPENCL EXTENSION cl_khr_fp64: enable\n", | |
"#endif\n", | |
"#define LOOPY_CALL_WITH_INTEGER_TYPES(MACRO_NAME) \\\n", | |
" MACRO_NAME(int8, char) \\\n", | |
" MACRO_NAME(int16, short) \\\n", | |
" MACRO_NAME(int32, int) \\\n", | |
" MACRO_NAME(int64, long)\n", | |
"#define LOOPY_DEFINE_FLOOR_DIV_POS_B(SUFFIX, TYPE) \\\n", | |
" inline TYPE loopy_floor_div_pos_b_##SUFFIX(TYPE a, TYPE b) \\\n", | |
" { \\\n", | |
" if (a<0) \\\n", | |
" a = a - (b-1); \\\n", | |
" return a/b; \\\n", | |
" }\n", | |
"LOOPY_CALL_WITH_INTEGER_TYPES(LOOPY_DEFINE_FLOOR_DIV_POS_B)\n", | |
"#undef LOOPY_DEFINE_FLOOR_DIV_POS_B\n", | |
"#undef LOOPY_CALL_WITH_INTEGER_TYPES\n", | |
"\n", | |
"__kernel void __attribute__ ((reqd_work_group_size(1, 1, 1))) loopy_kernel(__global double const *__restrict__ a, __global double *__restrict__ out)\n", | |
"{\n", | |
" double tmp_reindexed[55];\n", | |
"\n", | |
" for (int j = 0; j <= 9; ++j)\n", | |
" for (int i = 0; i <= j; ++i)\n", | |
" {\n", | |
" tmp_reindexed[((-1 + i >= 0 && -1 + -1 * i + j >= 0) ? (19 * i) / 2 + loopy_floor_div_pos_b_int32(-1 * i * i, 2) + j : ((-1 * i + j == 0 && -1 + i >= 0) ? (21 * i) / 2 + loopy_floor_div_pos_b_int32(-1 * i * i, 2) : ((i == 0 && -1 + j >= 0) ? j : 0)))] = a[10 * i + j];\n", | |
" out[10 * i + j] = tmp_reindexed[((-1 + i >= 0 && -1 + -1 * i + j >= 0) ? (19 * i) / 2 + loopy_floor_div_pos_b_int32(-1 * i * i, 2) + j : ((-1 * i + j == 0 && -1 + i >= 0) ? (21 * i) / 2 + loopy_floor_div_pos_b_int32(-1 * i * i, 2) : ((i == 0 && -1 + j >= 0) ? j : 0)))] * tmp_reindexed[((-1 + i >= 0 && -1 + -1 * i + j >= 0) ? (19 * i) / 2 + loopy_floor_div_pos_b_int32(-1 * i * i, 2) + j : ((-1 * i + j == 0 && -1 + i >= 0) ? (21 * i) / 2 + loopy_floor_div_pos_b_int32(-1 * i * i, 2) : ((i == 0 && -1 + j >= 0) ? j : 0)))];\n", | |
" }\n", | |
"}\n" | |
] | |
} | |
], | |
"source": [ | |
"# Kernel over the triangular domain 0 <= i <= j < 10, with tmp indexed directly by (i, j).\n", | |
"domain = \"{[i, j]: 0<=i<=j<10}\"\n", | |
"instructions = \"\"\"\n", | |
" <> tmp[i, j] = a[i, j]\n", | |
" out[i, j] = tmp[i, j] ** 2\n", | |
" \"\"\"\n", | |
"tunit_untyped = lp.make_kernel(domain, instructions)\n", | |
"tunit_typed = lp.add_dtypes(tunit_untyped, {\"a\": \"float64\"})\n", | |
"\n", | |
"# Reindex tmp on the entrypoint kernel.\n", | |
"entry_knl = tunit_typed.default_entrypoint\n", | |
"knl = lp.reindex_using_sehgir_loechner_scheme(entry_knl, \"tmp\")\n", | |
"\n", | |
"# Put the transformed kernel back into tunit and print the generated device code.\n", | |
"tunit = tunit_typed.with_kernel(knl)\n", | |
"print(lp.generate_code_v2(tunit).device_code())" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "a0850b1a", | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3 (ipykernel)", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.10.4" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment