Last active
October 16, 2021 15:37
-
-
Save Muhammad-Yunus/f1913016c1e82e45a19b02f666c5d9cd to your computer and use it in GitHub Desktop.
Patch file for PoCL 1.7 with CUDA 6.5 backend on ARMv7 (Jetson TK1)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/lib/CL/devices/cuda/pocl-cuda.c b/lib/CL/devices/cuda/pocl-cuda.c | |
index cd9e044..06a9cde 100644 | |
--- a/lib/CL/devices/cuda/pocl-cuda.c | |
+++ b/lib/CL/devices/cuda/pocl-cuda.c | |
@@ -1082,7 +1082,7 @@ pocl_cuda_submit_kernel (CUstream stream, _cl_command_node *cmd, | |
} | |
if (constantMemBytes > constant_mem_size) | |
- POCL_ABORT ("[CUDA] Total constant buffer size %u exceeds %lu allocated\n", | |
+ POCL_ABORT ("[CUDA] Total constant buffer size %u exceeds %u allocated\n", | |
constantMemBytes, constant_mem_size); | |
unsigned arg_index = meta->num_args; | |
@@ -1191,9 +1191,9 @@ pocl_cuda_submit_node (_cl_command_node *node, cl_command_queue cq, int locked) | |
result = cuMemHostGetDevicePointer (&dev_ext_event_flag, | |
event_data->ext_event_flag, 0); | |
CUDA_CHECK (result, "cuMemHostGetDevicePointer"); | |
- result = cuStreamWaitValue32 (stream, dev_ext_event_flag, 1, | |
- CU_STREAM_WAIT_VALUE_GEQ); | |
- CUDA_CHECK (result, "cuStreamWaitValue32"); | |
+ //result = cuStreamWaitValue32 (stream, dev_ext_event_flag, 1, | |
+ // CU_STREAM_WAIT_VALUE_GEQ); | |
+ //CUDA_CHECK (result, "cuStreamWaitValue32"); | |
} | |
/* Create and record event for command start if profiling enabled */ | |
@@ -1428,6 +1428,7 @@ pocl_cuda_finalize_command (cl_device_id device, cl_event event) | |
cuCtxSetCurrent (((pocl_cuda_device_data_t *)device->data)->context); | |
result = cuEventSynchronize (event_data->end); | |
CUDA_CHECK (result, "cuEventSynchronize"); | |
+ | |
/* Clean up mapped memory allocations */ | |
if (event->command_type == CL_COMMAND_UNMAP_MEM_OBJECT) | |
@@ -1450,8 +1451,11 @@ pocl_cuda_finalize_command (cl_device_id device, cl_event event) | |
{ | |
#if defined __arm__ | |
/* On ARM with USE_HOST_PTR, perform explict copies back from device */ | |
- cl_kernel kernel = event->command.run.kernel; | |
- pocl_argument *arguments = event->command.run.arguments; | |
+ //cl_kernel kernel = event->command.run.kernel; | |
+ //pocl_argument *arguments = event->command.run.arguments; | |
+ cl_kernel kernel = event->command->command.run.kernel; | |
+ pocl_argument *arguments = event->command->command.run.arguments; | |
+ pocl_kernel_metadata_t *meta = kernel->meta; | |
unsigned i; | |
for (i = 0; i < meta->num_args; i++) | |
{ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Idea: pocl/pocl#600