Last active
October 4, 2019 17:08
-
-
Save VibhuJawa/d5aa8a21d6e771f606267ff01e14b5f3 to your computer and use it in GitHub Desktop.
xgboost_dmat_bug
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import xgboost \n", | |
"import dask_cudf\n", | |
"import numpy as np\n", | |
"from dask.distributed import Client, wait\n", | |
"from dask.dataframe import from_delayed\n", | |
"import cudf\n", | |
"import dask\n", | |
"from dask_cuda import LocalCUDACluster" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"_py-xgboost-mutex 2.0 cpu_0 conda-forge\n", | |
"dask-xgboost 0.2.0.dev28 cuda10.0py37_0 rapidsai/label/xgboost\n", | |
"libxgboost 0.90 he1b5a44_2 conda-forge\n", | |
"py-xgboost 0.90 py37he1b5a44_2 conda-forge\n", | |
"xgboost 1.0.0-SNAPSHOT pypi_0 pypi\n" | |
] | |
} | |
], | |
"source": [ | |
"!conda list | grep xgboost" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<table style=\"border: 2px solid white;\">\n", | |
"<tr>\n", | |
"<td style=\"vertical-align: top; border: 0px solid white\">\n", | |
"<h3 style=\"text-align: left;\">Client</h3>\n", | |
"<ul style=\"text-align: left; list-style: none; margin: 0; padding: 0;\">\n", | |
" <li><b>Scheduler: </b>tcp://127.0.0.1:42762</li>\n", | |
" <li><b>Dashboard: </b><a href='http://127.0.0.1:8787/status' target='_blank'>http://127.0.0.1:8787/status</a>\n", | |
"</ul>\n", | |
"</td>\n", | |
"<td style=\"vertical-align: top; border: 0px solid white\">\n", | |
"<h3 style=\"text-align: left;\">Cluster</h3>\n", | |
"<ul style=\"text-align: left; list-style:none; margin: 0; padding: 0;\">\n", | |
" <li><b>Workers: </b>8</li>\n", | |
" <li><b>Cores: </b>8</li>\n", | |
" <li><b>Memory: </b>0 B</li>\n", | |
"</ul>\n", | |
"</td>\n", | |
"</tr>\n", | |
"</table>" | |
], | |
"text/plain": [ | |
"<Client: 'tcp://127.0.0.1:42762' processes=8 threads=8>" | |
] | |
}, | |
"execution_count": 3, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"### does not occur if I set memory limit to a big number\n", | |
"# cluster = LocalCUDACluster(memory_limit = '100 GB')\n", | |
"\n", | |
"### memory limit = 0\n", | |
"## below causes pickling errors \n", | |
"cluster = LocalCUDACluster(memory_limit = 0)\n", | |
"\n", | |
"client = Client(cluster)\n", | |
"client" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df = dask_cudf.from_cudf(cudf.DataFrame({'x':np.ones(shape = 1_000_000),'y':np.ones(shape = 1_000_000)}), npartitions=8)\n", | |
"df = df.persist()\n", | |
"done = wait(df)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def get_dmatrix_from_persisted_df(df,label_col = 'y',non_label_columns=['x']):\n", | |
" gpu_dfs = [(gpu_df[[label_col]], gpu_df[non_label_columns]) for gpu_df in df.to_delayed()]\n", | |
" gpu_dfs = [(gpu_df[0].persist(), gpu_df[1].persist()) for gpu_df in gpu_dfs]\n", | |
"\n", | |
" dmat_ls = [dask.delayed(xgboost.DMatrix)(gpu_df[1], gpu_df[0]) for gpu_df in gpu_dfs]\n", | |
" dmat_ls = [dmat.persist() for dmat in dmat_ls]\n", | |
" wait(dmat_ls)\n", | |
" \n", | |
" return dmat_ls" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"### Hangs here if I use memory_limit = 0; does not hang if memory_limit is a big number\n", | |
"### Does not hang if I use *Local cuda_cluster*\n", | |
"dmat = get_dmatrix_from_persisted_df(df)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[Delayed('DMatrix-06ad1d19-7b69-4b4c-9fcb-52ae9465a022'),\n", | |
" Delayed('DMatrix-eccf012d-5a4b-4950-8e0c-59a7db45f413'),\n", | |
" Delayed('DMatrix-2ee51dfe-dfe3-4f30-ab8e-cc1cdcb3a7aa'),\n", | |
" Delayed('DMatrix-d9e102c3-1abb-4b30-a077-f4c00fa5366f'),\n", | |
" Delayed('DMatrix-37e4f4ac-1d6f-49a7-996d-78a43acb6a7a'),\n", | |
" Delayed('DMatrix-43e786e2-f872-4c89-b338-6c2bd589e3af'),\n", | |
" Delayed('DMatrix-e2199448-14bc-4900-89d3-b7342ad8fae1'),\n", | |
" Delayed('DMatrix-475438b1-f808-4c4a-a884-f332a1005408')]" | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"dmat" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Worker Error trace " | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Stack Trace: \n", | |
"```\n", | |
"distributed.worker - ERROR - Could not serialize object of type DMatrix.\n", | |
"```\n", | |
"\n", | |
"Full Stack Trace:\n", | |
"```\n", | |
"distributed.worker - ERROR - Could not serialize object of type DMatrix. Traceback (most recent call last): File \"/home/nfs/vjawa/anaconda3/envs/RAPIDS/lib/python3.7/site-packages/distributed/protocol/pickle.py\", line 38, in dumps result = pickle.dumps(x, protocol=pickle.HIGHEST_PROTOCOL) ValueError: ctypes objects containing pointers cannot be pickled During handling of the above exception, another exception occurred: Traceback (most recent call last): File \"/home/nfs/vjawa/anaconda3/envs/RAPIDS/lib/python3.7/site-packages/distributed/protocol/serialize.py\", line 191, in serialize header, frames = dumps(x, context=context) if wants_context else dumps(x) File \"/home/nfs/vjawa/anaconda3/envs/RAPIDS/lib/python3.7/site-packages/distributed/protocol/serialize.py\", line 58, in pickle_dumps return {\"serializer\": \"pickle\"}, [pickle.dumps(x)] File \"/home/nfs/vjawa/anaconda3/envs/RAPIDS/lib/python3.7/site-packages/distributed/protocol/pickle.py\", line 51, in dumps return cloudpickle.dumps(x, protocol=pickle.HIGHEST_PROTOCOL) File \"/home/nfs/vjawa/anaconda3/envs/RAPIDS/lib/python3.7/site-packages/cloudpickle/cloudpickle.py\", line 1125, in dumps cp.dump(obj) File \"/home/nfs/vjawa/anaconda3/envs/RAPIDS/lib/python3.7/site-packages/cloudpickle/cloudpickle.py\", line 482, in dump return Pickler.dump(self, obj) File \"/home/nfs/vjawa/anaconda3/envs/RAPIDS/lib/python3.7/pickle.py\", line 437, in dump self.save(obj) File \"/home/nfs/vjawa/anaconda3/envs/RAPIDS/lib/python3.7/pickle.py\", line 549, in save self.save_reduce(obj=obj, *rv) File \"/home/nfs/vjawa/anaconda3/envs/RAPIDS/lib/python3.7/pickle.py\", line 662, in save_reduce save(state) File \"/home/nfs/vjawa/anaconda3/envs/RAPIDS/lib/python3.7/pickle.py\", line 504, in save f(self, obj) # Call unbound method with explicit self File \"/home/nfs/vjawa/anaconda3/envs/RAPIDS/lib/python3.7/pickle.py\", line 856, in save_dict self._batch_setitems(obj.items()) File 
\"/home/nfs/vjawa/anaconda3/envs/RAPIDS/lib/python3.7/pickle.py\", line 882, in _batch_setitems save(v) File \"/home/nfs/vjawa/anaconda3/envs/RAPIDS/lib/python3.7/pickle.py\", line 524, in save rv = reduce(self.proto) ValueError: ctypes objects containing pointers cannot be pickled Traceback (most recent call last): File \"/home/nfs/vjawa/anaconda3/envs/RAPIDS/lib/python3.7/site-packages/distributed/worker.py\", line 1632, in transition_executing_done self.send_task_state_to_scheduler(key) File \"/home/nfs/vjawa/anaconda3/envs/RAPIDS/lib/python3.7/site-packages/distributed/worker.py\", line 1788, in send_task_state_to_scheduler if key in self.data or self.actors.get(key): File \"/home/nfs/vjawa/anaconda3/envs/RAPIDS/lib/python3.7/_collections_abc.py\", line 666, in __contains__ self[key] File \"/home/nfs/vjawa/anaconda3/envs/RAPIDS/lib/python3.7/site-packages/dask_cuda/device_host_file.py\", line 154, in __getitem__ return self.host_buffer[key] File \"/home/nfs/vjawa/anaconda3/envs/RAPIDS/lib/python3.7/site-packages/zict/buffer.py\", line 70, in __getitem__ return self.slow_to_fast(key) File \"/home/nfs/vjawa/anaconda3/envs/RAPIDS/lib/python3.7/site-packages/zict/buffer.py\", line 57, in slow_to_fast value = self.slow[key] File \"/home/nfs/vjawa/anaconda3/envs/RAPIDS/lib/python3.7/site-packages/zict/func.py\", line 39, in __getitem__ return self.load(self.d[key]) File \"/home/nfs/vjawa/anaconda3/envs/RAPIDS/lib/python3.7/site-packages/distributed/protocol/serialize.py\", line 468, in deserialize_bytes return deserialize(header, frames) File \"/home/nfs/vjawa/anaconda3/envs/RAPIDS/lib/python3.7/site-packages/distributed/protocol/serialize.py\", line 268, in deserialize return loads(header, frames) File \"/home/nfs/vjawa/anaconda3/envs/RAPIDS/lib/python3.7/site-packages/distributed/protocol/serialize.py\", line 80, in serialization_error_loads raise TypeError(msg) TypeError: Could not serialize object of type DMatrix. 
Traceback (most recent call last): File \"/home/nfs/vjawa/anaconda3/envs/RAPIDS/lib/python3.7/site-packages/distributed/protocol/pickle.py\", line 38, in dumps result = pickle.dumps(x, protocol=pickle.HIGHEST_PROTOCOL) ValueError: ctypes objects containing pointers cannot be pickled During handling of the above exception, another exception occurred: Traceback (most recent call last): File \"/home/nfs/vjawa/anaconda3/envs/RAPIDS/lib/python3.7/site-packages/distributed/protocol/serialize.py\", line 191, in serialize header, frames = dumps(x, context=context) if wants_context else dumps(x) File \"/home/nfs/vjawa/anaconda3/envs/RAPIDS/lib/python3.7/site-packages/distributed/protocol/serialize.py\", line 58, in pickle_dumps return {\"serializer\": \"pickle\"}, [pickle.dumps(x)] File \"/home/nfs/vjawa/anaconda3/envs/RAPIDS/lib/python3.7/site-packages/distributed/protocol/pickle.py\", line 51, in dumps return cloudpickle.dumps(x, protocol=pickle.HIGHEST_PROTOCOL) File \"/home/nfs/vjawa/anaconda3/envs/RAPIDS/lib/python3.7/site-packages/cloudpickle/cloudpickle.py\", line 1125, in dumps cp.dump(obj) File \"/home/nfs/vjawa/anaconda3/envs/RAPIDS/lib/python3.7/site-packages/cloudpickle/cloudpickle.py\", line 482, in dump return Pickler.dump(self, obj) File \"/home/nfs/vjawa/anaconda3/envs/RAPIDS/lib/python3.7/pickle.py\", line 437, in dump self.save(obj) File \"/home/nfs/vjawa/anaconda3/envs/RAPIDS/lib/python3.7/pickle.py\", line 549, in save self.save_reduce(obj=obj, *rv) File \"/home/nfs/vjawa/anaconda3/envs/RAPIDS/lib/python3.7/pickle.py\", line 662, in save_reduce save(state) File \"/home/nfs/vjawa/anaconda3/envs/RAPIDS/lib/python3.7/pickle.py\", line 504, in save f(self, obj) # Call unbound method with explicit self File \"/home/nfs/vjawa/anaconda3/envs/RAPIDS/lib/python3.7/pickle.py\", line 856, in save_dict self._batch_setitems(obj.items()) File \"/home/nfs/vjawa/anaconda3/envs/RAPIDS/lib/python3.7/pickle.py\", line 882, in _batch_setitems save(v) File 
\"/home/nfs/vjawa/anaconda3/envs/RAPIDS/lib/python3.7/pickle.py\", line 524, in save rv = reduce(self.proto) ValueError: ctypes objects containing pointers cannot be pickled\n", | |
"```" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.7.3" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 4 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment