Skip to content

Instantly share code, notes, and snippets.

@VibhuJawa
Last active October 4, 2019 17:08
Show Gist options
  • Save VibhuJawa/d5aa8a21d6e771f606267ff01e14b5f3 to your computer and use it in GitHub Desktop.
Save VibhuJawa/d5aa8a21d6e771f606267ff01e14b5f3 to your computer and use it in GitHub Desktop.
xgboost_dmat_bug
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import xgboost \n",
"import dask_cudf\n",
"import numpy as np\n",
"from dask.distributed import Client, wait\n",
"from dask.dataframe import from_delayed\n",
"import cudf\n",
"import dask\n",
"from dask_cuda import LocalCUDACluster"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"_py-xgboost-mutex 2.0 cpu_0 conda-forge\n",
"dask-xgboost 0.2.0.dev28 cuda10.0py37_0 rapidsai/label/xgboost\n",
"libxgboost 0.90 he1b5a44_2 conda-forge\n",
"py-xgboost 0.90 py37he1b5a44_2 conda-forge\n",
"xgboost 1.0.0-SNAPSHOT pypi_0 pypi\n"
]
}
],
"source": [
"!conda list | grep xgboost"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table style=\"border: 2px solid white;\">\n",
"<tr>\n",
"<td style=\"vertical-align: top; border: 0px solid white\">\n",
"<h3 style=\"text-align: left;\">Client</h3>\n",
"<ul style=\"text-align: left; list-style: none; margin: 0; padding: 0;\">\n",
" <li><b>Scheduler: </b>tcp://127.0.0.1:42762</li>\n",
" <li><b>Dashboard: </b><a href='http://127.0.0.1:8787/status' target='_blank'>http://127.0.0.1:8787/status</a>\n",
"</ul>\n",
"</td>\n",
"<td style=\"vertical-align: top; border: 0px solid white\">\n",
"<h3 style=\"text-align: left;\">Cluster</h3>\n",
"<ul style=\"text-align: left; list-style:none; margin: 0; padding: 0;\">\n",
" <li><b>Workers: </b>8</li>\n",
" <li><b>Cores: </b>8</li>\n",
" <li><b>Memory: </b>0 B</li>\n",
"</ul>\n",
"</td>\n",
"</tr>\n",
"</table>"
],
"text/plain": [
"<Client: 'tcp://127.0.0.1:42762' processes=8 threads=8>"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"### does not occur if i set memory limit to a bug number\n",
"# cluster = LocalCUDACluster(memory_limit = '100 GB')\n",
"\n",
"### memory limit = 0\n",
"## below causes pickling errors \n",
"cluster = LocalCUDACluster(memory_limit = 0)\n",
"\n",
"client = Client(cluster)\n",
"client"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"df = dask_cudf.from_cudf(cudf.DataFrame({'x':np.ones(shape = 1_000_000),'y':np.ones(shape = 1_000_000)}), npartitions=8)\n",
"df = df.persist()\n",
"done = wait(df)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"def get_dmatrix_from_persisted_df(df,label_col = 'y',non_label_columns=['x']):\n",
" gpu_dfs = [(gpu_df[[label_col]], gpu_df[non_label_columns]) for gpu_df in df.to_delayed()]\n",
" gpu_dfs = [(gpu_df[0].persist(), gpu_df[1].persist()) for gpu_df in gpu_dfs]\n",
"\n",
" dmat_ls = [dask.delayed(xgboost.DMatrix)(gpu_df[1], gpu_df[0]) for gpu_df in gpu_dfs]\n",
" dmat_ls = [dmat.persist() for dmat in dmat_ls]\n",
" wait(dmat_ls)\n",
" \n",
" return dmat_ls"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"### Hangs here if i use * memory limit = 0 , does not hang if memory_limit is a big number * \n",
"### Does not hang if use *Local cuda_cluster * \n",
"dmat = get_dmatrix_from_persisted_df(df)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Delayed('DMatrix-06ad1d19-7b69-4b4c-9fcb-52ae9465a022'),\n",
" Delayed('DMatrix-eccf012d-5a4b-4950-8e0c-59a7db45f413'),\n",
" Delayed('DMatrix-2ee51dfe-dfe3-4f30-ab8e-cc1cdcb3a7aa'),\n",
" Delayed('DMatrix-d9e102c3-1abb-4b30-a077-f4c00fa5366f'),\n",
" Delayed('DMatrix-37e4f4ac-1d6f-49a7-996d-78a43acb6a7a'),\n",
" Delayed('DMatrix-43e786e2-f872-4c89-b338-6c2bd589e3af'),\n",
" Delayed('DMatrix-e2199448-14bc-4900-89d3-b7342ad8fae1'),\n",
" Delayed('DMatrix-475438b1-f808-4c4a-a884-f332a1005408')]"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dmat"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Worker Error trace "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Stack Trace: \n",
"```\n",
"distributed.worker - ERROR - Could not serialize object of type DMatrix.\n",
"```\n",
"\n",
"Full Stack Trace:\n",
"```\n",
"distributed.worker - ERROR - Could not serialize object of type DMatrix. Traceback (most recent call last): File \"/home/nfs/vjawa/anaconda3/envs/RAPIDS/lib/python3.7/site-packages/distributed/protocol/pickle.py\", line 38, in dumps result = pickle.dumps(x, protocol=pickle.HIGHEST_PROTOCOL) ValueError: ctypes objects containing pointers cannot be pickled During handling of the above exception, another exception occurred: Traceback (most recent call last): File \"/home/nfs/vjawa/anaconda3/envs/RAPIDS/lib/python3.7/site-packages/distributed/protocol/serialize.py\", line 191, in serialize header, frames = dumps(x, context=context) if wants_context else dumps(x) File \"/home/nfs/vjawa/anaconda3/envs/RAPIDS/lib/python3.7/site-packages/distributed/protocol/serialize.py\", line 58, in pickle_dumps return {\"serializer\": \"pickle\"}, [pickle.dumps(x)] File \"/home/nfs/vjawa/anaconda3/envs/RAPIDS/lib/python3.7/site-packages/distributed/protocol/pickle.py\", line 51, in dumps return cloudpickle.dumps(x, protocol=pickle.HIGHEST_PROTOCOL) File \"/home/nfs/vjawa/anaconda3/envs/RAPIDS/lib/python3.7/site-packages/cloudpickle/cloudpickle.py\", line 1125, in dumps cp.dump(obj) File \"/home/nfs/vjawa/anaconda3/envs/RAPIDS/lib/python3.7/site-packages/cloudpickle/cloudpickle.py\", line 482, in dump return Pickler.dump(self, obj) File \"/home/nfs/vjawa/anaconda3/envs/RAPIDS/lib/python3.7/pickle.py\", line 437, in dump self.save(obj) File \"/home/nfs/vjawa/anaconda3/envs/RAPIDS/lib/python3.7/pickle.py\", line 549, in save self.save_reduce(obj=obj, *rv) File \"/home/nfs/vjawa/anaconda3/envs/RAPIDS/lib/python3.7/pickle.py\", line 662, in save_reduce save(state) File \"/home/nfs/vjawa/anaconda3/envs/RAPIDS/lib/python3.7/pickle.py\", line 504, in save f(self, obj) # Call unbound method with explicit self File \"/home/nfs/vjawa/anaconda3/envs/RAPIDS/lib/python3.7/pickle.py\", line 856, in save_dict self._batch_setitems(obj.items()) File \"/home/nfs/vjawa/anaconda3/envs/RAPIDS/lib/python3.7/pickle.py\", line 882, in _batch_setitems save(v) File \"/home/nfs/vjawa/anaconda3/envs/RAPIDS/lib/python3.7/pickle.py\", line 524, in save rv = reduce(self.proto) ValueError: ctypes objects containing pointers cannot be pickled Traceback (most recent call last): File \"/home/nfs/vjawa/anaconda3/envs/RAPIDS/lib/python3.7/site-packages/distributed/worker.py\", line 1632, in transition_executing_done self.send_task_state_to_scheduler(key) File \"/home/nfs/vjawa/anaconda3/envs/RAPIDS/lib/python3.7/site-packages/distributed/worker.py\", line 1788, in send_task_state_to_scheduler if key in self.data or self.actors.get(key): File \"/home/nfs/vjawa/anaconda3/envs/RAPIDS/lib/python3.7/_collections_abc.py\", line 666, in __contains__ self[key] File \"/home/nfs/vjawa/anaconda3/envs/RAPIDS/lib/python3.7/site-packages/dask_cuda/device_host_file.py\", line 154, in __getitem__ return self.host_buffer[key] File \"/home/nfs/vjawa/anaconda3/envs/RAPIDS/lib/python3.7/site-packages/zict/buffer.py\", line 70, in __getitem__ return self.slow_to_fast(key) File \"/home/nfs/vjawa/anaconda3/envs/RAPIDS/lib/python3.7/site-packages/zict/buffer.py\", line 57, in slow_to_fast value = self.slow[key] File \"/home/nfs/vjawa/anaconda3/envs/RAPIDS/lib/python3.7/site-packages/zict/func.py\", line 39, in __getitem__ return self.load(self.d[key]) File \"/home/nfs/vjawa/anaconda3/envs/RAPIDS/lib/python3.7/site-packages/distributed/protocol/serialize.py\", line 468, in deserialize_bytes return deserialize(header, frames) File \"/home/nfs/vjawa/anaconda3/envs/RAPIDS/lib/python3.7/site-packages/distributed/protocol/serialize.py\", line 268, in deserialize return loads(header, frames) File \"/home/nfs/vjawa/anaconda3/envs/RAPIDS/lib/python3.7/site-packages/distributed/protocol/serialize.py\", line 80, in serialization_error_loads raise TypeError(msg) TypeError: Could not serialize object of type DMatrix. Traceback (most recent call last): File \"/home/nfs/vjawa/anaconda3/envs/RAPIDS/lib/python3.7/site-packages/distributed/protocol/pickle.py\", line 38, in dumps result = pickle.dumps(x, protocol=pickle.HIGHEST_PROTOCOL) ValueError: ctypes objects containing pointers cannot be pickled During handling of the above exception, another exception occurred: Traceback (most recent call last): File \"/home/nfs/vjawa/anaconda3/envs/RAPIDS/lib/python3.7/site-packages/distributed/protocol/serialize.py\", line 191, in serialize header, frames = dumps(x, context=context) if wants_context else dumps(x) File \"/home/nfs/vjawa/anaconda3/envs/RAPIDS/lib/python3.7/site-packages/distributed/protocol/serialize.py\", line 58, in pickle_dumps return {\"serializer\": \"pickle\"}, [pickle.dumps(x)] File \"/home/nfs/vjawa/anaconda3/envs/RAPIDS/lib/python3.7/site-packages/distributed/protocol/pickle.py\", line 51, in dumps return cloudpickle.dumps(x, protocol=pickle.HIGHEST_PROTOCOL) File \"/home/nfs/vjawa/anaconda3/envs/RAPIDS/lib/python3.7/site-packages/cloudpickle/cloudpickle.py\", line 1125, in dumps cp.dump(obj) File \"/home/nfs/vjawa/anaconda3/envs/RAPIDS/lib/python3.7/site-packages/cloudpickle/cloudpickle.py\", line 482, in dump return Pickler.dump(self, obj) File \"/home/nfs/vjawa/anaconda3/envs/RAPIDS/lib/python3.7/pickle.py\", line 437, in dump self.save(obj) File \"/home/nfs/vjawa/anaconda3/envs/RAPIDS/lib/python3.7/pickle.py\", line 549, in save self.save_reduce(obj=obj, *rv) File \"/home/nfs/vjawa/anaconda3/envs/RAPIDS/lib/python3.7/pickle.py\", line 662, in save_reduce save(state) File \"/home/nfs/vjawa/anaconda3/envs/RAPIDS/lib/python3.7/pickle.py\", line 504, in save f(self, obj) # Call unbound method with explicit self File \"/home/nfs/vjawa/anaconda3/envs/RAPIDS/lib/python3.7/pickle.py\", line 856, in save_dict self._batch_setitems(obj.items()) File \"/home/nfs/vjawa/anaconda3/envs/RAPIDS/lib/python3.7/pickle.py\", line 882, in _batch_setitems save(v) File \"/home/nfs/vjawa/anaconda3/envs/RAPIDS/lib/python3.7/pickle.py\", line 524, in save rv = reduce(self.proto) ValueError: ctypes objects containing pointers cannot be pickled\n",
"```"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment