ita9naiwa · December 12, 2021 15:16
diff --git a/gistfile1.ipynb b/gistfile1.ipynb
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "ee9fbdce",
   "metadata": {},
   "outputs": [],
   "source": [
    "def rowwise_norm(arr, norm='mean'):\n",
    "    \"\"\"Rescale every row of `arr` so its entries sum to (almost) one.\n",
    "\n",
    "    Each row is cast to float32 and divided by `1e-10 + row.sum()`; the small\n",
    "    constant means an all-zero row stays all-zero instead of dividing by zero.\n",
    "    `norm` is accepted but never read by the body.\n",
    "    Returns the rescaled rows stacked into a float32 array.\n",
    "    \"\"\"\n",
    "    rows = (arr[idx].astype(np.float32) for idx in range(arr.shape[0]))\n",
    "    scaled = [row / (1e-10 + row.sum()) for row in rows]\n",
    "    return np.asarray(scaled).astype(np.float32)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "f758b56e",
   "metadata": {},
   "outputs": [],
   "source": [
    "from tqdm.auto import tqdm\n",
    "\n",
    "\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "from scipy.sparse import coo_matrix, csr_matrix\n",
    "from implicit import evaluation"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "538195b2",
   "metadata": {},
   "source": [
    "### Data Preparation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "b756a329",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the '::'-separated ratings file into an integer array; columns 0, 1\n",
    "# and 2 are used below as user id, item id and rating.\n",
    "ui_mat = pd.read_csv(\n",
    "    \"data/ml-1m/ratings.dat\",\n",
    "    sep='::',\n",
    "    engine='python',\n",
    "    encoding='ISO-8859-1',\n",
    "    header=None,\n",
    ").to_numpy().astype(int)\n",
    "# Ids are shifted down by one so they can serve as row / column indices\n",
    "# (presumably the file uses 1-based ids -- verify against the dataset).\n",
    "u, i = ui_mat[:, 0] - 1, ui_mat[:, 1] - 1\n",
    "r = ui_mat[:, 2]\n",
    "# Keep only interactions rated 4 or higher and store them as ones.\n",
    "ui_mat = csr_matrix((r, (u, i))) >= 4\n",
    "ui_mat.eliminate_zeros()\n",
    "ui_mat.data[:] = 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "9eefcb4e",
   "metadata": {},
   "outputs": [],
   "source": [
    "tr, te = evaluation.train_test_split(ui_mat, 0.5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "4904684b",
   "metadata": {},
   "outputs": [],
   "source": [
    "n_users, n_items = tr.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "8ba29d2d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Map each movie id to the list of genres parsed from its '|'-separated genre field.\n",
    "genres = pd.read_csv(\"data/ml-1m/movies.dat\", sep='::', engine='python', encoding='ISO-8859-1', header=None)\n",
    "genres.columns = ['id', 'title', 'genre']\n",
    "genres = {x: y.strip().split('|') for (x, y) in zip(genres['id'], genres['genre'])}\n",
    "\n",
    "# Collect every distinct genre name. The loop variable `i` is kept as is\n",
    "# because the next cell displays `genres[i]` using its final value.\n",
    "unique_genres = set()\n",
    "for i, g in genres.items():\n",
    "    unique_genres |= set(g)\n",
    "# Sort before numbering: iteration order of a set of strings can change\n",
    "# between interpreter runs, which would silently permute the genre columns.\n",
    "genre_map = {g: i for (i, g) in enumerate(sorted(unique_genres))}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "bf67112b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['Drama', 'Thriller']"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "genres[i]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "5a786fc0",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Item-by-genre matrix: row `item_idx` describes the movie whose id is\n",
    "# `item_idx + 1`; ids that are missing from `genres` keep an all-zero row.\n",
    "genre_mat = np.zeros((n_items, len(unique_genres)))\n",
    "for item_idx in range(n_items):\n",
    "    for genre_name in genres.get(item_idx + 1, ()):\n",
    "        genre_mat[item_idx, genre_map[genre_name]] = 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "bb4a8508",
   "metadata": {},
   "outputs": [],
   "source": [
    "num_topics = genre_mat.shape[1]\n",
    "genre_mat = genre_mat.astype(np.int32)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "7d88b012",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sparse copy of the item-by-genre matrix, used by the metric functions.\n",
    "genre_csr = csr_matrix(genre_mat)\n",
    "# Matrix product of the training interactions with the genre matrix gives one\n",
    "# row of per-genre totals per user; each row is then normalised.\n",
    "res = tr.dot(genre_mat)\n",
    "user_phi_dist = rowwise_norm(res)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "55c686bc",
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_gain(test_item_list, uid_true, genre_csr, user_pref, alpha=0.05, beta=0.99):\n",
    "    \"\"\"Compute the gain of every position of a ranked list for one user.\n",
    "\n",
    "    For each item, in list order, the gain is\n",
    "    `1 - prod_c(1 - P * user_pref[c] * (1 - alpha) ** tau[c] * (1 - beta) ** rho[c])`\n",
    "    taken over the genres `c` of the item (the column indices stored in its\n",
    "    row of `genre_csr`). `P` is `beta` when the item is in `uid_true` and\n",
    "    `alpha` otherwise; `tau[c]` counts the earlier items carrying genre `c`\n",
    "    and `rho[c]` counts the earlier hits carrying it.\n",
    "\n",
    "    Args:\n",
    "        test_item_list: ranked sequence of item indices (rows of `genre_csr`).\n",
    "        uid_true: collection of the item indices that count as hits.\n",
    "        genre_csr: sparse matrix with one row per item and one column per genre.\n",
    "        user_pref: per-genre weights, indexable by genre column.\n",
    "        alpha: value of `P` for an item that is not a hit.\n",
    "        beta: value of `P` for an item that is a hit.\n",
    "\n",
    "    Returns:\n",
    "        A list with one gain per entry of `test_item_list`.\n",
    "    \"\"\"\n",
    "    num_topics = genre_csr.shape[1]\n",
    "    # Hash the hits once so each membership test is O(1) instead of a scan\n",
    "    # over `uid_true` for every ranked item.\n",
    "    true_items = set(uid_true.tolist() if hasattr(uid_true, 'tolist') else uid_true)\n",
    "    rho = np.zeros(num_topics)\n",
    "    tau = np.zeros(num_topics)\n",
    "    gains = []\n",
    "    for iid in test_item_list:\n",
    "        hit = int(iid in true_items)\n",
    "        # P depends only on the item, so it is computed once per item rather\n",
    "        # than once per genre.\n",
    "        P_a_u_i = (1 - hit) * alpha + hit * beta\n",
    "        p = 1\n",
    "        for c in genre_csr[iid].indices:\n",
    "            p *= (1 - P_a_u_i * user_pref[c] * ((1 - alpha) ** tau[c]) * ((1 - beta) ** rho[c]))\n",
    "            tau[c] += 1\n",
    "            rho[c] += hit\n",
    "        gains.append(1.0 - p)\n",
    "    return gains\n",
    "\n",
    "\n",
    "def get_ideal_order(test_item_list, uid_true, genre_csr, user_pref):\n",
    "    \"\"\"Greedily reorder `test_item_list` into a reference ('ideal') ranking.\n",
    "\n",
    "    At every step the candidates are the remaining items found in `uid_true`,\n",
    "    or all remaining items once none of those is left; the candidate whose\n",
    "    genres have the largest summed `user_pref` is appended next.\n",
    "\n",
    "    Duplicate item ids are ranked once, so the result can be shorter than the\n",
    "    input. (Looping a fixed `len(test_item_list)` times would call\n",
    "    `np.argmax` on an empty list as soon as duplicates exhaust the pool.)\n",
    "\n",
    "    Returns the reordered item ids as a Python list.\n",
    "    \"\"\"\n",
    "    remaining = np.copy(test_item_list).tolist()\n",
    "    k = len(remaining)\n",
    "    # Size the genre counter from the matrix itself instead of relying on a\n",
    "    # notebook-level `num_topics` variable being defined.\n",
    "    topic_array = np.zeros(genre_csr.shape[1])\n",
    "    # Weight of the genre-repetition penalty. It is 0.0, so the penalty term\n",
    "    # currently never changes a score; it is kept so it can be switched on.\n",
    "    redundancy_weight = 0.0\n",
    "    ideal_list = []\n",
    "    while remaining:\n",
    "        target_item_list = [x for x in remaining if x in uid_true]\n",
    "        if len(target_item_list) == 0:\n",
    "            target_item_list = remaining\n",
    "\n",
    "        scores = []\n",
    "        for iid in target_item_list:\n",
    "            score = (genre_csr[iid] * user_pref).sum() - (redundancy_weight / k) * (genre_csr[iid] * topic_array).sum()\n",
    "            scores.append(score)\n",
    "\n",
    "        chosen_item = target_item_list[np.argmax(scores)]\n",
    "        ideal_list.append(chosen_item)\n",
    "        topic_array = topic_array + np.asarray(genre_csr[chosen_item].todense()).ravel()\n",
    "        remaining = [x for x in remaining if x != chosen_item]\n",
    "    return ideal_list"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "7dd47079",
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_ideal_order_fast(test_item_list, uid_true, genre_csr, user_pref):\n",
    "    \"\"\"Vectorised reference ('ideal') ordering of `test_item_list`.\n",
    "\n",
    "    Each item is scored as `100 * (item in uid_true)` plus the product of its\n",
    "    `genre_csr` row with `user_pref`; items are returned by descending score.\n",
    "    The +100 bonus is meant to put every hit ahead of every non-hit, which\n",
    "    assumes the genre part of the score stays below 100 (TODO confirm for\n",
    "    un-normalised `user_pref`).\n",
    "\n",
    "    Returns a NumPy array holding the reordered item ids.\n",
    "    \"\"\"\n",
    "    items = np.copy(test_item_list)\n",
    "    if items.size == 0:\n",
    "        # Nothing to rank; this also avoids indexing the sparse matrix with\n",
    "        # an empty, float-typed index array.\n",
    "        return items\n",
    "    # One vectorised membership test replaces a scan of `uid_true` per item.\n",
    "    hit = 100 * np.isin(items, np.asarray(list(uid_true)))\n",
    "    score = hit + (genre_csr[items] * user_pref)\n",
    "    return items[np.argsort(-score)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "653b59af",
   "metadata": {},
   "outputs": [],
   "source": [
    "def ab_ndcg(uid, test_item_list, te_mat, genre_csr, user_phi_dist):\n",
    "    \"\"\"Score one user's ranked list relative to a reference ordering.\n",
    "\n",
    "    Gains come from `get_gain` for the list as given and for the same items\n",
    "    reordered by `get_ideal_order_fast`. Both gain sequences are discounted\n",
    "    by `1 / log2(rank + 1)` and summed; the ratio of the two sums, capped at\n",
    "    1, is returned.\n",
    "    \"\"\"\n",
    "    relevant = te_mat[uid].indices\n",
    "    pref = user_phi_dist[uid]\n",
    "    # Position discount for ranks 1..len(test_item_list).\n",
    "    discount = 1 / np.log2(1 + np.arange(1, 1 + len(test_item_list)))\n",
    "\n",
    "    gains = get_gain(test_item_list, relevant, genre_csr, pref)\n",
    "    reference = get_ideal_order_fast(test_item_list, relevant, genre_csr, pref)\n",
    "    reference_gains = get_gain(reference, relevant, genre_csr, pref)\n",
    "\n",
    "    achieved = np.sum(gains * discount)\n",
    "    attainable = np.sum(reference_gains * discount)\n",
    "    # The epsilon guards against a zero denominator; the cap keeps the score\n",
    "    # at most 1 even if the reference order does not maximise the gain.\n",
    "    return min(1, achieved / (1e-10 + attainable))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "1bb7e11d",
   "metadata": {},
   "outputs": [],
   "source": [
    "from implicit.als import AlternatingLeastSquares as ALS\n",
    "from implicit.bpr import BayesianPersonalizedRanking as BPR"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "d1448f0e",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "WARNING:root:Intel MKL BLAS detected. Its highly recommend to set the environment variable 'export MKL_NUM_THREADS=1' to disable its internal multithreading\n"
     ]
    }
   ],
   "source": [
    "model = ALS()\n",
    "bpr = BPR()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "b847777d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "aa9a1deb3526421385a95e84ce0ee590",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/15 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "b9483b905f09498090bcf571042ba856",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/100 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "model.fit(tr.T * 10, )\n",
    "bpr.fit(tr.T,)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "527fe5ea",
   "metadata": {},
   "outputs": [],
   "source": [
    "n_users, n_items = tr.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "01b0fd48",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "c08b02f378ac4827927a2b18455fdca4",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/6040 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Fixed seed so the sampled candidate lists, and every metric computed from\n",
    "# them, are reproducible across kernel restarts.\n",
    "rng = np.random.default_rng(42)\n",
    "\n",
    "user_target_list = []\n",
    "for u in tqdm(range(n_users)):\n",
    "    # Slice each sparse row once instead of once per sampled candidate.\n",
    "    train_items = set(tr[u].indices.tolist())\n",
    "    test_items = te[u].indices\n",
    "    # Up to three held-out items of this user are added to the candidates.\n",
    "    n_pos = min(3, len(test_items))\n",
    "    positives = rng.choice(test_items, n_pos, replace=False).tolist() if n_pos else []\n",
    "    # Random candidates exclude the user's training items and the held-out\n",
    "    # items appended below, so no item id appears twice in a user's list.\n",
    "    blocked = train_items.union(positives)\n",
    "    a = [x for x in rng.choice(n_items, 100, replace=False).tolist() if x not in blocked]\n",
    "    a += positives\n",
    "    user_target_list.append(a)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c9a3d8fb",
   "metadata": {},
   "source": [
    "## Random Recommendation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "8ac87c68",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "76763aafc95742ca9f2d876df8af46a8",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/6040 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "from sklearn.utils import shuffle\n",
    "\n",
    "# Baseline: score each user's candidate list in random order. Seeding the\n",
    "# shuffle with the user id keeps the baseline reproducible between runs.\n",
    "ndcgs = []\n",
    "for uid in tqdm(range(n_users)):\n",
    "    rec = shuffle(user_target_list[uid], random_state=uid)\n",
    "    ndcg = ab_ndcg(uid, rec, te, genre_csr, user_phi_dist)\n",
    "    ndcgs.append(ndcg)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "c5022d49",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.38296358741994113"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.mean(ndcgs)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "44debbae",
   "metadata": {},
   "source": [
    "## ALS"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "bed7001b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "f539f2449c31431284e589715c718ea5",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/6040 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "0.7467418337430999"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Score the ALS model: rank each user's candidate list and compute the metric.\n",
    "# `tqdm` comes from the notebook's import cell, so it is not imported again here.\n",
    "ndcgs = []\n",
    "for uid in tqdm(range(n_users)):\n",
    "    # Only the first element of each ranked entry (the item id) is kept.\n",
    "    rec = [entry[0] for entry in model.rank_items(uid, tr, user_target_list[uid])]\n",
    "    ndcg = ab_ndcg(uid, rec, te, genre_csr, user_phi_dist)\n",
    "    ndcgs.append(ndcg)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "c8af11e3",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.7467418337430999"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.mean(ndcgs)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d5d6da52",
   "metadata": {},
   "source": [
    "## BPRMF"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "86fa474e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "ba609d3a3c564d1e815fe1b138e75905",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/6040 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Score the BPR model: rank each user's candidate list and compute the metric.\n",
    "# `tqdm` comes from the notebook's import cell, so it is not imported again here.\n",
    "ndcgs = []\n",
    "for uid in tqdm(range(n_users)):\n",
    "    # Only the first element of each ranked entry (the item id) is kept.\n",
    "    rec = [entry[0] for entry in bpr.rank_items(uid, tr, user_target_list[uid])]\n",
    "    ndcg = ab_ndcg(uid, rec, te, genre_csr, user_phi_dist)\n",
    "    ndcgs.append(ndcg)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5e4cac4b",
   "metadata": {},
   "outputs": [],
   "source": [
    "np.mean(ndcgs)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
 }
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 1,
	"id": "ee9fbdce",
	"metadata": {},
	"outputs": [],
	"source": [
	"def rowwise_norm(arr, norm='mean'):\n",
	" newmat = []\n",
	" for i in range(arr.shape[0]):\n",
	" a = arr[i].astype(np.float32)\n",
	" newmat.append(a / (1e-10 + a.sum()))\n",
	" return np.asarray(newmat).astype(np.float32)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 2,
	"id": "f758b56e",
	"metadata": {},
	"outputs": [],
	"source": [
	"from tqdm.auto import tqdm\n",
	"\n",
	"\n",
	"import pandas as pd\n",
	"import numpy as np\n",
	"from scipy.sparse import coo_matrix, csr_matrix\n",
	"from implicit import evaluation"
	]
	},
	{
	"cell_type": "markdown",
	"id": "538195b2",
	"metadata": {},
	"source": [
	"### Data Preparing"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 3,
	"id": "b756a329",
	"metadata": {},
	"outputs": [],
	"source": [
	"ui_mat = pd.read_csv(\"data/ml-1m/ratings.dat\", sep='::', engine='python', encoding='ISO-8859-1', header=None).to_numpy().astype(int)\n",
	"u = ui_mat[:, 0] - 1\n",
	"i = ui_mat[:, 1] - 1\n",
	"r = ui_mat[:, 2]\n",
	"ui_mat = csr_matrix((r, (u, i,)))\n",
	"ui_mat = ui_mat >= 4\n",
	"ui_mat.eliminate_zeros()\n",
	"ui_mat.data[:] = 1"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"id": "9eefcb4e",
	"metadata": {},
	"outputs": [],
	"source": [
	"tr, te = evaluation.train_test_split(ui_mat, 0.5)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 5,
	"id": "4904684b",
	"metadata": {},
	"outputs": [],
	"source": [
	"n_users, n_items = tr.shape"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 6,
	"id": "8ba29d2d",
	"metadata": {},
	"outputs": [],
	"source": [
	"genres = pd.read_csv(\"data/ml-1m/movies.dat\", sep='::', engine='python', encoding='ISO-8859-1', header=None)\n",
	"genres.columns = ['id', 'title', 'genre']\n",
	"genres = {x: y.strip().split('|') for (x, y) in zip(genres['id'], genres['genre'])}\n",
	"\n",
	"unique_genres = set()\n",
	"for i, g in genres.items():\n",
	"    unique_genres |= set(g)\n",
	"genre_map = {g:i for (i, g) in enumerate(unique_genres)}"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 7,
	"id": "bf67112b",
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"['Drama', 'Thriller']"
	]
	},
	"execution_count": 7,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"genres[i]"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 8,
	"id": "5a786fc0",
	"metadata": {},
	"outputs": [],
	"source": [
	"genre_mat = []\n",
	"for i in range(n_items):\n",
	" j = i + 1\n",
	" u = np.zeros(len(unique_genres))\n",
	" if j in genres:\n",
	" for k in genres[j]:\n",
	" u[genre_map[k]] = 1\n",
	" genre_mat.append(u)\n",
	"genre_mat = np.asarray(genre_mat)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 9,
	"id": "bb4a8508",
	"metadata": {},
	"outputs": [],
	"source": [
	"num_topics = genre_mat.shape[1]\n",
	"genre_mat = genre_mat.astype(np.int32)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 10,
	"id": "7d88b012",
	"metadata": {},
	"outputs": [],
	"source": [
	"res = tr * genre_mat\n",
	"user_phi_dist = rowwise_norm(res)\n",
	"genre_csr = csr_matrix(genre_mat)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 11,
	"id": "55c686bc",
	"metadata": {},
	"outputs": [],
	"source": [
	"def get_gain(test_item_list, uid_true, genre_csr, user_pref, alpha=0.05, beta=0.99):\n",
	"    l = len(test_item_list)\n",
	"    num_topics = genre_csr.shape[1]\n",
	"    topic_array = np.zeros_like(num_topics)\n",
	"    rho = np.zeros(num_topics)\n",
	"    tau = np.zeros(num_topics)\n",
	"    gains = []\n",
	"    for k in range(l):\n",
	"        iid = test_item_list[k]\n",
	"        hit = int(iid in uid_true)\n",
	"        p = 1\n",
	"        for c in genre_csr[iid].indices:\n",
	"            P_a_u_i = (1 - hit) * alpha + hit * beta\n",
	"            p *= (1 - P_a_u_i * user_pref[c] * ((1 - alpha) ** tau[c]) * ((1 - beta) ** rho[c]))\n",
	"#             p *= (1 - P_a_u_i * user_pref[c] * np.max(0.0, (1.0 - tau[c] * alpha)) * ((1 - beta) ** rho[c]))\n",
	"            tau[c] += 1\n",
	"            rho[c] += hit\n",
	"        gain = 1.0 - p\n",
	"        gains.append(gain)\n",
	"    return gains\n",
	"\n",
	"def get_ideal_order(test_item_list, uid_true, genre_csr, user_pref):\n",
	"    _test_item_list = np.copy(test_item_list).tolist()\n",
	"\n",
	"    k = len(_test_item_list)\n",
	"    topic_array = np.zeros(num_topics)\n",
	"    ideal_list = []\n",
	"    for i in range(k):\n",
	"        target_item_list = [x for x in _test_item_list if x in uid_true]\n",
	"        if len(target_item_list) == 0:\n",
	"            target_item_list = _test_item_list\n",
	"            \n",
	"        scores = []\n",
	"        for iid in target_item_list:\n",
	"            score = (genre_csr[iid] * user_pref).sum() - (0.0 / k) * (genre_csr[iid] * topic_array).sum()\n",
	"            scores.append(score)\n",
	"            \n",
	"        chosen = np.argmax(scores)\n",
	"        chosen_item = target_item_list[chosen]\n",
	"        ideal_list.append(chosen_item)\n",
	"        topic_array = topic_array + np.asarray(genre_csr[chosen_item].todense()).ravel()\n",
	"        _test_item_list = [x for x in _test_item_list if x != chosen_item]\n",
	"    return ideal_list"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 12,
	"id": "7dd47079",
	"metadata": {},
	"outputs": [],
	"source": [
	"def get_ideal_order_fast(test_item_list, uid_true, genre_csr, user_pref):\n",
	" _test_item_list = np.copy(test_item_list)\n",
	"\n",
	" k = len(_test_item_list)\n",
	" topic_array = np.zeros(num_topics)\n",
	" ideal_list = []\n",
	" hit = np.array([100 * (x in uid_true) for x in _test_item_list])\n",
	" score = hit + (genre_csr[_test_item_list] * user_pref)\n",
	" o = np.argsort(-score)\n",
	" return _test_item_list[o]"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 13,
	"id": "653b59af",
	"metadata": {},
	"outputs": [],
	"source": [
	"def ab_ndcg(uid, test_item_list, te_mat, genre_csr, user_phi_dist):\n",
	" K = len(test_item_list)\n",
	" uid_true = te_mat[uid].indices\n",
	" gains = get_gain(test_item_list, uid_true, genre_csr, user_phi_dist[uid])\n",
	" ideal_order = get_ideal_order_fast(test_item_list, uid_true, genre_csr, user_phi_dist[uid])\n",
	" ideal_gains = get_gain(ideal_order, uid_true, genre_csr, user_phi_dist[uid])\n",
	" U = np.sum(gains * (1 / np.log2(1 + np.arange(1, 1 + K))))\n",
	" D = np.sum(ideal_gains * (1 / np.log2(1 + np.arange(1, 1 + K))))\n",
	" return min(1, U / (1e-10 + D))\n",
	"# return U"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 14,
	"id": "1bb7e11d",
	"metadata": {},
	"outputs": [],
	"source": [
	"from implicit.als import AlternatingLeastSquares as ALS\n",
	"from implicit.bpr import BayesianPersonalizedRanking as BPR"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 15,
	"id": "d1448f0e",
	"metadata": {},
	"outputs": [
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"WARNING:root:Intel MKL BLAS detected. Its highly recommend to set the environment variable 'export MKL_NUM_THREADS=1' to disable its internal multithreading\n"
	]
	}
	],
	"source": [
	"model = ALS()\n",
	"bpr = BPR()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 16,
	"id": "b847777d",
	"metadata": {},
	"outputs": [
	{
	"data": {
	"application/vnd.jupyter.widget-view+json": {
	"model_id": "aa9a1deb3526421385a95e84ce0ee590",
	"version_major": 2,
	"version_minor": 0
	},
	"text/plain": [
	"  0%|          | 0/15 [00:00<?, ?it/s]"
	]
	},
	"metadata": {},
	"output_type": "display_data"
	},
	{
	"data": {
	"application/vnd.jupyter.widget-view+json": {
	"model_id": "b9483b905f09498090bcf571042ba856",
	"version_major": 2,
	"version_minor": 0
	},
	"text/plain": [
	"  0%|          | 0/100 [00:00<?, ?it/s]"
	]
	},
	"metadata": {},
	"output_type": "display_data"
	}
	],
	"source": [
	"model.fit(tr.T * 10, )\n",
	"bpr.fit(tr.T,)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 17,
	"id": "527fe5ea",
	"metadata": {},
	"outputs": [],
	"source": [
	"n_users, n_items = tr.shape"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 18,
	"id": "01b0fd48",
	"metadata": {},
	"outputs": [
	{
	"data": {
	"application/vnd.jupyter.widget-view+json": {
	"model_id": "c08b02f378ac4827927a2b18455fdca4",
	"version_major": 2,
	"version_minor": 0
	},
	"text/plain": [
	"  0%|          | 0/6040 [00:00<?, ?it/s]"
	]
	},
	"metadata": {},
	"output_type": "display_data"
	}
	],
	"source": [
	"user_target_list = []\n",
	"for u in tqdm(range(n_users)):\n",
	" a = [x for x in np.random.choice(n_items, 100, replace=False) if x not in tr[u].indices]\n",
	" a += np.random.choice(te[u].indices, min(3, len(te[u].indices)), replace=False).tolist()\n",
	" user_target_list.append(a)"
	]
	},
	{
	"cell_type": "markdown",
	"id": "c9a3d8fb",
	"metadata": {},
	"source": [
	"## Random Recommendation"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 19,
	"id": "8ac87c68",
	"metadata": {},
	"outputs": [
	{
	"data": {
	"application/vnd.jupyter.widget-view+json": {
	"model_id": "76763aafc95742ca9f2d876df8af46a8",
	"version_major": 2,
	"version_minor": 0
	},
	"text/plain": [
	"  0%|          | 0/6040 [00:00<?, ?it/s]"
	]
	},
	"metadata": {},
	"output_type": "display_data"
	}
	],
	"source": [
	"from sklearn.utils import shuffle\n",
	"ndcgs = []\n",
	"for uid in tqdm(range(n_users)):\n",
	" K = 10\n",
	" rec = shuffle(user_target_list[uid])\n",
	" ndcg = ab_ndcg(uid, rec, te, genre_csr, user_phi_dist)\n",
	" ndcgs.append(ndcg)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 20,
	"id": "c5022d49",
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"0.38296358741994113"
	]
	},
	"execution_count": 20,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"np.mean(ndcgs)"
	]
	},
	{
	"cell_type": "markdown",
	"id": "44debbae",
	"metadata": {},
	"source": [
	"## ALS"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 21,
	"id": "bed7001b",
	"metadata": {},
	"outputs": [
	{
	"data": {
	"application/vnd.jupyter.widget-view+json": {
	"model_id": "f539f2449c31431284e589715c718ea5",
	"version_major": 2,
	"version_minor": 0
	},
	"text/plain": [
	"  0%|          | 0/6040 [00:00<?, ?it/s]"
	]
	},
	"metadata": {},
	"output_type": "display_data"
	},
	{
	"data": {
	"text/plain": [
	"0.7467418337430999"
	]
	},
	"execution_count": 21,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"from tqdm.auto import tqdm\n",
	"ndcgs = []\n",
	"for uid in tqdm(range(n_users)):\n",
	" rec = [x[0] for x in model.rank_items(uid, tr, user_target_list[uid])]\n",
	" ndcg = ab_ndcg(uid, rec, te, genre_csr, user_phi_dist)\n",
	" ndcgs.append(ndcg)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 22,
	"id": "c8af11e3",
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"0.7467418337430999"
	]
	},
	"execution_count": 22,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"np.mean(ndcgs)"
	]
	},
	{
	"cell_type": "markdown",
	"id": "d5d6da52",
	"metadata": {},
	"source": [
	"## BPRMF"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"id": "86fa474e",
	"metadata": {},
	"outputs": [
	{
	"data": {
	"application/vnd.jupyter.widget-view+json": {
	"model_id": "ba609d3a3c564d1e815fe1b138e75905",
	"version_major": 2,
	"version_minor": 0
	},
	"text/plain": [
	"  0%|          | 0/6040 [00:00<?, ?it/s]"
	]
	},
	"metadata": {},
	"output_type": "display_data"
	}
	],
	"source": [
	"from tqdm.auto import tqdm\n",
	"ndcgs = []\n",
	"for uid in tqdm(range(n_users)):\n",
	" rec = [x[0] for x in bpr.rank_items(uid, tr, user_target_list[uid])]\n",
	" ndcg = ab_ndcg(uid, rec, te, genre_csr, user_phi_dist)\n",
	" ndcgs.append(ndcg)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"id": "5e4cac4b",
	"metadata": {},
	"outputs": [],
	"source": [
	"np.mean(ndcgs)"
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3 (ipykernel)",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.9.7"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 5
	}