Last active
July 16, 2022 02:44
-
-
Save LeaveNhA/323dbea11513623d5a87227c7dc11bff to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "id": "a340d205", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "from time import time\n", | |
| "\n", | |
| "import numpy as np\n", | |
| "import pandas as pd\n", | |
| "import plotly.express as px\n", | |
| "from plotly.subplots import make_subplots\n", | |
| "import plotly.graph_objs as go\n", | |
| "from sklearn import svm, datasets\n", | |
| "from sklearn.model_selection import train_test_split\n", | |
| "from mlxtend.feature_selection import SequentialFeatureSelector as sfs\n", | |
| "from mlxtend.feature_selection import ColumnSelector\n", | |
| "from pprint import pprint\n", | |
| "import warnings\n", | |
| "import os\n", | |
| "\n", | |
| "import pandas as pd\n", | |
| "import numpy as np\n", | |
| "import matplotlib.pyplot as plt\n", | |
| "from sklearn.model_selection import train_test_split\n", | |
| "from sklearn.svm import SVC\n", | |
| "from sklearn.multiclass import OneVsRestClassifier\n", | |
| "from sklearn.ensemble import BaggingClassifier\n", | |
| "from sklearn.model_selection import cross_val_score\n", | |
| "from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, mean_squared_error, roc_auc_score, roc_curve, f1_score\n", | |
| "from sklearn import preprocessing\n", | |
| "from sklearn.preprocessing import StandardScaler\n", | |
| "from sklearn.linear_model import LogisticRegression\n", | |
| "from sklearn.model_selection import TimeSeriesSplit\n", | |
| "import seaborn as sns\n", | |
| "import matplotlib\n", | |
| "from multiprocessing import Pool\n", | |
| "from pymonad.maybe import Maybe, Just, Nothing\n", | |
| "from functools import partial\n", | |
| "from joblib import parallel_backend, Parallel, delayed\n", | |
| "from sklearn.pipeline import Pipeline\n", | |
| " \n", | |
| "import defs\n", | |
| "\n", | |
| "from functools import reduce\n", | |
| "\n", | |
| "matplotlib.rcParams['figure.figsize'] = [15, 15]\n", | |
| "\n", | |
| "\n", | |
| "# Uyarıları bastırıyoruz:\n", | |
| "warnings.filterwarnings('ignore')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "id": "3f714cf3", | |
| "metadata": { | |
| "scrolled": true | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>question_id</th>\n", | |
| " <th>bundle_id</th>\n", | |
| " <th>explanation_id</th>\n", | |
| " <th>correct_answer</th>\n", | |
| " <th>part</th>\n", | |
| " <th>tags</th>\n", | |
| " <th>deployed_at</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>q1</td>\n", | |
| " <td>b1</td>\n", | |
| " <td>e1</td>\n", | |
| " <td>b</td>\n", | |
| " <td>1</td>\n", | |
| " <td>1;2;179;181</td>\n", | |
| " <td>1558093217098</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>q2</td>\n", | |
| " <td>b2</td>\n", | |
| " <td>e2</td>\n", | |
| " <td>a</td>\n", | |
| " <td>1</td>\n", | |
| " <td>15;2;182</td>\n", | |
| " <td>1558093219720</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>q3</td>\n", | |
| " <td>b3</td>\n", | |
| " <td>e3</td>\n", | |
| " <td>b</td>\n", | |
| " <td>1</td>\n", | |
| " <td>14;2;179;183</td>\n", | |
| " <td>1558093222784</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>q4</td>\n", | |
| " <td>b4</td>\n", | |
| " <td>e4</td>\n", | |
| " <td>b</td>\n", | |
| " <td>1</td>\n", | |
| " <td>9;2;179;184</td>\n", | |
| " <td>1558093225357</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>q5</td>\n", | |
| " <td>b5</td>\n", | |
| " <td>e5</td>\n", | |
| " <td>c</td>\n", | |
| " <td>1</td>\n", | |
| " <td>8;2;179;181</td>\n", | |
| " <td>1558093228439</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>...</th>\n", | |
| " <td>...</td>\n", | |
| " <td>...</td>\n", | |
| " <td>...</td>\n", | |
| " <td>...</td>\n", | |
| " <td>...</td>\n", | |
| " <td>...</td>\n", | |
| " <td>...</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>13164</th>\n", | |
| " <td>q18139</td>\n", | |
| " <td>b12202</td>\n", | |
| " <td>e12202</td>\n", | |
| " <td>b</td>\n", | |
| " <td>2</td>\n", | |
| " <td>24;26;183;182</td>\n", | |
| " <td>1571733814684</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>13165</th>\n", | |
| " <td>q18140</td>\n", | |
| " <td>b12203</td>\n", | |
| " <td>e12203</td>\n", | |
| " <td>a</td>\n", | |
| " <td>2</td>\n", | |
| " <td>24;33;183;182</td>\n", | |
| " <td>1571733815331</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>13166</th>\n", | |
| " <td>q18141</td>\n", | |
| " <td>b12204</td>\n", | |
| " <td>e12204</td>\n", | |
| " <td>a</td>\n", | |
| " <td>2</td>\n", | |
| " <td>24;26;183;182</td>\n", | |
| " <td>1571733815951</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>13167</th>\n", | |
| " <td>q18142</td>\n", | |
| " <td>b12205</td>\n", | |
| " <td>e12205</td>\n", | |
| " <td>a</td>\n", | |
| " <td>2</td>\n", | |
| " <td>24;26;183;182</td>\n", | |
| " <td>1571733816585</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>13168</th>\n", | |
| " <td>q18143</td>\n", | |
| " <td>b12206</td>\n", | |
| " <td>e12206</td>\n", | |
| " <td>c</td>\n", | |
| " <td>2</td>\n", | |
| " <td>27;24;26;183;182</td>\n", | |
| " <td>1571733817400</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "<p>13169 rows × 7 columns</p>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " question_id bundle_id explanation_id correct_answer part \\\n", | |
| "0 q1 b1 e1 b 1 \n", | |
| "1 q2 b2 e2 a 1 \n", | |
| "2 q3 b3 e3 b 1 \n", | |
| "3 q4 b4 e4 b 1 \n", | |
| "4 q5 b5 e5 c 1 \n", | |
| "... ... ... ... ... ... \n", | |
| "13164 q18139 b12202 e12202 b 2 \n", | |
| "13165 q18140 b12203 e12203 a 2 \n", | |
| "13166 q18141 b12204 e12204 a 2 \n", | |
| "13167 q18142 b12205 e12205 a 2 \n", | |
| "13168 q18143 b12206 e12206 c 2 \n", | |
| "\n", | |
| " tags deployed_at \n", | |
| "0 1;2;179;181 1558093217098 \n", | |
| "1 15;2;182 1558093219720 \n", | |
| "2 14;2;179;183 1558093222784 \n", | |
| "3 9;2;179;184 1558093225357 \n", | |
| "4 8;2;179;181 1558093228439 \n", | |
| "... ... ... \n", | |
| "13164 24;26;183;182 1571733814684 \n", | |
| "13165 24;33;183;182 1571733815331 \n", | |
| "13166 24;26;183;182 1571733815951 \n", | |
| "13167 24;26;183;182 1571733816585 \n", | |
| "13168 27;24;26;183;182 1571733817400 \n", | |
| "\n", | |
| "[13169 rows x 7 columns]" | |
| ] | |
| }, | |
| "execution_count": 2, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "questions_data_path = 'https://gist.githubusercontent.com/LeaveNhA/fbb8c6ce2a6422fab97f66ffdb7a7852/raw/d022e4a53f1bbb35435d82f9371c5a4a328aeb13/ednet-kt1.questions.csv' # URL (or local path) of the questions CSV\n", | |
| "questions = pd.read_csv(questions_data_path, encoding = \"ISO-8859-15\")\n", | |
| "\n", | |
| "questions" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 3, | |
| "id": "a9d60959", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# Bazı yardımcı fonksiyonlar tanımlanıyor:\n", | |
| "def apply_with(d, fn=lambda x: x):\n", | |
| " res = d.copy() # \"= dict(d)\" for lists of tuples\n", | |
| " for key, val in res.items():\n", | |
| " if type(res[key]) is dict:\n", | |
| " res[key] = apply_with(res[key], fn)\n", | |
| " else:\n", | |
| " res[key] = fn(res[key])\n", | |
| " return res\n", | |
| "\n", | |
| "def merge_with(d1, d2, fn=lambda x, y: x + y):\n", | |
| " # print(\"---------merging---------\")\n", | |
| " # print('d1: {}, d2: {}'.format(d1, d2))\n", | |
| " res = d1.copy() # \"= dict(d1)\" for lists of tuples\n", | |
| " for key, val in d2.items(): # \".. in d2\" for lists of tuples\n", | |
| " try:\n", | |
| " if type(res[key]) is dict:\n", | |
| " #print('dict')\n", | |
| " #print(key)\n", | |
| " #print(res[key])\n", | |
| " res[key] = merge_with(res[key], val, fn)\n", | |
| " else:\n", | |
| " #print('scalar')\n", | |
| " #print(key)\n", | |
| " res[key] = fn(res[key], val)\n", | |
| " #print(res[key])\n", | |
| " except: #KeyError:res[key] = val\n", | |
| " pass\n", | |
| " #print(\"res: {}\".format(res))\n", | |
| " #print(\"--------------------\")\n", | |
| " return res" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 4, | |
| "id": "fd4dd6f0", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# KT-1 verilerinin yolu:\n", | |
| "kt1_path = r'/Users/sckn/projects/academic/OMU-DS/paper/kt1/kt1'\n", | |
| "standart_drop_fields = ['user_answer',\n", | |
| " 'explanation_id',\n", | |
| " 'correct_answer',\n", | |
| " 'part',\n", | |
| " 'deployed_at',\n", | |
| " 'user_answer_flag']" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 5, | |
| "id": "83762d99", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# Öğrenici gezinimi için bazı ara-evrensel değişkenler.\n", | |
| "s = pd.Series(os.listdir(kt1_path))\n", | |
| "all_student_files = s.to_numpy()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 6, | |
| "id": "fa8c593c", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# for the sake of functional composition!\n", | |
| "import functools\n", | |
| "\n", | |
| "def c(*fs):\n", | |
| " return functools.reduce(compose2, fs)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 7, | |
| "id": "8e13e180", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "from pymonad.tools import curry\n", | |
| "\n", | |
| "@curry(2)\n", | |
| "def filename_to_fullpath(path, filename):\n", | |
| " return Just(path + '/' + filename)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 8, | |
| "id": "f8b82262", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "def filename_to_pd(filefullpath):\n", | |
| " try:\n", | |
| " return Just(pd.read_csv(filefullpath, encoding = \"ISO-8859-15\"))\n", | |
| " except:\n", | |
| " return Nothing" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 9, | |
| "id": "c3e0dbdc", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "@curry(2)\n", | |
| "def studentfile_to_pd(answer_limit, file_name):\n", | |
| " # Öğrenici verisi okunuyor:\n", | |
| " data_raw = pd.read_csv(file_name, encoding = \"ISO-8859-15\")\n", | |
| " # Bazı kısıtlar var.\n", | |
| " # Herhangi bir kayıp verisi varsa, öğrenici pas geçiliyor:\n", | |
| " if data_raw.isnull().values.any():\n", | |
| " return Nothing\n", | |
| " # Öğrenici cevap sayısı, belirlenen limitin altındaysa,\n", | |
| " # öğrenici pas geçiliyor.\n", | |
| " if data_raw.shape[0] < answer_limit:\n", | |
| " return Nothing\n", | |
| " \n", | |
| " return Just(pd.DataFrame(data_raw))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 10, | |
| "id": "47842b3e", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "@curry(3)\n", | |
| "def merge_with_another_pd(another_pd_, common_key_, pd_):\n", | |
| " try:\n", | |
| " return Just(pd.merge(pd_, another_pd_, left_on = common_key_, right_on = common_key_))\n", | |
| " except:\n", | |
| " return Nothing" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 11, | |
| "id": "d51f8476", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "def generate_user_answer_flag(data):\n", | |
| " try:\n", | |
| " data['user_answer_flag'] = data['user_answer'] == data['correct_answer']\n", | |
| " data['user_answer_flag'] = data['user_answer_flag'].apply(lambda f: int(f))\n", | |
| " return Just(data)\n", | |
| " except:\n", | |
| " return Nothing" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 12, | |
| "id": "334ca8c9", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "def prepare_input_structered_data(data):\n", | |
| " return Just({'data': data})" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 13, | |
| "id": "4745d70f", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "@curry(2)\n", | |
| "def generate_X_y_data(drop_fields, sdata):\n", | |
| " try:\n", | |
| " # Veri Setimiz birleşimden sonra daha da farklı bir şekil alıyor,\n", | |
| " # Bağımlı değişkenleri modele girmeden önce bazı sütunları ayıklıyoruz:\n", | |
| " X = sdata['data'].drop(drop_fields, axis = 1)\n", | |
| " # Hedef değişkenimizi ayırıyoruz:\n", | |
| " y = sdata['data']['user_answer_flag']\n", | |
| " \n", | |
| " sdata['X'] = X\n", | |
| " sdata['y'] = y\n", | |
| " \n", | |
| " return Just(sdata)\n", | |
| " except:\n", | |
| " return Nothing" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 14, | |
| "id": "93c4090f", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "def mutate_X_data(sdata):\n", | |
| " try:\n", | |
| " # Çıkarımlar:\n", | |
| " X = sdata['X']\n", | |
| " # Etiket kodlama yöntemiyle, kategorik değerleri işliyoruz:\n", | |
| " tags_to_identity_number = dict(zip(np.unique(X['tags']), range(1, len(np.unique(X['tags'])) + 1)))\n", | |
| " X['tags'] = X['tags'].apply(lambda ui: tags_to_identity_number[ui])\n", | |
| "\n", | |
| " question_id_to_identity_number = dict(zip(np.unique(X['question_id']), range(1, len(np.unique(X['question_id'])) + 1)))\n", | |
| " X['question_id'] = X['question_id'].apply(lambda ui: question_id_to_identity_number[ui])\n", | |
| "\n", | |
| " bundle_id_to_identity_number = dict(zip(np.unique(X['bundle_id']), range(1, len(np.unique(X['bundle_id'])) + 1)))\n", | |
| " X['bundle_id'] = X['bundle_id'].apply(lambda ui: bundle_id_to_identity_number[ui])\n", | |
| "\n", | |
| " # Sürede geçirilen süre milisaniye cinsinden,\n", | |
| " # Bunu, saniye cinsine dönüştürerek iyileştirme yapıyoruz:\n", | |
| " X['elapsed_time'] = X['elapsed_time'].apply(lambda et: et / 1000)\n", | |
| " \n", | |
| " sdata['X'] = X\n", | |
| " \n", | |
| " return Just(sdata)\n", | |
| " except:\n", | |
| " return Nothing" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 15, | |
| "id": "7c76f4b5", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "def split_user_data(sdata):\n", | |
| " try:\n", | |
| " # Çıkarımlar:\n", | |
| " X = sdata['X']\n", | |
| " y = sdata['y']\n", | |
| " # Verinin ayrıştırılması için ilkleme yapıyoruz:\n", | |
| " tscv = TimeSeriesSplit()\n", | |
| "\n", | |
| " # Verinin ayrıştırılması için ayrıştırıcıdan sağlanan değerlerle,\n", | |
| " # veriyi ayırıyoruz:\n", | |
| " for train_index, test_index in tscv.split(X):\n", | |
| " X_train, X_test = X.iloc[train_index], X.iloc[test_index]\n", | |
| " y_train, y_test = y.iloc[train_index], y.iloc[test_index]\n", | |
| " \n", | |
| " sdata['X_train'] = X_train\n", | |
| " sdata['X_test'] = X_test\n", | |
| " sdata['y_train'] = y_train\n", | |
| " sdata['y_test'] = y_test\n", | |
| " \n", | |
| " return Just(sdata)\n", | |
| " except:\n", | |
| " return Nothing" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 16, | |
| "id": "a9fd063d", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "def generate_model_data(sdata):\n", | |
| " try:\n", | |
| " sdata['model'] = Pipeline([('scaler', StandardScaler()),\n", | |
| " ('logreg', LogisticRegression(\n", | |
| " fit_intercept=True, \n", | |
| " penalty = 'l1',\n", | |
| " solver = 'saga',\n", | |
| " tol = 0.00001,\n", | |
| " max_iter = 1000,\n", | |
| " random_state = 0,\n", | |
| " n_jobs = -1))])\n", | |
| " \n", | |
| " return Just(sdata)\n", | |
| " except:\n", | |
| " return Nothing" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 17, | |
| "id": "37fb0a68", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "def train_model_data(sdata):\n", | |
| " try:\n", | |
| " sdata['model'].fit(sdata['X_train'], sdata['y_train'])\n", | |
| " return Just(sdata)\n", | |
| " except:\n", | |
| " return Nothing" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 18, | |
| "id": "983d2f2d", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "def test_model_data(sdata):\n", | |
| " try:\n", | |
| " # Tahminde bulunuyoruz:\n", | |
| " sdata['y_train_pred'] = sdata['model'].predict(sdata['X_train'])\n", | |
| " sdata['y_test_pred'] = sdata['model'].predict(sdata['X_test'])\n", | |
| " \n", | |
| " return Just(sdata)\n", | |
| " except BaseException as e:\n", | |
| " print(e)\n", | |
| " return Nothing" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 19, | |
| "id": "a90b37a7", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "def generate_roc_and_auc_data(sdata):\n", | |
| " try:\n", | |
| " # Tahmin olasılıkları:\n", | |
| " y_pred_proba = sdata['model'].predict_proba(sdata['X_test'])[::,1]\n", | |
| " # ROC için bazı değerler toplanıyor ve depolanıyor:\n", | |
| " [fpr, tpr, _] = roc_curve(sdata['y_test'], y_pred_proba)\n", | |
| "\n", | |
| " # Eğer tahmin birim, ikilik değilse, AUC hesaplanamaz.\n", | |
| " # Bunu önlemek için, hata fırlatılması durumunda,\n", | |
| " # Öğreniciyi ve modeli, yine de atlıyoruz:\n", | |
| " try:\n", | |
| " auc = roc_auc_score(sdata['y_test'], y_pred_proba)\n", | |
| " except:\n", | |
| " return Nothing\n", | |
| " \n", | |
| " sdata['roc'] = [fpr, tpr]\n", | |
| " \n", | |
| " # Ortalama ROC için temel FPR değeri:\n", | |
| " base_fpr = np.linspace(0, 1, 101)\n", | |
| " # Temel FPR değeri ve ilgili FPR, TPR değeri işlenerek,\n", | |
| " # indirgeme yapılıyor:\n", | |
| " tpr = np.interp(base_fpr, fpr, tpr)\n", | |
| " # Basit bir düzeltme ile,\n", | |
| " # başlangıç değeri sıfırlanıyor.\n", | |
| " tpr[0] = 0.0\n", | |
| "\n", | |
| " sdata['auc_score'] = auc\n", | |
| " sdata['tpr'] = tpr\n", | |
| " \n", | |
| " return Just(sdata)\n", | |
| " except:\n", | |
| " return Nothing" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 20, | |
| "id": "21783f7f", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "def generate_classification_report_data(sdata):\n", | |
| " try:\n", | |
| " # Sınıflandırma raporu hesaplanıyor:\n", | |
| " clf_report = classification_report(sdata['y_test'], sdata['y_test_pred'],\n", | |
| " output_dict=True)\n", | |
| " \n", | |
| " sdata['clf'] = clf_report\n", | |
| " \n", | |
| " return Just(sdata)\n", | |
| " except BaseException as e:\n", | |
| " print(e)\n", | |
| " return Nothing" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 21, | |
| "id": "9d89a10e", | |
| "metadata": { | |
| "scrolled": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "with parallel_backend('threading'):\n", | |
| " filtered_students = Parallel()(\n", | |
| " delayed(filename_to_fullpath(kt1_path))(f) for f in all_student_files\n", | |
| " )" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 22, | |
| "id": "f39a8780", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "with parallel_backend('threading', n_jobs=12): \n", | |
| " pd_students = Parallel()(\n", | |
| " delayed(lambda sfn_: sfn_.then(studentfile_to_pd(1_000)))(sfn) for sfn in filtered_students\n", | |
| " )" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 23, | |
| "id": "dd37abdd", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "with parallel_backend('threading', n_jobs=12): \n", | |
| " students_pd_with_questions = Parallel()(\n", | |
| " delayed(lambda sfn_: sfn_.then(merge_with_another_pd(questions, 'question_id')))(sfn) for sfn in pd_students\n", | |
| " )" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 24, | |
| "id": "a582ae8a", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "with parallel_backend('threading', n_jobs=12): \n", | |
| " user_answer_generated = Parallel()(\n", | |
| " delayed(lambda sfn_: sfn_.then(generate_user_answer_flag))(sfn) for sfn in students_pd_with_questions\n", | |
| " )" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 25, | |
| "id": "4741ba49", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "with parallel_backend('threading', n_jobs=12): \n", | |
| " user_inputs = Parallel()(\n", | |
| " delayed(lambda sfn_: sfn_.then(prepare_input_structered_data))(sfn) for sfn in user_answer_generated\n", | |
| " )" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 26, | |
| "id": "a8927ffb", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "with parallel_backend('threading', n_jobs=12): \n", | |
| " user_X_y_sdata = Parallel()(\n", | |
| " delayed(lambda sfn_: sfn_.then(generate_X_y_data(standart_drop_fields)))(sfn) for sfn in user_inputs\n", | |
| " )" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 27, | |
| "id": "f1f363b6", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "with parallel_backend('threading', n_jobs=12): \n", | |
| " mutated_user_data = Parallel()(\n", | |
| " delayed(lambda sfn_: sfn_.then(mutate_X_data))(sfn) for sfn in user_X_y_sdata\n", | |
| " )" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 28, | |
| "id": "a53d7d72", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "with parallel_backend('threading', n_jobs=12): \n", | |
| " splitted_user_data = Parallel()(\n", | |
| " delayed(lambda sfn_: sfn_.then(split_user_data))(sfn) for sfn in mutated_user_data\n", | |
| " )" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 29, | |
| "id": "d9b72a14", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "with parallel_backend('threading', n_jobs=12): \n", | |
| " modelled_user_data = Parallel()(\n", | |
| " delayed(lambda sfn_: sfn_.then(generate_model_data))(sfn) for sfn in splitted_user_data\n", | |
| " )" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 30, | |
| "id": "621b78f3", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "with parallel_backend('threading', n_jobs=12): \n", | |
| " trained_user_data = Parallel()(\n", | |
| " delayed(lambda sfn_: sfn_.then(train_model_data))(sfn) for sfn in modelled_user_data\n", | |
| " )" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 31, | |
| "id": "d8a4304c", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "with parallel_backend('threading', n_jobs=12): \n", | |
| " roc_and_auc_generated_user = Parallel()(\n", | |
| " delayed(lambda sfn_: sfn_.then(generate_roc_and_auc_data))(sfn) for sfn in trained_user_data\n", | |
| " )" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 32, | |
| "id": "995650fc", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "with parallel_backend('threading', n_jobs=12): \n", | |
| " predicted_data_generated_user = Parallel()(\n", | |
| " delayed(lambda sfn_: sfn_.then(test_model_data))(sfn) for sfn in roc_and_auc_generated_user\n", | |
| " )" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 33, | |
| "id": "5aa4b8af", | |
| "metadata": { | |
| "scrolled": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "with parallel_backend('threading', n_jobs=12): \n", | |
| " clf_generated_user = Parallel()(\n", | |
| " delayed(lambda sfn_: sfn_.then(generate_classification_report_data))(sfn) for sfn in predicted_data_generated_user\n", | |
| " )" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 34, | |
| "id": "a05f69a0", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "with parallel_backend('threading', n_jobs=12): \n", | |
| " filtered_users_data = Parallel()(\n", | |
| " delayed(lambda sfn_: sfn_.maybe(Nothing, lambda x: x))(sfn) for sfn in clf_generated_user if sfn is not Nothing\n", | |
| " )" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 35, | |
| "id": "15f384b7", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "19761" | |
| ] | |
| }, | |
| "execution_count": 35, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "len(filtered_users_data)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 37, | |
| "id": "484bebc1", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# Sınıflandırma Raporu özeti için,\n", | |
| "# listedeki raporlar toplanıyor:\n", | |
| "summed_clf = reduce(\n", | |
| " lambda acc, e: merge_with(acc, e['clf']), filtered_users_data[1:], filtered_users_data[0]['clf']\n", | |
| ")" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 38, | |
| "id": "87b966c6", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "{'0': {'precision': 5746.981650494418,\n", | |
| " 'recall': 2004.3128037991346,\n", | |
| " 'f1-score': 2177.153094992772,\n", | |
| " 'support': 2475281},\n", | |
| " '1': {'precision': 13547.317214748677,\n", | |
| " 'recall': 18140.06600184442,\n", | |
| " 'f1-score': 15323.03995543767,\n", | |
| " 'support': 5623494},\n", | |
| " 'accuracy': 13507.197226535462,\n", | |
| " 'macro avg': {'precision': 9647.149432621609,\n", | |
| " 'recall': 10072.189402821763,\n", | |
| " 'f1-score': 8750.096525215278,\n", | |
| " 'support': 8098775},\n", | |
| " 'weighted avg': {'precision': 11590.080158093126,\n", | |
| " 'recall': 13507.197226535462,\n", | |
| " 'f1-score': 11653.137163800235,\n", | |
| " 'support': 8098775}}" | |
| ] | |
| }, | |
| "execution_count": 38, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "summed_clf" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 39, | |
| "id": "147be365", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# Toplanan değerli rapor, birim sayısına bölünerek ortalama alınıyor:\n", | |
| "averaged_clf = apply_with(summed_clf, lambda x: x / len(filtered_users_data))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 40, | |
| "id": "174b6f43", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "image/png": "\n", | |
| "text/plain": [ | |
| "<Figure size 1080x1080 with 2 Axes>" | |
| ] | |
| }, | |
| "metadata": { | |
| "needs_background": "light" | |
| }, | |
| "output_type": "display_data" | |
| } | |
| ], | |
| "source": [ | |
| "# Grafiksel çıktı için figür ilklemesi:\n", | |
| "fig = plt.figure()\n", | |
| "# Isı haritası, ortalanan Sınıflandırma Raporu üzerinden hazırlanıyor:\n", | |
| "sns.heatmap(pd.DataFrame(averaged_clf).iloc[:-1, :].T, annot=True)\n", | |
| "\n", | |
| "# Isı haritası kaydediliyor:\n", | |
| "fig.savefig('clf.compiled.png', dpi = 500)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 41, | |
| "id": "6ab1be83", | |
| "metadata": { | |
| "scrolled": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "No artists with labels found to put in legend. Note that artists whose label start with an underscore are ignored when legend() is called with no argument.\n" | |
| ] | |
| }, | |
| { | |
| "data": { |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment