Last active
October 7, 2025 16:59
-
-
Save brusangues/d13b5445f271f0f3684974f508b0c5eb to your computer and use it in GitHub Desktop.
target_encoding.ipynb
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "nbformat": 4, | |
| "nbformat_minor": 0, | |
| "metadata": { | |
| "colab": { | |
| "provenance": [], | |
| "authorship_tag": "ABX9TyO2jh0KKhu5iEVsGo7dKGfX", | |
| "include_colab_link": true | |
| }, | |
| "kernelspec": { | |
| "name": "python3", | |
| "display_name": "Python 3" | |
| }, | |
| "language_info": { | |
| "name": "python" | |
| } | |
| }, | |
| "cells": [ | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "id": "view-in-github", | |
| "colab_type": "text" | |
| }, | |
| "source": [ | |
| "<a href=\"https://colab.research.google.com/gist/brusangues/d13b5445f271f0f3684974f508b0c5eb/target_encoding.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "source": [ | |
| "# Target Encoding" | |
| ], | |
| "metadata": { | |
| "id": "Id3wmlMua0T1" | |
| } | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "source": [ | |
| "* https://medium.com/@heitornunesrosa/o-que-%C3%A9-target-encoding-e-como-aplic%C3%A1-lo-85996e1bf00d\n", | |
| "* https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.TargetEncoder.html\n", | |
| "* https://github.com/brusangues/papers/blob/main/Catboost/micci-barreca2001.pdf\n", | |
| "* https://catboost.ai/docs/en/concepts/algorithm-main-stages_cat-to-numberic\n", | |
| "* https://github.com/brusangues/papers/blob/main/Catboost/Transforming%20categorical%20features%20to%20numerical%20features%20_%20CatBoost.pdf\n", | |
| "* https://contrib.scikit-learn.org/category_encoders/catboost.html" | |
| ], | |
| "metadata": { | |
| "id": "lulUGtOkaeLF" | |
| } | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "%pip install category_encoders catboost -q" | |
| ], | |
| "metadata": { | |
| "id": "PPXGNFFfbYIB" | |
| }, | |
| "execution_count": 1, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "colab": { | |
| "base_uri": "https://localhost:8080/", | |
| "height": 709 | |
| }, | |
| "id": "16947eae", | |
| "outputId": "7b836942-0522-4b7d-d9ad-18fccc81bbe8" | |
| }, | |
| "source": [ | |
| "import pandas as pd\n", | |
| "import numpy as np\n", | |
| "pd.set_option('future.no_silent_downcasting', True)\n", | |
| "\n", | |
| "# Create a classification dataset\n", | |
| "data_classification = {'city': ['New York', 'London', 'Paris', 'New York', 'London', 'Paris', 'New York', 'London', 'New York', 'London', 'New York', 'London'],\n", | |
| " 'churn': [0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1]}\n", | |
| "df_clf = pd.DataFrame(data_classification)\n", | |
| "\n", | |
| "# Create a regression dataset\n", | |
| "data_regression = {'product': ['A', 'B', 'C', 'A', 'B', 'C', 'A', 'B'],\n", | |
| " 'price': [10.5, 20.2, 15.0, 11.0, 19.5, 16.0, 10.0, 21.0]}\n", | |
| "df_reg = pd.DataFrame(data_regression)\n", | |
| "\n", | |
| "display(df_clf)\n", | |
| "display(df_reg)" | |
| ], | |
| "execution_count": 2, | |
| "outputs": [ | |
| { | |
| "output_type": "display_data", | |
| "data": { | |
| "text/plain": [ | |
| " city churn\n", | |
| "0 New York 0\n", | |
| "1 London 1\n", | |
| "2 Paris 0\n", | |
| "3 New York 1\n", | |
| "4 London 0\n", | |
| "5 Paris 1\n", | |
| "6 New York 0\n", | |
| "7 London 1\n", | |
| "8 New York 1\n", | |
| "9 London 1\n", | |
| "10 New York 1\n", | |
| "11 London 1" | |
| ], | |
| "text/html": [ | |
| "\n", | |
| " <div id=\"df-668a1bd6-6a26-4806-aefd-f1e923120ddf\" class=\"colab-df-container\">\n", | |
| " <div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>city</th>\n", | |
| " <th>churn</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>New York</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>London</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>Paris</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>New York</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>London</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>5</th>\n", | |
| " <td>Paris</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>6</th>\n", | |
| " <td>New York</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7</th>\n", | |
| " <td>London</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>8</th>\n", | |
| " <td>New York</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>9</th>\n", | |
| " <td>London</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>10</th>\n", | |
| " <td>New York</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>11</th>\n", | |
| " <td>London</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>\n", | |
| " <div class=\"colab-df-buttons\">\n", | |
| "\n", | |
| " <div class=\"colab-df-container\">\n", | |
| " <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-668a1bd6-6a26-4806-aefd-f1e923120ddf')\"\n", | |
| " title=\"Convert this dataframe to an interactive table.\"\n", | |
| " style=\"display:none;\">\n", | |
| "\n", | |
| " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n", | |
| " <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n", | |
| " </svg>\n", | |
| " </button>\n", | |
| "\n", | |
| " <style>\n", | |
| " .colab-df-container {\n", | |
| " display:flex;\n", | |
| " gap: 12px;\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-convert {\n", | |
| " background-color: #E8F0FE;\n", | |
| " border: none;\n", | |
| " border-radius: 50%;\n", | |
| " cursor: pointer;\n", | |
| " display: none;\n", | |
| " fill: #1967D2;\n", | |
| " height: 32px;\n", | |
| " padding: 0 0 0 0;\n", | |
| " width: 32px;\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-convert:hover {\n", | |
| " background-color: #E2EBFA;\n", | |
| " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n", | |
| " fill: #174EA6;\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-buttons div {\n", | |
| " margin-bottom: 4px;\n", | |
| " }\n", | |
| "\n", | |
| " [theme=dark] .colab-df-convert {\n", | |
| " background-color: #3B4455;\n", | |
| " fill: #D2E3FC;\n", | |
| " }\n", | |
| "\n", | |
| " [theme=dark] .colab-df-convert:hover {\n", | |
| " background-color: #434B5C;\n", | |
| " box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n", | |
| " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n", | |
| " fill: #FFFFFF;\n", | |
| " }\n", | |
| " </style>\n", | |
| "\n", | |
| " <script>\n", | |
| " const buttonEl =\n", | |
| " document.querySelector('#df-668a1bd6-6a26-4806-aefd-f1e923120ddf button.colab-df-convert');\n", | |
| " buttonEl.style.display =\n", | |
| " google.colab.kernel.accessAllowed ? 'block' : 'none';\n", | |
| "\n", | |
| " async function convertToInteractive(key) {\n", | |
| " const element = document.querySelector('#df-668a1bd6-6a26-4806-aefd-f1e923120ddf');\n", | |
| " const dataTable =\n", | |
| " await google.colab.kernel.invokeFunction('convertToInteractive',\n", | |
| " [key], {});\n", | |
| " if (!dataTable) return;\n", | |
| "\n", | |
| " const docLinkHtml = 'Like what you see? Visit the ' +\n", | |
| " '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n", | |
| " + ' to learn more about interactive tables.';\n", | |
| " element.innerHTML = '';\n", | |
| " dataTable['output_type'] = 'display_data';\n", | |
| " await google.colab.output.renderOutput(dataTable, element);\n", | |
| " const docLink = document.createElement('div');\n", | |
| " docLink.innerHTML = docLinkHtml;\n", | |
| " element.appendChild(docLink);\n", | |
| " }\n", | |
| " </script>\n", | |
| " </div>\n", | |
| "\n", | |
| "\n", | |
| " <div id=\"df-acbb4fd9-e5c7-4b8c-b868-ba08c462c269\">\n", | |
| " <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-acbb4fd9-e5c7-4b8c-b868-ba08c462c269')\"\n", | |
| " title=\"Suggest charts\"\n", | |
| " style=\"display:none;\">\n", | |
| "\n", | |
| "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n", | |
| " width=\"24px\">\n", | |
| " <g>\n", | |
| " <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n", | |
| " </g>\n", | |
| "</svg>\n", | |
| " </button>\n", | |
| "\n", | |
| "<style>\n", | |
| " .colab-df-quickchart {\n", | |
| " --bg-color: #E8F0FE;\n", | |
| " --fill-color: #1967D2;\n", | |
| " --hover-bg-color: #E2EBFA;\n", | |
| " --hover-fill-color: #174EA6;\n", | |
| " --disabled-fill-color: #AAA;\n", | |
| " --disabled-bg-color: #DDD;\n", | |
| " }\n", | |
| "\n", | |
| " [theme=dark] .colab-df-quickchart {\n", | |
| " --bg-color: #3B4455;\n", | |
| " --fill-color: #D2E3FC;\n", | |
| " --hover-bg-color: #434B5C;\n", | |
| " --hover-fill-color: #FFFFFF;\n", | |
| " --disabled-bg-color: #3B4455;\n", | |
| " --disabled-fill-color: #666;\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-quickchart {\n", | |
| " background-color: var(--bg-color);\n", | |
| " border: none;\n", | |
| " border-radius: 50%;\n", | |
| " cursor: pointer;\n", | |
| " display: none;\n", | |
| " fill: var(--fill-color);\n", | |
| " height: 32px;\n", | |
| " padding: 0;\n", | |
| " width: 32px;\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-quickchart:hover {\n", | |
| " background-color: var(--hover-bg-color);\n", | |
| " box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n", | |
| " fill: var(--button-hover-fill-color);\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-quickchart-complete:disabled,\n", | |
| " .colab-df-quickchart-complete:disabled:hover {\n", | |
| " background-color: var(--disabled-bg-color);\n", | |
| " fill: var(--disabled-fill-color);\n", | |
| " box-shadow: none;\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-spinner {\n", | |
| " border: 2px solid var(--fill-color);\n", | |
| " border-color: transparent;\n", | |
| " border-bottom-color: var(--fill-color);\n", | |
| " animation:\n", | |
| " spin 1s steps(1) infinite;\n", | |
| " }\n", | |
| "\n", | |
| " @keyframes spin {\n", | |
| " 0% {\n", | |
| " border-color: transparent;\n", | |
| " border-bottom-color: var(--fill-color);\n", | |
| " border-left-color: var(--fill-color);\n", | |
| " }\n", | |
| " 20% {\n", | |
| " border-color: transparent;\n", | |
| " border-left-color: var(--fill-color);\n", | |
| " border-top-color: var(--fill-color);\n", | |
| " }\n", | |
| " 30% {\n", | |
| " border-color: transparent;\n", | |
| " border-left-color: var(--fill-color);\n", | |
| " border-top-color: var(--fill-color);\n", | |
| " border-right-color: var(--fill-color);\n", | |
| " }\n", | |
| " 40% {\n", | |
| " border-color: transparent;\n", | |
| " border-right-color: var(--fill-color);\n", | |
| " border-top-color: var(--fill-color);\n", | |
| " }\n", | |
| " 60% {\n", | |
| " border-color: transparent;\n", | |
| " border-right-color: var(--fill-color);\n", | |
| " }\n", | |
| " 80% {\n", | |
| " border-color: transparent;\n", | |
| " border-right-color: var(--fill-color);\n", | |
| " border-bottom-color: var(--fill-color);\n", | |
| " }\n", | |
| " 90% {\n", | |
| " border-color: transparent;\n", | |
| " border-bottom-color: var(--fill-color);\n", | |
| " }\n", | |
| " }\n", | |
| "</style>\n", | |
| "\n", | |
| " <script>\n", | |
| " async function quickchart(key) {\n", | |
| " const quickchartButtonEl =\n", | |
| " document.querySelector('#' + key + ' button');\n", | |
| " quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n", | |
| " quickchartButtonEl.classList.add('colab-df-spinner');\n", | |
| " try {\n", | |
| " const charts = await google.colab.kernel.invokeFunction(\n", | |
| " 'suggestCharts', [key], {});\n", | |
| " } catch (error) {\n", | |
| " console.error('Error during call to suggestCharts:', error);\n", | |
| " }\n", | |
| " quickchartButtonEl.classList.remove('colab-df-spinner');\n", | |
| " quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n", | |
| " }\n", | |
| " (() => {\n", | |
| " let quickchartButtonEl =\n", | |
| " document.querySelector('#df-acbb4fd9-e5c7-4b8c-b868-ba08c462c269 button');\n", | |
| " quickchartButtonEl.style.display =\n", | |
| " google.colab.kernel.accessAllowed ? 'block' : 'none';\n", | |
| " })();\n", | |
| " </script>\n", | |
| " </div>\n", | |
| "\n", | |
| " <div id=\"id_385b0259-398c-499c-a384-a61ba970fb02\">\n", | |
| " <style>\n", | |
| " .colab-df-generate {\n", | |
| " background-color: #E8F0FE;\n", | |
| " border: none;\n", | |
| " border-radius: 50%;\n", | |
| " cursor: pointer;\n", | |
| " display: none;\n", | |
| " fill: #1967D2;\n", | |
| " height: 32px;\n", | |
| " padding: 0 0 0 0;\n", | |
| " width: 32px;\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-generate:hover {\n", | |
| " background-color: #E2EBFA;\n", | |
| " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n", | |
| " fill: #174EA6;\n", | |
| " }\n", | |
| "\n", | |
| " [theme=dark] .colab-df-generate {\n", | |
| " background-color: #3B4455;\n", | |
| " fill: #D2E3FC;\n", | |
| " }\n", | |
| "\n", | |
| " [theme=dark] .colab-df-generate:hover {\n", | |
| " background-color: #434B5C;\n", | |
| " box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n", | |
| " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n", | |
| " fill: #FFFFFF;\n", | |
| " }\n", | |
| " </style>\n", | |
| " <button class=\"colab-df-generate\" onclick=\"generateWithVariable('df_clf')\"\n", | |
| " title=\"Generate code using this dataframe.\"\n", | |
| " style=\"display:none;\">\n", | |
| "\n", | |
| " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n", | |
| " width=\"24px\">\n", | |
| " <path d=\"M7,19H8.4L18.45,9,17,7.55,7,17.6ZM5,21V16.75L18.45,3.32a2,2,0,0,1,2.83,0l1.4,1.43a1.91,1.91,0,0,1,.58,1.4,1.91,1.91,0,0,1-.58,1.4L9.25,21ZM18.45,9,17,7.55Zm-12,3A5.31,5.31,0,0,0,4.9,8.1,5.31,5.31,0,0,0,1,6.5,5.31,5.31,0,0,0,4.9,4.9,5.31,5.31,0,0,0,6.5,1,5.31,5.31,0,0,0,8.1,4.9,5.31,5.31,0,0,0,12,6.5,5.46,5.46,0,0,0,6.5,12Z\"/>\n", | |
| " </svg>\n", | |
| " </button>\n", | |
| " <script>\n", | |
| " (() => {\n", | |
| " const buttonEl =\n", | |
| " document.querySelector('#id_385b0259-398c-499c-a384-a61ba970fb02 button.colab-df-generate');\n", | |
| " buttonEl.style.display =\n", | |
| " google.colab.kernel.accessAllowed ? 'block' : 'none';\n", | |
| "\n", | |
| " buttonEl.onclick = () => {\n", | |
| " google.colab.notebook.generateWithVariable('df_clf');\n", | |
| " }\n", | |
| " })();\n", | |
| " </script>\n", | |
| " </div>\n", | |
| "\n", | |
| " </div>\n", | |
| " </div>\n" | |
| ], | |
| "application/vnd.google.colaboratory.intrinsic+json": { | |
| "type": "dataframe", | |
| "variable_name": "df_clf", | |
| "summary": "{\n \"name\": \"df_clf\",\n \"rows\": 12,\n \"fields\": [\n {\n \"column\": \"city\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"New York\",\n \"London\",\n \"Paris\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"churn\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n \"samples\": [\n 1,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" | |
| } | |
| }, | |
| "metadata": {} | |
| }, | |
| { | |
| "output_type": "display_data", | |
| "data": { | |
| "text/plain": [ | |
| " product price\n", | |
| "0 A 10.5\n", | |
| "1 B 20.2\n", | |
| "2 C 15.0\n", | |
| "3 A 11.0\n", | |
| "4 B 19.5\n", | |
| "5 C 16.0\n", | |
| "6 A 10.0\n", | |
| "7 B 21.0" | |
| ], | |
| "text/html": [ | |
| "\n", | |
| " <div id=\"df-c179789c-f5d3-4a22-a285-86dab29da034\" class=\"colab-df-container\">\n", | |
| " <div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>product</th>\n", | |
| " <th>price</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>A</td>\n", | |
| " <td>10.5</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>B</td>\n", | |
| " <td>20.2</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>C</td>\n", | |
| " <td>15.0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>A</td>\n", | |
| " <td>11.0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>B</td>\n", | |
| " <td>19.5</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>5</th>\n", | |
| " <td>C</td>\n", | |
| " <td>16.0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>6</th>\n", | |
| " <td>A</td>\n", | |
| " <td>10.0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7</th>\n", | |
| " <td>B</td>\n", | |
| " <td>21.0</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>\n", | |
| " <div class=\"colab-df-buttons\">\n", | |
| "\n", | |
| " <div class=\"colab-df-container\">\n", | |
| " <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-c179789c-f5d3-4a22-a285-86dab29da034')\"\n", | |
| " title=\"Convert this dataframe to an interactive table.\"\n", | |
| " style=\"display:none;\">\n", | |
| "\n", | |
| " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n", | |
| " <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n", | |
| " </svg>\n", | |
| " </button>\n", | |
| "\n", | |
| " <style>\n", | |
| " .colab-df-container {\n", | |
| " display:flex;\n", | |
| " gap: 12px;\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-convert {\n", | |
| " background-color: #E8F0FE;\n", | |
| " border: none;\n", | |
| " border-radius: 50%;\n", | |
| " cursor: pointer;\n", | |
| " display: none;\n", | |
| " fill: #1967D2;\n", | |
| " height: 32px;\n", | |
| " padding: 0 0 0 0;\n", | |
| " width: 32px;\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-convert:hover {\n", | |
| " background-color: #E2EBFA;\n", | |
| " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n", | |
| " fill: #174EA6;\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-buttons div {\n", | |
| " margin-bottom: 4px;\n", | |
| " }\n", | |
| "\n", | |
| " [theme=dark] .colab-df-convert {\n", | |
| " background-color: #3B4455;\n", | |
| " fill: #D2E3FC;\n", | |
| " }\n", | |
| "\n", | |
| " [theme=dark] .colab-df-convert:hover {\n", | |
| " background-color: #434B5C;\n", | |
| " box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n", | |
| " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n", | |
| " fill: #FFFFFF;\n", | |
| " }\n", | |
| " </style>\n", | |
| "\n", | |
| " <script>\n", | |
| " const buttonEl =\n", | |
| " document.querySelector('#df-c179789c-f5d3-4a22-a285-86dab29da034 button.colab-df-convert');\n", | |
| " buttonEl.style.display =\n", | |
| " google.colab.kernel.accessAllowed ? 'block' : 'none';\n", | |
| "\n", | |
| " async function convertToInteractive(key) {\n", | |
| " const element = document.querySelector('#df-c179789c-f5d3-4a22-a285-86dab29da034');\n", | |
| " const dataTable =\n", | |
| " await google.colab.kernel.invokeFunction('convertToInteractive',\n", | |
| " [key], {});\n", | |
| " if (!dataTable) return;\n", | |
| "\n", | |
| " const docLinkHtml = 'Like what you see? Visit the ' +\n", | |
| " '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n", | |
| " + ' to learn more about interactive tables.';\n", | |
| " element.innerHTML = '';\n", | |
| " dataTable['output_type'] = 'display_data';\n", | |
| " await google.colab.output.renderOutput(dataTable, element);\n", | |
| " const docLink = document.createElement('div');\n", | |
| " docLink.innerHTML = docLinkHtml;\n", | |
| " element.appendChild(docLink);\n", | |
| " }\n", | |
| " </script>\n", | |
| " </div>\n", | |
| "\n", | |
| "\n", | |
| " <div id=\"df-c032ba6f-e671-49b0-bf55-31ed738e3e86\">\n", | |
| " <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-c032ba6f-e671-49b0-bf55-31ed738e3e86')\"\n", | |
| " title=\"Suggest charts\"\n", | |
| " style=\"display:none;\">\n", | |
| "\n", | |
| "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n", | |
| " width=\"24px\">\n", | |
| " <g>\n", | |
| " <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n", | |
| " </g>\n", | |
| "</svg>\n", | |
| " </button>\n", | |
| "\n", | |
| "<style>\n", | |
| " .colab-df-quickchart {\n", | |
| " --bg-color: #E8F0FE;\n", | |
| " --fill-color: #1967D2;\n", | |
| " --hover-bg-color: #E2EBFA;\n", | |
| " --hover-fill-color: #174EA6;\n", | |
| " --disabled-fill-color: #AAA;\n", | |
| " --disabled-bg-color: #DDD;\n", | |
| " }\n", | |
| "\n", | |
| " [theme=dark] .colab-df-quickchart {\n", | |
| " --bg-color: #3B4455;\n", | |
| " --fill-color: #D2E3FC;\n", | |
| " --hover-bg-color: #434B5C;\n", | |
| " --hover-fill-color: #FFFFFF;\n", | |
| " --disabled-bg-color: #3B4455;\n", | |
| " --disabled-fill-color: #666;\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-quickchart {\n", | |
| " background-color: var(--bg-color);\n", | |
| " border: none;\n", | |
| " border-radius: 50%;\n", | |
| " cursor: pointer;\n", | |
| " display: none;\n", | |
| " fill: var(--fill-color);\n", | |
| " height: 32px;\n", | |
| " padding: 0;\n", | |
| " width: 32px;\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-quickchart:hover {\n", | |
| " background-color: var(--hover-bg-color);\n", | |
| " box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n", | |
| " fill: var(--button-hover-fill-color);\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-quickchart-complete:disabled,\n", | |
| " .colab-df-quickchart-complete:disabled:hover {\n", | |
| " background-color: var(--disabled-bg-color);\n", | |
| " fill: var(--disabled-fill-color);\n", | |
| " box-shadow: none;\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-spinner {\n", | |
| " border: 2px solid var(--fill-color);\n", | |
| " border-color: transparent;\n", | |
| " border-bottom-color: var(--fill-color);\n", | |
| " animation:\n", | |
| " spin 1s steps(1) infinite;\n", | |
| " }\n", | |
| "\n", | |
| " @keyframes spin {\n", | |
| " 0% {\n", | |
| " border-color: transparent;\n", | |
| " border-bottom-color: var(--fill-color);\n", | |
| " border-left-color: var(--fill-color);\n", | |
| " }\n", | |
| " 20% {\n", | |
| " border-color: transparent;\n", | |
| " border-left-color: var(--fill-color);\n", | |
| " border-top-color: var(--fill-color);\n", | |
| " }\n", | |
| " 30% {\n", | |
| " border-color: transparent;\n", | |
| " border-left-color: var(--fill-color);\n", | |
| " border-top-color: var(--fill-color);\n", | |
| " border-right-color: var(--fill-color);\n", | |
| " }\n", | |
| " 40% {\n", | |
| " border-color: transparent;\n", | |
| " border-right-color: var(--fill-color);\n", | |
| " border-top-color: var(--fill-color);\n", | |
| " }\n", | |
| " 60% {\n", | |
| " border-color: transparent;\n", | |
| " border-right-color: var(--fill-color);\n", | |
| " }\n", | |
| " 80% {\n", | |
| " border-color: transparent;\n", | |
| " border-right-color: var(--fill-color);\n", | |
| " border-bottom-color: var(--fill-color);\n", | |
| " }\n", | |
| " 90% {\n", | |
| " border-color: transparent;\n", | |
| " border-bottom-color: var(--fill-color);\n", | |
| " }\n", | |
| " }\n", | |
| "</style>\n", | |
| "\n", | |
| " <script>\n", | |
| " async function quickchart(key) {\n", | |
| " const quickchartButtonEl =\n", | |
| " document.querySelector('#' + key + ' button');\n", | |
| " quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n", | |
| " quickchartButtonEl.classList.add('colab-df-spinner');\n", | |
| " try {\n", | |
| " const charts = await google.colab.kernel.invokeFunction(\n", | |
| " 'suggestCharts', [key], {});\n", | |
| " } catch (error) {\n", | |
| " console.error('Error during call to suggestCharts:', error);\n", | |
| " }\n", | |
| " quickchartButtonEl.classList.remove('colab-df-spinner');\n", | |
| " quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n", | |
| " }\n", | |
| " (() => {\n", | |
| " let quickchartButtonEl =\n", | |
| " document.querySelector('#df-c032ba6f-e671-49b0-bf55-31ed738e3e86 button');\n", | |
| " quickchartButtonEl.style.display =\n", | |
| " google.colab.kernel.accessAllowed ? 'block' : 'none';\n", | |
| " })();\n", | |
| " </script>\n", | |
| " </div>\n", | |
| "\n", | |
| " <div id=\"id_52d82b0f-a39c-4de9-9b87-e5ef92ab7d52\">\n", | |
| " <style>\n", | |
| " .colab-df-generate {\n", | |
| " background-color: #E8F0FE;\n", | |
| " border: none;\n", | |
| " border-radius: 50%;\n", | |
| " cursor: pointer;\n", | |
| " display: none;\n", | |
| " fill: #1967D2;\n", | |
| " height: 32px;\n", | |
| " padding: 0 0 0 0;\n", | |
| " width: 32px;\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-generate:hover {\n", | |
| " background-color: #E2EBFA;\n", | |
| " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n", | |
| " fill: #174EA6;\n", | |
| " }\n", | |
| "\n", | |
| " [theme=dark] .colab-df-generate {\n", | |
| " background-color: #3B4455;\n", | |
| " fill: #D2E3FC;\n", | |
| " }\n", | |
| "\n", | |
| " [theme=dark] .colab-df-generate:hover {\n", | |
| " background-color: #434B5C;\n", | |
| " box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n", | |
| " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n", | |
| " fill: #FFFFFF;\n", | |
| " }\n", | |
| " </style>\n", | |
| " <button class=\"colab-df-generate\" onclick=\"generateWithVariable('df_reg')\"\n", | |
| " title=\"Generate code using this dataframe.\"\n", | |
| " style=\"display:none;\">\n", | |
| "\n", | |
| " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n", | |
| " width=\"24px\">\n", | |
| " <path d=\"M7,19H8.4L18.45,9,17,7.55,7,17.6ZM5,21V16.75L18.45,3.32a2,2,0,0,1,2.83,0l1.4,1.43a1.91,1.91,0,0,1,.58,1.4,1.91,1.91,0,0,1-.58,1.4L9.25,21ZM18.45,9,17,7.55Zm-12,3A5.31,5.31,0,0,0,4.9,8.1,5.31,5.31,0,0,0,1,6.5,5.31,5.31,0,0,0,4.9,4.9,5.31,5.31,0,0,0,6.5,1,5.31,5.31,0,0,0,8.1,4.9,5.31,5.31,0,0,0,12,6.5,5.46,5.46,0,0,0,6.5,12Z\"/>\n", | |
| " </svg>\n", | |
| " </button>\n", | |
| " <script>\n", | |
| " (() => {\n", | |
| " const buttonEl =\n", | |
| " document.querySelector('#id_52d82b0f-a39c-4de9-9b87-e5ef92ab7d52 button.colab-df-generate');\n", | |
| " buttonEl.style.display =\n", | |
| " google.colab.kernel.accessAllowed ? 'block' : 'none';\n", | |
| "\n", | |
| " buttonEl.onclick = () => {\n", | |
| " google.colab.notebook.generateWithVariable('df_reg');\n", | |
| " }\n", | |
| " })();\n", | |
| " </script>\n", | |
| " </div>\n", | |
| "\n", | |
| " </div>\n", | |
| " </div>\n" | |
| ], | |
| "application/vnd.google.colaboratory.intrinsic+json": { | |
| "type": "dataframe", | |
| "variable_name": "df_reg", | |
| "summary": "{\n \"name\": \"df_reg\",\n \"rows\": 8,\n \"fields\": [\n {\n \"column\": \"product\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"A\",\n \"B\",\n \"C\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"price\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 4.539666444637913,\n \"min\": 10.0,\n \"max\": 21.0,\n \"num_unique_values\": 8,\n \"samples\": [\n 20.2,\n 16.0,\n 10.5\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" | |
| } | |
| }, | |
| "metadata": {} | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "print(\"Média do target\", df_clf.churn.mean())\n", | |
| "print(\"Média por categoria\\n\", df_clf.groupby(\"city\")[[\"churn\"]].mean())" | |
| ], | |
| "metadata": { | |
| "colab": { | |
| "base_uri": "https://localhost:8080/" | |
| }, | |
| "id": "3SkBEFm5YCNT", | |
| "outputId": "f8e1a06d-f41a-459a-b8d5-bf8abf3281dc" | |
| }, | |
| "execution_count": 3, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "name": "stdout", | |
| "text": [ | |
| "Média do target 0.6666666666666666\n", | |
| "Média por categoria\n", | |
| " churn\n", | |
| "city \n", | |
| "London 0.8\n", | |
| "New York 0.6\n", | |
| "Paris 0.5\n" | |
| ] | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "import numpy as np\n", | |
| "from sklearn.preprocessing import TargetEncoder\n", | |
| "\n", | |
| "# Fazendo dados duplicados para burlar a validação cruzada obrigatória\n", | |
| "df_clf_double = pd.concat([df_clf,df_clf]).reset_index(drop=True)\n", | |
| "\n", | |
| "# Parâmetro de mistura = 0 -> peso total para a média a posteriori (média local)\n", | |
| "enc_smooth0 = TargetEncoder(smooth=0, cv=2, shuffle=False)\n", | |
| "X_trans_double = enc_smooth0.fit_transform(df_clf_double[[\"city\"]], df_clf_double.churn)\n", | |
| "df_clf[\"enc_smooth0\"] = X_trans_double[:df_clf.shape[0]]\n", | |
| "\n", | |
| "# Parâmetro de mistura = +inf -> peso total para a média a priori (média global)\n", | |
| "enc_smooth1 = TargetEncoder(smooth=1_000_000_000, cv=2, shuffle=False)\n", | |
| "X_trans_double = enc_smooth1.fit_transform(df_clf_double[[\"city\"]], df_clf_double.churn)\n", | |
| "df_clf[\"enc_smooth1\"] = X_trans_double[:df_clf.shape[0]]\n", | |
| "\n", | |
| "# Parâmetro de mistura = auto -> atribuição seguindo a fórmula teórica\n", | |
| "enc_smooth_auto = TargetEncoder(smooth=\"auto\", cv=2, shuffle=False)\n", | |
| "X_trans_double = enc_smooth_auto.fit_transform(df_clf_double[[\"city\"]], df_clf_double.churn)\n", | |
| "df_clf[\"enc_smooth_auto\"] = X_trans_double[:df_clf.shape[0]]\n", | |
| "\n", | |
| "df_clf" | |
| ], | |
| "metadata": { | |
| "colab": { | |
| "base_uri": "https://localhost:8080/", | |
| "height": 426 | |
| }, | |
| "id": "xz1b0Cb-bZd9", | |
| "outputId": "fac3f129-cc62-4176-bb4e-25b18c4b2328" | |
| }, | |
| "execution_count": 4, | |
| "outputs": [ | |
| { | |
| "output_type": "execute_result", | |
| "data": { | |
| "text/plain": [ | |
| " city churn enc_smooth0 enc_smooth1 enc_smooth_auto\n", | |
| "0 New York 0 0.6 0.666667 0.611842\n", | |
| "1 London 1 0.8 0.666667 0.783217\n", | |
| "2 Paris 0 0.5 0.666667 0.560000\n", | |
| "3 New York 1 0.6 0.666667 0.611842\n", | |
| "4 London 0 0.8 0.666667 0.783217\n", | |
| "5 Paris 1 0.5 0.666667 0.560000\n", | |
| "6 New York 0 0.6 0.666667 0.611842\n", | |
| "7 London 1 0.8 0.666667 0.783217\n", | |
| "8 New York 1 0.6 0.666667 0.611842\n", | |
| "9 London 1 0.8 0.666667 0.783217\n", | |
| "10 New York 1 0.6 0.666667 0.611842\n", | |
| "11 London 1 0.8 0.666667 0.783217" | |
| ], | |
| "text/html": [ | |
| "\n", | |
| " <div id=\"df-64b8d78e-979b-4db8-b529-1762c4b904cb\" class=\"colab-df-container\">\n", | |
| " <div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>city</th>\n", | |
| " <th>churn</th>\n", | |
| " <th>enc_smooth0</th>\n", | |
| " <th>enc_smooth1</th>\n", | |
| " <th>enc_smooth_auto</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>New York</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0.6</td>\n", | |
| " <td>0.666667</td>\n", | |
| " <td>0.611842</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>London</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0.8</td>\n", | |
| " <td>0.666667</td>\n", | |
| " <td>0.783217</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>Paris</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0.5</td>\n", | |
| " <td>0.666667</td>\n", | |
| " <td>0.560000</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>New York</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0.6</td>\n", | |
| " <td>0.666667</td>\n", | |
| " <td>0.611842</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>London</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0.8</td>\n", | |
| " <td>0.666667</td>\n", | |
| " <td>0.783217</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>5</th>\n", | |
| " <td>Paris</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0.5</td>\n", | |
| " <td>0.666667</td>\n", | |
| " <td>0.560000</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>6</th>\n", | |
| " <td>New York</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0.6</td>\n", | |
| " <td>0.666667</td>\n", | |
| " <td>0.611842</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7</th>\n", | |
| " <td>London</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0.8</td>\n", | |
| " <td>0.666667</td>\n", | |
| " <td>0.783217</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>8</th>\n", | |
| " <td>New York</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0.6</td>\n", | |
| " <td>0.666667</td>\n", | |
| " <td>0.611842</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>9</th>\n", | |
| " <td>London</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0.8</td>\n", | |
| " <td>0.666667</td>\n", | |
| " <td>0.783217</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>10</th>\n", | |
| " <td>New York</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0.6</td>\n", | |
| " <td>0.666667</td>\n", | |
| " <td>0.611842</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>11</th>\n", | |
| " <td>London</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0.8</td>\n", | |
| " <td>0.666667</td>\n", | |
| " <td>0.783217</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>\n", | |
| " <div class=\"colab-df-buttons\">\n", | |
| "\n", | |
| " <div class=\"colab-df-container\">\n", | |
| " <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-64b8d78e-979b-4db8-b529-1762c4b904cb')\"\n", | |
| " title=\"Convert this dataframe to an interactive table.\"\n", | |
| " style=\"display:none;\">\n", | |
| "\n", | |
| " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n", | |
| " <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n", | |
| " </svg>\n", | |
| " </button>\n", | |
| "\n", | |
| " <style>\n", | |
| " .colab-df-container {\n", | |
| " display:flex;\n", | |
| " gap: 12px;\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-convert {\n", | |
| " background-color: #E8F0FE;\n", | |
| " border: none;\n", | |
| " border-radius: 50%;\n", | |
| " cursor: pointer;\n", | |
| " display: none;\n", | |
| " fill: #1967D2;\n", | |
| " height: 32px;\n", | |
| " padding: 0 0 0 0;\n", | |
| " width: 32px;\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-convert:hover {\n", | |
| " background-color: #E2EBFA;\n", | |
| " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n", | |
| " fill: #174EA6;\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-buttons div {\n", | |
| " margin-bottom: 4px;\n", | |
| " }\n", | |
| "\n", | |
| " [theme=dark] .colab-df-convert {\n", | |
| " background-color: #3B4455;\n", | |
| " fill: #D2E3FC;\n", | |
| " }\n", | |
| "\n", | |
| " [theme=dark] .colab-df-convert:hover {\n", | |
| " background-color: #434B5C;\n", | |
| " box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n", | |
| " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n", | |
| " fill: #FFFFFF;\n", | |
| " }\n", | |
| " </style>\n", | |
| "\n", | |
| " <script>\n", | |
| " const buttonEl =\n", | |
| " document.querySelector('#df-64b8d78e-979b-4db8-b529-1762c4b904cb button.colab-df-convert');\n", | |
| " buttonEl.style.display =\n", | |
| " google.colab.kernel.accessAllowed ? 'block' : 'none';\n", | |
| "\n", | |
| " async function convertToInteractive(key) {\n", | |
| " const element = document.querySelector('#df-64b8d78e-979b-4db8-b529-1762c4b904cb');\n", | |
| " const dataTable =\n", | |
| " await google.colab.kernel.invokeFunction('convertToInteractive',\n", | |
| " [key], {});\n", | |
| " if (!dataTable) return;\n", | |
| "\n", | |
| " const docLinkHtml = 'Like what you see? Visit the ' +\n", | |
| " '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n", | |
| " + ' to learn more about interactive tables.';\n", | |
| " element.innerHTML = '';\n", | |
| " dataTable['output_type'] = 'display_data';\n", | |
| " await google.colab.output.renderOutput(dataTable, element);\n", | |
| " const docLink = document.createElement('div');\n", | |
| " docLink.innerHTML = docLinkHtml;\n", | |
| " element.appendChild(docLink);\n", | |
| " }\n", | |
| " </script>\n", | |
| " </div>\n", | |
| "\n", | |
| "\n", | |
| " <div id=\"df-7c66c1cd-8fec-43d9-acfa-edb28043e1ee\">\n", | |
| " <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-7c66c1cd-8fec-43d9-acfa-edb28043e1ee')\"\n", | |
| " title=\"Suggest charts\"\n", | |
| " style=\"display:none;\">\n", | |
| "\n", | |
| "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n", | |
| " width=\"24px\">\n", | |
| " <g>\n", | |
| " <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n", | |
| " </g>\n", | |
| "</svg>\n", | |
| " </button>\n", | |
| "\n", | |
| "<style>\n", | |
| " .colab-df-quickchart {\n", | |
| " --bg-color: #E8F0FE;\n", | |
| " --fill-color: #1967D2;\n", | |
| " --hover-bg-color: #E2EBFA;\n", | |
| " --hover-fill-color: #174EA6;\n", | |
| " --disabled-fill-color: #AAA;\n", | |
| " --disabled-bg-color: #DDD;\n", | |
| " }\n", | |
| "\n", | |
| " [theme=dark] .colab-df-quickchart {\n", | |
| " --bg-color: #3B4455;\n", | |
| " --fill-color: #D2E3FC;\n", | |
| " --hover-bg-color: #434B5C;\n", | |
| " --hover-fill-color: #FFFFFF;\n", | |
| " --disabled-bg-color: #3B4455;\n", | |
| " --disabled-fill-color: #666;\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-quickchart {\n", | |
| " background-color: var(--bg-color);\n", | |
| " border: none;\n", | |
| " border-radius: 50%;\n", | |
| " cursor: pointer;\n", | |
| " display: none;\n", | |
| " fill: var(--fill-color);\n", | |
| " height: 32px;\n", | |
| " padding: 0;\n", | |
| " width: 32px;\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-quickchart:hover {\n", | |
| " background-color: var(--hover-bg-color);\n", | |
| " box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n", | |
| " fill: var(--button-hover-fill-color);\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-quickchart-complete:disabled,\n", | |
| " .colab-df-quickchart-complete:disabled:hover {\n", | |
| " background-color: var(--disabled-bg-color);\n", | |
| " fill: var(--disabled-fill-color);\n", | |
| " box-shadow: none;\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-spinner {\n", | |
| " border: 2px solid var(--fill-color);\n", | |
| " border-color: transparent;\n", | |
| " border-bottom-color: var(--fill-color);\n", | |
| " animation:\n", | |
| " spin 1s steps(1) infinite;\n", | |
| " }\n", | |
| "\n", | |
| " @keyframes spin {\n", | |
| " 0% {\n", | |
| " border-color: transparent;\n", | |
| " border-bottom-color: var(--fill-color);\n", | |
| " border-left-color: var(--fill-color);\n", | |
| " }\n", | |
| " 20% {\n", | |
| " border-color: transparent;\n", | |
| " border-left-color: var(--fill-color);\n", | |
| " border-top-color: var(--fill-color);\n", | |
| " }\n", | |
| " 30% {\n", | |
| " border-color: transparent;\n", | |
| " border-left-color: var(--fill-color);\n", | |
| " border-top-color: var(--fill-color);\n", | |
| " border-right-color: var(--fill-color);\n", | |
| " }\n", | |
| " 40% {\n", | |
| " border-color: transparent;\n", | |
| " border-right-color: var(--fill-color);\n", | |
| " border-top-color: var(--fill-color);\n", | |
| " }\n", | |
| " 60% {\n", | |
| " border-color: transparent;\n", | |
| " border-right-color: var(--fill-color);\n", | |
| " }\n", | |
| " 80% {\n", | |
| " border-color: transparent;\n", | |
| " border-right-color: var(--fill-color);\n", | |
| " border-bottom-color: var(--fill-color);\n", | |
| " }\n", | |
| " 90% {\n", | |
| " border-color: transparent;\n", | |
| " border-bottom-color: var(--fill-color);\n", | |
| " }\n", | |
| " }\n", | |
| "</style>\n", | |
| "\n", | |
| " <script>\n", | |
| " async function quickchart(key) {\n", | |
| " const quickchartButtonEl =\n", | |
| " document.querySelector('#' + key + ' button');\n", | |
| " quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n", | |
| " quickchartButtonEl.classList.add('colab-df-spinner');\n", | |
| " try {\n", | |
| " const charts = await google.colab.kernel.invokeFunction(\n", | |
| " 'suggestCharts', [key], {});\n", | |
| " } catch (error) {\n", | |
| " console.error('Error during call to suggestCharts:', error);\n", | |
| " }\n", | |
| " quickchartButtonEl.classList.remove('colab-df-spinner');\n", | |
| " quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n", | |
| " }\n", | |
| " (() => {\n", | |
| " let quickchartButtonEl =\n", | |
| " document.querySelector('#df-7c66c1cd-8fec-43d9-acfa-edb28043e1ee button');\n", | |
| " quickchartButtonEl.style.display =\n", | |
| " google.colab.kernel.accessAllowed ? 'block' : 'none';\n", | |
| " })();\n", | |
| " </script>\n", | |
| " </div>\n", | |
| "\n", | |
| " <div id=\"id_548e520c-1533-4d6f-97e9-79226d52d0fc\">\n", | |
| " <style>\n", | |
| " .colab-df-generate {\n", | |
| " background-color: #E8F0FE;\n", | |
| " border: none;\n", | |
| " border-radius: 50%;\n", | |
| " cursor: pointer;\n", | |
| " display: none;\n", | |
| " fill: #1967D2;\n", | |
| " height: 32px;\n", | |
| " padding: 0 0 0 0;\n", | |
| " width: 32px;\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-generate:hover {\n", | |
| " background-color: #E2EBFA;\n", | |
| " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n", | |
| " fill: #174EA6;\n", | |
| " }\n", | |
| "\n", | |
| " [theme=dark] .colab-df-generate {\n", | |
| " background-color: #3B4455;\n", | |
| " fill: #D2E3FC;\n", | |
| " }\n", | |
| "\n", | |
| " [theme=dark] .colab-df-generate:hover {\n", | |
| " background-color: #434B5C;\n", | |
| " box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n", | |
| " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n", | |
| " fill: #FFFFFF;\n", | |
| " }\n", | |
| " </style>\n", | |
| " <button class=\"colab-df-generate\" onclick=\"generateWithVariable('df_clf')\"\n", | |
| " title=\"Generate code using this dataframe.\"\n", | |
| " style=\"display:none;\">\n", | |
| "\n", | |
| " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n", | |
| " width=\"24px\">\n", | |
| " <path d=\"M7,19H8.4L18.45,9,17,7.55,7,17.6ZM5,21V16.75L18.45,3.32a2,2,0,0,1,2.83,0l1.4,1.43a1.91,1.91,0,0,1,.58,1.4,1.91,1.91,0,0,1-.58,1.4L9.25,21ZM18.45,9,17,7.55Zm-12,3A5.31,5.31,0,0,0,4.9,8.1,5.31,5.31,0,0,0,1,6.5,5.31,5.31,0,0,0,4.9,4.9,5.31,5.31,0,0,0,6.5,1,5.31,5.31,0,0,0,8.1,4.9,5.31,5.31,0,0,0,12,6.5,5.46,5.46,0,0,0,6.5,12Z\"/>\n", | |
| " </svg>\n", | |
| " </button>\n", | |
| " <script>\n", | |
| " (() => {\n", | |
| " const buttonEl =\n", | |
| " document.querySelector('#id_548e520c-1533-4d6f-97e9-79226d52d0fc button.colab-df-generate');\n", | |
| " buttonEl.style.display =\n", | |
| " google.colab.kernel.accessAllowed ? 'block' : 'none';\n", | |
| "\n", | |
| " buttonEl.onclick = () => {\n", | |
| " google.colab.notebook.generateWithVariable('df_clf');\n", | |
| " }\n", | |
| " })();\n", | |
| " </script>\n", | |
| " </div>\n", | |
| "\n", | |
| " </div>\n", | |
| " </div>\n" | |
| ], | |
| "application/vnd.google.colaboratory.intrinsic+json": { | |
| "type": "dataframe", | |
| "variable_name": "df_clf", | |
| "summary": "{\n \"name\": \"df_clf\",\n \"rows\": 12,\n \"fields\": [\n {\n \"column\": \"city\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"New York\",\n \"London\",\n \"Paris\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"churn\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n \"samples\": [\n 1,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"enc_smooth0\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.12309149097933277,\n \"min\": 0.5,\n \"max\": 0.8,\n \"num_unique_values\": 3,\n \"samples\": [\n 0.6,\n 0.8\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"enc_smooth1\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 5.149286359812627e-10,\n \"min\": 0.6666666663333333,\n \"max\": 0.6666666673333332,\n \"num_unique_values\": 2,\n \"samples\": [\n 0.6666666673333332,\n 0.6666666663333333\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"enc_smooth_auto\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.097676234197722,\n \"min\": 0.56,\n \"max\": 0.7832167832167832,\n \"num_unique_values\": 3,\n \"samples\": [\n 0.6118421052631579,\n 0.7832167832167832\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" | |
| } | |
| }, | |
| "metadata": {}, | |
| "execution_count": 4 | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "df_clf.drop_duplicates([\"city\"])" | |
| ], | |
| "metadata": { | |
| "colab": { | |
| "base_uri": "https://localhost:8080/", | |
| "height": 143 | |
| }, | |
| "id": "QlqtnLrbchL3", | |
| "outputId": "230b61c2-f9c3-4bdb-c9f2-6028ac4bf867" | |
| }, | |
| "execution_count": 5, | |
| "outputs": [ | |
| { | |
| "output_type": "execute_result", | |
| "data": { | |
| "text/plain": [ | |
| " city churn enc_smooth0 enc_smooth1 enc_smooth_auto\n", | |
| "0 New York 0 0.6 0.666667 0.611842\n", | |
| "1 London 1 0.8 0.666667 0.783217\n", | |
| "2 Paris 0 0.5 0.666667 0.560000" | |
| ], | |
| "text/html": [ | |
| "\n", | |
| " <div id=\"df-d1280231-a2e8-4950-9a40-10597fba380c\" class=\"colab-df-container\">\n", | |
| " <div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>city</th>\n", | |
| " <th>churn</th>\n", | |
| " <th>enc_smooth0</th>\n", | |
| " <th>enc_smooth1</th>\n", | |
| " <th>enc_smooth_auto</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>New York</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0.6</td>\n", | |
| " <td>0.666667</td>\n", | |
| " <td>0.611842</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>London</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0.8</td>\n", | |
| " <td>0.666667</td>\n", | |
| " <td>0.783217</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>Paris</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0.5</td>\n", | |
| " <td>0.666667</td>\n", | |
| " <td>0.560000</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>\n", | |
| " <div class=\"colab-df-buttons\">\n", | |
| "\n", | |
| " <div class=\"colab-df-container\">\n", | |
| " <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-d1280231-a2e8-4950-9a40-10597fba380c')\"\n", | |
| " title=\"Convert this dataframe to an interactive table.\"\n", | |
| " style=\"display:none;\">\n", | |
| "\n", | |
| " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n", | |
| " <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n", | |
| " </svg>\n", | |
| " </button>\n", | |
| "\n", | |
| " <style>\n", | |
| " .colab-df-container {\n", | |
| " display:flex;\n", | |
| " gap: 12px;\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-convert {\n", | |
| " background-color: #E8F0FE;\n", | |
| " border: none;\n", | |
| " border-radius: 50%;\n", | |
| " cursor: pointer;\n", | |
| " display: none;\n", | |
| " fill: #1967D2;\n", | |
| " height: 32px;\n", | |
| " padding: 0 0 0 0;\n", | |
| " width: 32px;\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-convert:hover {\n", | |
| " background-color: #E2EBFA;\n", | |
| " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n", | |
| " fill: #174EA6;\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-buttons div {\n", | |
| " margin-bottom: 4px;\n", | |
| " }\n", | |
| "\n", | |
| " [theme=dark] .colab-df-convert {\n", | |
| " background-color: #3B4455;\n", | |
| " fill: #D2E3FC;\n", | |
| " }\n", | |
| "\n", | |
| " [theme=dark] .colab-df-convert:hover {\n", | |
| " background-color: #434B5C;\n", | |
| " box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n", | |
| " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n", | |
| " fill: #FFFFFF;\n", | |
| " }\n", | |
| " </style>\n", | |
| "\n", | |
| " <script>\n", | |
| " const buttonEl =\n", | |
| " document.querySelector('#df-d1280231-a2e8-4950-9a40-10597fba380c button.colab-df-convert');\n", | |
| " buttonEl.style.display =\n", | |
| " google.colab.kernel.accessAllowed ? 'block' : 'none';\n", | |
| "\n", | |
| " async function convertToInteractive(key) {\n", | |
| " const element = document.querySelector('#df-d1280231-a2e8-4950-9a40-10597fba380c');\n", | |
| " const dataTable =\n", | |
| " await google.colab.kernel.invokeFunction('convertToInteractive',\n", | |
| " [key], {});\n", | |
| " if (!dataTable) return;\n", | |
| "\n", | |
| " const docLinkHtml = 'Like what you see? Visit the ' +\n", | |
| " '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n", | |
| " + ' to learn more about interactive tables.';\n", | |
| " element.innerHTML = '';\n", | |
| " dataTable['output_type'] = 'display_data';\n", | |
| " await google.colab.output.renderOutput(dataTable, element);\n", | |
| " const docLink = document.createElement('div');\n", | |
| " docLink.innerHTML = docLinkHtml;\n", | |
| " element.appendChild(docLink);\n", | |
| " }\n", | |
| " </script>\n", | |
| " </div>\n", | |
| "\n", | |
| "\n", | |
| " <div id=\"df-d52efc19-b139-4cf2-b4a8-8844dae90329\">\n", | |
| " <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-d52efc19-b139-4cf2-b4a8-8844dae90329')\"\n", | |
| " title=\"Suggest charts\"\n", | |
| " style=\"display:none;\">\n", | |
| "\n", | |
| "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n", | |
| " width=\"24px\">\n", | |
| " <g>\n", | |
| " <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n", | |
| " </g>\n", | |
| "</svg>\n", | |
| " </button>\n", | |
| "\n", | |
| "<style>\n", | |
| " .colab-df-quickchart {\n", | |
| " --bg-color: #E8F0FE;\n", | |
| " --fill-color: #1967D2;\n", | |
| " --hover-bg-color: #E2EBFA;\n", | |
| " --hover-fill-color: #174EA6;\n", | |
| " --disabled-fill-color: #AAA;\n", | |
| " --disabled-bg-color: #DDD;\n", | |
| " }\n", | |
| "\n", | |
| " [theme=dark] .colab-df-quickchart {\n", | |
| " --bg-color: #3B4455;\n", | |
| " --fill-color: #D2E3FC;\n", | |
| " --hover-bg-color: #434B5C;\n", | |
| " --hover-fill-color: #FFFFFF;\n", | |
| " --disabled-bg-color: #3B4455;\n", | |
| " --disabled-fill-color: #666;\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-quickchart {\n", | |
| " background-color: var(--bg-color);\n", | |
| " border: none;\n", | |
| " border-radius: 50%;\n", | |
| " cursor: pointer;\n", | |
| " display: none;\n", | |
| " fill: var(--fill-color);\n", | |
| " height: 32px;\n", | |
| " padding: 0;\n", | |
| " width: 32px;\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-quickchart:hover {\n", | |
| " background-color: var(--hover-bg-color);\n", | |
| " box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n", | |
| " fill: var(--button-hover-fill-color);\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-quickchart-complete:disabled,\n", | |
| " .colab-df-quickchart-complete:disabled:hover {\n", | |
| " background-color: var(--disabled-bg-color);\n", | |
| " fill: var(--disabled-fill-color);\n", | |
| " box-shadow: none;\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-spinner {\n", | |
| " border: 2px solid var(--fill-color);\n", | |
| " border-color: transparent;\n", | |
| " border-bottom-color: var(--fill-color);\n", | |
| " animation:\n", | |
| " spin 1s steps(1) infinite;\n", | |
| " }\n", | |
| "\n", | |
| " @keyframes spin {\n", | |
| " 0% {\n", | |
| " border-color: transparent;\n", | |
| " border-bottom-color: var(--fill-color);\n", | |
| " border-left-color: var(--fill-color);\n", | |
| " }\n", | |
| " 20% {\n", | |
| " border-color: transparent;\n", | |
| " border-left-color: var(--fill-color);\n", | |
| " border-top-color: var(--fill-color);\n", | |
| " }\n", | |
| " 30% {\n", | |
| " border-color: transparent;\n", | |
| " border-left-color: var(--fill-color);\n", | |
| " border-top-color: var(--fill-color);\n", | |
| " border-right-color: var(--fill-color);\n", | |
| " }\n", | |
| " 40% {\n", | |
| " border-color: transparent;\n", | |
| " border-right-color: var(--fill-color);\n", | |
| " border-top-color: var(--fill-color);\n", | |
| " }\n", | |
| " 60% {\n", | |
| " border-color: transparent;\n", | |
| " border-right-color: var(--fill-color);\n", | |
| " }\n", | |
| " 80% {\n", | |
| " border-color: transparent;\n", | |
| " border-right-color: var(--fill-color);\n", | |
| " border-bottom-color: var(--fill-color);\n", | |
| " }\n", | |
| " 90% {\n", | |
| " border-color: transparent;\n", | |
| " border-bottom-color: var(--fill-color);\n", | |
| " }\n", | |
| " }\n", | |
| "</style>\n", | |
| "\n", | |
| " <script>\n", | |
| " async function quickchart(key) {\n", | |
| " const quickchartButtonEl =\n", | |
| " document.querySelector('#' + key + ' button');\n", | |
| " quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n", | |
| " quickchartButtonEl.classList.add('colab-df-spinner');\n", | |
| " try {\n", | |
| " const charts = await google.colab.kernel.invokeFunction(\n", | |
| " 'suggestCharts', [key], {});\n", | |
| " } catch (error) {\n", | |
| " console.error('Error during call to suggestCharts:', error);\n", | |
| " }\n", | |
| " quickchartButtonEl.classList.remove('colab-df-spinner');\n", | |
| " quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n", | |
| " }\n", | |
| " (() => {\n", | |
| " let quickchartButtonEl =\n", | |
| " document.querySelector('#df-d52efc19-b139-4cf2-b4a8-8844dae90329 button');\n", | |
| " quickchartButtonEl.style.display =\n", | |
| " google.colab.kernel.accessAllowed ? 'block' : 'none';\n", | |
| " })();\n", | |
| " </script>\n", | |
| " </div>\n", | |
| "\n", | |
| " </div>\n", | |
| " </div>\n" | |
| ], | |
| "application/vnd.google.colaboratory.intrinsic+json": { | |
| "type": "dataframe", | |
| "summary": "{\n \"name\": \"df_clf\",\n \"rows\": 3,\n \"fields\": [\n {\n \"column\": \"city\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"New York\",\n \"London\",\n \"Paris\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"churn\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n \"samples\": [\n 1,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"enc_smooth0\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.1527525231651947,\n \"min\": 0.5,\n \"max\": 0.8,\n \"num_unique_values\": 3,\n \"samples\": [\n 0.6,\n 0.8\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"enc_smooth1\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 5.773502528610522e-10,\n \"min\": 0.6666666663333333,\n \"max\": 0.6666666673333332,\n \"num_unique_values\": 2,\n \"samples\": [\n 0.6666666673333332,\n 0.6666666663333333\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"enc_smooth_auto\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.11682081466322225,\n \"min\": 0.56,\n \"max\": 0.7832167832167832,\n \"num_unique_values\": 3,\n \"samples\": [\n 0.6118421052631579,\n 0.7832167832167832\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" | |
| } | |
| }, | |
| "metadata": {}, | |
| "execution_count": 5 | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [], | |
| "metadata": { | |
| "id": "4BAbfszQhoBq" | |
| }, | |
| "execution_count": 5, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "def target_encoding(df_, target_mean, alpha=\"auto\", k = 20, f = 100, target_col=\"churn\"):\n", | |
| " if alpha==\"auto\":\n", | |
| " ni = df_.shape[0]\n", | |
| " alpha = 1 / (1 + np.exp(-(ni - k)/f))\n", | |
| " encode = alpha * df_[target_col].mean() + (1-alpha) * target_mean\n", | |
| " return encode\n", | |
| "\n", | |
| "alpha=1 # \"auto\"\n", | |
| "target_mean = df_clf.churn.mean()\n", | |
| "\n", | |
| "encoding = df_clf.groupby(\"city\").apply(lambda x: target_encoding(x, target_mean, alpha=alpha), include_groups=False)\n", | |
| "df_clf['enc_smooth_manual'] = df_clf[\"city\"].map(encoding.to_dict())\n", | |
| "df_clf" | |
| ], | |
| "metadata": { | |
| "colab": { | |
| "base_uri": "https://localhost:8080/", | |
| "height": 426 | |
| }, | |
| "id": "Y1jQROJVfEEv", | |
| "outputId": "e4cc3882-e59b-41b4-b78d-20b6fd1438b1" | |
| }, | |
| "execution_count": 6, | |
| "outputs": [ | |
| { | |
| "output_type": "execute_result", | |
| "data": { | |
| "text/plain": [ | |
| " city churn enc_smooth0 enc_smooth1 enc_smooth_auto \\\n", | |
| "0 New York 0 0.6 0.666667 0.611842 \n", | |
| "1 London 1 0.8 0.666667 0.783217 \n", | |
| "2 Paris 0 0.5 0.666667 0.560000 \n", | |
| "3 New York 1 0.6 0.666667 0.611842 \n", | |
| "4 London 0 0.8 0.666667 0.783217 \n", | |
| "5 Paris 1 0.5 0.666667 0.560000 \n", | |
| "6 New York 0 0.6 0.666667 0.611842 \n", | |
| "7 London 1 0.8 0.666667 0.783217 \n", | |
| "8 New York 1 0.6 0.666667 0.611842 \n", | |
| "9 London 1 0.8 0.666667 0.783217 \n", | |
| "10 New York 1 0.6 0.666667 0.611842 \n", | |
| "11 London 1 0.8 0.666667 0.783217 \n", | |
| "\n", | |
| " enc_smooth_manual \n", | |
| "0 0.6 \n", | |
| "1 0.8 \n", | |
| "2 0.5 \n", | |
| "3 0.6 \n", | |
| "4 0.8 \n", | |
| "5 0.5 \n", | |
| "6 0.6 \n", | |
| "7 0.8 \n", | |
| "8 0.6 \n", | |
| "9 0.8 \n", | |
| "10 0.6 \n", | |
| "11 0.8 " | |
| ], | |
| "text/html": [ | |
| "\n", | |
| " <div id=\"df-78bc24f8-9554-43ac-ad65-10de51549c79\" class=\"colab-df-container\">\n", | |
| " <div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>city</th>\n", | |
| " <th>churn</th>\n", | |
| " <th>enc_smooth0</th>\n", | |
| " <th>enc_smooth1</th>\n", | |
| " <th>enc_smooth_auto</th>\n", | |
| " <th>enc_smooth_manual</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>New York</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0.6</td>\n", | |
| " <td>0.666667</td>\n", | |
| " <td>0.611842</td>\n", | |
| " <td>0.6</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>London</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0.8</td>\n", | |
| " <td>0.666667</td>\n", | |
| " <td>0.783217</td>\n", | |
| " <td>0.8</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>Paris</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0.5</td>\n", | |
| " <td>0.666667</td>\n", | |
| " <td>0.560000</td>\n", | |
| " <td>0.5</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>New York</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0.6</td>\n", | |
| " <td>0.666667</td>\n", | |
| " <td>0.611842</td>\n", | |
| " <td>0.6</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>London</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0.8</td>\n", | |
| " <td>0.666667</td>\n", | |
| " <td>0.783217</td>\n", | |
| " <td>0.8</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>5</th>\n", | |
| " <td>Paris</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0.5</td>\n", | |
| " <td>0.666667</td>\n", | |
| " <td>0.560000</td>\n", | |
| " <td>0.5</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>6</th>\n", | |
| " <td>New York</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0.6</td>\n", | |
| " <td>0.666667</td>\n", | |
| " <td>0.611842</td>\n", | |
| " <td>0.6</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7</th>\n", | |
| " <td>London</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0.8</td>\n", | |
| " <td>0.666667</td>\n", | |
| " <td>0.783217</td>\n", | |
| " <td>0.8</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>8</th>\n", | |
| " <td>New York</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0.6</td>\n", | |
| " <td>0.666667</td>\n", | |
| " <td>0.611842</td>\n", | |
| " <td>0.6</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>9</th>\n", | |
| " <td>London</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0.8</td>\n", | |
| " <td>0.666667</td>\n", | |
| " <td>0.783217</td>\n", | |
| " <td>0.8</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>10</th>\n", | |
| " <td>New York</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0.6</td>\n", | |
| " <td>0.666667</td>\n", | |
| " <td>0.611842</td>\n", | |
| " <td>0.6</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>11</th>\n", | |
| " <td>London</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0.8</td>\n", | |
| " <td>0.666667</td>\n", | |
| " <td>0.783217</td>\n", | |
| " <td>0.8</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>\n", | |
| " <div class=\"colab-df-buttons\">\n", | |
| "\n", | |
| " <div class=\"colab-df-container\">\n", | |
| " <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-78bc24f8-9554-43ac-ad65-10de51549c79')\"\n", | |
| " title=\"Convert this dataframe to an interactive table.\"\n", | |
| " style=\"display:none;\">\n", | |
| "\n", | |
| " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n", | |
| " <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n", | |
| " </svg>\n", | |
| " </button>\n", | |
| "\n", | |
| " <style>\n", | |
| " .colab-df-container {\n", | |
| " display:flex;\n", | |
| " gap: 12px;\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-convert {\n", | |
| " background-color: #E8F0FE;\n", | |
| " border: none;\n", | |
| " border-radius: 50%;\n", | |
| " cursor: pointer;\n", | |
| " display: none;\n", | |
| " fill: #1967D2;\n", | |
| " height: 32px;\n", | |
| " padding: 0 0 0 0;\n", | |
| " width: 32px;\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-convert:hover {\n", | |
| " background-color: #E2EBFA;\n", | |
| " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n", | |
| " fill: #174EA6;\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-buttons div {\n", | |
| " margin-bottom: 4px;\n", | |
| " }\n", | |
| "\n", | |
| " [theme=dark] .colab-df-convert {\n", | |
| " background-color: #3B4455;\n", | |
| " fill: #D2E3FC;\n", | |
| " }\n", | |
| "\n", | |
| " [theme=dark] .colab-df-convert:hover {\n", | |
| " background-color: #434B5C;\n", | |
| " box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n", | |
| " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n", | |
| " fill: #FFFFFF;\n", | |
| " }\n", | |
| " </style>\n", | |
| "\n", | |
| " <script>\n", | |
| " const buttonEl =\n", | |
| " document.querySelector('#df-78bc24f8-9554-43ac-ad65-10de51549c79 button.colab-df-convert');\n", | |
| " buttonEl.style.display =\n", | |
| " google.colab.kernel.accessAllowed ? 'block' : 'none';\n", | |
| "\n", | |
| " async function convertToInteractive(key) {\n", | |
| " const element = document.querySelector('#df-78bc24f8-9554-43ac-ad65-10de51549c79');\n", | |
| " const dataTable =\n", | |
| " await google.colab.kernel.invokeFunction('convertToInteractive',\n", | |
| " [key], {});\n", | |
| " if (!dataTable) return;\n", | |
| "\n", | |
| " const docLinkHtml = 'Like what you see? Visit the ' +\n", | |
| " '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n", | |
| " + ' to learn more about interactive tables.';\n", | |
| " element.innerHTML = '';\n", | |
| " dataTable['output_type'] = 'display_data';\n", | |
| " await google.colab.output.renderOutput(dataTable, element);\n", | |
| " const docLink = document.createElement('div');\n", | |
| " docLink.innerHTML = docLinkHtml;\n", | |
| " element.appendChild(docLink);\n", | |
| " }\n", | |
| " </script>\n", | |
| " </div>\n", | |
| "\n", | |
| "\n", | |
| " <div id=\"df-106a9b8b-f643-4ea9-a8c6-0f6e592048ec\">\n", | |
| " <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-106a9b8b-f643-4ea9-a8c6-0f6e592048ec')\"\n", | |
| " title=\"Suggest charts\"\n", | |
| " style=\"display:none;\">\n", | |
| "\n", | |
| "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n", | |
| " width=\"24px\">\n", | |
| " <g>\n", | |
| " <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n", | |
| " </g>\n", | |
| "</svg>\n", | |
| " </button>\n", | |
| "\n", | |
| "<style>\n", | |
| " .colab-df-quickchart {\n", | |
| " --bg-color: #E8F0FE;\n", | |
| " --fill-color: #1967D2;\n", | |
| " --hover-bg-color: #E2EBFA;\n", | |
| " --hover-fill-color: #174EA6;\n", | |
| " --disabled-fill-color: #AAA;\n", | |
| " --disabled-bg-color: #DDD;\n", | |
| " }\n", | |
| "\n", | |
| " [theme=dark] .colab-df-quickchart {\n", | |
| " --bg-color: #3B4455;\n", | |
| " --fill-color: #D2E3FC;\n", | |
| " --hover-bg-color: #434B5C;\n", | |
| " --hover-fill-color: #FFFFFF;\n", | |
| " --disabled-bg-color: #3B4455;\n", | |
| " --disabled-fill-color: #666;\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-quickchart {\n", | |
| " background-color: var(--bg-color);\n", | |
| " border: none;\n", | |
| " border-radius: 50%;\n", | |
| " cursor: pointer;\n", | |
| " display: none;\n", | |
| " fill: var(--fill-color);\n", | |
| " height: 32px;\n", | |
| " padding: 0;\n", | |
| " width: 32px;\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-quickchart:hover {\n", | |
| " background-color: var(--hover-bg-color);\n", | |
| " box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n", | |
| " fill: var(--button-hover-fill-color);\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-quickchart-complete:disabled,\n", | |
| " .colab-df-quickchart-complete:disabled:hover {\n", | |
| " background-color: var(--disabled-bg-color);\n", | |
| " fill: var(--disabled-fill-color);\n", | |
| " box-shadow: none;\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-spinner {\n", | |
| " border: 2px solid var(--fill-color);\n", | |
| " border-color: transparent;\n", | |
| " border-bottom-color: var(--fill-color);\n", | |
| " animation:\n", | |
| " spin 1s steps(1) infinite;\n", | |
| " }\n", | |
| "\n", | |
| " @keyframes spin {\n", | |
| " 0% {\n", | |
| " border-color: transparent;\n", | |
| " border-bottom-color: var(--fill-color);\n", | |
| " border-left-color: var(--fill-color);\n", | |
| " }\n", | |
| " 20% {\n", | |
| " border-color: transparent;\n", | |
| " border-left-color: var(--fill-color);\n", | |
| " border-top-color: var(--fill-color);\n", | |
| " }\n", | |
| " 30% {\n", | |
| " border-color: transparent;\n", | |
| " border-left-color: var(--fill-color);\n", | |
| " border-top-color: var(--fill-color);\n", | |
| " border-right-color: var(--fill-color);\n", | |
| " }\n", | |
| " 40% {\n", | |
| " border-color: transparent;\n", | |
| " border-right-color: var(--fill-color);\n", | |
| " border-top-color: var(--fill-color);\n", | |
| " }\n", | |
| " 60% {\n", | |
| " border-color: transparent;\n", | |
| " border-right-color: var(--fill-color);\n", | |
| " }\n", | |
| " 80% {\n", | |
| " border-color: transparent;\n", | |
| " border-right-color: var(--fill-color);\n", | |
| " border-bottom-color: var(--fill-color);\n", | |
| " }\n", | |
| " 90% {\n", | |
| " border-color: transparent;\n", | |
| " border-bottom-color: var(--fill-color);\n", | |
| " }\n", | |
| " }\n", | |
| "</style>\n", | |
| "\n", | |
| " <script>\n", | |
| " async function quickchart(key) {\n", | |
| " const quickchartButtonEl =\n", | |
| " document.querySelector('#' + key + ' button');\n", | |
| " quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n", | |
| " quickchartButtonEl.classList.add('colab-df-spinner');\n", | |
| " try {\n", | |
| " const charts = await google.colab.kernel.invokeFunction(\n", | |
| " 'suggestCharts', [key], {});\n", | |
| " } catch (error) {\n", | |
| " console.error('Error during call to suggestCharts:', error);\n", | |
| " }\n", | |
| " quickchartButtonEl.classList.remove('colab-df-spinner');\n", | |
| " quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n", | |
| " }\n", | |
| " (() => {\n", | |
| " let quickchartButtonEl =\n", | |
| " document.querySelector('#df-106a9b8b-f643-4ea9-a8c6-0f6e592048ec button');\n", | |
| " quickchartButtonEl.style.display =\n", | |
| " google.colab.kernel.accessAllowed ? 'block' : 'none';\n", | |
| " })();\n", | |
| " </script>\n", | |
| " </div>\n", | |
| "\n", | |
| " <div id=\"id_0df18604-079a-48ee-afef-450ab2d146fe\">\n", | |
| " <style>\n", | |
| " .colab-df-generate {\n", | |
| " background-color: #E8F0FE;\n", | |
| " border: none;\n", | |
| " border-radius: 50%;\n", | |
| " cursor: pointer;\n", | |
| " display: none;\n", | |
| " fill: #1967D2;\n", | |
| " height: 32px;\n", | |
| " padding: 0 0 0 0;\n", | |
| " width: 32px;\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-generate:hover {\n", | |
| " background-color: #E2EBFA;\n", | |
| " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n", | |
| " fill: #174EA6;\n", | |
| " }\n", | |
| "\n", | |
| " [theme=dark] .colab-df-generate {\n", | |
| " background-color: #3B4455;\n", | |
| " fill: #D2E3FC;\n", | |
| " }\n", | |
| "\n", | |
| " [theme=dark] .colab-df-generate:hover {\n", | |
| " background-color: #434B5C;\n", | |
| " box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n", | |
| " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n", | |
| " fill: #FFFFFF;\n", | |
| " }\n", | |
| " </style>\n", | |
| " <button class=\"colab-df-generate\" onclick=\"generateWithVariable('df_clf')\"\n", | |
| " title=\"Generate code using this dataframe.\"\n", | |
| " style=\"display:none;\">\n", | |
| "\n", | |
| " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n", | |
| " width=\"24px\">\n", | |
| " <path d=\"M7,19H8.4L18.45,9,17,7.55,7,17.6ZM5,21V16.75L18.45,3.32a2,2,0,0,1,2.83,0l1.4,1.43a1.91,1.91,0,0,1,.58,1.4,1.91,1.91,0,0,1-.58,1.4L9.25,21ZM18.45,9,17,7.55Zm-12,3A5.31,5.31,0,0,0,4.9,8.1,5.31,5.31,0,0,0,1,6.5,5.31,5.31,0,0,0,4.9,4.9,5.31,5.31,0,0,0,6.5,1,5.31,5.31,0,0,0,8.1,4.9,5.31,5.31,0,0,0,12,6.5,5.46,5.46,0,0,0,6.5,12Z\"/>\n", | |
| " </svg>\n", | |
| " </button>\n", | |
| " <script>\n", | |
| " (() => {\n", | |
| " const buttonEl =\n", | |
| " document.querySelector('#id_0df18604-079a-48ee-afef-450ab2d146fe button.colab-df-generate');\n", | |
| " buttonEl.style.display =\n", | |
| " google.colab.kernel.accessAllowed ? 'block' : 'none';\n", | |
| "\n", | |
| " buttonEl.onclick = () => {\n", | |
| " google.colab.notebook.generateWithVariable('df_clf');\n", | |
| " }\n", | |
| " })();\n", | |
| " </script>\n", | |
| " </div>\n", | |
| "\n", | |
| " </div>\n", | |
| " </div>\n" | |
| ], | |
| "application/vnd.google.colaboratory.intrinsic+json": { | |
| "type": "dataframe", | |
| "variable_name": "df_clf", | |
| "summary": "{\n \"name\": \"df_clf\",\n \"rows\": 12,\n \"fields\": [\n {\n \"column\": \"city\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"New York\",\n \"London\",\n \"Paris\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"churn\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n \"samples\": [\n 1,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"enc_smooth0\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.12309149097933277,\n \"min\": 0.5,\n \"max\": 0.8,\n \"num_unique_values\": 3,\n \"samples\": [\n 0.6,\n 0.8\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"enc_smooth1\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 5.149286359812627e-10,\n \"min\": 0.6666666663333333,\n \"max\": 0.6666666673333332,\n \"num_unique_values\": 2,\n \"samples\": [\n 0.6666666673333332,\n 0.6666666663333333\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"enc_smooth_auto\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.097676234197722,\n \"min\": 0.56,\n \"max\": 0.7832167832167832,\n \"num_unique_values\": 3,\n \"samples\": [\n 0.6118421052631579,\n 0.7832167832167832\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"enc_smooth_manual\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.12309149097933277,\n \"min\": 0.5,\n \"max\": 0.8,\n \"num_unique_values\": 3,\n \"samples\": [\n 0.6,\n 0.8\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" | |
| } | |
| }, | |
| "metadata": {}, | |
| "execution_count": 6 | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "print(enc_smooth_auto.target_type_)\n", | |
| "print(enc_smooth_auto.target_mean_)\n", | |
| "print(enc_smooth_auto.get_params())" | |
| ], | |
| "metadata": { | |
| "colab": { | |
| "base_uri": "https://localhost:8080/" | |
| }, | |
| "id": "Hw4QHwnpZ80-", | |
| "outputId": "0538ccee-df38-4c6a-e4e4-576a636d1d41" | |
| }, | |
| "execution_count": 7, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "name": "stdout", | |
| "text": [ | |
| "binary\n", | |
| "0.6666666666666666\n", | |
| "{'categories': 'auto', 'cv': 2, 'random_state': None, 'shuffle': False, 'smooth': 'auto', 'target_type': 'auto'}\n" | |
| ] | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "from category_encoders.cat_boost import CatBoostEncoder\n", | |
| "\n", | |
| "cbe = CatBoostEncoder()\n", | |
| "\n", | |
| "df_clf['enc_catboost'] = cbe.fit_transform(df_clf[['city']], df_clf['churn'])\n", | |
| "\n", | |
| "df_clf" | |
| ], | |
| "metadata": { | |
| "colab": { | |
| "base_uri": "https://localhost:8080/", | |
| "height": 426 | |
| }, | |
| "id": "9W13_R2pbuCE", | |
| "outputId": "c9ee987a-aef9-45fd-a7bd-f37997973a27" | |
| }, | |
| "execution_count": 8, | |
| "outputs": [ | |
| { | |
| "output_type": "execute_result", | |
| "data": { | |
| "text/plain": [ | |
| " city churn enc_smooth0 enc_smooth1 enc_smooth_auto \\\n", | |
| "0 New York 0 0.6 0.666667 0.611842 \n", | |
| "1 London 1 0.8 0.666667 0.783217 \n", | |
| "2 Paris 0 0.5 0.666667 0.560000 \n", | |
| "3 New York 1 0.6 0.666667 0.611842 \n", | |
| "4 London 0 0.8 0.666667 0.783217 \n", | |
| "5 Paris 1 0.5 0.666667 0.560000 \n", | |
| "6 New York 0 0.6 0.666667 0.611842 \n", | |
| "7 London 1 0.8 0.666667 0.783217 \n", | |
| "8 New York 1 0.6 0.666667 0.611842 \n", | |
| "9 London 1 0.8 0.666667 0.783217 \n", | |
| "10 New York 1 0.6 0.666667 0.611842 \n", | |
| "11 London 1 0.8 0.666667 0.783217 \n", | |
| "\n", | |
| " enc_smooth_manual enc_catboost \n", | |
| "0 0.6 0.666667 \n", | |
| "1 0.8 0.666667 \n", | |
| "2 0.5 0.666667 \n", | |
| "3 0.6 0.333333 \n", | |
| "4 0.8 0.833333 \n", | |
| "5 0.5 0.333333 \n", | |
| "6 0.6 0.555556 \n", | |
| "7 0.8 0.555556 \n", | |
| "8 0.6 0.416667 \n", | |
| "9 0.8 0.666667 \n", | |
| "10 0.6 0.533333 \n", | |
| "11 0.8 0.733333 " | |
| ], | |
| "text/html": [ | |
| "\n", | |
| " <div id=\"df-3d161394-93ff-487e-9da8-295a7260a2c8\" class=\"colab-df-container\">\n", | |
| " <div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>city</th>\n", | |
| " <th>churn</th>\n", | |
| " <th>enc_smooth0</th>\n", | |
| " <th>enc_smooth1</th>\n", | |
| " <th>enc_smooth_auto</th>\n", | |
| " <th>enc_smooth_manual</th>\n", | |
| " <th>enc_catboost</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>New York</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0.6</td>\n", | |
| " <td>0.666667</td>\n", | |
| " <td>0.611842</td>\n", | |
| " <td>0.6</td>\n", | |
| " <td>0.666667</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>London</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0.8</td>\n", | |
| " <td>0.666667</td>\n", | |
| " <td>0.783217</td>\n", | |
| " <td>0.8</td>\n", | |
| " <td>0.666667</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>Paris</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0.5</td>\n", | |
| " <td>0.666667</td>\n", | |
| " <td>0.560000</td>\n", | |
| " <td>0.5</td>\n", | |
| " <td>0.666667</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>New York</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0.6</td>\n", | |
| " <td>0.666667</td>\n", | |
| " <td>0.611842</td>\n", | |
| " <td>0.6</td>\n", | |
| " <td>0.333333</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>London</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0.8</td>\n", | |
| " <td>0.666667</td>\n", | |
| " <td>0.783217</td>\n", | |
| " <td>0.8</td>\n", | |
| " <td>0.833333</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>5</th>\n", | |
| " <td>Paris</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0.5</td>\n", | |
| " <td>0.666667</td>\n", | |
| " <td>0.560000</td>\n", | |
| " <td>0.5</td>\n", | |
| " <td>0.333333</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>6</th>\n", | |
| " <td>New York</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0.6</td>\n", | |
| " <td>0.666667</td>\n", | |
| " <td>0.611842</td>\n", | |
| " <td>0.6</td>\n", | |
| " <td>0.555556</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7</th>\n", | |
| " <td>London</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0.8</td>\n", | |
| " <td>0.666667</td>\n", | |
| " <td>0.783217</td>\n", | |
| " <td>0.8</td>\n", | |
| " <td>0.555556</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>8</th>\n", | |
| " <td>New York</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0.6</td>\n", | |
| " <td>0.666667</td>\n", | |
| " <td>0.611842</td>\n", | |
| " <td>0.6</td>\n", | |
| " <td>0.416667</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>9</th>\n", | |
| " <td>London</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0.8</td>\n", | |
| " <td>0.666667</td>\n", | |
| " <td>0.783217</td>\n", | |
| " <td>0.8</td>\n", | |
| " <td>0.666667</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>10</th>\n", | |
| " <td>New York</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0.6</td>\n", | |
| " <td>0.666667</td>\n", | |
| " <td>0.611842</td>\n", | |
| " <td>0.6</td>\n", | |
| " <td>0.533333</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>11</th>\n", | |
| " <td>London</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0.8</td>\n", | |
| " <td>0.666667</td>\n", | |
| " <td>0.783217</td>\n", | |
| " <td>0.8</td>\n", | |
| " <td>0.733333</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>\n", | |
| " <div class=\"colab-df-buttons\">\n", | |
| "\n", | |
| " <div class=\"colab-df-container\">\n", | |
| " <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-3d161394-93ff-487e-9da8-295a7260a2c8')\"\n", | |
| " title=\"Convert this dataframe to an interactive table.\"\n", | |
| " style=\"display:none;\">\n", | |
| "\n", | |
| " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n", | |
| " <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n", | |
| " </svg>\n", | |
| " </button>\n", | |
| "\n", | |
| " <style>\n", | |
| " .colab-df-container {\n", | |
| " display:flex;\n", | |
| " gap: 12px;\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-convert {\n", | |
| " background-color: #E8F0FE;\n", | |
| " border: none;\n", | |
| " border-radius: 50%;\n", | |
| " cursor: pointer;\n", | |
| " display: none;\n", | |
| " fill: #1967D2;\n", | |
| " height: 32px;\n", | |
| " padding: 0 0 0 0;\n", | |
| " width: 32px;\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-convert:hover {\n", | |
| " background-color: #E2EBFA;\n", | |
| " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n", | |
| " fill: #174EA6;\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-buttons div {\n", | |
| " margin-bottom: 4px;\n", | |
| " }\n", | |
| "\n", | |
| " [theme=dark] .colab-df-convert {\n", | |
| " background-color: #3B4455;\n", | |
| " fill: #D2E3FC;\n", | |
| " }\n", | |
| "\n", | |
| " [theme=dark] .colab-df-convert:hover {\n", | |
| " background-color: #434B5C;\n", | |
| " box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n", | |
| " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n", | |
| " fill: #FFFFFF;\n", | |
| " }\n", | |
| " </style>\n", | |
| "\n", | |
| " <script>\n", | |
| " const buttonEl =\n", | |
| " document.querySelector('#df-3d161394-93ff-487e-9da8-295a7260a2c8 button.colab-df-convert');\n", | |
| " buttonEl.style.display =\n", | |
| " google.colab.kernel.accessAllowed ? 'block' : 'none';\n", | |
| "\n", | |
| " async function convertToInteractive(key) {\n", | |
| " const element = document.querySelector('#df-3d161394-93ff-487e-9da8-295a7260a2c8');\n", | |
| " const dataTable =\n", | |
| " await google.colab.kernel.invokeFunction('convertToInteractive',\n", | |
| " [key], {});\n", | |
| " if (!dataTable) return;\n", | |
| "\n", | |
| " const docLinkHtml = 'Like what you see? Visit the ' +\n", | |
| " '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n", | |
| " + ' to learn more about interactive tables.';\n", | |
| " element.innerHTML = '';\n", | |
| " dataTable['output_type'] = 'display_data';\n", | |
| " await google.colab.output.renderOutput(dataTable, element);\n", | |
| " const docLink = document.createElement('div');\n", | |
| " docLink.innerHTML = docLinkHtml;\n", | |
| " element.appendChild(docLink);\n", | |
| " }\n", | |
| " </script>\n", | |
| " </div>\n", | |
| "\n", | |
| "\n", | |
| " <div id=\"df-4a77a084-1b24-4d7b-a3eb-434cb139f278\">\n", | |
| " <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-4a77a084-1b24-4d7b-a3eb-434cb139f278')\"\n", | |
| " title=\"Suggest charts\"\n", | |
| " style=\"display:none;\">\n", | |
| "\n", | |
| "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n", | |
| " width=\"24px\">\n", | |
| " <g>\n", | |
| " <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n", | |
| " </g>\n", | |
| "</svg>\n", | |
| " </button>\n", | |
| "\n", | |
| "<style>\n", | |
| " .colab-df-quickchart {\n", | |
| " --bg-color: #E8F0FE;\n", | |
| " --fill-color: #1967D2;\n", | |
| " --hover-bg-color: #E2EBFA;\n", | |
| " --hover-fill-color: #174EA6;\n", | |
| " --disabled-fill-color: #AAA;\n", | |
| " --disabled-bg-color: #DDD;\n", | |
| " }\n", | |
| "\n", | |
| " [theme=dark] .colab-df-quickchart {\n", | |
| " --bg-color: #3B4455;\n", | |
| " --fill-color: #D2E3FC;\n", | |
| " --hover-bg-color: #434B5C;\n", | |
| " --hover-fill-color: #FFFFFF;\n", | |
| " --disabled-bg-color: #3B4455;\n", | |
| " --disabled-fill-color: #666;\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-quickchart {\n", | |
| " background-color: var(--bg-color);\n", | |
| " border: none;\n", | |
| " border-radius: 50%;\n", | |
| " cursor: pointer;\n", | |
| " display: none;\n", | |
| " fill: var(--fill-color);\n", | |
| " height: 32px;\n", | |
| " padding: 0;\n", | |
| " width: 32px;\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-quickchart:hover {\n", | |
| " background-color: var(--hover-bg-color);\n", | |
| " box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n", | |
| " fill: var(--button-hover-fill-color);\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-quickchart-complete:disabled,\n", | |
| " .colab-df-quickchart-complete:disabled:hover {\n", | |
| " background-color: var(--disabled-bg-color);\n", | |
| " fill: var(--disabled-fill-color);\n", | |
| " box-shadow: none;\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-spinner {\n", | |
| " border: 2px solid var(--fill-color);\n", | |
| " border-color: transparent;\n", | |
| " border-bottom-color: var(--fill-color);\n", | |
| " animation:\n", | |
| " spin 1s steps(1) infinite;\n", | |
| " }\n", | |
| "\n", | |
| " @keyframes spin {\n", | |
| " 0% {\n", | |
| " border-color: transparent;\n", | |
| " border-bottom-color: var(--fill-color);\n", | |
| " border-left-color: var(--fill-color);\n", | |
| " }\n", | |
| " 20% {\n", | |
| " border-color: transparent;\n", | |
| " border-left-color: var(--fill-color);\n", | |
| " border-top-color: var(--fill-color);\n", | |
| " }\n", | |
| " 30% {\n", | |
| " border-color: transparent;\n", | |
| " border-left-color: var(--fill-color);\n", | |
| " border-top-color: var(--fill-color);\n", | |
| " border-right-color: var(--fill-color);\n", | |
| " }\n", | |
| " 40% {\n", | |
| " border-color: transparent;\n", | |
| " border-right-color: var(--fill-color);\n", | |
| " border-top-color: var(--fill-color);\n", | |
| " }\n", | |
| " 60% {\n", | |
| " border-color: transparent;\n", | |
| " border-right-color: var(--fill-color);\n", | |
| " }\n", | |
| " 80% {\n", | |
| " border-color: transparent;\n", | |
| " border-right-color: var(--fill-color);\n", | |
| " border-bottom-color: var(--fill-color);\n", | |
| " }\n", | |
| " 90% {\n", | |
| " border-color: transparent;\n", | |
| " border-bottom-color: var(--fill-color);\n", | |
| " }\n", | |
| " }\n", | |
| "</style>\n", | |
| "\n", | |
| " <script>\n", | |
| " async function quickchart(key) {\n", | |
| " const quickchartButtonEl =\n", | |
| " document.querySelector('#' + key + ' button');\n", | |
| " quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n", | |
| " quickchartButtonEl.classList.add('colab-df-spinner');\n", | |
| " try {\n", | |
| " const charts = await google.colab.kernel.invokeFunction(\n", | |
| " 'suggestCharts', [key], {});\n", | |
| " } catch (error) {\n", | |
| " console.error('Error during call to suggestCharts:', error);\n", | |
| " }\n", | |
| " quickchartButtonEl.classList.remove('colab-df-spinner');\n", | |
| " quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n", | |
| " }\n", | |
| " (() => {\n", | |
| " let quickchartButtonEl =\n", | |
| " document.querySelector('#df-4a77a084-1b24-4d7b-a3eb-434cb139f278 button');\n", | |
| " quickchartButtonEl.style.display =\n", | |
| " google.colab.kernel.accessAllowed ? 'block' : 'none';\n", | |
| " })();\n", | |
| " </script>\n", | |
| " </div>\n", | |
| "\n", | |
| " <div id=\"id_702e4c53-ff90-40ab-87ec-365c428b77e4\">\n", | |
| " <style>\n", | |
| " .colab-df-generate {\n", | |
| " background-color: #E8F0FE;\n", | |
| " border: none;\n", | |
| " border-radius: 50%;\n", | |
| " cursor: pointer;\n", | |
| " display: none;\n", | |
| " fill: #1967D2;\n", | |
| " height: 32px;\n", | |
| " padding: 0 0 0 0;\n", | |
| " width: 32px;\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-generate:hover {\n", | |
| " background-color: #E2EBFA;\n", | |
| " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n", | |
| " fill: #174EA6;\n", | |
| " }\n", | |
| "\n", | |
| " [theme=dark] .colab-df-generate {\n", | |
| " background-color: #3B4455;\n", | |
| " fill: #D2E3FC;\n", | |
| " }\n", | |
| "\n", | |
| " [theme=dark] .colab-df-generate:hover {\n", | |
| " background-color: #434B5C;\n", | |
| " box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n", | |
| " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n", | |
| " fill: #FFFFFF;\n", | |
| " }\n", | |
| " </style>\n", | |
| " <button class=\"colab-df-generate\" onclick=\"generateWithVariable('df_clf')\"\n", | |
| " title=\"Generate code using this dataframe.\"\n", | |
| " style=\"display:none;\">\n", | |
| "\n", | |
| " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n", | |
| " width=\"24px\">\n", | |
| " <path d=\"M7,19H8.4L18.45,9,17,7.55,7,17.6ZM5,21V16.75L18.45,3.32a2,2,0,0,1,2.83,0l1.4,1.43a1.91,1.91,0,0,1,.58,1.4,1.91,1.91,0,0,1-.58,1.4L9.25,21ZM18.45,9,17,7.55Zm-12,3A5.31,5.31,0,0,0,4.9,8.1,5.31,5.31,0,0,0,1,6.5,5.31,5.31,0,0,0,4.9,4.9,5.31,5.31,0,0,0,6.5,1,5.31,5.31,0,0,0,8.1,4.9,5.31,5.31,0,0,0,12,6.5,5.46,5.46,0,0,0,6.5,12Z\"/>\n", | |
| " </svg>\n", | |
| " </button>\n", | |
| " <script>\n", | |
| " (() => {\n", | |
| " const buttonEl =\n", | |
| " document.querySelector('#id_702e4c53-ff90-40ab-87ec-365c428b77e4 button.colab-df-generate');\n", | |
| " buttonEl.style.display =\n", | |
| " google.colab.kernel.accessAllowed ? 'block' : 'none';\n", | |
| "\n", | |
| " buttonEl.onclick = () => {\n", | |
| " google.colab.notebook.generateWithVariable('df_clf');\n", | |
| " }\n", | |
| " })();\n", | |
| " </script>\n", | |
| " </div>\n", | |
| "\n", | |
| " </div>\n", | |
| " </div>\n" | |
| ], | |
| "application/vnd.google.colaboratory.intrinsic+json": { | |
| "type": "dataframe", | |
| "variable_name": "df_clf", | |
| "summary": "{\n \"name\": \"df_clf\",\n \"rows\": 12,\n \"fields\": [\n {\n \"column\": \"city\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"New York\",\n \"London\",\n \"Paris\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"churn\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n \"samples\": [\n 1,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"enc_smooth0\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.12309149097933277,\n \"min\": 0.5,\n \"max\": 0.8,\n \"num_unique_values\": 3,\n \"samples\": [\n 0.6,\n 0.8\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"enc_smooth1\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 5.149286359812627e-10,\n \"min\": 0.6666666663333333,\n \"max\": 0.6666666673333332,\n \"num_unique_values\": 2,\n \"samples\": [\n 0.6666666673333332,\n 0.6666666663333333\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"enc_smooth_auto\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.097676234197722,\n \"min\": 0.56,\n \"max\": 0.7832167832167832,\n \"num_unique_values\": 3,\n \"samples\": [\n 0.6118421052631579,\n 0.7832167832167832\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"enc_smooth_manual\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.12309149097933277,\n \"min\": 0.5,\n \"max\": 0.8,\n \"num_unique_values\": 3,\n \"samples\": [\n 0.6,\n 0.8\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"enc_catboost\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.15651684098335997,\n \"min\": 0.3333333333333333,\n \"max\": 0.8333333333333333,\n \"num_unique_values\": 7,\n \"samples\": [\n 0.6666666666666666,\n 0.3333333333333333\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" | |
| } | |
| }, | |
| "metadata": {}, | |
| "execution_count": 8 | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [], | |
| "metadata": { | |
| "id": "UMb_bL1FliCG" | |
| }, | |
| "execution_count": 8, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "# CBE MANUAL WORK IN PROGRESS\n", | |
| "\n", | |
| "# All categorical feature values are transformed to numerical using the following formula:\n", | |
| "# avg_target=(countInClass+prior)/totalCount+1\n", | |
| "# countInClass is how many times the label value was equal to 1 for objects with the current categorical feature value.\n", | |
| "# prior is the preliminary value for the numerator. It is determined by the starting parameters.\n", | |
| "# totalCount is the total number of objects (up to the current one) that have a categorical feature value matching the current one.\n", | |
| "\n", | |
| "cbe_manual = []\n", | |
| "prior = 1\n", | |
| "counts_class = {k:0 for k in df_clf.city.unique()}\n", | |
| "counts_total = {k:0 for k in df_clf.city.unique()}\n", | |
| "\n", | |
| "for i, row in df_clf.iterrows():\n", | |
| " if row.churn==1:\n", | |
| " counts_class[row.city] += 1\n", | |
| " counts_total[row.city] += 1\n", | |
| "\n", | |
| " avg_target = (counts_class[row.city] + prior) / (counts_total[row.city] + 1)\n", | |
| " cbe_manual.append(avg_target)\n", | |
| "\n", | |
| "\n", | |
| "df_clf['enc_catboost_manual'] = cbe_manual\n", | |
| "\n", | |
| "df_clf" | |
| ], | |
| "metadata": { | |
| "colab": { | |
| "base_uri": "https://localhost:8080/", | |
| "height": 426 | |
| }, | |
| "id": "rPUaj7DEk20P", | |
| "outputId": "a3eb891b-ef6a-479f-d2b1-8f36626c8c9e" | |
| }, | |
| "execution_count": 9, | |
| "outputs": [ | |
| { | |
| "output_type": "execute_result", | |
| "data": { | |
| "text/plain": [ | |
| " city churn enc_smooth0 enc_smooth1 enc_smooth_auto \\\n", | |
| "0 New York 0 0.6 0.666667 0.611842 \n", | |
| "1 London 1 0.8 0.666667 0.783217 \n", | |
| "2 Paris 0 0.5 0.666667 0.560000 \n", | |
| "3 New York 1 0.6 0.666667 0.611842 \n", | |
| "4 London 0 0.8 0.666667 0.783217 \n", | |
| "5 Paris 1 0.5 0.666667 0.560000 \n", | |
| "6 New York 0 0.6 0.666667 0.611842 \n", | |
| "7 London 1 0.8 0.666667 0.783217 \n", | |
| "8 New York 1 0.6 0.666667 0.611842 \n", | |
| "9 London 1 0.8 0.666667 0.783217 \n", | |
| "10 New York 1 0.6 0.666667 0.611842 \n", | |
| "11 London 1 0.8 0.666667 0.783217 \n", | |
| "\n", | |
| " enc_smooth_manual enc_catboost enc_catboost_manual \n", | |
| "0 0.6 0.666667 0.500000 \n", | |
| "1 0.8 0.666667 1.000000 \n", | |
| "2 0.5 0.666667 0.500000 \n", | |
| "3 0.6 0.333333 0.666667 \n", | |
| "4 0.8 0.833333 0.666667 \n", | |
| "5 0.5 0.333333 0.666667 \n", | |
| "6 0.6 0.555556 0.500000 \n", | |
| "7 0.8 0.555556 0.750000 \n", | |
| "8 0.6 0.416667 0.600000 \n", | |
| "9 0.8 0.666667 0.800000 \n", | |
| "10 0.6 0.533333 0.666667 \n", | |
| "11 0.8 0.733333 0.833333 " | |
| ], | |
| "text/html": [ | |
| "\n", | |
| " <div id=\"df-161910c4-1beb-4485-9bda-8af5eb33e26b\" class=\"colab-df-container\">\n", | |
| " <div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>city</th>\n", | |
| " <th>churn</th>\n", | |
| " <th>enc_smooth0</th>\n", | |
| " <th>enc_smooth1</th>\n", | |
| " <th>enc_smooth_auto</th>\n", | |
| " <th>enc_smooth_manual</th>\n", | |
| " <th>enc_catboost</th>\n", | |
| " <th>enc_catboost_manual</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>New York</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0.6</td>\n", | |
| " <td>0.666667</td>\n", | |
| " <td>0.611842</td>\n", | |
| " <td>0.6</td>\n", | |
| " <td>0.666667</td>\n", | |
| " <td>0.500000</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>London</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0.8</td>\n", | |
| " <td>0.666667</td>\n", | |
| " <td>0.783217</td>\n", | |
| " <td>0.8</td>\n", | |
| " <td>0.666667</td>\n", | |
| " <td>1.000000</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>Paris</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0.5</td>\n", | |
| " <td>0.666667</td>\n", | |
| " <td>0.560000</td>\n", | |
| " <td>0.5</td>\n", | |
| " <td>0.666667</td>\n", | |
| " <td>0.500000</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>New York</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0.6</td>\n", | |
| " <td>0.666667</td>\n", | |
| " <td>0.611842</td>\n", | |
| " <td>0.6</td>\n", | |
| " <td>0.333333</td>\n", | |
| " <td>0.666667</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>London</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0.8</td>\n", | |
| " <td>0.666667</td>\n", | |
| " <td>0.783217</td>\n", | |
| " <td>0.8</td>\n", | |
| " <td>0.833333</td>\n", | |
| " <td>0.666667</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>5</th>\n", | |
| " <td>Paris</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0.5</td>\n", | |
| " <td>0.666667</td>\n", | |
| " <td>0.560000</td>\n", | |
| " <td>0.5</td>\n", | |
| " <td>0.333333</td>\n", | |
| " <td>0.666667</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>6</th>\n", | |
| " <td>New York</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0.6</td>\n", | |
| " <td>0.666667</td>\n", | |
| " <td>0.611842</td>\n", | |
| " <td>0.6</td>\n", | |
| " <td>0.555556</td>\n", | |
| " <td>0.500000</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7</th>\n", | |
| " <td>London</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0.8</td>\n", | |
| " <td>0.666667</td>\n", | |
| " <td>0.783217</td>\n", | |
| " <td>0.8</td>\n", | |
| " <td>0.555556</td>\n", | |
| " <td>0.750000</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>8</th>\n", | |
| " <td>New York</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0.6</td>\n", | |
| " <td>0.666667</td>\n", | |
| " <td>0.611842</td>\n", | |
| " <td>0.6</td>\n", | |
| " <td>0.416667</td>\n", | |
| " <td>0.600000</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>9</th>\n", | |
| " <td>London</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0.8</td>\n", | |
| " <td>0.666667</td>\n", | |
| " <td>0.783217</td>\n", | |
| " <td>0.8</td>\n", | |
| " <td>0.666667</td>\n", | |
| " <td>0.800000</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>10</th>\n", | |
| " <td>New York</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0.6</td>\n", | |
| " <td>0.666667</td>\n", | |
| " <td>0.611842</td>\n", | |
| " <td>0.6</td>\n", | |
| " <td>0.533333</td>\n", | |
| " <td>0.666667</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>11</th>\n", | |
| " <td>London</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0.8</td>\n", | |
| " <td>0.666667</td>\n", | |
| " <td>0.783217</td>\n", | |
| " <td>0.8</td>\n", | |
| " <td>0.733333</td>\n", | |
| " <td>0.833333</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>\n", | |
| " <div class=\"colab-df-buttons\">\n", | |
| "\n", | |
| " <div class=\"colab-df-container\">\n", | |
| " <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-161910c4-1beb-4485-9bda-8af5eb33e26b')\"\n", | |
| " title=\"Convert this dataframe to an interactive table.\"\n", | |
| " style=\"display:none;\">\n", | |
| "\n", | |
| " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n", | |
| " <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n", | |
| " </svg>\n", | |
| " </button>\n", | |
| "\n", | |
| " <style>\n", | |
| " .colab-df-container {\n", | |
| " display:flex;\n", | |
| " gap: 12px;\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-convert {\n", | |
| " background-color: #E8F0FE;\n", | |
| " border: none;\n", | |
| " border-radius: 50%;\n", | |
| " cursor: pointer;\n", | |
| " display: none;\n", | |
| " fill: #1967D2;\n", | |
| " height: 32px;\n", | |
| " padding: 0 0 0 0;\n", | |
| " width: 32px;\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-convert:hover {\n", | |
| " background-color: #E2EBFA;\n", | |
| " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n", | |
| " fill: #174EA6;\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-buttons div {\n", | |
| " margin-bottom: 4px;\n", | |
| " }\n", | |
| "\n", | |
| " [theme=dark] .colab-df-convert {\n", | |
| " background-color: #3B4455;\n", | |
| " fill: #D2E3FC;\n", | |
| " }\n", | |
| "\n", | |
| " [theme=dark] .colab-df-convert:hover {\n", | |
| " background-color: #434B5C;\n", | |
| " box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n", | |
| " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n", | |
| " fill: #FFFFFF;\n", | |
| " }\n", | |
| " </style>\n", | |
| "\n", | |
| " <script>\n", | |
| " const buttonEl =\n", | |
| " document.querySelector('#df-161910c4-1beb-4485-9bda-8af5eb33e26b button.colab-df-convert');\n", | |
| " buttonEl.style.display =\n", | |
| " google.colab.kernel.accessAllowed ? 'block' : 'none';\n", | |
| "\n", | |
| " async function convertToInteractive(key) {\n", | |
| " const element = document.querySelector('#df-161910c4-1beb-4485-9bda-8af5eb33e26b');\n", | |
| " const dataTable =\n", | |
| " await google.colab.kernel.invokeFunction('convertToInteractive',\n", | |
| " [key], {});\n", | |
| " if (!dataTable) return;\n", | |
| "\n", | |
| " const docLinkHtml = 'Like what you see? Visit the ' +\n", | |
| " '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n", | |
| " + ' to learn more about interactive tables.';\n", | |
| " element.innerHTML = '';\n", | |
| " dataTable['output_type'] = 'display_data';\n", | |
| " await google.colab.output.renderOutput(dataTable, element);\n", | |
| " const docLink = document.createElement('div');\n", | |
| " docLink.innerHTML = docLinkHtml;\n", | |
| " element.appendChild(docLink);\n", | |
| " }\n", | |
| " </script>\n", | |
| " </div>\n", | |
| "\n", | |
| "\n", | |
| " <div id=\"df-f85b4bf4-c7f8-4a31-bbd0-3feb0fef7920\">\n", | |
| " <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-f85b4bf4-c7f8-4a31-bbd0-3feb0fef7920')\"\n", | |
| " title=\"Suggest charts\"\n", | |
| " style=\"display:none;\">\n", | |
| "\n", | |
| "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n", | |
| " width=\"24px\">\n", | |
| " <g>\n", | |
| " <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n", | |
| " </g>\n", | |
| "</svg>\n", | |
| " </button>\n", | |
| "\n", | |
| "<style>\n", | |
| " .colab-df-quickchart {\n", | |
| " --bg-color: #E8F0FE;\n", | |
| " --fill-color: #1967D2;\n", | |
| " --hover-bg-color: #E2EBFA;\n", | |
| " --hover-fill-color: #174EA6;\n", | |
| " --disabled-fill-color: #AAA;\n", | |
| " --disabled-bg-color: #DDD;\n", | |
| " }\n", | |
| "\n", | |
| " [theme=dark] .colab-df-quickchart {\n", | |
| " --bg-color: #3B4455;\n", | |
| " --fill-color: #D2E3FC;\n", | |
| " --hover-bg-color: #434B5C;\n", | |
| " --hover-fill-color: #FFFFFF;\n", | |
| " --disabled-bg-color: #3B4455;\n", | |
| " --disabled-fill-color: #666;\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-quickchart {\n", | |
| " background-color: var(--bg-color);\n", | |
| " border: none;\n", | |
| " border-radius: 50%;\n", | |
| " cursor: pointer;\n", | |
| " display: none;\n", | |
| " fill: var(--fill-color);\n", | |
| " height: 32px;\n", | |
| " padding: 0;\n", | |
| " width: 32px;\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-quickchart:hover {\n", | |
| " background-color: var(--hover-bg-color);\n", | |
| " box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n", | |
| " fill: var(--button-hover-fill-color);\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-quickchart-complete:disabled,\n", | |
| " .colab-df-quickchart-complete:disabled:hover {\n", | |
| " background-color: var(--disabled-bg-color);\n", | |
| " fill: var(--disabled-fill-color);\n", | |
| " box-shadow: none;\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-spinner {\n", | |
| " border: 2px solid var(--fill-color);\n", | |
| " border-color: transparent;\n", | |
| " border-bottom-color: var(--fill-color);\n", | |
| " animation:\n", | |
| " spin 1s steps(1) infinite;\n", | |
| " }\n", | |
| "\n", | |
| " @keyframes spin {\n", | |
| " 0% {\n", | |
| " border-color: transparent;\n", | |
| " border-bottom-color: var(--fill-color);\n", | |
| " border-left-color: var(--fill-color);\n", | |
| " }\n", | |
| " 20% {\n", | |
| " border-color: transparent;\n", | |
| " border-left-color: var(--fill-color);\n", | |
| " border-top-color: var(--fill-color);\n", | |
| " }\n", | |
| " 30% {\n", | |
| " border-color: transparent;\n", | |
| " border-left-color: var(--fill-color);\n", | |
| " border-top-color: var(--fill-color);\n", | |
| " border-right-color: var(--fill-color);\n", | |
| " }\n", | |
| " 40% {\n", | |
| " border-color: transparent;\n", | |
| " border-right-color: var(--fill-color);\n", | |
| " border-top-color: var(--fill-color);\n", | |
| " }\n", | |
| " 60% {\n", | |
| " border-color: transparent;\n", | |
| " border-right-color: var(--fill-color);\n", | |
| " }\n", | |
| " 80% {\n", | |
| " border-color: transparent;\n", | |
| " border-right-color: var(--fill-color);\n", | |
| " border-bottom-color: var(--fill-color);\n", | |
| " }\n", | |
| " 90% {\n", | |
| " border-color: transparent;\n", | |
| " border-bottom-color: var(--fill-color);\n", | |
| " }\n", | |
| " }\n", | |
| "</style>\n", | |
| "\n", | |
| " <script>\n", | |
| " async function quickchart(key) {\n", | |
| " const quickchartButtonEl =\n", | |
| " document.querySelector('#' + key + ' button');\n", | |
| " quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n", | |
| " quickchartButtonEl.classList.add('colab-df-spinner');\n", | |
| " try {\n", | |
| " const charts = await google.colab.kernel.invokeFunction(\n", | |
| " 'suggestCharts', [key], {});\n", | |
| " } catch (error) {\n", | |
| " console.error('Error during call to suggestCharts:', error);\n", | |
| " }\n", | |
| " quickchartButtonEl.classList.remove('colab-df-spinner');\n", | |
| " quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n", | |
| " }\n", | |
| " (() => {\n", | |
| " let quickchartButtonEl =\n", | |
| " document.querySelector('#df-f85b4bf4-c7f8-4a31-bbd0-3feb0fef7920 button');\n", | |
| " quickchartButtonEl.style.display =\n", | |
| " google.colab.kernel.accessAllowed ? 'block' : 'none';\n", | |
| " })();\n", | |
| " </script>\n", | |
| " </div>\n", | |
| "\n", | |
| " <div id=\"id_c080b352-207c-43ab-9047-c20feb26def7\">\n", | |
| " <style>\n", | |
| " .colab-df-generate {\n", | |
| " background-color: #E8F0FE;\n", | |
| " border: none;\n", | |
| " border-radius: 50%;\n", | |
| " cursor: pointer;\n", | |
| " display: none;\n", | |
| " fill: #1967D2;\n", | |
| " height: 32px;\n", | |
| " padding: 0 0 0 0;\n", | |
| " width: 32px;\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-generate:hover {\n", | |
| " background-color: #E2EBFA;\n", | |
| " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n", | |
| " fill: #174EA6;\n", | |
| " }\n", | |
| "\n", | |
| " [theme=dark] .colab-df-generate {\n", | |
| " background-color: #3B4455;\n", | |
| " fill: #D2E3FC;\n", | |
| " }\n", | |
| "\n", | |
| " [theme=dark] .colab-df-generate:hover {\n", | |
| " background-color: #434B5C;\n", | |
| " box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n", | |
| " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n", | |
| " fill: #FFFFFF;\n", | |
| " }\n", | |
| " </style>\n", | |
| " <button class=\"colab-df-generate\" onclick=\"generateWithVariable('df_clf')\"\n", | |
| " title=\"Generate code using this dataframe.\"\n", | |
| " style=\"display:none;\">\n", | |
| "\n", | |
| " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n", | |
| " width=\"24px\">\n", | |
| " <path d=\"M7,19H8.4L18.45,9,17,7.55,7,17.6ZM5,21V16.75L18.45,3.32a2,2,0,0,1,2.83,0l1.4,1.43a1.91,1.91,0,0,1,.58,1.4,1.91,1.91,0,0,1-.58,1.4L9.25,21ZM18.45,9,17,7.55Zm-12,3A5.31,5.31,0,0,0,4.9,8.1,5.31,5.31,0,0,0,1,6.5,5.31,5.31,0,0,0,4.9,4.9,5.31,5.31,0,0,0,6.5,1,5.31,5.31,0,0,0,8.1,4.9,5.31,5.31,0,0,0,12,6.5,5.46,5.46,0,0,0,6.5,12Z\"/>\n", | |
| " </svg>\n", | |
| " </button>\n", | |
| " <script>\n", | |
| " (() => {\n", | |
| " const buttonEl =\n", | |
| " document.querySelector('#id_c080b352-207c-43ab-9047-c20feb26def7 button.colab-df-generate');\n", | |
| " buttonEl.style.display =\n", | |
| " google.colab.kernel.accessAllowed ? 'block' : 'none';\n", | |
| "\n", | |
| " buttonEl.onclick = () => {\n", | |
| " google.colab.notebook.generateWithVariable('df_clf');\n", | |
| " }\n", | |
| " })();\n", | |
| " </script>\n", | |
| " </div>\n", | |
| "\n", | |
| " </div>\n", | |
| " </div>\n" | |
| ], | |
| "application/vnd.google.colaboratory.intrinsic+json": { | |
| "type": "dataframe", | |
| "variable_name": "df_clf", | |
| "summary": "{\n \"name\": \"df_clf\",\n \"rows\": 12,\n \"fields\": [\n {\n \"column\": \"city\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"New York\",\n \"London\",\n \"Paris\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"churn\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n \"samples\": [\n 1,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"enc_smooth0\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.12309149097933277,\n \"min\": 0.5,\n \"max\": 0.8,\n \"num_unique_values\": 3,\n \"samples\": [\n 0.6,\n 0.8\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"enc_smooth1\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 5.149286359812627e-10,\n \"min\": 0.6666666663333333,\n \"max\": 0.6666666673333332,\n \"num_unique_values\": 2,\n \"samples\": [\n 0.6666666673333332,\n 0.6666666663333333\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"enc_smooth_auto\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.097676234197722,\n \"min\": 0.56,\n \"max\": 0.7832167832167832,\n \"num_unique_values\": 3,\n \"samples\": [\n 0.6118421052631579,\n 0.7832167832167832\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"enc_smooth_manual\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.12309149097933277,\n \"min\": 0.5,\n \"max\": 0.8,\n \"num_unique_values\": 3,\n \"samples\": [\n 0.6,\n 0.8\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"enc_catboost\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.15651684098335997,\n \"min\": 0.3333333333333333,\n \"max\": 0.8333333333333333,\n \"num_unique_values\": 7,\n \"samples\": [\n 0.6666666666666666,\n 0.3333333333333333\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"enc_catboost_manual\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.1506090329564625,\n \"min\": 0.5,\n \"max\": 1.0,\n \"num_unique_values\": 7,\n \"samples\": [\n 0.5,\n 1.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" | |
| } | |
| }, | |
| "metadata": {}, | |
| "execution_count": 9 | |
| } | |
| ] | |
| } | |
| ] | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment