Created
March 20, 2021 08:07
-
-
Save ashutoshsahu2015/e706aa5ab88194443ae12c0b74054057 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": 5, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>Survived</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Cabin</th>\n", | |
| " <th></th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>A</th>\n", | |
| " <td>0.466667</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>B</th>\n", | |
| " <td>0.744681</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>C</th>\n", | |
| " <td>0.593220</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>D</th>\n", | |
| " <td>0.757576</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>E</th>\n", | |
| " <td>0.750000</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>F</th>\n", | |
| " <td>0.615385</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>G</th>\n", | |
| " <td>0.500000</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>M</th>\n", | |
| " <td>0.299854</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>T</th>\n", | |
| " <td>0.000000</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " Survived\n", | |
| "Cabin \n", | |
| "A 0.466667\n", | |
| "B 0.744681\n", | |
| "C 0.593220\n", | |
| "D 0.757576\n", | |
| "E 0.750000\n", | |
| "F 0.615385\n", | |
| "G 0.500000\n", | |
| "M 0.299854\n", | |
| "T 0.000000" | |
| ] | |
| }, | |
| "execution_count": 5, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "prob=dataset.groupby(['Cabin'])['Survived'].mean()\n", | |
| "prob_df=pd.DataFrame(prob)\n", | |
| "prob_df" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 6, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>Survived</th>\n", | |
| " <th>Died</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Cabin</th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>A</th>\n", | |
| " <td>0.466667</td>\n", | |
| " <td>0.533333</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>B</th>\n", | |
| " <td>0.744681</td>\n", | |
| " <td>0.255319</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>C</th>\n", | |
| " <td>0.593220</td>\n", | |
| " <td>0.406780</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>D</th>\n", | |
| " <td>0.757576</td>\n", | |
| " <td>0.242424</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>E</th>\n", | |
| " <td>0.750000</td>\n", | |
| " <td>0.250000</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>F</th>\n", | |
| " <td>0.615385</td>\n", | |
| " <td>0.384615</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>G</th>\n", | |
| " <td>0.500000</td>\n", | |
| " <td>0.500000</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>M</th>\n", | |
| " <td>0.299854</td>\n", | |
| " <td>0.700146</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>T</th>\n", | |
| " <td>0.000000</td>\n", | |
| " <td>1.000000</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " Survived Died\n", | |
| "Cabin \n", | |
| "A 0.466667 0.533333\n", | |
| "B 0.744681 0.255319\n", | |
| "C 0.593220 0.406780\n", | |
| "D 0.757576 0.242424\n", | |
| "E 0.750000 0.250000\n", | |
| "F 0.615385 0.384615\n", | |
| "G 0.500000 0.500000\n", | |
| "M 0.299854 0.700146\n", | |
| "T 0.000000 1.000000" | |
| ] | |
| }, | |
| "execution_count": 6, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "prob_df['Died']=1-prob_df['Survived']\n", | |
| "prob_df" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 7, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>Survived</th>\n", | |
| " <th>Died</th>\n", | |
| " <th>Probability Ratio</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Cabin</th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>A</th>\n", | |
| " <td>0.466667</td>\n", | |
| " <td>0.533333</td>\n", | |
| " <td>0.875000</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>B</th>\n", | |
| " <td>0.744681</td>\n", | |
| " <td>0.255319</td>\n", | |
| " <td>2.916667</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>C</th>\n", | |
| " <td>0.593220</td>\n", | |
| " <td>0.406780</td>\n", | |
| " <td>1.458333</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>D</th>\n", | |
| " <td>0.757576</td>\n", | |
| " <td>0.242424</td>\n", | |
| " <td>3.125000</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>E</th>\n", | |
| " <td>0.750000</td>\n", | |
| " <td>0.250000</td>\n", | |
| " <td>3.000000</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>F</th>\n", | |
| " <td>0.615385</td>\n", | |
| " <td>0.384615</td>\n", | |
| " <td>1.600000</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>G</th>\n", | |
| " <td>0.500000</td>\n", | |
| " <td>0.500000</td>\n", | |
| " <td>1.000000</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>M</th>\n", | |
| " <td>0.299854</td>\n", | |
| " <td>0.700146</td>\n", | |
| " <td>0.428274</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>T</th>\n", | |
| " <td>0.000000</td>\n", | |
| " <td>1.000000</td>\n", | |
| " <td>0.000000</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " Survived Died Probability Ratio\n", | |
| "Cabin \n", | |
| "A 0.466667 0.533333 0.875000\n", | |
| "B 0.744681 0.255319 2.916667\n", | |
| "C 0.593220 0.406780 1.458333\n", | |
| "D 0.757576 0.242424 3.125000\n", | |
| "E 0.750000 0.250000 3.000000\n", | |
| "F 0.615385 0.384615 1.600000\n", | |
| "G 0.500000 0.500000 1.000000\n", | |
| "M 0.299854 0.700146 0.428274\n", | |
| "T 0.000000 1.000000 0.000000" | |
| ] | |
| }, | |
| "execution_count": 7, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "prob_df['Probability Ratio']=prob_df['Survived']/prob_df['Died']\n", | |
| "prob_df" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 8, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "{'A': 0.875,\n", | |
| " 'B': 2.916666666666666,\n", | |
| " 'C': 1.4583333333333333,\n", | |
| " 'D': 3.125,\n", | |
| " 'E': 3.0,\n", | |
| " 'F': 1.6000000000000003,\n", | |
| " 'G': 1.0,\n", | |
| " 'M': 0.42827442827442824,\n", | |
| " 'T': 0.0}" | |
| ] | |
| }, | |
| "execution_count": 8, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "prob_encod_dictionary=prob_df['Probability Ratio'].to_dict()\n", | |
| "prob_encod_dictionary" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 9, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>Survived</th>\n", | |
| " <th>Cabin</th>\n", | |
| " <th>Cabin_probabilty_ratio</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>0</td>\n", | |
| " <td>M</td>\n", | |
| " <td>0.428274</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>1</td>\n", | |
| " <td>C</td>\n", | |
| " <td>1.458333</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>1</td>\n", | |
| " <td>M</td>\n", | |
| " <td>0.428274</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>1</td>\n", | |
| " <td>C</td>\n", | |
| " <td>1.458333</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>0</td>\n", | |
| " <td>M</td>\n", | |
| " <td>0.428274</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " Survived Cabin Cabin_probabilty_ratio\n", | |
| "0 0 M 0.428274\n", | |
| "1 1 C 1.458333\n", | |
| "2 1 M 0.428274\n", | |
| "3 1 C 1.458333\n", | |
| "4 0 M 0.428274" | |
| ] | |
| }, | |
| "execution_count": 9, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "dataset['Cabin_probabilty_ratio']=dataset['Cabin'].map(prob_encod_dictionary)\n", | |
| "dataset.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.7.3" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 2 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment