Created
March 20, 2021 07:48
-
-
Save ashutoshsahu2015/17b24a43e6c3636ae2ca45e2e91198d1 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "import numpy as np\n", | |
| "import pandas as pd\n", | |
| "import matplotlib.pyplot as plt" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>X0</th>\n", | |
| " <th>X1</th>\n", | |
| " <th>X2</th>\n", | |
| " <th>X3</th>\n", | |
| " <th>X4</th>\n", | |
| " <th>X5</th>\n", | |
| " <th>X6</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>k</td>\n", | |
| " <td>v</td>\n", | |
| " <td>at</td>\n", | |
| " <td>a</td>\n", | |
| " <td>d</td>\n", | |
| " <td>u</td>\n", | |
| " <td>j</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>k</td>\n", | |
| " <td>t</td>\n", | |
| " <td>av</td>\n", | |
| " <td>e</td>\n", | |
| " <td>d</td>\n", | |
| " <td>y</td>\n", | |
| " <td>l</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>az</td>\n", | |
| " <td>w</td>\n", | |
| " <td>n</td>\n", | |
| " <td>c</td>\n", | |
| " <td>d</td>\n", | |
| " <td>x</td>\n", | |
| " <td>j</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>az</td>\n", | |
| " <td>t</td>\n", | |
| " <td>n</td>\n", | |
| " <td>f</td>\n", | |
| " <td>d</td>\n", | |
| " <td>x</td>\n", | |
| " <td>l</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>az</td>\n", | |
| " <td>v</td>\n", | |
| " <td>n</td>\n", | |
| " <td>f</td>\n", | |
| " <td>d</td>\n", | |
| " <td>h</td>\n", | |
| " <td>d</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " X0 X1 X2 X3 X4 X5 X6\n", | |
| "0 k v at a d u j\n", | |
| "1 k t av e d y l\n", | |
| "2 az w n c d x j\n", | |
| "3 az t n f d x l\n", | |
| "4 az v n f d h d" | |
| ] | |
| }, | |
| "execution_count": 2, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Read only the columns X0 through X6 from the CSV file.\n", | |
| "dataset = pd.read_csv(\n", | |
| "    'mercendez.csv',\n", | |
| "    usecols=[f'X{i}' for i in range(7)],\n", | |
| ")\n", | |
| "# Show the first five rows as the cell output.\n", | |
| "dataset.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 3, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "47\n", | |
| "27\n", | |
| "44\n", | |
| "7\n", | |
| "4\n", | |
| "29\n", | |
| "12\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "## Print the number of distinct labels found in each column\n", | |
| "for column_name in dataset.columns:\n", | |
| "    # dropna=False counts NaN as a label, matching len(Series.unique())\n", | |
| "    print(dataset[column_name].nunique(dropna=False))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 4, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "Index(['aa', 's', 'b', 'l', 'v', 'r', 'i', 'a', 'c', 'o'], dtype='object')" | |
| ] | |
| }, | |
| "execution_count": 4, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Index of the ten most frequent X1 labels, most frequent first.\n", | |
| "list_top_10 = dataset['X1'].value_counts().sort_values(ascending=False).index[:10]\n", | |
| "list_top_10" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 5, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>X0</th>\n", | |
| " <th>X1</th>\n", | |
| " <th>X2</th>\n", | |
| " <th>X3</th>\n", | |
| " <th>X4</th>\n", | |
| " <th>X5</th>\n", | |
| " <th>X6</th>\n", | |
| " <th>aa</th>\n", | |
| " <th>s</th>\n", | |
| " <th>b</th>\n", | |
| " <th>l</th>\n", | |
| " <th>v</th>\n", | |
| " <th>r</th>\n", | |
| " <th>i</th>\n", | |
| " <th>a</th>\n", | |
| " <th>c</th>\n", | |
| " <th>o</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>k</td>\n", | |
| " <td>v</td>\n", | |
| " <td>at</td>\n", | |
| " <td>a</td>\n", | |
| " <td>d</td>\n", | |
| " <td>u</td>\n", | |
| " <td>j</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>k</td>\n", | |
| " <td>t</td>\n", | |
| " <td>av</td>\n", | |
| " <td>e</td>\n", | |
| " <td>d</td>\n", | |
| " <td>y</td>\n", | |
| " <td>l</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>az</td>\n", | |
| " <td>w</td>\n", | |
| " <td>n</td>\n", | |
| " <td>c</td>\n", | |
| " <td>d</td>\n", | |
| " <td>x</td>\n", | |
| " <td>j</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>az</td>\n", | |
| " <td>t</td>\n", | |
| " <td>n</td>\n", | |
| " <td>f</td>\n", | |
| " <td>d</td>\n", | |
| " <td>x</td>\n", | |
| " <td>l</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>az</td>\n", | |
| " <td>v</td>\n", | |
| " <td>n</td>\n", | |
| " <td>f</td>\n", | |
| " <td>d</td>\n", | |
| " <td>h</td>\n", | |
| " <td>d</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " X0 X1 X2 X3 X4 X5 X6 aa s b l v r i a c o\n", | |
| "0 k v at a d u j 0 0 0 0 1 0 0 0 0 0\n", | |
| "1 k t av e d y l 0 0 0 0 0 0 0 0 0 0\n", | |
| "2 az w n c d x j 0 0 0 0 0 0 0 0 0 0\n", | |
| "3 az t n f d x l 0 0 0 0 0 0 0 0 0 0\n", | |
| "4 az v n f d h d 0 0 0 0 1 0 0 0 0 0" | |
| ] | |
| }, | |
| "execution_count": 5, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Add one 0/1 indicator column per top-10 X1 label; the new column is named\n", | |
| "# after the label itself. Rows whose X1 label is outside the top 10 get 0\n", | |
| "# in every indicator column.\n", | |
| "for label in list_top_10:\n", | |
| "    dataset[label] = np.where(dataset['X1'].eq(label), 1, 0)\n", | |
| "dataset.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 6, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>X0</th>\n", | |
| " <th>X2</th>\n", | |
| " <th>X3</th>\n", | |
| " <th>X4</th>\n", | |
| " <th>X5</th>\n", | |
| " <th>X6</th>\n", | |
| " <th>aa</th>\n", | |
| " <th>s</th>\n", | |
| " <th>b</th>\n", | |
| " <th>l</th>\n", | |
| " <th>v</th>\n", | |
| " <th>r</th>\n", | |
| " <th>i</th>\n", | |
| " <th>a</th>\n", | |
| " <th>c</th>\n", | |
| " <th>o</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>k</td>\n", | |
| " <td>at</td>\n", | |
| " <td>a</td>\n", | |
| " <td>d</td>\n", | |
| " <td>u</td>\n", | |
| " <td>j</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>k</td>\n", | |
| " <td>av</td>\n", | |
| " <td>e</td>\n", | |
| " <td>d</td>\n", | |
| " <td>y</td>\n", | |
| " <td>l</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>az</td>\n", | |
| " <td>n</td>\n", | |
| " <td>c</td>\n", | |
| " <td>d</td>\n", | |
| " <td>x</td>\n", | |
| " <td>j</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>az</td>\n", | |
| " <td>n</td>\n", | |
| " <td>f</td>\n", | |
| " <td>d</td>\n", | |
| " <td>x</td>\n", | |
| " <td>l</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>az</td>\n", | |
| " <td>n</td>\n", | |
| " <td>f</td>\n", | |
| " <td>d</td>\n", | |
| " <td>h</td>\n", | |
| " <td>d</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " X0 X2 X3 X4 X5 X6 aa s b l v r i a c o\n", | |
| "0 k at a d u j 0 0 0 0 1 0 0 0 0 0\n", | |
| "1 k av e d y l 0 0 0 0 0 0 0 0 0 0\n", | |
| "2 az n c d x j 0 0 0 0 0 0 0 0 0 0\n", | |
| "3 az n f d x l 0 0 0 0 0 0 0 0 0 0\n", | |
| "4 az n f d h d 0 0 0 0 1 0 0 0 0 0" | |
| ] | |
| }, | |
| "execution_count": 6, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Remove the original X1 column; its ten most frequent labels are already\n", | |
| "# represented by the 0/1 indicator columns added above.\n", | |
| "# Rebinding the result (instead of inplace=True) with errors='ignore' keeps\n", | |
| "# this cell safe to re-run: a second execution no longer raises KeyError\n", | |
| "# because X1 is already gone.\n", | |
| "dataset = dataset.drop(columns=['X1'], errors='ignore')\n", | |
| "dataset.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.7.3" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 2 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment