Created
December 1, 2022 18:33
-
-
Save eguar11011/a1bebce02904acc3cf36cbad317ac62f to your computer and use it in GitHub Desktop.
Kaggle_MNIST.ipynb
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"private_outputs": true, | |
"provenance": [], | |
"include_colab_link": true | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
}, | |
"language_info": { | |
"name": "python" | |
} | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/eguar11011/a1bebce02904acc3cf36cbad317ac62f/kaggle_mnist.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "xUfKuq6xVQ8n" | |
}, | |
"source": [ | |
"Descarga de los datos" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "hCA9eQT1J4ti" | |
}, | |
"source": [ | |
"import os\n", | |
"import pandas as pd\n", | |
"import numpy as np\n", | |
"#import shutil" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "Mmz74XTYK_Ee" | |
}, | |
"source": [ | |
"from getpass import getpass\n", | |
"\n", | |
"# Never hardcode Kaggle credentials in a shared notebook.\n", | |
"# Use the values already present in the environment and prompt (without echo)\n", | |
"# only for the ones that are missing, so nothing secret is saved in the file.\n", | |
"# NOTE: the API key that used to be written in this cell was published and\n", | |
"# must be revoked from the Kaggle account settings.\n", | |
"for var in ('KAGGLE_USERNAME', 'KAGGLE_KEY'):\n", | |
"    if not os.environ.get(var):\n", | |
"        os.environ[var] = getpass(var + ': ')" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "dw5YIwe_LBkP" | |
}, | |
"source": [ | |
"!kaggle competitions download -c digit-recognizer" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "ex7T5kBfVWB-" | |
}, | |
"source": [ | |
"Tratamiento de los datos" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "aXkcp881LRY-" | |
}, | |
"source": [ | |
"!unzip -q digit-recognizer.zip\n" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "aGQkF6HZLVPM" | |
}, | |
"source": [ | |
"from keras import models\n", | |
"from keras import layers\n", | |
"from tensorflow.keras.utils import to_categorical" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "viPpDzySMLNm" | |
}, | |
"source": [ | |
"table_train = pd.read_csv(\"train.csv\")\n", | |
"table_test = pd.read_csv(\"test.csv\")" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "y9lHJMBpNr96" | |
}, | |
"source": [ | |
"table_train.head()" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "0eQP6fPEN6co" | |
}, | |
"source": [ | |
"table_test.head()" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "2ZjBQFtfOAlb" | |
}, | |
"source": [ | |
"#Separamos las etiquetas y los pixeles y pasamos los datos a un arreglo de numpy\n", | |
"train_labels = table_train['label'].to_numpy()\n", | |
"train_images =table_train.drop(['label'], axis=1).to_numpy()\n", | |
"#Convertimos test en un arreglo de numpy\n", | |
"table_test = table_test.to_numpy()\n" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "ghQkcl4QO4zv" | |
}, | |
"source": [ | |
"train_images" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "gMrIHmrsSXtZ" | |
}, | |
"source": [ | |
"# Visualización de los números con los píxeles" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "nG21CgHhSPK-" | |
}, | |
"source": [ | |
"import matplotlib.pyplot as plt" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "kJljfW9fTfj7" | |
}, | |
"source": [ | |
"#dígitos de train_images con los pixeles\n", | |
"for i in range(4):\n", | |
" image = train_images[i].reshape(28,28) # 784 = 28 x 28\n", | |
" plt.imshow(image, cmap=plt.cm.binary)\n", | |
" plt.show()" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "ML6fzHU0Se_y" | |
}, | |
"source": [ | |
"#Primeros 9 números del test\n", | |
"for i in range(9):\n", | |
" print(i)\n", | |
" image = table_test[i].reshape(28,28) # 784 = 28 x 28\n", | |
" plt.imshow(image, cmap=plt.cm.binary)\n", | |
" plt.show()" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "yBIF2n_kVj8D" | |
}, | |
"source": [ | |
"Tratamiento de los datos " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "I65A4visT3xu" | |
}, | |
"source": [ | |
"#Pixeles con valores de 0 a 1\n", | |
"train_images = train_images.astype('float32') / 255\n", | |
"table_test = table_test.astype('float32') / 255\n", | |
"#Vector de enteros convertidos en una matriz de clase binaria \n", | |
"train_labels = to_categorical(train_labels)" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "JLdlwcNSYhdc" | |
}, | |
"source": [ | |
"len(train_images)" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "Fj-qWaSyWff3" | |
}, | |
"source": [ | |
"Modelo secuencial " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "OIQnhs09Wj0B" | |
}, | |
"source": [ | |
"#Creando la red\n", | |
"network = models.Sequential()\n", | |
"network.add(layers.Dense(512, activation= 'relu', input_shape=(28 * 28,)))\n", | |
"network.add(layers.Dense(10, activation= 'softmax'))\n", | |
"\n", | |
"network.compile(optimizer ='rmsprop',\n", | |
" loss='categorical_crossentropy',\n", | |
" metrics =['accuracy'])" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "jTdVVvY-XZTB" | |
}, | |
"source": [ | |
"#entrenando al modelo\n", | |
"network.fit(train_images, train_labels, epochs=5, batch_size=128)" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "Qw4RRtToXvf3" | |
}, | |
"source": [ | |
"#Realizando las predicciones y empaquetandolas para la entrega\n", | |
"predictions = network.predict(table_test)" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "WLb8-6Vkkqvr" | |
}, | |
"source": [ | |
"# For every row of class probabilities, take the index of the largest value;\n", | |
"# that index is the predicted digit. np.argmax returns the first maximum,\n", | |
"# exactly like list(x).index(max(x)), but in a single vectorized call.\n", | |
"predictions = np.argmax(predictions, axis=1)" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "ChcIPQ-tlLCU" | |
}, | |
"source": [ | |
"len(predictions)\n" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "Jie-styBYzI1" | |
}, | |
"source": [ | |
"# Submission CSV: one row per prediction, with ImageId running from 1 to\n", | |
"# len(predictions) followed by the predicted Label.\n", | |
"output = pd.DataFrame({\n", | |
"    'ImageId': range(1, len(predictions) + 1),\n", | |
"    'Label': predictions,\n", | |
"})\n", | |
"\n", | |
"output.to_csv('submission.csv', index=False)\n", | |
"\n" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "meRR9Dy9edLM" | |
}, | |
"source": [ | |
"output.dtypes" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "PZO44p0b3BKx" | |
}, | |
"source": [ | |
"output.shape" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "o-2zaQTw5KOP" | |
}, | |
"source": [ | |
"output.head(10)" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "IckrHjsieyBX" | |
}, | |
"source": [ | |
"! kaggle competitions submit -c digit-recognizer -f submission.csv -m \"MNIST\"" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"Obtuvimos una precisión de 0.97314, como se puede ver en la imagen de abajo, que muestra el puntaje de la competencia." | |
], | |
"metadata": { | |
"id": "DEi1hkXe9FVG" | |
} | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"" | |
], | |
"metadata": { | |
"id": "hh7DzHxU8z4O" | |
} | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment