Skip to content

Instantly share code, notes, and snippets.

@aaizemberg
Created September 2, 2024 19:25
Show Gist options
  • Save aaizemberg/b0e7992b451df44f027f379ebfc6d1da to your computer and use it in GitHub Desktop.
Save aaizemberg/b0e7992b451df44f027f379ebfc6d1da to your computer and use it in GitHub Desktop.
BA Data - Bicicletas - 2024
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 112,
"metadata": {},
"outputs": [],
"source": [
"import duckdb"
]
},
{
"cell_type": "code",
"execution_count": 113,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"┌───────┐\n",
"│ count │\n",
"│ int64 │\n",
"├───────┤\n",
"│ 89477 │\n",
"└───────┘"
]
},
"execution_count": 113,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"duckdb.execute(\"CREATE TABLE IF NOT EXISTS usuarios AS SELECT * FROM 'usuarios.parquet'\")\n",
"duckdb.sql(\"SELECT count(*) AS count FROM usuarios\")"
]
},
{
"cell_type": "code",
"execution_count": 114,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"┌─────────┬─────────┬───────┬─────────┬─────────────────────┐\n",
"│ id │ genero │ edad │ has_dni │ fecha │\n",
"│ int64 │ varchar │ int32 │ boolean │ timestamp │\n",
"├─────────┼─────────┼───────┼─────────┼─────────────────────┤\n",
"│ 1173031 │ MALE │ 21 │ false │ 2024-07-02 22:48:02 │\n",
"│ 1172998 │ FEMALE │ 18 │ false │ 2024-07-02 17:13:35 │\n",
"│ 1172922 │ FEMALE │ 53 │ false │ 2024-07-02 15:00:13 │\n",
"│ 1172973 │ FEMALE │ 57 │ false │ 2024-07-02 16:40:18 │\n",
"│ 1172894 │ FEMALE │ 22 │ false │ 2024-07-02 13:11:33 │\n",
"│ 1172876 │ OTHER │ 29 │ false │ 2024-07-02 12:16:26 │\n",
"│ 1172791 │ FEMALE │ 18 │ true │ 2024-07-02 01:07:44 │\n",
"│ 1172788 │ MALE │ 18 │ true │ 2024-07-02 00:57:57 │\n",
"│ 1172978 │ FEMALE │ 18 │ false │ 2024-07-02 16:44:07 │\n",
"│ 1172839 │ MALE │ 18 │ false │ 2024-07-02 10:52:51 │\n",
"├─────────┴─────────┴───────┴─────────┴─────────────────────┤\n",
"│ 10 rows 5 columns │\n",
"└───────────────────────────────────────────────────────────┘"
]
},
"execution_count": 114,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"duckdb.sql(\"SELECT * FROM usuarios limit 10\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Vamos a ver ahora la tabla 'trips_2024'"
]
},
{
"cell_type": "code",
"execution_count": 115,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"┌─────────┐\n",
"│ count │\n",
"│ int64 │\n",
"├─────────┤\n",
"│ 1528727 │\n",
"└─────────┘"
]
},
"execution_count": 115,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"duckdb.sql(\"select count(*) as count from trips_2024.parquet\")"
]
},
{
"cell_type": "code",
"execution_count": 136,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<duckdb.duckdb.DuckDBPyConnection at 0x2a6bb57b1f0>"
]
},
"execution_count": 136,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"duckdb.execute(\"drop table if exists trips_2024;\")"
]
},
{
"cell_type": "code",
"execution_count": 137,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"┌─────────┐\n",
"│ count │\n",
"│ int64 │\n",
"├─────────┤\n",
"│ 1528727 │\n",
"└─────────┘"
]
},
"execution_count": 137,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"duckdb.execute(\"CREATE TABLE IF NOT EXISTS trips_2024 AS SELECT * FROM 'trips_2024.parquet'\")\n",
"duckdb.sql(\"SELECT count(*) AS count FROM trips_2024\")"
]
},
{
"cell_type": "code",
"execution_count": 138,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Id_recorrido</th>\n",
" <th>duracion_recorrido</th>\n",
" <th>fecha_origen_recorrido</th>\n",
" <th>id_estacion_origen</th>\n",
" <th>nombre_estacion_origen</th>\n",
" <th>direccion_estacion_origen</th>\n",
" <th>long_estacion_origen</th>\n",
" <th>lat_estacion_origen</th>\n",
" <th>fecha_destino_recorrido</th>\n",
" <th>id_estacion_destino</th>\n",
" <th>nombre_estacion_destino</th>\n",
" <th>direccion_estacion_destino</th>\n",
" <th>long_estacion_destino</th>\n",
" <th>lat_estacion_destino</th>\n",
" <th>id_usuario</th>\n",
" <th>modelo_bicicleta</th>\n",
" <th>género</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>20610093BAEcobici</td>\n",
" <td>858</td>\n",
" <td>2024-02-08 20:58:32</td>\n",
" <td>516BAEcobici</td>\n",
" <td>368 - PLAZA DE LA SHOÁ</td>\n",
" <td>Av. Del Libertador 3899</td>\n",
" <td>-58.423347</td>\n",
" <td>-34.570998</td>\n",
" <td>2024-02-08 21:12:50</td>\n",
" <td>308BAEcobici</td>\n",
" <td>251 - Solar de la Abadía</td>\n",
" <td>Maure 1819</td>\n",
" <td>-58.436752</td>\n",
" <td>-34.567633</td>\n",
" <td>85441BAEcobici</td>\n",
" <td>ICONIC</td>\n",
" <td>MALE</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>21419508BAEcobici</td>\n",
" <td>512</td>\n",
" <td>2024-04-17 08:37:36</td>\n",
" <td>252BAEcobici</td>\n",
" <td>331 - CALIFORNIA</td>\n",
" <td>California 1774 y General Hornos</td>\n",
" <td>-58.374336</td>\n",
" <td>-34.647121</td>\n",
" <td>2024-04-17 08:46:08</td>\n",
" <td>73BAEcobici</td>\n",
" <td>073 - Ruy Díaz de Guzmán</td>\n",
" <td>Avenida Martin Garcia y Ruy Díaz de Guzmán</td>\n",
" <td>-58.371823</td>\n",
" <td>-34.630681</td>\n",
" <td>512851BAEcobici</td>\n",
" <td>ICONIC</td>\n",
" <td>MALE</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>21421689BAEcobici</td>\n",
" <td>964</td>\n",
" <td>2024-04-17 11:38:10</td>\n",
" <td>277BAEcobici</td>\n",
" <td>292 - PLAZA BOLIVIA</td>\n",
" <td>Olleros Av. &amp; Del Libertador Av.</td>\n",
" <td>-58.436115</td>\n",
" <td>-34.563539</td>\n",
" <td>2024-04-17 11:54:14</td>\n",
" <td>322BAEcobici</td>\n",
" <td>289 - MONTAÑESES</td>\n",
" <td>2786 Montañeses</td>\n",
" <td>-58.454181</td>\n",
" <td>-34.551304</td>\n",
" <td>27717BAEcobici</td>\n",
" <td>ICONIC</td>\n",
" <td>FEMALE</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>21436007BAEcobici</td>\n",
" <td>564</td>\n",
" <td>2024-04-18 09:25:38</td>\n",
" <td>66BAEcobici</td>\n",
" <td>066 - Billinghurst</td>\n",
" <td>3508 Carcova</td>\n",
" <td>-58.413871</td>\n",
" <td>-34.594547</td>\n",
" <td>2024-04-18 09:35:02</td>\n",
" <td>124BAEcobici</td>\n",
" <td>124 - UGARTECHE</td>\n",
" <td>3050 Ugarteche</td>\n",
" <td>-58.411965</td>\n",
" <td>-34.580538</td>\n",
" <td>870951BAEcobici</td>\n",
" <td>ICONIC</td>\n",
" <td>MALE</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>21286197BAEcobici</td>\n",
" <td>1,145</td>\n",
" <td>2024-04-05 19:56:39</td>\n",
" <td>202BAEcobici</td>\n",
" <td>001 - FACULTAD DE DERECHO</td>\n",
" <td>Av. Pres. Figueroa Alcorta 2120</td>\n",
" <td>-58.390602</td>\n",
" <td>-34.583749</td>\n",
" <td>2024-04-05 20:15:44</td>\n",
" <td>128BAEcobici</td>\n",
" <td>128 - PARQUE DEL BAJO</td>\n",
" <td>Tte. Gral. Juan Domingo Perón 133</td>\n",
" <td>-58.368821</td>\n",
" <td>-34.605152</td>\n",
" <td>23367BAEcobici</td>\n",
" <td>ICONIC</td>\n",
" <td>None</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Id_recorrido duracion_recorrido fecha_origen_recorrido \\\n",
"0 20610093BAEcobici 858 2024-02-08 20:58:32 \n",
"1 21419508BAEcobici 512 2024-04-17 08:37:36 \n",
"2 21421689BAEcobici 964 2024-04-17 11:38:10 \n",
"3 21436007BAEcobici 564 2024-04-18 09:25:38 \n",
"4 21286197BAEcobici 1,145 2024-04-05 19:56:39 \n",
"\n",
" id_estacion_origen nombre_estacion_origen \\\n",
"0 516BAEcobici 368 - PLAZA DE LA SHOÁ \n",
"1 252BAEcobici 331 - CALIFORNIA \n",
"2 277BAEcobici 292 - PLAZA BOLIVIA \n",
"3 66BAEcobici 066 - Billinghurst \n",
"4 202BAEcobici 001 - FACULTAD DE DERECHO \n",
"\n",
" direccion_estacion_origen long_estacion_origen \\\n",
"0 Av. Del Libertador 3899 -58.423347 \n",
"1 California 1774 y General Hornos -58.374336 \n",
"2 Olleros Av. & Del Libertador Av. -58.436115 \n",
"3 3508 Carcova -58.413871 \n",
"4 Av. Pres. Figueroa Alcorta 2120 -58.390602 \n",
"\n",
" lat_estacion_origen fecha_destino_recorrido id_estacion_destino \\\n",
"0 -34.570998 2024-02-08 21:12:50 308BAEcobici \n",
"1 -34.647121 2024-04-17 08:46:08 73BAEcobici \n",
"2 -34.563539 2024-04-17 11:54:14 322BAEcobici \n",
"3 -34.594547 2024-04-18 09:35:02 124BAEcobici \n",
"4 -34.583749 2024-04-05 20:15:44 128BAEcobici \n",
"\n",
" nombre_estacion_destino direccion_estacion_destino \\\n",
"0 251 - Solar de la Abadía Maure 1819 \n",
"1 073 - Ruy Díaz de Guzmán Avenida Martin Garcia y Ruy Díaz de Guzmán \n",
"2 289 - MONTAÑESES 2786 Montañeses \n",
"3 124 - UGARTECHE 3050 Ugarteche \n",
"4 128 - PARQUE DEL BAJO Tte. Gral. Juan Domingo Perón 133 \n",
"\n",
" long_estacion_destino lat_estacion_destino id_usuario \\\n",
"0 -58.436752 -34.567633 85441BAEcobici \n",
"1 -58.371823 -34.630681 512851BAEcobici \n",
"2 -58.454181 -34.551304 27717BAEcobici \n",
"3 -58.411965 -34.580538 870951BAEcobici \n",
"4 -58.368821 -34.605152 23367BAEcobici \n",
"\n",
" modelo_bicicleta género \n",
"0 ICONIC MALE \n",
"1 ICONIC MALE \n",
"2 ICONIC FEMALE \n",
"3 ICONIC MALE \n",
"4 ICONIC None "
]
},
"execution_count": 138,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"duckdb.sql(\"SELECT * FROM trips_2024 limit 5\").df()"
]
},
{
"cell_type": "code",
"execution_count": 139,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Id_recorrido</th>\n",
" <th>duracion_recorrido</th>\n",
" <th>fecha_origen_recorrido</th>\n",
" <th>id_estacion_origen</th>\n",
" <th>nombre_estacion_origen</th>\n",
" <th>direccion_estacion_origen</th>\n",
" <th>long_estacion_origen</th>\n",
" <th>lat_estacion_origen</th>\n",
" <th>fecha_destino_recorrido</th>\n",
" <th>id_estacion_destino</th>\n",
" <th>nombre_estacion_destino</th>\n",
" <th>direccion_estacion_destino</th>\n",
" <th>long_estacion_destino</th>\n",
" <th>lat_estacion_destino</th>\n",
" <th>id_usuario</th>\n",
" <th>modelo_bicicleta</th>\n",
" <th>género</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>20610093</td>\n",
" <td>858</td>\n",
" <td>2024-02-08 20:58:32</td>\n",
" <td>516</td>\n",
" <td>368 - PLAZA DE LA SHOÁ</td>\n",
" <td>Av. Del Libertador 3899</td>\n",
" <td>-58.423347</td>\n",
" <td>-34.570998</td>\n",
" <td>2024-02-08 21:12:50</td>\n",
" <td>308</td>\n",
" <td>251 - Solar de la Abadía</td>\n",
" <td>Maure 1819</td>\n",
" <td>-58.436752</td>\n",
" <td>-34.567633</td>\n",
" <td>85441</td>\n",
" <td>ICONIC</td>\n",
" <td>MALE</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>21419508</td>\n",
" <td>512</td>\n",
" <td>2024-04-17 08:37:36</td>\n",
" <td>252</td>\n",
" <td>331 - CALIFORNIA</td>\n",
" <td>California 1774 y General Hornos</td>\n",
" <td>-58.374336</td>\n",
" <td>-34.647121</td>\n",
" <td>2024-04-17 08:46:08</td>\n",
" <td>73</td>\n",
" <td>073 - Ruy Díaz de Guzmán</td>\n",
" <td>Avenida Martin Garcia y Ruy Díaz de Guzmán</td>\n",
" <td>-58.371823</td>\n",
" <td>-34.630681</td>\n",
" <td>512851</td>\n",
" <td>ICONIC</td>\n",
" <td>MALE</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>21421689</td>\n",
" <td>964</td>\n",
" <td>2024-04-17 11:38:10</td>\n",
" <td>277</td>\n",
" <td>292 - PLAZA BOLIVIA</td>\n",
" <td>Olleros Av. &amp; Del Libertador Av.</td>\n",
" <td>-58.436115</td>\n",
" <td>-34.563539</td>\n",
" <td>2024-04-17 11:54:14</td>\n",
" <td>322</td>\n",
" <td>289 - MONTAÑESES</td>\n",
" <td>2786 Montañeses</td>\n",
" <td>-58.454181</td>\n",
" <td>-34.551304</td>\n",
" <td>27717</td>\n",
" <td>ICONIC</td>\n",
" <td>FEMALE</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>21436007</td>\n",
" <td>564</td>\n",
" <td>2024-04-18 09:25:38</td>\n",
" <td>66</td>\n",
" <td>066 - Billinghurst</td>\n",
" <td>3508 Carcova</td>\n",
" <td>-58.413871</td>\n",
" <td>-34.594547</td>\n",
" <td>2024-04-18 09:35:02</td>\n",
" <td>124</td>\n",
" <td>124 - UGARTECHE</td>\n",
" <td>3050 Ugarteche</td>\n",
" <td>-58.411965</td>\n",
" <td>-34.580538</td>\n",
" <td>870951</td>\n",
" <td>ICONIC</td>\n",
" <td>MALE</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>21286197</td>\n",
" <td>1,145</td>\n",
" <td>2024-04-05 19:56:39</td>\n",
" <td>202</td>\n",
" <td>001 - FACULTAD DE DERECHO</td>\n",
" <td>Av. Pres. Figueroa Alcorta 2120</td>\n",
" <td>-58.390602</td>\n",
" <td>-34.583749</td>\n",
" <td>2024-04-05 20:15:44</td>\n",
" <td>128</td>\n",
" <td>128 - PARQUE DEL BAJO</td>\n",
" <td>Tte. Gral. Juan Domingo Perón 133</td>\n",
" <td>-58.368821</td>\n",
" <td>-34.605152</td>\n",
" <td>23367</td>\n",
" <td>ICONIC</td>\n",
" <td>None</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Id_recorrido duracion_recorrido fecha_origen_recorrido id_estacion_origen \\\n",
"0 20610093 858 2024-02-08 20:58:32 516 \n",
"1 21419508 512 2024-04-17 08:37:36 252 \n",
"2 21421689 964 2024-04-17 11:38:10 277 \n",
"3 21436007 564 2024-04-18 09:25:38 66 \n",
"4 21286197 1,145 2024-04-05 19:56:39 202 \n",
"\n",
" nombre_estacion_origen direccion_estacion_origen \\\n",
"0 368 - PLAZA DE LA SHOÁ Av. Del Libertador 3899 \n",
"1 331 - CALIFORNIA California 1774 y General Hornos \n",
"2 292 - PLAZA BOLIVIA Olleros Av. & Del Libertador Av. \n",
"3 066 - Billinghurst 3508 Carcova \n",
"4 001 - FACULTAD DE DERECHO Av. Pres. Figueroa Alcorta 2120 \n",
"\n",
" long_estacion_origen lat_estacion_origen fecha_destino_recorrido \\\n",
"0 -58.423347 -34.570998 2024-02-08 21:12:50 \n",
"1 -58.374336 -34.647121 2024-04-17 08:46:08 \n",
"2 -58.436115 -34.563539 2024-04-17 11:54:14 \n",
"3 -58.413871 -34.594547 2024-04-18 09:35:02 \n",
"4 -58.390602 -34.583749 2024-04-05 20:15:44 \n",
"\n",
" id_estacion_destino nombre_estacion_destino \\\n",
"0 308 251 - Solar de la Abadía \n",
"1 73 073 - Ruy Díaz de Guzmán \n",
"2 322 289 - MONTAÑESES \n",
"3 124 124 - UGARTECHE \n",
"4 128 128 - PARQUE DEL BAJO \n",
"\n",
" direccion_estacion_destino long_estacion_destino \\\n",
"0 Maure 1819 -58.436752 \n",
"1 Avenida Martin Garcia y Ruy Díaz de Guzmán -58.371823 \n",
"2 2786 Montañeses -58.454181 \n",
"3 3050 Ugarteche -58.411965 \n",
"4 Tte. Gral. Juan Domingo Perón 133 -58.368821 \n",
"\n",
" lat_estacion_destino id_usuario modelo_bicicleta género \n",
"0 -34.567633 85441 ICONIC MALE \n",
"1 -34.630681 512851 ICONIC MALE \n",
"2 -34.551304 27717 ICONIC FEMALE \n",
"3 -34.580538 870951 ICONIC MALE \n",
"4 -34.605152 23367 ICONIC None "
]
},
"execution_count": 139,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"duckdb.execute(\"\"\"UPDATE trips_2024 set \n",
" id_usuario = id_usuario.replace('BAEcobici',''),\n",
" Id_recorrido = Id_recorrido.replace('BAEcobici',''),\n",
" id_estacion_origen = id_estacion_origen.replace('BAEcobici',''),\n",
" id_estacion_destino = id_estacion_destino.replace('BAEcobici','')\"\"\")\n",
"\n",
"duckdb.sql(\"SELECT * FROM trips_2024 limit 5\").df()"
]
},
{
"cell_type": "code",
"execution_count": 140,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>column_name</th>\n",
" <th>column_type</th>\n",
" <th>null</th>\n",
" <th>key</th>\n",
" <th>default</th>\n",
" <th>extra</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Id_recorrido</td>\n",
" <td>VARCHAR</td>\n",
" <td>YES</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>duracion_recorrido</td>\n",
" <td>VARCHAR</td>\n",
" <td>YES</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>fecha_origen_recorrido</td>\n",
" <td>TIMESTAMP</td>\n",
" <td>YES</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>id_estacion_origen</td>\n",
" <td>VARCHAR</td>\n",
" <td>YES</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>nombre_estacion_origen</td>\n",
" <td>VARCHAR</td>\n",
" <td>YES</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>direccion_estacion_origen</td>\n",
" <td>VARCHAR</td>\n",
" <td>YES</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>long_estacion_origen</td>\n",
" <td>DOUBLE</td>\n",
" <td>YES</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>lat_estacion_origen</td>\n",
" <td>DOUBLE</td>\n",
" <td>YES</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>fecha_destino_recorrido</td>\n",
" <td>TIMESTAMP</td>\n",
" <td>YES</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>id_estacion_destino</td>\n",
" <td>VARCHAR</td>\n",
" <td>YES</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>nombre_estacion_destino</td>\n",
" <td>VARCHAR</td>\n",
" <td>YES</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>direccion_estacion_destino</td>\n",
" <td>VARCHAR</td>\n",
" <td>YES</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>long_estacion_destino</td>\n",
" <td>DOUBLE</td>\n",
" <td>YES</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>lat_estacion_destino</td>\n",
" <td>DOUBLE</td>\n",
" <td>YES</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>id_usuario</td>\n",
" <td>VARCHAR</td>\n",
" <td>YES</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>modelo_bicicleta</td>\n",
" <td>VARCHAR</td>\n",
" <td>YES</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>género</td>\n",
" <td>VARCHAR</td>\n",
" <td>YES</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" column_name column_type null key default extra\n",
"0 Id_recorrido VARCHAR YES None None None\n",
"1 duracion_recorrido VARCHAR YES None None None\n",
"2 fecha_origen_recorrido TIMESTAMP YES None None None\n",
"3 id_estacion_origen VARCHAR YES None None None\n",
"4 nombre_estacion_origen VARCHAR YES None None None\n",
"5 direccion_estacion_origen VARCHAR YES None None None\n",
"6 long_estacion_origen DOUBLE YES None None None\n",
"7 lat_estacion_origen DOUBLE YES None None None\n",
"8 fecha_destino_recorrido TIMESTAMP YES None None None\n",
"9 id_estacion_destino VARCHAR YES None None None\n",
"10 nombre_estacion_destino VARCHAR YES None None None\n",
"11 direccion_estacion_destino VARCHAR YES None None None\n",
"12 long_estacion_destino DOUBLE YES None None None\n",
"13 lat_estacion_destino DOUBLE YES None None None\n",
"14 id_usuario VARCHAR YES None None None\n",
"15 modelo_bicicleta VARCHAR YES None None None\n",
"16 género VARCHAR YES None None None"
]
},
"execution_count": 140,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"duckdb.sql(\"describe trips_2024\").df()"
]
},
{
"cell_type": "code",
"execution_count": 141,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Id_recorrido</th>\n",
" <th>duracion_recorrido</th>\n",
" <th>fecha_origen_recorrido</th>\n",
" <th>id_estacion_origen</th>\n",
" <th>nombre_estacion_origen</th>\n",
" <th>direccion_estacion_origen</th>\n",
" <th>long_estacion_origen</th>\n",
" <th>lat_estacion_origen</th>\n",
" <th>fecha_destino_recorrido</th>\n",
" <th>id_estacion_destino</th>\n",
" <th>nombre_estacion_destino</th>\n",
" <th>direccion_estacion_destino</th>\n",
" <th>long_estacion_destino</th>\n",
" <th>lat_estacion_destino</th>\n",
" <th>id_usuario</th>\n",
" <th>modelo_bicicleta</th>\n",
" <th>género</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>20610093</td>\n",
" <td>858</td>\n",
" <td>2024-02-08 20:58:32</td>\n",
" <td>516</td>\n",
" <td>368 - PLAZA DE LA SHOÁ</td>\n",
" <td>Av. Del Libertador 3899</td>\n",
" <td>-58.423347</td>\n",
" <td>-34.570998</td>\n",
" <td>2024-02-08 21:12:50</td>\n",
" <td>308</td>\n",
" <td>251 - Solar de la Abadía</td>\n",
" <td>Maure 1819</td>\n",
" <td>-58.436752</td>\n",
" <td>-34.567633</td>\n",
" <td>85441</td>\n",
" <td>ICONIC</td>\n",
" <td>MALE</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>21419508</td>\n",
" <td>512</td>\n",
" <td>2024-04-17 08:37:36</td>\n",
" <td>252</td>\n",
" <td>331 - CALIFORNIA</td>\n",
" <td>California 1774 y General Hornos</td>\n",
" <td>-58.374336</td>\n",
" <td>-34.647121</td>\n",
" <td>2024-04-17 08:46:08</td>\n",
" <td>73</td>\n",
" <td>073 - Ruy Díaz de Guzmán</td>\n",
" <td>Avenida Martin Garcia y Ruy Díaz de Guzmán</td>\n",
" <td>-58.371823</td>\n",
" <td>-34.630681</td>\n",
" <td>512851</td>\n",
" <td>ICONIC</td>\n",
" <td>MALE</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>21421689</td>\n",
" <td>964</td>\n",
" <td>2024-04-17 11:38:10</td>\n",
" <td>277</td>\n",
" <td>292 - PLAZA BOLIVIA</td>\n",
" <td>Olleros Av. &amp; Del Libertador Av.</td>\n",
" <td>-58.436115</td>\n",
" <td>-34.563539</td>\n",
" <td>2024-04-17 11:54:14</td>\n",
" <td>322</td>\n",
" <td>289 - MONTAÑESES</td>\n",
" <td>2786 Montañeses</td>\n",
" <td>-58.454181</td>\n",
" <td>-34.551304</td>\n",
" <td>27717</td>\n",
" <td>ICONIC</td>\n",
" <td>FEMALE</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>21436007</td>\n",
" <td>564</td>\n",
" <td>2024-04-18 09:25:38</td>\n",
" <td>66</td>\n",
" <td>066 - Billinghurst</td>\n",
" <td>3508 Carcova</td>\n",
" <td>-58.413871</td>\n",
" <td>-34.594547</td>\n",
" <td>2024-04-18 09:35:02</td>\n",
" <td>124</td>\n",
" <td>124 - UGARTECHE</td>\n",
" <td>3050 Ugarteche</td>\n",
" <td>-58.411965</td>\n",
" <td>-34.580538</td>\n",
" <td>870951</td>\n",
" <td>ICONIC</td>\n",
" <td>MALE</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>21286197</td>\n",
" <td>1,145</td>\n",
" <td>2024-04-05 19:56:39</td>\n",
" <td>202</td>\n",
" <td>001 - FACULTAD DE DERECHO</td>\n",
" <td>Av. Pres. Figueroa Alcorta 2120</td>\n",
" <td>-58.390602</td>\n",
" <td>-34.583749</td>\n",
" <td>2024-04-05 20:15:44</td>\n",
" <td>128</td>\n",
" <td>128 - PARQUE DEL BAJO</td>\n",
" <td>Tte. Gral. Juan Domingo Perón 133</td>\n",
" <td>-58.368821</td>\n",
" <td>-34.605152</td>\n",
" <td>23367</td>\n",
" <td>ICONIC</td>\n",
" <td>None</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Id_recorrido duracion_recorrido fecha_origen_recorrido id_estacion_origen \\\n",
"0 20610093 858 2024-02-08 20:58:32 516 \n",
"1 21419508 512 2024-04-17 08:37:36 252 \n",
"2 21421689 964 2024-04-17 11:38:10 277 \n",
"3 21436007 564 2024-04-18 09:25:38 66 \n",
"4 21286197 1,145 2024-04-05 19:56:39 202 \n",
"\n",
" nombre_estacion_origen direccion_estacion_origen \\\n",
"0 368 - PLAZA DE LA SHOÁ Av. Del Libertador 3899 \n",
"1 331 - CALIFORNIA California 1774 y General Hornos \n",
"2 292 - PLAZA BOLIVIA Olleros Av. & Del Libertador Av. \n",
"3 066 - Billinghurst 3508 Carcova \n",
"4 001 - FACULTAD DE DERECHO Av. Pres. Figueroa Alcorta 2120 \n",
"\n",
" long_estacion_origen lat_estacion_origen fecha_destino_recorrido \\\n",
"0 -58.423347 -34.570998 2024-02-08 21:12:50 \n",
"1 -58.374336 -34.647121 2024-04-17 08:46:08 \n",
"2 -58.436115 -34.563539 2024-04-17 11:54:14 \n",
"3 -58.413871 -34.594547 2024-04-18 09:35:02 \n",
"4 -58.390602 -34.583749 2024-04-05 20:15:44 \n",
"\n",
" id_estacion_destino nombre_estacion_destino \\\n",
"0 308 251 - Solar de la Abadía \n",
"1 73 073 - Ruy Díaz de Guzmán \n",
"2 322 289 - MONTAÑESES \n",
"3 124 124 - UGARTECHE \n",
"4 128 128 - PARQUE DEL BAJO \n",
"\n",
" direccion_estacion_destino long_estacion_destino \\\n",
"0 Maure 1819 -58.436752 \n",
"1 Avenida Martin Garcia y Ruy Díaz de Guzmán -58.371823 \n",
"2 2786 Montañeses -58.454181 \n",
"3 3050 Ugarteche -58.411965 \n",
"4 Tte. Gral. Juan Domingo Perón 133 -58.368821 \n",
"\n",
" lat_estacion_destino id_usuario modelo_bicicleta género \n",
"0 -34.567633 85441 ICONIC MALE \n",
"1 -34.630681 512851 ICONIC MALE \n",
"2 -34.551304 27717 ICONIC FEMALE \n",
"3 -34.580538 870951 ICONIC MALE \n",
"4 -34.605152 23367 ICONIC None "
]
},
"execution_count": 141,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"duckdb.sql(\"SELECT * FROM trips_2024 limit 5\").df()"
]
},
{
"cell_type": "code",
"execution_count": 142,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>género</th>\n",
" <th>count_star()</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>MALE</td>\n",
" <td>924193</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>FEMALE</td>\n",
" <td>475246</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>OTHER</td>\n",
" <td>123823</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>None</td>\n",
" <td>5465</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" género count_star()\n",
"0 MALE 924193\n",
"1 FEMALE 475246\n",
"2 OTHER 123823\n",
"3 None 5465"
]
},
"execution_count": 142,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# género en los trips_2024\n",
"duckdb.sql(\"select género,count(*) from trips_2024 group by género\").df()"
]
},
{
"cell_type": "code",
"execution_count": 143,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>genero</th>\n",
" <th>count_star()</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>MALE</td>\n",
" <td>40348</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>FEMALE</td>\n",
" <td>34913</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>OTHER</td>\n",
" <td>14216</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" genero count_star()\n",
"0 MALE 40348\n",
"1 FEMALE 34913\n",
"2 OTHER 14216"
]
},
"execution_count": 143,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# genero en la tabla de usuarios\n",
"duckdb.sql(\"select genero,count(*) from usuarios group by genero\").df()"
]
},
{
"cell_type": "code",
"execution_count": 144,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>género</th>\n",
" <th>count_star()</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>FEMALE</td>\n",
" <td>475246</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>OTHER</td>\n",
" <td>129288</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>MALE</td>\n",
" <td>924193</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" género count_star()\n",
"0 FEMALE 475246\n",
"1 OTHER 129288\n",
"2 MALE 924193"
]
},
"execution_count": 144,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"duckdb.execute(\"update trips_2024 set género = 'OTHER' where género is NULL\")\n",
"\n",
"# finalmente el género en los trips_2024, queda de la siguiente manera:\n",
"duckdb.sql(\"select género,count(*) from trips_2024 group by género\").df()"
]
},
{
"cell_type": "code",
"execution_count": 145,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Id_recorrido</th>\n",
" <th>duracion_recorrido</th>\n",
" <th>fecha_origen_recorrido</th>\n",
" <th>id_estacion_origen</th>\n",
" <th>nombre_estacion_origen</th>\n",
" <th>direccion_estacion_origen</th>\n",
" <th>long_estacion_origen</th>\n",
" <th>lat_estacion_origen</th>\n",
" <th>fecha_destino_recorrido</th>\n",
" <th>id_estacion_destino</th>\n",
" <th>nombre_estacion_destino</th>\n",
" <th>direccion_estacion_destino</th>\n",
" <th>long_estacion_destino</th>\n",
" <th>lat_estacion_destino</th>\n",
" <th>id_usuario</th>\n",
" <th>modelo_bicicleta</th>\n",
" <th>género</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>20610093</td>\n",
" <td>858</td>\n",
" <td>2024-02-08 20:58:32</td>\n",
" <td>516</td>\n",
" <td>368 - PLAZA DE LA SHOÁ</td>\n",
" <td>Av. Del Libertador 3899</td>\n",
" <td>-58.423347</td>\n",
" <td>-34.570998</td>\n",
" <td>2024-02-08 21:12:50</td>\n",
" <td>308</td>\n",
" <td>251 - Solar de la Abadía</td>\n",
" <td>Maure 1819</td>\n",
" <td>-58.436752</td>\n",
" <td>-34.567633</td>\n",
" <td>85441</td>\n",
" <td>ICONIC</td>\n",
" <td>MALE</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>21419508</td>\n",
" <td>512</td>\n",
" <td>2024-04-17 08:37:36</td>\n",
" <td>252</td>\n",
" <td>331 - CALIFORNIA</td>\n",
" <td>California 1774 y General Hornos</td>\n",
" <td>-58.374336</td>\n",
" <td>-34.647121</td>\n",
" <td>2024-04-17 08:46:08</td>\n",
" <td>73</td>\n",
" <td>073 - Ruy Díaz de Guzmán</td>\n",
" <td>Avenida Martin Garcia y Ruy Díaz de Guzmán</td>\n",
" <td>-58.371823</td>\n",
" <td>-34.630681</td>\n",
" <td>512851</td>\n",
" <td>ICONIC</td>\n",
" <td>MALE</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>21421689</td>\n",
" <td>964</td>\n",
" <td>2024-04-17 11:38:10</td>\n",
" <td>277</td>\n",
" <td>292 - PLAZA BOLIVIA</td>\n",
" <td>Olleros Av. &amp; Del Libertador Av.</td>\n",
" <td>-58.436115</td>\n",
" <td>-34.563539</td>\n",
" <td>2024-04-17 11:54:14</td>\n",
" <td>322</td>\n",
" <td>289 - MONTAÑESES</td>\n",
" <td>2786 Montañeses</td>\n",
" <td>-58.454181</td>\n",
" <td>-34.551304</td>\n",
" <td>27717</td>\n",
" <td>ICONIC</td>\n",
" <td>FEMALE</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>21436007</td>\n",
" <td>564</td>\n",
" <td>2024-04-18 09:25:38</td>\n",
" <td>66</td>\n",
" <td>066 - Billinghurst</td>\n",
" <td>3508 Carcova</td>\n",
" <td>-58.413871</td>\n",
" <td>-34.594547</td>\n",
" <td>2024-04-18 09:35:02</td>\n",
" <td>124</td>\n",
" <td>124 - UGARTECHE</td>\n",
" <td>3050 Ugarteche</td>\n",
" <td>-58.411965</td>\n",
" <td>-34.580538</td>\n",
" <td>870951</td>\n",
" <td>ICONIC</td>\n",
" <td>MALE</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>21286197</td>\n",
" <td>1,145</td>\n",
" <td>2024-04-05 19:56:39</td>\n",
" <td>202</td>\n",
" <td>001 - FACULTAD DE DERECHO</td>\n",
" <td>Av. Pres. Figueroa Alcorta 2120</td>\n",
" <td>-58.390602</td>\n",
" <td>-34.583749</td>\n",
" <td>2024-04-05 20:15:44</td>\n",
" <td>128</td>\n",
" <td>128 - PARQUE DEL BAJO</td>\n",
" <td>Tte. Gral. Juan Domingo Perón 133</td>\n",
" <td>-58.368821</td>\n",
" <td>-34.605152</td>\n",
" <td>23367</td>\n",
" <td>ICONIC</td>\n",
" <td>OTHER</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Id_recorrido duracion_recorrido fecha_origen_recorrido id_estacion_origen \\\n",
"0 20610093 858 2024-02-08 20:58:32 516 \n",
"1 21419508 512 2024-04-17 08:37:36 252 \n",
"2 21421689 964 2024-04-17 11:38:10 277 \n",
"3 21436007 564 2024-04-18 09:25:38 66 \n",
"4 21286197 1,145 2024-04-05 19:56:39 202 \n",
"\n",
" nombre_estacion_origen direccion_estacion_origen \\\n",
"0 368 - PLAZA DE LA SHOÁ Av. Del Libertador 3899 \n",
"1 331 - CALIFORNIA California 1774 y General Hornos \n",
"2 292 - PLAZA BOLIVIA Olleros Av. & Del Libertador Av. \n",
"3 066 - Billinghurst 3508 Carcova \n",
"4 001 - FACULTAD DE DERECHO Av. Pres. Figueroa Alcorta 2120 \n",
"\n",
" long_estacion_origen lat_estacion_origen fecha_destino_recorrido \\\n",
"0 -58.423347 -34.570998 2024-02-08 21:12:50 \n",
"1 -58.374336 -34.647121 2024-04-17 08:46:08 \n",
"2 -58.436115 -34.563539 2024-04-17 11:54:14 \n",
"3 -58.413871 -34.594547 2024-04-18 09:35:02 \n",
"4 -58.390602 -34.583749 2024-04-05 20:15:44 \n",
"\n",
" id_estacion_destino nombre_estacion_destino \\\n",
"0 308 251 - Solar de la Abadía \n",
"1 73 073 - Ruy Díaz de Guzmán \n",
"2 322 289 - MONTAÑESES \n",
"3 124 124 - UGARTECHE \n",
"4 128 128 - PARQUE DEL BAJO \n",
"\n",
" direccion_estacion_destino long_estacion_destino \\\n",
"0 Maure 1819 -58.436752 \n",
"1 Avenida Martin Garcia y Ruy Díaz de Guzmán -58.371823 \n",
"2 2786 Montañeses -58.454181 \n",
"3 3050 Ugarteche -58.411965 \n",
"4 Tte. Gral. Juan Domingo Perón 133 -58.368821 \n",
"\n",
" lat_estacion_destino id_usuario modelo_bicicleta género \n",
"0 -34.567633 85441 ICONIC MALE \n",
"1 -34.630681 512851 ICONIC MALE \n",
"2 -34.551304 27717 ICONIC FEMALE \n",
"3 -34.580538 870951 ICONIC MALE \n",
"4 -34.605152 23367 ICONIC OTHER "
]
},
"execution_count": 145,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"duckdb.sql(\"SELECT * FROM trips_2024 limit 5\").df()"
]
},
{
"cell_type": "code",
"execution_count": 146,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>column_name</th>\n",
" <th>column_type</th>\n",
" <th>null</th>\n",
" <th>key</th>\n",
" <th>default</th>\n",
" <th>extra</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>id</td>\n",
" <td>VARCHAR</td>\n",
" <td>NO</td>\n",
" <td>PRI</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>nombre</td>\n",
" <td>VARCHAR</td>\n",
" <td>YES</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>direccion</td>\n",
" <td>VARCHAR</td>\n",
" <td>YES</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>longitude</td>\n",
" <td>DOUBLE</td>\n",
" <td>YES</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>latitude</td>\n",
" <td>DOUBLE</td>\n",
" <td>YES</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" column_name column_type null key default extra\n",
"0 id VARCHAR NO PRI None None\n",
"1 nombre VARCHAR YES None None None\n",
"2 direccion VARCHAR YES None None None\n",
"3 longitude DOUBLE YES None None None\n",
"4 latitude DOUBLE YES None None None"
]
},
"execution_count": 146,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Station dimension table: one row per station id, with its name,\n",
"# address and coordinates.\n",
"# CREATE OR REPLACE rebuilds the table in a single statement, so re-running\n",
"# this cell always starts from an empty table.\n",
"duckdb.execute(\"\"\"\n",
"    CREATE OR REPLACE TABLE estaciones (\n",
"        id        VARCHAR PRIMARY KEY,\n",
"        nombre    VARCHAR,\n",
"        direccion VARCHAR,\n",
"        longitude DOUBLE,\n",
"        latitude  DOUBLE\n",
"    );\n",
"\"\"\")\n",
"\n",
"# Show the resulting schema as a DataFrame.\n",
"duckdb.sql(\"describe estaciones\").df()"
]
},
{
"cell_type": "code",
"execution_count": 147,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>nombre</th>\n",
" <th>direccion</th>\n",
" <th>longitude</th>\n",
" <th>latitude</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>64</td>\n",
" <td>064 - RIOBAMBA</td>\n",
" <td>Riobamba 1264 &amp; Juncal</td>\n",
" <td>-58.394109</td>\n",
" <td>-34.593651</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>156</td>\n",
" <td>156 - Plaza Alemania</td>\n",
" <td>2939 Cavia</td>\n",
" <td>-58.407470</td>\n",
" <td>-34.577590</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>432</td>\n",
" <td>187 - Jose Maria Moreno</td>\n",
" <td>141 Jose Maria Moreno</td>\n",
" <td>-58.435801</td>\n",
" <td>-34.619879</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>168</td>\n",
" <td>168 - Estados Unidos</td>\n",
" <td>Estados Unidos &amp; Lima</td>\n",
" <td>-58.381227</td>\n",
" <td>-34.618622</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>005 - Plaza Italia</td>\n",
" <td>Av. Sarmiento 2601</td>\n",
" <td>-58.420954</td>\n",
" <td>-34.580550</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>350</th>\n",
" <td>124</td>\n",
" <td>124 - UGARTECHE</td>\n",
" <td>3050 Ugarteche</td>\n",
" <td>-58.411965</td>\n",
" <td>-34.580538</td>\n",
" </tr>\n",
" <tr>\n",
" <th>351</th>\n",
" <td>309</td>\n",
" <td>298 - PLAZA DR. JUAN B. TERAN</td>\n",
" <td>Nogoya 5525</td>\n",
" <td>-58.519781</td>\n",
" <td>-34.621412</td>\n",
" </tr>\n",
" <tr>\n",
" <th>352</th>\n",
" <td>431</td>\n",
" <td>362 - PARQUE DE LA MEMORIA</td>\n",
" <td>Av. Costanera Rafael Obligado 7200</td>\n",
" <td>-58.436913</td>\n",
" <td>-34.542628</td>\n",
" </tr>\n",
" <tr>\n",
" <th>353</th>\n",
" <td>79</td>\n",
" <td>079 - AZUCENA VILLAFLOR</td>\n",
" <td>Villaflor, Azucena &amp; Dealessi, Pierina</td>\n",
" <td>-58.363930</td>\n",
" <td>-34.611890</td>\n",
" </tr>\n",
" <tr>\n",
" <th>354</th>\n",
" <td>207</td>\n",
" <td>123 - BASUALDO Y RODO</td>\n",
" <td>Guardia Nacional 1700</td>\n",
" <td>-58.487359</td>\n",
" <td>-34.652377</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>355 rows × 5 columns</p>\n",
"</div>"
],
"text/plain": [
" id nombre \\\n",
"0 64 064 - RIOBAMBA \n",
"1 156 156 - Plaza Alemania \n",
"2 432 187 - Jose Maria Moreno \n",
"3 168 168 - Estados Unidos \n",
"4 5 005 - Plaza Italia \n",
".. ... ... \n",
"350 124 124 - UGARTECHE \n",
"351 309 298 - PLAZA DR. JUAN B. TERAN \n",
"352 431 362 - PARQUE DE LA MEMORIA \n",
"353 79 079 - AZUCENA VILLAFLOR \n",
"354 207 123 - BASUALDO Y RODO \n",
"\n",
" direccion longitude latitude \n",
"0 Riobamba 1264 & Juncal -58.394109 -34.593651 \n",
"1 2939 Cavia -58.407470 -34.577590 \n",
"2 141 Jose Maria Moreno -58.435801 -34.619879 \n",
"3 Estados Unidos & Lima -58.381227 -34.618622 \n",
"4 Av. Sarmiento 2601 -58.420954 -34.580550 \n",
".. ... ... ... \n",
"350 3050 Ugarteche -58.411965 -34.580538 \n",
"351 Nogoya 5525 -58.519781 -34.621412 \n",
"352 Av. Costanera Rafael Obligado 7200 -58.436913 -34.542628 \n",
"353 Villaflor, Azucena & Dealessi, Pierina -58.363930 -34.611890 \n",
"354 Guardia Nacional 1700 -58.487359 -34.652377 \n",
"\n",
"[355 rows x 5 columns]"
]
},
"execution_count": 147,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Populate `estaciones` with every station that appears as either the origin\n",
"# or the destination of a trip. UNION (as opposed to UNION ALL) already\n",
"# removes duplicate rows, so no extra DISTINCT is needed.\n",
"# NOTE(review): `id` is the primary key, so this insert fails if the same\n",
"# station id appears with two different names, addresses or coordinates.\n",
"duckdb.execute(\"\"\"\n",
"    INSERT INTO estaciones (id, nombre, direccion, longitude, latitude)\n",
"    SELECT id_estacion_origen,\n",
"           nombre_estacion_origen,\n",
"           direccion_estacion_origen,\n",
"           long_estacion_origen,\n",
"           lat_estacion_origen\n",
"    FROM trips_2024\n",
"    UNION\n",
"    SELECT id_estacion_destino,\n",
"           nombre_estacion_destino,\n",
"           direccion_estacion_destino,\n",
"           long_estacion_destino,\n",
"           lat_estacion_destino\n",
"    FROM trips_2024;\n",
"\"\"\")\n",
"\n",
"# Keep a pandas copy of the station table for the CSV export below.\n",
"estaciones = duckdb.sql(\"select * from estaciones\").df()\n",
"estaciones"
]
},
{
"cell_type": "code",
"execution_count": 148,
"metadata": {},
"outputs": [],
"source": [
"# Export the station table to CSV; index=False leaves out the pandas row index.\n",
"estaciones.to_csv('estaciones.csv', index=False)"
]
},
{
"cell_type": "code",
"execution_count": 152,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<duckdb.duckdb.DuckDBPyConnection at 0x2a6bb57b1f0>"
]
},
"execution_count": 152,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Station names, addresses and coordinates are now stored in `estaciones`,\n",
"# so drop the per-trip copies and keep only the station ids.\n",
"columnas_redundantes = [\n",
"    f\"{atributo}_estacion_{extremo}\"\n",
"    for extremo in (\"origen\", \"destino\")\n",
"    for atributo in (\"nombre\", \"direccion\", \"long\", \"lat\")\n",
"]\n",
"for columna in columnas_redundantes:\n",
"    # IF EXISTS makes the statement a no-op when the column is already gone.\n",
"    duckdb.execute(f\"alter table trips_2024 drop column IF EXISTS {columna}\")"
]
},
{
"cell_type": "code",
"execution_count": 156,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<duckdb.duckdb.DuckDBPyConnection at 0x2a6bb57b1f0>"
]
},
"execution_count": 156,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Store the trip id as a 32-bit INTEGER.\n",
"# NOTE(review): the column's previous type is not shown here; confirm every id fits in INTEGER.\n",
"duckdb.execute(\"alter table trips_2024 alter column Id_recorrido set type INTEGER\")"
]
},
{
"cell_type": "code",
"execution_count": 153,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Id_recorrido</th>\n",
" <th>duracion_recorrido</th>\n",
" <th>fecha_origen_recorrido</th>\n",
" <th>id_estacion_origen</th>\n",
" <th>fecha_destino_recorrido</th>\n",
" <th>id_estacion_destino</th>\n",
" <th>id_usuario</th>\n",
" <th>modelo_bicicleta</th>\n",
" <th>género</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>20610093</td>\n",
" <td>858</td>\n",
" <td>2024-02-08 20:58:32</td>\n",
" <td>516</td>\n",
" <td>2024-02-08 21:12:50</td>\n",
" <td>308</td>\n",
" <td>85441</td>\n",
" <td>ICONIC</td>\n",
" <td>MALE</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>21419508</td>\n",
" <td>512</td>\n",
" <td>2024-04-17 08:37:36</td>\n",
" <td>252</td>\n",
" <td>2024-04-17 08:46:08</td>\n",
" <td>73</td>\n",
" <td>512851</td>\n",
" <td>ICONIC</td>\n",
" <td>MALE</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>21421689</td>\n",
" <td>964</td>\n",
" <td>2024-04-17 11:38:10</td>\n",
" <td>277</td>\n",
" <td>2024-04-17 11:54:14</td>\n",
" <td>322</td>\n",
" <td>27717</td>\n",
" <td>ICONIC</td>\n",
" <td>FEMALE</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>21436007</td>\n",
" <td>564</td>\n",
" <td>2024-04-18 09:25:38</td>\n",
" <td>66</td>\n",
" <td>2024-04-18 09:35:02</td>\n",
" <td>124</td>\n",
" <td>870951</td>\n",
" <td>ICONIC</td>\n",
" <td>MALE</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>21286197</td>\n",
" <td>1,145</td>\n",
" <td>2024-04-05 19:56:39</td>\n",
" <td>202</td>\n",
" <td>2024-04-05 20:15:44</td>\n",
" <td>128</td>\n",
" <td>23367</td>\n",
" <td>ICONIC</td>\n",
" <td>OTHER</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Id_recorrido duracion_recorrido fecha_origen_recorrido id_estacion_origen \\\n",
"0 20610093 858 2024-02-08 20:58:32 516 \n",
"1 21419508 512 2024-04-17 08:37:36 252 \n",
"2 21421689 964 2024-04-17 11:38:10 277 \n",
"3 21436007 564 2024-04-18 09:25:38 66 \n",
"4 21286197 1,145 2024-04-05 19:56:39 202 \n",
"\n",
" fecha_destino_recorrido id_estacion_destino id_usuario modelo_bicicleta \\\n",
"0 2024-02-08 21:12:50 308 85441 ICONIC \n",
"1 2024-04-17 08:46:08 73 512851 ICONIC \n",
"2 2024-04-17 11:54:14 322 27717 ICONIC \n",
"3 2024-04-18 09:35:02 124 870951 ICONIC \n",
"4 2024-04-05 20:15:44 128 23367 ICONIC \n",
"\n",
" género \n",
"0 MALE \n",
"1 MALE \n",
"2 FEMALE \n",
"3 MALE \n",
"4 OTHER "
]
},
"execution_count": 153,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the first rows after the station attribute columns were dropped.\n",
"duckdb.sql(\"select * from trips_2024 limit 5\").df()"
]
},
{
"cell_type": "code",
"execution_count": 160,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<duckdb.duckdb.DuckDBPyConnection at 0x2a6bb57b1f0>"
]
},
"execution_count": 160,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Remove thousands separators (e.g. '1,145') so the duration values can be cast to INTEGER.\n",
"duckdb.execute(\"update trips_2024 set duracion_recorrido = duracion_recorrido.replace(',','')\")"
]
},
{
"cell_type": "code",
"execution_count": 162,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<duckdb.duckdb.DuckDBPyConnection at 0x2a6bb57b1f0>"
]
},
"execution_count": 162,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Convert the duration column to INTEGER now that the thousands separators are gone.\n",
"duckdb.execute(\"alter table trips_2024 alter column duracion_recorrido set type INTEGER\")"
]
},
{
"cell_type": "code",
"execution_count": 167,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<duckdb.duckdb.DuckDBPyConnection at 0x2a6bb57b1f0>"
]
},
"execution_count": 167,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Convert the duration from seconds to minutes (e.g. 858 s is the 14 min 18 s trip in the preview).\n",
"# WARNING: this overwrites the column in place, so every extra run of this cell divides by 60 again.\n",
"# NOTE(review): the column is INTEGER, so fractional minutes are not kept; confirm this is intended.\n",
"duckdb.execute(\"update trips_2024 set duracion_recorrido = duracion_recorrido/60\")"
]
},
{
"cell_type": "code",
"execution_count": 168,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>min(duracion_recorrido)</th>\n",
" <th>max(duracion_recorrido)</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>42645</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" min(duracion_recorrido) max(duracion_recorrido)\n",
"0 1 42645"
]
},
"execution_count": 168,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Sanity-check the range of trip durations, now expressed in minutes.\n",
"# NOTE(review): the saved output shows a maximum of 42645 minutes (about 29.6 days); such outliers may need filtering.\n",
"duckdb.sql(\"select min(duracion_recorrido),max(duracion_recorrido) from trips_2024\").df()"
]
},
{
"cell_type": "code",
"execution_count": 171,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>column_name</th>\n",
" <th>column_type</th>\n",
" <th>null</th>\n",
" <th>key</th>\n",
" <th>default</th>\n",
" <th>extra</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Id_recorrido</td>\n",
" <td>INTEGER</td>\n",
" <td>YES</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>duracion_recorrido</td>\n",
" <td>INTEGER</td>\n",
" <td>YES</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>fecha_origen_recorrido</td>\n",
" <td>TIMESTAMP</td>\n",
" <td>YES</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>id_estacion_origen</td>\n",
" <td>VARCHAR</td>\n",
" <td>YES</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>fecha_destino_recorrido</td>\n",
" <td>TIMESTAMP</td>\n",
" <td>YES</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>id_estacion_destino</td>\n",
" <td>VARCHAR</td>\n",
" <td>YES</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>id_usuario</td>\n",
" <td>VARCHAR</td>\n",
" <td>YES</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>modelo_bicicleta</td>\n",
" <td>VARCHAR</td>\n",
" <td>YES</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>género</td>\n",
" <td>VARCHAR</td>\n",
" <td>YES</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" column_name column_type null key default extra\n",
"0 Id_recorrido INTEGER YES None None None\n",
"1 duracion_recorrido INTEGER YES None None None\n",
"2 fecha_origen_recorrido TIMESTAMP YES None None None\n",
"3 id_estacion_origen VARCHAR YES None None None\n",
"4 fecha_destino_recorrido TIMESTAMP YES None None None\n",
"5 id_estacion_destino VARCHAR YES None None None\n",
"6 id_usuario VARCHAR YES None None None\n",
"7 modelo_bicicleta VARCHAR YES None None None\n",
"8 género VARCHAR YES None None None"
]
},
"execution_count": 171,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Confirm the final column types of the trips table.\n",
"duckdb.sql(\"describe trips_2024\").df()"
]
},
{
"cell_type": "code",
"execution_count": 173,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "27fb87ae6ef141c08ae2470bcd1fb1f8",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"<duckdb.duckdb.DuckDBPyConnection at 0x2a6bb57b1f0>"
]
},
"execution_count": 173,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Export the trips table to Parquet.\n",
"# Snappy is the same as the default compression and produces a 46.2 MB file:\n",
"# duckdb.execute(\"COPY trips_2024 TO 'viajes_2024.parquet' (FORMAT PARQUET);\")\n",
"# duckdb.execute(\"COPY trips_2024 TO 'viajes_2024_snappy.parquet' (FORMAT PARQUET, COMPRESSION 'SNAPPY');\")\n",
"\n",
"# GZIP produces a smaller file (33.3 MB), so it is the variant actually written.\n",
"duckdb.execute(\"COPY trips_2024 TO 'viajes_2024_gzip.parquet' (FORMAT PARQUET, COMPRESSION 'GZIP');\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment