Skip to content

Instantly share code, notes, and snippets.

@xerial
Last active August 22, 2018 05:56
Show Gist options
  • Save xerial/702f1c28bd60df58b0aff8d0d143c9c1 to your computer and use it in GitHub Desktop.
Save xerial/702f1c28bd60df58b0aff8d0d143c9c1 to your computer and use it in GitHub Desktop.
Interactive Analysis Using Treasure Data Presto and Python
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Interactive Analysis using Presto and Pandas\n",
"\n",
"Install the Presto Python client and pandas before running the cells below:\n",
"\n",
"```shell\n",
"$ pip install presto-python-client\n",
"$ pip install pandas\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import prestodb\n",
"import pandas\n",
"import os\n",
"conn=prestodb.dbapi.connect(\n",
" host='api-presto.treasuredata.com',\n",
" port=443,\n",
" http_scheme='https',\n",
" # Set your TD API key to TD_API_KEY environment variable,\n",
" user=os.environ['TD_API_KEY'],\n",
" catalog='td-presto',\n",
" schema='sample_datasets'\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def query(sql):\n",
" cur = conn.cursor()\n",
" cur.execute(sql)\n",
" rows = cur.fetchall()\n",
" column_names = [desc[0] for desc in cur.description]\n",
" return pandas.DataFrame(rows, columns = column_names)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"scrolled": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>symbol</th>\n",
" <th>cnt</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>CMCSA</td>\n",
" <td>9268</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>LANC</td>\n",
" <td>9268</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>ERIC</td>\n",
" <td>9268</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>YRCW</td>\n",
" <td>9268</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>MSEX</td>\n",
" <td>9268</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>RAVN</td>\n",
" <td>9268</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>VLGEA</td>\n",
" <td>9268</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>GK</td>\n",
" <td>9268</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>HELE</td>\n",
" <td>9268</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>ASBC</td>\n",
" <td>9268</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" symbol cnt\n",
"0 CMCSA 9268\n",
"1 LANC 9268\n",
"2 ERIC 9268\n",
"3 YRCW 9268\n",
"4 MSEX 9268\n",
"5 RAVN 9268\n",
"6 VLGEA 9268\n",
"7 GK 9268\n",
"8 HELE 9268\n",
"9 ASBC 9268"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"query(\"SELECT symbol, count(*) cnt from nasdaq group by 1 order by cnt desc limit 10\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment