Skip to content

Instantly share code, notes, and snippets.

@jpz
Created February 8, 2023 14:11
Show Gist options
  • Save jpz/596d7c11c87a3036ed8fadadea28a806 to your computer and use it in GitHub Desktop.
Save jpz/596d7c11c87a3036ed8fadadea28a806 to your computer and use it in GitHub Desktop.
Example of using SQL over pandas dataframes
Display the source blob
Display the rendered blob
Raw
{
"cells": [
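{
"cell_type": "code",
"execution_count": 1,
"id": "049a914b-34df-451f-83a1-b94231eb0a5a",
"metadata": {},
"outputs": [],
"source": [
"# Uncomment and run once if jupysql, duckdb and duckdb-engine are not yet installed in this kernel's environment:\n",
"# %pip install jupysql duckdb duckdb-engine --quiet"
]
},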
{
"cell_type": "code",
"execution_count": 2,
"id": "f4190914-bd93-4097-8b6f-d6b32c4f369c",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"import pandas as pd\n",
"%load_ext sql\n",
"%sql duckdb://"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "1c439d3a-ab9c-46c0-9bae-2e534f27f6ff",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# Two small demo frames sharing the key column 'a'; df2 has no row for a == 66.\n",
"df1 = pd.DataFrame(\n",
"    {\n",
"        'a': [42, 34, 66],\n",
"        'b': ['ball', 'bag', 'bummer'],\n",
"        'c': ['cack', 'cook', 'creek'],\n",
"    }\n",
")\n",
"\n",
"df2 = pd.DataFrame(\n",
"    {\n",
"        'a': [42, 34],\n",
"        'b': ['blag', 'bleary'],\n",
"        'd': ['doggy', 'ditto'],\n",
"    }\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "6f739a14-01af-4e03-909b-783fa81b1393",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"* duckdb://\n",
"Done.\n"
]
},
{
"data": {
"text/html": [
"<table>\n",
" <tr>\n",
" <th>a</th>\n",
" <th>b</th>\n",
" <th>c</th>\n",
" </tr>\n",
" <tr>\n",
" <td>42</td>\n",
" <td>ball</td>\n",
" <td>cack</td>\n",
" </tr>\n",
" <tr>\n",
" <td>34</td>\n",
" <td>bag</td>\n",
" <td>cook</td>\n",
" </tr>\n",
" <tr>\n",
" <td>66</td>\n",
" <td>bummer</td>\n",
" <td>creek</td>\n",
" </tr>\n",
"</table>"
],
"text/plain": [
"[(42, 'ball', 'cack'), (34, 'bag', 'cook'), (66, 'bummer', 'creek')]"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Query the pandas DataFrame df1 by its Python variable name.\n",
"%sql SELECT * FROM df1"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "dadf42ca-2c93-4215-892e-a18aedd5bc5e",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"* duckdb://\n",
"Done.\n"
]
},
{
"data": {
"text/html": [
"<table>\n",
" <tr>\n",
" <th>a</th>\n",
" <th>b</th>\n",
" <th>c</th>\n",
" <th>a_1</th>\n",
" <th>b_1</th>\n",
" <th>d</th>\n",
" </tr>\n",
" <tr>\n",
" <td>42</td>\n",
" <td>ball</td>\n",
" <td>cack</td>\n",
" <td>42</td>\n",
" <td>blag</td>\n",
" <td>doggy</td>\n",
" </tr>\n",
" <tr>\n",
" <td>34</td>\n",
" <td>bag</td>\n",
" <td>cook</td>\n",
" <td>34</td>\n",
" <td>bleary</td>\n",
" <td>ditto</td>\n",
" </tr>\n",
"</table>"
],
"text/plain": [
"[(42, 'ball', 'cack', 42, 'blag', 'doggy'),\n",
" (34, 'bag', 'cook', 34, 'bleary', 'ditto')]"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Inner join of df1 and df2 on the shared key column a, written with an explicit\n",
"# JOIN ... ON clause rather than a comma join filtered in WHERE.\n",
"%sql SELECT * FROM df1 INNER JOIN df2 ON df1.a = df2.a"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "28d2eeb4-0191-479e-ba53-a74050f08328",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"* duckdb://\n",
"Done.\n"
]
},
{
"data": {
"text/html": [
"<table>\n",
" <tr>\n",
" <th>a</th>\n",
" <th>b</th>\n",
" <th>c</th>\n",
" <th>a_1</th>\n",
" <th>b_1</th>\n",
" <th>d</th>\n",
" </tr>\n",
" <tr>\n",
" <td>42</td>\n",
" <td>ball</td>\n",
" <td>cack</td>\n",
" <td>42</td>\n",
" <td>blag</td>\n",
" <td>doggy</td>\n",
" </tr>\n",
" <tr>\n",
" <td>34</td>\n",
" <td>bag</td>\n",
" <td>cook</td>\n",
" <td>34</td>\n",
" <td>bleary</td>\n",
" <td>ditto</td>\n",
" </tr>\n",
" <tr>\n",
" <td>66</td>\n",
" <td>bummer</td>\n",
" <td>creek</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
"</table>"
],
"text/plain": [
"[(42, 'ball', 'cack', 42, 'blag', 'doggy'),\n",
" (34, 'bag', 'cook', 34, 'bleary', 'ditto'),\n",
" (66, 'bummer', 'creek', None, None, None)]"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Left join keeps every df1 row; the df2 columns are None where no row matches (a = 66).\n",
"%sql SELECT * FROM df1 LEFT OUTER JOIN df2 ON df1.a = df2.a"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.2"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment