Skip to content

Instantly share code, notes, and snippets.

@voutilad
Created October 12, 2022 15:38
Show Gist options
  • Save voutilad/ff789a5502b695f88d26f5a5a572d802 to your computer and use it in GitHub Desktop.
Save voutilad/ff789a5502b695f88d26f5a5a572d802 to your computer and use it in GitHub Desktop.
AuraDS PyArrow Test.ipynb
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"collapsed_sections": [],
"authorship_tag": "ABX9TyN3PzRA3bEyB+wQ389+dmSw",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/voutilad/ff789a5502b695f88d26f5a5a572d802/aurads-pyarrow-test.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"id": "-aciz45rtJfz"
},
"outputs": [],
"source": [
"%%capture\n",
"%pip install graphdatascience ipywidgets"
]
},
{
"cell_type": "code",
"source": [
"import os\n",
"from getpass import getpass\n",
"from time import time\n",
"\n",
"from graphdatascience import GraphDataScience\n",
"\n",
"# Connection settings are read from the environment so that no secret is\n",
"# stored in the notebook source. The URI and user fall back to the values\n",
"# this notebook was written against; the password falls back to an\n",
"# interactive prompt when NEO4J_PASSWORD is not set.\n",
"NEO4J_URI = os.environ.get(\n",
"    \"NEO4J_URI\", \"neo4j+s://dab85ed2.databases.neo4j.io:7687\"\n",
")\n",
"NEO4J_USER = os.environ.get(\"NEO4J_USER\", \"neo4j\")\n",
"NEO4J_PASSWORD = os.environ.get(\"NEO4J_PASSWORD\") or getpass(\"Neo4j password: \")\n",
"\n",
"# `gds` is the client handle used by every later cell.\n",
"gds = GraphDataScience(\n",
"    NEO4J_URI,\n",
"    auth=(NEO4J_USER, NEO4J_PASSWORD),\n",
"    aura_ds=True,\n",
")\n",
"gds.version()\n",
"gds.set_database(\"neo4j\")"
],
"metadata": {
"id": "vMUXzYz2tZcd"
},
"execution_count": 2,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Remove every graph currently listed in the catalog before projecting a new one.\n",
"for graph_name in gds.graph.list()[\"graphName\"]:\n",
"    stale_graph = gds.graph.get(graph_name)\n",
"    stale_graph.drop()"
],
"metadata": {
"id": "zNGsemYdzhq0"
},
"execution_count": 3,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Generate a random in-memory graph to benchmark against. Only the graph\n",
"# object from the returned pair is used; the second element is discarded.\n",
"g, _ = gds.beta.graph.generate(\n",
"    \"rando\",  # graph name\n",
"    100_000,  # node count\n",
"    3,  # average degree\n",
")\n",
"\n",
"g.memory_usage()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 35
},
"id": "ofLXUDBstpz-",
"outputId": "7feba07f-fcc7-4bda-9b82-62f7205f565c"
},
"execution_count": 4,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"'2979 KiB'"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "string"
}
},
"metadata": {},
"execution_count": 4
}
]
},
{
"cell_type": "code",
"source": [
"# Run FastRP in mutate mode with an embedding dimension of 128, storing the\n",
"# result under the \"fastrp\" node property that the streaming cells read back.\n",
"gds.fastRP.mutate(\n",
"    g,\n",
"    mutateProperty=\"fastrp\",\n",
"    embeddingDimension=128,\n",
")"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "raoCog22uymg",
"outputId": "e1df7ebb-dc01-46e2-fdd1-fcd795bf48ac"
},
"execution_count": 5,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"nodePropertiesWritten 100000\n",
"mutateMillis 0\n",
"nodeCount 100000\n",
"preProcessingMillis 0\n",
"computeMillis 157\n",
"configuration {'nodeSelfInfluence': 0, 'relationshipWeightPr...\n",
"Name: 0, dtype: object"
]
},
"metadata": {},
"execution_count": 5
}
]
},
{
"cell_type": "code",
"source": [
"# Time streaming the \"fastrp\" node property through the client's\n",
"# streamNodeProperty call.\n",
"start = time()\n",
"df = gds.graph.streamNodeProperty(g, \"fastrp\")\n",
"elapsed = time() - start\n",
"\n",
"# Report fractional seconds: truncating to whole seconds would print\n",
"# \"0 seconds\" for any sub-second run and blur the comparison with Bolt.\n",
"print(\n",
"    f\"Took {elapsed:,.2f} seconds to stream {len(df)} rows\"\n",
"    \" using PyArrow 🏹 on AuraDS.\"\n",
")\n",
"df"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 441
},
"id": "uBEuAn80wo6H",
"outputId": "878ec221-c75d-4d21-e33f-412e883abd34"
},
"execution_count": 6,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Took 2 seconds to stream 100000 rows using PyArrow 🏹 on AuraDS.\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
" nodeId propertyValue\n",
"0 30000 [-0.10017271, -0.032030858, -0.09249937, 0.077...\n",
"1 30001 [-0.09373895, 0.019914307, -0.1586251, 0.22110...\n",
"2 30002 [0.11671814, -0.029563732, 0.06739389, 0.09481...\n",
"3 30003 [-0.030445129, 0.16517806, 0.028812088, 0.1198...\n",
"4 30004 [-0.058878437, -0.009920623, 0.074649066, 0.02...\n",
"... ... ...\n",
"99995 29995 [-0.1089322, -0.01667827, 0.036708266, -0.0617...\n",
"99996 29996 [0.15115936, -0.18490574, -0.10231302, -0.0028...\n",
"99997 29997 [0.22973713, -0.22368807, 0.036445748, -0.0670...\n",
"99998 29998 [0.05255282, 0.14090458, 0.015201651, -0.01980...\n",
"99999 29999 [0.10274908, 0.0168579, 0.034535658, 0.0184479...\n",
"\n",
"[100000 rows x 2 columns]"
],
"text/html": [
"\n",
" <div id=\"df-cd171858-7bbc-4037-9de5-af5ae68983dc\">\n",
" <div class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>nodeId</th>\n",
" <th>propertyValue</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>30000</td>\n",
" <td>[-0.10017271, -0.032030858, -0.09249937, 0.077...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>30001</td>\n",
" <td>[-0.09373895, 0.019914307, -0.1586251, 0.22110...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>30002</td>\n",
" <td>[0.11671814, -0.029563732, 0.06739389, 0.09481...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>30003</td>\n",
" <td>[-0.030445129, 0.16517806, 0.028812088, 0.1198...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>30004</td>\n",
" <td>[-0.058878437, -0.009920623, 0.074649066, 0.02...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>99995</th>\n",
" <td>29995</td>\n",
" <td>[-0.1089322, -0.01667827, 0.036708266, -0.0617...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>99996</th>\n",
" <td>29996</td>\n",
" <td>[0.15115936, -0.18490574, -0.10231302, -0.0028...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>99997</th>\n",
" <td>29997</td>\n",
" <td>[0.22973713, -0.22368807, 0.036445748, -0.0670...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>99998</th>\n",
" <td>29998</td>\n",
" <td>[0.05255282, 0.14090458, 0.015201651, -0.01980...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>99999</th>\n",
" <td>29999</td>\n",
" <td>[0.10274908, 0.0168579, 0.034535658, 0.0184479...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>100000 rows × 2 columns</p>\n",
"</div>\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-cd171858-7bbc-4037-9de5-af5ae68983dc')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
" \n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
" <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
" </svg>\n",
" </button>\n",
" \n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" flex-wrap:wrap;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-cd171858-7bbc-4037-9de5-af5ae68983dc button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-cd171858-7bbc-4037-9de5-af5ae68983dc');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
" </div>\n",
" "
]
},
"metadata": {},
"execution_count": 6
}
]
},
{
"cell_type": "code",
"source": [
"# Time streaming the same \"fastrp\" node property by calling the procedure\n",
"# as a plain Cypher query; the graph name is passed as a query parameter.\n",
"start = time()\n",
"df = gds.run_cypher(\n",
"    \"CALL gds.graph.streamNodeProperty($name, 'fastrp');\",\n",
"    params={\"name\": g.name()},\n",
")\n",
"elapsed = time() - start\n",
"\n",
"# Report fractional seconds: truncating to whole seconds would hide\n",
"# sub-second differences between the two transports being compared.\n",
"print(\n",
"    f\"Took {elapsed:,.2f} seconds to stream {len(df)} rows\"\n",
"    \" using Bolt 🔩 on AuraDS.\"\n",
")\n",
"df"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 441
},
"id": "31G2p6MrwKo3",
"outputId": "2cdd3f08-dedd-4cb5-acb7-a2e4c61e2fc0"
},
"execution_count": 7,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Took 46 seconds to stream 100000 rows using Bolt 🔩 on AuraDS.\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
" nodeId propertyValue\n",
"0 0 [0.06900186091661453, 0.09999401867389679, -0....\n",
"1 1 [-0.20994877815246582, 0.033983130007982254, 0...\n",
"2 2 [0.1813955307006836, 0.3565533757209778, 0.063...\n",
"3 3 [0.07619422674179077, -0.08010841906070709, 0....\n",
"4 4 [-0.07976201921701431, -0.0306154265999794, 0....\n",
"... ... ...\n",
"99995 99995 [0.10703882575035095, -0.16264069080352783, 0....\n",
"99996 99996 [0.19086328148841858, 0.00901761930435896, 0.2...\n",
"99997 99997 [-0.18905845284461975, -0.1414109170436859, 0....\n",
"99998 99998 [-0.23975840210914612, -0.13943567872047424, 0...\n",
"99999 99999 [0.1638825386762619, 0.0505363866686821, 0.058...\n",
"\n",
"[100000 rows x 2 columns]"
],
"text/html": [
"\n",
" <div id=\"df-c53068e6-315c-479e-b546-e791c55c1844\">\n",
" <div class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>nodeId</th>\n",
" <th>propertyValue</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0</td>\n",
" <td>[0.06900186091661453, 0.09999401867389679, -0....</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>[-0.20994877815246582, 0.033983130007982254, 0...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2</td>\n",
" <td>[0.1813955307006836, 0.3565533757209778, 0.063...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>3</td>\n",
" <td>[0.07619422674179077, -0.08010841906070709, 0....</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>4</td>\n",
" <td>[-0.07976201921701431, -0.0306154265999794, 0....</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>99995</th>\n",
" <td>99995</td>\n",
" <td>[0.10703882575035095, -0.16264069080352783, 0....</td>\n",
" </tr>\n",
" <tr>\n",
" <th>99996</th>\n",
" <td>99996</td>\n",
" <td>[0.19086328148841858, 0.00901761930435896, 0.2...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>99997</th>\n",
" <td>99997</td>\n",
" <td>[-0.18905845284461975, -0.1414109170436859, 0....</td>\n",
" </tr>\n",
" <tr>\n",
" <th>99998</th>\n",
" <td>99998</td>\n",
" <td>[-0.23975840210914612, -0.13943567872047424, 0...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>99999</th>\n",
" <td>99999</td>\n",
" <td>[0.1638825386762619, 0.0505363866686821, 0.058...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>100000 rows × 2 columns</p>\n",
"</div>\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-c53068e6-315c-479e-b546-e791c55c1844')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
" \n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
" <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
" </svg>\n",
" </button>\n",
" \n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" flex-wrap:wrap;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-c53068e6-315c-479e-b546-e791c55c1844 button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-c53068e6-315c-479e-b546-e791c55c1844');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
" </div>\n",
" "
]
},
"metadata": {},
"execution_count": 7
}
]
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment