Created
May 28, 2021 13:36
-
-
Save SETIADEEPANSHU/5c55e18f583772102e0e0461a40c0189 to your computer and use it in GitHub Desktop.
Data Exploration.ipynb
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 5, | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.10" | |
}, | |
"colab": { | |
"name": "Data Exploration.ipynb", | |
"provenance": [], | |
"collapsed_sections": [], | |
"include_colab_link": true | |
} | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/SETIADEEPANSHU/5c55e18f583772102e0e0461a40c0189/data-exploration.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "d9058e05-6ca9-4b01-a063-2e8e531ec756", | |
"outputId": "6bbc4344-eb52-4560-b361-5c9b055764e4" | |
}, | |
"source": [ | |
"import json\n", | |
"import pandas as pd\n", | |
"# None = never truncate column text; negative widths are deprecated since pandas 1.0\n", | |
"pd.set_option('display.max_colwidth', None)" | |
], | |
"id": "d9058e05-6ca9-4b01-a063-2e8e531ec756", | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:3: FutureWarning: Passing a negative integer is deprecated in version 1.0 and will not be supported in future version. Instead, use None to not limit the column width.\n", | |
" This is separate from the ipykernel package so we can avoid doing imports until\n" | |
], | |
"name": "stderr" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"collapsed": true, | |
"jupyter": { | |
"outputs_hidden": true | |
}, | |
"tags": [], | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "a7c5449b-e24b-40d0-b1ca-8aae842ca28b", | |
"outputId": "d9dc892b-7e63-4884-a107-77b4c906d848" | |
}, | |
"source": [ | |
"# %pip (rather than !pip) installs into the environment of the running kernel\n", | |
"%pip install --quiet sentence-transformers==1.2.0\n", | |
"%pip install --quiet umap-learn[plot]==0.5.1\n", | |
"%pip install --quiet hdbscan==0.8.26" | |
], | |
"id": "a7c5449b-e24b-40d0-b1ca-8aae842ca28b", | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"\u001b[K |████████████████████████████████| 81kB 7.5MB/s \n", | |
"\u001b[K |████████████████████████████████| 2.3MB 27.2MB/s \n", | |
"\u001b[K |████████████████████████████████| 1.2MB 40.6MB/s \n", | |
"\u001b[K |████████████████████████████████| 901kB 20.4MB/s \n", | |
"\u001b[K |████████████████████████████████| 3.3MB 37.4MB/s \n", | |
"\u001b[?25h Building wheel for sentence-transformers (setup.py) ... \u001b[?25l\u001b[?25hdone\n", | |
"\u001b[K |████████████████████████████████| 81kB 7.1MB/s \n", | |
"\u001b[K |████████████████████████████████| 1.2MB 34.9MB/s \n", | |
"\u001b[K |████████████████████████████████| 15.8MB 228kB/s \n", | |
"\u001b[K |████████████████████████████████| 81kB 7.5MB/s \n", | |
"\u001b[K |████████████████████████████████| 706kB 43.7MB/s \n", | |
"\u001b[K |████████████████████████████████| 112kB 54.7MB/s \n", | |
"\u001b[?25h Building wheel for umap-learn (setup.py) ... \u001b[?25l\u001b[?25hdone\n", | |
" Building wheel for pynndescent (setup.py) ... \u001b[?25l\u001b[?25hdone\n", | |
" Building wheel for datashape (setup.py) ... \u001b[?25l\u001b[?25hdone\n", | |
"\u001b[31mERROR: distributed 2021.5.0 has requirement cloudpickle>=1.5.0, but you'll have cloudpickle 1.3.0 which is incompatible.\u001b[0m\n", | |
"\u001b[31mERROR: distributed 2021.5.0 has requirement dask==2021.05.0, but you'll have dask 2.12.0 which is incompatible.\u001b[0m\n", | |
"\u001b[K |████████████████████████████████| 4.7MB 23.8MB/s \n", | |
"\u001b[?25h Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", | |
" Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", | |
" Preparing wheel metadata ... \u001b[?25l\u001b[?25hdone\n", | |
" Building wheel for hdbscan (PEP 517) ... \u001b[?25l\u001b[?25hdone\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "8d3a63b0-492f-4c15-afc8-73decb7b2bbd", | |
"outputId": "85f07cca-6f74-4299-b0df-dbdd4ea01115" | |
}, | |
"source": [ | |
"import nltk\n", | |
"from nltk import FreqDist\n", | |
"nltk.download('stopwords')" | |
], | |
"id": "8d3a63b0-492f-4c15-afc8-73decb7b2bbd", | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"[nltk_data] Downloading package stopwords to /root/nltk_data...\n", | |
"[nltk_data] Package stopwords is already up-to-date!\n" | |
], | |
"name": "stdout" | |
}, | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"True" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 2 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"collapsed": true, | |
"jupyter": { | |
"outputs_hidden": true | |
}, | |
"tags": [], | |
"id": "98985f48-27b8-4502-9492-91934619799e" | |
}, | |
"source": [ | |
"# !pip install --quiet umap\n", | |
"# %pip (rather than !pip) installs into the environment of the running kernel\n", | |
"%pip install --quiet gensim\n", | |
"%pip install --quiet matplotlib\n", | |
"%pip install --quiet seaborn" | |
], | |
"id": "98985f48-27b8-4502-9492-91934619799e", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"collapsed": true, | |
"jupyter": { | |
"outputs_hidden": true | |
}, | |
"tags": [], | |
"id": "c4de968a-a729-4e58-aee4-73a8fcf7ddcb", | |
"outputId": "6a78d278-ab97-4fd8-aca7-106653885b5e" | |
}, | |
"source": [ | |
"# !pip install --upgrade torch" | |
], | |
"id": "c4de968a-a729-4e58-aee4-73a8fcf7ddcb", | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Collecting torch\n", | |
" Downloading torch-1.8.1-cp36-none-macosx_10_9_x86_64.whl (119.5 MB)\n", | |
"\u001b[K |████████████████████████████████| 119.5 MB 2.7 MB/s eta 0:00:012\n", | |
"\u001b[?25hRequirement already satisfied, skipping upgrade: typing-extensions in /Users/setiadeepanshu/miniconda3/envs/paperai/lib/python3.6/site-packages (from torch) (3.7.4.3)\n", | |
"Requirement already satisfied, skipping upgrade: dataclasses; python_version < \"3.7\" in /Users/setiadeepanshu/miniconda3/envs/paperai/lib/python3.6/site-packages (from torch) (0.7)\n", | |
"Requirement already satisfied, skipping upgrade: numpy in /Users/setiadeepanshu/miniconda3/envs/paperai/lib/python3.6/site-packages (from torch) (1.19.5)\n", | |
"Installing collected packages: torch\n", | |
" Attempting uninstall: torch\n", | |
" Found existing installation: torch 1.7.0\n", | |
" Uninstalling torch-1.7.0:\n", | |
" Successfully uninstalled torch-1.7.0\n", | |
"Successfully installed torch-1.8.1\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"tags": [], | |
"id": "ec75c4fd-bb4d-4771-af54-427a4e505c81" | |
}, | |
"source": [ | |
"# !pip install pandas==1.1.5\n", | |
"# !pip install numpy==1.19.5\n", | |
"# %pip (rather than !pip) installs into the environment of the running kernel\n", | |
"%pip install pyLDAvis" | |
], | |
"id": "ec75c4fd-bb4d-4771-af54-427a4e505c81", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"tags": [], | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "f2bee53d-35d2-4a27-9eb4-03d43851b5f6", | |
"outputId": "3ef2ba6a-d639-4f58-aafc-f9e2a7319e6d" | |
}, | |
"source": [ | |
"import pandas as pd\n", | |
"pd.set_option(\"display.max_colwidth\", 200)\n", | |
"import numpy as np\n", | |
"import re\n", | |
"import spacy\n", | |
"\n", | |
"import gensim\n", | |
"from gensim import corpora\n", | |
"\n", | |
"# libraries for visualization\n", | |
"import pyLDAvis\n", | |
"import pyLDAvis.gensim_models\n", | |
"import matplotlib.pyplot as plt\n", | |
"import seaborn as sns\n", | |
"%matplotlib inline" | |
], | |
"id": "f2bee53d-35d2-4a27-9eb4-03d43851b5f6", | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"/usr/local/lib/python3.7/dist-packages/past/types/oldstr.py:5: DeprecationWarning: Using or importing the ABCs from 'collections' instead of from 'collections.abc' is deprecated since Python 3.3,and in 3.9 it will stop working\n", | |
" from collections import Iterable\n", | |
"/usr/local/lib/python3.7/dist-packages/past/builtins/misc.py:4: DeprecationWarning: Using or importing the ABCs from 'collections' instead of from 'collections.abc' is deprecated since Python 3.3,and in 3.9 it will stop working\n", | |
" from collections import Mapping\n", | |
"/usr/local/lib/python3.7/dist-packages/sklearn/linear_model/_least_angle.py:30: DeprecationWarning: `np.float` is a deprecated alias for the builtin `float`. To silence this warning, use `float` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.float64` here.\n", | |
"Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n", | |
" method='lar', copy_X=True, eps=np.finfo(np.float).eps,\n", | |
"/usr/local/lib/python3.7/dist-packages/sklearn/linear_model/_least_angle.py:169: DeprecationWarning: `np.float` is a deprecated alias for the builtin `float`. To silence this warning, use `float` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.float64` here.\n", | |
"Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n", | |
" method='lar', copy_X=True, eps=np.finfo(np.float).eps,\n", | |
"/usr/local/lib/python3.7/dist-packages/sklearn/linear_model/_least_angle.py:286: DeprecationWarning: `np.float` is a deprecated alias for the builtin `float`. To silence this warning, use `float` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.float64` here.\n", | |
"Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n", | |
" eps=np.finfo(np.float).eps, copy_Gram=True, verbose=0,\n", | |
"/usr/local/lib/python3.7/dist-packages/sklearn/linear_model/_least_angle.py:858: DeprecationWarning: `np.float` is a deprecated alias for the builtin `float`. To silence this warning, use `float` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.float64` here.\n", | |
"Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n", | |
" eps=np.finfo(np.float).eps, copy_X=True, fit_path=True):\n", | |
"/usr/local/lib/python3.7/dist-packages/sklearn/linear_model/_least_angle.py:1094: DeprecationWarning: `np.float` is a deprecated alias for the builtin `float`. To silence this warning, use `float` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.float64` here.\n", | |
"Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n", | |
" eps=np.finfo(np.float).eps, copy_X=True, fit_path=True,\n", | |
"/usr/local/lib/python3.7/dist-packages/sklearn/linear_model/_least_angle.py:1120: DeprecationWarning: `np.float` is a deprecated alias for the builtin `float`. To silence this warning, use `float` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.float64` here.\n", | |
"Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n", | |
" eps=np.finfo(np.float).eps, positive=False):\n", | |
"/usr/local/lib/python3.7/dist-packages/sklearn/linear_model/_least_angle.py:1349: DeprecationWarning: `np.float` is a deprecated alias for the builtin `float`. To silence this warning, use `float` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.float64` here.\n", | |
"Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n", | |
" max_n_alphas=1000, n_jobs=None, eps=np.finfo(np.float).eps,\n", | |
"/usr/local/lib/python3.7/dist-packages/sklearn/linear_model/_least_angle.py:1590: DeprecationWarning: `np.float` is a deprecated alias for the builtin `float`. To silence this warning, use `float` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.float64` here.\n", | |
"Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n", | |
" max_n_alphas=1000, n_jobs=None, eps=np.finfo(np.float).eps,\n", | |
"/usr/local/lib/python3.7/dist-packages/sklearn/linear_model/_least_angle.py:1723: DeprecationWarning: `np.float` is a deprecated alias for the builtin `float`. To silence this warning, use `float` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.float64` here.\n", | |
"Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n", | |
" eps=np.finfo(np.float).eps, copy_X=True, positive=False):\n", | |
"/usr/local/lib/python3.7/dist-packages/sklearn/decomposition/_lda.py:29: DeprecationWarning: `np.float` is a deprecated alias for the builtin `float`. To silence this warning, use `float` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.float64` here.\n", | |
"Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n", | |
" EPS = np.finfo(np.float).eps\n" | |
], | |
"name": "stderr" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "875f1c3c-47a0-499e-b151-f6ee444eb351" | |
}, | |
"source": [ | |
"df = pd.read_csv(\"asks.csv\")" | |
], | |
"id": "875f1c3c-47a0-499e-b151-f6ee444eb351", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 1000 | |
}, | |
"id": "d15cdedb-1741-4a29-848d-204b43cc78e6", | |
"outputId": "a8d27036-f00f-499c-bd60-c038d8ee2993" | |
}, | |
"source": [ | |
"df.head()" | |
], | |
"id": "d15cdedb-1741-4a29-848d-204b43cc78e6", | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>__v</th>\n", | |
" <th>_id</th>\n", | |
" <th>body</th>\n", | |
" <th>communities</th>\n", | |
" <th>createdAt</th>\n", | |
" <th>desiredResponseType</th>\n", | |
" <th>followers</th>\n", | |
" <th>isActive</th>\n", | |
" <th>lookingFor</th>\n", | |
" <th>owner</th>\n", | |
" <th>repliesSearchBlob</th>\n", | |
" <th>sentToSlacks</th>\n", | |
" <th>tags</th>\n", | |
" <th>updatedAt</th>\n", | |
" <th>upvotes</th>\n", | |
" <th>visibility</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>5</td>\n", | |
" <td>5be9bbf1e8440d56f772cd16</td>\n", | |
" <td>Looking for advice on how to do a friends-and-family fundraising round for a startup.</td>\n", | |
" <td>[{\"$oid\":\"5bc8933342fc75b1f8cb4d36\"}]</td>\n", | |
" <td>2018-11-12T17:44:17.606Z</td>\n", | |
" <td>quick-reply</td>\n", | |
" <td>[{\"$oid\":\"5d9cd884c138fc005e032d94\"},{\"$oid\":\"5bfdc12c7c6f2b001d1435b4\"},{\"$oid\":\"5be90dfbab55f1001d99024d\"}]</td>\n", | |
" <td>True</td>\n", | |
" <td>advice</td>\n", | |
" <td>5bc8933542fc75b1f8cb4d38</td>\n", | |
" <td>i can help! cool i can help happy to chat</td>\n", | |
" <td>[]</td>\n", | |
" <td>[\"startups\"]</td>\n", | |
" <td>2019-12-11T02:57:04.221Z</td>\n", | |
" <td>[{\"$oid\":\"5be90dfbab55f1001d99024d\"}]</td>\n", | |
" <td>specific-communities</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1</td>\n", | |
" <td>5bec5e1d5c0fbf001d4ed176</td>\n", | |
" <td>Sales pitch coaching</td>\n", | |
" <td>[{\"$oid\":\"5bc8933342fc75b1f8cb4d36\"}]</td>\n", | |
" <td>2018-11-14T17:40:45.134Z</td>\n", | |
" <td>chat</td>\n", | |
" <td>[]</td>\n", | |
" <td>True</td>\n", | |
" <td>advice</td>\n", | |
" <td>5beb303501d7a2001d44e479</td>\n", | |
" <td>NaN</td>\n", | |
" <td>[]</td>\n", | |
" <td>[\"coaching\",\"sales\"]</td>\n", | |
" <td>2018-11-14T17:40:45.134Z</td>\n", | |
" <td>[]</td>\n", | |
" <td>specific-communities</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>3</td>\n", | |
" <td>5bed00802bb378001d84ab9f</td>\n", | |
" <td>I would like advice on bettering my career options and opening more roads. I have a Bachelors in Psychology, and am looking to pursue a Masters degree in Applied Psychology. I would like advice on...</td>\n", | |
" <td>[{\"$oid\":\"5be7f9fb765dd919cbf2b349\"}]</td>\n", | |
" <td>2018-11-15T05:13:36.494Z</td>\n", | |
" <td>quick-reply</td>\n", | |
" <td>[{\"$oid\":\"5c64c8a2d01619007795ec90\"},{\"$oid\":\"5be90dfbab55f1001d99024d\"}]</td>\n", | |
" <td>True</td>\n", | |
" <td>advice</td>\n", | |
" <td>5beb0693f7d184001de5f110</td>\n", | |
" <td>hi maria i have a few contacts that may be useful for you, feel free to mssg me if you'd like to be connected! hi maria, \\nhave a look at ciis located in san francisco. \\nhttps://www.ciis.edu\\nor ...</td>\n", | |
" <td>[]</td>\n", | |
" <td>[\"career advice\"]</td>\n", | |
" <td>2019-12-11T02:56:59.683Z</td>\n", | |
" <td>[]</td>\n", | |
" <td>specific-communities</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>2</td>\n", | |
" <td>5bfcc04f9ac565001d95b7a4</td>\n", | |
" <td>I'm hoping to find a connection to someone who idetifies the same way I do and for them to share their professional experiences on how I too may advance in my professional and academic career.</td>\n", | |
" <td>[{\"$oid\":\"5be7f9fb765dd919cbf2b349\"}]</td>\n", | |
" <td>2018-11-27T03:55:59.815Z</td>\n", | |
" <td>chat</td>\n", | |
" <td>[{\"$oid\":\"5cffe8c7fcf88d00684add3e\"}]</td>\n", | |
" <td>True</td>\n", | |
" <td>connection</td>\n", | |
" <td>5bfcbfd99ac565001d95b678</td>\n", | |
" <td>hi crystal, \\n\\ni would love to connect after reading your message! some days i feel like it would be nice to sit down and chit chat about my professional life and career, and where i would like f...</td>\n", | |
" <td>[]</td>\n", | |
" <td>[\"connections\",\"advice\",\"mentorship\",\"academic adivce\",\"professional advice\",\"mba application advice\"]</td>\n", | |
" <td>2019-06-14T06:56:57.665Z</td>\n", | |
" <td>[]</td>\n", | |
" <td>specific-communities</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>4</td>\n", | |
" <td>5be9a38eb6619e001d47acc9</td>\n", | |
" <td>Best coworking spaces near Palo Alto</td>\n", | |
" <td>[{\"$oid\":\"5bc8933342fc75b1f8cb4d36\"}]</td>\n", | |
" <td>2018-11-12T16:00:14.970Z</td>\n", | |
" <td>quick-reply</td>\n", | |
" <td>[{\"$oid\":\"5dd772ccb67fc2005d208f52\"}]</td>\n", | |
" <td>True</td>\n", | |
" <td>advice</td>\n", | |
" <td>5be90dfbab55f1001d99024d</td>\n", | |
" <td>NaN</td>\n", | |
" <td>[]</td>\n", | |
" <td>[]</td>\n", | |
" <td>2020-04-05T06:10:56.557Z</td>\n", | |
" <td>[{\"$oid\":\"5be90dfbab55f1001d99024d\"},{\"$oid\":\"5bc89373999a1a7c8a2e7c22\"}]</td>\n", | |
" <td>specific-communities</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" __v ... visibility\n", | |
"0 5 ... specific-communities\n", | |
"1 1 ... specific-communities\n", | |
"2 3 ... specific-communities\n", | |
"3 2 ... specific-communities\n", | |
"4 4 ... specific-communities\n", | |
"\n", | |
"[5 rows x 16 columns]" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 7 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "bbbaa5ad-3edb-4119-8ebb-333f04772587" | |
}, | |
"source": [ | |
"def freq_words(x, terms = 30):\n", | |
"    \"\"\"Plot a bar chart of the most frequent words in a collection of texts.\n", | |
"\n", | |
"    x     -- iterable of strings; they are joined with spaces and split on whitespace\n", | |
"    terms -- how many of the most frequent words to show (default 30)\n", | |
"\n", | |
"    Draws the figure with matplotlib/seaborn and returns nothing.\n", | |
"    \"\"\"\n", | |
"    tokens = ' '.join(x).split()\n", | |
"    fdist = FreqDist(tokens)\n", | |
"\n", | |
"    # one row per distinct word, then keep the `terms` rows with the highest count\n", | |
"    words_df = pd.DataFrame({'word': list(fdist.keys()), 'count': list(fdist.values())})\n", | |
"    top_words = words_df.nlargest(n=terms, columns=\"count\")\n", | |
"\n", | |
"    plt.figure(figsize=(20,5))\n", | |
"    ax = sns.barplot(data=top_words, x=\"word\", y=\"count\")\n", | |
"    ax.set(ylabel='Count')\n", | |
"    plt.show()" | |
], | |
"id": "bbbaa5ad-3edb-4119-8ebb-333f04772587", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "7f6ca5c7-91f6-42c0-b37b-939ffa3ecbef", | |
"outputId": "21545647-51ac-4e9e-f1e7-df538fb4906f" | |
}, | |
"source": [ | |
"# remove unwanted characters, numbers and symbols: anything that is not an ASCII letter or '#'\n", | |
"# becomes a space. regex=True is explicit because the pattern is a regular expression and\n", | |
"# newer pandas versions default `regex` to False, which would treat it as literal text.\n", | |
"df['body'] = df['body'].str.replace(\"[^a-zA-Z#]\", \" \", regex=True)" | |
], | |
"id": "7f6ca5c7-91f6-42c0-b37b-939ffa3ecbef", | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:2: FutureWarning: The default value of regex will change from True to False in a future version.\n", | |
" \n" | |
], | |
"name": "stderr" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "d35f8e9f-32a7-4489-9e9a-0c419eaf9583" | |
}, | |
"source": [ | |
"from nltk.corpus import stopwords\n", | |
"stop_words = stopwords.words('english')" | |
], | |
"id": "d35f8e9f-32a7-4489-9e9a-0c419eaf9583", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "9f6a70ff-973d-4704-948b-8fba23eca5af" | |
}, | |
"source": [ | |
"# function to remove stopwords\n", | |
"def remove_stopwords(rev):\n", | |
"    \"\"\"Return the tokens of `rev` joined by single spaces, minus those found in `stop_words`.\n", | |
"\n", | |
"    The membership test is case-sensitive, so callers should pass lowercased tokens.\n", | |
"    \"\"\"\n", | |
"    rev_new = \" \".join([i for i in rev if i not in stop_words])\n", | |
"    return rev_new\n", | |
"\n", | |
"# remove short words (length < 3); missing bodies become empty text so .split() cannot fail on NaN\n", | |
"df['body'] = df['body'].fillna('').apply(lambda x: ' '.join([w for w in x.split() if len(w)>2]))\n", | |
"\n", | |
"# lowercase BEFORE removing stopwords: NLTK's English stopwords are lowercase, so filtering\n", | |
"# the original casing would let capitalised stopwords (e.g. \"The\", \"How\") through\n", | |
"reviews = [remove_stopwords(r.lower().split()) for r in df['body']]" | |
], | |
"id": "9f6a70ff-973d-4704-948b-8fba23eca5af", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 293 | |
}, | |
"id": "245c9dde-ea45-410d-b060-f94e07ce90c9", | |
"outputId": "0d5c624d-3937-44b0-ad79-0243d0fca96f" | |
}, | |
"source": [ | |
"freq_words(reviews, 20)" | |
], | |
"id": "245c9dde-ea45-410d-b060-f94e07ce90c9", | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "display_data", | |
"data": { | |
"image/png": "iVBORw0KGgoAAAANSUhEUgAABJIAAAE9CAYAAABQn0iDAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO3deZwsVX03/s9XEFFRUbkhBDSXCGqMidvVqKhBzWNwxQW3xyiokWiMSxIxmpjIk5/5xS1xj4qKoEFEEQR3EUVwhYvILkpwAYNwfRR3VPA8f1QNtxl6ZmqG6emZy/v9es1rqk9XV31PV/U51d86VV2ttQAAAADAQq437QAAAAAAWBskkgAAAAAYRCIJAAAAgEEkkgAAAAAYRCIJAAAAgEEkkgAAAAAYZOtpB3Bt7LDDDm39+vXTDgMAAABgi3Hqqad+v7W2btxzazqRtH79+mzcuHHaYQAAAABsMarq23M959I2AAAAAAaRSAIAAABgEIkkAAAAAAaRSAIAAABgEIkkAAAAAAaRSAIAAABgEIkkAAAAAAaRSAIAAABgEIkkAAAAAAaRSAIAAABgEIkkAAAAAAbZetoBTMKmN//XtENYlHXP+vNphwAAAACwICOSAAAAABhkYomkqjq4qi6tqrPGPPd3VdWqaof+cVXV66vq/Ko6o6ruOqm4AAAAAFiaSY5IOiTJXrMLq+pWSR6U5DsjxQ9Osnv/t3+SN08wLgAAAACWYGKJpNbaiUl+MOap1yR5YZI2UrZ3kne1zpeSbF9VO00qNgAAAAAWb0XvkVRVeyf5bmvt9FlP7ZzkwpHHF/Vl45axf1VtrKqNmzZtmlCkAAAAAMy2YomkqrpRkn9I8s/XZjmttYNaaxtaaxvWrVu3PMEBAAAAsKCtV3Bdt0mya5LTqypJdknylaq6R5LvJrnVyLy79GUAAAAArBIrNiKptXZma+23WmvrW2vr012+dtfW2veSHJvkKf2vt90zyY9aaxevVGwAAAAALGxiiaSqOjzJF5PcrqouqqqnzzP7R5NckOT8JG9L8leTigsAAACApZnYpW2ttScu8Pz6kemW5NmTigUAAACAa29Ff7UNAAAAgLVLIgkAAACAQSSSAAAAABhEIgkAAACAQSSSAAAAABhEIgkAAACAQSSSAAAAABhEIgkAAACAQSSSAAAAABhEIgkAAACAQSSSAAAAABhEIgkAAACAQSSSAAAAABhEIgkAAACAQSSSAAAAABhEIgkAAACAQSSSAAAAABhEIgkAAACAQSSSAAAAABhEIgkAAACAQSSSAAAAABhEIgkAAACAQSSSAAAAABhEIgkAAACAQSSSAAAAABhEIgkAAACAQSSSAAAAABhkYomkqjq4qi6tqrNGyl5VVV+rqjOq6uiq2n7kuRdX1flVdV5V/dmk4gIAAABgaSY5IumQJHvNKjsuyR1ba3+U5OtJXpwkVXWHJE9I8gf9a/6zqraaYGwAAAAALNLEEkmttROT/GBW2Sdba1f0D7+UZJd+eu8k722t/bK19s0k5ye5x6RiAwAAAGDxpnmPpKcl+Vg/vXOSC0eeu6gvAwAAAGCVmEoiqar+MckVSQ5bwmv3r6qNVbVx06ZNyx8cAAAAAGOteCKpqvZL8rAkT2qttb74u0luNTLbLn3ZNbTWDmqtbWitbVi3bt1EYwUAAABgsxVNJFXVXklemOQRrbWfjzx1bJInVNUNqmrXJLsnOXklYwMAAABgfltPasFVdXiSPZPsUFUXJXlpul9pu0GS46oqSb7UWntma+3sqnpfknPSXfL27NbalZOKDQAAAIDFm1giqbX2xDHF75hn/n9N8q+TigcAAACAa2eav9oGAAAAwBoikQQAAADAIBJJAAAAAAwikQQAAADAIBJJAAAAAAwikQQAAADAIBJJAAAAAAwikQQAAADAIBJJAAAAAAwikQQAAADAIBJJAAAAAAwikQQAAA
DAIBJJAAAAAAwikQQAAADAIBJJAAAAAAwikQQAAADAIBJJAAAAAAwikQQAAADAIBJJAAAAAAwikQQAAADAIBJJAAAAAAwikQQAAADAIBJJAAAAAAwikQQAAADAIBJJAAAAAAwikQQAAADAIBJJAAAAAAwysURSVR1cVZdW1VkjZbeoquOq6hv9/5v35VVVr6+q86vqjKq666TiAgAAAGBpJjki6ZAke80qe1GS41truyc5vn+cJA9Osnv/t3+SN08wLgAAAACWYGKJpNbaiUl+MKt47ySH9tOHJnnkSPm7WudLSbavqp0mFRsAAAAAi7fS90jasbV2cT/9vSQ79tM7J7lwZL6L+jIAAAAAVomp3Wy7tdaStMW+rqr2r6qNVbVx06ZNE4gMAAAAgHFWOpF0ycwla/3/S/vy7ya51ch8u/Rl19BaO6i1tqG1tmHdunUTDRYAAACAzVY6kXRskn376X2THDNS/pT+19vumeRHI5fAAQAAALAKbD2pBVfV4Un2TLJDVV2U5KVJXp7kfVX19CTfTvK4fvaPJnlIkvOT/DzJUycVFwAAAABLM7FEUmvtiXM89cAx87Ykz55ULAAAAABce1O72TYAAAAAa4tEEgAAAACDSCQBAAAAMIhEEgAAAACDSCQBAAAAMIhEEgAAAACDSCQBAAAAMIhEEgAAAACDSCQBAAAAMIhEEgAAAACDSCQBAAAAMIhEEgAAAACDSCQBAAAAMIhEEgAAAACDSCQBAAAAMIhEEgAAAACDSCQBAAAAMIhEEgAAAACDSCQBAAAAMIhEEgAAAACDSCQBAAAAMIhEEgAAAACDSCQBAAAAMIhEEgAAAACDSCQBAAAAMIhEEgAAAACDSCQBAAAAMMigRFJV7TGkbKiq+puqOruqzqqqw6tq26rataq+XFXnV9URVbXNUpcPAAAAwPIbOiLpDQPLFlRVOyd5bpINrbU7JtkqyROSvCLJa1pruyX5YZKnL2X5AAAAAEzG1vM9WVX3SnLvJOuq6m9HnrppugTQtVnvDavq10lulOTiJA9I8r/75w9NcmCSN1+LdQAAAACwjBYakbRNku3SJX5uMvL34yT7LGWFrbXvJnl1ku+kSyD9KMmpSS5rrV3Rz3ZRkp2XsnwAAAAAJmPeEUmttc8m+WxVHdJa+/ZyrLCqbp5k7yS7JrksyfuT7LWI1++fZP8kufWtb70cIQEAAAAwwLyJpBE3qKqDkqwffU1r7QFLWOefJvlma21TklTVUUn2SLJ9VW3dj0raJcl3x724tXZQkoOSZMOGDW0J6wcAAABgCYYmkt6f5C1J3p7kymu5zu8kuWdV3SjJL5I8MMnGJJ9Jd7nce5Psm+SYa7keAAAAAJbR0ETSFa21ZbnxdWvty1V1ZJKvJLkiyWnpRhh9JMl7q+plfdk7lmN9AAAAACyPoYmkD1XVXyU5OskvZwpbaz9Yykpbay9N8tJZxRckucdSlgcAAADA5A1NJO3b/z9gpKwl+b3lDQcAAACA1WpQIqm1tuukAwEAAABgdRuUSKqqp4wrb629a3nDAQAAAGC1Gnpp291HprdN90trX0kikQQAAABwHTH00rbnjD6uqu2TvHciEQEAAACwKl1via/7WRL3TQIAAAC4Dhl6j6QPpfuVtiTZKsnvJ3nfpIICAAAAYPUZeo+kV49MX5Hk2621iyYQDwAAAACr1KBL21prn03ytSQ3SXLzJL+aZFAAAAAArD6DEklV9bgkJyd5bJLHJflyVe0zycAAAAAAWF2GXtr2j0nu3lq7NEmqal2STyU5clKBAQAAALC6DP3VtuvNJJF6/3cRrwUAAABgCzB0RNLHq+oTSQ7vHz8+yUcnExIAAAAAq9G8iaSq2i3Jjq21A6rq0Unu0z/1xSSHTTo4AAAAAFaPhUYkvTbJi5OktXZUkqOSpKr+sH/u4RONDgAAAIBVY6H7HO3YWjtzdmFftn4iEQEAAACwKi2USNp+nuduuJyBAAAAALC6LZ
RI2lhVz5hdWFV/keTUyYQEAAAAwGq00D2Snp/k6Kp6UjYnjjYk2SbJoyYZGAAAAACry7yJpNbaJUnuXVX3T3LHvvgjrbVPTzwyAAAAAFaVhUYkJUlaa59J8pkJxwIAAADAKrbQPZIAAAAAIIlEEgAAAAADSSQBAAAAMIhEEgAAAACDSCQBAAAAMIhEEgAAAACDTCWRVFXbV9WRVfW1qjq3qu5VVbeoquOq6hv9/5tPIzYAAAAAxpvWiKTXJfl4a+32Se6U5NwkL0pyfGtt9yTH948BAAAAWCVWPJFUVTdLcr8k70iS1tqvWmuXJdk7yaH9bIcmeeRKxwYAAADA3KYxImnXJJuSvLOqTquqt1fVjZPs2Fq7uJ/ne0l2nEJsAAAAAMxhGomkrZPcNcmbW2t3SfKzzLqMrbXWkrRxL66q/atqY1Vt3LRp08SDBQAAAKAzjUTSRUkuaq19uX98ZLrE0iVVtVOS9P8vHffi1tpBrbUNrbUN69atW5GAAQAAAOhGB62o1tr3qurCqrpda+28JA9Mck7/t2+Sl/f/j1np2NaK7735ZdMOYVF++1kvmXYIAAAAwDJY8URS7zlJDquqbZJckOSp6UZHva+qnp7k20keN6XYAAAAABhjKomk1tpXk2wY89QDVzoWAAAAAIaZxj2SAAAAAFiDJJIAAAAAGGRa90iCsb72pr2nHcKi3f7Z7gsPAADAdYMRSQAAAAAMIpEEAAAAwCASSQAAAAAMIpEEAAAAwCASSQAAAAAMIpEEAAAAwCASSQAAAAAMIpEEAAAAwCASSQAAAAAMIpEEAAAAwCASSQAAAAAMIpEEAAAAwCASSQAAAAAMIpEEAAAAwCASSQAAAAAMIpEEAAAAwCASSQAAAAAMIpEEAAAAwCASSQAAAAAMIpEEAAAAwCASSQAAAAAMsvW0A4DrkhPe9tBph7Boez7jI9MOAQAAgFXCiCQAAAAABpFIAgAAAGAQiSQAAAAABplaIqmqtqqq06rqw/3jXavqy1V1flUdUVXbTCs2AAAAAK5pmiOSnpfk3JHHr0jymtbabkl+mOTpU4kKAAAAgLGmkkiqql2SPDTJ2/vHleQBSY7sZzk0ySOnERsAAAAA401rRNJrk7wwyW/6x7dMcllr7Yr+8UVJdh73wqrav6o2VtXGTZs2TT5SAAAAAJJMIZFUVQ9Lcmlr7dSlvL61dlBrbUNrbcO6deuWOToAAAAA5rL1FNa5R5JHVNVDkmyb5KZJXpdk+6rauh+VtEuS704hNgAAAADmsOIjklprL26t7dJaW5/kCUk+3Vp7UpLPJNmnn23fJMesdGwAAAAAzG2av9o2298n+duqOj/dPZPeMeV4AAAAABgxjUvbrtJaOyHJCf30BUnuMc14AAAAAJjbahqRBAAAAMAqJpEEAAAAwCASSQAAAAAMMtV7JAFbliPfude0Q1i0fZ768cHzvvXdfzbBSJbfXz75E9MOAQAA2MIYkQQAAADAIEYkAZAkOfB9a2vE1YGPM+IKAABWmhFJAAAAAAxiRBIAW7wHH/OYaYewaB/b+wPTDgEAAK7BiCQAAAAABpFIAgAAAGAQiSQAAAAABpFIAgAAAGAQN9sGgDXuIUe/bNohLNpHH/WSwfM+9Kg3TzCSyfjIo5817RAAACbCiCQAAAAABjEiCQBgih525GHTDmFRPrzPk6YdAgAwRUYkAQAAADCIEUkAAEzMI4780LRDWJRj93n4tEMAgFXNiCQAAAAABjEiCQAAluBRH/jctENYtKMfc59phwDAGmdEEgAAAACDSCQBAAAAMIhEEgAAAACDSCQBAAAAMIibbQMAANfw+KPOn3YIi3bEo3cbPO+bjr5kgpFMxrMftePgeT92xPcnGMnye/Djd5h2CMBARiQBAAAAMIgRSQAAAKwpp7390mmHsCh3+YvfGjzvxa/87gQjmYydXrjztENgBRmRBAAAAMAgK55IqqpbVdVnquqcqjq7qp7Xl9+iqo6rqm
/0/2++0rEBAAAAMLdpjEi6IsnftdbukOSeSZ5dVXdI8qIkx7fWdk9yfP8YAAAAgFVixe+R1Fq7OMnF/fRPqurcJDsn2TvJnv1shyY5Icnfr3R8AAAAwGRc8tpTpx3Cou34/LsNnvfSN35ygpFMxm/99YMWNf9U75FUVeuT3CXJl5Ps2CeZkuR7SYb/tiUAAAAAEze1RFJVbZfkA0me31r78ehzrbWWpM3xuv2ramNVbdy0adMKRAoAAABAMqVEUlVdP10S6bDW2lF98SVVtVP//E5Jxv6eY2vtoNbahtbahnXr1q1MwAAAAABM5VfbKsk7kpzbWvuPkaeOTbJvP71vkmNWOjYAAAAA5rbiN9tOskeSJyc5s6q+2pf9Q5KXJ3lfVT09ybeTPG4KsQEAAAAwh2n8atvnktQcTz9wJWMBAAAAYLip/mobAAAAAGuHRBIAAAAAg0gkAQAAADCIRBIAAAAAg0gkAQAAADCIRBIAAAAAg0gkAQAAADCIRBIAAAAAg0gkAQAAADCIRBIAAAAAg0gkAQAAADCIRBIAAAAAg0gkAQAAADCIRBIAAAAAg0gkAQAAADCIRBIAAAAAg0gkAQAAADCIRBIAAAAAg0gkAQAAADCIRBIAAAAAg0gkAQAAADCIRBIAAAAAg0gkAQAAADCIRBIAAAAAg0gkAQAAADCIRBIAAAAAg0gkAQAAADDIqkskVdVeVXVeVZ1fVS+adjwAAAAAdFZVIqmqtkrypiQPTnKHJE+sqjtMNyoAAAAAklWWSEpyjyTnt9YuaK39Ksl7k+w95ZgAAAAAyOpLJO2c5MKRxxf1ZQAAAABMWbXWph3DVapqnyR7tdb+on/85CR/3Fr765F59k+yf//wdknOW8EQd0jy/RVc30pTv7VtS67flly3RP3WOvVbu7bkuiXqt9ap39q1JdctUb+1Tv3WrpWu2++21taNe2LrFQxiiO8mudXI4136squ01g5KctBKBjWjqja21jZMY90rQf3Wti25flty3RL1W+vUb+3akuuWqN9ap35r15Zct0T91jr1W7tWU91W26VtpyTZvap2raptkjwhybFTjgkAAACArLIRSa21K6rqr5N8IslWSQ5urZ095bAAAAAAyCpLJCVJa+2jST467TjmMJVL6laQ+q1tW3L9tuS6Jeq31qnf2rUl1y1Rv7VO/dauLbluifqtdeq3dq2auq2qm20DAAAAsHqttnskAQAAALBKXWcSSVX10yW+bs+q+vCY8kdU1YuufWRrR1XtV1VvnHYcy6mqDqyqF4wpX19VZ00jpqWqqn+pqj+ddhxLNWT/mnYdq+oLA+Z5flXdaBnX+faqusNyLW+16D9j/3vk8Yaqev00Y2J5VNXvVNWR045jS7bYPmquvm4tq6pvVdUOY8oXbKdXi5lj09HPzGo51ppGfzfHOh65lD5wqcf98yxvzR0XTttSt91qNle7w5ajqp5ZVU/pp/erqt9ZwXUva7s1adeZRNJya60d21p7+bTjgCSpqq1aa//cWvvUtGOZpGnXsbV27wGzPT/Jog6sq2qredb5F621cxazvDVifZKrEkmttY2ttedOLxyWS2vtf1pr+0w7DrZcC7SZQ9rpVWU1fmam0d/N4ZFJtqhkxFpUVUu5r+4Wte2WsO+yBrXW3tJae1f/cL8kK5ZIWmuuc4mk6ryqqs6qqjOr6vHzlc967d2r6rSqus3oGaOqOqSqXl9VX6iqC6pqn778elX1n1X1tao6rqo+OvPchOq2vl/XYVV1blUdWVU3qqq7VdVnq+rUqvpEVe3Uz3/nqvpSVZ1RVUdX1c378hOq6nVV9dX+/bjHmHWtq6oPVNUp/d8ek6rXrPUeUFXP7adfU1Wf7qcf0Nf7if32O6uqXjHyup+OTO9TVYeMWfbdqur0qjo9ybOXKd4/r6qT+/fyrVX1x/37vW1V3biqzq6qO1Y38u3EqvpIVZ1XVW+pquv1y3hQVX2xqr5SVe+vqu368m9V1S
uq6itJHtvvhzP73lzb/IT+NSdX1der6r59+VZV9er+fTujqp4z33KW+F58sF/O2VW1f1/21D6Ok5Ps0ZfdrKq+PVL/G1fVhVV1/Vl1vHv/mTu9r89N+nq8qt8nz6iqv1xqvHPUYebs8Z79e3nkyGeu+n3zd5J8pqo+0887ZPsd0L8HM+tZX1Vn9tMnVNWGuZbVvw9H9c/vXVW/qKpt+n3sgjF1eEr/3pxeVe/u1/Xpvuz4qrp1P99c7drYuvfPzbXf7VZVn+rX+ZWquk2Slye5b3Wfjb+pkdGfVXWLfn85o7o26o/68gOr6uB+/RfU5rbgxtV9dk7v9+FrtN9zzVdVD6yuXT+zX/YNRrbPv/Xxbayqu/Z1+u+qeubIMg8Y2d/+z0j53/brOKuqnj+yXc+tqrdV9zn4ZFXdsH/uNlX18f69O6mqbj9gf1zMtnxz/15e0L/XB/exHDK6f1fXrp7dv35dX/6Mvo6nV9fu32iBfeSqM/c1x2eyqnaqrs2b6Wfuu1B9x9R/XJvy06r61z7WL1XVjtW1Dd+squv389x05nHN3w/O1VZOrI1ZpK1m70tD9qMa0MdPUl2Lfryq/r26PvpeI+U3rKqPVdUzZubr/8/XVj2kLzu134evMfJ8JdUco12q6qHVtfk71Bx9yQRjmlR/99h55nt5VZ3Tf7ZeXVX3TvKIJK/q99fbzLWPV9Wu/TLPrKqXTfi9+b3q+o0DquqoPp5vVNUrR+a5xn5cVY+tqv/op59XfR/dL+/zk4x5IVX1T9Udf36uqg6vqhf02/21VbUxyfNq7j7+Gn3EuG035fpNtN1ZI/V4VXX9xaeq6h61+VjqEf08c/XXc7ali4x9EscsC9VpvxoZ4VlVH66qPUdef7Xjhb78wH7/3yfJhiSH9fvwQ6vqgyPL+l9VdfRi34eB71XV+HzFe6vqoSPzHVLd99rpHJu01q4Tf0l+2v9/TJLjkmyVZMck30my0zzleyb5cJJ7Jzk1ya375eyX5I399CFJ3p8uMXeHJOf35fuk+wW66yX57SQ/TLLPBOu4PklLskf/+OAkByT5QpJ1fdnjkxzcT5+R5E/66X9J8tp++oQkb+un75fkrDF1fk+S+/TTt05y7gptx3smeX8/fVKSk5NcP8lL+7/vJFmX7hcJP53kkaPbf2S7HNJPH5jkBSPvx/366VfN1PtaxPr7ST6U5Pr94/9M8pQkL0vy6iRvSvLi/rk9k1ye5Pf6ffC4Ps4dkpyY5Mb9fH+f5J/76W8leeHI+g7pX3P9ebb5CUn+vZ9+SJJP9dPPSnJkkq37x7eYbzlLfD9u0f+/YZKzkuw8sr22SfL5kf3rmCT3H1nv22fVcZskFyS5e19+036b75/kJX3ZDZJsTLLrMu5/M+3Inkl+lGSXdJ/vL2bz5+FbSXbopxez/b46E2s/30w9TkjXkY1dVl/vC/qyVyc5JV1S7k+SHD4r/j9I8vWR+G6Rbh/dt3/8tCQfHHmvx7VrY+s+3/6S5MtJHtVPb5vuDPaeST48EttVj5O8IclL++kHJPnqyOf1C/223SHJ/+3X+5j0bVY/383m2H7XmC/JhUlu2z9+V5Lnj2yfZ/XTr0nXPtwk3f56SV/+oHS/nlH9e/HhdG3m3ZKcmeTGSbZLcnaSu6Rro69Icuf+9e9L8uf99PFJdu+n/zjJpxfYFxe7Ld/bx7l3kh8n+cM+5lNH4mlJntRP/3M2fx5vObLelyV5zgL7yPps7jfGfiaT/F2Sf+zLt0pyk2VoU27Z1+HhffkrR9b9zmzuD/bP5nZwvn5wXFs50TZmEXUfuy/NtR/l6n3dCRnTx69g7Evtx1uSx40s51v9+/CpJE8ZKZ+3nU7XBl2Yze3t4Rlpi1b4vZiJdfQzs1+SNyZ5VP/+3Dzz9CUrENvY93FkGyyqv5trvnSf3/OSq34IaPv+/yEZOXaeZx8/dmY/SHcy8KfL9V6MbqMkt0tyWpI79dvqgnR9ybZJvp3kVukSbN
fYj9N9FzilX96R6frrnZPsm+TfprEP9rHcPd0xyLbp+rlvJHlBurbiP/t55uvj5+sjJva9Z5F1nGi7s0bq8eB++ugkn+xfd6dsPsaaq7/eM3O0AYuIe1LHLAvVab/0xzL94w8n2XPk9eOOFw7M1fvLDf10JflaNn8G3jPz+mXcvgvlKx6V5NB+nm3S9WU3nGvbTXp/XMowxbXuPum+XF2Z5JKq+my6BnSu8h+nSwgclORBrbX/mWO5H2yt/SbJOTMZzX6Z7+/Lv1f9GZsJu7C1NnNW47+S/EOSOyY5rk8eb5Xk4qq6WbpO+rP9vIem+0Iw4/Akaa2dWN3Z2+1nredPk9xhJCF906rarrU26Ws7T01yt6q6aZJfJvlKui/Z903XIJ3QWtuUJFV1WLqD5A/Osayr9PXbvrV2Yl/07iQPvpaxPjDdF8pT+vfphkkuTfdl5ZR0iaPRS3lObq3NnJ06PN3+c3m6L2if75exTboGfMYRY9Z7u4zZ5iPPH9X/PzVdZ5h02/MtrbUrkqS19oOquuMCy1ms51bVo/rpWyV5cq6+vY5IctuRej0+yWeSPCFdEm52HS9urZ3Sx/vjfhkPSvJHtXnk382S7J7km9ci7rmc3Fq7qF/vV9O9l5+bNc89M3z7vS9dnV/e/589qmbsslprV1Q3Sub3k9wjyX+k2++3SneQMeoB6dqk7ydXbed7JXl0//y703WmM8a1a3PV/bKMb2tukmTn1trR/Tov71+XedwnXSea1tqnq+qW/Wc+ST7SWvtlkl9W1aXpOtgzk/x7f/btw6212fWecbX50rXv32ytfb1//tB0X0Be2z8+duR127XWfpLkJ1X1y77NeFD/d1o/33bp9rftkhzdWvtZX9ej0rVRx/br+2o//6lJ1ld3Nv7eSd4/8r7cYL43KIvflh9qrbXqRrpd0lqbGfF2drrt99Ukv8nmffK/srmtuGN1Z/i37+v2iZHlzrWPzJjrM3lKkoOrGyX0wZH3ZDFmtym7J/lVum2bdO/v/+qn357khen6g6cmecaAfnBcW7mSbcxCrrEvZfh+dI0+vrV22SSDHbHUfvzKJB+YtaxjkryytXbYHOsa11b9NF3yfWabHZ7uIHw1eUC69+RBrRyQKtAAAAtmSURBVLUfV9XDMn9fMmnL2d/NNd+P0h3zvKO6EWLj7k86X1u5R/p+I13794osv3Xp9rlHt9bOqaq7JDm+tfajPr5zkvxuuqTYNfbj1toHqxtJfJN0bdZ70u3f983m9mYa9khyTN8/X15VHxp5bma7zXdsOV8fsVqsZLszSUutx6+SfLxfxplJftla+3V/TLC+L5+rf/tVhrUB85nEMcuQOs1nruOFsfp43p3kz6vqnelGqD1lwHqWYq68xMeSvK660fN7JTmxtfaLFf7+c5XrYiJpKS5Ol6W/S5K5Ekm/HJle9HC/ZdRmPf5JkrNba/caLewPoBeznNmPr5fknjNfCldK30B8M12G+QvpzibfP8lu6c4S3G2ul45MbzvBEEdVuqzxi69W2A0F3i5d1nzbJD/rnxr3nleS41prT5xjHT8bU1YZs81HzOyrV2b+NmCh5QzWDyP90yT3aq39vKpOSJfVn+va+WOT/P9VdYt02/TTQ1eV7kzYShzEjH7m53ovF7P9jkh3cHxUuv7qG4tY1onpEp+/TneW7JB0B3kHLFSJBczVro2r+9j9pT9gXk7XWHdr7etVddd0I0deVlXHt9b+ZfYLZ8+XhfermXX9ZtZ6f5PNdf631tpbR19UVc9bRPw3TNeeXtZau/MC8VwbC9VlnJk26ZB0ZzVPr6r90p2dnL3cZHzfN+dnsqrul+ShSQ6pqv9om+9JsKA52pRtk/y69afkMvK5bK19vrph9Hsm2aq1dtaAfnBcW7mSbcxCZu9LO2b4frRQHz8x16Ifv7w/qB71+SR7VdV7Rrb7qCHt9Gr03+lGKN823ZnlhfqSSVvO/m7O+aq7zPKB6UYe/3W6L5
+jFmorJ70f/yjdyID7JJm5d+Fi97EvpEtmn5fuZM/T0n0h/btljXT5jG63uY4JD8ncfcSqsMLtzsRci3qM9o1XHQe01n5Tm+9/NbZ/6/vNlW5LhxyzDKnTFbn6rXxGvwOOPV5YwDvTJewuT5cYu2LAa5ZNa+3y/njnz9KdcH5v/9RUjk2uc/dIStdoP766awnXpcvUnjxPedKdaX9okn/rP0xDfT7JY6q7V9KOWZmG9dZ9hjfpbmT7pSTrZsqquyfEH/RnT35Ym+9L8eQknx1Zzsy1mPdJ8qOZsy0jPpnkOTMPqmqSX4BmOyndkNsT++lnphsRcHKSP6nuXgJbJXliNtfpkqr6/eruu/Oo2Qvsz8Re1tc3SZ60DHEen2SfqvqtJKnuvi+/m+StSf4pyWG5+hmze1R3jf/10r3/n0u3/faoqt36Zdy4qm6b+Z2XMdt8gdccl+QvZxrePoGzlOXM5WZJfth/4bt9ujOSN0y3vW7Zj0p47MzM/ci2U5K8Lt0Ik9md+HlJdqqqu/ex3aSP/RNJnlWb74Vy26q68RJjXqqfpBsanixi+7XW/jtdR/ZPGT/SbL5lnZTupqdf7M9E3TLd2cPZ99z4dLr7U9yyX8Yt0h2IPKF//km55iimocbuL/0onouq6pF9+Q2qu8fO6Ps020l9LDMHMN+fGXU2TnW/qPHz1tp/pbss9a4D57tXuhFBu/WzzG4HF/KJJE+rzff32Ln/vJ+U5JHV3Sfixtl8icpYfd2+WVWP7ZdTVXWnBdY9iW15vXRf4JKu/5g523iTdKPLrp/Ft41jP5N9W3hJa+1t6UYLjd1m8xjXpizkXelGALwzSQb0g4Prs8jYJ2Ux+9FCffykLaUfH+ef09024E2LWPd5SX6vqtb3j8feU23Kvp1udM27+n53KccCK2Ep/d3Y+fp29GattY8m+Zt0l6dcbR0LtJWfz9Xbv0n4Vbr2/Ck18qujY8y3H4/u+6elSwL8cgqfwVGfT/Lw6u6tuF2Sh42ZZ75jwrn6iPn6+WmYZruznJarHrNNsn+b5PHnfL6V5M79d/FbpRu5vxhX24dbd2XS/yR5SfpjiQmZLy9xRLpk9H2zeUTWVI5N1sqZmeV0dLovD6enO3Pxwtba96q7Wda48tsnSWvtkuqGFn+sqp42cF0fSHdm5Zx01zB+Jd3ZjEk6L8mzq+rgfr1vSLdzvb66s69bp7ts4+x012S/pf9Sd0G6nXLG5VV1WrpRM+Pq+9wkb6qqM/plnpiuIVsJJyX5x3Rfmn9WVZcnOam1dnFVvSjd5VCV7hKYY/rXvCjd8MVN6c7ujbtJ5VPTXWrR0iXKrpV+2PNLknyyTw79Ot1w2F+31t7TN/JfqKoHpMukn5Luvgi79XU4us+q75fk8OpvApyu8fp65tBa+1V1QxvHbfO5vD3dmc8zqurX6e6f8cYlLGcuH0/yzKo6N90++qV0I/0OTDek/bJ0w1RHHZHuMpM956jj45O8obqbFf8i3eiEt6cbzvqVqqp02/uRS4j32jgoycer6n9aa/df5PY7Il2CY9fZT7TWNs2zrC+nG40wc2nmGUl+e/aZstba2VX1r0k+W1VXpjvweE6Sd1bVAener9F2YLAF9rsnJ3lrVf1Lus/BY/sYr6zuBpaHZPPlYUm3Xxzcty8/T9dWzecP093Q8zf98p+1iPlulm4k2NbpPoNvWUSdP1ndJYVf7Ha3/DTdPY++Ut0NIWc6/be31k4b+eI6zpOSvLlvM66f7izT6fOsexLb8mfpEtovSXcZ7swX7H9Kt49t6v8v5ovBXJ/JPdPdZP7X6d63xQ4PH9emLOSwdCPRDh8pm68fHGc1tDHzGbofLdTHT9pS+vG5PC9de/HK1toLF1pxfxnAX6Vrp3+W7nO/6rTWvlZVT0rXDz483QiEwccCK2TR/d08fdlPkhxTVdum2/Z/2z/33iRvq+4Gw/tk7n38eUneU1V/n+5YayL6/fVh6U7AvXuOeebbj0
9Kd1nbia21K6vqwnQjtKemtXZKVR2brl++JN1lQj+aNc98ffxcfcTVtl1/wmyaptbuLLPlrMeoifVvkzz+XMDn013edU6Sc9N9F1+MQ9IdI/wi3QjoX6Q7lljXWjt3OQOdZWxeon/uk+nanmNaa7/qy6ZybDJzQzsmpPr7BvUZ2JPT3Qj7ewu9bonrWp9u9MYdr+VyTkh3k7GNyxAWA1Q36uIFrbVxZ4EAJq6qftpam+gvQU1T/yVo79bak6cdyzTp4692bFbpRhV8o7X2mmnHBdMy8pm4UbqTUvu31hb7pRu2eNX9CtxprbV3TDuWabsujkhaaR+u7qas2yT5/yaVRAIAxquqN6S7j9hDph0Lq8IzqmrfdMdmp6W75Byuyw6qqjuku4fMoZJIcE1VdWq60dur9Z5mK8qIJAAAAAAGuS7ebBsAAACAJZBIAgAAAGAQiSQAAAAABpFIAgBYJapqv/5XYQAAViWJJACAKamqraYdAwDAYkgkAQAsQVUdUFXP7adfU1Wf7qcfUFWHVdUTq+rMqjqrql4x8rqfVtW/V9XpSe5VVU+tqq9X1clJ9phObQAAhpFIAgBYmpOS3Lef3pBku6q6fl/29SSvSPKAJHdOcveqemQ/742TfLm1dqck/53k/6RLIN0nyR1WLnwAgMWTSAIAWJpTk9ytqm6a5JdJvpguoXTfJJclOaG1tqm1dkWSw5Lcr3/dlUk+0E//8ch8v0pyxEpWAABgsSSSAACWoLX26yTfTLJfki+kG6F0/yS7JfnWPC+9vLV25aTjAwCYBIkkAIClOynJC5Kc2E8/M8lpSU5O8idVtUN/Q+0nJvnsmNd/uZ/vlv1lcY9dmbABAJZGIgkAYOlOSrJTki+21i5JcnmSk1prFyd5UZLPJDk9yamttWNmv7if78B0l8V9Psm5KxQ3AMCSVGtt2jEAAAAAsAYYkQQAAADAIBJJAAAAAAwikQQAAADAIBJJAAAAAAwikQQAAADAIBJJAAAAAAwikQQAAADAIBJJAAAAAAzy/wCnb9mhnUEALgAAAABJRU5ErkJggg==\n", | |
"text/plain": [ | |
"<Figure size 1440x360 with 1 Axes>" | |
] | |
}, | |
"metadata": { | |
"tags": [], | |
"needs_background": "light" | |
} | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"collapsed": true, | |
"jupyter": { | |
"outputs_hidden": true | |
}, | |
"tags": [], | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "60188af1-b83f-4100-a16a-c47e8e08e424", | |
"outputId": "bb7a70ae-0073-4226-f56b-7fc522e4f6f6" | |
}, | |
"source": [ | |
"!python -m spacy download en" | |
], | |
"id": "60188af1-b83f-4100-a16a-c47e8e08e424", | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Requirement already satisfied: en_core_web_sm==2.2.5 from https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.2.5/en_core_web_sm-2.2.5.tar.gz#egg=en_core_web_sm==2.2.5 in /usr/local/lib/python3.7/dist-packages (2.2.5)\n", | |
"Requirement already satisfied: spacy>=2.2.2 in /usr/local/lib/python3.7/dist-packages (from en_core_web_sm==2.2.5) (2.2.4)\n", | |
"Requirement already satisfied: requests<3.0.0,>=2.13.0 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->en_core_web_sm==2.2.5) (2.23.0)\n", | |
"Requirement already satisfied: srsly<1.1.0,>=1.0.2 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->en_core_web_sm==2.2.5) (1.0.5)\n", | |
"Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->en_core_web_sm==2.2.5) (1.0.5)\n", | |
"Requirement already satisfied: catalogue<1.1.0,>=0.0.7 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->en_core_web_sm==2.2.5) (1.0.0)\n", | |
"Requirement already satisfied: plac<1.2.0,>=0.9.6 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->en_core_web_sm==2.2.5) (1.1.3)\n", | |
"Requirement already satisfied: numpy>=1.15.0 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->en_core_web_sm==2.2.5) (1.20.3)\n", | |
"Requirement already satisfied: preshed<3.1.0,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->en_core_web_sm==2.2.5) (3.0.5)\n", | |
"Requirement already satisfied: wasabi<1.1.0,>=0.4.0 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->en_core_web_sm==2.2.5) (0.8.2)\n", | |
"Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->en_core_web_sm==2.2.5) (2.0.5)\n", | |
"Requirement already satisfied: tqdm<5.0.0,>=4.38.0 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->en_core_web_sm==2.2.5) (4.41.1)\n", | |
"Requirement already satisfied: thinc==7.4.0 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->en_core_web_sm==2.2.5) (7.4.0)\n", | |
"Requirement already satisfied: setuptools in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->en_core_web_sm==2.2.5) (56.1.0)\n", | |
"Requirement already satisfied: blis<0.5.0,>=0.4.0 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->en_core_web_sm==2.2.5) (0.4.1)\n", | |
"Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.13.0->spacy>=2.2.2->en_core_web_sm==2.2.5) (3.0.4)\n", | |
"Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.13.0->spacy>=2.2.2->en_core_web_sm==2.2.5) (1.24.3)\n", | |
"Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.13.0->spacy>=2.2.2->en_core_web_sm==2.2.5) (2.10)\n", | |
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.13.0->spacy>=2.2.2->en_core_web_sm==2.2.5) (2020.12.5)\n", | |
"Requirement already satisfied: importlib-metadata>=0.20; python_version < \"3.8\" in /usr/local/lib/python3.7/dist-packages (from catalogue<1.1.0,>=0.0.7->spacy>=2.2.2->en_core_web_sm==2.2.5) (4.0.1)\n", | |
"Requirement already satisfied: typing-extensions>=3.6.4; python_version < \"3.8\" in /usr/local/lib/python3.7/dist-packages (from importlib-metadata>=0.20; python_version < \"3.8\"->catalogue<1.1.0,>=0.0.7->spacy>=2.2.2->en_core_web_sm==2.2.5) (3.7.4.3)\n", | |
"Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata>=0.20; python_version < \"3.8\"->catalogue<1.1.0,>=0.0.7->spacy>=2.2.2->en_core_web_sm==2.2.5) (3.4.1)\n", | |
"\u001b[38;5;2m✔ Download and installation successful\u001b[0m\n", | |
"You can now load the model via spacy.load('en_core_web_sm')\n", | |
"\u001b[38;5;2m✔ Linking successful\u001b[0m\n", | |
"/usr/local/lib/python3.7/dist-packages/en_core_web_sm -->\n", | |
"/usr/local/lib/python3.7/dist-packages/spacy/data/en\n", | |
"You can now load the model via spacy.load('en')\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"tags": [], | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "ce9998fa-80a8-4dab-a883-3e33d8ba2759", | |
"outputId": "86b8257d-6dda-4451-b116-f45e120b794f" | |
}, | |
"source": [ | |
"# Load the model by its full package name: the 'en' shortcut is deprecated and no longer resolves in spaCy v3.\n", | |
"# The parser and NER components are disabled because only POS tags and lemmas are read below.\n", | |
"nlp = spacy.load('en_core_web_sm', disable=['parser', 'ner'])\n", | |
"\n", | |
"def lemmatization(texts, tags=('NOUN', 'ADJ')):\n", | |
"    \"\"\"Lemmatize tokenized documents, keeping only tokens whose POS tag is in `tags`.\n", | |
"\n", | |
"    texts: iterable of token lists; each list is joined with single spaces before being passed to spaCy.\n", | |
"    tags: collection of coarse-grained POS tags (compared against token.pos_) to keep;\n", | |
"        defaults to nouns and adjectives.\n", | |
"    Returns a list with one list of lemma strings per input document, in input order.\n", | |
"    \"\"\"\n", | |
"    output = []\n", | |
"    for sent in texts:\n", | |
"        doc = nlp(\" \".join(sent))\n", | |
"        output.append([token.lemma_ for token in doc if token.pos_ in tags])\n", | |
"    return output" | |
], | |
"id": "ce9998fa-80a8-4dab-a883-3e33d8ba2759", | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"/usr/local/lib/python3.7/dist-packages/catalogue.py:138: DeprecationWarning: SelectableGroups dict interface is deprecated. Use select.\n", | |
" for entry_point in AVAILABLE_ENTRY_POINTS.get(self.entry_point_namespace, []):\n", | |
"/usr/local/lib/python3.7/dist-packages/catalogue.py:138: DeprecationWarning: SelectableGroups dict interface is deprecated. Use select.\n", | |
" for entry_point in AVAILABLE_ENTRY_POINTS.get(self.entry_point_namespace, []):\n", | |
"/usr/local/lib/python3.7/dist-packages/catalogue.py:126: DeprecationWarning: SelectableGroups dict interface is deprecated. Use select.\n", | |
" for entry_point in AVAILABLE_ENTRY_POINTS.get(self.entry_point_namespace, []):\n", | |
"/usr/local/lib/python3.7/dist-packages/catalogue.py:138: DeprecationWarning: SelectableGroups dict interface is deprecated. Use select.\n", | |
" for entry_point in AVAILABLE_ENTRY_POINTS.get(self.entry_point_namespace, []):\n" | |
], | |
"name": "stderr" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "30b89ef5-aeaa-4973-99a9-4f346d2fe728", | |
"outputId": "794938c4-729c-4c20-c083-40347e22eac8" | |
}, | |
"source": [ | |
"tokenized_reviews = pd.Series(reviews).apply(lambda x: x.split())\n", | |
"print(tokenized_reviews[1])" | |
], | |
"id": "30b89ef5-aeaa-4973-99a9-4f346d2fe728", | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"['sales', 'pitch', 'coaching']\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "b26cf451-ac9f-4eeb-be79-5c9f7d31bdc8", | |
"outputId": "e4916b5c-ca3b-4d01-812a-c22208ccea8e" | |
}, | |
"source": [ | |
"print(tokenized_reviews[2])" | |
], | |
"id": "b26cf451-ac9f-4eeb-be79-5c9f7d31bdc8", | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"['would', 'like', 'advice', 'bettering', 'career', 'options', 'opening', 'roads', 'bachelors', 'psychology', 'looking', 'pursue', 'masters', 'degree', 'applied', 'psychology', 'would', 'like', 'advice', 'anything', 'help', 'along', 'way']\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "491e5438-8722-4539-9bdf-c48d31beea68" | |
}, | |
"source": [ | |
"reviews_2 = lemmatization(tokenized_reviews)" | |
], | |
"id": "491e5438-8722-4539-9bdf-c48d31beea68", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "3oWBoWQl5L5G", | |
"outputId": "0347c941-75ca-45f6-97a2-17424550090e" | |
}, | |
"source": [ | |
"print(reviews_2[1]) # print lemmatized review" | |
], | |
"id": "3oWBoWQl5L5G", | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"['sale', 'pitch', 'coaching']\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "6470ea31-ae9a-431a-a793-b6920f66ede1", | |
"outputId": "27e769a9-e8c8-41dc-abd0-1417f439eb3d" | |
}, | |
"source": [ | |
"print(reviews_2[2])" | |
], | |
"id": "6470ea31-ae9a-431a-a793-b6920f66ede1", | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"['advice', 'career', 'option', 'road', 'bachelor', 'psychology', 'pursue', 'master', 'degree', 'psychology', 'advice', 'help', 'way']\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "6cdc9836-2005-4a35-b209-63e149ad9b16" | |
}, | |
"source": [ | |
"As you can see, we have not just lemmatized the words but also filtered them down to nouns and adjectives. Let’s de-tokenize the lemmatized reviews (join the tokens back into strings) and plot the most common words." | |
], | |
"id": "6cdc9836-2005-4a35-b209-63e149ad9b16" | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 294 | |
}, | |
"id": "4d77f574-667e-42dc-adf2-8e975947cf55", | |
"outputId": "75d84786-81c2-432d-c834-94867a9e0365" | |
}, | |
"source": [ | |
"# Re-join each lemmatized token list into a single space-separated string\n", | |
"reviews_3 = [' '.join(tokens) for tokens in reviews_2]\n", | |
"\n", | |
"df['body'] = reviews_3\n", | |
"\n", | |
"freq_words(df['body'], 20)" | |
], | |
"id": "4d77f574-667e-42dc-adf2-8e975947cf55", | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "display_data", | |
"data": { | |
"image/png": "iVBORw0KGgoAAAANSUhEUgAABIwAAAE9CAYAAABp+PnYAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO3debgmVX0n8O+PRUFcEOkQxiVtRqNxTNzafQmicdTEgAmijjHgqERjdExGEzMxkWTMTExMNHHHDdxRBMElbgiCKzaCoCIuiCNGoTVi1IgKnvmjzrXfun3X7vvet7v9fJ7nPrfeems5p5ZzTv3qVL3VWgsAAAAAzNlj1gkAAAAAYOciYAQAAADAiIARAAAAACMCRgAAAACMCBgBAAAAMCJgBAAAAMDIXrNOwEoceOCBbePGjbNOBgAAAMBu49xzz/1ma23DQt/tEgGjjRs3ZvPmzbNOBgAAAMBuo6q+sth3HkkDAAAAYETACAAAAIARASMAAAAARqYaMKqq/avqpKr6XFVdVFV3r6oDqup9VfWF/v+G00wDAAAAAKsz7R5G/5Tk3a21Wye5XZKLkjwjyemttVsmOb1/BgAAAGAnMbWAUVXdIMl9krwySVprP2qtXZnksCQn9MlOSHL4tNIAAAAAwOpNs4fRzZNsSfLqqjqvql5RVfslOai19vU+zTeSHDTFNAAAAACwStMMGO2V5I5JXtJau0OS72fe42ettZakLTRzVR1TVZuravOWLVummEwAAAAAJk0zYHRZkstaax/vn0/KEEC6vKoOTpL+/4qFZm6tHdda29Ra27Rhw4YpJhMAAACASVMLGLXWvpHkq1V1qz7qfkk+m+S0JEf1cUclOXVaaQAAAABg9faa8vKfnOT1VXWtJJckeUyGINWbq+qxSb6S5MgppwEAAACAVZhqwKi1dn6STQt8db8dXfaWl7xuRxex7jY88XdnnQQAAACAZU3zHUYAAAAA7IIEjAAAAAAYETACAAAAYETACAAAAIARASMAAAAARgSMAAAAABgRMAIAAABgRMAIAAAAgBEBIwAAAABGBIwAAAAAGBEwAgAAAGBEwAgAAACAEQEjAAAAAEYEjAAAAAAYETACAAAAYETACAAAAIARASMAAAAARgSMAAAAABgRMAIAAABgRMAIAAAAgBEBIwAAAABGBIwAAAAAGBEwAgAAAGBEwAgAAACAEQEjAAAAAEYEjAAAAAAYETACAAAAYETACAAAAIARASMAAAAARgSMAAAAABgRMAIAAABgRMAIAAAAgBEBIwAAAABGBIwAAAAAGBEwAgAAAGBkr2kuvKouTfLdJNckubq1tqmqDkhyYpKNSS5NcmRr7dvTTAcAAAAAK7cePYzu21q7fWttU//8jCSnt9ZumeT0/hkAAACAncQsHkk7LMkJffiEJIfPIA0AAAAALGLaAaOW5L1VdW5VHdPHHdRa+3of/kaSgxaasaqOqarNVbV5y5YtU04mAAAAAHOm+g6jJPdqrX2tqn4uyfuq6nOTX7bWWlW1hWZsrR2X5Lgk2bRp04LTAAAAALD2ptrDqLX2tf7/iiSnJLlLksur6uAk6f+vmGYaAAAAAFidqQWMqmq/qrre3HCSByT5dJLTkhzVJzsqyanTSgMAAAAAqzfNR9IOSnJKVc2t5w2ttXdX1SeSvLmqHpvkK0mOnGIaAAAAAFilqQWMWmuXJLndAuO/leR+01ovAAAAADtm2r+SBgAAAMAuRsAIAAAAgBEBIwAAAABGBIwAAAAAGBEwAgAAAGBEwAgAAACAEQEjAAAAAEYEjAAAAAAYETACAAAAYETACAAAAIARASMAAAAARgSMAAAAABgRMAIAAABgRMAIAAAAgBEBIwAAAABGBIwAAAAAGBEwAgAAAGBEwAgAAACAEQEjAAAAAEYEjAAAAAAYETACAAAAYETACAAAAIARASMAAAAARgSMAAAAABgRMA
IAAABgRMAIAAAAgBEBIwAAAABGBIwAAAAAGBEwAgAAAGBEwAgAAACAEQEjAAAAAEYEjAAAAAAYETACAAAAYETACAAAAIARASMAAAAARqYeMKqqPavqvKp6R/9886r6eFV9sapOrKprTTsNAAAAAKzcevQw+h9JLpr4/Jwkz2ut3SLJt5M8dh3SAAAAAMAKTTVgVFU3SfIbSV7RP1eSQ5Oc1Cc5Icnh00wDAAAAAKsz7R5Gz0/yJ0l+0j/fKMmVrbWr++fLktx4oRmr6piq2lxVm7ds2TLlZAIAAAAwZ2oBo6r6zSRXtNbO3Z75W2vHtdY2tdY2bdiwYY1TBwAAAMBi9prisu+Z5Leq6sFJ9kly/ST/lGT/qtqr9zK6SZKvTTENAAAAAKzS1HoYtdb+rLV2k9baxiSPSPKB1tqjkpyR5Ig+2VFJTp1WGgAAAABYvfX4lbT5/jTJH1fVFzO80+iVM0gDAAAAAIuY5iNpP9VaOzPJmX34kiR3WY/1AgAAALB6s+hhBAAAAMBOTMAIAAAAgBEBIwAAAABG1uUdRqzeN17y7FknYdV+/onPnHUSAAAAgDWghxEAAAAAIwJGAAAAAIwIGAEAAAAwImAEAAAAwIiAEQAAAAAjAkYAAAAAjAgYAQAAADAiYAQAAADAyF6zTgA/mz73osNmnYRVu/WTTp11EgAAAGBd6GEEAAAAwIiAEQAAAAAjAkYAAAAAjHiHEUzBmS//jVknYVUOefw7VzX9Sa9+4JRSMh1HPObds04CAADALkUPIwAAAABGBIwAAAAAGBEwAgAAAGBEwAgAAACAkRUFjKrqnisZBwAAAMCub6U9jF6wwnEAAAAA7OL2WurLqrp7knsk2VBVfzzx1fWT7DnNhAEAAAAwG0sGjJJcK8l1+3TXmxj/70mOmFaiAAAAAJidJQNGrbUPJvlgVR3fWvvKOqUJAAAAgBlarofRnGtX1XFJNk7O01o7dBqJAgAAAGB2VhowekuSlyZ5RZJrppccAAAAAGZtpQGjq1trL5lqSgAAAADYKeyxwuneXlV/UFUHV9UBc39TTRkAAAAAM7HSHkZH9f9PnxjXkvzi2iYHAAAAgFlbUcCotXbzaScEAAAAgJ3DigJGVfV7C41vrb1mbZMDAAAAwKyt9JG0O08M75Pkfkk+mUTACAAAAGA3s9JH0p48+bmq9k/ypqXmqap9kpyV5Np9PSe11p5VVTfv894oyblJHt1a+9F2pB0AAACAKVjpr6TN9/0ky73X6IdJDm2t3S7J7ZM8sKruluQ5SZ7XWrtFkm8neex2pgEAAACAKVjpO4zenuFX0ZJkzyS/nOTNS83TWmtJvtc/7t3/WpJDk/y3Pv6EJMcmeclqEg0AAADA9Kz0HUbPnRi+OslXWmuXLTdTVe2Z4bGzWyR5UZIvJbmytXZ1n+SyJDdeeXIBAAAAmLYVPZLWWvtgks8luV6SGyZZ0TuHWmvXtNZun+QmSe6S5NYrTVhVHVNVm6tq85YtW1Y6GwAAAAA7aEUBo6o6Msk5SR6W5MgkH6+qI1a6ktbalUnOSHL3JPtX1VzPppsk+doi8xzXWtvUWtu0YcOGla4KAAAAgB200kfS/jzJnVtrVyRJVW1I8v4kJy02Q5/mx621K6tq3yS/nuGF12ckOSLDL6UdleTU7U8+AAAAAGttpQGjPeaCRd23snzvpIOTnNDfY7RHkje31t5RVZ9N8qaqenaS85K8crWJBgAAAGB6VhowendVvSfJG/vnhyd511IztNYuSHKHBcZfkuF9RgAAAADshJYMGFXVLZIc1Fp7elX9dpJ79a8+muT1004cwHp72Wv/66yTsGq//+j3zDoJAADAbma5HkbPT/JnSdJaOznJyUlSVb/Sv3vIVFMHAAAAwLpb7j1EB7XWLpw/so/bOJUUAQAAADBTywWM9l/iu33XMiEAAAAA7ByWeyRtc1U9vrX28smRVfW4JOdOL1kATMOxb9713tF07JHe0QQAAO
ttuYDRU5OcUlWPytYA0aYk10ry0GkmDAAAAIDZWDJg1Fq7PMk9quq+SW7bR7+ztfaBqacMAAAAgJlYrodRkqS1dkaSM6acFgAAAAB2Asu99BoAAACAnzECRgAAAACMCBgBAAAAMCJgBAAAAMCIgBEAAAAAIwJGAAAAAIwIGAEAAAAwImAEAAAAwIiAEQAAAAAje806AQCwVh506u/MOgmr9i+HvXXWSQAAgG3oYQQAAADAiIARAAAAACMCRgAAAACMCBgBAAAAMCJgBAAAAMCIgBEAAAAAIwJGAAAAAIwIGAEAAAAwImAEAAAAwIiAEQAAAAAjAkYAAAAAjAgYAQAAADAiYAQAAADAiIARAAAAACMCRgAAAACMCBgBAAAAMCJgBAAAAMDIXtNacFXdNMlrkhyUpCU5rrX2T1V1QJITk2xMcmmSI1tr355WOgBgd/HgU5496ySsyrse+sxVTf8bJ79kSimZjnf+9hNnnQQAgKmZZg+jq5P8z9babZLcLcmTquo2SZ6R5PTW2i2TnN4/AwAAALCTmFrAqLX29dbaJ/vwd5NclOTGSQ5LckKf7IQkh08rDQAAAACs3rq8w6iqNia5Q5KPJzmotfb1/tU3MjyyBgAAAMBOYmrvMJpTVddN8tYkT22t/XtV/fS71lqrqrbIfMckOSZJbnazm007mQAAU/ObJ71+1klYtXcc8agVT/tbJ719iimZjtOOeMiKp33oWz80xZRMxym/c69ZJwGAXdxUexhV1d4ZgkWvb62d3EdfXlUH9+8PTnLFQvO21o5rrW1qrW3asGHDNJMJAAAAwISpBYxq6Er0yiQXtdb+ceKr05Ic1YePSnLqtNIAAAAAwOpN85G0eyZ5dJILq+r8Pu5/JfnbJG+uqscm+UqSI6eYBgAAAABWaWoBo9bah5LUIl/fb1rrBQAAVu7hJ39x1klYlRN/+xarmv5Fp1w+pZRMx5Me6jeBgJ3DuvxKGgAAAAC7DgEjAAAAAEYEjAAAAAAYETACAAAAYGSav5IGAADAlPzLid+cdRJW7UEPP3DF0573iiummJLpuMPjfm7WSYA1o4cRAAAAACMCRgAAAACMCBgBAAAAMOIdRgAAALDOvv53X5t1Elbt4D+58Yqnvfz5504xJWvvoKfeaVXTX/HC904pJdPxc3/4gFXPo4cRAAAAACMCRgAAAACMCBgBAAAAMCJgBAAAAMCIgBEAAAAAIwJGAAAAAIwIGAEAAAAwImAEAAAAwIiAEQAAAAAjAkYAAAAAjAgYAQAAADAiYAQAAADAiIARAAAAACMCRgAAAACMCBgBAAAAMCJgBAAAAMCIgBEAAAAAIwJGAAAAAIwIGAEAAAAwImAEAAAAwIiAEQAAAAAjAkYAAAAAjAgYAQAAADAiYAQAAADAiIARAAAAACMCRgAAAACMTC1gVFWvqqorqurTE+MOqKr3VdUX+v8bTmv9AAAAAGyfafYwOj7JA+eNe0aS01trt0xyev8MAAAAwE5kagGj1tpZSf5t3ujDkpzQh09Icvi01g8AAADA9lnvdxgd1Fr7eh/+RpKD1nn9AAAAACxjZi+9bq21JG2x76vqmKraXFWbt2zZso4pAwAAAPjZtt4Bo8ur6uAk6f+vWGzC1tpxrbVNrbVNGzZsWLcEAgAAAPysW++A0WlJjurDRyU5dZ3XDwAAAMAyphYwqqo3JvlokltV1WVV9dgkf5vk16vqC0nu3z8DAAAAsBPZa1oLbq09cpGv7jetdQIAAACw42b20msAAAAAdk4CRgAAAACMCBgBAAAAMCJgBAAAAMCIgBEAAAAAIwJGAAAAAIwIGAEAAAAwImAEAAAAwIiAEQAAAAAjAkYAAAAAjAgYAQAAADAiYAQAAADAiIARAAAAACMCRgAAAACMCBgBAAAAMCJgBAAAAMCIgBEAAAAAIwJGAAAAAIwIGAEAAAAwImAEAAAAwIiAEQAAAAAjAkYAAAAAjAgYAQAAADAiYAQAAADAiI
ARAAAAACMCRgAAAACMCBgBAAAAMCJgBAAAAMCIgBEAAAAAIwJGAAAAAIwIGAEAAAAwImAEAAAAwIiAEQAAAAAjAkYAAAAAjAgYAQAAADAyk4BRVT2wqi6uqi9W1TNmkQYAAAAAFrbuAaOq2jPJi5I8KMltkjyyqm6z3ukAAAAAYGGz6GF0lyRfbK1d0lr7UZI3JTlsBukAAAAAYAGzCBjdOMlXJz5f1scBAAAAsBOo1tr6rrDqiCQPbK09rn9+dJK7ttb+cN50xyQ5pn+8VZKL1zGZByb55jqub73tzvnbnfOWyN+uTv52Xbtz3hL529XJ365rd85bIn+7Ovnbde3OeUvkb639Qmttw0Jf7LWOiZjztSQ3nfh8kz5upLV2XJLj1itRk6pqc2tt0yzWvR525/ztznlL5G9XJ3+7rt05b4n87erkb9e1O+ctkb9dnfztunbnvCXyt55m8UjaJ5LcsqpuXlXXSvKIJKfNIB0AAAAALGDdexi11q6uqj9M8p4keyZ5VWvtM+udDgAAAAAWNotH0tJae1eSd81i3Ss0k0fh1tHunL/dOW+J/O3q5G/XtTvnLZG/XZ387bp257wl8rerk79d1+6ct0T+1s26v/QaAAAAgJ3bLN5hBAAAAMBOTMBoO1TV0VX1wlmnY1qqamNV/beJz5uq6p9nmab5VrIPquqvq+r+65Wm9bK75mtnUlUfWcE0T62q66zhOl9RVbdZq+Wxc6mqJ1TV7/Xho6vqP01xXftX1R9Ma/mztL3nXVV9bwfWOdX9tV563f7pWadjZ7ZeZX9VHVtVT9vOeUfrr6p3VdX+O5CWda/vFlnH4dtTB+7Iuf2zpKoeVlUXVdUZa7S8NW+LVtUhVfWOtVzmWtjeY3NaVluW70h5sytxfTQ9AkYsZGOSnwaMWmubW2tPmV1ytk9r7S9ba++fdTrWUlXtuTvma2fTWrvHCiZ7apJVNaCras8l1vm41tpnV7M8dh2ttZe21l7TPx6dZJoBiP2T7JYBo2zHebcGjs509xczUFXbvMdzR8v+pcr4NTRaf2vtwa21K7d3YbOo7xZxeJKd5qJ8ra3HsbHMOh6b5PGttfuuxbp21bboQuf9CuzWx+auZKn9t6sekyu1ncfu2mit7XZ/GQIen0vy+iQXJTkpQ0V3pyQfTHJuhl9pO7hPf/skH0tyQZJTktywjz8zyT8lOT/Jp5PcpY8/OskL+/CGJG9N8on+d88dSPfv9TR8Kslrez4+0MednuRmfbrjk/xzko8kuSTJEX38IT3NJ03kf+49VYvl/RZJ3t/X+ckk/7lvi+/0fP9RX+47+vQHJHlbT9PHkvxqH39sklf19V+S5Ck7uA/f1tP6mSTH9HGPSfL5JOckeXmSFya5QZKvJNmjT7Nfkq8m2btvp7ltc+e+vT7V579ehl/p+/u+3y5I8vs7kN7f7cs9P8nLkty1L3OfnqbPJLlt35ZnJXlnkouTvHQi7Q9I8tG+H96S5Lp9/KVJntPHP2Jevhbbr2f2ec7p2+zeffyeSZ6b4Xi+IMmTl1rOGh6vL+nHyyV9G7wqw7l5/MTyvpfkeX1bnZ5kQx//+L6PPpXhXLvOMufBa5IcPrHc1yc5bJX783tLnVNJnpLkR0kuTHLGKvbfM5KcM6+sunBin21abFkZjuGT+/eHJflBkmtlOMYu2cHz7enp52zfBx/ow4f2PD+y5/XTSZ4zb5/9fd9n709yl2wtA35rIo9n97x8Msk9liuvplhurvQ4XC5PR6fXAf3zO5IcMjH/3/T0fCzJQRNl5NOSHNGnuThDefEbSd42saxfT3LKDu7PN/Xj4/yel6dnazn3V0uVsyvdBuvxl6HsfGfflp9O8qxse959b2L6I+b2ZZKbZziHLkzy7HnTbbM9+nFzUYa65TNJ3ptk3wX2177rmP+/6Ov9UJI39uNnsfbKYuPv1Lffp/o+/fSU07wxw/l8fIa65/VJ7p/kw0m+0I+lu/R9c16G8vtWE+
fVyUne3af9u4nlviTJ5r5vJo/hB/f1nZuhPphrq+yX4fw+p6/nsIl1nJahjPjgAunfnrL/e0n+oW/jeyX54368fjrJUyeW/ed9m/x0f/bxZ2Zr2X9gkkv78Db19SLrvzTJgX14m3VnkWN7B/O80vbKYtP9bZLP9nw9N8k9kvxbki9nOM/+c/97d9+3Zye59XLn9hoex389b9/9TZL/ke0rS396bKzRuTX/2mb+Nt+mvk7yl9lajv19Fmn/Jjk4Qxt17rrn3n3a4/vnC5P8UZ/2+Gxtd90vw3l2YYbz7toTx8Nf9bRdOLEPFysDDkk/h9e43DwzyfMzlCH/M4u3nbdpby50bE6zDF3FsbBQfbXYOXNsxuXNNte2M8jD/Pr94Uvsl8n996zsBNd9a5D/+W3XhyT5eD8n3p9x2/G1GerQN2YN4w6rSu+sD/op7YSNSdrcRsxQeD29HzhzF6EPT/KqPnxBkl/rw3+d5PkTB+jL+/B90htaGQeM3pBeCSS5WZKLtjPN/yVDQ2Ku0j8gyduTHNU///f0i4p+QrwlQw+x2yT5Yh9/SIZAz036dx/N0HjZe4m8fzzJQ/vwPhkKx0MyUWBnHDB6QZJn9eFDk5w/cUB/JMm1MzR4vpVk7x3Yhwf0//tmKEhunOT/9RPlWv3EmdsHpya570TeXjGxnY7o01+S5M59/PUz/ELgMUme2cddO0NBdPPtSOsv9321d//84gwFwbMzNIRelOTPJrblVUl+MUPB9b6exgMzVNL79en+NMlf9uFLk/zJxPrm8rXUfj0zyT/04QcneX8ffmKGRsZeE8fZostZw+P1TRkanocl+fckv5LhGD03ye37dC3Jo/rwX07s3xtNrPfZ2RrkOj4Lnwe/NrHuG2So5Pda5T6dbEBvc05N7Je5/K9m/52ffpz16eaOwTOTbFpsWRmO2Uv6uOemVxQ9v2/c3nOtL+9uSd7Sh8/OULnunaFifla2nnt7ZbjQOnxinz2oD5+SodGyd5LbZWvZcJ0k+/ThWybZvNy2XUW6p3UcLpeno7N4wKgleUgf/ruJ/XtsFr5ArAwXAXPn3xvm5t+B/bkxW+urB2T4pY3qeX1HkvvMba/+f66cvdFKt8F6/CX5nfQ6eOJ8vnRuf0+eq314MmB0WpLf68NPytZzesHt0bfZ1RPHwZuT/O78/bWOeb9zhrJinwwN3S9kuPBZrL2y1Pi5/b1eAaOrMz63XpWt593b0uvgPv39k7x14ry6pO/nfTJcFNx03rG6Z98fv9qn+Wq2lqdvzNa2yv+Z2H/7Zygn9uvruGxueQukf1Vl/8T5cmQfvlOGC+P9MgT6P5PkDhPjr9Pz/8UsHzDapr5eZP2X9vkWW/fcPtnm2N6ePGeF9d1i0yW5UYYL+rkbmvv3/8enX+z1z6cnuWUfvmu23shY8NyewnH8yT68R5IvZWgbbU9ZeuQapqllfG3ztHnb/D9l8fp68jhbsP2bIZjy5xPn2vX6cfW+iXSM9le2noe/1Me/JluDlZdma5vtD7K1fb5YGXBIdiBglMXLzTOTvLhPs1Tbean25hHbm64pHZ/bnNNZ/Jw5NuPyZptr2xnkYaH6falrmhdPTDvT6741yPtCbdcbZmuZ+LhsvYY7NkM9um//vCZxh9X+za5r0/R9tbX24T78uiT/K0MPj/dVVTIUhF+vqhtkKPw+2Kc9IcNF6Jw3Jklr7ayquv4Cz4nfP8lt+jKT5PpVdd3W2mqfqT40wwXbN/v6/q2q7p7kt/v3r81w8THnba21nyT5bFUdNDH+nNbaZUlSVednKFSuXCTv10ty49baKX2dV/X5lkrnvTKc5GmtfaCqblRV1+/fvbO19sMkP6yqK5IclKFhtj2eUlUP7cM3TfLoJGe21rb0NJ6Y5Jf69ydmKDDOyHB35cXzlnWrJF9vrX2ip/vf+zIekORXq+qIPt0NMlzQfnmVab1fhgr1E33b7ZvkigyN9k9kCBBNPtJ3Tmvtkp6GN2
bYpldlCHp8uC/jWhkaa3NOXGC9t8oC+3Xi+5P7/3MzHAfJcLy+tLV2dfLT4+y2yyxnIas9Xt/eWmtVdWGSy1trF/b8f6an7fwkP5nI5+sm0n/bqnp2hkb/dTPcdZizzXnQWvtgVb24qjZkOFbfOpff7bTQOfWhedPcLSvff2/OcLz+bf//8JUsq7V2dVV9qap+OcPduX/MUNnvmSHIsyPOTXKnfi7/MMPdwE0Z7i6+PeNz7/V9vW/LcNf53X0ZFyb5YWvtx30/b+zj907ywqq6fZJrsvW8TVa2bZcyjeNwJXlayo8yXEgkw3b99aUm7ul5bZLfrapXJ7l7hoDzWnlA/zuvf75uhnLurGxbzt4yQ7B/R7fBWrkwyT9U1XMyXEScvUz9NOme6XVVhuPgOX14se3x/5J8ubV2fh8/WW7Owj2TnNrr5auq6u0ZAgHbtFcWa8f09sr+rbWz+vjXJnnQOqT9y/POrdMnzruNGeraE6rqlhkugPeemPf01tp3+ryfTfILGS5Gj6yqYzI0+g/OUEbukSGIPldnvzHDBUEy7OPfmnhvxz4ZGtfJcPH7byvIx0rLp2sy3PFNhvr8lNba9/t8J2coR/fo4/+jjz9tBevfpr5eZvrF1n1aVn5sr2V9t9h038nQ5nllf1/NNu+sqarrZujZ8ZaJc/7a/f9i5/aaaa1dWlXfqqo7ZGjLnpchGLHasnTy2FgL869t5tqWc9v8zlm8vp60WPv3E0leVVV7Z2hfnV9VlyT5xap6QYYeIe+dt6xbZTi+Pt8/n5AhkPf8/nmyLTpXNy9VBuyIhcrNOXPbaKm281LtzZ3NQuf0YufMfNtc27YdeKx1O43q9yTfztLXIifOG57ldd+OWqjt+itJTqyqgzOUlZNpOq219oM+vFZxh1XZnQNGbd7n7yb5TGvt7pMje0NrNcuZ/3mPJHebC7asox9ODNci46/JsI8rC+f9elNM09y6V62qDslwQty9tfYfVXVmhjvwiz0/fFqS/1NVB2QI3HxgpavKcPdgRyuESnJCa+3PRiOHk/66GSrCfZJ8v3+10DFVGRqxj1xkHd9fYNyC+3XC3P5Ybl8st5y1MJeWn2R8nPwki6dtbjsdn+EO2aeq6ugMd/nJI6QAAAp+SURBVKDmLzcZnwevyXC35REZHmXcESs5rlez/07MUKGfnCFe8IVVLOusDBd8P87QZfX4DJXq05fLxFJ6MODLGe6+fyRDr4T7Znhk9dIM59VCftz6bY5M7NvW2k8mnrX+oySXZ+idskeGC4U5a1JmrMJKjsOV5OnqjN8BuM/E8OT8K83TqzME5q7K0IjYkQDnfJXk/7bWXjYauXA5O5ePlWyDqWutfb6q7pihl+Szq+r0hSabGN5nie/mLLY9Nmbb43Hf1aaZJNueW5Pn3V5J/neGR5se2rf7mYvMe02Svarq5hl6Cdy5tfbtqjo+2+7r+SrJ77TWLh6NrLprFq5Pl8vHUufyVa21a1a4zIVMlifL5Wt7rfTYXsv6btHpquouGW62HZHkDzNcQE3aI8mVrbXbL7KOhc7ttfaKDHXiz2fozXO/rL4s3dFjY77FrklWekzPWbT9W1X3yfCo9PFV9Y+ttddU1e2S/NckT0hyZIbeuyu1UFt0qTJgWiaPy8XavMdn8fbmzmb+uXpQlj5nJi13bTt18+v3DNduS12LTB7js77um4YXJPnH1tppvUw5duK7ybzPJO6wO7/0+mb9TnMyvMD5Y0k2zI2rqr2r6r/0O1nfrqp792kfneH5yTkP79PfK8l35u58TXhvhufK06dbyYm6kA8keVhV3agv54AMF26P6N8/Ktvfi+DiLJz37ya5rKoO7+OvXcOvYHw3Q1fOhZzd0zJXSX5zLnK7hm6Q5Nu94r11hrtU+yb5tRp6NO2d5GFzE/eo6icyPJP7jgUq54uTHFxVd+7pvl6/6HlPkif25aWqfqmq9tuO9J6e5Iiq+rm+nAOq6hcyvMvoLzI8bz
55B+wuVXXzqtojw/H1oQzH5z2r6hZ9GftV1WRPjIUsuF+Xmed9SX5/7qKvH2fbs5xpHK97ZGg8JsM5O3dX83oZesTt3Ze7EsdneEln2vReJD15nqx4/7XWvpShcv+LLNxzbKllnZ0hXx/tdxBvlOFOylr88tHZGS7KzurDT8hwJ/WcDOfegTW8UPORGZeRy7lBhjs9P8lQvq7liz+nWW4u5dIkt6+qParqphl6fK3GqIxtrf1rkn9N8swMwaMdNbn89yT57/2Ofarqxr2sWqic3anU8Mtk/9Fae12Gx6numG3rp8ur6pd7efrQifEfzvg4mLPY9ljKUnXitHw4yUOqap+e1t/M0Gjcpr2yWDum3zG+srdfkpWXn9N2gyRf68NHr2D662fI+3dq6Ek610vq4gw9Hzb2z5O9Nd+T5MnVb8PW0FNkrSzXRjq8qq7T2xMP7ePO6uP37TfrHjIxz6XZGpQ/YmL8QvX1UutfbN1rYXvquwWn68fzDVpr78pwQ+F289fR25VfrqqH9XmrBy2Sxc/ttXZKkgdm6LXznuwcZen8a5v5vb9WWl8v2P7tbdfLW2svzxAwu2NVHZjhfTFvzVBH3XHesi5OsnFuP2fb66iFrLYMWKmFys35lmrzLtbenEUdsFpLnTPzLXdtO3UL1O93zQqvRXaC674dtVDbdfKcOGqJedcq7rAqu3MPo4uTPKmqXpXhxXovyHCg/HMNvYr2ytBd8jMZdsxLe7Dkkox7JFxVVedl6CWyUET9KUleVFUX9GWeleFCa1Vaa5+pqr9J8sGquibDhdqTk7y6qp6eZEu2s6dEa+1HNXS/Wyjvj07ysqr66wy9Fh6WoXfBNVX1qQwX3udNLO7YDN1VL0jyH1n6oN5e707yhKq6KMN+/FiGbonHZujOfGWGR0cmnZjhUcJD5i+s5//hSV5QVftmeBns/TNUhhuTfLI3Krdk+CWEVWmtfbaqnpnkvf2i5ccZnq/9cWvtDb3S/khVHZrhDusnMryw+xYZulOe0u/cH53kjVU114X0mRmecV1svUvt18W8IsMjQRdU1Y8zPD/8wtUuZ0rH6/czBNOemeGRvrnG/19keNfWlv5/2Uq7tXZ5P37md8NeS8cleXdV/Wtr7b6r3H8nZqggbz7/i9baliWW9fEMd5HmHjG5IMnPT/QG2RFnZ3gp60dba9+vqquSnN1a+3pVPSPDsVoZHj09dRXLfXGSt9bwk/LvzurvhC5qmuXmMj6cobvwZzO8ePKTq5z/+Ax1zg8y3JX+QYbA8obW2kU7mrjW2req6sM1/Ozuv2R45v2j/dr5exl63y1Uzu5sfiXJ31fVTzKUq0/M8MjeT8+7DC+Sf0eGfb05Q6/OZHhB7Ruq6k8zlMdJktbae2t4rHP+9liqF8Dx2XZ/TVVr7RM1PLZ0QYYeehdmeJRnsfbKYuMfk6HObtn2UZJZ+bsMj6M8M8MjLkvqd/vPy9DT+KsZzr+01n5QVX+Q4Xj4foa6dc7/zlCPXdDr5S9n4YvH7TEq++el9ZM19IA6p496RWvtvOSnj9J/KkP9NpnW5yZ5cw2P3E1uj23q6wxthwXXv9i6a2tAbUesur5boi77bpJTq2qfDHXKH/fv3pTk5VX1lAyBs0cleUk/Tvbu338qi5zba623sc7I0GvjmgxtvIXKjvUsS+df27wkExePq6ivF2v/HpLk6f14+16Gx6NvnKFOnetkMOpN31q7qqoek6Hn9F4Zju2XLpOPVZUBK7VEuTk5zVJt58Xam6Njs9/42xktds7Mt9y17XpYqH6/Oiu/FpnZdd+OWqTtemyGc+jbGQJK21wfdGsSd1itWpvrjJ1Lrxzf0Vq77Q4u58wMLwnbvAbJgrleWU9rra1Vw3W3UVXfa61dd/kpV7Ss62RoKNxxFndOYLWq6oVJzmutvXLWaWHnUP29BL08OyvDry+tNji5W5vYRpXhBya+0Fp73qzTxa6vB0g+meRhCzw6Pov0bMwaXN
vs7pSbS3Nty/bYnR9JA34GVdX9M/T6eIFgEbuCqjo3w68+vW7WaWGnclwNLx7+ZIaX97vo2dbj+zb6TIYu/S9bZnpYVlXdJsMv2Z2+MwSLWBXlJqyx3bKHEQAAAADbTw8jAAAAAEYEjAAAAAAYETACAAAAYETACABgnVXV0f3X8QAAdkoCRgAAU1ZVe846DQAAqyFgBACwhKp6elU9pQ8/r6o+0IcPrarXV9Ujq+rCqvp0VT1nYr7vVdU/VNWnkty9qh5TVZ+vqnOS3HM2uQEAWBkBIwCApZ2d5N59eFOS61bV3n3c55M8J8mhSW6f5M5VdXifdr8kH2+t3S7Jl5L8VYZA0b2S3Gb9kg8AsHoCRgAASzs3yZ2q6vpJfpjkoxkCR/dOcmWSM1trW1prVyd5fZL79PmuSfLWPnzXiel+lOTE9cwAAMBqCRgBACyhtfbjJF9OcnSSj2TocXTfJLdIcukSs17VWrtm2ukDAJgGASMAgOWdneRpSc7qw09Icl6Sc5L8WlUd2F9s/cgkH1xg/o/36W7UH2d72PokGwBg+wgYAQAs7+wkByf5aGvt8iRXJTm7tfb1JM9IckaSTyU5t7V26vyZ+3THZnic7cNJLlqndAMAbJdqrc06DQAAAADsRPQwAgAAAGBEwAgAAACAEQEjAAAAAEYEjAAAAAAYETACAAAAYETACAAAAIARASMAAAAARgSMAAAAABj5/1mVbz3mx0NsAAAAAElFTkSuQmCC\n", | |
"text/plain": [ | |
"<Figure size 1440x360 with 1 Axes>" | |
] | |
}, | |
"metadata": { | |
"tags": [], | |
"needs_background": "light" | |
} | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "6d545418-a111-4f49-8453-fea55ebb8bda" | |
}, | |
"source": [ | |
"dictionary = corpora.Dictionary(reviews_2)" | |
], | |
"id": "6d545418-a111-4f49-8453-fea55ebb8bda", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "aa582f02-79c2-4b58-9b42-77965f47c949" | |
}, | |
"source": [ | |
"doc_term_matrix = [dictionary.doc2bow(rev) for rev in reviews_2]" | |
], | |
"id": "aa582f02-79c2-4b58-9b42-77965f47c949", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "5aa4a197-3d59-457f-9c7e-111efcd04226" | |
}, | |
"source": [ | |
"# Alias gensim's LdaModel class for brevity; the model itself is built below\n", | |
"LDA = gensim.models.ldamodel.LdaModel\n", | |
"\n", | |
"# Build LDA model\n", | |
"lda_model = LDA(corpus=doc_term_matrix, id2word=dictionary, num_topics=7, random_state=100,\n", | |
" chunksize=1000, passes=50)" | |
], | |
"id": "5aa4a197-3d59-457f-9c7e-111efcd04226", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "c3a299ae-6f4a-4ef3-8bfe-927742424586", | |
"outputId": "98b674df-c1dd-4db2-a3a7-7e84e4d41f5d" | |
}, | |
"source": [ | |
"lda_model.print_topics()" | |
], | |
"id": "c3a299ae-6f4a-4ef3-8bfe-927742424586", | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"[(0,\n", | |
" '0.013*\"interview\" + 0.012*\"step\" + 0.011*\"willing\" + 0.010*\"industry\" + 0.009*\"connection\" + 0.008*\"people\" + 0.008*\"partner\" + 0.008*\"next\" + 0.008*\"world\" + 0.008*\"year\"'),\n", | |
" (1,\n", | |
" '0.032*\"advice\" + 0.019*\"people\" + 0.018*\"woman\" + 0.013*\"experience\" + 0.011*\"role\" + 0.010*\"team\" + 0.010*\"career\" + 0.008*\"technology\" + 0.008*\"year\" + 0.008*\"new\"'),\n", | |
" (2,\n", | |
" '0.030*\"connection\" + 0.028*\"social\" + 0.019*\"people\" + 0.014*\"expert\" + 0.010*\"woman\" + 0.010*\"brand\" + 0.010*\"app\" + 0.010*\"medium\" + 0.010*\"thank\" + 0.008*\"friend\"'),\n", | |
" (3,\n", | |
" '0.020*\"company\" + 0.020*\"people\" + 0.018*\"community\" + 0.016*\"good\" + 0.012*\"manager\" + 0.011*\"online\" + 0.010*\"friend\" + 0.010*\"trucking\" + 0.010*\"advice\" + 0.009*\"employee\"'),\n", | |
" (4,\n", | |
" '0.017*\"people\" + 0.016*\"experience\" + 0.011*\"interview\" + 0.011*\"introduction\" + 0.010*\"student\" + 0.010*\"executive\" + 0.010*\"senior\" + 0.009*\"business\" + 0.009*\"woman\" + 0.007*\"contact\"'),\n", | |
" (5,\n", | |
" '0.018*\"product\" + 0.017*\"job\" + 0.015*\"company\" + 0.014*\"help\" + 0.014*\"experience\" + 0.012*\"student\" + 0.011*\"interview\" + 0.011*\"people\" + 0.011*\"founder\" + 0.011*\"opportunity\"'),\n", | |
" (6,\n", | |
" '0.031*\"care\" + 0.028*\"great\" + 0.027*\"connection\" + 0.026*\"grandparent\" + 0.024*\"service\" + 0.024*\"provider\" + 0.021*\"company\" + 0.017*\"early\" + 0.017*\"stage\" + 0.016*\"professional\"')]" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 24 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 881 | |
}, | |
"id": "590d409d-6f64-4c19-8056-e97e5cf82963", | |
"outputId": "457202b4-9d85-4d29-edf9-e8ae515cc4bc" | |
}, | |
"source": [ | |
"# Visualize the topics\n", | |
"pyLDAvis.enable_notebook()\n", | |
"vis = pyLDAvis.gensim_models.prepare(lda_model, doc_term_matrix, dictionary)\n", | |
"vis" | |
], | |
"id": "590d409d-6f64-4c19-8056-e97e5cf82963", | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/html": [ | |
"\n", | |
"<link rel=\"stylesheet\" type=\"text/css\" href=\"https://cdn.jsdelivr.net/gh/bmabey/[email protected]/pyLDAvis/js/ldavis.v1.0.0.css\">\n", | |
"\n", | |
"\n", | |
"<div id=\"ldavis_el2941400786923529763956816973\"></div>\n", | |
"<script type=\"text/javascript\">\n", | |
"\n", | |
"var ldavis_el2941400786923529763956816973_data = {\"mdsDat\": {\"x\": [-0.02069201593784128, -0.04701978094349936, -0.09982633502616893, -0.02161730194200003, -0.0036183557730646797, 0.00633661105478321, 0.18643717856779093], \"y\": [-0.024725900885002886, 0.12352462452370629, 0.013287663001148328, 0.05247474687422122, -0.11408986542794797, -0.09302689339558314, 0.04255562530945809], \"topics\": [1, 2, 3, 4, 5, 6, 7], \"cluster\": [1, 1, 1, 1, 1, 1, 1], \"Freq\": [18.468724456340205, 17.586615935604975, 15.809765568105478, 14.158737404998782, 12.533246800994743, 11.882349916205252, 9.560559917750567]}, \"tinfo\": {\"Term\": [\"connection\", \"care\", \"social\", \"advice\", \"great\", \"grandparent\", \"company\", \"provider\", \"service\", \"job\", \"product\", \"stage\", \"early\", \"professional\", \"woman\", \"expert\", \"good\", \"step\", \"name\", \"experience\", \"trucking\", \"community\", \"chat\", \"insurance\", \"willing\", \"friend\", \"student\", \"brand\", \"executive\", \"medium\", \"event\", \"curriculum\", \"reciprocity\", \"executive\", \"educator\", \"affordable\", \"party\", \"old\", \"sure\", \"contact\", \"nonprofit\", \"podcast\", \"birthday\", \"standard\", \"task\", \"celebration\", \"interesting\", \"rental\", \"various\", \"game\", \"looking\", \"conduct\", \"multiple\", \"dynamic\", \"source\", \"well\", \"senior\", \"hour\", \"organization\", \"government\", \"experience\", \"introduction\", \"business\", \"student\", \"interview\", \"people\", \"area\", \"housing\", \"market\", \"management\", \"next\", \"practice\", \"brand\", \"woman\", \"process\", \"early\", \"community\", \"program\", \"year\", \"good\", \"team\", \"online\", \"manager\", \"professional\", \"way\", \"trucking\", \"pain\", \"truck\", \"curious\", \"convoy\", \"freight\", \"chief\", \"officer\", \"employee\", \"kid\", \"scale\", \"core\", \"dispatcher\", \"advertising\", \"buyer\", \"grader\", \"match\", \"special\", \"talent\", \"estate\", \"asset\", \"real\", 
\"stock\", \"bond\", \"testing\", \"common\", \"resource\", \"potential\", \"broker\", \"place\", \"good\", \"community\", \"space\", \"company\", \"online\", \"digital\", \"small\", \"talk\", \"parent\", \"friend\", \"manager\", \"msx\", \"content\", \"people\", \"startup\", \"idea\", \"time\", \"advice\", \"point\", \"year\", \"interested\", \"field\", \"individual\", \"new\", \"product\", \"service\", \"team\", \"experience\", \"prototype\", \"gsber\", \"volunteer\", \"boss\", \"break\", \"thumb\", \"facility\", \"eye\", \"operator\", \"non\", \"psychology\", \"build\", \"moodle\", \"campus\", \"agency\", \"riverbank\", \"nanny\", \"rating\", \"passionate\", \"movie\", \"director\", \"pursue\", \"architecture\", \"education\", \"coach\", \"technology\", \"lack\", \"phase\", \"entrepreneurial\", \"programmatic\", \"system\", \"advice\", \"role\", \"private\", \"man\", \"woman\", \"career\", \"family\", \"people\", \"new\", \"school\", \"experience\", \"team\", \"language\", \"startup\", \"grade\", \"year\", \"general\", \"program\", \"thank\", \"male\", \"interested\", \"community\", \"interview\", \"introduction\", \"individual\", \"time\", \"investor\", \"connected\", \"mobile\", \"test\", \"job\", \"civic\", \"peer\", \"response\", \"participant\", \"able\", \"behavioral\", \"accountability\", \"engineering\", \"com\", \"research\", \"founder\", \"product\", \"daysmarketingideaschallenge\", \"devpost\", \"divyankjain\", \"link\", \"ping\", \"host\", \"present\", \"station\", \"structure\", \"sunday\", \"exam\", \"season\", \"study\", \"week\", \"opportunity\", \"connect\", \"first\", \"cannabis\", \"project\", \"help\", \"user\", \"tech\", \"love\", \"design\", \"student\", \"person\", \"leader\", \"strategy\", \"company\", \"software\", \"experience\", \"interview\", \"advice\", \"time\", \"woman\", \"industry\", \"people\", \"team\", \"good\", \"connection\", \"introduction\", \"video\", \"current\", \"grade\", \"interested\", \"step\", \"tax\", 
\"developmental\", \"introduce\", \"oracle\", \"information\", \"wall\", \"appropriate\", \"bubble\", \"diverse\", \"dress\", \"schooler\", \"portal\", \"prior\", \"prompt\", \"taxis\", \"consignment\", \"furniture\", \"hardware\", \"openness\", \"store\", \"bearish\", \"bet\", \"fear\", \"ncov\", \"particular\", \"tank\", \"bonus\", \"boutique\", \"bra\", \"willing\", \"world\", \"level\", \"restaurant\", \"life\", \"document\", \"skill\", \"music\", \"gsb\", \"option\", \"voice\", \"many\", \"preparation\", \"term\", \"adult\", \"disability\", \"supply\", \"goal\", \"group\", \"partner\", \"next\", \"industry\", \"interview\", \"career\", \"family\", \"age\", \"year\", \"person\", \"connection\", \"friend\", \"people\", \"business\", \"small\", \"man\", \"long\", \"designer\", \"software\", \"social\", \"academic\", \"pattern\", \"nibh\", \"nunc\", \"website\", \"medium\", \"psychologist\", \"canadian\", \"clarity\", \"specialist\", \"record\", \"visa\", \"horse\", \"expert\", \"bar\", \"capture\", \"crew\", \"data\", \"madewell\", \"popular\", \"price\", \"sizing\", \"practitioner\", \"changed\", \"external\", \"effort\", \"gas\", \"oil\", \"bartender\", \"knowledgeable\", \"habit\", \"money\", \"favorite\", \"jean\", \"size\", \"connection\", \"brand\", \"start\", \"app\", \"solution\", \"thank\", \"male\", \"city\", \"people\", \"tool\", \"friend\", \"woman\", \"practice\", \"professional\", \"language\", \"manager\", \"program\", \"new\", \"interested\", \"business\", \"experience\", \"interview\", \"advice\", \"grandparent\", \"provider\", \"self\", \"name\", \"care\", \"assumption\", \"association\", \"racer\", \"hemp\", \"construction\", \"email\", \"great\", \"stage\", \"possible\", \"authentic\", \"capability\", \"credibility\", \"fast\", \"misaligned\", \"trust\", \"fintech\", \"powerful\", \"seeking\", \"weight\", \"benefit\", \"look\", \"mission\", \"empowerment\", \"foster\", \"optimism\", \"service\", \"insurance\", \"early\", \"problem\", \"chat\", 
\"professional\", \"connection\", \"work\", \"current\", \"company\", \"example\", \"management\", \"food\", \"startup\", \"member\", \"network\", \"student\", \"people\", \"partner\", \"personal\", \"process\"], \"Freq\": [27.0, 12.0, 11.0, 27.0, 13.0, 8.0, 26.0, 8.0, 12.0, 9.0, 12.0, 8.0, 10.0, 13.0, 21.0, 7.0, 15.0, 5.0, 4.0, 27.0, 6.0, 19.0, 8.0, 7.0, 8.0, 11.0, 16.0, 8.0, 6.0, 5.0, 4.016579270611484, 3.236563506735122, 3.236561167660885, 5.576903442540938, 2.4553238872386673, 2.4553238872386673, 2.4553238872386673, 2.4553238872386673, 2.4540179041231505, 4.017845749362113, 1.6740845276393497, 1.6740845276393497, 1.6740845276393497, 1.6740845276393497, 1.6740845276393497, 1.6740845276393497, 1.674084397690781, 1.674084397690781, 1.674084007845075, 1.6740833581022314, 1.6740832281536626, 1.6740829682565253, 1.6740677642739865, 1.6740672444797116, 1.671945444249845, 3.236610808014132, 5.4858765493379815, 2.4554377221848553, 2.4556661717686445, 2.4555115329718817, 9.156212536775088, 6.1371241973043436, 4.800075576952124, 5.580521210693772, 6.361627502958733, 9.346729613563824, 3.237188819247746, 2.4556076949127257, 2.455446818584665, 3.9555538633633147, 3.236514646073288, 3.2369970151603322, 3.236095172093498, 4.797834224039046, 3.1167005063415543, 3.236777661976353, 3.7994796547106726, 3.061372564139825, 3.237362170638402, 3.2369229444761687, 3.1741871541666633, 2.991358354496976, 2.951092750894208, 2.5919894974633357, 2.4557418018356327, 5.484748099537996, 4.716742339402339, 3.9489073431271215, 2.4133172878620814, 2.413305903604719, 2.413305903604719, 2.413247497414771, 2.413247497414771, 4.717067533188742, 4.591527882605789, 3.9492072935602422, 1.645444674040796, 1.645444674040796, 1.645444674040796, 1.645444674040796, 1.645444674040796, 1.645444674040796, 1.645444674040796, 1.6454443028150125, 1.645443807847301, 1.6454435603634454, 1.6454435603634454, 1.6454435603634454, 1.6454435603634454, 1.6454426941699503, 1.6453548374011733, 3.181376504510572, 
2.412660218225171, 2.413378416374442, 2.413467015594786, 8.516579761913297, 9.507072518396956, 4.608657230194911, 10.858489790435762, 5.702673968425261, 3.1610234322169695, 4.275369731774629, 3.949564412764032, 2.4133843559869788, 5.485091112162011, 6.364608148003548, 3.1812052456824205, 3.1814693109564636, 10.563896887823473, 4.716681953341546, 3.950262069753279, 3.949841099714716, 5.483744305019236, 3.1814769829559904, 3.903384915223941, 3.8257101272384646, 3.1169307185482262, 3.05703096353142, 3.121186203447162, 3.1812067305855547, 3.180608314622448, 3.1810832361415557, 3.180621183782945, 3.881237858724992, 3.8811163849111425, 3.1265519779300908, 2.3718689893688523, 2.3718689893688523, 2.3718680994508023, 2.3718567529956625, 2.3715056803248666, 2.36790952148409, 3.881469682377064, 1.6171834422932199, 1.6171834422932199, 1.6171834422932199, 1.6171834422932199, 1.6171834422932199, 1.6171834422932199, 1.6171833310534636, 1.6171833310534636, 1.6171829973341947, 1.617183108573951, 1.6171829973341947, 1.6171829973341947, 1.6171819961763882, 1.6170922256930764, 1.5520744792276768, 3.971197449622535, 0.8624978395977091, 0.8624978395977091, 0.8624978395977091, 0.8624978395977091, 1.7280595558024308, 15.192629337421353, 5.143835344999957, 1.6171863345268829, 3.126707268629847, 8.411817771534373, 4.638117609993099, 3.043891272400609, 8.95225788406412, 3.881495934959544, 3.125967524250635, 6.1722348778452245, 4.674631392550999, 3.126741085515754, 3.881485700901967, 3.126850990394951, 3.8819871697232435, 2.3729273244100284, 3.2452579241640516, 3.211188301530971, 2.372117498984365, 3.126905275396012, 3.0911812951076474, 3.1259746435950366, 2.589662193461537, 2.372427635424853, 2.3723221801359062, 2.3722761268768098, 2.3264869180869203, 2.32648651959537, 2.3264863203495945, 7.4760617213158245, 1.5862411439101922, 1.5862411439101922, 1.586240446549979, 1.5862402473042039, 1.586239749189766, 1.5862395499439907, 1.5858488289788524, 1.5858266130749192, 3.8074293596296207, 
2.3266331644859064, 4.547661684716523, 7.508403295543911, 0.8459952701105767, 0.8459952701105767, 0.8459952701105767, 0.8459952701105767, 0.8459952701105767, 0.8459952701105767, 0.8459952701105767, 0.8459952701105767, 0.8459952701105767, 0.8459952701105767, 0.8459952701105767, 0.8459952701105767, 0.8459952701105767, 0.8459952701105767, 4.547336117119871, 3.067252225622444, 2.326894375697174, 1.5863047033124764, 2.326598495721024, 6.030980331374084, 3.8074895318537267, 3.066451257606199, 2.3265225830806786, 3.067048197948654, 5.288053804915125, 3.0675451169119663, 2.3265439023786234, 2.1030726303715883, 6.571906709051532, 3.067402656182709, 6.0285901790549605, 4.727217195414956, 4.547229719875921, 2.9729063623855536, 3.8072205500572265, 3.067501681332976, 4.696402242316339, 3.0672289138667472, 3.0311863493645936, 3.0645889073455415, 2.3269744724987986, 2.326937213538839, 2.326786384487024, 2.326752313459467, 2.326719039415011, 4.425028152335622, 2.2639684948367647, 1.54361492292766, 1.54361492292766, 1.54361492292766, 1.54361492292766, 1.54361492292766, 1.5436147465562504, 1.543612100985106, 1.543612100985106, 1.5431676450328826, 1.5304177558319916, 0.8232612628328504, 0.8232612628328504, 0.8232612628328504, 0.8232612628328504, 0.8232612628328504, 0.8232612628328504, 0.8232612628328504, 0.8232612628328504, 0.8232612628328504, 0.8232612628328504, 0.8232612628328504, 0.8232612628328504, 0.8232612628328504, 0.8232612628328504, 0.8232612628328504, 0.8232612628328504, 0.8232612628328504, 0.8232612628328504, 4.329963257068822, 2.9847203133887747, 2.264245574321266, 2.264635531507919, 2.2641360476758967, 1.5436870588341916, 2.9007458287298213, 1.5438817728704037, 1.543779653824238, 1.543700463061322, 1.5437981728222474, 1.5436383803251383, 1.5434784114566198, 1.5438613137868886, 1.5437057542036103, 1.5437057542036103, 1.5436350292683558, 1.5435939347299161, 2.8103244399353002, 2.9856466160320596, 2.9848501227462494, 3.705271244905236, 4.882635767831221, 2.9823269533603325, 
2.264280672231779, 2.096153744161573, 2.9842483434966516, 2.2643877296774138, 3.3500133693790723, 2.2635579021951866, 3.1234406599628715, 2.2641817278709864, 1.9472669967945901, 1.5442295772901597, 1.5441539139554359, 1.5439881248304002, 1.5438581391015156, 10.016952859011035, 2.242929138950961, 2.242929138950961, 2.242928470103746, 2.242928470103746, 2.242927968468335, 3.6676671781581023, 1.5292698294638818, 1.5292698294638818, 1.5292698294638818, 1.5292698294638818, 1.5292698294638818, 1.529269662252078, 1.5292663180160035, 5.0980976087997405, 2.9568200367862114, 0.8156106035827043, 0.8156106035827043, 0.8156106035827043, 0.8156106035827043, 0.8156106035827043, 0.8156106035827043, 0.8156106035827043, 0.8156106035827043, 0.8156106035827043, 0.8156106035827043, 0.8156106035827043, 0.8156106035827043, 0.8156106035827043, 0.8156106035827043, 1.5294089496845882, 2.2430179284187437, 1.5293034390364324, 1.529345074775562, 1.529345074775562, 1.529326848688955, 10.858994716015784, 3.670991348816323, 2.2433480045193135, 3.669078111358005, 2.2432011925556354, 3.637886087065963, 2.243231792315719, 2.2437583422856755, 6.730793456862274, 2.243552838978887, 2.957813274900388, 3.6716321044482294, 2.395271131401988, 2.957594561861105, 2.2434469939071238, 2.551735563662259, 2.2904900283458804, 2.243366397817724, 2.358138907782316, 2.2431966778369348, 2.5188140676842017, 2.3314801627012907, 2.2518634328362426, 7.479521475233086, 6.809350310793062, 2.1095257935438045, 3.451906117912317, 8.823975851519354, 1.4383969811049102, 1.4383969811049102, 1.4383969811049102, 1.438396442949249, 2.781095355469114, 2.7810800180327733, 8.153111004432212, 4.7952946769118405, 2.7815121570286108, 0.7671450655585464, 0.7671450655585464, 0.7671450655585464, 0.7671450655585464, 0.7671450655585464, 0.7671450655585464, 0.7671450655585464, 0.7671450655585464, 0.7671450655585464, 0.7671450655585464, 0.7671450655585464, 0.7671450655585464, 0.7671450655585464, 0.7671450655585464, 0.7671450655585464, 
0.7671450655585464, 6.809422423651644, 3.4526974758119175, 4.7955777467895615, 1.4386935048741585, 3.4531220806285, 4.733429378426806, 7.7676946830886635, 2.7812697179033, 2.7815054300828472, 6.137549611011324, 1.43848254785502, 2.781636201908487, 1.4395495759920023, 2.1096375953823916, 1.438525869385736, 1.438815666209221, 2.1099380207801826, 1.5470507436134178, 1.4388078629521355, 1.4385905826039793, 1.4385449739117038], \"Total\": [27.0, 12.0, 11.0, 27.0, 13.0, 8.0, 26.0, 8.0, 12.0, 9.0, 12.0, 8.0, 10.0, 13.0, 21.0, 7.0, 15.0, 5.0, 4.0, 27.0, 6.0, 19.0, 8.0, 7.0, 8.0, 11.0, 16.0, 8.0, 6.0, 5.0, 4.641792656654158, 3.8605739123757803, 3.86057407777967, 6.875083188441709, 3.0793342593335504, 3.079334274694717, 3.079334360143002, 3.079334555759784, 3.0792992531939345, 5.396492155154699, 2.2980948782602812, 2.2980949228292697, 2.298095016686237, 2.298095029139098, 2.298095029139098, 2.298095029139098, 2.298095066211898, 2.29809508852555, 2.2980948239556653, 2.298094965097195, 2.2980948224123843, 2.2980947088255754, 2.298094385692552, 2.2980943762734802, 2.2980369734630277, 4.628412378102658, 8.43078767715733, 3.7930035475683255, 3.819604601601187, 3.8340263828663996, 27.255305542026093, 15.914082129983935, 11.973452510734004, 16.417064830018596, 22.40094852858184, 44.96057148820632, 7.4806794635054885, 4.567567401751262, 4.587459526238108, 11.850214566601231, 8.169319517639275, 8.216613280516492, 8.938172941665064, 21.761821052031625, 8.849950220646024, 10.054170784686681, 19.637063612587, 11.315232681144977, 15.093446399589743, 15.977784827806058, 16.516490701965978, 11.424002539369132, 15.824216108398753, 13.700855662230547, 10.500623100039528, 6.110737256007399, 5.34285636973307, 4.574989485017119, 3.0392387444450617, 3.0392383743051616, 3.0392383743051616, 3.039239428748048, 3.039239428748048, 6.014137933688078, 6.126056160768041, 5.315238509458474, 2.271364353020712, 2.2713643898558296, 2.2713643898558296, 2.271364539181524, 2.271364539181524, 2.271364539181524, 
2.271364539181524, 2.271364216969639, 2.2713643997278954, 2.2713642358499415, 2.271364267202155, 2.2713643090051057, 2.271364340357319, 2.2713643358637037, 2.2713581189729455, 4.588349693692199, 3.7104437054804107, 3.759596722399372, 3.779490183409434, 15.977784827806058, 19.637063612587, 8.198829564459764, 26.38851672597895, 11.424002539369132, 5.316138453878003, 8.29264150335002, 7.4827081729667615, 3.8204680530004524, 11.8810336772953, 15.824216108398753, 6.051591951085917, 6.109844267046715, 44.96057148820632, 12.692522729879366, 9.73910186816591, 11.299097974301665, 27.786383849991076, 7.456349896194304, 15.093446399589743, 14.949897568444943, 8.998870711535444, 9.0897526110927, 10.459595179085106, 12.650285583912538, 12.781227121216205, 16.516490701965978, 27.255305542026093, 4.509044232134553, 4.509048642166689, 3.75435880458585, 2.9996728154857455, 2.999672924134175, 2.9996728431273416, 2.9996727949901354, 2.9996810908865, 2.99974263509782, 5.222727563557848, 2.2449872226718237, 2.2449872600014307, 2.2449872600014307, 2.244987280862974, 2.2449873347230485, 2.2449873938698235, 2.244987281286527, 2.244987447659313, 2.2449871470798586, 2.244987368538854, 2.2449873431365615, 2.2449874162001704, 2.244987007036611, 2.2449855659774363, 2.2379113162475393, 6.051777057113066, 1.4903016115676306, 1.4903016115676306, 1.4903016199763128, 1.4903016199763128, 3.0223253102698675, 27.786383849991076, 9.736098772497023, 2.91623980874967, 5.935298250137594, 21.761821052031625, 11.833025639593757, 6.699538615016535, 44.96057148820632, 10.459595179085106, 7.523819223742659, 27.255305542026093, 16.516490701965978, 8.212271825408905, 12.692522729879366, 8.868289367039809, 15.093446399589743, 5.289002658398242, 11.315232681144977, 11.161229245383526, 5.880890707788068, 14.949897568444943, 19.637063612587, 22.40094852858184, 15.914082129983935, 9.0897526110927, 11.299097974301665, 6.036999154358352, 2.9563537266559297, 2.956353965277294, 2.956353872877574, 9.61219864108939, 
2.216107706326219, 2.2161080640318485, 2.216107752506887, 2.216107564714408, 2.216107801943799, 2.216107871682974, 2.216071745787406, 2.216092966244214, 5.911882019283806, 3.711036762455502, 7.323388077633741, 12.650285583912538, 1.4758618406483885, 1.4758618406483885, 1.4758618406483885, 1.4758618406483885, 1.4758618406483885, 1.4758618510991262, 1.4758618510991262, 1.4758618510991262, 1.4758618510991262, 1.4758618510991262, 1.4758618621223394, 1.4758618621223394, 1.4758618621223394, 1.4758618621223394, 8.851588969042238, 5.966050572610586, 4.444580659517366, 2.929769446170509, 4.490701699723734, 13.390498964517212, 8.289930270227552, 7.466069280862632, 5.273338631314511, 7.522940924094549, 16.417064830018596, 8.187941770308104, 5.903757251819194, 5.238206135573126, 26.38851672597895, 9.016726670187587, 27.255305542026093, 22.40094852858184, 27.786383849991076, 11.299097974301665, 21.761821052031625, 12.606187540445282, 44.96057148820632, 16.516490701965978, 15.977784827806058, 27.65556947921486, 15.914082129983935, 7.536129625448672, 8.631036555281845, 8.868289367039809, 14.949897568444943, 5.05773819195025, 2.8966767552381625, 2.176323250093779, 2.176323260544517, 2.1763232759056836, 2.1763232759056836, 2.1763234009304635, 2.176323226347641, 2.1763233162765974, 2.1763233162765974, 2.1763355875888153, 2.17668291348251, 1.4559695495920786, 1.4559695495920786, 1.4559695495920786, 1.4559695495920786, 1.4559695899989695, 1.4559695899989695, 1.4559695899989695, 1.4559695899989695, 1.4559695899989695, 1.4559695899989695, 1.4559695899989695, 1.4559695899989695, 1.4559695899989695, 1.4559695899989695, 1.4559695899989695, 1.4559696004497071, 1.4559696004497071, 1.4559696004497071, 8.594611760815052, 5.9208019227836735, 4.418142863858014, 4.419196936288869, 4.459141990562344, 2.916567294998952, 5.926341014285707, 2.9309963030064026, 2.9310012441133924, 2.931004887760865, 2.9441855277171367, 2.944195733732966, 2.944206677734685, 2.9575422543918526, 2.9575551387637518, 
2.9575551387637518, 2.9575615851347195, 2.957563274885342, 5.931833303715293, 6.454272782997847, 8.169319517639275, 12.606187540445282, 22.40094852858184, 11.833025639593757, 6.699538615016535, 5.901211354149025, 15.093446399589743, 8.187941770308104, 27.65556947921486, 11.8810336772953, 44.96057148820632, 11.973452510734004, 8.29264150335002, 5.935298250137594, 3.671143507589993, 4.315929687586433, 9.016726670187587, 11.397510274281041, 2.876593811836463, 2.87659382719763, 2.876594023023566, 2.876594023023566, 2.876593919473532, 5.085177050283106, 2.1629344913261708, 2.162934517710551, 2.1629345445894197, 2.1629346015583018, 2.1629346616025105, 2.162934510883547, 2.162934810511023, 7.219430242509296, 4.330490737761974, 1.4492751986802717, 1.4492751986802717, 1.4492751986802717, 1.4492751986802717, 1.4492751986802717, 1.4492751986802717, 1.4492751986802717, 1.4492752764682062, 1.4492752848768884, 1.4492752848768884, 1.449275291829373, 1.449275291829373, 1.449275291829373, 1.4492753187082423, 2.8341950478729965, 4.337197589880684, 2.917618430341255, 2.9441670347447517, 2.9441670347447517, 2.944168880409384, 27.65556947921486, 8.938172941665064, 5.070411090293355, 9.632172724658304, 5.179300000381673, 11.161229245383526, 5.880890707788068, 5.9205744694852465, 44.96057148820632, 6.557544268041509, 11.8810336772953, 21.761821052031625, 8.216613280516492, 13.700855662230547, 8.212271825408905, 15.824216108398753, 11.315232681144977, 10.459595179085106, 14.949897568444943, 11.973452510734004, 27.255305542026093, 22.40094852858184, 27.786383849991076, 8.119410574435214, 8.215876333575181, 2.749384707352517, 4.859800730835172, 12.446121440093464, 2.078119818449291, 2.078119848045027, 2.078119854997512, 2.078119726670751, 4.160849693400185, 4.16085147563277, 13.384735928504004, 8.518271816049833, 4.9829452918249615, 1.4068679110247126, 1.4068679110247126, 1.4068679110247126, 1.4068679110247126, 1.4068679110247126, 1.4068679110247126, 1.4068679324986637, 1.4068679324986637, 
1.4068679324986637, 1.4068679324986637, 1.4068679324986637, 1.4068679324986637, 1.4068679394511483, 1.4068679394511483, 1.4068679394511483, 1.4068679394511483, 12.781227121216205, 7.040632294432713, 10.054170784686681, 2.7917605756430044, 8.629657422107023, 13.700855662230547, 27.65556947921486, 7.98723484960642, 8.631036555281845, 26.38851672597895, 3.5054329701669413, 11.850214566601231, 4.2666987682421205, 12.692522729879366, 5.788326095045781, 5.935835301439295, 16.417064830018596, 44.96057148820632, 6.454272782997847, 6.56949888017617, 8.849950220646024], \"Category\": [\"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", 
\"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic3\", \"Topic3\", \"Topic3\", \"Topic3\", \"Topic3\", \"Topic3\", \"Topic3\", \"Topic3\", \"Topic3\", \"Topic3\", \"Topic3\", \"Topic3\", \"Topic3\", \"Topic3\", \"Topic3\", \"Topic3\", \"Topic3\", \"Topic3\", \"Topic3\", \"Topic3\", \"Topic3\", \"Topic3\", \"Topic3\", \"Topic3\", \"Topic3\", \"Topic3\", \"Topic3\", \"Topic3\", \"Topic3\", \"Topic3\", \"Topic3\", \"Topic3\", \"Topic3\", \"Topic3\", \"Topic3\", \"Topic3\", \"Topic3\", \"Topic3\", \"Topic3\", \"Topic3\", \"Topic3\", \"Topic3\", \"Topic3\", \"Topic3\", \"Topic3\", \"Topic3\", \"Topic3\", \"Topic3\", \"Topic3\", \"Topic3\", \"Topic3\", \"Topic3\", \"Topic3\", \"Topic3\", \"Topic3\", \"Topic3\", \"Topic3\", \"Topic3\", \"Topic4\", \"Topic4\", \"Topic4\", \"Topic4\", \"Topic4\", \"Topic4\", \"Topic4\", \"Topic4\", \"Topic4\", \"Topic4\", \"Topic4\", \"Topic4\", \"Topic4\", \"Topic4\", \"Topic4\", \"Topic4\", \"Topic4\", \"Topic4\", \"Topic4\", \"Topic4\", \"Topic4\", \"Topic4\", \"Topic4\", \"Topic4\", \"Topic4\", \"Topic4\", \"Topic4\", \"Topic4\", \"Topic4\", \"Topic4\", \"Topic4\", \"Topic4\", \"Topic4\", \"Topic4\", \"Topic4\", \"Topic4\", \"Topic4\", \"Topic4\", \"Topic4\", \"Topic4\", \"Topic4\", \"Topic4\", \"Topic4\", \"Topic4\", \"Topic4\", \"Topic4\", \"Topic4\", \"Topic4\", \"Topic4\", \"Topic4\", \"Topic4\", \"Topic4\", \"Topic4\", \"Topic4\", \"Topic4\", \"Topic4\", \"Topic4\", \"Topic4\", \"Topic4\", \"Topic4\", \"Topic4\", \"Topic5\", \"Topic5\", \"Topic5\", \"Topic5\", \"Topic5\", \"Topic5\", \"Topic5\", \"Topic5\", \"Topic5\", \"Topic5\", \"Topic5\", \"Topic5\", \"Topic5\", \"Topic5\", \"Topic5\", \"Topic5\", \"Topic5\", \"Topic5\", \"Topic5\", 
\"Topic5\", \"Topic5\", \"Topic5\", \"Topic5\", \"Topic5\", \"Topic5\", \"Topic5\", \"Topic5\", \"Topic5\", \"Topic5\", \"Topic5\", \"Topic5\", \"Topic5\", \"Topic5\", \"Topic5\", \"Topic5\", \"Topic5\", \"Topic5\", \"Topic5\", \"Topic5\", \"Topic5\", \"Topic5\", \"Topic5\", \"Topic5\", \"Topic5\", \"Topic5\", \"Topic5\", \"Topic5\", \"Topic5\", \"Topic5\", \"Topic5\", \"Topic5\", \"Topic5\", \"Topic5\", \"Topic5\", \"Topic5\", \"Topic5\", \"Topic5\", \"Topic5\", \"Topic5\", \"Topic5\", \"Topic5\", \"Topic5\", \"Topic5\", \"Topic5\", \"Topic5\", \"Topic5\", \"Topic5\", \"Topic6\", \"Topic6\", \"Topic6\", \"Topic6\", \"Topic6\", \"Topic6\", \"Topic6\", \"Topic6\", \"Topic6\", \"Topic6\", \"Topic6\", \"Topic6\", \"Topic6\", \"Topic6\", \"Topic6\", \"Topic6\", \"Topic6\", \"Topic6\", \"Topic6\", \"Topic6\", \"Topic6\", \"Topic6\", \"Topic6\", \"Topic6\", \"Topic6\", \"Topic6\", \"Topic6\", \"Topic6\", \"Topic6\", \"Topic6\", \"Topic6\", \"Topic6\", \"Topic6\", \"Topic6\", \"Topic6\", \"Topic6\", \"Topic6\", \"Topic6\", \"Topic6\", \"Topic6\", \"Topic6\", \"Topic6\", \"Topic6\", \"Topic6\", \"Topic6\", \"Topic6\", \"Topic6\", \"Topic6\", \"Topic6\", \"Topic6\", \"Topic6\", \"Topic6\", \"Topic6\", \"Topic6\", \"Topic6\", \"Topic6\", \"Topic6\", \"Topic6\", \"Topic6\", \"Topic7\", \"Topic7\", \"Topic7\", \"Topic7\", \"Topic7\", \"Topic7\", \"Topic7\", \"Topic7\", \"Topic7\", \"Topic7\", \"Topic7\", \"Topic7\", \"Topic7\", \"Topic7\", \"Topic7\", \"Topic7\", \"Topic7\", \"Topic7\", \"Topic7\", \"Topic7\", \"Topic7\", \"Topic7\", \"Topic7\", \"Topic7\", \"Topic7\", \"Topic7\", \"Topic7\", \"Topic7\", \"Topic7\", \"Topic7\", \"Topic7\", \"Topic7\", \"Topic7\", \"Topic7\", \"Topic7\", \"Topic7\", \"Topic7\", \"Topic7\", \"Topic7\", \"Topic7\", \"Topic7\", \"Topic7\", \"Topic7\", \"Topic7\", \"Topic7\", \"Topic7\", \"Topic7\", \"Topic7\", \"Topic7\", \"Topic7\", \"Topic7\"], \"logprob\": [30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, 18.0, 17.0, 
16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, -4.9342, -5.1501, -5.1501, -4.6059, -5.4263, -5.4263, -5.4263, -5.4263, -5.4269, -4.9338, -5.8093, -5.8093, -5.8093, -5.8093, -5.8093, -5.8093, -5.8093, -5.8093, -5.8093, -5.8093, -5.8093, -5.8093, -5.8093, -5.8093, -5.8106, -5.1501, -4.6224, -5.4263, -5.4262, -5.4262, -4.1102, -4.5102, -4.756, -4.6053, -4.4743, -4.0896, -5.1499, -5.4262, -5.4263, -4.9495, -5.1501, -5.1499, -5.1502, -4.7564, -5.1878, -5.15, -4.9897, -5.2057, -5.1498, -5.15, -5.1695, -5.2289, -5.2424, -5.3722, -5.4262, -4.5737, -4.7245, -4.9022, -5.3946, -5.3946, -5.3946, -5.3947, -5.3947, -4.7245, -4.7514, -4.9021, -5.7776, -5.7776, -5.7776, -5.7776, -5.7776, -5.7776, -5.7776, -5.7776, -5.7776, -5.7776, -5.7776, -5.7776, -5.7776, -5.7776, -5.7777, -5.1183, -5.3949, -5.3946, -5.3946, -4.1336, -4.0236, -4.7477, -3.8907, -4.5347, -5.1247, -4.8228, -4.902, -5.3946, -4.5736, -4.4249, -5.1184, -5.1183, -3.9182, -4.7245, -4.9019, -4.902, -4.5739, -5.1183, -4.9138, -4.9339, -5.1388, -5.1582, -5.1374, -5.1184, -5.1186, -5.1184, -5.1186, -4.813, -4.813, -5.0292, -5.3055, -5.3055, -5.3055, -5.3055, -5.3056, -5.3071, -4.8129, -5.6884, -5.6884, -5.6884, -5.6884, -5.6884, -5.6884, -5.6884, -5.6884, -5.6884, -5.6884, -5.6884, -5.6884, -5.6884, -5.6885, -5.7295, -4.7901, -6.3171, -6.3171, -6.3171, -6.3171, -5.6221, -3.4483, -4.5313, -5.6884, -5.0292, -4.0395, -4.6348, -5.056, -3.9772, -4.8129, -5.0294, -4.3491, -4.627, -5.0291, -4.8129, -5.0291, -4.8128, -5.305, -4.9919, -5.0025, -5.3053, -5.0291, -5.0406, -5.0294, -5.2176, -5.3052, -5.3053, -5.3053, -5.2145, -5.2145, -5.2145, -4.0471, -5.5975, -5.5975, -5.5975, -5.5975, -5.5975, -5.5975, -5.5977, -5.5977, -4.7219, -5.2144, -4.5442, -4.0428, -6.2261, -6.2261, -6.2261, -6.2261, -6.2261, -6.2261, -6.2261, -6.2261, -6.2261, -6.2261, -6.2261, -6.2261, -6.2261, -6.2261, -4.5443, -4.9381, -5.2143, -5.5974, -5.2144, -4.2619, -4.7219, -4.9383, -5.2145, -4.9381, -4.3934, -4.938, -5.2145, 
-5.3154, -4.176, -4.938, -4.2623, -4.5055, -4.5443, -4.9693, -4.7219, -4.938, -4.512, -4.9381, -4.9499, -4.9389, -5.2143, -5.2143, -5.2143, -5.2144, -5.2144, -4.4496, -5.1198, -5.5028, -5.5028, -5.5028, -5.5028, -5.5028, -5.5028, -5.5028, -5.5028, -5.5031, -5.5113, -6.1314, -6.1314, -6.1314, -6.1314, -6.1314, -6.1314, -6.1314, -6.1314, -6.1314, -6.1314, -6.1314, -6.1314, -6.1314, -6.1314, -6.1314, -6.1314, -6.1314, -6.1314, -4.4713, -4.8434, -5.1196, -5.1195, -5.1197, -5.5027, -4.8719, -5.5026, -5.5027, -5.5027, -5.5026, -5.5027, -5.5029, -5.5026, -5.5027, -5.5027, -5.5027, -5.5028, -4.9036, -4.8431, -4.8433, -4.6271, -4.3512, -4.8442, -5.1196, -5.1968, -4.8435, -5.1196, -4.7279, -5.12, -4.798, -5.1197, -5.2705, -5.5024, -5.5024, -5.5025, -5.5026, -3.5793, -5.0758, -5.0758, -5.0758, -5.0758, -5.0758, -4.584, -5.4588, -5.4588, -5.4588, -5.4588, -5.4588, -5.4588, -5.4588, -4.2547, -4.7994, -6.0874, -6.0874, -6.0874, -6.0874, -6.0874, -6.0874, -6.0874, -6.0874, -6.0874, -6.0874, -6.0874, -6.0874, -6.0874, -6.0874, -5.4587, -5.0757, -5.4587, -5.4587, -5.4587, -5.4587, -3.4986, -4.5831, -5.0756, -4.5836, -5.0757, -4.5922, -5.0756, -5.0754, -3.9769, -5.0755, -4.7991, -4.5829, -5.0101, -4.7992, -5.0755, -4.9468, -5.0548, -5.0756, -5.0257, -5.0757, -4.9598, -5.0371, -5.0718, -3.654, -3.7479, -4.9197, -4.4272, -3.4887, -5.3026, -5.3026, -5.3026, -5.3026, -4.6433, -4.6433, -3.5678, -4.0985, -4.6432, -5.9312, -5.9312, -5.9312, -5.9312, -5.9312, -5.9312, -5.9312, -5.9312, -5.9312, -5.9312, -5.9312, -5.9312, -5.9312, -5.9312, -5.9312, -5.9312, -3.7478, -4.427, -4.0985, -5.3024, -4.4269, -4.1115, -3.6162, -4.6432, -4.6432, -3.8517, -5.3026, -4.6431, -5.3018, -4.9196, -5.3025, -5.3023, -4.9195, -5.2298, -5.3023, -5.3025, -5.3025], \"loglift\": [30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 1.5444, 1.5128, 1.5128, 1.4798, 1.4626, 1.4626, 1.4626, 1.4626, 
1.4621, 1.3941, 1.3723, 1.3723, 1.3723, 1.3723, 1.3723, 1.3723, 1.3723, 1.3723, 1.3723, 1.3723, 1.3723, 1.3723, 1.3723, 1.3723, 1.371, 1.3314, 1.2594, 1.2542, 1.2473, 1.2435, 0.5983, 0.7362, 0.775, 0.6101, 0.4303, 0.1183, 0.8515, 1.0685, 1.0641, 0.5919, 0.7632, 0.7576, 0.6731, 0.1771, 0.6455, 0.5557, 0.0465, 0.3818, 0.1496, 0.0925, 0.0398, 0.3491, 0.0097, 0.0241, 0.2361, 1.63, 1.6134, 1.5909, 1.5074, 1.5074, 1.5074, 1.5074, 1.5074, 1.4951, 1.4497, 1.441, 1.4157, 1.4157, 1.4157, 1.4157, 1.4157, 1.4157, 1.4157, 1.4157, 1.4157, 1.4157, 1.4157, 1.4157, 1.4157, 1.4157, 1.4156, 1.3718, 1.3076, 1.2947, 1.2895, 1.1088, 1.0126, 1.162, 0.8501, 1.0433, 1.2182, 1.0755, 1.099, 1.2787, 0.9651, 0.8272, 1.095, 1.0855, 0.2897, 0.7481, 0.8357, 0.687, 0.1153, 0.8863, 0.3856, 0.3751, 0.6778, 0.6483, 0.5287, 0.3576, 0.3471, 0.0909, -0.4101, 1.6946, 1.6946, 1.6616, 1.6097, 1.6097, 1.6097, 1.6097, 1.6096, 1.608, 1.5477, 1.5165, 1.5165, 1.5165, 1.5165, 1.5165, 1.5165, 1.5165, 1.5165, 1.5165, 1.5165, 1.5165, 1.5165, 1.5165, 1.5165, 1.4786, 1.4233, 1.2976, 1.2976, 1.2976, 1.2976, 1.2855, 1.2408, 1.2065, 1.2549, 1.2036, 0.894, 0.908, 1.0556, 0.2307, 0.8532, 0.9662, 0.3594, 0.5823, 0.8789, 0.6597, 0.8021, 0.4866, 1.043, 0.5956, 0.5987, 0.9366, 0.2799, -0.0043, -0.1248, 0.0289, 0.5013, 0.2837, 0.9105, 1.7152, 1.7152, 1.7152, 1.7035, 1.6205, 1.6205, 1.6205, 1.6205, 1.6205, 1.6205, 1.6202, 1.6202, 1.5148, 1.4879, 1.4784, 1.4332, 1.3984, 1.3984, 1.3984, 1.3984, 1.3984, 1.3984, 1.3984, 1.3984, 1.3984, 1.3984, 1.3984, 1.3984, 1.3984, 1.3984, 1.2888, 1.2895, 1.3077, 1.3413, 1.2972, 1.1572, 1.1768, 1.065, 1.1365, 1.0576, 0.822, 0.9731, 1.0236, 1.0423, 0.5647, 0.8766, 0.4461, 0.3991, 0.1448, 0.6197, 0.2116, 0.5415, -0.3042, 0.2713, 0.2926, -0.2451, 0.0322, 0.7797, 0.644, 0.6168, 0.0946, 1.9431, 1.8303, 1.7333, 1.7333, 1.7333, 1.7333, 1.7333, 1.7333, 1.7333, 1.7333, 1.733, 1.7245, 1.5066, 1.5066, 1.5066, 1.5066, 1.5066, 1.5066, 1.5066, 1.5066, 1.5066, 1.5066, 1.5066, 1.5066, 1.5066, 1.5066, 1.5066, 
1.5066, 1.5066, 1.5066, 1.3912, 1.3918, 1.4083, 1.4082, 1.399, 1.4406, 1.3623, 1.4357, 1.4357, 1.4356, 1.4312, 1.4311, 1.431, 1.4267, 1.4266, 1.4266, 1.4266, 1.4265, 1.3298, 1.3059, 1.0699, 0.8524, 0.5534, 0.6986, 0.992, 1.0417, 0.4559, 0.7914, -0.0341, 0.4188, -0.5901, 0.4113, 0.6278, 0.7304, 1.2108, 1.0488, 0.312, 2.001, 1.8813, 1.8813, 1.8813, 1.8813, 1.8813, 1.8033, 1.7834, 1.7834, 1.7834, 1.7834, 1.7834, 1.7834, 1.7834, 1.7822, 1.7485, 1.5552, 1.5552, 1.5552, 1.5552, 1.5552, 1.5552, 1.5552, 1.5552, 1.5552, 1.5552, 1.5552, 1.5552, 1.5552, 1.5552, 1.5132, 1.4707, 1.4842, 1.4751, 1.4751, 1.4751, 1.1953, 1.2402, 1.3147, 1.1649, 1.2934, 1.0091, 1.1663, 1.1598, 0.231, 1.0576, 0.7396, 0.3506, 0.8975, 0.597, 0.8325, 0.3053, 0.5327, 0.5906, 0.2833, 0.4553, -0.2513, -0.1325, -0.3827, 2.2654, 2.1598, 2.0826, 2.0055, 2.0036, 1.9796, 1.9796, 1.9796, 1.9796, 1.9446, 1.9446, 1.8518, 1.7729, 1.7645, 1.7411, 1.7411, 1.7411, 1.7411, 1.7411, 1.7411, 1.7411, 1.7411, 1.7411, 1.7411, 1.7411, 1.7411, 1.7411, 1.7411, 1.7411, 1.7411, 1.7179, 1.635, 1.6072, 1.6846, 1.4316, 1.2847, 1.0777, 1.2926, 1.2152, 0.889, 1.4568, 0.8982, 1.261, 0.553, 0.9553, 0.9303, 0.2959, -1.0219, 0.8466, 0.8288, 0.5307]}, \"token.table\": {\"Topic\": [4, 6, 4, 1, 5, 2, 2, 3, 4, 6, 1, 1, 2, 4, 5, 6, 3, 1, 3, 5, 6, 5, 3, 1, 3, 4, 6, 7, 2, 7, 7, 7, 4, 6, 6, 5, 4, 7, 5, 1, 2, 5, 3, 5, 5, 1, 3, 6, 3, 2, 5, 5, 3, 1, 2, 4, 5, 6, 2, 3, 6, 4, 6, 7, 6, 3, 5, 7, 1, 3, 4, 5, 6, 7, 1, 6, 1, 4, 6, 7, 2, 1, 2, 3, 4, 6, 4, 6, 3, 3, 4, 5, 2, 1, 2, 3, 6, 7, 2, 3, 4, 5, 7, 1, 1, 2, 4, 5, 4, 2, 3, 4, 5, 6, 7, 5, 4, 7, 1, 3, 1, 2, 4, 2, 2, 7, 6, 2, 1, 4, 6, 7, 1, 6, 4, 1, 2, 3, 4, 3, 5, 6, 7, 5, 4, 2, 3, 3, 1, 5, 2, 5, 4, 4, 5, 5, 1, 1, 3, 4, 7, 3, 1, 6, 4, 7, 2, 7, 7, 4, 3, 2, 1, 4, 6, 7, 1, 7, 1, 2, 3, 4, 6, 2, 5, 6, 6, 3, 3, 1, 3, 5, 7, 1, 6, 5, 1, 2, 4, 5, 6, 7, 2, 4, 5, 3, 5, 6, 7, 7, 3, 4, 5, 7, 2, 2, 3, 5, 6, 5, 1, 6, 1, 2, 3, 4, 1, 5, 1, 2, 4, 6, 1, 3, 1, 3, 4, 5, 7, 2, 7, 1, 2, 3, 7, 2, 5, 3, 5, 3, 4, 5, 6, 5, 1, 2, 3, 
4, 5, 7, 7, 6, 4, 1, 6, 1, 2, 5, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 6, 1, 2, 3, 4, 5, 6, 5, 2, 4, 5, 7, 1, 2, 3, 4, 5, 6, 1, 1, 2, 3, 4, 5, 6, 5, 1, 2, 3, 4, 5, 6, 1, 2, 3, 5, 1, 6, 3, 4, 5, 1, 2, 6, 7, 3, 1, 2, 3, 6, 1, 4, 6, 7, 1, 4, 5, 1, 5, 4, 1, 5, 6, 7, 1, 1, 2, 4, 6, 3, 4, 6, 3, 4, 5, 1, 2, 3, 4, 7, 1, 2, 3, 4, 5, 6, 2, 5, 1, 2, 4, 2, 1, 6, 3, 4, 5, 7, 7, 7, 4, 3, 6, 3, 3, 1, 2, 3, 6, 1, 3, 5, 2, 7, 3, 5, 1, 2, 3, 7, 2, 3, 4, 6, 1, 5, 6, 6, 3, 6, 1, 6, 2, 6, 1, 1, 2, 4, 6, 5, 3, 1, 4, 5, 7, 7, 3, 5, 5, 1, 4, 2, 1, 2, 4, 5, 1, 5, 6, 7, 1, 3, 6, 4, 1, 2, 3, 4, 5, 6, 7, 1, 2, 4, 5, 1, 5, 6, 7, 3, 4, 2, 4, 1, 1, 2, 5, 6, 6, 5, 1, 7, 2, 7, 7, 1, 4, 6, 6, 2, 5, 4, 6, 5, 3, 7, 6, 7, 1, 2, 3, 5, 6, 7, 2, 4, 5, 1, 2, 3, 6, 7, 1, 2, 3, 6, 3, 1, 3, 4, 5, 3, 2, 7, 6, 3, 3, 7, 3, 2, 1, 6, 1, 3, 4, 1, 2, 4, 2, 3, 5, 3, 2, 3, 4, 5, 2, 4, 1, 2, 3, 7, 5, 4, 7, 7, 1, 3, 5, 1, 2, 4, 7, 1, 6, 6, 1, 2, 3, 5, 1, 2, 3, 5, 6, 7, 1, 2, 4, 5, 1, 4, 6, 1, 2, 4, 6, 7, 2, 6, 1, 4, 7, 1, 2, 3, 6, 7, 1, 2, 3, 7, 4, 5, 2, 5, 1, 2, 4, 5, 4, 1, 2, 4, 6, 7, 4, 4, 1, 5, 1, 1, 3, 2, 2, 3, 5, 6, 5, 1, 5, 5, 1, 2, 3, 4, 6, 7, 1, 2, 4, 7, 1, 2, 3, 1, 5, 4, 2, 1, 2, 3, 4, 6, 3, 1, 2, 3, 4, 5, 1, 2, 4, 5, 6, 7, 2, 2, 7, 1, 3, 4, 1, 1, 2, 4, 6, 6, 2, 5, 3, 5, 1, 2, 3, 4, 5, 6, 6, 4, 7, 1, 2, 2, 5, 6, 7, 1, 2, 3, 4, 6, 1, 2, 4, 7, 1, 2, 3, 5, 1, 2, 3, 4, 5], \"Freq\": [0.9024831726352636, 0.6952667393535024, 0.9024978563089652, 0.3381171112901032, 0.6762342225802064, 0.8805280248876959, 0.17994424992446817, 0.5398327497734045, 0.17994424992446817, 0.07197769996978727, 0.6494910333170238, 0.16945673353944862, 0.16945673353944862, 0.16945673353944862, 0.33891346707889725, 0.16945673353944862, 0.890873622788936, 0.20763747257978588, 0.20763747257978588, 0.10381873628989294, 0.41527494515957175, 0.9189811402033553, 0.8908737528240779, 0.40103308992659115, 0.13367769664219703, 0.26735539328439406, 0.13367769664219703, 0.13367769664219703, 0.8805280845903619, 0.48120420048956325, 0.4812042073426775, 
0.7107987837121365, 0.2309207109669992, 0.6927621329009976, 0.6900000207630065, 0.6868275318859564, 0.9024831442348267, 0.710798772862747, 0.6868275318859564, 0.8702860349455532, 0.8805280440765265, 0.6868275269560083, 0.6667393822669738, 0.6868275269560083, 0.6868275269560083, 0.3356390639988153, 0.22375937599921022, 0.44751875199842045, 0.666739358117612, 0.5319719500988397, 0.26598597504941984, 0.9189811022296709, 0.8908736524405602, 0.41759049827253936, 0.08351809965450786, 0.16703619930901573, 0.16703619930901573, 0.16703619930901573, 0.880527966999384, 0.8908736441621172, 0.9246696946318025, 0.6826475723590445, 0.34132378617952225, 0.7107987837121365, 0.6900000779083315, 0.1606926310036857, 0.08034631550184285, 0.7231168395165856, 0.16901847937419517, 0.4225461984354879, 0.08450923968709759, 0.25352771906129273, 0.08450923968709759, 0.08450923968709759, 0.8702860302296686, 0.6900000368701154, 0.23175891025250903, 0.23175891025250903, 0.11587945512625451, 0.3476383653787635, 0.6580593753430801, 0.16890252882621762, 0.16890252882621762, 0.16890252882621762, 0.16890252882621762, 0.33780505765243524, 0.9024832115743714, 0.9246696831408975, 0.8936904628345766, 0.16915087221600963, 0.6766034888640385, 0.16915087221600963, 0.8805304558949748, 0.20369644254938774, 0.5092411063734693, 0.1527723319120408, 0.10184822127469387, 0.050924110637346934, 0.4168479840767529, 0.03789527127970481, 0.26526689895793365, 0.07579054255940962, 0.22737162767822885, 0.87028615153206, 0.16761507262289707, 0.16761507262289707, 0.5028452178686912, 0.16761507262289707, 0.6765090327206189, 0.07231816367054539, 0.036159081835272694, 0.10847724550581808, 0.10847724550581808, 0.3977499001879996, 0.28927265468218155, 0.6868275318859564, 0.2403355260792453, 0.721006578237736, 0.7412222393725195, 0.18530555984312988, 0.32734058555091944, 0.4910108783263792, 0.16367029277545972, 0.6580596036522621, 0.8805280391673747, 0.7107987837121365, 0.6900000779083315, 0.6580595235091288, 0.23172187803747407, 
0.23172187803747407, 0.23172187803747407, 0.3475828170562111, 0.7770865337878774, 0.6900000779083315, 0.6775701982786351, 0.13292673837131835, 0.2658534767426367, 0.2658534767426367, 0.39878021511395506, 0.23169978947437928, 0.46339957894875855, 0.23169978947437928, 0.23169978947437928, 0.918981130176236, 0.6775701982786351, 0.5643193882227743, 0.37621292548184954, 0.8908736194502194, 0.3381171112901032, 0.6762342225802064, 0.8805280248876959, 0.9189811022296709, 0.6775701982786351, 0.34286882449607914, 0.6857376489921583, 0.9189759205361434, 0.8702862774692217, 0.29838363244925614, 0.09946121081641872, 0.09946121081641872, 0.4973060540820936, 0.8908743246771064, 0.6494910365569904, 0.6900000335600371, 0.24033542313545883, 0.7210062694063765, 0.831374347434334, 0.16627486948686682, 0.7107987693501091, 0.902489214335424, 0.6710051083591348, 0.8805280210606434, 0.8617360351643178, 0.6775701884199149, 0.5705429306510895, 0.28527146532554476, 0.8727167127355074, 0.14545278545591792, 0.33021093768780263, 0.11007031256260087, 0.22014062512520174, 0.22014062512520174, 0.11007031256260087, 0.13851508587364986, 0.13851508587364986, 0.6925754293682493, 0.6900000368701154, 0.6667375428929138, 0.6667393868225475, 0.1492640101750547, 0.44779203052516414, 0.2985280203501094, 0.7107987837121365, 0.33965464194075395, 0.6793092838815079, 0.6868275318859564, 0.22225010938719747, 0.3333751640807962, 0.22225010938719747, 0.11112505469359873, 0.22225010938719747, 0.710798772862747, 0.22499310432327474, 0.4499862086465495, 0.22499310432327474, 0.234373236621061, 0.234373236621061, 0.234373236621061, 0.234373236621061, 0.7107987693501091, 0.13654881994497686, 0.6827440997248844, 0.13654881994497686, 0.13654881994497686, 0.6580596036522621, 0.4208388037444098, 0.08416776074888196, 0.16833552149776393, 0.25250328224664587, 0.6868275318859564, 0.8702860544822666, 0.6900000335600371, 0.18907156312582513, 0.18907156312582513, 0.37814312625165025, 0.18907156312582513, 0.3381161811453612, 
0.6762323622907224, 0.18776069601207265, 0.5632820880362179, 0.18776069601207265, 0.0625868986706909, 0.521644819383, 0.2608224096915, 0.11276131828948147, 0.33828395486844437, 0.22552263657896293, 0.22552263657896293, 0.11276131828948147, 0.880527966999384, 0.8621315470904019, 0.1494239416215018, 0.0747119708107509, 0.1494239416215018, 0.5976957664860072, 0.5057458371463348, 0.5057458371463348, 0.3411803396564211, 0.6823606793128422, 0.8871050896620888, 0.23056362530799757, 0.23056362530799757, 0.46112725061599513, 0.6868275318859564, 0.14935963217649292, 0.14935963217649292, 0.07467981608824646, 0.44807889652947874, 0.14935963217649292, 0.07467981608824646, 0.4812042285946867, 0.9246695694575614, 0.6775701934806871, 0.5272866146624591, 0.26364330733122954, 0.43786983838118626, 0.21893491919059313, 0.21893491919059313, 0.10267887260412466, 0.41071549041649863, 0.10267887260412466, 0.20535774520824931, 0.10267887260412466, 0.20535774520824931, 0.2200279903722897, 0.33004198555843456, 0.2200279903722897, 0.11001399518614485, 0.11001399518614485, 0.07932612431725553, 0.15865224863451105, 0.15865224863451105, 0.23797837295176655, 0.3173044972690221, 0.07932612431725553, 0.9189811192768197, 0.14203269794258916, 0.14203269794258916, 0.2840653958851783, 0.4260980938277675, 0.13378018082354232, 0.26756036164708463, 0.20067027123531347, 0.13378018082354232, 0.13378018082354232, 0.13378018082354232, 0.8702860161902406, 0.26784580091974564, 0.044640966819957606, 0.13392290045987282, 0.22320483409978803, 0.22320483409978803, 0.08928193363991521, 0.9189811257632743, 0.37702457175933, 0.12567485725311, 0.188512285879665, 0.12567485725311, 0.062837428626555, 0.12567485725311, 0.16564521121028483, 0.33129042242056966, 0.33129042242056966, 0.16564521121028483, 0.33965464194075395, 0.6793092838815079, 0.10403447091961739, 0.7282412964373217, 0.10403447091961739, 0.16323715841916592, 0.8161857920958295, 0.7056677350067907, 0.35283386750339535, 0.671005112145126, 0.12176898442474632, 
0.24353796884949264, 0.36530695327423895, 0.24353796884949264, 0.3387673162516492, 0.3387673162516492, 0.1693836581258246, 0.1693836581258246, 0.22633944415432036, 0.22633944415432036, 0.4526788883086407, 0.4485167783921093, 0.4485167783921093, 0.6775701982786351, 0.27239469062773664, 0.5447893812554733, 0.27239469062773664, 0.710798772862747, 0.870286108516852, 0.18963318495454276, 0.3792663699090855, 0.3792663699090855, 0.6900000779083315, 0.3400845381043042, 0.1700422690521521, 0.3400845381043042, 0.5054505896027134, 0.16848352986757115, 0.3369670597351423, 0.3375466306976113, 0.16877331534880566, 0.16877331534880566, 0.16877331534880566, 0.2531599730232085, 0.18958285070485992, 0.37916570140971984, 0.12638856713657326, 0.06319428356828663, 0.06319428356828663, 0.18958285070485992, 0.33965133110633683, 0.6793026622126737, 0.43597114885939414, 0.21798557442969707, 0.21798557442969707, 0.880527966999384, 0.1966499868366092, 0.7865999473464368, 0.34552303501210774, 0.34552303501210774, 0.17276151750605387, 0.17276151750605387, 0.7107987837121365, 0.7107987693501091, 0.676508978116363, 0.3427452985629229, 0.6854905971258458, 0.8908736524405602, 0.8908736093698809, 0.1652457746792655, 0.49573732403779647, 0.1652457746792655, 0.1652457746792655, 0.8702862739022277, 0.34118091482212815, 0.6823618296442563, 0.20576975382036844, 0.6173092614611053, 0.8908736439940395, 0.6868275318859564, 0.33693657226558305, 0.16846828613279152, 0.16846828613279152, 0.16846828613279152, 0.28681798373982653, 0.3824239783197687, 0.09560599457994218, 0.19121198915988435, 0.3672276489519562, 0.3672276489519562, 0.24481843263463748, 0.6952666883100227, 0.7658833342007798, 0.19147083355019495, 0.8702860873673123, 0.6952666883100227, 0.6580593753430801, 0.6900000335600371, 0.6494909740349817, 0.26260498364399604, 0.5252099672879921, 0.17506998909599736, 0.08753499454799868, 0.6868275318859564, 0.6667238637739937, 0.2259481328148933, 0.5648703320372332, 0.2259481328148933, 0.11297406640744664, 
0.7107987693501091, 0.3411799155217199, 0.6823598310434398, 0.9189811192768197, 0.5236144074079279, 0.26180720370396393, 0.9358290124220203, 0.2617480335203006, 0.5234960670406013, 0.902483269244082, 0.6868275318859564, 0.1549361227238873, 0.46480836817166193, 0.1549361227238873, 0.1549361227238873, 0.6494910152943318, 0.8908736972509964, 0.6952667356407403, 0.9024830659030793, 0.20017539150632915, 0.24465881184106897, 0.20017539150632915, 0.11120855083684952, 0.06672513050210972, 0.15569197117158934, 0.044483420334739815, 0.2442616296140003, 0.12213081480700015, 0.36639244442100044, 0.2442616296140003, 0.3044372236724344, 0.1522186118362172, 0.3044372236724344, 0.1522186118362172, 0.671005112145126, 0.6775701982786351, 0.5291718996332525, 0.26458594981662625, 0.870286070489084, 0.13411387795929436, 0.4023416338778831, 0.2682277559185887, 0.2682277559185887, 0.6900000779083315, 0.6868275509471827, 0.4013690463913396, 0.6020535695870093, 0.5390190927963558, 0.2695095463981779, 0.710798772862747, 0.3651139341209717, 0.24340928941398116, 0.24340928941398116, 0.6900000408734894, 0.33965006857786706, 0.6793001371557341, 0.6775701934806871, 0.6900000779083315, 0.6868275509471827, 0.6858146555709679, 0.34290732778548394, 0.35819690582516295, 0.35819690582516295, 0.3389849575652198, 0.11299498585507327, 0.11299498585507327, 0.11299498585507327, 0.22598997171014654, 0.11299498585507327, 0.23714879637303385, 0.6323967903280903, 0.15809919758202257, 0.21896442630734128, 0.1459762842048942, 0.0729881421024471, 0.21896442630734128, 0.3649407105122355, 0.2651293247375301, 0.1767528831583534, 0.2651293247375301, 0.1767528831583534, 0.6710051083591348, 0.22268234829793296, 0.22268234829793296, 0.4453646965958659, 0.6868275509471827, 0.8871059572876324, 0.12171556135933764, 0.8520089295153636, 0.9246697059113104, 0.8908736672539912, 0.8908735904565416, 0.48120419887966337, 0.8908735779726771, 0.8805280724362109, 0.7770865004940893, 0.9246696331169834, 0.8702860077400858, 
0.26946647635425863, 0.5389329527085173, 0.2179432839163813, 0.6538298517491439, 0.90248319276785, 0.22628545738443034, 0.22628545738443034, 0.4525709147688607, 0.8908735993178458, 0.10271054386022108, 0.5135527193011054, 0.20542108772044215, 0.10271054386022108, 0.7525532472121419, 0.18813831180303547, 0.26582244210342915, 0.26582244210342915, 0.3987336631551437, 0.13291122105171457, 0.9188292826722142, 0.6775701884199149, 0.710798772862747, 0.7274354857112277, 0.5930643958152539, 0.23722575832610157, 0.11861287916305079, 0.07823974885322627, 0.2347192465596788, 0.15647949770645253, 0.5476782419725839, 0.33965442901527815, 0.6793088580305563, 0.6900000779083315, 0.1687381805382876, 0.1687381805382876, 0.1687381805382876, 0.5062145416148628, 0.12058883765758173, 0.4823553506303269, 0.12058883765758173, 0.24117767531516346, 0.8773846006145235, 0.08773846006145236, 0.2218099841722707, 0.2218099841722707, 0.33271497625840607, 0.2218099841722707, 0.3861525688515081, 0.19307628442575406, 0.3861525688515081, 0.8703080164050186, 0.6098431441573038, 0.24393725766292151, 0.12196862883146076, 0.12196862883146076, 0.880527966999384, 0.9246696587863016, 0.23478940836704335, 0.11739470418352167, 0.5869735209176083, 0.8702860302296686, 0.19722266739167765, 0.19722266739167765, 0.3944453347833553, 0.19722266739167765, 0.15757308791670044, 0.3939327197917511, 0.3151461758334009, 0.15757308791670044, 0.6775701934806871, 0.7908673498296698, 0.8805280562306768, 0.6868275318859564, 0.19090504919401904, 0.19090504919401904, 0.3818100983880381, 0.19090504919401904, 0.6775701934806871, 0.3654733694557265, 0.12182445648524216, 0.3045611412131054, 0.06091222824262108, 0.12182445648524216, 0.6775701884199149, 0.6775701934806871, 0.3381163743220749, 0.6762327486441498, 0.6494984201115057, 0.33087106692386753, 0.6617421338477351, 0.8805280919095917, 0.534565815950306, 0.1336414539875765, 0.1336414539875765, 0.1336414539875765, 0.6868275318859564, 0.8702860302296686, 0.6904463870134386, 
0.6868275509471827, 0.18163664752603326, 0.18163664752603326, 0.3027277458767221, 0.18163664752603326, 0.12109109835068885, 0.060545549175344424, 0.267878575025615, 0.267878575025615, 0.4018178625384225, 0.1339392875128075, 0.16524072029795478, 0.16524072029795478, 0.6609628811918191, 0.3381185842789002, 0.6762371685578004, 0.6765089992603949, 0.8805280458185432, 0.17919173202425118, 0.17919173202425118, 0.2687875980363768, 0.08959586601212559, 0.35838346404850235, 0.6667393761230569, 0.08850263996952416, 0.35401055987809665, 0.17700527993904833, 0.2655079199085725, 0.08850263996952416, 0.15249611121552706, 0.15249611121552706, 0.15249611121552706, 0.15249611121552706, 0.3049922224310541, 0.15249611121552706, 0.8743189493877126, 0.8182318745720175, 0.7107987837121365, 0.2412565528063364, 0.2412565528063364, 0.4825131056126728, 0.8702861079324131, 0.2653882164189722, 0.2653882164189722, 0.2653882164189722, 0.1326941082094861, 0.9246696975503946, 0.3396525085072951, 0.6793050170145902, 0.7990712012755892, 0.9189810664834654, 0.1904648877448493, 0.1904648877448493, 0.1904648877448493, 0.09523244387242465, 0.1904648877448493, 0.1904648877448493, 0.6952667133378477, 0.6775701884199149, 0.710798772862747, 0.6481704210699136, 0.21605680702330454, 0.11635196886487018, 0.4654078754594807, 0.23270393772974035, 0.11635196886487018, 0.22976018358230244, 0.04595203671646049, 0.3676162937316839, 0.18380814686584196, 0.18380814686584196, 0.12519977424343245, 0.2503995484868649, 0.2503995484868649, 0.3755993227302974, 0.1688960402731812, 0.1688960402731812, 0.1688960402731812, 0.5066881208195436, 0.1987617619314263, 0.26501568257523506, 0.26501568257523506, 0.06625392064380876, 0.1987617619314263], \"Term\": [\"able\", \"academic\", \"accountability\", \"adult\", \"adult\", \"advertising\", \"advice\", \"advice\", \"advice\", \"advice\", \"affordable\", \"age\", \"age\", \"age\", \"age\", \"age\", \"agency\", \"app\", \"app\", \"app\", \"app\", \"appropriate\", \"architecture\", 
\"area\", \"area\", \"area\", \"area\", \"area\", \"asset\", \"association\", \"assumption\", \"authentic\", \"bar\", \"bar\", \"bartender\", \"bearish\", \"behavioral\", \"benefit\", \"bet\", \"birthday\", \"bond\", \"bonus\", \"boss\", \"boutique\", \"bra\", \"brand\", \"brand\", \"brand\", \"break\", \"broker\", \"broker\", \"bubble\", \"build\", \"business\", \"business\", \"business\", \"business\", \"business\", \"buyer\", \"campus\", \"canadian\", \"cannabis\", \"cannabis\", \"capability\", \"capture\", \"care\", \"care\", \"care\", \"career\", \"career\", \"career\", \"career\", \"career\", \"career\", \"celebration\", \"changed\", \"chat\", \"chat\", \"chat\", \"chat\", \"chief\", \"city\", \"city\", \"city\", \"city\", \"city\", \"civic\", \"clarity\", \"coach\", \"com\", \"com\", \"com\", \"common\", \"community\", \"community\", \"community\", \"community\", \"community\", \"company\", \"company\", \"company\", \"company\", \"company\", \"conduct\", \"connect\", \"connect\", \"connect\", \"connect\", \"connected\", \"connection\", \"connection\", \"connection\", \"connection\", \"connection\", \"connection\", \"consignment\", \"construction\", \"construction\", \"contact\", \"contact\", \"content\", \"content\", \"content\", \"convoy\", \"core\", \"credibility\", \"crew\", \"curious\", \"current\", \"current\", \"current\", \"current\", \"curriculum\", \"data\", \"daysmarketingideaschallenge\", \"design\", \"design\", \"design\", \"design\", \"designer\", \"designer\", \"designer\", \"designer\", \"developmental\", \"devpost\", \"digital\", \"digital\", \"director\", \"disability\", \"disability\", \"dispatcher\", \"diverse\", \"divyankjain\", \"document\", \"document\", \"dress\", \"dynamic\", \"early\", \"early\", \"early\", \"early\", \"education\", \"educator\", \"effort\", \"email\", \"email\", \"employee\", \"employee\", \"empowerment\", \"engineering\", \"entrepreneurial\", \"estate\", \"event\", \"exam\", \"example\", \"example\", \"executive\", 
\"executive\", \"experience\", \"experience\", \"experience\", \"experience\", \"experience\", \"expert\", \"expert\", \"expert\", \"external\", \"eye\", \"facility\", \"family\", \"family\", \"family\", \"fast\", \"favorite\", \"favorite\", \"fear\", \"field\", \"field\", \"field\", \"field\", \"field\", \"fintech\", \"first\", \"first\", \"first\", \"food\", \"food\", \"food\", \"food\", \"foster\", \"founder\", \"founder\", \"founder\", \"founder\", \"freight\", \"friend\", \"friend\", \"friend\", \"friend\", \"furniture\", \"game\", \"gas\", \"general\", \"general\", \"general\", \"general\", \"goal\", \"goal\", \"good\", \"good\", \"good\", \"good\", \"government\", \"government\", \"grade\", \"grade\", \"grade\", \"grade\", \"grade\", \"grader\", \"grandparent\", \"great\", \"great\", \"great\", \"great\", \"group\", \"group\", \"gsb\", \"gsb\", \"gsber\", \"habit\", \"habit\", \"habit\", \"hardware\", \"help\", \"help\", \"help\", \"help\", \"help\", \"help\", \"hemp\", \"horse\", \"host\", \"hour\", \"hour\", \"housing\", \"housing\", \"housing\", \"idea\", \"idea\", \"idea\", \"idea\", \"idea\", \"idea\", \"individual\", \"individual\", \"individual\", \"individual\", \"individual\", \"industry\", \"industry\", \"industry\", \"industry\", \"industry\", \"industry\", \"information\", \"insurance\", \"insurance\", \"insurance\", \"insurance\", \"interested\", \"interested\", \"interested\", \"interested\", \"interested\", \"interested\", \"interesting\", \"interview\", \"interview\", \"interview\", \"interview\", \"interview\", \"interview\", \"introduce\", \"introduction\", \"introduction\", \"introduction\", \"introduction\", \"introduction\", \"introduction\", \"investor\", \"investor\", \"investor\", \"investor\", \"jean\", \"jean\", \"job\", \"job\", \"job\", \"kid\", \"kid\", \"knowledgeable\", \"knowledgeable\", \"lack\", \"language\", \"language\", \"language\", \"language\", \"leader\", \"leader\", \"leader\", \"leader\", \"level\", \"level\", 
\"level\", \"life\", \"life\", \"link\", \"long\", \"long\", \"long\", \"look\", \"looking\", \"love\", \"love\", \"love\", \"madewell\", \"male\", \"male\", \"male\", \"man\", \"man\", \"man\", \"management\", \"management\", \"management\", \"management\", \"management\", \"manager\", \"manager\", \"manager\", \"manager\", \"manager\", \"manager\", \"many\", \"many\", \"market\", \"market\", \"market\", \"match\", \"medium\", \"medium\", \"member\", \"member\", \"member\", \"member\", \"misaligned\", \"mission\", \"mobile\", \"money\", \"money\", \"moodle\", \"movie\", \"msx\", \"msx\", \"msx\", \"msx\", \"multiple\", \"music\", \"music\", \"name\", \"name\", \"nanny\", \"ncov\", \"network\", \"network\", \"network\", \"network\", \"new\", \"new\", \"new\", \"new\", \"next\", \"next\", \"next\", \"nibh\", \"non\", \"non\", \"nonprofit\", \"nunc\", \"officer\", \"oil\", \"old\", \"online\", \"online\", \"online\", \"online\", \"openness\", \"operator\", \"opportunity\", \"opportunity\", \"opportunity\", \"opportunity\", \"optimism\", \"option\", \"option\", \"oracle\", \"organization\", \"organization\", \"pain\", \"parent\", \"parent\", \"participant\", \"particular\", \"partner\", \"partner\", \"partner\", \"partner\", \"party\", \"passionate\", \"pattern\", \"peer\", \"people\", \"people\", \"people\", \"people\", \"people\", \"people\", \"people\", \"person\", \"person\", \"person\", \"person\", \"personal\", \"personal\", \"personal\", \"personal\", \"phase\", \"ping\", \"place\", \"place\", \"podcast\", \"point\", \"point\", \"point\", \"point\", \"popular\", \"portal\", \"possible\", \"possible\", \"potential\", \"potential\", \"powerful\", \"practice\", \"practice\", \"practice\", \"practitioner\", \"preparation\", \"preparation\", \"present\", \"price\", \"prior\", \"private\", \"private\", \"problem\", \"problem\", \"process\", \"process\", \"process\", \"process\", \"process\", \"process\", \"product\", \"product\", \"product\", \"professional\", 
\"professional\", \"professional\", \"professional\", \"professional\", \"program\", \"program\", \"program\", \"program\", \"programmatic\", \"project\", \"project\", \"project\", \"prompt\", \"prototype\", \"provider\", \"provider\", \"psychologist\", \"psychology\", \"pursue\", \"racer\", \"rating\", \"real\", \"reciprocity\", \"record\", \"rental\", \"research\", \"research\", \"resource\", \"resource\", \"response\", \"restaurant\", \"restaurant\", \"restaurant\", \"riverbank\", \"role\", \"role\", \"role\", \"role\", \"scale\", \"scale\", \"school\", \"school\", \"school\", \"school\", \"schooler\", \"season\", \"seeking\", \"self\", \"senior\", \"senior\", \"senior\", \"service\", \"service\", \"service\", \"service\", \"size\", \"size\", \"sizing\", \"skill\", \"skill\", \"skill\", \"skill\", \"small\", \"small\", \"small\", \"small\", \"social\", \"social\", \"software\", \"software\", \"software\", \"software\", \"solution\", \"solution\", \"solution\", \"source\", \"space\", \"space\", \"space\", \"space\", \"special\", \"specialist\", \"stage\", \"stage\", \"stage\", \"standard\", \"start\", \"start\", \"start\", \"start\", \"startup\", \"startup\", \"startup\", \"startup\", \"station\", \"step\", \"stock\", \"store\", \"strategy\", \"strategy\", \"strategy\", \"strategy\", \"structure\", \"student\", \"student\", \"student\", \"student\", \"student\", \"study\", \"sunday\", \"supply\", \"supply\", \"sure\", \"system\", \"system\", \"talent\", \"talk\", \"talk\", \"talk\", \"talk\", \"tank\", \"task\", \"tax\", \"taxis\", \"team\", \"team\", \"team\", \"team\", \"team\", \"team\", \"tech\", \"tech\", \"tech\", \"tech\", \"technology\", \"technology\", \"technology\", \"term\", \"term\", \"test\", \"testing\", \"thank\", \"thank\", \"thank\", \"thank\", \"thank\", \"thumb\", \"time\", \"time\", \"time\", \"time\", \"time\", \"tool\", \"tool\", \"tool\", \"tool\", \"tool\", \"tool\", \"truck\", \"trucking\", \"trust\", \"user\", \"user\", \"user\", 
\"various\", \"video\", \"video\", \"video\", \"video\", \"visa\", \"voice\", \"voice\", \"volunteer\", \"wall\", \"way\", \"way\", \"way\", \"way\", \"way\", \"way\", \"website\", \"week\", \"weight\", \"well\", \"well\", \"willing\", \"willing\", \"willing\", \"willing\", \"woman\", \"woman\", \"woman\", \"woman\", \"woman\", \"work\", \"work\", \"work\", \"work\", \"world\", \"world\", \"world\", \"world\", \"year\", \"year\", \"year\", \"year\", \"year\"]}, \"R\": 30, \"lambda.step\": 0.01, \"plot.opts\": {\"xlab\": \"PC1\", \"ylab\": \"PC2\"}, \"topic.order\": [5, 4, 2, 6, 1, 3, 7]};\n", | |
"\n", | |
"function LDAvis_load_lib(url, callback){\n", | |
" var s = document.createElement('script');\n", | |
" s.src = url;\n", | |
" s.async = true;\n", | |
" s.onreadystatechange = s.onload = callback;\n", | |
" s.onerror = function(){console.warn(\"failed to load library \" + url);};\n", | |
" document.getElementsByTagName(\"head\")[0].appendChild(s);\n", | |
"}\n", | |
"\n", | |
"if(typeof(LDAvis) !== \"undefined\"){\n", | |
" // already loaded: just create the visualization\n", | |
" !function(LDAvis){\n", | |
" new LDAvis(\"#\" + \"ldavis_el2941400786923529763956816973\", ldavis_el2941400786923529763956816973_data);\n", | |
" }(LDAvis);\n", | |
"}else if(typeof define === \"function\" && define.amd){\n", | |
" // require.js is available: use it to load d3/LDAvis\n", | |
" require.config({paths: {d3: \"https://d3js.org/d3.v5\"}});\n", | |
" require([\"d3\"], function(d3){\n", | |
" window.d3 = d3;\n", | |
" LDAvis_load_lib(\"https://cdn.jsdelivr.net/gh/bmabey/[email protected]/pyLDAvis/js/ldavis.v3.0.0.js\", function(){\n", | |
" new LDAvis(\"#\" + \"ldavis_el2941400786923529763956816973\", ldavis_el2941400786923529763956816973_data);\n", | |
" });\n", | |
" });\n", | |
"}else{\n", | |
" // require.js not available: dynamically load d3 & LDAvis\n", | |
" LDAvis_load_lib(\"https://d3js.org/d3.v5.js\", function(){\n", | |
" LDAvis_load_lib(\"https://cdn.jsdelivr.net/gh/bmabey/[email protected]/pyLDAvis/js/ldavis.v3.0.0.js\", function(){\n", | |
" new LDAvis(\"#\" + \"ldavis_el2941400786923529763956816973\", ldavis_el2941400786923529763956816973_data);\n", | |
" })\n", | |
" });\n", | |
"}\n", | |
"</script>" | |
], | |
"text/plain": [ | |
"PreparedData(topic_coordinates= x y topics cluster Freq\n", | |
"topic \n", | |
"4 -0.020692 -0.024726 1 1 18.468724\n", | |
"3 -0.047020 0.123525 2 1 17.586616\n", | |
"1 -0.099826 0.013288 3 1 15.809766\n", | |
"5 -0.021617 0.052475 4 1 14.158737\n", | |
"0 -0.003618 -0.114090 5 1 12.533247\n", | |
"2 0.006337 -0.093027 6 1 11.882350\n", | |
"6 0.186437 0.042556 7 1 9.560560, topic_info= Term Freq Total Category logprob loglift\n", | |
"20 connection 27.000000 27.000000 Default 30.0000 30.0000\n", | |
"362 care 12.000000 12.000000 Default 29.0000 29.0000\n", | |
"48 social 11.000000 11.000000 Default 28.0000 28.0000\n", | |
"0 advice 27.000000 27.000000 Default 27.0000 27.0000\n", | |
"380 great 13.000000 13.000000 Default 26.0000 26.0000\n", | |
".. ... ... ... ... ... ...\n", | |
"358 student 2.109938 16.417065 Topic7 -4.9195 0.2959\n", | |
"90 people 1.547051 44.960571 Topic7 -5.2298 -1.0219\n", | |
"278 partner 1.438808 6.454273 Topic7 -5.3023 0.8466\n", | |
"66 personal 1.438591 6.569499 Topic7 -5.3025 0.8288\n", | |
"67 process 1.438545 8.849950 Topic7 -5.3025 0.5307\n", | |
"\n", | |
"[439 rows x 6 columns], token_table= Topic Freq Term\n", | |
"term \n", | |
"879 4 0.902483 able\n", | |
"19 6 0.695267 academic\n", | |
"228 4 0.902498 accountability\n", | |
"453 1 0.338117 adult\n", | |
"453 5 0.676234 adult\n", | |
"... ... ... ...\n", | |
"198 1 0.198762 year\n", | |
"198 2 0.265016 year\n", | |
"198 3 0.265016 year\n", | |
"198 4 0.066254 year\n", | |
"198 5 0.198762 year\n", | |
"\n", | |
"[703 rows x 3 columns], R=30, lambda_step=0.01, plot_opts={'xlab': 'PC1', 'ylab': 'PC2'}, topic_order=[5, 4, 2, 6, 1, 3, 7])" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 27 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "bYVPuhuhJv3d", | |
"outputId": "088154dd-5fed-44a4-bf02-95b3c0a65c72" | |
}, | |
"source": [ | |
"# Install SpikeX into the running kernel's environment (%pip, not !pip),\n", | |
"# pinned to the release this notebook was last executed against.\n", | |
"%pip install spikex==0.5.1" | |
], | |
"id": "bYVPuhuhJv3d", | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Requirement already satisfied: spikex in /usr/local/lib/python3.7/dist-packages (0.5.1)\n", | |
"Requirement already satisfied: smart-open in /usr/local/lib/python3.7/dist-packages (from spikex) (5.0.0)\n", | |
"Requirement already satisfied: bidict in /usr/local/lib/python3.7/dist-packages (from spikex) (0.21.2)\n", | |
"Requirement already satisfied: regex in /usr/local/lib/python3.7/dist-packages (from spikex) (2019.12.20)\n", | |
"Requirement already satisfied: scipy in /usr/local/lib/python3.7/dist-packages (from spikex) (1.4.1)\n", | |
"Requirement already satisfied: spacy>2.3 in /usr/local/lib/python3.7/dist-packages (from spikex) (3.0.6)\n", | |
"Requirement already satisfied: numpy in /usr/local/lib/python3.7/dist-packages (from spikex) (1.20.3)\n", | |
"Requirement already satisfied: typer in /usr/local/lib/python3.7/dist-packages (from spikex) (0.3.2)\n", | |
"Requirement already satisfied: wasabi in /usr/local/lib/python3.7/dist-packages (from spikex) (0.8.2)\n", | |
"Requirement already satisfied: cyac in /usr/local/lib/python3.7/dist-packages (from spikex) (1.3)\n", | |
"Requirement already satisfied: jsonschema in /usr/local/lib/python3.7/dist-packages (from spikex) (2.6.0)\n", | |
"Requirement already satisfied: yarl in /usr/local/lib/python3.7/dist-packages (from spikex) (1.6.3)\n", | |
"Requirement already satisfied: memory-profiler in /usr/local/lib/python3.7/dist-packages (from spikex) (0.58.0)\n", | |
"Requirement already satisfied: gensim>=4.0 in /usr/local/lib/python3.7/dist-packages (from spikex) (4.0.1)\n", | |
"Requirement already satisfied: tqdm<5.0.0,>=4.38.0 in /usr/local/lib/python3.7/dist-packages (from spacy>2.3->spikex) (4.41.1)\n", | |
"Requirement already satisfied: pydantic<1.8.0,>=1.7.1 in /usr/local/lib/python3.7/dist-packages (from spacy>2.3->spikex) (1.7.4)\n", | |
"Requirement already satisfied: pathy>=0.3.5 in /usr/local/lib/python3.7/dist-packages (from spacy>2.3->spikex) (0.5.2)\n", | |
"Requirement already satisfied: blis<0.8.0,>=0.4.0 in /usr/local/lib/python3.7/dist-packages (from spacy>2.3->spikex) (0.4.1)\n", | |
"Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /usr/local/lib/python3.7/dist-packages (from spacy>2.3->spikex) (1.0.5)\n", | |
"Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.7/dist-packages (from spacy>2.3->spikex) (20.9)\n", | |
"Requirement already satisfied: catalogue<2.1.0,>=2.0.3 in /usr/local/lib/python3.7/dist-packages (from spacy>2.3->spikex) (2.0.4)\n", | |
"Requirement already satisfied: setuptools in /usr/local/lib/python3.7/dist-packages (from spacy>2.3->spikex) (56.1.0)\n", | |
"Requirement already satisfied: thinc<8.1.0,>=8.0.3 in /usr/local/lib/python3.7/dist-packages (from spacy>2.3->spikex) (8.0.3)\n", | |
"Requirement already satisfied: srsly<3.0.0,>=2.4.1 in /usr/local/lib/python3.7/dist-packages (from spacy>2.3->spikex) (2.4.1)\n", | |
"Requirement already satisfied: preshed<3.1.0,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from spacy>2.3->spikex) (3.0.5)\n", | |
"Requirement already satisfied: typing-extensions<4.0.0.0,>=3.7.4; python_version < \"3.8\" in /usr/local/lib/python3.7/dist-packages (from spacy>2.3->spikex) (3.7.4.3)\n", | |
"Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from spacy>2.3->spikex) (2.0.5)\n", | |
"Requirement already satisfied: jinja2 in /usr/local/lib/python3.7/dist-packages (from spacy>2.3->spikex) (2.11.3)\n", | |
"Requirement already satisfied: requests<3.0.0,>=2.13.0 in /usr/local/lib/python3.7/dist-packages (from spacy>2.3->spikex) (2.23.0)\n", | |
"Requirement already satisfied: spacy-legacy<3.1.0,>=3.0.4 in /usr/local/lib/python3.7/dist-packages (from spacy>2.3->spikex) (3.0.5)\n", | |
"Requirement already satisfied: click<7.2.0,>=7.1.1 in /usr/local/lib/python3.7/dist-packages (from typer->spikex) (7.1.2)\n", | |
"Requirement already satisfied: cython in /usr/local/lib/python3.7/dist-packages (from cyac->spikex) (0.29.23)\n", | |
"Requirement already satisfied: idna>=2.0 in /usr/local/lib/python3.7/dist-packages (from yarl->spikex) (2.10)\n", | |
"Requirement already satisfied: multidict>=4.0 in /usr/local/lib/python3.7/dist-packages (from yarl->spikex) (5.1.0)\n", | |
"Requirement already satisfied: psutil in /usr/local/lib/python3.7/dist-packages (from memory-profiler->spikex) (5.4.8)\n", | |
"Requirement already satisfied: pyparsing>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging>=20.0->spacy>2.3->spikex) (2.4.7)\n", | |
"Requirement already satisfied: zipp>=0.5; python_version < \"3.8\" in /usr/local/lib/python3.7/dist-packages (from catalogue<2.1.0,>=2.0.3->spacy>2.3->spikex) (3.4.1)\n", | |
"Requirement already satisfied: MarkupSafe>=0.23 in /usr/local/lib/python3.7/dist-packages (from jinja2->spacy>2.3->spikex) (2.0.1)\n", | |
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.13.0->spacy>2.3->spikex) (2020.12.5)\n", | |
"Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.13.0->spacy>2.3->spikex) (1.24.3)\n", | |
"Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.13.0->spacy>2.3->spikex) (3.0.4)\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "o_qZStt8KMXR", | |
"outputId": "e9ed2cb1-43d7-44dd-ad9e-4602b98f6def" | |
}, | |
"source": [ | |
"!python -m spacy download en_core_web_sm" | |
], | |
"id": "o_qZStt8KMXR", | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Collecting en-core-web-sm==3.0.0\n", | |
"\u001b[?25l Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.0.0/en_core_web_sm-3.0.0-py3-none-any.whl (13.7MB)\n", | |
"\u001b[K |████████████████████████████████| 13.7MB 25.7MB/s \n", | |
"\u001b[?25hRequirement already satisfied: spacy<3.1.0,>=3.0.0 in /usr/local/lib/python3.7/dist-packages (from en-core-web-sm==3.0.0) (3.0.6)\n", | |
"Requirement already satisfied: spacy-legacy<3.1.0,>=3.0.4 in /usr/local/lib/python3.7/dist-packages (from spacy<3.1.0,>=3.0.0->en-core-web-sm==3.0.0) (3.0.5)\n", | |
"Requirement already satisfied: requests<3.0.0,>=2.13.0 in /usr/local/lib/python3.7/dist-packages (from spacy<3.1.0,>=3.0.0->en-core-web-sm==3.0.0) (2.23.0)\n", | |
"Requirement already satisfied: preshed<3.1.0,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from spacy<3.1.0,>=3.0.0->en-core-web-sm==3.0.0) (3.0.5)\n", | |
"Requirement already satisfied: typer<0.4.0,>=0.3.0 in /usr/local/lib/python3.7/dist-packages (from spacy<3.1.0,>=3.0.0->en-core-web-sm==3.0.0) (0.3.2)\n", | |
"Requirement already satisfied: numpy>=1.15.0 in /usr/local/lib/python3.7/dist-packages (from spacy<3.1.0,>=3.0.0->en-core-web-sm==3.0.0) (1.20.3)\n", | |
"Requirement already satisfied: tqdm<5.0.0,>=4.38.0 in /usr/local/lib/python3.7/dist-packages (from spacy<3.1.0,>=3.0.0->en-core-web-sm==3.0.0) (4.41.1)\n", | |
"Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from spacy<3.1.0,>=3.0.0->en-core-web-sm==3.0.0) (2.0.5)\n", | |
"Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.7/dist-packages (from spacy<3.1.0,>=3.0.0->en-core-web-sm==3.0.0) (20.9)\n", | |
"Requirement already satisfied: blis<0.8.0,>=0.4.0 in /usr/local/lib/python3.7/dist-packages (from spacy<3.1.0,>=3.0.0->en-core-web-sm==3.0.0) (0.4.1)\n", | |
"Requirement already satisfied: thinc<8.1.0,>=8.0.3 in /usr/local/lib/python3.7/dist-packages (from spacy<3.1.0,>=3.0.0->en-core-web-sm==3.0.0) (8.0.3)\n", | |
"Requirement already satisfied: pathy>=0.3.5 in /usr/local/lib/python3.7/dist-packages (from spacy<3.1.0,>=3.0.0->en-core-web-sm==3.0.0) (0.5.2)\n", | |
"Requirement already satisfied: wasabi<1.1.0,>=0.8.1 in /usr/local/lib/python3.7/dist-packages (from spacy<3.1.0,>=3.0.0->en-core-web-sm==3.0.0) (0.8.2)\n", | |
"Requirement already satisfied: pydantic<1.8.0,>=1.7.1 in /usr/local/lib/python3.7/dist-packages (from spacy<3.1.0,>=3.0.0->en-core-web-sm==3.0.0) (1.7.4)\n", | |
"Requirement already satisfied: setuptools in /usr/local/lib/python3.7/dist-packages (from spacy<3.1.0,>=3.0.0->en-core-web-sm==3.0.0) (56.1.0)\n", | |
"Requirement already satisfied: jinja2 in /usr/local/lib/python3.7/dist-packages (from spacy<3.1.0,>=3.0.0->en-core-web-sm==3.0.0) (2.11.3)\n", | |
"Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /usr/local/lib/python3.7/dist-packages (from spacy<3.1.0,>=3.0.0->en-core-web-sm==3.0.0) (1.0.5)\n", | |
"Requirement already satisfied: catalogue<2.1.0,>=2.0.3 in /usr/local/lib/python3.7/dist-packages (from spacy<3.1.0,>=3.0.0->en-core-web-sm==3.0.0) (2.0.4)\n", | |
"Requirement already satisfied: srsly<3.0.0,>=2.4.1 in /usr/local/lib/python3.7/dist-packages (from spacy<3.1.0,>=3.0.0->en-core-web-sm==3.0.0) (2.4.1)\n", | |
"Requirement already satisfied: typing-extensions<4.0.0.0,>=3.7.4; python_version < \"3.8\" in /usr/local/lib/python3.7/dist-packages (from spacy<3.1.0,>=3.0.0->en-core-web-sm==3.0.0) (3.7.4.3)\n", | |
"Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.13.0->spacy<3.1.0,>=3.0.0->en-core-web-sm==3.0.0) (3.0.4)\n", | |
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.13.0->spacy<3.1.0,>=3.0.0->en-core-web-sm==3.0.0) (2020.12.5)\n", | |
"Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.13.0->spacy<3.1.0,>=3.0.0->en-core-web-sm==3.0.0) (1.24.3)\n", | |
"Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.13.0->spacy<3.1.0,>=3.0.0->en-core-web-sm==3.0.0) (2.10)\n", | |
"Requirement already satisfied: click<7.2.0,>=7.1.1 in /usr/local/lib/python3.7/dist-packages (from typer<0.4.0,>=0.3.0->spacy<3.1.0,>=3.0.0->en-core-web-sm==3.0.0) (7.1.2)\n", | |
"Requirement already satisfied: pyparsing>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging>=20.0->spacy<3.1.0,>=3.0.0->en-core-web-sm==3.0.0) (2.4.7)\n", | |
"Collecting smart-open<4.0.0,>=2.2.0\n", | |
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/11/9a/ba2d5f67f25e8d5bbf2fcec7a99b1e38428e83cb715f64dd179ca43a11bb/smart_open-3.0.0.tar.gz (113kB)\n", | |
"\u001b[K |████████████████████████████████| 122kB 19.8MB/s \n", | |
"\u001b[?25hRequirement already satisfied: MarkupSafe>=0.23 in /usr/local/lib/python3.7/dist-packages (from jinja2->spacy<3.1.0,>=3.0.0->en-core-web-sm==3.0.0) (2.0.1)\n", | |
"Requirement already satisfied: zipp>=0.5; python_version < \"3.8\" in /usr/local/lib/python3.7/dist-packages (from catalogue<2.1.0,>=2.0.3->spacy<3.1.0,>=3.0.0->en-core-web-sm==3.0.0) (3.4.1)\n", | |
"Building wheels for collected packages: smart-open\n", | |
" Building wheel for smart-open (setup.py) ... \u001b[?25l\u001b[?25hdone\n", | |
" Created wheel for smart-open: filename=smart_open-3.0.0-cp37-none-any.whl size=107098 sha256=e1fc58bbdb01057b2030855079568bcf8c4d3207d2655cacec98f8a6376178a1\n", | |
" Stored in directory: /root/.cache/pip/wheels/18/88/7c/f06dabd5e9cabe02d2269167bcacbbf9b47d0c0ff7d6ebcb78\n", | |
"Successfully built smart-open\n", | |
"Installing collected packages: en-core-web-sm, smart-open\n", | |
" Found existing installation: en-core-web-sm 2.2.5\n", | |
" Uninstalling en-core-web-sm-2.2.5:\n", | |
" Successfully uninstalled en-core-web-sm-2.2.5\n", | |
" Found existing installation: smart-open 5.0.0\n", | |
" Uninstalling smart-open-5.0.0:\n", | |
" Successfully uninstalled smart-open-5.0.0\n", | |
"Successfully installed en-core-web-sm-3.0.0 smart-open-3.0.0\n", | |
"\u001b[38;5;2m✔ Download and installation successful\u001b[0m\n", | |
"You can now load the package via spacy.load('en_core_web_sm')\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "BN2AbFLhKh-I", | |
"outputId": "fe94238e-0037-4390-8fa3-fb8c39b3f52f" | |
}, | |
"source": [ | |
"!spikex download-wikigraph simplewiki_core" | |
], | |
"id": "BN2AbFLhKh-I", | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"--2021-05-27 14:11:14-- https://errequadrosrl-my.sharepoint.com/:u:/g/personal/paolo_arduin_errequadrosrl_onmicrosoft_com/EbwV-u0YtVdNo4f02X7HbDsBs3BRTEu4ix-_n0JYLKOJzQ?Download=1\n", | |
"Resolving errequadrosrl-my.sharepoint.com (errequadrosrl-my.sharepoint.com)... 40.108.211.41\n", | |
"Connecting to errequadrosrl-my.sharepoint.com (errequadrosrl-my.sharepoint.com)|40.108.211.41|:443... connected.\n", | |
"HTTP request sent, awaiting response... 302 Found\n", | |
"Location: /personal/paolo_arduin_errequadrosrl_onmicrosoft_com/Documents/spikex/wikigraphs/simplewiki_core-20210401.tar.gz?originalPath=aHR0cHM6Ly9lcnJlcXVhZHJvc3JsLW15LnNoYXJlcG9pbnQuY29tLzp1Oi9nL3BlcnNvbmFsL3Bhb2xvX2FyZHVpbl9lcnJlcXVhZHJvc3JsX29ubWljcm9zb2Z0X2NvbS9FYndWLXUwWXRWZE5vNGYwMlg3SGJEc0JzM0JSVEV1NGl4LV9uMEpZTEtPSnpRP3J0aW1lPURhTkpTaGtoMlVn [following]\n", | |
"--2021-05-27 14:11:15-- https://errequadrosrl-my.sharepoint.com/personal/paolo_arduin_errequadrosrl_onmicrosoft_com/Documents/spikex/wikigraphs/simplewiki_core-20210401.tar.gz?originalPath=aHR0cHM6Ly9lcnJlcXVhZHJvc3JsLW15LnNoYXJlcG9pbnQuY29tLzp1Oi9nL3BlcnNvbmFsL3Bhb2xvX2FyZHVpbl9lcnJlcXVhZHJvc3JsX29ubWljcm9zb2Z0X2NvbS9FYndWLXUwWXRWZE5vNGYwMlg3SGJEc0JzM0JSVEV1NGl4LV9uMEpZTEtPSnpRP3J0aW1lPURhTkpTaGtoMlVn\n", | |
"Reusing existing connection to errequadrosrl-my.sharepoint.com:443.\n", | |
"HTTP request sent, awaiting response... 200 OK\n", | |
"Length: 20912154 (20M) [application/x-gzip]\n", | |
"Saving to: ‘simplewiki_core.tar.gz’\n", | |
"\n", | |
"simplewiki_core.tar 100%[===================>] 19.94M 14.1MB/s in 1.4s \n", | |
"\n", | |
"2021-05-27 14:11:18 (14.1 MB/s) - ‘simplewiki_core.tar.gz’ saved [20912154/20912154]\n", | |
"\n", | |
"Processing ./simplewiki_core.tar.gz\n", | |
"Building wheels for collected packages: simplewiki-core\n", | |
" Building wheel for simplewiki-core (setup.py) ... \u001b[?25l\u001b[?25hdone\n", | |
" Created wheel for simplewiki-core: filename=simplewiki_core-20210401-cp37-none-any.whl size=20898038 sha256=3d7e025ecf736766e3dc13d569c2905c5dfbd3f82ee53c2ab5433de1c51d1932\n", | |
" Stored in directory: /tmp/pip-ephem-wheel-cache-s4yyyxh3/wheels/eb/20/87/cc5f49a0f5c929a5cd25994de487d67652116778a793b1e7be\n", | |
"Successfully built simplewiki-core\n", | |
"Installing collected packages: simplewiki-core\n", | |
"Successfully installed simplewiki-core-20210401\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "wzG0BEnJ_IM1", | |
"outputId": "334f4fc6-59be-4b40-8ca4-4fc6b58a2296" | |
}, | |
"source": [ | |
"from spacy import load as spacy_load\n", | |
"from spikex.wikigraph import load as wg_load\n", | |
"from spikex.pipes import WikiPageX\n", | |
"\n", | |
"# Set up the spaCy pipeline and the WikiGraph-backed page matcher.\n", | |
"nlp = spacy_load('en_core_web_sm')\n", | |
"wg = wg_load('simplewiki_core')\n", | |
"wikipagex = WikiPageX(wg)\n", | |
"\n", | |
"# Parse one sample request and let WikiPageX annotate it.\n", | |
"doc = wikipagex(nlp('Looking for advice on how to do a friends-and-family fundraising round for a startup'))\n", | |
"\n", | |
"# Print the wiki pages attached to each span WikiPageX matched.\n", | |
"for span in doc._.wiki_spans:\n", | |
"    print(span._.wiki_pages)" | |
], | |
"id": "wzG0BEnJ_IM1", | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"('Advice_(constitutional)', 'Advice')\n", | |
"('How_to',)\n", | |
"('To',)\n", | |
"('Do', 'Do')\n", | |
"('A', 'A_(Cyrillic)', 'A_(New_York_City_Subway_service)', 'A_(musical_note)', 'A_(New_York_City_Subway_service)', 'A_(disambiguation)')\n", | |
"('Friend', 'Friend_(2001_movie)', 'Friend_(2001_film)', 'Friend', 'Friend_(2001_film)')\n", | |
"('And_(disambiguation)', 'And_(logic)', 'And_(disambiguation)', 'And_(logic)', 'And')\n", | |
"('Family', 'Family_(biology)', 'Family_(taxonomy)', 'Family_(taxonomy)')\n", | |
"('Fundraising',)\n", | |
"('Round_(music)', 'Round')\n", | |
"('A', 'A_(Cyrillic)', 'A_(New_York_City_Subway_service)', 'A_(musical_note)', 'A_(New_York_City_Subway_service)', 'A_(disambiguation)')\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "Fo-bDa72JtSM" | |
}, | |
"source": [ | |
"from spacy import load as spacy_load\n", | |
"from spikex.pipes import ClusterX\n", | |
"\n", | |
"# Try ClusterX on a single short request before applying it to every row.\n", | |
"nlp = spacy_load(\"en_core_web_sm\")\n", | |
"doc = nlp(\"Sales pitch coaching\")\n", | |
"clusterx = ClusterX(min_score=0.65)\n", | |
"try:\n", | |
"    doc = clusterx(doc)\n", | |
"    print(doc)\n", | |
"    for cluster in doc._.cluster_chunks:\n", | |
"        print(cluster)\n", | |
"except Exception as err:\n", | |
"    # Report the failure instead of hiding it: the previous bare `except:`\n", | |
"    # left this cell with no output at all and also caught KeyboardInterrupt.\n", | |
"    cluster = None\n", | |
"    print(f\"ClusterX failed on {doc.text!r}: {err!r}\")" | |
], | |
"id": "Fo-bDa72JtSM", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 35 | |
}, | |
"id": "Lt1ZlBJIK11I", | |
"outputId": "8d6591a6-a807-4cb1-be16-5781f4ce0124" | |
}, | |
"source": [ | |
"df['body'][0]" | |
], | |
"id": "Lt1ZlBJIK11I", | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"application/vnd.google.colaboratory.intrinsic+json": { | |
"type": "string" | |
}, | |
"text/plain": [ | |
"'Looking for advice on how to do a friends-and-family fundraising round for a startup.'" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 13 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "HObeFmKNMkGa" | |
}, | |
"source": [ | |
"def generate_clusters(n, min_score=0.65):\n", | |
"    \"\"\"Run ClusterX over the text `n` and collect `doc._.cluster_chunks`.\n", | |
"\n", | |
"    Args:\n", | |
"        n: Text to analyse; it is parsed with the notebook-level `nlp` pipeline.\n", | |
"        min_score: Forwarded to `ClusterX(min_score=...)`. The default is the\n", | |
"            value that used to be hard-coded here.\n", | |
"\n", | |
"    Returns:\n", | |
"        A list with one entry per cluster chunk, or None when ClusterX raised.\n", | |
"        The result is also printed, one line per call.\n", | |
"    \"\"\"\n", | |
"    doc = nlp(n)\n", | |
"    clusterx = ClusterX(min_score=min_score)\n", | |
"    try:\n", | |
"        doc = clusterx(doc)\n", | |
"        clusters = list(doc._.cluster_chunks)\n", | |
"    except Exception:\n", | |
"        # Best effort: a text ClusterX cannot handle yields None. Catching\n", | |
"        # Exception rather than using a bare `except:` keeps KeyboardInterrupt\n", | |
"        # working, so a long run over many rows can still be stopped.\n", | |
"        clusters = None\n", | |
"    print(clusters)\n", | |
"    return clusters" | |
], | |
"id": "HObeFmKNMkGa", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "Cvz1wVr4K-RJ" | |
}, | |
"source": [ | |
"# def replace(row):\n", | |
"# for i, item in enumerate(row):\n", | |
"# print(item)\n", | |
"# break\n", | |
"# row[i]=generate_clusters(item)\n", | |
"# return row" | |
], | |
"id": "Cvz1wVr4K-RJ", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 238 | |
}, | |
"id": "Xl6fOK_xLpS0", | |
"outputId": "f6b47bdf-2004-4c80-9d44-01e77a02cc58" | |
}, | |
"source": [ | |
"data=pd.DataFrame(data=df['body'],columns=['body'])\n", | |
"data.head()" | |
], | |
"id": "Xl6fOK_xLpS0", | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>body</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>Looking for advice on how to do a friends-and-family fundraising round for a startup.</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>Sales pitch coaching</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>I would like advice on bettering my career options and opening more roads. I have a Bachelors in Psychology, and am looking to pursue a Masters degree in Applied Psychology. I would like advice on...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>I'm hoping to find a connection to someone who idetifies the same way I do and for them to share their professional experiences on how I too may advance in my professional and academic career.</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>Best coworking spaces near Palo Alto</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" body\n", | |
"0 Looking for advice on how to do a friends-and-family fundraising round for a startup.\n", | |
"1 Sales pitch coaching\n", | |
"2 I would like advice on bettering my career options and opening more roads. I have a Bachelors in Psychology, and am looking to pursue a Masters degree in Applied Psychology. I would like advice on...\n", | |
"3 I'm hoping to find a connection to someone who idetifies the same way I do and for them to share their professional experiences on how I too may advance in my professional and academic career.\n", | |
"4 Best coworking spaces near Palo Alto" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 33 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "na1ZlX11OLOk", | |
"outputId": "956a8db7-eba1-40c3-ba31-4bdd0c2260ad" | |
}, | |
"source": [ | |
"data.size" | |
], | |
"id": "na1ZlX11OLOk", | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"393" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 34 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "dj6iXbPfNc4K" | |
}, | |
"source": [ | |
"data.dropna(subset = [\"body\"], inplace=True)" | |
], | |
"id": "dj6iXbPfNc4K", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "iXZN20l-ONk4", | |
"outputId": "f9d5508d-85e1-4134-9633-75af73bf5b52" | |
}, | |
"source": [ | |
"data.size" | |
], | |
"id": "iXZN20l-ONk4", | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"393" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 36 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "9rsNcwaULXNf", | |
"outputId": "02445906-d023-4da8-e36c-01057488fce9" | |
}, | |
"source": [ | |
"data_sample = [generate_clusters(x) for x in data['body']]\n", | |
"data_sample[0]" | |
], | |
"id": "9rsNcwaULXNf", | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"[[advice], [a startup], [a friends-and-family fundraising round]]\n", | |
"None\n", | |
"[[I], [me], [anything], [Psychology], [a Bachelors], [advice], [Applied Psychology], [my career options], [the way], [more roads], [a Masters degree]]\n", | |
"[[I], [who], [them], [someone], [their professional experiences], [a connection], [my professional and academic career]]\n", | |
"[[Best coworking spaces], [Palo Alto]]\n", | |
"[[Growth equity investors], [a lean operator/ investor private equity model], [cap deals], [-], [mid]]\n", | |
"[[others], [sales], [the areas], [bouncing ideas], [each other's businesses], [online, social media practices], [marketing], [the Social Selling & Social Media practitioner/leader space]]\n", | |
"[[leadership development programs], [the future]]\n", | |
"[[I], [it], [me], [anything], [this screen], [the community], [an ask]]\n", | |
"[[Connections], [mission driven companies/startups], [environment], [potential opportunities], [food], [the areas], [work], ['future]]\n", | |
"[[Examples], [workers, networks, employees], [you], [personal values], [-], [co], [a process], [your personal career path]]\n", | |
"[[ideas], [I]]\n", | |
"[[I], [individuals], [chief operating officer positions]]\n", | |
"[[Women], [North Africa], [the Middle East]]\n", | |
"[[I], [founders], [funding], [finance], [my network], [like-minded powerful leaders], [tech / fintech], [the world], [private equity]]\n", | |
"[[I], [other members], [the next few months], [Sundays], [a Civic Service Design], [15 people], [their own civic-minded service projects], [structure], [my past and current projects], [Oakland], [the Bart station], [a space]]\n", | |
"[[I], [my artwork, my art], [a platform, a place], [Charcoal], [(medium]]\n", | |
"[[Dynamic, versatile, hands-on Senior Program and Process Improvement Manager], [Portfolio Management, Program Management, Change Management], [end], [business], [under-performing programs], [Six Sigma], [integrated end], [best practice techniques], [tools], [my next role], [expertise], [others], [recruiters], [myself], [My skills], [you], [I], [process and systems solutions, complex programs, executive strategic goals, an introduction, management methodologies, executive leadership, multiple concurrent projects, the execution, the high tech sector, a variety, an accomplished leader, the gap], [process and systems solutions, executive strategic goals, complex programs, management methodologies, executive leadership, multiple concurrent projects, the high tech sector, an accomplished leader], [process and systems solutions, complex programs, executive strategic goals, an introduction, experience leading teams, management methodologies, executive leadership, multiple concurrent projects, the execution, the high tech sector, a variety, an accomplished leader, the gap], [technology, advice]]\n", | |
"[[we, I], [Oracle Recruiting], [https://www.eventbrite.com/e/oracle-girl-geek-x-dinner-registration-52422676637?discount=NEWSLETTER], [a talk], [you], [jon opportunities], [Oracle]]\n", | |
"[[We, I], [customers], [documents], [The software], [these documents], [folks], [what], [changes], [a portal], [the taxes], [a software], [the person's prior taxes], [the tax expert, personal information, the Tax preparation field, appropriate tax data]]\n", | |
"[[startups], [companies], [need], [quality video production services]]\n", | |
"[[Introduction], [the restaurant opening], [local food/drink writers], [Menlo Tavern], [January]]\n", | |
"[[interview preparation], [Google]]\n", | |
"[[Tech lead], [Engineering Manager role]]\n", | |
"None\n", | |
"None\n", | |
"[[strategy communication skills], [info graphs], [visuals], [quickly digestible information]]\n", | |
"[[a few people], [experience reviewing startup pitch decks], [us], [feedback], [ours]]\n", | |
"[[Women], [networks], [Accelerate], [other Stanford GSB programming], [who]]\n", | |
"[[I], [who], [Someone], [anyone], [this insight], [Finance], [People Ops], [insights], [Product/Engineering], [a company], [help], [my next dream role], [Product], [leadership budgets], [counts]]\n", | |
"[[I], [who], [me], [Venture Capital], [experienced professionals], [10+ years], [Udacity], [several startups], [roles, people, introductions, companies], [the growth product team, VC firms, the product management space, product experience], [their portfolio, my growth]]\n", | |
"[[Someone], [I], [my Masters degree], [a possible career change, the consulting field, an informational interview]]\n", | |
"[[I], [someone], [me], [professional advice]]\n", | |
"[[Advice], [someone], [corporate America], [a teaching / coaching business, their own teaching / coaching business], [who]]\n", | |
"[[Topics], [ideas], [the next Stanford GSB Accelerate program]]\n", | |
"[[People], [educators], [funders, careers], [they], [educators/principles/administrators], [nonprofits], [volunteers], [their high schools, their true north], [youth hone], [our curriculum], [who], [a curriculum], [align], [GROWmyfuture.org], [a nonprofit]]\n", | |
"[[Someone], [food justice], [health], [wellness], [nutrition], [the native american community], [the bay area]]\n", | |
"[[I], [who], [strong women], [mentors]]\n", | |
"None\n", | |
"[[a recruiter, a recommendation], [anyone], [Silicon Valley], [High Tech], [you], [(IoT]]\n", | |
"None\n", | |
"None\n", | |
"[[I], [me], [founder], [anybody], [Mindvalley Inc], [a opportunity], [Vishen Lakhiani], [that organization]]\n", | |
"[[I], [me], [my current job], [job hunting], [experience], [the things], [General info], [my job], [other fields], [a new job, a job]]\n", | |
"None\n", | |
"[[My husband], [you], [grocery shopping], [I], [suggestions], [the first time], [everything], [Iceland], [SUP], [things], [Paddling], [any tips], [, places], [what]]\n", | |
"[[I], [connections], [job opportunities]]\n", | |
"[[another VP Product], [peer mentoring], [guidance]]\n", | |
"[[a client], [the ancient tool - astrology consultation], [your path], [clarity], [who]]\n", | |
"[[Hey everyone], [feedback], [reply], [the cofounder, the future], [DM me], [Reciprocity], [phone call], [I], [you], [we], [a few user interviews], [your experience, our app, your needs], [an early-stage startup, a 20-minute coffee chat]]\n", | |
"[[a business cofounder], [the helpfulness], [product], [Reciprocity Community], [Someone], [human nature, biz dev], [monetization, engagement, fundraising, marketing]]\n", | |
"None\n", | |
"[[I], [something], [Denver], [work], [a firm], [Palo Alto], [tennis], [weight lifting], [running], [a workout partner], [the activity]]\n", | |
"[[opportunities], [customer success], [product management, business development]]\n", | |
"[[founders], [sustainability], [idea stage], [pre-seed companies], [foodtech], [smart cities, clean energy, circular economy]]\n", | |
"None\n", | |
"[[I], [bathroom], [my camping gear], [reunion]]\n", | |
"[[Friends], [Stockholm]]\n", | |
"[[anyone], [the case], [me], [a venture capital division], [senior executives], [materials], [a Fortune]]\n", | |
"[[recs], [Santorini], [specific hotels], [Mykonos], [general location]]\n", | |
"[[due diligence acumen], [a non-financial background], [processes]]\n", | |
"[[I], [you], [Thanks], [a seed venture], [advice], [shares], [what], [the historical funder], [an explanation], [scheduling and dilution talks]]\n", | |
"[[friends], [cool places], [Thanks], [trusted locals], [travel concierge services], [advice], [my Co-Founder], [Netflix], [Ben Murphy], [Beta], [design], [you], [our Alpha], [I], [someone], [LLM], [it], [me], [a CTO], [We], [every foreign city, a friend, a network, the intersection, the recommendation algorithms, the airport], [the airport, every foreign city, a network, the emerging world, the intersection, the recommendation algorithms], [science, production, scale]]\n", | |
"[[my network], [the Bay Area], [emerging technologies], [particularly digital health], [IoT.]]\n", | |
"None\n", | |
"[[job opportunities], [business development], [tech companies], [VC firms], [strategy], [content], [operations], [investing]]\n", | |
"[[I], [women], [VC], [education, advice], [any relevant connections], [startups], [the past 8 years], [minority, healthcare]]\n", | |
"[[We, I], [The topic], [And the importance], [our guard], [empathy], [compassion], [conversation], [kindness], [life], [the workplace], [small steps], [such a divided world, a keynote, a big step, our world, a proposal, the world]]\n", | |
"[[I], [Industry], [my career goals], [an MBA]]\n", | |
"[[A connection], [the Beauty/Cosmeceuticals Industry]]\n", | |
"[[I], [you], [someone], [any intros], [Deloitte], [the Center], [Consumer Industry Insights division]]\n", | |
"[[I], [Anyone], [my startup], [no idea], [buyers], [some advice], [resources]]\n", | |
"[[mentors], [the skills], [them], [their professional lives], [that profession], [they], [our school], [who], [what], [an experience], [our students, our educators]]\n", | |
"[[you, I], [other resource], [social entrepreneurship], [a recommended book]]\n", | |
"[[Our team], [We], [community], [conversation], [a next-gen platform], [a cofounding CTO], [referrals]]\n", | |
"[[friends], [founder or business partner], [co], [-], [the future], [who]]\n", | |
"[[I], [General advice], [grades], [the system]]\n", | |
"[[an accountability buddy], [self care practices], [we]]\n", | |
"[[I], [our students], [Humanities teachers], [order], [IEPS], [grade levels], [the classroom], [the best way]]\n", | |
"[[the Hamilton Habits], [our lesson plans]]\n", | |
"[[I], [General advice], [grades], [the system]]\n", | |
"[[I], [help], [Skedula]]\n", | |
"[[We], [they], [who], [them], [class], [the classroom], [advice], [the best way], [all their letters], [a 6th grade student], [reading, writing]]\n", | |
"[[1st year teacher advice], [great instruction]]\n", | |
"[[Best practices], [discussion], [encouraging and tracking student engagement]]\n", | |
"[[I], [someone], [part], [the clarity], [the students], [feedback], [a lesson], [my body language, my spoken language]]\n", | |
"[[, age appropriate books], [decoding skills]]\n", | |
"[[assistance], [racial equity], [more content], [a Science lesson]]\n", | |
"[[I], [my socratic seminar], [my ratios], [the school], [stats], [unit], [proportions], [prison pipeline]]\n", | |
"[[I], [you], [-], [teacher], [my teacher], [that please) voice], [a 2nd year], [ideas], [co], [all four 6th Humanities], [, ENL]]\n", | |
"[[I], [students], [who], [them], [someone], [any insight], [order], [students' lives], [this school], [content], [ideas], [any common interests]]\n", | |
"[[team members], [change], [problems], [empowerment], [self-efficacy], [optimism], [a strong sense]]\n", | |
"[[What], [I], [they], [we], [a grasp], [students], [my students], [RACER paragraphs], [8th grade], [RACER]]\n", | |
"[[I], [anyone], [who], [me], [science teacher], [advice], [Dan], [encouragement], [permits], [Riverbank], [the courts], [athletic coach], [a Tennis], [school program, school club]]\n", | |
"[[What], [I], [very good intentions], [an easy way], [Dojo Points], [student behavior, daily classroom data, teaching content]]\n", | |
"[[math standards], [curriculum], [the content area], [I], [It]]\n", | |
"[[I], [ideas], [resources time management]]\n", | |
"[[I], [my multiple daily tasks], [support], [my tasks], [my time management], [better use]]\n", | |
"[[I], [math], [advice], [different forms]]\n", | |
"[[I], [What], [them], [6th graders, 8th graders], [the past], [advice], [the best way], [engagement]]\n", | |
"[[I], [who], [difficulty], [students], [word problems], [a useful tool]]\n", | |
"[[6th grade, 8th grade students], [experience], [them], [I]]\n", | |
"[[What], [Collaborative Problem Solving], [best practices]]\n", | |
"[[I], [ways], [1-2 grade], [a 6th grade setting], [low level readers]]\n", | |
"[[we], [a 3rd party vendor, a math class], [google classroom]]\n", | |
"[[Advice], [VC funding]]\n", | |
"None\n", | |
"[[We], [Our team], [anyone], [adults], [the Bay Area], [affordable housing communities, any local affordable housing communities, the affordable housing space, cognitive disabilities]]\n", | |
"[[Interviews], [cooking enthusiasts], [home chefs]]\n", | |
"[[We], [us], [who], [them], [full-time freelancers]]\n", | |
"[[We], [who], [contractors, individuals], [full-time employees], [the company, a company]]\n", | |
"[[We, I], [you], [your wing-woman], [ways], [a bar or coffee shop], [meaningful romantic connections], [a partner]]\n", | |
"[[We], [topics], [her/him], [referral], [person], [insurance products, interview participants], [the age], [the Bay Area], [video call], [an interview], [a $20 amazon gift card], [the US], [up to 30 minutes]]\n", | |
"[[Hey everyone], [feedback], [reply], [the cofounder, the future, the app], [DM me], [Reciprocity], [phone call], [I], [you], [we], [a few user interviews], [your experience, your needs], [an early-stage startup, a 20-minute coffee chat]]\n", | |
"[[I], [people], [politics]]\n", | |
"[[master students], [the past year], [PubMed], [who]]\n", | |
"[[We], [students], [PhD], [master], [the biomedical field]]\n", | |
"[[I], [who], [CO2 emission], [the extreme user]]\n", | |
"[[we], [who], [a regulator], [challenges], [GHG reduction activities, Paris agreement]]\n", | |
"[[I], [who], [connections], [Chief People Officers, Chief Human Resource Officers], [the above two teams], [persons]]\n", | |
"[[people], [experiences], [new cities/environments], [scratch], [a community], [no one], [they], [who]]\n", | |
"[[We], [anyone], [who], [people], [Convoy], [Uber Freight], [experience], [digital brokers], [small-scale trucking companies], [trucking industry], [this field], [a start-up]]\n", | |
"[[We], [anyone], [who], [people], [Convoy], [Uber Freight], [experience], [digital brokers], [small-scale trucking companies], [trucking industry], [this field], [a start-up]]\n", | |
"[[We], [anyone], [who], [people], [Convoy], [Uber Freight], [digital brokers], [experience], [<50 trucks], [pain points], [small-scale truck operators, small-scale truck companies], [this field]]\n", | |
"[[We], [Any volunteers], [GSBers], [people], [-], [introduction], [a new language], [non]]\n", | |
"[[We], [Any volunteers], [GSBers], [people], [-], [introduction], [a new language], [non]]\n", | |
"[[We], [Any volunteers], [GSBers], [people], [-], [introduction], [a new language], [non]]\n", | |
"[[What], [good tv shows]]\n", | |
"None\n", | |
"[[Introduction], [publisher], [Our team], [a major game developer], [parent-child relationships], [gaming effects], [they]]\n", | |
"[[We], [you], [who], [us], [actuaries], [computer scientist], [industry experts], [mind], [a team, a friend, an experienced marketer], [the insurance space, the pain points]]\n", | |
"[[Introductions], [current college students], [contact info]]\n", | |
"[[Introductions], [current college students], [contact info]]\n", | |
"[[We], [Any volunteers], [GSBers], [people], [-], [introduction], [a new language], [non]]\n", | |
"[[We], [you], [who], [anyone], [us], [touch], [this age group], [high schoolers, their social media usage]]\n", | |
"[[Connections], [current professionals], [construction management], [ (Name, email]]\n", | |
"[[Connections], [current professionals], [construction management], [(Name, email]]\n", | |
"[[they, We], [you], [who], [them], [Thanks], [anyone], [parents, individuals], [70 years], [whom], [grandparents], [care], [the elderly care space], [this description], [pain points], [age]]\n", | |
"[[a prototype, computer science knowledge, the prototype phase], [this knowledge], [our team], [the necessary skills], [designers], [Any recommendations], [team members], [we]]\n", | |
"[[We], [managers], [dispatchers], [subscale trucking companies, trucking companies, truck drivers, pain points]]\n", | |
"[[We], [men], [your friends], [all ages], [demographics], [the GSB bubble], [person], [a diverse group], [family], [us, you]]\n", | |
"[[We], [who], [men], [their skincare preferences], [the GSB bubble], [habits], [a diverse group], [30-minutes], [person], [friends/family], [us, you]]\n", | |
"None\n", | |
"[[Connections], [current professionals], [construction management], [ (Name, email]]\n", | |
"[[Connections], [current professionals], [construction management], [ (Name, email]]\n", | |
"[[they, We], [people], [their infrastructure], [vision], [any connections], [companies], [the pain points], [what]]\n", | |
"[[We], [who], [Thanks], [people], [intros], [cities], [the small business departments]]\n", | |
"[[someone], [a restaurant], [1 year], [the US], [who]]\n", | |
"[[We], [who], [anyone], [ML products], [industry], [an understanding, a lack], [the machine learning space]]\n", | |
"[[Keen], [O&G], [Oil & Gas industry], [Environmental/Sustainability specialists], [current efforts], [sustainability practices], [climate change]]\n", | |
"[[We], [you], [who], [Thanks], [anyone], [touch], [individuals], [this description], [elderly care facilities, professional caregivers]]\n", | |
"[[Intros], [people], [the past 1-3 years], [who], [a significant change], [their exercise], [routine]]\n", | |
"[[Intros], [people], [they], [who], [their exercise]]\n", | |
"[[Introduction], [land], [ranchers], [who], [landowners, farmers]]\n", | |
"[[Intros], [experts], [fitness instructors, fitness influencers, personal trainers, medical professionals], [ex. sports medicine], [the fitness field]]\n", | |
"[[We], [who], [someone], [Expedia, Yelp], [a ratings platform], [experience], [an aggregation site], [Kayak, Travelocity, TripAdvisor]]\n", | |
"[[Introductions], [farmland], [REITs / funds]]\n", | |
"[[Recommendations], [ways], [farmers], [small businesses], [capital risk and maturity mismatch]]\n", | |
"[[connection], [fashion designers], [women's apparel]]\n", | |
"[[We], [who], [coaches, administrators], [part], [individuals], [travel agents, club leaders], [their job function], [trips]]\n", | |
"[[We], [they], [women], [the various pressures]]\n", | |
"[[boutique designers, a) manufacturing partners, manufacturing partners, product manufacturing partners], [bonus points], [inexpensive samples], [dresses], [\n", | |
"b], [bras], [apparel], [who]]\n", | |
"[[I], [Thanks], [intros], [restaurant owners]]\n", | |
"[[We], [a male], [their morning/nighttime routines]]\n", | |
"[[connections], [male undergrads], [ready routines], [their]]\n", | |
"[[We], [their skincare], [routine], [a male], [face]]\n", | |
"[[connections/introductions], [CFO/CEOs], [Venture Capital]]\n", | |
"[[connections/introductions], [Strategy / Corp Development professionals], [either Carta], [iLevel]]\n", | |
"[[Intro], [someone], [who], [a seed]]\n", | |
"[[names], [any softwares], [portfolio company data, investment firms]]\n", | |
"[[We], [who], [websites], [i.e. psychologists], [loneliness], [some apps], [connections, friends], [people, experts], [social patterns, new cities, knowledgeable academics, other solutions]]\n", | |
"[[We], [who], [websites], [i.e. psychologists], [loneliness], [some apps], [connections, friends], [people, experts], [social patterns, new cities, knowledgeable academics, other solutions]]\n", | |
"[[your], [the provided service], [any care provider], [they], [great-)grandparents]]\n", | |
"[[your], [the provided service], [any care provider], [they], [great-)grandparents]]\n", | |
"[[your], [the provided service], [any care provider], [they], [great-)grandparents]]\n", | |
"[[your], [the provided service], [any care provider], [they], [great-)grandparents]]\n", | |
"[[your], [the provided service], [any care provider], [they], [great-)grandparents]]\n", | |
"[[your], [the provided service], [any care provider], [they], [great-)grandparents]]\n", | |
"[[your], [the provided service], [any care provider], [they], [great-)grandparents]]\n", | |
"[[your], [the provided service], [any care provider], [they], [great-)grandparents]]\n", | |
"[[your], [the provided service], [any care provider], [they], [great-)grandparents]]\n", | |
"[[your], [the provided service], [any care provider], [they], [great-)grandparents]]\n", | |
"None\n", | |
"[[Connections], [the dating / friend-making / social club scene], [people, companies]]\n", | |
"[[Connections], [I.e. psychologists], [field experts], [experts], [loneliness and social patterns]]\n", | |
"[[Connections], [websites], [people], [friends], [some apps], [who], [other solutions, new cities]]\n", | |
"[[We], [connections], [managers], [restaurant owners]]\n", | |
"[[anyone], [someone], [program managers], [the film industry], [film festivals], [who], [owners], [cinemas/screening facilities, Directors/products/writers]]\n", | |
"[[anyone], [We], [any diehard movie], [15-20 min], [passionate binge-watchers and/or film festival goers], [tv], [fanatics]]\n", | |
"[[Nutritionally-conscious or diet-restricted Instacart shopper], [groceries], [GSB], [us]]\n", | |
"[[Connections], [early stage insurance companies]]\n", | |
"[[Connections], [early stage insurance companies]]\n", | |
"[[Connections], [early stage insurance companies]]\n", | |
"[[Connections], [early stage insurance companies]]\n", | |
"[[Connections], [early stage insurance companies]]\n", | |
"[[Nutritionally-conscious or diet-restricted shopper], [Whole Foods], [groceries], [us]]\n", | |
"[[Professional women], [cannabis], [they], [themselves], [who]]\n", | |
"[[people], [any such people], [experience], [peak hours], [their experience], [the phone], [either a bartender], [who], [bar/nightclub manager, a bar/club], [they, We]]\n", | |
"[[people], [various solutions], [consistent, healthy sleep]]\n", | |
"[[Doctors], [therapists], [people], [habits], [other experts], [experience], [their sleep hygiene], [sleep]]\n", | |
"[[Connections], [military personnel, frequent international business travelers], [ER doctors], [people], [night-shift workers], [odd sleep schedules]]\n", | |
"[[you], ['s], [someone], [vintage shops], [i.e. The RealReal], [second-hand clothing/accessories], [Poshmark, Goodwill]]\n", | |
"[[Someone], [Vivino]]\n", | |
"[[Introductions], [individuals], [impact], [an interest], [who], [money], [their money]]\n", | |
"[[you, We, I], [ways], [a fly], [the wall], [a first date], [meaningful romantic connections]]\n", | |
"[[Introductions], [GSBers], [sleep], [non], [-], [who], [the past]]\n", | |
"[[We], [you], [parents], [their kids], [intros], [school admins], [young children], [a new language]]\n", | |
"[[We], [individuals], [key swing election states], [Iowa, Ohio, Pennsylvania, Florida, Michigan]]\n", | |
"[[you], [who], [-], [social media influencers/micro], [influencers]]\n", | |
"[[We], [who], [people], [engagement parties], [baby showers], [anniversaries], [(adult/child birthday parties], [personal events]]\n", | |
"[[We], [who], [woman], [their careers, their challenges], [a break], [pivots, returners], [role, change industry, location]]\n", | |
"None\n", | |
"[[We], [who], [birthday parties], [venue owners], [vendors], [anniversaries], [event celebrations], [personal celebrations]]\n", | |
"[[We], [introductions], [brand managers, beauty and wellness brands, marketing team members]]\n", | |
"[[Intros], [male managers], [who], [gender], [the workplace]]\n", | |
"[[any young females], [sexual harassment prevention], [ages], [who], [steps, regards]]\n", | |
"[[We], [who], [adults], [autism], [family members], [down's syndrome], [long term care and housing options], [intellectual or developmental disabilities]]\n", | |
"[[They, I], [you], [them], [the chance], [any contacts], [the bill], [any age], [video games], [\"hardcore\" gamers - casual players], [women, introductions]]\n", | |
"[[Introductions], [individuals], [foundations], [family offices], [high net-worth individuals], [who], [work]]\n", | |
"[[We], [you], [someone], [these companies], [openness], [large furniture stores], [a connection], [their supply chain model], [West Elm], [the manager level], [consignment models]]\n", | |
"[[We], [they], [strangers], [who], [these platforms], [people], [experience], [the social needs], [online communities], [dating apps], [well common pitfalls]]\n", | |
"[[An introduction], [someone], [Airbnb rentals], [similar short-term rentals]]\n", | |
"[[Team], [We], [Thanks], [the nightlife experience, any connection], [bartenders], [bar managers, bar owners]]\n", | |
"[[We], [people], [them], [experience], [roles], [two software engineers], [-], [iOS experience], [our team], [part time], [any leads], [current students], [introductions], [founder], [a co], [the right candidate], [work], [a backend engineer, a mobile developer], [streaming video / video distribution, their full time role]]\n", | |
"[[an introduction], [the restaurant service provider space], [anyone], [experience], [(inventory, POS], [who], [book keeping, staffing, marketing]]\n", | |
"[[anyone], [someone], [the retail space], [returns], [who]]\n", | |
"[[introductions], [independent musicians], [concert promoters, talent buyers, artist managers, venue owners], [booking concerts], [the process]]\n", | |
"[[you], [who], [anyone], [HATES], [the time], [their job]]\n", | |
"[[musicians], [Stanford alumni], [the music industry]]\n", | |
"[[introductions], [people], [music startups], [experience]]\n", | |
"[[Our team], [We], [their own cannabis], [you], [Thanks], [someone], [us], [who], [the help], [cannabis], [advance], [these categories], [it], [a DM], [women]]\n", | |
"[[people], [CPG manufacturers], [emerging markets], [experience digitizing distribution operations], [who]]\n", | |
"[[I], [widows]]\n", | |
"[[I], [a senior care facility]]\n", | |
"[[an elderly person], [herself], [who], [himself]]\n", | |
"[[men], [female direct reports], [who]]\n", | |
"[[a 5 minute short video], [tools], [recommendations], [it], [what kinds], [I]]\n", | |
"[[women], [terrible male boss, either really great male boss, great female boss]]\n", | |
"[[We], [you], [who], [anyone], [training videos], [people], [film, television], [foreign languages], [audiovisual content, video game], [them, it]]\n", | |
"[[intros], [high schoolers], [their friendships], [they], [who]]\n", | |
"[[the market, a specific feature, a software, the competition], [all construction documents], [our product], [our proposed solution], [submittals], [ourselves], [we]]\n", | |
"[[What], [we], [you], [their favorite pair], [the size], [the ways], [some creative ways], [data], [clothing sizes], [measurement data], [a uniquely interesting and accurate experience], [it], [anything], [clothing size], [brand], [people, jeans]]\n", | |
"[[we], [benchmarks], [Madewell], [brands], [consistent sizing], [brand], [order], [Some brands], [jeans], [popular brands], [the size], [a variety], [women, data], [price points, clothing size], [example, mind], [your favorite jeans, their favorite pair], [Who, what], [American Eagle, J. Crew]]\n", | |
"[[anyone], [any connections], [we], [horse people], [Thanks]]\n", | |
"[[I], [Thanks], [senior people], [more social connections]]\n", | |
"[[Transcription apps], [voice memos], [our interview recordings]]\n", | |
"[[people], [sustainability initiatives], [sustainable practices, early adopters], [some extreme users], [environmental/climate change initiatives], [We], [who]]\n", | |
"[[A relative, preferably parent], [an individual], [housing], [autism], [their experience, their child], [who]]\n", | |
"[[We], [who], [life], [a senior person]]\n", | |
"[[We, it], [you], [Thanks], [women], [safety], [a 5 min interview max]]\n", | |
"[[you], [we], [experience], [the US], [knowledge], [sexual assaults], [a quick chat]]\n", | |
"[[I], [who], [clients], [a life insurance broker or financial advisor, the purchasing process, life insurance], [us, them]]\n", | |
"[[I], [help], [product ideas]]\n", | |
"[[anyone], [something], [community-based program], [e.g app], [they], [fitness, language learning habits], [eg diet], [Thanks team], [folks], [who], [an external tool, a habit]]\n", | |
"None\n", | |
"[[someone], [experience], [foreign language pedagogy], [they], [online resources/software/apps]]\n", | |
"[[experts], [cannabis], [the cannabis space], [commercial, regulatory/legal, and/or scientific expertise]]\n", | |
"[[women], [eating behaviors], [body image], [the GSB community]]\n", | |
"[[People], [technology sales roles], [who]]\n", | |
"None\n", | |
"[[I], [My role], [my experience], [35%], [experience], [data scientists], [features], [almost 20 years], [the architecture, that capacity, some advice, the functionality, the implementation], [stakeholders], [architecture], [30% POC/demo, a full-time Director role, a Senior consultant, Chatbot-AI/ML initiatives, a Principal AI/ML Consultant, a healthcare company], [a healthcare company, a Senior consultant, a full-time Director role, Chatbot-AI/ML initiatives, a Principal AI/ML Consultant], [role, design], [engineers, roles]]\n", | |
"[[a technical co], [-], [founder], [everyone]]\n", | |
"[[I], [large plants]]\n", | |
"[[We], [the name], [contact], [names, contacts], [clothing/footwear brands, medium apparel brands, $10M+ revenue companies]]\n", | |
"[[Someone], [a food delivery company], [who]]\n", | |
"[[managers], [5 or more trucks], [long-haul trucking companies]]\n", | |
"[[We], [you], [who], [ages], [women], [any connections], [sizes], [horses], [all shapes]]\n", | |
"[[I], [HR tech], [interesting Software Partners]]\n", | |
"[[the moderation process, a connection], [someone], [big platforms], [comments]]\n", | |
"[[help], [SEO], [creative marketing tactics]]\n", | |
"[[you, I], [What], [their members], [a part], [a look], [some great examples], [amazing benefits, professional associations]]\n", | |
"[[We], [who], [the past], [campaigns], [some people]]\n", | |
"[[A founder], [series B], [who]]\n", | |
"[[people], [technology], [a General Counsel], [a local government agency], [who]]\n", | |
"[[intros], [people], [the music industry]]\n", | |
"[[Media connections], [Oregon], [Portland]]\n", | |
"[[it], [Twitter], [a personal brand], [social media], [Linkedin]]\n", | |
"[[What], [you], [the underrepresented/POC business community, the general business community]]\n", | |
"[[What], [it], [you], [the next 'right' path], [your decision making process], [your career], [the next stage]]\n", | |
"[[we], [women], [C Suites], [a better representation], [the Tech industry]]\n", | |
"[[you], [your kids], [kids], [the same time], [the startup]]\n", | |
"[[the latest McKinsey research, the same rate], [their career], [management], [their careers], [what advice], [you], [men], [women]]\n", | |
"[[you], [companies], [executives], [leadership], [the board], [women], [quotas], [either targets]]\n", | |
"[[What], [they], [women], [some key questions], [private companies], [a corporate board]]\n", | |
"[[you], [a woman’s voice], [innovation], [strategy], [a team, the bottom line]]\n", | |
"[[What qualities], [a manager], [traits], [success], [you], [company]]\n", | |
"[[a woman], [a high bar], [the clock], [deal rooms], [confront, behavior], [you], [slide], [peers], [What]]\n", | |
"[[you], [mentor women], [women, men], [the workplace], [recent surveys], [40%, 60%]]\n", | |
"[[what circumstances], [you], [your race], [it], [gender], [your way, your identity, your job]]\n", | |
"[[a minority female], [the trust], [my authentic self], [the three things], [credibility], [my capabilities], [support], [me], [misaligned assumptions], [people], [What], [I], [executives, assumptions, others]]\n", | |
"[[my leadership skills, my professional career]]\n", | |
"[[What], [I], [your recommendations], [many years], [people], [a job], [sponsors], [me], [work]]\n", | |
"[[a next step, the next step, a point, a wall], [you], [your career, your goals]]\n", | |
"[[Any advice], [the charge], [color], [moments], [challenging times], [females]]\n", | |
"[[What], [you], [that support], [the three best corporate culture practices], [women]]\n", | |
"[[you], [your favor], [power], [the politics], [bad manager situations], [dynamics], [senior and executive positions]]\n", | |
"[[you], [your success], [their thoughts], [male leaders], [the preconceived notions]]\n", | |
"[[you], [they], [who], [someone], [industries], [what]]\n", | |
"[[What], [you], [an example], [either conscious or unconscious bias], [it]]\n", | |
"[[a mentee/mentor relationship], [a good mentor], [what qualities]]\n", | |
"[[you], [this feeling], [imposter syndrome]]\n", | |
"[[I], [me], [my role, my career], [a mentor], [mid], [the next step], [senior level], [more leadership skills]]\n", | |
"[[someone], [Anyone], [an active community, a high volume], [I], [The lower the friction], [the \"Personal CRM\" stuff], [who], [the best tools/products/templates], [suggestions, relationships]]\n", | |
"[[People], [mortgages], [land leases], [who]]\n", | |
"[[We], [us], [who], [someone], [user research], [kenya], [prototypes], [the ground]]\n", | |
"[[advice], [career advancement]]\n", | |
"[[I], [friends]]\n", | |
"None\n", | |
"None\n", | |
"[[It, I], [who], [their study habits], [my situation], [what], [a response], [time management], [college students], [a week], [exam], [test], [exam season], [these areas], [strategies], [help, advice]]\n", | |
"[[Regulatory environment], [a short term lender], [the costs], [small loans, south american countries]]\n", | |
"[[analytics], [ads], [D2C consumer brand]]\n", | |
"[[experience], [connections], [remote workers]]\n", | |
"[[I], [who], [me], [someone], [NLP], [potential opportunities], [a close-knit, small-group environment]]\n", | |
"[[best design advertising], [landing pages], [advertising spend], [efficacy], [outreach], [Facebook], [digital channels], [Instagram], [Google]]\n", | |
"[[Product Design, User Experience Design / Research Leaders]]\n", | |
"[[We], [people, tools, teams], [any industry], [computer vision companies, deep learning model deployments]]\n", | |
"[[a community], [a Design lead fund], [ups], [designers]]\n", | |
"[[someone], [experience], [community building], [University alumni relationships]]\n", | |
"[[I], [it], [who], [someone], [his dog], [my socializing hours], [conversations], [online networking], [the world], [inspired and energized leaders], [creative individuals, impactful visions], [the park, a form]]\n", | |
"[[Someone], [communities], [slack groups], [who]]\n", | |
"[[I], [it], [my portfolio], [some feedback], [advice], [the first iteration], [response], [coffee/phone call], [level], [the User Experience domain, any text reply advice]]\n", | |
"[[I], [who], [events, companies], [field marketing professionals, community managers, employee engagement managers, event marketing managers], [over 150 employees]]\n", | |
"[[Best places], [south bay]]\n", | |
"[[They, I], [anyone], [• Americans], [The nCoV], [• China manufacturing], [What tools], [retail investors], [the US economy], [them], [fear], [companies], [a herd], [a long supply chain], [the next few months], [bearish bets], [options calls], [China]]\n", | |
"[[digital marketing agencies], [their team], [a hard time], [ROI], [who], [clients, marketers]]\n", | |
"[[My mother], [his Canadian citizenship], [my husband], [anyone], [a Canadian immigration lawyer], [I], [this process], [law], [advice]]\n", | |
"[[Senior manager mentor], [executive coach]]\n", | |
"[[I], [our group], [real estate], [other assets], [tech employees], [the silicon valley investors club, the community manager], [stocks, bonds]]\n", | |
"[[I], [our group], [real estate], [other assets], [tech employees], [the silicon valley investors club, the community manager], [stocks, bonds]]\n", | |
"[[I], [another founder], [ideas]]\n", | |
"[[My eyes], [kids], [we], [I], [kids / families, kids/families], [glasses], [eye pains], [online work], [use], [Just a thought], [the upswing], [technology], [technology / online learning], [a talk-back], [a space, a possible source, those blue light, the sheer amount]]\n", | |
"[[I], [students], [expectations], [an online classroom], [online behavior]]\n", | |
"[[person], [likeminded person], [healing industry], [www.facebook.com/globalhealingmovement2020], [more insight], [a Global Healing Movement], [My website], [I], [stigma, scale], [me, you]]\n", | |
"None\n", | |
"[[we, I], [a new friend], [this platform]]\n", | |
"[[I], [who], [The ideal person], [advice], [experience], [my first job], [the field], [a UX Bootcamp], [UX], [connections, opportunities]]\n", | |
"[[I], [who], [More involvement], [people], [2+ years], [an hour], [software experience], [an environmental hackathon], [\"office hours], [Saturday], [mentor]]\n", | |
"[[👋🏻], [one team member], [founder], ['s], [Divyank], [currently a research intern], [a computer science undergrad], [entrepreneurship enthusiast], [also creator], [this project], [me], [you], [my work], [marketing], [I], [ADDabbas, India, LinkedIn]]\n", | |
"[[you, I], [me], [Cal Earth Hacks], [front end developer], [Backend developer], [UI/UX designer]]\n", | |
"[[people], [engaged online communities]]\n", | |
"[[advice], [you], [the MSx Community], [All ideas]]\n", | |
"None\n", | |
"[[me], [people], [friends], [software por favor], [I], [who]]\n", | |
"[[introductions], [the hassle], [Engineering], [CTOs/Head], [manual QA], [We], [who], [mobile first companies, mobile applications, early adopters], [an AI based test automation platform, exceptional user experience]]\n", | |
"[[test post], [new ask]]\n", | |
"[[the street], [your eyes], [years], [I], [what], [y']]\n", | |
"None\n", | |
"None\n", | |
"None\n", | |
"None\n", | |
"None\n", | |
"None\n", | |
"None\n", | |
"None\n", | |
"None\n", | |
"[[I], [twelve shoes]]\n", | |
"None\n", | |
"None\n", | |
"None\n", | |
"None\n", | |
"None\n", | |
"[[I], [the best courses], [energy, entrepreneurship]]\n", | |
"[[myself], [my MSx year], [new opportunities]]\n", | |
"[[I], [it], [care], [two child], [advice], [a nanny, the campus]]\n", | |
"[[My wife], [the area], [we], [organizations], [personal experiences], [Any basic starting points], [I], [adoption], [the Bay Area]]\n", | |
"[[I], [you], [Which classes], [my interest], [AUM], [organizations], [impact investing], [GSB], [an impact investor, a family office], [$5B]]\n", | |
"[[POC], [Prototype]]\n", | |
"[[I], [an online university], [WordPress], [design thinking], [user data], [Moodle], [some assle-free synchronization], [advice, entrepreneurship]]\n", | |
"[[I], [who], [myself], [it], [me], [something], [some great talents, a positive experience, a consultant, an online community], [people], [what], [so many ppl], [market ones], [speaker], [online program], [thing], [this type], [video], [self]]\n", | |
"None\n", | |
"[[advice], [everything], [anyone], [either experience], [folks], [no idea], [apps], [a movie ratings aggregator], [the plan], [Thumb Thump], [Tinder], [the entire World], [a \"thumbs], [It, I], [who], [an app, a prototype], [you, ya]]\n", | |
"[[I], [me], [my understanding], [Biopharma sector], [early stage Biopharma companies]]\n", | |
"[[I], [anyone], [January], [my kids], [experience], [daycare program], [advice], [the Bay Area], [the enrollment process]]\n", | |
"[[MSx], [Program], [the upcoming MSx program, core courses, any soft skills, the core courses], [I], [we], [preparation], [Any other advices], [this lead-time], [any advices], [6 months], [Jan-2021]]\n", | |
"[[people], [experience], [ecosystem services], [payment], [impact investing], [carbon credit trading], [pes], [green finance]]\n", | |
"[[I], [inspirations], [parenting], [a podcast/video platform], [content design], [sources, tips, ideas], [podcast production, early child development]]\n", | |
"[[part, advice], [my visa status], [me], [COVID19], [Any guidance], [I], [the MSx], [the process], [a G4 visa, the US, an F1, a specialist, this community], [the right direction, the current situation]]\n", | |
"[[Someone], [work], [online communities], [who]]\n", | |
"[[Transition], [investment], [China or Asia region], [MSx program, healthcare technology]]\n", | |
"[[my MSx], [my key objectives], [the documentary series project, the pitch process], [you], [Community], [networks/platforms], [January], [the program], [My team], [producer and social impact filmmaker], [I], [-], [advice], [an Emmy-nominated co], [anyone]]\n", | |
"[[you, I], [anyone], [anything], [experience], [campus], [search funds], [investment management], [operator], [money], [-mingled funds], [programmatic deals], [the capital, a founder, an investor]]\n", | |
"[[Community managers], [online communities], [executives], [whose organizations]]\n", | |
"[[Brainstorming partner], [my creative mental health startup]]\n", | |
"[[Best practices], [COVID social distancing, physical making]]\n", | |
"[[government relations, US or European businesses, market opportunities, senior business and government officials], [Sub Saharan African countries], [US Navy SEAL officer, Kennedy School, Public Administration, Harvard University, Government, Africa], [operations], [nearly all high growth], [strong network], [contracted consulting work], [consulting services], [a beachhead market], [2 years, 10 years], [Master], [value-chain optimization], [experience, analysis], [their strategy], [MSx class], [Background], [I], [government relations, US or European businesses, senior business and government officials, market opportunities, growth opportunity]]\n", | |
"[[a new potential match feature, a special ask], [anyone], [testing], [it], [me], [I]]\n", | |
"[[a new feature, a special offer], [potential matches], [testing]]\n", | |
"[[nec mi eleifend], [tempor], [nunc.nnn], [velit blandit eget, lacus vitae, Praesent pretium risus nibh]]\n", | |
"[[nec mi eleifend], [tempor], [nunc.nnn], [velit blandit eget, lacus vitae, Praesent pretium risus nibh]]\n", | |
"[[nec mi eleifend], [tempor], [nunc.nnn], [velit blandit eget, lacus vitae, Praesent pretium risus nibh]]\n", | |
"[[Launchpad], [the D School], [people], [it], [who]]\n", | |
"[[people], [hackathons], [front], [a coding esports tournament series], [us], [coding education], [the CodeCombat AI League], [middle and high-school], [who], [tech companies, relevant folks, underserved students]]\n" | |
], | |
"name": "stdout" | |
}, | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"[[advice], [a startup], [a friends-and-family fundraising round]]" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 81 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "EA44OT4waiKu" | |
}, | |
"source": [ | |
"def flatten_list(_2d_list):\n", | |
"    \"\"\"Flatten exactly one level of nesting.\n", | |
"\n", | |
"    Args:\n", | |
"        _2d_list: Iterable whose elements are either lists or single items.\n", | |
"            None is accepted and treated as an empty input.\n", | |
"\n", | |
"    Returns:\n", | |
"        A new list in which the items of every nested list are spliced in\n", | |
"        order and every non-list element is kept unchanged. Deeper nesting\n", | |
"        and non-list containers (e.g. tuples) are left intact.\n", | |
"    \"\"\"\n", | |
"    # Nothing to flatten: return an empty list instead of raising TypeError.\n", | |
"    if _2d_list is None:\n", | |
"        return []\n", | |
"    flat_list = []\n", | |
"    for element in _2d_list:\n", | |
"        if isinstance(element, list):\n", | |
"            # Splice the sublist's items in place of the sublist itself.\n", | |
"            flat_list.extend(element)\n", | |
"        else:\n", | |
"            flat_list.append(element)\n", | |
"    return flat_list" | |
], | |
"id": "EA44OT4waiKu", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "3zqcTxUsbXzx" | |
}, | |
"source": [ | |
"import itertools" | |
], | |
"id": "3zqcTxUsbXzx", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "X0SsxnA8PWpe" | |
}, | |
"source": [ | |
"data['categories'] = pd.Series(data_sample)" | |
], | |
"id": "X0SsxnA8PWpe", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 546 | |
}, | |
"id": "hiTwh1lpaD3I", | |
"outputId": "2939bb77-a693-42aa-c4d7-a48009ca4171" | |
}, | |
"source": [ | |
"data.head(10)" | |
], | |
"id": "hiTwh1lpaD3I", | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>body</th>\n", | |
" <th>categories</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>Looking for advice on how to do a friends-and-family fundraising round for a startup.</td>\n", | |
" <td>[[(advice)], [(a, startup)], [(a, friends, -, and, -, family, fundraising, round)]]</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>Sales pitch coaching</td>\n", | |
" <td>None</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>I would like advice on bettering my career options and opening more roads. I have a Bachelors in Psychology, and am looking to pursue a Masters degree in Applied Psychology. I would like advice on...</td>\n", | |
" <td>[[(I)], [(me)], [(anything)], [(Psychology)], [(a, Bachelors)], [(advice)], [(Applied, Psychology)], [(my, career, options)], [(the, way)], [(more, roads)], [(a, Masters, degree)]]</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>I'm hoping to find a connection to someone who idetifies the same way I do and for them to share their professional experiences on how I too may advance in my professional and academic career.</td>\n", | |
" <td>[[(I)], [(who)], [(them)], [(someone)], [(their, professional, experiences)], [(a, connection)], [(my, professional, and, academic, career)]]</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>Best coworking spaces near Palo Alto</td>\n", | |
" <td>[[(Best, coworking, spaces)], [(Palo, Alto)]]</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>Growth equity investors interested small to mid-cap deals through a lean operator/ investor private equity model.</td>\n", | |
" <td>[[(Growth, equity, investors)], [(a, lean, operator/, investor, private, equity, model)], [(cap, deals)], [(-)], [(mid)]]</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>to others in the Social Selling & Social Media practitioner/leader space. Interested in sharing and bouncing ideas off each other's businesses, especially in the areas of marketing & sales relatin...</td>\n", | |
" <td>[[(others)], [(sales)], [(the, areas)], [(bouncing, ideas)], [(each, other, 's, businesses)], [(online, ,, social, media, practices)], [(marketing)], [(the, Social, Selling, &, Social, Media, prac...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>to talk to about leadership development programs of the future.</td>\n", | |
" <td>[[(leadership, development, programs)], [(the, future)]]</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>I'm not ready to ask for anything yet but wanted to check out the community and it won't let me through this screen without making an ask.</td>\n", | |
" <td>[[(I)], [(it)], [(me)], [(anything)], [(this, screen)], [(the, community)], [(an, ask)]]</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td>Connections and potential opportunities with mission driven companies/startups especially in the areas of food; environment, and 'future of work'</td>\n", | |
" <td>[[(Connections)], [(mission, driven, companies, /, startups)], [(environment)], [(potential, opportunities)], [(food)], [(the, areas)], [(work)], [(', future)]]</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" body categories\n", | |
"0 Looking for advice on how to do a friends-and-family fundraising round for a startup. [[(advice)], [(a, startup)], [(a, friends, -, and, -, family, fundraising, round)]]\n", | |
"1 Sales pitch coaching None\n", | |
"2 I would like advice on bettering my career options and opening more roads. I have a Bachelors in Psychology, and am looking to pursue a Masters degree in Applied Psychology. I would like advice on... [[(I)], [(me)], [(anything)], [(Psychology)], [(a, Bachelors)], [(advice)], [(Applied, Psychology)], [(my, career, options)], [(the, way)], [(more, roads)], [(a, Masters, degree)]]\n", | |
"3 I'm hoping to find a connection to someone who idetifies the same way I do and for them to share their professional experiences on how I too may advance in my professional and academic career. [[(I)], [(who)], [(them)], [(someone)], [(their, professional, experiences)], [(a, connection)], [(my, professional, and, academic, career)]]\n", | |
"4 Best coworking spaces near Palo Alto [[(Best, coworking, spaces)], [(Palo, Alto)]]\n", | |
"5 Growth equity investors interested small to mid-cap deals through a lean operator/ investor private equity model. [[(Growth, equity, investors)], [(a, lean, operator/, investor, private, equity, model)], [(cap, deals)], [(-)], [(mid)]]\n", | |
"6 to others in the Social Selling & Social Media practitioner/leader space. Interested in sharing and bouncing ideas off each other's businesses, especially in the areas of marketing & sales relatin... [[(others)], [(sales)], [(the, areas)], [(bouncing, ideas)], [(each, other, 's, businesses)], [(online, ,, social, media, practices)], [(marketing)], [(the, Social, Selling, &, Social, Media, prac...\n", | |
"7 to talk to about leadership development programs of the future. [[(leadership, development, programs)], [(the, future)]]\n", | |
"8 I'm not ready to ask for anything yet but wanted to check out the community and it won't let me through this screen without making an ask. [[(I)], [(it)], [(me)], [(anything)], [(this, screen)], [(the, community)], [(an, ask)]]\n", | |
"9 Connections and potential opportunities with mission driven companies/startups especially in the areas of food; environment, and 'future of work' [[(Connections)], [(mission, driven, companies, /, startups)], [(environment)], [(potential, opportunities)], [(food)], [(the, areas)], [(work)], [(', future)]]" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 88 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "t7R8kGeRaFsL" | |
}, | |
"source": [ | |
"" | |
], | |
"id": "t7R8kGeRaFsL", | |
"execution_count": null, | |
"outputs": [] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment