tts · March 9, 2020 14:57
diff --git a/rtnetworkwithtweets.ipynb b/rtnetworkwithtweets.ipynb
 {
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# From a CSV file with tweet data, build a directed RT network with edge labels\n",
    "\n",
    "Abridged and adapted from https://github.com/christopherkullenberg/digitalametoder.science/blob/master/cgi-bin/rtnetwork.py"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 101,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "import re\n",
    "import pandas as pd\n",
    "import networkx as nx"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Read CSV (originating from a TAGS spreadsheet on GDrive) and see what you got"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 102,
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE: absolute, machine-specific path -- point this at your own TAGS export.\n",
    "fileitem = \"C:\\\\Users\\\\Me\\\\Desktop\\\\aalto.csv\"\n",
    "\n",
    "# Twitter ids are larger than 2**53. Read the *_id_str columns as text so that\n",
    "# columns containing missing values are not coerced to float64 and rounded.\n",
    "id_columns = [\n",
    "    \"id_str\",\n",
    "    \"from_user_id_str\",\n",
    "    \"in_reply_to_user_id_str\",\n",
    "    \"in_reply_to_status_id_str\",\n",
    "]\n",
    "df = pd.read_csv(fileitem, dtype={column: str for column in id_columns})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 103,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'723'"
      ]
     },
     "execution_count": 103,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of rows in the loaded frame, shown as a string.\n",
    "str(df.shape[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 104,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id_str</th>\n",
       "      <th>from_user</th>\n",
       "      <th>text</th>\n",
       "      <th>created_at</th>\n",
       "      <th>time</th>\n",
       "      <th>geo_coordinates</th>\n",
       "      <th>user_lang</th>\n",
       "      <th>in_reply_to_user_id_str</th>\n",
       "      <th>in_reply_to_screen_name</th>\n",
       "      <th>from_user_id_str</th>\n",
       "      <th>in_reply_to_status_id_str</th>\n",
       "      <th>source</th>\n",
       "      <th>profile_image_url</th>\n",
       "      <th>user_followers_count</th>\n",
       "      <th>user_friends_count</th>\n",
       "      <th>user_location</th>\n",
       "      <th>status_url</th>\n",
       "      <th>entities_str</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1236999252204797953</td>\n",
       "      <td>kkeenanstA</td>\n",
       "      <td>RT @udemsar: Due to coronavirus, I was a remot...</td>\n",
       "      <td>Mon Mar 09 12:56:18 +0000 2020</td>\n",
       "      <td>09/03/2020 12:56:18</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2483455146</td>\n",
       "      <td>NaN</td>\n",
       "      <td>&lt;a href=\"https://mobile.twitter.com\" rel=\"nofo...</td>\n",
       "      <td>http://pbs.twimg.com/profile_images/9105249790...</td>\n",
       "      <td>247</td>\n",
       "      <td>395.0</td>\n",
       "      <td>St Andrews, UK</td>\n",
       "      <td>http://twitter.com/kkeenanstA/statuses/1236999...</td>\n",
       "      <td>{\"hashtags\":[],\"symbols\":[],\"user_mentions\":[{...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1236998720090181633</td>\n",
       "      <td>AaltoResearch</td>\n",
       "      <td>RT @rsarvas: Happy International Women's day(a...</td>\n",
       "      <td>Mon Mar 09 12:54:11 +0000 2020</td>\n",
       "      <td>09/03/2020 12:54:11</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1094939193275543552</td>\n",
       "      <td>NaN</td>\n",
       "      <td>&lt;a href=\"https://mobile.twitter.com\" rel=\"nofo...</td>\n",
       "      <td>http://pbs.twimg.com/profile_images/1106452690...</td>\n",
       "      <td>808</td>\n",
       "      <td>395.0</td>\n",
       "      <td>Finland</td>\n",
       "      <td>http://twitter.com/AaltoResearch/statuses/1236...</td>\n",
       "      <td>{\"hashtags\":[],\"symbols\":[],\"user_mentions\":[{...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1236998358922866688</td>\n",
       "      <td>udemsar</td>\n",
       "      <td>Due to coronavirus, I was a remote oponent in ...</td>\n",
       "      <td>Mon Mar 09 12:52:45 +0000 2020</td>\n",
       "      <td>09/03/2020 12:52:45</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>761744581</td>\n",
       "      <td>NaN</td>\n",
       "      <td>&lt;a href=\"https://mobile.twitter.com\" rel=\"nofo...</td>\n",
       "      <td>http://pbs.twimg.com/profile_images/1231656555...</td>\n",
       "      <td>823</td>\n",
       "      <td>454.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>http://twitter.com/udemsar/statuses/1236998358...</td>\n",
       "      <td>{\"hashtags\":[{\"text\":\"EnvironmentallyFriendlyD...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1236989781936087042</td>\n",
       "      <td>jd162a</td>\n",
       "      <td>RT @chem_25: For the first time in @AaltoUnive...</td>\n",
       "      <td>Mon Mar 09 12:18:40 +0000 2020</td>\n",
       "      <td>09/03/2020 12:18:40</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1324822470</td>\n",
       "      <td>NaN</td>\n",
       "      <td>&lt;a href=\"https://mobile.twitter.com\" rel=\"nofo...</td>\n",
       "      <td>http://pbs.twimg.com/profile_images/3471590226...</td>\n",
       "      <td>615</td>\n",
       "      <td>494.0</td>\n",
       "      <td>University of Glasgow</td>\n",
       "      <td>http://twitter.com/jd162a/statuses/12369897819...</td>\n",
       "      <td>{\"hashtags\":[],\"symbols\":[],\"user_mentions\":[{...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1236988295147663361</td>\n",
       "      <td>helsinkiunipeda</td>\n",
       "      <td>RT @VirtanenH: Hieno uutinen!  Tutkimuksemmeki...</td>\n",
       "      <td>Mon Mar 09 12:12:46 +0000 2020</td>\n",
       "      <td>09/03/2020 12:12:46</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>733232238092034048</td>\n",
       "      <td>NaN</td>\n",
       "      <td>&lt;a href=\"https://mobile.twitter.com\" rel=\"nofo...</td>\n",
       "      <td>http://pbs.twimg.com/profile_images/1006139754...</td>\n",
       "      <td>462</td>\n",
       "      <td>45.0</td>\n",
       "      <td>Helsingin yliopisto</td>\n",
       "      <td>http://twitter.com/helsinkiunipeda/statuses/12...</td>\n",
       "      <td>{\"hashtags\":[],\"symbols\":[],\"user_mentions\":[{...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                id_str        from_user  \\\n",
       "0  1236999252204797953       kkeenanstA   \n",
       "1  1236998720090181633    AaltoResearch   \n",
       "2  1236998358922866688          udemsar   \n",
       "3  1236989781936087042           jd162a   \n",
       "4  1236988295147663361  helsinkiunipeda   \n",
       "\n",
       "                                                text  \\\n",
       "0  RT @udemsar: Due to coronavirus, I was a remot...   \n",
       "1  RT @rsarvas: Happy International Women's day(a...   \n",
       "2  Due to coronavirus, I was a remote oponent in ...   \n",
       "3  RT @chem_25: For the first time in @AaltoUnive...   \n",
       "4  RT @VirtanenH: Hieno uutinen!  Tutkimuksemmeki...   \n",
       "\n",
       "                       created_at                 time geo_coordinates  \\\n",
       "0  Mon Mar 09 12:56:18 +0000 2020  09/03/2020 12:56:18             NaN   \n",
       "1  Mon Mar 09 12:54:11 +0000 2020  09/03/2020 12:54:11             NaN   \n",
       "2  Mon Mar 09 12:52:45 +0000 2020  09/03/2020 12:52:45             NaN   \n",
       "3  Mon Mar 09 12:18:40 +0000 2020  09/03/2020 12:18:40             NaN   \n",
       "4  Mon Mar 09 12:12:46 +0000 2020  09/03/2020 12:12:46             NaN   \n",
       "\n",
       "   user_lang  in_reply_to_user_id_str in_reply_to_screen_name  \\\n",
       "0        NaN                      NaN                     NaN   \n",
       "1        NaN                      NaN                     NaN   \n",
       "2        NaN                      NaN                     NaN   \n",
       "3        NaN                      NaN                     NaN   \n",
       "4        NaN                      NaN                     NaN   \n",
       "\n",
       "      from_user_id_str  in_reply_to_status_id_str  \\\n",
       "0           2483455146                        NaN   \n",
       "1  1094939193275543552                        NaN   \n",
       "2            761744581                        NaN   \n",
       "3           1324822470                        NaN   \n",
       "4   733232238092034048                        NaN   \n",
       "\n",
       "                                              source  \\\n",
       "0  <a href=\"https://mobile.twitter.com\" rel=\"nofo...   \n",
       "1  <a href=\"https://mobile.twitter.com\" rel=\"nofo...   \n",
       "2  <a href=\"https://mobile.twitter.com\" rel=\"nofo...   \n",
       "3  <a href=\"https://mobile.twitter.com\" rel=\"nofo...   \n",
       "4  <a href=\"https://mobile.twitter.com\" rel=\"nofo...   \n",
       "\n",
       "                                   profile_image_url  user_followers_count  \\\n",
       "0  http://pbs.twimg.com/profile_images/9105249790...                   247   \n",
       "1  http://pbs.twimg.com/profile_images/1106452690...                   808   \n",
       "2  http://pbs.twimg.com/profile_images/1231656555...                   823   \n",
       "3  http://pbs.twimg.com/profile_images/3471590226...                   615   \n",
       "4  http://pbs.twimg.com/profile_images/1006139754...                   462   \n",
       "\n",
       "   user_friends_count          user_location  \\\n",
       "0               395.0         St Andrews, UK   \n",
       "1               395.0                Finland   \n",
       "2               454.0                    NaN   \n",
       "3               494.0  University of Glasgow   \n",
       "4                45.0    Helsingin yliopisto   \n",
       "\n",
       "                                          status_url  \\\n",
       "0  http://twitter.com/kkeenanstA/statuses/1236999...   \n",
       "1  http://twitter.com/AaltoResearch/statuses/1236...   \n",
       "2  http://twitter.com/udemsar/statuses/1236998358...   \n",
       "3  http://twitter.com/jd162a/statuses/12369897819...   \n",
       "4  http://twitter.com/helsinkiunipeda/statuses/12...   \n",
       "\n",
       "                                        entities_str  \n",
       "0  {\"hashtags\":[],\"symbols\":[],\"user_mentions\":[{...  \n",
       "1  {\"hashtags\":[],\"symbols\":[],\"user_mentions\":[{...  \n",
       "2  {\"hashtags\":[{\"text\":\"EnvironmentallyFriendlyD...  \n",
       "3  {\"hashtags\":[],\"symbols\":[],\"user_mentions\":[{...  \n",
       "4  {\"hashtags\":[],\"symbols\":[],\"user_mentions\":[{...  "
      ]
     },
     "execution_count": 104,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Peek at the first five rows of the loaded frame.\n",
    "df.head(n=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 96,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id_str</th>\n",
       "      <th>from_user</th>\n",
       "      <th>text</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1236999252204797953</td>\n",
       "      <td>kkeenanstA</td>\n",
       "      <td>RT @udemsar: Due to coronavirus, I was a remot...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1236998720090181633</td>\n",
       "      <td>AaltoResearch</td>\n",
       "      <td>RT @rsarvas: Happy International Women's day(a...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1236998358922866688</td>\n",
       "      <td>udemsar</td>\n",
       "      <td>Due to coronavirus, I was a remote oponent in ...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                id_str      from_user  \\\n",
       "0  1236999252204797953     kkeenanstA   \n",
       "1  1236998720090181633  AaltoResearch   \n",
       "2  1236998358922866688        udemsar   \n",
       "\n",
       "                                                text  \n",
       "0  RT @udemsar: Due to coronavirus, I was a remot...  \n",
       "1  RT @rsarvas: Happy International Women's day(a...  \n",
       "2  Due to coronavirus, I was a remote oponent in ...  "
      ]
     },
     "execution_count": 96,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# First three rows of the first three columns (id_str, from_user, text).\n",
    "df.iloc[:3, :3]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## About RE used\n",
    "\n",
    "\n",
    "Documentation from https://docs.python.org/3/library/re.html\n",
    "\n",
    ">(?...) is an extension notation. a '?' following a '(' is not meaningful otherwise. \n",
    "The first character after the '?' determines what the meaning and further syntax of the construct is. \n",
    "Extensions usually do not create a new group.\n",
    " \n",
    ">(?<=...) is a positive lookbehind assertion. \n",
    "(?<=abc)def will find a match in 'abcdef', since the lookbehind will back up 3 characters \n",
    "and check if the contained pattern matches.\n",
    " \n",
    ">(?=\\:) is a lookahead assertion.\n",
    "For example, Isaac (?=Asimov) will match 'Isaac ' only if it’s followed by 'Asimov'.\n",
    "\n",
    "So in the following cell, **re.findall()** matches the string that appears between **RT @** and the next colon, i.e. the Twitter account name. The **?** after `.*` makes the quantifier non-greedy (lazy), so the match stops at the first colon instead of the last one."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## The graph"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 105,
   "metadata": {},
   "outputs": [],
   "source": [
    "G = nx.DiGraph()\n",
    "\n",
    "# Account name between \"RT @\" and the next colon.\n",
    "# A raw string keeps \\s, \\@ and \\: as regex escapes instead of invalid string escapes.\n",
    "rt_pattern = re.compile(r\"(?<=RT\\s\\@).*?(?=\\:)\", re.IGNORECASE)\n",
    "\n",
    "# Select the columns by name rather than by position, so column order does not matter.\n",
    "for user, text in zip(df[\"from_user\"], df[\"text\"]):\n",
    "    # A missing tweet text is read as NaN (a float), which has no startswith().\n",
    "    if not isinstance(text, str) or not text.startswith(\"RT\"):\n",
    "        continue\n",
    "    match = rt_pattern.search(text)\n",
    "    if match:\n",
    "        # Edge: retweeting account -> retweeted account, tweet text as an edge attribute.\n",
    "        # A DiGraph holds one edge per ordered pair, so a repeated retweet replaces the text.\n",
    "        G.add_edge(user, match.group(0), tweet=text)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# nx.draw_networkx(G, pos=None, with_labels=True) "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Write to file"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 106,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Serialize the retweet graph, including the tweet edge attribute, as a GEXF file.\n",
    "gexf_path = \"C:\\\\Users\\\\Me\\\\Desktop\\\\aalto.gexf\"\n",
    "nx.write_gexf(G, gexf_path)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Gephi\n",
    "\n",
    "After import to Gephi, you cannot always see a column named **tweet** in the Data Laboratory. Instead, there might be two Label columns: the first one is empty, and the second one has your tweet text. \n",
    "\n",
    "When you hover over the column header, you see that the first one has id **label** but the id of the other one is 0.\n",
    "\n",
    "Your task is to copy all values from the second one (id=0) to the first (id=label). When you click the **Copy data to other column** button, you'll see the **tweet** column in the list - if not earlier. Choose it. Target is the **Label** column. When finished, delete the tweet column.\n",
    "\n",
    "Now, after export to a graph file, with the GEXF Viewer, when you click a node you can see the tweet text\n",
    "proper in the side panel, like this https://users.aalto.fi/~sonkkila/unitwnet/aalto_tw/index.html"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
 }
	{
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# From a CSV file with tweet data, build a directed RT network with edge labels\n",
	"\n",
	"Abridged and adapted from https://github.com/christopherkullenberg/digitalametoder.science/blob/master/cgi-bin/rtnetwork.py"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 101,
	"metadata": {},
	"outputs": [],
	"source": [
	"import sys\n",
	"import re\n",
	"import pandas as pd\n",
	"import networkx as nx"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## Read CSV (originating from a TAGS spreadsheet on GDrive) and see what you got"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 102,
	"metadata": {},
	"outputs": [],
	"source": [
	"# NOTE: absolute, machine-specific path -- point this at your own TAGS export.\n",
	"fileitem = \"C:\\\\Users\\\\Me\\\\Desktop\\\\aalto.csv\"\n",
	"\n",
	"# Twitter ids are larger than 2**53. Read the *_id_str columns as text so that\n",
	"# columns containing missing values are not coerced to float64 and rounded.\n",
	"id_columns = [\n",
	"    \"id_str\",\n",
	"    \"from_user_id_str\",\n",
	"    \"in_reply_to_user_id_str\",\n",
	"    \"in_reply_to_status_id_str\",\n",
	"]\n",
	"df = pd.read_csv(fileitem, dtype={column: str for column in id_columns})"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 103,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"'723'"
	]
	},
	"execution_count": 103,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# Number of rows in the loaded frame, shown as a string.\n",
	"str(df.shape[0])"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 104,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>id_str</th>\n",
	" <th>from_user</th>\n",
	" <th>text</th>\n",
	" <th>created_at</th>\n",
	" <th>time</th>\n",
	" <th>geo_coordinates</th>\n",
	" <th>user_lang</th>\n",
	" <th>in_reply_to_user_id_str</th>\n",
	" <th>in_reply_to_screen_name</th>\n",
	" <th>from_user_id_str</th>\n",
	" <th>in_reply_to_status_id_str</th>\n",
	" <th>source</th>\n",
	" <th>profile_image_url</th>\n",
	" <th>user_followers_count</th>\n",
	" <th>user_friends_count</th>\n",
	" <th>user_location</th>\n",
	" <th>status_url</th>\n",
	" <th>entities_str</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>0</th>\n",
	" <td>1236999252204797953</td>\n",
	" <td>kkeenanstA</td>\n",
	" <td>RT @udemsar: Due to coronavirus, I was a remot...</td>\n",
	" <td>Mon Mar 09 12:56:18 +0000 2020</td>\n",
	" <td>09/03/2020 12:56:18</td>\n",
	" <td>NaN</td>\n",
	" <td>NaN</td>\n",
	" <td>NaN</td>\n",
	" <td>NaN</td>\n",
	" <td>2483455146</td>\n",
	" <td>NaN</td>\n",
	" <td><a href=\"https://mobile.twitter.com\" rel=\"nofo...</td>\n",
	" <td>http://pbs.twimg.com/profile_images/9105249790...</td>\n",
	" <td>247</td>\n",
	" <td>395.0</td>\n",
	" <td>St Andrews, UK</td>\n",
	" <td>http://twitter.com/kkeenanstA/statuses/1236999...</td>\n",
	" <td>{\"hashtags\":[],\"symbols\":[],\"user_mentions\":[{...</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1</th>\n",
	" <td>1236998720090181633</td>\n",
	" <td>AaltoResearch</td>\n",
	" <td>RT @rsarvas: Happy International Women's day(a...</td>\n",
	" <td>Mon Mar 09 12:54:11 +0000 2020</td>\n",
	" <td>09/03/2020 12:54:11</td>\n",
	" <td>NaN</td>\n",
	" <td>NaN</td>\n",
	" <td>NaN</td>\n",
	" <td>NaN</td>\n",
	" <td>1094939193275543552</td>\n",
	" <td>NaN</td>\n",
	" <td><a href=\"https://mobile.twitter.com\" rel=\"nofo...</td>\n",
	" <td>http://pbs.twimg.com/profile_images/1106452690...</td>\n",
	" <td>808</td>\n",
	" <td>395.0</td>\n",
	" <td>Finland</td>\n",
	" <td>http://twitter.com/AaltoResearch/statuses/1236...</td>\n",
	" <td>{\"hashtags\":[],\"symbols\":[],\"user_mentions\":[{...</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>2</th>\n",
	" <td>1236998358922866688</td>\n",
	" <td>udemsar</td>\n",
	" <td>Due to coronavirus, I was a remote oponent in ...</td>\n",
	" <td>Mon Mar 09 12:52:45 +0000 2020</td>\n",
	" <td>09/03/2020 12:52:45</td>\n",
	" <td>NaN</td>\n",
	" <td>NaN</td>\n",
	" <td>NaN</td>\n",
	" <td>NaN</td>\n",
	" <td>761744581</td>\n",
	" <td>NaN</td>\n",
	" <td><a href=\"https://mobile.twitter.com\" rel=\"nofo...</td>\n",
	" <td>http://pbs.twimg.com/profile_images/1231656555...</td>\n",
	" <td>823</td>\n",
	" <td>454.0</td>\n",
	" <td>NaN</td>\n",
	" <td>http://twitter.com/udemsar/statuses/1236998358...</td>\n",
	" <td>{\"hashtags\":[{\"text\":\"EnvironmentallyFriendlyD...</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>3</th>\n",
	" <td>1236989781936087042</td>\n",
	" <td>jd162a</td>\n",
	" <td>RT @chem_25: For the first time in @AaltoUnive...</td>\n",
	" <td>Mon Mar 09 12:18:40 +0000 2020</td>\n",
	" <td>09/03/2020 12:18:40</td>\n",
	" <td>NaN</td>\n",
	" <td>NaN</td>\n",
	" <td>NaN</td>\n",
	" <td>NaN</td>\n",
	" <td>1324822470</td>\n",
	" <td>NaN</td>\n",
	" <td><a href=\"https://mobile.twitter.com\" rel=\"nofo...</td>\n",
	" <td>http://pbs.twimg.com/profile_images/3471590226...</td>\n",
	" <td>615</td>\n",
	" <td>494.0</td>\n",
	" <td>University of Glasgow</td>\n",
	" <td>http://twitter.com/jd162a/statuses/12369897819...</td>\n",
	" <td>{\"hashtags\":[],\"symbols\":[],\"user_mentions\":[{...</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>4</th>\n",
	" <td>1236988295147663361</td>\n",
	" <td>helsinkiunipeda</td>\n",
	" <td>RT @VirtanenH: Hieno uutinen! Tutkimuksemmeki...</td>\n",
	" <td>Mon Mar 09 12:12:46 +0000 2020</td>\n",
	" <td>09/03/2020 12:12:46</td>\n",
	" <td>NaN</td>\n",
	" <td>NaN</td>\n",
	" <td>NaN</td>\n",
	" <td>NaN</td>\n",
	" <td>733232238092034048</td>\n",
	" <td>NaN</td>\n",
	" <td><a href=\"https://mobile.twitter.com\" rel=\"nofo...</td>\n",
	" <td>http://pbs.twimg.com/profile_images/1006139754...</td>\n",
	" <td>462</td>\n",
	" <td>45.0</td>\n",
	" <td>Helsingin yliopisto</td>\n",
	" <td>http://twitter.com/helsinkiunipeda/statuses/12...</td>\n",
	" <td>{\"hashtags\":[],\"symbols\":[],\"user_mentions\":[{...</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" id_str from_user \\\n",
	"0 1236999252204797953 kkeenanstA \n",
	"1 1236998720090181633 AaltoResearch \n",
	"2 1236998358922866688 udemsar \n",
	"3 1236989781936087042 jd162a \n",
	"4 1236988295147663361 helsinkiunipeda \n",
	"\n",
	" text \\\n",
	"0 RT @udemsar: Due to coronavirus, I was a remot... \n",
	"1 RT @rsarvas: Happy International Women's day(a... \n",
	"2 Due to coronavirus, I was a remote oponent in ... \n",
	"3 RT @chem_25: For the first time in @AaltoUnive... \n",
	"4 RT @VirtanenH: Hieno uutinen! Tutkimuksemmeki... \n",
	"\n",
	" created_at time geo_coordinates \\\n",
	"0 Mon Mar 09 12:56:18 +0000 2020 09/03/2020 12:56:18 NaN \n",
	"1 Mon Mar 09 12:54:11 +0000 2020 09/03/2020 12:54:11 NaN \n",
	"2 Mon Mar 09 12:52:45 +0000 2020 09/03/2020 12:52:45 NaN \n",
	"3 Mon Mar 09 12:18:40 +0000 2020 09/03/2020 12:18:40 NaN \n",
	"4 Mon Mar 09 12:12:46 +0000 2020 09/03/2020 12:12:46 NaN \n",
	"\n",
	" user_lang in_reply_to_user_id_str in_reply_to_screen_name \\\n",
	"0 NaN NaN NaN \n",
	"1 NaN NaN NaN \n",
	"2 NaN NaN NaN \n",
	"3 NaN NaN NaN \n",
	"4 NaN NaN NaN \n",
	"\n",
	" from_user_id_str in_reply_to_status_id_str \\\n",
	"0 2483455146 NaN \n",
	"1 1094939193275543552 NaN \n",
	"2 761744581 NaN \n",
	"3 1324822470 NaN \n",
	"4 733232238092034048 NaN \n",
	"\n",
	" source \\\n",
	"0 <a href=\"https://mobile.twitter.com\" rel=\"nofo... \n",
	"1 <a href=\"https://mobile.twitter.com\" rel=\"nofo... \n",
	"2 <a href=\"https://mobile.twitter.com\" rel=\"nofo... \n",
	"3 <a href=\"https://mobile.twitter.com\" rel=\"nofo... \n",
	"4 <a href=\"https://mobile.twitter.com\" rel=\"nofo... \n",
	"\n",
	" profile_image_url user_followers_count \\\n",
	"0 http://pbs.twimg.com/profile_images/9105249790... 247 \n",
	"1 http://pbs.twimg.com/profile_images/1106452690... 808 \n",
	"2 http://pbs.twimg.com/profile_images/1231656555... 823 \n",
	"3 http://pbs.twimg.com/profile_images/3471590226... 615 \n",
	"4 http://pbs.twimg.com/profile_images/1006139754... 462 \n",
	"\n",
	" user_friends_count user_location \\\n",
	"0 395.0 St Andrews, UK \n",
	"1 395.0 Finland \n",
	"2 454.0 NaN \n",
	"3 494.0 University of Glasgow \n",
	"4 45.0 Helsingin yliopisto \n",
	"\n",
	" status_url \\\n",
	"0 http://twitter.com/kkeenanstA/statuses/1236999... \n",
	"1 http://twitter.com/AaltoResearch/statuses/1236... \n",
	"2 http://twitter.com/udemsar/statuses/1236998358... \n",
	"3 http://twitter.com/jd162a/statuses/12369897819... \n",
	"4 http://twitter.com/helsinkiunipeda/statuses/12... \n",
	"\n",
	" entities_str \n",
	"0 {\"hashtags\":[],\"symbols\":[],\"user_mentions\":[{... \n",
	"1 {\"hashtags\":[],\"symbols\":[],\"user_mentions\":[{... \n",
	"2 {\"hashtags\":[{\"text\":\"EnvironmentallyFriendlyD... \n",
	"3 {\"hashtags\":[],\"symbols\":[],\"user_mentions\":[{... \n",
	"4 {\"hashtags\":[],\"symbols\":[],\"user_mentions\":[{... "
	]
	},
	"execution_count": 104,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# Peek at the first five rows of the loaded frame.\n",
	"df.head(n=5)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 96,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>id_str</th>\n",
	" <th>from_user</th>\n",
	" <th>text</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>0</th>\n",
	" <td>1236999252204797953</td>\n",
	" <td>kkeenanstA</td>\n",
	" <td>RT @udemsar: Due to coronavirus, I was a remot...</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1</th>\n",
	" <td>1236998720090181633</td>\n",
	" <td>AaltoResearch</td>\n",
	" <td>RT @rsarvas: Happy International Women's day(a...</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>2</th>\n",
	" <td>1236998358922866688</td>\n",
	" <td>udemsar</td>\n",
	" <td>Due to coronavirus, I was a remote oponent in ...</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" id_str from_user \\\n",
	"0 1236999252204797953 kkeenanstA \n",
	"1 1236998720090181633 AaltoResearch \n",
	"2 1236998358922866688 udemsar \n",
	"\n",
	" text \n",
	"0 RT @udemsar: Due to coronavirus, I was a remot... \n",
	"1 RT @rsarvas: Happy International Women's day(a... \n",
	"2 Due to coronavirus, I was a remote oponent in ... "
	]
	},
	"execution_count": 96,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# First three rows of the first three columns (id_str, from_user, text).\n",
	"df.iloc[:3, :3]"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## About RE used\n",
	"\n",
	"\n",
	"Documentation from https://docs.python.org/3/library/re.html\n",
	"\n",
	">(?...) is an extension notation. a '?' following a '(' is not meaningful otherwise. \n",
	"The first character after the '?' determines what the meaning and further syntax of the construct is. \n",
	"Extensions usually do not create a new group.\n",
	" \n",
	">(?<=...) is a positive lookbehind assertion. \n",
	"(?<=abc)def will find a match in 'abcdef', since the lookbehind will back up 3 characters \n",
	"and check if the contained pattern matches.\n",
	" \n",
	">(?=\\:) is a lookahead assertion.\n",
	"For example, Isaac (?=Asimov) will match 'Isaac ' only if it’s followed by 'Asimov'.\n",
	"\n",
	"So in the following cell, re.findall() matches the string that appears between RT @ and the next colon, i.e. the Twitter account name. The ? after .* makes the quantifier non-greedy (lazy), so the match stops at the first colon instead of the last one."
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## The graph"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 105,
	"metadata": {},
	"outputs": [],
	"source": [
	"G = nx.DiGraph()\n",
	"\n",
	"# Account name between \"RT @\" and the next colon.\n",
	"# A raw string keeps \\s, \\@ and \\: as regex escapes instead of invalid string escapes.\n",
	"rt_pattern = re.compile(r\"(?<=RT\\s\\@).*?(?=\\:)\", re.IGNORECASE)\n",
	"\n",
	"# Select the columns by name rather than by position, so column order does not matter.\n",
	"for user, text in zip(df[\"from_user\"], df[\"text\"]):\n",
	"    # A missing tweet text is read as NaN (a float), which has no startswith().\n",
	"    if not isinstance(text, str) or not text.startswith(\"RT\"):\n",
	"        continue\n",
	"    match = rt_pattern.search(text)\n",
	"    if match:\n",
	"        # Edge: retweeting account -> retweeted account, tweet text as an edge attribute.\n",
	"        # A DiGraph holds one edge per ordered pair, so a repeated retweet replaces the text.\n",
	"        G.add_edge(user, match.group(0), tweet=text)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"# nx.draw_networkx(G, pos=None, with_labels=True) "
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## Write to file"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 106,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Serialize the retweet graph, including the tweet edge attribute, as a GEXF file.\n",
	"gexf_path = \"C:\\\\Users\\\\Me\\\\Desktop\\\\aalto.gexf\"\n",
	"nx.write_gexf(G, gexf_path)"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## Gephi\n",
	"\n",
	"After import to Gephi, you cannot always see a column named tweet in the Data Laboratory. Instead, there might be two Label columns: the first one is empty, and the second one has your tweet text. \n",
	"\n",
	"When you hover over the column header, you see that the first one has id label but the id of the other one is 0.\n",
	"\n",
	"Your task is to copy all values from the second one (id=0) to the first (id=label). When you click the Copy data to other column button, you'll see the tweet column in the list - if not earlier. Choose it. Target is the Label column. When finished, delete the tweet column.\n",
	"\n",
	"Now, after export to a graph file, with the GEXF Viewer, when you click a node you can see the tweet text\n",
	"proper in the side panel, like this https://users.aalto.fi/~sonkkila/unitwnet/aalto_tw/index.html"
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.7.3"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}