Created
February 14, 2018 00:40
-
-
Save jwlin/70e6ce90d3a48ff4891c173f873a3438 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"import pandas as pd\n", | |
"import numpy as np\n", | |
"import os" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Directory that holds Questions.csv and Answers.csv; change this one constant to relocate the data.\n", | |
"DATA_DIR = '/pythonquestions'\n", | |
"\n", | |
"# Explicit dtypes for the columns of both CSVs (the same mapping is passed to both reads).\n", | |
"# OwnerUserId is read as float: the column contains missing values.\n", | |
"col_types = {'Id': 'int', \n", | |
"             'OwnerUserId': 'float', \n", | |
"             'CreationDate': 'str', \n", | |
"             'ParentId': 'int', \n", | |
"             'Score': 'int',\n", | |
"             'Title': 'str',\n", | |
"             'Body':'str'}\n", | |
"\n", | |
"questions = pd.read_csv(os.path.join(DATA_DIR, 'Questions.csv'), encoding = \"ISO-8859-1\", dtype=col_types)\n", | |
"answers = pd.read_csv(os.path.join(DATA_DIR, 'Answers.csv'), encoding = \"ISO-8859-1\", dtype=col_types)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# A brief look at the data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"<class 'pandas.core.frame.DataFrame'>\n", | |
"RangeIndex: 607282 entries, 0 to 607281\n", | |
"Data columns (total 6 columns):\n", | |
"Id 607282 non-null int32\n", | |
"OwnerUserId 601070 non-null float64\n", | |
"CreationDate 607282 non-null object\n", | |
"Score 607282 non-null int32\n", | |
"Title 607282 non-null object\n", | |
"Body 607282 non-null object\n", | |
"dtypes: float64(1), int32(2), object(3)\n", | |
"memory usage: 23.2+ MB\n" | |
] | |
} | |
], | |
"source": [ | |
"questions.info()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"<class 'pandas.core.frame.DataFrame'>\n", | |
"RangeIndex: 987122 entries, 0 to 987121\n", | |
"Data columns (total 6 columns):\n", | |
"Id 987122 non-null int32\n", | |
"OwnerUserId 981755 non-null float64\n", | |
"CreationDate 987122 non-null object\n", | |
"ParentId 987122 non-null int32\n", | |
"Score 987122 non-null int32\n", | |
"Body 987122 non-null object\n", | |
"dtypes: float64(1), int32(3), object(2)\n", | |
"memory usage: 33.9+ MB\n" | |
] | |
} | |
], | |
"source": [ | |
"answers.info()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style>\n", | |
" .dataframe thead tr:only-child th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: left;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Id</th>\n", | |
" <th>OwnerUserId</th>\n", | |
" <th>CreationDate</th>\n", | |
" <th>Score</th>\n", | |
" <th>Title</th>\n", | |
" <th>Body</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>469</td>\n", | |
" <td>147.0</td>\n", | |
" <td>2008-08-02T15:11:16Z</td>\n", | |
" <td>21</td>\n", | |
" <td>How can I find the full path to a font from it...</td>\n", | |
" <td><p>I am using the Photoshop's javascript API t...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>502</td>\n", | |
" <td>147.0</td>\n", | |
" <td>2008-08-02T17:01:58Z</td>\n", | |
" <td>27</td>\n", | |
" <td>Get a preview JPEG of a PDF on Windows?</td>\n", | |
" <td><p>I have a cross-platform (Python) applicatio...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>535</td>\n", | |
" <td>154.0</td>\n", | |
" <td>2008-08-02T18:43:54Z</td>\n", | |
" <td>40</td>\n", | |
" <td>Continuous Integration System for a Python Cod...</td>\n", | |
" <td><p>I'm starting work on a hobby project with a...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>594</td>\n", | |
" <td>116.0</td>\n", | |
" <td>2008-08-03T01:15:08Z</td>\n", | |
" <td>25</td>\n", | |
" <td>cx_Oracle: How do I iterate over a result set?</td>\n", | |
" <td><p>There are several ways to iterate over a re...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>683</td>\n", | |
" <td>199.0</td>\n", | |
" <td>2008-08-03T13:19:16Z</td>\n", | |
" <td>28</td>\n", | |
" <td>Using 'in' to match an attribute of Python obj...</td>\n", | |
" <td><p>I don't remember whether I was dreaming or ...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>742</td>\n", | |
" <td>189.0</td>\n", | |
" <td>2008-08-03T15:55:28Z</td>\n", | |
" <td>30</td>\n", | |
" <td>Class views in Django</td>\n", | |
" <td><p><a href=\"http://www.djangoproject.com/\">Dja...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>766</td>\n", | |
" <td>1384652.0</td>\n", | |
" <td>2008-08-03T17:44:07Z</td>\n", | |
" <td>20</td>\n", | |
" <td>Python and MySQL</td>\n", | |
" <td><p>I can get Python to work with Postgresql bu...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>773</td>\n", | |
" <td>207.0</td>\n", | |
" <td>2008-08-03T18:27:09Z</td>\n", | |
" <td>256</td>\n", | |
" <td>How do I use Python's itertools.groupby()?</td>\n", | |
" <td><p>I haven't been able to find an understandab...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>972</td>\n", | |
" <td>145.0</td>\n", | |
" <td>2008-08-04T02:17:51Z</td>\n", | |
" <td>364</td>\n", | |
" <td>Adding a Method to an Existing Object Instance</td>\n", | |
" <td><p>I've read that it is possible to add a meth...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td>1476</td>\n", | |
" <td>92.0</td>\n", | |
" <td>2008-08-04T18:20:36Z</td>\n", | |
" <td>251</td>\n", | |
" <td>How do you express binary literals in Python?</td>\n", | |
" <td><p>How do you express an integer as a binary n...</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Id OwnerUserId CreationDate Score \\\n", | |
"0 469 147.0 2008-08-02T15:11:16Z 21 \n", | |
"1 502 147.0 2008-08-02T17:01:58Z 27 \n", | |
"2 535 154.0 2008-08-02T18:43:54Z 40 \n", | |
"3 594 116.0 2008-08-03T01:15:08Z 25 \n", | |
"4 683 199.0 2008-08-03T13:19:16Z 28 \n", | |
"5 742 189.0 2008-08-03T15:55:28Z 30 \n", | |
"6 766 1384652.0 2008-08-03T17:44:07Z 20 \n", | |
"7 773 207.0 2008-08-03T18:27:09Z 256 \n", | |
"8 972 145.0 2008-08-04T02:17:51Z 364 \n", | |
"9 1476 92.0 2008-08-04T18:20:36Z 251 \n", | |
"\n", | |
" Title \\\n", | |
"0 How can I find the full path to a font from it... \n", | |
"1 Get a preview JPEG of a PDF on Windows? \n", | |
"2 Continuous Integration System for a Python Cod... \n", | |
"3 cx_Oracle: How do I iterate over a result set? \n", | |
"4 Using 'in' to match an attribute of Python obj... \n", | |
"5 Class views in Django \n", | |
"6 Python and MySQL \n", | |
"7 How do I use Python's itertools.groupby()? \n", | |
"8 Adding a Method to an Existing Object Instance \n", | |
"9 How do you express binary literals in Python? \n", | |
"\n", | |
" Body \n", | |
"0 <p>I am using the Photoshop's javascript API t... \n", | |
"1 <p>I have a cross-platform (Python) applicatio... \n", | |
"2 <p>I'm starting work on a hobby project with a... \n", | |
"3 <p>There are several ways to iterate over a re... \n", | |
"4 <p>I don't remember whether I was dreaming or ... \n", | |
"5 <p><a href=\"http://www.djangoproject.com/\">Dja... \n", | |
"6 <p>I can get Python to work with Postgresql bu... \n", | |
"7 <p>I haven't been able to find an understandab... \n", | |
"8 <p>I've read that it is possible to add a meth... \n", | |
"9 <p>How do you express an integer as a binary n... " | |
] | |
}, | |
"execution_count": 5, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"questions.head(10)\n", | |
"#questions[questions['Score']==0]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style>\n", | |
" .dataframe thead tr:only-child th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: left;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Id</th>\n", | |
" <th>OwnerUserId</th>\n", | |
" <th>CreationDate</th>\n", | |
" <th>ParentId</th>\n", | |
" <th>Score</th>\n", | |
" <th>Body</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>497</td>\n", | |
" <td>50.0</td>\n", | |
" <td>2008-08-02T16:56:53Z</td>\n", | |
" <td>469</td>\n", | |
" <td>4</td>\n", | |
" <td><p>open up a terminal (Applications-&gt;Utilit...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>518</td>\n", | |
" <td>153.0</td>\n", | |
" <td>2008-08-02T17:42:28Z</td>\n", | |
" <td>469</td>\n", | |
" <td>2</td>\n", | |
" <td><p>I haven't been able to find anything that d...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>536</td>\n", | |
" <td>161.0</td>\n", | |
" <td>2008-08-02T18:49:07Z</td>\n", | |
" <td>502</td>\n", | |
" <td>9</td>\n", | |
" <td><p>You can use ImageMagick's convert utility f...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>538</td>\n", | |
" <td>156.0</td>\n", | |
" <td>2008-08-02T18:56:56Z</td>\n", | |
" <td>535</td>\n", | |
" <td>23</td>\n", | |
" <td><p>One possibility is Hudson. It's written in...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>541</td>\n", | |
" <td>157.0</td>\n", | |
" <td>2008-08-02T19:06:40Z</td>\n", | |
" <td>535</td>\n", | |
" <td>20</td>\n", | |
" <td><p>We run <a href=\"http://buildbot.net/trac\">B...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>595</td>\n", | |
" <td>116.0</td>\n", | |
" <td>2008-08-03T01:17:36Z</td>\n", | |
" <td>594</td>\n", | |
" <td>25</td>\n", | |
" <td><p>The canonical way is to use the built-in cu...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>660</td>\n", | |
" <td>197.0</td>\n", | |
" <td>2008-08-03T12:09:18Z</td>\n", | |
" <td>535</td>\n", | |
" <td>14</td>\n", | |
" <td><p>Second the Buildbot - Trac integration. You...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>701</td>\n", | |
" <td>111.0</td>\n", | |
" <td>2008-08-03T14:30:50Z</td>\n", | |
" <td>683</td>\n", | |
" <td>3</td>\n", | |
" <td><p>No, you were not dreaming. Python has a pr...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>735</td>\n", | |
" <td>145.0</td>\n", | |
" <td>2008-08-03T15:47:22Z</td>\n", | |
" <td>683</td>\n", | |
" <td>-2</td>\n", | |
" <td><p>I think:</p>\\r\\n\\r\\n<pre><code>#!/bin/pytho...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td>745</td>\n", | |
" <td>154.0</td>\n", | |
" <td>2008-08-03T15:59:19Z</td>\n", | |
" <td>683</td>\n", | |
" <td>8</td>\n", | |
" <td><p>Are you looking to get a list of objects th...</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Id OwnerUserId CreationDate ParentId Score \\\n", | |
"0 497 50.0 2008-08-02T16:56:53Z 469 4 \n", | |
"1 518 153.0 2008-08-02T17:42:28Z 469 2 \n", | |
"2 536 161.0 2008-08-02T18:49:07Z 502 9 \n", | |
"3 538 156.0 2008-08-02T18:56:56Z 535 23 \n", | |
"4 541 157.0 2008-08-02T19:06:40Z 535 20 \n", | |
"5 595 116.0 2008-08-03T01:17:36Z 594 25 \n", | |
"6 660 197.0 2008-08-03T12:09:18Z 535 14 \n", | |
"7 701 111.0 2008-08-03T14:30:50Z 683 3 \n", | |
"8 735 145.0 2008-08-03T15:47:22Z 683 -2 \n", | |
"9 745 154.0 2008-08-03T15:59:19Z 683 8 \n", | |
"\n", | |
" Body \n", | |
"0 <p>open up a terminal (Applications->Utilit... \n", | |
"1 <p>I haven't been able to find anything that d... \n", | |
"2 <p>You can use ImageMagick's convert utility f... \n", | |
"3 <p>One possibility is Hudson. It's written in... \n", | |
"4 <p>We run <a href=\"http://buildbot.net/trac\">B... \n", | |
"5 <p>The canonical way is to use the built-in cu... \n", | |
"6 <p>Second the Buildbot - Trac integration. You... \n", | |
"7 <p>No, you were not dreaming. Python has a pr... \n", | |
"8 <p>I think:</p>\\r\\n\\r\\n<pre><code>#!/bin/pytho... \n", | |
"9 <p>Are you looking to get a list of objects th... " | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"answers.head(10)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# A dict from question ID to its answers, as (answer ID, score) pairs" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Map each question Id (the answers' ParentId) to the list of (answer Id, Score) pairs of its answers.\n", | |
"# Walking plain column lists in parallel avoids DataFrame.iterrows(), which builds a Series per row.\n", | |
"q_to_a = dict()\n", | |
"for qid, aid, a_score in zip(answers['ParentId'].tolist(), answers['Id'].tolist(), answers['Score'].tolist()):\n", | |
"    # setdefault creates the list on the first answer seen for a question, then appends in row order\n", | |
"    q_to_a.setdefault(qid, []).append((aid, a_score))" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# There are 539238 questions with 1+ answers. Tentatively, we are going to work on the questions with 4+ answers" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"539238\n", | |
"607282\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/plain": [ | |
"True" | |
] | |
}, | |
"execution_count": 8, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"print(len(q_to_a))\n", | |
"print(len(questions['Id']))\n", | |
"set(q_to_a.keys()).issubset(set(questions['Id'].tolist()))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"q_to_a = {k:v for k, v in q_to_a.items() if len(v) > 3}" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Statistics for questions with 4+ answers. For example, each question has 5.06 answers on average" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"0 4\n", | |
"1 7\n", | |
"2 8\n", | |
"3 9\n", | |
"4 6\n", | |
"5 9\n", | |
"6 16\n", | |
"7 6\n", | |
"8 4\n", | |
"9 4\n", | |
"dtype: int64\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/plain": [ | |
"count 46569.000000\n", | |
"mean 5.061844\n", | |
"std 1.990798\n", | |
"min 4.000000\n", | |
"25% 4.000000\n", | |
"50% 4.000000\n", | |
"75% 5.000000\n", | |
"max 55.000000\n", | |
"dtype: float64" | |
] | |
}, | |
"execution_count": 10, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"num_ans = pd.Series([len(a) for a in q_to_a.values()])\n", | |
"print(num_ans[:10])\n", | |
"num_ans.describe()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# An example of the worst and the best answer to a question\n", | |
"\n", | |
"We can see that the bodies of the questions and answers are HTML, so preprocessing/data cleaning is required for feature extraction." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"982 14\n", | |
"984 -6\n", | |
"2982 576\n", | |
"4600 4\n", | |
"22525 3\n", | |
"959064 66\n", | |
"8961717 25\n", | |
"9041763 3\n", | |
"9636303 4\n", | |
"16240409 7\n", | |
"18434150 2\n", | |
"24748849 1\n", | |
"24865663 4\n", | |
"28060251 10\n", | |
"32076685 3\n", | |
"34404761 1\n" | |
] | |
} | |
], | |
"source": [ | |
"for aid, score in q_to_a[972]:\n", | |
" print(aid, score)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style>\n", | |
" .dataframe thead tr:only-child th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: left;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Id</th>\n", | |
" <th>OwnerUserId</th>\n", | |
" <th>CreationDate</th>\n", | |
" <th>Score</th>\n", | |
" <th>Title</th>\n", | |
" <th>Body</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>972</td>\n", | |
" <td>145.0</td>\n", | |
" <td>2008-08-04T02:17:51Z</td>\n", | |
" <td>364</td>\n", | |
" <td>Adding a Method to an Existing Object Instance</td>\n", | |
" <td><p>I've read that it is possible to add a meth...</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Id OwnerUserId CreationDate Score \\\n", | |
"8 972 145.0 2008-08-04T02:17:51Z 364 \n", | |
"\n", | |
" Title \\\n", | |
"8 Adding a Method to an Existing Object Instance \n", | |
"\n", | |
" Body \n", | |
"8 <p>I've read that it is possible to add a meth... " | |
] | |
}, | |
"execution_count": 12, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"questions[questions['Id']==972]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<p>I've read that it is possible to add a method to an existing object (e.g. not in the class definition) in <strong>Python</strong>, I think this is called <em>Monkey Patching</em> (or in some cases <em>Duck Punching</em>). I understand that it's not always a good decision to do so. But, how might one do this?</p>\n", | |
"\n", | |
"<p><strong>UPDATE 8/04/2008 00:21:01 EST:</strong></p>\n", | |
"\n", | |
"<p><a href=\"http://stackoverflow.com/a/982\">That</a> looks like a good answer John Downey, I tried it but it appears that it ends up being not a <em>true</em> method.</p>\n", | |
"\n", | |
"<p>Your example defines the new patch function with an argument of <strong><code>self</code></strong>, but if you write actual code that way, the now patched class method asks for an argument named <code>self</code> (it doesn't automagically recognize it as the object to which it is supposed to bind, which is what would happen if defined within the class definition), meaning you have to call <strong><code>class.patch(obj)</code></strong> instead of just <strong><code>class.patch()</code></strong> if you want the same functionality as a <em>true</em> method.</p>\n", | |
"\n", | |
"<p><strong>It looks like Python isn't really treating it as a method, but more just as a variable which happens to be a function</strong> (and as such is callable). Is there any way to attach an actual method to a class?</p>\n", | |
"\n", | |
"<p>Oh, and Ryan, <a href=\"http://pypi.python.org/pypi/monkey\">that</a> isn't exactly what I was looking for (it isn't a builtin functionality), but it is quite cool nonetheless.</p>\n" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"# Render the HTML body of question 972 inline instead of showing the raw markup.\n", | |
"# display/HTML come from the public IPython.display module; IPython.core.display is a deprecated import path.\n", | |
"from IPython.display import display, HTML\n", | |
"display(HTML(questions[questions['Id']==972].iloc[0]['Body']))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style>\n", | |
" .dataframe thead tr:only-child th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: left;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Id</th>\n", | |
" <th>OwnerUserId</th>\n", | |
" <th>CreationDate</th>\n", | |
" <th>ParentId</th>\n", | |
" <th>Score</th>\n", | |
" <th>Body</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>16</th>\n", | |
" <td>984</td>\n", | |
" <td>122.0</td>\n", | |
" <td>2008-08-04T02:36:40Z</td>\n", | |
" <td>972</td>\n", | |
" <td>-6</td>\n", | |
" <td><p>I don't know Python syntax, but I know Ruby...</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Id OwnerUserId CreationDate ParentId Score \\\n", | |
"16 984 122.0 2008-08-04T02:36:40Z 972 -6 \n", | |
"\n", | |
" Body \n", | |
"16 <p>I don't know Python syntax, but I know Ruby... " | |
] | |
}, | |
"execution_count": 14, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"answers[answers['Id']==984]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style>\n", | |
" .dataframe thead tr:only-child th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: left;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Id</th>\n", | |
" <th>OwnerUserId</th>\n", | |
" <th>CreationDate</th>\n", | |
" <th>ParentId</th>\n", | |
" <th>Score</th>\n", | |
" <th>Body</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>37</th>\n", | |
" <td>2982</td>\n", | |
" <td>99.0</td>\n", | |
" <td>2008-08-06T00:33:35Z</td>\n", | |
" <td>972</td>\n", | |
" <td>576</td>\n", | |
" <td><p>In Python, there is a difference between fu...</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Id OwnerUserId CreationDate ParentId Score \\\n", | |
"37 2982 99.0 2008-08-06T00:33:35Z 972 576 \n", | |
"\n", | |
" Body \n", | |
"37 <p>In Python, there is a difference between fu... " | |
] | |
}, | |
"execution_count": 15, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"answers[answers['Id']==2982]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 20, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"<p>I don't know Python syntax, but I know Ruby can do it, and it is rather trivial. Let's say you want to add a method to Array that prints the length to standard out:</p>\r\n", | |
"\r\n", | |
"<pre><code>class Array<br> def print_length<br> puts length<br> end<br>end<br></code></pre>\r\n", | |
"\r\n", | |
"<p>If you don't want to modify the whole class, you can just add the method to a single instance of the array, and no other arrays will have the method:</p>\r\n", | |
"\r\n", | |
"<pre><code>array = [1, 2, 3]<br>def array.print_length<br> puts length<br>end<br></code></pre>\r\n", | |
"\r\n", | |
"<p>Just be aware of the issues involved in using this feature. Jeff Atwood actually <a href=\"http://www.codinghorror.com/blog/archives/001151.html\" rel=\"nofollow\">wrote about it</a> not too long ago.</p>\n" | |
] | |
} | |
], | |
"source": [ | |
"print(answers[answers['Id']==984].iloc[0]['Body'])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<p>I don't know Python syntax, but I know Ruby can do it, and it is rather trivial. Let's say you want to add a method to Array that prints the length to standard out:</p>\r\n", | |
"\r\n", | |
"<pre><code>class Array<br> def print_length<br> puts length<br> end<br>end<br></code></pre>\r\n", | |
"\r\n", | |
"<p>If you don't want to modify the whole class, you can just add the method to a single instance of the array, and no other arrays will have the method:</p>\r\n", | |
"\r\n", | |
"<pre><code>array = [1, 2, 3]<br>def array.print_length<br> puts length<br>end<br></code></pre>\r\n", | |
"\r\n", | |
"<p>Just be aware of the issues involved in using this feature. Jeff Atwood actually <a href=\"http://www.codinghorror.com/blog/archives/001151.html\" rel=\"nofollow\">wrote about it</a> not too long ago.</p>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"display(HTML(answers[answers['Id']==984].iloc[0]['Body']))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 21, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"<p>In Python, there is a difference between functions and bound methods.</p>\n", | |
"\n", | |
"<pre><code>>>> def foo():\n", | |
"... print \"foo\"\n", | |
"...\n", | |
">>> class A:\n", | |
"... def bar( self ):\n", | |
"... print \"bar\"\n", | |
"...\n", | |
">>> a = A()\n", | |
">>> foo\n", | |
"<function foo at 0x00A98D70>\n", | |
">>> a.bar\n", | |
"<bound method A.bar of <__main__.A instance at 0x00A9BC88>>\n", | |
">>>\n", | |
"</code></pre>\n", | |
"\n", | |
"<p>Bound methods have been \"bound\" (how descriptive) to an instance, and that instance will be passed as the first argument whenever the method is called.</p>\n", | |
"\n", | |
"<p>Callables that are attributes of a class (as opposed to an instance) are still unbound, though, so you can modify the class definition whenever you want:</p>\n", | |
"\n", | |
"<pre><code>>>> def fooFighters( self ):\n", | |
"... print \"fooFighters\"\n", | |
"...\n", | |
">>> A.fooFighters = fooFighters\n", | |
">>> a2 = A()\n", | |
">>> a2.fooFighters\n", | |
"<bound method A.fooFighters of <__main__.A instance at 0x00A9BEB8>>\n", | |
">>> a2.fooFighters()\n", | |
"fooFighters\n", | |
"</code></pre>\n", | |
"\n", | |
"<p>Previously defined instances are updated as well (as long as they haven't overridden the attribute themselves):</p>\n", | |
"\n", | |
"<pre><code>>>> a.fooFighters()\n", | |
"fooFighters\n", | |
"</code></pre>\n", | |
"\n", | |
"<p>The problem comes when you want to attach a method to a single instance:</p>\n", | |
"\n", | |
"<pre><code>>>> def barFighters( self ):\n", | |
"... print \"barFighters\"\n", | |
"...\n", | |
">>> a.barFighters = barFighters\n", | |
">>> a.barFighters()\n", | |
"Traceback (most recent call last):\n", | |
" File \"<stdin>\", line 1, in <module>\n", | |
"TypeError: barFighters() takes exactly 1 argument (0 given)\n", | |
"</code></pre>\n", | |
"\n", | |
"<p>The function is not automatically bound when it's attached directly to an instance:</p>\n", | |
"\n", | |
"<pre><code>>>> a.barFighters\n", | |
"<function barFighters at 0x00A98EF0>\n", | |
"</code></pre>\n", | |
"\n", | |
"<p>To bind it, we can use the <a href=\"http://docs.python.org/library/types.html?highlight=methodtype#module-types\">MethodType function in the types module</a>:</p>\n", | |
"\n", | |
"<pre><code>>>> import types\n", | |
">>> a.barFighters = types.MethodType( barFighters, a )\n", | |
">>> a.barFighters\n", | |
"<bound method ?.barFighters of <__main__.A instance at 0x00A9BC88>>\n", | |
">>> a.barFighters()\n", | |
"barFighters\n", | |
"</code></pre>\n", | |
"\n", | |
"<p>This time other instances of the class have not been affected:</p>\n", | |
"\n", | |
"<pre><code>>>> a2.barFighters()\n", | |
"Traceback (most recent call last):\n", | |
" File \"<stdin>\", line 1, in <module>\n", | |
"AttributeError: A instance has no attribute 'barFighters'\n", | |
"</code></pre>\n", | |
"\n", | |
"<p>More information can be found by reading about <a href=\"http://users.rcn.com/python/download/Descriptor.htm\">descriptors</a> and <a href=\"http://www.onlamp.com/pub/a/python/2003/04/17/metaclasses.html\">metaclass</a> <a href=\"http://www.gnosis.cx/publish/programming/metaclass_2.html\">programming</a>.</p>\n", | |
"\n" | |
] | |
} | |
], | |
"source": [ | |
"print(answers[answers['Id']==2982 ].iloc[0]['Body'])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 18, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<p>In Python, there is a difference between functions and bound methods.</p>\n", | |
"\n", | |
"<pre><code>>>> def foo():\n", | |
"... print \"foo\"\n", | |
"...\n", | |
">>> class A:\n", | |
"... def bar( self ):\n", | |
"... print \"bar\"\n", | |
"...\n", | |
">>> a = A()\n", | |
">>> foo\n", | |
"<function foo at 0x00A98D70>\n", | |
">>> a.bar\n", | |
"<bound method A.bar of <__main__.A instance at 0x00A9BC88>>\n", | |
">>>\n", | |
"</code></pre>\n", | |
"\n", | |
"<p>Bound methods have been \"bound\" (how descriptive) to an instance, and that instance will be passed as the first argument whenever the method is called.</p>\n", | |
"\n", | |
"<p>Callables that are attributes of a class (as opposed to an instance) are still unbound, though, so you can modify the class definition whenever you want:</p>\n", | |
"\n", | |
"<pre><code>>>> def fooFighters( self ):\n", | |
"... print \"fooFighters\"\n", | |
"...\n", | |
">>> A.fooFighters = fooFighters\n", | |
">>> a2 = A()\n", | |
">>> a2.fooFighters\n", | |
"<bound method A.fooFighters of <__main__.A instance at 0x00A9BEB8>>\n", | |
">>> a2.fooFighters()\n", | |
"fooFighters\n", | |
"</code></pre>\n", | |
"\n", | |
"<p>Previously defined instances are updated as well (as long as they haven't overridden the attribute themselves):</p>\n", | |
"\n", | |
"<pre><code>>>> a.fooFighters()\n", | |
"fooFighters\n", | |
"</code></pre>\n", | |
"\n", | |
"<p>The problem comes when you want to attach a method to a single instance:</p>\n", | |
"\n", | |
"<pre><code>>>> def barFighters( self ):\n", | |
"... print \"barFighters\"\n", | |
"...\n", | |
">>> a.barFighters = barFighters\n", | |
">>> a.barFighters()\n", | |
"Traceback (most recent call last):\n", | |
" File \"<stdin>\", line 1, in <module>\n", | |
"TypeError: barFighters() takes exactly 1 argument (0 given)\n", | |
"</code></pre>\n", | |
"\n", | |
"<p>The function is not automatically bound when it's attached directly to an instance:</p>\n", | |
"\n", | |
"<pre><code>>>> a.barFighters\n", | |
"<function barFighters at 0x00A98EF0>\n", | |
"</code></pre>\n", | |
"\n", | |
"<p>To bind it, we can use the <a href=\"http://docs.python.org/library/types.html?highlight=methodtype#module-types\">MethodType function in the types module</a>:</p>\n", | |
"\n", | |
"<pre><code>>>> import types\n", | |
">>> a.barFighters = types.MethodType( barFighters, a )\n", | |
">>> a.barFighters\n", | |
"<bound method ?.barFighters of <__main__.A instance at 0x00A9BC88>>\n", | |
">>> a.barFighters()\n", | |
"barFighters\n", | |
"</code></pre>\n", | |
"\n", | |
"<p>This time other instances of the class have not been affected:</p>\n", | |
"\n", | |
"<pre><code>>>> a2.barFighters()\n", | |
"Traceback (most recent call last):\n", | |
" File \"<stdin>\", line 1, in <module>\n", | |
"AttributeError: A instance has no attribute 'barFighters'\n", | |
"</code></pre>\n", | |
"\n", | |
"<p>More information can be found by reading about <a href=\"http://users.rcn.com/python/download/Descriptor.htm\">descriptors</a> and <a href=\"http://www.onlamp.com/pub/a/python/2003/04/17/metaclasses.html\">metaclass</a> <a href=\"http://www.gnosis.cx/publish/programming/metaclass_2.html\">programming</a>.</p>\n" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"display(HTML(answers[answers['Id']==2982 ].iloc[0]['Body']))" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.3" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment