Created
May 1, 2021 07:15
-
-
Save Proteusiq/17ef30f8e231f221ef0200642378e0d9 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import httpx\n", | |
"from bs4 import BeautifulSoup\n", | |
"import pandas as pd" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Ethical web scraping\n", | |
"# Read Terms of Use and robots.txt. \n", | |
"# e.g. Goodreads `Use of Our Service` <https://www.goodreads.com/about/terms>\n", | |
"# https://www.goodreads.com/robots.txt\n", | |
"# Identify yourself in user-age and how webmaster can reach-out\n", | |
"# Repect web traffic by slowing down requests\n", | |
"\n", | |
"URI = 'https://www.goodreads.com/review/list/117367560'\n", | |
"params = {'shelf': 'to-read',\n", | |
" 'per_page': 100}\n", | |
"\n", | |
"headers = {'user-agent': 'Prayson W. Daniel: email: prayson***@****.com'}\n", | |
"with httpx.Client(headers=headers) as client:\n", | |
" r = client.get(URI, params=params)\n", | |
" " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"soup = BeautifulSoup(r.text, 'lxml')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Using select [CSS selector]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"'/review/list/117367560?page=2&shelf=to-read'" | |
] | |
}, | |
"execution_count": 15, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Pagination\n", | |
"# all elements tagged [a] that have an attribute [href] with values containg word page\n", | |
"# and href ends with \"shelf=to-read\"\n", | |
"next_page, = [a.get('href') for a in soup.select('a[href*=\"page=\"] + a[href$=\"shelf=to-read\"]')]\n", | |
"next_page" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# get Prayson's \"Want to Read\" list of books\n", | |
"# all elements with CSS [table] that have an attribute named [id] with value [books]\n", | |
"table, = soup.select('table[id=books]')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# all elements with CSS class attribute named with value [bookalike]\n", | |
"books = table.select('.bookalike') # class=\"bookalike\" => .bookalike\n", | |
" # id=\"rates\" => #rates" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# limit to fist hit, return the value of title from attribute a\n", | |
"titles = [book.select('td[class=\"field title\"]', limit=1)[0].a['title'] \n", | |
" for book in books\n", | |
"]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# select all <a> with href that starts with /author\n", | |
"authors = [book.select('a[href^=\"/author\"]', limit=1)[0].text\n", | |
" for book in books\n", | |
"]\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# select all [span] that are directly within an element named [div], \n", | |
"# but not [span] with title starting with \"Goodreads\"\n", | |
"added_dates = [book.select('div > span[title]:not(span[title^=\"Goodreads\"])')[0].get('title')\n", | |
" for book in books\n", | |
" ]\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"field_names = [\"num_pages\", \"avg_rating\", \"num_ratings\", \"date_pub_edition\"]\n", | |
"\n", | |
"# I know! An overkill usage of list comprehesion :(\n", | |
"multiple_data = [[(book\n", | |
" .select(f'td[class=\"field {field_name}\"]')[0]\n", | |
" .get_text(strip=True, separator=\" \")\n", | |
" .replace(f\"{field_name.replace('_', ' ')}\", \"\")\n", | |
" .strip()\n", | |
" ) for field_name in field_names\n", | |
" ] for book in books]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Data cleaning: Hail Pandas!\n", | |
"\n", | |
"dataf = pd.DataFrame(zip(titles, authors, added_dates, multiple_data),\n", | |
" columns=['title', 'author', 'added_date', 'multiple_data']\n", | |
" )\n", | |
"\n", | |
"dataf =(pd.merge(\n", | |
" dataf, (dataf\n", | |
" .get('multiple_data')\n", | |
" .apply(pd.Series)\n", | |
" .rename(columns={index:value for index, value in enumerate(field_names)})\n", | |
" ),\n", | |
" left_index=True,\n", | |
" right_index=True)\n", | |
" .drop(columns='multiple_data')\n", | |
" .assign(num_pages=lambda d: d['num_pages'].str[:-3],\n", | |
" num_ratings=lambda d: d['num_ratings'].str.replace(',',''))\n", | |
" .astype({\n", | |
" 'author':'category',\n", | |
" 'added_date': 'datetime64[ns]',\n", | |
" 'num_pages':'int32',\n", | |
" 'avg_rating': 'float32',\n", | |
" 'num_ratings': 'int64',\n", | |
" 'date_pub_edition': 'datetime64[ns]'\n", | |
" })\n", | |
")\n", | |
" " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>title</th>\n", | |
" <th>author</th>\n", | |
" <th>added_date</th>\n", | |
" <th>num_pages</th>\n", | |
" <th>avg_rating</th>\n", | |
" <th>num_ratings</th>\n", | |
" <th>date_pub_edition</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>The Little Book of Talent: 52 Tips for Improving Your Skills</td>\n", | |
" <td>Coyle, Daniel</td>\n", | |
" <td>2021-03-13</td>\n", | |
" <td>160</td>\n", | |
" <td>4.00</td>\n", | |
" <td>7104</td>\n", | |
" <td>2012-08-21</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>The Talent Code: Unlocking the Secret of Skill in Sports, Art, Music, Math, and Just About Everything Else</td>\n", | |
" <td>Coyle, Daniel</td>\n", | |
" <td>2021-03-13</td>\n", | |
" <td>288</td>\n", | |
" <td>4.05</td>\n", | |
" <td>20789</td>\n", | |
" <td>2009-04-28</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>Culture Code the Exp</td>\n", | |
" <td>Coyle, Daniel</td>\n", | |
" <td>2021-03-13</td>\n", | |
" <td>280</td>\n", | |
" <td>4.31</td>\n", | |
" <td>17010</td>\n", | |
" <td>2018-07-01</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>Misbehaving: The Making of Behavioral Economics</td>\n", | |
" <td>Thaler, Richard H.</td>\n", | |
" <td>2021-02-19</td>\n", | |
" <td>358</td>\n", | |
" <td>4.19</td>\n", | |
" <td>15928</td>\n", | |
" <td>2016-06-14</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>Weapons of Math Destruction: How Big Data Increases Inequality and Threatens Democracy</td>\n", | |
" <td>O'Neil, Cathy</td>\n", | |
" <td>2021-02-09</td>\n", | |
" <td>259</td>\n", | |
" <td>3.88</td>\n", | |
" <td>18107</td>\n", | |
" <td>2016-09-06</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>The Hard Thing About Hard Things: Building a Business When There Are No Easy Answers</td>\n", | |
" <td>Horowitz, Ben</td>\n", | |
" <td>2020-11-27</td>\n", | |
" <td>304</td>\n", | |
" <td>4.24</td>\n", | |
" <td>64435</td>\n", | |
" <td>2014-03-04</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>How Innovation Works: Serendipity, Energy and the Saving of Time</td>\n", | |
" <td>Ridley, Matt</td>\n", | |
" <td>2020-11-27</td>\n", | |
" <td>416</td>\n", | |
" <td>4.16</td>\n", | |
" <td>1224</td>\n", | |
" <td>2020-05-19</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>Say What You Mean: A Mindful Approach to Nonviolent Communication</td>\n", | |
" <td>Sofer, Oren Jay</td>\n", | |
" <td>2020-11-27</td>\n", | |
" <td>272</td>\n", | |
" <td>4.24</td>\n", | |
" <td>908</td>\n", | |
" <td>2018-12-11</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>Creativity: Flow and the Psychology of Discovery and Invention</td>\n", | |
" <td>Csikszentmihalyi, Mihaly</td>\n", | |
" <td>2020-11-27</td>\n", | |
" <td>466</td>\n", | |
" <td>4.08</td>\n", | |
" <td>4963</td>\n", | |
" <td>2009-10-13</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td>How to Feed a Dictator: Saddam Hussein, Idi Amin, Enver Hoxha, Fidel Castro, and Pol Pot Through the Eyes of Their Cooks</td>\n", | |
" <td>Szabłowski, Witold</td>\n", | |
" <td>2020-11-27</td>\n", | |
" <td>288</td>\n", | |
" <td>4.19</td>\n", | |
" <td>1366</td>\n", | |
" <td>2020-04-28</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>10</th>\n", | |
" <td>Win Bigly: Persuasion in a World Where Facts Don't Matter</td>\n", | |
" <td>Adams, Scott</td>\n", | |
" <td>2020-10-30</td>\n", | |
" <td>304</td>\n", | |
" <td>3.74</td>\n", | |
" <td>4236</td>\n", | |
" <td>2017-11-16</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>11</th>\n", | |
" <td>Impossible to Ignore: Creating Memorable Content to Influence Decisions</td>\n", | |
" <td>Simon, Carmen</td>\n", | |
" <td>2020-10-30</td>\n", | |
" <td>288</td>\n", | |
" <td>3.85</td>\n", | |
" <td>359</td>\n", | |
" <td>2016-05-11</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>12</th>\n", | |
" <td>The Art of Persuasion: Winning Without Intimidation</td>\n", | |
" <td>Burg, Bob</td>\n", | |
" <td>2020-10-30</td>\n", | |
" <td>240</td>\n", | |
" <td>4.08</td>\n", | |
" <td>465</td>\n", | |
" <td>2011-09-20</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>13</th>\n", | |
" <td>Scaling Up: How a Few Companies Make It...and Why the Rest Don't (Rockefeller Habits 2.0)</td>\n", | |
" <td>Harnish, Verne</td>\n", | |
" <td>2020-10-30</td>\n", | |
" <td>246</td>\n", | |
" <td>4.21</td>\n", | |
" <td>3639</td>\n", | |
" <td>2014-10-21</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>14</th>\n", | |
" <td>Fanatical Prospecting: The Ultimate Guide to Opening Sales Conversations and Filling the Pipeline by Leveraging Social Selling, Telephone, Email, Text, and Cold Calling</td>\n", | |
" <td>Blount, Jeb</td>\n", | |
" <td>2020-10-30</td>\n", | |
" <td>304</td>\n", | |
" <td>4.30</td>\n", | |
" <td>2996</td>\n", | |
" <td>2015-10-05</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>15</th>\n", | |
" <td>Principles: Life and Work</td>\n", | |
" <td>Dalio, Ray</td>\n", | |
" <td>2020-10-30</td>\n", | |
" <td>592</td>\n", | |
" <td>4.10</td>\n", | |
" <td>39262</td>\n", | |
" <td>2017-09-19</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>16</th>\n", | |
" <td>Pitch Anything: An Innovative Method for Presenting, Persuading, and Winning the Deal</td>\n", | |
" <td>Klaff, Oren</td>\n", | |
" <td>2020-10-30</td>\n", | |
" <td>225</td>\n", | |
" <td>4.08</td>\n", | |
" <td>8815</td>\n", | |
" <td>2011-02-16</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>17</th>\n", | |
" <td>Way of the Wolf: Become a Master Closer with Straight Line Selling</td>\n", | |
" <td>Belfort, Jordan</td>\n", | |
" <td>2020-10-30</td>\n", | |
" <td>240</td>\n", | |
" <td>4.05</td>\n", | |
" <td>4570</td>\n", | |
" <td>2017-09-26</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>18</th>\n", | |
" <td>Exactly What to Say: The Magic Words for Influence and Impact</td>\n", | |
" <td>Jones, Phil M.</td>\n", | |
" <td>2020-10-30</td>\n", | |
" <td>148</td>\n", | |
" <td>3.74</td>\n", | |
" <td>4136</td>\n", | |
" <td>2017-07-26</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>19</th>\n", | |
" <td>Essentialism: The Disciplined Pursuit of Less</td>\n", | |
" <td>McKeown, Greg</td>\n", | |
" <td>2020-10-30</td>\n", | |
" <td>260</td>\n", | |
" <td>4.03</td>\n", | |
" <td>66505</td>\n", | |
" <td>2014-04-15</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>20</th>\n", | |
" <td>Never Eat Alone: And Other Secrets to Success, One Relationship at a Time</td>\n", | |
" <td>Ferrazzi, Keith</td>\n", | |
" <td>2020-10-30</td>\n", | |
" <td>309</td>\n", | |
" <td>3.85</td>\n", | |
" <td>43404</td>\n", | |
" <td>2005-02-22</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>21</th>\n", | |
" <td>Made to Stick: Why Some Ideas Survive and Others Die</td>\n", | |
" <td>Heath, Chip</td>\n", | |
" <td>2020-10-22</td>\n", | |
" <td>291</td>\n", | |
" <td>3.96</td>\n", | |
" <td>79826</td>\n", | |
" <td>2007-01-02</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>22</th>\n", | |
" <td>Atomic Habits: An Easy & Proven Way to Build Good Habits & Break Bad Ones</td>\n", | |
" <td>Clear, James</td>\n", | |
" <td>2020-10-06</td>\n", | |
" <td>319</td>\n", | |
" <td>4.36</td>\n", | |
" <td>197725</td>\n", | |
" <td>2018-10-16</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>23</th>\n", | |
" <td>Built to Last: Successful Habits of Visionary Companies</td>\n", | |
" <td>Collins, James C.</td>\n", | |
" <td>2020-09-17</td>\n", | |
" <td>368</td>\n", | |
" <td>4.02</td>\n", | |
" <td>63281</td>\n", | |
" <td>2004-11-02</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>24</th>\n", | |
" <td>TED Talks: The Official TED Guide to Public Speaking</td>\n", | |
" <td>Anderson, Chris J.</td>\n", | |
" <td>2020-09-17</td>\n", | |
" <td>288</td>\n", | |
" <td>4.07</td>\n", | |
" <td>10901</td>\n", | |
" <td>2016-05-03</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>25</th>\n", | |
" <td>Brands and Bulls**t: Excel at the Former and Avoid the Latter. A Branding Primer for Millennial Marketers in a Digital Age.</td>\n", | |
" <td>Schroeder, Bernhard</td>\n", | |
" <td>2020-09-11</td>\n", | |
" <td>180</td>\n", | |
" <td>3.91</td>\n", | |
" <td>78</td>\n", | |
" <td>2017-10-30</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>26</th>\n", | |
" <td>Fail Fast or Win Big: The Start-Up Plan for Starting Now</td>\n", | |
" <td>Schroeder, Bernhard</td>\n", | |
" <td>2020-09-11</td>\n", | |
" <td>208</td>\n", | |
" <td>3.88</td>\n", | |
" <td>81</td>\n", | |
" <td>2015-02-18</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>27</th>\n", | |
" <td>Scrum: The Art of Doing Twice the Work in Half the Time</td>\n", | |
" <td>Sutherland, Jeff</td>\n", | |
" <td>2020-09-07</td>\n", | |
" <td>237</td>\n", | |
" <td>4.18</td>\n", | |
" <td>16332</td>\n", | |
" <td>2014-09-30</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>28</th>\n", | |
" <td>Crossing the Chasm: Marketing and Selling High-Tech Products to Mainstream Customers</td>\n", | |
" <td>Moore, Geoffrey A.</td>\n", | |
" <td>2020-09-07</td>\n", | |
" <td>211</td>\n", | |
" <td>4.00</td>\n", | |
" <td>24252</td>\n", | |
" <td>2006-07-25</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>29</th>\n", | |
" <td>The Last Girl: My Story of Captivity, and My Fight Against the Islamic State</td>\n", | |
" <td>Murad, Nadia</td>\n", | |
" <td>2020-08-23</td>\n", | |
" <td>12</td>\n", | |
" <td>4.46</td>\n", | |
" <td>16513</td>\n", | |
" <td>2017-11-07</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" title \\\n", | |
"0 The Little Book of Talent: 52 Tips for Improving Your Skills \n", | |
"1 The Talent Code: Unlocking the Secret of Skill in Sports, Art, Music, Math, and Just About Everything Else \n", | |
"2 Culture Code the Exp \n", | |
"3 Misbehaving: The Making of Behavioral Economics \n", | |
"4 Weapons of Math Destruction: How Big Data Increases Inequality and Threatens Democracy \n", | |
"5 The Hard Thing About Hard Things: Building a Business When There Are No Easy Answers \n", | |
"6 How Innovation Works: Serendipity, Energy and the Saving of Time \n", | |
"7 Say What You Mean: A Mindful Approach to Nonviolent Communication \n", | |
"8 Creativity: Flow and the Psychology of Discovery and Invention \n", | |
"9 How to Feed a Dictator: Saddam Hussein, Idi Amin, Enver Hoxha, Fidel Castro, and Pol Pot Through the Eyes of Their Cooks \n", | |
"10 Win Bigly: Persuasion in a World Where Facts Don't Matter \n", | |
"11 Impossible to Ignore: Creating Memorable Content to Influence Decisions \n", | |
"12 The Art of Persuasion: Winning Without Intimidation \n", | |
"13 Scaling Up: How a Few Companies Make It...and Why the Rest Don't (Rockefeller Habits 2.0) \n", | |
"14 Fanatical Prospecting: The Ultimate Guide to Opening Sales Conversations and Filling the Pipeline by Leveraging Social Selling, Telephone, Email, Text, and Cold Calling \n", | |
"15 Principles: Life and Work \n", | |
"16 Pitch Anything: An Innovative Method for Presenting, Persuading, and Winning the Deal \n", | |
"17 Way of the Wolf: Become a Master Closer with Straight Line Selling \n", | |
"18 Exactly What to Say: The Magic Words for Influence and Impact \n", | |
"19 Essentialism: The Disciplined Pursuit of Less \n", | |
"20 Never Eat Alone: And Other Secrets to Success, One Relationship at a Time \n", | |
"21 Made to Stick: Why Some Ideas Survive and Others Die \n", | |
"22 Atomic Habits: An Easy & Proven Way to Build Good Habits & Break Bad Ones \n", | |
"23 Built to Last: Successful Habits of Visionary Companies \n", | |
"24 TED Talks: The Official TED Guide to Public Speaking \n", | |
"25 Brands and Bulls**t: Excel at the Former and Avoid the Latter. A Branding Primer for Millennial Marketers in a Digital Age. \n", | |
"26 Fail Fast or Win Big: The Start-Up Plan for Starting Now \n", | |
"27 Scrum: The Art of Doing Twice the Work in Half the Time \n", | |
"28 Crossing the Chasm: Marketing and Selling High-Tech Products to Mainstream Customers \n", | |
"29 The Last Girl: My Story of Captivity, and My Fight Against the Islamic State \n", | |
"\n", | |
" author added_date num_pages avg_rating num_ratings \\\n", | |
"0 Coyle, Daniel 2021-03-13 160 4.00 7104 \n", | |
"1 Coyle, Daniel 2021-03-13 288 4.05 20789 \n", | |
"2 Coyle, Daniel 2021-03-13 280 4.31 17010 \n", | |
"3 Thaler, Richard H. 2021-02-19 358 4.19 15928 \n", | |
"4 O'Neil, Cathy 2021-02-09 259 3.88 18107 \n", | |
"5 Horowitz, Ben 2020-11-27 304 4.24 64435 \n", | |
"6 Ridley, Matt 2020-11-27 416 4.16 1224 \n", | |
"7 Sofer, Oren Jay 2020-11-27 272 4.24 908 \n", | |
"8 Csikszentmihalyi, Mihaly 2020-11-27 466 4.08 4963 \n", | |
"9 Szabłowski, Witold 2020-11-27 288 4.19 1366 \n", | |
"10 Adams, Scott 2020-10-30 304 3.74 4236 \n", | |
"11 Simon, Carmen 2020-10-30 288 3.85 359 \n", | |
"12 Burg, Bob 2020-10-30 240 4.08 465 \n", | |
"13 Harnish, Verne 2020-10-30 246 4.21 3639 \n", | |
"14 Blount, Jeb 2020-10-30 304 4.30 2996 \n", | |
"15 Dalio, Ray 2020-10-30 592 4.10 39262 \n", | |
"16 Klaff, Oren 2020-10-30 225 4.08 8815 \n", | |
"17 Belfort, Jordan 2020-10-30 240 4.05 4570 \n", | |
"18 Jones, Phil M. 2020-10-30 148 3.74 4136 \n", | |
"19 McKeown, Greg 2020-10-30 260 4.03 66505 \n", | |
"20 Ferrazzi, Keith 2020-10-30 309 3.85 43404 \n", | |
"21 Heath, Chip 2020-10-22 291 3.96 79826 \n", | |
"22 Clear, James 2020-10-06 319 4.36 197725 \n", | |
"23 Collins, James C. 2020-09-17 368 4.02 63281 \n", | |
"24 Anderson, Chris J. 2020-09-17 288 4.07 10901 \n", | |
"25 Schroeder, Bernhard 2020-09-11 180 3.91 78 \n", | |
"26 Schroeder, Bernhard 2020-09-11 208 3.88 81 \n", | |
"27 Sutherland, Jeff 2020-09-07 237 4.18 16332 \n", | |
"28 Moore, Geoffrey A. 2020-09-07 211 4.00 24252 \n", | |
"29 Murad, Nadia 2020-08-23 12 4.46 16513 \n", | |
"\n", | |
" date_pub_edition \n", | |
"0 2012-08-21 \n", | |
"1 2009-04-28 \n", | |
"2 2018-07-01 \n", | |
"3 2016-06-14 \n", | |
"4 2016-09-06 \n", | |
"5 2014-03-04 \n", | |
"6 2020-05-19 \n", | |
"7 2018-12-11 \n", | |
"8 2009-10-13 \n", | |
"9 2020-04-28 \n", | |
"10 2017-11-16 \n", | |
"11 2016-05-11 \n", | |
"12 2011-09-20 \n", | |
"13 2014-10-21 \n", | |
"14 2015-10-05 \n", | |
"15 2017-09-19 \n", | |
"16 2011-02-16 \n", | |
"17 2017-09-26 \n", | |
"18 2017-07-26 \n", | |
"19 2014-04-15 \n", | |
"20 2005-02-22 \n", | |
"21 2007-01-02 \n", | |
"22 2018-10-16 \n", | |
"23 2004-11-02 \n", | |
"24 2016-05-03 \n", | |
"25 2017-10-30 \n", | |
"26 2015-02-18 \n", | |
"27 2014-09-30 \n", | |
"28 2006-07-25 \n", | |
"29 2017-11-07 " | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"with pd.option_context('display.max_colwidth', -1):\n", | |
" display(dataf)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Scrapper", | |
"language": "python", | |
"name": "scrapper" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.7.6" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 4 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment