Skip to content

Instantly share code, notes, and snippets.

@dtmsecurity
Created July 27, 2020 07:41
Show Gist options
  • Save dtmsecurity/9c05890da66480e5291b116adb63c473 to your computer and use it in GitHub Desktop.
Save dtmsecurity/9c05890da66480e5291b116adb63c473 to your computer and use it in GitHub Desktop.
Send URLs scraped from liked tweets to a Notion page using the unofficial Notion API
import os
import urllib
import urllib.parse
import urllib.request

import tweepy
from bs4 import BeautifulSoup
from notion.block import TodoBlock, BookmarkBlock
from notion.client import NotionClient
from unshortenit import UnshortenIt
from urlextract import URLExtract
class GoldFermi:
    """Copy URLs found in a Twitter account's liked tweets onto a Notion page.

    Each new URL becomes a to-do block on the target Notion page with a
    bookmark block nested beneath it. URLs that were already handled are
    remembered in a plain-text cache file, one URL per line.
    """

    # Seconds to wait for a linked page before giving up on it.
    _REQUEST_TIMEOUT = 10
    # Browser-like User-Agent sent when fetching linked pages.
    _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36'

    def __init__(self, twitter_details, notion_details, url_file, reverse=True, verbose=False):
        """Authenticate against Twitter and Notion and store the settings.

        Args:
            twitter_details: Mapping with the keys ``consumer_key``,
                ``consumer_secret``, ``access_token``, ``access_token_secret``,
                ``screen_name`` and ``tweet_limit``.
            notion_details: Mapping with the keys ``token_v2`` and
                ``notion_page``.
            url_file: Path of the text file used as the URL cache.
            reverse: When true, newly added entries are moved to the top of
                the Notion page.
            verbose: When true, progress messages are printed.
        """
        self.verbose = verbose
        if self.verbose:
            print("[*] Authenticate to Twitter")
        self.auth = tweepy.OAuthHandler(twitter_details["consumer_key"], twitter_details["consumer_secret"])
        self.auth.set_access_token(twitter_details["access_token"], twitter_details["access_token_secret"])
        self.twitter_api = tweepy.API(self.auth)
        self.twitter_screen_name = twitter_details["screen_name"]
        self.twitter_tweet_limit = twitter_details["tweet_limit"]
        if self.verbose:
            print("[*] Authenticate to Notion")
        self.notion_client = NotionClient(token_v2=notion_details["token_v2"])
        self.notion_page = self.notion_client.get_block(notion_details["notion_page"])
        if self.verbose:
            print("[*] Target Notion page {}".format(self.notion_page))
        self.url_file = url_file
        if self.verbose:
            print("[*] URL cache file {}".format(self.url_file))
        self.reverse = reverse
        if self.verbose and self.reverse:
            print("[*] New send new URLs to top of Notion page")

    def get_liked_tweets(self, tweet_limit):
        """Return an iterator over up to ``tweet_limit`` liked tweets of the configured account."""
        if self.verbose:
            # Report the limit actually used for this call, not the configured default.
            print("[*] Get last {} tweets for {}".format(tweet_limit, self.twitter_screen_name))
        return tweepy.Cursor(self.twitter_api.favorites, id=self.twitter_screen_name).items(tweet_limit)

    def expand_tweet_text(self, tweet_id):
        """Return the full text of a tweet, using the retweeted status when there is one."""
        status = self.twitter_api.get_status(tweet_id, tweet_mode="extended")
        try:
            return str(status.retweeted_status.full_text)
        except AttributeError:  # Not a retweet
            return str(status.full_text)

    def extract_urls(self, text):
        """Return the list of URLs that URLExtract finds in ``text``."""
        # Reuse one extractor instead of constructing a new one for every tweet.
        extractor = getattr(self, "_url_extractor", None)
        if extractor is None:
            extractor = self._url_extractor = URLExtract()
        return extractor.find_urls(text)

    def get_html_title(self, html):
        """Return the text of the ``<title>`` element of ``html``, or ``""``.

        The empty string is returned when parsing fails, when there is no
        ``<title>`` element, or when the element has no single string value.
        """
        try:
            title_tag = BeautifulSoup(html, 'html.parser').find('title')
        except Exception:
            # Best effort: a page that cannot be parsed simply has no title.
            return ""
        if title_tag is None or title_tag.string is None:
            return ""
        return str(title_tag.string)

    def get_redirect(self, url):
        """Return the unshortened form of ``url`` as reported by UnshortenIt."""
        if self.verbose:
            print("[-] Unshorten {}".format(url))
        unshortener = UnshortenIt(default_timeout=5)
        return unshortener.unshorten(url)

    def get_url_data(self, url):
        """Fetch the page behind ``url`` and describe it.

        Returns:
            A dict with the keys ``expanded_url`` (final URL after redirects),
            ``html`` (decoded page body) and ``page_title``, or ``None`` when
            the page could not be fetched.

        Errors raised by ``get_redirect`` are not handled here and propagate
        to the caller.
        """
        redirect_url = self.get_redirect(url)
        url_data = dict()
        try:
            opener = urllib.request.build_opener()
            if self.verbose:
                print("[-] Visiting {}".format(redirect_url))
            request = urllib.request.Request(redirect_url, headers={'User-Agent': self._USER_AGENT})
            # The context manager closes the connection even if reading fails.
            with opener.open(request, timeout=self._REQUEST_TIMEOUT) as response:
                url_data["expanded_url"] = str(response.geturl())
                if self.verbose:
                    print("[-] Expanded url is {}".format(url_data["expanded_url"]))
                charset = response.headers.get_content_charset() or "utf-8"
                body = response.read()
            try:
                url_data["html"] = body.decode(charset, errors="replace")
            except LookupError:
                # The server declared a charset Python does not know.
                url_data["html"] = body.decode("utf-8", errors="replace")
            url_data["page_title"] = self.get_html_title(url_data["html"])
            if self.verbose:
                print("[-] Page title is {}".format(url_data["page_title"]))
            return url_data
        except Exception as exc:
            # Best effort: an unreachable page is reported and skipped.
            if self.verbose:
                print("[-] Could not fetch {}: {}".format(redirect_url, exc))
            return None

    def save_link_to_notion_page(self, url, title, description):
        """Add a to-do block titled ``title`` with a nested bookmark for ``url``."""
        new_todo = self.notion_page.children.add_new(TodoBlock, title=title)
        new_todo.children.add_new(BookmarkBlock, title=title, link=url, description=description)
        if self.reverse:
            new_todo.move_to(self.notion_page, "first-child")

    def url_exists(self, url):
        """Report whether ``url`` is in the cache file, recording it if it is not.

        Returns:
            ``True`` when a line of the cache file equals ``url``. Otherwise
            ``url`` is appended to the cache file (creating it if needed) and
            ``False`` is returned.
        """
        if os.path.isfile(self.url_file):
            with open(self.url_file, "r") as url_file_fh:
                if any(url == cached.strip() for cached in url_file_fh):
                    return True
        with open(self.url_file, "a") as url_file_fh:
            url_file_fh.write("{}\n".format(url))
        return False

    @staticmethod
    def _is_twitter_url(url):
        """Return True when the host of ``url`` is twitter.com or one of its subdomains."""
        # Compare the host only; a substring test on the whole URL would also
        # match query strings and unrelated domains containing "twitter.com".
        host = urllib.parse.urlsplit(url).hostname or ""
        return host == "twitter.com" or host.endswith(".twitter.com")

    def _process_url(self, url, description):
        """Resolve one URL from a tweet and save it to Notion unless it is filtered out."""
        url_data = self.get_url_data(url)
        if url_data is None:
            if self.verbose:
                print(" [*] Ignoring as page could not be fetched")
            return
        expanded_url = url_data["expanded_url"]
        page_title = url_data["page_title"]
        if self.verbose:
            print(" [*] URL: {} - {}".format(expanded_url, page_title))
        if self._is_twitter_url(expanded_url):
            if self.verbose:
                print(" [*] Ignoring as is to twitter.com")
            return
        if self.url_exists(expanded_url):
            if self.verbose:
                print(" [*] Ignoring as already in cache")
            return
        if self.verbose:
            print(" [*] Adding to Notion")
        self.save_link_to_notion_page(expanded_url, page_title, description)

    def process_liked_tweets(self):
        """Walk the configured number of liked tweets and save every new URL to Notion.

        A failure while handling one URL is reported (in verbose mode) and
        does not stop the remaining URLs from being processed.
        """
        for liked_tweet in self.get_liked_tweets(self.twitter_tweet_limit):
            liked_tweet_text = self.expand_tweet_text(liked_tweet.id)
            liked_tweet_description = "@{} - {}".format(liked_tweet.user.screen_name, liked_tweet_text)
            if self.verbose:
                print("[*] Tweet: {}".format(liked_tweet_description))
            for url in self.extract_urls(liked_tweet_text):
                try:
                    self._process_url(url, liked_tweet_description)
                except Exception as exc:
                    # Catch Exception rather than everything so that Ctrl-C
                    # and SystemExit still stop the run.
                    if self.verbose:
                        print(" [*] Skipping {}: {}".format(url, exc))
if __name__ == '__main__':
    # Script entry point: fill in the placeholders below before running.

    # Twitter API credentials, the account whose likes are read, and how
    # many liked tweets to look at.
    twitter_settings = dict(
        consumer_key="",
        consumer_secret="",
        access_token="",
        access_token_secret="",
        screen_name="@<twitter handle>",
        tweet_limit=10,
    )

    # Notion session token and the page that receives the links.
    notion_settings = dict(
        token_v2="<notion cookie>",
        notion_page="https://www.notion.so/<page>",
    )

    # Text file that remembers which URLs were already handled.
    cache_path = "urls.txt"

    GoldFermi(twitter_settings, notion_settings, cache_path, verbose=True).process_liked_tweets()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment