Last active
June 10, 2023 17:07
-
-
Save bGZo/f3c4876e230308fc3d2b2bc8db9dd55e to your computer and use it in GitHub Desktop.
Get the content of Twitter/Mastodon posts in Logseq format. Tweets are fetched through the tweetpik.com service; Mastodon posts are fetched through the official Mastodon API.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import requests | |
import re | |
import argparse | |
from argparse import RawTextHelpFormatter | |
from datetime import datetime | |
from markdownify import markdownify as md | |
# Logseq quote block for one tweet. Positional fields, in order: body text,
# author name, timestamp, handle, tweet id, likes, retweets, replies.
# NOTE(review): the non-ASCII glyphs in both templates look like mis-decoded
# UTF-8 (probably a dash and emoji) -- confirm against the original file.
TWITTER_TEMPLATE = (
    "- #+BEGIN_QUOTE\n"
    "{}\n"
    "โ {} [{}](https://twitter.com/{}/status/{})\n"
    "โค๏ธ {} ๐ {} ๐ฌ {}\n"
    "#+END_QUOTE\n"
)

# Logseq quote block for one Mastodon post. Positional fields, in order:
# body text, username, creation time, post URL.
MASTODON_TEMPLATE = (
    "- #+BEGIN_QUOTE\n"
    "{}\n"
    "โ {} [{}]({})\n"
    # Closing marker written exactly like the Twitter template's; a stray
    # apostrophe after END_QUOTE used to end up in the generated output.
    "#+END_QUOTE\n"
)
def from_twitter(line):
    """Fetch one tweet through the tweetpik.com API and render it for Logseq.

    Args:
        line: One line of the input file holding a tweet URL; it is cleaned
            with ``format_link`` before being sent to the API.

    Returns:
        The tweet rendered with ``TWITTER_TEMPLATE``, or ``""`` when the
        request fails or the response cannot be parsed.
    """
    base_url = 'https://tweetpik.com/api/v2/tweets?url='
    url = base_url + format_link(line)
    # Bound before the try so the error handler can always print it, even
    # when the HTTP request itself raised before any text was received.
    response_text = ""
    try:
        reply = requests.get(url, headers={
            'Accept-Encoding': 'gzip, deflate',
            "Accept": "application/json",
            'Connection': 'keep-alive',
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36 Edg/94.0.992.38',
            "Referer": "https://tweetpik.com/"
        }, timeout=30)  # without a timeout a stalled server blocks forever
        response_text = format_response(reply.text)
        # The payload is a JSON array; only its first element is used.
        tweet_data = json.loads(response_text)[0]
    except Exception as e:
        # Best-effort tool: report the failure and carry on with other links.
        print(e)
        print(response_text)
        print("Handle " + url + " occur error")
        return ""

    tweet_content = md(tweet_data['textHtml'])
    # Append every photo as a Markdown image so the link is not lost.
    # Assumes each entry of 'photos' is the image URL -- TODO confirm.
    for media in tweet_data.get('photos') or []:
        tweet_content += "\n![]({})".format(media)

    posted_at = datetime.strptime(tweet_data['datetime'], '%Y-%m-%dT%H:%M:%S.%fZ')
    tweet = TWITTER_TEMPLATE.format(
        tweet_content,
        md(tweet_data['nameHtml']),
        posted_at.strftime('%Y%m%d, %H:%M:%S'),
        tweet_data['handler'], tweet_data['id'],
        str(tweet_data['likes']), str(tweet_data['retweets']), str(tweet_data['replies']))

    # Keep the raw record, one JSON document per line, so the backup file
    # can actually be parsed again (a Python repr is not valid JSON).
    with open("backup_res_twitter.json", "a", encoding='UTF-8') as f:
        f.write(json.dumps(tweet_data, ensure_ascii=False) + "\n")
    print ("Handle Tweets {} Done. Congardulations! ๐".format(url))
    return tweet
def from_mastodon(line):
    """Fetch one Mastodon post through the instance API and render it for Logseq.

    Args:
        line: One line of the input file holding a post URL. After cleaning
            with ``format_link`` it is split on ``/``; element 2 is used as
            the host and element 4 as the status id.

    Returns:
        The post rendered with ``MASTODON_TEMPLATE``, or ``""`` when the link
        is malformed or the request fails.
    """
    link_parts = format_link(line).split('/')
    try:
        url = 'https://' + link_parts[2] + '/api/v1/statuses/' + link_parts[4]
    except IndexError:
        # Not enough path segments to hold both a host and a status id.
        print("Handle " + line + " occur error")
        return ""

    try:
        reply = requests.get(url, headers={
            'Accept-Encoding': 'gzip, deflate',
            'Accept': '*/*',
            'Connection': 'keep-alive',
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36 Edg/94.0.992.38',
        }, timeout=30)  # without a timeout a stalled server blocks forever
        # An error status carries an error document instead of a status, so
        # fail here rather than with a KeyError further down.
        reply.raise_for_status()
        status = reply.json()
    except Exception as e:
        # Best-effort tool: report the failure and carry on with other links.
        print(e)
        print("Handle " + line + " occur error")
        return ""

    post_content = format_response(md(status['content']))
    # Append every attachment as a Markdown image so the link is not lost.
    for media in status.get('media_attachments') or []:
        post_content += "\n![]({})".format(media['url'])

    # The template constant is MASTODON_TEMPLATE; the previously used name
    # TEMPLATE_MASTODON is not defined anywhere and raised NameError.
    post = MASTODON_TEMPLATE.format(
        post_content,
        status['account']['username'], status['created_at'], status['url'])

    # Keep the raw record, one JSON document per line, so the backup file
    # can actually be parsed again (a Python repr is not valid JSON).
    with open("backup_res_mastodon.json", "a", encoding='UTF-8') as f:
        f.write(json.dumps(status, ensure_ascii=False) + "\n")
    print ("Handle Mastodon {} Done. Congardulations! ๐".format(status['url']))
    return post
def format_link(link):
    """Reduce one input line to a bare post URL.

    Removes all whitespace (including the trailing newline of a file line),
    a leading ``-`` list bullet, and Twitter's ``?s=...&t=...`` share suffix.

    Args:
        link: Raw line, e.g. ``"- https://twitter.com/u/status/1?s=20&t=x\\n"``.

    Returns:
        The cleaned URL string.
    """
    # A URL never contains whitespace, so drop all of it; this also removes
    # the newline that every line read from a file ends with.
    link = re.sub(r"\s+", "", link)
    # Only a *leading* bullet is noise. Hyphens inside the URL (host names,
    # ids) are significant and must survive.
    link = re.sub(r"^-+", "", link)
    # Share tokens may themselves contain '-', so accept it in the token.
    link = re.sub(r"\?s=\d+&t=[\w-]+", "", link)
    return link
def format_response(response):
    """Flatten a response text before it is parsed or embedded.

    Every newline is removed and every backslash-escaped double quote
    (``\\"``) is replaced by a single quote.

    Args:
        response: Text to clean.

    Returns:
        The cleaned text.
    """
    # The former extra step that collapsed "\n\n" into "\n" ran after all
    # newlines were already gone, so it could never match; it is dropped
    # without changing the result.
    # NOTE(review): if paragraph breaks were meant to survive, the newline
    # removal itself needs revisiting -- confirm the intended output.
    return response.replace("\n", "").replace('\\"', "'")
def get_info(file):
    """Convert every link in *file* to its Logseq-formatted post.

    Args:
        file: Iterable of text lines, one link per line. Lines containing
            ``twitter.com`` go to ``from_twitter``; every other non-blank
            line goes to ``from_mastodon``.

    Returns:
        A list with one formatted string per non-blank line, in input order.
        Entries are ``""`` for links whose handler reported a failure.
    """
    posts_collection = []
    for line in file:
        # Blank lines (e.g. a trailing empty line) carry no link; passing
        # them on would make the Mastodon handler fail on the missing URL.
        if not line.strip():
            continue
        if "twitter.com" in line:
            posts_collection.append(from_twitter(line))
        else:
            posts_collection.append(from_mastodon(line))
    return posts_collection
def output(file_name, lines):
    """Write *lines* back to back into *file_name*, replacing its contents.

    Args:
        file_name: Path of the file to create or overwrite (UTF-8).
        lines: Iterable of strings; no separator is added between them.
    """
    with open(file_name, 'w', encoding='UTF-8') as sink:
        sink.writelines(lines)
def main():
    """Parse the command line, convert every link and write the result.

    The positional ``file`` argument is the list of links (one per line).
    The result goes to ``--output`` when given, otherwise it overwrites the
    input file.
    """
    parser = argparse.ArgumentParser(
        prog="get_twitter_mastodon_content_with_logseq_format.py",
        description=(
            "Get the content of post of Twitter/Mastodon meantime.\n\nThe formar depends "
            "the API v2 by http://tweetpik.com, the latter use the Offical API.\nThe script need the "
            "input file(one link each line), with option output, which will\noverwrite the input file "
            "by default. Besises, the whole success responses will be\nsaved in backup_res_xxx.json file"
        ),
        formatter_class=RawTextHelpFormatter)
    parser.add_argument("file", type=argparse.FileType('r', encoding='UTF-8'),
                        help="Add the source url links file to handle")
    # The help used to promise a default of output.md, but without -o the
    # code has always written back to the input file.
    parser.add_argument("-o", "--output", action="store",
                        help="Write the result to this file instead of overwriting the input file")
    args = parser.parse_args()

    # Read everything and close the input handle before the same path is
    # possibly reopened for writing.
    with args.file as source:
        response = get_info(source)

    target = args.output if args.output is not None else args.file.name
    output(target, response)


if __name__ == "__main__":
    main()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
requests | |
markdownify |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Quick Start
$ pip3 install --trusted-host pypi.tuna.tsinghua.edu.cn -i https://pypi.tuna.tsinghua.edu.cn/simple -r .\requirements.txt
$ python3 get_twitter_mastodon_content_with_logseq_format.py input.file
$ python3 get_twitter_mastodon_content_with_logseq_format.py input.file -o output.file
Usage
TODOs