Last active
June 3, 2023 17:33
-
-
Save n2nco/99435ab1c6a4ee80efc384558c9c6c52 to your computer and use it in GitHub Desktop.
Webdrive twitter notifications visually - Reply w text & media files - Uses OCR, LLM text structuring & PyAutoGUI
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from PIL import Image, ImageEnhance, ImageFilter | |
from selenium import webdriver | |
import time | |
import json | |
import pytesseract | |
from pytesseract import Output | |
import pyautogui | |
from Foundation import NSData | |
from AppKit import NSPasteboardTypePNG, NSPasteboardTypeTIFF, NSPasteboard | |
import subprocess | |
import openai | |
# First run > "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome" --remote-debugging-port=9222 | |
# Recommend closing all chrome tabs & windows other than this one ^
# Then use that debugger-connected browser window to log in & navigate to twitter.com/notifications - have this as the only window open on primary screen (e.g. laptop)
# Note: solely tested on MacOS (pyautogui coordinates differ on MacOS) | |
# Note: for efficient reactivity, can use a MutationObserver to trigger OCR on new element (tweet noti) added to XPATH: '//*[@id="react-root"]/div/div/div[2]/main/div/div/div/div[1]/div/div[3]/section/div/div/div', | |
# Chat model used to structure raw OCR text into JSON.
MODEL = "gpt-3.5-turbo"

# Prompt asking the model to convert raw OCR output from the Twitter
# notifications page into a JSON list of notification objects.
# NOTE: the example JSON must be strictly valid (straight quotes, commas,
# no trailing comma) because the model's reply is fed directly to
# json.loads(); the original used curly quotes and missing commas, which
# taught the model to emit unparseable output.
OCR_TEXT_PROMPT = """
Transform the provided OCR text from Twitter into a structured JSON format similar to the given JSON examples:
Target Structure:
[
    {
        "name": "A Twitter user's name",
        "username": "@TwitterUserHandle",
        "type": "mentioned",
        "text": "The content of a tweet or a message directly to @langwallet"
    },
    {
        "name": "Another Twitter user's name",
        "username": "@AnotherTwitterUserHandle",
        "type": "mentioned",
        "text": "The content of another tweet or a message directly to @langwallet. Including the account mentioned @SomeAccount"
    },
    {
        "name": "A Twitter user's name",
        "username": "@twitteruserhandle",
        "type": "like",
        "text": "liked a Tweet you were mentioned in"
    }
]
Example:
'VW Notifications > Q__ Search Twitter\nAll Verified Mentions °\nfe} Home What’s happening\n+ Explore Bay langdao @langDAOai - 19s see MLB - 2 hours ago\np @langwallet show me my token holdings rhesesastiooinne\n1) vd 9 &\n@ Notifications\nVW There was a login to your account @langwallet from a new device on #SpiderVerset®\nJun 01, 2023. Review it now. Now Playing exclusively in movie theaters.\nf&1 Messages\n@ Promoted by Sony Pictures Canada\n@ -~\na@ |\n(=) Lists Technology - Trending\nApheliOn.lens / .eth (knob/head) followed you #GPT4\n6,623 Tweets\nA Bookmarks yw There was a login to your account @langwallet from a new device on _\nJun 01, 2023. Review it now. Trending in Canada\n0 Dave\nTop Articles eS ue 611M Tweets\n. . . Trending in Canada\n© Verified Orgs uncensored @ liked a Tweet you were mentioned in Novant\nPutting this one on your radar @langwallet. A wallet that integrates Al to 6,131 Tweets\na Profile make it so simple that a layman can use it. Great use of Al, if the wallet is\nuser friendly, secure and simple, it could pickup. Show more\n©) More a Cy\nWho to follow\n\nantsinurpantz followed you\nCet Av eZ\ne b\na @ x @gaby_goldberg\nManO0Op followed you\n\nLangWallet @ . ey\n@langwallet @ me ‘a7 Messages\n\n»\n'
Response:
[
    {
        "name": "langdao",
        "username": "@langDAOai",
        "text": "@langwallet show me my token holdings",
        "type": "mentioned"
    },
    .... etc.
]
"""
def get_tweets_w_ocr(latest_only=False):
    """OCR the visible Twitter notifications page and return structured tweets.

    Attaches to an already-running Chrome started with
    --remote-debugging-port=9222, screenshots the notifications tab, OCRs
    it with tesseract, and asks the LLM to structure the raw text into a
    JSON list of notification dicts (also persisted to tweets.json).

    Args:
        latest_only: when True, return only the first (newest) notification
            dict instead of the whole list.

    Returns:
        A single tweet dict when latest_only is True, otherwise a list of
        tweet dicts. On any failure it waits 60s, refreshes the page, and
        retries forever, so it only returns on success.
    """
    chrome_options = webdriver.ChromeOptions()
    # Attach to the user's existing, logged-in Chrome session instead of
    # launching a fresh (logged-out) browser.
    chrome_options.add_experimental_option("debuggerAddress", "127.0.0.1:9222")
    global driver  # global so other code (and the retry path) can reuse it
    driver = webdriver.Chrome(options=chrome_options)
    url = 'https://twitter.com/notifications'
    driver.get(url)
    print("on twitter notifications tab")
    time.sleep(5)  # give the page time to load
    while True:
        try:
            driver.save_screenshot('screenshot.png')
            # Context manager closes the image handle instead of leaking it.
            with Image.open('screenshot.png') as im:
                text = pytesseract.image_to_string(im, lang='eng')
            print('starting LLM call to format OCR text')
            json_tweets = call_gpt([
                {"role": "user", "content": text},
                {"role": "user", "content": OCR_TEXT_PROMPT},
            ])
            print('OCR text formatting result:', json_tweets)
            tweets = json.loads(json_tweets)
            # Persist the structured notifications for inspection/debugging.
            with open('tweets.json', 'w') as f:
                json.dump(tweets, f, indent=4)
            if latest_only:
                if not tweets:
                    # Explicit error instead of an opaque IndexError; the
                    # handler below turns it into a refresh-and-retry.
                    raise ValueError('no notifications parsed from OCR text')
                return tweets[0]
            return tweets
        except Exception as e:
            # Deliberate broad catch: OCR, LLM, and JSON failures are all
            # expected transient conditions — wait, refresh, try again.
            print('Error in webdrive twitter:', e)
            time.sleep(60)
            driver.refresh()
def handle_tweet(tweet):
    """Decide how to respond to a notification dict.

    Placeholder for routing the tweet through the langwallet backend or an
    LLM call; currently builds a canned reply and attaches the screenshot.

    Args:
        tweet: notification dict with at least 'username' and 'text' keys.

    Returns:
        [reply_text, media_filename]; the filename is falsy when there is
        no media to attach.
    """
    print('Can call models here')
    author = tweet['username']
    content = tweet['text']
    reply_text = f"placeholder response to: {author} who said: {content}"
    media_file = 'screenshot.png'
    return [reply_text, media_file]
def check_if_latest_tweet_already_processed(tweet):
    """Stub deduplication check.

    A real implementation would consult a database of handled
    notifications; for now every tweet is reported as unprocessed.
    """
    return False
def perform_action_with_gui(text_to_find=None, action=None, click_offset=0, text_to_type=None):
    """Find on-screen text via OCR and click/type at its location.

    Screenshots the whole screen (which should contain the open, logged-in
    Twitter notifications tab), OCRs it, then performs `action` at the
    first occurrence of `text_to_find`:
      - 'click': move to the text (plus `click_offset` px to the right) and click
      - 'type': click the text to focus the field, then type `text_to_type`
      - 'cmd_enter': press Cmd+Return immediately (no text search needed)
    """
    # Capture the whole screen containing the open twitter browser that's
    # logged in and on twitter.com/notifications.
    screenshot = pyautogui.screenshot()
    img = screenshot.convert('L')  # Directly convert the screenshot to grayscale
    img = img.filter(ImageFilter.SHARPEN)  # Apply sharpening filter
    enhancer = ImageEnhance.Contrast(img)
    img = enhancer.enhance(2)  # Increase contrast to improve OCR accuracy
    data = pytesseract.image_to_data(img, output_type=Output.DICT)
    # 'cmd_enter' needs no target text, so it fires before the search loop.
    if action == 'cmd_enter':
        pyautogui.hotkey('command', 'return')
    for i in range(len(data['text'])):
        print(data['text'][i])
        if data['text'][i] == text_to_find:
            print('Found "@{}" at position: {}, {}'.format(text_to_find,
                                                           data['left'][i], data['top'][i]))
            # NOTE(review): coordinates are halved below — presumably because
            # the screenshot is 2x scale on Retina displays while pyautogui
            # moves in logical points; confirm on non-Retina screens.
            if action == 'click':
                # Move the mouse to the location of the text
                pyautogui.moveTo(
                    (data['left'][i] / 2) + click_offset, data['top'][i] / 2, duration=0.45)
                # mouseDown/mouseUp followed by click — possibly a deliberate
                # double-press to ensure focus; left unchanged.
                pyautogui.mouseDown(button='left')
                pyautogui.mouseUp()
                pyautogui.click()
                print('Clicked')
                print('clicked')
            elif action == 'type':
                # Move the mouse to the location of the text
                pyautogui.moveTo(data['left'][i] / 2,
                                 data['top'][i] / 2, duration=1)
                pyautogui.mouseDown(button='left')
                pyautogui.mouseUp()
                pyautogui.click()
                # Click on the text to focus the input field
                # Type the desired text
                pyautogui.typewrite(text_to_type)
            break  # stop after finding the first occurrence
def set_up_reply(username, reply):  # include @ e.g. @langwallet
    """Open the reply box for `username`'s tweet and type the reply text.

    'reply!' is a constant marker on the GUI — it is detected via OCR and
    clicked to focus the input field.
    """
    steps = (
        (username, 'click', {'click_offset': 100}),
        ('reply!', 'type', {'text_to_type': reply}),
        ('Reply', 'click', {}),  # no longer needed
    )
    for target_text, gui_action, extras in steps:
        perform_action_with_gui(target_text, gui_action, **extras)
def img_to_clipboard(format="PNG", filename="screenshot.png"):
    """Copy an image file onto the macOS general pasteboard.

    Args:
        format: pasteboard data type, "PNG" or "TIFF".
        filename: path of the image file to copy.

    Raises:
        TypeError: if `format` is not "PNG" or "TIFF".
        FileNotFoundError: if the file cannot be read.
    """
    # Validate arguments before touching the pasteboard so a bad call
    # cannot clobber the user's clipboard.
    if format not in ("PNG", "TIFF"):
        raise TypeError("Invalid format, must be PNG or TIFF")
    image_data = NSData.dataWithContentsOfFile_(filename)
    if image_data is None:
        # dataWithContentsOfFile_ returns None on failure; previously this
        # silently cleared the clipboard and later pasted nothing.
        raise FileNotFoundError(f"could not read image file: {filename}")
    format_type = NSPasteboardTypePNG if format == "PNG" else NSPasteboardTypeTIFF
    pasteboard = NSPasteboard.generalPasteboard()
    pasteboard.clearContents()
    pasteboard.setData_forType_(image_data, format_type)
def paste_macos():
    """Simulate Cmd+V via AppleScript so the clipboard image is pasted
    into the focused reply box (macOS only).

    Returns:
        The stdout of the osascript invocation.
    """
    script = """
    tell application "System Events"
    keystroke "v" using command down
    end tell
    """
    completed = subprocess.run(
        ["osascript", "-e", script],
        capture_output=True,
        text=True,
    )
    return completed.stdout
def submit_reply():
    """Press Cmd+Return — Twitter's keyboard shortcut for sending a reply.

    pyautogui.hotkey holds Command, taps Return, then releases Command,
    matching the explicit keyDown/press/keyUp sequence.
    """
    pyautogui.hotkey('command', 'return')
def call_gpt(messages, model=MODEL):
    """Run a chat completion at temperature 0 and return the reply text.

    Args:
        messages: list of {"role": ..., "content": ...} chat messages.
        model: chat model name; defaults to the module-level MODEL.

    Returns:
        The content string of the first completion choice.
    """
    completion = openai.ChatCompletion.create(
        model=model,
        temperature=0,
        messages=messages,
    )
    first_choice = completion["choices"][0]
    return first_choice["message"]["content"]
# ENTRY POINT
def entry_point():
    """Fetch the newest notification and, if it is an unprocessed @mention,
    reply to it with text plus an image attachment pasted from the clipboard.
    """
    # For now only the latest tweet is returned — database & driver cron job
    # are not hooked up to this gist.
    latest_tweet = get_tweets_w_ocr(latest_only=True)
    processed_already = check_if_latest_tweet_already_processed(latest_tweet)
    # Can test the reply portion with static data as follows:
    # latest_tweet = {'name': 'langdao', 'username': '@langDAOai',
    #                 'text': '@langwallet show me my wallet', 'type': 'mentioned'}
    # processed_already = False

    # Only applies to @mentions; likes/follows/login alerts are ignored.
    if not processed_already and latest_tweet["type"] == "mentioned":
        reply, filename = handle_tweet(latest_tweet)
        set_up_reply(latest_tweet["username"], reply)
        if filename:
            img_to_clipboard(format="PNG", filename=filename)
            paste_macos()
            print("completed reply image paste")
        submit_reply()
        print("completed tweet reply")
    else:
        print("tweet either already processed or not an @mention")


if __name__ == "__main__":
    # Guarded so importing this module (e.g. from a scheduler) no longer
    # starts driving the GUI as an import side effect.
    entry_point()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment