Webdrive twitter notifications visually - Reply w text & media files - Uses OCR, LLM text structuring & PyAutoGUI
from PIL import Image, ImageEnhance, ImageFilter
from selenium import webdriver
import time
import json
import pytesseract
from pytesseract import Output
import pyautogui
from Foundation import NSData
from AppKit import NSPasteboardTypePNG, NSPasteboardTypeTIFF, NSPasteboard
import subprocess
import openai
# First run > "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome" --remote-debugging-port=9222
# Recommend closing all Chrome tabs & windows other than this one ^
# Then use that debugger-connected browser window to log in & navigate to twitter.com/notifications - have this as the only window open on the primary screen (e.g. laptop)
# Note: solely tested on macOS (pyautogui coordinates behave differently on macOS Retina displays)
# Note: requires an OpenAI API key (set openai.api_key or the OPENAI_API_KEY environment variable)
# Note: for more efficient reactivity, a MutationObserver can trigger OCR whenever a new element (tweet notification) is added to XPATH: '//*[@id="react-root"]/div/div/div[2]/main/div/div/div/div[1]/div/div[3]/section/div/div/div' - see the sketch below
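
# The note above mentions a MutationObserver as a more reactive alternative to polling with OCR.
# A minimal sketch of that idea follows, assuming the XPATH above still matches the notification
# feed container; it is not called anywhere in this script and the helper names are our own.
NOTIFICATIONS_XPATH = '//*[@id="react-root"]/div/div/div[2]/main/div/div/div/div[1]/div/div[3]/section/div/div/div'

def install_mutation_observer(driver, xpath=NOTIFICATIONS_XPATH):
    # Inject a MutationObserver that increments window.__newTweetCount whenever a child node
    # (i.e. a new tweet notification) is added to the feed container.
    driver.execute_script("""
        var target = document.evaluate(arguments[0], document, null,
            XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
        if (!target) { return; }
        window.__newTweetCount = 0;
        window.__newTweetObserver = new MutationObserver(function(mutations) {
            mutations.forEach(function(m) { window.__newTweetCount += m.addedNodes.length; });
        });
        window.__newTweetObserver.observe(target, {childList: true});
    """, xpath)

def new_tweets_detected(driver):
    # Poll the counter set by the observer; True means at least one new node appeared.
    return driver.execute_script("return window.__newTweetCount || 0;") > 0
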
MODEL = "gpt-3.5-turbo"
OCR_TEXT_PROMPT = """
Transform the provided OCR text from Twitter into a structured JSON format similar to the given JSON examples:
Target Structure:
[
  {
    "name": "A Twitter user's name",
    "username": "@TwitterUserHandle",
    "type": "mentioned",
    "text": "The content of a tweet or a message directly to @langwallet"
  },
  {
    "name": "Another Twitter user's name",
    "username": "@AnotherTwitterUserHandle",
    "type": "mentioned",
    "text": "The content of another tweet or a message directly to @langwallet, including any accounts mentioned with @"
  },
  {
    "name": "A Twitter user's name",
    "username": "@twitteruserhandle",
    "type": "like",
    "text": "liked a Tweet you were mentioned in"
  }
]
Example:
'VW Notifications > Q__ Search Twitter\nAll Verified Mentions °\nfe} Home What’s happening\n+ Explore Bay langdao @langDAOai - 19s see MLB - 2 hours ago\np @langwallet show me my token holdings rhesesastiooinne\n1) vd 9 &\n@ Notifications\nVW There was a login to your account @langwallet from a new device on #SpiderVerset®\nJun 01, 2023. Review it now. Now Playing exclusively in movie theaters.\nf&1 Messages\n@ Promoted by Sony Pictures Canada\n@ -~\na@ |\n(=) Lists Technology - Trending\nApheliOn.lens / .eth (knob/head) followed you #GPT4\n6,623 Tweets\nA Bookmarks yw There was a login to your account @langwallet from a new device on _\nJun 01, 2023. Review it now. Trending in Canada\n0 Dave\nTop Articles eS ue 611M Tweets\n. . . Trending in Canada\n© Verified Orgs uncensored @ liked a Tweet you were mentioned in Novant\nPutting this one on your radar @langwallet. A wallet that integrates Al to 6,131 Tweets\na Profile make it so simple that a layman can use it. Great use of Al, if the wallet is\nuser friendly, secure and simple, it could pickup. Show more\n©) More a Cy\nWho to follow\n\nantsinurpantz followed you\nCet Av eZ\ne b\na @ x @gaby_goldberg\nManO0Op followed you\n\nLangWallet @ . ey\n@langwallet @ me ‘a7 Messages\n\n»\n'
Response:
[
  {
    "name": "langdao",
    "username": "@langDAOai",
    "text": "@langwallet show me my token holdings",
    "type": "mentioned"
  },
  ... etc.
]
"""
def get_tweets_w_ocr(latest_only=False):
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_experimental_option("debuggerAddress", "127.0.0.1:9222")
    global driver
    driver = webdriver.Chrome(options=chrome_options)
    url = 'https://twitter.com/notifications'
    driver.get(url)
    print("on twitter notifications tab")
    # wait = WebDriverWait(driver, 10)
    time.sleep(5)  # give it time to load
    while True:
        try:
            driver.save_screenshot('screenshot.png')
            im = Image.open('screenshot.png')
            text = pytesseract.image_to_string(im, lang='eng')
            # tweets = process_text(text)  # no longer in use - LLM formatting now
            print('starting LLM call to format OCR text')
            json_tweets = call_gpt([{"role": "user", "content": text},
                                    {"role": "user", "content": OCR_TEXT_PROMPT}])
            print('OCR text formatting result:', json_tweets)
            # Load the JSON data into a Python object
            tweets = json.loads(json_tweets)
            # Write JSON data to a file
            with open('tweets.json', 'w') as f:
                json.dump(tweets, f, indent=4)
            # Return tweet(s)
            if latest_only:
                tweet = tweets[0]
                return tweet
            else:
                return tweets
        except Exception as e:
            print('Error in webdrive twitter:', e)
            time.sleep(60)
            driver.refresh()
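
# json.loads above assumes the model returns bare JSON. In practice gpt-3.5-turbo sometimes wraps
# its answer in prose or a markdown code fence, so a tolerant parser can help. The helper below is
# a minimal sketch and is not wired into get_tweets_w_ocr; the function name is our own.
def extract_json_array(llm_output):
    # Parse only the slice between the first '[' and the last ']';
    # fall back to parsing the whole string if no brackets are found.
    start = llm_output.find('[')
    end = llm_output.rfind(']')
    if start != -1 and end > start:
        return json.loads(llm_output[start:end + 1])
    return json.loads(llm_output)
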
def handle_tweet(tweet):
    # Here's where we make a call to determine the tweet's route through the langwallet backend
    # Can call an LLM here - e.g. WindowAI_Call with tweet + local data
    print('Can call models here')
    # Placeholder vars instead of an LLM call
    reply = f"placeholder response to: {tweet['username']} who said: {tweet['text']}"
    filename = 'screenshot.png'
    return [reply, filename]  # if no filename, return False here


def check_if_latest_tweet_already_processed(tweet):
    return False
def perform_action_with_gui(text_to_find=None, action=None, click_offset=0, text_to_type=None):
    # Capture the whole screen containing the open Twitter browser that's logged in and on twitter.com/notifications
    screenshot = pyautogui.screenshot()
    img = screenshot.convert('L')  # Directly convert the screenshot to grayscale
    img = img.filter(ImageFilter.SHARPEN)  # Apply sharpening filter
    enhancer = ImageEnhance.Contrast(img)
    img = enhancer.enhance(2)  # Increase contrast
    data = pytesseract.image_to_data(img, output_type=Output.DICT)
    # Look for the text and perform the specified action
    if action == 'cmd_enter':
        pyautogui.hotkey('command', 'return')
    for i in range(len(data['text'])):
        print(data['text'][i])
        if data['text'][i] == text_to_find:
            print('Found "@{}" at position: {}, {}'.format(text_to_find,
                                                           data['left'][i], data['top'][i]))
            # Screenshot pixel coordinates are divided by 2 to map back to pyautogui's
            # logical screen coordinates on a Retina (2x) macOS display
            if action == 'click':
                # Move the mouse to the location of the text
                pyautogui.moveTo(
                    (data['left'][i] / 2) + click_offset, data['top'][i] / 2, duration=0.45)
                pyautogui.mouseDown(button='left')
                pyautogui.mouseUp()
                pyautogui.click()
                print('clicked')
            elif action == 'type':
                # Move the mouse to the location of the text
                pyautogui.moveTo(data['left'][i] / 2,
                                 data['top'][i] / 2, duration=1)
                pyautogui.mouseDown(button='left')
                pyautogui.mouseUp()
                pyautogui.click()
                # Click on the text to focus the input field, then type the desired text
                pyautogui.typewrite(text_to_type)
            break  # stop after finding the first occurrence
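
# The hard-coded division by 2 above assumes a 2x Retina display. A more portable option is to
# derive the scale factor from the screenshot itself; the helper below is a small sketch of that
# idea (its name is our own and it is not called anywhere in this script).
def screenshot_scale_factor():
    # Ratio of screenshot pixel width to pyautogui's logical screen width
    # (2.0 on a typical Retina Mac, 1.0 on a non-Retina display).
    screen_width, _ = pyautogui.size()
    shot_width, _ = pyautogui.screenshot().size
    return shot_width / screen_width
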
def set_up_reply(username, reply):  # include @ e.g. @langwallet
    # "reply!" is a constant on the GUI - it's detected & clicked
    # Click on the username to open the tweet
    perform_action_with_gui(username, 'click', click_offset=100)
    # Click on the reply box and type the reply text
    perform_action_with_gui('reply!', 'type', text_to_type=reply)
    perform_action_with_gui('Reply', 'click')  # no longer needed
def img_to_clipboard(format="PNG", filename="screenshot.png"):
    pasteboard = NSPasteboard.generalPasteboard()
    image_data = NSData.dataWithContentsOfFile_(filename)
    if format not in ("PNG", "TIFF"):
        raise TypeError("Invalid format, must be PNG or TIFF")
    format_type = NSPasteboardTypePNG if format == "PNG" else NSPasteboardTypeTIFF
    pasteboard.clearContents()
    pasteboard.setData_forType_(image_data, format_type)


def paste_macos():
    script = """
    tell application "System Events"
        keystroke "v" using command down
    end tell
    """
    return subprocess.run(["osascript", "-e", script], capture_output=True, text=True).stdout
def submit_reply():
    pyautogui.keyDown('command')
    # Simulate pressing Return/Enter key
    pyautogui.press('return')
    # Release the Command (⌘) key
    pyautogui.keyUp('command')


def call_gpt(messages, model=MODEL):
    # print("""Inputting {} tokens into {}.""".format(num_tokens_from_messages(systemprompt+userprompt), model))
    response = openai.ChatCompletion.create(
        model=model,
        temperature=0,
        messages=messages)
    return response["choices"][0]["message"]["content"]
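
# call_gpt above makes a single attempt; a transient rate limit or network hiccup is only caught
# by the 60-second retry loop in get_tweets_w_ocr. A lighter-weight alternative is to retry the
# API call itself. This is a minimal sketch using the pre-1.0 openai SDK that
# ChatCompletion.create comes from; the helper name and retry parameters are our own.
def call_gpt_with_retry(messages, model=MODEL, attempts=3, backoff=10):
    for attempt in range(attempts):
        try:
            return call_gpt(messages, model=model)
        except openai.error.OpenAIError as e:
            print(f"OpenAI call failed (attempt {attempt + 1}/{attempts}): {e}")
            time.sleep(backoff)
    raise RuntimeError("call_gpt failed after retries")
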
# ENTRY POINT
def entry_point():
    # for now returning only the latest tweet - database & driver cron job not hooked up to this gist
    latest_tweet = get_tweets_w_ocr(latest_only=True)
    processed_already = check_if_latest_tweet_already_processed(latest_tweet)
    # Can test the reply portion with static data by uncommenting the following lines:
    # latest_tweet = {'name': 'langdao', 'username': '@langDAOai',
    #                 'text': '@langwallet show me my wallet', 'type': 'mentioned'}
    # processed_already = False
    # only applies to mentions in notifications
    if not processed_already and latest_tweet["type"] == "mentioned":
        [reply, filename] = handle_tweet(latest_tweet)
        set_up_reply(latest_tweet["username"], reply)
        if filename:
            img_to_clipboard(format="PNG", filename=filename)
            paste_macos()
            print("completed reply image paste")
        submit_reply()
        print("completed tweet reply")
    else:
        print("tweet either already processed or not an @mention")
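
# The call to entry_point() below runs a single pass. The comment inside it mentions a cron job
# that isn't part of this gist; a crude stand-in, sketched here but not invoked, is a simple
# polling loop (the helper name and interval are our own assumptions):
def run_forever(poll_seconds=120):
    while True:
        entry_point()
        time.sleep(poll_seconds)
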
entry_point()