Created
July 6, 2023 19:16
-
-
Save stuartlangridge/20ffe860fee0ecc315d3878c1ea77c35 to your computer and use it in GitHub Desktop.
Python file to read the Bluesky ("atproto") firehose and show the json of each message. (Doesn't contain the author, because who knows how the hell to do that.)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from atproto.firehose import FirehoseSubscribeReposClient, parse_subscribe_repos_message | |
import sys | |
from atproto import CAR, models | |
from atproto.cbor import decode_dag, decode_dag_multi | |
from atproto.xrpc_client.models.utils import get_or_create | |
import json | |
class JSONExtra(json.JSONEncoder):
    """JSON encoder that falls back to ``repr()`` for unsupported objects.

    Raw firehose records sometimes contain CID() objects (which seem to be
    references to something elsewhere in Bluesky) that the stock encoder
    cannot serialise. Instead of failing, such values are emitted as their
    string representation. This is a hack, but good enough for printing.
    """

    def default(self, obj):
        """Return a JSON-serialisable stand-in for *obj*.

        The parent implementation is tried first; if it rejects *obj* with
        ``TypeError``, ``repr(obj)`` is returned instead.
        """
        try:
            return super().default(obj)
        except TypeError:
            # Only the "not serialisable" failure is handled here, so that
            # KeyboardInterrupt / SystemExit are no longer swallowed.
            return repr(obj)
# Module-level firehose client; it is started at the bottom of the script
# with on_message_handler as its callback.
client = FirehoseSubscribeReposClient() | |
# all of this undocumented horseshit is based on cargo-culting the bollocks out of | |
# https://github.com/MarshalX/atproto/blob/main/examples/firehose/sub_repos.py | |
# and | |
# https://github.com/MarshalX/bluesky-feed-generator/blob/main/server/data_stream.py | |
def on_message_handler(message):
    """Print every newly created ``app.bsky.feed.post`` record as JSON.

    The raw firehose *message* is parsed; anything that is not a repo
    ``Commit`` is ignored. For each "create" operation in the commit, the
    record is looked up by CID in the CAR blocks carried by the commit and,
    if it is a post, dumped to stdout via ``JSONExtra``.

    Other record types (e.g. "app.bsky.feed.like") are ignored.

    NOTE(review): the printed record does not name its author. The author
    is presumably available on the commit itself (e.g. ``commit.repo``);
    confirm against the SDK documentation.
    """
    commit = parse_subscribe_repos_message(message)
    if not isinstance(commit, models.ComAtprotoSyncSubscribeRepos.Commit):
        return

    car = CAR.from_bytes(commit.blocks)
    for op in commit.ops:
        if op.action != "create" or not op.cid:
            continue

        raw = car.blocks.get(op.cid)
        if not raw:
            # The referenced block is not in this commit's CAR; without the
            # guard, the type check below would raise AttributeError.
            continue

        cooked = get_or_create(raw, strict=False)
        # With strict=False the result may not be a typed model, so read the
        # record type defensively instead of assuming the attribute exists.
        if getattr(cooked, "_type", None) != "app.bsky.feed.post":
            continue

        print(json.dumps(raw, cls=JSONExtra, indent=2))
# Start the client, handing it on_message_handler as the per-message callback.
# NOTE(review): presumably this call blocks while the stream is open — confirm.
client.start(on_message_handler) |
Here is a (mostly unchanged) version for 0.0.56 with the updated dependency locations and methods.
import json
from atproto_client.models import get_or_create
from atproto import CAR, models
from atproto_firehose import FirehoseSubscribeReposClient, parse_subscribe_repos_message
class JSONExtra(json.JSONEncoder):
    """JSON encoder that falls back to ``repr()`` for unsupported objects.

    Raw firehose records sometimes contain CID() objects (which seem to be
    references to something elsewhere in Bluesky) that the stock encoder
    cannot serialise. Instead of failing, such values are emitted as their
    string representation. This is a hack, but good enough for printing.
    """

    def default(self, obj):
        """Return a JSON-serialisable stand-in for *obj*.

        The parent implementation is tried first; if it rejects *obj* with
        ``TypeError``, ``repr(obj)`` is returned instead.
        """
        try:
            return super().default(obj)
        except TypeError:
            # Only the "not serialisable" failure is handled here, so that
            # KeyboardInterrupt / SystemExit are no longer swallowed.
            return repr(obj)
# Module-level firehose client; it is started at the bottom of the script
# with on_message_handler as its callback.
client = FirehoseSubscribeReposClient()
# all of this undocumented horseshit is based on cargo-culting the bollocks out of
# https://github.com/MarshalX/atproto/blob/main/examples/firehose/sub_repos.py
# and
# https://github.com/MarshalX/bluesky-feed-generator/blob/main/server/data_stream.py
def on_message_handler(message):
    """Print every newly created ``app.bsky.feed.post`` record as JSON.

    The raw firehose *message* is parsed; anything that is not a repo
    ``Commit`` is ignored. For each "create" operation in the commit, the
    record is looked up by CID in the CAR blocks carried by the commit and,
    if it is a post, dumped to stdout via ``JSONExtra``.

    Other record types (e.g. "app.bsky.feed.like") are ignored.

    NOTE(review): the printed record does not name its author. The author
    is presumably available on the commit itself (e.g. ``commit.repo``);
    confirm against the SDK documentation.
    """
    commit = parse_subscribe_repos_message(message)
    if not isinstance(commit, models.ComAtprotoSyncSubscribeRepos.Commit):
        return

    car = CAR.from_bytes(commit.blocks)
    for op in commit.ops:
        if op.action != "create" or not op.cid:
            continue

        raw = car.blocks.get(op.cid)
        if not raw:
            # The referenced block is not in this commit's CAR; without the
            # guard, the type check below would raise AttributeError.
            continue

        cooked = get_or_create(raw, strict=False)
        # With strict=False the result may not be a typed model, so read the
        # record type defensively instead of assuming the attribute exists.
        if getattr(cooked, "py_type", None) != "app.bsky.feed.post":
            continue

        print(json.dumps(raw, cls=JSONExtra, indent=2))
# Start the client, handing it on_message_handler as the per-message callback.
# NOTE(review): presumably this call blocks while the stream is open — confirm.
client.start(on_message_handler)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
glorious, i think