""" | |
Prototype showing how a Twitter reader plugin could work, based on | |
https://github.com/lemon24/reader/issues/271#issuecomment-1111789547 | |
FooRetriever and FakeParser are simplified versions of the actual thing. | |
Typing left as an exercise for the reader (won't be straightforward). | |
UpdaterWrapper is needed because we can't choose updaters | |
(like we do with parsers and mime types); we'd likely use feed.version. | |
Of note: | |
* The "enrich" bits are in updater just because it was easy to add them there. | |
It should be possible to tell: | |
* core/storage: "I need some extra data in feed for update" | |
* storage: "I need some extra entry data in entries for update" | |
* The foo updater shares some logic with reader._updater: | |
* "not modified" early exit | |
* no entries but had error exit | |
* ... but it does *not* share other logic: | |
* stale feeds (?) | |
* entry data + entry for update (old entry) merging | |
* fancy "should I update feed/entry" logic | |
""" | |

from contextlib import contextmanager
from dataclasses import dataclass

from reader import make_reader, Content
from reader._parser import RetrieveResult
from reader._types import FeedData, EntryData, FeedUpdateIntent, EntryUpdateIntent

TYPE = 'application/x.foo'
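
# FooRetriever stands in for the Twitter API client. The http_etag it gets
# is not a plain etag: it is the (etag, recent_entries, api_key) tuple that
# UpdaterWrapper.process_old_feed() below packs into feed.http_etag, so the
# retriever knows which existing entries to fake new replies for.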
class FooRetriever:
    slow_to_read = False

    @contextmanager
    def __call__(self, url, http_etag=None, *args):
        print('retriever got http_etag', http_etag)
        etag, recent_entries, api_key = http_etag
        etag = int(etag) if etag else None

        # fake some results
        if not etag:
            data = {1: [1]}
            new_etag = 1
        elif etag == 1:
            data = {8: [8]}
            # derived from recent_entries on purpose
            data.update({int(id): [int(id) + 1] for id in recent_entries})
            new_etag = 8
        else:
            data = {}
            new_etag = etag

        yield RetrieveResult(data, TYPE, new_etag)

    def validate_url(self, url):
        pass
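
# FooParser converts the retriever's payload into reader's internal types:
# each {id: [tweet ids]} item becomes one EntryData whose single Content
# holds the (growing) list of tweet ids for that conversation.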
class FooParser:
    http_accept = TYPE

    def __call__(self, url, file, headers=None):
        print("parser got file", file)
        feed = FeedData(url, version='foo')
        entries = [
            EntryData(url, str(id), content=(Content(content, TYPE),))
            for id, content in file.items()
        ]
        return feed, entries
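
# UpdaterWrapper intercepts updates for foo: feeds and delegates everything
# else to the wrapped updater; as the docstring notes, a real implementation
# would likely dispatch on feed.version instead of the URL prefix.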
@dataclass
class UpdaterWrapper:
    original: ...
    reader: ...

    def process_old_feed(self, feed):
        if not feed.url.startswith('foo:'):
            return self.original.process_old_feed(feed)

        # "enrich" http_etag with the ids of recent entries,
        # so we can check them for new replies
        ids = [e.id for e in self.reader.get_entries(feed=feed)]
        api_key = self.reader.get_tag((), 'foo-api-key', 'key')
        feed = feed._replace(http_etag=(feed.http_etag, ids, api_key))

        return feed

    def make_update_intents(self, old_feed, now, global_now, parsed_feed, entry_pairs):
        if not old_feed.url.startswith('foo:'):
            return self.original.make_update_intents(
                old_feed, now, global_now, parsed_feed, entry_pairs
            )

        # copied from _updater ("Not modified.")
        if not parsed_feed:
            if not old_feed.last_updated:
                return FeedUpdateIntent(old_feed.url, now), ()
            if old_feed.last_exception:
                return FeedUpdateIntent(old_feed.url, old_feed.last_updated), ()
            return None, ()

        entries = []
        for new_entry, entry_for_update in entry_pairs:
            entry_new = not entry_for_update
            if entry_new:
                merged_entry = new_entry
            else:
                # "enrich" the entry for update,
                # so we can merge in new tweets
                old_entry = self.reader.get_entry((old_feed.url, new_entry.id))
                # merge in new tweets
                old_content = old_entry.content[0]
                new_content = new_entry.content[0]
                merged_content = new_content._replace(
                    value=old_content.value + new_content.value
                )
                merged_entry = new_entry._replace(content=(merged_content,))

            entry = EntryUpdateIntent(
                merged_entry,
                now,
                # first_updated / first_updated_epoch only for new entries
                now if entry_new else None,
                global_now if entry_new else None,
            )
            entries.append(entry)

        feed = None
        if entries:
            feed = FeedUpdateIntent(
                old_feed.url,
                now,
                parsed_feed.feed,
                parsed_feed.http_etag,
                parsed_feed.http_last_modified,
            )

        # copied from _updater
        if not feed and old_feed.last_exception:
            feed = FeedUpdateIntent(old_feed.url, old_feed.last_updated)

        return feed, entries
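
# demo: wire everything up and run a few updates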
reader = make_reader(':memory:')
reader._parser.mount_retriever('foo:', FooRetriever())
reader._parser.mount_parser_by_mime_type(FooParser())
reader._updater = UpdaterWrapper(reader._updater, reader)

reader.add_feed('foo:bar')
reader.add_feed('https://death.andgravity.com/_feed/index.xml')

for i in range(3):
    print(f"--- update #{i+1}")
    reader.update_feeds()
    print("entry count after update:", reader.get_entry_counts().total)
    print("entries after update (foo:bar):")
    for e in reader.get_entries(feed='foo:bar'):
        print(f" {e.id}: {e.content[0].value}")
    print()
""" | |
output: | |
--- update #1 | |
retriever got http_etag (None, [], 'key') | |
parser got file {1: [1]} | |
entry count after update: 22 | |
entries after update (foo:bar): | |
1: [1] | |
--- update #2 | |
retriever got http_etag ('1', ['1'], 'key') | |
parser got file {8: [8], 1: [2]} | |
entry count after update: 23 | |
entries after update (foo:bar): | |
8: [8] | |
1: [1, 2] | |
--- update #3 | |
retriever got http_etag ('8', ['8', '1'], 'key') | |
parser got file {} | |
entry count after update: 23 | |
entries after update (foo:bar): | |
8: [8] | |
1: [1, 2] | |
""" |