Last active
February 16, 2024 20:53
-
-
Save ispringle/69f96f4368920968f01e7d14707c83f4 to your computer and use it in GitHub Desktop.
RSS gen w/ globbing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from datetime import datetime, timedelta, timezone
from pathlib import Path
from xml.sax.saxutils import escape

from bs4 import BeautifulSoup
# Local checkout of the site and the public URL it is published under.
system_header = "/Users/ismailefetop/projects/org-blog/ismailefe_org"
website_header = "https://ismailefe.org"

# Every HTML page under <site root>/blog, at any depth, stored as a
# site-relative string that starts with "/" (e.g. "/blog/post/index.html").
# The item loop further down concatenates each entry onto both
# system_header and website_header, so the entries have to be strings, not
# Path objects. Joining with Path "/" also supplies the separator that plain
# string concatenation dropped (it used to glob ".../ismailefe_orgblog").
# Sorted so the feed comes out in the same order on every run.
_site_root = Path(system_header)
blog_posts = [
    "/" + page.relative_to(_site_root).as_posix()
    for page in sorted((_site_root / "blog").rglob("*.html"))
]

# Build timestamp taken in a fixed UTC+03:00 zone, so the "+0300" offset in
# the output is true whatever time zone the machine running the script uses.
update_time = datetime.now(timezone(timedelta(hours=3))).strftime(
    '%a, %d %b %Y %H:%M:%S %z'
)

feed_output = "/Users/ismailefetop/projects/org-blog/ismailefe_org/feed.xml"

# Start the feed from scratch with the channel header. The encoding is given
# explicitly because the text contains non-ASCII characters and the XML
# declaration promises utf-8.
with open(feed_output, "w", encoding="utf-8") as xml_file:
    xml_file.write(
f'''<?xml version="1.0" encoding="utf-8"?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
<channel>
<title>İsmail Efe's Blog Site</title>
<link>https://ismailefe.org/</link>
<description>İsmail Efe's Second Brain.</description>
<atom:link href="https://ismailefe.org/feed.xml" rel="self" type="application/rss+xml"/>
<lastBuildDate>{update_time}</lastBuildDate>'''
    )
# The function below was partially written by ChatGPT.
def parse_html(filename_arg):
    """Pull the title, date and body markup out of one HTML page.

    Args:
        filename_arg: Path of the HTML file; it is read as UTF-8.

    Returns:
        A dict with the keys "title", "date" and "body_html": the text of
        the first <title> element, the text of the first element carrying
        class "date", and the <body> element serialised back to a string.
        Any piece that is missing from the page is None.
    """
    with open(filename_arg, 'r', encoding='utf-8') as handle:
        document = BeautifulSoup(handle.read(), 'html.parser')

    def text_or_none(node):
        # Same truthiness test for both the title and the date lookup.
        return node.text if node else None

    body = document.body
    return {
        "title": text_or_none(document.find('title')),
        "date": text_or_none(document.find(class_='date')),
        "body_html": str(body) if body else None,
    }
# The function below was written by ChatGPT.
def format_date(input_date):
    """Convert a 'YYYY-MM-DD' date string to the form 'Fri, 16 Feb 2024'.

    Args:
        input_date: Date string matching the pattern '%Y-%m-%d'.

    Returns:
        The same date rendered with the pattern '%a, %d %b %Y'.

    Raises:
        ValueError: If input_date does not match '%Y-%m-%d'.
    """
    source_pattern = '%Y-%m-%d'
    target_pattern = '%a, %d %b %Y'
    return datetime.strptime(input_date, source_pattern).strftime(target_pattern)
# Append one <item> per post to the feed. Each entry of blog_posts is
# concatenated onto both system_header (file on disk) and website_header
# (public URL), so it is expected to be a site-relative string path that
# starts with "/". The file is opened once for the whole loop, as UTF-8 to
# match the XML declaration, and is closed even if a post fails to parse.
with open(feed_output, "a", encoding="utf-8") as xml_file:
    for post in blog_posts:
        post_dictionary = parse_html(system_header+post)

        # Titles and URLs are plain XML text: "&", "<" and ">" must be
        # escaped or the feed stops being well-formed.
        title = escape(post_dictionary["title"] or "")
        url = escape(website_header+post)

        # The body is wrapped in CDATA, whose only forbidden sequence is
        # "]]>"; split it across two CDATA sections if it ever occurs.
        body_html = (post_dictionary["body_html"] or "").replace(
            "]]>", "]]]]><![CDATA[>"
        )

        # Leave <pubDate> out when the page carries no date, rather than
        # crashing in format_date on None.
        date = post_dictionary["date"]
        pub_date = (
            f"\n<pubDate>{format_date(date)} 00:00:00 +0300</pubDate>"
            if date else ""
        )

        # NOTE(review): the <author> address below looks like a scraped
        # placeholder rather than a real mailbox; confirm before publishing.
        xml_file.write(f'''
<item>
<title>{title}</title>
<description><![CDATA[<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" lang="" xml:lang="">{body_html}</html>]]></description>
<author>[email protected] (İsmail Efe Top)</author>
<link>{url}</link>
<guid>{url}</guid>{pub_date}
</item>
''')
# Finish the document by appending the closing channel and rss tags.
with open(feed_output, "a") as xml_file:
    xml_file.write("\n</channel>\n</rss>")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Thank you so much!