aboutsummaryrefslogtreecommitdiff
path: root/rss.py
diff options
context:
space:
mode:
Diffstat (limited to 'rss.py')
-rw-r--r-- rss.py 111
1 files changed, 111 insertions, 0 deletions
diff --git a/rss.py b/rss.py
new file mode 100644
index 0000000..869ef6f
--- /dev/null
+++ b/rss.py
@@ -0,0 +1,111 @@
+import datetime
+from lxml import etree, html
+import os
+import re
+
+_DOMAIN = 'https://ypei.me'
+_RSS_TAGNAME = 'rss'
+_RSS_ATTRIBS = {'version': '2.0'}
+
+_POSTS_HTML_DIR = 'site/posts'
+_POSTS_ORG_DIR = 'posts'
+_POSTS_BASE_URL = _DOMAIN + '/posts'
+_BLOG_FEED_PATH = 'site/blog-feed.xml'
+
+_MICROBLOG_URL = _DOMAIN + '/microblog.html'
+_MICROBLOG_PATH = 'site/microblog.html'
+_MICROBLOG_FEED_PATH = 'site/microblog-feed.xml'
+
+
+def get_date(org):
+ return re.search('^#\+(date|DATE): <(.*)>$', org, re.MULTILINE).group(2)
+
+def make_rss_root():
+ return etree.Element(_RSS_TAGNAME, attrib=_RSS_ATTRIBS)
+
+def make_post_item(post_html, post_org, post_filename):
+ post = etree.Element('item')
+ tree = html.fromstring(post_html)
+ post.append(tree.find('./head/title'))
+ content = tree.find('.//div[@id="content"]')
+ content.tag = 'description'
+ content.attrib.clear()
+ post.append(content)
+ etree.SubElement(post, 'link').text = _POSTS_BASE_URL + '/' + post_filename
+ etree.SubElement(post, 'pubDate').text = get_date(post_org)
+ return post
+
+def make_post_items():
+ posts = []
+ for post in os.listdir(_POSTS_HTML_DIR):
+ post_sans_ext, ext = os.path.splitext(post)
+ if ext != '.html': continue
+ post_html = open(_POSTS_HTML_DIR + '/' + post, 'rb').read()
+ post_org = open(_POSTS_ORG_DIR + '/' + post_sans_ext + '.org').read()
+ posts.append(make_post_item(post_html, post_org, post))
+ posts.sort(key=lambda post: post.find('./pubDate').text, reverse=True)
+ return posts
+
+def make_blog_channel():
+ channel = etree.Element('channel')
+ etree.SubElement(channel, 'title').text = 'Yuchen Pei\'s blog'
+ etree.SubElement(channel, 'description').text = 'Yuchen Pei\'s blog'
+ etree.SubElement(channel, 'link').text = _DOMAIN + '/blog.html'
+ etree.SubElement(channel, 'copyright').text = '2013-2021 Yuchen Pei, licensed under CC BY-SA 4.0'
+ etree.SubElement(channel, 'lastBuildDate').text = str(datetime.datetime.now())
+ for post in make_post_items():
+ channel.append(post)
+ return channel
+
+def make_blog_rss():
+ root = make_rss_root()
+ root.append(make_blog_channel())
+ open(_BLOG_FEED_PATH, 'w').write(etree.tostring(root, encoding='unicode'))
+ return root
+
+def make_micropost_item(micropost_html):
+ """
+ The header of a micropost has the following format:
+ <p>
+ <b><a href="#ia-lawsuit">2020-08-02</a></b> - ia lawsuit
+ <a id="ia-lawsuit"></a>
+ </p>
+ """
+ micropost = etree.Element('item')
+ header = micropost_html.find('./p')
+ etree.SubElement(micropost, 'link').text = _MICROBLOG_URL + header.find('.//a').attrib['href']
+ etree.SubElement(micropost, 'pubDate').text = header.find('.//a').text
+ etree.SubElement(micropost, 'title').text = header.find('./b').tail[3:].strip()
+ micropost_html.remove(header)
+ micropost_html.tag = 'description'
+ micropost.append(micropost_html)
+ return micropost
+
+def make_and_add_micropost_items(channel):
+ microblog = html.fromstring(open(_MICROBLOG_PATH, 'rb').read())
+ for micropost in microblog.findall('.//div[@id="content"]/ul/li'):
+ channel.append(make_micropost_item(micropost))
+
+def make_microblog_channel():
+ channel = etree.Element('channel')
+ etree.SubElement(channel, 'title').text = 'Yuchen Pei\'s microblog'
+ etree.SubElement(channel, 'description').text = 'Yuchen Pei\'s microblog'
+ etree.SubElement(channel, 'link').text = _MICROBLOG_URL
+ etree.SubElement(channel, 'lastBuildDate').text = str(datetime.datetime.now())
+ make_and_add_micropost_items(channel)
+ return channel
+
+def make_microblog_rss():
+ root = make_rss_root()
+ root.append(make_microblog_channel())
+ open(_MICROBLOG_FEED_PATH, 'w').write(etree.tostring(root, encoding='unicode'))
+ return root
+
+def main():
+ make_blog_rss()
+ print(f"Published blog rss to {_BLOG_FEED_PATH}.")
+ make_microblog_rss()
+ print(f"Published microblog rss to {_MICROBLOG_FEED_PATH}.")
+
+if __name__ == '__main__':
+ main()