aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorhozan23 <hozan23@karyontech.net>2024-06-20 00:51:44 +0200
committerhozan23 <hozan23@karyontech.net>2024-06-20 00:51:44 +0200
commit6bd3e03de1c3a9202660556ac479e8317d357c21 (patch)
tree429e3a82ab2e880c44ca01839d67d6cfba8e5291
init commit
-rw-r--r--.gitignore6
-rwxr-xr-xmain.py192
2 files changed, 198 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..2a3bacc
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,6 @@
+sfeedrc
+feeds
+sfeed
+
+public
+result.json
diff --git a/main.py b/main.py
new file mode 100755
index 0000000..20ca0b2
--- /dev/null
+++ b/main.py
@@ -0,0 +1,192 @@
+#!/bin/python
+"""
+Fetches news from RSS feeds, processes the news data, and generates an HTML file
+containing the news items.
+"""
+
+import json
+import subprocess
+import sys
+import shutil
+from pathlib import Path
+from dataclasses import dataclass
+import datetime
+
+SFEED_UPDATE_COMMAND = "sfeed_update"
+SFEED_JSON_COMMAND = "sfeed_json"
+SFEEDRC_PATH = "sfeedrc"
+FEEDS_DIR_PATH = "feeds"
+JSON_FILE_PATH = "result.json"
+HTML_FILE_PATH = "public/index.html"
+
+HTML_HEADER = """<!DOCTYPE html>
+<html lang="en">
+<head>
+ <meta charset="UTF-8">
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
+ <meta http-equiv="X-UA-Compatible" content="ie=edge">
+ <title>sfeed</title>
+ <link rel="stylesheet" href="./style.css">
+ <link rel="icon" href="./favicon.ico" type="image/x-icon">
+</head>
+<body>
+"""
+
+HTML_FOOTER = """
+</body></html>"""
+
+
+@dataclass
+class NewsItem:
+ """
+ A class to represent a news item.
+ """
+
+ title: str
+ link: str
+ date_published: datetime.datetime
+
+
+def parse_published_date(dt: str):
+ """
+ Parses the published date from the provided string and returns a datetime object
+ or None if the date is older than 4 days or if an error occurred while
+ parsing the date.
+ """
+
+ date_format = "%Y-%m-%dT%H:%M:%SZ"
+ try:
+ datetime_obj = datetime.datetime.strptime(dt, date_format)
+
+ current_date = datetime.datetime.now()
+ four_days_ago = current_date - datetime.timedelta(days=4)
+
+ if datetime_obj < four_days_ago:
+ return None
+
+ return datetime_obj
+
+ except ValueError as err:
+ print(f"Failed to parse published date: {err}")
+ return None
+
+
+def parse_json_data(data) -> list[NewsItem]:
+ """
+ Parses news items from the provided JSON data.
+
+ Returns A list of parsed NewsItem objects.
+ """
+
+ parsed_news: list[NewsItem] = []
+
+ items = data["items"]
+ for item in items:
+
+ datetime_obj = parse_published_date(item["date_published"])
+ if datetime_obj is None:
+ continue
+
+ news_item = NewsItem(item["title"], item["url"], datetime_obj)
+ parsed_news.append(news_item)
+
+ return parsed_news
+
+
+def write_to_html_file(parsed_news: list[NewsItem]):
+ """
+ Writes the provided parsed news items to an HTML file.
+ """
+
+ parsed_news = sorted(parsed_news, key=lambda i: i.date_published, reverse=True)
+
+ with open(HTML_FILE_PATH, "w", encoding="Utf-8") as html_file:
+
+ # Append the html page header
+ html_file.write(HTML_HEADER)
+ html_file.write(" <ul>\n")
+
+ for news_item in parsed_news:
+ dp = news_item.date_published.date()
+ il = news_item.link
+ it = news_item.title
+ html_file.write(
+ f' <li><span> {dp}</span> <a href="{il}">{it}</a></li>\n'
+ )
+
+ html_file.write(" </ul>")
+
+ # Append the html page footer
+ html_file.write(HTML_FOOTER)
+
+
+def generate_html_file():
+ """
+ Loads the JSON file, parses the news items, and writes them to a generated
+ HTML file.
+ """
+
+ with open(JSON_FILE_PATH, "r", encoding="Utf-8") as file:
+ data = json.load(file)
+
+ parsed_news = parse_json_data(data)
+ write_to_html_file(parsed_news)
+
+
+def fetch_rss_feeds():
+ """
+ Runs the sfeed_update command to fetch RSS news
+ """
+
+ try:
+ # Delete the old feeds if exists
+ if Path(FEEDS_DIR_PATH).exists():
+ shutil.rmtree(FEEDS_DIR_PATH)
+
+ # Fetch the feeds
+ subprocess.run(f"{SFEED_UPDATE_COMMAND} {SFEEDRC_PATH}", shell=True, check=True)
+
+ # It should not panic if it fails to fetch one or more feeds
+ except subprocess.CalledProcessError as err:
+ print(f"An error occurred while fetching the news: {err}")
+
+ except FileNotFoundError as err:
+ print(f"An error occurred while removing feeds directory: {err}")
+ sys.exit(1)
+
+
+def generate_json_file():
+ """
+ Converts the fetched news to JSON using sfeed_json.
+ """
+
+ try:
+ # Convert the fetched news to json
+ subprocess.run(
+ f"{SFEED_JSON_COMMAND} {FEEDS_DIR_PATH}/* > {JSON_FILE_PATH} ",
+ shell=True,
+ check=True,
+ )
+
+ except subprocess.CalledProcessError as err:
+ print(f"An error occurred while running sfeed_json: {err}")
+ sys.exit(1)
+
+
+def main():
+ """
+ Fetches news from an RSS feed and generates an HTML file containing these
+ news items.
+
+ The function performs the following steps:
+ 1. Fetches the latest RSS news feeds.
+ 2. Converts the fetched news into a JSON file.
+ 3. Parses the JSON file and generates an HTML file with the news items.
+ """
+
+ fetch_rss_feeds()
+ generate_json_file()
+ generate_html_file()
+
+
+main()