From 6bd3e03de1c3a9202660556ac479e8317d357c21 Mon Sep 17 00:00:00 2001
From: hozan23
Date: Thu, 20 Jun 2024 00:51:44 +0200
Subject: init commit

---
 main.py | 192 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 192 insertions(+)
 create mode 100755 main.py

diff --git a/main.py b/main.py
new file mode 100755
index 0000000..20ca0b2
--- /dev/null
+++ b/main.py
@@ -0,0 +1,192 @@
+#!/bin/python
+"""
+Fetches news from RSS feeds, processes the news data, and generates an HTML file
+containing the news items.
+"""
+
+import json
+import subprocess
+import sys
+import shutil
+from pathlib import Path
+from dataclasses import dataclass
+import datetime
+
+SFEED_UPDATE_COMMAND = "sfeed_update"
+SFEED_JSON_COMMAND = "sfeed_json"
+SFEEDRC_PATH = "sfeedrc"
+FEEDS_DIR_PATH = "feeds"
+JSON_FILE_PATH = "result.json"
+HTML_FILE_PATH = "public/index.html"
+
+# Note: the HTML markup inside these two strings was stripped by the page
+# extraction; the tags below are a minimal reconstruction around the surviving
+# "sfeed" title, not the exact original markup.
+HTML_HEADER = """
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>sfeed</title>
+</head>
+<body>
+<ul>
+"""
+
+HTML_FOOTER = """
+</ul>
+</body>
+</html>
+"""
+
+
+@dataclass
+class NewsItem:
+    """
+    A class to represent a news item.
+    """
+
+    title: str
+    link: str
+    date_published: datetime.datetime
+
+
+def parse_published_date(dt: str):
+    """
+    Parses the published date from the provided string and returns a datetime
+    object, or None if the date is older than 4 days or if an error occurred
+    while parsing the date.
+    """
+
+    date_format = "%Y-%m-%dT%H:%M:%SZ"
+    try:
+        datetime_obj = datetime.datetime.strptime(dt, date_format)
+
+        current_date = datetime.datetime.now()
+        four_days_ago = current_date - datetime.timedelta(days=4)
+
+        if datetime_obj < four_days_ago:
+            return None
+
+        return datetime_obj
+
+    except ValueError as err:
+        print(f"Failed to parse published date: {err}")
+        return None
+
+
+def parse_json_data(data) -> list[NewsItem]:
+    """
+    Parses news items from the provided JSON data.
+
+    Returns a list of parsed NewsItem objects.
+    """
+
+    parsed_news: list[NewsItem] = []
+
+    items = data["items"]
+    for item in items:
+
+        datetime_obj = parse_published_date(item["date_published"])
+        if datetime_obj is None:
+            continue
+
+        news_item = NewsItem(item["title"], item["url"], datetime_obj)
+        parsed_news.append(news_item)
+
+    return parsed_news
+
+
+def write_to_html_file(parsed_news: list[NewsItem]):
+    """
+    Writes the provided parsed news items to an HTML file.
+    """
+
+    parsed_news = sorted(parsed_news, key=lambda i: i.date_published, reverse=True)
+
+    with open(HTML_FILE_PATH, "w", encoding="Utf-8") as html_file:
+
+        # Append the html page header
+        html_file.write(HTML_HEADER)
+
+        # Append the news items (the original item markup was stripped by the
+        # page extraction; this list-item link is a reconstruction)
+        for news_item in parsed_news:
+            html_file.write(f'<li><a href="{news_item.link}">{news_item.title}</a></li>\n')
+
+        # Append the html page footer
+        html_file.write(HTML_FOOTER)
+
+
+def generate_html_file():
+    """
+    Loads the JSON file, parses the news items, and writes them to a generated
+    HTML file.
+ """ + + with open(JSON_FILE_PATH, "r", encoding="Utf-8") as file: + data = json.load(file) + + parsed_news = parse_json_data(data) + write_to_html_file(parsed_news) + + +def fetch_rss_feeds(): + """ + Runs the sfeed_update command to fetch RSS news + """ + + try: + # Delete the old feeds if exists + if Path(FEEDS_DIR_PATH).exists(): + shutil.rmtree(FEEDS_DIR_PATH) + + # Fetch the feeds + subprocess.run(f"{SFEED_UPDATE_COMMAND} {SFEEDRC_PATH}", shell=True, check=True) + + # It should not panic if it fails to fetch one or more feeds + except subprocess.CalledProcessError as err: + print(f"An error occurred while fetching the news: {err}") + + except FileNotFoundError as err: + print(f"An error occurred while removing feeds directory: {err}") + sys.exit(1) + + +def generate_json_file(): + """ + Converts the fetched news to JSON using sfeed_json. + """ + + try: + # Convert the fetched news to json + subprocess.run( + f"{SFEED_JSON_COMMAND} {FEEDS_DIR_PATH}/* > {JSON_FILE_PATH} ", + shell=True, + check=True, + ) + + except subprocess.CalledProcessError as err: + print(f"An error occurred while running sfeed_json: {err}") + sys.exit(1) + + +def main(): + """ + Fetches news from an RSS feed and generates an HTML file containing these + news items. + + The function performs the following steps: + 1. Fetches the latest RSS news feeds. + 2. Converts the fetched news into a JSON file. + 3. Parses the JSON file and generates an HTML file with the news items. + """ + + fetch_rss_feeds() + generate_json_file() + generate_html_file() + + +main() -- cgit v1.2.3