commit 6bd3e03de1c3a9202660556ac479e8317d357c21 (patch)
tree   429e3a82ab2e880c44ca01839d67d6cfba8e5291
author:    hozan23 <hozan23@karyontech.net>  2024-06-20 00:51:44 +0200
committer: hozan23 <hozan23@karyontech.net>  2024-06-20 00:51:44 +0200

    init commit

 .gitignore (new, mode -rw-r--r--) |   6 +
 main.py    (new, mode -rwxr-xr-x) | 192 +
 2 files changed, 198 insertions(+), 0 deletions(-)
#!/bin/python
"""
Fetches news from RSS feeds, processes the news data, and generates an HTML file
containing the news items.

Pipeline: sfeed_update fetches the feeds, sfeed_json converts them to a single
JSON document, and this script renders that JSON into public/index.html.
"""

import datetime
import json
import shutil
import subprocess
import sys
from dataclasses import dataclass
from pathlib import Path
from typing import Optional

SFEED_UPDATE_COMMAND = "sfeed_update"
SFEED_JSON_COMMAND = "sfeed_json"
SFEEDRC_PATH = "sfeedrc"
FEEDS_DIR_PATH = "feeds"
JSON_FILE_PATH = "result.json"
HTML_FILE_PATH = "public/index.html"

HTML_HEADER = """<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <meta http-equiv="X-UA-Compatible" content="ie=edge">
  <title>sfeed</title>
  <link rel="stylesheet" href="./style.css">
  <link rel="icon" href="./favicon.ico" type="image/x-icon">
</head>
<body>
"""

HTML_FOOTER = """
</body></html>"""


@dataclass
class NewsItem:
    """A single news entry parsed from the sfeed JSON output."""

    title: str
    link: str
    date_published: datetime.datetime


def parse_published_date(dt: str, max_age_days: int = 4) -> Optional[datetime.datetime]:
    """
    Parse a published date of the form ``%Y-%m-%dT%H:%M:%SZ``.

    Returns the parsed (naive) datetime, or None when the string cannot be
    parsed or the date is older than ``max_age_days`` days (default 4, the
    window of news worth publishing).
    """
    date_format = "%Y-%m-%dT%H:%M:%SZ"
    try:
        datetime_obj = datetime.datetime.strptime(dt, date_format)
    except ValueError as err:
        # A malformed date only skips this item; it must not abort the run.
        print(f"Failed to parse published date: {err}")
        return None

    # NOTE(review): feeds appear to use UTC 'Z' stamps but are compared
    # against the local clock, as in the original implementation.
    cutoff = datetime.datetime.now() - datetime.timedelta(days=max_age_days)
    if datetime_obj < cutoff:
        return None
    return datetime_obj


def parse_json_data(data) -> list[NewsItem]:
    """
    Parse news items from the provided JSON data (``{"items": [...]}``).

    Items whose published date is missing the expected format or too old are
    silently skipped. Returns a list of NewsItem objects.
    """
    parsed_news: list[NewsItem] = []

    for item in data["items"]:
        datetime_obj = parse_published_date(item["date_published"])
        if datetime_obj is None:
            continue
        parsed_news.append(NewsItem(item["title"], item["url"], datetime_obj))

    return parsed_news


def write_to_html_file(parsed_news: list[NewsItem]):
    """
    Write the provided parsed news items to the HTML file, newest first.
    """
    parsed_news = sorted(parsed_news, key=lambda i: i.date_published, reverse=True)

    # The output directory (e.g. public/) may not exist on a fresh checkout.
    Path(HTML_FILE_PATH).parent.mkdir(parents=True, exist_ok=True)

    with open(HTML_FILE_PATH, "w", encoding="utf-8") as html_file:
        # Append the html page header
        html_file.write(HTML_HEADER)
        html_file.write("  <ul>\n")

        for news_item in parsed_news:
            dp = news_item.date_published.date()
            il = news_item.link
            it = news_item.title
            html_file.write(
                f'    <li><span> {dp}</span> <a href="{il}">{it}</a></li>\n'
            )

        html_file.write("  </ul>")

        # Append the html page footer
        html_file.write(HTML_FOOTER)


def generate_html_file():
    """
    Load the JSON file, parse the news items, and write them to a generated
    HTML file.
    """
    with open(JSON_FILE_PATH, "r", encoding="utf-8") as file:
        data = json.load(file)

    parsed_news = parse_json_data(data)
    write_to_html_file(parsed_news)


def fetch_rss_feeds():
    """
    Run the sfeed_update command to fetch RSS news into FEEDS_DIR_PATH.

    A partial fetch failure is logged but tolerated; a failure to clear the
    old feeds directory is fatal.
    """
    try:
        # Delete the old feeds if exists
        if Path(FEEDS_DIR_PATH).exists():
            shutil.rmtree(FEEDS_DIR_PATH)

        # Fetch the feeds. List-argv (no shell) avoids word-splitting the
        # config path through /bin/sh.
        subprocess.run([SFEED_UPDATE_COMMAND, SFEEDRC_PATH], check=True)

    # It should not panic if it fails to fetch one or more feeds
    except subprocess.CalledProcessError as err:
        print(f"An error occurred while fetching the news: {err}")

    except FileNotFoundError as err:
        print(f"An error occurred while removing feeds directory: {err}")
        sys.exit(1)


def generate_json_file():
    """
    Convert the fetched news to JSON using sfeed_json, writing JSON_FILE_PATH.
    """
    try:
        # Expand feeds/* ourselves (same non-dotfile semantics as the shell
        # glob) and redirect stdout directly, instead of shell=True.
        feed_files = sorted(str(p) for p in Path(FEEDS_DIR_PATH).glob("*"))
        with open(JSON_FILE_PATH, "w", encoding="utf-8") as out:
            subprocess.run(
                [SFEED_JSON_COMMAND, *feed_files],
                stdout=out,
                check=True,
            )

    except subprocess.CalledProcessError as err:
        print(f"An error occurred while running sfeed_json: {err}")
        sys.exit(1)


def main():
    """
    Fetches news from an RSS feed and generates an HTML file containing these
    news items.

    The function performs the following steps:
    1. Fetches the latest RSS news feeds.
    2. Converts the fetched news into a JSON file.
    3. Parses the JSON file and generates an HTML file with the news items.
    """
    fetch_rss_feeds()
    generate_json_file()
    generate_html_file()


if __name__ == "__main__":
    main()
