aboutsummaryrefslogtreecommitdiff
path: root/main.py
blob: 20ca0b21c3faf085990ea6ee35e6c82a5135b145 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
#!/bin/python
"""
Fetches news from RSS feeds, processes the news data, and generates an HTML file 
containing the news items.
"""

import datetime
import html
import json
import shutil
import subprocess
import sys
from dataclasses import dataclass
from pathlib import Path

SFEED_UPDATE_COMMAND = "sfeed_update"
SFEED_JSON_COMMAND = "sfeed_json"
SFEEDRC_PATH = "sfeedrc"
FEEDS_DIR_PATH = "feeds"
JSON_FILE_PATH = "result.json"
HTML_FILE_PATH = "public/index.html"

HTML_HEADER = """<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <meta http-equiv="X-UA-Compatible" content="ie=edge">
  <title>sfeed</title>
  <link rel="stylesheet" href="./style.css">
  <link rel="icon" href="./favicon.ico" type="image/x-icon">
</head>
<body>
"""

HTML_FOOTER = """
</body></html>"""


@dataclass
class NewsItem:
    """
    A single news entry extracted from the sfeed JSON output.
    """

    title: str  # headline text, used as the link label in the HTML list
    link: str  # URL the generated HTML entry points at
    date_published: datetime.datetime  # parsed publication time, used for sorting


def parse_published_date(dt: str):
    """
    Parses the published date from the provided string and returns a datetime object
    or None if the date is older than 4 days or if an error occurred while
    parsing the date.
    """

    date_format = "%Y-%m-%dT%H:%M:%SZ"
    try:
        datetime_obj = datetime.datetime.strptime(dt, date_format)

        current_date = datetime.datetime.now()
        four_days_ago = current_date - datetime.timedelta(days=4)

        if datetime_obj < four_days_ago:
            return None

        return datetime_obj

    except ValueError as err:
        print(f"Failed to parse published date: {err}")
        return None


def parse_json_data(data) -> list[NewsItem]:
    """
    Extracts news items from the decoded sfeed JSON document.

    Entries whose publication date is rejected by parse_published_date
    (too old or unparseable) are skipped.

    Returns a list of parsed NewsItem objects.
    """

    collected: list[NewsItem] = []

    for entry in data["items"]:
        published = parse_published_date(entry["date_published"])
        if published is not None:
            collected.append(NewsItem(entry["title"], entry["url"], published))

    return collected


def write_to_html_file(parsed_news: list[NewsItem]):
    """
    Writes the provided parsed news items to an HTML file.

    Items are rendered newest-first as an unordered list. Titles and URLs
    originate from untrusted RSS feeds, so both are HTML-escaped to keep
    the markup valid and to prevent markup injection.
    """

    parsed_news = sorted(parsed_news, key=lambda i: i.date_published, reverse=True)

    with open(HTML_FILE_PATH, "w", encoding="Utf-8") as html_file:

        # Append the html page header
        html_file.write(HTML_HEADER)
        html_file.write("  <ul>\n")

        for news_item in parsed_news:
            dp = news_item.date_published.date()
            # quote=True also escapes '"' so the href attribute cannot be
            # broken out of by a quote inside the URL.
            il = html.escape(news_item.link, quote=True)
            it = html.escape(news_item.title)
            html_file.write(
                f'    <li><span> {dp}</span> <a href="{il}">{it}</a></li>\n'
            )

        html_file.write("  </ul>")

        # Append the html page footer
        html_file.write(HTML_FOOTER)


def generate_html_file():
    """
    Loads the JSON file, parses the news items, and writes them to a generated
    HTML file.
    """

    raw = Path(JSON_FILE_PATH).read_text(encoding="Utf-8")
    write_to_html_file(parse_json_data(json.loads(raw)))


def fetch_rss_feeds():
    """
    Runs the sfeed_update command to fetch RSS news.

    The stale feeds directory is removed first so feeds dropped from the
    sfeedrc do not linger. Failing to remove it is fatal (stale data would
    be reused silently); a failed fetch of one or more feeds is reported
    but non-fatal, since the remaining feeds may still have been updated.
    """

    # Delete the old feeds if exists
    try:
        if Path(FEEDS_DIR_PATH).exists():
            shutil.rmtree(FEEDS_DIR_PATH)
    except FileNotFoundError as err:
        print(f"An error occurred while removing feeds directory: {err}")
        sys.exit(1)

    # Fetch the feeds. The command and its argument are fixed constants, so
    # an argument list (shell=False) avoids the needless shell layer.
    try:
        subprocess.run([SFEED_UPDATE_COMMAND, SFEEDRC_PATH], check=True)

    # It should not panic if it fails to fetch one or more feeds
    except subprocess.CalledProcessError as err:
        print(f"An error occurred while fetching the news: {err}")

    # Without a shell, a missing sfeed_update binary surfaces here; keep it
    # non-fatal to match the failed-fetch handling above.
    except FileNotFoundError as err:
        print(f"An error occurred while fetching the news: {err}")


def generate_json_file():
    """
    Converts the fetched news to JSON using sfeed_json.

    The command is run through the shell because it relies on the shell for
    the ``*`` glob over the feeds directory and the ``>`` redirection into
    the output file. Every interpolated piece is a module-level constant,
    so no untrusted input reaches the shell. Exits the program on failure,
    since later steps cannot work without the JSON file.
    """

    try:
        # Convert the fetched news to json
        subprocess.run(
            f"{SFEED_JSON_COMMAND} {FEEDS_DIR_PATH}/* > {JSON_FILE_PATH} ",
            shell=True,
            check=True,
        )

    except subprocess.CalledProcessError as err:
        print(f"An error occurred while running sfeed_json: {err}")
        sys.exit(1)


def main():
    """
    Fetches news from an RSS feed and generates an HTML file containing these
    news items.

    The function performs the following steps:
    1. Fetches the latest RSS news feeds.
    2. Converts the fetched news into a JSON file.
    3. Parses the JSON file and generates an HTML file with the news items.

    Fatal errors in the individual steps terminate the process with a
    non-zero exit code.
    """

    fetch_rss_feeds()
    generate_json_file()
    generate_html_file()


main()