"""
This module gets and caches news from imunify blog
"""

import asyncio
import os
import socket
import time
import urllib.request
from urllib.error import HTTPError
from xml.etree import ElementTree
from contextlib import suppress
from logging import getLogger

from defence360agent.simple_rpc.hosting_panel import HostingPanel
from defence360agent.utils import retry_on

logger = getLogger(__file__)

RSS_FEED_REMOTE_URL = "https://blog.imunify360.com/rss.xml"
_TIMEOUT = 300  # default timeout for network operations here
TAGS_TO_READ = ["title", "pubDate", "guid", "link"]

__all__ = ["HTTPError", "NewsFeed"]


class NewsFeed:
    """File-backed cache of the blog RSS feed, filtered per hosting panel."""

    cache_ttl = 60  # in minutes
    cache_file_path = "/var/imunify360/tmp/feed_cache.rss"

    @classmethod
    @retry_on(
        (ElementTree.ParseError, urllib.request.URLError),
        max_tries=10,
        # The lambda defers the `NewsFeed` lookup until the callback runs:
        # the class name is not bound yet while the class body executes.
        # NOTE(review): clear_cache is a coroutine function, so this returns
        # a coroutine -- presumably retry_on awaits it; confirm.
        on_error=lambda *args: NewsFeed.clear_cache(*args),
    )
    async def get(cls):
        """Return the news items allowed for the current hosting panel.

        The cache file is refreshed first when it is missing or older than
        `cache_ttl`. Each returned item is a dict holding only the child
        tags listed in `TAGS_TO_READ`, mapped to their text.
        """
        if cls._expired():
            await cls._refresh()

        category_info = PanelCategory(HostingPanel().NAME)

        # The cache is written as raw bytes, so read it back as bytes and
        # let the XML parser honour the encoding declared in the document
        # instead of decoding it with the process locale.
        with open(cls.cache_file_path, "rb") as cache_file:
            root = ElementTree.fromstring(cache_file.read())

        return [
            {
                child.tag: child.text
                for child in item
                if child.tag in TAGS_TO_READ
            }
            for item in root.iter("item")
            if category_info.is_allowed(item)
        ]

    @classmethod
    async def _refresh(cls):
        """Download the feed and overwrite the cache file with it."""
        os.makedirs(os.path.dirname(cls.cache_file_path), exist_ok=True)
        logger.info("Refresh news cache")
        # Download *before* opening the file: opening with "wb" truncates
        # it, so a failed download would otherwise leave an empty cache
        # file with a fresh mtime that `_expired` considers valid.
        content = await cls._fetch()
        with open(cls.cache_file_path, "wb") as cache_file:
            cache_file.write(content)

    @classmethod
    def _expired(cls):
        """Return True if the cache file is missing or older than the TTL."""
        try:
            last_modified_time = os.path.getmtime(cls.cache_file_path)
        except OSError:
            # No readable cache file: an mtime of 0 makes it count as
            # expired.
            last_modified_time = 0
        cache_age = (time.time() - last_modified_time) / 60  # in minutes
        return cache_age > cls.cache_ttl

    @classmethod
    async def _fetch(cls, timeout=_TIMEOUT):
        """Download the feed body (bytes) without blocking the event loop.

        The blocking urllib call runs in the loop's default executor.
        """
        return await asyncio.get_event_loop().run_in_executor(
            None, _fetch_url, RSS_FEED_REMOTE_URL, timeout
        )

    @classmethod
    async def clear_cache(cls, *args):
        """Remove the cache file; a missing file is not an error.

        `args` are only logged, to show what triggered the cleanup.
        """
        logger.warning("Clearing cache due to error: %s", args)
        with suppress(FileNotFoundError):
            os.unlink(cls.cache_file_path)


def _fetch_url(url, timeout):
    """Return the body of `url` as bytes (blocking).

    Raises the builtin TimeoutError when the socket operation times out.
    """
    # Cloudflare Browser Integrity Check blocks the default urllib
    # User-Agent. RSS feed URL was added to exceptions but they are
    # not free, so let's set a custom User-Agent anyway.
    headers = {"User-Agent": "imunify360-urllib/0.1"}
    req = urllib.request.Request(url, headers=headers)
    try:
        with urllib.request.urlopen(req, timeout=timeout) as response:
            return response.read()
    except socket.timeout as err:
        raise TimeoutError from err


class PanelCategory:
    """Decides whether a news item is relevant for the current panel."""

    # RSS news categories, value saved in xml category tag
    # categories are case-insensitive so lowercase it
    panel_categories = {"cpanel", "plesk", "directadmin"}
    no_panel_category = "standalone-imunify"

    def __init__(self, p_name):
        """Pick the category for panel name `p_name` (case-insensitive).

        Names outside `panel_categories` map to `no_panel_category`.
        """
        p_name = p_name.lower()
        self.current = (
            p_name
            if p_name in PanelCategory.panel_categories
            else PanelCategory.no_panel_category
        )
        # Build the full set first and subtract afterwards: `-` binds
        # tighter than `|`, so `a | {b} - {c}` would only subtract from
        # `{b}` and leave the current panel among its own competitors.
        all_categories = PanelCategory.panel_categories | {
            PanelCategory.no_panel_category
        }
        self.competitors = all_categories - {self.current}

    def is_allowed(self, item):
        """Return False only for items aimed solely at other panels.

        `item` is an iterable of XML elements; the text of its `category`
        children is inspected.
        """
        # An empty <category/> element has text None; skip it so the join
        # below cannot fail.
        item_categories = {
            child.text
            for child in item
            if child.tag == "category" and child.text
        }
        # category tag can include not only exact panel name, but also some
        # phrase for SEO purpose, so check it by `in` on joined string
        joined_category = "|||".join(item_categories).lower()
        current_in_category = self.current in joined_category
        competitors_in_category = any(
            com in joined_category for com in self.competitors
        )
        # current panel is not mentioned in categories,
        # but a competitor is -> don't add to result
        return not competitors_in_category or current_in_category