""" This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program.  If not, see . Copyright © 2019 Cloud Linux Software Inc. This software is also available under ImunifyAV commercial license, see """ import base64 import binascii import csv import os from collections import namedtuple from contextlib import suppress from pathlib import Path from time import time from . import AIBOLIT ( SUSPICIOUS, VULNERS, EXTENDED_SUSPICIOUS, IGNORED_SUSPICIOUS, ) = ( "suspicious", "vulners", "extended-suspicious", "ignored-suspicious", ) SECTIONS = { "p": "php_malware", "j": "js_malware", "c": "cloudhash", "s": SUSPICIOUS, "v": VULNERS, "es": EXTENDED_SUSPICIOUS, "is": IGNORED_SUSPICIOUS, } SUSPICIOUS_SECTIONS = { SUSPICIOUS, VULNERS, EXTENDED_SUSPICIOUS, IGNORED_SUSPICIOUS, } AiBolitCSVReport = namedtuple( "AiBolitCSVReport", [ "section", "path", "signature", "ctime", "mtime", "size", "etime", "signature_id", "hash", "signature_name", "sha256", ], ) def parse_report_csv(report_path: Path): with report_path.open(newline="") as report_stream: for raw_row in csv.reader(report_stream, delimiter=","): row = AiBolitCSVReport(*raw_row) try: section = SECTIONS[row.section] except KeyError: continue sig = row.signature_name or "{}.{}".format( section, row.signature_id ) timestamp = ( int(float(row.etime)) if row.section != "v" else int(time()) ) file_name = row.path with suppress(binascii.Error): file_name = base64.b64decode(file_name, validate=True) file_name = os.fsdecode(file_name) yield { "name": AIBOLIT, "file_name": file_name, "signature": sig, "ctime": int(row.ctime), "modification_time": int(row.mtime), "suspicious": section in SUSPICIOUS_SECTIONS, "size": int(row.size or 0), "hash": row.sha256 or row.hash or None, "timestamp": timestamp, "extended_suspicious": section == EXTENDED_SUSPICIOUS, } def parse_report_json(report, base64_path=True): for section in SECTIONS.values(): for hit in report.get(section, []): sig = hit.get("sn") or ".".join([section, str(hit["sigid"])]) # vulners section does not provide timestamp ('et' field) # so current time is used instead. # 'et' - time when the file was scanned timestamp = ( int(float(hit["et"])) if section != "vulners" else int(time()) ) file_name = hit["fn"] if base64_path: with suppress(binascii.Error): file_name = base64.b64decode(file_name, validate=True) file_name = os.fsdecode(file_name) yield { "name": AIBOLIT, "file_name": file_name, "signature": sig, "suspicious": section in SUSPICIOUS_SECTIONS, "size": hit["sz"], "ctime": hit["ct"], "modification_time": hit["mt"], # 'hash' field is still used in 'cloudhash' section "hash": hit.get("sha256", hit.get("hash")), "timestamp": timestamp, "extended_suspicious": section == EXTENDED_SUSPICIOUS, }