#!/usr/bin/python3

# SPDX-License-Identifier: MPL-2.0
# SPDX-FileCopyrightText: 2023-2025 Collabora Ltd.
# SPDX-FileCopyrightText: 2023-2025 Walter Lozano <walter.lozano@collabora.com>
# SPDX-FileCopyrightText: 2025 Dylan Aïssi <dylan.aissi@collabora.com>

import argparse
import dataclasses
import gzip
import json
import os
import re
import sys
from dataclasses import dataclass
from json import JSONEncoder
from os.path import isdir, isfile, join

import requests

from debian import deb822

# Default filesystem locations; main() uses them as the fallback values of
# the --dir, --dpkg-status and --apt-list-dir options respectively.
DEFAULT_METADATA_DIR = "/usr/share/doc"
DEFAULT_DPKG_STATUS = "/var/lib/dpkg/status"
DEFAULT_APT_LIST_DIR = "/var/lib/apt/lists"

# Alternative spelling for each architecture accepted by --arch.
# NOTE(review): nothing in the visible code reads this mapping -- confirm it
# is still needed.
ARCH_MAP = {
    "amd64": "x86_64",
    "armhf": "armv7hl",
    "arm64": "aarch64",
}

def open_potentially_gzipped(path):
    """Open *path* for reading, going through gzip when its name ends in ".gz".

    A ".gz" path yields a gzip file object opened in binary mode; any other
    path is opened with the builtin ``open`` in its default (text) mode.
    The caller is responsible for closing the returned object.
    """
    if not str(path).endswith(".gz"):
        return open(path)
    return gzip.open(path, mode="r")


@dataclasses.dataclass(frozen=True)
class SourcePackage:
    """Immutable name/version pair identifying a source package.

    Being frozen makes instances hashable, so they can be stored in sets.
    """

    name: str
    version: str


class CustomJSONEncoder(JSONEncoder):
    """JSON encoder that additionally understands sets and dataclass instances.

    Sets are written as sorted lists so that encoding the same data twice
    yields identical output; dataclass instances are written as objects keyed
    by field name.
    """

    def default(self, o):
        """Return a JSON-serialisable stand-in for *o*.

        Raises:
            TypeError: if *o* is neither a set nor a dataclass instance
                (raised by ``JSONEncoder.default``).
        """
        if isinstance(o, (set, frozenset)):
            try:
                return sorted(o)
            except TypeError:
                # Members without a natural ordering (e.g. frozen dataclasses
                # declared without order=True): order by repr so the output
                # is still deterministic.
                return sorted(o, key=repr)
        # is_dataclass() is also true for dataclass *classes*, which asdict()
        # rejects, so only instances are converted here.
        if dataclasses.is_dataclass(o) and not isinstance(o, type):
            return dataclasses.asdict(o)
        return super().default(o)


class BomGenerator:
    """Build, per source package, the set of source packages it references.

    The data comes from two places: the apt ``Sources`` lists, used to map
    binary package names to source package names, and the ``*_metadata_*``
    JSON files found one level below ``metadata_dir``.
    """

    def __init__(
        self, branch, arch, metadata_dir, packages_file, apt_list_dir
    ):
        """Store the configuration; nothing is read from disk here.

        Args:
            branch: Stored as-is; not read by the methods of this class.
            arch: Stored as-is; not read by the methods of this class.
            metadata_dir: Directory whose sub-directories are scanned for
                metadata files.
            packages_file: Stored as-is; not read by the methods of this
                class.
            apt_list_dir: Directory holding the apt ``Sources`` lists.
        """
        self.branch = branch
        self.arch = arch
        self.metadata_dir = metadata_dir
        self.packages_file = packages_file
        self.apt_list_dir = apt_list_dir

        # Binary package name -> source package name.
        self.packages_to_sources = {}

    def get_packages_to_sources(self):
        """Fill ``self.packages_to_sources`` from the apt ``Sources`` lists.

        Every file in ``apt_list_dir`` whose name ends in ``Sources`` is
        parsed, and each name listed in a stanza's ``Binary`` field is mapped
        to that stanza's ``Package``. When a binary name occurs more than
        once, the last stanza read wins.
        """
        # Sorted so that "last stanza wins" does not depend on the order in
        # which the filesystem happens to return directory entries.
        for list_name in sorted(os.listdir(self.apt_list_dir)):
            if not list_name.endswith("Sources"):
                continue

            # Context manager: the handle is closed as soon as it is read.
            list_path = join(self.apt_list_dir, list_name)
            with open(list_path, encoding="utf-8") as sources_file:
                sources = sources_file.read()

            # iter_paragraphs is a classmethod; no instance is needed.
            for stanza in deb822.Sources.iter_paragraphs(
                sources, use_apt_pkg=False
            ):
                source_package = stanza["Package"]
                for binary in stanza["Binary"].split(","):
                    binary = binary.strip()
                    if binary:  # ignore empty items left by stray commas
                        self.packages_to_sources[binary] = source_package

    def parse_metadata(self, f):
        """Read one metadata file and classify the packages it references.

        Args:
            f: Path to a JSON metadata file, optionally gzip-compressed.

        Returns:
            A dict with two keys: ``"shared_libraries"``, a set of source
            package names, and ``"external_files"``, a set of
            ``SourcePackage`` (name and version).

        Raises:
            KeyError: if the document lacks ``referenced_source_packages``,
                or an entry lacks ``origins`` (or ``version`` when it is
                needed).
        """
        with open_potentially_gzipped(f) as fm:
            metadata = json.load(fm)

        shared_libraries = set()
        external_files = set()
        for name, info in metadata["referenced_source_packages"].items():
            origins = info["origins"]
            if "shared_libraries" in origins:
                shared_libraries.add(name)
            if "external_files" in origins:
                external_files.add(SourcePackage(name, info["version"]))

        return {
            "shared_libraries": shared_libraries,
            "external_files": external_files,
        }

    def merge_metadata(self, metadata_a, metadata_b):
        """Return the key-wise union of two ``parse_metadata`` style dicts.

        A key missing from either argument is treated as an empty set;
        neither argument is modified.
        """
        return {
            key: metadata_a.get(key, set()) | metadata_b.get(key, set())
            for key in ("shared_libraries", "external_files")
        }

    def scan_metadata(self):
        """Collect the metadata of every package found under ``metadata_dir``.

        Each immediate sub-directory is searched for regular files whose name
        contains ``_metadata_``. The text before the first underscore is taken
        as the binary package name and translated through
        ``packages_to_sources`` (falling back to the name itself). Results of
        binaries sharing a source package are merged.

        Returns:
            A dict mapping source package name to the merged
            ``parse_metadata`` result.
        """
        referenced_packages = {}
        # Sorted listings make the key order of the result reproducible.
        for entry in sorted(os.listdir(self.metadata_dir)):
            dirpath = join(self.metadata_dir, entry)
            if not isdir(dirpath):
                continue

            for filename in sorted(os.listdir(dirpath)):
                path = join(dirpath, filename)
                if "_metadata_" not in filename or not isfile(path):
                    continue

                package_name = filename.split("_")[0]
                source_package_name = self.packages_to_sources.get(
                    package_name, package_name
                )

                metadata = self.parse_metadata(path)
                if source_package_name in referenced_packages:
                    metadata = self.merge_metadata(
                        referenced_packages[source_package_name], metadata
                    )
                referenced_packages[source_package_name] = metadata

        return referenced_packages

def main():
    """Command-line entry point: parse options, build the BOM, emit it as JSON.

    The JSON document is written to the file named by ``--out`` when that
    option is given, and printed on standard output otherwise.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "-a",
        "--arch",
        choices=["amd64", "armhf", "arm64"],
        help="architecture",
    )
    arg_parser.add_argument("-b", "--branch", help="branch")
    arg_parser.add_argument(
        "-d",
        "--dir",
        default=DEFAULT_METADATA_DIR,
        help="directory to search for information",
    )
    arg_parser.add_argument(
        "-l",
        "--apt-list-dir",
        default=DEFAULT_APT_LIST_DIR,
        help="directory with apt lists",
    )
    arg_parser.add_argument("-o", "--out", help="output file")
    arg_parser.add_argument(
        "-s",
        "--dpkg-status",
        default=DEFAULT_DPKG_STATUS,
        help="dpkg status file",
    )
    options = arg_parser.parse_args()

    generator = BomGenerator(
        branch=options.branch,
        arch=options.arch,
        metadata_dir=options.dir,
        packages_file=options.dpkg_status,
        apt_list_dir=options.apt_list_dir,
    )
    # The binary -> source mapping must exist before the metadata scan,
    # which looks package names up in it.
    generator.get_packages_to_sources()
    bom = generator.scan_metadata()

    if not options.out:
        print(json.dumps(bom, cls=CustomJSONEncoder))
        return

    with open(options.out, "w+") as output:
        json.dump(bom, output, cls=CustomJSONEncoder)


# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
