"""
This module contains high-level logic for fixed format serialization.

Lower-level parts are implemented in C in mypyc/lib-rt/librt_internal.c
Short summary of low-level functionality:
* integers are automatically serialized as 1, 2, or 4 bytes, or arbitrary length.
* str/bytes are serialized as size (1, 2, or 4 bytes) followed by bytes buffer.
* floats are serialized as C doubles.

At high-level we add type tags as needed so that our format is self-descriptive.
More precisely:
* False, True, and None are stored as just a tag: 0, 1, 2 correspondingly.
* builtin primitives like int/str/bytes/float are stored as their type tag followed
  by bare (low-level) representation of the value. Reserved tag range for primitives is
  3 ... 19.
* generic (heterogeneous) list are stored as tag, followed by bare size, followed by
  sequence of tagged values.
* homogeneous lists of primitives are stored as tag, followed by bare size, followed
  by sequence of bare values.
* reserved tag range for sequence-like builtins is 20 ... 29
* currently we have only one mapping-like format: string-keyed dictionary with heterogeneous
  values. It is stored as tag, followed by bare size, followed by sequence of pairs: bare
  string key followed by tagged value.
* reserved tag range for mapping-like builtins is 30 ... 39
* there is an additional reserved tag range 40 ... 49 for any other builtin collections.
* custom classes (like types, symbols etc.) are stored as tag, followed by a sequence of
  tagged field values, followed by a special end tag 255. Names of class fields are
  *not* stored, the caller should know the field names and order for the given class tag.
* reserved tag range for symbols (TypeInfo, Var, etc) is 50 ... 79.
* class Instance is the only exception from the above format (since it is the most common one).
  It has two extra formats: few most common instances like "builtins.object" are stored as
  instance tag followed by a secondary tag, other plain non-generic instances are stored as
  instance tag followed by secondary tag followed by fullname as bare string. All generic
  readers must handle these.
* reserved tag range for Instance type formats is 80 ... 99, for other types it is 100 ... 149.
* tag 254 is reserved for if we would ever need to extend the tag range to indicated second tag
  page. Tags 150 ... 253 are free for everything else (e.g. AST nodes etc).

General convention is that custom classes implement write() and read() methods for FF
serialization. The write method should write both class tag and end tag. The read method
conventionally *does not* read the start tag (to simplify logic for unions). Known exceptions
are MypyFile.read() and SymbolTableNode.read(), since those two never appear in a union.

If any of these details change, or if the structure of CacheMeta changes please
bump CACHE_VERSION below.
"""

from __future__ import annotations

from collections.abc import Sequence
from typing import Any, Final, Optional, Union
from typing_extensions import TypeAlias as _TypeAlias

from librt.internal import (
    ReadBuffer as ReadBuffer,
    WriteBuffer as WriteBuffer,
    read_bool as read_bool,
    read_bytes as read_bytes_bare,
    read_float as read_float_bare,
    read_int as read_int_bare,
    read_str as read_str_bare,
    read_tag as read_tag,
    write_bool as write_bool,
    write_bytes as write_bytes_bare,
    write_float as write_float_bare,
    write_int as write_int_bare,
    write_str as write_str_bare,
    write_tag as write_tag,
)
from mypy_extensions import u8

# High-level cache layout format
CACHE_VERSION: Final = 1

SerializedError: _TypeAlias = tuple[Optional[str], int, int, int, int, str, str, Optional[str]]


class CacheMeta:
    """Class representing cache metadata for a module."""

    def __init__(
        self,
        *,
        id: str,
        path: str,
        mtime: int,
        size: int,
        hash: str,
        dependencies: list[str],
        data_mtime: int,
        data_file: str,
        suppressed: list[str],
        options: dict[str, object],
        dep_prios: list[int],
        dep_lines: list[int],
        dep_hashes: list[bytes],
        interface_hash: bytes,
        error_lines: list[SerializedError],
        version_id: str,
        ignore_all: bool,
        plugin_data: Any,
    ) -> None:
        self.id = id
        self.path = path
        self.mtime = mtime  # source file mtime
        self.size = size  # source file size
        self.hash = hash  # source file hash (as a hex string for historical reasons)
        self.dependencies = dependencies  # names of imported modules
        self.data_mtime = data_mtime  # mtime of data_file
        self.data_file = data_file  # path of <id>.data.json or <id>.data.ff
        self.suppressed = suppressed  # dependencies that weren't imported
        self.options = options  # build options snapshot
        # dep_prios and dep_lines are both aligned with dependencies + suppressed
        self.dep_prios = dep_prios
        self.dep_lines = dep_lines
        # dep_hashes list is aligned with dependencies only
        self.dep_hashes = dep_hashes  # list of interface_hash for dependencies
        self.interface_hash = interface_hash  # hash representing the public interface
        self.error_lines = error_lines
        self.version_id = version_id  # mypy version for cache invalidation
        self.ignore_all = ignore_all  # if errors were ignored
        self.plugin_data = plugin_data  # config data from plugins

    def serialize(self) -> dict[str, Any]:
        return {
            "id": self.id,
            "path": self.path,
            "mtime": self.mtime,
            "size": self.size,
            "hash": self.hash,
            "data_mtime": self.data_mtime,
            "dependencies": self.dependencies,
            "suppressed": self.suppressed,
            "options": self.options,
            "dep_prios": self.dep_prios,
            "dep_lines": self.dep_lines,
            "dep_hashes": [dep.hex() for dep in self.dep_hashes],
            "interface_hash": self.interface_hash.hex(),
            "error_lines": self.error_lines,
            "version_id": self.version_id,
            "ignore_all": self.ignore_all,
            "plugin_data": self.plugin_data,
        }

    @classmethod
    def deserialize(cls, meta: dict[str, Any], data_file: str) -> CacheMeta | None:
        try:
            return CacheMeta(
                id=meta["id"],
                path=meta["path"],
                mtime=meta["mtime"],
                size=meta["size"],
                hash=meta["hash"],
                dependencies=meta["dependencies"],
                data_mtime=meta["data_mtime"],
                data_file=data_file,
                suppressed=meta["suppressed"],
                options=meta["options"],
                dep_prios=meta["dep_prios"],
                dep_lines=meta["dep_lines"],
                dep_hashes=[bytes.fromhex(dep) for dep in meta["dep_hashes"]],
                interface_hash=bytes.fromhex(meta["interface_hash"]),
                error_lines=[tuple(err) for err in meta["error_lines"]],
                version_id=meta["version_id"],
                ignore_all=meta["ignore_all"],
                plugin_data=meta["plugin_data"],
            )
        except (KeyError, ValueError):
            return None

    def write(self, data: WriteBuffer) -> None:
        write_str(data, self.id)
        write_str(data, self.path)
        write_int(data, self.mtime)
        write_int(data, self.size)
        write_str(data, self.hash)
        write_str_list(data, self.dependencies)
        write_int(data, self.data_mtime)
        write_str_list(data, self.suppressed)
        write_json(data, self.options)
        write_int_list(data, self.dep_prios)
        write_int_list(data, self.dep_lines)
        write_bytes_list(data, self.dep_hashes)
        write_bytes(data, self.interface_hash)
        write_errors(data, self.error_lines)
        write_str(data, self.version_id)
        write_bool(data, self.ignore_all)
        # Plugin data may be not a dictionary, so we use
        # a more generic write_json_value() here.
        write_json_value(data, self.plugin_data)

    @classmethod
    def read(cls, data: ReadBuffer, data_file: str) -> CacheMeta | None:
        try:
            return CacheMeta(
                id=read_str(data),
                path=read_str(data),
                mtime=read_int(data),
                size=read_int(data),
                hash=read_str(data),
                dependencies=read_str_list(data),
                data_mtime=read_int(data),
                data_file=data_file,
                suppressed=read_str_list(data),
                options=read_json(data),
                dep_prios=read_int_list(data),
                dep_lines=read_int_list(data),
                dep_hashes=read_bytes_list(data),
                interface_hash=read_bytes(data),
                error_lines=read_errors(data),
                version_id=read_str(data),
                ignore_all=read_bool(data),
                plugin_data=read_json_value(data),
            )
        except ValueError:
            return None


# Always use this type alias to refer to type tags.
Tag = u8

# Primitives.
LITERAL_FALSE: Final[Tag] = 0
LITERAL_TRUE: Final[Tag] = 1
LITERAL_NONE: Final[Tag] = 2
LITERAL_INT: Final[Tag] = 3
LITERAL_STR: Final[Tag] = 4
LITERAL_BYTES: Final[Tag] = 5
LITERAL_FLOAT: Final[Tag] = 6
LITERAL_COMPLEX: Final[Tag] = 7

# Collections.
LIST_GEN: Final[Tag] = 20
LIST_INT: Final[Tag] = 21
LIST_STR: Final[Tag] = 22
LIST_BYTES: Final[Tag] = 23
TUPLE_GEN: Final[Tag] = 24
DICT_STR_GEN: Final[Tag] = 30

# Misc classes.
EXTRA_ATTRS: Final[Tag] = 150
DT_SPEC: Final[Tag] = 151

END_TAG: Final[Tag] = 255


def read_literal(data: ReadBuffer, tag: Tag) -> int | str | bool | float:
    if tag == LITERAL_INT:
        return read_int_bare(data)
    elif tag == LITERAL_STR:
        return read_str_bare(data)
    elif tag == LITERAL_FALSE:
        return False
    elif tag == LITERAL_TRUE:
        return True
    elif tag == LITERAL_FLOAT:
        return read_float_bare(data)
    assert False, f"Unknown literal tag {tag}"


# There is an intentional asymmetry between read and write for literals because
# None and/or complex values are only allowed in some contexts but not in others.
def write_literal(data: WriteBuffer, value: int | str | bool | float | complex | None) -> None:
    if isinstance(value, bool):
        write_bool(data, value)
    elif isinstance(value, int):
        write_tag(data, LITERAL_INT)
        write_int_bare(data, value)
    elif isinstance(value, str):
        write_tag(data, LITERAL_STR)
        write_str_bare(data, value)
    elif isinstance(value, float):
        write_tag(data, LITERAL_FLOAT)
        write_float_bare(data, value)
    elif isinstance(value, complex):
        write_tag(data, LITERAL_COMPLEX)
        write_float_bare(data, value.real)
        write_float_bare(data, value.imag)
    else:
        write_tag(data, LITERAL_NONE)


def read_int(data: ReadBuffer) -> int:
    assert read_tag(data) == LITERAL_INT
    return read_int_bare(data)


def write_int(data: WriteBuffer, value: int) -> None:
    write_tag(data, LITERAL_INT)
    write_int_bare(data, value)


def read_str(data: ReadBuffer) -> str:
    assert read_tag(data) == LITERAL_STR
    return read_str_bare(data)


def write_str(data: WriteBuffer, value: str) -> None:
    write_tag(data, LITERAL_STR)
    write_str_bare(data, value)


def read_bytes(data: ReadBuffer) -> bytes:
    assert read_tag(data) == LITERAL_BYTES
    return read_bytes_bare(data)


def write_bytes(data: WriteBuffer, value: bytes) -> None:
    write_tag(data, LITERAL_BYTES)
    write_bytes_bare(data, value)


def read_int_opt(data: ReadBuffer) -> int | None:
    tag = read_tag(data)
    if tag == LITERAL_NONE:
        return None
    assert tag == LITERAL_INT
    return read_int_bare(data)


def write_int_opt(data: WriteBuffer, value: int | None) -> None:
    if value is not None:
        write_tag(data, LITERAL_INT)
        write_int_bare(data, value)
    else:
        write_tag(data, LITERAL_NONE)


def read_str_opt(data: ReadBuffer) -> str | None:
    tag = read_tag(data)
    if tag == LITERAL_NONE:
        return None
    assert tag == LITERAL_STR
    return read_str_bare(data)


def write_str_opt(data: WriteBuffer, value: str | None) -> None:
    if value is not None:
        write_tag(data, LITERAL_STR)
        write_str_bare(data, value)
    else:
        write_tag(data, LITERAL_NONE)


def read_int_list(data: ReadBuffer) -> list[int]:
    assert read_tag(data) == LIST_INT
    size = read_int_bare(data)
    return [read_int_bare(data) for _ in range(size)]


def write_int_list(data: WriteBuffer, value: list[int]) -> None:
    write_tag(data, LIST_INT)
    write_int_bare(data, len(value))
    for item in value:
        write_int_bare(data, item)


def read_str_list(data: ReadBuffer) -> list[str]:
    assert read_tag(data) == LIST_STR
    size = read_int_bare(data)
    return [read_str_bare(data) for _ in range(size)]


def write_str_list(data: WriteBuffer, value: Sequence[str]) -> None:
    write_tag(data, LIST_STR)
    write_int_bare(data, len(value))
    for item in value:
        write_str_bare(data, item)


def read_bytes_list(data: ReadBuffer) -> list[bytes]:
    assert read_tag(data) == LIST_BYTES
    size = read_int_bare(data)
    return [read_bytes_bare(data) for _ in range(size)]


def write_bytes_list(data: WriteBuffer, value: Sequence[bytes]) -> None:
    write_tag(data, LIST_BYTES)
    write_int_bare(data, len(value))
    for item in value:
        write_bytes_bare(data, item)


def read_str_opt_list(data: ReadBuffer) -> list[str | None]:
    assert read_tag(data) == LIST_GEN
    size = read_int_bare(data)
    return [read_str_opt(data) for _ in range(size)]


def write_str_opt_list(data: WriteBuffer, value: list[str | None]) -> None:
    write_tag(data, LIST_GEN)
    write_int_bare(data, len(value))
    for item in value:
        write_str_opt(data, item)


Value: _TypeAlias = Union[None, int, str, bool]

# Our JSON format is somewhat non-standard as we distinguish lists and tuples.
# This is convenient for some internal things, like mypyc plugin and error serialization.
JsonValue: _TypeAlias = Union[
    Value, list["JsonValue"], dict[str, "JsonValue"], tuple["JsonValue", ...]
]


def read_json_value(data: ReadBuffer) -> JsonValue:
    tag = read_tag(data)
    if tag == LITERAL_NONE:
        return None
    if tag == LITERAL_FALSE:
        return False
    if tag == LITERAL_TRUE:
        return True
    if tag == LITERAL_INT:
        return read_int_bare(data)
    if tag == LITERAL_STR:
        return read_str_bare(data)
    if tag == LIST_GEN:
        size = read_int_bare(data)
        return [read_json_value(data) for _ in range(size)]
    if tag == TUPLE_GEN:
        size = read_int_bare(data)
        return tuple(read_json_value(data) for _ in range(size))
    if tag == DICT_STR_GEN:
        size = read_int_bare(data)
        return {read_str_bare(data): read_json_value(data) for _ in range(size)}
    assert False, f"Invalid JSON tag: {tag}"


def write_json_value(data: WriteBuffer, value: JsonValue) -> None:
    if value is None:
        write_tag(data, LITERAL_NONE)
    elif isinstance(value, bool):
        write_bool(data, value)
    elif isinstance(value, int):
        write_tag(data, LITERAL_INT)
        write_int_bare(data, value)
    elif isinstance(value, str):
        write_tag(data, LITERAL_STR)
        write_str_bare(data, value)
    elif isinstance(value, list):
        write_tag(data, LIST_GEN)
        write_int_bare(data, len(value))
        for val in value:
            write_json_value(data, val)
    elif isinstance(value, tuple):
        write_tag(data, TUPLE_GEN)
        write_int_bare(data, len(value))
        for val in value:
            write_json_value(data, val)
    elif isinstance(value, dict):
        write_tag(data, DICT_STR_GEN)
        write_int_bare(data, len(value))
        for key in sorted(value):
            write_str_bare(data, key)
            write_json_value(data, value[key])
    else:
        assert False, f"Invalid JSON value: {value}"


# These are functions for JSON *dictionaries* specifically. Unfortunately, we
# must use imprecise types here, because the callers use imprecise types.
def read_json(data: ReadBuffer) -> dict[str, Any]:
    assert read_tag(data) == DICT_STR_GEN
    size = read_int_bare(data)
    return {read_str_bare(data): read_json_value(data) for _ in range(size)}


def write_json(data: WriteBuffer, value: dict[str, Any]) -> None:
    write_tag(data, DICT_STR_GEN)
    write_int_bare(data, len(value))
    for key in sorted(value):
        write_str_bare(data, key)
        write_json_value(data, value[key])


def write_errors(data: WriteBuffer, errs: list[SerializedError]) -> None:
    write_tag(data, LIST_GEN)
    write_int_bare(data, len(errs))
    for path, line, column, end_line, end_column, severity, message, code in errs:
        write_tag(data, TUPLE_GEN)
        write_str_opt(data, path)
        write_int(data, line)
        write_int(data, column)
        write_int(data, end_line)
        write_int(data, end_column)
        write_str(data, severity)
        write_str(data, message)
        write_str_opt(data, code)


def read_errors(data: ReadBuffer) -> list[SerializedError]:
    assert read_tag(data) == LIST_GEN
    result = []
    for _ in range(read_int_bare(data)):
        assert read_tag(data) == TUPLE_GEN
        result.append(
            (
                read_str_opt(data),
                read_int(data),
                read_int(data),
                read_int(data),
                read_int(data),
                read_str(data),
                read_str(data),
                read_str_opt(data),
            )
        )
    return result