Standardizing argument unpacking behaviors for calls

mikeshardmind · February 15, 2025, 1:52am

I think what I said in “What are the subtyping rules for tuple[T, ...]?” (post #51), when we were previously looking into the subtyping rules of gradual-length tuples, covers how I feel about unpacking unknown-length tuples, and I don’t think anything I’ve seen since changes it.

As a litmus test for any change that should handle the simpler non-overload case, I would present this example that heavily relies on iterator semantics and struct pack/unpack, and is representative of efficient binary protocol handling in pure python.

import struct
from itertools import chain
from collections.abc import Iterator
from typing import NamedTuple

# ---------------------------------------------------
# Common:
# up to 112 byte payload
# 1-byte: version | up to 111 bytes: versioned data
# ---------------------------------------------------
# Version 1:
# 1-byte: version = x01
# 6 length prefixed arrays of ids
#   length prefix as 1 byte, elements 8 bytes each
#   representing a 64bit int
# ---------------------------------------------------

class NoUserFeedback(Exception):
    """Raised by ``unpack_rules`` when a payload cannot be decoded into valid rules."""

class V1TooManyIDs(Exception):
    """Raised when a version-1 rule set holds more than 13 ids in total."""

class V1ToggleWithAddRemove(Exception):
    """Raised when ``toggle`` is supplied together with ``add`` or ``remove``."""

class V1MultipleToggle(Exception):
    """Raised when ``toggle`` contains more than one id."""
        
class V1NonActionableRule(Exception):
    """Raised when none of ``toggle``, ``add`` or ``remove`` is supplied."""

class V1AddRemoveOverlap(Exception):
    """Raised when the same id appears in both ``add`` and ``remove``."""



class DataV1(NamedTuple):
    """The six id sets carried by a version-1 payload.

    Field order is significant: ``pack_rules`` writes the arrays by iterating
    this tuple, and ``unpack_rules`` rebuilds it positionally from the arrays
    in the order they are read. Reordering the fields changes the wire format.
    """

    # add / remove / toggle are the action sets checked by validate_datav1.
    add: frozenset[int]
    remove: frozenset[int]
    toggle: frozenset[int]
    # NOTE(review): the require_* sets are presumably preconditions for the
    # rule; only their sizes are checked in this file. Confirm with callers.
    require_any: frozenset[int]
    require_all: frozenset[int]
    require_none: frozenset[int]


def validate_datav1(data: DataV1) -> None:
    """
    Validate a version-1 rule set, raising on the first violated constraint.

    Constraints, in the order they are checked:
    - at most 13 ids across all six sets (V1TooManyIDs)
    - when toggle is non-empty: add and remove must both be empty
      (V1ToggleWithAddRemove) and toggle must hold exactly one id
      (V1MultipleToggle)
    - when toggle is empty: at least one of add or remove must be non-empty
      (V1NonActionableRule) and they must share no id (V1AddRemoveOverlap)

    Returns None when every constraint holds.
    """
    total_ids = sum(len(id_set) for id_set in data)
    if total_ids > 13:
        raise V1TooManyIDs

    if data.toggle:
        # Toggle rules are exclusive with add/remove and limited to one id.
        if data.add or data.remove:
            raise V1ToggleWithAddRemove
        if len(data.toggle) > 1:
            raise V1MultipleToggle
        return

    # No toggle: the rule has to add or remove something, without overlap.
    if not data.add and not data.remove:
        raise V1NonActionableRule
    if data.add & data.remove:
        raise V1AddRemoveOverlap



def pack_rules(data: DataV1, /) -> bytes:
    """
    Validate ``data`` and serialise it as a version-1 payload.

    Layout (network byte order, no padding): one version byte with value 1,
    then for each of the six sets in field order a one-byte element count
    followed by that many unsigned 64-bit ids.

    Raises whatever ``validate_datav1`` raises for an invalid rule set.
    """
    validate_datav1(data)

    fmt_parts = ["!b"]
    values = [1]  # leading version byte; needs changing if new version
    for id_set in data:
        count = len(id_set)
        fmt_parts.append(f"b{count}Q")
        values.append(count)
        values.extend(id_set)

    return struct.pack("".join(fmt_parts), *values)


def _v1_struct_unpacker(raw: bytes, /) -> Iterator[frozenset[int]]:
    """
    Lazily yield the six id sets of a version-1 payload, in wire order.

    The caller must already have checked the version byte; reading starts
    at byte 1. Each array is a one-byte count followed by that many
    unsigned 64-bit ids. ``struct.error`` propagates if ``raw`` is too
    short or a count byte produces an invalid format.
    """
    cursor = 1  # skip the version byte
    arrays_left = 6
    while arrays_left:
        count = struct.unpack_from("!b", raw, cursor)[0]
        cursor += 1
        ids = struct.unpack_from(f"!{count}Q", raw, cursor)
        yield frozenset(ids)
        cursor += 8 * count
        arrays_left -= 1


def _get_data_version(b: bytes, /) -> int:
    """
    Return the version stored in the first byte of ``b`` (read as a signed byte).

    ``struct.error`` propagates when ``b`` is empty.
    """
    version = struct.unpack_from("!b", b, 0)[0]
    # struct returns untyped values; the assert narrows the type for checkers.
    assert isinstance(version, int)
    return version


def unpack_rules(raw: bytes, /) -> DataV1:
    """
    Decode and validate a version-1 payload.

    Returns the decoded ``DataV1`` once it has passed ``validate_datav1``.

    Raises ``NoUserFeedback`` when the version byte cannot be read, when the
    version is not 1, or when decoding or validation fails for any reason.
    The original cause is suppressed (``from None``).
    """
    try:
        version = _get_data_version(raw)
    except struct.error:
        raise NoUserFeedback from None

    if version != 1:
        raise NoUserFeedback

    try:
        data = DataV1(*_v1_struct_unpacker(raw))
        validate_datav1(data)
    except Exception:
        # Deliberately broad: struct errors, validation errors and anything
        # unexpected all collapse into the same opaque failure. A single
        # handler replaces two clauses that had identical bodies.
        raise NoUserFeedback from None

    return data

The code sample currently passes in both the pyright playground and mypy-play (strict).

I consider myself part of this minority, but I think unknown-length tuples shouldn’t error for things related to their length. They are gradual in that component of their type information, and highly useful. In most code that uses them, the developer either treats them as if they were Sequence[T] rather than tuple[T, …], or actually has another means of knowing the type is fine, such as SQL rows being enforced by a database, or the example I’ve presented above with struct and binary protocols.