# Source code for qs_codec.models.decode_options

"""This module contains the ``DecodeOptions`` class that configures the output of ``decode``.

Keys are decoded identically to values by the default decoder; whether a decoded ``.`` splits
segments is controlled by parsing options (``allow_dots`` / ``decode_dot_in_keys``) elsewhere.
"""

import inspect
import typing as t
from dataclasses import dataclass
from enum import Enum as _EnumBase
from functools import wraps

from ..enums.charset import Charset
from ..enums.decode_kind import DecodeKind
from ..enums.duplicates import Duplicates
from ..utils.decode_utils import DecodeUtils


@dataclass
class DecodeOptions:
    """Options that configure the output of ``decode``.

    After construction, ``decoder`` always holds a normalized adapter with the
    signature ``(s, charset=Charset.UTF8, *, kind=DecodeKind.VALUE)`` that forwards
    to the effective decoder (user ``decoder``, else ``legacy_decoder``, else the
    library default) using only the arguments that decoder can accept.
    """

    allow_dots: t.Optional[bool] = None
    """Set to ``True`` to decode dot ``dict`` notation in the encoded input.
    When ``None`` (default), it inherits the value of ``decode_dot_in_keys``."""

    decode_dot_in_keys: t.Optional[bool] = None
    """Set to ``True`` to decode percent‑encoded dots in keys (e.g., ``%2E`` → ``.``).
    Note: it implies ``allow_dots``, so ``decode`` will error if you set ``decode_dot_in_keys`` to ``True``, and
    ``allow_dots`` to ``False``.
    When ``None`` (default), it defaults to ``False``.

    Inside bracket segments, percent-decoding naturally yields ``.`` from ``%2E/%2e``. This option controls whether
    **top‑level** encoded dots are treated as additional split points; it does **not** affect the literal ``.``
    produced by percent-decoding inside bracket segments."""

    allow_empty_lists: bool = False
    """Set to ``True`` to allow empty ``list`` values inside ``dict``\\s in the encoded input."""

    list_limit: int = 20
    """Maximum number of **indexed** items allowed in a single list (default: ``20``).

    During decoding, keys like ``a[0]``, ``a[1]``, … are treated as list indices. If an index exceeds this limit,
    the container is treated as a ``dict`` instead, with the numeric index kept as a string key
    (e.g., ``{"999": "x"}``) to prevent creation of massive sparse lists (e.g., ``a[999999999]``).

    This limit also applies to comma–split lists when ``comma=True``. Set a larger value if you explicitly need
    more items, or set a smaller one to harden against abuse.
    """

    charset: Charset = Charset.UTF8
    """The character encoding to use when decoding the input."""

    charset_sentinel: bool = False
    """Some services add an initial ``utf8=✓`` value to forms so that old Internet Explorer versions are more likely
    to submit the form as ``utf-8``.

    Additionally, the server can check the value against wrong encodings of the checkmark character and detect that a
    query string or ``application/x-www-form-urlencoded`` body was *not* sent as ``utf-8``, e.g. if the form had an
    ``accept-charset`` parameter or the containing page had a different character set.

    ``qs_codec`` supports this mechanism via the ``charset_sentinel`` option.
    If specified, the ``utf8`` parameter will be omitted from the returned ``dict``.
    It will be used to switch to ``LATIN1`` or ``UTF8`` mode depending on how the checkmark is encoded.

    Important: When you specify both the ``charset`` option and the ``charset_sentinel`` option,
    the ``charset`` will be overridden when the request contains a ``utf8`` parameter from which the actual charset
    can be deduced. In that sense the ``charset`` will behave as the default charset rather than the authoritative
    charset."""

    comma: bool = False
    """Set to ``True`` to parse the input as a comma-separated value.
    Note: nested ``dict``\\s, such as ``'a={b:1},{c:d}'`` are not supported."""

    delimiter: t.Union[str, t.Pattern[str]] = "&"
    """The delimiter to use when splitting key-value pairs in the encoded input.
    Can be a ``str`` or a ``Pattern``."""

    depth: int = 5
    """By default, when nesting ``dict``\\s ``qs_codec`` will only decode up to 5 children deep.
    This depth can be overridden by setting the ``depth``.
    The depth limit helps mitigate abuse when ``qs_codec`` is used to parse user input,
    and it is recommended to keep it a reasonably small number."""

    parameter_limit: t.Union[int, float] = 1000
    """For similar reasons, by default ``qs_codec`` will only parse up to 1000 parameters.
    This can be overridden by passing a ``parameter_limit`` option."""

    duplicates: Duplicates = Duplicates.COMBINE
    """Strategy for handling duplicate keys in the input.

    - ``COMBINE`` (default): merge values into a list (e.g., ``a=1&a=2`` → ``{"a": [1, 2]}``).
    - ``FIRST``: keep the first value and ignore subsequent ones (``{"a": 1}``).
    - ``LAST``: keep only the last value seen (``{"a": 2}``).
    """

    ignore_query_prefix: bool = False
    """Set to ``True`` to ignore the leading question mark query prefix in the encoded input."""

    interpret_numeric_entities: bool = False
    """Set to ``True`` to interpret HTML numeric entities (``&#...;``) in the encoded input."""

    parse_lists: bool = True
    """To disable ``list`` parsing entirely, set ``parse_lists`` to ``False``."""

    strict_depth: bool = False
    """Enforce the ``depth`` limit when decoding nested structures.

    When ``True``, the decoder will not descend beyond ``depth`` levels. Combined with
    ``raise_on_limit_exceeded``:

    - if ``raise_on_limit_exceeded=True``, exceeding the depth raises an ``IndexError``;
    - if ``False``, the decoder stops descending and treats deeper content as a terminal value,
      preserving the last valid container without raising.
    """

    strict_null_handling: bool = False
    """Set to ``True`` to decode values without ``=`` to ``None``."""

    raise_on_limit_exceeded: bool = False
    """Raise instead of degrading gracefully when limits are exceeded.

    When ``True``, the decoder raises:

    - a ``DecodeError`` for parameter and list limit violations; and
    - an ``IndexError`` when nesting deeper than ``depth`` **and** ``strict_depth=True``.

    When ``False`` (default), the decoder degrades gracefully: it slices the parameter list
    at ``parameter_limit``, stops adding items beyond ``list_limit``, and—if ``strict_depth=True``—
    stops descending once ``depth`` is reached without raising.
    """

    decoder: t.Optional[t.Callable[..., t.Optional[str]]] = DecodeUtils.decode
    """Custom scalar decoder invoked for each raw token prior to interpretation.

    The built-in decoder supports ``kind`` and is invoked as
    ``decoder(string, charset, kind=DecodeKind.KEY|VALUE)``. Custom decoders that omit ``kind``
    (or ``charset``) are automatically adapted for compatibility. Returning ``None`` from the
    decoder uses ``None`` as the scalar value.
    """

    legacy_decoder: t.Optional[t.Callable[..., t.Optional[str]]] = None
    """Back‑compat adapter for legacy decoders of the form ``decoder(value, charset)``.

    Prefer ``decoder`` which may optionally accept a ``kind`` argument. When both are supplied,
    ``decoder`` takes precedence (mirroring Kotlin/C#/Swift/Dart behavior). ``legacy_decoder`` is
    used when ``decoder`` is ``None`` or left at the library default."""

    def __post_init__(self) -> None:
        """Normalize the dot options and install the decoder adapter.

        Raises:
            ValueError: If ``decode_dot_in_keys`` is ``True`` while ``allow_dots`` is ``False``.
        """
        # Default `decode_dot_in_keys` first, then mirror into `allow_dots` when unspecified.
        if self.decode_dot_in_keys is None:
            self.decode_dot_in_keys = False
        if self.allow_dots is None:
            self.allow_dots = bool(self.decode_dot_in_keys)

        # Enforce consistency with the docs: `decode_dot_in_keys=True` implies `allow_dots=True`.
        if self.decode_dot_in_keys and not self.allow_dots:
            raise ValueError("decode_dot_in_keys=True implies allow_dots=True")

        # Decoder precedence: user `decoder` > `legacy_decoder` > library default.
        #
        # The field default for `decoder` is the library decoder, not `None`, so a bare
        # `is None` test would ignore `legacy_decoder` unless the caller also passed
        # `decoder=None`. A decoder equal to the library default is therefore treated as
        # "not user-supplied". `==` (not `is`) is required: if `DecodeUtils.decode` is a
        # bound method, every attribute access creates a new, equal-but-not-identical object.
        # Explicitly passing the library default is indistinguishable from omitting it.
        default_dec = DecodeUtils.decode
        raw_dec = self.decoder
        if self.legacy_decoder is not None and (raw_dec is None or raw_dec == default_dec):
            raw_dec = self.legacy_decoder
        if raw_dec is None:
            raw_dec = default_dec
        user_dec = raw_dec

        # Precompute dispatch once to avoid per-call introspection.
        dispatch = self._build_dispatch(user_dec)

        @wraps(user_dec)
        def _adapter(
            s: t.Optional[str],
            charset: t.Optional[Charset] = Charset.UTF8,
            *,
            kind: DecodeKind = DecodeKind.VALUE,
        ) -> t.Optional[str]:
            """Adapter that dispatches based on the user decoder's signature."""
            return dispatch(s, charset, kind)

        self.decoder = _adapter

    @staticmethod
    def _build_dispatch(
        user_dec: t.Callable[..., t.Optional[str]],
    ) -> t.Callable[[t.Optional[str], t.Optional[Charset], DecodeKind], t.Optional[str]]:
        """Return a ``(s, charset, kind)`` callable that invokes ``user_dec`` with what it accepts.

        The signature of ``user_dec`` is inspected once. If it cannot be inspected
        (e.g. some builtins), a probing fallback tries ``user_dec(s)`` and then
        ``user_dec(s, charset)``.
        """
        param_cls = inspect.Parameter
        positional_kinds = (param_cls.POSITIONAL_ONLY, param_cls.POSITIONAL_OR_KEYWORD)
        keyword_kinds = (param_cls.POSITIONAL_OR_KEYWORD, param_cls.KEYWORD_ONLY)

        try:
            params = inspect.signature(user_dec).parameters
            param_list = list(params.values())
            has_var_kw = any(p.kind == param_cls.VAR_KEYWORD for p in param_list)
            has_var_pos = any(p.kind == param_cls.VAR_POSITIONAL for p in param_list)

            # --- charset: by name, or through *args ---
            charset_param = params.get("charset")
            accepts_charset_pos = charset_param is not None and charset_param.kind in positional_kinds
            accepts_charset_kw = charset_param is not None and charset_param.kind in keyword_kinds
            if has_var_pos:
                accepts_charset_pos = True

            # Legacy decoders are documented as ``decoder(value, charset)`` but need not name
            # the second parameter "charset" (e.g. ``lambda s, cs: ...``). A *required* second
            # positional parameter can only be satisfied by passing the charset positionally;
            # omitting it would raise TypeError. Defaulted parameters are left alone so that
            # decoders which worked with a single argument keep being called that way.
            if not accepts_charset_pos and not accepts_charset_kw:
                positional = [p for p in param_list if p.kind in positional_kinds]
                if len(positional) >= 2:
                    second = positional[1]
                    if second.name != "kind" and second.default is param_cls.empty:
                        accepts_charset_pos = True

            # --- kind: by name, or through **kwargs ---
            kind_param = params.get("kind")
            if kind_param is not None:
                accepts_kind_kw = kind_param.kind in keyword_kinds
                accepts_kind_pos = kind_param.kind in positional_kinds
            else:
                accepts_kind_kw = has_var_kw  # can pass via **kwargs
                accepts_kind_pos = False

            # Decide how to represent `kind`: prefer string for maximum compatibility.
            # Only an annotation that is an actual Enum subclass switches to passing the
            # Enum member. NOTE: a typing.Literal (or any other non-`type` annotation,
            # including string annotations) is not a `type`, so it keeps the string form.
            pass_kind_as_str = True
            if kind_param is not None:
                ann = kind_param.annotation
                if ann is not inspect.Signature.empty and isinstance(ann, type):
                    pass_kind_as_str = not issubclass(ann, _EnumBase)
        except (TypeError, ValueError):
            # Builtins/callables without retrievable signature: try the most compatible forms.
            def _probing_dispatch(
                s: t.Optional[str],
                charset: t.Optional[Charset],
                kind: DecodeKind,
            ) -> t.Optional[str]:
                _ = kind  # ignored by legacy decoders
                try:
                    return user_dec(s)  # type: ignore[misc]
                except TypeError as e1:
                    try:
                        return user_dec(s, charset)  # type: ignore[misc]
                    except TypeError as exc:
                        # Surface the first failure; keep the second as its cause.
                        raise e1 from exc

            return _probing_dispatch

        def _dispatch(
            s: t.Optional[str],
            charset: t.Optional[Charset],
            kind: DecodeKind,
        ) -> t.Optional[str]:
            kind_arg: t.Union[DecodeKind, str] = kind.value if pass_kind_as_str else kind
            args: t.List[t.Any] = [s]
            kwargs: t.Dict[str, t.Any] = {}
            if accepts_charset_pos:
                args.append(charset)
            elif accepts_charset_kw or has_var_kw:
                kwargs["charset"] = charset
            if accepts_kind_kw:
                kwargs["kind"] = kind_arg
            elif accepts_kind_pos:
                args.append(kind_arg)
            return user_dec(*args, **kwargs)

        return _dispatch

    # --- Convenience methods ---

    def decode(
        self,
        value: t.Optional[str],
        charset: t.Optional[Charset] = None,
        *,
        kind: DecodeKind = DecodeKind.VALUE,
    ) -> t.Optional[t.Any]:
        """Unified scalar decode with key/value context.

        Uses the configured ``decoder`` (or ``legacy_decoder``) when provided; otherwise falls
        back to :meth:`DecodeUtils.decode`. The default library behavior decodes keys identically
        to values; whether a ``.`` participates in key splitting is decided later by the parser.

        Args:
            value: The raw token to decode (may be ``None``).
            charset: Charset to decode with; falls back to ``self.charset`` when falsy.
            kind: Whether the token is a key or a value.

        Returns:
            Whatever the effective decoder returns for the token.
        """
        # ``self.decoder`` has been normalized to accept (s, charset, *, kind)
        d = self.decoder
        if d is None:
            # Should not happen because we always set an adapter, but keep a safe fallback.
            return DecodeUtils.decode(value, charset or self.charset)
        return d(value, charset or self.charset, kind=kind)

    def decode_key(self, value: t.Optional[str], charset: t.Optional[Charset] = None) -> t.Optional[str]:
        """Decode a key (or key segment). Always returns a string or ``None``.

        Note: custom decoders returning non-strings for keys are coerced via ``str()``.
        """
        out = self.decode(value, charset, kind=DecodeKind.KEY)
        return None if out is None else str(out)

    def decode_value(self, value: t.Optional[str], charset: t.Optional[Charset] = None) -> t.Optional[t.Any]:
        """Decode a value token. Returns any scalar or ``None``."""
        return self.decode(value, charset, kind=DecodeKind.VALUE)