from __future__ import annotations

import re
import unicodedata


# One or more consecutive characters that are not a lowercase ASCII letter
# or an ASCII digit; every such run acts as a token separator.
_NON_ALNUM_RE = re.compile(r"[^a-z0-9]+")


def slug_tokens(text: str) -> list[str]:
    """Break *text* into lowercase ASCII alphanumeric slug tokens.

    The text is NFKD-normalized so that accented letters decompose into a
    base letter plus combining marks; anything that still cannot be encoded
    as ASCII is then dropped. The remainder is lowercased and cut at every
    run of characters outside ``a-z`` / ``0-9``.

    Args:
        text: Arbitrary input string.

    Returns:
        The non-empty tokens in order of appearance, or an empty list when
        nothing alphanumeric survives.
    """
    decomposed = unicodedata.normalize("NFKD", text)
    # errors="ignore" silently discards every non-ASCII code point,
    # including the combining marks produced by the decomposition above.
    ascii_only = decomposed.encode("ascii", "ignore").decode("ascii")
    pieces = _NON_ALNUM_RE.split(ascii_only.lower())
    # Splitting yields empty strings at the edges when the text starts or
    # ends with a separator run (or is empty); those are not tokens.
    return [piece for piece in pieces if piece]


def truncate_tokens(tokens: list[str], max_len: int) -> list[str]:
    """Return the longest leading run of *tokens* that fits in *max_len*.

    The length is measured as if the kept tokens were joined with a
    single-character separator: each token costs its own length, plus one
    for every token after the first.

    Args:
        tokens: Tokens in the order they would be joined.
        max_len: Maximum allowed joined length.

    Returns:
        A new list holding the leading tokens that fit. Empty when
        ``max_len`` is not positive, when ``tokens`` is empty, or when the
        first token alone is longer than ``max_len``.
    """
    if max_len <= 0:
        return []
    if not tokens:
        return []

    kept: list[str] = []
    used = 0
    for position, word in enumerate(tokens):
        # Only tokens after the first pay for a separator character.
        used += len(word) + (1 if position else 0)
        if used > max_len:
            # Stop at the first token that does not fit; later, shorter
            # tokens are deliberately not considered.
            return kept
        kept.append(word)
    return kept
