Source code for holovec.retrieval.codebook

from __future__ import annotations

from typing import Dict, Iterable, List, Optional, Tuple

import numpy as np

from ..backends.base import Array
from ..backends import get_backend, Backend


class Codebook:
    """Thin wrapper for label→vector mappings with convenience methods.

    Labels are kept in insertion order (the mapping is a plain ``dict``).
    Vectors are backend arrays and are stored as given, without copying or
    validation.
    """

    def __init__(
        self,
        items: Optional[Dict[str, Array]] = None,
        backend: Optional[Backend] = None,
    ):
        """Create a codebook, optionally pre-populated from ``items``.

        Args:
            items: Initial label→vector mapping; ``None`` or an empty dict
                yields an empty codebook.
            backend: Backend used by :meth:`as_matrix` and :meth:`save`.
                When ``None``, ``get_backend("numpy")`` is used.
        """
        self._items: Dict[str, Array] = {}
        self._backend: Backend = (
            backend if backend is not None else get_backend("numpy")
        )
        if items:
            self.extend(items)

    # Basic operations
    def add(self, label: str, vector: Array) -> None:
        """Store ``vector`` under ``label``, replacing any existing entry."""
        self._items[label] = vector

    def extend(self, items: Dict[str, Array]) -> None:
        """Add every label→vector pair of ``items`` via :meth:`add`."""
        for label, vector in items.items():
            self.add(label, vector)

    @property
    def labels(self) -> List[str]:
        """Labels in insertion order, as a new list."""
        return list(self._items)

    @property
    def size(self) -> int:
        """Number of stored entries."""
        return len(self._items)

    def as_list(self) -> List[Tuple[str, Array]]:
        """Return the entries as a new list of ``(label, vector)`` pairs."""
        return list(self._items.items())

    def as_matrix(self, backend: Optional[Backend] = None) -> Tuple[List[str], Array]:
        """Return ``(labels, matrix)`` where matrix has shape (L, D).

        Row ``i`` of the matrix is the vector stored under ``labels[i]``;
        the rows are combined with ``backend.stack(..., axis=0)``.
        NOTE(review): the (L, D) shape assumes every stored vector is 1-D
        with the same length D; nothing here enforces that.

        Args:
            backend: Backend used for stacking. Defaults to the backend the
                codebook was constructed with.

        Returns:
            The label list and the stacked matrix. An empty codebook yields
            ``([], zeros((0, 0), dtype="float32"))`` so that the result is
            still two-dimensional.
        """
        # Explicit None check (as in __init__): a backend object that happens
        # to evaluate falsy must not be silently replaced.
        be = backend if backend is not None else self._backend
        if not self._items:
            # D is unknown without any vector, so use 0 columns but keep the
            # array 2-D to honour the documented (L, D) contract.
            return [], be.zeros((0, 0), dtype="float32")
        labels = self.labels
        stacked = be.stack([self._items[lbl] for lbl in labels], axis=0)
        return labels, stacked

    # Persistence (npz)
    def save(self, path: str) -> None:
        """Write labels and the stacked matrix to an ``.npz`` archive.

        The archive holds two entries: ``labels`` (object array) and
        ``matrix`` (the result of :meth:`as_matrix` converted with the
        codebook backend's ``to_numpy``).

        Note:
            ``np.savez`` appends ``".npz"`` to ``path`` when the suffix is
            missing; :meth:`load` accepts either spelling.
        """
        labels, mat = self.as_matrix()
        mat_np = self._backend.to_numpy(mat)
        np.savez(path, labels=np.array(labels, dtype=object), matrix=mat_np)

    @classmethod
    def load(cls, path: str, backend: Optional[Backend] = None) -> "Codebook":
        """Rebuild a codebook from an archive written by :meth:`save`.

        Args:
            path: Archive location. If it does not exist and lacks the
                ``.npz`` suffix, ``path + ".npz"`` is tried as well, because
                that is the name ``np.savez`` actually writes.
            backend: Backend for the restored vectors (each row goes through
                ``backend.from_numpy``). When ``None``,
                ``get_backend("numpy")`` is used.

        Returns:
            A new codebook with the labels in their saved order.

        Raises:
            FileNotFoundError: If neither candidate path exists.
        """
        be = backend if backend is not None else get_backend("numpy")
        # NOTE(security): allow_pickle=True is needed because save() stores
        # the labels as an object array. Unpickling can execute arbitrary
        # code, so only load archives from trusted sources.
        try:
            archive = np.load(path, allow_pickle=True)
        except FileNotFoundError:
            if str(path).endswith(".npz"):
                raise
            archive = np.load(f"{path}.npz", allow_pickle=True)
        # Read everything inside the context manager so the underlying file
        # handle is closed as soon as the data is in memory.
        with archive as data:
            labels = [str(x) for x in data["labels"].tolist()]
            mat = data["matrix"]
        items: Dict[str, Array] = {
            lbl: be.from_numpy(mat[i]) for i, lbl in enumerate(labels)
        }
        return cls(items=items, backend=be)