Source code for holovec.encoders.base

"""
Base classes and interfaces for encoders.

Encoders transform various data types (scalars, sequences, structured data)
into hypervectors for processing with VSA models.
"""

from abc import ABC, abstractmethod
from typing import Any, List, Optional, Tuple
from holovec.models.base import VSAModel
from holovec.backends.base import Array


[docs] class Encoder(ABC): """ Abstract base class for all encoders. Encoders transform data into hypervectors compatible with VSA models. They follow the principle of locality preservation: similar inputs should map to similar hypervectors. Attributes: model: VSA model instance used for vector operations backend: Backend instance (inherited from model) dimension: Dimensionality of hypervectors (inherited from model) """
[docs] def __init__(self, model: VSAModel): """ Initialize encoder with a VSA model. Args: model: VSA model instance to use for operations Raises: ValueError: If model is not compatible with this encoder """ # Validate model compatibility if not self._is_model_compatible(model): compatible = ", ".join(self.compatible_models) raise ValueError( f"{self.__class__.__name__} is not compatible with {model.model_name}. " f"Compatible models: {compatible}" ) self.model = model self.backend = model.backend self.dimension = model.dimension
[docs] @abstractmethod def encode(self, data: Any) -> Array: """ Encode data into hypervector. Args: data: Input data (type depends on encoder) Returns: Hypervector representation of shape (dimension,) Raises: ValueError: If data is invalid for this encoder """ pass
[docs] def encode_batch(self, data_list: List[Any]) -> List[Array]: """ Encode multiple data points. Default implementation encodes each item individually. Subclasses may override for more efficient batch encoding. Args: data_list: List of data items to encode Returns: List of encoded hypervectors """ return [self.encode(data) for data in data_list]
[docs] @abstractmethod def decode(self, hypervector: Array) -> Any: """ Decode hypervector back to data (if possible). Args: hypervector: Hypervector to decode, shape (dimension,) Returns: Decoded data, or None if encoder is not reversible Raises: NotImplementedError: If encoder does not support decoding """ pass
@property @abstractmethod def is_reversible(self) -> bool: """ Whether this encoder supports decoding. Returns: True if decode() is implemented and functional, False otherwise """ pass @property @abstractmethod def compatible_models(self) -> List[str]: """ List of compatible VSA model names. Returns: List of model names (e.g., ['FHRR', 'HRR']) """ pass @property @abstractmethod def input_type(self) -> str: """ Description of expected input type. Returns: Human-readable string describing input type (e.g., "scalar float", "sequence of symbols", "2D array") """ pass def _is_model_compatible(self, model: VSAModel) -> bool: """ Check if given model is compatible with this encoder. Args: model: VSA model to check Returns: True if compatible, False otherwise """ return model.model_name in self.compatible_models
[docs] def __repr__(self) -> str: """String representation of encoder.""" return ( f"{self.__class__.__name__}(" f"model={self.model.model_name}, " f"dimension={self.dimension}, " f"reversible={self.is_reversible})" )
class ScalarEncoder(Encoder): """ Abstract base class for encoders that map scalar values to hypervectors. Scalar encoders should preserve ordering: similar scalars should map to similar hypervectors. Most scalar encoders support a value range [min_val, max_val] that is normalized internally. """ def __init__( self, model: VSAModel, min_val: float, max_val: float ): """ Initialize scalar encoder. Args: model: VSA model instance min_val: Minimum value of encoding range max_val: Maximum value of encoding range Raises: ValueError: If min_val >= max_val """ super().__init__(model) if min_val >= max_val: raise ValueError( f"min_val must be less than max_val, got {min_val} >= {max_val}" ) self.min_val = min_val self.max_val = max_val self.range = max_val - min_val def normalize(self, value: float) -> float: """ Normalize value to [0, 1] range. Args: value: Value to normalize Returns: Normalized value in [0, 1] Note: Values outside [min_val, max_val] are clipped. """ # Clip to valid range value = max(self.min_val, min(self.max_val, value)) # Normalize to [0, 1] return (value - self.min_val) / self.range def denormalize(self, normalized_value: float) -> float: """ Denormalize value from [0, 1] to [min_val, max_val]. Args: normalized_value: Value in [0, 1] range Returns: Denormalized value in [min_val, max_val] """ return self.min_val + normalized_value * self.range @property def input_type(self) -> str: """Input type description.""" return f"scalar float in [{self.min_val}, {self.max_val}]" class SequenceEncoder(Encoder): """ Abstract base class for encoders that map sequences to hypervectors. Sequence encoders typically require a codebook mapping elements to hypervectors, and encode sequences by combining element representations with position information. """ def __init__( self, model: VSAModel, max_length: Optional[int] = None ): """ Initialize sequence encoder. Args: model: VSA model instance max_length: Maximum sequence length (None for unlimited) """ super().__init__(model) self.max_length = max_length @property def input_type(self) -> str: """Input type description.""" if self.max_length is not None: return f"sequence of length <= {self.max_length}" return "sequence of variable length" class StructuredEncoder(Encoder): """ Abstract base class for encoders that map structured data to hypervectors. Structured encoders handle multi-dimensional data like vectors, images, or graphs. They typically compose scalar or symbol encoders with structural binding operations. """ @property def input_type(self) -> str: """Input type description.""" return "structured data"