Source code for holovec.encoders.structured

"""
Structured data encoders for mapping vectors and structured data to hypervectors.

This module implements encoders that transform multi-dimensional vectors
and structured data into hypervector representations by composing scalar
encoders with dimension binding.
"""

from typing import List, Optional
from holovec.encoders.base import StructuredEncoder, ScalarEncoder
from holovec.models.base import VSAModel
from holovec.backends.base import Array


[docs] class VectorEncoder(StructuredEncoder): """ Vector encoder for multi-dimensional numeric data using role-filler binding. Encodes vectors by binding each dimension with its scalar-encoded value: encode([v₁, v₂, ..., vₐ]) = Σᵢ bind(Dᵢ, scalar_encode(vᵢ)) where: - Dᵢ is a random hypervector for dimension i - scalar_encode(vᵢ) encodes the scalar value using FPE/Thermometer/Level - bind() creates a role-filler binding - Σ bundles all dimension-value pairs This creates a compositional encoding where: - Each dimension has explicit representation (Dᵢ) - Similar values in corresponding dimensions → higher similarity - Supports partial matching across dimensions - Enables approximate decoding via unbinding Attributes: scalar_encoder: Encoder for individual scalar values n_dimensions: Number of dimensions in input vectors dim_vectors: List of dimension hypervectors (Dᵢ) normalize_input: Whether to normalize input vectors Example: >>> from holovec import VSA >>> from holovec.encoders import FractionalPowerEncoder, VectorEncoder >>> >>> model = VSA.create('FHRR', dim=10000) >>> scalar_enc = FractionalPowerEncoder(model, min_val=0, max_val=1) >>> encoder = VectorEncoder(model, scalar_encoder=scalar_enc, n_dims=128) >>> >>> # Encode a feature vector (list or any backend array) >>> features = [0.5] * 128 # Can also use numpy/torch/jax arrays >>> hv = encoder.encode(features) >>> >>> # Similar vectors have high similarity >>> features2 = [0.51] * 128 # Slightly different >>> hv2 = encoder.encode(features2) >>> model.similarity(hv, hv2) # High similarity >>> >>> # Decode to recover approximate values >>> recovered = encoder.decode(hv) >>> # Verify approximate recovery via similarity >>> model.similarity(encoder.encode(recovered), hv) > 0.9 """
[docs] def __init__( self, model: VSAModel, scalar_encoder: ScalarEncoder, n_dimensions: int, normalize_input: bool = False, seed: Optional[int] = None ): """ Initialize vector encoder. Args: model: VSA model instance scalar_encoder: Encoder for individual scalar values n_dimensions: Number of dimensions in input vectors normalize_input: Whether to normalize input vectors to unit length seed: Random seed for dimension vector generation Raises: ValueError: If n_dimensions < 1 TypeError: If scalar_encoder is not a ScalarEncoder """ super().__init__(model) if n_dimensions < 1: raise ValueError(f"n_dimensions must be >= 1, got {n_dimensions}") if not isinstance(scalar_encoder, ScalarEncoder): raise TypeError( f"scalar_encoder must be a ScalarEncoder, got {type(scalar_encoder)}" ) # Check model compatibility if model != scalar_encoder.model: raise ValueError( "scalar_encoder must use the same VSA model as VectorEncoder" ) self.scalar_encoder = scalar_encoder self.n_dimensions = n_dimensions self.normalize_input = normalize_input self.seed = seed # Generate dimension hypervectors (one per dimension) # These are the "roles" in role-filler binding self.dim_vectors: List[Array] = [] for i in range(n_dimensions): # Use deterministic seeding for reproducibility if seed is not None: dim_seed = seed + i else: dim_seed = i + 1000 # Offset to avoid collision with symbol seeds self.dim_vectors.append(model.random(seed=dim_seed))
[docs] def encode(self, vector: Array) -> Array: """ Encode a vector using dimension binding. Each element is bound with its corresponding dimension vector: result = Σᵢ bind(Dᵢ, scalar_encode(vector[i])) Args: vector: Input vector to encode, shape (n_dimensions,) Returns: Hypervector representing the vector Raises: ValueError: If vector shape doesn't match n_dimensions Example: >>> encoder = VectorEncoder(model, scalar_enc, n_dims=3) >>> vector = [1.0, 2.0, 3.0] # Can also be numpy/torch/jax array >>> hv = encoder.encode(vector) """ # Convert to backend array if needed vector = self.backend.array(vector) if vector.shape != (self.n_dimensions,): raise ValueError( f"Expected vector of shape ({self.n_dimensions},), " f"got {vector.shape}" ) # Optional: normalize to unit length if self.normalize_input: vector = self.backend.normalize(vector) # Bind each dimension with its scalar-encoded value bound_dims = [] for i, value in enumerate(vector): # Encode scalar value as hypervector value_hv = self.scalar_encoder.encode(float(value)) # Bind dimension role with value filler dim_hv = self.dim_vectors[i] bound = self.model.bind(dim_hv, value_hv) bound_dims.append(bound) # Bundle all dimension-value bindings vector_hv = self.model.bundle(bound_dims) return vector_hv
[docs] def decode(self, hypervector: Array) -> Array: """ Decode vector hypervector to recover approximate values. For each dimension i: 1. Unbind dimension: value_hv = unbind(hypervector, Dᵢ) 2. Decode scalar: value ≈ scalar_encoder.decode(value_hv) Args: hypervector: Vector hypervector to decode, shape (dimension,) Returns: Decoded vector, shape (n_dimensions,) (backend array type) Raises: NotImplementedError: If scalar_encoder doesn't support decoding Note: Decoding is approximate and quality depends on: - VSA model (exact vs. approximate binding) - Scalar encoder precision - Number of dimensions (more dims → more noise) Example: >>> original = [1.0, 2.0, 3.0] >>> encoded = encoder.encode(original) >>> decoded = encoder.decode(encoded) >>> # Check approximate recovery (using backend operations) >>> model.similarity(encoder.encode(decoded), encoded) > 0.9 """ if not self.scalar_encoder.is_reversible: raise NotImplementedError( f"Cannot decode: scalar_encoder {type(self.scalar_encoder).__name__} " "does not support decoding" ) decoded_values = [] for i in range(self.n_dimensions): # Unbind dimension to recover value hypervector dim_hv = self.dim_vectors[i] value_hv = self.model.unbind(hypervector, dim_hv) # Decode scalar value value = self.scalar_encoder.decode(value_hv) decoded_values.append(value) return self.backend.array(decoded_values)
@property def is_reversible(self) -> bool: """ VectorEncoder supports approximate decoding if scalar_encoder does. Returns: True if scalar_encoder supports decoding, False otherwise """ return self.scalar_encoder.is_reversible @property def compatible_models(self) -> List[str]: """ Works with all VSA models. Decoding quality varies: - Exact models (FHRR, MAP): High accuracy - Approximate models (HRR, BSC): Moderate accuracy Returns: List of all model names """ return ["MAP", "FHRR", "HRR", "BSC", "GHRR", "VTB", "BSDC"] @property def input_type(self) -> str: """Input type description.""" return f"{self.n_dimensions}-dimensional vector"
[docs] def __repr__(self) -> str: """String representation.""" return ( f"VectorEncoder(" f"model={self.model.model_name}, " f"scalar_encoder={type(self.scalar_encoder).__name__}, " f"n_dimensions={self.n_dimensions}, " f"normalize_input={self.normalize_input})" )