# Source code for holovec.encoders.spatial

"""
Spatial encoders for image and grid data.

This module provides encoders for spatial data structures like images,
where both position and value information must be encoded.
"""

from typing import Optional, Tuple, Union, List
from holovec.models.base import VSAModel
from holovec.encoders.base import Encoder
from holovec.encoders.scalar import ScalarEncoder
from holovec.backends.base import Array


class ImageEncoder(Encoder):
    """
    Image encoder for 2D images (grayscale, RGB, or RGBA).

    Encodes images by binding spatial positions (x, y) with pixel values.
    For color images, each channel is bound to a channel dimension vector
    before being combined with position information.

    Encoding strategy:
        For each pixel at position (x, y) with value v:

        1. Encode position:
           pos_hv = bundle([bind(X, enc(x)), bind(Y, enc(y))])
        2. Encode value(s):
           - Grayscale: val_hv = enc(v)
           - RGB: val_hv = bundle([bind(R, enc(r)), bind(G, enc(g)),
             bind(B, enc(b))])
           - RGBA: as RGB, with bind(A, enc(a)) added to the bundle
        3. Bind position with value: pixel_hv = bind(pos_hv, val_hv)
        4. Bundle all pixels: image_hv = bundle([all pixel_hvs])

    This creates a distributed representation that preserves both spatial
    structure and pixel values, enabling similarity-based image comparison.

    Parameters
    ----------
    model : VSAModel
        The VSA model to use for encoding operations.
    scalar_encoder : ScalarEncoder
        Encoder for continuous pixel values (0-255 typically). Must have
        been created with the same ``model``.
    normalize_pixels : bool, optional
        Whether to normalize integer pixel values to [0, 1] (division by
        255) before encoding. Default is True.
    seed : int, optional
        Base seed for the role vectors. The six role vectors X, Y, R, G, B
        and A are drawn with seeds ``seed`` .. ``seed + 5``. When None
        (the default) a fixed base seed of 2000 is used.

    Attributes
    ----------
    n_channels : int or None
        Number of channels in the last encoded image (1, 3, or 4); None
        until ``encode`` has been called.
    image_shape : tuple or None
        Shape (height, width, channels) of the last encoded image; None
        until ``encode`` has been called.

    Notes
    -----
    Pixel coordinates are encoded with the same ``scalar_encoder`` that
    encodes pixel values, as ``enc(float(x))`` and ``enc(float(y))`` with
    unscaled integer indices.
    NOTE(review): how coordinates outside the scalar encoder's configured
    value range are treated depends on the ScalarEncoder implementation,
    which is not visible here - confirm before relying on positional
    resolution for images larger than that range.

    Examples
    --------
    >>> from holovec import VSA
    >>> from holovec.encoders import ImageEncoder, ThermometerEncoder
    >>> import numpy as np
    >>>
    >>> model = VSA.create('MAP', dim=10000, seed=42)
    >>> scalar_enc = ThermometerEncoder(model, min_val=0, max_val=1, n_bins=256, seed=42)
    >>> encoder = ImageEncoder(model, scalar_enc, normalize_pixels=True, seed=42)
    >>>
    >>> # Encode a small grayscale image
    >>> image = np.array([[100, 150], [200, 250]], dtype=np.uint8)
    >>> hv = encoder.encode(image)
    >>> print(hv.shape)  # (10000,)
    >>>
    >>> # Encode RGB image
    >>> rgb_image = np.random.randint(0, 256, (28, 28, 3), dtype=np.uint8)
    >>> hv_rgb = encoder.encode(rgb_image)
    """

    def __init__(
        self,
        model: VSAModel,
        scalar_encoder: ScalarEncoder,
        normalize_pixels: bool = True,
        seed: Optional[int] = None
    ):
        """
        Initialize ImageEncoder.

        Raises
        ------
        TypeError
            If ``scalar_encoder`` is not a ScalarEncoder instance.
        ValueError
            If ``scalar_encoder`` was built for a different VSA model.
        """
        # Validate and set scalar_encoder BEFORE calling super().__init__
        # because base class checks compatible_models which references it
        if not isinstance(scalar_encoder, ScalarEncoder):
            raise TypeError(
                f"scalar_encoder must be a ScalarEncoder, got {type(scalar_encoder)}"
            )
        if scalar_encoder.model != model:
            raise ValueError(
                "scalar_encoder must use the same VSA model as the ImageEncoder"
            )

        self.scalar_encoder = scalar_encoder
        self.normalize_pixels = normalize_pixels

        super().__init__(model)

        # A missing seed falls back to a fixed base so the role vectors are
        # still requested with known seed values.
        base_seed = seed if seed is not None else 2000

        # Dimension (role) vectors for spatial coordinates
        self.X = model.random(seed=base_seed)  # X dimension
        self.Y = model.random(seed=base_seed + 1)  # Y dimension

        # Dimension (role) vectors for color channels (RGB/RGBA)
        self.R = model.random(seed=base_seed + 2)  # Red channel
        self.G = model.random(seed=base_seed + 3)  # Green channel
        self.B = model.random(seed=base_seed + 4)  # Blue channel
        self.A = model.random(seed=base_seed + 5)  # Alpha channel

        # Track last encoded image properties
        self.n_channels: Optional[int] = None
        self.image_shape: Optional[Tuple[int, ...]] = None

    def encode(self, image: Union[Array, "numpy.ndarray"]) -> Array:
        """
        Encode an image into a hypervector.

        Parameters
        ----------
        image : array-like
            Image array with shape (height, width) for grayscale or
            (height, width, channels) for color images. Pixel values
            should be in range [0, 255] for uint8 or [0, 1] for float.
            Typically a NumPy array from PIL, OpenCV, or similar libraries.
            Non-ndarray inputs (lists, tuples, ...) are converted with
            ``numpy.array``.

        Returns
        -------
        Array
            Hypervector encoding of the image.

        Raises
        ------
        ValueError
            If image has invalid shape or number of channels, or contains
            no pixels (zero height or width).

        Notes
        -----
        This encoder accepts images as NumPy arrays (the standard format
        from image libraries like PIL, OpenCV, scikit-image) and processes
        them using the configured backend. While input must be NumPy,
        internal VSA operations use the model's backend (NumPy/PyTorch/JAX).

        When ``normalize_pixels`` is True, every integer dtype is divided
        by 255 (a 0-255 range is assumed even for wider integer types);
        other dtypes are passed through unchanged and assumed to already
        lie in [0, 1].

        On success ``n_channels`` and ``image_shape`` are updated to
        describe the encoded image.
        """
        # Import numpy locally to avoid module-level backend dependency
        # Images from external sources (PIL, OpenCV) are numpy arrays
        import numpy as _np

        # Convert to numpy array if needed (handles lists, tuples, etc.)
        if not isinstance(image, _np.ndarray):
            image = _np.array(image)

        # Validate and normalize image shape
        if image.ndim == 2:
            # Grayscale image
            height, width = image.shape
            n_channels = 1
            # Add channel dimension: (H, W) -> (H, W, 1)
            image = _np.expand_dims(image, axis=-1)
        elif image.ndim == 3:
            height, width, n_channels = image.shape
            if n_channels not in [1, 3, 4]:
                raise ValueError(
                    f"Image must have 1, 3, or 4 channels, got {n_channels}"
                )
        else:
            raise ValueError(
                f"Image must be 2D (grayscale) or 3D (color), got shape {image.shape}"
            )

        # An image without pixels would hand an empty list to bundle();
        # reject it here with an explicit message, before touching state.
        if height == 0 or width == 0:
            raise ValueError(
                f"Image must contain at least one pixel, got {height}x{width}"
            )

        # Store image properties
        self.n_channels = n_channels
        self.image_shape = (height, width, n_channels)

        # Normalize pixel values if requested. The dtype is checked through
        # its string form; 'int' matches uint8 as well as every other
        # signed/unsigned integer type, all scaled assuming a 0-255 range.
        # Anything else (e.g. float) is assumed to already be in [0, 1].
        if self.normalize_pixels and 'int' in str(image.dtype):
            image = image.astype(_np.float32) / 255.0

        encode_scalar = self.scalar_encoder.encode
        bind = self.model.bind
        bundle = self.model.bundle

        # The bound X-coordinate vector depends only on the column, so build
        # it once per column instead of once per pixel.
        # NOTE(review): assumes scalar_encoder.encode is deterministic and
        # that bind/bundle return new arrays without mutating their inputs.
        x_bounds = [
            bind(self.X, encode_scalar(float(x))) for x in range(width)
        ]

        # Role vectors for the color channels actually present (R, G, B[, A]).
        # Unused for grayscale, where the intensity is encoded unbound.
        channel_roles = (self.R, self.G, self.B, self.A)[:n_channels]

        # Encode all pixels in row-major order
        pixel_hvs = []
        for y in range(height):
            # The bound Y-coordinate vector is shared by the whole row.
            y_bound = bind(self.Y, encode_scalar(float(y)))

            for x, x_bound in enumerate(x_bounds):
                # Encode spatial position
                pos_hv = bundle([x_bound, y_bound])

                # Encode pixel value(s)
                if n_channels == 1:
                    # Grayscale: just encode the intensity
                    val_hv = encode_scalar(float(image[y, x, 0]))
                else:
                    # RGB / RGBA: bind each channel to its dimension vector
                    val_hv = bundle([
                        bind(role, encode_scalar(float(image[y, x, c])))
                        for c, role in enumerate(channel_roles)
                    ])

                # Bind position with value
                pixel_hvs.append(bind(pos_hv, val_hv))

        # Bundle all pixels to create image hypervector
        return bundle(pixel_hvs)

    def decode(
        self,
        hypervector: Array,
        height: int,
        width: int,
        n_channels: int = 1
    ) -> "numpy.ndarray":
        """
        Decode a hypervector to reconstruct an approximate image.

        This method is a placeholder: it always raises NotImplementedError
        and none of its arguments are inspected.

        Note: Image decoding is approximate and requires knowing the target
        image dimensions. Reconstruction quality depends on the scalar
        encoder's decoding capabilities and may require candidate value
        search.

        Parameters
        ----------
        hypervector : Array
            The hypervector to decode.
        height : int
            Target image height.
        width : int
            Target image width.
        n_channels : int, optional
            Number of channels (1, 3, or 4). Default is 1.

        Returns
        -------
        np.ndarray
            Never returned by the current implementation. The intended
            result is a reconstructed image with shape (height, width) for
            grayscale or (height, width, n_channels) for color.

        Raises
        ------
        NotImplementedError
            Always. Image decoding is computationally intractable without
            additional constraints.

        Notes
        -----
        Image decoding is not implemented because it requires solving a
        high-dimensional inverse problem that is fundamentally ill-posed:

        **Mathematical Challenge:**

        The encoding process binds pixel values with position vectors:

            image_hv = bundle([bind(position(i,j), scalar(pixel[i,j]))
                               for all i,j])

        To decode, we must:

        1. Unbind each position: pixel_hv[i,j] = unbind(image_hv, position(i,j))
        2. Decode each scalar: pixel[i,j] = scalar_decode(pixel_hv[i,j])

        **Why This Is Intractable:**

        - Unbinding is approximate (except for FHRR with exact inverse)
        - Each unbind operation introduces noise
        - For H×W image: H×W unbind operations compound errors
        - Scalar decoding via optimization (1000 evals × 100 iterations)
        - Total: ~100M evaluations for 100×100 image
        - No gradient available for joint optimization

        **Alternative Approaches:**

        1. **Database Retrieval**: Encode query image, find nearest match
           in database

           - Complexity: O(N) for N known images
           - Works well for classification/recognition tasks

        2. **Iterative Resonator**: Use resonator cleanup with pixel codebook

           - Requires pre-built codebook of common pixel patterns
           - May reconstruct coarse structure but not fine details

        3. **Neural Decoder**: Train neural network image_hv → image

           - Requires supervised training data
           - Can learn inverse mapping empirically
           - See: Imani et al. (2019) "VoiceHD" for similar approach

        For practical applications, use ImageEncoder for one-way encoding
        (e.g., image→hypervector→classifier) rather than reconstruction.

        References
        ----------
        - Imani et al. (2019): "VoiceHD: Hyperdimensional Computing for
          Efficient Speech Recognition"
        - Plate (2003): "Holographic Reduced Representations" - Chapter 4
          on approximate unbinding and error accumulation
        """
        raise NotImplementedError(
            "Image decoding is not implemented due to computational intractability. "
            "See docstring for detailed mathematical explanation and alternatives. "
            "For reconstruction tasks, use similarity-based retrieval from a database "
            "of known images, or train a neural decoder network."
        )

    @property
    def is_reversible(self) -> bool:
        """
        Whether the encoder supports decoding.

        Returns
        -------
        bool
            False - image decoding not yet implemented.
        """
        return False

    @property
    def compatible_models(self) -> List[str]:
        """
        List of compatible VSA model names.

        Returns
        -------
        list of str
            The compatible model names reported by the wrapped scalar
            encoder; this encoder adds no restriction of its own.
        """
        return self.scalar_encoder.compatible_models

    @property
    def input_type(self) -> str:
        """
        Description of expected input type.

        Returns
        -------
        str
            A generic description before any image has been encoded;
            afterwards a description of the last encoded image (kind and
            height x width).
        """
        if self.n_channels is None:
            return "2D array (grayscale) or 3D array (color) with shape (H, W) or (H, W, C)"
        elif self.n_channels == 1:
            return f"Grayscale image ({self.image_shape[0]}x{self.image_shape[1]})"
        elif self.n_channels == 3:
            return f"RGB image ({self.image_shape[0]}x{self.image_shape[1]}x3)"
        else:
            return f"RGBA image ({self.image_shape[0]}x{self.image_shape[1]}x4)"

    def __repr__(self) -> str:
        """Return string representation (model name, scalar encoder class, normalization flag)."""
        return (
            f"ImageEncoder(model={self.model.model_name}, "
            f"scalar_encoder={self.scalar_encoder.__class__.__name__}, "
            f"normalize_pixels={self.normalize_pixels})"
        )