# Source code for holovec.encoders.spatial

"""
Spatial encoders for image and grid data.

This module provides encoders for spatial data structures like images,
where both position and value information must be encoded.
"""

from typing import Optional, Tuple, Union, List
from holovec.models.base import VSAModel
from holovec.encoders.base import Encoder
from holovec.encoders.scalar import ScalarEncoder
from holovec.backends.base import Array



# [docs]
class ImageEncoder(Encoder):
    """
    Image encoder for 2D images (grayscale, RGB, or RGBA).

    Encodes images by binding spatial positions (x, y) with pixel values.
    For color images, each channel is bound to a channel dimension vector
    before being combined with position information.

    Encoding strategy:
        For each pixel at position (x, y) with value v:
        1. Encode position: pos_hv = bundle([bind(X, enc(x)), bind(Y, enc(y))])
        2. Encode value(s):
           - Grayscale: val_hv = enc(v)
           - RGB: val_hv = bundle([bind(R, enc(r)), bind(G, enc(g)), bind(B, enc(b))])
           - RGBA: as RGB, with an additional bind(A, enc(a)) term
        3. Bind position with value: pixel_hv = bind(pos_hv, val_hv)
        4. Bundle all pixels: image_hv = bundle([all pixel_hvs])

    ``enc`` is the supplied scalar encoder; the same encoder is used for
    pixel values and for the raw (integer) pixel coordinates.

    Parameters
    ----------
    model : VSAModel
        The VSA model to use for encoding operations.
    scalar_encoder : ScalarEncoder
        Encoder for continuous pixel values. It must have been built on
        the same ``model``.
    normalize_pixels : bool, optional
        Whether to divide integer-typed pixel values by 255 before
        encoding. Float images are passed through unchanged.
        Default is True.
    seed : int, optional
        Base seed for the X/Y/R/G/B/A role vectors, which use
        ``seed`` .. ``seed + 5``. When None, a fixed base seed of 2000 is
        used, so the role vectors are deterministic either way.

    Attributes
    ----------
    n_channels : int or None
        Number of channels in the last encoded image (1, 3, or 4);
        None until ``encode`` has been called successfully.
    image_shape : tuple or None
        Shape (height, width, channels) of the last encoded image;
        None until ``encode`` has been called successfully.

    Examples
    --------
    >>> from holovec import VSA
    >>> from holovec.encoders import ImageEncoder, ThermometerEncoder
    >>> import numpy as np
    >>>
    >>> model = VSA.create('MAP', dim=10000, seed=42)
    >>> scalar_enc = ThermometerEncoder(model, min_val=0, max_val=1, n_bins=256, seed=42)
    >>> encoder = ImageEncoder(model, scalar_enc, normalize_pixels=True, seed=42)
    >>>
    >>> # Encode a small grayscale image
    >>> image = np.array([[100, 150], [200, 250]], dtype=np.uint8)
    >>> hv = encoder.encode(image)
    >>> print(hv.shape)  # (10000,)
    >>>
    >>> # Encode RGB image
    >>> rgb_image = np.random.randint(0, 256, (28, 28, 3), dtype=np.uint8)
    >>> hv_rgb = encoder.encode(rgb_image)
    """

    def __init__(
        self,
        model: VSAModel,
        scalar_encoder: ScalarEncoder,
        normalize_pixels: bool = True,
        seed: Optional[int] = None
    ):
        """
        Initialize ImageEncoder.

        Parameters
        ----------
        model : VSAModel
            Model providing ``random``, ``bind`` and ``bundle``.
        scalar_encoder : ScalarEncoder
            Scalar encoder built on the same ``model``.
        normalize_pixels : bool, optional
            Divide integer-typed pixels by 255 before encoding.
        seed : int, optional
            Base seed for the role vectors; 2000 is used when None.

        Raises
        ------
        TypeError
            If ``scalar_encoder`` is not a ``ScalarEncoder``.
        ValueError
            If ``scalar_encoder`` was built on a different model.
        """
        # Validate and set scalar_encoder BEFORE calling super().__init__
        # because base class checks compatible_models which references it
        if not isinstance(scalar_encoder, ScalarEncoder):
            raise TypeError(
                f"scalar_encoder must be a ScalarEncoder, got {type(scalar_encoder)}"
            )

        if scalar_encoder.model != model:
            raise ValueError(
                "scalar_encoder must use the same VSA model as the ImageEncoder"
            )

        self.scalar_encoder = scalar_encoder
        self.normalize_pixels = normalize_pixels

        super().__init__(model)

        # Role vectors get consecutive seeds so that each one is distinct
        # but reproducible from a single base seed.
        base_seed = seed if seed is not None else 2000

        # Dimension vectors for spatial coordinates
        self.X = model.random(seed=base_seed)      # X dimension
        self.Y = model.random(seed=base_seed + 1)  # Y dimension

        # Dimension vectors for color channels (RGB/RGBA)
        self.R = model.random(seed=base_seed + 2)  # Red channel
        self.G = model.random(seed=base_seed + 3)  # Green channel
        self.B = model.random(seed=base_seed + 4)  # Blue channel
        self.A = model.random(seed=base_seed + 5)  # Alpha channel

        # Track last encoded image properties
        self.n_channels: Optional[int] = None
        self.image_shape: Optional[Tuple[int, ...]] = None

    @staticmethod
    def _validate_image(image) -> Tuple["numpy.ndarray", int, int, int]:
        """Coerce *image* to an (H, W, C) NumPy array and check its shape."""
        # Import numpy locally to avoid module-level backend dependency
        import numpy as _np

        # Convert to numpy array if needed (handles lists, tuples, etc.)
        if not isinstance(image, _np.ndarray):
            image = _np.array(image)

        if image.ndim == 2:
            # Grayscale: add a channel axis, (H, W) -> (H, W, 1)
            height, width = image.shape
            n_channels = 1
            image = _np.expand_dims(image, axis=-1)
        elif image.ndim == 3:
            height, width, n_channels = image.shape
            if n_channels not in [1, 3, 4]:
                raise ValueError(
                    f"Image must have 1, 3, or 4 channels, got {n_channels}"
                )
        else:
            raise ValueError(
                f"Image must be 2D (grayscale) or 3D (color), got shape {image.shape}"
            )

        # Without this check an empty image would reach bundle([]) below
        # and fail (or succeed) in a model-specific way.
        if height == 0 or width == 0:
            raise ValueError(
                f"Image must contain at least one pixel, got shape {image.shape}"
            )

        return image, height, width, n_channels

    def _encode_pixel_value(self, pixel, channel_keys) -> Array:
        """Encode one pixel's channel values into a single value hypervector."""
        encode_scalar = self.scalar_encoder.encode

        if len(channel_keys) == 1:
            # Grayscale: just encode the intensity, no channel binding
            return encode_scalar(float(pixel[0]))

        # Color: bind each channel value to its role vector, then bundle
        channel_hvs = [encode_scalar(float(value)) for value in pixel]
        return self.model.bundle(
            [
                self.model.bind(key, channel_hv)
                for key, channel_hv in zip(channel_keys, channel_hvs)
            ]
        )

    def encode(self, image: Union[Array, "numpy.ndarray"]) -> Array:
        """
        Encode an image into a hypervector.

        Parameters
        ----------
        image : array-like
            Image array with shape (height, width) for grayscale or
            (height, width, channels) for color images, with 1, 3 or 4
            channels. Anything that is not already a NumPy array is
            converted with ``numpy.array``.

        Returns
        -------
        Array
            Hypervector encoding of the image.

        Raises
        ------
        ValueError
            If image has invalid shape or number of channels, or if it
            contains no pixels.

        Notes
        -----
        When ``normalize_pixels`` is True, every integer-typed image is
        divided by 255 (i.e. a 0-255 range is assumed regardless of the
        integer width). Non-integer images are encoded as given.

        ``n_channels`` and ``image_shape`` are updated as a side effect
        once the input has passed validation.
        """
        # Import numpy locally to avoid module-level backend dependency
        import numpy as _np

        image, height, width, n_channels = self._validate_image(image)

        # Store image properties
        self.n_channels = n_channels
        self.image_shape = (height, width, n_channels)

        # Normalize integer pixel values (assumed 0-255) to [0, 1].
        # Floats are assumed to be in the encoder's range already.
        if self.normalize_pixels and _np.issubdtype(image.dtype, _np.integer):
            image = image.astype(_np.float32) / 255.0

        model = self.model
        encode_scalar = self.scalar_encoder.encode

        # The bound coordinate terms depend only on the column / row index,
        # so compute them once per axis instead of once per pixel.
        # Assumes scalar_encoder.encode is deterministic and that
        # bind/bundle do not mutate their inputs -- TODO confirm.
        # NOTE(review): coordinates are encoded as raw indices with the same
        # scalar encoder used for pixel values; if that encoder is limited
        # to e.g. [0, 1], distinct positions may map to the same vector --
        # confirm how out-of-range values are handled.
        x_terms = [
            model.bind(self.X, encode_scalar(float(x))) for x in range(width)
        ]
        y_terms = [
            model.bind(self.Y, encode_scalar(float(y))) for y in range(height)
        ]

        # One role vector per channel; a single entry means grayscale
        channel_keys = (self.R, self.G, self.B, self.A)[:n_channels]

        # Encode all pixels in row-major order
        pixel_hvs = []
        for y, y_term in enumerate(y_terms):
            for x, x_term in enumerate(x_terms):
                pos_hv = model.bundle([x_term, y_term])
                val_hv = self._encode_pixel_value(image[y, x], channel_keys)

                # Bind position with value
                pixel_hvs.append(model.bind(pos_hv, val_hv))

        # Bundle all pixels to create image hypervector
        return model.bundle(pixel_hvs)

    def decode(
        self,
        hypervector: Array,
        height: int,
        width: int,
        n_channels: int = 1
    ) -> "numpy.ndarray":
        """
        Placeholder for image reconstruction; always raises.

        The signature describes what a decoder would need (the target
        image dimensions are not recoverable from the hypervector), but
        no reconstruction is performed and the arguments are not used.

        Parameters
        ----------
        hypervector : Array
            The hypervector to decode.
        height : int
            Target image height.
        width : int
            Target image width.
        n_channels : int, optional
            Number of channels (1, 3, or 4). Default is 1.

        Raises
        ------
        NotImplementedError
            Always. Image decoding is not implemented.

        Notes
        -----
        The encoding bundles one bound (position, value) term per pixel:
            image_hv = bundle([bind(position(i,j), value(pixel[i,j])) for all i,j])

        Decoding would require, for every pixel, unbinding its position
        from the bundle and then decoding the resulting noisy value
        vector. Unbinding from a bundle is generally approximate, so the
        per-pixel results degrade as the number of bundled pixels grows,
        and the cost scales with height x width scalar decodes.

        Alternatives suggested for applications that need something
        like reconstruction:

        1. **Database retrieval**: encode the query image and return the
           most similar hypervector among N known images (O(N)).
        2. **Resonator-style cleanup**: iterative cleanup against a
           pre-built codebook of pixel patterns.
        3. **Learned decoder**: train a model mapping image_hv -> image
           from supervised examples.

        For practical applications, use ImageEncoder for one-way encoding
        (e.g., image -> hypervector -> classifier) rather than reconstruction.

        References
        ----------
        - Plate (2003): "Holographic Reduced Representations"
        """
        raise NotImplementedError(
            "Image decoding is not implemented due to computational intractability. "
            "See docstring for detailed mathematical explanation and alternatives. "
            "For reconstruction tasks, use similarity-based retrieval from a database "
            "of known images, or train a neural decoder network."
        )

    @property
    def is_reversible(self) -> bool:
        """
        Whether the encoder supports decoding.

        Returns
        -------
        bool
            Always False: ``decode`` raises NotImplementedError.
        """
        return False

    @property
    def compatible_models(self) -> List[str]:
        """
        List of compatible VSA model names.

        Returns
        -------
        list of str
            Whatever the wrapped scalar encoder reports as compatible.
        """
        return self.scalar_encoder.compatible_models

    @property
    def input_type(self) -> str:
        """
        Description of expected input type.

        Returns
        -------
        str
            A generic description before any image has been encoded;
            afterwards a description of the last encoded image.
        """
        if self.n_channels is None:
            return "2D array (grayscale) or 3D array (color) with shape (H, W) or (H, W, C)"

        height, width = self.image_shape[0], self.image_shape[1]
        if self.n_channels == 1:
            return f"Grayscale image ({height}x{width})"
        if self.n_channels == 3:
            return f"RGB image ({height}x{width}x3)"
        return f"RGBA image ({height}x{width}x4)"

    def __repr__(self) -> str:
        """Return string representation."""
        return (
            f"ImageEncoder(model={self.model.model_name}, "
            f"scalar_encoder={self.scalar_encoder.__class__.__name__}, "
            f"normalize_pixels={self.normalize_pixels})"
        )