Multivariate Vector Encoding

Topics: VectorEncoder, scalar encoder composition, high-dimensional data
Time: 15 minutes
Prerequisites: 00_quickstart.py, 10_encoders_scalar.py
Related: 17_encoders_image.py, 21_app_image_recognition.py

This example demonstrates the VectorEncoder, which encodes multivariate vectors by binding each dimension with its scalar value. This creates distributed representations that preserve similarity between vectors with similar values.

Key concepts:
- Dimension binding: bind(dim_i, scalar_encode(value_i))
- Scalar composition: works with any scalar encoder (FPE, Thermometer, Level)
- High-dimensional: scales to 64D, 784D, or higher
- Reversible: can decode to approximate original values when the scalar encoder is reversible (e.g. FPE)

The VectorEncoder is fundamental for feature vectors, embeddings, sensor data, and flattened images (MNIST-style pixel arrays).

 24 import numpy as np
 25 from holovec import VSA
 26 from holovec.encoders import (
 27     VectorEncoder,
 28     FractionalPowerEncoder,
 29     ThermometerEncoder,
 30     LevelEncoder,
 31 )
 32
 33 print("=" * 70)
 34 print("Multivariate Vector Encoding")
 35 print("=" * 70)
 36 print()
 37
 38 # ============================================================================
 39 # Demo 1: Basic VectorEncoder Usage
 40 # ============================================================================
 41 print("=" * 70)
 42 print("Demo 1: Basic VectorEncoder Usage")
 43 print("=" * 70)
 44
 45 # Create model and scalar encoder
 46 model = VSA.create('FHRR', dim=5000, seed=42)
 47 scalar_enc = FractionalPowerEncoder(model, min_val=0, max_val=10, seed=42)
 48
 49 # Create vector encoder for 5D vectors
 50 encoder = VectorEncoder(
 51     model,
 52     scalar_encoder=scalar_enc,
 53     n_dimensions=5,
 54     seed=42
 55 )
 56
 57 print(f"\nEncoder: {encoder}")
 58 print(f"Reversible: {encoder.is_reversible}")
 59 print(f"Input type: {encoder.input_type}")
 60
 61 # Encode some vectors
 62 vectors = {
 63     "v1": np.array([1.0, 2.0, 3.0, 4.0, 5.0]),
 64     "v2": np.array([1.1, 2.1, 3.1, 4.1, 5.1]),  # Close to v1
 65     "v3": np.array([5.0, 4.0, 3.0, 2.0, 1.0]),  # Reversed
 66     "v4": np.array([8.0, 9.0, 7.0, 6.0, 10.0]), # Different
 67 }
 68
 69 print("\nEncoding vectors:")
 70 encoded = {}
 71 for name, vec in vectors.items():
 72     hv = encoder.encode(vec)
 73     encoded[name] = hv
 74     print(f"  {name}: {vec} → HV shape: {hv.shape}")
 75
 76 # Similarity matrix
 77 print("\nSimilarity Matrix:")
 78 names = list(vectors.keys())
 79 print("     ", "  ".join(f"{n:>6}" for n in names))
 80 for i, name1 in enumerate(names):
 81     similarities = [
 82         float(model.similarity(encoded[name1], encoded[name2]))
 83         for name2 in names
 84     ]
 85     print(f"{name1:>4}", "  ".join(f"{s:6.3f}" for s in similarities))
 86
 87 # Test decoding
 88 print("\nDecoding test:")
 89 for name in ['v1', 'v2']:
 90     original = vectors[name]
 91     decoded = encoder.decode(encoded[name])
 92     error = np.abs(original - decoded)
 93     print(f"\n  {name} (original): {original}")
 94     print(f"  {name} (decoded):  {decoded}")
 95     print(f"  Error:            {error}")
 96     print(f"  Max error:        {np.max(error):.3f}")
 97
 98 print("\nKey observations:")
 99 print("  - Similar vectors have high similarity")
100 print("  - Different vectors are distinguishable")
101 print("  - Decoding recovers approximate values")
102
# ============================================================================
# Demo 2: Scalar Encoder Composition
# ============================================================================
print("\n" + "=" * 70)
print("Demo 2: Scalar Encoder Composition")
print("=" * 70)

# Test vector encoded by every VectorEncoder variant below
test_vector = np.array([2.5, 5.0, 7.5])

# Same model, dimensionality and seed for each entry; only the scalar
# encoder plugged into the VectorEncoder differs.
encoders = {
    "FractionalPower": VectorEncoder(
        model,
        FractionalPowerEncoder(model, 0, 10, seed=42),
        n_dimensions=3,
        seed=42,
    ),
    "Thermometer": VectorEncoder(
        model,
        ThermometerEncoder(model, 0, 10, n_bins=20),
        n_dimensions=3,
        seed=42,
    ),
    "Level": VectorEncoder(
        model,
        LevelEncoder(model, 0, 10, n_levels=11),
        n_dimensions=3,
        seed=42,
    ),
}

print(f"\nTest vector: {test_vector}")
print("\nScalar Encoder      | Reversible | HV Shape    | Can Decode")
print("-" * 65)

encoded_comp = {}
for name, enc in encoders.items():
    hv = enc.encode(test_vector)
    encoded_comp[name] = hv
    # The decode step further down is gated on this same flag, so the
    # "Reversible" and "Can Decode" columns share one value.
    flag = "Yes" if enc.is_reversible else "No "
    # Field widths (19 / 10 / 11) match the header literal above so the
    # "|" separators line up. "!s" converts the shape to text first so
    # the width specifier can be applied to it.
    print(f"{name:<19} | {flag:>10} | {hv.shape!s:<11} | {flag}")

# Compare the hypervectors produced by the different scalar encoders
print("\nCross-encoder similarities:")
names = list(encoders)
# Blank corner cell is as wide as the row labels so titles align with
# the value columns beneath them.
print(f"{'':18}", "  ".join(f"{n:>15}" for n in names))
for row_name in names:
    similarities = [
        float(model.similarity(encoded_comp[row_name], encoded_comp[col_name]))
        for col_name in names
    ]
    print(f"{row_name:18}", "  ".join(f"{s:15.3f}" for s in similarities))

# Decode where possible; encoders that do not report is_reversible are
# skipped instead of calling decode() on them.
print("\nDecoding (where supported):")
for name, enc in encoders.items():
    if enc.is_reversible:
        decoded = enc.decode(encoded_comp[name])
        error = np.abs(test_vector - decoded)
        print(f"\n  {name}:")
        print(f"    Original: {test_vector}")
        print(f"    Decoded:  {decoded}")
        print(f"    Max err:  {np.max(error):.3f}")
    else:
        print(f"\n  {name}: Not reversible (skipped)")

print("\nKey observations:")
print("  - VectorEncoder works with any scalar encoder")
print("  - Different scalar encoders give different properties")
print("  - Composition enables flexible encoding strategies")
175
# ============================================================================
# Demo 3: High-Dimensional Data (MNIST-style Images)
# ============================================================================
print("\n" + "=" * 70)
print("Demo 3: High-Dimensional Data (MNIST-style)")
print("=" * 70)

# Simulate small grayscale images (8x8 like mini-MNIST)
model_hd = VSA.create('FHRR', dim=10000, seed=42)

# Create encoder for 64-dimensional vectors (8x8 images); the scalar
# encoder is configured for the 0-255 grayscale range.
scalar_enc_hd = FractionalPowerEncoder(model_hd, min_val=0, max_val=255, seed=42)
encoder_hd = VectorEncoder(
    model_hd,
    scalar_encoder=scalar_enc_hd,
    n_dimensions=64,  # 8x8 flattened
    normalize_input=False,
    seed=42,
)

print(f"\nEncoder: {encoder_hd}")
print("Image size: 8x8 = 64 pixels")
print("Pixel range: 0-255 (grayscale)")

# Create synthetic "images" (seeded so the demo output is repeatable)
np.random.seed(42)
dark_image = np.random.randint(0, 50, 64)
# "digit_1b" has to be a perturbed copy of "digit_1" to serve as the
# "similar image" case; drawing a second independent random image would
# make it unrelated. Clipping keeps the perturbed pixels inside the
# 0-255 range the scalar encoder was configured with.
dark_variant = np.clip(dark_image + np.random.randint(-5, 5, 64), 0, 255)
images = {
    "digit_1": dark_image,                        # Dark image
    "digit_1b": dark_variant,                     # digit_1 plus small noise
    "digit_7": np.random.randint(100, 255, 64),   # Bright image
    "noise": np.random.randint(0, 255, 64),       # Random
}

print("\nEncoding images:")
encoded_imgs = {}
for name, img in images.items():
    hv = encoder_hd.encode(img.astype(float))
    encoded_imgs[name] = hv
    print(f"  {name:10s}: mean={np.mean(img):6.1f}, std={np.std(img):5.1f} "
          f"→ HV shape: {hv.shape}")

# Pairwise similarity matrix over all encoded images
print("\nSimilarity Matrix:")
names = list(images)
# Blank corner cell is as wide as the row labels so titles align with
# the value columns beneath them.
print(f"{'':10}", "  ".join(f"{n:>10}" for n in names))
for row_name in names:
    similarities = [
        float(model_hd.similarity(encoded_imgs[row_name], encoded_imgs[col_name]))
        for col_name in names
    ]
    print(f"{row_name:10}", "  ".join(f"{s:10.3f}" for s in similarities))

# Test reconstruction
print("\nReconstruction test (first image):")
original = images["digit_1"].astype(float)
decoded = encoder_hd.decode(encoded_imgs["digit_1"])

# Reshape for display (8x8)
orig_grid = original.reshape(8, 8)
dec_grid = decoded.reshape(8, 8)

print("\nOriginal image (8x8):")
print(orig_grid.astype(int))

print("\nDecoded image (8x8):")
print(dec_grid.astype(int))

# Root-mean-square pixel error and Pearson correlation between the
# original and the decoded pixels.
rmse = np.sqrt(np.mean((original - decoded) ** 2))
print(f"\nReconstruction RMSE: {rmse:.2f}")
print(f"Pixel correlation: {np.corrcoef(original, decoded)[0, 1]:.3f}")

print("\nKey observations:")
print("  - VectorEncoder scales to high-dimensional data (64D, 784D, etc.)")
print("  - Similar images have higher similarity")
print("  - Approximate reconstruction preserves main features")
print("  - Ready for classification via similarity search")
252
# ============================================================================
# Summary
# ============================================================================
# The closing report is assembled as a list of lines and emitted with a
# single print; empty strings stand for the blank lines.
summary_rule = "=" * 70
summary_lines = [
    "",
    summary_rule,
    "Summary: VectorEncoder Key Takeaways",
    summary_rule,
    "",
    "✓ Multivariate: Encodes vectors by binding dimensions with values",
    "✓ Compositional: Works with any scalar encoder (FPE, Thermometer, Level)",
    "✓ Scalable: Handles 5D, 64D, 784D, or higher dimensions",
    "✓ Reversible: Decodes to approximate original values (with FPE)",
    "✓ Similarity-preserving: Similar vectors → similar hypervectors",
    "",
    "Use cases:",
    "  - Feature vectors: ML model outputs, embeddings",
    "  - Sensor data: Multiple sensors, time steps",
    "  - Flattened images: MNIST (784D), CIFAR (3072D)",
    "  - Scientific data: Multivariate measurements",
    "",
    "Next steps:",
    "  → 17_encoders_image.py - 2D spatial structure encoding",
    "  → 21_app_image_recognition.py - Apply to real image data",
    "  → 25_app_integration_patterns.py - Combine with other encoders",
    "",
    summary_rule,
]
print("\n".join(summary_lines))

Gallery generated by Sphinx-Gallery