Performance Benchmarks

Topics: Speed comparison, accuracy testing, backend selection, model efficiency
Time: 15 minutes
Prerequisites: 02_models_comparison.py, 01_basic_operations.py
Related: 32_distributed_representations.py, 02_models_comparison.py

This example benchmarks different VSA models and backends to help you choose the right configuration for your application’s performance requirements.

Key concepts:

- Operation speed: bind, bundle, permute, similarity
- Backend comparison: NumPy (CPU) vs PyTorch (GPU) vs JAX (JIT)
- Model efficiency: Memory and computation trade-offs
- Dimension scaling: How performance changes with dimension
- Practical recommendations: Choose based on your constraints

Use this to make informed decisions about model and backend selection.

 23 import time
 24 import numpy as np
 25 from holovec import VSA
 26
 27 print("=" * 70)
 28 print("Performance Benchmarks")
 29 print("=" * 70)
 30 print()
 31
 32 # ============================================================================
 33 # Demo 1: Operation Speed by Model
 34 # ============================================================================
 35 print("=" * 70)
 36 print("Demo 1: Basic Operation Speed (NumPy backend)")
 37 print("=" * 70)
 38
 39 dimension = 10000
 40 n_iterations = 1000
 41
 42 models_to_test = ['MAP', 'FHRR', 'HRR', 'BSC']
 43
 44 print(f"\nDimension: {dimension}")
 45 print(f"Iterations: {n_iterations}")
 46 print(f"Backend: NumPy (CPU)")
 47 print()
 48
 49 results = {}
 50
 51 for model_name in models_to_test:
 52     model = VSA.create(model_name, dim=dimension, seed=42)
 53
 54     # Create test vectors
 55     A = model.random(seed=1)
 56     B = model.random(seed=2)
 57     vectors = [model.random(seed=i) for i in range(10)]
 58
 59     # Benchmark bind
 60     start = time.time()
 61     for _ in range(n_iterations):
 62         _ = model.bind(A, B)
 63     bind_time = (time.time() - start) / n_iterations * 1000  # ms
 64
 65     # Benchmark bundle
 66     start = time.time()
 67     for _ in range(n_iterations):
 68         _ = model.bundle(vectors)
 69     bundle_time = (time.time() - start) / n_iterations * 1000
 70
 71     # Benchmark similarity
 72     start = time.time()
 73     for _ in range(n_iterations):
 74         _ = model.similarity(A, B)
 75     sim_time = (time.time() - start) / n_iterations * 1000
 76
 77     # Benchmark permute (if available)
 78     try:
 79         start = time.time()
 80         for _ in range(n_iterations):
 81             _ = model.permute(A)
 82         perm_time = (time.time() - start) / n_iterations * 1000
 83     except:
 84         perm_time = None
 85
 86     results[model_name] = {
 87         'bind': bind_time,
 88         'bundle': bundle_time,
 89         'similarity': sim_time,
 90         'permute': perm_time
 91     }
 92
 93 # Print results
 94 print(f"{'Model':<10s} {'Bind (ms)':<12s} {'Bundle (ms)':<12s} {'Sim (ms)':<12s} {'Permute (ms)':<12s}")
 95 print("-" * 70)
 96
 97 for model_name, times in results.items():
 98     perm_str = f"{times['permute']:.4f}" if times['permute'] else "N/A"
 99     print(f"{model_name:<10s} {times['bind']:10.4f}   {times['bundle']:10.4f}   "
100           f"{times['similarity']:10.4f}   {perm_str:>10s}")
101
102 print("\nObservations:")
103 print("  - MAP typically fastest (simple multiplication)")
104 print("  - FHRR/HRR slower (FFT operations)")
105 print("  - BSC depends on sparsity (fewer operations on sparse vectors)")
106
# ============================================================================
# Demo 2: Dimension Scaling
# ============================================================================
# Re-runs the bind / bundle / similarity timings for a single model at several
# dimensions to show how per-call latency grows with vector size.
print("\n" + "=" * 70)
print("Demo 2: Performance vs Dimension")
print("=" * 70)

dimensions = [1000, 5000, 10000, 20000]
model_name = 'MAP'  # Test with MAP (fastest)

print(f"\nModel: {model_name}")
print(f"\n{'Dimension':<12s} {'Bind (ms)':<12s} {'Bundle (ms)':<12s} {'Similarity (ms)':<15s}")
print("-" * 60)

for dim in dimensions:
    model = VSA.create(model_name, dim=dim, seed=42)
    A = model.random(seed=1)
    B = model.random(seed=2)
    vectors = [model.random(seed=i) for i in range(10)]

    # Quick benchmark (fewer iterations for larger dims), never below 100.
    # The divisor is clamped to 1 so that a dimension under 1000 does not
    # cause a ZeroDivisionError.
    n_iter = max(100, 10000 // max(1, dim // 1000))

    # time.perf_counter() is used throughout: it is monotonic and has the
    # highest available resolution, which matters for sub-millisecond ops.

    # Bind
    start = time.perf_counter()
    for _ in range(n_iter):
        _ = model.bind(A, B)
    bind_time = (time.perf_counter() - start) / n_iter * 1000

    # Bundle
    start = time.perf_counter()
    for _ in range(n_iter):
        _ = model.bundle(vectors)
    bundle_time = (time.perf_counter() - start) / n_iter * 1000

    # Similarity
    start = time.perf_counter()
    for _ in range(n_iter):
        _ = model.similarity(A, B)
    sim_time = (time.perf_counter() - start) / n_iter * 1000

    print(f"{dim:<12d} {bind_time:10.4f}   {bundle_time:10.4f}   {sim_time:12.4f}")

print("\nScaling pattern:")
print("  - Generally linear with dimension")
print("  - Bundle scales with number of vectors to combine")
print("  - Similarity involves dot product (linear complexity)")
154
# ============================================================================
# Demo 3: Memory Usage
# ============================================================================
# Estimates the storage cost of one hypervector per model as
# `dimension * itemsize`, then scales that to 1000 vectors.
print("\n" + "=" * 70)
print("Demo 3: Memory Footprint")
print("=" * 70)

dimension = 10000

print(f"\nDimension: {dimension}")
print(f"\n{'Model':<10s} {'Dtype':<15s} {'Bytes/Vector':<15s} {'MB/1000 vectors':<15s}")
print("-" * 65)

for model_name in ['MAP', 'FHRR', 'HRR', 'BSC']:
    model = VSA.create(model_name, dim=dimension, seed=42)
    A = model.random(seed=1)

    # Vectors exposing `dtype` report it (and their `itemsize` when present);
    # anything else is labelled generically and costed at 8 bytes per element
    # as an estimate.
    exposes_dtype = hasattr(A, 'dtype')
    dtype = str(A.dtype) if exposes_dtype else "backend-specific"
    itemsize = getattr(A, 'itemsize', 8) if exposes_dtype else 8

    bytes_per_vector = dimension * itemsize
    mb_per_1000 = (bytes_per_vector * 1000) / (1024 * 1024)

    row = f"{model_name:<10s} {dtype:<15s} {bytes_per_vector:<15,d} {mb_per_1000:14.2f}"
    print(row)

print("\nMemory considerations:")
for note in (
    "  - MAP: int8 or float32 (smallest)",
    "  - FHRR/HRR: complex64/128 (larger, 2x float)",
    "  - BSC: Binary sparse (very small if sparse)",
    "  - Choose based on storage constraints",
):
    print(note)
190
# ============================================================================
# Demo 4: Accuracy Under Noise
# ============================================================================
# For each noise level, mixes a random "noise" vector into A in proportion to
# that level and reports how similar the result still is to A, per model.
print("\n" + "=" * 70)
print("Demo 4: Noise Tolerance Comparison")
print("=" * 70)

dimension = 10000
noise_levels = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5]

# Number of slots in the weighted bundle used to mix signal and noise. With 10
# slots every level in `noise_levels` (a multiple of 0.1) maps to a whole
# number of noise slots.
mix_slots = 10

print(f"\nDimension: {dimension}")
print("Test: similarity(A, A + noise)")
print()

print(f"{'Noise':<10s} ", end="")
for model_name in models_to_test:
    print(f"{model_name:<10s} ", end="")
print()
print("-" * 55)

for noise_level in noise_levels:
    print(f"{noise_level:<10.1f} ", end="")

    # Translate the noise fraction into a signal/noise slot split. Previously
    # `noise_level` was never used, so every row printed the same numbers.
    n_noise = round(noise_level * mix_slots)
    n_signal = mix_slots - n_noise

    for model_name in models_to_test:
        model = VSA.create(model_name, dim=dimension, seed=42)
        A = model.random(seed=1)

        # Add noise: bundle `n_signal` copies of A with `n_noise` copies of an
        # unrelated random vector, so the noise share equals `noise_level`.
        # NOTE(review): assumes model.bundle accepts repeated vectors and
        # weights each occurrence equally (tie-breaking at the 0.5 level is
        # model-specific) -- confirm against the library.
        noise = model.random(seed=999)
        noisy_A = model.bundle([A] * n_signal + [noise] * n_noise)

        # Measure similarity to original
        sim = float(model.similarity(A, noisy_A))
        print(f"{sim:10.3f} ", end="")

    print()

print("\nNoise tolerance:")
print("  - All models degrade gracefully with noise")
print("  - Higher dimension = better noise tolerance")
print("  - Use cleanup strategies for noise-heavy applications")
232
# ============================================================================
# Demo 5: Bundling Capacity
# ============================================================================
# For each bundle size N, bundles one target vector together with N-1 other
# random vectors and reports the similarity between the bundle and the target.
print("\n" + "=" * 70)
print("Demo 5: Bundling Capacity (Information Loss)")
print("=" * 70)

dimension = 10000
bundle_sizes = [1, 5, 10, 20, 50, 100]

print(f"\nDimension: {dimension}")
print("Test: similarity after bundling N vectors")
print()

# Table header: one left-aligned column per model.
header_cells = [f"{'N vectors':<12s} "]
header_cells.extend(f"{name:<10s} " for name in models_to_test)
print("".join(header_cells))
print("-" * 60)

for n in bundle_sizes:
    # Each table row is assembled first and emitted with a single print.
    row_cells = [f"{n:<12d} "]

    for model_name in models_to_test:
        model = VSA.create(model_name, dim=dimension, seed=42)

        # The target uses seed 1; the N-1 companions use seeds 101..100+N-1.
        target = model.random(seed=1)
        others = [model.random(seed=100 + k) for k in range(1, n)]

        # A "bundle" of one vector is just the vector itself.
        bundled = target if n == 1 else model.bundle([target, *others])

        # Similarity to original
        sim = float(model.similarity(bundled, target))
        row_cells.append(f"{sim:10.3f} ")

    print("".join(row_cells))

print("\nCapacity insights:")
for insight in (
    "  - Similarity degrades as bundle size increases",
    "  - MAP maintains higher similarity (sum-based)",
    "  - Higher dimension supports more vectors in bundle",
):
    print(insight)
279
# ============================================================================
# Demo 6: Backend Comparison (if available)
# ============================================================================
# Probes which backends can be instantiated and, when more than one is
# usable, times bind / bundle / similarity on each of them.
print("\n" + "=" * 70)
print("Demo 6: Backend Comparison")
print("=" * 70)

available_backends = []

# A backend counts as available if a model can be constructed with it.
# Construction failures (e.g. an optional dependency that is not installed)
# are expected and skipped. `except Exception` is used instead of a bare
# `except` so that KeyboardInterrupt / SystemExit are not swallowed.
for candidate in ('numpy', 'torch', 'jax'):
    try:
        model = VSA.create('MAP', dim=10000, backend=candidate, seed=42)
    except Exception:
        continue
    available_backends.append(candidate)

print(f"\nAvailable backends: {', '.join(available_backends) or 'none'}")

if len(available_backends) > 1:
    print("\nBenchmarking available backends...")
    dimension = 10000
    n_iter = 100

    print(f"\n{'Backend':<10s} {'Bind (ms)':<12s} {'Bundle (ms)':<12s} {'Similarity (ms)':<15s}")
    print("-" * 60)

    for backend in available_backends:
        model = VSA.create('MAP', dim=dimension, backend=backend, seed=42)
        A = model.random(seed=1)
        B = model.random(seed=2)
        vectors = [model.random(seed=i) for i in range(10)]

        # Warm-up: run each op once untimed so one-off costs (lazy
        # initialisation, JIT compilation) are not charged to the first
        # timed iteration.
        model.bind(A, B)
        model.bundle(vectors)
        model.similarity(A, B)

        # NOTE(review): backends that dispatch work asynchronously (GPU
        # PyTorch, JAX) may return before the computation finishes, so these
        # wall-clock numbers could under-report -- confirm whether an explicit
        # synchronisation step is needed for those backends.

        # Bind
        start = time.perf_counter()
        for _ in range(n_iter):
            _ = model.bind(A, B)
        bind_time = (time.perf_counter() - start) / n_iter * 1000

        # Bundle
        start = time.perf_counter()
        for _ in range(n_iter):
            _ = model.bundle(vectors)
        bundle_time = (time.perf_counter() - start) / n_iter * 1000

        # Similarity
        start = time.perf_counter()
        for _ in range(n_iter):
            _ = model.similarity(A, B)
        sim_time = (time.perf_counter() - start) / n_iter * 1000

        print(f"{backend:<10s} {bind_time:10.4f}   {bundle_time:10.4f}   {sim_time:12.4f}")
elif available_backends:
    print(f"\nOnly {available_backends[0]} backend available.")
    print("\nTo test other backends:")
    print("  pip install torch  # For GPU acceleration")
    print("  pip install jax jaxlib  # For JIT compilation")
else:
    # Previously this case indexed an empty list and raised IndexError.
    print("\nNo backend could be initialized; skipping backend benchmark.")

print("\nBackend recommendations:")
print("  - NumPy: Default, good for CPU, no extra dependencies")
print("  - PyTorch: Best for GPU, large batches, deep learning integration")
print("  - JAX: Best for JIT compilation, TPU, functional programming")
355
# ============================================================================
# Summary
# ============================================================================
# Prints the closing recommendations. Each section is a heading followed by
# its bullet lines and a blank line; the content is kept as data so the
# printing logic lives in one place.
banner_rule = "=" * 70

print("\n" + banner_rule)
print("Summary: Performance Recommendations")
print(banner_rule)
print()

summary_sections = [
    ("✓ Model Selection by Speed:", [
        "  1. MAP - Fastest (element-wise multiplication)",
        "  2. BSC - Fast for sparse operations",
        "  3. HRR/FHRR - Slower (FFT overhead)",
    ]),
    ("✓ Dimension Recommendations:", [
        "  - Small problems (<1000 items): 1000-5000 dim",
        "  - Medium problems (1000-10000 items): 5000-10000 dim",
        "  - Large problems (>10000 items): 10000-20000 dim",
    ]),
    ("✓ Backend Selection:", [
        "  - CPU only: NumPy (default)",
        "  - GPU available: PyTorch (faster for large batches)",
        "  - Need JIT/TPU: JAX (compile once, run fast)",
    ]),
    ("✓ Memory Constraints:", [
        "  - Limited memory: MAP with lower dimension",
        "  - Plenty of memory: Any model, higher dimension",
        "  - Sparse data: BSC (efficient sparse storage)",
    ]),
    ("✓ Noise Tolerance:", [
        "  - High noise: Higher dimension, use cleanup strategies",
        "  - Low noise: Standard dimension (10000) sufficient",
    ]),
]

for heading, bullet_lines in summary_sections:
    print(heading)
    for bullet in bullet_lines:
        print(bullet)
    print()

# Pointers to the follow-up examples.
print("Next steps:")
for pointer in (
    "  → 32_distributed_representations.py - Capacity deep dive",
    "  → 33_error_handling_robustness.py - Noise handling strategies",
    "  → 02_models_comparison.py - Model characteristics",
):
    print(pointer)
print()
print(banner_rule)

Gallery generated by Sphinx-Gallery