Note
Go to the end to download the full example code.
Performance Benchmarks
Topics: Speed comparison, accuracy testing, backend selection, model efficiency
Time: 15 minutes
Prerequisites: 02_models_comparison.py, 01_basic_operations.py
Related: 32_distributed_representations.py, 02_models_comparison.py
This example benchmarks different VSA models and backends to help you choose the right configuration for your application’s performance requirements.
Key concepts:
- Operation speed: bind, bundle, permute, similarity
- Backend comparison: NumPy (CPU) vs PyTorch (GPU) vs JAX (JIT)
- Model efficiency: Memory and computation trade-offs
- Dimension scaling: How performance changes with dimension
- Practical recommendations: Choose based on your constraints
Use this to make informed decisions about model and backend selection.
23 import time
24 import numpy as np
25 from holovec import VSA
26
# Title banner for the benchmark report: rule, title, rule, blank line.
print("=" * 70, "Performance Benchmarks", "=" * 70, "", sep="\n")
31
# ============================================================================
# Demo 1: Operation Speed by Model
# ============================================================================
# Times bind, bundle, similarity and permute for every model in
# `models_to_test` and prints the mean per-call duration in milliseconds.
print("=" * 70)
print("Demo 1: Basic Operation Speed (NumPy backend)")
print("=" * 70)

dimension = 10000
n_iterations = 1000

models_to_test = ['MAP', 'FHRR', 'HRR', 'BSC']

print(f"\nDimension: {dimension}")
print(f"Iterations: {n_iterations}")
print("Backend: NumPy (CPU)")
print()

# model name -> {'bind' | 'bundle' | 'similarity' | 'permute': mean ms per call}
# 'permute' is None when the permute benchmark raised for that model.
results = {}

for model_name in models_to_test:
    model = VSA.create(model_name, dim=dimension, seed=42)

    # Create test vectors
    A = model.random(seed=1)
    B = model.random(seed=2)
    vectors = [model.random(seed=i) for i in range(10)]

    # time.perf_counter() is used instead of time.time(): it is monotonic and
    # has the highest available resolution, which matters for sub-ms timings.

    # Benchmark bind
    start = time.perf_counter()
    for _ in range(n_iterations):
        _ = model.bind(A, B)
    bind_time = (time.perf_counter() - start) / n_iterations * 1000  # ms

    # Benchmark bundle (10 vectors per call)
    start = time.perf_counter()
    for _ in range(n_iterations):
        _ = model.bundle(vectors)
    bundle_time = (time.perf_counter() - start) / n_iterations * 1000

    # Benchmark similarity
    start = time.perf_counter()
    for _ in range(n_iterations):
        _ = model.similarity(A, B)
    sim_time = (time.perf_counter() - start) / n_iterations * 1000

    # Benchmark permute (if available). Best-effort: a model that cannot
    # permute is reported as N/A. `except Exception` (not a bare `except:`)
    # lets KeyboardInterrupt / SystemExit propagate.
    try:
        start = time.perf_counter()
        for _ in range(n_iterations):
            _ = model.permute(A)
        perm_time = (time.perf_counter() - start) / n_iterations * 1000
    except Exception:
        perm_time = None

    results[model_name] = {
        'bind': bind_time,
        'bundle': bundle_time,
        'similarity': sim_time,
        'permute': perm_time,
    }

# Print results
print(f"{'Model':<10s} {'Bind (ms)':<12s} {'Bundle (ms)':<12s} {'Sim (ms)':<12s} {'Permute (ms)':<12s}")
print("-" * 70)

for model_name, times in results.items():
    # Compare against None explicitly: a measured 0.0 ms is a valid timing and
    # must not be shown as "N/A".
    perm_str = f"{times['permute']:.4f}" if times['permute'] is not None else "N/A"
    print(f"{model_name:<10s} {times['bind']:10.4f} {times['bundle']:10.4f} "
          f"{times['similarity']:10.4f} {perm_str:>10s}")

print("\nObservations:")
print(" - MAP typically fastest (simple multiplication)")
print(" - FHRR/HRR slower (FFT operations)")
print(" - BSC depends on sparsity (fewer operations on sparse vectors)")
106
# ============================================================================
# Demo 2: Dimension Scaling
# ============================================================================
# Repeats the bind / bundle / similarity timings for one model at several
# dimensions and prints the mean per-call duration in milliseconds.
print("\n" + "=" * 70)
print("Demo 2: Performance vs Dimension")
print("=" * 70)

dimensions = [1000, 5000, 10000, 20000]
model_name = 'MAP'  # Test with MAP (fastest)

print(f"\nModel: {model_name}")
print(f"\n{'Dimension':<12s} {'Bind (ms)':<12s} {'Bundle (ms)':<12s} {'Similarity (ms)':<15s}")
print("-" * 60)

for dim in dimensions:
    model = VSA.create(model_name, dim=dim, seed=42)
    A = model.random(seed=1)
    B = model.random(seed=2)
    vectors = [model.random(seed=i) for i in range(10)]

    # Quick benchmark (fewer iterations for larger dims), never below 100.
    # max(1, ...) keeps the divisor non-zero if a dimension below 1000 is
    # ever added to `dimensions`; results for the listed values are unchanged.
    n_iter = max(100, 10000 // max(1, dim // 1000))

    # time.perf_counter() is monotonic and high-resolution, unlike time.time().

    # Bind
    start = time.perf_counter()
    for _ in range(n_iter):
        _ = model.bind(A, B)
    bind_time = (time.perf_counter() - start) / n_iter * 1000

    # Bundle (10 vectors per call)
    start = time.perf_counter()
    for _ in range(n_iter):
        _ = model.bundle(vectors)
    bundle_time = (time.perf_counter() - start) / n_iter * 1000

    # Similarity
    start = time.perf_counter()
    for _ in range(n_iter):
        _ = model.similarity(A, B)
    sim_time = (time.perf_counter() - start) / n_iter * 1000

    print(f"{dim:<12d} {bind_time:10.4f} {bundle_time:10.4f} {sim_time:12.4f}")

print("\nScaling pattern:")
print(" - Generally linear with dimension")
print(" - Bundle scales with number of vectors to combine")
print(" - Similarity involves dot product (linear complexity)")
154
# ============================================================================
# Demo 3: Memory Usage
# ============================================================================
# Estimates per-vector storage for each model as `dimension * itemsize`,
# taking dtype and itemsize from a sample vector when it exposes them.
print("\n" + "=" * 70)
print("Demo 3: Memory Footprint")
print("=" * 70)

dimension = 10000

print(f"\nDimension: {dimension}")
print(f"\n{'Model':<10s} {'Dtype':<15s} {'Bytes/Vector':<15s} {'MB/1000 vectors':<15s}")
print("-" * 65)

for model_name in ['MAP', 'FHRR', 'HRR', 'BSC']:
    model = VSA.create(model_name, dim=dimension, seed=42)
    A = model.random(seed=1)

    if not hasattr(A, 'dtype'):
        # No dtype exposed by this vector type: fall back to an 8-byte estimate.
        dtype, itemsize = "backend-specific", 8
    else:
        dtype = str(A.dtype)
        # 8 bytes per element is assumed when the vector has no `itemsize`.
        itemsize = getattr(A, 'itemsize', 8)

    bytes_per_vector = dimension * itemsize
    mb_per_1000 = bytes_per_vector * 1000 / (1024 * 1024)

    print(f"{model_name:<10s} {dtype:<15s} {bytes_per_vector:<15,d} {mb_per_1000:14.2f}")

for note in (
    "\nMemory considerations:",
    " - MAP: int8 or float32 (smallest)",
    " - FHRR/HRR: complex64/128 (larger, 2x float)",
    " - BSC: Binary sparse (very small if sparse)",
    " - Choose based on storage constraints",
):
    print(note)
190
# ============================================================================
# Demo 4: Accuracy Under Noise
# ============================================================================
# For each noise level and model, mixes a random "noise" vector into A and
# prints the similarity between A and the noisy result.
print("\n" + "=" * 70)
print("Demo 4: Noise Tolerance Comparison")
print("=" * 70)

dimension = 10000
noise_levels = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5]

# The mix is built by bundling `noise_slots` vectors in total, of which a
# `noise_level` share are copies of the noise vector and the rest copies of A.
# 10 slots represent every level in `noise_levels` exactly.
noise_slots = 10

print(f"\nDimension: {dimension}")
print("Test: similarity(A, A + noise)")
print()

print(f"{'Noise':<10s} ", end="")
for model_name in models_to_test:
    print(f"{model_name:<10s} ", end="")
print()
print("-" * 55)

for noise_level in noise_levels:
    print(f"{noise_level:<10.1f} ", end="")

    # Previously `noise_level` was never used, so every row repeated the
    # 50/50 result. Convert it to a slot count, clamped to [0, noise_slots].
    n_noise = min(noise_slots, max(0, round(noise_level * noise_slots)))
    n_signal = noise_slots - n_noise

    for model_name in models_to_test:
        model = VSA.create(model_name, dim=dimension, seed=42)
        A = model.random(seed=1)

        # Add noise
        noise = model.random(seed=999)
        if n_noise == 0:
            # No noise requested: compare A with itself.
            noisy_A = A
        else:
            # Proportional noise via repeated bundling; a level of 0.5 gives
            # the same 1:1 ratio as the original bundle([A, noise]).
            # NOTE(review): assumes bundle() accepts repeated vectors and
            # weights them by count. For models whose bundle thresholds the
            # sum (majority/sign), levels below 0.5 may leave A unchanged --
            # confirm against the holovec bundling semantics.
            noisy_A = model.bundle([A] * n_signal + [noise] * n_noise)

        # Measure similarity to original
        sim = float(model.similarity(A, noisy_A))
        print(f"{sim:10.3f} ", end="")

    print()

print("\nNoise tolerance:")
print(" - All models degrade gracefully with noise")
print(" - Higher dimension = better noise tolerance")
print(" - Use cleanup strategies for noise-heavy applications")
232
# ============================================================================
# Demo 5: Bundling Capacity
# ============================================================================
# For each bundle size N and model, bundles a target vector with N-1 other
# random vectors and prints the similarity of the bundle to the target.
print("\n" + "=" * 70)
print("Demo 5: Bundling Capacity (Information Loss)")
print("=" * 70)

dimension = 10000
bundle_sizes = [1, 5, 10, 20, 50, 100]

print(f"\nDimension: {dimension}")
print(f"Test: similarity after bundling N vectors")
print()

# Header row: size column followed by one column per model.
header_cells = [f"{'N vectors':<12s} "]
header_cells.extend(f"{name:<10s} " for name in models_to_test)
print("".join(header_cells))
print("-" * 60)

for n in bundle_sizes:
    row_cells = [f"{n:<12d} "]

    for model_name in models_to_test:
        model = VSA.create(model_name, dim=dimension, seed=42)

        # Target plus n-1 companions drawn with seeds 101 .. 100+n-1.
        target = model.random(seed=1)
        others = [model.random(seed=s) for s in range(101, 100 + n)]

        # A single vector is used as-is; bundle() is only called for n > 1.
        bundled = target if n == 1 else model.bundle([target, *others])

        # Similarity to original
        sim = float(model.similarity(bundled, target))
        row_cells.append(f"{sim:10.3f} ")

    print("".join(row_cells))

for insight in (
    "\nCapacity insights:",
    " - Similarity degrades as bundle size increases",
    " - MAP maintains higher similarity (sum-based)",
    " - Higher dimension supports more vectors in bundle",
):
    print(insight)
279
# ============================================================================
# Demo 6: Backend Comparison (if available)
# ============================================================================
# Probes which backends can build a model, then (when more than one is
# available) times bind / bundle / similarity on each.
print("\n" + "=" * 70)
print("Demo 6: Backend Comparison")
print("=" * 70)

available_backends = []

# A backend counts as available when VSA.create() succeeds for it. The probe
# is best-effort, so any ordinary error means "not available". `except
# Exception` (not a bare `except:`) lets KeyboardInterrupt / SystemExit
# propagate.
for candidate in ('numpy', 'torch', 'jax'):
    try:
        model = VSA.create('MAP', dim=10000, backend=candidate, seed=42)
    except Exception:
        continue
    available_backends.append(candidate)

print(f"\nAvailable backends: {', '.join(available_backends)}")

if len(available_backends) > 1:
    print("\nBenchmarking available backends...")
    dimension = 10000
    n_iter = 100

    print(f"\n{'Backend':<10s} {'Bind (ms)':<12s} {'Bundle (ms)':<12s} {'Similarity (ms)':<15s}")
    print("-" * 60)

    # NOTE(review): backends that dispatch work asynchronously (e.g. a GPU
    # device or JAX) may return before the computation finishes; these loops
    # do not synchronise, so such timings could under-report -- confirm how
    # the holovec backends execute.
    for backend in available_backends:
        model = VSA.create('MAP', dim=dimension, backend=backend, seed=42)
        A = model.random(seed=1)
        B = model.random(seed=2)
        vectors = [model.random(seed=i) for i in range(10)]

        # time.perf_counter() is monotonic and high-resolution, unlike
        # time.time().

        # Bind
        start = time.perf_counter()
        for _ in range(n_iter):
            _ = model.bind(A, B)
        bind_time = (time.perf_counter() - start) / n_iter * 1000

        # Bundle (10 vectors per call)
        start = time.perf_counter()
        for _ in range(n_iter):
            _ = model.bundle(vectors)
        bundle_time = (time.perf_counter() - start) / n_iter * 1000

        # Similarity
        start = time.perf_counter()
        for _ in range(n_iter):
            _ = model.similarity(A, B)
        sim_time = (time.perf_counter() - start) / n_iter * 1000

        print(f"{backend:<10s} {bind_time:10.4f} {bundle_time:10.4f} {sim_time:12.4f}")
else:
    # Zero or one backend: nothing to compare. Guard the empty case, which
    # used to raise IndexError on available_backends[0].
    if available_backends:
        print(f"\nOnly {available_backends[0]} backend available.")
    else:
        print("\nNo backend could be initialized.")
    print("\nTo test other backends:")
    print(" pip install torch # For GPU acceleration")
    print(" pip install jax jaxlib # For JIT compilation")

print("\nBackend recommendations:")
print(" - NumPy: Default, good for CPU, no extra dependencies")
print(" - PyTorch: Best for GPU, large batches, deep learning integration")
print(" - JAX: Best for JIT compilation, TPU, functional programming")
355
# ============================================================================
# Summary
# ============================================================================
# Prints the closing recommendations: a banner, then each titled section
# followed by a blank line, then a closing rule.
summary_rule = "=" * 70

print("\n" + summary_rule)
print("Summary: Performance Recommendations")
print(summary_rule)
print()

# (heading, lines printed beneath it) in output order.
summary_sections = (
    ("✓ Model Selection by Speed:", (
        " 1. MAP - Fastest (element-wise multiplication)",
        " 2. BSC - Fast for sparse operations",
        " 3. HRR/FHRR - Slower (FFT overhead)",
    )),
    ("✓ Dimension Recommendations:", (
        " - Small problems (<1000 items): 1000-5000 dim",
        " - Medium problems (1000-10000 items): 5000-10000 dim",
        " - Large problems (>10000 items): 10000-20000 dim",
    )),
    ("✓ Backend Selection:", (
        " - CPU only: NumPy (default)",
        " - GPU available: PyTorch (faster for large batches)",
        " - Need JIT/TPU: JAX (compile once, run fast)",
    )),
    ("✓ Memory Constraints:", (
        " - Limited memory: MAP with lower dimension",
        " - Plenty of memory: Any model, higher dimension",
        " - Sparse data: BSC (efficient sparse storage)",
    )),
    ("✓ Noise Tolerance:", (
        " - High noise: Higher dimension, use cleanup strategies",
        " - Low noise: Standard dimension (10000) sufficient",
    )),
    ("Next steps:", (
        " → 32_distributed_representations.py - Capacity deep dive",
        " → 33_error_handling_robustness.py - Noise handling strategies",
        " → 02_models_comparison.py - Model characteristics",
    )),
)

for heading, detail_lines in summary_sections:
    print(heading)
    for detail in detail_lines:
        print(detail)
    print()

print(summary_rule)