Error Handling and Robustness¶

Topics: Noise tolerance, error propagation, fault recovery, graceful degradation Time: 15 minutes Prerequisites: 01_basic_operations.py, 27_cleanup_strategies.py Related: 32_distributed_representations.py, 31_performance_benchmarks.py
This example demonstrates how hyperdimensional computing gracefully handles errors, noise, and partial corruption - one of HDC’s key strengths.
Key concepts: - Noise tolerance: Similar vectors remain similar under corruption - Error propagation: How errors spread through operations - Graceful degradation: Performance decreases smoothly - Recovery strategies: Cleanup, redundancy, dimension tuning - Practical robustness: Real-world sensor noise, transmission errors
HDC is inherently robust, making it ideal for edge devices, noisy sensors, and fault-tolerant systems.
 import numpy as np
 from holovec import VSA
 from holovec.utils.cleanup import BruteForceCleanup, ResonatorCleanup

 print("=" * 70)
 print("Error Handling and Robustness")
 print("=" * 70)
 print()

 # ============================================================================
 # Demo 1: Noise Tolerance Basics
 # ============================================================================
 print("=" * 70)
 print("Demo 1: Noise Tolerance - Bit Flip Robustness")
 print("=" * 70)

 model = VSA.create('MAP', dim=10000, seed=42)

 print(f"\nModel: {model.model_name}")
 print(f"Dimension: {model.dimension}")
 print()

 # Create a vector and add noise by flipping bits
 original = model.random(seed=1)

 flip_percentages = [0, 1, 5, 10, 20, 30, 40, 50]

 print(f"{'Noise %':<12s} {'Similarity':<15s} {'Still Recognizable?':<20s}")
 print("-" * 55)

 for flip_pct in flip_percentages:
     # Create noisy version by flipping random bits
     noisy = original.copy() if hasattr(original, 'copy') else original

     if flip_pct > 0:
         # Simulate bit flips by bundling with random noise
         noise_strength = flip_pct / 100.0
         noise = model.random(seed=999)

         # Weighted bundle: more original, less noise
         # For MAP: approximate via bundling
         noisy = model.bundle([original] * int(100 - flip_pct) + [noise] * int(flip_pct))

     sim = float(model.similarity(original, noisy))
     recognizable = "Yes" if sim > 0.7 else "Marginal" if sim > 0.4 else "No"

     print(f"{flip_pct:10d}%   {sim:13.3f}   {recognizable:<20s}")

 print("\nKey insight:")
 print("  - Tolerates up to ~20% corruption while maintaining similarity")
 print("  - Degrades gracefully (no sudden failure)")
 print("  - Higher dimension increases noise tolerance")

 # ============================================================================
 # Demo 2: Error Propagation Through Operations
 # ============================================================================
 print("\n" + "=" * 70)
 print("Demo 2: Error Propagation Analysis")
 print("=" * 70)

 model = VSA.create('MAP', dim=10000, seed=42)

 # Clean vectors
 A_clean = model.random(seed=1)
 B_clean = model.random(seed=2)

 # Add 10% noise to each
 np.random.seed(42)
 noise_A = model.random(seed=100)
 noise_B = model.random(seed=101)

 A_noisy = model.bundle([A_clean] * 9 + [noise_A])
 B_noisy = model.bundle([B_clean] * 9 + [noise_B])

 print(f"\nInput noise: ~10% corruption on each vector")
 print()

 # Test different operations
 print(f"{'Operation':<20s} {'Clean Result':<15s} {'Noisy Result':<15s} {'Degradation':<12s}")
 print("-" * 70)

 # Binding
 AB_clean = model.bind(A_clean, B_clean)
 AB_noisy = model.bind(A_noisy, B_noisy)
 sim_bind = float(model.similarity(AB_clean, AB_noisy))
 print(f"{'Bind (A * B)':<20s} {1.0:13.3f}   {sim_bind:13.3f}   {1.0 - sim_bind:10.3f}")

 # Bundling
 bundle_clean = model.bundle([A_clean, B_clean])
 bundle_noisy = model.bundle([A_noisy, B_noisy])
 sim_bundle = float(model.similarity(bundle_clean, bundle_noisy))
 print(f"{'Bundle (A + B)':<20s} {1.0:13.3f}   {sim_bundle:13.3f}   {1.0 - sim_bundle:10.3f}")

 # Permutation (if available)
 try:
     perm_clean = model.permute(A_clean)
     perm_noisy = model.permute(A_noisy)
     sim_perm = float(model.similarity(perm_clean, perm_noisy))
     print(f"{'Permute (ρ(A))':<20s} {1.0:13.3f}   {sim_perm:13.3f}   {1.0 - sim_perm:10.3f}")
 except:
     pass

 # Unbinding
 recovered_clean = model.unbind(AB_clean, A_clean)
 recovered_noisy = model.unbind(AB_noisy, A_noisy)
 sim_unbind_clean = float(model.similarity(recovered_clean, B_clean))
 sim_unbind_noisy = float(model.similarity(recovered_noisy, B_clean))
 print(f"{'Unbind (AB / A)':<20s} {sim_unbind_clean:13.3f}   {sim_unbind_noisy:13.3f}   {sim_unbind_clean - sim_unbind_noisy:10.3f}")

 print("\nObservations:")
 print("  - Error propagates but doesn't amplify catastrophically")
 print("  - Bundling is most robust (averaging effect)")
 print("  - Binding maintains reasonable similarity")
 print("  - Operations exhibit graceful degradation")

 # ============================================================================
 # Demo 3: Dimension vs Noise Tolerance
 # ============================================================================
 print("\n" + "=" * 70)
 print("Demo 3: Dimension Effect on Noise Tolerance")
 print("=" * 70)

 noise_level = 0.2  # 20% noise

 print(f"\nNoise level: {noise_level * 100:.0f}%")
 print()

 dimensions = [1000, 5000, 10000, 20000]

 print(f"{'Dimension':<12s} {'Similarity':<15s} {'Recognizable?':<15s}")
 print("-" * 50)

 for dim in dimensions:
     model = VSA.create('MAP', dim=dim, seed=42)

     original = model.random(seed=1)
     noise = model.random(seed=999)

     # Add noise
     noisy = model.bundle([original] * int(100 * (1 - noise_level)) +
                           [noise] * int(100 * noise_level))

     sim = float(model.similarity(original, noisy))
     recognizable = "Yes" if sim > 0.7 else "Marginal" if sim > 0.4 else "No"

     print(f"{dim:<12d} {sim:13.3f}   {recognizable:<15s}")

 print("\nKey insight:")
 print("  - Higher dimension = better noise tolerance")
 print("  - Diminishing returns above ~10,000 dimensions")
 print("  - Choose dimension based on expected noise level")

 # ============================================================================
 # Demo 4: Sensor Noise Simulation
 # ============================================================================
 print("\n" + "=" * 70)
 print("Demo 4: Realistic Sensor Noise Scenario")
 print("=" * 70)

 from holovec.encoders import FractionalPowerEncoder

 model = VSA.create('FHRR', dim=10000, seed=42)

 # Temperature sensor with noise
 encoder = FractionalPowerEncoder(model, min_val=0, max_val=100, bandwidth=0.1, seed=42)

 true_temp = 37.5
 noise_std = 0.5  # ±0.5°C sensor noise

 print(f"\nTrue temperature: {true_temp}°C")
 print(f"Sensor noise: ±{noise_std}°C (std dev)")
 print()

 # Simulate 10 noisy readings
 np.random.seed(42)
 readings = true_temp + np.random.randn(10) * noise_std

 print("Noisy readings:")
 for i, reading in enumerate(readings, 1):
     print(f"  {i:2d}. {reading:6.2f}°C (error: {reading - true_temp:+.2f}°C)")

 # Encode each reading
 encoded_readings = [encoder.encode(r) for r in readings]

 # Average the encoded vectors (noise reduction through bundling)
 averaged = model.bundle(encoded_readings)

 # Decode
 decoded_temp = encoder.decode(averaged)

 print(f"\nDecoded from averaged HVs: {decoded_temp:.2f}°C")
 print(f"Error from true value: {abs(decoded_temp - true_temp):.2f}°C")
 print(f"Improvement: {np.mean([abs(r - true_temp) for r in readings]) / abs(decoded_temp - true_temp):.1f}x")

 print("\nBenefit:")
 print("  - Bundling multiple noisy measurements reduces error")
 print("  - HDC naturally implements sensor fusion")
 print("  - Robust to individual sensor failures")

 # ============================================================================
 # Demo 5: Transmission Error Recovery
 # ============================================================================
 print("\n" + "=" * 70)
 print("Demo 5: Communication Channel Errors")
 print("=" * 70)

 model = VSA.create('MAP', dim=10000, seed=42)

 # Create a codebook
 codebook = {
     "message_A": model.random(seed=1),
     "message_B": model.random(seed=2),
     "message_C": model.random(seed=3),
     "message_D": model.random(seed=4),
     "message_E": model.random(seed=5),
 }

 cleanup = BruteForceCleanup()

 print("\nSimulating transmission errors:")
 print()

 error_rates = [0, 0.05, 0.10, 0.15, 0.20]

 print(f"{'Error Rate':<12s} {'Correct ID Rate':<18s} {'Avg Rank':<10s}")
 print("-" * 48)

 for error_rate in error_rates:
     correct = 0
     ranks = []

     for msg_name, msg_hv in codebook.items():
         # Simulate transmission error
         if error_rate > 0:
             noise = model.random(seed=999)
             received = model.bundle([msg_hv] * int(100 * (1 - error_rate)) +
                                     [noise] * int(100 * error_rate))
         else:
             received = msg_hv

         # Try to identify using cleanup
         labels, sims = cleanup.factorize(received, codebook, model, n_factors=1)

         if labels[0] == msg_name:
             correct += 1

         # Find rank of correct message
         all_sims = [(name, float(model.similarity(received, hv)))
                     for name, hv in codebook.items()]
         all_sims.sort(key=lambda x: x[1], reverse=True)
         rank = next(i for i, (name, _) in enumerate(all_sims, 1) if name == msg_name)
         ranks.append(rank)

     accuracy = correct / len(codebook)
     avg_rank = np.mean(ranks)

     print(f"{error_rate * 100:10.0f}%   {accuracy:16.1%}   {avg_rank:8.1f}")

 print("\nInsight:")
 print("  - Tolerates up to 15% transmission error with cleanup")
 print("  - Even with errors, correct message often in top-3")
 print("  - Error correction codes can further improve robustness")

 # ============================================================================
 # Demo 6: Recovery Strategies
 # ============================================================================
 print("\n" + "=" * 70)
 print("Demo 6: Error Recovery with Cleanup")
 print("=" * 70)

 model = VSA.create('MAP', dim=10000, seed=42)

 codebook = {f"item_{i}": model.random(seed=100+i) for i in range(50)}

 # Heavily corrupted query
 target_key = "item_25"
 target = codebook[target_key]

 # Add 30% noise
 noise = model.random(seed=999)
 corrupted = model.bundle([target] * 7 + [noise] * 3)

 print(f"\nTarget: {target_key}")
 print(f"Corruption: 30% noise")
 print()

 # Without cleanup
 sims_no_cleanup = [(key, float(model.similarity(corrupted, hv)))
                     for key, hv in codebook.items()]
 sims_no_cleanup.sort(key=lambda x: x[1], reverse=True)

 print("Top-5 matches WITHOUT cleanup:")
 for i, (key, sim) in enumerate(sims_no_cleanup[:5], 1):
     marker = " ← Target!" if key == target_key else ""
     print(f"  {i}. {key:12s}: {sim:.3f}{marker}")

 # With BruteForce cleanup
 bf_cleanup = BruteForceCleanup()
 labels_bf, sims_bf = bf_cleanup.factorize(corrupted, codebook, model, n_factors=5)

 print("\nTop-5 matches WITH BruteForce cleanup:")
 for i, (label, sim) in enumerate(zip(labels_bf, sims_bf), 1):
     marker = " ← Target!" if label == target_key else ""
     print(f"  {i}. {label:12s}: {sim:.3f}{marker}")

 # With Resonator cleanup
 res_cleanup = ResonatorCleanup()
 labels_res, sims_res = res_cleanup.factorize(corrupted, codebook, model,
                                               n_factors=5, max_iterations=10)

 print("\nTop-5 matches WITH Resonator cleanup:")
 for i, (label, sim) in enumerate(zip(labels_res, sims_res), 1):
     marker = " ← Target!" if label == target_key else ""
     print(f"  {i}. {label:12s}: {sim:.3f}{marker}")

 print("\nCleanup improves robustness significantly!")

 # ============================================================================
 # Demo 7: Redundancy Strategies
 # ============================================================================
 print("\n" + "=" * 70)
 print("Demo 7: Redundancy for Fault Tolerance")
 print("=" * 70)

 model = VSA.create('MAP', dim=10000, seed=42)

 # Store data with redundancy
 data = model.random(seed=1)

 # Repeat encoding (redundancy)
 redundancy_levels = [1, 3, 5, 10]
 corruption_rate = 0.2

 print(f"\nCorruption rate: {corruption_rate * 100:.0f}%")
 print()

 print(f"{'Redundancy':<12s} {'Similarity':<15s} {'Recovery Quality':<18s}")
 print("-" * 52)

 for redundancy in redundancy_levels:
     # Create redundant copies
     copies = [data] * redundancy

     # Bundle them
     redundant_storage = model.bundle(copies)

     # Corrupt the storage
     noise = model.random(seed=999)
     corrupted = model.bundle([redundant_storage] * int(100 * (1 - corruption_rate)) +
                               [noise] * int(100 * corruption_rate))

     # Check similarity to original
     sim = float(model.similarity(corrupted, data))
     quality = "Excellent" if sim > 0.8 else "Good" if sim > 0.6 else "Poor"

     print(f"{redundancy:10d}x   {sim:13.3f}   {quality:<18s}")

 print("\nRedundancy helps but has diminishing returns:")
 print("  - 3x redundancy provides good protection")
 print("  - Beyond 5x, limited additional benefit")
 print("  - Trade-off: robustness vs storage efficiency")

 # ============================================================================
 # Summary
 # ============================================================================
 print("\n" + "=" * 70)
 print("Summary: Robustness Best Practices")
 print("=" * 70)
 print()

 print("✓ Noise Tolerance Strategies:")
 print("  1. Use adequate dimension (10k+ for noisy environments)")
 print("  2. Apply cleanup on retrieval (BruteForce or Resonator)")
 print("  3. Bundle multiple noisy samples (sensor fusion)")
 print("  4. Add redundancy for critical data")
 print()

 print("✓ Error Recovery:")
 print("  - Detect: Monitor similarity scores")
 print("  - Recover: Use cleanup strategies")
 print("  - Prevent: Higher dimension, redundancy")
 print("  - Degrade gracefully: Always get approximate answer")
 print()

 print("✓ Design Guidelines:")
 print("  - Expected noise < 10%: Standard dimension (10k)")
 print("  - Expected noise 10-20%: Higher dimension (20k) + cleanup")
 print("  - Expected noise > 20%: Redundancy + aggressive cleanup")
 print("  - Critical applications: Add error-correcting codes")
 print()

 print("✓ HDC Advantages for Robust Systems:")
 print("  - Graceful degradation (no catastrophic failures)")
 print("  - Natural fault tolerance (distributed representation)")
 print("  - Simple error recovery (cleanup strategies)")
 print("  - Predictable behavior under stress")
 print()

 print("Next steps:")
 print("  → 27_cleanup_strategies.py - Detailed cleanup methods")
 print("  → 32_distributed_representations.py - Capacity under noise")
 print("  → 31_performance_benchmarks.py - Dimension cost analysis")
 print()
 print("=" * 70)
Gallery generated by Sphinx-Gallery