Thermometer and Level Encoders Deep Dive

Topics: Ordinal encoding, discrete bins, model compatibility, use cases
Time: 10 minutes
Prerequisites: 10_encoders_scalar.py, 01_basic_operations.py
Related: 11_encoders_fractional_power.py, 02_models_comparison.py
This example explores ThermometerEncoder and LevelEncoder — the universal scalar encoders that work with all VSA models (MAP, BSC, FHRR, HRR, etc.).
Key concepts:
- Thermometer encoding: Cumulative activation (ordinal relationships)
- Level encoding: One-hot bins (categorical values)
- Model compatibility: Works with ALL VSA models
- Trade-offs: Discrete vs smooth similarity
- Use cases: Rankings, categories, MAP/BSC applications
Use these encoders when you need model-agnostic encoding or when working with MAP, BSC, or BSDC models (which don’t support FPE).
 import zlib

 import numpy as np
 from holovec import VSA
 from holovec.encoders import ThermometerEncoder, LevelEncoder

 print("=" * 70)
 print("Thermometer and Level Encoders Deep Dive")
 print("=" * 70)
 print()

 # ============================================================================
 # Demo 1: Thermometer Encoder - Ordinal Relationships
 # ============================================================================
 # Sweeps the thermometer bin count and tabulates the similarity the model
 # reports for a few value pairs at increasing distance from 50.
 print("=" * 70)
 print("Demo 1: ThermometerEncoder - Ordinal Encoding")
 print("=" * 70)

 model = VSA.create('MAP', dim=10000, seed=42)

 # Bin counts to compare, from coarse to fine
 n_bins_options = [10, 20, 50, 100]

 print(f"\nModel: {model.model_name}, dimension={model.dimension}")
 print("Range: 0-100")
 print()

 # Value pairs whose similarity is reported for every bin count
 test_pairs = [(50, 51), (50, 55), (50, 60), (50, 75)]

 # Header cells are right-aligned to width 7 plus one space, exactly like the
 # "{sim:7.3f} " data cells below, so each label sits above its own column.
 print(f"{'Bins':<8s} ", end="")
 for v1, v2 in test_pairs:
     pair_label = f"{v1}-{v2}"
     print(f"{pair_label:>7s} ", end="")
 print()
 print("-" * 50)

 for n_bins in n_bins_options:
     encoder = ThermometerEncoder(model, min_val=0, max_val=100, n_bins=n_bins)
     print(f"{n_bins:<8d} ", end="")

     for v1, v2 in test_pairs:
         hv1 = encoder.encode(v1)
         hv2 = encoder.encode(v2)
         sim = float(model.similarity(hv1, hv2))
         print(f"{sim:7.3f} ", end="")
     print()

 print("\nObservations:")
 print("  - More bins = finer granularity, lower similarity for same distance")
 print("  - Fewer bins = coarser, higher similarity, more grouping")
 print("  - Ordinal property: similarity decreases monotonically with distance")

 # ============================================================================
 # Demo 2: Thermometer Properties
 # ============================================================================
 # Prints the encoder's reported properties, then the similarity between a
 # fixed reference value and a spread of other values.
 print("\n" + "=" * 70)
 print("Demo 2: Thermometer Encoding Properties")
 print("=" * 70)

 # Named once so the printed bin size is computed from the very same numbers
 # that configure the encoder instead of from duplicated literals.
 range_min, range_max, bin_count = 0, 100, 20
 encoder = ThermometerEncoder(model, min_val=range_min, max_val=range_max, n_bins=bin_count)

 print(f"\nBin size: {(range_max - range_min) / bin_count:.1f} units per bin")
 print(f"Reversible: {encoder.is_reversible}")
 print(f"Compatible models: {encoder.compatible_models}")
 print()

 # Show ordinal property
 values = [10, 30, 50, 70, 90]
 reference = 50

 print(f"Reference value: {reference}")
 print(f"\n{'Value':<10s} {'Distance':<10s} {'Similarity':<12s}")
 print("-" * 40)

 # The reference encoding does not depend on the loop variable: encode it once.
 # NOTE(review): assumes encode() is deterministic for a given encoder - confirm.
 hv_ref = encoder.encode(reference)

 for val in values:
     hv_val = encoder.encode(val)
     sim = float(model.similarity(hv_ref, hv_val))
     dist = abs(val - reference)
     print(f"{val:<10.1f} {dist:<10.1f} {sim:8.3f}")

 print("\nKey property:")
 print("  - Monotonic: similarity decreases as distance increases")
 print("  - Symmetric: sim(A,B) = sim(B,A)")
 print("  - Cumulative: each value activates bins 0..n")

 # ============================================================================
 # Demo 3: Level Encoder - Discrete Categories
 # ============================================================================
 # Encodes a handful of values with a LevelEncoder, decodes them again and
 # prints the level each value is expected to fall into.
 print("\n" + "=" * 70)
 print("Demo 3: LevelEncoder - Discrete Bins")
 print("=" * 70)

 # Range and level count are named once so the printed bin size, the "Level"
 # column and the legend cannot drift away from the encoder configuration.
 level_min, level_max, level_count = 0, 100, 5
 level_width = (level_max - level_min) / level_count

 # Create level encoder
 level_encoder = LevelEncoder(model, min_val=level_min, max_val=level_max, n_levels=level_count)

 print(f"\nLevels: {level_encoder.n_levels}")
 print(f"Bin size: {level_width:.1f} units per level")
 print(f"Reversible: {level_encoder.is_reversible}")
 print()

 # Map values to levels
 test_values = [5, 15, 35, 55, 75, 95]

 print(f"{'Value':<10s} {'Level':<10s} {'Decoded':<10s}")
 print("-" * 35)

 for val in test_values:
     hv = level_encoder.encode(val)
     decoded = level_encoder.decode(hv)
     # Clamp to the last level so a value equal to the upper bound is reported
     # inside the closed final bin shown in the legend, not as a level that
     # does not exist.
     level_index = min(int((val - level_min) // level_width), level_count - 1)
     print(f"{val:<10.1f} {level_index:<10d} {decoded:<10.1f}")

 # Legend generated from the configuration above.
 # NOTE(review): "decoded as" prints the bin centre, as the previously
 # hard-coded legend did - confirm against LevelEncoder.decode().
 print("\nLevel bins:")
 for idx in range(level_count):
     lower = level_min + idx * level_width
     upper = lower + level_width
     # Only the last bin is closed on the right
     closing = "]" if idx == level_count - 1 else ")"
     span = f"[{lower:g}-{upper:g}{closing}"
     print(f"  {span:<9s}→ Level {idx} → decoded as {(lower + upper) / 2:.1f}")

 # ============================================================================
 # Demo 4: Level vs Thermometer Comparison
 # ============================================================================
 # Compares, side by side, how similar each encoder considers a sweep of values
 # to one fixed reference value.
 print("\n" + "=" * 70)
 print("Demo 4: Level vs Thermometer Similarity Patterns")
 print("=" * 70)

 thermo = ThermometerEncoder(model, min_val=0, max_val=100, n_bins=20)
 level = LevelEncoder(model, min_val=0, max_val=100, n_levels=5)

 reference = 50.0
 test_vals = np.linspace(0, 100, 11)  # 0, 10, ..., 100

 print(f"\nReference: {reference}")
 print(f"\n{'Value':<10s} {'Thermo Sim':<12s} {'Level Sim':<12s}")
 print("-" * 40)

 # The reference is the same on every row, so encode it once per encoder
 # instead of once per iteration.
 # NOTE(review): assumes encode() is deterministic for a given encoder - confirm.
 ref_t = thermo.encode(reference)
 ref_l = level.encode(reference)

 for val in test_vals:
     # Thermometer
     val_t = thermo.encode(val)
     sim_t = float(model.similarity(ref_t, val_t))

     # Level
     val_l = level.encode(val)
     sim_l = float(model.similarity(ref_l, val_l))

     print(f"{val:<10.1f} {sim_t:8.3f}     {sim_l:8.3f}")

 print("\nKey differences:")
 print("  Thermometer: Gradual similarity decay")
 print("  Level:       High similarity within bin, drop across bins")

 # ============================================================================
 # Demo 5: Model Compatibility
 # ============================================================================
 # Builds both encoders on top of several VSA models and round-trips one value
 # through the level encoder for each of them.
 print("\n" + "=" * 70)
 print("Demo 5: Universal Model Compatibility")
 print("=" * 70)

 test_value = 42.5

 print("\n✓ Thermometer and Level work with ALL models:\n")

 for model_name in ('MAP', 'FHRR', 'HRR', 'BSC'):
     backend = VSA.create(model_name, dim=5000, seed=42)

     # Thermometer: the resulting vector is not inspected, the call just has
     # to complete for this model
     ThermometerEncoder(backend, min_val=0, max_val=100, n_bins=20).encode(test_value)

     # Level: encode, then decode straight back
     level_enc = LevelEncoder(backend, min_val=0, max_val=100, n_levels=10)
     round_trip = level_enc.decode(level_enc.encode(test_value))

     print(f"{model_name:10s}: Thermometer ✓  Level ✓  (decoded={round_trip:.1f})")

 for line in (
     "\nVersus FPE:",
     "  FPE: Only FHRR, HRR (requires complex representation)",
     "  Thermo/Level: ALL models (universal)",
 ):
     print(line)

 # ============================================================================
 # Demo 6: Bin Count Selection
 # ============================================================================
 # Prints a bins-per-dimension rule of thumb, then measures the mean absolute
 # similarity between thermometer codes for several bin counts.
 rule = "=" * 70
 print("\n" + rule)
 print("Demo 6: Choosing Number of Bins/Levels")
 print(rule)

 print("\nRule of thumb:")
 print("  bins ≈ dimension / 200  (for good orthogonality)")
 print()

 dimensions = [1000, 5000, 10000, 20000]

 print(f"{'Dimension':<12s} {'Suggested Bins':<15s} Reasoning")
 print("-" * 60)

 for dim in dimensions:
     bins_hint = dim // 200
     print(f"{dim:<12d} {bins_hint:<15d} ~{bins_hint} orthogonal vectors available")

 print("\nTrade-offs:")
 print("  More bins:   Finer resolution, needs higher dimension")
 print("  Fewer bins:  Coarser, works with lower dimension")

 # Test capacity
 dim = 10000
 print(f"\nCapacity test for dimension={dim}:")

 for n_bins in (10, 50, 100, 200):
     # A fresh model with the same seed is created for every bin count
     model = VSA.create('MAP', dim=dim, seed=42)
     encoder = ThermometerEncoder(model, min_val=0, max_val=100, n_bins=n_bins)

     # One code per bin, sampled at multiples of 100 / n_bins
     step = 100 / n_bins
     bin_hvs = [encoder.encode(k * step) for k in range(n_bins)]

     # Absolute similarity of every unordered pair of distinct codes
     pair_sims = []
     for pos, first in enumerate(bin_hvs):
         for second in bin_hvs[pos + 1:]:
             pair_sims.append(abs(float(model.similarity(first, second))))

     if pair_sims:
         avg_cross_sim = np.mean(pair_sims)
     else:
         avg_cross_sim = 0

     print(f"  {n_bins:3d} bins: avg cross-similarity = {avg_cross_sim:.3f}")

 print("\nTarget: cross-similarity < 0.1 for good separation")

 # ============================================================================
 # Demo 7: Use Cases and Recommendations
 # ============================================================================
 # Purely informational: prints one recommendation card per encoder.
 print("\n" + "=" * 70)
 print("Demo 7: When to Use Each Encoder")
 print("=" * 70)

 # (encoder name, reasons to pick it, example applications)
 recommendations = [
     (
         "ThermometerEncoder",
         [
             "Need ordinal relationships (rankings, scores)",
             "Using MAP, BSC, or BSDC models",
             "Want monotonic similarity decay",
             "Don't need exact value recovery",
         ],
         "product ratings, priority levels, age groups",
     ),
     (
         "LevelEncoder",
         [
             "Have discrete categories or bins",
             "Need reversible encoding with categorical output",
             "Want sharp boundaries between levels",
             "Using any VSA model",
         ],
         "grade levels (A/B/C), risk categories, size buckets",
     ),
     (
         "FractionalPowerEncoder",
         [
             "Need smooth similarity for continuous values",
             "Using FHRR or HRR models",
             "Want exact value recovery (reversible)",
             "Have precise measurements",
         ],
         "temperature, pressure, time, GPS coordinates",
     ),
 ]

 for encoder_name, reasons, examples in recommendations:
     # Every card is preceded by exactly one blank line; none follows the last
     print()
     print(f"✓ Use {encoder_name} when:")
     for reason in reasons:
         print(f"  - {reason}")
     print(f"  Examples: {examples}")

 # ============================================================================
 # Demo 8: Practical Pattern - Rating System
 # ============================================================================
 # Binds product symbols to level-encoded star ratings, then ranks products by
 # how similar their rating encoding is to a target rating.
 print("\n" + "=" * 70)
 print("Demo 8: Practical Example - Product Rating System")
 print("=" * 70)

 model = VSA.create('MAP', dim=10000, seed=42)

 # 5-star rating system with half stars (0.5 increments)
 rating_encoder = LevelEncoder(model, min_val=0.0, max_val=5.0, n_levels=10)

 # Sample products with ratings
 products = {
     "Laptop": 4.5,
     "Mouse": 3.5,
     "Keyboard": 4.0,
     "Monitor": 4.5,
     "Webcam": 3.0,
 }

 # Create product symbols.
 # Seeds are derived from a CRC32 of the name rather than the built-in hash():
 # str hashes are salted per interpreter process (PYTHONHASHSEED), which made
 # the "seeded" product vectors differ from one run to the next.
 product_hvs = {name: model.random(seed=zlib.crc32(name.encode("utf-8")) % 10000)
                for name in products}

 # Bind product to rating
 RATING = model.random(seed=99)
 product_ratings = {}
 rating_hvs = {}  # rating encoding per product, reused by the query below

 print("\nProduct ratings:")
 for name, rating in products.items():
     rating_hv = rating_encoder.encode(rating)
     rating_hvs[name] = rating_hv
     product_rating = model.bind(product_hvs[name], model.bind(RATING, rating_hv))
     product_ratings[name] = product_rating
     print(f"  {name:12s}: {rating:.1f} stars")

 # Query: Find products with ~4.5 stars
 target_rating = 4.5
 target_hv = rating_encoder.encode(target_rating)

 print(f"\nSearching for products rated ~{target_rating} stars:")
 print(f"\n{'Product':<12s} {'Actual':<10s} {'Similarity':<12s}")
 print("-" * 40)

 for name, rating in products.items():
     # Reuse the encoding computed above instead of encoding each rating again.
     # NOTE(review): assumes encode() is deterministic for a given encoder - confirm.
     sim = float(model.similarity(rating_hvs[name], target_hv))
     print(f"{name:12s} {rating:.1f}        {sim:8.3f}")

 print("\nHigh similarity products have similar ratings!")

 # ============================================================================
 # Summary
 # ============================================================================
 # Prints the comparison table, a short selection guide and follow-up examples.
 print("\n" + "=" * 70)
 print("Summary: Thermometer vs Level vs FPE")
 print("=" * 70)
 print()

 # (feature, Thermometer, Level, FPE) - the header is the first row
 summary_rows = [
     ("Feature", "Thermometer", "Level", "FPE"),
     ("Model compatibility", "ALL", "ALL", "FHRR,HRR only"),
     ("Similarity type", "Gradual decay", "Sharp bins", "Smooth decay"),
     ("Reversible", "No", "Yes", "Yes"),
     ("Ordinal property", "Yes", "Partial", "Yes"),
     ("Best for", "Rankings", "Categories", "Continuous"),
     ("Bin granularity", "Flexible", "Flexible", "Continuous"),
     ("Value recovery", "No", "Bin center", "Exact"),
 ]

 # Fixed column widths (21 / 15 / 15) keep every row on the same grid; the
 # hand-typed table had its last three rows shifted one column to the left.
 summary_lines = [
     f"{feature:<21s}{thermo_col:<15s}{level_col:<15s}{fpe_col}"
     for feature, thermo_col, level_col, fpe_col in summary_rows
 ]
 # Separator goes directly under the header row
 summary_lines.insert(1, "─────────────────────────────────────────────────────────────")

 # Leading and trailing newline frame the table with blank lines when printed
 summary_table = "\n" + "\n".join(summary_lines) + "\n"

 print(summary_table)

 print("\nQuick selection guide:")
 print("  1. Check model: MAP/BSC? → Use Thermometer or Level")
 print("  2. Need reversible? → Level (discrete) or FPE (smooth)")
 print("  3. Ordinal only? → Thermometer")
 print("  4. Continuous precision? → FPE (if using FHRR/HRR)")
 print()

 print("Next steps:")
 print("  → 11_encoders_fractional_power.py - FPE deep dive")
 print("  → 02_models_comparison.py - Choose the right model")
 print("  → 20_app_text_classification.py - Apply encoders in practice")
 print()
 print("=" * 70)
Gallery generated by Sphinx-Gallery