Thermometer and Level Encoders Deep Dive

Topics: Ordinal encoding, discrete bins, model compatibility, use cases
Time: 10 minutes
Prerequisites: 10_encoders_scalar.py, 01_basic_operations.py
Related: 11_encoders_fractional_power.py, 02_models_comparison.py

This example explores ThermometerEncoder and LevelEncoder, the universal scalar encoders that work with all VSA models (MAP, BSC, FHRR, HRR, etc.).

Key concepts:
- Thermometer encoding: Cumulative activation (ordinal relationships)
- Level encoding: One-hot bins (categorical values)
- Model compatibility: Works with ALL VSA models
- Trade-offs: Discrete vs smooth similarity
- Use cases: Rankings, categories, MAP/BSC applications

Use these encoders when you need model-agnostic encoding or when working with MAP, BSC, or BSDC models (which don’t support FPE).

 24 import numpy as np
 25 from holovec import VSA
 26 from holovec.encoders import ThermometerEncoder, LevelEncoder
 27
 28 print("=" * 70)
 29 print("Thermometer and Level Encoders Deep Dive")
 30 print("=" * 70)
 31 print()
 32
 33 # ============================================================================
 34 # Demo 1: Thermometer Encoder - Ordinal Relationships
 35 # ============================================================================
 36 print("=" * 70)
 37 print("Demo 1: ThermometerEncoder - Ordinal Encoding")
 38 print("=" * 70)
 39
 40 model = VSA.create('MAP', dim=10000, seed=42)
 41
 42 # Create thermometer encoder with different bin counts
 43 n_bins_options = [10, 20, 50, 100]
 44
 45 print(f"\nModel: {model.model_name}, dimension={model.dimension}")
 46 print(f"Range: 0-100")
 47 print()
 48
 49 # Test similarity with different bin counts
 50 test_pairs = [(50, 51), (50, 55), (50, 60), (50, 75)]
 51
 52 print(f"{'Bins':<8s} ", end="")
 53 for v1, v2 in test_pairs:
 54     print(f"{v1}-{v2:<5d} ", end="")
 55 print()
 56 print("-" * 50)
 57
 58 for n_bins in n_bins_options:
 59     encoder = ThermometerEncoder(model, min_val=0, max_val=100, n_bins=n_bins)
 60     print(f"{n_bins:<8d} ", end="")
 61
 62     for v1, v2 in test_pairs:
 63         hv1 = encoder.encode(v1)
 64         hv2 = encoder.encode(v2)
 65         sim = float(model.similarity(hv1, hv2))
 66         print(f"{sim:7.3f} ", end="")
 67     print()
 68
 69 print("\nObservations:")
 70 print("  - More bins = finer granularity, lower similarity for same distance")
 71 print("  - Fewer bins = coarser, higher similarity, more grouping")
 72 print("  - Ordinal property: similarity decreases monotonically with distance")
 73
 74 # ============================================================================
 75 # Demo 2: Thermometer Properties
 76 # ============================================================================
 77 print("\n" + "=" * 70)
 78 print("Demo 2: Thermometer Encoding Properties")
 79 print("=" * 70)
 80
 81 encoder = ThermometerEncoder(model, min_val=0, max_val=100, n_bins=20)
 82
 83 print(f"\nBin size: {(100 / 20):.1f} units per bin")
 84 print(f"Reversible: {encoder.is_reversible}")
 85 print(f"Compatible models: {encoder.compatible_models}")
 86 print()
 87
 88 # Show ordinal property
 89 values = [10, 30, 50, 70, 90]
 90 reference = 50
 91
 92 print(f"Reference value: {reference}")
 93 print(f"\n{'Value':<10s} {'Distance':<10s} {'Similarity':<12s}")
 94 print("-" * 40)
 95
 96 for val in values:
 97     hv_ref = encoder.encode(reference)
 98     hv_val = encoder.encode(val)
 99     sim = float(model.similarity(hv_ref, hv_val))
100     dist = abs(val - reference)
101     print(f"{val:<10.1f} {dist:<10.1f} {sim:8.3f}")
102
103 print("\nKey property:")
104 print("  - Monotonic: similarity decreases as distance increases")
105 print("  - Symmetric: sim(A,B) = sim(B,A)")
106 print("  - Cumulative: each value activates bins 0..n")
107
# ============================================================================
# Demo 3: Level Encoder - Discrete Categories
# ============================================================================
# Round-trips a few sample values through a 5-level LevelEncoder and prints
# the level index next to the value the encoder decodes.
print("\n" + "=" * 70)
print("Demo 3: LevelEncoder - Discrete Bins")
print("=" * 70)

# Five levels over the range 0-100
level_encoder = LevelEncoder(model, min_val=0, max_val=100, n_levels=5)

print(f"\nLevels: {level_encoder.n_levels}")
print(f"Bin size: {100 / 5:.1f} units per level")
print(f"Reversible: {level_encoder.is_reversible}")
print()

# Sample values to encode and decode again
test_values = [5, 15, 35, 55, 75, 95]

print(f"{'Value':<10s} {'Level':<10s} {'Decoded':<10s}")
print("-" * 35)

for val in test_values:
    # Encode then immediately decode; the level index shown is computed
    # directly from the value using the 20-unit bin width.
    decoded = level_encoder.decode(level_encoder.encode(val))
    level_index = int(val // 20)
    print(f"{val:<10.1f} {level_index:<10d} {decoded:<10.1f}")

# Static legend describing the five bins
print("\nLevel bins:")
for legend_line in (
    "  [0-20)   → Level 0 → decoded as 10.0",
    "  [20-40)  → Level 1 → decoded as 30.0",
    "  [40-60)  → Level 2 → decoded as 50.0",
    "  [60-80)  → Level 3 → decoded as 70.0",
    "  [80-100] → Level 4 → decoded as 90.0",
):
    print(legend_line)
140
# ============================================================================
# Demo 4: Level vs Thermometer Comparison
# ============================================================================
# Compares how similarity to a fixed reference value changes across the range
# for a ThermometerEncoder and a LevelEncoder built on the same model.
print("\n" + "=" * 70)
print("Demo 4: Level vs Thermometer Similarity Patterns")
print("=" * 70)

thermo = ThermometerEncoder(model, min_val=0, max_val=100, n_bins=20)
level = LevelEncoder(model, min_val=0, max_val=100, n_levels=5)

reference = 50.0
test_vals = np.linspace(0, 100, 11)

print(f"\nReference: {reference}")
print(f"\n{'Value':<10s} {'Thermo Sim':<12s} {'Level Sim':<12s}")
print("-" * 40)

# Both reference encodings are independent of the loop variable, so they are
# computed once up front rather than on every iteration.
ref_t = thermo.encode(reference)
ref_l = level.encode(reference)

for val in test_vals:
    # Thermometer
    val_t = thermo.encode(val)
    sim_t = float(model.similarity(ref_t, val_t))

    # Level
    val_l = level.encode(val)
    sim_l = float(model.similarity(ref_l, val_l))

    print(f"{val:<10.1f} {sim_t:8.3f}     {sim_l:8.3f}")

print("\nKey differences:")
print("  Thermometer: Gradual similarity decay")
print("  Level:       High similarity within bin, drop across bins")
174
# ============================================================================
# Demo 5: Model Compatibility
# ============================================================================
# Builds both encoders on top of several VSA models and encodes one value
# with each, printing a confirmation line per model.
print("\n" + "=" * 70)
print("Demo 5: Universal Model Compatibility")
print("=" * 70)

test_value = 42.5

print("\n✓ Thermometer and Level work with ALL models:\n")

for model_name in ('MAP', 'FHRR', 'HRR', 'BSC'):
    m = VSA.create(model_name, dim=5000, seed=42)

    # Thermometer: encode only, to show the call succeeds on this model
    thermo = ThermometerEncoder(m, min_val=0, max_val=100, n_bins=20)
    hv_t = thermo.encode(test_value)

    # Level: encode and decode so the recovered value can be reported
    level = LevelEncoder(m, min_val=0, max_val=100, n_levels=10)
    decoded_l = level.decode(level.encode(test_value))

    print(f"{model_name:10s}: Thermometer ✓  Level ✓  (decoded={decoded_l:.1f})")

for contrast_line in (
    "\nVersus FPE:",
    "  FPE: Only FHRR, HRR (requires complex representation)",
    "  Thermo/Level: ALL models (universal)",
):
    print(contrast_line)
203
# ============================================================================
# Demo 6: Bin Count Selection
# ============================================================================
# Prints a dimension -> suggested-bin-count table, then measures the average
# absolute similarity between the encodings of every pair of bins.
print("\n" + "=" * 70)
print("Demo 6: Choosing Number of Bins/Levels")
print("=" * 70)

print("\nRule of thumb:")
print("  bins ≈ dimension / 200  (for good orthogonality)")
print()

dimensions = [1000, 5000, 10000, 20000]

print(f"{'Dimension':<12s} {'Suggested Bins':<15s} {'Reasoning'}")
print("-" * 60)

for dim in dimensions:
    suggested = dim // 200
    print(f"{dim:<12d} {suggested:<15d} ~{suggested} orthogonal vectors available")

print("\nTrade-offs:")
print("  More bins:   Finer resolution, needs higher dimension")
print("  Fewer bins:  Coarser, works with lower dimension")

# Capacity test at a single fixed dimension
dim = 10000
print(f"\nCapacity test for dimension={dim}:")

# NOTE(review): thermometer codes are described in this script as cumulative
# with gradual similarity decay, so neighbouring bins are presumably
# correlated by design - confirm the "< 0.1" target below is meaningful for
# this encoder.
for n_bins in [10, 50, 100, 200]:
    # A fresh model per bin count, always built with the same seed
    model = VSA.create('MAP', dim=dim, seed=42)
    encoder = ThermometerEncoder(model, min_val=0, max_val=100, n_bins=n_bins)

    # One encoding per bin, taken at each bin's lower edge
    bin_width = 100 / n_bins
    bin_hvs = [encoder.encode(i * bin_width) for i in range(n_bins)]

    # Absolute similarity for every unordered pair of distinct bins
    sims = [
        abs(float(model.similarity(first, second)))
        for i, first in enumerate(bin_hvs)
        for second in bin_hvs[i + 1:]
    ]

    avg_cross_sim = np.mean(sims) if sims else 0

    print(f"  {n_bins:3d} bins: avg cross-similarity = {avg_cross_sim:.3f}")

print("\nTarget: cross-similarity < 0.1 for good separation")
251
# ============================================================================
# Demo 7: Use Cases and Recommendations
# ============================================================================
# Purely informational section: every entry below is printed on its own line,
# and an empty entry produces the blank separator line between encoders.
for guidance_line in (
    "\n" + "=" * 70,
    "Demo 7: When to Use Each Encoder",
    "=" * 70,
    "\n✓ Use ThermometerEncoder when:",
    "  - Need ordinal relationships (rankings, scores)",
    "  - Using MAP, BSC, or BSDC models",
    "  - Want monotonic similarity decay",
    "  - Don't need exact value recovery",
    "  Examples: product ratings, priority levels, age groups",
    "",
    "✓ Use LevelEncoder when:",
    "  - Have discrete categories or bins",
    "  - Need reversible encoding with categorical output",
    "  - Want sharp boundaries between levels",
    "  - Using any VSA model",
    "  Examples: grade levels (A/B/C), risk categories, size buckets",
    "",
    "✓ Use FractionalPowerEncoder when:",
    "  - Need smooth similarity for continuous values",
    "  - Using FHRR or HRR models",
    "  - Want exact value recovery (reversible)",
    "  - Have precise measurements",
    "  Examples: temperature, pressure, time, GPS coordinates",
):
    print(guidance_line)
281
# ============================================================================
# Demo 8: Practical Pattern - Rating System
# ============================================================================
# Encodes product star ratings with a LevelEncoder, binds each rating to its
# product symbol, and ranks products by rating similarity to a target rating.
print("\n" + "=" * 70)
print("Demo 8: Practical Example - Product Rating System")
print("=" * 70)

model = VSA.create('MAP', dim=10000, seed=42)

# 5-star rating system with half stars (0.5 increments)
rating_encoder = LevelEncoder(model, min_val=0.0, max_val=5.0, n_levels=10)

# Sample products with ratings
products = {
    "Laptop": 4.5,
    "Mouse": 3.5,
    "Keyboard": 4.0,
    "Monitor": 4.5,
    "Webcam": 3.0
}

# Create product symbols.
# The seed is derived from the name's UTF-8 bytes rather than from hash():
# str hashes are salted per interpreter process, so hash(name) would give
# each product a different vector on every run. This derivation is stable
# across runs and keeps the seed in the same 0..9999 range as before.
product_hvs = {
    name: model.random(seed=int.from_bytes(name.encode("utf-8"), "big") % 10000)
    for name in products
}

# Bind product to rating
RATING = model.random(seed=99)
product_ratings = {}

print("\nProduct ratings:")
for name, rating in products.items():
    rating_hv = rating_encoder.encode(rating)
    product_rating = model.bind(product_hvs[name], model.bind(RATING, rating_hv))
    product_ratings[name] = product_rating
    print(f"  {name:12s}: {rating:.1f} stars")

# Query: Find products with ~4.5 stars
target_rating = 4.5
target_hv = rating_encoder.encode(target_rating)

print(f"\nSearching for products rated ~{target_rating} stars:")
print(f"\n{'Product':<12s} {'Actual':<10s} {'Similarity':<12s}")
print("-" * 40)

# NOTE(review): the comparison below re-encodes each raw rating; the bound
# records stored in product_ratings are not consulted by this query.
for name, rating in products.items():
    rating_hv = rating_encoder.encode(rating)
    sim = float(model.similarity(rating_hv, target_hv))
    print(f"{name:12s} {rating:.1f}        {sim:8.3f}")

print("\nHigh similarity products have similar ratings!")
332
# ============================================================================
# Summary
# ============================================================================
# Prints a feature comparison table for the three encoders, a short selection
# guide, and pointers to related examples.
print("\n" + "=" * 70)
print("Summary: Thermometer vs Level vs FPE")
print("=" * 70)
print()

# Table rows as (feature, thermometer, level, fpe). The first entry is the
# header row. Rows are rendered with fixed column widths so that every row
# lines up; previously the last three rows were shifted one character left.
_summary_rows = [
    ("Feature", "Thermometer", "Level", "FPE"),
    ("Model compatibility", "ALL", "ALL", "FHRR,HRR only"),
    ("Similarity type", "Gradual decay", "Sharp bins", "Smooth decay"),
    ("Reversible", "No", "Yes", "Yes"),
    ("Ordinal property", "Yes", "Partial", "Yes"),
    ("Best for", "Rankings", "Categories", "Continuous"),
    ("Bin granularity", "Flexible", "Flexible", "Continuous"),
    ("Value recovery", "No", "Bin center", "Exact"),
]
_summary_lines = [
    f"{feature:<21s}{thermo_cell:<15s}{level_cell:<15s}{fpe_cell}"
    for feature, thermo_cell, level_cell, fpe_cell in _summary_rows
]
# Horizontal rule between the header row and the data rows
_summary_lines.insert(
    1, "─────────────────────────────────────────────────────────────"
)

# Leading and trailing newlines give one blank line before and after the
# table when it is printed.
summary_table = "\n" + "\n".join(_summary_lines) + "\n"

print(summary_table)

print("\nQuick selection guide:")
print("  1. Check model: MAP/BSC? → Use Thermometer or Level")
print("  2. Need reversible? → Level (discrete) or FPE (smooth)")
print("  3. Ordinal only? → Thermometer")
print("  4. Continuous precision? → FPE (if using FHRR/HRR)")
print()

print("Next steps:")
print("  → 11_encoders_fractional_power.py - FPE deep dive")
print("  → 02_models_comparison.py - Choose the right model")
print("  → 20_app_text_classification.py - Apply encoders in practice")
print()
print("=" * 70)

Gallery generated by Sphinx-Gallery