Note
Go to the end to download the full example code.
Thermometer and Level Encoders Deep Dive
Topics: Ordinal encoding, discrete bins, model compatibility, use cases
Time: 10 minutes
Prerequisites: 10_encoders_scalar.py, 01_basic_operations.py
Related: 11_encoders_fractional_power.py, 02_models_comparison.py
This example explores ThermometerEncoder and LevelEncoder, the universal scalar encoders that work with all VSA models (MAP, BSC, FHRR, HRR, etc.).
Key concepts:
- Thermometer encoding: Cumulative activation (ordinal relationships)
- Level encoding: One-hot bins (categorical values)
- Model compatibility: Works with ALL VSA models
- Trade-offs: Discrete vs smooth similarity
- Use cases: Rankings, categories, MAP/BSC applications
Use these encoders when you need model-agnostic encoding or when working with MAP, BSC, or BSDC models (which don’t support FPE).
import zlib

import numpy as np
from holovec import VSA
from holovec.encoders import ThermometerEncoder, LevelEncoder
27
# Title banner: the heading framed by two 70-character rules, then a blank line.
print(
    "=" * 70,
    "Thermometer and Level Encoders Deep Dive",
    "=" * 70,
    "",
    sep="\n",
)
32
# ============================================================================
# Demo 1: Thermometer Encoder - Ordinal Relationships
# ============================================================================
# Prints a table with one row per bin count and one column per value pair,
# showing how the similarity of the two encoded values changes with the
# encoder's bin count.
print("=" * 70)
print("Demo 1: ThermometerEncoder - Ordinal Encoding")
print("=" * 70)

# This model is reused by the following demos until one of them rebinds it.
model = VSA.create('MAP', dim=10000, seed=42)

# Bin counts to compare; every encoder below covers the same 0-100 range.
n_bins_options = [10, 20, 50, 100]

print(f"\nModel: {model.model_name}, dimension={model.dimension}")
print("Range: 0-100")
print()

# Value pairs at increasing distance from 50; one table column per pair.
test_pairs = [(50, 51), (50, 55), (50, 60), (50, 75)]

# Header cells and similarity cells share this width so that every number is
# printed directly underneath the pair it belongs to.
pair_col_width = 8

print(f"{'Bins':<8s} ", end="")
for v1, v2 in test_pairs:
    pair_label = f"{v1}-{v2}"
    print(f"{pair_label:>{pair_col_width}s} ", end="")
print()
print("-" * 50)

for n_bins in n_bins_options:
    encoder = ThermometerEncoder(model, min_val=0, max_val=100, n_bins=n_bins)
    print(f"{n_bins:<8d} ", end="")

    for v1, v2 in test_pairs:
        hv1 = encoder.encode(v1)
        hv2 = encoder.encode(v2)
        sim = float(model.similarity(hv1, hv2))
        print(f"{sim:{pair_col_width}.3f} ", end="")
    print()

print("\nObservations:")
print(" - More bins = finer granularity, lower similarity for same distance")
print(" - Fewer bins = coarser, higher similarity, more grouping")
print(" - Ordinal property: similarity decreases monotonically with distance")
73
# ============================================================================
# Demo 2: Thermometer Properties
# ============================================================================
# Reports the encoder's metadata, then tabulates similarity to a fixed
# reference value for a handful of values spread across the range.
print("\n" + "=" * 70, "Demo 2: Thermometer Encoding Properties", "=" * 70, sep="\n")

encoder = ThermometerEncoder(model, min_val=0, max_val=100, n_bins=20)

print(f"\nBin size: {(100 / 20):.1f} units per bin")
print(f"Reversible: {encoder.is_reversible}")
print(f"Compatible models: {encoder.compatible_models}")
print()

# Values compared against the reference; the reference itself is included,
# so one row shows a value compared with itself.
values = [10, 30, 50, 70, 90]
reference = 50

print(f"Reference value: {reference}")
print(f"\n{'Value':<10s} {'Distance':<10s} {'Similarity':<12s}")
print("-" * 40)

for candidate in values:
    # The reference is re-encoded on every pass, matching the original call
    # sequence exactly.
    reference_hv = encoder.encode(reference)
    candidate_hv = encoder.encode(candidate)
    similarity = float(model.similarity(reference_hv, candidate_hv))
    gap = abs(candidate - reference)
    print(f"{candidate:<10.1f} {gap:<10.1f} {similarity:8.3f}")

for summary_line in (
    "\nKey property:",
    " - Monotonic: similarity decreases as distance increases",
    " - Symmetric: sim(A,B) = sim(B,A)",
    " - Cumulative: each value activates bins 0..n",
):
    print(summary_line)
107
# ============================================================================
# Demo 3: Level Encoder - Discrete Categories
# ============================================================================
# Encodes sample values with a 5-level encoder over 0-100 and prints each
# value next to its level index and the value recovered by decode().
print("\n" + "=" * 70, "Demo 3: LevelEncoder - Discrete Bins", "=" * 70, sep="\n")

# Five levels over 0-100, i.e. 20 units per level.
level_encoder = LevelEncoder(model, min_val=0, max_val=100, n_levels=5)

print(f"\nLevels: {level_encoder.n_levels}")
print(f"Bin size: {100 / 5:.1f} units per level")
print(f"Reversible: {level_encoder.is_reversible}")
print()

# One or two sample values inside each level.
test_values = [5, 15, 35, 55, 75, 95]

print(f"{'Value':<10s} {'Level':<10s} {'Decoded':<10s}")
print("-" * 35)

for sample in test_values:
    recovered = level_encoder.decode(level_encoder.encode(sample))
    # The level index is computed by hand from the 20-unit bin width rather
    # than read back from the encoder.
    level_index = int(sample // 20)
    print(f"{sample:<10.1f} {level_index:<10d} {recovered:<10.1f}")

for bin_line in (
    "\nLevel bins:",
    " [0-20) → Level 0 → decoded as 10.0",
    " [20-40) → Level 1 → decoded as 30.0",
    " [40-60) → Level 2 → decoded as 50.0",
    " [60-80) → Level 3 → decoded as 70.0",
    " [80-100] → Level 4 → decoded as 90.0",
):
    print(bin_line)
140
# ============================================================================
# Demo 4: Level vs Thermometer Comparison
# ============================================================================
# Tabulates, for eleven evenly spaced values, the similarity to a fixed
# reference under each of the two encoders.
print(
    "\n" + "=" * 70,
    "Demo 4: Level vs Thermometer Similarity Patterns",
    "=" * 70,
    sep="\n",
)

thermo = ThermometerEncoder(model, min_val=0, max_val=100, n_bins=20)
level = LevelEncoder(model, min_val=0, max_val=100, n_levels=5)

reference = 50.0
test_vals = np.linspace(0, 100, 11)  # 0, 10, ..., 100

print(f"\nReference: {reference}")
print(f"\n{'Value':<10s} {'Thermo Sim':<12s} {'Level Sim':<12s}")
print("-" * 40)

for probe in test_vals:
    # Thermometer first, then level. Within each encoder the reference is
    # encoded before the probe value.
    thermo_sim, level_sim = (
        float(model.similarity(enc.encode(reference), enc.encode(probe)))
        for enc in (thermo, level)
    )
    print(f"{probe:<10.1f} {thermo_sim:8.3f} {level_sim:8.3f}")

for difference_line in (
    "\nKey differences:",
    " Thermometer: Gradual similarity decay",
    " Level: High similarity within bin, drop across bins",
):
    print(difference_line)
174
# ============================================================================
# Demo 5: Model Compatibility
# ============================================================================
# Builds both encoders on several VSA model types and encodes one value with
# each, printing a confirmation line per model.
print("\n" + "=" * 70, "Demo 5: Universal Model Compatibility", "=" * 70, sep="\n")

test_value = 42.5

print("\n✓ Thermometer and Level work with ALL models:\n")

for model_name in ('MAP', 'FHRR', 'HRR', 'BSC'):
    vsa = VSA.create(model_name, dim=5000, seed=42)

    # Thermometer: the result is not inspected; the call only has to succeed.
    thermo_enc = ThermometerEncoder(vsa, min_val=0, max_val=100, n_bins=20)
    thermo_enc.encode(test_value)

    # Level: round-trip the value so the decoded result can be shown.
    level_enc = LevelEncoder(vsa, min_val=0, max_val=100, n_levels=10)
    round_trip = level_enc.decode(level_enc.encode(test_value))

    print(f"{model_name:10s}: Thermometer ✓ Level ✓ (decoded={round_trip:.1f})")

for fpe_line in (
    "\nVersus FPE:",
    " FPE: Only FHRR, HRR (requires complex representation)",
    " Thermo/Level: ALL models (universal)",
):
    print(fpe_line)
203
# ============================================================================
# Demo 6: Bin Count Selection
# ============================================================================
# Prints the dimension/200 rule of thumb for several dimensions, then
# measures the average absolute similarity between the encodings of all
# distinct bin pairs for a few bin counts.
print("\n" + "=" * 70, "Demo 6: Choosing Number of Bins/Levels", "=" * 70, sep="\n")

print("\nRule of thumb:")
print(" bins ≈ dimension / 200 (for good orthogonality)")
print()

dimensions = [1000, 5000, 10000, 20000]

print(f"{'Dimension':<12s} {'Suggested Bins':<15s} {'Reasoning'}")
print("-" * 60)

for dimension in dimensions:
    bins_hint = dimension // 200
    print(f"{dimension:<12d} {bins_hint:<15d} ~{bins_hint} orthogonal vectors available")

for tradeoff_line in (
    "\nTrade-offs:",
    " More bins: Finer resolution, needs higher dimension",
    " Fewer bins: Coarser, works with lower dimension",
):
    print(tradeoff_line)

# Test capacity
dim = 10000
print(f"\nCapacity test for dimension={dim}:")

for n_bins in [10, 50, 100, 200]:
    # A fresh, identically seeded model per bin count, so every encoder
    # starts from the same model state.
    model = VSA.create('MAP', dim=dim, seed=42)
    encoder = ThermometerEncoder(model, min_val=0, max_val=100, n_bins=n_bins)

    # Encode the lower edge of every bin.
    bin_width = 100 / n_bins
    bin_hvs = [encoder.encode(k * bin_width) for k in range(n_bins)]

    # Absolute similarity for every unordered pair of distinct bins.
    # NOTE(review): the thermometer code is described earlier as cumulative,
    # so neighbouring bins presumably overlap heavily - confirm that the
    # "< 0.1" target printed below is the intended expectation here.
    cross_sims = [
        abs(float(model.similarity(hv_a, hv_b)))
        for pos, hv_a in enumerate(bin_hvs)
        for hv_b in bin_hvs[pos + 1:]
    ]

    avg_cross_sim = np.mean(cross_sims) if cross_sims else 0

    print(f" {n_bins:3d} bins: avg cross-similarity = {avg_cross_sim:.3f}")

print("\nTarget: cross-similarity < 0.1 for good separation")
251
# ============================================================================
# Demo 7: Use Cases and Recommendations
# ============================================================================
# Purely informational: prints when to pick each of the three encoders.
print("\n" + "=" * 70, "Demo 7: When to Use Each Encoder", "=" * 70, sep="\n")

# Each tuple is printed line by line; an empty string yields the blank line
# that separates one recommendation from the next.
thermometer_advice = (
    "\n✓ Use ThermometerEncoder when:",
    " - Need ordinal relationships (rankings, scores)",
    " - Using MAP, BSC, or BSDC models",
    " - Want monotonic similarity decay",
    " - Don't need exact value recovery",
    " Examples: product ratings, priority levels, age groups",
    "",
)
level_advice = (
    "✓ Use LevelEncoder when:",
    " - Have discrete categories or bins",
    " - Need reversible encoding with categorical output",
    " - Want sharp boundaries between levels",
    " - Using any VSA model",
    " Examples: grade levels (A/B/C), risk categories, size buckets",
    "",
)
fpe_advice = (
    "✓ Use FractionalPowerEncoder when:",
    " - Need smooth similarity for continuous values",
    " - Using FHRR or HRR models",
    " - Want exact value recovery (reversible)",
    " - Have precise measurements",
    " Examples: temperature, pressure, time, GPS coordinates",
)

for advice in (thermometer_advice, level_advice, fpe_advice):
    for advice_line in advice:
        print(advice_line)
281
# ============================================================================
# Demo 8: Practical Pattern - Rating System
# ============================================================================
# Encodes star ratings with a LevelEncoder, binds each product symbol to its
# rating under a RATING role vector, and ranks products by how similar their
# encoded rating is to a target rating.
print("\n" + "=" * 70)
print("Demo 8: Practical Example - Product Rating System")
print("=" * 70)

model = VSA.create('MAP', dim=10000, seed=42)

# 5-star rating system with half stars (0.5 increments)
rating_encoder = LevelEncoder(model, min_val=0.0, max_val=5.0, n_levels=10)

# Sample products with ratings
products = {
    "Laptop": 4.5,
    "Mouse": 3.5,
    "Keyboard": 4.0,
    "Monitor": 4.5,
    "Webcam": 3.0,
}

# Create product symbols. The seed is derived from the product name with
# CRC-32 rather than the built-in hash(): str hashes are salted per process,
# so hash(name) would hand every run a different seed and defeat the point
# of seeding.
product_hvs = {
    name: model.random(seed=zlib.crc32(name.encode("utf-8")) % 10000)
    for name in products
}

# Bind product to rating: product * (RATING * encoded rating).
RATING = model.random(seed=99)
product_ratings = {}

print("\nProduct ratings:")
for name, rating in products.items():
    rating_hv = rating_encoder.encode(rating)
    product_rating = model.bind(product_hvs[name], model.bind(RATING, rating_hv))
    product_ratings[name] = product_rating
    print(f" {name:12s}: {rating:.1f} stars")

# Query: Find products with ~4.5 stars
target_rating = 4.5
target_hv = rating_encoder.encode(target_rating)

print(f"\nSearching for products rated ~{target_rating} stars:")
print(f"\n{'Product':<12s} {'Actual':<10s} {'Similarity':<12s}")
print("-" * 40)

# The search compares freshly encoded ratings with the target directly; the
# bound records in product_ratings are built above but not queried here.
for name, rating in products.items():
    rating_hv = rating_encoder.encode(rating)
    sim = float(model.similarity(rating_hv, target_hv))
    print(f"{name:12s} {rating:.1f} {sim:8.3f}")

print("\nHigh similarity products have similar ratings!")
332
# ============================================================================
# Summary
# ============================================================================
# Prints a feature comparison of the three encoders, a short selection guide
# and pointers to related examples.
print("\n" + "=" * 70)
print("Summary: Thermometer vs Level vs FPE")
print("=" * 70)
print()

# The comparison table is assembled from rows with fixed column widths so
# that every cell lines up under its heading. The widths (21 + 15 + 12, plus
# the longest last-column entry of 13) add up to the 61-character ruler.
_summary_row_format = "{:<21s}{:<15s}{:<12s}{}"
_summary_rows = [
    ("Model compatibility", "ALL", "ALL", "FHRR,HRR only"),
    ("Similarity type", "Gradual decay", "Sharp bins", "Smooth decay"),
    ("Reversible", "No", "Yes", "Yes"),
    ("Ordinal property", "Yes", "Partial", "Yes"),
    ("Best for", "Rankings", "Categories", "Continuous"),
    ("Bin granularity", "Flexible", "Flexible", "Continuous"),
    ("Value recovery", "No", "Bin center", "Exact"),
]
_summary_lines = [
    _summary_row_format.format("Feature", "Thermometer", "Level", "FPE"),
    "─" * 61,
]
_summary_lines.extend(_summary_row_format.format(*row) for row in _summary_rows)

# A leading and a trailing newline frame the table with blank lines when it
# is printed.
summary_table = "\n" + "\n".join(_summary_lines) + "\n"

print(summary_table)

print("\nQuick selection guide:")
print(" 1. Check model: MAP/BSC? → Use Thermometer or Level")
print(" 2. Need reversible? → Level (discrete) or FPE (smooth)")
print(" 3. Ordinal only? → Thermometer")
print(" 4. Continuous precision? → FPE (if using FHRR/HRR)")
print()

print("Next steps:")
print(" → 11_encoders_fractional_power.py - FPE deep dive")
print(" → 02_models_comparison.py - Choose the right model")
print(" → 20_app_text_classification.py - Apply encoders in practice")
print()
print("=" * 70)