"""
Thermometer and Level Encoders Deep Dive
========================================

Topics: Ordinal encoding, discrete bins, model compatibility, use cases
Time: 10 minutes
Prerequisites: 10_encoders_scalar.py, 01_basic_operations.py
Related: 11_encoders_fractional_power.py, 02_models_comparison.py

This example explores ThermometerEncoder and LevelEncoder - the universal
scalar encoders that work with all VSA models (MAP, BSC, FHRR, HRR, etc).

Key concepts:
- Thermometer encoding: Cumulative activation (ordinal relationships)
- Level encoding: One-hot bins (categorical values)
- Model compatibility: Works with ALL VSA models
- Trade-offs: Discrete vs smooth similarity
- Use cases: Rankings, categories, MAP/BSC applications

Use these encoders when you need model-agnostic encoding or when working
with MAP, BSC, or BSDC models (which don't support FPE).
"""

import itertools
import zlib

import numpy as np
from holovec import VSA
from holovec.encoders import ThermometerEncoder, LevelEncoder

# Title banner for the whole walkthrough.
print("=" * 70)
print("Thermometer and Level Encoders Deep Dive")
print("=" * 70)
print()

# ============================================================================
# Demo 1: Thermometer Encoder - Ordinal Relationships
# ============================================================================
print("=" * 70)
print("Demo 1: ThermometerEncoder - Ordinal Encoding")
print("=" * 70)

# MAP model used by this demo; later demos read this same `model` binding.
model = VSA.create('MAP', dim=10000, seed=42)

# Bin counts to compare over the same 0-100 value range.
n_bins_options = [10, 20, 50, 100]

print(f"\nModel: {model.model_name}, dimension={model.dimension}")
print("Range: 0-100")
print()

# Value pairs whose similarity is reported for every bin count.
test_pairs = [(50, 51), (50, 55), (50, 60), (50, 75)]

# Header row. Each pair label is right-aligned in a 7-character field, the
# same width and alignment as the similarity values printed below, so every
# label sits directly above its own column.
print(f"{'Bins':<8s} ", end="")
for v1, v2 in test_pairs:
    pair_label = f"{v1}-{v2}"
    print(f"{pair_label:>7s} ", end="")
print()
print("-" * 50)

# One table row per bin count: build an encoder, then compare each pair.
for n_bins in n_bins_options:
    encoder = ThermometerEncoder(model, min_val=0, max_val=100, n_bins=n_bins)
    print(f"{n_bins:<8d} ", end="")

    for v1, v2 in test_pairs:
        hv1 = encoder.encode(v1)
        hv2 = encoder.encode(v2)
        sim = float(model.similarity(hv1, hv2))
        print(f"{sim:7.3f} ", end="")
    print()

print("\nObservations:")
print("  - More bins = finer granularity, lower similarity for same distance")
print("  - Fewer bins = coarser, higher similarity, more grouping")
print("  - Ordinal property: similarity decreases monotonically with distance")

# ============================================================================
# Demo 2: Thermometer Properties
# ============================================================================
print("\n" + "=" * 70)
print("Demo 2: Thermometer Encoding Properties")
print("=" * 70)

# Encoder configuration lives in one place so the reported bin size is
# derived from the same numbers the encoder is built with.
demo2_min_val, demo2_max_val, demo2_n_bins = 0, 100, 20
encoder = ThermometerEncoder(
    model, min_val=demo2_min_val, max_val=demo2_max_val, n_bins=demo2_n_bins
)

print(f"\nBin size: {((demo2_max_val - demo2_min_val) / demo2_n_bins):.1f} units per bin")
print(f"Reversible: {encoder.is_reversible}")
print(f"Compatible models: {encoder.compatible_models}")
print()

# Show ordinal property: compare several values against one reference.
values = [10, 30, 50, 70, 90]
reference = 50

print(f"Reference value: {reference}")
print(f"\n{'Value':<10s} {'Distance':<10s} {'Similarity':<12s}")
print("-" * 40)

# The reference hypervector is identical for every row, so encode it once
# instead of once per loop iteration.
hv_ref = encoder.encode(reference)

for val in values:
    hv_val = encoder.encode(val)
    sim = float(model.similarity(hv_ref, hv_val))
    dist = abs(val - reference)
    print(f"{val:<10.1f} {dist:<10.1f} {sim:8.3f}")

print("\nKey property:")
print("  - Monotonic: similarity decreases as distance increases")
print("  - Symmetric: sim(A,B) = sim(B,A)")
print("  - Cumulative: each value activates bins 0..n")

# ============================================================================
# Demo 3: Level Encoder - Discrete Categories
# ============================================================================
print("\n" + "=" * 70)
print("Demo 3: LevelEncoder - Discrete Bins")
print("=" * 70)

# Encoder configuration is named once; the bin size and the expected level
# index below are derived from it rather than from repeated literals.
level_min_val, level_max_val, level_count = 0, 100, 5
level_encoder = LevelEncoder(
    model, min_val=level_min_val, max_val=level_max_val, n_levels=level_count
)
level_width = (level_max_val - level_min_val) / level_count

print(f"\nLevels: {level_encoder.n_levels}")
print(f"Bin size: {level_width:.1f} units per level")
print(f"Reversible: {level_encoder.is_reversible}")
print()

# Map values to levels
test_values = [5, 15, 35, 55, 75, 95]

print(f"{'Value':<10s} {'Level':<10s} {'Decoded':<10s}")
print("-" * 35)

for val in test_values:
    hv = level_encoder.encode(val)
    decoded = level_encoder.decode(hv)
    # Expected level computed on the script side from equal-width bins; it is
    # clamped so the top edge (val == max) lands in the last level instead of
    # producing an out-of-range index.
    expected_level = min(int((val - level_min_val) // level_width), level_count - 1)
    print(f"{val:<10.1f} {expected_level:<10d} {decoded:<10.1f}")

print("\nLevel bins:")
print("  [0-20)   → Level 0 → decoded as 10.0")
print("  [20-40)  → Level 1 → decoded as 30.0")
print("  [40-60)  → Level 2 → decoded as 50.0")
print("  [60-80)  → Level 3 → decoded as 70.0")
print("  [80-100] → Level 4 → decoded as 90.0")

# ============================================================================
# Demo 4: Level vs Thermometer Comparison
# ============================================================================
print("\n" + "=" * 70)
print("Demo 4: Level vs Thermometer Similarity Patterns")
print("=" * 70)

# Two encoders over the same 0-100 range, both built on the shared model.
thermo = ThermometerEncoder(model, min_val=0, max_val=100, n_bins=20)
level = LevelEncoder(model, min_val=0, max_val=100, n_levels=5)

reference = 50.0
test_vals = np.linspace(0, 100, 11)  # 0, 10, ..., 100

print(f"\nReference: {reference}")
print(f"\n{'Value':<10s} {'Thermo Sim':<12s} {'Level Sim':<12s}")
print("-" * 40)

# The reference encodings do not depend on the loop variable, so compute
# them once up front instead of on every iteration.
ref_t = thermo.encode(reference)
ref_l = level.encode(reference)

for val in test_vals:
    # Similarity of each probe value to the reference under both encoders.
    sim_t = float(model.similarity(ref_t, thermo.encode(val)))
    sim_l = float(model.similarity(ref_l, level.encode(val)))

    print(f"{val:<10.1f} {sim_t:8.3f}     {sim_l:8.3f}")

print("\nKey differences:")
print("  Thermometer: Gradual similarity decay")
print("  Level:       High similarity within bin, drop across bins")

# ============================================================================
# Demo 5: Model Compatibility
# ============================================================================
print("\n" + "=" * 70)
print("Demo 5: Universal Model Compatibility")
print("=" * 70)

# Single probe value pushed through both encoders on every model family.
test_value = 42.5

print("\n✓ Thermometer and Level work with ALL models:\n")

for model_name in ('MAP', 'FHRR', 'HRR', 'BSC'):
    candidate = VSA.create(model_name, dim=5000, seed=42)

    # Thermometer: the encoding itself is not inspected; the call only has
    # to complete for this model.
    thermo_for_model = ThermometerEncoder(candidate, min_val=0, max_val=100, n_bins=20)
    _ = thermo_for_model.encode(test_value)

    # Level: encode, then decode to show the round trip result.
    level_for_model = LevelEncoder(candidate, min_val=0, max_val=100, n_levels=10)
    round_trip = level_for_model.decode(level_for_model.encode(test_value))

    status_line = f"{model_name:10s}: Thermometer ✓  Level ✓  (decoded={round_trip:.1f})"
    print(status_line)

print("\nVersus FPE:")
print("  FPE: Only FHRR, HRR (requires complex representation)")
print("  Thermo/Level: ALL models (universal)")

# ============================================================================
# Demo 6: Bin Count Selection
# ============================================================================
print("\n" + "=" * 70)
print("Demo 6: Choosing Number of Bins/Levels")
print("=" * 70)

print("\nRule of thumb:")
print("  bins ≈ dimension / 200  (for good orthogonality)")
print()

dimensions = [1000, 5000, 10000, 20000]

print(f"{'Dimension':<12s} {'Suggested Bins':<15s} {'Reasoning'}")
print("-" * 60)

# Apply the rule of thumb to each candidate dimension. The loop variable has
# its own name so it does not double as the `dim` used by the capacity test.
for candidate_dim in dimensions:
    suggested = candidate_dim // 200
    print(f"{candidate_dim:<12d} {suggested:<15d} ~{suggested} orthogonal vectors available")

print("\nTrade-offs:")
print("  More bins:   Finer resolution, needs higher dimension")
print("  Fewer bins:  Coarser, works with lower dimension")

# Test capacity
dim = 10000
print(f"\nCapacity test for dimension={dim}:")

for n_bins in [10, 50, 100, 200]:
    # A fresh, identically seeded model is created per bin count (as in the
    # original flow) and bound to its own name, leaving the shared `model`
    # binding untouched.
    capacity_model = VSA.create('MAP', dim=dim, seed=42)
    encoder = ThermometerEncoder(capacity_model, min_val=0, max_val=100, n_bins=n_bins)

    # Encode the lower edge of every bin.
    bin_hvs = [encoder.encode(i * (100 / n_bins)) for i in range(n_bins)]

    # Absolute similarity for every unordered pair of distinct bin vectors.
    sims = [
        abs(float(capacity_model.similarity(hv_a, hv_b)))
        for hv_a, hv_b in itertools.combinations(bin_hvs, 2)
    ]

    # Guard against an empty pair list (fewer than two bins).
    avg_cross_sim = np.mean(sims) if sims else 0

    print(f"  {n_bins:3d} bins: avg cross-similarity = {avg_cross_sim:.3f}")

print("\nTarget: cross-similarity < 0.1 for good separation")

# ============================================================================
# Demo 7: Use Cases and Recommendations
# ============================================================================
print("\n" + "=" * 70)
print("Demo 7: When to Use Each Encoder")
print("=" * 70)

# Each entry is one recommendation section: a heading followed by its
# bullet lines, printed verbatim in order.
encoder_guidance = (
    (
        "\n✓ Use ThermometerEncoder when:",
        "  - Need ordinal relationships (rankings, scores)",
        "  - Using MAP, BSC, or BSDC models",
        "  - Want monotonic similarity decay",
        "  - Don't need exact value recovery",
        "  Examples: product ratings, priority levels, age groups",
    ),
    (
        "✓ Use LevelEncoder when:",
        "  - Have discrete categories or bins",
        "  - Need reversible encoding with categorical output",
        "  - Want sharp boundaries between levels",
        "  - Using any VSA model",
        "  Examples: grade levels (A/B/C), risk categories, size buckets",
    ),
    (
        "✓ Use FractionalPowerEncoder when:",
        "  - Need smooth similarity for continuous values",
        "  - Using FHRR or HRR models",
        "  - Want exact value recovery (reversible)",
        "  - Have precise measurements",
        "  Examples: temperature, pressure, time, GPS coordinates",
    ),
)

final_section = len(encoder_guidance) - 1
for position, section_lines in enumerate(encoder_guidance):
    print(*section_lines, sep="\n")
    # Sections are separated by one blank line; nothing follows the last one.
    if position != final_section:
        print()

# ============================================================================
# Demo 8: Practical Pattern - Rating System
# ============================================================================
print("\n" + "=" * 70)
print("Demo 8: Practical Example - Product Rating System")
print("=" * 70)

model = VSA.create('MAP', dim=10000, seed=42)

# 5-star rating system with half stars (0.5 increments)
rating_encoder = LevelEncoder(model, min_val=0.0, max_val=5.0, n_levels=10)

# Sample products with ratings
products = {
    "Laptop": 4.5,
    "Mouse": 3.5,
    "Keyboard": 4.0,
    "Monitor": 4.5,
    "Webcam": 3.0
}

# Create product symbols. The seed is derived from a CRC32 of the product
# name rather than the built-in hash(): str hashes are salted per process,
# so hash(name) would give each product a different seed on every run.
product_hvs = {
    name: model.random(seed=zlib.crc32(name.encode("utf-8")) % 10000)
    for name in products
}

# Bind product to rating
RATING = model.random(seed=99)
product_ratings = {}
rating_hvs = {}  # encoded rating per product, reused by the query below

print("\nProduct ratings:")
for name, rating in products.items():
    rating_hv = rating_encoder.encode(rating)
    rating_hvs[name] = rating_hv
    # product ⊗ (RATING role ⊗ rating value)
    product_rating = model.bind(product_hvs[name], model.bind(RATING, rating_hv))
    product_ratings[name] = product_rating
    print(f"  {name:12s}: {rating:.1f} stars")

# Query: Find products with ~4.5 stars
target_rating = 4.5
target_hv = rating_encoder.encode(target_rating)

print(f"\nSearching for products rated ~{target_rating} stars:")
print(f"\n{'Product':<12s} {'Actual':<10s} {'Similarity':<12s}")
print("-" * 40)

# Compare each product's already-encoded rating with the target rating.
for name, rating in products.items():
    sim = float(model.similarity(rating_hvs[name], target_hv))
    print(f"{name:12s} {rating:.1f}        {sim:8.3f}")

print("\nHigh similarity products have similar ratings!")

# ============================================================================
# Summary
# ============================================================================
print("\n" + "=" * 70)
print("Summary: Thermometer vs Level vs FPE")
print("=" * 70)
print()

# Comparison table as data: the header first, then one tuple per feature,
# in the order (feature, Thermometer, Level, FPE).
summary_header = ("Feature", "Thermometer", "Level", "FPE")
summary_rows = [
    ("Model compatibility", "ALL", "ALL", "FHRR,HRR only"),
    ("Similarity type", "Gradual decay", "Sharp bins", "Smooth decay"),
    ("Reversible", "No", "Yes", "Yes"),
    ("Ordinal property", "Yes", "Partial", "Yes"),
    ("Best for", "Rankings", "Categories", "Continuous"),
    ("Bin granularity", "Flexible", "Flexible", "Continuous"),
    ("Value recovery", "No", "Bin center", "Exact"),
]
summary_rule = "─────────────────────────────────────────────────────────────"

# Every row goes through the same fixed-width fields (21 / 15 / 15 columns),
# so all rows share identical column starts. The final column is left
# unpadded to avoid trailing whitespace.
summary_lines = [
    f"{feature:<21s}{thermometer_cell:<15s}{level_cell:<15s}{fpe_cell}"
    for feature, thermometer_cell, level_cell, fpe_cell in [summary_header, *summary_rows]
]
summary_lines.insert(1, summary_rule)  # rule goes directly under the header

# Leading and trailing newlines frame the table with blank lines when printed.
summary_table = "\n" + "\n".join(summary_lines) + "\n"

print(summary_table)

print("\nQuick selection guide:")
print("  1. Check model: MAP/BSC? → Use Thermometer or Level")
print("  2. Need reversible? → Level (discrete) or FPE (smooth)")
print("  3. Ordinal only? → Thermometer")
print("  4. Continuous precision? → FPE (if using FHRR/HRR)")
print()

print("Next steps:")
print("  → 11_encoders_fractional_power.py - FPE deep dive")
print("  → 02_models_comparison.py - Choose the right model")
print("  → 20_app_text_classification.py - Apply encoders in practice")
print()
print("=" * 70)