Multi-Factor Unbinding and Factorization Methods

Topics: Factorization, multi-factor unbinding, composite structures, iterative cleanup
Time: 15 minutes
Prerequisites: 27_cleanup_strategies.py, 24_app_working_memory.py
Related: 23_app_symbolic_reasoning.py
This example demonstrates advanced techniques for factorizing composite hypervectors — decomposing bundled and bound representations back into their constituent factors. Mastering factorization is essential for information retrieval from distributed hyperdimensional representations.
Key concepts:
- Bundle factorization: Decompose bundled items (A ⊕ B ⊕ C → A, B, C)
- Binding factorization: Decompose bound structures (A ⊗ B ⊗ C → A, B, C)
- Iterative unbinding: Sequential factor extraction
- Noise accumulation: How errors compound during factorization
- Practical strategies: When and how to factorize
Factorization enables querying and retrieving information from complex composite representations built through binding and bundling operations.
 import numpy as np
 from holovec import VSA
 from holovec.utils.cleanup import BruteForceCleanup, ResonatorCleanup

 # Title banner for the whole walkthrough.
 title_rule = "=" * 70
 print(title_rule)
 print("Multi-Factor Unbinding and Factorization")
 print(title_rule)
 print()

 # Shared VSA model used by every demo below: FHRR, 10,000 dimensions,
 # fixed seed so repeated runs start from the same model state.
 model = VSA.create('FHRR', dim=10000, seed=42)

 # Shared cleanup strategy; every cleanup()/factorize() call below goes
 # through this one object.
 cleanup = BruteForceCleanup()

 # ============================================================================
 # Demo 1: Bundle Factorization - Recovering Bundled Items
 # ============================================================================
 print("=" * 70)
 print("Demo 1: Bundle Factorization")
 print("=" * 70)

 print("\nScenario: Bundle of 4 items")

 # Codebook: ten random hypervectors keyed item_0 .. item_9 (seeds 100..109).
 # Later demos reuse this dictionary.
 items = {f"item_{i}": model.random(seed=100 + i) for i in range(10)}

 # Names of the four codebook entries that are superimposed into the bundle.
 # Kept in one place so the scoring below checks against the same set.
 bundled_names = ("item_0", "item_2", "item_5", "item_7")
 bundle = model.bundle([items[name] for name in bundled_names])

 print("  Bundle: item_0 ⊕ item_2 ⊕ item_5 ⊕ item_7")

 # Factorize to recover all items
 print("\n" + "=" * 70)
 print("Factorizing bundle:")
 print("=" * 70)

 # Ask for 6 factors although only 4 were bundled, so the listing also shows
 # how non-members score.
 labels, sims = cleanup.factorize(bundle, items, model, n_factors=6)

 print("\nRecovered factors (top 6):")
 for i, (label, sim) in enumerate(zip(labels, sims), 1):
     if label in bundled_names:
         in_bundle = "✓"
     else:
         in_bundle = "✗"
     print(f"  {i}. {label:10s}: {sim:.3f}  [{in_bundle}]")

 # Recall@4: how many of the first four returned labels were really bundled.
 correct_in_top4 = sum(name in bundled_names for name in labels[:4])
 recall = correct_in_top4 / 4.0

 print(f"\nRecall@4: {recall:.2f} ({correct_in_top4}/4 factors recovered)")

 print("\nKey observation:")
 print("  - Top factors are the original bundled items")
 print("  - Similarity degrades but items still identifiable")
 print("  - Ideal for 'what's in this bundle?' queries")

 # ============================================================================
 # Demo 2: Binding Chain Factorization - Sequential Unbinding
 # ============================================================================
 chain_rule = "=" * 70
 print("\n" + chain_rule)
 print("Demo 2: Binding Chain Factorization")
 print(chain_rule)

 print("\nScenario: Chain of bindings (A ⊗ B ⊗ C)")

 # The three factors of the chain, taken from the `items` codebook.
 A, B, C = items["item_0"], items["item_1"], items["item_2"]

 # Build the chain left to right: (A ⊗ B) first, then bind that with C.
 a_bound_b = model.bind(A, B)
 chain = model.bind(a_bound_b, C)

 print("  Chain: item_0 ⊗ item_1 ⊗ item_2")

 # Method 1: Sequential unbinding (if you know the order)
 print("\n" + chain_rule)
 print("Method 1: Sequential unbinding (knowing order)")
 print(chain_rule)

 # Step 1: remove C from the chain; what remains should approximate (A ⊗ B).
 step1 = model.unbind(chain, C)
 print("\n  Step 1: Unbind item_2")
 # The remainder is still a bound pair, so it cannot be matched against
 # `items` directly. Instead every codebook entry is bound with item_1 and
 # the remainder is cleaned up against those pairs; the winning label names
 # the item X whose (X ⊗ item_1) is the best match.
 pair_codebook = {name: model.bind(vector, B) for name, vector in items.items()}
 label1, sim1 = cleanup.cleanup(step1, pair_codebook, model)
 print(f"    Result ≈ (item_0 ⊗ item_1), found: {label1} (sim={sim1:.3f})")

 # Step 2: remove B from the remainder; the result should approximate A and
 # can now be cleaned up against the plain codebook.
 step2 = model.unbind(step1, B)
 print("\n  Step 2: Unbind item_1")
 label2, sim2 = cleanup.cleanup(step2, items, model)
 print(f"    Result ≈ item_0, found: {label2} (sim={sim2:.3f})")

 print("\nKey observation:")
 print("  - Sequential unbinding requires knowing binding order")
 print("  - Each unbind step recovers one factor")
 print("  - Most reliable when order is known")

 # ============================================================================
 # Demo 3: Mixed Binding and Bundling - Structured Factorization
 # ============================================================================
 section_rule = "=" * 70
 print("\n" + section_rule)
 print("Demo 3: Mixed Operations - Structured Factorization")
 print(section_rule)

 print("\nScenario: Role-filler structure with multiple bindings")
 print("  Structure: (role_A ⊗ filler_1) ⊕ (role_B ⊗ filler_2)")

 # Two fresh role vectors; the fillers are existing codebook entries so the
 # unbound results can be cleaned up against `items`.
 role_A = model.random(seed=200)
 role_B = model.random(seed=201)
 filler_1 = items["item_3"]
 filler_2 = items["item_4"]

 # Bind each role to its filler, then superimpose the two pairs.
 pair_a = model.bind(role_A, filler_1)
 pair_b = model.bind(role_B, filler_2)
 struct = model.bundle([pair_a, pair_b])

 print("\n  role_A ⊗ item_3")
 print("  role_B ⊗ item_4")
 print("  → bundled together")

 # Query by role A: unbind the role from the whole structure, then let
 # cleanup pick the nearest codebook entry.
 print("\n" + section_rule)
 print("Query: What is bound to role_A?")
 print(section_rule)

 unbound_a = model.unbind(struct, role_A)
 label_A, sim_A = cleanup.cleanup(unbound_a, items, model)

 print(f"\n  Unbind role_A: {label_A} (similarity={sim_A:.3f})")
 print("  Expected: item_3")

 # Query by role B: same procedure with the other role.
 print("\n" + section_rule)
 print("Query: What is bound to role_B?")
 print(section_rule)

 unbound_b = model.unbind(struct, role_B)
 label_B, sim_B = cleanup.cleanup(unbound_b, items, model)

 print(f"\n  Unbind role_B: {label_B} (similarity={sim_B:.3f})")
 print("  Expected: item_4")

 print("\nKey observation:")
 print("  - Can query structure by role (dimension)")
 print("  - Unbinding isolates specific role-filler pairs")
 print("  - Essential pattern for structured retrieval")

 # ============================================================================
 # Demo 4: Noise Accumulation in Factorization
 # ============================================================================
 print("\n" + "=" * 70)
 print("Demo 4: Noise Accumulation During Factorization")
 print("=" * 70)

 print("\nTesting: Bundle size vs. factorization accuracy")

 # Test different bundle sizes
 sizes = [2, 4, 6, 8, 10]

 # Search codebook = the 10 candidate items plus 10 distractors that are never
 # bundled. Searching only `items` would make the measurement vacuous: with
 # size == 10 and n_factors == 10 every codebook entry is a correct answer, so
 # recall could never drop below 1.0 (and at size == 8 only two wrong answers
 # would even exist). The distractors give factorize() something to get wrong.
 distractors = {f"distractor_{i}": model.random(seed=500 + i) for i in range(10)}
 search_codebook = {**items, **distractors}

 # Size the rule from the header so the two cannot drift apart.
 header = f"{'Size':>5s} | {'Recall@Top':>12s} | {'Avg Sim':>10s} | {'Correct':>10s}"
 print("\n" + header)
 print("-" * len(header))

 for size in sizes:
     # Bundle the first `size` codebook items (item_0 .. item_{size-1}).
     selected = [items[f"item_{i}"] for i in range(size)]
     test_bundle = model.bundle(selected)

     # Ask for exactly as many factors as were bundled.
     labels_test, sims_test = cleanup.factorize(
         test_bundle, search_codebook, model, n_factors=size
     )

     # Recall = fraction of returned labels that were really in the bundle;
     # avg_sim = mean similarity reported for those returned factors.
     expected = {f"item_{i}" for i in range(size)}
     correct_count = sum(1 for label in labels_test[:size] if label in expected)
     recall = correct_count / size
     avg_sim = np.mean(sims_test[:size])

     # Format "k/n" as one field so it stays inside the 10-character column.
     ratio = f"{correct_count}/{size}"
     print(f"{size:>5d} | {recall:>12.2f} | {avg_sim:>10.3f} | {ratio:>10s}")

 print("\nKey observation:")
 print("  - Accuracy decreases with more bundled items")
 print("  - Similarities degrade due to interference")
 print("  - Practical limit: ~5-7 factors for reliable recovery")
 print("  - Mirrors human working memory capacity!")

 # ============================================================================
 # Demo 5: Practical Application - Query Decomposition
 # ============================================================================
 query_rule = "=" * 70
 print("\n" + query_rule)
 print("Demo 5: Practical Application - Complex Query")
 print(query_rule)

 print("\nScenario: Multi-attribute product search")
 print("  Query: color=red AND category=laptop AND price=affordable")

 # Attribute (role) vectors.
 COLOR = model.random(seed=300)
 CATEGORY = model.random(seed=301)
 PRICE = model.random(seed=302)

 # Value (filler) vectors that actually appear in the query.
 red = model.random(seed=400)
 laptop = model.random(seed=401)
 affordable = model.random(seed=402)

 # The query is the bundle of the three attribute ⊗ value pairs.
 attribute_value_pairs = ((COLOR, red), (CATEGORY, laptop), (PRICE, affordable))
 query = model.bundle(
     [model.bind(attribute, value) for attribute, value in attribute_value_pairs]
 )

 print("\n  Query HV created (color ⊗ red) ⊕ (category ⊗ laptop) ⊕ (price ⊗ affordable)")

 # Decompose query to understand it
 print("\n" + query_rule)
 print("Decomposing query attributes:")
 print(query_rule)

 # Codebook holding only the attribute vectors.
 attributes = dict(COLOR=COLOR, CATEGORY=CATEGORY, PRICE=PRICE)

 # Score the raw query against the attribute codebook and list the results.
 attr_labels, attr_sims = cleanup.factorize(query, attributes, model, n_factors=3)

 print("\nQuery contains these attributes:")
 for label, sim in zip(attr_labels, attr_sims):
     print(f"  {label:10s}: {sim:.3f}")

 # Extract values for each attribute
 print("\n" + query_rule)
 print("Extracting attribute values:")
 print(query_rule)

 # Value codebook: the three values used in the query plus one unused
 # alternative per attribute, so cleanup has a wrong option to reject.
 values = {
     "red": red,
     "blue": model.random(seed=403),
     "laptop": laptop,
     "phone": model.random(seed=404),
     "affordable": affordable,
     "expensive": model.random(seed=405),
 }


 def _value_for(attribute):
     """Unbind *attribute* from ``query`` and clean the result up against ``values``.

     Returns whatever ``cleanup.cleanup`` returns, which the callers below
     unpack as a ``(label, similarity)`` pair.
     """
     return cleanup.cleanup(model.unbind(query, attribute), values, model)


 # Extract color value
 color_label, color_sim = _value_for(COLOR)
 print(f"\n  COLOR value: {color_label} (similarity={color_sim:.3f})")

 # Extract category value
 category_label, category_sim = _value_for(CATEGORY)
 print(f"  CATEGORY value: {category_label} (similarity={category_sim:.3f})")

 # Extract price value
 price_label, price_sim = _value_for(PRICE)
 print(f"  PRICE value: {price_label} (similarity={price_sim:.3f})")

 print("\nKey observation:")
 print("  - Can decompose complex queries into attributes + values")
 print("  - Enables query understanding and refinement")
 print("  - Practical for search engines and databases")

 # ============================================================================
 # Demo 6: Best Practices for Factorization
 # ============================================================================
 print("\n" + "=" * 70)
 print("Demo 6: Factorization Best Practices")
 print("=" * 70)

 # Static guidance text, one entry per output line. An empty string stands
 # for a blank line (print("") emits the same single newline as print()).
 best_practice_lines = (
     "\n✓ DO:",
     "  - Factorize bundles with ≤ 7 items for best results",
     "  - Use cleanup strategies (BruteForce or Resonator)",
     "  - Provide comprehensive codebook for cleanup",
     "  - Check similarity scores to assess confidence",
     "  - Sequential unbinding when order is known",
     "",
     "✗ DON'T:",
     "  - Bundle > 10 items if you need to factorize later",
     "  - Expect perfect recovery (always approximate)",
     "  - Unbind without cleanup (results are noisy)",
     "  - Ignore similarity scores (they indicate confidence)",
     "  - Chain too many unbind operations (noise compounds)",
     "",
     "Strategies by use case:",
     "",
     "  Bundle factorization:",
     "    - Use: factorize() method",
     "    - Returns: top-k most similar items",
     "    - Best for: 'what's in this bundle?' queries",
     "",
     "  Binding chain factorization:",
     "    - Use: sequential unbind() + cleanup()",
     "    - Requires: knowing binding order",
     "    - Best for: structured data with known schema",
     "",
     "  Mixed operations:",
     "    - Use: unbind() by dimension + cleanup()",
     "    - Pattern: role-filler binding in bundles",
     "    - Best for: attribute-value structures",
     "",
 )
 for best_practice_line in best_practice_lines:
     print(best_practice_line)

 # ============================================================================
 # Summary
 # ============================================================================
 # The closing recap is static text: one tuple entry per output line, with an
 # empty string for each blank line and the banner rule reused at both ends.
 summary_rule = "=" * 70
 summary_lines = (
     summary_rule,
     "Summary: Factorization Key Takeaways",
     summary_rule,
     "",
     "✓ Bundle factorization: Decompose A ⊕ B ⊕ C → A, B, C",
     "✓ Binding chains: Sequential unbinding with known order",
     "✓ Mixed structures: Combine unbinding + cleanup",
     "✓ Noise accumulation: Accuracy degrades with complexity",
     "✓ Practical limit: ~5-7 factors for reliable recovery",
     "",
     "Core factorization pattern:",
     "  1. Create comprehensive codebook",
     "  2. Call cleanup.factorize(composite, codebook, model, n_factors=k)",
     "  3. Check similarity scores for confidence",
     "  4. Use top-k results as recovered factors",
     "",
     "When to factorize:",
     "  - Retrieving bundled items from working memory",
     "  - Decomposing composite queries",
     "  - Understanding structured representations",
     "  - Multi-attribute search and filtering",
     "",
     "Complexity considerations:",
     "  - Bundle of 3 items: Easy, high accuracy",
     "  - Bundle of 5-7 items: Moderate, good accuracy",
     "  - Bundle of 10+ items: Hard, degraded accuracy",
     "  - Deep binding chains: Noise compounds exponentially",
     "",
     "Next steps:",
     "  → Apply factorization in your domain",
     "  → Combine with 27_cleanup_strategies.py techniques",
     "  → Use in 24_app_working_memory.py patterns",
     "",
     summary_rule,
 )
 for summary_line in summary_lines:
     print(summary_line)
Gallery generated by Sphinx-Gallery