Note
Go to the end to download the full example code.
Multi-Factor Unbinding and Factorization Methods
Topics: Factorization, multi-factor unbinding, composite structures, iterative cleanup
Time: 15 minutes
Prerequisites: 27_cleanup_strategies.py, 24_app_working_memory.py
Related: 23_app_symbolic_reasoning.py
This example demonstrates advanced techniques for factorizing composite hypervectors — decomposing bundled and bound representations back into their constituent factors. Mastering factorization is essential for retrieving information from distributed hyperdimensional representations.
Key concepts:
- Bundle factorization: Decompose bundled items (A ⊕ B ⊕ C → A, B, C)
- Binding factorization: Decompose bound structures (A ⊗ B ⊗ C → A, B, C)
- Iterative unbinding: Sequential factor extraction
- Noise accumulation: How errors compound during factorization
- Practical strategies: When and how to factorize
Factorization enables querying and retrieving information from complex composite representations built through binding and bundling operations.
26 import numpy as np
27 from holovec import VSA
28 from holovec.utils.cleanup import BruteForceCleanup, ResonatorCleanup
29
# Script banner: rule, title, rule, then one blank line.
banner_rule = "=" * 70
print(banner_rule, "Multi-Factor Unbinding and Factorization", banner_rule, "", sep="\n")

# Hypervector model used by the demos: FHRR, 10000 dimensions, fixed seed.
model = VSA.create("FHRR", dim=10000, seed=42)

# Cleanup strategy used for the cleanup()/factorize() calls in the demos.
cleanup = BruteForceCleanup()
40
# ============================================================================
# Demo 1: Bundle Factorization - Recovering Bundled Items
# ============================================================================
print("=" * 70)
print("Demo 1: Bundle Factorization")
print("=" * 70)

print("\nScenario: Bundle of 4 items")

# Codebook of ten random hypervectors keyed "item_0" .. "item_9"; entry k is
# generated with seed 100 + k.
items = {f"item_{idx}": model.random(seed=100 + idx) for idx in range(10)}

# Labels of the four codebook entries that go into the bundle. The same tuple
# drives the membership marks and the recall computation further down.
bundled_labels = ("item_0", "item_2", "item_5", "item_7")
bundled = [items[name] for name in bundled_labels]
bundle = model.bundle(bundled)

print(" Bundle: item_0 ⊕ item_2 ⊕ item_5 ⊕ item_7")

# Factorize to recover the bundled items
print("\n" + "=" * 70)
print("Factorizing bundle:")
print("=" * 70)

# Six candidates are requested although only four items were bundled, so the
# listing below also contains entries that are not part of the bundle.
labels, sims = cleanup.factorize(bundle, items, model, n_factors=6)

print("\nRecovered factors (top 6):")
for rank, (label, sim) in enumerate(zip(labels, sims), start=1):
    if label in bundled_labels:
        in_bundle = "✓"
    else:
        in_bundle = "✗"
    print(f" {rank}. {label:10s}: {sim:.3f} [{in_bundle}]")

# Recall@4: how many of the first four returned labels were actually bundled,
# divided by the number of bundled items.
top4_hits = [name for name in labels[:4] if name in bundled_labels]
correct_in_top4 = len(top4_hits)
recall = correct_in_top4 / 4.0

print(f"\nRecall@4: {recall:.2f} ({correct_in_top4}/4 factors recovered)")

for line in (
    "\nKey observation:",
    " - Top factors are the original bundled items",
    " - Similarity degrades but items still identifiable",
    " - Ideal for 'what's in this bundle?' queries",
):
    print(line)
83
# ============================================================================
# Demo 2: Binding Chain Factorization - Sequential Unbinding
# ============================================================================
rule = "=" * 70
print(f"\n{rule}")
print("Demo 2: Binding Chain Factorization")
print(rule)

print("\nScenario: Chain of bindings (A ⊗ B ⊗ C)")

# The three factors are taken from the item codebook.
A, B, C = (items[name] for name in ("item_0", "item_1", "item_2"))

# Build the chain left to right: (A ⊗ B) first, then bind the result with C.
a_bound_b = model.bind(A, B)
chain = model.bind(a_bound_b, C)

print(" Chain: item_0 ⊗ item_1 ⊗ item_2")

# Method 1: Sequential unbinding (if you know the order)
print(f"\n{rule}")
print("Method 1: Sequential unbinding (knowing order)")
print(rule)

# Step 1 - unbind C from the chain to get (A ⊗ B).
step1 = model.unbind(chain, C)
print("\n Step 1: Unbind item_2")
# step1 is still a bound pair, so it is matched against a derived codebook in
# which every item has been bound with item_1. The label that wins therefore
# names the item that is paired with item_1.
paired_with_item_1 = {
    name: model.bind(hv, items["item_1"]) for name, hv in items.items()
}
label1, sim1 = cleanup.cleanup(step1, paired_with_item_1, model)
print(f" Result ≈ (item_0 ⊗ item_1), found: {label1} (sim={sim1:.3f})")

# Step 2 - unbind B from the remainder to get A, then look it up in the
# plain item codebook.
step2 = model.unbind(step1, B)
print("\n Step 2: Unbind item_1")
label2, sim2 = cleanup.cleanup(step2, items, model)
print(f" Result ≈ item_0, found: {label2} (sim={sim2:.3f})")

print("\nKey observation:")
print(" - Sequential unbinding requires knowing binding order")
print(" - Each unbind step recovers one factor")
print(" - Most reliable when order is known")
123
# ============================================================================
# Demo 3: Mixed Binding and Bundling - Structured Factorization
# ============================================================================
print("\n" + "=" * 70)
print("Demo 3: Mixed Operations - Structured Factorization")
print("=" * 70)

print("\nScenario: Role-filler structure with multiple bindings")
print(" Structure: (role_A ⊗ filler_1) ⊕ (role_B ⊗ filler_2)")

# Create roles and fillers. Roles are new random hypervectors; fillers come
# from the item codebook, which is also the codebook used for cleanup below.
role_A = model.random(seed=200)
role_B = model.random(seed=201)
filler_1 = items["item_3"]
filler_2 = items["item_4"]

# Create structure: the bundle of the two role-filler bindings.
struct = model.bundle([
    model.bind(role_A, filler_1),
    model.bind(role_B, filler_2),
])

print("\n role_A ⊗ item_3")
print(" role_B ⊗ item_4")
print(" → bundled together")

# Query the structure once per role: unbind the role from the bundle, then
# clean the result up against the item codebook. Each entry carries the role's
# display name, its hypervector, and the label the lookup is expected to
# return. Both queries share this single code path instead of two pasted
# copies.
role_queries = (
    ("role_A", role_A, "item_3"),
    ("role_B", role_B, "item_4"),
)

for role_name, role_hv, expected_label in role_queries:
    print("\n" + "=" * 70)
    print(f"Query: What is bound to {role_name}?")
    print("=" * 70)

    unbound = model.unbind(struct, role_hv)
    found_label, found_sim = cleanup.cleanup(unbound, items, model)

    print(f"\n Unbind {role_name}: {found_label} (similarity={found_sim:.3f})")
    print(f" Expected: {expected_label}")

print("\nKey observation:")
print(" - Can query structure by role (dimension)")
print(" - Unbinding isolates specific role-filler pairs")
print(" - Essential pattern for structured retrieval")
176
# ============================================================================
# Demo 4: Noise Accumulation in Factorization
# ============================================================================
print("\n" + "=" * 70)
print("Demo 4: Noise Accumulation During Factorization")
print("=" * 70)

print("\nTesting: Bundle size vs. factorization accuracy")

# Test different bundle sizes
sizes = [2, 4, 6, 8, 10]

# Evaluate against a codebook that also contains entries which are never
# bundled. If the ten `items` were the only candidates, asking for the top
# `size` results would score size/10 on average even for a ranking that
# ignored the bundle, and at size == 10 every candidate is a member, so
# recall would be 1.00 by construction. The distractors make the recall
# column a real measurement. `items` itself is left untouched.
n_distractors = 90
eval_codebook = dict(items)
eval_codebook.update(
    (f"distractor_{d}", model.random(seed=1000 + d)) for d in range(n_distractors)
)

print(f"\n{'Size':>5s} | {'Recall@Top':>12s} | {'Avg Sim':>10s} | {'Correct':>10s}")
print("-" * 45)

for size in sizes:
    # Create a bundle of the first `size` codebook items
    expected = {f"item_{i}" for i in range(size)}
    selected = [items[f"item_{i}"] for i in range(size)]
    test_bundle = model.bundle(selected)

    # Factorize: ask for exactly as many candidates as were bundled
    labels_test, sims_test = cleanup.factorize(
        test_bundle, eval_codebook, model, n_factors=size
    )

    # Calculate metrics over the first `size` results
    correct_count = sum(1 for name in labels_test[:size] if name in expected)
    recall = correct_count / size
    avg_sim = np.mean(sims_test[:size])

    print(f"{size:>5d} | {recall:>12.2f} | {avg_sim:>10.3f} | {correct_count:>10d}/{size}")

print("\nKey observation:")
print(" - Accuracy decreases with more bundled items")
print(" - Similarities degrade due to interference")
print(" - Practical limit: ~5-7 factors for reliable recovery")
print(" - Mirrors human working memory capacity!")
213
# ============================================================================
# Demo 5: Practical Application - Query Decomposition
# ============================================================================
print("\n" + "=" * 70)
print("Demo 5: Practical Application - Complex Query")
print("=" * 70)

print("\nScenario: Multi-attribute product search")
print(" Query: color=red AND category=laptop AND price=affordable")

# Define attributes
COLOR = model.random(seed=300)
CATEGORY = model.random(seed=301)
PRICE = model.random(seed=302)

# Define values
red = model.random(seed=400)
laptop = model.random(seed=401)
affordable = model.random(seed=402)

# Create query: one attribute-value binding per constraint, bundled together
query = model.bundle([
    model.bind(COLOR, red),
    model.bind(CATEGORY, laptop),
    model.bind(PRICE, affordable),
])

print("\n Query HV created (color ⊗ red) ⊕ (category ⊗ laptop) ⊕ (price ⊗ affordable)")

# Attribute codebook
attributes = {
    "COLOR": COLOR,
    "CATEGORY": CATEGORY,
    "PRICE": PRICE,
}

# Value codebook: the three values used in the query plus one alternative
# for each of them.
values = {
    "red": red,
    "blue": model.random(seed=403),
    "laptop": laptop,
    "phone": model.random(seed=404),
    "affordable": affordable,
    "expensive": model.random(seed=405),
}

# Decode every attribute once: unbind it from the query, then clean the
# result up against the value codebook. Maps attribute name to
# (best value label, similarity).
decoded = {}
for attr_name, attr_hv in attributes.items():
    value_label, value_sim = cleanup.cleanup(
        model.unbind(query, attr_hv), values, model
    )
    decoded[attr_name] = (value_label, value_sim)

# Decompose query to understand it
print("\n" + "=" * 70)
print("Decomposing query attributes:")
print("=" * 70)

# Rank attributes by how strongly a known value shows up after unbinding.
# The query stores attribute-value *bindings*, not the attribute vectors
# themselves, so ranking the bare attribute vectors by similarity to the query
# (a top-3 request on a 3-entry codebook) returns all three names whatever
# the query contains. Unbind-then-cleanup yields a score that depends on the
# attribute actually being bound in the query.
# NOTE(review): assumes bound pairs are dissimilar to their operands for this
# model - confirm against the holovec bind semantics.
attr_labels = sorted(decoded, key=lambda name: decoded[name][1], reverse=True)
attr_sims = [decoded[name][1] for name in attr_labels]

print("\nQuery contains these attributes:")
for label, sim in zip(attr_labels, attr_sims):
    print(f" {label:10s}: {sim:.3f}")

# Extract values for each attribute
print("\n" + "=" * 70)
print("Extracting attribute values:")
print("=" * 70)

# Extract color value
color_label, color_sim = decoded["COLOR"]
print(f"\n COLOR value: {color_label} (similarity={color_sim:.3f})")

# Extract category value
category_label, category_sim = decoded["CATEGORY"]
print(f" CATEGORY value: {category_label} (similarity={category_sim:.3f})")

# Extract price value
price_label, price_sim = decoded["PRICE"]
print(f" PRICE value: {price_label} (similarity={price_sim:.3f})")

print("\nKey observation:")
print(" - Can decompose complex queries into attributes + values")
print(" - Enables query understanding and refinement")
print(" - Practical for search engines and databases")
295
# ============================================================================
# Demo 6: Best Practices for Factorization
# ============================================================================
# Purely informational section: the text is kept as data and emitted one line
# per print() call. An empty string stands for a blank output line.
best_practice_lines = (
    "\n" + "=" * 70,
    "Demo 6: Factorization Best Practices",
    "=" * 70,
    "\n✓ DO:",
    " - Factorize bundles with ≤ 7 items for best results",
    " - Use cleanup strategies (BruteForce or Resonator)",
    " - Provide comprehensive codebook for cleanup",
    " - Check similarity scores to assess confidence",
    " - Sequential unbinding when order is known",
    "",
    "✗ DON'T:",
    " - Bundle > 10 items if you need to factorize later",
    " - Expect perfect recovery (always approximate)",
    " - Unbind without cleanup (results are noisy)",
    " - Ignore similarity scores (they indicate confidence)",
    " - Chain too many unbind operations (noise compounds)",
    "",
    "Strategies by use case:",
    "",
    " Bundle factorization:",
    " - Use: factorize() method",
    " - Returns: top-k most similar items",
    " - Best for: 'what's in this bundle?' queries",
    "",
    " Binding chain factorization:",
    " - Use: sequential unbind() + cleanup()",
    " - Requires: knowing binding order",
    " - Best for: structured data with known schema",
    "",
    " Mixed operations:",
    " - Use: unbind() by dimension + cleanup()",
    " - Pattern: role-filler binding in bundles",
    " - Best for: attribute-value structures",
    "",
)

for text in best_practice_lines:
    print(text)
334
# ============================================================================
# Summary
# ============================================================================
# Closing recap, written out with a single print(): the entries are joined
# with newlines and print() supplies the final one. Empty strings are blank
# output lines.
summary_rule = "=" * 70
summary_lines = [
    summary_rule,
    "Summary: Factorization Key Takeaways",
    summary_rule,
    "",
    "✓ Bundle factorization: Decompose A ⊕ B ⊕ C → A, B, C",
    "✓ Binding chains: Sequential unbinding with known order",
    "✓ Mixed structures: Combine unbinding + cleanup",
    "✓ Noise accumulation: Accuracy degrades with complexity",
    "✓ Practical limit: ~5-7 factors for reliable recovery",
    "",
    "Core factorization pattern:",
    " 1. Create comprehensive codebook",
    " 2. Call cleanup.factorize(composite, codebook, model, n_factors=k)",
    " 3. Check similarity scores for confidence",
    " 4. Use top-k results as recovered factors",
    "",
    "When to factorize:",
    " - Retrieving bundled items from working memory",
    " - Decomposing composite queries",
    " - Understanding structured representations",
    " - Multi-attribute search and filtering",
    "",
    "Complexity considerations:",
    " - Bundle of 3 items: Easy, high accuracy",
    " - Bundle of 5-7 items: Moderate, good accuracy",
    " - Bundle of 10+ items: Hard, degraded accuracy",
    " - Deep binding chains: Noise compounds exponentially",
    "",
    "Next steps:",
    " → Apply factorization in your domain",
    " → Combine with 27_cleanup_strategies.py techniques",
    " → Use in 24_app_working_memory.py patterns",
    "",
    summary_rule,
]
print("\n".join(summary_lines))