Note
Go to the end to download the full example code.
Position-Based Sequence Encoding
Topics: PositionBindingEncoder, order sensitivity, sequence similarity
Time: 15 minutes
Prerequisites: 00_quickstart.py, 01_basic_operations.py
Related: 14_encoders_ngram.py, 15_encoders_trajectory.py
This example demonstrates the PositionBindingEncoder, which encodes sequences by binding each element to a unique position vector. This creates order-sensitive representations where different arrangements of the same elements produce distinct hypervectors.
Key concepts:
- Position binding: bind(symbol_i, position_i) for each element
- Order sensitivity: permutations are distinguishable
- Sequence similarity: shared prefixes increase similarity
- Reversible encoding: can decode to recover symbols
The PositionBindingEncoder is fundamental for text processing, time series, and any ordered data where position matters.
25 from holovec import VSA
26 from holovec.encoders import PositionBindingEncoder
27
# Title banner: rule, heading, rule, then one blank line.
print("=" * 70, "Position-Based Sequence Encoding", "=" * 70, "", sep="\n")

# ============================================================================
# Demo 1: Basic Usage
# ============================================================================
print("=" * 70)
print("Demo 1: Basic PositionBindingEncoder Usage")
print("=" * 70)

# Create the model; `model` and `encoder` are reused by the later demos.
model = VSA.create('MAP', dim=5000, seed=42)

# Create the sequence encoder on top of that model.
encoder = PositionBindingEncoder(model, seed=42)

print(f"\nEncoder: {encoder}")
print(f"Reversible: {encoder.is_reversible}")
print(f"Compatible models: {encoder.compatible_models}")

# Sequences to encode: a base pair, the same pair with one extra element,
# a pair that differs in its first element, and the base pair reversed.
sequences = [
    ['hello', 'world'],
    ['hello', 'world', '!'],
    ['goodbye', 'world'],
    ['world', 'hello'],  # Reversed order
]

print("\nEncoding sequences:")
encoded = []
for seq in sequences:
    hv = encoder.encode(seq)
    encoded.append(hv)
    print(f" {seq} → HV shape: {hv.shape}")

# Similarity of every encoded sequence against every encoded sequence
# (one printed row per sequence, one column per sequence, same order).
print("\nSimilarity Matrix:")
for seq, row_hv in zip(sequences, encoded):
    similarities = [float(model.similarity(row_hv, col_hv)) for col_hv in encoded]
    seq_str = str(seq)[:30].ljust(30)
    sims_str = " ".join(f"{s:5.3f}" for s in similarities)
    print(f"{seq_str} | {sims_str}")

# Decode the first two sequences back into symbols. The heading is built from
# the same value that is passed to decode(), so the printed text cannot drift
# away from what the call actually does.
max_positions = 5
print(f"\nDecoding test (up to {max_positions} positions):")
for seq, hv in zip(sequences[:2], encoded):  # Only decode first 2
    decoded = encoder.decode(hv, max_positions=max_positions, threshold=0.2)
    print(f" Original: {seq}")
    print(f" Decoded: {decoded}\n")

print("Key observations:")
print(" - Identical sequences have similarity ≈ 1.0")
print(" - Shared prefix increases similarity")
print(" - Different order creates different encodings")
print(" - Decoding recovers first few symbols accurately")

# ============================================================================
# Demo 2: Order Sensitivity
# ============================================================================
print("\n" + "=" * 70, "Demo 2: Order Sensitivity", "=" * 70, sep="\n")

# Reference ordering; each arrangement below is compared against it.
original = ['a', 'b', 'c', 'd']

# Description -> arrangement of the same four symbols (insertion order is
# the print order).
orderings = {
    "Original": ['a', 'b', 'c', 'd'],
    "Reversed": ['d', 'c', 'b', 'a'],
    "Rotated 1": ['b', 'c', 'd', 'a'],
    "Rotated 2": ['c', 'd', 'a', 'b'],
}

print("\nTesting order sensitivity:")
ref_hv = encoder.encode(original)

print(f"Reference: {original}")
print("\nSequence | Similarity | Description")
print("-" * 60)

# One table row per arrangement: the sequence, its similarity to the
# reference encoding, and the description.
for label, arrangement in orderings.items():
    score = float(model.similarity(ref_hv, encoder.encode(arrangement)))
    print(f"{arrangement!s:<20} | {score:10.3f} | {label}")

print("\nKey observation:")
print(" - Different orders produce distinct encodings")
print(" - Even rotations are clearly distinguishable")

# ============================================================================
# Demo 3: Sequence Similarity
# ============================================================================
print("\n" + "=" * 70)
print("Demo 3: Sequence Similarity and Prefix Matching")
print("=" * 70)

# Reference sentence and the variants compared against it. Each variant is
# paired with a label saying how much it shares with the reference.
reference = ['the', 'quick', 'brown', 'fox', 'jumps']
variants = [
    (['the', 'quick', 'brown', 'fox', 'jumps'], "Identical"),
    (['the', 'quick', 'brown', 'fox'], "Prefix (4/5)"),
    (['the', 'quick', 'brown'], "Prefix (3/5)"),
    (['the', 'quick'], "Prefix (2/5)"),
    (['the'], "Prefix (1/5)"),
    # Shares 'the', 'brown' and 'fox' with the reference at the same
    # positions (0, 2, 3), so three of the five positions match.
    (['the', 'slow', 'brown', 'fox', 'walks'], "3/5 positions match"),
    (['a', 'completely', 'different', 'sentence'], "No match"),
]

print(f"\nReference: {reference}")
print("\nSequence | Similarity | Shared")
print("-" * 70)

ref_hv = encoder.encode(reference)

# One table row per variant: the sequence (truncated/padded to 40 characters),
# its similarity to the reference encoding, and the label.
for seq, desc in variants:
    hv = encoder.encode(seq)
    sim = float(model.similarity(ref_hv, hv))
    seq_str = str(seq)[:40].ljust(40)
    print(f"{seq_str} | {sim:10.3f} | {desc}")

print("\nKey observations:")
print(" - Longer shared prefix → higher similarity")
print(" - Similarity degrades gracefully with differences")
print(" - Enables approximate sequence matching")

# ============================================================================
# Summary
# ============================================================================
# Closing text, one entry per printed line; an empty string prints a blank line.
closing_lines = (
    "\n" + "=" * 70,
    "Summary: PositionBindingEncoder Key Takeaways",
    "=" * 70,
    "",
    "✓ Order-sensitive: Different arrangements are distinguishable",
    "✓ Prefix similarity: Shared prefixes increase similarity",
    "✓ Reversible: Can decode to recover original symbols",
    "✓ Foundation for text: Used in n-gram and language models",
    "✓ Works with all models: Compatible with MAP, FHRR, HRR, BSC, BSDC",
    "",
    "Use cases:",
    " - Text processing: words in sentences",
    " - Time series: events in temporal order",
    " - Structured data: ordered records",
    " - Sequences: any data where position matters",
    "",
    "Next steps:",
    " → 14_encoders_ngram.py - N-gram text encoding",
    " → 15_encoders_trajectory.py - Continuous sequences",
    " → 20_app_text_classification.py - Apply to real text data",
    "",
    "=" * 70,
)
for line in closing_lines:
    print(line)