Position-Based Sequence Encoding

Topics: PositionBindingEncoder, order sensitivity, sequence similarity
Time: 15 minutes
Prerequisites: 00_quickstart.py, 01_basic_operations.py
Related: 14_encoders_ngram.py, 15_encoders_trajectory.py

This example demonstrates the PositionBindingEncoder, which encodes sequences by binding each element to a unique position vector. This creates order-sensitive representations where different arrangements of the same elements produce distinct hypervectors.

Key concepts:
- Position binding: bind(symbol_i, position_i) for each element
- Order sensitivity: permutations are distinguishable
- Sequence similarity: shared prefixes increase similarity
- Reversible encoding: can decode to recover symbols

The PositionBindingEncoder is fundamental for text processing, time series, and any ordered data where position matters.

 25 from holovec import VSA
 26 from holovec.encoders import PositionBindingEncoder
 27
 28 print("=" * 70)
 29 print("Position-Based Sequence Encoding")
 30 print("=" * 70)
 31 print()
 32
 33 # ============================================================================
 34 # Demo 1: Basic Usage
 35 # ============================================================================
 36 print("=" * 70)
 37 print("Demo 1: Basic PositionBindingEncoder Usage")
 38 print("=" * 70)
 39
 40 # Create model
 41 model = VSA.create('MAP', dim=5000, seed=42)
 42
 43 # Create encoder
 44 encoder = PositionBindingEncoder(model, seed=42)
 45
 46 print(f"\nEncoder: {encoder}")
 47 print(f"Reversible: {encoder.is_reversible}")
 48 print(f"Compatible models: {encoder.compatible_models}")
 49
 50 # Encode some sequences
 51 sequences = [
 52     ['hello', 'world'],
 53     ['hello', 'world', '!'],
 54     ['goodbye', 'world'],
 55     ['world', 'hello']  # Reversed order
 56 ]
 57
 58 print("\nEncoding sequences:")
 59 encoded = []
 60 for seq in sequences:
 61     hv = encoder.encode(seq)
 62     encoded.append(hv)
 63     print(f"  {seq} → HV shape: {hv.shape}")
 64
 65 # Check similarities
 66 print("\nSimilarity Matrix:")
 67 for i, seq1 in enumerate(sequences):
 68     similarities = []
 69     for j, seq2 in enumerate(sequences):
 70         sim = float(model.similarity(encoded[i], encoded[j]))
 71         similarities.append(sim)
 72     seq_str = str(seq1)[:30].ljust(30)
 73     sims_str = "  ".join(f"{s:5.3f}" for s in similarities)
 74     print(f"{seq_str} | {sims_str}")
 75
 76 # Test decoding
 77 print("\nDecoding test (first 3 positions):")
 78 for i, seq in enumerate(sequences[:2]):  # Only decode first 2
 79     decoded = encoder.decode(encoded[i], max_positions=5, threshold=0.2)
 80     print(f"  Original: {seq}")
 81     print(f"  Decoded:  {decoded}\n")
 82
 83 print("Key observations:")
 84 print("  - Identical sequences have similarity ≈ 1.0")
 85 print("  - Shared prefix increases similarity")
 86 print("  - Different order creates different encodings")
 87 print("  - Decoding recovers first few symbols accurately")
 88
 89 # ============================================================================
 90 # Demo 2: Order Sensitivity
 91 # ============================================================================
 92 print("\n" + "=" * 70)
 93 print("Demo 2: Order Sensitivity")
 94 print("=" * 70)
 95
 96 # Test order sensitivity
 97 original = ['a', 'b', 'c', 'd']
 98 permutations = [
 99     (['a', 'b', 'c', 'd'], "Original"),
100     (['d', 'c', 'b', 'a'], "Reversed"),
101     (['b', 'c', 'd', 'a'], "Rotated 1"),
102     (['c', 'd', 'a', 'b'], "Rotated 2"),
103 ]
104
105 print("\nTesting order sensitivity:")
106 ref_hv = encoder.encode(original)
107
108 print(f"Reference: {original}")
109 print("\nSequence              | Similarity | Description")
110 print("-" * 60)
111
112 for seq, desc in permutations:
113     hv = encoder.encode(seq)
114     sim = float(model.similarity(ref_hv, hv))
115     seq_str = str(seq).ljust(20)
116     print(f"{seq_str} | {sim:10.3f} | {desc}")
117
118 print("\nKey observation:")
119 print("  - Different orders produce distinct encodings")
120 print("  - Even rotations are clearly distinguishable")
121
# ============================================================================
# Demo 3: Sequence Similarity
# ============================================================================
# Compares a five-word reference sentence against progressively shorter
# prefixes, a partially matching sentence, and an unrelated sentence.
print("\n" + "=" * 70)
print("Demo 3: Sequence Similarity and Prefix Matching")
print("=" * 70)

# Test prefix matching
reference = ['the', 'quick', 'brown', 'fox', 'jumps']
variants = [
    (['the', 'quick', 'brown', 'fox', 'jumps'], "Identical"),
    (['the', 'quick', 'brown', 'fox'], "Prefix (4/5)"),
    (['the', 'quick', 'brown'], "Prefix (3/5)"),
    (['the', 'quick'], "Prefix (2/5)"),
    (['the'], "Prefix (1/5)"),
    # Matches the reference at 'the', 'brown' and 'fox' (3 of 5 positions);
    # only 'quick' -> 'slow' and 'jumps' -> 'walks' differ.
    (['the', 'slow', 'brown', 'fox', 'walks'], "3/5 positions match"),
    (['a', 'completely', 'different', 'sentence'], "No match"),
]

print(f"\nReference: {reference}")
print("\nSequence                                | Similarity | Shared")
print("-" * 70)

ref_hv = encoder.encode(reference)

# One table row per variant: similarity of its encoding to the reference.
for seq, desc in variants:
    hv = encoder.encode(seq)
    sim = float(model.similarity(ref_hv, hv))
    seq_str = str(seq)[:40].ljust(40)  # truncate/pad to the 40-column header
    print(f"{seq_str} | {sim:10.3f} | {desc}")

print("\nKey observations:")
print("  - Longer shared prefix → higher similarity")
print("  - Similarity degrades gracefully with differences")
print("  - Enables approximate sequence matching")
157
# ============================================================================
# Summary
# ============================================================================
# Closing recap: takeaways, use cases and pointers to follow-up examples,
# emitted as one newline-joined block (empty strings are blank lines).
summary_rule = "=" * 70
summary_lines = [
    "\n" + summary_rule,
    "Summary: PositionBindingEncoder Key Takeaways",
    summary_rule,
    "",
    "✓ Order-sensitive: Different arrangements are distinguishable",
    "✓ Prefix similarity: Shared prefixes increase similarity",
    "✓ Reversible: Can decode to recover original symbols",
    "✓ Foundation for text: Used in n-gram and language models",
    "✓ Works with all models: Compatible with MAP, FHRR, HRR, BSC, BSDC",
    "",
    "Use cases:",
    "  - Text processing: words in sentences",
    "  - Time series: events in temporal order",
    "  - Structured data: ordered records",
    "  - Sequences: any data where position matters",
    "",
    "Next steps:",
    "  → 14_encoders_ngram.py - N-gram text encoding",
    "  → 15_encoders_trajectory.py - Continuous sequences",
    "  → 20_app_text_classification.py - Apply to real text data",
    "",
    summary_rule,
]
print("\n".join(summary_lines))

Gallery generated by Sphinx-Gallery