Skip to content

Commit 6913b29

Browse files
committed
fix(stores): Actually check a vector is a unit vector/normalized
Instead of just summing the components and checking whether the sum equals 1.0, compute the actual magnitude of the vector (its Euclidean, i.e. L2, norm) and check that it is approximately 1.0. Note that this shouldn't change the order of results, except in edge cases if the tolerance used for the approximate-equality comparison turns out to be too lax. However, it should improve performance for normalized vectors, which were previously being misclassified as non-normalized.
1 parent b264a91 commit 6913b29

File tree

1 file changed

+9
-5
lines changed

1 file changed

+9
-5
lines changed

backend/go/stores/store.go

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -311,12 +311,16 @@ func (s *Store) StoresGet(opts *pb.StoresGetOptions) (pb.StoresGetResult, error)
311311
}
312312

313313
// isNormalized reports whether k is approximately a unit vector, i.e. whether
// its Euclidean (L2) norm lies within 1% of 1.0.
//
// The check uses the vector's magnitude rather than the plain sum of its
// components: summing components wrongly accepts vectors such as {0.5, 0.5}
// (magnitude ~0.707) and wrongly rejects genuine unit vectors such as
// {0.6, 0.8} or {-1, 0}.
//
// An empty (or nil) vector has magnitude 0 and is therefore not normalized.
// A vector containing NaN is never considered normalized, because every
// comparison against NaN is false.
func isNormalized(k []float32) bool {
	// Allowed deviation of the magnitude from 1.0. It absorbs the rounding
	// error of float32 inputs; exact equality would almost never hold.
	const tolerance = 0.01

	// Accumulate in float64 so that squaring and summing many float32
	// components does not lose additional precision.
	var sumSquares float64

	for _, v := range k {
		v64 := float64(v)
		sumSquares += v64 * v64
	}

	magnitude := math.Sqrt(sumSquares)

	return magnitude >= 1-tolerance && magnitude <= 1+tolerance
}
321325

322326
// TODO: This we could replace with handwritten SIMD code
@@ -328,7 +332,7 @@ func normalizedCosineSimilarity(k1, k2 []float32) float32 {
328332
dot += k1[i] * k2[i]
329333
}
330334

331-
assert(dot >= -1 && dot <= 1, fmt.Sprintf("dot = %f", dot))
335+
assert(dot >= -1.01 && dot <= 1.01, fmt.Sprintf("dot = %f", dot))
332336

333337
// 2.0 * (1.0 - dot) would be the Euclidean distance
334338
return dot
@@ -418,7 +422,7 @@ func cosineSimilarity(k1, k2 []float32, mag1 float64) float32 {
418422

419423
sim := float32(dot / (mag1 * math.Sqrt(mag2)))
420424

421-
assert(sim >= -1 && sim <= 1, fmt.Sprintf("sim = %f", sim))
425+
assert(sim >= -1.01 && sim <= 1.01, fmt.Sprintf("sim = %f", sim))
422426

423427
return sim
424428
}

0 commit comments

Comments
 (0)