remove unneeded embedding normalization

This commit is contained in:
johnpccd 2025-05-25 14:05:08 +02:00
parent 107ade464d
commit 8c0a868144
2 changed files with 4 additions and 60 deletions

View File

@ -74,14 +74,9 @@ class EmbeddingService:
# Get the image embedding vector
embedding_vector = embeddings.image_embedding
# Convert to numpy array and normalize
# Convert to numpy array - DO NOT normalize Vertex AI embeddings
embeddings_array = np.array(embedding_vector, dtype=np.float32)
# Normalize the feature vector
norm = np.linalg.norm(embeddings_array)
if norm > 0:
embeddings_array = embeddings_array / norm
logger.info(f"Generated image embeddings with shape: {embeddings_array.shape}")
return embeddings_array.tolist()
@ -112,14 +107,9 @@ class EmbeddingService:
# Get the text embedding vector
embedding_vector = embeddings.text_embedding
# Convert to numpy array and normalize
# Convert to numpy array - DO NOT normalize Vertex AI embeddings
embeddings_array = np.array(embedding_vector, dtype=np.float32)
# Normalize the feature vector
norm = np.linalg.norm(embeddings_array)
if norm > 0:
embeddings_array = embeddings_array / norm
logger.info(f"Generated text embeddings with shape: {embeddings_array.shape}")
return embeddings_array.tolist()
@ -175,29 +165,5 @@ class EmbeddingService:
logger.error(f"Error deleting embedding: {e}")
return False
def calculate_similarity(self, embedding1: List[float], embedding2: List[float]) -> float:
    """
    Calculate cosine similarity between two embeddings

    Args:
        embedding1: First embedding
        embedding2: Second embedding

    Returns:
        Cosine similarity in the range [-1, 1]. Returns 0.0 when either
        embedding has zero magnitude, because the similarity is
        undefined there and plain division would produce NaN.

    Raises:
        Exception: Any error raised while computing the similarity
            (e.g. ValueError from NumPy for mismatched lengths) is
            logged and re-raised unchanged.
    """
    try:
        # Convert to numpy arrays
        vec1 = np.array(embedding1)
        vec2 = np.array(embedding2)

        # Compute the dot product first so incompatible shapes still raise,
        # even when one of the vectors happens to be all zeros.
        dot_product = np.dot(vec1, vec2)
        norm1 = np.linalg.norm(vec1)
        norm2 = np.linalg.norm(vec2)

        # NumPy only warns on division by zero and yields NaN, which would
        # silently propagate to callers; report "no similarity" instead.
        if norm1 == 0 or norm2 == 0:
            return 0.0

        return float(dot_product / (norm1 * norm2))
    except Exception as e:
        logger.error(f"Error calculating similarity: {e}")
        raise
# Create a single shared EmbeddingService instance (singleton) for reuse
embedding_service = EmbeddingService()

View File

@ -49,8 +49,8 @@ class VectorDatabaseService:
# Handle HTTPS setting properly
if https is None:
#TODO ensure SSL
https = False
#TODO ensure SSL
https = False
logger.info(f"Connecting to Qdrant at {self.host}:{self.port} (HTTPS: {https}, gRPC: {prefer_grpc})")
@ -331,26 +331,4 @@ class VectorDatabaseService:
return False
# Utility functions for vector operations
def normalize_vector(vector: List[float]) -> List[float]:
    """Scale ``vector`` to unit (L2) length.

    A vector whose norm is zero cannot be scaled, so the input is handed
    back unchanged in that case.
    """
    as_array = np.array(vector)
    magnitude = np.linalg.norm(as_array)
    # Only divide when there is a non-zero length to divide by.
    if magnitude != 0:
        return (as_array / magnitude).tolist()
    return vector
def cosine_similarity(vector1: List[float], vector2: List[float]) -> float:
    """Return the cosine of the angle between two vectors.

    Yields 0.0 when either vector has zero length, since the cosine is
    undefined there.
    """
    first = np.array(vector1)
    second = np.array(vector2)
    numerator = np.dot(first, second)
    lengths = (np.linalg.norm(first), np.linalg.norm(second))
    # A zero-length vector would make the denominator zero.
    if any(length == 0 for length in lengths):
        return 0.0
    return numerator / (lengths[0] * lengths[1])