Remove unneeded embedding normalization
This commit is contained in:
parent
107ade464d
commit
8c0a868144
@ -74,14 +74,9 @@ class EmbeddingService:
|
|||||||
# Get the image embedding vector
|
# Get the image embedding vector
|
||||||
embedding_vector = embeddings.image_embedding
|
embedding_vector = embeddings.image_embedding
|
||||||
|
|
||||||
# Convert to numpy array and normalize
|
# Convert to numpy array - DO NOT normalize Vertex AI embeddings
|
||||||
embeddings_array = np.array(embedding_vector, dtype=np.float32)
|
embeddings_array = np.array(embedding_vector, dtype=np.float32)
|
||||||
|
|
||||||
# Normalize the feature vector
|
|
||||||
norm = np.linalg.norm(embeddings_array)
|
|
||||||
if norm > 0:
|
|
||||||
embeddings_array = embeddings_array / norm
|
|
||||||
|
|
||||||
logger.info(f"Generated image embeddings with shape: {embeddings_array.shape}")
|
logger.info(f"Generated image embeddings with shape: {embeddings_array.shape}")
|
||||||
return embeddings_array.tolist()
|
return embeddings_array.tolist()
|
||||||
|
|
||||||
@ -112,14 +107,9 @@ class EmbeddingService:
|
|||||||
# Get the text embedding vector
|
# Get the text embedding vector
|
||||||
embedding_vector = embeddings.text_embedding
|
embedding_vector = embeddings.text_embedding
|
||||||
|
|
||||||
# Convert to numpy array and normalize
|
# Convert to numpy array - DO NOT normalize Vertex AI embeddings
|
||||||
embeddings_array = np.array(embedding_vector, dtype=np.float32)
|
embeddings_array = np.array(embedding_vector, dtype=np.float32)
|
||||||
|
|
||||||
# Normalize the feature vector
|
|
||||||
norm = np.linalg.norm(embeddings_array)
|
|
||||||
if norm > 0:
|
|
||||||
embeddings_array = embeddings_array / norm
|
|
||||||
|
|
||||||
logger.info(f"Generated text embeddings with shape: {embeddings_array.shape}")
|
logger.info(f"Generated text embeddings with shape: {embeddings_array.shape}")
|
||||||
return embeddings_array.tolist()
|
return embeddings_array.tolist()
|
||||||
|
|
||||||
@ -175,29 +165,5 @@ class EmbeddingService:
|
|||||||
logger.error(f"Error deleting embedding: {e}")
|
logger.error(f"Error deleting embedding: {e}")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def calculate_similarity(self, embedding1: List[float], embedding2: List[float]) -> float:
|
|
||||||
"""
|
|
||||||
Calculate cosine similarity between two embeddings
|
|
||||||
|
|
||||||
Args:
|
|
||||||
embedding1: First embedding
|
|
||||||
embedding2: Second embedding
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Cosine similarity (0-1)
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
# Convert to numpy arrays
|
|
||||||
vec1 = np.array(embedding1)
|
|
||||||
vec2 = np.array(embedding2)
|
|
||||||
|
|
||||||
# Calculate cosine similarity
|
|
||||||
similarity = np.dot(vec1, vec2) / (np.linalg.norm(vec1) * np.linalg.norm(vec2))
|
|
||||||
|
|
||||||
return float(similarity)
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error calculating similarity: {e}")
|
|
||||||
raise
|
|
||||||
|
|
||||||
# Create a singleton service
|
# Create a singleton service
|
||||||
embedding_service = EmbeddingService()
|
embedding_service = EmbeddingService()
|
||||||
@ -49,7 +49,7 @@ class VectorDatabaseService:
|
|||||||
|
|
||||||
# Handle HTTPS setting properly
|
# Handle HTTPS setting properly
|
||||||
if https is None:
|
if https is None:
|
||||||
#TODO ensure SSL
|
#TODO ensure SSL
|
||||||
https = False
|
https = False
|
||||||
|
|
||||||
logger.info(f"Connecting to Qdrant at {self.host}:{self.port} (HTTPS: {https}, gRPC: {prefer_grpc})")
|
logger.info(f"Connecting to Qdrant at {self.host}:{self.port} (HTTPS: {https}, gRPC: {prefer_grpc})")
|
||||||
@ -331,26 +331,4 @@ class VectorDatabaseService:
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
# Utility functions for vector operations
|
|
||||||
def normalize_vector(vector: List[float]) -> List[float]:
|
|
||||||
"""Normalize a vector to unit length."""
|
|
||||||
vector_array = np.array(vector)
|
|
||||||
norm = np.linalg.norm(vector_array)
|
|
||||||
if norm == 0:
|
|
||||||
return vector
|
|
||||||
return (vector_array / norm).tolist()
|
|
||||||
|
|
||||||
|
|
||||||
def cosine_similarity(vector1: List[float], vector2: List[float]) -> float:
|
|
||||||
"""Calculate cosine similarity between two vectors."""
|
|
||||||
v1 = np.array(vector1)
|
|
||||||
v2 = np.array(vector2)
|
|
||||||
|
|
||||||
dot_product = np.dot(v1, v2)
|
|
||||||
norm_v1 = np.linalg.norm(v1)
|
|
||||||
norm_v2 = np.linalg.norm(v2)
|
|
||||||
|
|
||||||
if norm_v1 == 0 or norm_v2 == 0:
|
|
||||||
return 0.0
|
|
||||||
|
|
||||||
return dot_product / (norm_v1 * norm_v2)
|
|
||||||
Loading…
x
Reference in New Issue
Block a user