remove unneeded embedding normalization
This commit is contained in:
parent
107ade464d
commit
8c0a868144
@ -74,14 +74,9 @@ class EmbeddingService:
|
||||
# Get the image embedding vector
|
||||
embedding_vector = embeddings.image_embedding
|
||||
|
||||
# Convert to numpy array and normalize
|
||||
# Convert to numpy array - DO NOT normalize Vertex AI embeddings
|
||||
embeddings_array = np.array(embedding_vector, dtype=np.float32)
|
||||
|
||||
# Normalize the feature vector
|
||||
norm = np.linalg.norm(embeddings_array)
|
||||
if norm > 0:
|
||||
embeddings_array = embeddings_array / norm
|
||||
|
||||
logger.info(f"Generated image embeddings with shape: {embeddings_array.shape}")
|
||||
return embeddings_array.tolist()
|
||||
|
||||
@ -112,14 +107,9 @@ class EmbeddingService:
|
||||
# Get the text embedding vector
|
||||
embedding_vector = embeddings.text_embedding
|
||||
|
||||
# Convert to numpy array and normalize
|
||||
# Convert to numpy array - DO NOT normalize Vertex AI embeddings
|
||||
embeddings_array = np.array(embedding_vector, dtype=np.float32)
|
||||
|
||||
# Normalize the feature vector
|
||||
norm = np.linalg.norm(embeddings_array)
|
||||
if norm > 0:
|
||||
embeddings_array = embeddings_array / norm
|
||||
|
||||
logger.info(f"Generated text embeddings with shape: {embeddings_array.shape}")
|
||||
return embeddings_array.tolist()
|
||||
|
||||
@ -175,29 +165,5 @@ class EmbeddingService:
|
||||
logger.error(f"Error deleting embedding: {e}")
|
||||
return False
|
||||
|
||||
def calculate_similarity(self, embedding1: List[float], embedding2: List[float]) -> float:
    """
    Calculate cosine similarity between two embeddings

    Args:
        embedding1: First embedding
        embedding2: Second embedding

    Returns:
        Cosine similarity in the range [-1, 1]. When either embedding has
        zero magnitude the similarity is undefined and 0.0 is returned
        instead of NaN.

    Raises:
        Exception: Any error raised by the computation (for example
            mismatched embedding lengths) is logged and re-raised.
    """
    try:
        # Convert to numpy arrays
        vec1 = np.array(embedding1)
        vec2 = np.array(embedding2)

        # Compute the dot product first so incompatible shapes still raise,
        # even when one of the vectors happens to be all zeros.
        dot_product = np.dot(vec1, vec2)
        denominator = np.linalg.norm(vec1) * np.linalg.norm(vec2)

        # Guard the division: 0/0 would otherwise yield NaN plus a
        # RuntimeWarning rather than a usable score.
        if denominator == 0:
            return 0.0

        return float(dot_product / denominator)
    except Exception as e:
        logger.error(f"Error calculating similarity: {e}")
        raise
|
||||
|
||||
# Create a singleton service
|
||||
embedding_service = EmbeddingService()
|
||||
@ -49,8 +49,8 @@ class VectorDatabaseService:
|
||||
|
||||
# Handle HTTPS setting properly
|
||||
if https is None:
|
||||
#TODO ensure SSL
|
||||
https = False
|
||||
#TODO ensure SSL
|
||||
https = False
|
||||
|
||||
logger.info(f"Connecting to Qdrant at {self.host}:{self.port} (HTTPS: {https}, gRPC: {prefer_grpc})")
|
||||
|
||||
@ -331,26 +331,4 @@ class VectorDatabaseService:
|
||||
return False
|
||||
|
||||
|
||||
# Utility functions for vector operations
|
||||
def normalize_vector(vector: List[float]) -> List[float]:
    """Normalize a vector to unit length.

    Args:
        vector: Components of the vector to scale.

    Returns:
        A new list containing ``vector`` divided by its norm as computed by
        ``np.linalg.norm``. A vector whose norm is zero (including an empty
        one) cannot be scaled, so its components are returned unchanged,
        still as a new list.
    """
    vector_array = np.array(vector)
    norm = np.linalg.norm(vector_array)
    if norm == 0:
        # Return a fresh list rather than the caller's own object so every
        # path yields an independent list, matching the non-zero branch.
        return vector_array.tolist()
    return (vector_array / norm).tolist()
|
||||
|
||||
|
||||
def cosine_similarity(vector1: List[float], vector2: List[float]) -> float:
    """Calculate cosine similarity between two vectors.

    Args:
        vector1: First vector.
        vector2: Second vector.

    Returns:
        The dot product of the two vectors divided by the product of their
        norms, as a builtin ``float``. Returns 0.0 when either vector has
        zero norm, where the similarity is undefined.
    """
    v1 = np.array(vector1)
    v2 = np.array(vector2)

    dot_product = np.dot(v1, v2)
    norm_v1 = np.linalg.norm(v1)
    norm_v2 = np.linalg.norm(v2)

    # Avoid dividing by zero for zero-magnitude vectors.
    if norm_v1 == 0 or norm_v2 == 0:
        return 0.0

    # Convert the NumPy scalar so both return paths yield a plain float.
    return float(dot_product / (norm_v1 * norm_v2))
|
||||
Loading…
x
Reference in New Issue
Block a user