diff --git a/src/services/embedding_service.py b/src/services/embedding_service.py index b142bd4..99ba27d 100644 --- a/src/services/embedding_service.py +++ b/src/services/embedding_service.py @@ -74,14 +74,9 @@ class EmbeddingService: # Get the image embedding vector embedding_vector = embeddings.image_embedding - # Convert to numpy array and normalize + # Convert to numpy array - DO NOT normalize Vertex AI embeddings embeddings_array = np.array(embedding_vector, dtype=np.float32) - # Normalize the feature vector - norm = np.linalg.norm(embeddings_array) - if norm > 0: - embeddings_array = embeddings_array / norm - logger.info(f"Generated image embeddings with shape: {embeddings_array.shape}") return embeddings_array.tolist() @@ -112,14 +107,9 @@ class EmbeddingService: # Get the text embedding vector embedding_vector = embeddings.text_embedding - # Convert to numpy array and normalize + # Convert to numpy array - DO NOT normalize Vertex AI embeddings embeddings_array = np.array(embedding_vector, dtype=np.float32) - # Normalize the feature vector - norm = np.linalg.norm(embeddings_array) - if norm > 0: - embeddings_array = embeddings_array / norm - logger.info(f"Generated text embeddings with shape: {embeddings_array.shape}") return embeddings_array.tolist() @@ -175,29 +165,5 @@ class EmbeddingService: logger.error(f"Error deleting embedding: {e}") return False - def calculate_similarity(self, embedding1: List[float], embedding2: List[float]) -> float: - """ - Calculate cosine similarity between two embeddings - - Args: - embedding1: First embedding - embedding2: Second embedding - - Returns: - Cosine similarity (0-1) - """ - try: - # Convert to numpy arrays - vec1 = np.array(embedding1) - vec2 = np.array(embedding2) - - # Calculate cosine similarity - similarity = np.dot(vec1, vec2) / (np.linalg.norm(vec1) * np.linalg.norm(vec2)) - - return float(similarity) - except Exception as e: - logger.error(f"Error calculating similarity: {e}") - raise - # Create a singleton service embedding_service = EmbeddingService() \ No newline at end of file diff --git a/src/services/vector_db.py b/src/services/vector_db.py index 471f586..e8c80ef 100644 --- a/src/services/vector_db.py +++ b/src/services/vector_db.py @@ -49,8 +49,8 @@ class VectorDatabaseService: # Handle HTTPS setting properly if https is None: -#TODO ensure SSL - https = False + #TODO ensure SSL + https = False logger.info(f"Connecting to Qdrant at {self.host}:{self.port} (HTTPS: {https}, gRPC: {prefer_grpc})") @@ -331,26 +331,4 @@ class VectorDatabaseService: return False -# Utility functions for vector operations -def normalize_vector(vector: List[float]) -> List[float]: - """Normalize a vector to unit length.""" - vector_array = np.array(vector) - norm = np.linalg.norm(vector_array) - if norm == 0: - return vector - return (vector_array / norm).tolist() - -def cosine_similarity(vector1: List[float], vector2: List[float]) -> float: - """Calculate cosine similarity between two vectors.""" - v1 = np.array(vector1) - v2 = np.array(vector2) - - dot_product = np.dot(v1, v2) - norm_v1 = np.linalg.norm(v1) - norm_v2 = np.linalg.norm(v2) - - if norm_v1 == 0 or norm_v2 == 0: - return 0.0 - - return dot_product / (norm_v1 * norm_v2) \ No newline at end of file