from unittest.mock import patch, MagicMock, AsyncMock

import numpy as np
import pytest
from bson import ObjectId

from src.services.vector_store import VectorStoreService
from src.models.image import ImageModel

# Embedding size used throughout these tests; test_vector_dimension_validation
# expects a 256-element vector to be rejected by the service.
EMBEDDING_DIMENSION = 512


# NOTE: the fixtures live at module level (not inside a test class) so that
# every test class in this module can request them. Fixtures defined as
# methods of one class are not visible to tests in another class.


@pytest.fixture
def mock_pinecone_index():
    """Return a MagicMock standing in for the Pinecone index."""
    mock_index = MagicMock()
    mock_index.upsert = MagicMock()
    mock_index.query = MagicMock()
    mock_index.delete = MagicMock()
    mock_index.describe_index_stats = MagicMock()
    return mock_index


@pytest.fixture
def vector_store_service(mock_pinecone_index):
    """Yield a VectorStoreService whose index is the mocked Pinecone index.

    The service is yielded from inside the ``patch`` context so the
    ``pinecone`` module stays patched for the whole test, not only while
    the service is being constructed.
    """
    with patch('src.services.vector_store.pinecone') as mock_pinecone:
        mock_pinecone.Index.return_value = mock_pinecone_index
        service = VectorStoreService()
        service.index = mock_pinecone_index
        yield service


@pytest.fixture
def sample_embedding():
    """Return a reproducible embedding of EMBEDDING_DIMENSION floats."""
    # Seeded generator: identical values on every run keep failures
    # reproducible.
    rng = np.random.default_rng(seed=0)
    return rng.random(EMBEDDING_DIMENSION).tolist()


@pytest.fixture
def sample_image():
    """Create a sample image model."""
    return ImageModel(
        filename="test-image.jpg",
        original_filename="test_image.jpg",
        file_size=1024,
        content_type="image/jpeg",
        storage_path="images/test-image.jpg",
        team_id=ObjectId(),
        uploader_id=ObjectId(),
        description="A test image",
        tags=["test", "image"],
    )


class TestVectorStoreService:
    """Test vector store operations for semantic search."""

    def test_store_embedding(self, vector_store_service, sample_embedding,
                             sample_image):
        """Storing an embedding upserts exactly one vector with metadata."""
        embedding_id = vector_store_service.store_embedding(
            image_id=str(sample_image.id),
            embedding=sample_embedding,
            metadata={
                "filename": sample_image.filename,
                "team_id": str(sample_image.team_id),
                "tags": sample_image.tags,
                "description": sample_image.description,
            },
        )

        assert embedding_id is not None
        vector_store_service.index.upsert.assert_called_once()

        # call_args[1] holds the keyword arguments of the upsert call.
        vectors = vector_store_service.index.upsert.call_args[1]['vectors']
        assert len(vectors) == 1
        assert vectors[0]['id'] == embedding_id
        assert len(vectors[0]['values']) == len(sample_embedding)
        assert 'metadata' in vectors[0]

    def test_search_similar_images(self, vector_store_service,
                                   sample_embedding):
        """Searching returns the mocked matches in order, with image ids."""
        mock_results = {
            'matches': [
                {
                    'id': 'embedding1',
                    'score': 0.95,
                    'metadata': {
                        'image_id': str(ObjectId()),
                        'filename': 'similar1.jpg',
                        'team_id': str(ObjectId()),
                        'tags': ['cat', 'animal'],
                    },
                },
                {
                    'id': 'embedding2',
                    'score': 0.87,
                    'metadata': {
                        'image_id': str(ObjectId()),
                        'filename': 'similar2.jpg',
                        'team_id': str(ObjectId()),
                        'tags': ['dog', 'animal'],
                    },
                },
            ]
        }
        vector_store_service.index.query.return_value = mock_results

        results = vector_store_service.search_similar(
            query_embedding=sample_embedding,
            team_id=str(ObjectId()),
            top_k=10,
            score_threshold=0.8,
        )

        vector_store_service.index.query.assert_called_once()

        assert len(results) == 2
        assert results[0]['score'] == 0.95
        assert results[1]['score'] == 0.87
        assert all('image_id' in result for result in results)

    def test_search_with_filters(self, vector_store_service,
                                 sample_embedding):
        """Searching with extra filters still sends the team filter."""
        team_id = str(ObjectId())
        # Give the service a well-formed (empty) response instead of a bare
        # MagicMock so result post-processing cannot mask the assertion below.
        vector_store_service.index.query.return_value = {'matches': []}

        vector_store_service.search_similar(
            query_embedding=sample_embedding,
            team_id=team_id,
            top_k=5,
            filters={"tags": {"$in": ["cat", "dog"]}},
        )

        vector_store_service.index.query.assert_called_once()
        query_kwargs = vector_store_service.index.query.call_args[1]
        assert 'filter' in query_kwargs
        assert query_kwargs['filter']['team_id'] == team_id

    def test_delete_embedding(self, vector_store_service):
        """Deleting an embedding calls index.delete with that single id."""
        embedding_id = "test-embedding-123"

        success = vector_store_service.delete_embedding(embedding_id)

        vector_store_service.index.delete.assert_called_once_with(
            ids=[embedding_id]
        )
        assert success is True

    def test_batch_store_embeddings(self, vector_store_service,
                                    sample_embedding):
        """Batch storage upserts all vectors in a single call."""
        batch_data = [
            {
                'image_id': str(ObjectId()),
                'embedding': sample_embedding,
                'metadata': {
                    'filename': f'image{i}.jpg',
                    'team_id': str(ObjectId()),
                    'tags': [f'tag{i}'],
                },
            }
            for i in range(5)
        ]

        embedding_ids = vector_store_service.batch_store_embeddings(batch_data)

        assert len(embedding_ids) == 5
        vector_store_service.index.upsert.assert_called_once()

        vectors = vector_store_service.index.upsert.call_args[1]['vectors']
        assert len(vectors) == 5

    def test_get_index_stats(self, vector_store_service):
        """Index statistics are fetched from describe_index_stats."""
        mock_stats = {
            'total_vector_count': 1000,
            'dimension': 512,
            'index_fullness': 0.1,
        }
        vector_store_service.index.describe_index_stats.return_value = (
            mock_stats
        )

        stats = vector_store_service.get_index_stats()

        vector_store_service.index.describe_index_stats.assert_called_once()
        assert stats['total_vector_count'] == 1000
        assert stats['dimension'] == 512

    def test_search_with_score_threshold(self, vector_store_service,
                                         sample_embedding):
        """Matches scoring below score_threshold are dropped."""
        mock_results = {
            'matches': [
                {'id': 'emb1', 'score': 0.95, 'metadata': {'image_id': '1'}},
                {'id': 'emb2', 'score': 0.75, 'metadata': {'image_id': '2'}},
                {'id': 'emb3', 'score': 0.65, 'metadata': {'image_id': '3'}},
                {'id': 'emb4', 'score': 0.45, 'metadata': {'image_id': '4'}},
            ]
        }
        vector_store_service.index.query.return_value = mock_results

        results = vector_store_service.search_similar(
            query_embedding=sample_embedding,
            team_id=str(ObjectId()),
            top_k=10,
            score_threshold=0.7,
        )

        # Only the 0.95 and 0.75 matches clear the 0.7 threshold.
        assert len(results) == 2
        assert all(result['score'] >= 0.7 for result in results)

    def test_update_embedding_metadata(self, vector_store_service):
        """Updating metadata for an existing embedding reports success."""
        embedding_id = "test-embedding-123"
        new_metadata = {
            'tags': ['updated', 'tag'],
            'description': 'Updated description',
        }

        success = vector_store_service.update_embedding_metadata(
            embedding_id, new_metadata
        )

        # NOTE(review): which index method performs the update depends on the
        # service implementation, so only the return value is checked here.
        assert success is True

    def test_search_by_image_id(self, vector_store_service):
        """Looking up by image id returns the match carrying that id."""
        image_id = str(ObjectId())
        mock_results = {
            'matches': [
                {
                    'id': 'embedding1',
                    'score': 1.0,
                    'metadata': {
                        'image_id': image_id,
                        'filename': 'target.jpg',
                    },
                }
            ]
        }
        vector_store_service.index.query.return_value = mock_results

        result = vector_store_service.get_embedding_by_image_id(image_id)

        assert result is not None
        assert result['metadata']['image_id'] == image_id

    def test_bulk_delete_embeddings(self, vector_store_service):
        """Bulk deletion passes all ids to a single index.delete call."""
        embedding_ids = ['emb1', 'emb2', 'emb3']

        success = vector_store_service.bulk_delete_embeddings(embedding_ids)

        vector_store_service.index.delete.assert_called_once_with(
            ids=embedding_ids
        )
        assert success is True

    def test_search_pagination(self, vector_store_service, sample_embedding):
        """Placeholder for paginated search results."""
        # Skip explicitly rather than pass vacuously, so the missing coverage
        # shows up in the test report.
        pytest.skip(
            "Pagination test not written: depends on how pagination is "
            "handled in the vector store"
        )

    def test_vector_dimension_validation(self, vector_store_service):
        """Storing an embedding of the wrong size raises ValueError."""
        wrong_dimension_embedding = [0.5] * 256  # Wrong size

        with pytest.raises(ValueError):
            vector_store_service.store_embedding(
                image_id=str(ObjectId()),
                embedding=wrong_dimension_embedding,
                metadata={},
            )

    def test_connection_error_handling(self, vector_store_service):
        """An exception raised by index.query surfaces from search_similar."""
        vector_store_service.index.query.side_effect = Exception(
            "Connection failed"
        )

        # The test expects an exception to reach the caller rather than
        # being swallowed by the service.
        with pytest.raises(Exception):
            vector_store_service.search_similar(
                query_embedding=[0.1] * EMBEDDING_DIMENSION,
                team_id=str(ObjectId()),
                top_k=10,
            )

    def test_empty_search_results(self, vector_store_service,
                                  sample_embedding):
        """An empty match list from the index yields an empty result list."""
        vector_store_service.index.query.return_value = {'matches': []}

        results = vector_store_service.search_similar(
            query_embedding=sample_embedding,
            team_id=str(ObjectId()),
            top_k=10,
        )

        assert results == []


class TestVectorStoreIntegration:
    """Integration tests for vector store with other services."""

    def test_embedding_lifecycle(self, vector_store_service, sample_embedding,
                                 sample_image):
        """Exercise store, search and delete against the mocked index."""
        embedding_id = vector_store_service.store_embedding(
            image_id=str(sample_image.id),
            embedding=sample_embedding,
            metadata={'filename': sample_image.filename},
        )

        # The mocked index answers the search with the embedding just stored.
        mock_results = {
            'matches': [
                {
                    'id': embedding_id,
                    'score': 1.0,
                    'metadata': {'image_id': str(sample_image.id)},
                }
            ]
        }
        vector_store_service.index.query.return_value = mock_results

        results = vector_store_service.search_similar(
            query_embedding=sample_embedding,
            team_id=str(sample_image.team_id),
            top_k=1,
        )

        assert len(results) == 1
        assert results[0]['id'] == embedding_id

        success = vector_store_service.delete_embedding(embedding_id)
        assert success is True

    def test_team_isolation(self, vector_store_service, sample_embedding):
        """Searching for a team sends that team's id as a query filter."""
        team1_id = str(ObjectId())
        team2_id = str(ObjectId())

        # The mocked response deliberately contains both teams; this test
        # checks the filter sent with the query, not the returned matches.
        mock_results = {
            'matches': [
                {
                    'id': 'emb1',
                    'score': 0.9,
                    'metadata': {'image_id': '1', 'team_id': team1_id},
                },
                {
                    'id': 'emb2',
                    'score': 0.8,
                    'metadata': {'image_id': '2', 'team_id': team2_id},
                },
            ]
        }
        vector_store_service.index.query.return_value = mock_results

        vector_store_service.search_similar(
            query_embedding=sample_embedding,
            team_id=team1_id,
            top_k=10,
        )

        vector_store_service.index.query.assert_called_once()
        query_kwargs = vector_store_service.index.query.call_args[1]
        assert 'filter' in query_kwargs
        assert query_kwargs['filter']['team_id'] == team1_id