image_management_api/tests/services/test_vector_store.py
2025-05-24 12:27:32 +02:00

391 lines
14 KiB
Python

import pytest
import numpy as np
from unittest.mock import patch, MagicMock, AsyncMock
from bson import ObjectId
from src.services.vector_store import VectorStoreService
from src.models.image import ImageModel
class TestVectorStoreService:
    """Unit tests for ``VectorStoreService`` run against a mocked Pinecone index.

    Every test drives the service through a ``MagicMock`` index, so the
    assertions check which calls the service makes on that mock and how it
    post-processes the canned responses the mock returns.
    """

    @pytest.fixture
    def mock_pinecone_index(self):
        """Return a ``MagicMock`` exposing the index methods the tests inspect."""
        mock_index = MagicMock()
        mock_index.upsert = MagicMock()
        mock_index.query = MagicMock()
        mock_index.delete = MagicMock()
        mock_index.describe_index_stats = MagicMock()
        return mock_index

    @pytest.fixture
    def vector_store_service(self, mock_pinecone_index):
        """Yield a ``VectorStoreService`` whose ``index`` is the mocked index.

        The fixture yields from inside the ``patch`` context so that the
        ``pinecone`` module stays mocked for the whole test, not only while
        the service is being constructed.
        """
        with patch('src.services.vector_store.pinecone') as mock_pinecone:
            mock_pinecone.Index.return_value = mock_pinecone_index
            service = VectorStoreService()
            # Pin the mock explicitly, independent of how __init__ obtains it.
            service.index = mock_pinecone_index
            yield service

    @pytest.fixture
    def sample_embedding(self):
        """Return a reproducible 512-dimensional embedding as a list of floats."""
        # Seeded generator: a failing run can be replayed with identical values.
        return np.random.default_rng(42).random(512).tolist()

    @pytest.fixture
    def sample_image(self):
        """Return an ``ImageModel`` populated with fixed test values."""
        return ImageModel(
            filename="test-image.jpg",
            original_filename="test_image.jpg",
            file_size=1024,
            content_type="image/jpeg",
            storage_path="images/test-image.jpg",
            team_id=ObjectId(),
            uploader_id=ObjectId(),
            description="A test image",
            tags=["test", "image"],
        )

    def test_store_embedding(self, vector_store_service, sample_embedding, sample_image):
        """Storing one embedding upserts exactly one vector carrying metadata."""
        embedding_id = vector_store_service.store_embedding(
            image_id=str(sample_image.id),
            embedding=sample_embedding,
            metadata={
                "filename": sample_image.filename,
                "team_id": str(sample_image.team_id),
                "tags": sample_image.tags,
                "description": sample_image.description,
            },
        )

        assert embedding_id is not None
        vector_store_service.index.upsert.assert_called_once()

        # The service is expected to pass the payload as the ``vectors`` keyword.
        vectors = vector_store_service.index.upsert.call_args.kwargs['vectors']
        assert len(vectors) == 1
        assert vectors[0]['id'] == embedding_id
        assert len(vectors[0]['values']) == len(sample_embedding)
        assert 'metadata' in vectors[0]

    def test_search_similar_images(self, vector_store_service, sample_embedding):
        """Matches above the threshold come back in order, each with an image_id."""
        mock_results = {
            'matches': [
                {
                    'id': 'embedding1',
                    'score': 0.95,
                    'metadata': {
                        'image_id': str(ObjectId()),
                        'filename': 'similar1.jpg',
                        'team_id': str(ObjectId()),
                        'tags': ['cat', 'animal'],
                    },
                },
                {
                    'id': 'embedding2',
                    'score': 0.87,
                    'metadata': {
                        'image_id': str(ObjectId()),
                        'filename': 'similar2.jpg',
                        'team_id': str(ObjectId()),
                        'tags': ['dog', 'animal'],
                    },
                },
            ]
        }
        vector_store_service.index.query.return_value = mock_results

        results = vector_store_service.search_similar(
            query_embedding=sample_embedding,
            team_id=str(ObjectId()),
            top_k=10,
            score_threshold=0.8,
        )

        vector_store_service.index.query.assert_called_once()
        assert len(results) == 2
        assert results[0]['score'] == 0.95
        assert results[1]['score'] == 0.87
        assert all('image_id' in result for result in results)

    def test_search_with_filters(self, vector_store_service, sample_embedding):
        """A search with extra filters still sends the team_id in the query filter."""
        team_id = str(ObjectId())
        # Give the mock a well-formed empty response instead of relying on the
        # way a bare MagicMock happens to behave when indexed and iterated.
        vector_store_service.index.query.return_value = {'matches': []}

        vector_store_service.search_similar(
            query_embedding=sample_embedding,
            team_id=team_id,
            top_k=5,
            filters={"tags": {"$in": ["cat", "dog"]}},
        )

        query_kwargs = vector_store_service.index.query.call_args.kwargs
        assert 'filter' in query_kwargs
        assert query_kwargs['filter']['team_id'] == team_id

    def test_delete_embedding(self, vector_store_service):
        """Deleting one embedding calls ``index.delete`` with a one-element id list."""
        embedding_id = "test-embedding-123"

        success = vector_store_service.delete_embedding(embedding_id)

        vector_store_service.index.delete.assert_called_once_with(ids=[embedding_id])
        assert success is True

    def test_batch_store_embeddings(self, vector_store_service, sample_embedding):
        """Five embeddings stored as a batch go out in a single upsert call."""
        batch_data = [
            {
                'image_id': str(ObjectId()),
                'embedding': sample_embedding,
                'metadata': {
                    'filename': f'image{i}.jpg',
                    'team_id': str(ObjectId()),
                    'tags': [f'tag{i}'],
                },
            }
            for i in range(5)
        ]

        embedding_ids = vector_store_service.batch_store_embeddings(batch_data)

        assert len(embedding_ids) == 5
        vector_store_service.index.upsert.assert_called_once()
        vectors = vector_store_service.index.upsert.call_args.kwargs['vectors']
        assert len(vectors) == 5

    def test_get_index_stats(self, vector_store_service):
        """Index statistics reported by the index are exposed by the service."""
        mock_stats = {
            'total_vector_count': 1000,
            'dimension': 512,
            'index_fullness': 0.1,
        }
        vector_store_service.index.describe_index_stats.return_value = mock_stats

        stats = vector_store_service.get_index_stats()

        vector_store_service.index.describe_index_stats.assert_called_once()
        assert stats['total_vector_count'] == 1000
        assert stats['dimension'] == 512

    def test_search_with_score_threshold(self, vector_store_service, sample_embedding):
        """Matches scoring below ``score_threshold`` are dropped from the results."""
        mock_results = {
            'matches': [
                {'id': 'emb1', 'score': 0.95, 'metadata': {'image_id': '1'}},
                {'id': 'emb2', 'score': 0.75, 'metadata': {'image_id': '2'}},
                {'id': 'emb3', 'score': 0.65, 'metadata': {'image_id': '3'}},
                {'id': 'emb4', 'score': 0.45, 'metadata': {'image_id': '4'}},
            ]
        }
        vector_store_service.index.query.return_value = mock_results

        results = vector_store_service.search_similar(
            query_embedding=sample_embedding,
            team_id=str(ObjectId()),
            top_k=10,
            score_threshold=0.7,
        )

        # Only emb1 (0.95) and emb2 (0.75) clear the 0.7 threshold.
        assert len(results) == 2
        assert all(result['score'] >= 0.7 for result in results)

    def test_update_embedding_metadata(self, vector_store_service):
        """Updating metadata for an embedding reports success."""
        embedding_id = "test-embedding-123"
        new_metadata = {
            'tags': ['updated', 'tag'],
            'description': 'Updated description',
        }

        success = vector_store_service.update_embedding_metadata(
            embedding_id, new_metadata
        )

        # NOTE(review): which index method performs the update depends on the
        # service implementation, so only the return value is checked here.
        assert success is True

    def test_search_by_image_id(self, vector_store_service):
        """Looking up by image id returns the match carrying that image id."""
        image_id = str(ObjectId())
        mock_results = {
            'matches': [
                {
                    'id': 'embedding1',
                    'score': 1.0,
                    'metadata': {
                        'image_id': image_id,
                        'filename': 'target.jpg',
                    },
                }
            ]
        }
        vector_store_service.index.query.return_value = mock_results

        result = vector_store_service.get_embedding_by_image_id(image_id)

        assert result is not None
        assert result['metadata']['image_id'] == image_id

    def test_bulk_delete_embeddings(self, vector_store_service):
        """Bulk deletion forwards the whole id list in one ``index.delete`` call."""
        embedding_ids = ['emb1', 'emb2', 'emb3']

        success = vector_store_service.bulk_delete_embeddings(embedding_ids)

        vector_store_service.index.delete.assert_called_once_with(ids=embedding_ids)
        assert success is True

    def test_search_pagination(self, vector_store_service, sample_embedding):
        """Placeholder for paginated search; skipped until the test is written."""
        # Skip instead of passing silently so the gap shows up in test reports.
        pytest.skip(
            "pagination test not written: depends on how the vector store "
            "handles pagination"
        )

    def test_vector_dimension_validation(self, vector_store_service):
        """Storing a 256-dimensional embedding raises ``ValueError``."""
        wrong_dimension_embedding = np.random.default_rng(0).random(256).tolist()

        with pytest.raises(ValueError):
            vector_store_service.store_embedding(
                image_id=str(ObjectId()),
                embedding=wrong_dimension_embedding,
                metadata={},
            )

    def test_connection_error_handling(self, vector_store_service):
        """A failing index query surfaces as an exception from ``search_similar``."""
        vector_store_service.index.query.side_effect = Exception("Connection failed")

        # The failure must reach the caller rather than being swallowed.
        with pytest.raises(Exception):
            vector_store_service.search_similar(
                query_embedding=[0.1] * 512,
                team_id=str(ObjectId()),
                top_k=10,
            )

    def test_empty_search_results(self, vector_store_service, sample_embedding):
        """An index response with no matches yields an empty list."""
        vector_store_service.index.query.return_value = {'matches': []}

        results = vector_store_service.search_similar(
            query_embedding=sample_embedding,
            team_id=str(ObjectId()),
            top_k=10,
        )

        assert results == []
class TestVectorStoreIntegration:
    """Multi-step vector store scenarios, run against a mocked Pinecone index.

    The fixtures are defined on this class because pytest only makes a
    class-level fixture available to tests of the class that defines it.
    """

    @pytest.fixture
    def mock_pinecone_index(self):
        """Return a ``MagicMock`` exposing the index methods the tests inspect."""
        mock_index = MagicMock()
        mock_index.upsert = MagicMock()
        mock_index.query = MagicMock()
        mock_index.delete = MagicMock()
        mock_index.describe_index_stats = MagicMock()
        return mock_index

    @pytest.fixture
    def vector_store_service(self, mock_pinecone_index):
        """Yield a ``VectorStoreService`` whose ``index`` is the mocked index.

        Yielding from inside the ``patch`` context keeps the ``pinecone``
        module mocked for the whole test, not only during construction.
        """
        with patch('src.services.vector_store.pinecone') as mock_pinecone:
            mock_pinecone.Index.return_value = mock_pinecone_index
            service = VectorStoreService()
            # Pin the mock explicitly, independent of how __init__ obtains it.
            service.index = mock_pinecone_index
            yield service

    @pytest.fixture
    def sample_embedding(self):
        """Return a reproducible 512-dimensional embedding as a list of floats."""
        # Seeded generator: a failing run can be replayed with identical values.
        return np.random.default_rng(42).random(512).tolist()

    @pytest.fixture
    def sample_image(self):
        """Return an ``ImageModel`` populated with fixed test values."""
        return ImageModel(
            filename="test-image.jpg",
            original_filename="test_image.jpg",
            file_size=1024,
            content_type="image/jpeg",
            storage_path="images/test-image.jpg",
            team_id=ObjectId(),
            uploader_id=ObjectId(),
            description="A test image",
            tags=["test", "image"],
        )

    def test_embedding_lifecycle(self, vector_store_service, sample_embedding, sample_image):
        """Test an embedding lifecycle: store, search, then delete."""
        # Store embedding
        embedding_id = vector_store_service.store_embedding(
            image_id=str(sample_image.id),
            embedding=sample_embedding,
            metadata={'filename': sample_image.filename},
        )

        # Search: the mocked index reports the stored embedding as a perfect match.
        mock_results = {
            'matches': [
                {
                    'id': embedding_id,
                    'score': 1.0,
                    'metadata': {'image_id': str(sample_image.id)},
                }
            ]
        }
        vector_store_service.index.query.return_value = mock_results
        results = vector_store_service.search_similar(
            query_embedding=sample_embedding,
            team_id=str(sample_image.team_id),
            top_k=1,
        )
        assert len(results) == 1
        assert results[0]['id'] == embedding_id

        # Delete embedding
        success = vector_store_service.delete_embedding(embedding_id)
        assert success is True

    def test_team_isolation(self, vector_store_service, sample_embedding):
        """A search for one team sends that team's id in the query filter."""
        team1_id = str(ObjectId())
        team2_id = str(ObjectId())

        # The mocked index returns rows for both teams; isolation is checked
        # through the filter handed to the index, not through the rows returned.
        mock_results = {
            'matches': [
                {
                    'id': 'emb1',
                    'score': 0.9,
                    'metadata': {'image_id': '1', 'team_id': team1_id},
                },
                {
                    'id': 'emb2',
                    'score': 0.8,
                    'metadata': {'image_id': '2', 'team_id': team2_id},
                },
            ]
        }
        vector_store_service.index.query.return_value = mock_results

        vector_store_service.search_similar(
            query_embedding=sample_embedding,
            team_id=team1_id,
            top_k=10,
        )

        # Verify team filter was applied in the query
        query_kwargs = vector_store_service.index.query.call_args.kwargs
        assert 'filter' in query_kwargs
        assert query_kwargs['filter']['team_id'] == team1_id