"""Tests for the vector store service (semantic search over image embeddings)."""
from unittest.mock import patch, MagicMock, AsyncMock

import numpy as np
import pytest
from bson import ObjectId

from src.models.image import ImageModel
from src.services.vector_store import VectorStoreService
class TestVectorStoreService:
    """Test vector store operations for semantic search.

    Every test runs against a ``VectorStoreService`` whose ``index`` attribute
    is replaced by a ``MagicMock``, so the assertions check how the service
    talks to the index rather than any real vector database behaviour.
    """

    # Length of a well-formed embedding in these tests.
    # NOTE(review): presumably matches the dimension the service validates
    # against -- confirm with VectorStoreService.
    EMBEDDING_DIM = 512

    @pytest.fixture
    def mock_pinecone_index(self):
        """Return a mock exposing the index methods the tests assert on."""
        mock_index = MagicMock()
        mock_index.upsert = MagicMock()
        mock_index.query = MagicMock()
        mock_index.delete = MagicMock()
        mock_index.describe_index_stats = MagicMock()
        return mock_index

    @pytest.fixture
    def vector_store_service(self, mock_pinecone_index):
        """Yield a vector store service wired to the mocked index."""
        with patch('src.services.vector_store.pinecone') as mock_pinecone:
            mock_pinecone.Index.return_value = mock_pinecone_index
            service = VectorStoreService()
            service.index = mock_pinecone_index
            # Yield rather than return: returning would leave the ``with``
            # block and undo the patch before the test body runs.
            yield service

    @pytest.fixture
    def sample_embedding(self):
        """Return a reproducible embedding of ``EMBEDDING_DIM`` floats."""
        # Seeded generator so a failing run can be replayed with the same data.
        rng = np.random.default_rng(0)
        return rng.random(self.EMBEDDING_DIM).tolist()

    @pytest.fixture
    def sample_image(self):
        """Create a sample image model."""
        return ImageModel(
            filename="test-image.jpg",
            original_filename="test_image.jpg",
            file_size=1024,
            content_type="image/jpeg",
            storage_path="images/test-image.jpg",
            team_id=ObjectId(),
            uploader_id=ObjectId(),
            description="A test image",
            tags=["test", "image"],
        )

    def test_store_embedding(self, vector_store_service, sample_embedding, sample_image):
        """Test storing an embedding in the vector database."""
        embedding_id = vector_store_service.store_embedding(
            image_id=str(sample_image.id),
            embedding=sample_embedding,
            metadata={
                "filename": sample_image.filename,
                "team_id": str(sample_image.team_id),
                "tags": sample_image.tags,
                "description": sample_image.description,
            },
        )

        # Verify the embedding was stored
        assert embedding_id is not None
        vector_store_service.index.upsert.assert_called_once()

        # Check the upsert call arguments (vectors are passed by keyword)
        vectors = vector_store_service.index.upsert.call_args.kwargs['vectors']
        assert len(vectors) == 1
        assert vectors[0]['id'] == embedding_id
        assert len(vectors[0]['values']) == len(sample_embedding)
        assert 'metadata' in vectors[0]

    def test_search_similar_images(self, vector_store_service, sample_embedding):
        """Test searching for similar images using vector similarity."""
        # Mock search results
        mock_results = {
            'matches': [
                {
                    'id': 'embedding1',
                    'score': 0.95,
                    'metadata': {
                        'image_id': str(ObjectId()),
                        'filename': 'similar1.jpg',
                        'team_id': str(ObjectId()),
                        'tags': ['cat', 'animal'],
                    },
                },
                {
                    'id': 'embedding2',
                    'score': 0.87,
                    'metadata': {
                        'image_id': str(ObjectId()),
                        'filename': 'similar2.jpg',
                        'team_id': str(ObjectId()),
                        'tags': ['dog', 'animal'],
                    },
                },
            ]
        }
        vector_store_service.index.query.return_value = mock_results

        # Perform search
        results = vector_store_service.search_similar(
            query_embedding=sample_embedding,
            team_id=str(ObjectId()),
            top_k=10,
            score_threshold=0.8,
        )

        # Verify search was performed
        vector_store_service.index.query.assert_called_once()

        # Check results
        assert len(results) == 2
        assert results[0]['score'] == 0.95
        assert results[1]['score'] == 0.87
        assert all('image_id' in result for result in results)

    def test_search_with_filters(self, vector_store_service, sample_embedding):
        """Test searching with metadata filters."""
        team_id = str(ObjectId())
        # Give the service a well-formed (empty) response so the test only
        # exercises how the outgoing query is built.
        vector_store_service.index.query.return_value = {'matches': []}

        # Perform search with team filter
        vector_store_service.search_similar(
            query_embedding=sample_embedding,
            team_id=team_id,
            top_k=5,
            filters={"tags": {"$in": ["cat", "dog"]}},
        )

        # Verify filter was applied
        query_kwargs = vector_store_service.index.query.call_args.kwargs
        assert 'filter' in query_kwargs
        assert query_kwargs['filter']['team_id'] == team_id

    def test_delete_embedding(self, vector_store_service):
        """Test deleting an embedding from the vector database."""
        embedding_id = "test-embedding-123"

        success = vector_store_service.delete_embedding(embedding_id)

        # Verify deletion was attempted
        vector_store_service.index.delete.assert_called_once_with(ids=[embedding_id])
        assert success is True

    def test_batch_store_embeddings(self, vector_store_service, sample_embedding):
        """Test storing multiple embeddings in batch."""
        batch_data = [
            {
                'image_id': str(ObjectId()),
                'embedding': sample_embedding,
                'metadata': {
                    'filename': f'image{i}.jpg',
                    'team_id': str(ObjectId()),
                    'tags': [f'tag{i}'],
                },
            }
            for i in range(5)
        ]

        embedding_ids = vector_store_service.batch_store_embeddings(batch_data)

        # Verify batch storage: one upsert call carrying all five vectors
        assert len(embedding_ids) == 5
        vector_store_service.index.upsert.assert_called_once()
        vectors = vector_store_service.index.upsert.call_args.kwargs['vectors']
        assert len(vectors) == 5

    def test_get_index_stats(self, vector_store_service):
        """Test getting vector database statistics."""
        mock_stats = {
            'total_vector_count': 1000,
            'dimension': 512,
            'index_fullness': 0.1,
        }
        vector_store_service.index.describe_index_stats.return_value = mock_stats

        stats = vector_store_service.get_index_stats()

        # Verify stats retrieval
        vector_store_service.index.describe_index_stats.assert_called_once()
        assert stats['total_vector_count'] == 1000
        assert stats['dimension'] == 512

    def test_search_with_score_threshold(self, vector_store_service, sample_embedding):
        """Test filtering search results by score threshold."""
        # Mock results with varying scores; two are above 0.7, two below
        mock_results = {
            'matches': [
                {'id': 'emb1', 'score': 0.95, 'metadata': {'image_id': '1'}},
                {'id': 'emb2', 'score': 0.75, 'metadata': {'image_id': '2'}},
                {'id': 'emb3', 'score': 0.65, 'metadata': {'image_id': '3'}},
                {'id': 'emb4', 'score': 0.45, 'metadata': {'image_id': '4'}},
            ]
        }
        vector_store_service.index.query.return_value = mock_results

        results = vector_store_service.search_similar(
            query_embedding=sample_embedding,
            team_id=str(ObjectId()),
            top_k=10,
            score_threshold=0.7,
        )

        # Only results above threshold should be returned
        assert len(results) == 2
        assert all(result['score'] >= 0.7 for result in results)

    def test_update_embedding_metadata(self, vector_store_service):
        """Test updating metadata for an existing embedding."""
        embedding_id = "test-embedding-123"
        new_metadata = {
            'tags': ['updated', 'tag'],
            'description': 'Updated description',
        }

        success = vector_store_service.update_embedding_metadata(
            embedding_id, new_metadata
        )

        # Only the reported outcome is checked; which index call performs the
        # update depends on the actual implementation.
        assert success is True

    def test_search_by_image_id(self, vector_store_service):
        """Test searching for a specific image's embedding."""
        image_id = str(ObjectId())

        # Mock search by metadata
        mock_results = {
            'matches': [
                {
                    'id': 'embedding1',
                    'score': 1.0,
                    'metadata': {
                        'image_id': image_id,
                        'filename': 'target.jpg',
                    },
                }
            ]
        }
        vector_store_service.index.query.return_value = mock_results

        result = vector_store_service.get_embedding_by_image_id(image_id)

        # Verify result
        assert result is not None
        assert result['metadata']['image_id'] == image_id

    def test_bulk_delete_embeddings(self, vector_store_service):
        """Test deleting multiple embeddings."""
        embedding_ids = ['emb1', 'emb2', 'emb3']

        success = vector_store_service.bulk_delete_embeddings(embedding_ids)

        # Verify bulk deletion
        vector_store_service.index.delete.assert_called_once_with(ids=embedding_ids)
        assert success is True

    def test_search_pagination(self, vector_store_service, sample_embedding):
        """Placeholder for paginated search results."""
        # Skip explicitly so the report does not show an empty test as passing.
        # What to assert depends on how pagination is handled in the vector
        # store.
        pytest.skip("pagination test not written yet")

    def test_vector_dimension_validation(self, vector_store_service):
        """Test validation of embedding dimensions."""
        # Half the expected length, i.e. the wrong size
        wrong_dimension_embedding = [0.5] * (self.EMBEDDING_DIM // 2)

        with pytest.raises(ValueError):
            vector_store_service.store_embedding(
                image_id=str(ObjectId()),
                embedding=wrong_dimension_embedding,
                metadata={},
            )

    def test_connection_error_handling(self, vector_store_service):
        """Test handling of connection errors."""
        # Mock connection error
        vector_store_service.index.query.side_effect = Exception("Connection failed")

        # The failure is expected to reach the caller as an exception rather
        # than being swallowed.
        # NOTE(review): narrow the exception type once the service's error
        # contract is confirmed.
        with pytest.raises(Exception):
            vector_store_service.search_similar(
                query_embedding=[0.1] * self.EMBEDDING_DIM,
                team_id=str(ObjectId()),
                top_k=10,
            )

    def test_empty_search_results(self, vector_store_service, sample_embedding):
        """Test handling of empty search results."""
        # Mock empty results
        vector_store_service.index.query.return_value = {'matches': []}

        results = vector_store_service.search_similar(
            query_embedding=sample_embedding,
            team_id=str(ObjectId()),
            top_k=10,
        )

        # Search should return empty list
        assert results == []
class TestVectorStoreIntegration:
    """Integration tests for vector store with other services.

    The fixtures are defined on this class because pytest only exposes
    method-level fixtures to tests of the class that declares them.
    """

    @pytest.fixture
    def mock_pinecone_index(self):
        """Return a mock standing in for the Pinecone index."""
        return MagicMock()

    @pytest.fixture
    def vector_store_service(self, mock_pinecone_index):
        """Yield a vector store service wired to the mocked index."""
        with patch('src.services.vector_store.pinecone') as mock_pinecone:
            mock_pinecone.Index.return_value = mock_pinecone_index
            service = VectorStoreService()
            service.index = mock_pinecone_index
            # Yield so the patch stays active for the duration of the test.
            yield service

    @pytest.fixture
    def sample_embedding(self):
        """Return a reproducible 512-element embedding."""
        return np.random.default_rng(0).random(512).tolist()

    @pytest.fixture
    def sample_image(self):
        """Create a sample image model."""
        return ImageModel(
            filename="test-image.jpg",
            original_filename="test_image.jpg",
            file_size=1024,
            content_type="image/jpeg",
            storage_path="images/test-image.jpg",
            team_id=ObjectId(),
            uploader_id=ObjectId(),
            description="A test image",
            tags=["test", "image"],
        )

    def test_embedding_lifecycle(self, vector_store_service, sample_embedding, sample_image):
        """Test embedding lifecycle: store, search, delete."""
        # Store embedding
        embedding_id = vector_store_service.store_embedding(
            image_id=str(sample_image.id),
            embedding=sample_embedding,
            metadata={'filename': sample_image.filename},
        )
        vector_store_service.index.upsert.assert_called_once()

        # Search for similar embeddings; the mocked index reports the stored
        # embedding as a perfect match.
        mock_results = {
            'matches': [
                {
                    'id': embedding_id,
                    'score': 1.0,
                    'metadata': {'image_id': str(sample_image.id)},
                }
            ]
        }
        vector_store_service.index.query.return_value = mock_results

        results = vector_store_service.search_similar(
            query_embedding=sample_embedding,
            team_id=str(sample_image.team_id),
            top_k=1,
        )

        assert len(results) == 1
        assert results[0]['id'] == embedding_id

        # Delete embedding
        success = vector_store_service.delete_embedding(embedding_id)
        assert success is True
        vector_store_service.index.delete.assert_called_once_with(ids=[embedding_id])

    def test_team_isolation(self, vector_store_service, sample_embedding):
        """Test that searches are scoped to the requesting team."""
        team1_id = str(ObjectId())
        team2_id = str(ObjectId())

        # The mocked index returns matches from both teams; it cannot apply
        # a filter itself, so the test inspects the outgoing query instead.
        mock_results = {
            'matches': [
                {
                    'id': 'emb1',
                    'score': 0.9,
                    'metadata': {'image_id': '1', 'team_id': team1_id},
                },
                {
                    'id': 'emb2',
                    'score': 0.8,
                    'metadata': {'image_id': '2', 'team_id': team2_id},
                },
            ]
        }
        vector_store_service.index.query.return_value = mock_results

        vector_store_service.search_similar(
            query_embedding=sample_embedding,
            team_id=team1_id,
            top_k=10,
        )

        # Verify team filter was applied in the query
        query_kwargs = vector_store_service.index.query.call_args.kwargs
        assert 'filter' in query_kwargs
        assert query_kwargs['filter']['team_id'] == team1_id