#!/usr/bin/env python3
"""
Simple test script to embed text and search Qdrant.

Embeds a text query, searches the vector database without filters at
several score thresholds, then repeats the search with a team filter and
logs the results. Exits with status 0 on success and 1 on failure.
"""

import os
import sys
import asyncio
import logging

from dotenv import load_dotenv

# Load environment variables (python-dotenv) before any project services
# are imported or constructed.
load_dotenv()

# Set up logging: INFO level on the root logger, module-level logger for
# everything this script reports.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def _result_metadata(result):
    """Return the result's 'metadata' mapping, or {} when missing or None."""
    return result.get('metadata') or {}


async def simple_search_test(
    search_query: str = "rectangle",
    team_id: str = "68330a29472a0704d2f77063",
) -> bool:
    """Embed a text query and run a series of diagnostic vector searches.

    Steps (all output goes through the module logger):
      1. Generate a text embedding for ``search_query``.
      2. Search without filters at several score thresholds, showing the
         top 3 hits for each.
      3. Search with threshold 0.0 and a limit of 50.
      4. Repeat step 3 with a ``team_id`` filter, then list the team id of
         every unfiltered hit and print score statistics.

    Args:
        search_query: Text to embed and search for.
        team_id: Team id used for the filtered search. The default value
            was taken from server logs.

    Returns:
        True when every step completed. False when no embedding could be
        generated or when any exception was raised; the exception is
        logged with its traceback and not propagated.
    """
    try:
        # Import services inside the try block so that an import failure is
        # reported as a failed test rather than an unhandled crash.
        from src.services.vector_db import VectorDatabaseService
        from src.services.embedding_service import EmbeddingService

        # Initialize services
        logger.info("Initializing services...")
        vector_db = VectorDatabaseService()
        embedding_service = EmbeddingService()

        # Test 1: Generate text embedding
        logger.info("=== Generating Text Embedding ===")
        text_embedding = await embedding_service.generate_text_embedding(search_query)

        # Explicit None/empty check: plain truthiness is ambiguous (raises
        # ValueError) for multi-element array-like embeddings.
        if text_embedding is None or len(text_embedding) == 0:
            logger.error("✗ Failed to generate text embedding")
            return False
        logger.info(f"✓ Generated embedding for '{search_query}' - length: {len(text_embedding)}")

        # Test 2: Search without any filters
        logger.info("=== Searching Qdrant (No Filters) ===")

        # Try different thresholds to see what we get
        for threshold in (0.1, 0.3, 0.5, 0.65, 0.8):
            logger.info(f"\n--- Threshold: {threshold} ---")

            search_results = vector_db.search_similar_images(
                query_vector=text_embedding,
                limit=10,
                score_threshold=threshold,
                # No filter_conditions = search everything
            )
            logger.info(f"Found {len(search_results)} results")

            # Show top 3 results
            for rank, result in enumerate(search_results[:3], start=1):
                filename = _result_metadata(result).get('filename', 'N/A')
                logger.info(f" {rank}. Score: {result['score']:.4f} | ID: {result['image_id']} | File: {filename}")

        # Test 3: Very low threshold to see as much data as the limit allows
        logger.info("\n=== All Data (Threshold 0.0) ===")
        all_limit = 50
        all_results = vector_db.search_similar_images(
            query_vector=text_embedding,
            limit=all_limit,
            score_threshold=0.0,
        )

        # The count is capped by the search limit, so it is not the true
        # collection size; the log label says so explicitly.
        logger.info(f"Vectors returned (capped at {all_limit}): {len(all_results)}")

        # Test 4: With team filtering (like the API does)
        logger.info("\n=== Testing Team Filtering ===")
        filtered_results = vector_db.search_similar_images(
            query_vector=text_embedding,
            limit=all_limit,
            score_threshold=0.0,
            filter_conditions={"team_id": team_id},
        )
        logger.info(f"Results with team filter ({team_id}): {len(filtered_results)}")

        # Show metadata for all results to see team_ids
        logger.info("\n=== Checking Team IDs in Vector DB ===")
        for rank, result in enumerate(all_results, start=1):
            stored_team_id = _result_metadata(result).get('team_id', 'N/A')
            logger.info(f" {rank}. Image ID: {result['image_id']} | Team ID: {stored_team_id}")

        # Show some stats
        if all_results:
            scores = [r['score'] for r in all_results]
            logger.info(f"Score range: {min(scores):.4f} to {max(scores):.4f}")
            logger.info(f"Average score: {sum(scores)/len(scores):.4f}")

            # Show top 5 and bottom 5
            logger.info("\nTop 5 results:")
            for rank, result in enumerate(all_results[:5], start=1):
                logger.info(f" {rank}. Score: {result['score']:.4f} | ID: {result['image_id']}")

            if len(all_results) > 5:
                logger.info("\nBottom 5 results:")
                # Number the tail by its 1-based position in the full list.
                first_rank = len(all_results) - 4
                for rank, result in enumerate(all_results[-5:], start=first_rank):
                    logger.info(f" {rank}. Score: {result['score']:.4f} | ID: {result['image_id']}")

        logger.info("\n✓ Simple search test completed!")
        return True

    except Exception as exc:
        # Top-level boundary of a diagnostic script: log the message and
        # the full traceback, then report failure through the return value.
        logger.exception("✗ Test failed: %s", exc)
        return False


if __name__ == "__main__":
    # Run the async test to completion and map its boolean outcome onto a
    # process exit status (0 = success, 1 = failure).
    success = asyncio.run(simple_search_test())
    sys.exit(0 if success else 1)