102 lines
3.7 KiB
Python
102 lines
3.7 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Simple test script to embed text and search Qdrant without filters
|
|
"""
|
|
|
|
import os
|
|
import sys
|
|
import asyncio
|
|
import logging
|
|
from dotenv import load_dotenv
|
|
|
|
# Load variables from a .env file into the process environment before any
# service is constructed (presumably the services read their configuration
# from the environment -- confirm against src.services).
load_dotenv()

# Emit INFO-level messages and above; every log line in this script goes
# through the module-level logger defined here.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
|
|
|
|
async def simple_search_test():
    """Embed a fixed text query and run unfiltered similarity searches.

    Steps:
        1. Build the vector DB and embedding services.
        2. Generate an embedding for a hard-coded query string.
        3. Search with several score thresholds and log the top hits.
        4. Search once more with threshold 0.0 and log score statistics.

    Each search result is read as a mapping with ``'score'`` and
    ``'image_id'`` keys; the per-threshold listing also reads
    ``result['metadata'].get('filename')``.

    Returns:
        bool: ``True`` when every step completed, ``False`` when no embedding
        was produced or any exception (including a failed import of the
        project services) was raised. Exceptions are logged, not propagated.
    """
    try:
        # Imported lazily so that an import failure is reported through the
        # same logged error path as any other failure.
        from src.services.vector_db import VectorDatabaseService
        from src.services.embedding_service import EmbeddingService

        logger.info("Initializing services...")
        vector_db = VectorDatabaseService()
        embedding_service = EmbeddingService()

        # Test 1: generate the text embedding.
        logger.info("=== Generating Text Embedding ===")
        search_query = "blank"
        text_embedding = await embedding_service.generate_text_embedding(search_query)

        # Explicit None/empty check instead of bare truthiness: a truth test
        # would raise if the service ever returned a multi-element array.
        if text_embedding is None or len(text_embedding) == 0:
            logger.error("✗ Failed to generate text embedding")
            return False
        logger.info(
            f"✓ Generated embedding for '{search_query}' - length: {len(text_embedding)}"
        )

        # Test 2: search without any filters at increasing thresholds.
        logger.info("=== Searching Qdrant (No Filters) ===")
        thresholds = (0.1, 0.3, 0.5, 0.65, 0.8)

        for threshold in thresholds:
            logger.info(f"\n--- Threshold: {threshold} ---")

            # No filter_conditions argument is passed, so nothing is
            # restricted beyond the limit and the score threshold.
            search_results = vector_db.search_similar_images(
                query_vector=text_embedding,
                limit=10,
                score_threshold=threshold,
            )
            logger.info(f"Found {len(search_results)} results")

            # Show the top 3 results for this threshold.
            for rank, result in enumerate(search_results[:3], start=1):
                logger.info(
                    f" {rank}. Score: {result['score']:.4f} | "
                    f"ID: {result['image_id']} | "
                    f"File: {result['metadata'].get('filename', 'N/A')}"
                )

        # Test 3: lowest threshold used here, to see as much data as the
        # limit allows.
        logger.info("\n=== All Data (Threshold 0.0) ===")
        all_limit = 50
        all_results = vector_db.search_similar_images(
            query_vector=text_embedding,
            limit=all_limit,
            score_threshold=0.0,
        )

        # The count is bounded by the limit, so it is reported as a result
        # count rather than as the size of the collection.
        logger.info(f"Results returned (limit {all_limit}): {len(all_results)}")

        if all_results:
            scores = [entry['score'] for entry in all_results]
            logger.info(f"Score range: {min(scores):.4f} to {max(scores):.4f}")
            logger.info(f"Average score: {sum(scores)/len(scores):.4f}")

            logger.info("\nTop 5 results:")
            for rank, result in enumerate(all_results[:5], start=1):
                logger.info(
                    f" {rank}. Score: {result['score']:.4f} | ID: {result['image_id']}"
                )

            if len(all_results) > 5:
                logger.info("\nBottom 5 results:")
                # Number the tail by its 1-based position in the full list.
                first_tail_rank = len(all_results) - 4
                for rank, result in enumerate(all_results[-5:], start=first_tail_rank):
                    logger.info(
                        f" {rank}. Score: {result['score']:.4f} | ID: {result['image_id']}"
                    )

        logger.info("\n✓ Simple search test completed!")
        return True

    except Exception as e:
        # Top-level boundary of the script: log the message together with the
        # traceback and turn the failure into a False result for the caller.
        logger.exception("✗ Test failed: %s", e)
        return False
|
|
|
|
if __name__ == "__main__":
    # Run the async test to completion and mirror its outcome in the process
    # exit status: 0 when it returned a truthy value, 1 otherwise.
    success = asyncio.run(simple_search_test())
    sys.exit(0 if success else 1)