#!/usr/bin/env python3
"""
Simple test script to embed text and search Qdrant without filters
"""

import os
import sys
import asyncio
import logging

from dotenv import load_dotenv

# Load environment variables
load_dotenv()

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


async def simple_search_test() -> bool:
    """Embed a fixed query and run unfiltered similarity searches against it.

    The test runs three steps:

    1. Generate a text embedding for a fixed query string.
    2. Search with several score thresholds and log the top hits for each.
    3. Search with a threshold of 0.0 and log score statistics plus the
       first and last few hits.

    Returns:
        True when every step completes, False when the embedding could not
        be generated or any exception was raised along the way.
    """
    search_query = "blank"
    thresholds = [0.1, 0.3, 0.5, 0.65, 0.8]
    per_threshold_limit = 10
    all_data_limit = 50
    preview_count = 3
    edge_count = 5

    try:
        # Project imports stay inside the try block so that an import
        # failure is reported as a test failure instead of crashing.
        from src.services.vector_db import VectorDatabaseService
        from src.services.embedding_service import EmbeddingService

        # Initialize services
        logger.info("Initializing services...")
        vector_db = VectorDatabaseService()
        embedding_service = EmbeddingService()

        # Test 1: Generate text embedding
        logger.info("=== Generating Text Embedding ===")
        text_embedding = await embedding_service.generate_text_embedding(search_query)

        # Explicit None/empty check: plain truthiness raises ValueError when
        # the embedding is a multi-element numpy array.
        if text_embedding is None or len(text_embedding) == 0:
            logger.error("✗ Failed to generate text embedding")
            return False
        logger.info(f"✓ Generated embedding for '{search_query}' - length: {len(text_embedding)}")

        # Test 2: Search without any filters
        logger.info("=== Searching Qdrant (No Filters) ===")

        # Try different thresholds to see what we get
        for threshold in thresholds:
            logger.info(f"\n--- Threshold: {threshold} ---")

            # No filter_conditions = search everything
            search_results = vector_db.search_similar_images(
                query_vector=text_embedding,
                limit=per_threshold_limit,
                score_threshold=threshold,
            )

            logger.info(f"Found {len(search_results)} results")

            # Show the first few results
            for rank, result in enumerate(search_results[:preview_count], start=1):
                logger.info(
                    f" {rank}. Score: {result['score']:.4f} | "
                    f"ID: {result['image_id']} | "
                    f"File: {result['metadata'].get('filename', 'N/A')}"
                )

        # Test 3: Very low threshold to see as much data as the limit allows
        logger.info("\n=== All Data (Threshold 0.0) ===")
        all_results = vector_db.search_similar_images(
            query_vector=text_embedding,
            limit=all_data_limit,
            score_threshold=0.0,
        )

        # The query is capped at all_data_limit, so this is the number of
        # results returned, not the size of the collection.
        logger.info(f"Results returned (limit {all_data_limit}): {len(all_results)}")

        # Show some stats
        if all_results:
            scores = [r['score'] for r in all_results]
            logger.info(f"Score range: {min(scores):.4f} to {max(scores):.4f}")
            logger.info(f"Average score: {sum(scores)/len(scores):.4f}")

            # Show the first and last few results
            logger.info("\nTop 5 results:")
            for rank, result in enumerate(all_results[:edge_count], start=1):
                logger.info(f" {rank}. Score: {result['score']:.4f} | ID: {result['image_id']}")

            if len(all_results) > edge_count:
                logger.info("\nBottom 5 results:")
                # Number the tail by its 1-based position in the full list.
                first_tail_rank = len(all_results) - edge_count + 1
                for rank, result in enumerate(all_results[-edge_count:], start=first_tail_rank):
                    logger.info(f" {rank}. Score: {result['score']:.4f} | ID: {result['image_id']}")

        logger.info("\n✓ Simple search test completed!")
        return True

    except Exception as e:
        # Top-level boundary of a test script: log the error together with
        # its traceback and report failure through the return value.
        logger.exception(f"✗ Test failed: {e}")
        return False


if __name__ == "__main__":
    success = asyncio.run(simple_search_test())
    sys.exit(0 if success else 1)