#!/usr/bin/env python3
"""
Simple test script to embed text and search Qdrant without filters
"""

import os
import sys
import asyncio
import logging

from dotenv import load_dotenv

# Load environment variables
load_dotenv()

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Defaults used when the script is run directly.
DEFAULT_SEARCH_QUERY = "rectangle"
DEFAULT_TEAM_ID = "68330a29472a0704d2f77063"  # From server logs
SCORE_THRESHOLDS = (0.1, 0.3, 0.5, 0.65, 0.8)
ALL_DATA_LIMIT = 50


async def simple_search_test(
    search_query: str = DEFAULT_SEARCH_QUERY,
    team_id: str = DEFAULT_TEAM_ID,
) -> bool:
    """Embed a text query and search the vector DB with and without filters.

    Steps performed, each reported through the module logger:

    1. Generate a text embedding for ``search_query``.
    2. Search with several score thresholds and no filter conditions.
    3. Search with threshold 0.0 to list up to ``ALL_DATA_LIMIT`` results.
    4. Repeat the threshold-0.0 search with a ``team_id`` filter, then log
       the team id stored in each unfiltered result's metadata.
    5. Log score statistics and the top/bottom five unfiltered results.

    Args:
        search_query: Text to embed and search for.
        team_id: Value passed as the ``team_id`` filter condition in step 4.

    Returns:
        True when every step ran; False when no embedding was produced or
        any exception (including a failed service import) was raised.
    """
    try:
        # Imported here so an import failure is reported by the handler
        # below instead of crashing the script before logging is useful.
        from src.services.vector_db import VectorDatabaseService
        from src.services.embedding_service import EmbeddingService

        # Initialize services
        logger.info("Initializing services...")
        vector_db = VectorDatabaseService()
        embedding_service = EmbeddingService()

        # Test 1: Generate text embedding
        logger.info("=== Generating Text Embedding ===")
        text_embedding = await embedding_service.generate_text_embedding(search_query)

        # Explicit None/length check: plain truthiness raises on array-like
        # embeddings, while behaving the same as before for lists.
        if text_embedding is None or len(text_embedding) == 0:
            logger.error("✗ Failed to generate text embedding")
            return False
        logger.info(
            f"✓ Generated embedding for '{search_query}' - length: {len(text_embedding)}"
        )

        # Test 2: Search without any filters
        logger.info("=== Searching Qdrant (No Filters) ===")

        # Try different thresholds to see what we get
        for threshold in SCORE_THRESHOLDS:
            logger.info(f"\n--- Threshold: {threshold} ---")

            search_results = vector_db.search_similar_images(
                query_vector=text_embedding,
                limit=10,
                score_threshold=threshold,
                # No filter_conditions = search everything
            )

            logger.info(f"Found {len(search_results)} results")

            # Show top 3 results
            for i, result in enumerate(search_results[:3]):
                # Tolerate results without metadata, matching the team-id
                # listing further down.
                metadata = result.get('metadata') or {}
                logger.info(
                    f" {i+1}. Score: {result['score']:.4f} | ID: {result['image_id']} "
                    f"| File: {metadata.get('filename', 'N/A')}"
                )

        # Test 3: Very low threshold to see all data
        logger.info("\n=== All Data (Threshold 0.0) ===")
        all_results = vector_db.search_similar_images(
            query_vector=text_embedding,
            limit=ALL_DATA_LIMIT,
            score_threshold=0.0,  # Get everything
        )

        # The query passes limit=ALL_DATA_LIMIT, so this count is presumably
        # capped and is not necessarily the size of the collection.
        logger.info(
            f"Total results returned (limit {ALL_DATA_LIMIT}): {len(all_results)}"
        )

        # Test 4: With team filtering (like the API does)
        logger.info("\n=== Testing Team Filtering ===")
        filtered_results = vector_db.search_similar_images(
            query_vector=text_embedding,
            limit=ALL_DATA_LIMIT,
            score_threshold=0.0,
            filter_conditions={"team_id": team_id},
        )

        logger.info(f"Results with team filter ({team_id}): {len(filtered_results)}")

        # Show metadata for all results to see team_ids
        logger.info("\n=== Checking Team IDs in Vector DB ===")
        for i, result in enumerate(all_results):
            metadata = result.get('metadata') or {}
            stored_team_id = metadata.get('team_id', 'N/A')
            logger.info(
                f" {i+1}. Image ID: {result['image_id']} | Team ID: {stored_team_id}"
            )

        # Show some stats
        if all_results:
            scores = [r['score'] for r in all_results]
            logger.info(f"Score range: {min(scores):.4f} to {max(scores):.4f}")
            logger.info(f"Average score: {sum(scores)/len(scores):.4f}")

            # Show top 5 and bottom 5
            logger.info("\nTop 5 results:")
            for i, result in enumerate(all_results[:5]):
                logger.info(
                    f" {i+1}. Score: {result['score']:.4f} | ID: {result['image_id']}"
                )

            if len(all_results) > 5:
                logger.info("\nBottom 5 results:")
                # The slice holds exactly five items here, so the first one
                # has 1-based position len(all_results) - 4.
                for i, result in enumerate(all_results[-5:]):
                    logger.info(
                        f" {len(all_results)-4+i}. Score: {result['score']:.4f} "
                        f"| ID: {result['image_id']}"
                    )

        logger.info("\n✓ Simple search test completed!")
        return True

    except Exception as e:
        # Top-level boundary of a diagnostic script: report the failure with
        # its traceback and turn it into a failing exit status.
        logger.exception(f"✗ Test failed: {e}")
        return False


if __name__ == "__main__":
    success = asyncio.run(simple_search_test())
    sys.exit(0 if success else 1)