fix image upload

This commit is contained in:
johnpccd 2025-05-25 17:01:57 +02:00
parent 46fd8e6e5e
commit 0b5b383789
12 changed files with 137 additions and 321 deletions

View File

@ -407,9 +407,9 @@ The API provides the following main endpoints with their authentication and pagi
- **Query Parameters:**
- `q` (required) - Search query
- `limit` (default: 10, min: 1, max: 50) - Number of results
- `threshold` (default: 0.7, min: 0.0, max: 1.0) - Similarity threshold
- `similarity_threshold` (default: 0.65, min: 0.0, max: 1.0) - Minimum similarity score a result must reach to be returned
- `collection_id` (optional) - Filter by collection
- **Response includes:** `results`, `total`, `limit`, `threshold`, `query`
- **Response includes:** `results`, `total`, `limit`, `similarity_threshold`, `query`
- `POST /api/v1/search` - Advanced search with same pagination
### 🔑 **Authentication Model**

View File

@ -180,7 +180,7 @@ class ApiClient {
async searchImages(query, similarityThreshold, maxResults = 20) {
const searchData = {
query,
threshold: similarityThreshold,
similarity_threshold: similarityThreshold,
limit: maxResults
};

View File

@ -224,6 +224,29 @@ def generate_image_embeddings(image_data: bytes) -> Optional[np.ndarray]:
Numpy array of embeddings or None if failed
"""
try:
# Basic validation of image data
if not image_data or len(image_data) == 0:
logger.error("Empty image data provided")
return None
# Check image size (limit to 10MB)
if len(image_data) > 10 * 1024 * 1024:
logger.warning(f"Large image detected: {len(image_data)} bytes")
# Validate image format using PIL
try:
pil_image = Image.open(io.BytesIO(image_data))
logger.info(f"Image format: {pil_image.format}, size: {pil_image.size}, mode: {pil_image.mode}")
# Check for blank/empty images
if pil_image.size[0] == 0 or pil_image.size[1] == 0:
logger.error("Image has zero dimensions")
return None
except Exception as e:
logger.error(f"Invalid image format: {e}")
return None
# Create Vertex AI image object
vertex_image = VertexImage(image_data)
@ -238,15 +261,23 @@ def generate_image_embeddings(image_data: bytes) -> Optional[np.ndarray]:
# Get the image embedding vector
embedding_vector = embeddings.image_embedding
# Convert to numpy array
# Convert to numpy array - DO NOT normalize Vertex AI embeddings
# This must match the behavior in the main embedding service
embeddings_array = np.array(embedding_vector, dtype=np.float32)
# Normalize the feature vector
norm = np.linalg.norm(embeddings_array)
if norm > 0:
embeddings_array = embeddings_array / norm
# Validate embedding quality
if np.any(np.isnan(embeddings_array)) or np.any(np.isinf(embeddings_array)):
logger.error("Generated embeddings contain NaN or infinite values")
return None
# Check if embedding is mostly zeros (might indicate processing issue)
zero_ratio = np.sum(embeddings_array == 0.0) / len(embeddings_array)
if zero_ratio > 0.9:
logger.warning(f"Embedding is {zero_ratio*100:.1f}% zeros - might indicate processing issue")
logger.info(f"Generated embeddings with shape: {embeddings_array.shape}")
logger.info(f"Embedding stats - min: {embeddings_array.min():.6f}, max: {embeddings_array.max():.6f}, norm: {np.linalg.norm(embeddings_array):.6f}")
return embeddings_array
except Exception as e:

View File

@ -1,7 +1,7 @@
{
"version": 4,
"terraform_version": "1.10.1",
"serial": 445,
"serial": 451,
"lineage": "a183cd95-f987-8698-c6dd-84e933c394a5",
"outputs": {
"cloud_function_name": {
@ -98,16 +98,16 @@
"attributes": {
"exclude_symlink_directories": null,
"excludes": null,
"id": "0cfb36e4e396f12e3ad2944c44b083bff2224ad5",
"output_base64sha256": "uMoV4IM2IuGcRtqeI7wbu3OsTmvDx1ohDDxkEE5NY9U=",
"output_base64sha512": "BQB+g3lC0+y5vOx6KHh4AWCeHk3D2nmdgE8JrFaiPlCWV6KsrMdANGyKeZ/aFmvGjbFw7MGQD4s0u/tn+viVAA==",
"id": "fe2be242d7b603458e099720b9d99e319952abb0",
"output_base64sha256": "+3DKsNV8OT4cv8S2rftR1Pe8UUHzJNn2hhWkS8FcN9I=",
"output_base64sha512": "2+zPRbyYxFDTa0LJz6TmZOajp10eJNQla9tK0S8d++4T6vXgt7nKSy81GO+V4ttWYopmipOvRYJe5YZ4WvmecA==",
"output_file_mode": null,
"output_md5": "b532cf3ff81d62dd7dec013e486931aa",
"output_md5": "8e8a10dd705e45fe8645782a20d999a3",
"output_path": "./function-source.zip",
"output_sha": "0cfb36e4e396f12e3ad2944c44b083bff2224ad5",
"output_sha256": "b8ca15e0833622e19c46da9e23bc1bbb73ac4e6bc3c75a210c3c64104e4d63d5",
"output_sha512": "05007e837942d3ecb9bcec7a28787801609e1e4dc3da799d804f09ac56a23e509657a2acacc740346c8a799fda166bc68db170ecc1900f8b34bbfb67faf89500",
"output_size": 69764346,
"output_sha": "fe2be242d7b603458e099720b9d99e319952abb0",
"output_sha256": "fb70cab0d57c393e1cbfc4b6adfb51d4f7bc5141f324d9f68615a44bc15c37d2",
"output_sha512": "dbeccf45bc98c450d36b42c9cfa4e664e6a3a75d1e24d4256bdb4ad12f1dfbee13eaf5e0b7b9ca4b2f3518ef95e2db56628a668a93af45825ee586785af99e70",
"output_size": 69764811,
"source": [],
"source_content": null,
"source_content_filename": null,
@ -172,7 +172,7 @@
"effective_annotations": {
"run.googleapis.com/ingress": "all",
"run.googleapis.com/ingress-status": "all",
"run.googleapis.com/operation-id": "a9aeb6de-fdd6-43b2-93f8-8b7f72afab4c",
"run.googleapis.com/operation-id": "2b1374b6-6b03-4d99-a76c-b8e751f46df0",
"run.googleapis.com/urls": "[\"https://sereact-761163285547.us-central1.run.app\",\"https://sereact-p64zpdtkta-uc.a.run.app\"]",
"serving.knative.dev/creator": "johnpccd3@gmail.com",
"serving.knative.dev/lastModifier": "johnpccd3@gmail.com"
@ -184,12 +184,12 @@
"generation": 1,
"labels": null,
"namespace": "gen-lang-client-0424120530",
"resource_version": "AAY19MELEOc",
"resource_version": "AAY191YVk4g",
"self_link": "/apis/serving.knative.dev/v1/namespaces/761163285547/services/sereact",
"terraform_labels": {
"goog-terraform-provisioned": "true"
},
"uid": "8c8be11c-c607-4caa-a65e-c552ec445882"
"uid": "d8b0e29e-2db1-4f23-8b6c-d7238a9a5f89"
}
],
"name": "sereact",
@ -216,14 +216,14 @@
"type": "RoutesReady"
}
],
"latest_created_revision_name": "sereact-00001-z4g",
"latest_ready_revision_name": "sereact-00001-z4g",
"latest_created_revision_name": "sereact-00001-htg",
"latest_ready_revision_name": "sereact-00001-htg",
"observed_generation": 1,
"traffic": [
{
"latest_revision": true,
"percent": 100,
"revision_name": "sereact-00001-z4g",
"revision_name": "sereact-00001-htg",
"tag": "",
"url": ""
}
@ -440,7 +440,7 @@
"schema_version": 0,
"attributes": {
"condition": [],
"etag": "BwY19MG70Fs=",
"etag": "BwY191avNJc=",
"id": "v1/projects/gen-lang-client-0424120530/locations/us-central1/services/sereact/roles/run.invoker/allUsers",
"location": "us-central1",
"member": "allUsers",
@ -474,7 +474,7 @@
"automatic_update_policy": [
{}
],
"build": "projects/761163285547/locations/us-central1/builds/ae88c918-6bb3-4aef-a56f-270f48f73049",
"build": "projects/761163285547/locations/us-central1/builds/3804d611-8e49-491b-b994-4ed7fc528e92",
"docker_repository": "projects/gen-lang-client-0424120530/locations/us-central1/repositories/gcf-artifacts",
"entry_point": "process_image_embedding",
"environment_variables": {},
@ -487,8 +487,8 @@
"storage_source": [
{
"bucket": "gen-lang-client-0424120530-cloud-function-source",
"generation": 1748175166697242,
"object": "function-source-b532cf3ff81d62dd7dec013e486931aa.zip"
"generation": 1748186294324568,
"object": "function-source-8e8a10dd705e45fe8645782a20d999a3.zip"
}
]
}
@ -508,7 +508,7 @@
"pubsub_topic": "projects/gen-lang-client-0424120530/topics/image-processing-topic",
"retry_policy": "RETRY_POLICY_RETRY",
"service_account_email": "761163285547-compute@developer.gserviceaccount.com",
"trigger": "projects/gen-lang-client-0424120530/locations/us-central1/triggers/process-image-embedding-645734",
"trigger": "projects/gen-lang-client-0424120530/locations/us-central1/triggers/process-image-embedding-873142",
"trigger_region": "us-central1"
}
],
@ -559,7 +559,7 @@
"goog-terraform-provisioned": "true"
},
"timeouts": null,
"update_time": "2025-05-25T12:15:22.215124150Z",
"update_time": "2025-05-25T15:20:42.834818284Z",
"url": "https://us-central1-gen-lang-client-0424120530.cloudfunctions.net/process-image-embedding"
},
"sensitive_attributes": [
@ -875,8 +875,8 @@
"database_edition": "STANDARD",
"delete_protection_state": "DELETE_PROTECTION_DISABLED",
"deletion_policy": "ABANDON",
"earliest_version_time": "2025-05-25T11:12:43.126081Z",
"etag": "IPjb6fzLvo0DMKrW4vCEvY0D",
"earliest_version_time": "2025-05-25T14:17:09.893967Z",
"etag": "IKfR75n1vo0DMKrW4vCEvY0D",
"id": "projects/gen-lang-client-0424120530/databases/sereact-imagedb",
"key_prefix": "",
"location_id": "us-central1",
@ -1514,21 +1514,21 @@
"content_encoding": "",
"content_language": "",
"content_type": "application/zip",
"crc32c": "EgiVnQ==",
"crc32c": "cMTkTw==",
"customer_encryption": [],
"detect_md5hash": "tTLPP/gdYt197AE+SGkxqg==",
"detect_md5hash": "jooQ3XBeRf6GRXgqINmZow==",
"event_based_hold": false,
"generation": 1748174860755303,
"id": "gen-lang-client-0424120530-cloud-function-source-function-source-b532cf3ff81d62dd7dec013e486931aa.zip",
"generation": 1748186292978895,
"id": "gen-lang-client-0424120530-cloud-function-source-function-source-8e8a10dd705e45fe8645782a20d999a3.zip",
"kms_key_name": "",
"md5hash": "tTLPP/gdYt197AE+SGkxqg==",
"md5hexhash": "b532cf3ff81d62dd7dec013e486931aa",
"media_link": "https://storage.googleapis.com/download/storage/v1/b/gen-lang-client-0424120530-cloud-function-source/o/function-source-b532cf3ff81d62dd7dec013e486931aa.zip?generation=1748174860755303\u0026alt=media",
"metadata": {},
"name": "function-source-b532cf3ff81d62dd7dec013e486931aa.zip",
"output_name": "function-source-b532cf3ff81d62dd7dec013e486931aa.zip",
"md5hash": "jooQ3XBeRf6GRXgqINmZow==",
"md5hexhash": "8e8a10dd705e45fe8645782a20d999a3",
"media_link": "https://storage.googleapis.com/download/storage/v1/b/gen-lang-client-0424120530-cloud-function-source/o/function-source-8e8a10dd705e45fe8645782a20d999a3.zip?generation=1748186292978895\u0026alt=media",
"metadata": null,
"name": "function-source-8e8a10dd705e45fe8645782a20d999a3.zip",
"output_name": "function-source-8e8a10dd705e45fe8645782a20d999a3.zip",
"retention": [],
"self_link": "https://www.googleapis.com/storage/v1/b/gen-lang-client-0424120530-cloud-function-source/o/function-source-b532cf3ff81d62dd7dec013e486931aa.zip",
"self_link": "https://www.googleapis.com/storage/v1/b/gen-lang-client-0424120530-cloud-function-source/o/function-source-8e8a10dd705e45fe8645782a20d999a3.zip",
"source": "./function-source.zip",
"storage_class": "STANDARD",
"temporary_hold": false,

View File

@ -1,7 +1,7 @@
{
"version": 4,
"terraform_version": "1.10.1",
"serial": 441,
"serial": 445,
"lineage": "a183cd95-f987-8698-c6dd-84e933c394a5",
"outputs": {
"cloud_function_name": {
@ -172,7 +172,7 @@
"effective_annotations": {
"run.googleapis.com/ingress": "all",
"run.googleapis.com/ingress-status": "all",
"run.googleapis.com/operation-id": "0f195b05-99ac-4d28-b5fe-2d3dea289124",
"run.googleapis.com/operation-id": "a9aeb6de-fdd6-43b2-93f8-8b7f72afab4c",
"run.googleapis.com/urls": "[\"https://sereact-761163285547.us-central1.run.app\",\"https://sereact-p64zpdtkta-uc.a.run.app\"]",
"serving.knative.dev/creator": "johnpccd3@gmail.com",
"serving.knative.dev/lastModifier": "johnpccd3@gmail.com"
@ -182,14 +182,14 @@
"goog-terraform-provisioned": "true"
},
"generation": 1,
"labels": {},
"labels": null,
"namespace": "gen-lang-client-0424120530",
"resource_version": "AAY189oNgAQ",
"resource_version": "AAY19MELEOc",
"self_link": "/apis/serving.knative.dev/v1/namespaces/761163285547/services/sereact",
"terraform_labels": {
"goog-terraform-provisioned": "true"
},
"uid": "20e61eb3-6217-40e8-8ae5-45111d31bbda"
"uid": "8c8be11c-c607-4caa-a65e-c552ec445882"
}
],
"name": "sereact",
@ -216,14 +216,14 @@
"type": "RoutesReady"
}
],
"latest_created_revision_name": "sereact-00001-2lz",
"latest_ready_revision_name": "sereact-00001-2lz",
"latest_created_revision_name": "sereact-00001-z4g",
"latest_ready_revision_name": "sereact-00001-z4g",
"observed_generation": 1,
"traffic": [
{
"latest_revision": true,
"percent": 100,
"revision_name": "sereact-00001-2lz",
"revision_name": "sereact-00001-z4g",
"tag": "",
"url": ""
}
@ -256,8 +256,8 @@
"container_concurrency": 80,
"containers": [
{
"args": [],
"command": [],
"args": null,
"command": null,
"env": [
{
"name": "API_KEY_SECRET",
@ -337,7 +337,7 @@
"cpu": "1",
"memory": "1Gi"
},
"requests": {}
"requests": null
}
],
"startup_probe": [
@ -359,7 +359,7 @@
"working_dir": ""
}
],
"node_selector": {},
"node_selector": null,
"service_account_name": "761163285547-compute@developer.gserviceaccount.com",
"serving_state": "",
"timeout_seconds": 300,
@ -440,7 +440,7 @@
"schema_version": 0,
"attributes": {
"condition": [],
"etag": "BwY189qg+AA=",
"etag": "BwY19MG70Fs=",
"id": "v1/projects/gen-lang-client-0424120530/locations/us-central1/services/sereact/roles/run.invoker/allUsers",
"location": "us-central1",
"member": "allUsers",
@ -474,7 +474,7 @@
"automatic_update_policy": [
{}
],
"build": "projects/761163285547/locations/us-central1/builds/aab08c74-df86-4cd7-9176-4ff267cab3e6",
"build": "projects/761163285547/locations/us-central1/builds/ae88c918-6bb3-4aef-a56f-270f48f73049",
"docker_repository": "projects/gen-lang-client-0424120530/locations/us-central1/repositories/gcf-artifacts",
"entry_point": "process_image_embedding",
"environment_variables": {},
@ -487,8 +487,8 @@
"storage_source": [
{
"bucket": "gen-lang-client-0424120530-cloud-function-source",
"generation": 1748171376287077,
"object": "function-source-58a2b7fe53bb2c8c921405cc965d635c.zip"
"generation": 1748175166697242,
"object": "function-source-b532cf3ff81d62dd7dec013e486931aa.zip"
}
]
}
@ -508,13 +508,13 @@
"pubsub_topic": "projects/gen-lang-client-0424120530/topics/image-processing-topic",
"retry_policy": "RETRY_POLICY_RETRY",
"service_account_email": "761163285547-compute@developer.gserviceaccount.com",
"trigger": "projects/gen-lang-client-0424120530/locations/us-central1/triggers/process-image-embedding-013009",
"trigger": "projects/gen-lang-client-0424120530/locations/us-central1/triggers/process-image-embedding-645734",
"trigger_region": "us-central1"
}
],
"id": "projects/gen-lang-client-0424120530/locations/us-central1/functions/process-image-embedding",
"kms_key_name": "",
"labels": {},
"labels": null,
"location": "us-central1",
"name": "process-image-embedding",
"project": "gen-lang-client-0424120530",
@ -559,7 +559,7 @@
"goog-terraform-provisioned": "true"
},
"timeouts": null,
"update_time": "2025-05-25T11:13:04.212724797Z",
"update_time": "2025-05-25T12:15:22.215124150Z",
"url": "https://us-central1-gen-lang-client-0424120530.cloudfunctions.net/process-image-embedding"
},
"sensitive_attributes": [
@ -588,6 +588,7 @@
}
]
],
"private": "eyJlMmJmYjczMC1lY2FhLTExZTYtOGY4OC0zNDM2M2JjN2M0YzAiOnsiY3JlYXRlIjozNjAwMDAwMDAwMDAwLCJkZWxldGUiOjM2MDAwMDAwMDAwMDAsInVwZGF0ZSI6MzYwMDAwMDAwMDAwMH19",
"dependencies": [
"data.archive_file.function_source",
"data.google_project.current",
@ -602,13 +603,6 @@
}
]
},
{
"mode": "managed",
"type": "google_compute_address",
"name": "vector_db_static_ip",
"provider": "provider[\"registry.terraform.io/hashicorp/google\"]",
"instances": []
},
{
"mode": "managed",
"type": "google_compute_firewall",
@ -818,7 +812,18 @@
[
{
"type": "get_attr",
"value": "metadata_startup_script"
"value": "boot_disk"
},
{
"type": "index",
"value": {
"value": 0,
"type": "number"
}
},
{
"type": "get_attr",
"value": "disk_encryption_key_rsa"
}
],
[
@ -841,18 +846,7 @@
[
{
"type": "get_attr",
"value": "boot_disk"
},
{
"type": "index",
"value": {
"value": 0,
"type": "number"
}
},
{
"type": "get_attr",
"value": "disk_encryption_key_rsa"
"value": "metadata_startup_script"
}
]
],
@ -881,8 +875,8 @@
"database_edition": "STANDARD",
"delete_protection_state": "DELETE_PROTECTION_DISABLED",
"deletion_policy": "ABANDON",
"earliest_version_time": "2025-05-25T11:07:20.673706Z",
"etag": "IIrliOPKvo0DMKrW4vCEvY0D",
"earliest_version_time": "2025-05-25T11:12:43.126081Z",
"etag": "IPjb6fzLvo0DMKrW4vCEvY0D",
"id": "projects/gen-lang-client-0424120530/databases/sereact-imagedb",
"key_prefix": "",
"location_id": "us-central1",
@ -1530,7 +1524,7 @@
"md5hash": "tTLPP/gdYt197AE+SGkxqg==",
"md5hexhash": "b532cf3ff81d62dd7dec013e486931aa",
"media_link": "https://storage.googleapis.com/download/storage/v1/b/gen-lang-client-0424120530-cloud-function-source/o/function-source-b532cf3ff81d62dd7dec013e486931aa.zip?generation=1748174860755303\u0026alt=media",
"metadata": null,
"metadata": {},
"name": "function-source-b532cf3ff81d62dd7dec013e486931aa.zip",
"output_name": "function-source-b532cf3ff81d62dd7dec013e486931aa.zip",
"retention": [],

View File

@ -1,121 +0,0 @@
#!/usr/bin/env python3
"""
Simple test script to embed text and search Qdrant without filters
"""
import os
import sys
import asyncio
import logging
from dotenv import load_dotenv
# Load environment variables
load_dotenv()
# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
async def simple_search_test(
    search_query: str = "rectangle",
    test_team_id: str = "68330a29472a0704d2f77063",  # From server logs
) -> bool:
    """Embed a text query and probe the vector database with it.

    The check runs four steps and logs what it finds at each one:

    1. Generate a text embedding for ``search_query``.
    2. Search without filters at several similarity thresholds.
    3. Search at threshold 0.0 to list everything the query can reach.
    4. Repeat the threshold-0.0 search restricted to ``test_team_id``.

    Args:
        search_query: Text to embed and search for.
        test_team_id: Team id used for the filtered search in step 4.

    Returns:
        True when every step ran, False when the embedding could not be
        generated or any step raised.
    """
    try:
        # Imported inside the try block so that import or configuration
        # errors are logged by the handler below instead of aborting the
        # script before logging is useful.
        from src.services.vector_db import VectorDatabaseService
        from src.services.embedding_service import EmbeddingService

        # Initialize services
        logger.info("Initializing services...")
        vector_db = VectorDatabaseService()
        embedding_service = EmbeddingService()

        # Test 1: Generate text embedding
        logger.info("=== Generating Text Embedding ===")
        text_embedding = await embedding_service.generate_text_embedding(search_query)
        # Explicit None/empty check: plain truthiness is ambiguous for
        # array-like embeddings and would raise instead of reporting failure.
        if text_embedding is None or len(text_embedding) == 0:
            logger.error("✗ Failed to generate text embedding")
            return False
        logger.info(f"✓ Generated embedding for '{search_query}' - length: {len(text_embedding)}")

        # Test 2: Search without any filters
        logger.info("=== Searching Qdrant (No Filters) ===")
        # Try different thresholds to see what we get
        for threshold in (0.1, 0.3, 0.5, 0.65, 0.8):
            logger.info(f"\n--- Threshold: {threshold} ---")
            search_results = vector_db.search_similar_images(
                query_vector=text_embedding,
                limit=10,
                similarity_threshold=threshold,
                # No filter_conditions = search everything
            )
            logger.info(f"Found {len(search_results)} results")
            # Show top 3 results
            for rank, result in enumerate(search_results[:3], start=1):
                logger.info(f" {rank}. Score: {result['similarity_score']:.4f} | ID: {result['image_id']} | File: {result['metadata'].get('filename', 'N/A')}")

        # Test 3: Very low threshold to see all data.
        # The count logged below is capped by limit=50, so it is a lower
        # bound on the collection size rather than an exact total.
        logger.info("\n=== All Data (Threshold 0.0) ===")
        all_results = vector_db.search_similar_images(
            query_vector=text_embedding,
            limit=50,
            similarity_threshold=0.0,  # Get everything
        )
        logger.info(f"Total vectors in collection: {len(all_results)}")

        # Test 4: With team filtering (like the API does)
        logger.info("\n=== Testing Team Filtering ===")
        filtered_results = vector_db.search_similar_images(
            query_vector=text_embedding,
            limit=50,
            similarity_threshold=0.0,
            filter_conditions={"team_id": test_team_id},
        )
        logger.info(f"Results with team filter ({test_team_id}): {len(filtered_results)}")

        # Show metadata for all results to see team_ids
        logger.info("\n=== Checking Team IDs in Vector DB ===")
        for rank, result in enumerate(all_results, start=1):
            team_id = result.get('metadata', {}).get('team_id', 'N/A')
            logger.info(f" {rank}. Image ID: {result['image_id']} | Team ID: {team_id}")

        # Show some stats
        if all_results:
            scores = [r['similarity_score'] for r in all_results]
            logger.info(f"Score range: {min(scores):.4f} to {max(scores):.4f}")
            logger.info(f"Average score: {sum(scores)/len(scores):.4f}")

            # Show top 5 and bottom 5
            logger.info("\nTop 5 results:")
            for rank, result in enumerate(all_results[:5], start=1):
                logger.info(f" {rank}. Score: {result['similarity_score']:.4f} | ID: {result['image_id']}")

            if len(all_results) > 5:
                logger.info("\nBottom 5 results:")
                # Number the tail entries by their position in the full list.
                first_rank = len(all_results) - 4
                for rank, result in enumerate(all_results[-5:], start=first_rank):
                    logger.info(f" {rank}. Score: {result['similarity_score']:.4f} | ID: {result['image_id']}")

        logger.info("\n✓ Simple search test completed!")
        return True
    except Exception as e:
        # Top-level boundary of a diagnostic script: log the message together
        # with the traceback and report failure through the return value.
        logger.exception(f"✗ Test failed: {e}")
        return False
if __name__ == "__main__":
    # The process exit status mirrors the outcome: 0 on success, 1 on failure.
    passed = asyncio.run(simple_search_test())
    sys.exit(1 if not passed else 0)

View File

@ -76,7 +76,7 @@ async def upload_image(
file_size=file_size,
content_type=content_type,
storage_path=storage_path,
public_url="", # Will be set after we have the image ID
public_url=None, # Will be set after we have the image ID
team_id=current_user.team_id,
uploader_id=current_user.id,
description=description,

View File

@ -33,7 +33,7 @@ async def search_images(
request: Request,
q: str = Query(..., description="Search query"),
limit: int = Query(10, ge=1, le=50, description="Number of results to return"),
threshold: float = Query(0.65, ge=0.0, le=1.0, description="Similarity threshold"),
similarity_threshold: float = Query(0.65, ge=0.0, le=1.0, description="Similarity threshold"),
collection_id: Optional[str] = Query(None, description="Filter by collection ID"),
current_user: UserModel = Depends(get_current_user)
):
@ -46,7 +46,7 @@ async def search_images(
"method": request.method,
"query": q,
"limit": limit,
"threshold": threshold
"similarity_threshold": similarity_threshold
},
user_id=str(current_user.id),
team_id=str(current_user.team_id)
@ -62,7 +62,7 @@ async def search_images(
search_results = get_vector_db_service().search_similar_images(
query_vector=query_embedding,
limit=limit,
score_threshold=threshold,
similarity_threshold=similarity_threshold,
filter_conditions={"team_id": str(current_user.team_id)} if current_user.team_id else None
)
@ -72,12 +72,12 @@ async def search_images(
results=[],
total=0,
limit=limit,
threshold=threshold
similarity_threshold=similarity_threshold
)
# Get image IDs and scores from search results
image_ids = [result['image_id'] for result in search_results if result['image_id']]
scores = {result['image_id']: result['score'] for result in search_results if result['image_id']}
scores = {result['image_id']: result['similarity_score'] for result in search_results if result['image_id']}
# Get image metadata from database
images = await image_repository.get_by_ids(image_ids)
@ -123,7 +123,7 @@ async def search_images(
results=results,
total=len(results),
limit=limit,
threshold=threshold
similarity_threshold=similarity_threshold
)
except Exception as e:
@ -160,11 +160,11 @@ async def search_images_advanced(
logger.info(f"Generated embedding with length: {len(query_embedding)}")
# Search in vector database
logger.info(f"Searching vector database with threshold: {search_request.threshold}")
logger.info(f"Searching vector database with similarity_threshold: {search_request.similarity_threshold}")
search_results = get_vector_db_service().search_similar_images(
query_vector=query_embedding,
limit=search_request.limit,
score_threshold=search_request.threshold,
similarity_threshold=search_request.similarity_threshold,
filter_conditions={"team_id": str(current_user.team_id)} if current_user.team_id else None
)
@ -177,12 +177,12 @@ async def search_images_advanced(
results=[],
total=0,
limit=search_request.limit,
threshold=search_request.threshold
similarity_threshold=search_request.similarity_threshold
)
# Get image IDs and scores from search results
image_ids = [result['image_id'] for result in search_results if result['image_id']]
scores = {result['image_id']: result['score'] for result in search_results if result['image_id']}
scores = {result['image_id']: result['similarity_score'] for result in search_results if result['image_id']}
logger.info(f"Extracted {len(image_ids)} image IDs: {image_ids}")
@ -247,7 +247,7 @@ async def search_images_advanced(
results=results,
total=len(results),
limit=search_request.limit,
threshold=search_request.threshold
similarity_threshold=search_request.similarity_threshold
)
except Exception as e:

View File

@ -1,6 +1,6 @@
from typing import List, Optional, ClassVar
from datetime import datetime
from pydantic import BaseModel, Field
from pydantic import BaseModel, Field, field_validator
from src.schemas.image import ImageResponse
@ -8,18 +8,25 @@ class SearchRequest(BaseModel):
"""Schema for advanced search request"""
query: str = Field(..., description="Search query", min_length=1)
limit: int = Field(10, description="Maximum number of results", ge=1, le=50)
threshold: float = Field(0.7, description="Similarity threshold", ge=0.0, le=1.0)
similarity_threshold: Optional[float] = Field(None, description="Similarity threshold", ge=0.0, le=1.0)
collection_id: Optional[str] = Field(None, description="Filter by collection ID")
date_from: Optional[datetime] = Field(None, description="Filter images uploaded after this date")
date_to: Optional[datetime] = Field(None, description="Filter images uploaded before this date")
uploader_id: Optional[str] = Field(None, description="Filter by uploader ID")
@field_validator('similarity_threshold')
@classmethod
def set_default_similarity_threshold(cls, v):
    """Return 0.7 when the supplied threshold is None, otherwise the value itself."""
    # NOTE(review): pydantic v2 does not run field validators on a field's
    # default value unless `validate_default=True` is set, so a request that
    # omits `similarity_threshold` may keep None instead of 0.7 - confirm,
    # and enable validate_default on the field if that is the case.
    return 0.7 if v is None else v
model_config: ClassVar[dict] = {
"json_schema_extra": {
"example": {
"query": "mountain sunset",
"limit": 10,
"threshold": 0.7,
"similarity_threshold": 0.7,
"collection_id": "507f1f77bcf86cd799439044",
"date_from": "2023-01-01T00:00:00",
"date_to": "2023-12-31T23:59:59",
@ -34,7 +41,7 @@ class SearchResponse(BaseModel):
results: List[ImageResponse]
total: int
limit: int
threshold: float
similarity_threshold: float
model_config: ClassVar[dict] = {
"json_schema_extra": {
@ -66,7 +73,7 @@ class SearchResponse(BaseModel):
],
"total": 1,
"limit": 10,
"threshold": 0.7
"similarity_threshold": 0.7
}
}
}

View File

@ -139,7 +139,7 @@ class VectorDatabaseService:
self,
query_vector: List[float],
limit: int = 10,
score_threshold: float = 0.65,
similarity_threshold: float = 0.65,
filter_conditions: Dict[str, Any] = None
) -> List[Dict[str, Any]]:
"""
@ -148,7 +148,7 @@ class VectorDatabaseService:
Args:
query_vector: Query vector to search for
limit: Maximum number of results to return
score_threshold: Minimum similarity score threshold
similarity_threshold: Minimum similarity score threshold
filter_conditions: Additional filter conditions
Returns:
@ -172,14 +172,14 @@ class VectorDatabaseService:
query_vector=query_vector,
query_filter=search_filter,
limit=limit,
score_threshold=score_threshold
score_threshold=similarity_threshold
)
results = []
for hit in search_result:
result = {
"point_id": hit.id,
"score": hit.score,
"similarity_score": hit.score,
"image_id": hit.payload.get("image_id"),
"metadata": hit.payload
}

View File

@ -1,47 +0,0 @@
#!/bin/bash
# Development startup script for Sereact API
# This script sets the environment variables and starts the application
# Auto-generated by deployment/scripts/setup_local_env.sh
# Function to handle cleanup on exit
cleanup() {
    # Signal handler: stop the background server (if one was started), then
    # exit with status 0 so an interactive Ctrl+C is not reported as a failure.
    echo ""
    echo "Shutting down server..."
    # SERVER_PID is only assigned after uvicorn has been launched; the signal
    # can arrive before that, in which case there is nothing to stop.
    if [ -n "${SERVER_PID:-}" ]; then
        # Errors are silenced because the process may already have exited.
        kill "$SERVER_PID" 2>/dev/null
        # Reap the child so it does not outlive this script.
        wait "$SERVER_PID" 2>/dev/null
    fi
    echo "Server stopped."
    exit 0
}
# Set up signal handlers: run cleanup on Ctrl+C (SIGINT) or on SIGTERM
trap cleanup SIGINT SIGTERM
# Activate virtual environment
# NOTE(review): venv/Scripts/ is the layout `python -m venv` creates on
# Windows; on Linux/macOS the activate script lives under venv/bin/ -
# confirm the platform this script is meant to run on.
source venv/Scripts/activate
# Set environment variables from deployed infrastructure
# NOTE(review): these values are hard-coded; the file header says the script
# is auto-generated by deployment/scripts/setup_local_env.sh, so presumably
# they should be changed by regenerating rather than by hand - confirm.
export QDRANT_HOST=34.171.134.17
export QDRANT_PORT=6333
export FIRESTORE_PROJECT_ID=gen-lang-client-0424120530
export GCS_BUCKET_NAME=sereact-images
export ENVIRONMENT=development
# Start the application: print the endpoints in use before launching
echo "Starting Sereact API with deployed infrastructure..."
echo "Qdrant endpoint: http://$QDRANT_HOST:$QDRANT_PORT"
echo "Firestore project: $FIRESTORE_PROJECT_ID"
echo "GCS bucket: $GCS_BUCKET_NAME"
echo "API will be available at: http://localhost:8000"
echo "API documentation: http://localhost:8000/docs"
echo "Press Ctrl+C to stop the server"
echo ""
# Start uvicorn in background and capture PID
# Bash defers trap handlers while a foreground command is running, but the
# `wait` builtin returns as soon as a trapped signal arrives; running the
# server in the background therefore lets cleanup run promptly.
uvicorn main:app --host 0.0.0.0 --port 8000 --reload &
SERVER_PID=$!
# Wait for the server process
wait $SERVER_PID

View File

@ -1,48 +0,0 @@
#!/usr/bin/env python3
"""
Test script to verify that similarity threshold is properly handled
"""
import json
from src.schemas.search import SearchRequest
def test_threshold_handling():
    """Check that SearchRequest keeps explicitly supplied thresholds unchanged.

    Covers both ends of the allowed range and a mid-range value. 0.0 is the
    important case: it is falsy but valid, so it must come back as 0.0 and
    not be replaced by a default.

    Raises:
        AssertionError: if the schema returns a different value than supplied.
    """
    for value in (0.0, 0.5, 1.0):
        request = SearchRequest(
            query="test query",
            similarity_threshold=value,
            limit=10,
        )
        print(f"Threshold {value} test: {request.similarity_threshold}")
        assert request.similarity_threshold == value, (
            f"Expected {value}, got {request.similarity_threshold}"
        )

    print("All threshold tests passed!")
if __name__ == "__main__":
    # Direct execution runs the checks without needing a test runner.
    test_threshold_handling()