fix image upload

This commit is contained in:
johnpccd 2025-05-25 17:01:57 +02:00
parent 46fd8e6e5e
commit 0b5b383789
12 changed files with 137 additions and 321 deletions

View File

@ -407,9 +407,9 @@ The API provides the following main endpoints with their authentication and pagi
- **Query Parameters:** - **Query Parameters:**
- `q` (required) - Search query - `q` (required) - Search query
- `limit` (default: 10, min: 1, max: 50) - Number of results - `limit` (default: 10, min: 1, max: 50) - Number of results
- `threshold` (default: 0.7, min: 0.0, max: 1.0) - Similarity threshold - `similarity_threshold` (default: 0.65, min: 0.0, max: 1.0) - Similarity threshold
- `collection_id` (optional) - Filter by collection - `collection_id` (optional) - Filter by collection
- **Response includes:** `results`, `total`, `limit`, `threshold`, `query` - **Response includes:** `results`, `total`, `limit`, `similarity_threshold`, `query`
- `POST /api/v1/search` - Advanced search with same pagination - `POST /api/v1/search` - Advanced search with same pagination
### 🔑 **Authentication Model** ### 🔑 **Authentication Model**

View File

@ -180,7 +180,7 @@ class ApiClient {
async searchImages(query, similarityThreshold, maxResults = 20) { async searchImages(query, similarityThreshold, maxResults = 20) {
const searchData = { const searchData = {
query, query,
threshold: similarityThreshold, similarity_threshold: similarityThreshold,
limit: maxResults limit: maxResults
}; };

View File

@ -224,6 +224,29 @@ def generate_image_embeddings(image_data: bytes) -> Optional[np.ndarray]:
Numpy array of embeddings or None if failed Numpy array of embeddings or None if failed
""" """
try: try:
# Basic validation of image data
if not image_data or len(image_data) == 0:
logger.error("Empty image data provided")
return None
# Check image size (limit to 10MB)
if len(image_data) > 10 * 1024 * 1024:
logger.warning(f"Large image detected: {len(image_data)} bytes")
# Validate image format using PIL
try:
pil_image = Image.open(io.BytesIO(image_data))
logger.info(f"Image format: {pil_image.format}, size: {pil_image.size}, mode: {pil_image.mode}")
# Check for blank/empty images
if pil_image.size[0] == 0 or pil_image.size[1] == 0:
logger.error("Image has zero dimensions")
return None
except Exception as e:
logger.error(f"Invalid image format: {e}")
return None
# Create Vertex AI image object # Create Vertex AI image object
vertex_image = VertexImage(image_data) vertex_image = VertexImage(image_data)
@ -238,15 +261,23 @@ def generate_image_embeddings(image_data: bytes) -> Optional[np.ndarray]:
# Get the image embedding vector # Get the image embedding vector
embedding_vector = embeddings.image_embedding embedding_vector = embeddings.image_embedding
# Convert to numpy array # Convert to numpy array - DO NOT normalize Vertex AI embeddings
# This must match the behavior in the main embedding service
embeddings_array = np.array(embedding_vector, dtype=np.float32) embeddings_array = np.array(embedding_vector, dtype=np.float32)
# Normalize the feature vector # Validate embedding quality
norm = np.linalg.norm(embeddings_array) if np.any(np.isnan(embeddings_array)) or np.any(np.isinf(embeddings_array)):
if norm > 0: logger.error("Generated embeddings contain NaN or infinite values")
embeddings_array = embeddings_array / norm return None
# Check if embedding is mostly zeros (might indicate processing issue)
zero_ratio = np.sum(embeddings_array == 0.0) / len(embeddings_array)
if zero_ratio > 0.9:
logger.warning(f"Embedding is {zero_ratio*100:.1f}% zeros - might indicate processing issue")
logger.info(f"Generated embeddings with shape: {embeddings_array.shape}") logger.info(f"Generated embeddings with shape: {embeddings_array.shape}")
logger.info(f"Embedding stats - min: {embeddings_array.min():.6f}, max: {embeddings_array.max():.6f}, norm: {np.linalg.norm(embeddings_array):.6f}")
return embeddings_array return embeddings_array
except Exception as e: except Exception as e:

View File

@ -1,7 +1,7 @@
{ {
"version": 4, "version": 4,
"terraform_version": "1.10.1", "terraform_version": "1.10.1",
"serial": 445, "serial": 451,
"lineage": "a183cd95-f987-8698-c6dd-84e933c394a5", "lineage": "a183cd95-f987-8698-c6dd-84e933c394a5",
"outputs": { "outputs": {
"cloud_function_name": { "cloud_function_name": {
@ -98,16 +98,16 @@
"attributes": { "attributes": {
"exclude_symlink_directories": null, "exclude_symlink_directories": null,
"excludes": null, "excludes": null,
"id": "0cfb36e4e396f12e3ad2944c44b083bff2224ad5", "id": "fe2be242d7b603458e099720b9d99e319952abb0",
"output_base64sha256": "uMoV4IM2IuGcRtqeI7wbu3OsTmvDx1ohDDxkEE5NY9U=", "output_base64sha256": "+3DKsNV8OT4cv8S2rftR1Pe8UUHzJNn2hhWkS8FcN9I=",
"output_base64sha512": "BQB+g3lC0+y5vOx6KHh4AWCeHk3D2nmdgE8JrFaiPlCWV6KsrMdANGyKeZ/aFmvGjbFw7MGQD4s0u/tn+viVAA==", "output_base64sha512": "2+zPRbyYxFDTa0LJz6TmZOajp10eJNQla9tK0S8d++4T6vXgt7nKSy81GO+V4ttWYopmipOvRYJe5YZ4WvmecA==",
"output_file_mode": null, "output_file_mode": null,
"output_md5": "b532cf3ff81d62dd7dec013e486931aa", "output_md5": "8e8a10dd705e45fe8645782a20d999a3",
"output_path": "./function-source.zip", "output_path": "./function-source.zip",
"output_sha": "0cfb36e4e396f12e3ad2944c44b083bff2224ad5", "output_sha": "fe2be242d7b603458e099720b9d99e319952abb0",
"output_sha256": "b8ca15e0833622e19c46da9e23bc1bbb73ac4e6bc3c75a210c3c64104e4d63d5", "output_sha256": "fb70cab0d57c393e1cbfc4b6adfb51d4f7bc5141f324d9f68615a44bc15c37d2",
"output_sha512": "05007e837942d3ecb9bcec7a28787801609e1e4dc3da799d804f09ac56a23e509657a2acacc740346c8a799fda166bc68db170ecc1900f8b34bbfb67faf89500", "output_sha512": "dbeccf45bc98c450d36b42c9cfa4e664e6a3a75d1e24d4256bdb4ad12f1dfbee13eaf5e0b7b9ca4b2f3518ef95e2db56628a668a93af45825ee586785af99e70",
"output_size": 69764346, "output_size": 69764811,
"source": [], "source": [],
"source_content": null, "source_content": null,
"source_content_filename": null, "source_content_filename": null,
@ -172,7 +172,7 @@
"effective_annotations": { "effective_annotations": {
"run.googleapis.com/ingress": "all", "run.googleapis.com/ingress": "all",
"run.googleapis.com/ingress-status": "all", "run.googleapis.com/ingress-status": "all",
"run.googleapis.com/operation-id": "a9aeb6de-fdd6-43b2-93f8-8b7f72afab4c", "run.googleapis.com/operation-id": "2b1374b6-6b03-4d99-a76c-b8e751f46df0",
"run.googleapis.com/urls": "[\"https://sereact-761163285547.us-central1.run.app\",\"https://sereact-p64zpdtkta-uc.a.run.app\"]", "run.googleapis.com/urls": "[\"https://sereact-761163285547.us-central1.run.app\",\"https://sereact-p64zpdtkta-uc.a.run.app\"]",
"serving.knative.dev/creator": "johnpccd3@gmail.com", "serving.knative.dev/creator": "johnpccd3@gmail.com",
"serving.knative.dev/lastModifier": "johnpccd3@gmail.com" "serving.knative.dev/lastModifier": "johnpccd3@gmail.com"
@ -184,12 +184,12 @@
"generation": 1, "generation": 1,
"labels": null, "labels": null,
"namespace": "gen-lang-client-0424120530", "namespace": "gen-lang-client-0424120530",
"resource_version": "AAY19MELEOc", "resource_version": "AAY191YVk4g",
"self_link": "/apis/serving.knative.dev/v1/namespaces/761163285547/services/sereact", "self_link": "/apis/serving.knative.dev/v1/namespaces/761163285547/services/sereact",
"terraform_labels": { "terraform_labels": {
"goog-terraform-provisioned": "true" "goog-terraform-provisioned": "true"
}, },
"uid": "8c8be11c-c607-4caa-a65e-c552ec445882" "uid": "d8b0e29e-2db1-4f23-8b6c-d7238a9a5f89"
} }
], ],
"name": "sereact", "name": "sereact",
@ -216,14 +216,14 @@
"type": "RoutesReady" "type": "RoutesReady"
} }
], ],
"latest_created_revision_name": "sereact-00001-z4g", "latest_created_revision_name": "sereact-00001-htg",
"latest_ready_revision_name": "sereact-00001-z4g", "latest_ready_revision_name": "sereact-00001-htg",
"observed_generation": 1, "observed_generation": 1,
"traffic": [ "traffic": [
{ {
"latest_revision": true, "latest_revision": true,
"percent": 100, "percent": 100,
"revision_name": "sereact-00001-z4g", "revision_name": "sereact-00001-htg",
"tag": "", "tag": "",
"url": "" "url": ""
} }
@ -440,7 +440,7 @@
"schema_version": 0, "schema_version": 0,
"attributes": { "attributes": {
"condition": [], "condition": [],
"etag": "BwY19MG70Fs=", "etag": "BwY191avNJc=",
"id": "v1/projects/gen-lang-client-0424120530/locations/us-central1/services/sereact/roles/run.invoker/allUsers", "id": "v1/projects/gen-lang-client-0424120530/locations/us-central1/services/sereact/roles/run.invoker/allUsers",
"location": "us-central1", "location": "us-central1",
"member": "allUsers", "member": "allUsers",
@ -474,7 +474,7 @@
"automatic_update_policy": [ "automatic_update_policy": [
{} {}
], ],
"build": "projects/761163285547/locations/us-central1/builds/ae88c918-6bb3-4aef-a56f-270f48f73049", "build": "projects/761163285547/locations/us-central1/builds/3804d611-8e49-491b-b994-4ed7fc528e92",
"docker_repository": "projects/gen-lang-client-0424120530/locations/us-central1/repositories/gcf-artifacts", "docker_repository": "projects/gen-lang-client-0424120530/locations/us-central1/repositories/gcf-artifacts",
"entry_point": "process_image_embedding", "entry_point": "process_image_embedding",
"environment_variables": {}, "environment_variables": {},
@ -487,8 +487,8 @@
"storage_source": [ "storage_source": [
{ {
"bucket": "gen-lang-client-0424120530-cloud-function-source", "bucket": "gen-lang-client-0424120530-cloud-function-source",
"generation": 1748175166697242, "generation": 1748186294324568,
"object": "function-source-b532cf3ff81d62dd7dec013e486931aa.zip" "object": "function-source-8e8a10dd705e45fe8645782a20d999a3.zip"
} }
] ]
} }
@ -508,7 +508,7 @@
"pubsub_topic": "projects/gen-lang-client-0424120530/topics/image-processing-topic", "pubsub_topic": "projects/gen-lang-client-0424120530/topics/image-processing-topic",
"retry_policy": "RETRY_POLICY_RETRY", "retry_policy": "RETRY_POLICY_RETRY",
"service_account_email": "761163285547-compute@developer.gserviceaccount.com", "service_account_email": "761163285547-compute@developer.gserviceaccount.com",
"trigger": "projects/gen-lang-client-0424120530/locations/us-central1/triggers/process-image-embedding-645734", "trigger": "projects/gen-lang-client-0424120530/locations/us-central1/triggers/process-image-embedding-873142",
"trigger_region": "us-central1" "trigger_region": "us-central1"
} }
], ],
@ -559,7 +559,7 @@
"goog-terraform-provisioned": "true" "goog-terraform-provisioned": "true"
}, },
"timeouts": null, "timeouts": null,
"update_time": "2025-05-25T12:15:22.215124150Z", "update_time": "2025-05-25T15:20:42.834818284Z",
"url": "https://us-central1-gen-lang-client-0424120530.cloudfunctions.net/process-image-embedding" "url": "https://us-central1-gen-lang-client-0424120530.cloudfunctions.net/process-image-embedding"
}, },
"sensitive_attributes": [ "sensitive_attributes": [
@ -875,8 +875,8 @@
"database_edition": "STANDARD", "database_edition": "STANDARD",
"delete_protection_state": "DELETE_PROTECTION_DISABLED", "delete_protection_state": "DELETE_PROTECTION_DISABLED",
"deletion_policy": "ABANDON", "deletion_policy": "ABANDON",
"earliest_version_time": "2025-05-25T11:12:43.126081Z", "earliest_version_time": "2025-05-25T14:17:09.893967Z",
"etag": "IPjb6fzLvo0DMKrW4vCEvY0D", "etag": "IKfR75n1vo0DMKrW4vCEvY0D",
"id": "projects/gen-lang-client-0424120530/databases/sereact-imagedb", "id": "projects/gen-lang-client-0424120530/databases/sereact-imagedb",
"key_prefix": "", "key_prefix": "",
"location_id": "us-central1", "location_id": "us-central1",
@ -1514,21 +1514,21 @@
"content_encoding": "", "content_encoding": "",
"content_language": "", "content_language": "",
"content_type": "application/zip", "content_type": "application/zip",
"crc32c": "EgiVnQ==", "crc32c": "cMTkTw==",
"customer_encryption": [], "customer_encryption": [],
"detect_md5hash": "tTLPP/gdYt197AE+SGkxqg==", "detect_md5hash": "jooQ3XBeRf6GRXgqINmZow==",
"event_based_hold": false, "event_based_hold": false,
"generation": 1748174860755303, "generation": 1748186292978895,
"id": "gen-lang-client-0424120530-cloud-function-source-function-source-b532cf3ff81d62dd7dec013e486931aa.zip", "id": "gen-lang-client-0424120530-cloud-function-source-function-source-8e8a10dd705e45fe8645782a20d999a3.zip",
"kms_key_name": "", "kms_key_name": "",
"md5hash": "tTLPP/gdYt197AE+SGkxqg==", "md5hash": "jooQ3XBeRf6GRXgqINmZow==",
"md5hexhash": "b532cf3ff81d62dd7dec013e486931aa", "md5hexhash": "8e8a10dd705e45fe8645782a20d999a3",
"media_link": "https://storage.googleapis.com/download/storage/v1/b/gen-lang-client-0424120530-cloud-function-source/o/function-source-b532cf3ff81d62dd7dec013e486931aa.zip?generation=1748174860755303\u0026alt=media", "media_link": "https://storage.googleapis.com/download/storage/v1/b/gen-lang-client-0424120530-cloud-function-source/o/function-source-8e8a10dd705e45fe8645782a20d999a3.zip?generation=1748186292978895\u0026alt=media",
"metadata": {}, "metadata": null,
"name": "function-source-b532cf3ff81d62dd7dec013e486931aa.zip", "name": "function-source-8e8a10dd705e45fe8645782a20d999a3.zip",
"output_name": "function-source-b532cf3ff81d62dd7dec013e486931aa.zip", "output_name": "function-source-8e8a10dd705e45fe8645782a20d999a3.zip",
"retention": [], "retention": [],
"self_link": "https://www.googleapis.com/storage/v1/b/gen-lang-client-0424120530-cloud-function-source/o/function-source-b532cf3ff81d62dd7dec013e486931aa.zip", "self_link": "https://www.googleapis.com/storage/v1/b/gen-lang-client-0424120530-cloud-function-source/o/function-source-8e8a10dd705e45fe8645782a20d999a3.zip",
"source": "./function-source.zip", "source": "./function-source.zip",
"storage_class": "STANDARD", "storage_class": "STANDARD",
"temporary_hold": false, "temporary_hold": false,

View File

@ -1,7 +1,7 @@
{ {
"version": 4, "version": 4,
"terraform_version": "1.10.1", "terraform_version": "1.10.1",
"serial": 441, "serial": 445,
"lineage": "a183cd95-f987-8698-c6dd-84e933c394a5", "lineage": "a183cd95-f987-8698-c6dd-84e933c394a5",
"outputs": { "outputs": {
"cloud_function_name": { "cloud_function_name": {
@ -172,7 +172,7 @@
"effective_annotations": { "effective_annotations": {
"run.googleapis.com/ingress": "all", "run.googleapis.com/ingress": "all",
"run.googleapis.com/ingress-status": "all", "run.googleapis.com/ingress-status": "all",
"run.googleapis.com/operation-id": "0f195b05-99ac-4d28-b5fe-2d3dea289124", "run.googleapis.com/operation-id": "a9aeb6de-fdd6-43b2-93f8-8b7f72afab4c",
"run.googleapis.com/urls": "[\"https://sereact-761163285547.us-central1.run.app\",\"https://sereact-p64zpdtkta-uc.a.run.app\"]", "run.googleapis.com/urls": "[\"https://sereact-761163285547.us-central1.run.app\",\"https://sereact-p64zpdtkta-uc.a.run.app\"]",
"serving.knative.dev/creator": "johnpccd3@gmail.com", "serving.knative.dev/creator": "johnpccd3@gmail.com",
"serving.knative.dev/lastModifier": "johnpccd3@gmail.com" "serving.knative.dev/lastModifier": "johnpccd3@gmail.com"
@ -182,14 +182,14 @@
"goog-terraform-provisioned": "true" "goog-terraform-provisioned": "true"
}, },
"generation": 1, "generation": 1,
"labels": {}, "labels": null,
"namespace": "gen-lang-client-0424120530", "namespace": "gen-lang-client-0424120530",
"resource_version": "AAY189oNgAQ", "resource_version": "AAY19MELEOc",
"self_link": "/apis/serving.knative.dev/v1/namespaces/761163285547/services/sereact", "self_link": "/apis/serving.knative.dev/v1/namespaces/761163285547/services/sereact",
"terraform_labels": { "terraform_labels": {
"goog-terraform-provisioned": "true" "goog-terraform-provisioned": "true"
}, },
"uid": "20e61eb3-6217-40e8-8ae5-45111d31bbda" "uid": "8c8be11c-c607-4caa-a65e-c552ec445882"
} }
], ],
"name": "sereact", "name": "sereact",
@ -216,14 +216,14 @@
"type": "RoutesReady" "type": "RoutesReady"
} }
], ],
"latest_created_revision_name": "sereact-00001-2lz", "latest_created_revision_name": "sereact-00001-z4g",
"latest_ready_revision_name": "sereact-00001-2lz", "latest_ready_revision_name": "sereact-00001-z4g",
"observed_generation": 1, "observed_generation": 1,
"traffic": [ "traffic": [
{ {
"latest_revision": true, "latest_revision": true,
"percent": 100, "percent": 100,
"revision_name": "sereact-00001-2lz", "revision_name": "sereact-00001-z4g",
"tag": "", "tag": "",
"url": "" "url": ""
} }
@ -256,8 +256,8 @@
"container_concurrency": 80, "container_concurrency": 80,
"containers": [ "containers": [
{ {
"args": [], "args": null,
"command": [], "command": null,
"env": [ "env": [
{ {
"name": "API_KEY_SECRET", "name": "API_KEY_SECRET",
@ -337,7 +337,7 @@
"cpu": "1", "cpu": "1",
"memory": "1Gi" "memory": "1Gi"
}, },
"requests": {} "requests": null
} }
], ],
"startup_probe": [ "startup_probe": [
@ -359,7 +359,7 @@
"working_dir": "" "working_dir": ""
} }
], ],
"node_selector": {}, "node_selector": null,
"service_account_name": "761163285547-compute@developer.gserviceaccount.com", "service_account_name": "761163285547-compute@developer.gserviceaccount.com",
"serving_state": "", "serving_state": "",
"timeout_seconds": 300, "timeout_seconds": 300,
@ -440,7 +440,7 @@
"schema_version": 0, "schema_version": 0,
"attributes": { "attributes": {
"condition": [], "condition": [],
"etag": "BwY189qg+AA=", "etag": "BwY19MG70Fs=",
"id": "v1/projects/gen-lang-client-0424120530/locations/us-central1/services/sereact/roles/run.invoker/allUsers", "id": "v1/projects/gen-lang-client-0424120530/locations/us-central1/services/sereact/roles/run.invoker/allUsers",
"location": "us-central1", "location": "us-central1",
"member": "allUsers", "member": "allUsers",
@ -474,7 +474,7 @@
"automatic_update_policy": [ "automatic_update_policy": [
{} {}
], ],
"build": "projects/761163285547/locations/us-central1/builds/aab08c74-df86-4cd7-9176-4ff267cab3e6", "build": "projects/761163285547/locations/us-central1/builds/ae88c918-6bb3-4aef-a56f-270f48f73049",
"docker_repository": "projects/gen-lang-client-0424120530/locations/us-central1/repositories/gcf-artifacts", "docker_repository": "projects/gen-lang-client-0424120530/locations/us-central1/repositories/gcf-artifacts",
"entry_point": "process_image_embedding", "entry_point": "process_image_embedding",
"environment_variables": {}, "environment_variables": {},
@ -487,8 +487,8 @@
"storage_source": [ "storage_source": [
{ {
"bucket": "gen-lang-client-0424120530-cloud-function-source", "bucket": "gen-lang-client-0424120530-cloud-function-source",
"generation": 1748171376287077, "generation": 1748175166697242,
"object": "function-source-58a2b7fe53bb2c8c921405cc965d635c.zip" "object": "function-source-b532cf3ff81d62dd7dec013e486931aa.zip"
} }
] ]
} }
@ -508,13 +508,13 @@
"pubsub_topic": "projects/gen-lang-client-0424120530/topics/image-processing-topic", "pubsub_topic": "projects/gen-lang-client-0424120530/topics/image-processing-topic",
"retry_policy": "RETRY_POLICY_RETRY", "retry_policy": "RETRY_POLICY_RETRY",
"service_account_email": "761163285547-compute@developer.gserviceaccount.com", "service_account_email": "761163285547-compute@developer.gserviceaccount.com",
"trigger": "projects/gen-lang-client-0424120530/locations/us-central1/triggers/process-image-embedding-013009", "trigger": "projects/gen-lang-client-0424120530/locations/us-central1/triggers/process-image-embedding-645734",
"trigger_region": "us-central1" "trigger_region": "us-central1"
} }
], ],
"id": "projects/gen-lang-client-0424120530/locations/us-central1/functions/process-image-embedding", "id": "projects/gen-lang-client-0424120530/locations/us-central1/functions/process-image-embedding",
"kms_key_name": "", "kms_key_name": "",
"labels": {}, "labels": null,
"location": "us-central1", "location": "us-central1",
"name": "process-image-embedding", "name": "process-image-embedding",
"project": "gen-lang-client-0424120530", "project": "gen-lang-client-0424120530",
@ -559,7 +559,7 @@
"goog-terraform-provisioned": "true" "goog-terraform-provisioned": "true"
}, },
"timeouts": null, "timeouts": null,
"update_time": "2025-05-25T11:13:04.212724797Z", "update_time": "2025-05-25T12:15:22.215124150Z",
"url": "https://us-central1-gen-lang-client-0424120530.cloudfunctions.net/process-image-embedding" "url": "https://us-central1-gen-lang-client-0424120530.cloudfunctions.net/process-image-embedding"
}, },
"sensitive_attributes": [ "sensitive_attributes": [
@ -588,6 +588,7 @@
} }
] ]
], ],
"private": "eyJlMmJmYjczMC1lY2FhLTExZTYtOGY4OC0zNDM2M2JjN2M0YzAiOnsiY3JlYXRlIjozNjAwMDAwMDAwMDAwLCJkZWxldGUiOjM2MDAwMDAwMDAwMDAsInVwZGF0ZSI6MzYwMDAwMDAwMDAwMH19",
"dependencies": [ "dependencies": [
"data.archive_file.function_source", "data.archive_file.function_source",
"data.google_project.current", "data.google_project.current",
@ -602,13 +603,6 @@
} }
] ]
}, },
{
"mode": "managed",
"type": "google_compute_address",
"name": "vector_db_static_ip",
"provider": "provider[\"registry.terraform.io/hashicorp/google\"]",
"instances": []
},
{ {
"mode": "managed", "mode": "managed",
"type": "google_compute_firewall", "type": "google_compute_firewall",
@ -818,7 +812,18 @@
[ [
{ {
"type": "get_attr", "type": "get_attr",
"value": "metadata_startup_script" "value": "boot_disk"
},
{
"type": "index",
"value": {
"value": 0,
"type": "number"
}
},
{
"type": "get_attr",
"value": "disk_encryption_key_rsa"
} }
], ],
[ [
@ -841,18 +846,7 @@
[ [
{ {
"type": "get_attr", "type": "get_attr",
"value": "boot_disk" "value": "metadata_startup_script"
},
{
"type": "index",
"value": {
"value": 0,
"type": "number"
}
},
{
"type": "get_attr",
"value": "disk_encryption_key_rsa"
} }
] ]
], ],
@ -881,8 +875,8 @@
"database_edition": "STANDARD", "database_edition": "STANDARD",
"delete_protection_state": "DELETE_PROTECTION_DISABLED", "delete_protection_state": "DELETE_PROTECTION_DISABLED",
"deletion_policy": "ABANDON", "deletion_policy": "ABANDON",
"earliest_version_time": "2025-05-25T11:07:20.673706Z", "earliest_version_time": "2025-05-25T11:12:43.126081Z",
"etag": "IIrliOPKvo0DMKrW4vCEvY0D", "etag": "IPjb6fzLvo0DMKrW4vCEvY0D",
"id": "projects/gen-lang-client-0424120530/databases/sereact-imagedb", "id": "projects/gen-lang-client-0424120530/databases/sereact-imagedb",
"key_prefix": "", "key_prefix": "",
"location_id": "us-central1", "location_id": "us-central1",
@ -1530,7 +1524,7 @@
"md5hash": "tTLPP/gdYt197AE+SGkxqg==", "md5hash": "tTLPP/gdYt197AE+SGkxqg==",
"md5hexhash": "b532cf3ff81d62dd7dec013e486931aa", "md5hexhash": "b532cf3ff81d62dd7dec013e486931aa",
"media_link": "https://storage.googleapis.com/download/storage/v1/b/gen-lang-client-0424120530-cloud-function-source/o/function-source-b532cf3ff81d62dd7dec013e486931aa.zip?generation=1748174860755303\u0026alt=media", "media_link": "https://storage.googleapis.com/download/storage/v1/b/gen-lang-client-0424120530-cloud-function-source/o/function-source-b532cf3ff81d62dd7dec013e486931aa.zip?generation=1748174860755303\u0026alt=media",
"metadata": null, "metadata": {},
"name": "function-source-b532cf3ff81d62dd7dec013e486931aa.zip", "name": "function-source-b532cf3ff81d62dd7dec013e486931aa.zip",
"output_name": "function-source-b532cf3ff81d62dd7dec013e486931aa.zip", "output_name": "function-source-b532cf3ff81d62dd7dec013e486931aa.zip",
"retention": [], "retention": [],

View File

@ -1,121 +0,0 @@
#!/usr/bin/env python3
"""
Simple test script to embed text and search Qdrant without filters
"""
import os
import sys
import asyncio
import logging
from dotenv import load_dotenv
# Load environment variables at import time, i.e. before simple_search_test()
# performs its lazy imports of the project services (presumably those read
# their configuration from the environment - TODO confirm).
load_dotenv()
# Set up logging: INFO level on the root logger, plus the module-level logger
# that simple_search_test() uses for its progress output.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
async def simple_search_test(
    *,
    search_query: str = "rectangle",
    team_id: str = "68330a29472a0704d2f77063",
) -> bool:
    """Embed a text query and run a series of diagnostic vector searches.

    Steps, each reported through the module logger:
      1. generate a text embedding for ``search_query``;
      2. search without filters at several score thresholds;
      3. search at threshold 0.0 to list up to 50 stored vectors;
      4. repeat the threshold-0.0 search restricted to ``team_id``.

    Args:
        search_query: Text to embed and search for.
        team_id: Team identifier used for the filtered search. The default
            is the value this script originally hard-coded (taken from
            server logs).

    Returns:
        True when every step ran; False when no embedding was produced or
        any exception (including a failed service import) was raised.
    """
    try:
        # Imported inside the try block so that an import failure is caught
        # and reported by the handler below instead of aborting the script.
        from src.services.vector_db import VectorDatabaseService
        from src.services.embedding_service import EmbeddingService

        # Initialize services
        logger.info("Initializing services...")
        vector_db = VectorDatabaseService()
        embedding_service = EmbeddingService()

        # Test 1: Generate text embedding
        logger.info("=== Generating Text Embedding ===")
        text_embedding = await embedding_service.generate_text_embedding(search_query)
        # Explicit None/empty check: plain truthiness would raise ValueError
        # if the service ever returns a multi-element array-like.
        if text_embedding is None or len(text_embedding) == 0:
            logger.error("✗ Failed to generate text embedding")
            return False
        logger.info(f"✓ Generated embedding for '{search_query}' - length: {len(text_embedding)}")

        # Test 2: Search without any filters
        # NOTE(review): the API routes changed in this commit pass
        # `similarity_threshold=` and read result['similarity_score'];
        # confirm which names the vector DB service currently expects.
        logger.info("=== Searching Qdrant (No Filters) ===")
        # Try different thresholds to see what we get
        for threshold in (0.1, 0.3, 0.5, 0.65, 0.8):
            logger.info(f"\n--- Threshold: {threshold} ---")
            search_results = vector_db.search_similar_images(
                query_vector=text_embedding,
                limit=10,
                score_threshold=threshold,
                # No filter_conditions = search everything
            )
            logger.info(f"Found {len(search_results)} results")
            # Show top 3 results
            for rank, result in enumerate(search_results[:3], start=1):
                logger.info(f"  {rank}. Score: {result['score']:.4f} | ID: {result['image_id']} | File: {result['metadata'].get('filename', 'N/A')}")

        # Test 3: Very low threshold to see as much data as the limit allows
        logger.info("\n=== All Data (Threshold 0.0) ===")
        all_limit = 50
        all_results = vector_db.search_similar_images(
            query_vector=text_embedding,
            limit=all_limit,
            score_threshold=0.0,  # Lowest threshold used by this script
        )
        # The search is capped at `all_limit`, so this is a result count,
        # not the true size of the collection.
        logger.info(f"Vectors returned at threshold 0.0 (limit {all_limit}): {len(all_results)}")

        # Test 4: With team filtering (like the API does)
        logger.info("\n=== Testing Team Filtering ===")
        filtered_results = vector_db.search_similar_images(
            query_vector=text_embedding,
            limit=all_limit,
            score_threshold=0.0,
            filter_conditions={"team_id": team_id},
        )
        logger.info(f"Results with team filter ({team_id}): {len(filtered_results)}")

        # Show metadata for all results to see team_ids
        logger.info("\n=== Checking Team IDs in Vector DB ===")
        for rank, result in enumerate(all_results, start=1):
            stored_team_id = result.get('metadata', {}).get('team_id', 'N/A')
            logger.info(f"  {rank}. Image ID: {result['image_id']} | Team ID: {stored_team_id}")

        # Show some stats (skipped when empty to avoid min()/division errors)
        if all_results:
            scores = [r['score'] for r in all_results]
            logger.info(f"Score range: {min(scores):.4f} to {max(scores):.4f}")
            logger.info(f"Average score: {sum(scores)/len(scores):.4f}")

            # Show top 5 and bottom 5
            logger.info("\nTop 5 results:")
            for rank, result in enumerate(all_results[:5], start=1):
                logger.info(f"  {rank}. Score: {result['score']:.4f} | ID: {result['image_id']}")

            if len(all_results) > 5:
                logger.info("\nBottom 5 results:")
                # Number the tail with its 1-based position in the full list.
                for rank, result in enumerate(all_results[-5:], start=len(all_results) - 4):
                    logger.info(f"  {rank}. Score: {result['score']:.4f} | ID: {result['image_id']}")

        logger.info("\n✓ Simple search test completed!")
        return True
    except Exception as e:
        # Top-level boundary of a diagnostic script: log the message together
        # with the traceback through the logging system and signal failure.
        logger.exception(f"✗ Test failed: {e}")
        return False
if __name__ == "__main__":
    # Run the async test to completion and map its boolean outcome to the
    # process exit status: 0 on success, 1 on failure.
    sys.exit(0 if asyncio.run(simple_search_test()) else 1)

View File

@ -76,7 +76,7 @@ async def upload_image(
file_size=file_size, file_size=file_size,
content_type=content_type, content_type=content_type,
storage_path=storage_path, storage_path=storage_path,
public_url="", # Will be set after we have the image ID public_url=None, # Will be set after we have the image ID
team_id=current_user.team_id, team_id=current_user.team_id,
uploader_id=current_user.id, uploader_id=current_user.id,
description=description, description=description,

View File

@ -33,7 +33,7 @@ async def search_images(
request: Request, request: Request,
q: str = Query(..., description="Search query"), q: str = Query(..., description="Search query"),
limit: int = Query(10, ge=1, le=50, description="Number of results to return"), limit: int = Query(10, ge=1, le=50, description="Number of results to return"),
threshold: float = Query(0.65, ge=0.0, le=1.0, description="Similarity threshold"), similarity_threshold: float = Query(0.65, ge=0.0, le=1.0, description="Similarity threshold"),
collection_id: Optional[str] = Query(None, description="Filter by collection ID"), collection_id: Optional[str] = Query(None, description="Filter by collection ID"),
current_user: UserModel = Depends(get_current_user) current_user: UserModel = Depends(get_current_user)
): ):
@ -46,7 +46,7 @@ async def search_images(
"method": request.method, "method": request.method,
"query": q, "query": q,
"limit": limit, "limit": limit,
"threshold": threshold "similarity_threshold": similarity_threshold
}, },
user_id=str(current_user.id), user_id=str(current_user.id),
team_id=str(current_user.team_id) team_id=str(current_user.team_id)
@ -62,7 +62,7 @@ async def search_images(
search_results = get_vector_db_service().search_similar_images( search_results = get_vector_db_service().search_similar_images(
query_vector=query_embedding, query_vector=query_embedding,
limit=limit, limit=limit,
score_threshold=threshold, similarity_threshold=similarity_threshold,
filter_conditions={"team_id": str(current_user.team_id)} if current_user.team_id else None filter_conditions={"team_id": str(current_user.team_id)} if current_user.team_id else None
) )
@ -72,12 +72,12 @@ async def search_images(
results=[], results=[],
total=0, total=0,
limit=limit, limit=limit,
threshold=threshold similarity_threshold=similarity_threshold
) )
# Get image IDs and scores from search results # Get image IDs and scores from search results
image_ids = [result['image_id'] for result in search_results if result['image_id']] image_ids = [result['image_id'] for result in search_results if result['image_id']]
scores = {result['image_id']: result['score'] for result in search_results if result['image_id']} scores = {result['image_id']: result['similarity_score'] for result in search_results if result['image_id']}
# Get image metadata from database # Get image metadata from database
images = await image_repository.get_by_ids(image_ids) images = await image_repository.get_by_ids(image_ids)
@ -123,7 +123,7 @@ async def search_images(
results=results, results=results,
total=len(results), total=len(results),
limit=limit, limit=limit,
threshold=threshold similarity_threshold=similarity_threshold
) )
except Exception as e: except Exception as e:
@ -160,11 +160,11 @@ async def search_images_advanced(
logger.info(f"Generated embedding with length: {len(query_embedding)}") logger.info(f"Generated embedding with length: {len(query_embedding)}")
# Search in vector database # Search in vector database
logger.info(f"Searching vector database with threshold: {search_request.threshold}") logger.info(f"Searching vector database with similarity_threshold: {search_request.similarity_threshold}")
search_results = get_vector_db_service().search_similar_images( search_results = get_vector_db_service().search_similar_images(
query_vector=query_embedding, query_vector=query_embedding,
limit=search_request.limit, limit=search_request.limit,
score_threshold=search_request.threshold, similarity_threshold=search_request.similarity_threshold,
filter_conditions={"team_id": str(current_user.team_id)} if current_user.team_id else None filter_conditions={"team_id": str(current_user.team_id)} if current_user.team_id else None
) )
@ -177,12 +177,12 @@ async def search_images_advanced(
results=[], results=[],
total=0, total=0,
limit=search_request.limit, limit=search_request.limit,
threshold=search_request.threshold similarity_threshold=search_request.similarity_threshold
) )
# Get image IDs and scores from search results # Get image IDs and scores from search results
image_ids = [result['image_id'] for result in search_results if result['image_id']] image_ids = [result['image_id'] for result in search_results if result['image_id']]
scores = {result['image_id']: result['score'] for result in search_results if result['image_id']} scores = {result['image_id']: result['similarity_score'] for result in search_results if result['image_id']}
logger.info(f"Extracted {len(image_ids)} image IDs: {image_ids}") logger.info(f"Extracted {len(image_ids)} image IDs: {image_ids}")
@ -247,7 +247,7 @@ async def search_images_advanced(
results=results, results=results,
total=len(results), total=len(results),
limit=search_request.limit, limit=search_request.limit,
threshold=search_request.threshold similarity_threshold=search_request.similarity_threshold
) )
except Exception as e: except Exception as e:

View File

@ -1,6 +1,6 @@
from typing import List, Optional, ClassVar from typing import List, Optional, ClassVar
from datetime import datetime from datetime import datetime
from pydantic import BaseModel, Field from pydantic import BaseModel, Field, field_validator
from src.schemas.image import ImageResponse from src.schemas.image import ImageResponse
@ -8,18 +8,25 @@ class SearchRequest(BaseModel):
"""Schema for advanced search request""" """Schema for advanced search request"""
query: str = Field(..., description="Search query", min_length=1) query: str = Field(..., description="Search query", min_length=1)
limit: int = Field(10, description="Maximum number of results", ge=1, le=50) limit: int = Field(10, description="Maximum number of results", ge=1, le=50)
threshold: float = Field(0.7, description="Similarity threshold", ge=0.0, le=1.0) similarity_threshold: Optional[float] = Field(None, description="Similarity threshold", ge=0.0, le=1.0)
collection_id: Optional[str] = Field(None, description="Filter by collection ID") collection_id: Optional[str] = Field(None, description="Filter by collection ID")
date_from: Optional[datetime] = Field(None, description="Filter images uploaded after this date") date_from: Optional[datetime] = Field(None, description="Filter images uploaded after this date")
date_to: Optional[datetime] = Field(None, description="Filter images uploaded before this date") date_to: Optional[datetime] = Field(None, description="Filter images uploaded before this date")
uploader_id: Optional[str] = Field(None, description="Filter by uploader ID") uploader_id: Optional[str] = Field(None, description="Filter by uploader ID")
@field_validator('similarity_threshold')
@classmethod
def set_default_similarity_threshold(cls, v):
if v is None:
return 0.7
return v
model_config: ClassVar[dict] = { model_config: ClassVar[dict] = {
"json_schema_extra": { "json_schema_extra": {
"example": { "example": {
"query": "mountain sunset", "query": "mountain sunset",
"limit": 10, "limit": 10,
"threshold": 0.7, "similarity_threshold": 0.7,
"collection_id": "507f1f77bcf86cd799439044", "collection_id": "507f1f77bcf86cd799439044",
"date_from": "2023-01-01T00:00:00", "date_from": "2023-01-01T00:00:00",
"date_to": "2023-12-31T23:59:59", "date_to": "2023-12-31T23:59:59",
@ -34,7 +41,7 @@ class SearchResponse(BaseModel):
results: List[ImageResponse] results: List[ImageResponse]
total: int total: int
limit: int limit: int
threshold: float similarity_threshold: float
model_config: ClassVar[dict] = { model_config: ClassVar[dict] = {
"json_schema_extra": { "json_schema_extra": {
@ -66,7 +73,7 @@ class SearchResponse(BaseModel):
], ],
"total": 1, "total": 1,
"limit": 10, "limit": 10,
"threshold": 0.7 "similarity_threshold": 0.7
} }
} }
} }

View File

@ -139,7 +139,7 @@ class VectorDatabaseService:
self, self,
query_vector: List[float], query_vector: List[float],
limit: int = 10, limit: int = 10,
score_threshold: float = 0.65, similarity_threshold: float = 0.65,
filter_conditions: Dict[str, Any] = None filter_conditions: Dict[str, Any] = None
) -> List[Dict[str, Any]]: ) -> List[Dict[str, Any]]:
""" """
@ -148,7 +148,7 @@ class VectorDatabaseService:
Args: Args:
query_vector: Query vector to search for query_vector: Query vector to search for
limit: Maximum number of results to return limit: Maximum number of results to return
score_threshold: Minimum similarity score threshold similarity_threshold: Minimum similarity score threshold
filter_conditions: Additional filter conditions filter_conditions: Additional filter conditions
Returns: Returns:
@ -172,14 +172,14 @@ class VectorDatabaseService:
query_vector=query_vector, query_vector=query_vector,
query_filter=search_filter, query_filter=search_filter,
limit=limit, limit=limit,
score_threshold=score_threshold score_threshold=similarity_threshold
) )
results = [] results = []
for hit in search_result: for hit in search_result:
result = { result = {
"point_id": hit.id, "point_id": hit.id,
"score": hit.score, "similarity_score": hit.score,
"image_id": hit.payload.get("image_id"), "image_id": hit.payload.get("image_id"),
"metadata": hit.payload "metadata": hit.payload
} }

View File

@ -1,47 +0,0 @@
#!/bin/bash
# Development startup script for Sereact API
# Exports the environment variables for the deployed infrastructure, starts
# uvicorn in the background, and waits on it so Ctrl+C can stop it cleanly.
# Auto-generated by deployment/scripts/setup_local_env.sh

# Function to handle cleanup on exit: stop the background server (if one was
# started), wait for it to finish, then exit with status 0.
cleanup() {
    echo ""
    echo "Shutting down server..."
    # SERVER_PID is only set once uvicorn has been launched below.
    if [ -n "$SERVER_PID" ]; then
        kill "$SERVER_PID" 2>/dev/null
        wait "$SERVER_PID" 2>/dev/null
    fi
    echo "Server stopped."
    exit 0
}

# Set up signal handlers
trap cleanup SIGINT SIGTERM

# Activate virtual environment.
# Windows venvs place the activation script in Scripts/, POSIX venvs in bin/.
# If neither exists, keep going with whatever Python environment is active.
if [ -f venv/Scripts/activate ]; then
    source venv/Scripts/activate
elif [ -f venv/bin/activate ]; then
    source venv/bin/activate
else
    echo "Warning: no virtual environment found under ./venv; using the current environment." >&2
fi

# Set environment variables from deployed infrastructure
# NOTE(review): these endpoints are hard-coded by the generator script and
# override any values already present in the caller's environment.
export QDRANT_HOST=34.171.134.17
export QDRANT_PORT=6333
export FIRESTORE_PROJECT_ID=gen-lang-client-0424120530
export GCS_BUCKET_NAME=sereact-images
export ENVIRONMENT=development

# Start the application
echo "Starting Sereact API with deployed infrastructure..."
echo "Qdrant endpoint: http://$QDRANT_HOST:$QDRANT_PORT"
echo "Firestore project: $FIRESTORE_PROJECT_ID"
echo "GCS bucket: $GCS_BUCKET_NAME"
echo "API will be available at: http://localhost:8000"
echo "API documentation: http://localhost:8000/docs"
echo "Press Ctrl+C to stop the server"
echo ""

# Start uvicorn in background and capture PID so the trap handler can stop it
uvicorn main:app --host 0.0.0.0 --port 8000 --reload &
SERVER_PID=$!

# Wait for the server process; the script's exit status is the server's
wait "$SERVER_PID"

View File

@ -1,48 +0,0 @@
#!/usr/bin/env python3
"""
Test script to verify that similarity threshold is properly handled
"""
import json
from src.schemas.search import SearchRequest
def test_threshold_handling():
    """Check that SearchRequest keeps the threshold value it is given.

    Builds one request per threshold value and asserts that the resulting
    ``threshold`` attribute equals the input. The values 0.0 and 1.0 are the
    two ends of the range the schema accepts; 0.5 is a mid-range value.

    Raises:
        AssertionError: If a request does not report the threshold it was
            constructed with.
    """
    # NOTE(review): the schema diff in this change renames the field to
    # `similarity_threshold`; this script still uses `threshold` -- confirm
    # which schema version it is meant to run against.
    for value in (0.0, 0.5, 1.0):
        request = SearchRequest(query="test query", threshold=value, limit=10)
        print(f"Threshold {value} test: {request.threshold}")
        assert request.threshold == value, f"Expected {value}, got {request.threshold}"

    print("All threshold tests passed!")


if __name__ == "__main__":
    # Allow running the checks directly as a script.
    test_threshold_handling()