fix image upload
This commit is contained in:
parent
46fd8e6e5e
commit
0b5b383789
@ -407,9 +407,9 @@ The API provides the following main endpoints with their authentication and pagi
|
||||
- **Query Parameters:**
|
||||
- `q` (required) - Search query
|
||||
- `limit` (default: 10, min: 1, max: 50) - Number of results
|
||||
- `threshold` (default: 0.7, min: 0.0, max: 1.0) - Similarity threshold
|
||||
- `similarity_threshold` (default: 0.7, min: 0.0, max: 1.0) - Similarity threshold
|
||||
- `collection_id` (optional) - Filter by collection
|
||||
- **Response includes:** `results`, `total`, `limit`, `threshold`, `query`
|
||||
- **Response includes:** `results`, `total`, `limit`, `similarity_threshold`, `query`
|
||||
- `POST /api/v1/search` - Advanced search with same pagination
|
||||
|
||||
### 🔑 **Authentication Model**
|
||||
|
||||
@ -180,7 +180,7 @@ class ApiClient {
|
||||
async searchImages(query, similarityThreshold, maxResults = 20) {
|
||||
const searchData = {
|
||||
query,
|
||||
threshold: similarityThreshold,
|
||||
similarity_threshold: similarityThreshold,
|
||||
limit: maxResults
|
||||
};
|
||||
|
||||
|
||||
@ -224,6 +224,29 @@ def generate_image_embeddings(image_data: bytes) -> Optional[np.ndarray]:
|
||||
Numpy array of embeddings or None if failed
|
||||
"""
|
||||
try:
|
||||
# Basic validation of image data
|
||||
if not image_data or len(image_data) == 0:
|
||||
logger.error("Empty image data provided")
|
||||
return None
|
||||
|
||||
# Check image size (limit to 10MB)
|
||||
if len(image_data) > 10 * 1024 * 1024:
|
||||
logger.warning(f"Large image detected: {len(image_data)} bytes")
|
||||
|
||||
# Validate image format using PIL
|
||||
try:
|
||||
pil_image = Image.open(io.BytesIO(image_data))
|
||||
logger.info(f"Image format: {pil_image.format}, size: {pil_image.size}, mode: {pil_image.mode}")
|
||||
|
||||
# Check for blank/empty images
|
||||
if pil_image.size[0] == 0 or pil_image.size[1] == 0:
|
||||
logger.error("Image has zero dimensions")
|
||||
return None
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Invalid image format: {e}")
|
||||
return None
|
||||
|
||||
# Create Vertex AI image object
|
||||
vertex_image = VertexImage(image_data)
|
||||
|
||||
@ -238,15 +261,23 @@ def generate_image_embeddings(image_data: bytes) -> Optional[np.ndarray]:
|
||||
# Get the image embedding vector
|
||||
embedding_vector = embeddings.image_embedding
|
||||
|
||||
# Convert to numpy array
|
||||
# Convert to numpy array - DO NOT normalize Vertex AI embeddings
|
||||
# This must match the behavior in the main embedding service
|
||||
embeddings_array = np.array(embedding_vector, dtype=np.float32)
|
||||
|
||||
# Normalize the feature vector
|
||||
norm = np.linalg.norm(embeddings_array)
|
||||
if norm > 0:
|
||||
embeddings_array = embeddings_array / norm
|
||||
# Validate embedding quality
|
||||
if np.any(np.isnan(embeddings_array)) or np.any(np.isinf(embeddings_array)):
|
||||
logger.error("Generated embeddings contain NaN or infinite values")
|
||||
return None
|
||||
|
||||
# Check if embedding is mostly zeros (might indicate processing issue)
|
||||
zero_ratio = np.sum(embeddings_array == 0.0) / len(embeddings_array)
|
||||
if zero_ratio > 0.9:
|
||||
logger.warning(f"Embedding is {zero_ratio*100:.1f}% zeros - might indicate processing issue")
|
||||
|
||||
logger.info(f"Generated embeddings with shape: {embeddings_array.shape}")
|
||||
logger.info(f"Embedding stats - min: {embeddings_array.min():.6f}, max: {embeddings_array.max():.6f}, norm: {np.linalg.norm(embeddings_array):.6f}")
|
||||
|
||||
return embeddings_array
|
||||
|
||||
except Exception as e:
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
{
|
||||
"version": 4,
|
||||
"terraform_version": "1.10.1",
|
||||
"serial": 445,
|
||||
"serial": 451,
|
||||
"lineage": "a183cd95-f987-8698-c6dd-84e933c394a5",
|
||||
"outputs": {
|
||||
"cloud_function_name": {
|
||||
@ -98,16 +98,16 @@
|
||||
"attributes": {
|
||||
"exclude_symlink_directories": null,
|
||||
"excludes": null,
|
||||
"id": "0cfb36e4e396f12e3ad2944c44b083bff2224ad5",
|
||||
"output_base64sha256": "uMoV4IM2IuGcRtqeI7wbu3OsTmvDx1ohDDxkEE5NY9U=",
|
||||
"output_base64sha512": "BQB+g3lC0+y5vOx6KHh4AWCeHk3D2nmdgE8JrFaiPlCWV6KsrMdANGyKeZ/aFmvGjbFw7MGQD4s0u/tn+viVAA==",
|
||||
"id": "fe2be242d7b603458e099720b9d99e319952abb0",
|
||||
"output_base64sha256": "+3DKsNV8OT4cv8S2rftR1Pe8UUHzJNn2hhWkS8FcN9I=",
|
||||
"output_base64sha512": "2+zPRbyYxFDTa0LJz6TmZOajp10eJNQla9tK0S8d++4T6vXgt7nKSy81GO+V4ttWYopmipOvRYJe5YZ4WvmecA==",
|
||||
"output_file_mode": null,
|
||||
"output_md5": "b532cf3ff81d62dd7dec013e486931aa",
|
||||
"output_md5": "8e8a10dd705e45fe8645782a20d999a3",
|
||||
"output_path": "./function-source.zip",
|
||||
"output_sha": "0cfb36e4e396f12e3ad2944c44b083bff2224ad5",
|
||||
"output_sha256": "b8ca15e0833622e19c46da9e23bc1bbb73ac4e6bc3c75a210c3c64104e4d63d5",
|
||||
"output_sha512": "05007e837942d3ecb9bcec7a28787801609e1e4dc3da799d804f09ac56a23e509657a2acacc740346c8a799fda166bc68db170ecc1900f8b34bbfb67faf89500",
|
||||
"output_size": 69764346,
|
||||
"output_sha": "fe2be242d7b603458e099720b9d99e319952abb0",
|
||||
"output_sha256": "fb70cab0d57c393e1cbfc4b6adfb51d4f7bc5141f324d9f68615a44bc15c37d2",
|
||||
"output_sha512": "dbeccf45bc98c450d36b42c9cfa4e664e6a3a75d1e24d4256bdb4ad12f1dfbee13eaf5e0b7b9ca4b2f3518ef95e2db56628a668a93af45825ee586785af99e70",
|
||||
"output_size": 69764811,
|
||||
"source": [],
|
||||
"source_content": null,
|
||||
"source_content_filename": null,
|
||||
@ -172,7 +172,7 @@
|
||||
"effective_annotations": {
|
||||
"run.googleapis.com/ingress": "all",
|
||||
"run.googleapis.com/ingress-status": "all",
|
||||
"run.googleapis.com/operation-id": "a9aeb6de-fdd6-43b2-93f8-8b7f72afab4c",
|
||||
"run.googleapis.com/operation-id": "2b1374b6-6b03-4d99-a76c-b8e751f46df0",
|
||||
"run.googleapis.com/urls": "[\"https://sereact-761163285547.us-central1.run.app\",\"https://sereact-p64zpdtkta-uc.a.run.app\"]",
|
||||
"serving.knative.dev/creator": "johnpccd3@gmail.com",
|
||||
"serving.knative.dev/lastModifier": "johnpccd3@gmail.com"
|
||||
@ -184,12 +184,12 @@
|
||||
"generation": 1,
|
||||
"labels": null,
|
||||
"namespace": "gen-lang-client-0424120530",
|
||||
"resource_version": "AAY19MELEOc",
|
||||
"resource_version": "AAY191YVk4g",
|
||||
"self_link": "/apis/serving.knative.dev/v1/namespaces/761163285547/services/sereact",
|
||||
"terraform_labels": {
|
||||
"goog-terraform-provisioned": "true"
|
||||
},
|
||||
"uid": "8c8be11c-c607-4caa-a65e-c552ec445882"
|
||||
"uid": "d8b0e29e-2db1-4f23-8b6c-d7238a9a5f89"
|
||||
}
|
||||
],
|
||||
"name": "sereact",
|
||||
@ -216,14 +216,14 @@
|
||||
"type": "RoutesReady"
|
||||
}
|
||||
],
|
||||
"latest_created_revision_name": "sereact-00001-z4g",
|
||||
"latest_ready_revision_name": "sereact-00001-z4g",
|
||||
"latest_created_revision_name": "sereact-00001-htg",
|
||||
"latest_ready_revision_name": "sereact-00001-htg",
|
||||
"observed_generation": 1,
|
||||
"traffic": [
|
||||
{
|
||||
"latest_revision": true,
|
||||
"percent": 100,
|
||||
"revision_name": "sereact-00001-z4g",
|
||||
"revision_name": "sereact-00001-htg",
|
||||
"tag": "",
|
||||
"url": ""
|
||||
}
|
||||
@ -440,7 +440,7 @@
|
||||
"schema_version": 0,
|
||||
"attributes": {
|
||||
"condition": [],
|
||||
"etag": "BwY19MG70Fs=",
|
||||
"etag": "BwY191avNJc=",
|
||||
"id": "v1/projects/gen-lang-client-0424120530/locations/us-central1/services/sereact/roles/run.invoker/allUsers",
|
||||
"location": "us-central1",
|
||||
"member": "allUsers",
|
||||
@ -474,7 +474,7 @@
|
||||
"automatic_update_policy": [
|
||||
{}
|
||||
],
|
||||
"build": "projects/761163285547/locations/us-central1/builds/ae88c918-6bb3-4aef-a56f-270f48f73049",
|
||||
"build": "projects/761163285547/locations/us-central1/builds/3804d611-8e49-491b-b994-4ed7fc528e92",
|
||||
"docker_repository": "projects/gen-lang-client-0424120530/locations/us-central1/repositories/gcf-artifacts",
|
||||
"entry_point": "process_image_embedding",
|
||||
"environment_variables": {},
|
||||
@ -487,8 +487,8 @@
|
||||
"storage_source": [
|
||||
{
|
||||
"bucket": "gen-lang-client-0424120530-cloud-function-source",
|
||||
"generation": 1748175166697242,
|
||||
"object": "function-source-b532cf3ff81d62dd7dec013e486931aa.zip"
|
||||
"generation": 1748186294324568,
|
||||
"object": "function-source-8e8a10dd705e45fe8645782a20d999a3.zip"
|
||||
}
|
||||
]
|
||||
}
|
||||
@ -508,7 +508,7 @@
|
||||
"pubsub_topic": "projects/gen-lang-client-0424120530/topics/image-processing-topic",
|
||||
"retry_policy": "RETRY_POLICY_RETRY",
|
||||
"service_account_email": "761163285547-compute@developer.gserviceaccount.com",
|
||||
"trigger": "projects/gen-lang-client-0424120530/locations/us-central1/triggers/process-image-embedding-645734",
|
||||
"trigger": "projects/gen-lang-client-0424120530/locations/us-central1/triggers/process-image-embedding-873142",
|
||||
"trigger_region": "us-central1"
|
||||
}
|
||||
],
|
||||
@ -559,7 +559,7 @@
|
||||
"goog-terraform-provisioned": "true"
|
||||
},
|
||||
"timeouts": null,
|
||||
"update_time": "2025-05-25T12:15:22.215124150Z",
|
||||
"update_time": "2025-05-25T15:20:42.834818284Z",
|
||||
"url": "https://us-central1-gen-lang-client-0424120530.cloudfunctions.net/process-image-embedding"
|
||||
},
|
||||
"sensitive_attributes": [
|
||||
@ -875,8 +875,8 @@
|
||||
"database_edition": "STANDARD",
|
||||
"delete_protection_state": "DELETE_PROTECTION_DISABLED",
|
||||
"deletion_policy": "ABANDON",
|
||||
"earliest_version_time": "2025-05-25T11:12:43.126081Z",
|
||||
"etag": "IPjb6fzLvo0DMKrW4vCEvY0D",
|
||||
"earliest_version_time": "2025-05-25T14:17:09.893967Z",
|
||||
"etag": "IKfR75n1vo0DMKrW4vCEvY0D",
|
||||
"id": "projects/gen-lang-client-0424120530/databases/sereact-imagedb",
|
||||
"key_prefix": "",
|
||||
"location_id": "us-central1",
|
||||
@ -1514,21 +1514,21 @@
|
||||
"content_encoding": "",
|
||||
"content_language": "",
|
||||
"content_type": "application/zip",
|
||||
"crc32c": "EgiVnQ==",
|
||||
"crc32c": "cMTkTw==",
|
||||
"customer_encryption": [],
|
||||
"detect_md5hash": "tTLPP/gdYt197AE+SGkxqg==",
|
||||
"detect_md5hash": "jooQ3XBeRf6GRXgqINmZow==",
|
||||
"event_based_hold": false,
|
||||
"generation": 1748174860755303,
|
||||
"id": "gen-lang-client-0424120530-cloud-function-source-function-source-b532cf3ff81d62dd7dec013e486931aa.zip",
|
||||
"generation": 1748186292978895,
|
||||
"id": "gen-lang-client-0424120530-cloud-function-source-function-source-8e8a10dd705e45fe8645782a20d999a3.zip",
|
||||
"kms_key_name": "",
|
||||
"md5hash": "tTLPP/gdYt197AE+SGkxqg==",
|
||||
"md5hexhash": "b532cf3ff81d62dd7dec013e486931aa",
|
||||
"media_link": "https://storage.googleapis.com/download/storage/v1/b/gen-lang-client-0424120530-cloud-function-source/o/function-source-b532cf3ff81d62dd7dec013e486931aa.zip?generation=1748174860755303\u0026alt=media",
|
||||
"metadata": {},
|
||||
"name": "function-source-b532cf3ff81d62dd7dec013e486931aa.zip",
|
||||
"output_name": "function-source-b532cf3ff81d62dd7dec013e486931aa.zip",
|
||||
"md5hash": "jooQ3XBeRf6GRXgqINmZow==",
|
||||
"md5hexhash": "8e8a10dd705e45fe8645782a20d999a3",
|
||||
"media_link": "https://storage.googleapis.com/download/storage/v1/b/gen-lang-client-0424120530-cloud-function-source/o/function-source-8e8a10dd705e45fe8645782a20d999a3.zip?generation=1748186292978895\u0026alt=media",
|
||||
"metadata": null,
|
||||
"name": "function-source-8e8a10dd705e45fe8645782a20d999a3.zip",
|
||||
"output_name": "function-source-8e8a10dd705e45fe8645782a20d999a3.zip",
|
||||
"retention": [],
|
||||
"self_link": "https://www.googleapis.com/storage/v1/b/gen-lang-client-0424120530-cloud-function-source/o/function-source-b532cf3ff81d62dd7dec013e486931aa.zip",
|
||||
"self_link": "https://www.googleapis.com/storage/v1/b/gen-lang-client-0424120530-cloud-function-source/o/function-source-8e8a10dd705e45fe8645782a20d999a3.zip",
|
||||
"source": "./function-source.zip",
|
||||
"storage_class": "STANDARD",
|
||||
"temporary_hold": false,
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
{
|
||||
"version": 4,
|
||||
"terraform_version": "1.10.1",
|
||||
"serial": 441,
|
||||
"serial": 445,
|
||||
"lineage": "a183cd95-f987-8698-c6dd-84e933c394a5",
|
||||
"outputs": {
|
||||
"cloud_function_name": {
|
||||
@ -172,7 +172,7 @@
|
||||
"effective_annotations": {
|
||||
"run.googleapis.com/ingress": "all",
|
||||
"run.googleapis.com/ingress-status": "all",
|
||||
"run.googleapis.com/operation-id": "0f195b05-99ac-4d28-b5fe-2d3dea289124",
|
||||
"run.googleapis.com/operation-id": "a9aeb6de-fdd6-43b2-93f8-8b7f72afab4c",
|
||||
"run.googleapis.com/urls": "[\"https://sereact-761163285547.us-central1.run.app\",\"https://sereact-p64zpdtkta-uc.a.run.app\"]",
|
||||
"serving.knative.dev/creator": "johnpccd3@gmail.com",
|
||||
"serving.knative.dev/lastModifier": "johnpccd3@gmail.com"
|
||||
@ -182,14 +182,14 @@
|
||||
"goog-terraform-provisioned": "true"
|
||||
},
|
||||
"generation": 1,
|
||||
"labels": {},
|
||||
"labels": null,
|
||||
"namespace": "gen-lang-client-0424120530",
|
||||
"resource_version": "AAY189oNgAQ",
|
||||
"resource_version": "AAY19MELEOc",
|
||||
"self_link": "/apis/serving.knative.dev/v1/namespaces/761163285547/services/sereact",
|
||||
"terraform_labels": {
|
||||
"goog-terraform-provisioned": "true"
|
||||
},
|
||||
"uid": "20e61eb3-6217-40e8-8ae5-45111d31bbda"
|
||||
"uid": "8c8be11c-c607-4caa-a65e-c552ec445882"
|
||||
}
|
||||
],
|
||||
"name": "sereact",
|
||||
@ -216,14 +216,14 @@
|
||||
"type": "RoutesReady"
|
||||
}
|
||||
],
|
||||
"latest_created_revision_name": "sereact-00001-2lz",
|
||||
"latest_ready_revision_name": "sereact-00001-2lz",
|
||||
"latest_created_revision_name": "sereact-00001-z4g",
|
||||
"latest_ready_revision_name": "sereact-00001-z4g",
|
||||
"observed_generation": 1,
|
||||
"traffic": [
|
||||
{
|
||||
"latest_revision": true,
|
||||
"percent": 100,
|
||||
"revision_name": "sereact-00001-2lz",
|
||||
"revision_name": "sereact-00001-z4g",
|
||||
"tag": "",
|
||||
"url": ""
|
||||
}
|
||||
@ -256,8 +256,8 @@
|
||||
"container_concurrency": 80,
|
||||
"containers": [
|
||||
{
|
||||
"args": [],
|
||||
"command": [],
|
||||
"args": null,
|
||||
"command": null,
|
||||
"env": [
|
||||
{
|
||||
"name": "API_KEY_SECRET",
|
||||
@ -337,7 +337,7 @@
|
||||
"cpu": "1",
|
||||
"memory": "1Gi"
|
||||
},
|
||||
"requests": {}
|
||||
"requests": null
|
||||
}
|
||||
],
|
||||
"startup_probe": [
|
||||
@ -359,7 +359,7 @@
|
||||
"working_dir": ""
|
||||
}
|
||||
],
|
||||
"node_selector": {},
|
||||
"node_selector": null,
|
||||
"service_account_name": "761163285547-compute@developer.gserviceaccount.com",
|
||||
"serving_state": "",
|
||||
"timeout_seconds": 300,
|
||||
@ -440,7 +440,7 @@
|
||||
"schema_version": 0,
|
||||
"attributes": {
|
||||
"condition": [],
|
||||
"etag": "BwY189qg+AA=",
|
||||
"etag": "BwY19MG70Fs=",
|
||||
"id": "v1/projects/gen-lang-client-0424120530/locations/us-central1/services/sereact/roles/run.invoker/allUsers",
|
||||
"location": "us-central1",
|
||||
"member": "allUsers",
|
||||
@ -474,7 +474,7 @@
|
||||
"automatic_update_policy": [
|
||||
{}
|
||||
],
|
||||
"build": "projects/761163285547/locations/us-central1/builds/aab08c74-df86-4cd7-9176-4ff267cab3e6",
|
||||
"build": "projects/761163285547/locations/us-central1/builds/ae88c918-6bb3-4aef-a56f-270f48f73049",
|
||||
"docker_repository": "projects/gen-lang-client-0424120530/locations/us-central1/repositories/gcf-artifacts",
|
||||
"entry_point": "process_image_embedding",
|
||||
"environment_variables": {},
|
||||
@ -487,8 +487,8 @@
|
||||
"storage_source": [
|
||||
{
|
||||
"bucket": "gen-lang-client-0424120530-cloud-function-source",
|
||||
"generation": 1748171376287077,
|
||||
"object": "function-source-58a2b7fe53bb2c8c921405cc965d635c.zip"
|
||||
"generation": 1748175166697242,
|
||||
"object": "function-source-b532cf3ff81d62dd7dec013e486931aa.zip"
|
||||
}
|
||||
]
|
||||
}
|
||||
@ -508,13 +508,13 @@
|
||||
"pubsub_topic": "projects/gen-lang-client-0424120530/topics/image-processing-topic",
|
||||
"retry_policy": "RETRY_POLICY_RETRY",
|
||||
"service_account_email": "761163285547-compute@developer.gserviceaccount.com",
|
||||
"trigger": "projects/gen-lang-client-0424120530/locations/us-central1/triggers/process-image-embedding-013009",
|
||||
"trigger": "projects/gen-lang-client-0424120530/locations/us-central1/triggers/process-image-embedding-645734",
|
||||
"trigger_region": "us-central1"
|
||||
}
|
||||
],
|
||||
"id": "projects/gen-lang-client-0424120530/locations/us-central1/functions/process-image-embedding",
|
||||
"kms_key_name": "",
|
||||
"labels": {},
|
||||
"labels": null,
|
||||
"location": "us-central1",
|
||||
"name": "process-image-embedding",
|
||||
"project": "gen-lang-client-0424120530",
|
||||
@ -559,7 +559,7 @@
|
||||
"goog-terraform-provisioned": "true"
|
||||
},
|
||||
"timeouts": null,
|
||||
"update_time": "2025-05-25T11:13:04.212724797Z",
|
||||
"update_time": "2025-05-25T12:15:22.215124150Z",
|
||||
"url": "https://us-central1-gen-lang-client-0424120530.cloudfunctions.net/process-image-embedding"
|
||||
},
|
||||
"sensitive_attributes": [
|
||||
@ -588,6 +588,7 @@
|
||||
}
|
||||
]
|
||||
],
|
||||
"private": "eyJlMmJmYjczMC1lY2FhLTExZTYtOGY4OC0zNDM2M2JjN2M0YzAiOnsiY3JlYXRlIjozNjAwMDAwMDAwMDAwLCJkZWxldGUiOjM2MDAwMDAwMDAwMDAsInVwZGF0ZSI6MzYwMDAwMDAwMDAwMH19",
|
||||
"dependencies": [
|
||||
"data.archive_file.function_source",
|
||||
"data.google_project.current",
|
||||
@ -602,13 +603,6 @@
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"mode": "managed",
|
||||
"type": "google_compute_address",
|
||||
"name": "vector_db_static_ip",
|
||||
"provider": "provider[\"registry.terraform.io/hashicorp/google\"]",
|
||||
"instances": []
|
||||
},
|
||||
{
|
||||
"mode": "managed",
|
||||
"type": "google_compute_firewall",
|
||||
@ -818,7 +812,18 @@
|
||||
[
|
||||
{
|
||||
"type": "get_attr",
|
||||
"value": "metadata_startup_script"
|
||||
"value": "boot_disk"
|
||||
},
|
||||
{
|
||||
"type": "index",
|
||||
"value": {
|
||||
"value": 0,
|
||||
"type": "number"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "get_attr",
|
||||
"value": "disk_encryption_key_rsa"
|
||||
}
|
||||
],
|
||||
[
|
||||
@ -841,18 +846,7 @@
|
||||
[
|
||||
{
|
||||
"type": "get_attr",
|
||||
"value": "boot_disk"
|
||||
},
|
||||
{
|
||||
"type": "index",
|
||||
"value": {
|
||||
"value": 0,
|
||||
"type": "number"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "get_attr",
|
||||
"value": "disk_encryption_key_rsa"
|
||||
"value": "metadata_startup_script"
|
||||
}
|
||||
]
|
||||
],
|
||||
@ -881,8 +875,8 @@
|
||||
"database_edition": "STANDARD",
|
||||
"delete_protection_state": "DELETE_PROTECTION_DISABLED",
|
||||
"deletion_policy": "ABANDON",
|
||||
"earliest_version_time": "2025-05-25T11:07:20.673706Z",
|
||||
"etag": "IIrliOPKvo0DMKrW4vCEvY0D",
|
||||
"earliest_version_time": "2025-05-25T11:12:43.126081Z",
|
||||
"etag": "IPjb6fzLvo0DMKrW4vCEvY0D",
|
||||
"id": "projects/gen-lang-client-0424120530/databases/sereact-imagedb",
|
||||
"key_prefix": "",
|
||||
"location_id": "us-central1",
|
||||
@ -1530,7 +1524,7 @@
|
||||
"md5hash": "tTLPP/gdYt197AE+SGkxqg==",
|
||||
"md5hexhash": "b532cf3ff81d62dd7dec013e486931aa",
|
||||
"media_link": "https://storage.googleapis.com/download/storage/v1/b/gen-lang-client-0424120530-cloud-function-source/o/function-source-b532cf3ff81d62dd7dec013e486931aa.zip?generation=1748174860755303\u0026alt=media",
|
||||
"metadata": null,
|
||||
"metadata": {},
|
||||
"name": "function-source-b532cf3ff81d62dd7dec013e486931aa.zip",
|
||||
"output_name": "function-source-b532cf3ff81d62dd7dec013e486931aa.zip",
|
||||
"retention": [],
|
||||
|
||||
@ -1,121 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Simple test script to embed text and search Qdrant without filters
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import asyncio
|
||||
import logging
|
||||
from dotenv import load_dotenv
|
||||
|
||||
# Load environment variables
|
||||
load_dotenv()
|
||||
|
||||
# Set up logging
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
async def simple_search_test():
    """Embed a text query and run a series of searches against the vector DB.

    Steps: generate a text embedding for a fixed query, search without
    filters at several similarity thresholds, fetch results at threshold
    0.0, repeat that search restricted to one team, then log the team ID of
    every result and some score statistics.

    Returns:
        True when every step completed; False when the embedding could not
        be generated or any exception was raised along the way.
    """
    try:
        # Imported inside the try block so that an import failure is reported
        # by the handler below instead of aborting before anything is logged.
        from src.services.vector_db import VectorDatabaseService
        from src.services.embedding_service import EmbeddingService

        # Initialize services
        logger.info("Initializing services...")
        vector_db = VectorDatabaseService()
        embedding_service = EmbeddingService()

        # Test 1: Generate text embedding
        logger.info("=== Generating Text Embedding ===")
        search_query = "rectangle"
        text_embedding = await embedding_service.generate_text_embedding(search_query)

        # Explicit None/length check: plain truthiness is ambiguous when the
        # embedding is an array-like object rather than a list.
        if text_embedding is None or len(text_embedding) == 0:
            logger.error("✗ Failed to generate text embedding")
            return False
        logger.info(f"✓ Generated embedding for '{search_query}' - length: {len(text_embedding)}")

        # Test 2: Search without any filters
        logger.info("=== Searching Qdrant (No Filters) ===")

        # Try different thresholds to see what we get
        for threshold in (0.1, 0.3, 0.5, 0.65, 0.8):
            logger.info(f"\n--- Threshold: {threshold} ---")

            # No filter_conditions = search everything
            search_results = vector_db.search_similar_images(
                query_vector=text_embedding,
                limit=10,
                similarity_threshold=threshold,
            )

            logger.info(f"Found {len(search_results)} results")

            # Show top 3 results
            for rank, result in enumerate(search_results[:3], start=1):
                logger.info(f" {rank}. Score: {result['similarity_score']:.4f} | ID: {result['image_id']} | File: {result['metadata'].get('filename', 'N/A')}")

        # Test 3: Very low threshold to see all data
        logger.info("\n=== All Data (Threshold 0.0) ===")
        all_results = vector_db.search_similar_images(
            query_vector=text_embedding,
            limit=50,
            similarity_threshold=0.0,  # Get everything
        )

        # NOTE: the search above is capped at limit=50, so this count is only
        # a lower bound once the collection holds more than 50 vectors.
        logger.info(f"Total vectors in collection: {len(all_results)}")

        # Test 4: With team filtering (like the API does)
        logger.info("\n=== Testing Team Filtering ===")
        test_team_id = "68330a29472a0704d2f77063"  # From server logs
        filtered_results = vector_db.search_similar_images(
            query_vector=text_embedding,
            limit=50,
            similarity_threshold=0.0,
            filter_conditions={"team_id": test_team_id},
        )

        logger.info(f"Results with team filter ({test_team_id}): {len(filtered_results)}")

        # Show metadata for all results to see team_ids
        logger.info("\n=== Checking Team IDs in Vector DB ===")
        for rank, result in enumerate(all_results, start=1):
            team_id = result.get('metadata', {}).get('team_id', 'N/A')
            logger.info(f" {rank}. Image ID: {result['image_id']} | Team ID: {team_id}")

        # Show some stats
        if all_results:
            scores = [r['similarity_score'] for r in all_results]
            logger.info(f"Score range: {min(scores):.4f} to {max(scores):.4f}")
            logger.info(f"Average score: {sum(scores)/len(scores):.4f}")

            # Show top 5 and bottom 5
            logger.info("\nTop 5 results:")
            for rank, result in enumerate(all_results[:5], start=1):
                logger.info(f" {rank}. Score: {result['similarity_score']:.4f} | ID: {result['image_id']}")

            if len(all_results) > 5:
                logger.info("\nBottom 5 results:")
                # Number the tail by its 1-based position in the full list.
                first_rank = len(all_results) - 4
                for rank, result in enumerate(all_results[-5:], start=first_rank):
                    logger.info(f" {rank}. Score: {result['similarity_score']:.4f} | ID: {result['image_id']}")

        logger.info("\n✓ Simple search test completed!")
        return True

    except Exception as e:
        # Top-level boundary of a diagnostic script: log the failure together
        # with its traceback and report it through the return value.
        logger.exception(f"✗ Test failed: {e}")
        return False


if __name__ == "__main__":
    # The process exit status mirrors the outcome: 0 on success, 1 on failure.
    success = asyncio.run(simple_search_test())
    sys.exit(0 if success else 1)
|
||||
@ -76,7 +76,7 @@ async def upload_image(
|
||||
file_size=file_size,
|
||||
content_type=content_type,
|
||||
storage_path=storage_path,
|
||||
public_url="", # Will be set after we have the image ID
|
||||
public_url=None, # Will be set after we have the image ID
|
||||
team_id=current_user.team_id,
|
||||
uploader_id=current_user.id,
|
||||
description=description,
|
||||
|
||||
@ -33,7 +33,7 @@ async def search_images(
|
||||
request: Request,
|
||||
q: str = Query(..., description="Search query"),
|
||||
limit: int = Query(10, ge=1, le=50, description="Number of results to return"),
|
||||
threshold: float = Query(0.65, ge=0.0, le=1.0, description="Similarity threshold"),
|
||||
similarity_threshold: float = Query(0.65, ge=0.0, le=1.0, description="Similarity threshold"),
|
||||
collection_id: Optional[str] = Query(None, description="Filter by collection ID"),
|
||||
current_user: UserModel = Depends(get_current_user)
|
||||
):
|
||||
@ -46,7 +46,7 @@ async def search_images(
|
||||
"method": request.method,
|
||||
"query": q,
|
||||
"limit": limit,
|
||||
"threshold": threshold
|
||||
"similarity_threshold": similarity_threshold
|
||||
},
|
||||
user_id=str(current_user.id),
|
||||
team_id=str(current_user.team_id)
|
||||
@ -62,7 +62,7 @@ async def search_images(
|
||||
search_results = get_vector_db_service().search_similar_images(
|
||||
query_vector=query_embedding,
|
||||
limit=limit,
|
||||
score_threshold=threshold,
|
||||
similarity_threshold=similarity_threshold,
|
||||
filter_conditions={"team_id": str(current_user.team_id)} if current_user.team_id else None
|
||||
)
|
||||
|
||||
@ -72,12 +72,12 @@ async def search_images(
|
||||
results=[],
|
||||
total=0,
|
||||
limit=limit,
|
||||
threshold=threshold
|
||||
similarity_threshold=similarity_threshold
|
||||
)
|
||||
|
||||
# Get image IDs and scores from search results
|
||||
image_ids = [result['image_id'] for result in search_results if result['image_id']]
|
||||
scores = {result['image_id']: result['score'] for result in search_results if result['image_id']}
|
||||
scores = {result['image_id']: result['similarity_score'] for result in search_results if result['image_id']}
|
||||
|
||||
# Get image metadata from database
|
||||
images = await image_repository.get_by_ids(image_ids)
|
||||
@ -123,7 +123,7 @@ async def search_images(
|
||||
results=results,
|
||||
total=len(results),
|
||||
limit=limit,
|
||||
threshold=threshold
|
||||
similarity_threshold=similarity_threshold
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
@ -160,11 +160,11 @@ async def search_images_advanced(
|
||||
logger.info(f"Generated embedding with length: {len(query_embedding)}")
|
||||
|
||||
# Search in vector database
|
||||
logger.info(f"Searching vector database with threshold: {search_request.threshold}")
|
||||
logger.info(f"Searching vector database with similarity_threshold: {search_request.similarity_threshold}")
|
||||
search_results = get_vector_db_service().search_similar_images(
|
||||
query_vector=query_embedding,
|
||||
limit=search_request.limit,
|
||||
score_threshold=search_request.threshold,
|
||||
similarity_threshold=search_request.similarity_threshold,
|
||||
filter_conditions={"team_id": str(current_user.team_id)} if current_user.team_id else None
|
||||
)
|
||||
|
||||
@ -177,12 +177,12 @@ async def search_images_advanced(
|
||||
results=[],
|
||||
total=0,
|
||||
limit=search_request.limit,
|
||||
threshold=search_request.threshold
|
||||
similarity_threshold=search_request.similarity_threshold
|
||||
)
|
||||
|
||||
# Get image IDs and scores from search results
|
||||
image_ids = [result['image_id'] for result in search_results if result['image_id']]
|
||||
scores = {result['image_id']: result['score'] for result in search_results if result['image_id']}
|
||||
scores = {result['image_id']: result['similarity_score'] for result in search_results if result['image_id']}
|
||||
|
||||
logger.info(f"Extracted {len(image_ids)} image IDs: {image_ids}")
|
||||
|
||||
@ -247,7 +247,7 @@ async def search_images_advanced(
|
||||
results=results,
|
||||
total=len(results),
|
||||
limit=search_request.limit,
|
||||
threshold=search_request.threshold
|
||||
similarity_threshold=search_request.similarity_threshold
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
from typing import List, Optional, ClassVar
|
||||
from datetime import datetime
|
||||
from pydantic import BaseModel, Field
|
||||
from pydantic import BaseModel, Field, field_validator
|
||||
|
||||
from src.schemas.image import ImageResponse
|
||||
|
||||
@ -8,18 +8,25 @@ class SearchRequest(BaseModel):
|
||||
"""Schema for advanced search request"""
|
||||
query: str = Field(..., description="Search query", min_length=1)
|
||||
limit: int = Field(10, description="Maximum number of results", ge=1, le=50)
|
||||
threshold: float = Field(0.7, description="Similarity threshold", ge=0.0, le=1.0)
|
||||
similarity_threshold: Optional[float] = Field(None, description="Similarity threshold", ge=0.0, le=1.0)
|
||||
collection_id: Optional[str] = Field(None, description="Filter by collection ID")
|
||||
date_from: Optional[datetime] = Field(None, description="Filter images uploaded after this date")
|
||||
date_to: Optional[datetime] = Field(None, description="Filter images uploaded before this date")
|
||||
uploader_id: Optional[str] = Field(None, description="Filter by uploader ID")
|
||||
|
||||
@field_validator('similarity_threshold')
|
||||
@classmethod
|
||||
def set_default_similarity_threshold(cls, v):
|
||||
if v is None:
|
||||
return 0.7
|
||||
return v
|
||||
|
||||
model_config: ClassVar[dict] = {
|
||||
"json_schema_extra": {
|
||||
"example": {
|
||||
"query": "mountain sunset",
|
||||
"limit": 10,
|
||||
"threshold": 0.7,
|
||||
"similarity_threshold": 0.7,
|
||||
"collection_id": "507f1f77bcf86cd799439044",
|
||||
"date_from": "2023-01-01T00:00:00",
|
||||
"date_to": "2023-12-31T23:59:59",
|
||||
@ -34,7 +41,7 @@ class SearchResponse(BaseModel):
|
||||
results: List[ImageResponse]
|
||||
total: int
|
||||
limit: int
|
||||
threshold: float
|
||||
similarity_threshold: float
|
||||
|
||||
model_config: ClassVar[dict] = {
|
||||
"json_schema_extra": {
|
||||
@ -66,7 +73,7 @@ class SearchResponse(BaseModel):
|
||||
],
|
||||
"total": 1,
|
||||
"limit": 10,
|
||||
"threshold": 0.7
|
||||
"similarity_threshold": 0.7
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -139,7 +139,7 @@ class VectorDatabaseService:
|
||||
self,
|
||||
query_vector: List[float],
|
||||
limit: int = 10,
|
||||
score_threshold: float = 0.65,
|
||||
similarity_threshold: float = 0.65,
|
||||
filter_conditions: Dict[str, Any] = None
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
@ -148,7 +148,7 @@ class VectorDatabaseService:
|
||||
Args:
|
||||
query_vector: Query vector to search for
|
||||
limit: Maximum number of results to return
|
||||
score_threshold: Minimum similarity score threshold
|
||||
similarity_threshold: Minimum similarity score threshold
|
||||
filter_conditions: Additional filter conditions
|
||||
|
||||
Returns:
|
||||
@ -172,14 +172,14 @@ class VectorDatabaseService:
|
||||
query_vector=query_vector,
|
||||
query_filter=search_filter,
|
||||
limit=limit,
|
||||
score_threshold=score_threshold
|
||||
score_threshold=similarity_threshold
|
||||
)
|
||||
|
||||
results = []
|
||||
for hit in search_result:
|
||||
result = {
|
||||
"point_id": hit.id,
|
||||
"score": hit.score,
|
||||
"similarity_score": hit.score,
|
||||
"image_id": hit.payload.get("image_id"),
|
||||
"metadata": hit.payload
|
||||
}
|
||||
|
||||
47
start_dev.sh
47
start_dev.sh
@ -1,47 +0,0 @@
|
||||
#!/bin/bash

# Development startup script for Sereact API
# This script sets the environment variables and starts the application
# Auto-generated by deployment/scripts/setup_local_env.sh

# Function to handle cleanup on exit.
# Stops the background server, if one was started, and exits with status 0.
cleanup() {
    echo ""
    echo "Shutting down server..."
    if [ -n "$SERVER_PID" ]; then
        kill "$SERVER_PID" 2>/dev/null
        wait "$SERVER_PID" 2>/dev/null
    fi
    echo "Server stopped."
    exit 0
}

# Set up signal handlers
trap cleanup SIGINT SIGTERM

# Activate virtual environment.
# venv/Scripts is the Windows layout; POSIX virtual environments keep the
# activation script under venv/bin, so try both before giving up.
if [ -f venv/Scripts/activate ]; then
    source venv/Scripts/activate
elif [ -f venv/bin/activate ]; then
    source venv/bin/activate
else
    echo "Warning: no virtual environment found under ./venv; using the current environment" >&2
fi

# Set environment variables from deployed infrastructure
export QDRANT_HOST=34.171.134.17
export QDRANT_PORT=6333
export FIRESTORE_PROJECT_ID=gen-lang-client-0424120530
export GCS_BUCKET_NAME=sereact-images
export ENVIRONMENT=development

# Start the application
echo "Starting Sereact API with deployed infrastructure..."
echo "Qdrant endpoint: http://$QDRANT_HOST:$QDRANT_PORT"
echo "Firestore project: $FIRESTORE_PROJECT_ID"
echo "GCS bucket: $GCS_BUCKET_NAME"
echo "API will be available at: http://localhost:8000"
echo "API documentation: http://localhost:8000/docs"
echo "Press Ctrl+C to stop the server"
echo ""

# Start uvicorn in background and capture PID so cleanup() can stop it
uvicorn main:app --host 0.0.0.0 --port 8000 --reload &
SERVER_PID=$!

# Wait for the server process; a trapped signal interrupts this wait
wait "$SERVER_PID"
|
||||
@ -1,48 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Test script to verify that similarity threshold is properly handled
|
||||
"""
|
||||
|
||||
import json
|
||||
from src.schemas.search import SearchRequest
|
||||
|
||||
def test_threshold_handling():
    """Check that SearchRequest keeps an explicit similarity threshold as given.

    Covers both bounds of the allowed range (0.0 and 1.0) plus a mid-range
    value. The 0.0 case guards against a falsy value being swapped for the
    default.

    Raises:
        AssertionError: if a stored threshold differs from the supplied one.
    """
    for value in (0.0, 0.5, 1.0):
        request = SearchRequest(
            query="test query",
            similarity_threshold=value,
            limit=10,
        )
        print(f"Threshold {value} test: {request.similarity_threshold}")
        assert request.similarity_threshold == value, f"Expected {value}, got {request.similarity_threshold}"

    print("All threshold tests passed!")


if __name__ == "__main__":
    test_threshold_handling()
|
||||
Loading…
x
Reference in New Issue
Block a user