cp
This commit is contained in:
parent
36bb44bfe7
commit
11923229f6
10
README.md
10
README.md
@ -545,23 +545,19 @@ This modular architecture provides several benefits:
|
||||
### High Priority
|
||||
- [ ] Remove Pinecone integration and complete Qdrant migration
|
||||
- [ ] Test and validate vector search functionality with Qdrant
|
||||
- [ ] Implement proper pagination for search results
|
||||
- [ ] Implement proper pagination for search results and all endpoints
|
||||
- [ ] Test Cloud Function image processing pipeline
|
||||
- [ ] Validate VM setup for self-hosted Qdrant instance
|
||||
|
||||
### Medium Priority
|
||||
- [ ] Add comprehensive logging for vector search operations
|
||||
- [ ] Implement caching layer for frequently accessed embeddings
|
||||
- [ ] Implement caching for frequently accessed data
|
||||
- [ ] Add monitoring and alerting for vector database performance
|
||||
- [ ] Create backup strategy for vector database
|
||||
- [ ] Document vector search API endpoints
|
||||
- [ ] Set up Qdrant cluster with multiple nodes
|
||||
|
||||
### Low Priority
|
||||
- [ ] Add support for batch embedding generation
|
||||
- [ ] Implement embedding versioning
|
||||
- [ ] Add support for custom embedding models
|
||||
- [ ] Create visualization tools for embedding space
|
||||
- [ ] Add support for embedding fine-tuning
|
||||
|
||||
## Recent Changes
|
||||
- Migrated from Pinecone to self-hosted Qdrant
|
||||
|
||||
58
debug_vector_db.py
Normal file
58
debug_vector_db.py
Normal file
@ -0,0 +1,58 @@
|
||||
#!/usr/bin/env python3
"""
Debug script to test VectorDatabaseService initialization.

Prints the Qdrant-related environment variables, then the values that the
project ``settings`` object exposes under the same names, and finally walks
through the connection in stages (``QdrantClient`` construction,
``get_collections()``, ``VectorDatabaseService()``) so the output shows which
stage fails.

Exit status is 0 when every stage succeeds and 1 when a stage raises, so the
script can be used from a shell or CI step.
"""

import os
import sys
import traceback

from dotenv import load_dotenv

# Names reported both from the raw environment and from the settings object.
QDRANT_VARS = ("QDRANT_HOST", "QDRANT_PORT", "QDRANT_HTTPS", "QDRANT_PREFER_GRPC")


def main() -> int:
    """Run the diagnostic stages and return a process exit code."""
    # Load environment variables
    # NOTE(review): this is done before importing settings, presumably because
    # settings reads the environment at import time -- confirm in src.config.
    load_dotenv()

    print("Environment variables:")
    for name in QDRANT_VARS:
        print(f"{name}: {os.getenv(name)}")

    # Import and check settings (deliberately after load_dotenv()).
    from src.config.config import settings

    print("\nSettings values:")
    for name in QDRANT_VARS:
        print(f"settings.{name}: {getattr(settings, name)}")

    # Test VectorDatabaseService initialization step by step
    print("\nTesting VectorDatabaseService initialization step by step...")
    try:
        from qdrant_client import QdrantClient

        # Test direct QdrantClient creation
        print("Creating QdrantClient directly...")
        client = QdrantClient(
            host=settings.QDRANT_HOST,
            port=settings.QDRANT_PORT,
            api_key=settings.QDRANT_API_KEY,
            prefer_grpc=settings.QDRANT_PREFER_GRPC,
            https=settings.QDRANT_HTTPS,
        )
        print("QdrantClient created successfully")

        # Test get_collections (this is what fails in _ensure_collection_exists)
        print("Testing get_collections...")
        collections = client.get_collections()
        print(f"Collections retrieved: {[col.name for col in collections.collections]}")

        # Now test full VectorDatabaseService
        print("\nTesting full VectorDatabaseService...")
        from src.services.vector_db import VectorDatabaseService

        # Only construction is being tested; the instance itself is not needed.
        VectorDatabaseService()
        print("VectorDatabaseService created successfully")
    except Exception as e:
        # Top-level boundary of a diagnostic tool: report everything, then
        # signal failure through the exit code instead of exiting 0.
        print(f"Error: {e}")
        traceback.print_exc()
        return 1

    return 0


if __name__ == "__main__":
    sys.exit(main())
|
||||
@ -1,66 +1,66 @@
|
||||
# Deployment Options for Sereact
|
||||
|
||||
This directory contains multiple options for deploying the Sereact application:
|
||||
|
||||
## Terraform Infrastructure (`/terraform`)
|
||||
|
||||
The Terraform configuration automates the provisioning of all required Google Cloud resources:
|
||||
|
||||
- Google Cloud Run service
|
||||
- Google Container Registry (GCR)
|
||||
- Cloud Firestore
|
||||
- Cloud Storage buckets
|
||||
|
||||
See [terraform/README.md](terraform/README.md) for detailed instructions.
|
||||
|
||||
## Cloud Run Deployment (`/cloud-run`)
|
||||
|
||||
The `service.yaml` file defines the Cloud Run service configuration which can be deployed using:
|
||||
|
||||
```bash
|
||||
gcloud run services replace deployment/cloud-run/service.yaml --region=us-central1
|
||||
```
|
||||
|
||||
## Deployment Script (`deploy.sh`)
|
||||
|
||||
For convenience, a deployment script is provided to handle the entire deployment workflow:
|
||||
|
||||
```bash
|
||||
# Provision infrastructure with Terraform
|
||||
./deployment/deploy.sh --provision
|
||||
|
||||
# Build and push Docker image
|
||||
./deployment/deploy.sh --build
|
||||
|
||||
# Deploy to Cloud Run
|
||||
./deployment/deploy.sh --deploy
|
||||
|
||||
# Do everything (provision, build, deploy)
|
||||
./deployment/deploy.sh --all
|
||||
```
|
||||
|
||||
## CI/CD Pipelines
|
||||
|
||||
For CI/CD integration, consider using:
|
||||
|
||||
1. **GitHub Actions**: Sample workflow included in terraform/README.md
|
||||
2. **Cloud Build**: Configure a `cloudbuild.yaml` in your repository
|
||||
3. **Jenkins**: Use the `deploy.sh` script in your pipeline
|
||||
|
||||
## Managing Secrets
|
||||
|
||||
Sensitive data should be managed using Google Secret Manager:
|
||||
|
||||
```bash
|
||||
# Create a secret
|
||||
gcloud secrets create sereact-api-key-secret --replication-policy="automatic"
|
||||
gcloud secrets create sereact-vector-db-key --replication-policy="automatic"
|
||||
|
||||
# Add a secret version
|
||||
echo -n "your-api-key-secret" | gcloud secrets versions add sereact-api-key-secret --data-file=-
|
||||
echo -n "your-vector-db-key" | gcloud secrets versions add sereact-vector-db-key --data-file=-
|
||||
|
||||
# Update Cloud Run service to use the secrets
|
||||
gcloud run services update sereact \
|
||||
--update-secrets=API_KEY_SECRET=sereact-api-key-secret:latest,VECTOR_DB_API_KEY=sereact-vector-db-key:latest
|
||||
# Deployment Options for Sereact
|
||||
|
||||
This directory contains multiple options for deploying the Sereact application:
|
||||
|
||||
## Terraform Infrastructure (`/terraform`)
|
||||
|
||||
The Terraform configuration automates the provisioning of all required Google Cloud resources:
|
||||
|
||||
- Google Cloud Run service
|
||||
- Google Container Registry (GCR)
|
||||
- Cloud Firestore
|
||||
- Cloud Storage buckets
|
||||
|
||||
See [terraform/README.md](terraform/README.md) for detailed instructions.
|
||||
|
||||
## Cloud Run Deployment (`/cloud-run`)
|
||||
|
||||
The `service.yaml` file defines the Cloud Run service configuration which can be deployed using:
|
||||
|
||||
```bash
|
||||
gcloud run services replace deployment/cloud-run/service.yaml --region=us-central1
|
||||
```
|
||||
|
||||
## Deployment Script (`deploy.sh`)
|
||||
|
||||
For convenience, a deployment script is provided to handle the entire deployment workflow:
|
||||
|
||||
```bash
|
||||
# Provision infrastructure with Terraform
|
||||
./deployment/deploy.sh --provision
|
||||
|
||||
# Build and push Docker image
|
||||
./deployment/deploy.sh --build
|
||||
|
||||
# Deploy to Cloud Run
|
||||
./deployment/deploy.sh --deploy
|
||||
|
||||
# Do everything (provision, build, deploy)
|
||||
./deployment/deploy.sh --all
|
||||
```
|
||||
|
||||
## CI/CD Pipelines
|
||||
|
||||
For CI/CD integration, consider using:
|
||||
|
||||
1. **GitHub Actions**: Sample workflow included in terraform/README.md
|
||||
2. **Cloud Build**: Configure a `cloudbuild.yaml` in your repository
|
||||
3. **Jenkins**: Use the `deploy.sh` script in your pipeline
|
||||
|
||||
## Managing Secrets
|
||||
|
||||
Sensitive data should be managed using Google Secret Manager:
|
||||
|
||||
```bash
|
||||
# Create a secret
|
||||
gcloud secrets create sereact-api-key-secret --replication-policy="automatic"
|
||||
gcloud secrets create sereact-vector-db-key --replication-policy="automatic"
|
||||
|
||||
# Add a secret version
|
||||
echo -n "your-api-key-secret" | gcloud secrets versions add sereact-api-key-secret --data-file=-
|
||||
echo -n "your-vector-db-key" | gcloud secrets versions add sereact-vector-db-key --data-file=-
|
||||
|
||||
# Update Cloud Run service to use the secrets
|
||||
gcloud run services update sereact \
|
||||
--update-secrets=API_KEY_SECRET=sereact-api-key-secret:latest,VECTOR_DB_API_KEY=sereact-vector-db-key:latest
|
||||
```
|
||||
@ -53,6 +53,10 @@ spec:
|
||||
value: "Content-Length,Content-Range"
|
||||
- name: CORS_MAX_AGE
|
||||
value: "3600"
|
||||
- name: QDRANT_HTTPS
|
||||
value: "false"
|
||||
- name: QDRANT_PREFER_GRPC
|
||||
value: "false"
|
||||
volumeMounts:
|
||||
- name: gcp-sa-key
|
||||
mountPath: /var/secrets/google
|
||||
|
||||
@ -310,10 +310,24 @@ if [ "$DEPLOY" = true ]; then
|
||||
# Get service URL from Terraform output
|
||||
SERVICE_URL=$(terraform output -raw cloud_run_url 2>/dev/null || echo "URL not available")
|
||||
|
||||
# Get Qdrant VM information
|
||||
QDRANT_IP=$(terraform output -raw vector_db_vm_external_ip 2>/dev/null || echo "IP not available")
|
||||
QDRANT_ENDPOINT=$(terraform output -raw qdrant_http_endpoint 2>/dev/null || echo "Endpoint not available")
|
||||
|
||||
cd - > /dev/null
|
||||
|
||||
echo "Deployment completed successfully."
|
||||
echo "=================================="
|
||||
echo "Service URL: $SERVICE_URL"
|
||||
echo "Qdrant VM IP: $QDRANT_IP"
|
||||
echo "Qdrant HTTP Endpoint: $QDRANT_ENDPOINT"
|
||||
echo ""
|
||||
echo "Environment variables set in Cloud Run:"
|
||||
echo " QDRANT_HOST=$QDRANT_IP"
|
||||
echo " QDRANT_PORT=6333"
|
||||
echo ""
|
||||
echo "To get detailed Qdrant information, run:"
|
||||
echo " ./deployment/terraform/scripts/get_qdrant_ip.sh"
|
||||
fi
|
||||
|
||||
echo "All operations completed."
|
||||
@ -90,4 +90,244 @@ To destroy all provisioned resources:
|
||||
|
||||
```bash
|
||||
terraform destroy
|
||||
```
|
||||
```
|
||||
|
||||
# Terraform Infrastructure for Sereact
|
||||
|
||||
This directory contains Terraform configurations to deploy the complete Sereact infrastructure on Google Cloud Platform, including automatic configuration of Qdrant VM IP addresses for Cloud Run.
|
||||
|
||||
## Architecture Overview
|
||||
|
||||
The infrastructure includes:
|
||||
- **Cloud Run Service**: Main application service
|
||||
- **Qdrant VM**: Vector database running on Compute Engine
|
||||
- **Firestore**: Document database
|
||||
- **Cloud Storage**: File storage
|
||||
- **Automatic IP Configuration**: Qdrant VM IP is automatically passed to Cloud Run
|
||||
|
||||
## Qdrant VM IP Address Integration
|
||||
|
||||
The Terraform configuration automatically handles passing the Qdrant VM IP address to your Cloud Run service:
|
||||
|
||||
### How It Works
|
||||
|
||||
1. **VM Creation**: Terraform creates a Compute Engine VM running Qdrant
|
||||
2. **IP Address Extraction**: Terraform extracts the VM's external IP address
|
||||
3. **Environment Variable Injection**: The IP is automatically set as `QDRANT_HOST` in Cloud Run
|
||||
4. **Dependency Management**: Cloud Run waits for the VM to be created before deploying
|
||||
|
||||
### Configuration Options
|
||||
|
||||
#### Static vs Dynamic IP
|
||||
|
||||
You can choose between static and dynamic IP addresses:
|
||||
|
||||
```hcl
|
||||
# In terraform.tfvars
|
||||
use_static_ip = true # Use static IP (recommended for production)
|
||||
use_static_ip = false # Use ephemeral IP (cheaper for development)
|
||||
```
|
||||
|
||||
#### Environment Variables Set Automatically
|
||||
|
||||
The following environment variables are automatically configured in Cloud Run:
|
||||
|
||||
```bash
|
||||
QDRANT_HOST=<vm-external-ip> # Automatically set from VM
|
||||
QDRANT_PORT=6333 # HTTP port
|
||||
QDRANT_API_KEY=<your-api-key> # From terraform.tfvars
|
||||
```
|
||||
|
||||
## Quick Start
|
||||
|
||||
### 1. Prerequisites
|
||||
|
||||
```bash
|
||||
# Authenticate, select the project, and verify that Terraform is installed
|
||||
gcloud auth login
|
||||
gcloud config set project YOUR_PROJECT_ID
|
||||
terraform --version
|
||||
```
|
||||
|
||||
### 2. Configuration
|
||||
|
||||
```bash
|
||||
# Copy and edit configuration
|
||||
cp terraform.tfvars.example terraform.tfvars
|
||||
# Edit terraform.tfvars with your values
|
||||
```
|
||||
|
||||
### 3. Deploy Everything
|
||||
|
||||
```bash
|
||||
# From the root directory
|
||||
./deployment/deploy.sh --deploy --build
|
||||
```
|
||||
|
||||
### 4. Get Qdrant Information
|
||||
|
||||
```bash
|
||||
# Get Qdrant VM IP and endpoints
|
||||
./deployment/terraform/scripts/get_qdrant_ip.sh
|
||||
|
||||
# Get internal IP (for VPC connections)
|
||||
./deployment/terraform/scripts/get_qdrant_ip.sh --internal
|
||||
```
|
||||
|
||||
## Terraform Outputs
|
||||
|
||||
The configuration provides several outputs for integration:
|
||||
|
||||
```bash
|
||||
# Get specific outputs
|
||||
terraform output cloud_run_url # Cloud Run service URL
|
||||
terraform output vector_db_vm_external_ip # Qdrant VM external IP
|
||||
terraform output vector_db_vm_internal_ip # Qdrant VM internal IP
|
||||
terraform output qdrant_http_endpoint # Full HTTP endpoint
|
||||
terraform output cloud_run_qdrant_host # IP configured for Cloud Run
|
||||
|
||||
# Get deployment summary
|
||||
terraform output deployment_summary
|
||||
```
|
||||
|
||||
## File Structure
|
||||
|
||||
```
|
||||
terraform/
|
||||
├── main.tf # Cloud Run and main resources
|
||||
├── vm.tf # Qdrant VM configuration
|
||||
├── outputs.tf # Output definitions
|
||||
├── variables.tf # Variable definitions
|
||||
├── terraform.tfvars # Your configuration values
|
||||
├── scripts/
|
||||
│ ├── get_qdrant_ip.sh # Helper script to get VM IP
|
||||
│ └── install_qdrant.sh # VM startup script
|
||||
└── README.md # This file
|
||||
```
|
||||
|
||||
## Environment Variables Reference
|
||||
|
||||
### Automatically Configured
|
||||
|
||||
These are set automatically by Terraform:
|
||||
|
||||
| Variable | Source | Description |
|
||||
|----------|--------|-------------|
|
||||
| `QDRANT_HOST` | VM external IP | Qdrant server IP address |
|
||||
| `QDRANT_PORT` | Static value | Qdrant HTTP port (6333) |
|
||||
| `QDRANT_API_KEY` | terraform.tfvars | Qdrant authentication key |
|
||||
| `FIRESTORE_PROJECT_ID` | terraform.tfvars | GCP project ID |
|
||||
| `GCS_BUCKET_NAME` | terraform.tfvars | Storage bucket name |
|
||||
|
||||
### Manual Configuration Required
|
||||
|
||||
Set these in `terraform.tfvars`:
|
||||
|
||||
```hcl
|
||||
project_id = "your-gcp-project-id"
|
||||
storage_bucket_name = "your-bucket-name"
|
||||
qdrant_api_key = "your-qdrant-api-key"
|
||||
use_static_ip = true # or false
|
||||
```
|
||||
|
||||
## Networking Configuration
|
||||
|
||||
### Firewall Rules
|
||||
|
||||
The configuration automatically creates firewall rules to allow:
|
||||
- Qdrant HTTP traffic (port 6333)
|
||||
- Qdrant gRPC traffic (port 6334)
|
||||
- Access from Cloud Run to Qdrant VM
|
||||
|
||||
### IP Address Types
|
||||
|
||||
#### External IP (Default)
|
||||
- Used for Cloud Run → Qdrant communication
|
||||
- Accessible from internet (with firewall rules)
|
||||
- Can be static or ephemeral
|
||||
|
||||
#### Internal IP (Alternative)
|
||||
- Available for VPC-native connections
|
||||
- More secure but requires VPC configuration
|
||||
- Use `cloud_run_qdrant_host_internal` output
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Check Qdrant Connectivity
|
||||
|
||||
```bash
|
||||
# Get IP and test connection
|
||||
./deployment/terraform/scripts/get_qdrant_ip.sh
|
||||
|
||||
# Manual test
|
||||
QDRANT_IP=$(terraform output -raw vector_db_vm_external_ip)
|
||||
curl http://$QDRANT_IP:6333/healthz
|
||||
```
|
||||
|
||||
### Verify Cloud Run Environment
|
||||
|
||||
```bash
|
||||
# Check Cloud Run service
|
||||
gcloud run services describe sereact --region=us-central1
|
||||
|
||||
# Check environment variables
|
||||
gcloud run services describe sereact --region=us-central1 \
|
||||
--format="value(spec.template.spec.containers[0].env[].name,spec.template.spec.containers[0].env[].value)"
|
||||
```
|
||||
|
||||
### Common Issues
|
||||
|
||||
1. **VM not ready**: Qdrant installation takes 2-3 minutes after VM creation
|
||||
2. **Firewall blocking**: Check that your IP is in `allowed_cidr_blocks`
|
||||
3. **Wrong IP type**: Ensure you're using external IP for Cloud Run connections
|
||||
|
||||
## Security Considerations
|
||||
|
||||
### Production Recommendations
|
||||
|
||||
1. **Use Static IP**: Set `use_static_ip = true`
|
||||
2. **Restrict Access**: Set specific CIDR blocks in `allowed_cidr_blocks`
|
||||
3. **Use Internal IP**: Consider VPC-native networking for internal communication
|
||||
4. **Secure API Key**: Store `qdrant_api_key` in Secret Manager
|
||||
|
||||
### Development Setup
|
||||
|
||||
```hcl
|
||||
# terraform.tfvars for development
|
||||
use_static_ip = false
|
||||
allowed_cidr_blocks = "0.0.0.0/0" # Open access (not for production!)
|
||||
```
|
||||
|
||||
## Cost Optimization
|
||||
|
||||
### Development
|
||||
- Use `use_static_ip = false` to avoid static IP charges
|
||||
- Use smaller VM sizes: `machine_type = "e2-micro"`
|
||||
|
||||
### Production
|
||||
- Use `use_static_ip = true` for consistent connectivity
|
||||
- Use appropriate VM sizes based on load
|
||||
|
||||
## Advanced Configuration
|
||||
|
||||
### Custom VM Configuration
|
||||
|
||||
Edit `vm.tf` to customize:
|
||||
- Machine type
|
||||
- Disk size
|
||||
- Startup script
|
||||
- Network configuration
|
||||
|
||||
### VPC-Native Networking
|
||||
|
||||
For internal-only communication:
|
||||
1. Create VPC connector
|
||||
2. Configure Cloud Run to use VPC
|
||||
3. Use internal IP output: `cloud_run_qdrant_host_internal`
|
||||
|
||||
## Support
|
||||
|
||||
For issues with:
|
||||
- **Terraform**: Check `terraform plan` output
|
||||
- **VM connectivity**: Use the helper script `get_qdrant_ip.sh`
|
||||
- **Cloud Run**: Check logs with `gcloud beta run services logs tail sereact --region=us-central1`
|
||||
@ -1,146 +1,146 @@
|
||||
provider "google" {
|
||||
project = var.project_id
|
||||
region = var.region
|
||||
zone = var.zone
|
||||
}
|
||||
|
||||
# Get current project information
|
||||
data "google_project" "current" {
|
||||
project_id = var.project_id
|
||||
}
|
||||
|
||||
# Enable required APIs
|
||||
resource "google_project_service" "services" {
|
||||
for_each = toset([
|
||||
"cloudresourcemanager.googleapis.com",
|
||||
"containerregistry.googleapis.com",
|
||||
"run.googleapis.com",
|
||||
"firestore.googleapis.com",
|
||||
"storage.googleapis.com",
|
||||
"compute.googleapis.com"
|
||||
])
|
||||
|
||||
project = var.project_id
|
||||
service = each.key
|
||||
|
||||
disable_on_destroy = false
|
||||
}
|
||||
|
||||
# Cloud Storage bucket
|
||||
resource "google_storage_bucket" "app_bucket" {
|
||||
name = var.storage_bucket_name
|
||||
location = var.region
|
||||
uniform_bucket_level_access = true
|
||||
|
||||
depends_on = [google_project_service.services]
|
||||
}
|
||||
|
||||
# Firestore Database
|
||||
resource "google_firestore_database" "database" {
|
||||
name = var.firestore_db_name
|
||||
location_id = var.region
|
||||
type = "FIRESTORE_NATIVE"
|
||||
|
||||
depends_on = [google_project_service.services]
|
||||
}
|
||||
|
||||
# Container Registry - no explicit resource needed, just enable the API
|
||||
# You'll push images to gcr.io/${var.project_id}/sereact-api
|
||||
|
||||
# Cloud Run service
|
||||
resource "google_cloud_run_service" "sereact" {
|
||||
name = "sereact"
|
||||
location = var.region
|
||||
|
||||
metadata {
|
||||
annotations = {
|
||||
"run.googleapis.com/ingress" = "all"
|
||||
}
|
||||
}
|
||||
|
||||
template {
|
||||
spec {
|
||||
containers {
|
||||
# Use our optimized image
|
||||
image = "gcr.io/${var.project_id}/sereact-api:latest"
|
||||
|
||||
ports {
|
||||
container_port = 8000
|
||||
}
|
||||
|
||||
resources {
|
||||
limits = {
|
||||
cpu = "1"
|
||||
memory = "1Gi"
|
||||
}
|
||||
}
|
||||
|
||||
env {
|
||||
name = "FIRESTORE_PROJECT_ID"
|
||||
value = var.project_id
|
||||
}
|
||||
|
||||
env {
|
||||
name = "FIRESTORE_DATABASE_NAME"
|
||||
value = var.firestore_db_name
|
||||
}
|
||||
|
||||
env {
|
||||
name = "GCS_BUCKET_NAME"
|
||||
value = var.storage_bucket_name
|
||||
}
|
||||
|
||||
env {
|
||||
name = "VECTOR_DB_ENVIRONMENT"
|
||||
value = var.vector_db_environment
|
||||
}
|
||||
|
||||
env {
|
||||
name = "VECTOR_DB_INDEX_NAME"
|
||||
value = var.vector_db_index_name
|
||||
}
|
||||
|
||||
env {
|
||||
name = "QDRANT_HOST"
|
||||
value = var.use_static_ip ? google_compute_address.vector_db_static_ip.address : google_compute_instance.vector_db_vm.network_interface[0].access_config[0].nat_ip
|
||||
}
|
||||
|
||||
env {
|
||||
name = "QDRANT_PORT"
|
||||
value = "6333"
|
||||
}
|
||||
|
||||
env {
|
||||
name = "QDRANT_API_KEY"
|
||||
value = var.qdrant_api_key
|
||||
}
|
||||
|
||||
env {
|
||||
name = "LOG_LEVEL"
|
||||
value = "INFO"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
metadata {
|
||||
annotations = {
|
||||
"autoscaling.knative.dev/maxScale" = "10"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
traffic {
|
||||
percent = 100
|
||||
latest_revision = true
|
||||
}
|
||||
|
||||
depends_on = [google_project_service.services, google_compute_instance.vector_db_vm]
|
||||
}
|
||||
|
||||
# Make the Cloud Run service publicly accessible
|
||||
resource "google_cloud_run_service_iam_member" "public_access" {
|
||||
service = google_cloud_run_service.sereact.name
|
||||
location = google_cloud_run_service.sereact.location
|
||||
role = "roles/run.invoker"
|
||||
member = "allUsers"
|
||||
provider "google" {
|
||||
project = var.project_id
|
||||
region = var.region
|
||||
zone = var.zone
|
||||
}
|
||||
|
||||
# Get current project information
|
||||
data "google_project" "current" {
|
||||
project_id = var.project_id
|
||||
}
|
||||
|
||||
# Enable required APIs
|
||||
resource "google_project_service" "services" {
|
||||
for_each = toset([
|
||||
"cloudresourcemanager.googleapis.com",
|
||||
"containerregistry.googleapis.com",
|
||||
"run.googleapis.com",
|
||||
"firestore.googleapis.com",
|
||||
"storage.googleapis.com",
|
||||
"compute.googleapis.com"
|
||||
])
|
||||
|
||||
project = var.project_id
|
||||
service = each.key
|
||||
|
||||
disable_on_destroy = false
|
||||
}
|
||||
|
||||
# Cloud Storage bucket
|
||||
resource "google_storage_bucket" "app_bucket" {
|
||||
name = var.storage_bucket_name
|
||||
location = var.region
|
||||
uniform_bucket_level_access = true
|
||||
|
||||
depends_on = [google_project_service.services]
|
||||
}
|
||||
|
||||
# Firestore Database
|
||||
resource "google_firestore_database" "database" {
|
||||
name = var.firestore_db_name
|
||||
location_id = var.region
|
||||
type = "FIRESTORE_NATIVE"
|
||||
|
||||
depends_on = [google_project_service.services]
|
||||
}
|
||||
|
||||
# Container Registry - no explicit resource needed, just enable the API
|
||||
# You'll push images to gcr.io/${var.project_id}/sereact-api
|
||||
|
||||
# Cloud Run service
|
||||
resource "google_cloud_run_service" "sereact" {
|
||||
name = "sereact"
|
||||
location = var.region
|
||||
|
||||
metadata {
|
||||
annotations = {
|
||||
"run.googleapis.com/ingress" = "all"
|
||||
}
|
||||
}
|
||||
|
||||
template {
|
||||
spec {
|
||||
containers {
|
||||
# Use our optimized image
|
||||
image = "gcr.io/${var.project_id}/sereact-api:latest"
|
||||
|
||||
ports {
|
||||
container_port = 8000
|
||||
}
|
||||
|
||||
resources {
|
||||
limits = {
|
||||
cpu = "1"
|
||||
memory = "1Gi"
|
||||
}
|
||||
}
|
||||
|
||||
env {
|
||||
name = "FIRESTORE_PROJECT_ID"
|
||||
value = var.project_id
|
||||
}
|
||||
|
||||
env {
|
||||
name = "FIRESTORE_DATABASE_NAME"
|
||||
value = var.firestore_db_name
|
||||
}
|
||||
|
||||
env {
|
||||
name = "GCS_BUCKET_NAME"
|
||||
value = var.storage_bucket_name
|
||||
}
|
||||
|
||||
env {
|
||||
name = "VECTOR_DB_ENVIRONMENT"
|
||||
value = var.vector_db_environment
|
||||
}
|
||||
|
||||
env {
|
||||
name = "VECTOR_DB_INDEX_NAME"
|
||||
value = var.vector_db_index_name
|
||||
}
|
||||
|
||||
env {
|
||||
name = "QDRANT_HOST"
|
||||
value = google_compute_instance.vector_db_vm.network_interface[0].access_config[0].nat_ip
|
||||
}
|
||||
|
||||
env {
|
||||
name = "QDRANT_PORT"
|
||||
value = "6333"
|
||||
}
|
||||
|
||||
env {
|
||||
name = "QDRANT_API_KEY"
|
||||
value = var.qdrant_api_key
|
||||
}
|
||||
|
||||
env {
|
||||
name = "LOG_LEVEL"
|
||||
value = "INFO"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
metadata {
|
||||
annotations = {
|
||||
"autoscaling.knative.dev/maxScale" = "10"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
traffic {
|
||||
percent = 100
|
||||
latest_revision = true
|
||||
}
|
||||
|
||||
depends_on = [google_project_service.services, google_compute_instance.vector_db_vm]
|
||||
}
|
||||
|
||||
# Make the Cloud Run service publicly accessible
|
||||
resource "google_cloud_run_service_iam_member" "public_access" {
|
||||
service = google_cloud_run_service.sereact.name
|
||||
location = google_cloud_run_service.sereact.location
|
||||
role = "roles/run.invoker"
|
||||
member = "allUsers"
|
||||
}
|
||||
@ -1,66 +1,74 @@
|
||||
output "cloud_run_url" {
|
||||
value = google_cloud_run_service.sereact.status[0].url
|
||||
description = "The URL of the deployed Cloud Run service"
|
||||
}
|
||||
|
||||
output "storage_bucket_name" {
|
||||
value = google_storage_bucket.app_bucket.name
|
||||
description = "The name of the provisioned Cloud Storage bucket"
|
||||
}
|
||||
|
||||
output "firestore_database_id" {
|
||||
value = google_firestore_database.database.id
|
||||
description = "The ID of the provisioned Firestore database"
|
||||
}
|
||||
|
||||
output "container_registry_url" {
|
||||
value = "gcr.io/${var.project_id}/sereact"
|
||||
description = "The URL of the Container Registry repository"
|
||||
}
|
||||
|
||||
# Vector Database VM outputs
|
||||
output "vector_db_vm_name" {
|
||||
value = google_compute_instance.vector_db_vm.name
|
||||
description = "The name of the vector database VM"
|
||||
}
|
||||
|
||||
output "vector_db_vm_external_ip" {
|
||||
value = google_compute_instance.vector_db_vm.network_interface[0].access_config[0].nat_ip
|
||||
description = "The external IP address of the vector database VM"
|
||||
}
|
||||
|
||||
output "vector_db_vm_internal_ip" {
|
||||
value = google_compute_instance.vector_db_vm.network_interface[0].network_ip
|
||||
description = "The internal IP address of the vector database VM"
|
||||
}
|
||||
|
||||
output "vector_db_static_ip" {
|
||||
value = var.use_static_ip ? google_compute_address.vector_db_static_ip.address : null
|
||||
description = "The static IP address of the vector database VM (if enabled)"
|
||||
}
|
||||
|
||||
output "qdrant_http_endpoint" {
|
||||
value = "http://${google_compute_instance.vector_db_vm.network_interface[0].access_config[0].nat_ip}:6333"
|
||||
description = "The HTTP endpoint for Qdrant vector database"
|
||||
}
|
||||
|
||||
output "qdrant_grpc_endpoint" {
|
||||
value = "http://${google_compute_instance.vector_db_vm.network_interface[0].access_config[0].nat_ip}:6334"
|
||||
description = "The gRPC endpoint for Qdrant vector database"
|
||||
}
|
||||
|
||||
# Cloud Run environment configuration
|
||||
output "cloud_run_qdrant_host" {
|
||||
value = var.use_static_ip ? google_compute_address.vector_db_static_ip.address : google_compute_instance.vector_db_vm.network_interface[0].access_config[0].nat_ip
|
||||
description = "The Qdrant host configured for Cloud Run"
|
||||
}
|
||||
|
||||
output "deployment_summary" {
|
||||
value = {
|
||||
cloud_run_url = google_cloud_run_service.sereact.status[0].url
|
||||
qdrant_endpoint = "http://${google_compute_instance.vector_db_vm.network_interface[0].access_config[0].nat_ip}:6333"
|
||||
firestore_database = var.firestore_db_name
|
||||
storage_bucket = var.storage_bucket_name
|
||||
}
|
||||
description = "Summary of deployed resources"
|
||||
output "cloud_run_url" {
|
||||
value = google_cloud_run_service.sereact.status[0].url
|
||||
description = "The URL of the deployed Cloud Run service"
|
||||
}
|
||||
|
||||
output "storage_bucket_name" {
|
||||
value = google_storage_bucket.app_bucket.name
|
||||
description = "The name of the provisioned Cloud Storage bucket"
|
||||
}
|
||||
|
||||
output "firestore_database_id" {
|
||||
value = google_firestore_database.database.id
|
||||
description = "The ID of the provisioned Firestore database"
|
||||
}
|
||||
|
||||
output "container_registry_url" {
|
||||
value = "gcr.io/${var.project_id}/sereact"
|
||||
description = "The URL of the Container Registry repository"
|
||||
}
|
||||
|
||||
# Vector Database VM outputs
|
||||
output "vector_db_vm_name" {
|
||||
value = google_compute_instance.vector_db_vm.name
|
||||
description = "The name of the vector database VM"
|
||||
}
|
||||
|
||||
output "vector_db_vm_external_ip" {
|
||||
value = google_compute_instance.vector_db_vm.network_interface[0].access_config[0].nat_ip
|
||||
description = "The external IP address of the vector database VM"
|
||||
}
|
||||
|
||||
output "vector_db_vm_internal_ip" {
|
||||
value = google_compute_instance.vector_db_vm.network_interface[0].network_ip
|
||||
description = "The internal IP address of the vector database VM"
|
||||
}
|
||||
|
||||
output "vector_db_static_ip" {
|
||||
value = var.use_static_ip ? google_compute_address.vector_db_static_ip[0].address : null
|
||||
description = "The static IP address of the vector database VM (if enabled)"
|
||||
}
|
||||
|
||||
output "qdrant_http_endpoint" {
|
||||
value = "http://${google_compute_instance.vector_db_vm.network_interface[0].access_config[0].nat_ip}:6333"
|
||||
description = "The HTTP endpoint for Qdrant vector database"
|
||||
}
|
||||
|
||||
output "qdrant_grpc_endpoint" {
|
||||
value = "http://${google_compute_instance.vector_db_vm.network_interface[0].access_config[0].nat_ip}:6334"
|
||||
description = "The gRPC endpoint for Qdrant vector database"
|
||||
}
|
||||
|
||||
# Cloud Run environment configuration
|
||||
output "cloud_run_qdrant_host" {
|
||||
value = google_compute_instance.vector_db_vm.network_interface[0].access_config[0].nat_ip
|
||||
description = "The Qdrant host IP address configured for Cloud Run"
|
||||
}
|
||||
|
||||
# Separate output for internal IP (useful for VPC-native connections)
|
||||
output "cloud_run_qdrant_host_internal" {
|
||||
value = google_compute_instance.vector_db_vm.network_interface[0].network_ip
|
||||
description = "The internal Qdrant host IP address for VPC-native connections"
|
||||
}
|
||||
|
||||
output "deployment_summary" {
|
||||
value = {
|
||||
cloud_run_url = google_cloud_run_service.sereact.status[0].url
|
||||
qdrant_endpoint = "http://${google_compute_instance.vector_db_vm.network_interface[0].access_config[0].nat_ip}:6333"
|
||||
qdrant_host_ip = google_compute_instance.vector_db_vm.network_interface[0].access_config[0].nat_ip
|
||||
firestore_database = var.firestore_db_name
|
||||
storage_bucket = var.storage_bucket_name
|
||||
static_ip_enabled = var.use_static_ip
|
||||
}
|
||||
description = "Summary of deployed resources"
|
||||
}
|
||||
73
deployment/terraform/scripts/get_qdrant_ip.sh
Normal file
73
deployment/terraform/scripts/get_qdrant_ip.sh
Normal file
@ -0,0 +1,73 @@
|
||||
#!/bin/bash

# Print the Qdrant VM IP address and derived endpoints from Terraform outputs,
# then probe the Qdrant HTTP port.
#
# Usage: ./get_qdrant_ip.sh [--internal]
#   --internal   report the VM's internal IP instead of the external one

set -e

# Terraform must run from the terraform directory (the parent of scripts/).
cd "$(dirname "$0")/.."

# Only a local state file is detected here.
if [ ! -f "terraform.tfstate" ]; then
    echo "ERROR: No terraform.tfstate found. Please run 'terraform apply' first."
    exit 1
fi

# Read one Terraform output ($1); print the fallback ($2) when it is missing
# or cannot be rendered with -raw.
tf_output() {
    terraform output -raw "$1" 2>/dev/null || echo "$2"
}

# Parse command line arguments
INTERNAL=false
if [ "${1:-}" = "--internal" ]; then
    INTERNAL=true
fi

echo "Retrieving Qdrant VM IP address..."
echo "=================================="

# Get VM information
VM_NAME=$(tf_output vector_db_vm_name "N/A")
echo "VM Name: $VM_NAME"

if [ "$INTERNAL" = true ]; then
    # Get internal IP
    QDRANT_IP=$(tf_output vector_db_vm_internal_ip "N/A")
    echo "Internal IP: $QDRANT_IP"
    echo "Qdrant HTTP Endpoint (Internal): http://$QDRANT_IP:6333"
    echo "Qdrant gRPC Endpoint (Internal): http://$QDRANT_IP:6334"
else
    # Get external IP
    QDRANT_IP=$(tf_output vector_db_vm_external_ip "N/A")
    echo "External IP: $QDRANT_IP"
    echo "Qdrant HTTP Endpoint: http://$QDRANT_IP:6333"
    echo "Qdrant gRPC Endpoint: http://$QDRANT_IP:6334"
fi

# Check if static IP is enabled ("null" is the fallback when the output is
# absent or cannot be printed).
STATIC_IP_ENABLED=$(tf_output vector_db_static_ip "null")
if [ "$STATIC_IP_ENABLED" != "null" ] && [ "$STATIC_IP_ENABLED" != "" ]; then
    echo "Static IP: $STATIC_IP_ENABLED (enabled)"
else
    echo "Static IP: Not enabled (using ephemeral IP)"
fi

# Get Cloud Run service URL
CLOUD_RUN_URL=$(tf_output cloud_run_url "N/A")
echo "Cloud Run URL: $CLOUD_RUN_URL"

echo ""
echo "Environment Variables for Cloud Run:"
echo "QDRANT_HOST=$QDRANT_IP"
echo "QDRANT_PORT=6333"

# Test connectivity (optional)
echo ""
echo "Testing Qdrant connectivity..."
if [ -z "$QDRANT_IP" ] || [ "$QDRANT_IP" = "N/A" ]; then
    # Without a real address the URL below would point curl at a bogus host.
    echo "Qdrant IP could not be determined - skipping connectivity test"
elif command -v curl &> /dev/null; then
    # --fail makes HTTP error statuses count as failures; /healthz is Qdrant's
    # health endpoint. --max-time bounds the whole request, not just connect.
    if curl -sf --connect-timeout 5 --max-time 10 "http://$QDRANT_IP:6333/healthz" > /dev/null; then
        echo "✓ Qdrant is accessible at http://$QDRANT_IP:6333"
    else
        echo "✗ Qdrant is not accessible at http://$QDRANT_IP:6333"
        echo " This might be normal if the VM is still starting up or firewall rules need adjustment."
    fi
else
    echo "curl not available - skipping connectivity test"
fi
|
||||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@ -15,7 +15,8 @@ resource "google_compute_instance" "vector_db_vm" {
|
||||
network_interface {
|
||||
network = "default"
|
||||
access_config {
|
||||
# Ephemeral public IP
|
||||
# Use static IP if enabled, otherwise ephemeral
|
||||
nat_ip = var.use_static_ip ? google_compute_address.vector_db_static_ip[0].address : null
|
||||
}
|
||||
}
|
||||
|
||||
@ -63,42 +64,7 @@ resource "google_compute_firewall" "qdrant_firewall" {
|
||||
|
||||
# Static IP for the vector DB VM (optional but recommended)
|
||||
# Regional address reservation, created only when var.use_static_ip is true
# (count is 0 otherwise, so references must index the instance).
resource "google_compute_address" "vector_db_static_ip" {
  count = var.use_static_ip ? 1 : 0

  name   = "vector-db-static-ip"
  region = var.region
}
|
||||
|
||||
# Attach the static IP to the VM
|
||||
# Qdrant VM variant that binds the reserved static address; created only when
# var.use_static_ip is true.
resource "google_compute_instance" "vector_db_vm_with_static_ip" {
  count        = var.use_static_ip ? 1 : 0
  name         = "sereact-vector-db-static"
  machine_type = "e2-standard-2"
  zone         = var.zone

  boot_disk {
    initialize_params {
      image = "ubuntu-os-cloud/ubuntu-2204-lts"
      size  = 50
      type  = "pd-standard"
    }
  }

  network_interface {
    network = "default"
    access_config {
      # google_compute_address.vector_db_static_ip is declared with `count`,
      # so it must be referenced by instance index. This resource shares the
      # same count condition, so index 0 exists whenever it is evaluated.
      nat_ip = google_compute_address.vector_db_static_ip[0].address
    }
  }

  # Startup script is rendered from a template that receives the API key.
  metadata_startup_script = templatefile("${path.module}/scripts/install_qdrant.sh", {
    qdrant_api_key = var.qdrant_api_key
  })

  service_account {
    email  = google_service_account.vector_db_sa.email
    scopes = ["cloud-platform"]
  }

  tags = ["vector-db", "qdrant"]

  depends_on = [google_project_service.services]
}
|
||||
@ -16,10 +16,18 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(tags=["Search"], prefix="/search")
|
||||
|
||||
# Initialize services
|
||||
vector_db_service = VectorDatabaseService()
|
||||
# Initialize services - delay VectorDatabaseService instantiation
|
||||
vector_db_service = None
|
||||
embedding_service = EmbeddingService()
|
||||
|
||||
def get_vector_db_service():
    """Return the shared VectorDatabaseService, constructing it on first use.

    The instance is kept in the module-level ``vector_db_service`` name, so
    later calls return the same object instead of building a new one.
    """
    global vector_db_service

    # Fast path: the service was already built by an earlier call.
    if vector_db_service is not None:
        return vector_db_service

    logger.info("Initializing VectorDatabaseService...")
    vector_db_service = VectorDatabaseService()
    return vector_db_service
|
||||
|
||||
@router.get("", response_model=SearchResponse)
|
||||
async def search_images(
|
||||
request: Request,
|
||||
@ -52,7 +60,7 @@ async def search_images(
|
||||
raise HTTPException(status_code=400, detail="Failed to generate search embedding")
|
||||
|
||||
# Search in vector database
|
||||
search_results = vector_db_service.search_similar_images(
|
||||
search_results = get_vector_db_service().search_similar_images(
|
||||
query_vector=query_embedding,
|
||||
limit=limit,
|
||||
score_threshold=threshold,
|
||||
@ -156,7 +164,7 @@ async def search_images_advanced(
|
||||
raise HTTPException(status_code=400, detail="Failed to generate search embedding")
|
||||
|
||||
# Search in vector database
|
||||
search_results = vector_db_service.search_similar_images(
|
||||
search_results = get_vector_db_service().search_similar_images(
|
||||
query_vector=query_embedding,
|
||||
limit=search_request.limit,
|
||||
score_threshold=search_request.threshold,
|
||||
@ -289,14 +297,14 @@ async def find_similar_images(
|
||||
raise HTTPException(status_code=400, detail="Reference image does not have embeddings")
|
||||
|
||||
# Get the embedding for the reference image
|
||||
reference_data = vector_db_service.get_image_vector(image_id)
|
||||
reference_data = get_vector_db_service().get_image_vector(image_id)
|
||||
if not reference_data or not reference_data.get('vector'):
|
||||
raise HTTPException(status_code=400, detail="Failed to get reference image embedding")
|
||||
|
||||
reference_embedding = reference_data['vector']
|
||||
|
||||
# Search for similar images
|
||||
search_results = vector_db_service.search_similar_images(
|
||||
search_results = get_vector_db_service().search_similar_images(
|
||||
query_vector=reference_embedding,
|
||||
limit=limit + 1, # +1 to account for the reference image itself
|
||||
score_threshold=threshold,
|
||||
|
||||
@ -47,13 +47,15 @@ class VectorDatabaseService:
|
||||
if prefer_grpc is None:
|
||||
prefer_grpc = settings.QDRANT_PREFER_GRPC
|
||||
|
||||
# Auto-detect HTTPS usage if not specified
|
||||
# Handle HTTPS setting properly
|
||||
if https is None:
|
||||
# First check if explicitly set in settings
|
||||
if hasattr(settings, 'QDRANT_HTTPS') and os.getenv("QDRANT_HTTPS"):
|
||||
# First check if explicitly set in environment
|
||||
qdrant_https_env = os.getenv("QDRANT_HTTPS")
|
||||
if qdrant_https_env is not None:
|
||||
# Use the parsed boolean value from settings
|
||||
https = settings.QDRANT_HTTPS
|
||||
else:
|
||||
# Use HTTP for localhost and known development IPs, HTTPS for others
|
||||
# Auto-detect: Use HTTP for localhost and known development IPs, HTTPS for others
|
||||
https = not (self.host in ["localhost", "127.0.0.1"] or self.host.startswith("192.168.") or self.host.startswith("10."))
|
||||
# Override for specific known HTTP-only servers
|
||||
if self.host == "34.171.134.17":
|
||||
|
||||
@ -8,20 +8,34 @@ import os
|
||||
import sys
|
||||
from src.services.vector_db import VectorDatabaseService
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
# Load environment variables from .env file
|
||||
load_dotenv()
|
||||
|
||||
def test_qdrant_connection():
|
||||
"""Test the connection to Qdrant"""
|
||||
|
||||
# Set environment variables from deployed infrastructure
|
||||
os.environ['QDRANT_HOST'] = '34.171.134.17'
|
||||
os.environ['QDRANT_PORT'] = '6333'
|
||||
# os.environ['QDRANT_HOST'] = '34.171.134.17'
|
||||
# os.environ['QDRANT_PORT'] = '6333'
|
||||
# os.environ['QDRANT_HTTPS'] = 'false' # Explicitly disable HTTPS
|
||||
# os.environ['QDRANT_PREFER_GRPC'] = 'false' # Explicitly disable gRPC
|
||||
|
||||
try:
|
||||
print("Testing Qdrant connection...")
|
||||
print(f"Host: {os.environ['QDRANT_HOST']}")
|
||||
print(f"Port: {os.environ['QDRANT_PORT']}")
|
||||
# print(f"Host: {os.environ['QDRANT_HOST']}")
|
||||
# print(f"Port: {os.environ['QDRANT_PORT']}")
|
||||
# print(f"HTTPS: {os.environ['QDRANT_HTTPS']}")
|
||||
# print(f"gRPC: {os.environ['QDRANT_PREFER_GRPC']}")
|
||||
|
||||
# Initialize the service
|
||||
vector_db = VectorDatabaseService()
|
||||
# Initialize the service with explicit parameters to ensure HTTP is used
|
||||
vector_db = VectorDatabaseService(
|
||||
host=os.environ['QDRANT_HOST'],
|
||||
port=int(os.environ['QDRANT_PORT']),
|
||||
prefer_grpc=False,
|
||||
https=False
|
||||
)
|
||||
|
||||
# Test health check
|
||||
is_healthy = vector_db.health_check()
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user