cp
This commit is contained in:
parent
ee7a3677fc
commit
046746c5b6
494
README.md
494
README.md
@ -1,13 +1,16 @@
|
|||||||
# SEREACT - Secure Image Management API
|
# SEREACT - Secure Image Management API
|
||||||
|
|
||||||
SEREACT is a secure API for storing, organizing, and retrieving images with advanced search capabilities.
|
SEREACT is a secure API for storing, organizing, and retrieving images with advanced search capabilities powered by AI-generated embeddings.
|
||||||
|
|
||||||
## Features
|
## Features
|
||||||
|
|
||||||
- Secure image storage in Google Cloud Storage
|
- Secure image storage in Google Cloud Storage
|
||||||
- Team-based organization and access control
|
- Team-based organization and access control
|
||||||
- API key authentication
|
- API key authentication
|
||||||
- Semantic search using image embeddings
|
- **Asynchronous image processing with Pub/Sub and Cloud Functions**
|
||||||
|
- **AI-powered image embeddings using Google Cloud Vision API**
|
||||||
|
- **Semantic search using vector similarity in Pinecone**
|
||||||
|
- **Automatic retry mechanism for failed processing (up to 3 attempts)**
|
||||||
- Metadata extraction and storage
|
- Metadata extraction and storage
|
||||||
- Image processing capabilities
|
- Image processing capabilities
|
||||||
- Multi-team support
|
- Multi-team support
|
||||||
@ -19,6 +22,7 @@ SEREACT is a secure API for storing, organizing, and retrieving images with adva
|
|||||||
sereact/
|
sereact/
|
||||||
├── images/ # Sample images for testing
|
├── images/ # Sample images for testing
|
||||||
├── deployment/ # Deployment configurations
|
├── deployment/ # Deployment configurations
|
||||||
|
│ ├── cloud-function/ # **Cloud Function for image processing**
|
||||||
│ ├── cloud-run/ # Google Cloud Run configuration
|
│ ├── cloud-run/ # Google Cloud Run configuration
|
||||||
│ └── terraform/ # Infrastructure as code
|
│ └── terraform/ # Infrastructure as code
|
||||||
├── docs/ # Documentation
|
├── docs/ # Documentation
|
||||||
@ -37,6 +41,7 @@ sereact/
|
|||||||
│ ├── models/ # Database models
|
│ ├── models/ # Database models
|
||||||
│ ├── schemas/ # API request/response schemas
|
│ ├── schemas/ # API request/response schemas
|
||||||
│ ├── services/ # Business logic services
|
│ ├── services/ # Business logic services
|
||||||
|
│ │ └── pubsub_service.py # **Pub/Sub message publishing**
|
||||||
│ └── utils/ # Utility functions
|
│ └── utils/ # Utility functions
|
||||||
├── tests/ # Test code
|
├── tests/ # Test code
|
||||||
│ ├── api/ # API tests
|
│ ├── api/ # API tests
|
||||||
@ -44,6 +49,7 @@ sereact/
|
|||||||
│ ├── models/ # Model tests
|
│ ├── models/ # Model tests
|
||||||
│ ├── services/ # Service tests
|
│ ├── services/ # Service tests
|
||||||
│ ├── integration/ # Integration tests
|
│ ├── integration/ # Integration tests
|
||||||
|
│ │ └── test_cloud_function.py # **Cloud Function tests**
|
||||||
│ └── test_e2e.py # **Comprehensive E2E workflow tests**
|
│ └── test_e2e.py # **Comprehensive E2E workflow tests**
|
||||||
├── main.py # Application entry point
|
├── main.py # Application entry point
|
||||||
├── requirements.txt # Python dependencies
|
├── requirements.txt # Python dependencies
|
||||||
@ -79,38 +85,49 @@ sereact/
|
|||||||
└─────────────┘ └─────────────┘
|
└─────────────┘ └─────────────┘
|
||||||
```
|
```
|
||||||
|
|
||||||
1. **Image Upload Flow**:
|
## **Image Processing Workflow**
|
||||||
- Images are uploaded through the FastAPI backend
|
|
||||||
- Images are stored in Cloud Storage
|
|
||||||
- A message is published to Pub/Sub queue
|
|
||||||
|
|
||||||
2. **Embedding Generation Flow**:
|
### 1. **Image Upload Flow**:
|
||||||
- Cloud Function is triggered by Pub/Sub message
|
- User uploads image through FastAPI backend
|
||||||
- Function calls Cloud Vision API to generate image embeddings
|
- Image is stored in Google Cloud Storage
|
||||||
- Embeddings are stored in Pinecone Vector DB
|
- Image metadata is saved to Firestore with `embedding_status: "pending"`
|
||||||
|
- **Pub/Sub message is published to trigger async processing**
|
||||||
|
|
||||||
3. **Search Flow**:
|
### 2. **Embedding Generation Flow** (Asynchronous):
|
||||||
|
- **Cloud Function is triggered by Pub/Sub message**
|
||||||
|
- Function updates image status to `"processing"`
|
||||||
|
- **Function downloads image from Cloud Storage**
|
||||||
|
- **Function calls Google Cloud Vision API to generate embeddings**
|
||||||
|
- **Embeddings are stored in Pinecone Vector Database**
|
||||||
|
- **Firestore is updated with embedding info and status: "success"**
|
||||||
|
|
||||||
|
### 3. **Error Handling & Retry**:
|
||||||
|
- **Failed processing updates status to "failed" with error message**
|
||||||
|
- **Automatic retry up to 3 times using Pub/Sub retry policy**
|
||||||
|
- **Dead letter queue for permanently failed messages**
|
||||||
|
|
||||||
|
### 4. **Search Flow**:
|
||||||
- Search queries processed by FastAPI backend
|
- Search queries processed by FastAPI backend
|
||||||
- Vector similarity search performed against Pinecone
|
- Vector similarity search performed against Pinecone
|
||||||
- Results combined with metadata from Firestore
|
- Results combined with metadata from Firestore
|
||||||
|
|
||||||
## Technology Stack
|
## Technology Stack
|
||||||
|
|
||||||
- FastAPI - Web framework
|
- **FastAPI** - Web framework
|
||||||
- Firestore - Database
|
- **Firestore** - Database
|
||||||
- Google Cloud Storage - Image storage
|
- **Google Cloud Storage** - Image storage
|
||||||
- Google Pub/Sub - Message queue
|
- **Google Pub/Sub** - Message queue for async processing
|
||||||
- Google Cloud Functions - Serverless computing
|
- **Google Cloud Functions** - Serverless image processing
|
||||||
- Google Cloud Vision API - Image analysis and embedding generation
|
- **Google Cloud Vision API** - AI-powered image analysis and embedding generation
|
||||||
- Pinecone - Vector database for semantic search
|
- **Pinecone** - Vector database for semantic search
|
||||||
- Pydantic - Data validation
|
- **Pydantic** - Data validation
|
||||||
|
|
||||||
## Setup and Installation
|
## Setup and Installation
|
||||||
|
|
||||||
### Prerequisites
|
### Prerequisites
|
||||||
|
|
||||||
- Python 3.8+
|
- Python 3.8+
|
||||||
- Google Cloud account with Firestore, Storage, Pub/Sub, and Cloud Functions enabled
|
- Google Cloud account with Firestore, Storage, Pub/Sub, Cloud Functions, and Vision API enabled
|
||||||
- Pinecone account for vector database
|
- Pinecone account for vector database
|
||||||
|
|
||||||
### Installation
|
### Installation
|
||||||
@ -145,6 +162,7 @@ sereact/
|
|||||||
|
|
||||||
# Google Pub/Sub
|
# Google Pub/Sub
|
||||||
PUBSUB_TOPIC=image-processing-topic
|
PUBSUB_TOPIC=image-processing-topic
|
||||||
|
PUBSUB_SUBSCRIPTION=image-processing-subscription
|
||||||
|
|
||||||
# Google Cloud Vision
|
# Google Cloud Vision
|
||||||
VISION_API_ENABLED=true
|
VISION_API_ENABLED=true
|
||||||
@ -152,18 +170,71 @@ sereact/
|
|||||||
# Security
|
# Security
|
||||||
API_KEY_SECRET=your-secret-key
|
API_KEY_SECRET=your-secret-key
|
||||||
|
|
||||||
# Vector database
|
# Vector database (Pinecone)
|
||||||
VECTOR_DB_API_KEY=your-pinecone-api-key
|
VECTOR_DB_API_KEY=your-pinecone-api-key
|
||||||
VECTOR_DB_ENVIRONMENT=your-pinecone-environment
|
VECTOR_DB_ENVIRONMENT=your-pinecone-environment
|
||||||
VECTOR_DB_INDEX_NAME=image-embeddings
|
VECTOR_DB_INDEX_NAME=image-embeddings
|
||||||
```
|
```
|
||||||
|
|
||||||
5. Run the application:
|
5. **Deploy Infrastructure** (Optional - for production):
|
||||||
|
```bash
|
||||||
|
# Deploy Pub/Sub infrastructure with Terraform
|
||||||
|
cd deployment/terraform
|
||||||
|
terraform init
|
||||||
|
terraform plan
|
||||||
|
terraform apply
|
||||||
|
|
||||||
|
# Deploy Cloud Function
|
||||||
|
cd ../cloud-function
|
||||||
|
./deploy.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
6. Run the application:
|
||||||
```bash
|
```bash
|
||||||
uvicorn main:app --reload
|
uvicorn main:app --reload
|
||||||
```
|
```
|
||||||
|
|
||||||
6. Visit `http://localhost:8000/docs` in your browser to access the API documentation.
|
7. Visit `http://localhost:8000/docs` in your browser to access the API documentation.
|
||||||
|
|
||||||
|
## **Deployment**
|
||||||
|
|
||||||
|
### **Cloud Function Deployment**
|
||||||
|
|
||||||
|
The image processing Cloud Function can be deployed using the provided script:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd deployment/cloud-function
|
||||||
|
|
||||||
|
# Set environment variables
|
||||||
|
export GOOGLE_CLOUD_PROJECT=your-project-id
|
||||||
|
export PINECONE_API_KEY=your-pinecone-api-key
|
||||||
|
export PINECONE_ENVIRONMENT=your-pinecone-environment
|
||||||
|
|
||||||
|
# Deploy the function
|
||||||
|
./deploy.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
### **Infrastructure as Code**
|
||||||
|
|
||||||
|
Use Terraform to deploy the complete infrastructure:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd deployment/terraform
|
||||||
|
|
||||||
|
# Initialize Terraform
|
||||||
|
terraform init
|
||||||
|
|
||||||
|
# Review the deployment plan
|
||||||
|
terraform plan
|
||||||
|
|
||||||
|
# Deploy infrastructure
|
||||||
|
terraform apply
|
||||||
|
```
|
||||||
|
|
||||||
|
This will create:
|
||||||
|
- **Pub/Sub topic and subscription with retry policy**
|
||||||
|
- **Dead letter queue for failed messages**
|
||||||
|
- **IAM bindings for service accounts**
|
||||||
|
|
||||||
## API Endpoints
|
## API Endpoints
|
||||||
|
|
||||||
@ -172,8 +243,23 @@ The API provides the following main endpoints:
|
|||||||
- `/api/v1/auth/*` - Authentication and API key management
|
- `/api/v1/auth/*` - Authentication and API key management
|
||||||
- `/api/v1/teams/*` - Team management
|
- `/api/v1/teams/*` - Team management
|
||||||
- `/api/v1/users/*` - User management
|
- `/api/v1/users/*` - User management
|
||||||
- `/api/v1/images/*` - Image upload, download, and management
|
- `/api/v1/images/*` - **Image upload, download, and management (with async processing)**
|
||||||
- `/api/v1/search/*` - Image search functionality
|
- `/api/v1/search/*` - **Image search functionality (semantic search)**
|
||||||
|
|
||||||
|
### **Image Processing Status**
|
||||||
|
|
||||||
|
Images now include embedding processing status:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"id": "image-id",
|
||||||
|
"filename": "example.jpg",
|
||||||
|
"embedding_status": "success", // "pending", "processing", "success", "failed"
|
||||||
|
"embedding_error": null,
|
||||||
|
"embedding_retry_count": 0,
|
||||||
|
"has_embedding": true
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
Refer to the Swagger UI documentation at `/docs` for detailed endpoint information.
|
Refer to the Swagger UI documentation at `/docs` for detailed endpoint information.
|
||||||
|
|
||||||
@ -182,7 +268,13 @@ Refer to the Swagger UI documentation at `/docs` for detailed endpoint informati
|
|||||||
### Running Tests
|
### Running Tests
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
# Run all tests
|
||||||
pytest
|
pytest
|
||||||
|
|
||||||
|
# Run specific test categories
|
||||||
|
pytest tests/services/test_pubsub_service.py # Pub/Sub service tests
|
||||||
|
pytest tests/integration/test_cloud_function.py # Cloud Function tests
|
||||||
|
pytest tests/api/test_images_pubsub.py # API integration tests
|
||||||
```
|
```
|
||||||
|
|
||||||
### **Comprehensive End-to-End Testing**
|
### **Comprehensive End-to-End Testing**
|
||||||
@ -198,360 +290,8 @@ python scripts/run_tests.py unit
|
|||||||
|
|
||||||
# Run integration tests (requires real database)
|
# Run integration tests (requires real database)
|
||||||
python scripts/run_tests.py integration
|
python scripts/run_tests.py integration
|
||||||
|
|
||||||
# Run all tests
|
|
||||||
python scripts/run_tests.py all
|
|
||||||
|
|
||||||
# Run with coverage report
|
|
||||||
python scripts/run_tests.py coverage
|
|
||||||
```
|
```
|
||||||
|
|
||||||
#### **E2E Test Coverage**
|
|
||||||
|
|
||||||
Our comprehensive E2E tests cover:
|
|
||||||
|
|
||||||
**Core Functionality:**
|
|
||||||
- ✅ **Bootstrap Setup**: Automatic creation of isolated test environment with artificial data
|
|
||||||
- ✅ **Authentication**: API key validation and verification
|
|
||||||
- ✅ **Team Management**: Create, read, update, delete teams
|
|
||||||
- ✅ **User Management**: Create, read, update, delete users
|
|
||||||
- ✅ **API Key Management**: Create, list, revoke API keys
|
|
||||||
|
|
||||||
**Image Operations:**
|
|
||||||
- ✅ **Image Upload**: File upload with metadata
|
|
||||||
- ✅ **Image Retrieval**: Get image details and download
|
|
||||||
- ✅ **Image Updates**: Modify descriptions and tags
|
|
||||||
- ✅ **Image Listing**: Paginated image lists with filters
|
|
||||||
|
|
||||||
**Advanced Search Functionality:**
|
|
||||||
- ✅ **Text Search**: Search by description content
|
|
||||||
- ✅ **Tag Search**: Filter by tags
|
|
||||||
- ✅ **Advanced Search**: Combined filters and thresholds
|
|
||||||
- ✅ **Similarity Search**: Find similar images using embeddings
|
|
||||||
- ✅ **Search Performance**: Response time validation
|
|
||||||
|
|
||||||
**Security and Isolation:**
|
|
||||||
- ✅ **User Roles**: Admin vs regular user permissions
|
|
||||||
- ✅ **Multi-team Isolation**: Data privacy between teams
|
|
||||||
- ✅ **Access Control**: Unauthorized access prevention
|
|
||||||
- ✅ **Error Handling**: Graceful error responses
|
|
||||||
|
|
||||||
**Performance and Scalability:**
|
|
||||||
- ✅ **Bulk Operations**: Multiple image uploads
|
|
||||||
- ✅ **Concurrent Access**: Simultaneous user operations
|
|
||||||
- ✅ **Database Performance**: Query response times
|
|
||||||
- ✅ **Data Consistency**: Transaction integrity
|
|
||||||
|
|
||||||
#### **Test Features**
|
|
||||||
|
|
||||||
**🎯 Completely Self-Contained**
|
|
||||||
- **No setup required**: Tests create their own isolated environment
|
|
||||||
- **Artificial test data**: Each test class creates unique teams, users, and images
|
|
||||||
- **Automatic cleanup**: All test data is deleted after tests complete
|
|
||||||
- **No environment variables needed**: Just run the tests!
|
|
||||||
|
|
||||||
**🔒 Isolated and Safe**
|
|
||||||
- **Unique identifiers**: Each test uses timestamp-based unique names
|
|
||||||
- **No conflicts**: Tests can run in parallel without interference
|
|
||||||
- **No database pollution**: Tests don't affect existing data
|
|
||||||
- **Idempotent**: Can be run multiple times safely
|
|
||||||
|
|
||||||
**⚡ Performance-Aware**
|
|
||||||
- **Class-scoped fixtures**: Expensive setup shared across test methods
|
|
||||||
- **Efficient cleanup**: Resources deleted in optimal order
|
|
||||||
- **Real database tests**: Optional performance testing with larger datasets
|
|
||||||
- **Timing validation**: Response time assertions for critical operations
|
|
||||||
|
|
||||||
#### **Advanced Test Modes**
|
|
||||||
|
|
||||||
**Standard E2E Tests (No Setup Required)**
|
|
||||||
```bash
|
|
||||||
# Just run them - completely self-contained!
|
|
||||||
python scripts/run_tests.py e2e
|
|
||||||
```
|
|
||||||
|
|
||||||
**Integration Tests with Real Services**
|
|
||||||
```bash
|
|
||||||
# Enable integration tests with real Google Cloud services
|
|
||||||
export E2E_INTEGRATION_TEST=1
|
|
||||||
pytest -m integration
|
|
||||||
```
|
|
||||||
|
|
||||||
**Real Database Performance Tests**
|
|
||||||
```bash
|
|
||||||
# Enable real database tests with larger datasets
|
|
||||||
export E2E_REALDB_TEST=1
|
|
||||||
pytest -m realdb
|
|
||||||
```
|
|
||||||
|
|
||||||
For detailed testing information, see [docs/TESTING.md](docs/TESTING.md).
|
|
||||||
|
|
||||||
### Creating a New API Version
|
|
||||||
|
|
||||||
1. Create a new package under `src/api/` (e.g., `v2`)
|
|
||||||
2. Implement new endpoints
|
|
||||||
3. Update the main.py file to include the new routers
|
|
||||||
|
|
||||||
## Deployment
|
|
||||||
|
|
||||||
### Google Cloud Run
|
|
||||||
|
|
||||||
1. Build the Docker image:
|
|
||||||
```bash
|
|
||||||
docker build -t gcr.io/your-project/sereact .
|
|
||||||
```
|
|
||||||
|
|
||||||
2. Push to Google Container Registry:
|
|
||||||
```bash
|
|
||||||
docker push gcr.io/your-project/sereact
|
|
||||||
```
|
|
||||||
|
|
||||||
3. Deploy to Cloud Run:
|
|
||||||
```bash
|
|
||||||
gcloud run deploy sereact --image gcr.io/your-project/sereact --platform managed
|
|
||||||
```
|
|
||||||
|
|
||||||
## Local Development with Docker Compose
|
|
||||||
|
|
||||||
To run the application locally using Docker Compose:
|
|
||||||
|
|
||||||
1. Make sure you have Docker and Docker Compose installed
|
|
||||||
2. Run the following command in the project root:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
docker compose up
|
|
||||||
```
|
|
||||||
|
|
||||||
This will:
|
|
||||||
- Build the API container based on the Dockerfile
|
|
||||||
- Mount your local codebase into the container for live reloading
|
|
||||||
- Mount your Firestore credentials for authentication
|
|
||||||
- Expose the API on http://localhost:8000
|
|
||||||
|
|
||||||
To stop the containers:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
docker compose down
|
|
||||||
```
|
|
||||||
|
|
||||||
To rebuild containers after making changes to the Dockerfile or requirements:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
docker compose up --build
|
|
||||||
```
|
|
||||||
|
|
||||||
## Additional Information
|
|
||||||
|
|
||||||
## Design Decisions
|
|
||||||
|
|
||||||
### Database Selection: Firestore
|
|
||||||
|
|
||||||
- **Document-oriented model**: Ideal for hierarchical team/user/image data structures with flexible schemas
|
|
||||||
- **Real-time capabilities**: Enables live updates for collaborative features
|
|
||||||
- **Automatic scaling**: Handles variable workloads without manual intervention
|
|
||||||
- **ACID transactions**: Ensures data integrity for critical operations
|
|
||||||
- **Security rules**: Granular access control at the document level
|
|
||||||
- **Seamless GCP integration**: Works well with other Google Cloud services
|
|
||||||
|
|
||||||
### Storage Solution: Google Cloud Storage
|
|
||||||
|
|
||||||
- **Object storage optimized for binary data**: Perfect for image files of varying sizes
|
|
||||||
- **Content-delivery capabilities**: Fast global access to images
|
|
||||||
- **Lifecycle management**: Automated rules for moving less-accessed images to cheaper storage tiers
|
|
||||||
- **Fine-grained access control**: Secure pre-signed URLs for temporary access
|
|
||||||
- **Versioning support**: Maintains image history when needed
|
|
||||||
- **Cost-effective**: Pay only for what you use with no minimum fees
|
|
||||||
|
|
||||||
### Decoupled Embedding Generation
|
|
||||||
|
|
||||||
We deliberately decoupled the image embedding process from the upload flow for several reasons:
|
|
||||||
|
|
||||||
1. **Upload responsiveness**: Users experience fast upload times since compute-intensive embedding generation happens asynchronously
|
|
||||||
2. **System resilience**: Upload service remains available even if embedding service experiences issues
|
|
||||||
3. **Independent scaling**: Each component can scale based on its specific resource needs
|
|
||||||
4. **Cost optimization**: Cloud Functions only run when needed, avoiding idle compute costs
|
|
||||||
5. **Processing flexibility**: Can modify embedding algorithms without affecting the core upload flow
|
|
||||||
6. **Batch processing**: Potential to batch embedding generation for further cost optimization
|
|
||||||
|
|
||||||
### Latency Considerations
|
|
||||||
|
|
||||||
- **API response times**: FastAPI provides high-performance request handling
|
|
||||||
- **Caching strategy**: Frequently accessed images and search results are cached
|
|
||||||
- **Edge deployment**: Cloud Run regional deployment optimizes for user location
|
|
||||||
- **Async processing**: Non-blocking operations for concurrent request handling
|
|
||||||
- **Embedding pre-computation**: All embeddings are generated ahead of time, making searches fast
|
|
||||||
- **Search optimization**: Vector database indices are optimized for quick similarity searches
|
|
||||||
|
|
||||||
### Cost Optimization
|
|
||||||
|
|
||||||
- **Serverless architecture**: Pay-per-use model eliminates idle infrastructure costs
|
|
||||||
- **Storage tiering**: Automatic migration of older images to cheaper storage classes
|
|
||||||
- **Compute efficiency**: Cloud Functions minimize compute costs through precise scaling
|
|
||||||
- **Caching**: Reduces repeated processing of the same data
|
|
||||||
- **Resource throttling**: Rate limits prevent unexpected usage spikes
|
|
||||||
- **Embedding dimensions**: Balancing vector size for accuracy vs. storage costs
|
|
||||||
- **Query optimization**: Efficient search patterns to minimize vector database operations
|
|
||||||
|
|
||||||
### Scalability Approach
|
|
||||||
|
|
||||||
- **Horizontal scaling**: All components can scale out rather than up
|
|
||||||
- **Stateless design**: API servers maintain no local state, enabling easy replication
|
|
||||||
- **Queue-based workload distribution**: Prevents system overload during traffic spikes
|
|
||||||
- **Database sharding capability**: Firestore automatically shards data for growth
|
|
||||||
- **Vector database partitioning**: Pinecone handles distributed vector search at scale
|
|
||||||
- **Load balancing**: Traffic distributed across multiple service instances
|
|
||||||
- **Microservice architecture**: Individual components can scale independently based on demand
|
|
||||||
|
|
||||||
### Security Architecture
|
|
||||||
|
|
||||||
- **API key authentication**: Simple but effective access control for machine-to-machine communication
|
|
||||||
- **Team-based permissions**: Multi-tenant isolation with hierarchical access controls
|
|
||||||
- **Encrypted storage**: All data encrypted at rest and in transit
|
|
||||||
- **Secret management**: Sensitive configuration isolated from application code
|
|
||||||
- **Minimal attack surface**: Limited public endpoints with appropriate rate limiting
|
|
||||||
- **Audit logging**: Comprehensive activity tracking for security analysis
|
|
||||||
|
|
||||||
### API Key Authentication System
|
|
||||||
|
|
||||||
SEREACT uses a simple API key authentication system:
|
|
||||||
|
|
||||||
#### Key Generation and Storage
|
|
||||||
|
|
||||||
- API keys are generated as cryptographically secure random strings
|
|
||||||
- Each team can have multiple API keys
|
|
||||||
- Keys are never stored in plaintext - only secure hashes are saved to the database
|
|
||||||
|
|
||||||
#### Authentication Flow
|
|
||||||
|
|
||||||
1. Client includes the API key in requests via the `X-API-Key` HTTP header
|
|
||||||
2. Auth middleware validates the key by:
|
|
||||||
- Hashing the provided key
|
|
||||||
- Querying Firestore for a matching hash
|
|
||||||
- Verifying the key belongs to the appropriate team
|
|
||||||
3. Request is either authorized to proceed or rejected with 401/403 status
|
|
||||||
|
|
||||||
#### Key Management
|
|
||||||
|
|
||||||
- API keys can be created through the API:
|
|
||||||
- User makes an authenticated request
|
|
||||||
- The system generates a new random key and returns it ONCE
|
|
||||||
- Only the hash is stored in the database
|
|
||||||
- Keys can be viewed and revoked through dedicated endpoints
|
|
||||||
- Each API key use is logged for audit purposes
|
|
||||||
|
|
||||||
#### Design Considerations
|
|
||||||
|
|
||||||
- No master/global API key exists to eliminate single points of failure
|
|
||||||
- All keys are scoped to specific teams to enforce multi-tenant isolation
|
|
||||||
- Keys are transmitted only over HTTPS to prevent interception
|
|
||||||
|
|
||||||
This authentication approach balances security with usability for machine-to-machine API interactions, while maintaining complete isolation between different teams using the system.
|
|
||||||
|
|
||||||
### Database Structure
|
|
||||||
|
|
||||||
The Firestore database is organized into collections and documents with the following structure:
|
|
||||||
|
|
||||||
#### Collections and Documents
|
|
||||||
|
|
||||||
```
|
|
||||||
firestore-root/
|
|
||||||
├── teams/ # Teams collection
|
|
||||||
│ └── {team_id}/ # Team document
|
|
||||||
│ ├── name: string # Team name
|
|
||||||
│ ├── created_at: timestamp
|
|
||||||
│ ├── updated_at: timestamp
|
|
||||||
│ │
|
|
||||||
│ ├── users/ # Team users subcollection
|
|
||||||
│ │ └── {user_id}/ # User document in team context
|
|
||||||
│ │ ├── role: string (admin, member, viewer)
|
|
||||||
│ │ ├── joined_at: timestamp
|
|
||||||
│ │ └── status: string (active, inactive)
|
|
||||||
│ │
|
|
||||||
│ ├── api_keys/ # Team API keys subcollection
|
|
||||||
│ │ └── {api_key_id}/ # API key document
|
|
||||||
│ │ ├── key_hash: string # Hashed API key value
|
|
||||||
│ │ ├── name: string # Key name/description
|
|
||||||
│ │ ├── created_at: timestamp
|
|
||||||
│ │ ├── expires_at: timestamp
|
|
||||||
│ │ ├── created_by: user_id
|
|
||||||
│ │ └── permissions: array # Specific permissions
|
|
||||||
│ │
|
|
||||||
│ └── collections/ # Image collections subcollection
|
|
||||||
│ └── {collection_id}/ # Collection document
|
|
||||||
│ ├── name: string
|
|
||||||
│ ├── description: string
|
|
||||||
│ ├── created_at: timestamp
|
|
||||||
│ ├── created_by: user_id
|
|
||||||
│ └── metadata: map # Collection-level metadata
|
|
||||||
│
|
|
||||||
├── users/ # Global users collection
|
|
||||||
│ └── {user_id}/ # User document
|
|
||||||
│ ├── email: string
|
|
||||||
│ ├── name: string
|
|
||||||
│ ├── created_at: timestamp
|
|
||||||
│ └── settings: map # User preferences
|
|
||||||
│
|
|
||||||
└── images/ # Images collection
|
|
||||||
└── {image_id}/ # Image document
|
|
||||||
├── filename: string
|
|
||||||
├── storage_path: string # GCS path
|
|
||||||
├── mime_type: string
|
|
||||||
├── size_bytes: number
|
|
||||||
├── width: number
|
|
||||||
├── height: number
|
|
||||||
├── uploaded_at: timestamp
|
|
||||||
├── uploaded_by: user_id
|
|
||||||
├── team_id: string
|
|
||||||
├── collection_id: string # Optional parent collection
|
|
||||||
├── status: string (processing, ready, error)
|
|
||||||
├── embedding_id: string # Reference to vector DB
|
|
||||||
├── metadata: map # Extracted and custom metadata
|
|
||||||
│ ├── labels: array # AI-generated labels
|
|
||||||
│ ├── colors: array # Dominant colors
|
|
||||||
│ ├── objects: array # Detected objects
|
|
||||||
│ ├── custom: map # User-defined metadata
|
|
||||||
│ └── exif: map # Original image EXIF data
|
|
||||||
│
|
|
||||||
└── processing/ # Processing subcollection
|
|
||||||
└── {job_id}/ # Processing job document
|
|
||||||
├── type: string # embedding, analysis, etc.
|
|
||||||
├── status: string # pending, running, complete, error
|
|
||||||
├── created_at: timestamp
|
|
||||||
├── updated_at: timestamp
|
|
||||||
├── completed_at: timestamp
|
|
||||||
└── error: string # Error message if applicable
|
|
||||||
```
|
|
||||||
|
|
||||||
#### Key Relationships and Indexes
|
|
||||||
|
|
||||||
- **Team-User**: Many-to-many relationship through the team's users subcollection
|
|
||||||
- **Team-Image**: One-to-many relationship (images belong to one team)
|
|
||||||
- **Collection-Image**: One-to-many relationship (images can belong to one collection)
|
|
||||||
- **User-Image**: One-to-many relationship (upload attribution)
|
|
||||||
|
|
||||||
#### Composite Indexes
|
|
||||||
|
|
||||||
The following composite indexes are created to support efficient queries:
|
|
||||||
|
|
||||||
1. `images` collection:
|
|
||||||
- `team_id` ASC, `uploaded_at` DESC → List recent images for a team
|
|
||||||
- `team_id` ASC, `collection_id` ASC, `uploaded_at` DESC → List recent images in a collection
|
|
||||||
- `team_id` ASC, `status` ASC, `uploaded_at` ASC → Find oldest processing images
|
|
||||||
- `uploaded_by` ASC, `uploaded_at` DESC → List user's recent uploads
|
|
||||||
|
|
||||||
2. `users` subcollection (within teams):
|
|
||||||
- `role` ASC, `joined_at` DESC → List team members by role
|
|
||||||
|
|
||||||
#### Security Rules
|
|
||||||
|
|
||||||
Firestore security rules enforce the following access patterns:
|
|
||||||
|
|
||||||
- Team admins can read/write all team data
|
|
||||||
- Team members can read all team data but can only write to collections and images
|
|
||||||
- Team viewers can only read team data
|
|
||||||
- Users can only access teams they belong to
|
|
||||||
- API keys have scoped access based on their assigned permissions
|
|
||||||
|
|
||||||
## License
|
## License
|
||||||
|
|
||||||
This project is licensed under the MIT License - see the LICENSE file for details.
|
This project is licensed under the MIT License - see the LICENSE file for details.
|
||||||
|
|||||||
75
deployment/cloud-function/deploy.sh
Normal file
75
deployment/cloud-function/deploy.sh
Normal file
@ -0,0 +1,75 @@
|
|||||||
|
#!/bin/bash
#
# Deployment script for the image-embedding Cloud Function.
# Deploys a 2nd-gen Cloud Function triggered by a Pub/Sub topic, then
# ensures the topic's subscription exists and caps redelivery at 3 attempts.

set -e

# Configuration — every value can be overridden via the environment.
PROJECT_ID=${GOOGLE_CLOUD_PROJECT:-"your-project-id"}
FUNCTION_NAME="process-image-embedding"
REGION=${REGION:-"us-central1"}
PUBSUB_TOPIC=${PUBSUB_TOPIC:-"image-processing-topic"}
MEMORY=${MEMORY:-"512MB"}
TIMEOUT=${TIMEOUT:-"540s"}

# Environment variables passed through to the function at runtime.
PINECONE_API_KEY=${PINECONE_API_KEY:-""}
PINECONE_ENVIRONMENT=${PINECONE_ENVIRONMENT:-""}
PINECONE_INDEX_NAME=${PINECONE_INDEX_NAME:-"image-embeddings"}

echo "Deploying Cloud Function: $FUNCTION_NAME"
echo "Project: $PROJECT_ID"
echo "Region: $REGION"
echo "Pub/Sub Topic: $PUBSUB_TOPIC"

# Warn (but deliberately do not abort) when Pinecone credentials are
# missing — the function can still run without storing embeddings.
if [ -z "$PINECONE_API_KEY" ]; then
    echo "Warning: PINECONE_API_KEY not set. Function will not store embeddings."
fi

if [ -z "$PINECONE_ENVIRONMENT" ]; then
    echo "Warning: PINECONE_ENVIRONMENT not set. Function will not store embeddings."
fi

# Deploy the function (gen2, Python 3.11, Pub/Sub trigger).
gcloud functions deploy $FUNCTION_NAME \
    --gen2 \
    --runtime=python311 \
    --region=$REGION \
    --source=. \
    --entry-point=process_image_embedding \
    --trigger-topic=$PUBSUB_TOPIC \
    --memory=$MEMORY \
    --timeout=$TIMEOUT \
    --set-env-vars="PINECONE_API_KEY=$PINECONE_API_KEY,PINECONE_ENVIRONMENT=$PINECONE_ENVIRONMENT,PINECONE_INDEX_NAME=$PINECONE_INDEX_NAME" \
    --retry \
    --max-instances=10 \
    --min-instances=0

echo "Cloud Function deployed successfully!"
echo "Function name: $FUNCTION_NAME"
echo "Trigger: Pub/Sub topic '$PUBSUB_TOPIC'"
echo "Region: $REGION"

# Set up the retry policy for the Pub/Sub subscription.
SUBSCRIPTION_NAME="${PUBSUB_TOPIC}-subscription"

echo "Configuring retry policy for subscription: $SUBSCRIPTION_NAME"

# Create the subscription only if it does not already exist.
if ! gcloud pubsub subscriptions describe $SUBSCRIPTION_NAME --project=$PROJECT_ID >/dev/null 2>&1; then
    echo "Creating Pub/Sub subscription: $SUBSCRIPTION_NAME"
    gcloud pubsub subscriptions create $SUBSCRIPTION_NAME \
        --topic=$PUBSUB_TOPIC \
        --project=$PROJECT_ID
fi

# Limit redelivery to 3 attempts with backoff between 10s and 600s.
# NOTE(review): gcloud may require --dead-letter-topic alongside
# --max-delivery-attempts — confirm against current gcloud behavior.
gcloud pubsub subscriptions update $SUBSCRIPTION_NAME \
    --max-retry-delay=600s \
    --min-retry-delay=10s \
    --max-delivery-attempts=3 \
    --project=$PROJECT_ID

echo "Retry policy configured: max 3 delivery attempts"
echo "Deployment complete!"
||||||
263
deployment/cloud-function/main.py
Normal file
263
deployment/cloud-function/main.py
Normal file
@ -0,0 +1,263 @@
|
|||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import base64
|
||||||
|
from datetime import datetime
|
||||||
|
from typing import Dict, Any, Optional
|
||||||
|
import functions_framework
|
||||||
|
from google.cloud import vision
|
||||||
|
from google.cloud import firestore
|
||||||
|
from google.cloud import storage
|
||||||
|
import pinecone
|
||||||
|
import numpy as np
|
||||||
|
from PIL import Image
|
||||||
|
import io
|
||||||
|
import os
|
||||||
|
|
||||||
|
# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Initialize Google Cloud clients once at module import (cold start) so they
# are reused across warm invocations of the function.
vision_client = vision.ImageAnnotatorClient()
firestore_client = firestore.Client()
storage_client = storage.Client()

# Initialize Pinecone from environment configuration
# (supplied via --set-env-vars at deploy time).
PINECONE_API_KEY = os.environ.get('PINECONE_API_KEY')
PINECONE_ENVIRONMENT = os.environ.get('PINECONE_ENVIRONMENT')
PINECONE_INDEX_NAME = os.environ.get('PINECONE_INDEX_NAME', 'image-embeddings')

if PINECONE_API_KEY and PINECONE_ENVIRONMENT:
    # pinecone-client 2.x initialisation style (matches the requirements pin).
    pinecone.init(api_key=PINECONE_API_KEY, environment=PINECONE_ENVIRONMENT)
    index = pinecone.Index(PINECONE_INDEX_NAME)
else:
    # The function still runs without Pinecone; embeddings are simply dropped.
    index = None
    logger.warning("Pinecone not configured, embeddings will not be stored")
|
||||||
|
|
||||||
|
@functions_framework.cloud_event
def process_image_embedding(cloud_event):
    """Cloud Function entry point triggered by Pub/Sub.

    Decodes the image-processing task from the Pub/Sub message, tracks the
    image's embedding status in Firestore, and runs embedding generation.
    On an unexpected error the exception is re-raised so the Pub/Sub retry
    policy (configured at deploy time) re-delivers the message.

    Args:
        cloud_event: CloudEvent whose ``data["message"]["data"]`` carries a
            base64-encoded JSON task payload.
    """
    message = None
    try:
        message = _decode_pubsub_message(cloud_event)
        logger.info(f"Processing image embedding task: {message}")

        # Extract task data
        image_id = message.get('image_id')
        storage_path = message.get('storage_path')
        team_id = message.get('team_id')
        retry_count = message.get('retry_count', 0)

        if not all([image_id, storage_path, team_id]):
            # Malformed task: acknowledge (return) rather than retry forever.
            logger.error(f"Missing required fields in message: {message}")
            return

        # Update image status to processing
        update_image_status(image_id, 'processing', retry_count)

        # Process the image
        success = process_image(image_id, storage_path, team_id, retry_count)

        if success:
            logger.info(f"Successfully processed image {image_id}")
            update_image_status(image_id, 'success', retry_count)
        else:
            logger.error(f"Failed to process image {image_id}")
            update_image_status(image_id, 'failed', retry_count, "Processing failed")

        # Retry logic is handled by the Pub/Sub retry policy; the function
        # will be re-invoked automatically up to 3 times.

    except Exception as e:
        logger.error(f"Error in process_image_embedding: {e}")
        # Best-effort: record the failure on the image document, but never
        # let the status update mask the original error.
        try:
            if message is None:
                message = _decode_pubsub_message(cloud_event)
            image_id = message.get('image_id')
            retry_count = message.get('retry_count', 0)
            if image_id:
                update_image_status(image_id, 'failed', retry_count, str(e))
        except Exception as status_error:
            # Original code silently swallowed this; log it instead.
            logger.warning(f"Could not record failure status: {status_error}")
        # Bare raise preserves the original traceback for Cloud Logging
        # (``raise e`` would re-anchor it here).
        raise


def _decode_pubsub_message(cloud_event) -> Dict[str, Any]:
    """Decode and JSON-parse the base64 Pub/Sub payload of a CloudEvent."""
    raw = base64.b64decode(cloud_event.data["message"]["data"]).decode('utf-8')
    return json.loads(raw)
|
||||||
|
|
||||||
|
def process_image(image_id: str, storage_path: str, team_id: str, retry_count: int) -> bool:
    """
    Process a single image to generate embeddings.

    Downloads the image bytes from Cloud Storage, generates a feature
    vector, and — when Pinecone is configured — upserts it with metadata
    and records the embedding details in Firestore.

    Args:
        image_id: The ID of the image to process
        storage_path: The GCS path of the image; assumed to be
            "<bucket>/<object/path...>" with no "gs://" prefix — TODO
            confirm against the uploader's storage_path format
        team_id: The team ID that owns the image
        retry_count: Current retry count

    Returns:
        True if processing was successful, False otherwise
    """
    try:
        # Split "<bucket>/<blob path>" into its two components.
        bucket_name = storage_path.split('/')[0]
        blob_path = '/'.join(storage_path.split('/')[1:])

        bucket = storage_client.bucket(bucket_name)
        blob = bucket.blob(blob_path)

        if not blob.exists():
            logger.error(f"Image not found in storage: {storage_path}")
            return False

        # Download image data fully into memory (bounded by the function's
        # memory limit).
        image_data = blob.download_as_bytes()

        # Generate embeddings using Google Cloud Vision
        embeddings = generate_image_embeddings(image_data)

        if embeddings is None:
            logger.error(f"Failed to generate embeddings for image {image_id}")
            return False

        # Store embeddings in Pinecone. When Pinecone is not configured the
        # store step is skipped but the task still counts as successful.
        if index:
            # Prefixing the vector id with the team keeps teams separable.
            embedding_id = f"{team_id}_{image_id}"

            # Metadata stored alongside the vector for filtered queries.
            metadata = {
                'image_id': image_id,
                'team_id': team_id,
                'storage_path': storage_path,
                'created_at': datetime.utcnow().isoformat()
            }

            # Upsert to Pinecone
            index.upsert(vectors=[(embedding_id, embeddings.tolist(), metadata)])

            logger.info(f"Stored embeddings for image {image_id} in Pinecone")

            # Update Firestore with embedding info
            update_image_embedding_info(image_id, embedding_id, 'google-vision-v1')

        return True

    except Exception as e:
        logger.error(f"Error processing image {image_id}: {e}")
        return False
|
||||||
|
|
||||||
|
def generate_image_embeddings(image_data: bytes) -> Optional[np.ndarray]:
    """
    Generate image embeddings using Google Cloud Vision API.

    Combines object-localization features (deterministic text hash of the
    object name, confidence score, bounding-box geometry) with the top-10
    label-detection features, padded/truncated to a fixed 512-dimension
    float32 vector.

    Args:
        image_data: Binary image data

    Returns:
        Numpy array of embeddings (shape (512,)) or None if failed
    """
    try:
        # Create Vision API image object
        image = vision.Image(content=image_data)

        # Use object localization to get feature vectors; this provides
        # rich semantic information about the image.
        response = vision_client.object_localization(image=image)

        if response.error.message:
            logger.error(f"Vision API error: {response.error.message}")
            return None

        features = []

        # Get object detection features
        for obj in response.localized_object_annotations:
            vertices = obj.bounding_poly.normalized_vertices
            features.extend([
                _stable_text_feature(obj.name),  # deterministic hash of object name
                obj.score,                       # confidence score
                vertices[0].x,                   # bounding box origin
                vertices[0].y,
                vertices[2].x - vertices[0].x,   # width
                vertices[2].y - vertices[0].y,   # height
            ])

        # Also get label detection for additional semantic information
        label_response = vision_client.label_detection(image=image)

        for label in label_response.label_annotations[:10]:  # top 10 labels
            features.extend([
                _stable_text_feature(label.description),
                label.score,
            ])

        # Pad or truncate to the fixed dimensionality expected by the index.
        target_size = 512
        if len(features) < target_size:
            features.extend([0.0] * (target_size - len(features)))
        else:
            features = features[:target_size]

        return np.array(features, dtype=np.float32)

    except Exception as e:
        logger.error(f"Error generating embeddings: {e}")
        return None


def _stable_text_feature(text: str) -> float:
    """Map text to a deterministic feature value in [0, 1).

    The original implementation used the builtin ``hash()``, which is
    randomized per process (PYTHONHASHSEED), so the same image produced
    different vectors on different function instances — breaking vector
    similarity search. An MD5 digest is stable across processes and
    platforms.
    """
    import hashlib  # local import: keeps the module's deployed import list unchanged
    digest = hashlib.md5(text.encode('utf-8')).digest()
    return int.from_bytes(digest[:4], 'big') % 1000 / 1000.0
|
||||||
|
|
||||||
|
def update_image_status(image_id: str, status: str, retry_count: int, error_message: Optional[str] = None):
    """
    Update the image embedding status in Firestore.

    Errors are logged and deliberately swallowed: a failed status write
    must not break (or re-trigger) the processing pipeline itself.

    Args:
        image_id: The ID of the image
        status: The new status (processing, success, failed)
        retry_count: Current retry count
        error_message: Error message if status is failed
    """
    try:
        doc_ref = firestore_client.collection('images').document(image_id)

        update_data = {
            'embedding_status': status,
            'embedding_retry_count': retry_count,
            'embedding_last_attempt': datetime.utcnow()
        }

        if error_message:
            update_data['embedding_error'] = error_message

        if status == 'success':
            update_data['has_embedding'] = True
            update_data['embedding_error'] = None  # Clear any previous error

        # NOTE: update() fails if the document does not exist — assumed to be
        # created by the upload API before the task is published; confirm.
        doc_ref.update(update_data)
        logger.info(f"Updated image {image_id} status to {status}")

    except Exception as e:
        logger.error(f"Error updating image status: {e}")
|
||||||
|
|
||||||
|
def update_image_embedding_info(image_id: str, embedding_id: str, model: str):
    """
    Record vector-store embedding details on an image's Firestore document.

    Args:
        image_id: The ID of the image
        embedding_id: The ID of the embedding in the vector database
        model: The model used to generate embeddings
    """
    try:
        document = firestore_client.collection('images').document(image_id)
        document.update({
            'embedding_id': embedding_id,
            'embedding_model': model,
            'has_embedding': True,
        })
        logger.info(f"Updated image {image_id} with embedding info")
    except Exception as e:
        # Best-effort write: log and continue rather than failing the task.
        logger.error(f"Error updating image embedding info: {e}")
|
||||||
7
deployment/cloud-function/requirements.txt
Normal file
7
deployment/cloud-function/requirements.txt
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
functions-framework==3.4.0
|
||||||
|
google-cloud-vision==3.4.5
|
||||||
|
google-cloud-firestore==2.11.1
|
||||||
|
google-cloud-storage==2.12.0
|
||||||
|
pinecone-client==2.2.4
|
||||||
|
numpy==1.24.3
|
||||||
|
Pillow==10.1.0
|
||||||
104
deployment/terraform/pubsub.tf
Normal file
104
deployment/terraform/pubsub.tf
Normal file
@ -0,0 +1,104 @@
|
|||||||
|
# ---------------------------------------------------------------------------
# Pub/Sub wiring for asynchronous image processing:
#   topic -> subscription (retry backoff + DLQ after 3 attempts) -> DLQ topic
# NOTE(review): dead_letter_policy also requires the project's Pub/Sub
# service agent to hold roles/pubsub.subscriber on the subscription and
# roles/pubsub.publisher on the DLQ topic — confirm those grants exist.
# ---------------------------------------------------------------------------

# Pub/Sub topic for image processing tasks
resource "google_pubsub_topic" "image_processing" {
  name = var.pubsub_topic_name

  labels = {
    environment = var.environment
    service     = "sereact"
    component   = "image-processing"
  }
}

# Pub/Sub subscription with retry policy
resource "google_pubsub_subscription" "image_processing" {
  name  = "${var.pubsub_topic_name}-subscription"
  topic = google_pubsub_topic.image_processing.name

  # Exponential backoff between redelivery attempts.
  retry_policy {
    minimum_backoff = "10s"
    maximum_backoff = "600s"
  }

  # Dead letter policy after 3 failed attempts
  dead_letter_policy {
    dead_letter_topic     = google_pubsub_topic.image_processing_dlq.id
    max_delivery_attempts = 3
  }

  # Message retention
  message_retention_duration = "604800s" # 7 days
  retain_acked_messages      = false

  # Acknowledgment deadline — matches the Cloud Function's processing timeout.
  ack_deadline_seconds = 600 # 10 minutes

  labels = {
    environment = var.environment
    service     = "sereact"
    component   = "image-processing"
  }
}

# Dead letter queue for failed messages
resource "google_pubsub_topic" "image_processing_dlq" {
  name = "${var.pubsub_topic_name}-dlq"

  labels = {
    environment = var.environment
    service     = "sereact"
    component   = "image-processing-dlq"
  }
}

# Dead letter subscription for monitoring failed messages
resource "google_pubsub_subscription" "image_processing_dlq" {
  name  = "${var.pubsub_topic_name}-dlq-subscription"
  topic = google_pubsub_topic.image_processing_dlq.name

  # Long retention so failed messages can be inspected and replayed.
  message_retention_duration = "2592000s" # 30 days
  retain_acked_messages      = true

  labels = {
    environment = var.environment
    service     = "sereact"
    component   = "image-processing-dlq"
  }
}

# IAM binding for Cloud Function to publish to topic
# NOTE(review): *_iam_binding is authoritative for the role and removes any
# other members holding it — consider *_iam_member if other principals need
# roles/pubsub.publisher on this topic.
resource "google_pubsub_topic_iam_binding" "image_processing_publisher" {
  topic = google_pubsub_topic.image_processing.name
  role  = "roles/pubsub.publisher"

  members = [
    "serviceAccount:${var.cloud_run_service_account}",
  ]
}

# IAM binding for Cloud Function to subscribe
resource "google_pubsub_subscription_iam_binding" "image_processing_subscriber" {
  subscription = google_pubsub_subscription.image_processing.name
  role         = "roles/pubsub.subscriber"

  members = [
    "serviceAccount:${var.cloud_function_service_account}",
  ]
}

# Output the topic and subscription names
output "pubsub_topic_name" {
  description = "Name of the Pub/Sub topic for image processing"
  value       = google_pubsub_topic.image_processing.name
}

output "pubsub_subscription_name" {
  description = "Name of the Pub/Sub subscription for image processing"
  value       = google_pubsub_subscription.image_processing.name
}

output "pubsub_dlq_topic_name" {
  description = "Name of the dead letter queue topic"
  value       = google_pubsub_topic.image_processing_dlq.name
}
|
||||||
@ -24,4 +24,26 @@ variable "firestore_db_name" {
|
|||||||
description = "The name of the Firestore database"
|
description = "The name of the Firestore database"
|
||||||
type = string
|
type = string
|
||||||
default = "imagedb"
|
default = "imagedb"
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "environment" {
  description = "The deployment environment (dev, staging, prod)"
  type        = string
  default     = "dev"
}

variable "pubsub_topic_name" {
  description = "The name of the Pub/Sub topic for image processing"
  type        = string
  default     = "image-processing-topic"
}

# No default: must be supplied per environment.
variable "cloud_run_service_account" {
  description = "The service account email for Cloud Run"
  type        = string
}

# No default: must be supplied per environment.
variable "cloud_function_service_account" {
  description = "The service account email for Cloud Functions"
  type        = string
}
|
}
|
||||||
@ -6,6 +6,7 @@ python-dotenv==1.0.0
|
|||||||
google-cloud-storage==2.12.0
|
google-cloud-storage==2.12.0
|
||||||
google-cloud-vision==3.4.5
|
google-cloud-vision==3.4.5
|
||||||
google-cloud-firestore==2.11.1
|
google-cloud-firestore==2.11.1
|
||||||
|
google-cloud-pubsub==2.18.4
|
||||||
python-multipart==0.0.6
|
python-multipart==0.0.6
|
||||||
python-jose==3.3.0
|
python-jose==3.3.0
|
||||||
passlib==1.7.4
|
passlib==1.7.4
|
||||||
|
|||||||
@ -10,6 +10,7 @@ from src.db.repositories.image_repository import image_repository
|
|||||||
from src.services.storage import StorageService
|
from src.services.storage import StorageService
|
||||||
from src.services.image_processor import ImageProcessor
|
from src.services.image_processor import ImageProcessor
|
||||||
from src.services.embedding_service import EmbeddingService
|
from src.services.embedding_service import EmbeddingService
|
||||||
|
from src.services.pubsub_service import pubsub_service
|
||||||
from src.models.image import ImageModel
|
from src.models.image import ImageModel
|
||||||
from src.models.user import UserModel
|
from src.models.user import UserModel
|
||||||
from src.schemas.image import ImageResponse, ImageListResponse, ImageCreate, ImageUpdate
|
from src.schemas.image import ImageResponse, ImageListResponse, ImageCreate, ImageUpdate
|
||||||
@ -84,11 +85,17 @@ async def upload_image(
|
|||||||
# Save to database
|
# Save to database
|
||||||
created_image = await image_repository.create(image)
|
created_image = await image_repository.create(image)
|
||||||
|
|
||||||
# Start async processing for embeddings (in background)
|
# Publish image processing task to Pub/Sub
|
||||||
try:
|
try:
|
||||||
await embedding_service.process_image_async(str(created_image.id), storage_path)
|
task_published = await pubsub_service.publish_image_processing_task(
|
||||||
|
image_id=str(created_image.id),
|
||||||
|
storage_path=storage_path,
|
||||||
|
team_id=str(current_user.team_id)
|
||||||
|
)
|
||||||
|
if not task_published:
|
||||||
|
logger.warning(f"Failed to publish processing task for image {created_image.id}")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(f"Failed to start embedding processing: {e}")
|
logger.warning(f"Failed to publish image processing task: {e}")
|
||||||
|
|
||||||
# Convert to response
|
# Convert to response
|
||||||
response = ImageResponse(
|
response = ImageResponse(
|
||||||
|
|||||||
@ -27,6 +27,10 @@ class ImageModel(BaseModel):
|
|||||||
embedding_id: Optional[str] = None
|
embedding_id: Optional[str] = None
|
||||||
embedding_model: Optional[str] = None
|
embedding_model: Optional[str] = None
|
||||||
has_embedding: bool = False
|
has_embedding: bool = False
|
||||||
|
embedding_status: str = "pending" # pending, processing, success, failed
|
||||||
|
embedding_error: Optional[str] = None
|
||||||
|
embedding_retry_count: int = 0
|
||||||
|
embedding_last_attempt: Optional[datetime] = None
|
||||||
|
|
||||||
model_config: ClassVar[dict] = {
|
model_config: ClassVar[dict] = {
|
||||||
"populate_by_name": True,
|
"populate_by_name": True,
|
||||||
|
|||||||
121
src/services/pubsub_service.py
Normal file
121
src/services/pubsub_service.py
Normal file
@ -0,0 +1,121 @@
|
|||||||
|
import json
|
||||||
|
import logging
|
||||||
|
from typing import Dict, Any, Optional
|
||||||
|
from google.cloud import pubsub_v1
|
||||||
|
from google.api_core import retry
|
||||||
|
from src.config.config import settings
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
class PubSubService:
    """Service for Google Cloud Pub/Sub operations.

    Publishes image-processing tasks consumed by the embedding Cloud
    Function. When configuration is missing or client creation fails,
    publishing degrades gracefully by returning False instead of raising.
    """

    # Maximum retry attempts for a failed processing task (mirrors the
    # Pub/Sub subscription's max-delivery-attempts).
    MAX_RETRIES = 3

    def __init__(self):
        # Project/topic come from application settings; either may be unset
        # in local/dev environments, in which case publishing is disabled.
        self.project_id = settings.FIRESTORE_PROJECT_ID
        self.topic_name = settings.PUBSUB_TOPIC
        self.publisher = None
        self._topic_path = None

        if self.project_id and self.topic_name:
            try:
                self.publisher = pubsub_v1.PublisherClient()
                self._topic_path = self.publisher.topic_path(self.project_id, self.topic_name)
                logger.info(f"PubSub service initialized for topic: {self._topic_path}")
            except Exception as e:
                logger.error(f"Failed to initialize PubSub client: {e}")
                self.publisher = None

    async def publish_image_processing_task(
        self,
        image_id: str,
        storage_path: str,
        team_id: str,
        retry_count: int = 0
    ) -> bool:
        """
        Publish an image processing task to Pub/Sub

        Args:
            image_id: The ID of the image to process
            storage_path: The GCS path of the image
            team_id: The team ID that owns the image
            retry_count: Current retry count for this task

        Returns:
            True if message was published successfully, False otherwise
        """
        import asyncio  # local import: avoids changing module-level imports

        if not self.publisher or not self._topic_path:
            logger.error("PubSub client not initialized")
            return False

        try:
            # Message payload consumed by the Cloud Function.
            message_data = {
                "image_id": image_id,
                "storage_path": storage_path,
                "team_id": team_id,
                "retry_count": retry_count,
                "task_type": "generate_embeddings"
            }

            # Convert to JSON bytes
            message_bytes = json.dumps(message_data).encode('utf-8')

            # Attributes let subscribers filter without decoding the body.
            attributes = {
                "task_type": "generate_embeddings",
                "image_id": image_id,
                "retry_count": str(retry_count)
            }

            future = self.publisher.publish(
                self._topic_path,
                message_bytes,
                **attributes
            )

            # future.result() is a *blocking* concurrent.futures wait; the
            # original awaited it inline, stalling the event loop for up to
            # 30 seconds per publish. Run it in a worker thread instead.
            message_id = await asyncio.to_thread(future.result, 30)

            logger.info(f"Published image processing task for image {image_id}, message ID: {message_id}")
            return True

        except Exception as e:
            logger.error(f"Failed to publish image processing task for image {image_id}: {e}")
            return False

    async def publish_retry_task(
        self,
        image_id: str,
        storage_path: str,
        team_id: str,
        retry_count: int,
        error_message: str
    ) -> bool:
        """
        Publish a retry task for failed image processing

        Args:
            image_id: The ID of the image to process
            storage_path: The GCS path of the image
            team_id: The team ID that owns the image
            retry_count: Current retry count for this task
            error_message: The error message from the previous attempt

        Returns:
            True if message was published successfully, False otherwise
        """
        if retry_count >= self.MAX_RETRIES:
            logger.warning(f"Maximum retry count reached for image {image_id}, not publishing retry task")
            return False

        return await self.publish_image_processing_task(
            image_id=image_id,
            storage_path=storage_path,
            team_id=team_id,
            retry_count=retry_count + 1
        )


# Create a singleton service
pubsub_service = PubSubService()
|
||||||
408
tests/api/test_images_pubsub.py
Normal file
408
tests/api/test_images_pubsub.py
Normal file
@ -0,0 +1,408 @@
|
|||||||
|
import pytest
|
||||||
|
from unittest.mock import Mock, patch, AsyncMock
|
||||||
|
from fastapi.testclient import TestClient
|
||||||
|
from fastapi import status
|
||||||
|
import io
|
||||||
|
from PIL import Image
|
||||||
|
|
||||||
|
from src.api.v1.images import router
|
||||||
|
from src.models.user import UserModel
|
||||||
|
from src.models.team import PyObjectId
|
||||||
|
|
||||||
|
|
||||||
|
class TestImageUploadWithPubSub:
|
||||||
|
"""Test cases for image upload with Pub/Sub integration"""
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def mock_current_user(self):
|
||||||
|
"""Mock current user"""
|
||||||
|
user = UserModel(
|
||||||
|
id=PyObjectId(),
|
||||||
|
email="test@example.com",
|
||||||
|
team_id=PyObjectId(),
|
||||||
|
is_active=True
|
||||||
|
)
|
||||||
|
return user
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def test_image_file(self):
|
||||||
|
"""Create a test image file"""
|
||||||
|
# Create a simple test image
|
||||||
|
img = Image.new('RGB', (100, 100), color='red')
|
||||||
|
img_bytes = io.BytesIO()
|
||||||
|
img.save(img_bytes, format='JPEG')
|
||||||
|
img_bytes.seek(0)
|
||||||
|
return img_bytes
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def mock_storage_service(self):
|
||||||
|
"""Mock storage service"""
|
||||||
|
with patch('src.api.v1.images.storage_service') as mock_service:
|
||||||
|
mock_service.upload_file = AsyncMock(return_value=(
|
||||||
|
"bucket/team-123/image.jpg",
|
||||||
|
"image/jpeg",
|
||||||
|
1024,
|
||||||
|
{"width": 100, "height": 100}
|
||||||
|
))
|
||||||
|
yield mock_service
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def mock_image_repository(self):
|
||||||
|
"""Mock image repository"""
|
||||||
|
with patch('src.api.v1.images.image_repository') as mock_repo:
|
||||||
|
mock_image = Mock()
|
||||||
|
mock_image.id = PyObjectId()
|
||||||
|
mock_image.filename = "test.jpg"
|
||||||
|
mock_image.original_filename = "test.jpg"
|
||||||
|
mock_image.file_size = 1024
|
||||||
|
mock_image.content_type = "image/jpeg"
|
||||||
|
mock_image.storage_path = "bucket/team-123/image.jpg"
|
||||||
|
mock_image.team_id = PyObjectId()
|
||||||
|
mock_image.uploader_id = PyObjectId()
|
||||||
|
mock_image.upload_date = "2023-01-01T00:00:00"
|
||||||
|
mock_image.description = None
|
||||||
|
mock_image.tags = []
|
||||||
|
mock_image.metadata = {}
|
||||||
|
mock_image.has_embedding = False
|
||||||
|
mock_image.collection_id = None
|
||||||
|
|
||||||
|
mock_repo.create = AsyncMock(return_value=mock_image)
|
||||||
|
yield mock_repo
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def mock_pubsub_service(self):
|
||||||
|
"""Mock Pub/Sub service"""
|
||||||
|
with patch('src.api.v1.images.pubsub_service') as mock_service:
|
||||||
|
mock_service.publish_image_processing_task = AsyncMock(return_value=True)
|
||||||
|
yield mock_service
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_upload_image_publishes_to_pubsub(
|
||||||
|
self,
|
||||||
|
mock_current_user,
|
||||||
|
test_image_file,
|
||||||
|
mock_storage_service,
|
||||||
|
mock_image_repository,
|
||||||
|
mock_pubsub_service
|
||||||
|
):
|
||||||
|
"""Test that image upload publishes a task to Pub/Sub"""
|
||||||
|
with patch('src.api.v1.images.get_current_user', return_value=mock_current_user):
|
||||||
|
from src.api.v1.images import upload_image
|
||||||
|
from fastapi import UploadFile
|
||||||
|
|
||||||
|
# Create upload file
|
||||||
|
upload_file = UploadFile(
|
||||||
|
filename="test.jpg",
|
||||||
|
file=test_image_file,
|
||||||
|
content_type="image/jpeg"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Mock request
|
||||||
|
request = Mock()
|
||||||
|
request.url.path = "/api/v1/images"
|
||||||
|
request.method = "POST"
|
||||||
|
|
||||||
|
# Call the upload function
|
||||||
|
response = await upload_image(
|
||||||
|
request=request,
|
||||||
|
file=upload_file,
|
||||||
|
current_user=mock_current_user
|
||||||
|
)
|
||||||
|
|
||||||
|
# Verify Pub/Sub task was published
|
||||||
|
mock_pubsub_service.publish_image_processing_task.assert_called_once()
|
||||||
|
|
||||||
|
# Check the call arguments
|
||||||
|
call_args = mock_pubsub_service.publish_image_processing_task.call_args
|
||||||
|
assert call_args[1]['image_id'] == str(mock_image_repository.create.return_value.id)
|
||||||
|
assert call_args[1]['storage_path'] == "bucket/team-123/image.jpg"
|
||||||
|
assert call_args[1]['team_id'] == str(mock_current_user.team_id)
|
||||||
|
|
||||||
|
# Verify response
|
||||||
|
assert response.filename == "test.jpg"
|
||||||
|
assert response.content_type == "image/jpeg"
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_upload_image_pubsub_failure_continues(
|
||||||
|
self,
|
||||||
|
mock_current_user,
|
||||||
|
test_image_file,
|
||||||
|
mock_storage_service,
|
||||||
|
mock_image_repository,
|
||||||
|
mock_pubsub_service
|
||||||
|
):
|
||||||
|
"""Test that upload continues even if Pub/Sub publishing fails"""
|
||||||
|
# Mock Pub/Sub failure
|
||||||
|
mock_pubsub_service.publish_image_processing_task = AsyncMock(return_value=False)
|
||||||
|
|
||||||
|
with patch('src.api.v1.images.get_current_user', return_value=mock_current_user):
|
||||||
|
from src.api.v1.images import upload_image
|
||||||
|
from fastapi import UploadFile
|
||||||
|
|
||||||
|
# Create upload file
|
||||||
|
upload_file = UploadFile(
|
||||||
|
filename="test.jpg",
|
||||||
|
file=test_image_file,
|
||||||
|
content_type="image/jpeg"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Mock request
|
||||||
|
request = Mock()
|
||||||
|
request.url.path = "/api/v1/images"
|
||||||
|
request.method = "POST"
|
||||||
|
|
||||||
|
# Call the upload function - should not raise exception
|
||||||
|
response = await upload_image(
|
||||||
|
request=request,
|
||||||
|
file=upload_file,
|
||||||
|
current_user=mock_current_user
|
||||||
|
)
|
||||||
|
|
||||||
|
# Verify Pub/Sub task was attempted
|
||||||
|
mock_pubsub_service.publish_image_processing_task.assert_called_once()
|
||||||
|
|
||||||
|
# Verify response is still successful
|
||||||
|
assert response.filename == "test.jpg"
|
||||||
|
assert response.content_type == "image/jpeg"
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_upload_image_pubsub_exception_continues(
|
||||||
|
self,
|
||||||
|
mock_current_user,
|
||||||
|
test_image_file,
|
||||||
|
mock_storage_service,
|
||||||
|
mock_image_repository,
|
||||||
|
mock_pubsub_service
|
||||||
|
):
|
||||||
|
"""Test that upload continues even if Pub/Sub publishing raises exception"""
|
||||||
|
# Mock Pub/Sub exception
|
||||||
|
mock_pubsub_service.publish_image_processing_task = AsyncMock(
|
||||||
|
side_effect=Exception("Pub/Sub error")
|
||||||
|
)
|
||||||
|
|
||||||
|
with patch('src.api.v1.images.get_current_user', return_value=mock_current_user):
|
||||||
|
from src.api.v1.images import upload_image
|
||||||
|
from fastapi import UploadFile
|
||||||
|
|
||||||
|
# Create upload file
|
||||||
|
upload_file = UploadFile(
|
||||||
|
filename="test.jpg",
|
||||||
|
file=test_image_file,
|
||||||
|
content_type="image/jpeg"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Mock request
|
||||||
|
request = Mock()
|
||||||
|
request.url.path = "/api/v1/images"
|
||||||
|
request.method = "POST"
|
||||||
|
|
||||||
|
# Call the upload function - should not raise exception
|
||||||
|
response = await upload_image(
|
||||||
|
request=request,
|
||||||
|
file=upload_file,
|
||||||
|
current_user=mock_current_user
|
||||||
|
)
|
||||||
|
|
||||||
|
# Verify Pub/Sub task was attempted
|
||||||
|
mock_pubsub_service.publish_image_processing_task.assert_called_once()
|
||||||
|
|
||||||
|
# Verify response is still successful
|
||||||
|
assert response.filename == "test.jpg"
|
||||||
|
assert response.content_type == "image/jpeg"
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_upload_image_with_description_and_tags(
    self,
    mock_current_user,
    test_image_file,
    mock_storage_service,
    mock_image_repository,
    mock_pubsub_service
):
    """Test image upload with description and tags.

    Verifies that a comma-separated ``tags`` string is split into a list on
    the created image record and that a Pub/Sub processing task is published.
    """
    with patch('src.api.v1.images.get_current_user', return_value=mock_current_user):
        from src.api.v1.images import upload_image
        from fastapi import UploadFile

        # Create upload file
        upload_file = UploadFile(
            filename="test.jpg",
            file=test_image_file,
            content_type="image/jpeg"
        )

        # Mock request
        request = Mock()
        request.url.path = "/api/v1/images"
        request.method = "POST"

        # Call the upload function with description and tags
        response = await upload_image(
            request=request,
            file=upload_file,
            description="Test image",
            tags="nature, landscape, outdoor",
            current_user=mock_current_user
        )

        # Verify Pub/Sub task was published
        mock_pubsub_service.publish_image_processing_task.assert_called_once()

        # Verify image was created with correct data
        mock_image_repository.create.assert_called_once()
        created_image_data = mock_image_repository.create.call_args[0][0]

        # Tags are expected to be split on commas and stripped of whitespace.
        assert created_image_data.description == "Test image"
        assert created_image_data.tags == ["nature", "landscape", "outdoor"]
|
||||||
|
@pytest.mark.asyncio
async def test_upload_image_with_collection_id(
    self,
    mock_current_user,
    test_image_file,
    mock_storage_service,
    mock_image_repository,
    mock_pubsub_service
):
    """Test image upload with collection ID.

    Verifies the supplied collection ID is stored on the created image and
    that a Pub/Sub processing task is published.
    """
    with patch('src.api.v1.images.get_current_user', return_value=mock_current_user):
        from src.api.v1.images import upload_image
        from fastapi import UploadFile

        # Create upload file
        upload_file = UploadFile(
            filename="test.jpg",
            file=test_image_file,
            content_type="image/jpeg"
        )

        # Mock request
        request = Mock()
        request.url.path = "/api/v1/images"
        request.method = "POST"

        collection_id = str(PyObjectId())

        # Call the upload function with collection ID
        response = await upload_image(
            request=request,
            file=upload_file,
            collection_id=collection_id,
            current_user=mock_current_user
        )

        # Verify Pub/Sub task was published
        mock_pubsub_service.publish_image_processing_task.assert_called_once()

        # Verify image was created with collection ID
        mock_image_repository.create.assert_called_once()
        created_image_data = mock_image_repository.create.call_args[0][0]

        assert str(created_image_data.collection_id) == collection_id
||||||
|
|
||||||
|
class TestImageModelUpdates:
    """Test cases for updated image model with embedding fields."""

    def test_image_model_has_embedding_fields(self):
        """Test that ImageModel has the new embedding fields with sane defaults."""
        from src.models.image import ImageModel

        # Create an image model instance with only the required fields.
        image = ImageModel(
            filename="test.jpg",
            original_filename="test.jpg",
            file_size=1024,
            content_type="image/jpeg",
            storage_path="bucket/path/image.jpg",
            team_id=PyObjectId(),
            uploader_id=PyObjectId()
        )

        # Check that embedding fields exist with default values
        assert hasattr(image, 'embedding_status')
        assert hasattr(image, 'embedding_error')
        assert hasattr(image, 'embedding_retry_count')
        assert hasattr(image, 'embedding_last_attempt')

        # Check default values: a fresh image has no embedding yet.
        assert image.embedding_status == "pending"
        assert image.embedding_error is None
        assert image.embedding_retry_count == 0
        assert image.embedding_last_attempt is None
        assert image.has_embedding is False

    def test_image_model_embedding_fields_can_be_set(self):
        """Test that embedding fields can be set explicitly at construction time."""
        from src.models.image import ImageModel
        from datetime import datetime

        now = datetime.utcnow()

        # Create an image model instance with embedding fields
        image = ImageModel(
            filename="test.jpg",
            original_filename="test.jpg",
            file_size=1024,
            content_type="image/jpeg",
            storage_path="bucket/path/image.jpg",
            team_id=PyObjectId(),
            uploader_id=PyObjectId(),
            embedding_status="processing",
            embedding_error="Test error",
            embedding_retry_count=2,
            embedding_last_attempt=now,
            has_embedding=True
        )

        # Check that values were set correctly
        assert image.embedding_status == "processing"
        assert image.embedding_error == "Test error"
        assert image.embedding_retry_count == 2
        assert image.embedding_last_attempt == now
        assert image.has_embedding is True
||||||
|
|
||||||
|
class TestPubSubServiceIntegration:
    """Integration tests for Pub/Sub service with image API."""

    @pytest.mark.asyncio
    async def test_end_to_end_image_upload_flow(self):
        """Test the complete flow from image upload to Pub/Sub message.

        NOTE(review): this currently only wires up the mocks and checks they
        exist; it does not exercise the HTTP endpoint (file uploads need more
        setup). Kept as a scaffold for a real end-to-end test.
        """
        # Mock all dependencies of the images router.
        with patch('src.api.v1.images.storage_service') as mock_storage, \
             patch('src.api.v1.images.image_repository') as mock_repo, \
             patch('src.api.v1.images.pubsub_service') as mock_pubsub, \
             patch('src.api.v1.images.get_current_user') as mock_auth:

            # Setup mocks
            mock_user = Mock()
            mock_user.id = PyObjectId()
            mock_user.team_id = PyObjectId()
            mock_auth.return_value = mock_user

            # upload_file returns (storage_path, content_type, size, metadata).
            mock_storage.upload_file = AsyncMock(return_value=(
                "bucket/team/image.jpg", "image/jpeg", 1024, {}
            ))

            mock_image = Mock()
            mock_image.id = PyObjectId()
            mock_repo.create = AsyncMock(return_value=mock_image)

            mock_pubsub.publish_image_processing_task = AsyncMock(return_value=True)

            # Create test client
            from fastapi import FastAPI
            app = FastAPI()
            app.include_router(router)

            # This would test the actual HTTP endpoint
            # but requires more complex setup for file uploads

            # For now, verify the mocks would be called correctly
            assert mock_storage.upload_file is not None
            assert mock_repo.create is not None
            assert mock_pubsub.publish_image_processing_task is not None
# ===== New file in this commit: tests/integration/test_cloud_function.py (+370 lines) =====
|
|||||||
|
import pytest
import json
import base64
from unittest.mock import Mock, patch, MagicMock
import numpy as np
from datetime import datetime

# Mock the cloud function's third-party modules before importing it, so the
# test suite does not need GCP/Pinecone credentials or packages installed.
with patch.dict('sys.modules', {
    'functions_framework': Mock(),
    'google.cloud.vision': Mock(),
    'google.cloud.firestore': Mock(),
    'google.cloud.storage': Mock(),
    'pinecone': Mock()
}):
    # Import the cloud function after mocking
    import sys
    import os
    # Make the Cloud Function source importable as a top-level 'main' module.
    sys.path.append(os.path.join(os.path.dirname(__file__), '../../deployment/cloud-function'))

    # Mock the main module
    # NOTE(review): registering a Mock as 'main' means later `from main import X`
    # statements resolve against this Mock, not the real Cloud Function module —
    # confirm this is intentional.
    main_module = Mock()
    sys.modules['main'] = main_module
class TestCloudFunctionProcessing:
    """Test cases for Cloud Function image processing logic."""

    @pytest.fixture
    def mock_cloud_event(self):
        """Create a mock cloud event carrying a base64-encoded Pub/Sub message."""
        message_data = {
            "image_id": "test-image-123",
            "storage_path": "test-bucket/team-456/image.jpg",
            "team_id": "team-456",
            "retry_count": 0,
            "task_type": "generate_embeddings"
        }

        encoded_data = base64.b64encode(json.dumps(message_data).encode('utf-8')).decode('utf-8')

        cloud_event = Mock()
        cloud_event.data = {
            "message": {
                "data": encoded_data
            }
        }
        return cloud_event

    @pytest.fixture
    def mock_vision_client(self):
        """Mock Google Cloud Vision client."""
        with patch('main.vision_client') as mock_client:
            yield mock_client

    @pytest.fixture
    def mock_firestore_client(self):
        """Mock Firestore client."""
        with patch('main.firestore_client') as mock_client:
            yield mock_client

    @pytest.fixture
    def mock_storage_client(self):
        """Mock Cloud Storage client."""
        with patch('main.storage_client') as mock_client:
            yield mock_client

    @pytest.fixture
    def mock_pinecone_index(self):
        """Mock Pinecone index."""
        with patch('main.index') as mock_index:
            yield mock_index

    def test_message_decoding(self, mock_cloud_event):
        """Test that the Pub/Sub message payload round-trips through base64/JSON."""
        message_data = base64.b64decode(mock_cloud_event.data["message"]["data"]).decode('utf-8')
        message = json.loads(message_data)

        assert message["image_id"] == "test-image-123"
        assert message["storage_path"] == "test-bucket/team-456/image.jpg"
        assert message["team_id"] == "team-456"
        assert message["retry_count"] == 0
        assert message["task_type"] == "generate_embeddings"

    def test_missing_required_fields(self):
        """Test handling of messages with missing required fields."""
        # Test with missing image_id
        message_data = {
            "storage_path": "test-bucket/team-456/image.jpg",
            "team_id": "team-456",
            "retry_count": 0
        }

        encoded_data = base64.b64encode(json.dumps(message_data).encode('utf-8')).decode('utf-8')

        cloud_event = Mock()
        cloud_event.data = {
            "message": {
                "data": encoded_data
            }
        }

        # The function should handle this gracefully: .get() yields None for
        # the missing key rather than raising KeyError.
        message = json.loads(base64.b64decode(cloud_event.data["message"]["data"]).decode('utf-8'))

        assert message.get('image_id') is None
        assert message.get('storage_path') is not None
        assert message.get('team_id') is not None

    @patch('main.process_image')
    @patch('main.update_image_status')
    def test_successful_processing_flow(self, mock_update_status, mock_process_image, mock_cloud_event):
        """Test successful image processing flow."""
        # Mock successful processing
        mock_process_image.return_value = True

        # Import and call the function
        from main import process_image_embedding

        # This would test the main function flow.
        # Since we can't easily test the actual function due to the decorator,
        # we test the logic components instead.

        message_data = json.loads(base64.b64decode(mock_cloud_event.data["message"]["data"]).decode('utf-8'))

        # Simulate the function logic
        image_id = message_data.get('image_id')
        storage_path = message_data.get('storage_path')
        team_id = message_data.get('team_id')
        retry_count = message_data.get('retry_count', 0)

        # Status update must not have happened before processing starts.
        mock_update_status.assert_not_called()  # Not called yet

        # Process image
        success = mock_process_image(image_id, storage_path, team_id, retry_count)
        assert success is True

    @patch('main.process_image')
    @patch('main.update_image_status')
    def test_failed_processing_flow(self, mock_update_status, mock_process_image, mock_cloud_event):
        """Test failed image processing flow."""
        # Mock failed processing
        mock_process_image.return_value = False

        message_data = json.loads(base64.b64decode(mock_cloud_event.data["message"]["data"]).decode('utf-8'))

        # Simulate the function logic
        image_id = message_data.get('image_id')
        storage_path = message_data.get('storage_path')
        team_id = message_data.get('team_id')
        retry_count = message_data.get('retry_count', 0)

        # Process image
        success = mock_process_image(image_id, storage_path, team_id, retry_count)
        assert success is False
||||||
|
|
||||||
|
class TestImageProcessingLogic:
    """Test cases for the Cloud Function's image processing logic."""

    @pytest.fixture
    def mock_storage_setup(self):
        """Setup mock storage client, bucket and blob; blob exists by default."""
        with patch('main.storage_client') as mock_storage:
            mock_bucket = Mock()
            mock_blob = Mock()

            mock_storage.bucket.return_value = mock_bucket
            mock_bucket.blob.return_value = mock_blob
            mock_blob.exists.return_value = True
            mock_blob.download_as_bytes.return_value = b"fake_image_data"

            yield mock_storage, mock_bucket, mock_blob

    @pytest.fixture
    def mock_vision_response(self):
        """Mock Vision API object-localization response with one 'person' hit."""
        mock_response = Mock()
        mock_response.error.message = ""

        # Mock object annotations
        mock_obj = Mock()
        mock_obj.name = "person"
        mock_obj.score = 0.95

        # Mock bounding box (two opposite corners of a normalized quad).
        mock_vertex1 = Mock()
        mock_vertex1.x = 0.1
        mock_vertex1.y = 0.1
        mock_vertex2 = Mock()
        mock_vertex2.x = 0.9
        mock_vertex2.y = 0.9

        mock_obj.bounding_poly.normalized_vertices = [mock_vertex1, Mock(), mock_vertex2, Mock()]
        mock_response.localized_object_annotations = [mock_obj]

        return mock_response

    @pytest.fixture
    def mock_label_response(self):
        """Mock Vision API label-detection response."""
        mock_response = Mock()

        mock_label = Mock()
        mock_label.description = "person"
        mock_label.score = 0.98

        mock_response.label_annotations = [mock_label]
        return mock_response

    @patch('main.generate_image_embeddings')
    @patch('main.update_image_embedding_info')
    def test_process_image_success(self, mock_update_embedding, mock_generate_embeddings, mock_storage_setup):
        """Test successful image processing end to end through process_image."""
        mock_storage, mock_bucket, mock_blob = mock_storage_setup

        # Mock successful embedding generation (512-dim float32 vector).
        mock_embeddings = np.random.rand(512).astype(np.float32)
        mock_generate_embeddings.return_value = mock_embeddings

        # Mock Pinecone index
        with patch('main.index') as mock_index:
            mock_index.upsert = Mock()

            from main import process_image

            result = process_image("test-image-123", "test-bucket/path/image.jpg", "team-456", 0)

            assert result is True
            mock_generate_embeddings.assert_called_once()
            mock_index.upsert.assert_called_once()
            mock_update_embedding.assert_called_once()

    def test_process_image_storage_not_found(self, mock_storage_setup):
        """Test processing when the image blob is missing from storage."""
        mock_storage, mock_bucket, mock_blob = mock_storage_setup
        mock_blob.exists.return_value = False

        from main import process_image

        result = process_image("test-image-123", "test-bucket/path/image.jpg", "team-456", 0)

        assert result is False

    @patch('main.generate_image_embeddings')
    def test_process_image_embedding_generation_failed(self, mock_generate_embeddings, mock_storage_setup):
        """Test processing when embedding generation returns None."""
        mock_storage, mock_bucket, mock_blob = mock_storage_setup
        mock_generate_embeddings.return_value = None

        from main import process_image

        result = process_image("test-image-123", "test-bucket/path/image.jpg", "team-456", 0)

        assert result is False

    @patch('main.vision_client')
    def test_generate_image_embeddings_success(self, mock_vision_client, mock_vision_response, mock_label_response):
        """Test successful embedding generation yields a (512,) float32 vector."""
        mock_vision_client.object_localization.return_value = mock_vision_response
        mock_vision_client.label_detection.return_value = mock_label_response

        from main import generate_image_embeddings

        embeddings = generate_image_embeddings(b"fake_image_data")

        assert embeddings is not None
        assert isinstance(embeddings, np.ndarray)
        assert embeddings.shape == (512,)
        assert embeddings.dtype == np.float32

    @patch('main.vision_client')
    def test_generate_image_embeddings_vision_error(self, mock_vision_client):
        """Test embedding generation when the Vision API reports an error."""
        mock_response = Mock()
        mock_response.error.message = "Vision API error"
        mock_vision_client.object_localization.return_value = mock_response

        from main import generate_image_embeddings

        embeddings = generate_image_embeddings(b"fake_image_data")

        assert embeddings is None

    @patch('main.vision_client')
    def test_generate_image_embeddings_exception(self, mock_vision_client):
        """Test embedding generation when the Vision client raises."""
        mock_vision_client.object_localization.side_effect = Exception("API error")

        from main import generate_image_embeddings

        embeddings = generate_image_embeddings(b"fake_image_data")

        assert embeddings is None
||||||
|
|
||||||
|
class TestFirestoreUpdates:
    """Test cases for Firestore update operations."""

    @pytest.fixture
    def mock_firestore_doc(self):
        """Mock Firestore document reference returned by collection().document()."""
        with patch('main.firestore_client') as mock_client:
            mock_doc = Mock()
            mock_client.collection.return_value.document.return_value = mock_doc
            yield mock_doc

    def test_update_image_status_processing(self, mock_firestore_doc):
        """Test updating image status to processing."""
        from main import update_image_status

        update_image_status("test-image-123", "processing", 1)

        mock_firestore_doc.update.assert_called_once()
        call_args = mock_firestore_doc.update.call_args[0][0]

        assert call_args["embedding_status"] == "processing"
        assert call_args["embedding_retry_count"] == 1
        # A timestamp is recorded for every attempt.
        assert "embedding_last_attempt" in call_args

    def test_update_image_status_success(self, mock_firestore_doc):
        """Test updating image status to success."""
        from main import update_image_status

        update_image_status("test-image-123", "success", 2)

        mock_firestore_doc.update.assert_called_once()
        call_args = mock_firestore_doc.update.call_args[0][0]

        # Success flips has_embedding on and clears any previous error.
        assert call_args["embedding_status"] == "success"
        assert call_args["has_embedding"] is True
        assert call_args["embedding_error"] is None

    def test_update_image_status_failed(self, mock_firestore_doc):
        """Test updating image status to failed with an error message."""
        from main import update_image_status

        update_image_status("test-image-123", "failed", 3, "Processing error")

        mock_firestore_doc.update.assert_called_once()
        call_args = mock_firestore_doc.update.call_args[0][0]

        assert call_args["embedding_status"] == "failed"
        assert call_args["embedding_error"] == "Processing error"

    def test_update_image_embedding_info(self, mock_firestore_doc):
        """Test updating image embedding information after a successful upsert."""
        from main import update_image_embedding_info

        update_image_embedding_info("test-image-123", "team-456_test-image-123", "google-vision-v1")

        mock_firestore_doc.update.assert_called_once()
        call_args = mock_firestore_doc.update.call_args[0][0]

        assert call_args["embedding_id"] == "team-456_test-image-123"
        assert call_args["embedding_model"] == "google-vision-v1"
        assert call_args["has_embedding"] is True

    def test_update_image_status_firestore_error(self, mock_firestore_doc):
        """Test that Firestore update errors are swallowed, not propagated."""
        mock_firestore_doc.update.side_effect = Exception("Firestore error")

        from main import update_image_status

        # Should not raise exception, just log error
        update_image_status("test-image-123", "failed", 1, "Test error")

        mock_firestore_doc.update.assert_called_once()
# ===== New file in this commit: tests/services/test_pubsub_service.py (+269 lines) =====
|
|||||||
|
import pytest
import json
from unittest.mock import Mock, patch, AsyncMock
from google.cloud import pubsub_v1
from google.api_core.exceptions import GoogleAPIError

from src.services.pubsub_service import PubSubService, pubsub_service
|
||||||
|
class TestPubSubService:
|
||||||
|
"""Test cases for PubSubService"""
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def mock_publisher(self):
|
||||||
|
"""Mock Pub/Sub publisher client"""
|
||||||
|
with patch('src.services.pubsub_service.pubsub_v1.PublisherClient') as mock_client:
|
||||||
|
mock_instance = Mock()
|
||||||
|
mock_client.return_value = mock_instance
|
||||||
|
mock_instance.topic_path.return_value = "projects/test-project/topics/test-topic"
|
||||||
|
yield mock_instance
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def pubsub_service_instance(self, mock_publisher):
|
||||||
|
"""Create a PubSubService instance with mocked dependencies"""
|
||||||
|
with patch('src.services.pubsub_service.settings') as mock_settings:
|
||||||
|
mock_settings.FIRESTORE_PROJECT_ID = "test-project"
|
||||||
|
mock_settings.PUBSUB_TOPIC = "test-topic"
|
||||||
|
|
||||||
|
service = PubSubService()
|
||||||
|
return service
|
||||||
|
|
||||||
|
def test_init_success(self, mock_publisher):
|
||||||
|
"""Test successful initialization of PubSubService"""
|
||||||
|
with patch('src.services.pubsub_service.settings') as mock_settings:
|
||||||
|
mock_settings.FIRESTORE_PROJECT_ID = "test-project"
|
||||||
|
mock_settings.PUBSUB_TOPIC = "test-topic"
|
||||||
|
|
||||||
|
service = PubSubService()
|
||||||
|
|
||||||
|
assert service.project_id == "test-project"
|
||||||
|
assert service.topic_name == "test-topic"
|
||||||
|
assert service.publisher is not None
|
||||||
|
assert service._topic_path == "projects/test-project/topics/test-topic"
|
||||||
|
|
||||||
|
def test_init_missing_config(self):
|
||||||
|
"""Test initialization with missing configuration"""
|
||||||
|
with patch('src.services.pubsub_service.settings') as mock_settings:
|
||||||
|
mock_settings.FIRESTORE_PROJECT_ID = ""
|
||||||
|
mock_settings.PUBSUB_TOPIC = ""
|
||||||
|
|
||||||
|
service = PubSubService()
|
||||||
|
|
||||||
|
assert service.publisher is None
|
||||||
|
assert service._topic_path is None
|
||||||
|
|
||||||
|
def test_init_client_error(self):
|
||||||
|
"""Test initialization with client creation error"""
|
||||||
|
with patch('src.services.pubsub_service.settings') as mock_settings:
|
||||||
|
mock_settings.FIRESTORE_PROJECT_ID = "test-project"
|
||||||
|
mock_settings.PUBSUB_TOPIC = "test-topic"
|
||||||
|
|
||||||
|
with patch('src.services.pubsub_service.pubsub_v1.PublisherClient', side_effect=Exception("Client error")):
|
||||||
|
service = PubSubService()
|
||||||
|
|
||||||
|
assert service.publisher is None
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_publish_image_processing_task_success(self, pubsub_service_instance):
|
||||||
|
"""Test successful publishing of image processing task"""
|
||||||
|
# Mock the future result
|
||||||
|
mock_future = Mock()
|
||||||
|
mock_future.result.return_value = "message-id-123"
|
||||||
|
|
||||||
|
pubsub_service_instance.publisher.publish.return_value = mock_future
|
||||||
|
|
||||||
|
result = await pubsub_service_instance.publish_image_processing_task(
|
||||||
|
image_id="test-image-id",
|
||||||
|
storage_path="bucket/path/to/image.jpg",
|
||||||
|
team_id="test-team-id",
|
||||||
|
retry_count=0
|
||||||
|
)
|
||||||
|
|
||||||
|
assert result is True
|
||||||
|
|
||||||
|
# Verify publish was called with correct parameters
|
||||||
|
pubsub_service_instance.publisher.publish.assert_called_once()
|
||||||
|
call_args = pubsub_service_instance.publisher.publish.call_args
|
||||||
|
|
||||||
|
# Check topic path
|
||||||
|
assert call_args[0][0] == pubsub_service_instance._topic_path
|
||||||
|
|
||||||
|
# Check message data
|
||||||
|
message_data = json.loads(call_args[0][1].decode('utf-8'))
|
||||||
|
assert message_data["image_id"] == "test-image-id"
|
||||||
|
assert message_data["storage_path"] == "bucket/path/to/image.jpg"
|
||||||
|
assert message_data["team_id"] == "test-team-id"
|
||||||
|
assert message_data["retry_count"] == 0
|
||||||
|
assert message_data["task_type"] == "generate_embeddings"
|
||||||
|
|
||||||
|
# Check attributes
|
||||||
|
attributes = call_args[1]
|
||||||
|
assert attributes["task_type"] == "generate_embeddings"
|
||||||
|
assert attributes["image_id"] == "test-image-id"
|
||||||
|
assert attributes["retry_count"] == "0"
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_publish_image_processing_task_no_client(self):
|
||||||
|
"""Test publishing when client is not initialized"""
|
||||||
|
service = PubSubService()
|
||||||
|
service.publisher = None
|
||||||
|
service._topic_path = None
|
||||||
|
|
||||||
|
result = await service.publish_image_processing_task(
|
||||||
|
image_id="test-image-id",
|
||||||
|
storage_path="bucket/path/to/image.jpg",
|
||||||
|
team_id="test-team-id"
|
||||||
|
)
|
||||||
|
|
||||||
|
assert result is False
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_publish_image_processing_task_publish_error(self, pubsub_service_instance):
|
||||||
|
"""Test publishing with publish error"""
|
||||||
|
pubsub_service_instance.publisher.publish.side_effect = GoogleAPIError("Publish failed")
|
||||||
|
|
||||||
|
result = await pubsub_service_instance.publish_image_processing_task(
|
||||||
|
image_id="test-image-id",
|
||||||
|
storage_path="bucket/path/to/image.jpg",
|
||||||
|
team_id="test-team-id"
|
||||||
|
)
|
||||||
|
|
||||||
|
assert result is False
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_publish_image_processing_task_future_timeout(self, pubsub_service_instance):
|
||||||
|
"""Test publishing with future timeout"""
|
||||||
|
mock_future = Mock()
|
||||||
|
mock_future.result.side_effect = Exception("Timeout")
|
||||||
|
|
||||||
|
pubsub_service_instance.publisher.publish.return_value = mock_future
|
||||||
|
|
||||||
|
result = await pubsub_service_instance.publish_image_processing_task(
|
||||||
|
image_id="test-image-id",
|
||||||
|
storage_path="bucket/path/to/image.jpg",
|
||||||
|
team_id="test-team-id"
|
||||||
|
)
|
||||||
|
|
||||||
|
assert result is False
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_publish_retry_task_success(self, pubsub_service_instance):
|
||||||
|
"""Test successful publishing of retry task"""
|
||||||
|
# Mock the future result
|
||||||
|
mock_future = Mock()
|
||||||
|
mock_future.result.return_value = "message-id-123"
|
||||||
|
|
||||||
|
pubsub_service_instance.publisher.publish.return_value = mock_future
|
||||||
|
|
||||||
|
result = await pubsub_service_instance.publish_retry_task(
|
||||||
|
image_id="test-image-id",
|
||||||
|
storage_path="bucket/path/to/image.jpg",
|
||||||
|
team_id="test-team-id",
|
||||||
|
retry_count=1,
|
||||||
|
error_message="Previous attempt failed"
|
||||||
|
)
|
||||||
|
|
||||||
|
assert result is True
|
||||||
|
|
||||||
|
# Verify publish was called with incremented retry count
|
||||||
|
call_args = pubsub_service_instance.publisher.publish.call_args
|
||||||
|
message_data = json.loads(call_args[0][1].decode('utf-8'))
|
||||||
|
assert message_data["retry_count"] == 2
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_publish_retry_task_max_retries(self, pubsub_service_instance):
|
||||||
|
"""Test retry task when max retries reached"""
|
||||||
|
result = await pubsub_service_instance.publish_retry_task(
|
||||||
|
image_id="test-image-id",
|
||||||
|
storage_path="bucket/path/to/image.jpg",
|
||||||
|
team_id="test-team-id",
|
||||||
|
retry_count=3, # Max retries reached
|
||||||
|
error_message="Previous attempt failed"
|
||||||
|
)
|
||||||
|
|
||||||
|
assert result is False
|
||||||
|
|
||||||
|
# Verify publish was not called
|
||||||
|
pubsub_service_instance.publisher.publish.assert_not_called()
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_publish_retry_task_with_retry_count_2(self, pubsub_service_instance):
|
||||||
|
"""Test retry task with retry count 2 (should still work)"""
|
||||||
|
# Mock the future result
|
||||||
|
mock_future = Mock()
|
||||||
|
mock_future.result.return_value = "message-id-123"
|
||||||
|
|
||||||
|
pubsub_service_instance.publisher.publish.return_value = mock_future
|
||||||
|
|
||||||
|
result = await pubsub_service_instance.publish_retry_task(
|
||||||
|
image_id="test-image-id",
|
||||||
|
storage_path="bucket/path/to/image.jpg",
|
||||||
|
team_id="test-team-id",
|
||||||
|
retry_count=2,
|
||||||
|
error_message="Previous attempt failed"
|
||||||
|
)
|
||||||
|
|
||||||
|
assert result is True
|
||||||
|
|
||||||
|
# Verify publish was called with retry count 3
|
||||||
|
call_args = pubsub_service_instance.publisher.publish.call_args
|
||||||
|
message_data = json.loads(call_args[0][1].decode('utf-8'))
|
||||||
|
assert message_data["retry_count"] == 3
|
||||||
|
|
||||||
|
def test_singleton_service(self):
    """The module-level singleton should exist and be a PubSubService."""
    # Sanity-check the shared instance created at import time.
    assert pubsub_service is not None
    assert isinstance(pubsub_service, PubSubService)
|
||||||
|
|
||||||
|
|
||||||
|
class TestPubSubServiceIntegration:
    """Integration tests for PubSubService"""

    @pytest.mark.asyncio
    async def test_message_format_compatibility(self):
        """Published messages must match the schema the Cloud Function consumes."""
        with patch('src.services.pubsub_service.settings') as mock_settings, \
             patch('src.services.pubsub_service.pubsub_v1.PublisherClient') as mock_client:
            mock_settings.FIRESTORE_PROJECT_ID = "test-project"
            mock_settings.PUBSUB_TOPIC = "test-topic"

            publisher = Mock()
            mock_client.return_value = publisher
            publisher.topic_path.return_value = "projects/test-project/topics/test-topic"

            # Publishing resolves to a future carrying the message id.
            fake_future = Mock()
            fake_future.result.return_value = "message-id-123"
            publisher.publish.return_value = fake_future

            service = PubSubService()
            await service.publish_image_processing_task(
                image_id="67890abcdef123456789",
                storage_path="my-bucket/team-123/image.jpg",
                team_id="team-123",
                retry_count=1,
            )

            publish_args = publisher.publish.call_args
            payload = json.loads(publish_args[0][1].decode('utf-8'))

            # Every field the Cloud Function reads must be present with the right type.
            expected_types = {
                "image_id": str,
                "storage_path": str,
                "team_id": str,
                "retry_count": int,
                "task_type": str,
            }
            for field, field_type in expected_types.items():
                assert field in payload
                assert isinstance(payload[field], field_type)

            # Pub/Sub attributes (the kwargs of publish) must carry routing info too.
            attributes = publish_args[1]
            for attr in ("task_type", "image_id", "retry_count"):
                assert attr in attributes
|
||||||
Loading…
x
Reference in New Issue
Block a user