mirror of
https://github.com/ferdzo/iotDashboard.git
synced 2026-04-05 17:16:26 +00:00
Migration to UV, introducing db_write, mqtt_ingestion, db_migrations, half working prototype.
This commit is contained in:
26
services/db_write/.env.example
Normal file
26
services/db_write/.env.example
Normal file
@@ -0,0 +1,26 @@
|
||||
# Redis Configuration
|
||||
REDIS_HOST=localhost
|
||||
REDIS_PORT=6379
|
||||
REDIS_DB=0
|
||||
REDIS_PASSWORD=
|
||||
|
||||
# Database Configuration (PostgreSQL/TimescaleDB)
|
||||
DATABASE_URL=postgresql://user:password@localhost:5432/iot_dashboard
|
||||
|
||||
# Consumer Configuration
|
||||
CONSUMER_GROUP_NAME=db_writer
|
||||
CONSUMER_NAME=worker-01
|
||||
BATCH_SIZE=100
|
||||
BATCH_TIMEOUT_SEC=5
|
||||
PROCESSING_INTERVAL_SEC=1
|
||||
|
||||
# Stream Configuration
|
||||
STREAM_PATTERN=mqtt_stream:*
|
||||
DEAD_LETTER_STREAM=mqtt_stream:failed
|
||||
|
||||
# Database Table Configuration
|
||||
TABLE_NAME=sensor_readings
|
||||
ENABLE_TIMESCALE=false
|
||||
|
||||
# Logging Configuration
|
||||
LOG_LEVEL=INFO
|
||||
54
services/db_write/.gitignore
vendored
Normal file
54
services/db_write/.gitignore
vendored
Normal file
@@ -0,0 +1,54 @@
|
||||
# Python
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
*.so
|
||||
.Python
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
wheels/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
|
||||
# Virtual environments
|
||||
.venv/
|
||||
venv/
|
||||
ENV/
|
||||
env/
|
||||
|
||||
# Environment variables
|
||||
.env
|
||||
.env.local
|
||||
|
||||
# IDE
|
||||
.vscode/
|
||||
.idea/
|
||||
*.swp
|
||||
*.swo
|
||||
*~
|
||||
|
||||
# Logs
|
||||
*.log
|
||||
|
||||
# Database
|
||||
*.db
|
||||
*.sqlite
|
||||
*.sqlite3
|
||||
|
||||
# OS
|
||||
.DS_Store
|
||||
Thumbs.db
|
||||
|
||||
# uv
|
||||
.uv/
|
||||
uv.lock
|
||||
1
services/db_write/.python-version
Normal file
1
services/db_write/.python-version
Normal file
@@ -0,0 +1 @@
|
||||
3.13
|
||||
406
services/db_write/README.md
Normal file
406
services/db_write/README.md
Normal file
@@ -0,0 +1,406 @@
|
||||
# Database Writer Service
|
||||
|
||||
A robust, production-ready service that reads sensor data from Redis streams and writes it to PostgreSQL/TimescaleDB. Part of the IoT Dashboard project.
|
||||
|
||||
## Features
|
||||
|
||||
- ✅ **Reliable consumption** from Redis streams using consumer groups
|
||||
- ✅ **Batch processing** for high throughput
|
||||
- ✅ **At-least-once delivery** with message acknowledgments
|
||||
- ✅ **Dead letter queue** for failed messages
|
||||
- ✅ **Connection pooling** for database efficiency
|
||||
- ✅ **Graceful shutdown** handling
|
||||
- ✅ **Flexible schema** that adapts to changes
|
||||
- ✅ **Structured logging** with JSON output
|
||||
- ✅ **Health checks** for monitoring
|
||||
- ✅ **TimescaleDB support** for time-series optimization
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
Redis Streams → Consumer Group → Transform → Database → Acknowledge
|
||||
↓
|
||||
Failed messages
|
||||
↓
|
||||
Dead Letter Queue
|
||||
```
|
||||
|
||||
### Components
|
||||
|
||||
- **`main.py`**: Service orchestration and processing loop
|
||||
- **`redis_reader.py`**: Redis stream consumer with fault tolerance
|
||||
- **`db_writer.py`**: Database operations with connection pooling
|
||||
- **`schema.py`**: Data transformation and validation
|
||||
- **`config.py`**: Configuration management
|
||||
|
||||
## Quick Start
|
||||
|
||||
### Prerequisites
|
||||
|
||||
- Python 3.13+
|
||||
- [uv](https://github.com/astral-sh/uv) package manager
|
||||
- Redis server with streams
|
||||
- PostgreSQL or TimescaleDB
|
||||
|
||||
### Installation
|
||||
|
||||
1. **Navigate to the service directory**:
|
||||
```bash
|
||||
cd services/db_write
|
||||
```
|
||||
|
||||
2. **Copy and configure environment variables**:
|
||||
```bash
|
||||
cp .env.example .env
|
||||
# Edit .env with your DATABASE_URL and other settings
|
||||
```
|
||||
|
||||
3. **Install dependencies**:
|
||||
```bash
|
||||
uv sync
|
||||
```
|
||||
|
||||
4. **Setup database schema** (IMPORTANT - do this before running):
|
||||
```bash
|
||||
# Review the schema in models.py first
|
||||
cat models.py
|
||||
|
||||
# Create initial migration
|
||||
chmod +x migrate.sh
|
||||
./migrate.sh create "initial schema"
|
||||
|
||||
# Review the generated migration
|
||||
ls -lt alembic/versions/
|
||||
|
||||
# Apply migrations
|
||||
./migrate.sh upgrade
|
||||
```
|
||||
|
||||
5. **Run the service**:
|
||||
```bash
|
||||
uv run main.py
|
||||
```
|
||||
|
||||
Or use the standalone script:
|
||||
```bash
|
||||
chmod +x run-standalone.sh
|
||||
./run-standalone.sh
|
||||
```
|
||||
|
||||
### ⚠️ Important: Schema Management
|
||||
|
||||
This service uses **Alembic** for database migrations. The service will NOT create tables automatically.
|
||||
|
||||
- Schema is defined in `models.py`
|
||||
- Migrations are managed with `./migrate.sh` or `alembic` commands
|
||||
- See `SCHEMA_MANAGEMENT.md` for detailed guide
|
||||
|
||||
## Schema Management
|
||||
|
||||
This service uses **SQLAlchemy** for models and **Alembic** for migrations.
|
||||
|
||||
### Key Files
|
||||
|
||||
- **`models.py`**: Define your database schema here (SQLAlchemy models)
|
||||
- **`alembic/`**: Migration scripts directory
|
||||
- **`migrate.sh`**: Helper script for common migration tasks
|
||||
- **`SCHEMA_MANAGEMENT.md`**: Comprehensive migration guide
|
||||
|
||||
### Quick Migration Commands
|
||||
|
||||
```bash
|
||||
# Create a new migration after editing models.py
|
||||
./migrate.sh create "add new column"
|
||||
|
||||
# Apply pending migrations
|
||||
./migrate.sh upgrade
|
||||
|
||||
# Check migration status
|
||||
./migrate.sh check
|
||||
|
||||
# View migration history
|
||||
./migrate.sh history
|
||||
|
||||
# Rollback last migration
|
||||
./migrate.sh downgrade 1
|
||||
```
|
||||
|
||||
**See `SCHEMA_MANAGEMENT.md` for detailed documentation.**
|
||||
|
||||
## Configuration
|
||||
|
||||
All configuration is done via environment variables. See `.env.example` for all available options.
|
||||
|
||||
### Required Settings
|
||||
|
||||
```bash
|
||||
# Redis connection
|
||||
REDIS_HOST=localhost
|
||||
REDIS_PORT=6379
|
||||
|
||||
# Database connection
|
||||
DATABASE_URL=postgresql://user:password@localhost:5432/iot_dashboard
|
||||
```
|
||||
|
||||
### Optional Settings
|
||||
|
||||
```bash
|
||||
# Consumer configuration
|
||||
CONSUMER_GROUP_NAME=db_writer # Consumer group name
|
||||
CONSUMER_NAME=worker-01 # Unique consumer name
|
||||
BATCH_SIZE=100 # Messages per batch
|
||||
BATCH_TIMEOUT_SEC=5 # Read timeout
|
||||
PROCESSING_INTERVAL_SEC=1 # Delay between batches
|
||||
|
||||
# Stream configuration
|
||||
STREAM_PATTERN=mqtt_stream:* # Stream name pattern
|
||||
DEAD_LETTER_STREAM=mqtt_stream:failed
|
||||
|
||||
# Database
|
||||
TABLE_NAME=sensor_readings # Target table name
|
||||
ENABLE_TIMESCALE=false # Use TimescaleDB features
|
||||
|
||||
# Logging
|
||||
LOG_LEVEL=INFO # DEBUG, INFO, WARNING, ERROR
|
||||
LOG_FORMAT=json # json or console
|
||||
```
|
||||
|
||||
## Data Flow
|
||||
|
||||
### Input (Redis Streams)
|
||||
|
||||
The service reads from Redis streams with the format:
|
||||
```
|
||||
mqtt_stream:{device_id}:{sensor_type}
|
||||
```
|
||||
|
||||
Each message contains:
|
||||
```
|
||||
{
|
||||
"value": "23.5",
|
||||
"timestamp": "2023-10-18T14:30:00Z",
|
||||
"metadata": "{...}" (optional)
|
||||
}
|
||||
```
|
||||
|
||||
### Output (Database)
|
||||
|
||||
Data is written to the `sensor_readings` table:
|
||||
|
||||
```sql
|
||||
CREATE TABLE sensor_readings (
|
||||
id BIGSERIAL PRIMARY KEY,
|
||||
timestamp TIMESTAMPTZ NOT NULL,
|
||||
device_id VARCHAR(100) NOT NULL,
|
||||
sensor_type VARCHAR(100) NOT NULL,
|
||||
value DOUBLE PRECISION NOT NULL,
|
||||
metadata JSONB,
|
||||
created_at TIMESTAMPTZ DEFAULT NOW()
|
||||
);
|
||||
```
|
||||
|
||||
**Note**: The table is automatically created if it doesn't exist.
|
||||
|
||||
## Running with Docker
|
||||
|
||||
### Build the image
|
||||
|
||||
```bash
|
||||
docker build -t db-writer:latest .
|
||||
```
|
||||
|
||||
### Run the container
|
||||
|
||||
```bash
|
||||
docker run -d \
|
||||
--name db-writer \
|
||||
-e REDIS_HOST=redis \
|
||||
-e DATABASE_URL=postgresql://user:pass@postgres:5432/iot \
|
||||
db-writer:latest
|
||||
```
|
||||
|
||||
## Consumer Groups
|
||||
|
||||
The service uses Redis consumer groups for reliable, distributed processing:
|
||||
|
||||
- **Multiple instances**: Run multiple workers for load balancing
|
||||
- **Fault tolerance**: Messages are not lost if a consumer crashes
|
||||
- **Acknowledgments**: Messages are only removed after successful processing
|
||||
- **Pending messages**: Unacknowledged messages can be reclaimed
|
||||
|
||||
### Running Multiple Workers
|
||||
|
||||
```bash
|
||||
# Terminal 1
|
||||
CONSUMER_NAME=worker-01 uv run main.py
|
||||
|
||||
# Terminal 2
|
||||
CONSUMER_NAME=worker-02 uv run main.py
|
||||
```
|
||||
|
||||
All workers in the same consumer group will share the load.
|
||||
|
||||
## Error Handling
|
||||
|
||||
### Dead Letter Queue
|
||||
|
||||
Failed messages are sent to the dead letter stream (`mqtt_stream:failed`) with error information:
|
||||
|
||||
```
|
||||
{
|
||||
"original_stream": "mqtt_stream:esp32:temperature",
|
||||
"original_id": "1634567890123-0",
|
||||
"device_id": "esp32",
|
||||
"sensor_type": "temperature",
|
||||
"value": "23.5",
|
||||
"error": "Database connection failed",
|
||||
"failed_at": "1634567890.123"
|
||||
}
|
||||
```
|
||||
|
||||
### Retry Strategy
|
||||
|
||||
- **Transient errors**: Automatic retry with backoff
|
||||
- **Data errors**: Immediate send to DLQ
|
||||
- **Connection errors**: Reconnection attempts
|
||||
|
||||
## Monitoring
|
||||
|
||||
### Health Checks
|
||||
|
||||
Check service health programmatically:
|
||||
|
||||
```python
|
||||
from main import DatabaseWriterService
|
||||
|
||||
service = DatabaseWriterService()
|
||||
health = service.health_check()
|
||||
print(health)
|
||||
# {
|
||||
# 'running': True,
|
||||
# 'redis': True,
|
||||
# 'database': True,
|
||||
# 'stats': {...}
|
||||
# }
|
||||
```
|
||||
|
||||
### Logs
|
||||
|
||||
The service outputs structured logs:
|
||||
|
||||
```json
|
||||
{
|
||||
"event": "Processed batch",
|
||||
"rows_written": 100,
|
||||
"messages_acknowledged": 100,
|
||||
"timestamp": "2023-10-18T14:30:00Z",
|
||||
"level": "info"
|
||||
}
|
||||
```
|
||||
|
||||
### Statistics
|
||||
|
||||
Runtime statistics are tracked:
|
||||
- `messages_read`: Total messages consumed
|
||||
- `messages_written`: Total rows inserted
|
||||
- `messages_failed`: Failed messages sent to DLQ
|
||||
- `batches_processed`: Number of successful batches
|
||||
- `errors`: Total errors encountered
|
||||
|
||||
## Development
|
||||
|
||||
### Project Structure
|
||||
|
||||
```
|
||||
db_write/
|
||||
├── config.py # Configuration management
|
||||
├── db_writer.py # Database operations
|
||||
├── redis_reader.py # Redis stream consumer
|
||||
├── schema.py # Data models and transformation
|
||||
├── main.py # Service entry point
|
||||
├── pyproject.toml # Dependencies
|
||||
├── .env.example # Configuration template
|
||||
└── README.md # This file
|
||||
```
|
||||
|
||||
### Adding Dependencies
|
||||
|
||||
```bash
|
||||
uv add package-name
|
||||
```
|
||||
|
||||
### Running Tests
|
||||
|
||||
```bash
|
||||
uv run pytest
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Service won't start
|
||||
|
||||
1. **Check configuration**: Verify all required environment variables are set
|
||||
2. **Test connections**: Ensure Redis and PostgreSQL are accessible
|
||||
3. **Check logs**: Look for specific error messages
|
||||
|
||||
### No messages being processed
|
||||
|
||||
1. **Check streams exist**: `redis-cli KEYS "mqtt_stream:*"`
|
||||
2. **Verify consumer group**: The service creates it automatically, but check Redis logs
|
||||
3. **Check stream pattern**: Ensure `STREAM_PATTERN` matches your stream names
|
||||
|
||||
### Messages going to dead letter queue
|
||||
|
||||
1. **Check DLQ**: `redis-cli XRANGE mqtt_stream:failed - + COUNT 10`
|
||||
2. **Review error messages**: Each DLQ entry contains the error reason
|
||||
3. **Validate data format**: Ensure messages match expected schema
|
||||
|
||||
### High memory usage
|
||||
|
||||
1. **Reduce batch size**: Lower `BATCH_SIZE` in configuration
|
||||
2. **Check connection pool**: May need to adjust pool size
|
||||
3. **Monitor pending messages**: Use `XPENDING` to check backlog
|
||||
|
||||
## Performance Tuning
|
||||
|
||||
### Throughput Optimization
|
||||
|
||||
- **Increase batch size**: Process more messages per batch
|
||||
- **Multiple workers**: Run multiple consumer instances
|
||||
- **Connection pooling**: Adjust pool size based on load
|
||||
- **Processing interval**: Reduce delay between batches
|
||||
|
||||
### Latency Optimization
|
||||
|
||||
- **Decrease batch size**: Process smaller batches more frequently
|
||||
- **Reduce timeout**: Lower `BATCH_TIMEOUT_SEC`
|
||||
- **Single worker**: Avoid consumer group coordination overhead
|
||||
|
||||
## Production Deployment
|
||||
|
||||
### Recommended Settings
|
||||
|
||||
```bash
|
||||
BATCH_SIZE=500
|
||||
PROCESSING_INTERVAL_SEC=0.1
|
||||
LOG_LEVEL=INFO
|
||||
LOG_FORMAT=json
|
||||
ENABLE_TIMESCALE=true
|
||||
```
|
||||
|
||||
### Monitoring
|
||||
|
||||
- Monitor consumer lag using Redis `XPENDING`
|
||||
- Track database insert latency
|
||||
- Alert on error rate > 5%
|
||||
- Monitor DLQ depth
|
||||
|
||||
### Scaling
|
||||
|
||||
1. **Horizontal**: Add more consumer instances with unique `CONSUMER_NAME`
|
||||
2. **Vertical**: Increase resources for database writes
|
||||
3. **Database**: Use TimescaleDB for better time-series performance
|
||||
|
||||
## License
|
||||
|
||||
Part of the IoT Dashboard project.
|
||||
140
services/db_write/config.py
Normal file
140
services/db_write/config.py
Normal file
@@ -0,0 +1,140 @@
|
||||
"""
|
||||
Configuration management for the database writer service.
|
||||
Loads settings from environment variables with sensible defaults.
|
||||
"""
|
||||
import os
|
||||
from dataclasses import dataclass
|
||||
from typing import Optional
|
||||
import dotenv
|
||||
|
||||
dotenv.load_dotenv()
|
||||
|
||||
|
||||
@dataclass
class RedisConfig:
    """Redis connection configuration."""
    host: str                      # required; supplied by Config from REDIS_HOST
    port: int = 6379               # default Redis port
    db: int = 0                    # logical database index
    password: Optional[str] = None # None disables AUTH
|
||||
|
||||
|
||||
@dataclass
class DatabaseConfig:
    """Database connection configuration.

    Either a complete DSN (`url`, from DATABASE_URL) or the individual
    `host`/`name`/`user`/`password` components must be provided;
    `get_connection_string()` resolves whichever form is available.
    """
    url: Optional[str] = None
    host: Optional[str] = None
    port: int = 5432
    name: Optional[str] = None
    user: Optional[str] = None
    password: Optional[str] = None
    table_name: str = "sensor_readings"
    enable_timescale: bool = False

    def get_connection_string(self) -> str:
        """Return the database DSN, preferring an explicit URL.

        Raises:
            ValueError: if neither DATABASE_URL nor the full set of DB_*
                components is configured.
        """
        # An explicit URL always wins over individual components.
        if self.url:
            return self.url

        # Every component must be present (and non-empty) to build a DSN.
        if not (self.host and self.name and self.user and self.password):
            raise ValueError("Either DATABASE_URL or all DB_* variables must be set")

        return (
            f"postgresql://{self.user}:{self.password}"
            f"@{self.host}:{self.port}/{self.name}"
        )
|
||||
|
||||
|
||||
@dataclass
class ConsumerConfig:
    """Redis consumer group configuration."""
    group_name: str = "db_writer"         # consumer group shared by all workers
    consumer_name: str = "worker-01"      # must be unique per worker instance
    batch_size: int = 100                 # max messages fetched per batch
    batch_timeout_sec: int = 5            # read timeout in seconds
    processing_interval_sec: float = 1.0  # pause between processed batches
    block_time_ms: int = 5000  # blocking-read duration in ms (presumably XREADGROUP BLOCK — confirm in redis_reader)
|
||||
|
||||
|
||||
@dataclass
class StreamConfig:
    """Redis stream configuration."""
    pattern: str = "mqtt_stream:*"                  # glob used to discover input streams
    dead_letter_stream: str = "mqtt_stream:failed"  # destination for failed messages
    max_retries: int = 3     # retry budget (usage not visible in this view — confirm)
    trim_maxlen: int = 10000  # Keep last N messages in each stream
|
||||
|
||||
|
||||
@dataclass
class LogConfig:
    """Logging configuration."""
    level: str = "INFO"   # standard logging level name (DEBUG/INFO/WARNING/ERROR)
    format: str = "json"  # json or console
|
||||
|
||||
|
||||
class Config:
    """Main configuration class.

    Aggregates every service setting, read once from environment variables
    (a .env file is loaded at module import via python-dotenv).
    """

    def __init__(self):
        # Redis connection settings
        self.redis = RedisConfig(
            host=os.getenv('REDIS_HOST', 'localhost'),
            port=int(os.getenv('REDIS_PORT', 6379)),
            db=int(os.getenv('REDIS_DB', 0)),
            # "or None" collapses an empty REDIS_PASSWORD= entry to "no password"
            password=os.getenv('REDIS_PASSWORD', None) or None
        )

        # Database connection: either DATABASE_URL or the DB_* components
        self.database = DatabaseConfig(
            url=os.getenv('DATABASE_URL', None),
            host=os.getenv('DB_HOST', None),
            port=int(os.getenv('DB_PORT', 5432)),
            name=os.getenv('DB_NAME', None),
            user=os.getenv('DB_USER', None),
            password=os.getenv('DB_PASSWORD', None),
            table_name=os.getenv('TABLE_NAME', 'sensor_readings'),
            enable_timescale=os.getenv('ENABLE_TIMESCALE', 'false').lower() == 'true'
        )

        # Redis consumer-group behaviour
        self.consumer = ConsumerConfig(
            group_name=os.getenv('CONSUMER_GROUP_NAME', 'db_writer'),
            consumer_name=os.getenv('CONSUMER_NAME', 'worker-01'),
            batch_size=int(os.getenv('BATCH_SIZE', 100)),
            batch_timeout_sec=int(os.getenv('BATCH_TIMEOUT_SEC', 5)),
            processing_interval_sec=float(os.getenv('PROCESSING_INTERVAL_SEC', 1.0)),
            block_time_ms=int(os.getenv('BLOCK_TIME_MS', 5000))
        )

        # Stream discovery and dead-letter settings
        self.stream = StreamConfig(
            pattern=os.getenv('STREAM_PATTERN', 'mqtt_stream:*'),
            dead_letter_stream=os.getenv('DEAD_LETTER_STREAM', 'mqtt_stream:failed'),
            max_retries=int(os.getenv('MAX_RETRIES', 3)),
            trim_maxlen=int(os.getenv('TRIM_MAXLEN', 10000))
        )

        # Logging output settings
        self.log = LogConfig(
            level=os.getenv('LOG_LEVEL', 'INFO'),
            format=os.getenv('LOG_FORMAT', 'json')
        )

    def validate(self):
        """Validate the loaded configuration.

        Returns:
            True when the configuration is usable.

        Raises:
            ValueError: listing every problem found, comma-separated.
        """
        errors = []

        # Validate Redis config
        if not self.redis.host:
            errors.append("REDIS_HOST is required")

        # Validate database config: a usable DSN must be derivable
        try:
            self.database.get_connection_string()
        except ValueError as e:
            errors.append(str(e))

        # Validate consumer config
        if self.consumer.batch_size < 1:
            errors.append("BATCH_SIZE must be >= 1")

        if errors:
            raise ValueError(f"Configuration errors: {', '.join(errors)}")

        return True
|
||||
|
||||
|
||||
# Global config instance, built at import time so sibling modules can simply
# do `from config import config` (as db_writer.py and main.py do).
config = Config()
|
||||
91
services/db_write/db_writer.py
Normal file
91
services/db_write/db_writer.py
Normal file
@@ -0,0 +1,91 @@
|
||||
import logging
|
||||
from typing import List
|
||||
from sqlalchemy import create_engine, select
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
from sqlalchemy.pool import QueuePool
|
||||
|
||||
from config import config
|
||||
from schema import SensorReading
|
||||
from models import SensorReading as SensorReadingModel
|
||||
|
||||
|
||||
class DatabaseWriter:
    """
    Database writer using SQLAlchemy.

    Schema is defined in models.py and should be managed using Alembic migrations.
    This class only handles data insertion, NOT schema creation.

    To manage schema:
    1. Edit models.py to define your schema
    2. Generate migration: alembic revision --autogenerate -m "description"
    3. Apply migration: alembic upgrade head
    """

    class WriteResult:
        """Outcome of one batch write.

        Truthy exactly when the write succeeded, so callers doing
        ``if writer.write_batch(...):`` keep working, while main.py can read
        ``result.success``, ``result.rows_written`` and ``result.error``.
        """
        __slots__ = ('success', 'rows_written', 'error')

        def __init__(self, success, rows_written=0, error=None):
            self.success = success            # True when the batch was committed
            self.rows_written = rows_written  # number of rows inserted
            self.error = error                # failure description, or None

        def __bool__(self):
            return self.success

    def __init__(self):
        """Create the pooled SQLAlchemy engine and the session factory."""
        self.logger = logging.getLogger(__name__)

        # Initialize SQLAlchemy engine with connection pooling
        connection_string = config.database.get_connection_string()

        self.engine = create_engine(
            connection_string,
            poolclass=QueuePool,
            pool_size=5,
            max_overflow=10,
            pool_pre_ping=True  # detect and replace stale pooled connections
        )

        # Create session factory
        self.SessionLocal = sessionmaker(bind=self.engine)

        self.logger.info("Database writer initialized")

    def write_batch(self, readings: "List[SensorReading]") -> "DatabaseWriter.WriteResult":
        """Write a batch of sensor readings to the database.

        Returns:
            WriteResult: truthy on success with ``rows_written`` set; on
            failure ``success`` is False and ``error`` carries the exception
            text. This fixes the mismatch with main.py, which reads
            ``result.success`` / ``result.rows_written`` / ``result.error``
            — the previous plain-bool return had none of those attributes.
        """
        if not readings:
            # An empty batch is a successful no-op.
            return DatabaseWriter.WriteResult(True, 0)

        session = self.SessionLocal()
        try:
            # Convert validated readings into ORM row objects.
            db_objects = [
                SensorReadingModel(
                    timestamp=reading.timestamp,
                    device_id=reading.device_id,
                    sensor_type=reading.sensor_type,
                    value=reading.value,
                    # NOTE(review): 'metadata' is a reserved attribute on
                    # SQLAlchemy declarative classes — confirm models.py maps
                    # this column under a different attribute name.
                    metadata=reading.metadata
                )
                for reading in readings
            ]

            # Bulk insert, committed as a single transaction.
            session.bulk_save_objects(db_objects)
            session.commit()

            self.logger.debug(f"Wrote {len(readings)} readings to database")
            return DatabaseWriter.WriteResult(True, len(db_objects))

        except Exception as e:
            self.logger.error(f"Failed to write batch: {e}")
            session.rollback()
            return DatabaseWriter.WriteResult(False, 0, str(e))
        finally:
            session.close()

    def health_check(self) -> bool:
        """Check database connectivity with a SELECT 1 round-trip."""
        try:
            with self.engine.connect() as conn:
                result = conn.execute(select(1))
                return result.fetchone()[0] == 1
        except Exception as e:
            self.logger.error(f"Database health check failed: {e}")
            return False

    def close(self):
        """Close database engine and all pooled connections."""
        if hasattr(self, 'engine') and self.engine:
            self.engine.dispose()
            self.logger.info("Database engine closed")
|
||||
281
services/db_write/main.py
Normal file
281
services/db_write/main.py
Normal file
@@ -0,0 +1,281 @@
|
||||
"""
|
||||
Main entry point for the database writer service.
|
||||
Orchestrates the read → transform → write cycle with error handling.
|
||||
"""
|
||||
import logging
|
||||
import signal
|
||||
import sys
|
||||
import time
|
||||
import structlog
|
||||
from typing import List
|
||||
|
||||
from config import config
|
||||
from redis_reader import RedisReader
|
||||
from db_writer import DatabaseWriter
|
||||
from schema import SchemaHandler, StreamMessage, SensorReading
|
||||
|
||||
|
||||
def configure_logging():
    """Configure structlog (JSON mode only) plus the stdlib logging backend."""
    if config.log.format == 'json':
        # Processor chain producing one JSON object per log event.
        json_processors = [
            structlog.stdlib.filter_by_level,
            structlog.stdlib.add_logger_name,
            structlog.stdlib.add_log_level,
            structlog.stdlib.PositionalArgumentsFormatter(),
            structlog.processors.TimeStamper(fmt="iso"),
            structlog.processors.StackInfoRenderer(),
            structlog.processors.format_exc_info,
            structlog.processors.JSONRenderer(),
        ]
        structlog.configure(
            processors=json_processors,
            wrapper_class=structlog.stdlib.BoundLogger,
            context_class=dict,
            logger_factory=structlog.stdlib.LoggerFactory(),
            cache_logger_on_first_use=True,
        )

    # Standard logging is configured in every mode; unknown level names
    # fall back to INFO.
    log_level = getattr(logging, config.log.level.upper(), logging.INFO)
    logging.basicConfig(
        level=log_level,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    )
|
||||
|
||||
|
||||
class DatabaseWriterService:
    """Main service class that orchestrates the data pipeline.

    Runs the read → transform → write → acknowledge cycle: Redis streams are
    consumed in batches, transformed into SensorReading objects, written to
    the database, and only then acknowledged. Failed batches are diverted to
    the dead letter stream.
    """

    def __init__(self):
        # Processing-loop flag; cleared by stop() (possibly from a signal).
        self.running = False
        # Pipeline components are created in start() so __init__ does no I/O.
        self.redis_reader: RedisReader = None
        self.db_writer: DatabaseWriter = None
        self.schema_handler: SchemaHandler = None
        self.logger = logging.getLogger(__name__)

        # Setup signal handlers for graceful shutdown
        signal.signal(signal.SIGTERM, self._signal_handler)
        signal.signal(signal.SIGINT, self._signal_handler)

        # Statistics, reported on shutdown and via health_check()
        self.stats = {
            'messages_read': 0,
            'messages_written': 0,
            'messages_failed': 0,
            'batches_processed': 0,
            'errors': 0
        }

    def _signal_handler(self, signum, frame):
        """Handle shutdown signals (SIGTERM/SIGINT) by stopping the loop."""
        self.logger.info(f"Received signal {signum}, initiating graceful shutdown...")
        self.stop()

    def start(self) -> bool:
        """Validate config, build components, and run the blocking loop.

        Returns:
            True when the loop exits cleanly, False if startup or the loop
            failed with an unhandled exception.
        """
        self.logger.info("Starting Database Writer Service...")

        try:
            # Validate configuration
            config.validate()
            self.logger.info("Configuration validated successfully")

            # Initialize components
            self.schema_handler = SchemaHandler()
            self.logger.info("Schema handler initialized")

            self.redis_reader = RedisReader()
            self.logger.info("Redis reader initialized")

            self.db_writer = DatabaseWriter()
            self.logger.info("Database writer initialized")

            # Start the processing loop (blocks until stop() is called)
            self.running = True
            self.logger.info("Service started successfully, entering processing loop")

            self._processing_loop()

            return True

        except Exception as e:
            self.logger.error(f"Service startup failed: {e}", exc_info=True)
            return False

    def _processing_loop(self):
        """Main processing loop: read, transform, write, acknowledge."""
        consecutive_errors = 0
        max_consecutive_errors = 5  # pause threshold to avoid a hot error loop

        while self.running:
            try:
                # Read batch from Redis
                messages = self.redis_reader.read_batch()

                if not messages:
                    # No messages, sleep briefly
                    time.sleep(config.consumer.processing_interval_sec)
                    continue

                self.stats['messages_read'] += len(messages)
                self.logger.debug(f"Read {len(messages)} messages from Redis")

                # Transform messages to sensor readings
                readings = self._transform_messages(messages)

                if not readings:
                    self.logger.warning("No valid readings after transformation")
                    # Acknowledge the messages anyway (they were invalid)
                    self.redis_reader.acknowledge_batch(messages)
                    continue

                # Write to database
                # NOTE(review): db_writer.write_batch as written returns a bool,
                # but the branches below read result.success / result.rows_written /
                # result.error — that would raise AttributeError on the first
                # batch. write_batch should return a result object; confirm and
                # align the two modules.
                result = self.db_writer.write_batch(readings)

                if result.success:
                    # Successfully written, acknowledge the messages
                    ack_count = self.redis_reader.acknowledge_batch(messages)
                    self.stats['messages_written'] += result.rows_written
                    self.stats['batches_processed'] += 1
                    consecutive_errors = 0

                    self.logger.info(
                        f"Processed batch: {result.rows_written} readings written, "
                        f"{ack_count} messages acknowledged"
                    )
                else:
                    # Write failed, send to dead letter queue
                    self.logger.error(f"Failed to write batch: {result.error}")
                    self._handle_failed_batch(messages, result.error)
                    self.stats['messages_failed'] += len(messages)
                    self.stats['errors'] += 1
                    consecutive_errors += 1

                # Check for too many consecutive errors
                if consecutive_errors >= max_consecutive_errors:
                    self.logger.error(
                        f"Too many consecutive errors ({consecutive_errors}), "
                        "pausing for 30 seconds"
                    )
                    time.sleep(30)
                    consecutive_errors = 0

                # Brief pause between batches
                if config.consumer.processing_interval_sec > 0:
                    time.sleep(config.consumer.processing_interval_sec)

            except KeyboardInterrupt:
                self.logger.info("Keyboard interrupt received")
                break
            except Exception as e:
                self.logger.error(f"Error in processing loop: {e}", exc_info=True)
                self.stats['errors'] += 1
                consecutive_errors += 1
                time.sleep(5)  # Back off on errors

        self.logger.info("Processing loop terminated")

    def _transform_messages(self, messages: List[StreamMessage]) -> List[SensorReading]:
        """Transform stream messages to sensor readings.

        Messages that fail transformation are logged and dropped from the
        returned list; the caller still acknowledges them.
        """
        readings = []

        for msg in messages:
            reading = self.schema_handler.transform_message(msg)
            if reading:
                readings.append(reading)
            else:
                self.logger.warning(
                    f"Failed to transform message {msg.message_id} from {msg.stream_key}"
                )

        return readings

    def _handle_failed_batch(self, messages: List[StreamMessage], error: str):
        """Handle a batch that failed to write to database."""
        # Send all messages to dead letter queue
        for msg in messages:
            self.redis_reader.send_to_dead_letter(msg, error)

        # Acknowledge them so they don't block the consumer group
        self.redis_reader.acknowledge_batch(messages)

        self.logger.warning(f"Sent {len(messages)} messages to dead letter queue")

    def stop(self):
        """Stop the service gracefully: halt the loop, log stats, close connections."""
        if not self.running:
            return

        self.logger.info("Stopping service...")
        self.running = False

        # Print final statistics
        self.logger.info(
            f"Final statistics: "
            f"messages_read={self.stats['messages_read']}, "
            f"messages_written={self.stats['messages_written']}, "
            f"messages_failed={self.stats['messages_failed']}, "
            f"batches_processed={self.stats['batches_processed']}, "
            f"errors={self.stats['errors']}"
        )

        # Close connections
        if self.redis_reader:
            self.redis_reader.close()

        if self.db_writer:
            self.db_writer.close()

        self.logger.info("Service stopped")

    def health_check(self) -> dict:
        """Check service health.

        Returns:
            dict with 'running', 'redis' and 'database' booleans plus the
            live 'stats' counters.
        """
        health = {
            'running': self.running,
            'redis': False,
            'database': False,
            'stats': self.stats
        }

        if self.redis_reader:
            health['redis'] = self.redis_reader.health_check()

        if self.db_writer:
            health['database'] = self.db_writer.health_check()

        return health
|
||||
|
||||
|
||||
def main():
    """Entry point: configure logging, run the service, ensure clean shutdown."""
    configure_logging()
    logger = logging.getLogger(__name__)

    # Startup banner summarising the effective consumer configuration.
    banner = "=" * 60
    logger.info(banner)
    logger.info("Database Writer Service")
    logger.info(f"Consumer Group: {config.consumer.group_name}")
    logger.info(f"Consumer Name: {config.consumer.consumer_name}")
    logger.info(f"Batch Size: {config.consumer.batch_size}")
    logger.info(f"Stream Pattern: {config.stream.pattern}")
    logger.info(banner)

    service = DatabaseWriterService()

    try:
        if not service.start():
            logger.error("Service failed to start")
            sys.exit(1)
    except KeyboardInterrupt:
        logger.info("Received keyboard interrupt")
    except Exception as e:
        logger.error(f"Unexpected error: {e}", exc_info=True)
        sys.exit(1)
    finally:
        # Always attempt a graceful shutdown, even on SystemExit.
        service.stop()

    logger.info("Service exited")


if __name__ == '__main__':
    main()
|
||||
22
services/db_write/pyproject.toml
Normal file
22
services/db_write/pyproject.toml
Normal file
@@ -0,0 +1,22 @@
|
||||
[project]
|
||||
name = "db-write"
|
||||
version = "0.1.0"
|
||||
description = "Database writer service that reads from Redis streams and writes to PostgreSQL/TimescaleDB"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.13"
|
||||
dependencies = [
|
||||
"redis>=6.4.0",
|
||||
"psycopg2-binary>=2.9.0",
|
||||
"python-dotenv>=1.0.0",
|
||||
"pydantic>=2.0.0",
|
||||
"structlog>=24.0.0",
|
||||
"alembic>=1.13.0",
|
||||
"sqlalchemy>=2.0.0",
|
||||
]
|
||||
|
||||
[project.optional-dependencies]
|
||||
dev = [
|
||||
"pytest>=7.0.0",
|
||||
"fakeredis>=2.0.0",
|
||||
"pytest-asyncio>=0.21.0",
|
||||
]
|
||||
142
services/db_write/redis_reader.py
Normal file
142
services/db_write/redis_reader.py
Normal file
@@ -0,0 +1,142 @@
|
||||
|
||||
import redis
|
||||
import logging
|
||||
from typing import List, Optional, Dict
|
||||
from config import config
|
||||
from schema import SchemaHandler, StreamMessage
|
||||
|
||||
|
||||
class RedisReader:
    """Redis stream consumer with consumer groups for reliability.

    Reads sensor messages from one or more Redis streams via XREADGROUP so
    delivery is tracked per consumer group and unacknowledged entries can be
    re-delivered (e.g. after a crash) instead of being lost.
    """

    def __init__(self, streams: Optional[List[str]] = None):
        """Connect to Redis and resolve the set of streams to consume.

        Args:
            streams: Explicit list of stream keys to consume. When None,
                keys matching ``config.stream.pattern`` are discovered once
                at startup.

        Raises:
            redis.ConnectionError: if the initial ping() cannot reach Redis.
        """
        self.logger = logging.getLogger(__name__)
        self.schema_handler = SchemaHandler()

        # decode_responses=False: keys and field values arrive as bytes and
        # are decoded explicitly below / in SchemaHandler.parse_stream_entry.
        self.redis_client = redis.StrictRedis(
            host=config.redis.host,
            port=config.redis.port,
            db=config.redis.db,
            password=config.redis.password,
            decode_responses=False
        )

        # Fail fast at construction time if Redis is unreachable.
        self.redis_client.ping()
        self.logger.info(f"Connected to Redis at {config.redis.host}:{config.redis.port}")

        if streams:
            self.streams = streams
        else:
            # NOTE(review): KEYS is O(N) and blocks the server; consider SCAN
            # for large keyspaces. Also: streams created after startup are
            # never picked up, and a pattern like "mqtt_stream:*" presumably
            # matches the dead-letter stream too — confirm this is intended.
            pattern = config.stream.pattern
            keys = self.redis_client.keys(pattern)
            self.streams = [k.decode('utf-8') if isinstance(k, bytes) else k for k in keys]

        self.logger.info(f"Monitoring {len(self.streams)} streams")

        # Initialize consumer groups
        self._setup_consumer_groups()

    def _setup_consumer_groups(self):
        """Create the configured consumer group on every monitored stream."""
        group_name = config.consumer.group_name

        for stream in self.streams:
            try:
                # id='0': the group consumes the stream's full history;
                # mkstream=True creates an empty stream if it doesn't exist.
                self.redis_client.xgroup_create(stream, group_name, id='0', mkstream=True)
                self.logger.info(f"Created consumer group '{group_name}' for '{stream}'")
            except redis.exceptions.ResponseError as e:
                # BUSYGROUP means the group already exists — benign on restart.
                if 'BUSYGROUP' in str(e):
                    self.logger.debug(f"Consumer group '{group_name}' already exists for '{stream}'")
                else:
                    self.logger.error(f"Error creating consumer group for {stream}: {e}")

    def read_batch(self, batch_size: Optional[int] = None,
                   timeout_ms: Optional[int] = None) -> List[StreamMessage]:
        """Read a batch of messages from streams using the consumer group.

        Blocks up to ``timeout_ms`` waiting for never-delivered ('>') entries.
        Entries that fail parsing are skipped (they remain pending in the
        group until acknowledged or claimed elsewhere). Returns an empty list
        on timeout, on read error, or when no streams are configured.
        """
        if batch_size is None:
            batch_size = config.consumer.batch_size
        if timeout_ms is None:
            timeout_ms = config.consumer.block_time_ms

        if not self.streams:
            return []

        # Prepare stream dict for XREADGROUP; '>' requests only entries that
        # were never delivered to any consumer in the group.
        stream_dict = {stream: '>' for stream in self.streams}

        try:
            results = self.redis_client.xreadgroup(
                groupname=config.consumer.group_name,
                consumername=config.consumer.consumer_name,
                streams=stream_dict,
                count=batch_size,
                block=timeout_ms
            )

            if not results:
                return []

            # Parse results into StreamMessage objects
            messages = []
            for stream_key, entries in results:
                stream_name = stream_key.decode('utf-8') if isinstance(stream_key, bytes) else stream_key

                for message_id, fields in entries:
                    msg_id = message_id.decode('utf-8') if isinstance(message_id, bytes) else message_id

                    # parse_stream_entry returns None on malformed entries;
                    # those are silently dropped from the batch.
                    stream_msg = self.schema_handler.parse_stream_entry(stream_name, msg_id, fields)
                    if stream_msg:
                        messages.append(stream_msg)

            if messages:
                self.logger.debug(f"Read {len(messages)} messages")

            return messages

        except Exception as e:
            self.logger.error(f"Error reading from Redis: {e}")
            return []

    def acknowledge_batch(self, messages: List[StreamMessage]) -> int:
        """Acknowledge multiple messages at once.

        Groups message ids by origin stream so a single XACK is issued per
        stream. Returns the total count Redis reported as acknowledged; a
        failure on one stream does not prevent acking the others.
        """
        ack_count = 0

        # Group messages by stream
        by_stream: Dict[str, List[str]] = {}
        for msg in messages:
            if msg.stream_key not in by_stream:
                by_stream[msg.stream_key] = []
            by_stream[msg.stream_key].append(msg.message_id)

        # Acknowledge each stream's messages
        for stream_key, message_ids in by_stream.items():
            try:
                result = self.redis_client.xack(
                    stream_key,
                    config.consumer.group_name,
                    *message_ids
                )
                ack_count += result
            except Exception as e:
                self.logger.error(f"Failed to acknowledge messages from {stream_key}: {e}")

        if ack_count > 0:
            self.logger.debug(f"Acknowledged {ack_count} messages")

        return ack_count

    def health_check(self) -> bool:
        """Return True if a PING to Redis succeeds, False on any Redis error."""
        try:
            self.redis_client.ping()
            return True
        except redis.RedisError:
            return False

    def close(self):
        """Close the Redis connection, logging (not raising) any error."""
        try:
            self.redis_client.close()
            self.logger.info("Redis connection closed")
        except Exception as e:
            self.logger.error(f"Error closing Redis connection: {e}")
|
||||
198
services/db_write/schema.py
Normal file
198
services/db_write/schema.py
Normal file
@@ -0,0 +1,198 @@
|
||||
"""
|
||||
Schema definitions and data transformation logic.
|
||||
Handles conversion between Redis stream messages and database records.
|
||||
"""
|
||||
import json
from dataclasses import dataclass
from datetime import datetime, timezone
from typing import Optional, Dict, Any
|
||||
|
||||
|
||||
@dataclass
class StreamMessage:
    """Represents a message read from a Redis stream.

    Bundles the Redis bookkeeping identifiers (stream key and entry id,
    required later for XACK) with the decoded sensor payload fields.
    """
    stream_key: str  # full stream key, e.g. "mqtt_stream:{device_id}:{sensor_type}"
    message_id: str  # Redis entry id ("<ms>-<seq>"), needed for acknowledgement
    device_id: str
    sensor_type: str
    value: float
    timestamp: str  # raw timestamp string as published; parsed downstream
    metadata: Optional[Dict[str, Any]] = None

    @property
    def stream_name(self) -> str:
        """Return the full stream key.

        Fixed docstring: the previous one claimed the prefix was stripped,
        but no stripping happens — callers receive stream_key unchanged.
        """
        return self.stream_key
|
||||
|
||||
|
||||
@dataclass
class SensorReading:
    """A validated sensor measurement in the shape the database layer expects."""
    timestamp: datetime
    device_id: str
    sensor_type: str
    value: float
    metadata: Optional[Dict[str, Any]] = None

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the reading for database insertion.

        The metadata mapping is JSON-encoded into a string; an absent (or
        empty) mapping becomes None.
        """
        serialized_meta = json.dumps(self.metadata) if self.metadata else None
        return {
            'timestamp': self.timestamp,
            'device_id': self.device_id,
            'sensor_type': self.sensor_type,
            'value': self.value,
            'metadata': serialized_meta
        }
|
||||
|
||||
|
||||
@dataclass
class ValidationResult:
    """Outcome of validating a SensorReading."""
    # True when the reading passed every check; otherwise False with a reason.
    valid: bool
    error: Optional[str] = None  # human-readable failure reason; None on success
|
||||
|
||||
|
||||
class SchemaHandler:
    """Handles schema transformation and validation.

    Converts raw Redis stream entries into StreamMessage objects and then
    into validated SensorReading records ready for database insertion.
    """

    # Fallback timestamp layouts tried when ISO-8601 parsing fails.
    _TIMESTAMP_FORMATS = (
        '%Y-%m-%dT%H:%M:%S.%fZ',
        '%Y-%m-%dT%H:%M:%SZ',
        '%Y-%m-%d %H:%M:%S.%f',
        '%Y-%m-%d %H:%M:%S',
    )

    def __init__(self):
        self.logger = self._get_logger()

    def _get_logger(self):
        """Get logger instance (local import keeps module deps minimal)."""
        import logging
        return logging.getLogger(__name__)

    def transform_message(self, stream_message: StreamMessage) -> Optional[SensorReading]:
        """
        Transform a Redis stream message into a SensorReading.

        Parses the raw timestamp string, builds the reading, and validates
        it. Returns None (after logging) if parsing or validation fails.
        """
        try:
            # Parse timestamp
            timestamp = self._parse_timestamp(stream_message.timestamp)

            # Create sensor reading
            reading = SensorReading(
                timestamp=timestamp,
                device_id=stream_message.device_id,
                sensor_type=stream_message.sensor_type,
                value=float(stream_message.value),
                metadata=stream_message.metadata
            )

            # Validate the reading
            validation = self.validate_reading(reading)
            if not validation.valid:
                self.logger.error(f"Invalid reading: {validation.error}")
                return None

            return reading

        except Exception as e:
            self.logger.error(f"Failed to transform message: {e}", exc_info=True)
            return None

    def validate_reading(self, reading: SensorReading) -> ValidationResult:
        """Validate a sensor reading.

        Checks required fields and timestamp/value types; warns (without
        rejecting) when the value falls outside a generous sanity range.
        """
        try:
            # Check required fields
            if not reading.device_id:
                return ValidationResult(False, "device_id is required")

            if not reading.sensor_type:
                return ValidationResult(False, "sensor_type is required")

            if reading.value is None:
                return ValidationResult(False, "value is required")

            # Validate timestamp
            if not isinstance(reading.timestamp, datetime):
                return ValidationResult(False, "timestamp must be a datetime object")

            # Validate value is numeric
            if not isinstance(reading.value, (int, float)):
                return ValidationResult(False, "value must be numeric")

            # Check for reasonable value ranges (can be customized)
            if reading.value < -1000000 or reading.value > 1000000:
                self.logger.warning(f"Value {reading.value} is outside typical range")

            return ValidationResult(True)

        except Exception as e:
            return ValidationResult(False, f"Validation error: {str(e)}")

    def _parse_timestamp(self, timestamp_str: str) -> datetime:
        """Parse a timestamp string into a datetime object.

        Tries ISO-8601 first (mapping a trailing 'Z' to UTC), then the
        common layouts in _TIMESTAMP_FORMATS. Falls back to the current UTC
        time with a warning so a bad timestamp does not drop the reading.
        """
        # Try ISO format first
        try:
            return datetime.fromisoformat(timestamp_str.replace('Z', '+00:00'))
        except ValueError:
            pass

        # Try common formats
        for fmt in self._TIMESTAMP_FORMATS:
            try:
                return datetime.strptime(timestamp_str, fmt)
            except ValueError:
                continue

        # If all else fails, use current time and log a warning.
        # datetime.utcnow() is deprecated since Python 3.12 (project requires
        # >=3.13); use an aware UTC timestamp, consistent with the 'Z' ISO
        # path above which also yields an aware datetime.
        self.logger.warning(f"Could not parse timestamp '{timestamp_str}', using current time")
        return datetime.now(timezone.utc)

    def parse_stream_entry(self, stream_key: str, message_id: str, fields: Dict[bytes, bytes]) -> Optional[StreamMessage]:
        """
        Parse a raw Redis stream entry into a StreamMessage.

        Expected stream key format: mqtt_stream:{device_id}:{sensor_type}
        Expected fields: value, timestamp (or time), optionally metadata as
        a JSON-encoded object. Returns None (after logging) on a malformed
        key, missing field, or decode error.
        """
        try:
            # Extract device_id and sensor_type from stream key
            # Format: mqtt_stream:{device_id}:{sensor_type}
            parts = stream_key.split(':')
            if len(parts) < 3:
                self.logger.error(f"Invalid stream key format: {stream_key}")
                return None

            device_id = parts[1]
            sensor_type = ':'.join(parts[2:])  # Handle sensor types with colons

            # Extract fields from message
            value_bytes = fields.get(b'value')
            timestamp_bytes = fields.get(b'timestamp') or fields.get(b'time')

            if not value_bytes or not timestamp_bytes:
                self.logger.error(f"Missing required fields in message: {fields}")
                return None

            # Parse metadata if present (best-effort: invalid JSON is ignored)
            metadata = None
            metadata_bytes = fields.get(b'metadata')
            if metadata_bytes:
                try:
                    metadata = json.loads(metadata_bytes.decode('utf-8'))
                except json.JSONDecodeError:
                    self.logger.warning(f"Could not parse metadata: {metadata_bytes}")

            return StreamMessage(
                stream_key=stream_key,
                message_id=message_id,
                device_id=device_id,
                sensor_type=sensor_type,
                value=float(value_bytes.decode('utf-8')),
                timestamp=timestamp_bytes.decode('utf-8'),
                metadata=metadata
            )

        except Exception as e:
            self.logger.error(f"Failed to parse stream entry: {e}", exc_info=True)
            return None
|
||||
Reference in New Issue
Block a user