Working device manager, added folders for better organization.

This commit is contained in:
ferdzo
2025-10-30 14:26:08 +01:00
parent 12d3720421
commit 7446e9b4ac
21 changed files with 342 additions and 47 deletions


@@ -0,0 +1,139 @@
"""
Configuration management for the database writer service.
Loads settings from environment variables with sensible defaults.
"""
import os
from dataclasses import dataclass
from typing import Optional
import dotenv
dotenv.load_dotenv()
@dataclass
class RedisConfig:
"""Redis connection configuration"""
host: str
port: int = 6379
db: int = 0
password: Optional[str] = None
@dataclass
class DatabaseConfig:
"""Database connection configuration"""
url: Optional[str] = None
host: Optional[str] = None
port: int = 5432
name: Optional[str] = None
user: Optional[str] = None
password: Optional[str] = None
table_name: str = "sensor_readings"
enable_timescale: bool = False
def get_connection_string(self) -> str:
"""Build connection string from components or return URL"""
if self.url:
return self.url
if not all([self.host, self.name, self.user, self.password]):
raise ValueError("Either DATABASE_URL or all DB_* variables must be set")
return f"postgresql://{self.user}:{self.password}@{self.host}:{self.port}/{self.name}"


@dataclass
class ConsumerConfig:
    """Redis consumer group configuration"""

    group_name: str = "db_writer"
    consumer_name: str = "worker-01"
    batch_size: int = 100
    batch_timeout_sec: int = 5
    processing_interval_sec: float = 1.0
    block_time_ms: int = 5000


@dataclass
class StreamConfig:
    """Redis stream configuration"""

    pattern: str = "mqtt_stream:*"
    dead_letter_stream: str = "mqtt_stream:failed"
    max_retries: int = 3
    trim_maxlen: int = 10000  # Keep last N messages in each stream


@dataclass
class LogConfig:
    """Logging configuration"""

    level: str = "INFO"
    format: str = "json"  # json or console


class Config:
    """Main configuration class"""

    def __init__(self):
        self.redis = RedisConfig(
            host=os.getenv("REDIS_HOST", "localhost"),
            port=int(os.getenv("REDIS_PORT", "6379")),
            db=int(os.getenv("REDIS_DB", "0")),
            password=os.getenv("REDIS_PASSWORD") or None,
        )
        self.database = DatabaseConfig(
            url=os.getenv("DATABASE_URL"),
            host=os.getenv("DB_HOST"),
            port=int(os.getenv("DB_PORT", "5432")),
            name=os.getenv("DB_NAME"),
            user=os.getenv("DB_USER"),
            password=os.getenv("DB_PASSWORD"),
            table_name=os.getenv("TABLE_NAME", "sensor_readings"),
            enable_timescale=os.getenv("ENABLE_TIMESCALE", "false").lower() == "true",
        )
        self.consumer = ConsumerConfig(
            group_name=os.getenv("CONSUMER_GROUP_NAME", "db_writer"),
            consumer_name=os.getenv("CONSUMER_NAME", "worker-01"),
            batch_size=int(os.getenv("BATCH_SIZE", "100")),
            batch_timeout_sec=int(os.getenv("BATCH_TIMEOUT_SEC", "5")),
            processing_interval_sec=float(os.getenv("PROCESSING_INTERVAL_SEC", "1.0")),
            block_time_ms=int(os.getenv("BLOCK_TIME_MS", "5000")),
        )
        self.stream = StreamConfig(
            max_retries=int(os.getenv("MAX_RETRIES", "3")),
            trim_maxlen=int(os.getenv("TRIM_MAXLEN", "10000")),
        )
        self.log = LogConfig(
            level=os.getenv("LOG_LEVEL", "INFO"),
            format=os.getenv("LOG_FORMAT", "json"),
        )

    def validate(self):
        """Validate configuration"""
        errors = []
        if not self.redis.host:
            errors.append("REDIS_HOST is required")
        try:
            self.database.get_connection_string()
        except ValueError as e:
            errors.append(str(e))
        if self.consumer.batch_size < 1:
            errors.append("BATCH_SIZE must be >= 1")
        if errors:
            raise ValueError(f"Configuration errors: {', '.join(errors)}")
        return True


# Module-level singleton shared by the rest of the service
config = Config()
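
A minimal sketch of how the module-level config singleton might be exercised from a service entry point; the environment variable names are the ones read above, while the values and the entry-point context are invented for illustration.

import os

# Config() reads the environment when src.config is imported, so set any
# overrides first (values here are made up).
os.environ.setdefault("REDIS_HOST", "localhost")
os.environ.setdefault("DATABASE_URL", "postgresql://app:secret@localhost:5432/telemetry")

from src.config import config

try:
    config.validate()
except ValueError as e:
    # validate() collects every problem into a single message, e.g.
    # "Configuration errors: REDIS_HOST is required"
    raise SystemExit(f"Bad configuration: {e}")

print(config.consumer.batch_size)  # 100 unless BATCH_SIZE is set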


@@ -0,0 +1,80 @@
import logging
from typing import List

from sqlalchemy import create_engine, text
from sqlalchemy.orm import sessionmaker
from sqlalchemy.pool import QueuePool

from src.config import config
from src.schema import TelemetryReading


class DatabaseWriter:
    """
    Database writer for telemetry data.
    """

    def __init__(self):
        self.logger = logging.getLogger(__name__)
        connection_string = config.database.get_connection_string()
        self.engine = create_engine(
            connection_string,
            poolclass=QueuePool,
            pool_size=5,
            max_overflow=10,
            pool_pre_ping=True,  # transparently replaces stale pooled connections
        )
        self.SessionLocal = sessionmaker(bind=self.engine)
        self.logger.info("Database writer initialized")

    def write_batch(self, readings: List[TelemetryReading]) -> bool:
        """Write a batch of telemetry readings to the database"""
        if not readings:
            return True
        session = self.SessionLocal()
        try:
            # TelemetryReading is a plain dataclass (see src.schema), not a
            # mapped ORM class, so the rows are inserted with one parameterized
            # executemany statement rather than bulk_save_objects().
            stmt = text(
                f"INSERT INTO {config.database.table_name} "
                "(time, device_id, metric, value, unit) "
                "VALUES (:time, :device_id, :metric, :value, :unit)"
            )
            session.execute(stmt, [reading.to_dict() for reading in readings])
            session.commit()
            self.logger.debug(f"Wrote {len(readings)} readings to database")
            return True
        except Exception as e:
            self.logger.error(f"Failed to write batch: {e}")
            session.rollback()
            return False
        finally:
            session.close()

    def health_check(self) -> bool:
        """Check if database connection is healthy"""
        try:
            with self.engine.connect() as conn:
                return conn.execute(text("SELECT 1")).scalar() == 1
        except Exception as e:
            self.logger.error(f"Database health check failed: {e}")
            return False

    def close(self):
        """Close database engine and all pooled connections"""
        if hasattr(self, "engine") and self.engine:
            self.engine.dispose()
            self.logger.info("Database engine closed")


@@ -0,0 +1,145 @@
import logging
from typing import Dict, List, Optional

import redis

from src.config import config
from src.schema import SchemaHandler, StreamMessage


class RedisReader:
    """Redis stream consumer with consumer groups for reliability"""

    def __init__(self, stream_name: str = "mqtt:ingestion"):
        self.logger = logging.getLogger(__name__)
        self.schema_handler = SchemaHandler()
        self.redis_client = redis.StrictRedis(
            host=config.redis.host,
            port=config.redis.port,
            db=config.redis.db,
            password=config.redis.password,
            decode_responses=False,
        )
        self.redis_client.ping()
        self.logger.info(
            f"Connected to Redis at {config.redis.host}:{config.redis.port}"
        )
        # Use a single stream instead of pattern matching
        self.stream_name = stream_name
        self.logger.info(f"Monitoring stream: {self.stream_name}")
        # Initialize the consumer group for the single stream
        self._setup_consumer_group()

    def _setup_consumer_group(self):
        """Create consumer group for the single stream"""
        group_name = config.consumer.group_name
        try:
            self.redis_client.xgroup_create(
                self.stream_name, group_name, id="0", mkstream=True
            )
            self.logger.info(
                f"Created consumer group '{group_name}' for '{self.stream_name}'"
            )
        except redis.exceptions.ResponseError as e:
            if "BUSYGROUP" in str(e):
                self.logger.debug(
                    f"Consumer group '{group_name}' already exists for '{self.stream_name}'"
                )
            else:
                self.logger.error(f"Error creating consumer group: {e}")

    def read_batch(
        self, batch_size: Optional[int] = None, timeout_ms: Optional[int] = None
    ) -> List[StreamMessage]:
        """Read a batch of messages from the single stream using the consumer group"""
        if batch_size is None:
            batch_size = config.consumer.batch_size
        if timeout_ms is None:
            timeout_ms = config.consumer.block_time_ms
        # ">" asks for messages never delivered to this consumer group
        stream_dict = {self.stream_name: ">"}
        try:
            results = self.redis_client.xreadgroup(
                groupname=config.consumer.group_name,
                consumername=config.consumer.consumer_name,
                streams=stream_dict,
                count=batch_size,
                block=timeout_ms,
            )
            if not results:
                return []
            # Parse results into StreamMessage objects
            messages = []
            for stream_key, entries in results:
                for message_id, fields in entries:
                    msg_id = (
                        message_id.decode("utf-8")
                        if isinstance(message_id, bytes)
                        else message_id
                    )
                    # Parse with the new format (device_id and metric in the payload)
                    stream_msg = self.schema_handler.parse_stream_entry_new_format(
                        self.stream_name, msg_id, fields
                    )
                    if stream_msg:
                        messages.append(stream_msg)
            if messages:
                self.logger.debug(f"Read {len(messages)} messages")
            return messages
        except Exception as e:
            self.logger.error(f"Error reading from Redis: {e}")
            return []

    def acknowledge_batch(self, messages: List[StreamMessage]) -> int:
        """Acknowledge multiple messages at once"""
        ack_count = 0
        # Group message ids by stream so each stream needs only one XACK call
        by_stream: Dict[str, List[str]] = {}
        for msg in messages:
            by_stream.setdefault(msg.stream_key, []).append(msg.message_id)
        for stream_key, message_ids in by_stream.items():
            try:
                result = self.redis_client.xack(
                    stream_key, config.consumer.group_name, *message_ids
                )
                ack_count += result
            except Exception as e:
                self.logger.error(
                    f"Failed to acknowledge messages from {stream_key}: {e}"
                )
        if ack_count > 0:
            self.logger.debug(f"Acknowledged {ack_count} messages")
        return ack_count

    def health_check(self) -> bool:
        """Check if Redis connection is healthy"""
        try:
            self.redis_client.ping()
            return True
        except redis.RedisError:
            return False

    def close(self):
        """Close Redis connection"""
        try:
            self.redis_client.close()
            self.logger.info("Redis connection closed")
        except Exception as e:
            self.logger.error(f"Error closing Redis connection: {e}")


@@ -0,0 +1,203 @@
"""
Schema definitions and data transformation logic.
Handles conversion between Redis stream messages and database records.
"""
from dataclasses import dataclass
from datetime import datetime
from typing import Optional, Dict, Any
import json
@dataclass
class StreamMessage:
"""Represents a message from Redis stream"""
stream_key: str
message_id: str
device_id: str
sensor_type: str
value: float
timestamp: str
metadata: Optional[Dict[str, Any]] = None
@property
def stream_name(self) -> str:
"""Return the stream name without prefix"""
return self.stream_key
@dataclass
class TelemetryReading:
"""Represents a telemetry reading ready for database insertion - matches Telemetry model"""
time: datetime
device_id: str
metric: str # renamed from sensor_type
value: float
unit: Optional[str] = None
def to_dict(self) -> Dict[str, Any]:
"""Convert to dictionary for database insertion"""
return {
"time": self.time,
"device_id": self.device_id,
"metric": self.metric,
"value": self.value,
"unit": self.unit,
}
@dataclass
class ValidationResult:
"""Result of data validation"""
valid: bool
error: Optional[str] = None
class SchemaHandler:
"""Handles schema transformation and validation"""
def __init__(self):
self.logger = self._get_logger()
def _get_logger(self):
"""Get logger instance"""
import logging
return logging.getLogger(__name__)
def transform_message(
self, stream_message: StreamMessage
) -> Optional[TelemetryReading]:
"""
Transform a Redis stream message into a TelemetryReading.
Returns None if transformation fails.
"""
try:
timestamp = self._parse_timestamp(stream_message.timestamp)
reading = TelemetryReading(
time=timestamp,
device_id=stream_message.device_id,
metric=stream_message.sensor_type, # sensor_type maps to metric
value=float(stream_message.value),
unit=stream_message.metadata.get("unit")
if stream_message.metadata
else None,
)
# Validate the reading
validation = self.validate_reading(reading)
if not validation.valid:
self.logger.error(f"Invalid reading: {validation.error}")
return None
return reading
except Exception as e:
self.logger.error(f"Failed to transform message: {e}", exc_info=True)
return None
def validate_reading(self, reading: TelemetryReading) -> ValidationResult:
"""Validate a telemetry reading"""
try:
# Check required fields
if not reading.device_id:
return ValidationResult(False, "device_id is required")
if not reading.metric:
return ValidationResult(False, "metric is required")
if reading.value is None:
return ValidationResult(False, "value is required")
if not isinstance(reading.time, datetime):
return ValidationResult(False, "time must be a datetime object")
if not isinstance(reading.value, (int, float)):
return ValidationResult(False, "value must be numeric")
if reading.value < -1000000 or reading.value > 1000000:
self.logger.warning(f"Value {reading.value} is outside typical range")
return ValidationResult(True)
except Exception as e:
return ValidationResult(False, f"Validation error: {str(e)}")
def _parse_timestamp(self, timestamp_str: str) -> datetime:
"""Parse timestamp string into datetime object"""
try:
return datetime.fromisoformat(timestamp_str.replace("Z", "+00:00"))
except ValueError:
pass
formats = [
"%Y-%m-%dT%H:%M:%S.%fZ",
"%Y-%m-%dT%H:%M:%SZ",
"%Y-%m-%d %H:%M:%S.%f",
"%Y-%m-%d %H:%M:%S",
]
for fmt in formats:
try:
return datetime.strptime(timestamp_str, fmt)
except ValueError:
continue
self.logger.warning(
f"Could not parse timestamp '{timestamp_str}', using current time"
)
return datetime.utcnow()
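
    # Illustrative behavior (inputs made up):
    #   _parse_timestamp("2025-10-30T13:26:08Z")  -> timezone-aware datetime (UTC)
    #   _parse_timestamp("not-a-timestamp")       -> datetime.utcnow() plus a warning
    # Readings parsed via the ISO path can be timezone-aware while the strptime
    # fallbacks produce naive datetimes, so mixed inputs yield mixed kinds.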

    def parse_stream_entry_new_format(
        self, stream_key: str, message_id: str, fields: Dict[bytes, bytes]
    ) -> Optional[StreamMessage]:
        """
        Parse a raw Redis stream entry with the NEW single-stream format.
        Expected fields: device_id, metric, value, timestamp
        """
        try:
            # Extract fields from the message (device_id and metric are now in the payload)
            device_id_bytes = fields.get(b"device_id")
            metric_bytes = fields.get(b"metric")
            value_bytes = fields.get(b"value")
            timestamp_bytes = fields.get(b"timestamp") or fields.get(b"time")
            if not all([device_id_bytes, metric_bytes, value_bytes, timestamp_bytes]):
                self.logger.error(f"Missing required fields in message: {fields}")
                return None
            # Parse optional JSON metadata if present
            metadata = None
            metadata_bytes = fields.get(b"metadata")
            if metadata_bytes:
                try:
                    metadata = json.loads(metadata_bytes.decode("utf-8"))
                except json.JSONDecodeError:
                    self.logger.warning(f"Could not parse metadata: {metadata_bytes}")
            return StreamMessage(
                stream_key=stream_key,
                message_id=message_id,
                device_id=device_id_bytes.decode("utf-8"),
                sensor_type=metric_bytes.decode("utf-8"),
                value=float(value_bytes.decode("utf-8")),
                timestamp=timestamp_bytes.decode("utf-8"),
                metadata=metadata,
            )
        except Exception as e:
            self.logger.error(f"Failed to parse stream entry: {e}", exc_info=True)
            return None

    def parse_stream_entry(
        self, stream_key: str, message_id: str, fields: Dict[bytes, bytes]
    ) -> Optional[StreamMessage]:
        """
        DEPRECATED: old format with the device_id embedded in the stream key.
        Kept for backward compatibility; use parse_stream_entry_new_format() instead.
        """
        return self.parse_stream_entry_new_format(stream_key, message_id, fields)
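
Finally, a small sketch of the two-step decode path, fed a hand-built fields dict shaped like what redis-py returns with decode_responses=False; the message id and field values are invented for the example.

from src.schema import SchemaHandler

handler = SchemaHandler()
fields = {  # raw entry as redis-py delivers it: bytes keys and values
    b"device_id": b"device-01",
    b"metric": b"temperature",
    b"value": b"21.5",
    b"timestamp": b"2025-10-30T13:26:08Z",
    b"metadata": b'{"unit": "C"}',
}

msg = handler.parse_stream_entry_new_format("mqtt:ingestion", "1730291168000-0", fields)
reading = handler.transform_message(msg) if msg else None
if reading:
    print(reading.to_dict())
    # {'time': datetime.datetime(2025, 10, 30, 13, 26, 8, tzinfo=...),
    #  'device_id': 'device-01', 'metric': 'temperature', 'value': 21.5, 'unit': 'C'}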