BMS/backend/services/alarm_engine.py
2026-03-19 11:32:17 +00:00

152 lines
9.6 KiB
Python

import logging
from sqlalchemy import text
from sqlalchemy.ext.asyncio import AsyncSession
logger = logging.getLogger(__name__)
# ── In-memory threshold cache ──────────────────────────────────────────────────
# _caches maps site_id -> list of rule dicts (sensor_type, threshold_value,
# direction, severity, message_template). It is loaded from the DB on first use
# and reloaded after the settings API invalidates a site.
# Falls back to hard-coded defaults if the DB has no rows yet (pre-seed).
_caches: dict[str, list[dict]] = {}
# Site ids whose cached rules are stale and must be re-read before the next use.
# NOTE(review): a site missing from _caches is loaded regardless of this set, so
# the initial "sg-01" entry only makes the start-up state explicit.
_dirty_sites: set[str] = {"sg-01"} # start dirty so first request loads from DB
def invalidate_threshold_cache(site_id: str = "sg-01") -> None:
    """Flag the cached threshold rules of ``site_id`` as out of date.

    Meant to be called by the settings API once thresholds have been changed.
    Nothing is reloaded here; the site id is only recorded as stale so that
    the rules are re-read from the database the next time the cache is
    checked for that site.
    """
    _dirty_sites.update((site_id,))
async def _ensure_cache(session: AsyncSession, site_id: str) -> None:
    """Make sure ``_caches[site_id]`` holds current threshold rules.

    Returns immediately when the site is cached and not marked stale.
    Otherwise the enabled rows of ``alarm_thresholds`` for the site are read
    (ordered by ``id``) and stored as a list of plain dicts. If the query
    returns no rows, a copy of the hard-coded ``_FALLBACK_RULES`` is stored
    instead.

    Args:
        session: Open async database session used for the lookup.
        site_id: Site whose rules should be available in the cache.

    Raises:
        Whatever ``session.execute`` raises; in that case the site is marked
        stale again so the next call retries the load.
    """
    if site_id not in _dirty_sites and site_id in _caches:
        return

    # Clear the stale flag *before* awaiting the query. If the settings API
    # invalidates the site while the query is in flight, the flag is set again
    # and survives, so the next call reloads. Clearing it afterwards would
    # silently erase that invalidation and leave outdated rules cached.
    _dirty_sites.discard(site_id)
    try:
        result = await session.execute(text("""
            SELECT sensor_type, threshold_value, direction, severity, message_template
            FROM alarm_thresholds
            WHERE site_id = :site_id AND enabled = true
            ORDER BY id
        """), {"site_id": site_id})
        rows = result.mappings().all()
    except BaseException:
        # Includes task cancellation: the load did not complete, so keep the
        # site flagged as stale and let the caller see the original error.
        _dirty_sites.add(site_id)
        raise

    if rows:
        rules = [dict(r) for r in rows]
    else:
        # DB not yet seeded — fall back to hard-coded defaults. Copy them so a
        # site's cache entry never aliases the shared module-level list.
        rules = [dict(r) for r in _FALLBACK_RULES]

    _caches[site_id] = rules
    logger.info("Loaded %d threshold rules for %s", len(rules), site_id)
async def check_and_update_alarms(
    session: AsyncSession,
    sensor_id: str,
    sensor_type: str,
    site_id: str,
    room_id: str | None,
    rack_id: str | None,
    value: float,
) -> None:
    """Raise or resolve alarms for one sensor reading.

    The cached threshold rules of ``site_id`` that match ``sensor_type`` are
    evaluated against ``value``. A rule is breached when its direction is
    ``"above"`` and ``value > threshold`` or ``"below"`` and
    ``value < threshold``.

    Active alarms are identified by ``(sensor_id, severity)``, so the decision
    is made once per severity rather than once per rule:

    * if at least one rule of a severity is breached and no active alarm of
      that severity exists for the sensor, one is inserted using the message
      of the first breached rule (in cache order);
    * if no rule of a severity is breached, active alarms of that severity for
      the sensor are marked ``resolved``.

    Deciding per rule would let a non-breached rule resolve the alarm that a
    sibling rule of the same severity had just raised, which inserted and
    resolved an alarm on every reading.

    Args:
        session: Open async database session; statements are executed on it
            but nothing is committed here.
        sensor_id: Identifier of the reporting sensor.
        sensor_type: Rule category to match against ``rule["sensor_type"]``.
        site_id: Site whose threshold rules apply.
        room_id: Stored on a newly raised alarm; may be ``None``.
        rack_id: Stored on a newly raised alarm; may be ``None``.
        value: The reading to compare against the thresholds.
    """
    await _ensure_cache(session, site_id)

    # severity -> first breached rule, or None when every matching rule of
    # that severity is within limits. Insertion order follows rule order.
    first_breach: dict[str, dict | None] = {}
    for rule in _caches.get(site_id, []):
        if rule["sensor_type"] != sensor_type:
            continue
        severity = rule["severity"]
        if first_breach.setdefault(severity, None) is not None:
            continue  # an earlier rule of this severity already fired
        threshold = rule["threshold_value"]
        direction = rule["direction"]
        breached = (
            (direction == "above" and value > threshold) or
            (direction == "below" and value < threshold)
        )
        if breached:
            first_breach[severity] = rule

    for severity, rule in first_breach.items():
        if rule is None:
            await session.execute(text("""
                UPDATE alarms
                SET state = 'resolved', resolved_at = NOW()
                WHERE sensor_id = :sid AND severity = :sev AND state = 'active'
            """), {"sid": sensor_id, "sev": severity})
            continue

        existing = await session.execute(text("""
            SELECT id FROM alarms
            WHERE sensor_id = :sid AND severity = :sev AND state = 'active'
            LIMIT 1
        """), {"sid": sensor_id, "sev": severity})
        if existing.fetchone():
            continue  # already active; do not raise a duplicate

        msg_tpl = rule["message_template"]
        try:
            message = msg_tpl.format(value=value, sensor_id=sensor_id)
        except (KeyError, IndexError, ValueError):
            # Templates can come from the database; a malformed placeholder
            # must not prevent the alarm itself from being raised.
            logger.warning(
                "Invalid alarm message template for %s/%s: %r",
                sensor_type, severity, msg_tpl,
            )
            message = msg_tpl

        await session.execute(text("""
            INSERT INTO alarms
            (sensor_id, site_id, room_id, rack_id, severity, message, state, triggered_at)
            VALUES
            (:sensor_id, :site_id, :room_id, :rack_id, :severity, :message, 'active', NOW())
        """), {
            "sensor_id": sensor_id, "site_id": site_id,
            "room_id": room_id, "rack_id": rack_id,
            "severity": severity, "message": message,
        })
        logger.info("Alarm raised [%s]: %s", severity, message)
# ── Hard-coded fallback (used before DB seed runs) ─────────────────────────────
# Default rule set, one dict per rule with the same keys the alarm_thresholds
# query returns. Each row below is:
#   (sensor_type, threshold_value, direction, severity, message_template)
# Message templates are rendered with str.format(value=..., sensor_id=...).
_FALLBACK_RULES: list[dict] = [
    dict(zip(
        ("sensor_type", "threshold_value", "direction", "severity", "message_template"),
        row,
    ))
    for row in (
        ("temperature", 28.0, "above", "warning", "Temperature elevated at {sensor_id}: {value:.1f}°C"),
        ("temperature", 32.0, "above", "critical", "Temperature critical at {sensor_id}: {value:.1f}°C"),
        ("humidity", 65.0, "above", "warning", "Humidity elevated at {sensor_id}: {value:.0f}%"),
        ("power_kw", 7.5, "above", "warning", "PDU load elevated at {sensor_id}: {value:.1f} kW"),
        ("power_kw", 9.5, "above", "critical", "PDU load critical at {sensor_id}: {value:.1f} kW"),
        ("ups_charge", 80.0, "below", "warning", "UPS battery low at {sensor_id}: {value:.0f}%"),
        ("ups_charge", 50.0, "below", "critical", "UPS battery critical at {sensor_id}: {value:.0f}%"),
        ("ups_state", 0.5, "above", "critical", "UPS switched to battery at {sensor_id} — mains power lost"),
        ("ups_state", 1.5, "above", "critical", "UPS overloaded at {sensor_id} — immediate risk of failure"),
        ("ups_load", 85.0, "above", "warning", "UPS load high at {sensor_id}: {value:.0f}%"),
        ("ups_load", 95.0, "above", "critical", "UPS load critical at {sensor_id}: {value:.0f}% — overload"),
        ("ups_runtime", 15.0, "below", "warning", "UPS runtime low at {sensor_id}: {value:.0f} min remaining"),
        ("ups_runtime", 5.0, "below", "critical", "UPS runtime critical at {sensor_id}: {value:.0f} min — imminent shutdown"),
        ("leak", 0.5, "above", "critical", "Water leak detected at {sensor_id}!"),
        ("cooling_cap_pct", 90.0, "above", "warning", "CRAC near capacity limit at {sensor_id}: {value:.1f}%"),
        ("cooling_cop", 1.5, "below", "warning", "CRAC running inefficiently at {sensor_id}: COP {value:.2f}"),
        ("cooling_comp_load", 95.0, "above", "warning", "CRAC compressor overloaded at {sensor_id}: {value:.1f}%"),
        ("cooling_high_press", 22.0, "above", "critical", "CRAC high refrigerant pressure at {sensor_id}: {value:.1f} bar"),
        ("cooling_low_press", 3.0, "below", "critical", "CRAC low refrigerant pressure at {sensor_id}: {value:.1f} bar — possible leak"),
        ("cooling_superheat", 16.0, "above", "warning", "CRAC discharge superheat high at {sensor_id}: {value:.1f}°C"),
        ("cooling_filter_dp", 80.0, "above", "warning", "CRAC filter requires attention at {sensor_id}: {value:.0f} Pa"),
        ("cooling_filter_dp", 120.0, "above", "critical", "CRAC filter critically blocked at {sensor_id}: {value:.0f} Pa — replace now"),
        ("cooling_return", 36.0, "above", "warning", "CRAC return air temperature high at {sensor_id}: {value:.1f}°C"),
        ("cooling_return", 42.0, "above", "critical", "CRAC return air temperature critical at {sensor_id}: {value:.1f}°C"),
        ("gen_fuel_pct", 25.0, "below", "warning", "Generator fuel low at {sensor_id}: {value:.1f}%"),
        ("gen_fuel_pct", 10.0, "below", "critical", "Generator fuel critical at {sensor_id}: {value:.1f}%"),
        ("gen_state", 0.5, "above", "warning", "Generator running at {sensor_id} — site is on standby power"),
        ("gen_state", -0.5, "below", "critical", "Generator fault at {sensor_id} — no standby power available"),
        ("gen_load_pct", 85.0, "above", "warning", "Generator load high at {sensor_id}: {value:.1f}%"),
        ("gen_load_pct", 95.0, "above", "critical", "Generator overloaded at {sensor_id}: {value:.1f}%"),
        ("gen_coolant_c", 95.0, "above", "warning", "Generator coolant temperature high at {sensor_id}: {value:.1f}°C"),
        ("gen_coolant_c", 105.0, "above", "critical", "Generator coolant critical at {sensor_id}: {value:.1f}°C — risk of shutdown"),
        ("gen_oil_press", 2.0, "below", "critical", "Generator oil pressure low at {sensor_id}: {value:.1f} bar"),
        ("pdu_imbalance", 5.0, "above", "warning", "PDU phase imbalance at {sensor_id}: {value:.1f}%"),
        ("pdu_imbalance", 15.0, "above", "critical", "PDU phase imbalance critical at {sensor_id}: {value:.1f}%"),
        ("ats_active", 1.5, "above", "warning", "ATS transferred to generator at {sensor_id} — utility power lost"),
        ("ats_ua_v", 50.0, "below", "critical", "Utility A power failure at {sensor_id} — supply lost"),
        ("chiller_state", 0.5, "below", "critical", "Chiller fault at {sensor_id} — CHW supply lost"),
        ("chiller_cop", 2.5, "below", "warning", "Chiller running inefficiently at {sensor_id}: COP {value:.2f}"),
        ("vesda_level", 0.5, "above", "warning", "VESDA smoke detected at {sensor_id}: level elevated"),
        ("vesda_level", 1.5, "above", "warning", "VESDA action threshold reached at {sensor_id}"),
        ("vesda_level", 2.5, "above", "critical", "VESDA FIRE ALARM at {sensor_id}!"),
        ("vesda_flow", 0.5, "below", "critical", "VESDA aspirator flow fault at {sensor_id} — detector may be compromised"),
        ("vesda_det1", 0.5, "below", "warning", "VESDA detector 1 fault at {sensor_id}"),
        ("vesda_det2", 0.5, "below", "warning", "VESDA detector 2 fault at {sensor_id}"),
        ("net_state", 0.5, "above", "warning", "Network switch degraded at {sensor_id}"),
        ("net_state", 1.5, "above", "critical", "Network switch down at {sensor_id} — connectivity lost"),
        ("net_pkt_loss_pct", 1.0, "above", "warning", "Packet loss detected at {sensor_id}: {value:.1f}%"),
        ("net_pkt_loss_pct", 5.0, "above", "critical", "High packet loss at {sensor_id}: {value:.1f}%"),
        ("net_temp_c", 65.0, "above", "warning", "Switch temperature high at {sensor_id}: {value:.1f}°C"),
        ("net_temp_c", 75.0, "above", "critical", "Switch temperature critical at {sensor_id}: {value:.1f}°C"),
    )
]