import logging

from sqlalchemy import text
from sqlalchemy.ext.asyncio import AsyncSession

logger = logging.getLogger(__name__)

# ── In-memory threshold cache ──────────────────────────────────────────────────
# Loaded from DB on first use; invalidated by settings API after updates.
# Falls back to hard-coded defaults if DB has no rows yet (pre-seed).
_caches: dict[str, list[dict]] = {}
_dirty_sites: set[str] = {"sg-01"}  # start dirty so first request loads from DB


def invalidate_threshold_cache(site_id: str = "sg-01") -> None:
    """Mark a site's cache as stale. Called by settings API after threshold changes.

    The next ``_ensure_cache`` call for ``site_id`` reloads its rules from the
    database.
    """
    _dirty_sites.add(site_id)


async def _ensure_cache(session: AsyncSession, site_id: str) -> None:
    """Populate ``_caches[site_id]`` with the site's enabled threshold rules.

    Does nothing when the site already has a cache entry that is not marked
    dirty. Otherwise the enabled rows of ``alarm_thresholds`` for the site are
    loaded in ``id`` order; if the query returns no rows, the hard-coded
    ``_FALLBACK_RULES`` are used instead.

    Args:
        session: Async SQLAlchemy session used to run the query.
        site_id: Site whose rules should be available in ``_caches``.
    """
    if site_id not in _dirty_sites and site_id in _caches:
        return

    # Clear the dirty flag *before* awaiting the query. If
    # invalidate_threshold_cache() runs while the query is in flight, the flag
    # is set again and the next call reloads; clearing it afterwards would
    # silently drop that invalidation and leave stale rules cached.
    _dirty_sites.discard(site_id)
    try:
        result = await session.execute(
            text("""
                SELECT sensor_type, threshold_value, direction, severity, message_template
                FROM alarm_thresholds
                WHERE site_id = :site_id AND enabled = true
                ORDER BY id
            """),
            {"site_id": site_id},
        )
        rows = result.mappings().all()
    except BaseException:
        # Load failed or was cancelled: keep the site marked stale, re-raise.
        _dirty_sites.add(site_id)
        raise

    if rows:
        _caches[site_id] = [dict(r) for r in rows]
    else:
        # DB not yet seeded — fall back to hard-coded defaults
        # NOTE(review): a site whose rows are all disabled also lands here,
        # because the query filters on enabled = true — confirm that is intended.
        _caches[site_id] = _FALLBACK_RULES
    logger.info("Loaded %d threshold rules for %s", len(_caches[site_id]), site_id)


def _render_message(template: str, sensor_id: str, value: float) -> str:
    """Format an alarm message, falling back to the raw template if it is malformed."""
    try:
        return template.format(value=value, sensor_id=sensor_id)
    except (KeyError, IndexError, ValueError):
        # Rules may come from the DB; a bad placeholder must not stop the
        # alarm from being raised.
        logger.warning(
            "Invalid alarm message template %r; using it unformatted", template
        )
        return template


async def check_and_update_alarms(
    session: AsyncSession,
    sensor_id: str,
    sensor_type: str,
    site_id: str,
    room_id: str | None,
    rack_id: str | None,
    value: float,
) -> None:
    """Raise or resolve alarms for one sensor reading.

    The reading is compared with every cached rule of the site whose
    ``sensor_type`` matches. Alarms are tracked per (sensor, severity):

    * if at least one rule of a severity is breached and no active alarm of
      that severity exists for the sensor, a new ``active`` alarm row is
      inserted, using the message of the first breached rule in rule order;
    * if no rule of a severity is breached, active alarms of that severity for
      the sensor are set to ``resolved``.

    Statements are executed on ``session``; this function does not commit.

    Args:
        session: Async SQLAlchemy session used for all statements.
        sensor_id: Sensor identifier; stored on the alarm and used in messages.
        sensor_type: Selects which rules apply to this reading.
        site_id: Site whose rule set is used; stored on the alarm.
        room_id: Stored on a newly raised alarm; may be ``None``.
        rack_id: Stored on a newly raised alarm; may be ``None``.
        value: The reading compared against each rule's threshold.
    """
    await _ensure_cache(session, site_id)

    # severity -> first breached rule, or None when no rule of that severity
    # is breached. Aggregating first matters because several rules can share a
    # severity (e.g. two "critical" ups_state rules): deciding rule by rule
    # would let a later, non-breached rule resolve the alarm that an earlier,
    # breached rule of the same severity has just raised.
    first_breach: dict[str, dict | None] = {}
    for rule in _caches.get(site_id, []):
        if rule["sensor_type"] != sensor_type:
            continue
        severity = rule["severity"]
        if first_breach.get(severity) is not None:
            continue  # already have the first breached rule for this severity
        threshold = rule["threshold_value"]
        direction = rule["direction"]
        breached = (
            (direction == "above" and value > threshold)
            or (direction == "below" and value < threshold)
        )
        first_breach[severity] = rule if breached else None

    for severity, rule in first_breach.items():
        if rule is None:
            # Nothing breached at this severity: close any active alarm.
            await session.execute(
                text("""
                    UPDATE alarms
                    SET state = 'resolved', resolved_at = NOW()
                    WHERE sensor_id = :sid AND severity = :sev AND state = 'active'
                """),
                {"sid": sensor_id, "sev": severity},
            )
            continue

        existing = await session.execute(
            text("""
                SELECT id FROM alarms
                WHERE sensor_id = :sid AND severity = :sev AND state = 'active'
                LIMIT 1
            """),
            {"sid": sensor_id, "sev": severity},
        )
        if existing.fetchone():
            continue  # an active alarm of this severity already exists

        message = _render_message(rule["message_template"], sensor_id, value)
        await session.execute(
            text("""
                INSERT INTO alarms
                    (sensor_id, site_id, room_id, rack_id, severity, message, state, triggered_at)
                VALUES
                    (:sensor_id, :site_id, :room_id, :rack_id, :severity, :message, 'active', NOW())
            """),
            {
                "sensor_id": sensor_id,
                "site_id": site_id,
                "room_id": room_id,
                "rack_id": rack_id,
                "severity": severity,
                "message": message,
            },
        )
        logger.info("Alarm raised [%s]: %s", severity, message)


# ── Hard-coded fallback (used before DB seed runs) ─────────────────────────────
# Each tuple is (sensor_type, threshold_value, direction, severity, message_template).
_FALLBACK_RULES: list[dict] = [
    {"sensor_type": st, "threshold_value": tv, "direction": d, "severity": s, "message_template": m}
    for st, tv, d, s, m in [
        ("temperature", 28.0, "above", "warning", "Temperature elevated at {sensor_id}: {value:.1f}°C"),
        ("temperature", 32.0, "above", "critical", "Temperature critical at {sensor_id}: {value:.1f}°C"),
        ("humidity", 65.0, "above", "warning", "Humidity elevated at {sensor_id}: {value:.0f}%"),
        ("power_kw", 7.5, "above", "warning", "PDU load elevated at {sensor_id}: {value:.1f} kW"),
        ("power_kw", 9.5, "above", "critical", "PDU load critical at {sensor_id}: {value:.1f} kW"),
        ("ups_charge", 80.0, "below", "warning", "UPS battery low at {sensor_id}: {value:.0f}%"),
        ("ups_charge", 50.0, "below", "critical", "UPS battery critical at {sensor_id}: {value:.0f}%"),
        ("ups_state", 0.5, "above", "critical", "UPS switched to battery at {sensor_id} — mains power lost"),
        ("ups_state", 1.5, "above", "critical", "UPS overloaded at {sensor_id} — immediate risk of failure"),
        ("ups_load", 85.0, "above", "warning", "UPS load high at {sensor_id}: {value:.0f}%"),
        ("ups_load", 95.0, "above", "critical", "UPS load critical at {sensor_id}: {value:.0f}% — overload"),
        ("ups_runtime", 15.0, "below", "warning", "UPS runtime low at {sensor_id}: {value:.0f} min remaining"),
        ("ups_runtime", 5.0, "below", "critical", "UPS runtime critical at {sensor_id}: {value:.0f} min — imminent shutdown"),
        ("leak", 0.5, "above", "critical", "Water leak detected at {sensor_id}!"),
        ("cooling_cap_pct", 90.0, "above", "warning", "CRAC near capacity limit at {sensor_id}: {value:.1f}%"),
        ("cooling_cop", 1.5, "below", "warning", "CRAC running inefficiently at {sensor_id}: COP {value:.2f}"),
        ("cooling_comp_load", 95.0, "above", "warning", "CRAC compressor overloaded at {sensor_id}: {value:.1f}%"),
        ("cooling_high_press", 22.0, "above", "critical", "CRAC high refrigerant pressure at {sensor_id}: {value:.1f} bar"),
        ("cooling_low_press", 3.0, "below", "critical", "CRAC low refrigerant pressure at {sensor_id}: {value:.1f} bar — possible leak"),
        ("cooling_superheat", 16.0, "above", "warning", "CRAC discharge superheat high at {sensor_id}: {value:.1f}°C"),
        ("cooling_filter_dp", 80.0, "above", "warning", "CRAC filter requires attention at {sensor_id}: {value:.0f} Pa"),
        ("cooling_filter_dp", 120.0, "above", "critical", "CRAC filter critically blocked at {sensor_id}: {value:.0f} Pa — replace now"),
        ("cooling_return", 36.0, "above", "warning", "CRAC return air temperature high at {sensor_id}: {value:.1f}°C"),
        ("cooling_return", 42.0, "above", "critical", "CRAC return air temperature critical at {sensor_id}: {value:.1f}°C"),
        ("gen_fuel_pct", 25.0, "below", "warning", "Generator fuel low at {sensor_id}: {value:.1f}%"),
        ("gen_fuel_pct", 10.0, "below", "critical", "Generator fuel critical at {sensor_id}: {value:.1f}%"),
        ("gen_state", 0.5, "above", "warning", "Generator running at {sensor_id} — site is on standby power"),
        ("gen_state", -0.5, "below", "critical", "Generator fault at {sensor_id} — no standby power available"),
        ("gen_load_pct", 85.0, "above", "warning", "Generator load high at {sensor_id}: {value:.1f}%"),
        ("gen_load_pct", 95.0, "above", "critical", "Generator overloaded at {sensor_id}: {value:.1f}%"),
        ("gen_coolant_c", 95.0, "above", "warning", "Generator coolant temperature high at {sensor_id}: {value:.1f}°C"),
        ("gen_coolant_c", 105.0, "above", "critical", "Generator coolant critical at {sensor_id}: {value:.1f}°C — risk of shutdown"),
        ("gen_oil_press", 2.0, "below", "critical", "Generator oil pressure low at {sensor_id}: {value:.1f} bar"),
        ("pdu_imbalance", 5.0, "above", "warning", "PDU phase imbalance at {sensor_id}: {value:.1f}%"),
        ("pdu_imbalance", 15.0, "above", "critical", "PDU phase imbalance critical at {sensor_id}: {value:.1f}%"),
        ("ats_active", 1.5, "above", "warning", "ATS transferred to generator at {sensor_id} — utility power lost"),
        ("ats_ua_v", 50.0, "below", "critical", "Utility A power failure at {sensor_id} — supply lost"),
        ("chiller_state", 0.5, "below", "critical", "Chiller fault at {sensor_id} — CHW supply lost"),
        ("chiller_cop", 2.5, "below", "warning", "Chiller running inefficiently at {sensor_id}: COP {value:.2f}"),
        ("vesda_level", 0.5, "above", "warning", "VESDA smoke detected at {sensor_id}: level elevated"),
        ("vesda_level", 1.5, "above", "warning", "VESDA action threshold reached at {sensor_id}"),
        ("vesda_level", 2.5, "above", "critical", "VESDA FIRE ALARM at {sensor_id}!"),
        ("vesda_flow", 0.5, "below", "critical", "VESDA aspirator flow fault at {sensor_id} — detector may be compromised"),
        ("vesda_det1", 0.5, "below", "warning", "VESDA detector 1 fault at {sensor_id}"),
        ("vesda_det2", 0.5, "below", "warning", "VESDA detector 2 fault at {sensor_id}"),
        ("net_state", 0.5, "above", "warning", "Network switch degraded at {sensor_id}"),
        ("net_state", 1.5, "above", "critical", "Network switch down at {sensor_id} — connectivity lost"),
        ("net_pkt_loss_pct", 1.0, "above", "warning", "Packet loss detected at {sensor_id}: {value:.1f}%"),
        ("net_pkt_loss_pct", 5.0, "above", "critical", "High packet loss at {sensor_id}: {value:.1f}%"),
        ("net_temp_c", 65.0, "above", "warning", "Switch temperature high at {sensor_id}: {value:.1f}°C"),
        ("net_temp_c", 75.0, "above", "critical", "Switch temperature critical at {sensor_id}: {value:.1f}°C"),
    ]
]