feat(live): task 1.5.4 — broadcast consumer group and fan-out

Adds the per-instance Redis Stream consumer group (live-broadcast-{instance_id})
that reads the telemetry stream and fans out each position to subscribed
WebSocket connections without affecting the durable-write consumer path.

Key changes:
- src/shared/codec.ts: moved decodePosition/CodecError out of src/core/ so
  src/live/broadcast.ts can decode positions without crossing the enforced
  src/core/ ↔ src/live/ boundary; src/core/codec.ts now re-exports from there
- src/shared/types.ts: added Position and AttributeValue (same move, same reason);
  src/core/types.ts re-exports both to preserve existing import paths
- src/live/broadcast.ts: createBroadcastConsumer factory — XREADGROUP loop,
  immediate ACK semantics, toPositionMessage mapper, fanOut per event/topic
- src/live/device-event-map.ts: createDeviceEventMap factory — in-memory cache
  of entry_devices × entries join, refreshed every LIVE_DEVICE_EVENT_REFRESH_MS
- src/db/migrations/0002_positions_faulty.sql: adds faulty boolean column and
  positions_device_ts_idx for snapshot-on-subscribe query (task 1.5.5)
- src/main.ts: wired authClient, authzClient, registry, liveServer,
  deviceEventMap, broadcastConsumer; shutdown chain: liveServer → deviceEventMap
  + broadcastConsumer → durable-write consumer → metricsServer → Redis → Postgres
- test/live-broadcast.test.ts: 4 unit tests covering single subscriber, multiple
  subscribers, orphan device, and multi-event device fan-out
This commit is contained in:
2026-05-02 17:52:33 +02:00
parent 90605614f6
commit fbb1f34e9a
9 changed files with 1130 additions and 258 deletions
+118
View File
@@ -0,0 +1,118 @@
/**
* In-memory cache of device → event mappings.
*
* The fan-out loop needs to answer "which events does this device belong to?"
* for every position record. The naive answer — query Postgres on each record —
* is wrong at any meaningful throughput. This module caches the full
* `entry_devices ⋈ entries` join in memory and refreshes it on a configurable
* cadence (default: every 30 s).
*
* Staleness window: up to LIVE_DEVICE_EVENT_REFRESH_MS. This is acceptable for
* pilot — operators register devices before the event starts, and "the device
* appeared on the map after 30 s" is a tolerable UX gap. Phase 3+ can add
* invalidation signals if needed.
*
* Spec: processor-ws-contract.md §Multi-instance behaviour;
* task 1.5.4 §DeviceEventMap design
*/
import type pg from 'pg';
import type { Logger } from 'pino';
import type { Metrics } from '../shared/types.js';
import type { Config } from '../config/load.js';
// ---------------------------------------------------------------------------
// Public interface
// ---------------------------------------------------------------------------
/**
* Handle returned by `createDeviceEventMap`: a read-only view over the cached
* device → event mapping plus the controls for its periodic refresh.
*/
export type DeviceEventMap = {
/**
* Returns the event IDs the device is currently registered to, according to
* the most recently loaded snapshot. A device that is not in the snapshot
* yields an empty array. Each call returns a fresh array.
*/
readonly lookup: (deviceId: string) => readonly string[];
/**
* Starts the refresh timer. Immediately runs the first refresh and resolves
* once that refresh has finished. A failed query is logged inside the
* refresh rather than thrown, so the returned promise still resolves when
* the initial load fails (the map is then empty until a later refresh
* succeeds).
*/
readonly start: () => Promise<void>;
/** Cancels the refresh timer. The last loaded snapshot remains available to `lookup`. */
readonly stop: () => void;
};
// ---------------------------------------------------------------------------
// Query result type
// ---------------------------------------------------------------------------
/**
* Shape of one row returned by the refresh query: a single (device, event)
* pairing. Rows are grouped by `device_id` when the snapshot is built, so the
* same device may appear in several rows.
*/
type DeviceEventRow = {
/** Selected as `ed.device_id` from `entry_devices`. */
device_id: string;
/** Selected as `e.event_id` from `entries`. */
event_id: string;
};
// ---------------------------------------------------------------------------
// Factory
// ---------------------------------------------------------------------------
/**
 * Creates a {@link DeviceEventMap} backed by an in-memory snapshot of the
 * `entry_devices` / `entries` join.
 *
 * The snapshot is rebuilt from scratch on every refresh and swapped in with a
 * single assignment, so `lookup` never observes a half-built map. When a
 * refresh fails the previous snapshot is kept and a warning is logged.
 *
 * Lifecycle guarantees:
 * - `start()` may be called repeatedly; each call replaces the previous
 *   interval instead of stacking a second one.
 * - `stop()` also cancels a `start()` that is still awaiting its initial
 *   refresh, so no interval is installed after the map has been stopped.
 * - At most one refresh query is in flight at a time; callers that ask for a
 *   refresh while one is running share its promise.
 *
 * @param pool    Pool used to run the join query.
 * @param config  Only `LIVE_DEVICE_EVENT_REFRESH_MS` (interval period) is read.
 * @param logger  Receives a debug line per successful refresh and a warning
 *                per failed one.
 * @param metrics Receives the refresh latency and the number of devices in
 *                the new snapshot after each successful refresh.
 * @returns The `lookup` / `start` / `stop` handle.
 */
export function createDeviceEventMap(
  pool: pg.Pool,
  config: Config,
  logger: Logger,
  metrics: Metrics,
): DeviceEventMap {
  // Current snapshot; replaced wholesale on each successful refresh.
  let cache = new Map<string, Set<string>>();
  let timer: ReturnType<typeof setInterval> | null = null;
  // Promise of the refresh currently running, or null when idle.
  let inFlight: Promise<void> | null = null;
  // Bumped by every start()/stop(); lets a start() that was suspended on its
  // initial refresh detect that it has been superseded in the meantime.
  let generation = 0;

  /** Runs the join query once and swaps the result in as the new snapshot. */
  async function loadSnapshot(): Promise<void> {
    const startedAt = performance.now();
    try {
      const result = await pool.query<DeviceEventRow>(
        `SELECT ed.device_id, e.event_id
         FROM entry_devices ed
         JOIN entries e ON e.id = ed.entry_id`,
      );
      const next = new Map<string, Set<string>>();
      for (const row of result.rows) {
        let eventSet = next.get(row.device_id);
        if (!eventSet) {
          eventSet = new Set<string>();
          next.set(row.device_id, eventSet);
        }
        eventSet.add(row.event_id);
      }
      cache = next;
      const elapsed = performance.now() - startedAt;
      metrics.observe('processor_live_device_event_refresh_latency_ms', elapsed);
      metrics.observe('processor_live_device_event_entries', next.size);
      logger.debug({ devices: next.size, elapsedMs: Math.round(elapsed) }, 'device-event map refreshed');
    } catch (err) {
      logger.warn({ err }, 'device-event map refresh failed; retaining stale cache');
      // Retain the stale cache — a stale map is better than an empty map
      // which would silently drop all fan-out until the next refresh.
    }
  }

  /**
   * Refreshes the snapshot, coalescing concurrent requests. Without this, a
   * query slower than the refresh period would pile up overlapping queries,
   * and an older result resolving late could overwrite a newer snapshot.
   */
  function refresh(): Promise<void> {
    if (inFlight !== null) return inFlight;
    const run = loadSnapshot().finally(() => {
      inFlight = null;
    });
    inFlight = run;
    return run;
  }

  function clearTimer(): void {
    if (timer !== null) {
      clearInterval(timer);
      timer = null;
    }
  }

  async function start(): Promise<void> {
    generation += 1;
    const mine = generation;
    // A repeated start() must not leave the previous interval running.
    clearTimer();
    await refresh();
    // stop() or a newer start() ran while we were awaiting: do not install a
    // timer on behalf of a superseded call.
    if (mine !== generation) return;
    timer = setInterval(() => {
      refresh().catch((err: unknown) => {
        logger.warn({ err }, 'device-event map refresh interval error');
      });
    }, config.LIVE_DEVICE_EVENT_REFRESH_MS);
    // Do not hold the event loop open during shutdown.
    timer.unref();
  }

  function stop(): void {
    // Invalidate any start() still waiting on its initial refresh.
    generation += 1;
    clearTimer();
  }

  function lookup(deviceId: string): readonly string[] {
    const events = cache.get(deviceId);
    // Copy out so callers never hold a reference into the live snapshot.
    return events === undefined ? [] : [...events];
  }

  return { lookup, start, stop };
}