feat(live): task 1.5.4 — broadcast consumer group and fan-out
Adds the per-instance Redis Stream consumer group (live-broadcast-{instance_id})
that reads the telemetry stream and fans out each position to subscribed
WebSocket connections without affecting the durable-write consumer path.
Key changes:
- src/shared/codec.ts: moved decodePosition/CodecError out of src/core/ so
src/live/broadcast.ts can decode positions without crossing the enforced
src/core/ ↔ src/live/ boundary; src/core/codec.ts now re-exports from there
- src/shared/types.ts: added Position and AttributeValue (same move, same reason);
src/core/types.ts re-exports both to preserve existing import paths
- src/live/broadcast.ts: createBroadcastConsumer factory — XREADGROUP loop,
immediate ACK semantics, toPositionMessage mapper, fanOut per event/topic
- src/live/device-event-map.ts: createDeviceEventMap factory — in-memory cache
of entry_devices × entries join, refreshed every LIVE_DEVICE_EVENT_REFRESH_MS
- src/db/migrations/0002_positions_faulty.sql: adds faulty boolean column and
positions_device_ts_idx for snapshot-on-subscribe query (task 1.5.5)
- src/main.ts: wired authClient, authzClient, registry, liveServer,
deviceEventMap, broadcastConsumer; shutdown chain: liveServer → deviceEventMap
+ broadcastConsumer → durable-write consumer → metricsServer → Redis → Postgres
- test/live-broadcast.test.ts: 4 unit tests covering single subscriber, multiple
subscribers, orphan device, and multi-event device fan-out
This commit is contained in:
@@ -0,0 +1,118 @@
/**
 * In-memory cache of device → event mappings.
 *
 * The fan-out loop needs to answer "which events does this device belong to?"
 * for every position record. The naive answer — query Postgres on each record —
 * is wrong at any meaningful throughput. This module caches the full
 * `entry_devices ⨯ entries` join in memory and refreshes it on a configurable
 * cadence (default: every 30 s).
 *
 * Staleness window: up to LIVE_DEVICE_EVENT_REFRESH_MS. This is acceptable for
 * pilot — operators register devices before the event starts, and "the device
 * appeared on the map after 30 s" is a tolerable UX gap. Phase 3+ can add
 * invalidation signals if needed.
 *
 * Spec: processor-ws-contract.md §Multi-instance behaviour;
 * task 1.5.4 §DeviceEventMap design
 */

import type pg from 'pg';
|
||||
import type { Logger } from 'pino';
|
||||
import type { Metrics } from '../shared/types.js';
|
||||
import type { Config } from '../config/load.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
// Public interface
// ---------------------------------------------------------------------------

export type DeviceEventMap = {
|
||||
/** Returns the event IDs the device is currently registered to. */
|
||||
readonly lookup: (deviceId: string) => readonly string[];
|
||||
/** Starts the refresh timer. Immediately runs the first refresh. */
|
||||
readonly start: () => Promise<void>;
|
||||
/** Cancels the refresh timer. */
|
||||
readonly stop: () => void;
|
||||
};
// ---------------------------------------------------------------------------
// Query result type
// ---------------------------------------------------------------------------

type DeviceEventRow = {
|
||||
device_id: string;
|
||||
event_id: string;
|
||||
};
// ---------------------------------------------------------------------------
// Factory
// ---------------------------------------------------------------------------

export function createDeviceEventMap(
|
||||
pool: pg.Pool,
|
||||
config: Config,
|
||||
logger: Logger,
|
||||
metrics: Metrics,
|
||||
): DeviceEventMap {
|
||||
// Mutable map; atomically swapped on each refresh.
|
||||
let cache = new Map<string, Set<string>>();
|
||||
let timer: ReturnType<typeof setInterval> | null = null;
|
||||
|
||||
async function refresh(): Promise<void> {
|
||||
const start = performance.now();
|
||||
try {
|
||||
const result = await pool.query<DeviceEventRow>(
|
||||
`SELECT ed.device_id, e.event_id
|
||||
FROM entry_devices ed
|
||||
JOIN entries e ON e.id = ed.entry_id`,
|
||||
);
|
||||
|
||||
const next = new Map<string, Set<string>>();
|
||||
for (const row of result.rows) {
|
||||
let eventSet = next.get(row.device_id);
|
||||
if (!eventSet) {
|
||||
eventSet = new Set<string>();
|
||||
next.set(row.device_id, eventSet);
|
||||
}
|
||||
eventSet.add(row.event_id);
|
||||
}
|
||||
|
||||
cache = next;
|
||||
|
||||
const elapsed = performance.now() - start;
|
||||
metrics.observe('processor_live_device_event_refresh_latency_ms', elapsed);
|
||||
metrics.observe('processor_live_device_event_entries', next.size);
|
||||
|
||||
logger.debug({ devices: next.size, elapsedMs: Math.round(elapsed) }, 'device-event map refreshed');
|
||||
} catch (err) {
|
||||
logger.warn({ err }, 'device-event map refresh failed; retaining stale cache');
|
||||
// Retain the stale cache — a stale map is better than an empty map
|
||||
// which would silently drop all fan-out until the next refresh.
|
||||
}
|
||||
}
|
||||
|
||||
async function start(): Promise<void> {
|
||||
await refresh();
|
||||
timer = setInterval(() => {
|
||||
refresh().catch((err: unknown) => {
|
||||
logger.warn({ err }, 'device-event map refresh interval error');
|
||||
});
|
||||
}, config.LIVE_DEVICE_EVENT_REFRESH_MS);
|
||||
// Do not hold the event loop open during shutdown.
|
||||
timer.unref();
|
||||
}
|
||||
|
||||
function stop(): void {
|
||||
if (timer !== null) {
|
||||
clearInterval(timer);
|
||||
timer = null;
|
||||
}
|
||||
}
|
||||
|
||||
function lookup(deviceId: string): readonly string[] {
|
||||
const events = cache.get(deviceId);
|
||||
if (!events || events.size === 0) return [];
|
||||
return [...events];
|
||||
}
|
||||
|
||||
return { lookup, start, stop };
|
||||
}
|
||||
Reference in New Issue
Block a user