fbb1f34e9a
Adds the per-instance Redis Stream consumer group (live-broadcast-{instance_id})
that reads the telemetry stream and fans out each position to subscribed
WebSocket connections without affecting the durable-write consumer path.
Key changes:
- src/shared/codec.ts: moved decodePosition/CodecError out of src/core/ so
src/live/broadcast.ts can decode positions without crossing the enforced
src/core/ ↔ src/live/ boundary; src/core/codec.ts now re-exports from there
- src/shared/types.ts: added Position and AttributeValue (same move, same reason);
src/core/types.ts re-exports both to preserve existing import paths
- src/live/broadcast.ts: createBroadcastConsumer factory — XREADGROUP loop,
immediate ACK semantics, toPositionMessage mapper, fanOut per event/topic
- src/live/device-event-map.ts: createDeviceEventMap factory — in-memory cache
of entry_devices × entries join, refreshed every LIVE_DEVICE_EVENT_REFRESH_MS
- src/db/migrations/0002_positions_faulty.sql: adds faulty boolean column and
positions_device_ts_idx for snapshot-on-subscribe query (task 1.5.5)
- src/main.ts: wired authClient, authzClient, registry, liveServer,
deviceEventMap, broadcastConsumer; shutdown chain: liveServer → deviceEventMap
+ broadcastConsumer → durable-write consumer → metricsServer → Redis → Postgres
- test/live-broadcast.test.ts: 4 unit tests covering single subscriber, multiple
subscribers, orphan device, and multi-event device fan-out
119 lines · 3.9 KiB · TypeScript
/**
 * In-memory cache of device → event mappings.
 *
 * The fan-out loop needs to answer "which events does this device belong to?"
 * for every position record. The naive answer — query Postgres on each record —
 * is wrong at any meaningful throughput. This module caches the full
 * `entry_devices ⨯ entries` join in memory and refreshes it on a configurable
 * cadence (default: every 30 s).
 *
 * Staleness window: up to LIVE_DEVICE_EVENT_REFRESH_MS. This is acceptable for
 * pilot — operators register devices before the event starts, and "the device
 * appeared on the map after 30 s" is a tolerable UX gap. Phase 3+ can add
 * invalidation signals if needed.
 *
 * Spec: processor-ws-contract.md §Multi-instance behaviour;
 * task 1.5.4 §DeviceEventMap design
 */
import type pg from 'pg';
|
||
import type { Logger } from 'pino';
|
||
import type { Metrics } from '../shared/types.js';
|
||
import type { Config } from '../config/load.js';
|
||
|
||
// ---------------------------------------------------------------------------
// Public interface
// ---------------------------------------------------------------------------

export type DeviceEventMap = {
  /**
   * Returns the event IDs the device is currently registered to, as of the
   * last successful refresh. An unknown device yields an empty array.
   */
  readonly lookup: (deviceId: string) => readonly string[];
  /** Starts the refresh timer. Immediately runs (and awaits) the first refresh. */
  readonly start: () => Promise<void>;
  /** Cancels the refresh timer. Safe to call even if start() never ran. */
  readonly stop: () => void;
};
// ---------------------------------------------------------------------------
// Query result type
// ---------------------------------------------------------------------------

/** Row shape of the entry_devices ⨯ entries join run on each refresh. */
type DeviceEventRow = {
  device_id: string;
  event_id: string;
};
// ---------------------------------------------------------------------------
|
||
// Factory
|
||
// ---------------------------------------------------------------------------
|
||
|
||
export function createDeviceEventMap(
|
||
pool: pg.Pool,
|
||
config: Config,
|
||
logger: Logger,
|
||
metrics: Metrics,
|
||
): DeviceEventMap {
|
||
// Mutable map; atomically swapped on each refresh.
|
||
let cache = new Map<string, Set<string>>();
|
||
let timer: ReturnType<typeof setInterval> | null = null;
|
||
|
||
async function refresh(): Promise<void> {
|
||
const start = performance.now();
|
||
try {
|
||
const result = await pool.query<DeviceEventRow>(
|
||
`SELECT ed.device_id, e.event_id
|
||
FROM entry_devices ed
|
||
JOIN entries e ON e.id = ed.entry_id`,
|
||
);
|
||
|
||
const next = new Map<string, Set<string>>();
|
||
for (const row of result.rows) {
|
||
let eventSet = next.get(row.device_id);
|
||
if (!eventSet) {
|
||
eventSet = new Set<string>();
|
||
next.set(row.device_id, eventSet);
|
||
}
|
||
eventSet.add(row.event_id);
|
||
}
|
||
|
||
cache = next;
|
||
|
||
const elapsed = performance.now() - start;
|
||
metrics.observe('processor_live_device_event_refresh_latency_ms', elapsed);
|
||
metrics.observe('processor_live_device_event_entries', next.size);
|
||
|
||
logger.debug({ devices: next.size, elapsedMs: Math.round(elapsed) }, 'device-event map refreshed');
|
||
} catch (err) {
|
||
logger.warn({ err }, 'device-event map refresh failed; retaining stale cache');
|
||
// Retain the stale cache — a stale map is better than an empty map
|
||
// which would silently drop all fan-out until the next refresh.
|
||
}
|
||
}
|
||
|
||
async function start(): Promise<void> {
|
||
await refresh();
|
||
timer = setInterval(() => {
|
||
refresh().catch((err: unknown) => {
|
||
logger.warn({ err }, 'device-event map refresh interval error');
|
||
});
|
||
}, config.LIVE_DEVICE_EVENT_REFRESH_MS);
|
||
// Do not hold the event loop open during shutdown.
|
||
timer.unref();
|
||
}
|
||
|
||
function stop(): void {
|
||
if (timer !== null) {
|
||
clearInterval(timer);
|
||
timer = null;
|
||
}
|
||
}
|
||
|
||
function lookup(deviceId: string): readonly string[] {
|
||
const events = cache.get(deviceId);
|
||
if (!events || events.size === 0) return [];
|
||
return [...events];
|
||
}
|
||
|
||
return { lookup, start, stop };
|
||
}
|