Files
processor/src/live/broadcast.ts
T
julian fbb1f34e9a feat(live): task 1.5.4 — broadcast consumer group and fan-out
Adds the per-instance Redis Stream consumer group (live-broadcast-{instance_id})
that reads the telemetry stream and fans out each position to subscribed
WebSocket connections without affecting the durable-write consumer path.

Key changes:
- src/shared/codec.ts: moved decodePosition/CodecError out of src/core/ so
  src/live/broadcast.ts can decode positions without crossing the enforced
  src/core/ ↔ src/live/ boundary; src/core/codec.ts now re-exports from there
- src/shared/types.ts: added Position and AttributeValue (same move, same reason);
  src/core/types.ts re-exports both to preserve existing import paths
- src/live/broadcast.ts: createBroadcastConsumer factory — XREADGROUP loop,
  immediate ACK semantics, toPositionMessage mapper, fanOut per event/topic
- src/live/device-event-map.ts: createDeviceEventMap factory — in-memory cache
  of entry_devices × entries join, refreshed every LIVE_DEVICE_EVENT_REFRESH_MS
- src/db/migrations/0002_positions_faulty.sql: adds faulty boolean column and
  positions_device_ts_idx for snapshot-on-subscribe query (task 1.5.5)
- src/main.ts: wired authClient, authzClient, registry, liveServer,
  deviceEventMap, broadcastConsumer; shutdown chain: liveServer → deviceEventMap
  + broadcastConsumer → durable-write consumer → metricsServer → Redis → Postgres
- test/live-broadcast.test.ts: 4 unit tests covering single subscriber, multiple
  subscribers, orphan device, and multi-event device fan-out
2026-05-02 18:36:52 +02:00

298 lines
10 KiB
TypeScript

/**
* Broadcast consumer group — per-instance Redis Stream reader for live fan-out.
*
* Reads the same `telemetry:teltonika` stream as the durable-write consumer
* (task 1.5) but on a SEPARATE per-instance consumer group:
* `live-broadcast-{instance_id}`
*
* This means every Processor instance sees every record for its own connected
* clients. The durable-write group splits records across instances for exactly-
* once Postgres writes; the broadcast group replicates records to every
* instance for fan-out. The two groups operate independently with separate
* offsets; a slow Postgres write does not slow down broadcast.
*
* ACK semantics: ACK immediately on consume (no durability required for
* broadcast — missing a position is fine; only the latest position matters).
*
* Back-pressure: sendOutbound closes slow connections at BACKPRESSURE_THRESHOLD
* (already handled in server.ts).
*
* Spec: processor-ws-contract.md §Multi-instance behaviour;
* task 1.5.4 §Broadcast consumer group
*/
import type { Redis } from 'ioredis';
import type { Logger } from 'pino';
import type { Config } from '../config/load.js';
import type { Metrics, Position } from '../shared/types.js';
import type { SubscriptionRegistry } from './registry.js';
import type { DeviceEventMap } from './device-event-map.js';
import type { PositionMessage } from './protocol.js';
import { sendOutbound } from './server.js';
import type { LiveConnection } from './server.js';
import { decodePosition, CodecError } from '../shared/codec.js';
// ---------------------------------------------------------------------------
// Public interface
// ---------------------------------------------------------------------------
/**
 * Lifecycle handle returned by {@link createBroadcastConsumer}.
 *
 * - `start` creates the per-instance consumer group (idempotent) and launches
 *   the XREADGROUP read loop; the returned promise resolves once the loop is
 *   running, not when it ends.
 * - `stop` signals the loop to exit and resolves after it has fully drained.
 */
export type BroadcastConsumer = {
readonly start: () => Promise<void>;
readonly stop: () => Promise<void>;
};
// ---------------------------------------------------------------------------
// Wire-format mapper
// ---------------------------------------------------------------------------
/**
* Maps a decoded Position to a PositionMessage (minus `topic`).
* Omits fields rather than sending `null` for absent / zero values.
*
* Field mapping:
* - Position.device_id → deviceId (IMEI string; not a UUID in Phase 1)
* - Position.latitude → lat
* - Position.longitude → lon
* - Position.timestamp → ts (epoch ms)
* - Position.speed → speed (omitted if 0 — may indicate invalid GPS fix)
* - Position.angle → course (omitted if 0)
* - No accuracy field in Phase 1's Position type.
*
* Note: The WS contract spec says deviceId should be `devices.id` (UUID), but
* Phase 1's positions table stores the raw IMEI as device_id. The SPA will
* need to join on the IMEI until Phase 2 introduces UUID-based device tracking.
* This is documented as an open deviation.
*/
/**
 * Maps a decoded Position to a PositionMessage (minus `topic`).
 * Omits fields rather than sending `null` for absent / zero values.
 *
 * Field mapping:
 * - Position.device_id → deviceId (IMEI string; not a UUID in Phase 1)
 * - Position.latitude → lat
 * - Position.longitude → lon
 * - Position.timestamp → ts (epoch ms)
 * - Position.speed → speed (omitted if 0 — may indicate invalid GPS fix)
 * - Position.angle → course (omitted if 0)
 * - No accuracy field in Phase 1's Position type.
 *
 * Note: The WS contract spec says deviceId should be `devices.id` (UUID), but
 * Phase 1's positions table stores the raw IMEI as device_id. The SPA will
 * need to join on the IMEI until Phase 2 introduces UUID-based device tracking.
 * This is documented as an open deviation.
 */
function toPositionMessage(
  position: Position,
): Omit<PositionMessage, 'topic'> {
  // Conditional spreads keep the optional fields out of the object entirely
  // when zero, without mutating through a Record<string, unknown> cast.
  return {
    type: 'position',
    deviceId: position.device_id,
    lat: position.latitude,
    lon: position.longitude,
    ts: position.timestamp.getTime(),
    // Omit speed when 0 — per Teltonika convention, 0 may indicate invalid GPS.
    ...(position.speed > 0 ? { speed: position.speed } : {}),
    // Omit angle/course when 0.
    ...(position.angle > 0 ? { course: position.angle } : {}),
  };
}
// ---------------------------------------------------------------------------
// Raw stream entry shape (ioredis XREADGROUP return type)
// ---------------------------------------------------------------------------
// `fields` is the flat alternating [key, value, key, value, ...] array that
// ioredis returns per entry; decodeBatch folds it into a Record below.
type RawStreamEntry = [id: string, fields: string[]];
// ---------------------------------------------------------------------------
// Factory
// ---------------------------------------------------------------------------
/**
 * Builds the per-instance broadcast consumer.
 *
 * @param redis - ioredis client used for XGROUP / XREADGROUP / XACK.
 * @param registry - subscription registry resolving topics to connections.
 * @param deviceToEvent - in-memory device → event-id map for fan-out routing.
 * @param config - provides stream name, group prefix, instance id, batch
 *   size/block, and the WS back-pressure threshold.
 * @param logger - structured logger.
 * @param metrics - counter/histogram sink.
 * @returns lifecycle handle with `start` / `stop`.
 */
export function createBroadcastConsumer(
  redis: Redis,
  registry: SubscriptionRegistry,
  deviceToEvent: DeviceEventMap,
  config: Config,
  logger: Logger,
  metrics: Metrics,
): BroadcastConsumer {
  const stream = config.REDIS_TELEMETRY_STREAM;
  const groupName = `${config.LIVE_BROADCAST_GROUP_PREFIX}-${config.INSTANCE_ID}`;
  const consumerName = config.INSTANCE_ID;
  const batchSize = config.LIVE_BROADCAST_BATCH_SIZE;
  const batchBlockMs = config.LIVE_BROADCAST_BATCH_BLOCK_MS;

  // Cooperative-shutdown flag, checked between (blocking) reads.
  let stopping = false;
  // Resolves when the read loop has fully exited; awaited by stop().
  let loopPromise: Promise<void> = Promise.resolve();

  // -------------------------------------------------------------------------
  // Consumer group setup
  // -------------------------------------------------------------------------

  /**
   * Creates the per-instance group starting at `$` (new records only) with
   * MKSTREAM so creation works before the producer's first write. BUSYGROUP
   * (group already exists, e.g. after a restart) is expected and logged.
   */
  async function ensureGroup(): Promise<void> {
    try {
      await redis.xgroup('CREATE', stream, groupName, '$', 'MKSTREAM');
      logger.info({ stream, group: groupName }, 'broadcast consumer group created');
    } catch (err: unknown) {
      if (err instanceof Error && err.message.startsWith('BUSYGROUP')) {
        logger.info({ stream, group: groupName }, 'broadcast consumer group already exists');
        return;
      }
      throw err;
    }
  }

  // -------------------------------------------------------------------------
  // Fan-out
  // -------------------------------------------------------------------------

  /**
   * Sends one decoded position to every connection subscribed to any event
   * topic the device belongs to. Devices not mapped to an event are counted
   * as orphans and dropped.
   */
  function fanOut(
    entryId: string,
    position: Position,
  ): void {
    const eventIds = deviceToEvent.lookup(position.device_id);
    if (eventIds.length === 0) {
      metrics.inc('processor_live_broadcast_orphan_records_total', {
        instance_id: config.INSTANCE_ID,
      });
      return;
    }
    const baseMsg = toPositionMessage(position);
    for (const eventId of eventIds) {
      const topic = `event:${eventId}`;
      const conns = registry.connectionsForTopic(topic);
      for (const conn of conns as Iterable<LiveConnection>) {
        sendOutbound(
          conn,
          { ...baseMsg, topic },
          metrics,
          config.LIVE_WS_BACKPRESSURE_THRESHOLD_BYTES,
        );
        metrics.inc('processor_live_broadcast_fanout_messages_total', {
          instance_id: config.INSTANCE_ID,
        });
      }
    }
    // Broadcast lag: time from GPS fix to fan-out send. Skip negative values
    // (device clock ahead of ours) rather than polluting the histogram.
    const lagMs = Date.now() - position.timestamp.getTime();
    if (lagMs >= 0) {
      metrics.observe('processor_live_broadcast_lag_ms', lagMs);
    }
    logger.debug({ entryId, device: position.device_id, events: eventIds.length }, 'fanned out');
  }

  // -------------------------------------------------------------------------
  // Batch decoder (mirrors core/consumer.ts decodeBatch pattern)
  // -------------------------------------------------------------------------

  /**
   * Decodes a batch of raw entries into positions. Entries with a missing
   * payload or a decode failure are logged and excluded from fan-out only;
   * runLoop still ACKs every entry in the batch, because a malformed payload
   * can never succeed on redelivery and broadcast has no durability
   * requirement anyway.
   */
  function decodeBatch(entries: RawStreamEntry[]): Array<{
    id: string;
    position: Position;
  }> {
    const decoded: Array<{ id: string; position: Position }> = [];
    for (const [id, fields] of entries) {
      // Fold the flat [key, value, key, value, ...] array into a map.
      const fieldMap: Record<string, string> = {};
      for (let i = 0; i + 1 < fields.length; i += 2) {
        const key = fields[i];
        const val = fields[i + 1];
        if (key !== undefined && val !== undefined) {
          fieldMap[key] = val;
        }
      }
      const payload = fieldMap['payload'];
      if (payload === undefined) {
        logger.warn({ id, stream }, 'broadcast entry missing payload; skipping');
        continue;
      }
      try {
        const position = decodePosition(payload);
        decoded.push({ id, position });
      } catch (err) {
        // "Never silently skip": each failure is logged with its entry id
        // before the record is dropped from fan-out.
        if (err instanceof CodecError) {
          logger.warn({ id, err }, 'broadcast decode error; skipping record');
        } else {
          logger.error({ id, err }, 'unexpected broadcast decode error');
        }
      }
    }
    return decoded;
  }

  // -------------------------------------------------------------------------
  // Read loop
  // -------------------------------------------------------------------------

  /**
   * Blocking XREADGROUP loop: read a batch, ACK it immediately, fan out the
   * decoded positions. Read failures back off 1s and retry; ACK failures are
   * logged and tolerated so a transient Redis error cannot escalate to the
   * fatal process-exit handler attached in start().
   */
  async function runLoop(): Promise<void> {
    logger.info({ stream, group: groupName, consumer: consumerName }, 'broadcast consumer started');
    while (!stopping) {
      let rawResult: [string, [string, string[]][]][] | null;
      try {
        rawResult = (await redis.xreadgroup(
          'GROUP',
          groupName,
          consumerName,
          'COUNT',
          String(batchSize),
          'BLOCK',
          String(batchBlockMs),
          'STREAMS',
          stream,
          '>',
        )) as [string, [string, string[]][]][] | null;
      } catch (err) {
        if (stopping) break;
        logger.error({ err }, 'broadcast XREADGROUP failed; backing off 1s');
        await new Promise<void>((resolve) => setTimeout(resolve, 1_000));
        continue;
      }
      if (rawResult === null) continue; // BLOCK timeout — check stopping flag
      const streamEntries = rawResult[0]?.[1] ?? [];
      if (streamEntries.length === 0) continue;
      metrics.inc('processor_live_broadcast_records_total', {
        instance_id: config.INSTANCE_ID,
      }, streamEntries.length);
      const decoded = decodeBatch(streamEntries);
      // ACK all entries immediately — broadcast has no durability requirement,
      // and an undecodable entry would never succeed on redelivery either.
      const allIds = streamEntries.map(([id]) => id);
      if (allIds.length > 0) {
        try {
          await redis.xack(stream, groupName, ...allIds);
        } catch (err) {
          // A transient ACK failure must not reject loopPromise (whose fatal
          // handler exits the process). The unacked entries linger in the PEL
          // — harmless for broadcast, which never re-reads them.
          logger.warn({ err }, 'broadcast XACK failed; continuing without ack');
        }
      }
      // Fan out decoded records to subscribed clients.
      for (const { id, position } of decoded) {
        fanOut(id, position);
      }
    }
    logger.info({ stream, group: groupName }, 'broadcast consumer loop exited');
  }

  // -------------------------------------------------------------------------
  // Lifecycle
  // -------------------------------------------------------------------------

  /** Creates the group (idempotent) and launches the read loop. */
  async function start(): Promise<void> {
    await ensureGroup();
    loopPromise = runLoop();
    // An unexpected loop crash is unrecoverable for this instance's live
    // clients — fail fast so the orchestrator restarts us.
    loopPromise.catch((err: unknown) => {
      logger.fatal({ err }, 'broadcast consumer loop crashed; exiting');
      process.exit(1);
    });
  }

  /** Signals the loop to stop and resolves once it has exited. */
  async function stop(): Promise<void> {
    stopping = true;
    await loopPromise;
  }

  return { start, stop };
}