feat(live): task 1.5.1 — WS server scaffold + heartbeat
Stand up the WebSocket live-broadcast server inside the Processor process:

- src/live/server.ts: createLiveServer factory with start/stop lifecycle, per-connection LiveConnection type, sendOutbound helper with back-pressure guard, 30s frame-level heartbeat via ws ping/pong, pluggable onMessage handler (stub returns error/not-implemented until 1.5.2/1.5.3).
- src/live/protocol.ts: zod schemas for inbound subscribe/unsubscribe messages, all outbound types (subscribed/unsubscribed/position/error), WsCloseCodes.
- src/shared/types.ts: extracted Metrics interface so src/live/ can import it without crossing the enforced src/live/ ↔ src/core/ ESLint boundary.
- src/core/types.ts: re-exports Metrics from shared/types to keep Phase 1 call sites unchanged.
- src/config/load.ts: LIVE_WS_PORT, LIVE_WS_HOST, LIVE_WS_PING_INTERVAL_MS, LIVE_WS_DRAIN_TIMEOUT_MS, LIVE_WS_BACKPRESSURE_THRESHOLD_BYTES, DIRECTUS_BASE_URL, DIRECTUS_AUTH_TIMEOUT_MS, DIRECTUS_AUTHZ_TIMEOUT_MS, LIVE_BROADCAST_GROUP_PREFIX, LIVE_BROADCAST_BATCH_SIZE, LIVE_BROADCAST_BATCH_BLOCK_MS, LIVE_DEVICE_EVENT_REFRESH_MS.
- src/observability/metrics.ts: Phase 1.5 metrics inventory (connections, inbound/outbound counters, auth/authz histograms, subscription gauge, broadcast counters + lag histogram, snapshot histograms, device-event map).
- src/main.ts: wires the live server alongside the durable-write consumer; shutdown order: live server → consumer → metrics → Redis → Postgres.
- eslint.config.js: import/no-restricted-paths zones for src/live/ ↔ src/core/.
- test/live-server.test.ts: 7 unit tests covering connect, ping, protocol violation, valid message dispatch, connections gauge, and stop() drain.
This commit is contained in:
+45
-6
@@ -16,6 +16,9 @@ import { connectRedis, createConsumer } from './core/consumer.js';
|
||||
import type { ConsumedRecord } from './core/consumer.js';
|
||||
import { createDeviceStateStore } from './core/state.js';
|
||||
import { createWriter } from './core/writer.js';
|
||||
import { createLiveServer, sendOutbound } from './live/server.js';
|
||||
import type { LiveServer, LiveConnection } from './live/server.js';
|
||||
import type { InboundMessage } from './live/protocol.js';
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Startup: validate config (fail fast on bad env), build logger
|
||||
@@ -128,17 +131,41 @@ async function main(): Promise<void> {
|
||||
return ackIds;
|
||||
};
|
||||
|
||||
// 10. Build and start the consumer
|
||||
// 10. Build the live WebSocket server (task 1.5.1).
|
||||
// The stub message handler replies with `error/not-implemented` until
|
||||
// tasks 1.5.2 and 1.5.3 wire in the real auth + registry handler.
|
||||
const stubMessageHandler = async (
|
||||
conn: LiveConnection,
|
||||
_message: InboundMessage,
|
||||
): Promise<void> => {
|
||||
sendOutbound(
|
||||
conn,
|
||||
{ type: 'error', code: 'not-implemented' },
|
||||
metrics,
|
||||
config.LIVE_WS_BACKPRESSURE_THRESHOLD_BYTES,
|
||||
);
|
||||
};
|
||||
|
||||
const liveServer: LiveServer = createLiveServer(
|
||||
config,
|
||||
logger,
|
||||
metrics,
|
||||
stubMessageHandler,
|
||||
);
|
||||
await liveServer.start();
|
||||
|
||||
// 11. Build and start the durable-write consumer
|
||||
const consumer = createConsumer(redis, config, logger, metrics, sink);
|
||||
await consumer.start();
|
||||
|
||||
// 11. Install graceful shutdown.
|
||||
// Full Phase 3 hardening: explicit consumer-group commit on SIGTERM,
|
||||
// uncaught-exception handler, multi-instance drain mode.
|
||||
// 12. Install graceful shutdown.
|
||||
// Shutdown order: live server first (no new connections), then
|
||||
// broadcast consumer (task 1.5.4 adds this), then durable-write consumer.
|
||||
installGracefulShutdown({
|
||||
redis,
|
||||
pool,
|
||||
consumer,
|
||||
liveServer,
|
||||
metricsServer,
|
||||
pgHealth,
|
||||
lagSampler,
|
||||
@@ -151,6 +178,7 @@ async function main(): Promise<void> {
|
||||
group: config.REDIS_CONSUMER_GROUP,
|
||||
consumer: config.REDIS_CONSUMER_NAME,
|
||||
metricsPort: config.METRICS_PORT,
|
||||
wsPort: config.LIVE_WS_PORT,
|
||||
},
|
||||
'processor ready',
|
||||
);
|
||||
@@ -164,6 +192,7 @@ type ShutdownDeps = {
|
||||
readonly redis: Redis;
|
||||
readonly pool: pg.Pool;
|
||||
readonly consumer: { stop: () => Promise<void> };
|
||||
readonly liveServer: LiveServer;
|
||||
readonly metricsServer: http.Server;
|
||||
readonly pgHealth: { stop: () => void };
|
||||
readonly lagSampler: { stop: () => void };
|
||||
@@ -171,7 +200,7 @@ type ShutdownDeps = {
|
||||
};
|
||||
|
||||
function installGracefulShutdown(deps: ShutdownDeps): void {
|
||||
const { redis, pool, consumer, metricsServer, pgHealth, lagSampler, logger: log } = deps;
|
||||
const { redis, pool, consumer, liveServer, metricsServer, pgHealth, lagSampler, logger: log } = deps;
|
||||
|
||||
let shuttingDown = false;
|
||||
|
||||
@@ -187,8 +216,18 @@ function installGracefulShutdown(deps: ShutdownDeps): void {
|
||||
lagSampler.stop();
|
||||
pgHealth.stop();
|
||||
|
||||
consumer
|
||||
// Shutdown order:
|
||||
// 1. Live server — stop accepting new connections and drain existing ones
|
||||
// first, so clients know the server is going away before the consumer
|
||||
// stops processing.
|
||||
// 2. Durable-write consumer — lets the in-flight batch finish.
|
||||
// 3. Metrics server, Redis, Postgres.
|
||||
liveServer
|
||||
.stop()
|
||||
.then(() => {
|
||||
log.info('live server stopped');
|
||||
return consumer.stop();
|
||||
})
|
||||
.then(() => {
|
||||
log.info('consumer stopped');
|
||||
return new Promise<void>((resolve, reject) =>
|
||||
|
||||
Reference in New Issue
Block a user