Implement Phase 1 tasks 1.5-1.8 (consumer + state + writer + main wiring)

src/core/consumer.ts — XREADGROUP loop with consumer-group resumption,
ensureConsumerGroup (BUSYGROUP-tolerant), decodeBatch (CodecError → log
+ skip + leave pending; never speculative ACK), partial-ACK semantics,
connectRedis (mirroring tcp-ingestion's retry pattern), clean stop.
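
A minimal sketch of the group setup and one read iteration, assuming
ioredis: ensureConsumerGroup is the name used above, while readOnce,
the COUNT/BLOCK values, and the sink signature are illustrative.

import type { Redis } from 'ioredis';

async function ensureConsumerGroup(
  redis: Redis, stream: string, group: string,
): Promise<void> {
  try {
    // MKSTREAM creates the stream if it does not exist yet.
    await redis.xgroup('CREATE', stream, group, '0', 'MKSTREAM');
  } catch (err) {
    // BUSYGROUP means the group already exists: the normal resumption
    // path on restart, so swallow only that error.
    if (!String(err).includes('BUSYGROUP')) throw err;
  }
}

async function readOnce(
  redis: Redis,
  cfg: { stream: string; group: string; consumer: string },
  sink: (entries: [id: string, fields: string[]][]) => Promise<string[]>,
): Promise<void> {
  // '>' asks only for entries never delivered to this group.
  const reply = (await redis.xreadgroup(
    'GROUP', cfg.group, cfg.consumer,
    'COUNT', 100,
    'BLOCK', 5000,
    'STREAMS', cfg.stream, '>',
  )) as [stream: string, entries: [string, string[]][]][] | null;
  if (!reply) return; // BLOCK timed out, nothing new

  // Partial ACK: only the IDs the sink reports as handled are ACKed;
  // the rest stay pending and are re-delivered.
  const ackIds = await sink(reply[0][1]);
  if (ackIds.length > 0) await redis.xack(cfg.stream, cfg.group, ...ackIds);
}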

src/core/state.ts — LRU Map<device_id, DeviceState> using delete+set
bump trick (no third-party LRU dep); last_seen = max(prev, ts) so
out-of-order replays don't regress the high-water mark; evictedTotal()
counter.
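
The bump trick and high-water-mark update in isolation (a sketch:
DeviceState is cut down to two fields, and the config/logger plumbing
is omitted):

type DeviceState = { last_seen: number; position_count_session: number };

function createDeviceStateStore(maxEntries: number) {
  // Map preserves insertion order, so the first key is always the
  // least recently used entry.
  const map = new Map<string, DeviceState>();
  let evicted = 0;

  return {
    update(deviceId: string, ts: number): void {
      const prev = map.get(deviceId);
      map.delete(deviceId); // delete+set moves the key to the MRU end
      map.set(deviceId, {
        // max(prev, ts): an out-of-order replay never regresses the
        // high-water mark.
        last_seen: prev ? Math.max(prev.last_seen, ts) : ts,
        position_count_session: (prev?.position_count_session ?? 0) + 1,
      });
      if (map.size > maxEntries) {
        // Evict the least recently used device: the first key.
        map.delete(map.keys().next().value as string);
        evicted += 1;
      }
    },
    evictedTotal: (): number => evicted,
  };
}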

src/core/writer.ts — multi-row INSERT ON CONFLICT (device_id, ts) DO
NOTHING with RETURNING. Duplicate detection by set-difference between
input and RETURNING rows (xmax=0 doesn't work for skipped-conflict
rows, only returned ones — confirmed in the task spec's own Note).
Sequential chunking to WRITE_BATCH_SIZE; bigint→string and Buffer→base64
attribute serialization that handles Buffer.toJSON shape.
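
A sketch of the conflict-aware write and the attribute serializer,
assuming node-postgres; the positions table, its two-column key, and
the names writeChunk/serializeAttr are placeholders, not the real
schema or API:

import type pg from 'pg';

type Row = { device_id: string; ts: string }; // ts already bigint→string

async function writeChunk(pool: pg.Pool, rows: Row[]) {
  // Multi-row VALUES list: ($1,$2),($3,$4),...
  const values = rows
    .map((_, i) => `($${2 * i + 1}, $${2 * i + 2})`)
    .join(', ');
  const res = await pool.query<Row>(
    `INSERT INTO positions (device_id, ts)
     VALUES ${values}
     ON CONFLICT (device_id, ts) DO NOTHING
     RETURNING device_id, ts`,
    rows.flatMap((r) => [r.device_id, r.ts]),
  );
  // DO NOTHING returns only the rows it actually inserted, so anything
  // in the input but absent from RETURNING was a duplicate.
  const inserted = new Set(res.rows.map((r) => `${r.device_id}/${r.ts}`));
  return rows.map((r) => ({
    row: r,
    status: inserted.has(`${r.device_id}/${r.ts}`)
      ? ('inserted' as const)
      : ('duplicate' as const),
  }));
}

// bigint → decimal string, Buffer → base64; tolerates the
// { type: 'Buffer', data: [...] } shape that Buffer.prototype.toJSON
// produces after a JSON round-trip.
function serializeAttr(v: unknown): unknown {
  if (typeof v === 'bigint') return v.toString();
  if (Buffer.isBuffer(v)) return v.toString('base64');
  if (
    typeof v === 'object' && v !== null &&
    (v as { type?: unknown }).type === 'Buffer' &&
    Array.isArray((v as { data?: unknown }).data)
  ) {
    return Buffer.from((v as { data: number[] }).data).toString('base64');
  }
  return v;
}

Chunking to WRITE_BATCH_SIZE is then a sequential loop over slices of
the input, each slice going through writeChunk.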

src/main.ts — full pipeline: pool → migrate → redis → state → writer →
sink → consumer → graceful-shutdown stub. Sink ordering is
state.update BEFORE writer.write per spec rationale (state stays
consistent with what's been seen even if not yet persisted; redelivery
is idempotent on state). Metrics is still the trace-logging shim from
tcp-ingestion's pre-1.10 pattern; real prom-client lands in task 1.9.

Verification: typecheck, lint clean; 112 unit tests passing across 7
test files (+39 from this batch).
2026-04-30 21:47:43 +02:00
parent 6a14eb1d01
commit 2a50aaf175
12 changed files with 2218 additions and 15 deletions
src/main.ts +156 -2
@@ -1,6 +1,15 @@
import type { Redis } from 'ioredis';
import type pg from 'pg';
import { loadConfig } from './config/load.js';
import type { Config } from './config/load.js';
import { createLogger } from './observability/logger.js';
import { createPool, connectWithRetry } from './db/pool.js';
import { runMigrations } from './db/migrate.js';
import { connectRedis, createConsumer } from './core/consumer.js';
import type { ConsumedRecord } from './core/consumer.js';
import { createDeviceStateStore } from './core/state.js';
import { createWriter } from './core/writer.js';
import type { Metrics } from './core/types.js';
// -------------------------------------------------------------------------
// Startup: validate config (fail fast on bad env), build logger
// -------------------------------------------------------------------------
@@ -24,5 +33,150 @@ const logger = createLogger({
logger.info('processor starting');
// Consumer, writer, and state wiring land in tasks 1.5-1.8.
process.exit(0);
// -------------------------------------------------------------------------
// Metrics placeholder shim (task 1.9 replaces this with prom-client)
//
// Uses trace-level logging so the calls are observable in development but
// are silent in production builds where the log level is info or higher.
// This mirrors tcp-ingestion's approach before task 1.10 landed there.
// -------------------------------------------------------------------------
const metrics: Metrics = {
  inc: (name: string, labels?: Record<string, string>) => {
    logger.trace({ metric: name, labels }, 'metrics.inc');
  },
  observe: (name: string, value: number, labels?: Record<string, string>) => {
    logger.trace({ metric: name, value, labels }, 'metrics.observe');
  },
};
// -------------------------------------------------------------------------
// Wire up the pipeline
// -------------------------------------------------------------------------
async function main(): Promise<void> {
  // 1. Connect Postgres with exponential-backoff retry.
  const pool = createPool(config.POSTGRES_URL);
  await connectWithRetry(pool, logger);

  // 2. Run migrations before any consumer activity.
  //    Phase 1 limitation: multiple instances starting simultaneously both
  //    try to migrate. Postgres advisory locks would solve this; deferred to
  //    Phase 3 (production hardening), which is acceptable for the Phase 1
  //    single-instance pilot.
  await runMigrations(pool, logger);
  logger.info('migrations applied');

  // 3. Connect Redis with exponential-backoff retry.
  const redis: Redis = await connectRedis(config.REDIS_URL, logger);

  // 4. Build pipeline components.
  const state = createDeviceStateStore(config, logger);
  const writer = createWriter(pool, config, logger, metrics);

  // 5. Define the sink: the central decision point for the state update and
  //    the Postgres write. State is updated BEFORE the write so that
  //    in-memory state stays consistent with what has been seen, even if the
  //    Postgres write subsequently fails. If the write fails, the record
  //    stays pending (not ACKed) and will be re-delivered; applying the same
  //    position twice to state is idempotent for last_position and last_seen.
  //    Only position_count_session is double-counted, and that is a session
  //    counter that resets on restart, not a correctness concern.
  const sink = async (records: ConsumedRecord[]): Promise<string[]> => {
    // 5a. Update in-memory state for every record (cheap and effectively
    //     infallible: Map operations do not throw).
    for (const record of records) {
      state.update(record.position);
    }

    // 5b. Write to Postgres.
    const results = await writer.write(records);

    // 5c. ACK only the IDs that were inserted or already present.
    //     'failed' records are deliberately left pending for retry.
    return results
      .filter((r) => r.status === 'inserted' || r.status === 'duplicate')
      .map((r) => r.id);
  };

  // 6. Build and start the consumer.
  const consumer = createConsumer(redis, config, logger, metrics, sink);
  await consumer.start();

  // 7. Install the graceful-shutdown stub. Full Phase 3 hardening adds an
  //    explicit consumer-group commit on SIGTERM, an uncaught-exception
  //    handler, and a multi-instance drain mode.
  installGracefulShutdown({ redis, pool, consumer, logger });

  logger.info(
    {
      stream: config.REDIS_TELEMETRY_STREAM,
      group: config.REDIS_CONSUMER_GROUP,
      consumer: config.REDIS_CONSUMER_NAME,
    },
    'processor ready',
  );
}
// -------------------------------------------------------------------------
// Graceful shutdown stub — Phase 3 finalizes this
// -------------------------------------------------------------------------
type ShutdownDeps = {
  readonly redis: Redis;
  readonly pool: pg.Pool;
  readonly consumer: { stop: () => Promise<void> };
  readonly logger: ReturnType<typeof createLogger>;
};

function installGracefulShutdown(deps: ShutdownDeps): void {
  const { redis, pool, consumer, logger: log } = deps;
  let shuttingDown = false;

  function shutdown(signal: string): void {
    if (shuttingDown) return;
    shuttingDown = true;
    log.info({ signal }, 'shutdown signal received');

    // Stop consumer loop — exits after the current batch finishes.
    consumer
      .stop()
      .then(() => {
        log.info('consumer stopped');
        return redis.quit();
      })
      .then(() => {
        log.info('Redis disconnected');
        return pool.end();
      })
      .then(() => {
        log.info('graceful shutdown complete');
        process.exit(0);
      })
      .catch((err: unknown) => {
        log.error({ err }, 'error during shutdown');
        process.exit(1);
      });

    // Force exit after 15s if the graceful path stalls (e.g. a hung Postgres write).
    setTimeout(() => {
      log.warn('forced exit after shutdown timeout');
      process.exit(1);
    }, 15_000).unref();
  }

  process.on('SIGTERM', () => shutdown('SIGTERM'));
  process.on('SIGINT', () => shutdown('SIGINT'));
}
// -------------------------------------------------------------------------
// Entry point
// -------------------------------------------------------------------------
main().catch((err: unknown) => {
  process.stderr.write(
    `Fatal startup error: ${err instanceof Error ? err.message : String(err)}\n`,
  );
  process.exit(1);
});