Implement Phase 1 tasks 1.9-1.11 (observability + integration test + Dockerfile/CI)
src/observability/metrics.ts — full prom-client implementation. All 10
Phase 1 metrics registered (processor_consumer_reads_total,
_records_total, _lag, _decode_errors_total, processor_position_writes_total
{status}, _write_duration_seconds, processor_acks_total,
processor_device_state_{size,evictions_total}) plus nodejs_* defaults.
node:http server with /metrics, /healthz, /readyz. /readyz checks
redis.status === 'ready' AND a 5s-cached SELECT 1 Postgres probe.
processor_consumer_lag sampled every 10s via XINFO GROUPS (Redis 7.2+ lag
field, with an approximate XLEN fallback for older servers), becoming a
no-op when the consumer group hasn't been created yet.
src/main.ts — replaces the trace-logging shim with createMetrics() and
startMetricsServer(); shutdown closes the metrics server before
redis.quit() and pool.end().
test/metrics.test.ts — 22 unit tests: exposition format, every metric
type behaviour, all four HTTP endpoint paths including /readyz 503 cases.
test/pipeline.integration.test.ts — testcontainers Redis 7 +
TimescaleDB latest-pg16. Four scenarios: happy path with bigint+Buffer
attribute round-trip, idempotency on (device_id, ts), malformed payload
stays in PEL (decode_errors_total increments), writer failure → retry
(weaker variant per spec: stop Postgres before publish, restart, verify
row appears). Skip-on-no-Docker pattern verified — exits 0 without
Docker.
Dockerfile — multi-stage matching tcp-ingestion. EXPOSE 9090 only,
HEALTHCHECK on /readyz, image-source label points at processor repo.
.gitea/workflows/build.yml — single-job workflow mirroring
tcp-ingestion. Path filters cover src/, test/, build config, Dockerfile.
Portainer webhook step uncommented for :main auto-deploy.
compose.dev.yaml — local-build variant with Redis + TimescaleDB +
processor-dev for verifying Dockerfile changes without the registry
round-trip.
README.md — fleshed out from stub: quick-start, Docker build, deployment
note, env vars, tests (unit vs. integration), CI behavior. Flags the
deploy-side change needed: deploy/compose.yaml needs a TimescaleDB
service and a processor service entry added.
Verification: typecheck, lint clean; 134 unit tests passing across 8
files (+22 from this batch). pnpm test:integration runs cleanly under
the no-Docker skip pattern.
Phase 1 is now complete. Service is pilot-ready.
This commit is contained in:
+88
-35
@@ -1,15 +1,21 @@
|
||||
import type * as http from 'node:http';
|
||||
import type { Redis } from 'ioredis';
|
||||
import type pg from 'pg';
|
||||
import { loadConfig } from './config/load.js';
|
||||
import type { Config } from './config/load.js';
|
||||
import { createLogger } from './observability/logger.js';
|
||||
import {
|
||||
createMetrics,
|
||||
startMetricsServer,
|
||||
createPostgresHealthCheck,
|
||||
createConsumerLagSampler,
|
||||
} from './observability/metrics.js';
|
||||
import { createPool, connectWithRetry } from './db/pool.js';
|
||||
import { runMigrations } from './db/migrate.js';
|
||||
import { connectRedis, createConsumer } from './core/consumer.js';
|
||||
import type { ConsumedRecord } from './core/consumer.js';
|
||||
import { createDeviceStateStore } from './core/state.js';
|
||||
import { createWriter } from './core/writer.js';
|
||||
import type { Metrics } from './core/types.js';
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Startup: validate config (fail fast on bad env), build logger
|
||||
@@ -33,33 +39,21 @@ const logger = createLogger({
|
||||
|
||||
logger.info('processor starting');
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Metrics placeholder shim (task 1.9 replaces this with prom-client)
|
||||
//
|
||||
// Uses trace-level logging so the calls are observable in development but
|
||||
// are silent in production builds where the log level is info or higher.
|
||||
// This mirrors tcp-ingestion's approach before task 1.10 landed there.
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
const metrics: Metrics = {
|
||||
inc: (name: string, labels?: Record<string, string>) => {
|
||||
logger.trace({ metric: name, labels }, 'metrics.inc');
|
||||
},
|
||||
observe: (name: string, value: number, labels?: Record<string, string>) => {
|
||||
logger.trace({ metric: name, value, labels }, 'metrics.observe');
|
||||
},
|
||||
};
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Wire up the pipeline
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
async function main(): Promise<void> {
|
||||
// 1. Connect Postgres with exponential-backoff retry
|
||||
// 1. Build real prom-client metrics (replaces the trace-log shim from
|
||||
// pre-1.9 main.ts). Metrics are wired before any I/O so that counters
|
||||
// start at zero from the moment the process starts.
|
||||
const metrics = createMetrics();
|
||||
|
||||
// 2. Connect Postgres with exponential-backoff retry
|
||||
const pool = createPool(config.POSTGRES_URL);
|
||||
await connectWithRetry(pool, logger);
|
||||
|
||||
// 2. Run migrations before any consumer activity.
|
||||
// 3. Run migrations before any consumer activity.
|
||||
// Phase 1 limitation: multiple instances starting simultaneously both try
|
||||
// to migrate. Postgres advisory locks would solve this — deferred to Phase 3
|
||||
// (production hardening), which is acceptable for the Phase 1 single-instance
|
||||
@@ -67,14 +61,41 @@ async function main(): Promise<void> {
|
||||
await runMigrations(pool, logger);
|
||||
logger.info('migrations applied');
|
||||
|
||||
// 3. Connect Redis with exponential-backoff retry
|
||||
// 4. Connect Redis with exponential-backoff retry
|
||||
const redis: Redis = await connectRedis(config.REDIS_URL, logger);
|
||||
|
||||
// 4. Build pipeline components
|
||||
// 5. Build pipeline components
|
||||
const state = createDeviceStateStore(config, logger);
|
||||
const writer = createWriter(pool, config, logger, metrics);
|
||||
|
||||
// 5. Define the sink: central decision point for state update and Postgres write.
|
||||
// 6. Postgres health check — background cached SELECT 1 for /readyz.
|
||||
// The check starts probing immediately so /readyz is accurate from the
|
||||
// first request after the metrics server starts listening.
|
||||
const pgHealth = createPostgresHealthCheck(pool);
|
||||
|
||||
// 7. Start metrics HTTP server.
|
||||
// Bound before the consumer starts so /healthz responds even during the
|
||||
// brief window between metrics-server start and first stream read.
|
||||
const metricsServer: http.Server = startMetricsServer(
|
||||
config.METRICS_PORT,
|
||||
() => metrics.serializeMetrics(),
|
||||
{
|
||||
isRedisReady: () => redis.status === 'ready',
|
||||
isPostgresReady: pgHealth.isReady,
|
||||
},
|
||||
);
|
||||
logger.info({ port: config.METRICS_PORT }, 'metrics server listening');
|
||||
|
||||
// 8. Start consumer lag sampler (background interval, every 10 s).
|
||||
const lagSampler = createConsumerLagSampler(
|
||||
redis,
|
||||
config.REDIS_TELEMETRY_STREAM,
|
||||
config.REDIS_CONSUMER_GROUP,
|
||||
metrics,
|
||||
(msg) => logger.debug(msg),
|
||||
);
|
||||
|
||||
// 9. Define the sink: central decision point for state update and Postgres write.
|
||||
// State is updated BEFORE the write so that in-memory state is consistent with
|
||||
// what has been seen, even if the Postgres write subsequently fails. If the write
|
||||
// fails the record stays pending (not ACKed) and will be re-delivered — applying
|
||||
@@ -82,54 +103,75 @@ async function main(): Promise<void> {
|
||||
// only position_count_session is double-counted, which is a session counter that
|
||||
// resets on restart and is not a correctness concern.
|
||||
const sink = async (records: ConsumedRecord[]): Promise<string[]> => {
|
||||
// 5a. Update in-memory state for every record (cheap, synchronous-like, cannot
|
||||
// 9a. Update in-memory state for every record (cheap, synchronous-like, cannot
|
||||
// fail meaningfully — Map operations do not throw).
|
||||
for (const record of records) {
|
||||
state.update(record.position);
|
||||
}
|
||||
|
||||
// 5b. Write to Postgres
|
||||
// 9b. Emit device-state gauges (sampled per-batch; cheap).
|
||||
metrics.observe('processor_device_state_size', state.size());
|
||||
|
||||
// 9c. Write to Postgres
|
||||
const results = await writer.write(records);
|
||||
|
||||
// 5c. ACK only the IDs that succeeded or were already present.
|
||||
// 9d. ACK only the IDs that succeeded or were already present.
|
||||
// 'failed' records are deliberately left pending for retry.
|
||||
return results
|
||||
const ackIds = results
|
||||
.filter((r) => r.status === 'inserted' || r.status === 'duplicate')
|
||||
.map((r) => r.id);
|
||||
|
||||
if (ackIds.length > 0) {
|
||||
metrics.inc('processor_acks_total');
|
||||
}
|
||||
|
||||
return ackIds;
|
||||
};
|
||||
|
||||
// 6. Build and start the consumer
|
||||
// 10. Build and start the consumer
|
||||
const consumer = createConsumer(redis, config, logger, metrics, sink);
|
||||
await consumer.start();
|
||||
|
||||
// 7. Install graceful shutdown stub.
|
||||
// Full Phase 3 hardening: explicit consumer-group commit on SIGTERM,
|
||||
// uncaught-exception handler, multi-instance drain mode.
|
||||
installGracefulShutdown({ redis, pool, consumer, logger });
|
||||
// 11. Install graceful shutdown.
|
||||
// Full Phase 3 hardening: explicit consumer-group commit on SIGTERM,
|
||||
// uncaught-exception handler, multi-instance drain mode.
|
||||
installGracefulShutdown({
|
||||
redis,
|
||||
pool,
|
||||
consumer,
|
||||
metricsServer,
|
||||
pgHealth,
|
||||
lagSampler,
|
||||
logger,
|
||||
});
|
||||
|
||||
logger.info(
|
||||
{
|
||||
stream: config.REDIS_TELEMETRY_STREAM,
|
||||
group: config.REDIS_CONSUMER_GROUP,
|
||||
consumer: config.REDIS_CONSUMER_NAME,
|
||||
metricsPort: config.METRICS_PORT,
|
||||
},
|
||||
'processor ready',
|
||||
);
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Graceful shutdown stub — Phase 3 finalizes this
|
||||
// Graceful shutdown — Phase 3 finalizes this
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
type ShutdownDeps = {
|
||||
readonly redis: Redis;
|
||||
readonly pool: pg.Pool;
|
||||
readonly consumer: { stop: () => Promise<void> };
|
||||
readonly metricsServer: http.Server;
|
||||
readonly pgHealth: { stop: () => void };
|
||||
readonly lagSampler: { stop: () => void };
|
||||
readonly logger: ReturnType<typeof createLogger>;
|
||||
};
|
||||
|
||||
function installGracefulShutdown(deps: ShutdownDeps): void {
|
||||
const { redis, pool, consumer, logger: log } = deps;
|
||||
const { redis, pool, consumer, metricsServer, pgHealth, lagSampler, logger: log } = deps;
|
||||
|
||||
let shuttingDown = false;
|
||||
|
||||
@@ -139,11 +181,22 @@ function installGracefulShutdown(deps: ShutdownDeps): void {
|
||||
|
||||
log.info({ signal }, 'shutdown signal received');
|
||||
|
||||
// Stop consumer loop — exits after the current batch finishes.
|
||||
// Cancel background intervals first — they hold no resources that need
|
||||
// draining, and stopping them early prevents spurious log noise during
|
||||
// the shutdown sequence.
|
||||
lagSampler.stop();
|
||||
pgHealth.stop();
|
||||
|
||||
consumer
|
||||
.stop()
|
||||
.then(() => {
|
||||
log.info('consumer stopped');
|
||||
return new Promise<void>((resolve, reject) =>
|
||||
metricsServer.close((err) => (err ? reject(err) : resolve())),
|
||||
);
|
||||
})
|
||||
.then(() => {
|
||||
log.info('metrics server closed');
|
||||
return redis.quit();
|
||||
})
|
||||
.then(() => {
|
||||
|
||||
@@ -0,0 +1,450 @@
|
||||
import * as http from 'node:http';
|
||||
import {
|
||||
Registry,
|
||||
Counter,
|
||||
Gauge,
|
||||
Histogram,
|
||||
collectDefaultMetrics,
|
||||
} from 'prom-client';
|
||||
import type { Redis } from 'ioredis';
|
||||
import type pg from 'pg';
|
||||
import type { Metrics } from '../core/types.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Readiness probe dependencies — injected so this module has no direct
|
||||
// dependency on Redis or Postgres clients. The caller wires the closures.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Readiness probes injected into startMetricsServer for /readyz.
 * Both accessors must be synchronous and non-throwing — they are called on
 * every /readyz request, so they should only read cached state.
 */
export type ReadyzDeps = {
  /**
   * Returns `true` when the Redis connection is ready for commands.
   * Typically: `() => redis.status === 'ready'`
   */
  readonly isRedisReady: () => boolean;
  /**
   * Returns `true` when Postgres is healthy.
   * Implemented as a cached `SELECT 1` (see createPostgresHealthCheck).
   */
  readonly isPostgresReady: () => boolean;
};
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Internal metric registry type — one typed field per metric in the inventory.
|
||||
// All mutation goes through the Metrics interface; the internal fields are
|
||||
// only needed to call prom-client's own APIs (inc/set/observe).
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
type InternalRegistry = {
  /** The prom-client registry every metric below is registered on. */
  readonly registry: Registry;
  readonly consumerReadsTotal: Counter; // processor_consumer_reads_total{result}
  readonly consumerRecordsTotal: Counter; // processor_consumer_records_total
  readonly consumerLag: Gauge; // processor_consumer_lag
  readonly decodeErrorsTotal: Counter; // processor_decode_errors_total
  readonly positionWritesTotal: Counter; // processor_position_writes_total{status}
  readonly positionWriteDurationSeconds: Histogram; // processor_position_write_duration_seconds
  readonly acksTotal: Counter; // processor_acks_total
  readonly deviceStateSizeGauge: Gauge; // processor_device_state_size
  readonly deviceStateEvictionsTotal: Counter; // processor_device_state_evictions_total
};
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// createMetrics — builds the full prom-client registry and returns a Metrics
|
||||
// wrapper that satisfies the existing call-site interface.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Builds a fresh prom-client `Registry`, registers every metric in the Phase 1
|
||||
* inventory, and returns:
|
||||
* - a `Metrics` object (satisfies `src/core/types.ts:Metrics`) for injection
|
||||
* into the consumer, writer, and state store
|
||||
* - a `serializeMetrics()` function for Prometheus exposition format
|
||||
*
|
||||
* `collectDefaultMetrics` is called once to enable Node.js process metrics
|
||||
* (GC, event loop lag, heap stats, etc.) under the same registry.
|
||||
*/
|
||||
export function createMetrics(): Metrics & {
|
||||
serializeMetrics: () => Promise<string>;
|
||||
} {
|
||||
const internal = buildInternalRegistry();
|
||||
|
||||
// Expose default Node.js process metrics (nodejs_*) on the same registry.
|
||||
collectDefaultMetrics({ register: internal.registry });
|
||||
|
||||
const metricsImpl: Metrics & { serializeMetrics: () => Promise<string> } = {
|
||||
inc(name: string, labels?: Record<string, string>): void {
|
||||
dispatchInc(internal, name, labels);
|
||||
},
|
||||
|
||||
observe(name: string, value: number, labels?: Record<string, string>): void {
|
||||
dispatchObserve(internal, name, value, labels);
|
||||
},
|
||||
|
||||
serializeMetrics(): Promise<string> {
|
||||
return internal.registry.metrics();
|
||||
},
|
||||
};
|
||||
|
||||
return metricsImpl;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// startMetricsServer — minimal node:http server for /metrics, /healthz, /readyz
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Starts the Prometheus metrics HTTP server on the given port.
|
||||
*
|
||||
* Endpoints:
|
||||
* GET /metrics — Prometheus exposition format (text/plain; version=0.0.4)
|
||||
* GET /healthz — 200 if the process is alive (liveness probe)
|
||||
* GET /readyz — 200 if Redis is connected AND Postgres is healthy;
|
||||
* 503 otherwise (readiness probe)
|
||||
*
|
||||
* @param port Port to bind; 0 lets the OS pick (useful in tests).
|
||||
* @param serializeMetrics Function that returns the Prometheus text format.
|
||||
* @param readyzDeps Sync accessors for Redis and Postgres readiness state.
|
||||
*/
|
||||
export function startMetricsServer(
|
||||
port: number,
|
||||
serializeMetrics: () => Promise<string>,
|
||||
readyzDeps: ReadyzDeps,
|
||||
): http.Server {
|
||||
const server = http.createServer((req, res) => {
|
||||
const url = req.url ?? '/';
|
||||
const method = req.method ?? 'GET';
|
||||
|
||||
// Reject non-GET requests for all endpoints.
|
||||
if (method !== 'GET') {
|
||||
res.writeHead(405, { 'Content-Type': 'text/plain' });
|
||||
res.end('Method Not Allowed');
|
||||
return;
|
||||
}
|
||||
|
||||
if (url === '/metrics') {
|
||||
serializeMetrics()
|
||||
.then((text) => {
|
||||
res.writeHead(200, { 'Content-Type': 'text/plain; version=0.0.4; charset=utf-8' });
|
||||
res.end(text);
|
||||
})
|
||||
.catch((err: unknown) => {
|
||||
res.writeHead(500, { 'Content-Type': 'text/plain' });
|
||||
res.end(`Internal Server Error: ${err instanceof Error ? err.message : String(err)}`);
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
if (url === '/healthz') {
|
||||
res.writeHead(200, { 'Content-Type': 'application/json' });
|
||||
res.end(JSON.stringify({ status: 'ok' }));
|
||||
return;
|
||||
}
|
||||
|
||||
if (url === '/readyz') {
|
||||
const redisOk = readyzDeps.isRedisReady();
|
||||
const postgresOk = readyzDeps.isPostgresReady();
|
||||
|
||||
if (redisOk && postgresOk) {
|
||||
res.writeHead(200, { 'Content-Type': 'application/json' });
|
||||
res.end(JSON.stringify({ status: 'ok' }));
|
||||
} else {
|
||||
res.writeHead(503, { 'Content-Type': 'application/json' });
|
||||
res.end(JSON.stringify({ status: 'not ready', redis: redisOk, postgres: postgresOk }));
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
res.writeHead(404, { 'Content-Type': 'text/plain' });
|
||||
res.end('Not Found');
|
||||
});
|
||||
|
||||
server.listen(port);
|
||||
return server;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// createPostgresHealthCheck — cached SELECT 1 for /readyz
|
||||
//
|
||||
// Runs a SELECT 1 against the pool at most once every CACHE_TTL_MS (5 s).
|
||||
// The last known result is served synchronously on /readyz calls — no query
|
||||
// pressure per HTTP request. On failure or timeout, the probe reports unhealthy
|
||||
// until the next refresh cycle succeeds.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
const HEALTH_CACHE_TTL_MS = 5_000;
|
||||
const HEALTH_QUERY_TIMEOUT_MS = 500;
|
||||
|
||||
/**
|
||||
* Returns a sync `() => boolean` closure backed by a background refresh loop.
|
||||
* Call `stop()` to cancel the interval during graceful shutdown.
|
||||
*/
|
||||
export function createPostgresHealthCheck(
|
||||
pool: pg.Pool,
|
||||
): { isReady: () => boolean; stop: () => void } {
|
||||
let healthy = false;
|
||||
|
||||
async function probe(): Promise<void> {
|
||||
const client = await Promise.race([
|
||||
pool.connect(),
|
||||
new Promise<never>((_, reject) =>
|
||||
setTimeout(() => reject(new Error('health probe connect timeout')), HEALTH_QUERY_TIMEOUT_MS),
|
||||
),
|
||||
]);
|
||||
|
||||
try {
|
||||
await Promise.race([
|
||||
client.query('SELECT 1'),
|
||||
new Promise<never>((_, reject) =>
|
||||
setTimeout(
|
||||
() => reject(new Error('health probe query timeout')),
|
||||
HEALTH_QUERY_TIMEOUT_MS,
|
||||
),
|
||||
),
|
||||
]);
|
||||
healthy = true;
|
||||
} finally {
|
||||
client.release();
|
||||
}
|
||||
}
|
||||
|
||||
// Run immediately on startup, then every CACHE_TTL_MS.
|
||||
probe().catch(() => {
|
||||
healthy = false;
|
||||
});
|
||||
|
||||
const interval = setInterval(() => {
|
||||
probe().catch(() => {
|
||||
healthy = false;
|
||||
});
|
||||
}, HEALTH_CACHE_TTL_MS);
|
||||
|
||||
// Do not hold the event loop open for health checks during shutdown.
|
||||
interval.unref();
|
||||
|
||||
return {
|
||||
isReady: () => healthy,
|
||||
stop: () => clearInterval(interval),
|
||||
};
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// createConsumerLagSampler — samples XINFO GROUPS for the lag gauge
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Starts a background `setInterval` that samples consumer lag every
|
||||
* `intervalMs` (default 10 s) and calls `metrics.observe` with the result.
|
||||
*
|
||||
* Uses `XINFO GROUPS <stream>` → `lag` field (Redis 7.2+). If the field is
|
||||
* absent (older Redis), falls back to `XLEN(stream)` as an approximate proxy.
|
||||
*
|
||||
* Failures are logged at `debug` and do not interrupt the consumer.
|
||||
*/
|
||||
export function createConsumerLagSampler(
|
||||
redis: Redis,
|
||||
stream: string,
|
||||
group: string,
|
||||
metrics: Metrics,
|
||||
onDebug: (msg: string) => void,
|
||||
intervalMs = 10_000,
|
||||
): { stop: () => void } {
|
||||
async function sample(): Promise<void> {
|
||||
try {
|
||||
// XINFO GROUPS returns an array of flat arrays: [field, value, ...]
|
||||
// for each group. ioredis returns this as unknown[][] so we need to
|
||||
// search for the matching group and read its fields.
|
||||
const rawGroups = await redis.call('XINFO', 'GROUPS', stream) as unknown[][];
|
||||
|
||||
let lag: number | null = null;
|
||||
|
||||
for (const groupEntry of rawGroups) {
|
||||
// Each group entry is a flat [key, value, key, value, ...] array.
|
||||
if (!Array.isArray(groupEntry)) continue;
|
||||
|
||||
// Find the group name first.
|
||||
const nameIdx = groupEntry.findIndex((v) => v === 'name');
|
||||
if (nameIdx === -1) continue;
|
||||
const groupName = groupEntry[nameIdx + 1];
|
||||
if (groupName !== group) continue;
|
||||
|
||||
// Try to read the `lag` field (Redis 7.2+).
|
||||
const lagIdx = groupEntry.findIndex((v) => v === 'lag');
|
||||
if (lagIdx !== -1) {
|
||||
const lagValue = groupEntry[lagIdx + 1];
|
||||
if (typeof lagValue === 'number') {
|
||||
lag = lagValue;
|
||||
}
|
||||
}
|
||||
|
||||
if (lag === null) {
|
||||
// Fallback: XLEN gives total stream length — when the group is fully
|
||||
// caught up this equals lag (since delivered-but-unacked = PEL size,
|
||||
// but PEL is hard to subtract cleanly here). This is labelled
|
||||
// "approximate" via the metric help text.
|
||||
const xlenResult = await redis.xlen(stream);
|
||||
lag = xlenResult;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
if (lag !== null) {
|
||||
metrics.observe('processor_consumer_lag', lag);
|
||||
}
|
||||
} catch (err: unknown) {
|
||||
onDebug(
|
||||
`consumer lag sampling failed: ${err instanceof Error ? err.message : String(err)}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// First sample runs immediately.
|
||||
sample().catch(() => {
|
||||
// Already handled inside sample() — swallow at the outer level so the
|
||||
// unhandled-rejection handler is not triggered.
|
||||
});
|
||||
|
||||
const interval = setInterval(() => {
|
||||
sample().catch(() => {});
|
||||
}, intervalMs);
|
||||
|
||||
interval.unref();
|
||||
|
||||
return { stop: () => clearInterval(interval) };
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Private: registry construction
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
function buildInternalRegistry(): InternalRegistry {
|
||||
const registry = new Registry();
|
||||
|
||||
const consumerReadsTotal = new Counter({
|
||||
name: 'processor_consumer_reads_total',
|
||||
help: 'XREADGROUP calls. result=ok|empty|error. empty = BLOCK timeout, error = client error.',
|
||||
labelNames: ['result'],
|
||||
registers: [registry],
|
||||
});
|
||||
|
||||
const consumerRecordsTotal = new Counter({
|
||||
name: 'processor_consumer_records_total',
|
||||
help: 'Total records pulled off the stream.',
|
||||
registers: [registry],
|
||||
});
|
||||
|
||||
const consumerLag = new Gauge({
|
||||
name: 'processor_consumer_lag',
|
||||
help: 'Consumer group lag (XINFO GROUPS lag field, Redis 7.2+; falls back to approximate XLEN when field absent).',
|
||||
registers: [registry],
|
||||
});
|
||||
|
||||
const decodeErrorsTotal = new Counter({
|
||||
name: 'processor_decode_errors_total',
|
||||
help: 'Records that failed to decode (malformed payload or sentinel error).',
|
||||
registers: [registry],
|
||||
});
|
||||
|
||||
const positionWritesTotal = new Counter({
|
||||
name: 'processor_position_writes_total',
|
||||
help: 'Per-record write outcomes. status=inserted|duplicate|failed.',
|
||||
labelNames: ['status'],
|
||||
registers: [registry],
|
||||
});
|
||||
|
||||
const positionWriteDurationSeconds = new Histogram({
|
||||
name: 'processor_position_write_duration_seconds',
|
||||
help: 'Per-batch Postgres write latency.',
|
||||
buckets: [0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5],
|
||||
registers: [registry],
|
||||
});
|
||||
|
||||
const acksTotal = new Counter({
|
||||
name: 'processor_acks_total',
|
||||
help: 'Total stream entry IDs ACKed.',
|
||||
registers: [registry],
|
||||
});
|
||||
|
||||
const deviceStateSizeGauge = new Gauge({
|
||||
name: 'processor_device_state_size',
|
||||
help: 'Current count of devices in the in-memory LRU state map.',
|
||||
registers: [registry],
|
||||
});
|
||||
|
||||
const deviceStateEvictionsTotal = new Counter({
|
||||
name: 'processor_device_state_evictions_total',
|
||||
help: 'Total LRU evictions from the device state map since start.',
|
||||
registers: [registry],
|
||||
});
|
||||
|
||||
return {
|
||||
registry,
|
||||
consumerReadsTotal,
|
||||
consumerRecordsTotal,
|
||||
consumerLag,
|
||||
decodeErrorsTotal,
|
||||
positionWritesTotal,
|
||||
positionWriteDurationSeconds,
|
||||
acksTotal,
|
||||
deviceStateSizeGauge,
|
||||
deviceStateEvictionsTotal,
|
||||
};
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Private: dispatch helpers — map string metric names to typed prom-client calls
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
function dispatchInc(
|
||||
r: InternalRegistry,
|
||||
name: string,
|
||||
labels?: Record<string, string>,
|
||||
): void {
|
||||
switch (name) {
|
||||
case 'processor_consumer_reads_total':
|
||||
r.consumerReadsTotal.inc(labels ?? {});
|
||||
break;
|
||||
case 'processor_consumer_records_total':
|
||||
r.consumerRecordsTotal.inc();
|
||||
break;
|
||||
case 'processor_decode_errors_total':
|
||||
r.decodeErrorsTotal.inc();
|
||||
break;
|
||||
case 'processor_position_writes_total':
|
||||
r.positionWritesTotal.inc(labels ?? {});
|
||||
break;
|
||||
case 'processor_acks_total':
|
||||
r.acksTotal.inc();
|
||||
break;
|
||||
case 'processor_device_state_evictions_total':
|
||||
r.deviceStateEvictionsTotal.inc();
|
||||
break;
|
||||
default:
|
||||
// Unknown metric name — silently ignore. This preserves the contract
|
||||
// that the Metrics interface never throws, and avoids crashing the
|
||||
// process when a call site references a metric not yet in the registry
|
||||
// (e.g. staged rollouts or future tasks).
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
function dispatchObserve(
|
||||
r: InternalRegistry,
|
||||
name: string,
|
||||
value: number,
|
||||
_labels?: Record<string, string>,
|
||||
): void {
|
||||
switch (name) {
|
||||
case 'processor_position_write_duration_seconds':
|
||||
r.positionWriteDurationSeconds.observe(value);
|
||||
break;
|
||||
case 'processor_consumer_lag':
|
||||
r.consumerLag.set(value);
|
||||
break;
|
||||
case 'processor_device_state_size':
|
||||
r.deviceStateSizeGauge.set(value);
|
||||
break;
|
||||
default:
|
||||
// Unknown metric name — silently ignore (see dispatchInc comment).
|
||||
break;
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user