Files
julian dec2d190ce Task 1.2 — db-init runner script
scripts/apply-db-init.sh implements the boot-time runner that walks
db-init/*.sql in numeric-prefix order, applies each via psql, and
records successful applications in a migrations_applied guard table
so re-runs are no-ops.

All 7 acceptance criteria pass live against the dev compose stack:
empty dir, missing env var, apply, idempotent re-run, checksum
mismatch, filename collision, broken SQL.

Two retroactive scaffold corrections folded in — one to the Dockerfile,
one at the repo root (both exposed by the first live-test attempt of
1.2's script):

1. apk add bash. The directus/directus:11.17.4 base is Alpine and
   ships ash via BusyBox, not bash. The script uses bash-specific
   features (associative arrays, [[ ]], mapfile, BASH_REMATCH) and
   fails at line 69 in sh.

2. .gitattributes added at repo root forcing LF on *.sh, *.sql,
   *.yaml, *.yml. Without it, Windows checkouts with core.autocrlf=true
   (the Git-for-Windows default) silently inject CRLF, causing
   "bad interpreter: /usr/bin/env bash^M" inside the Linux container.
   This failure mode only manifests in the container.

Both corrections are documented in 01-project-scaffold.md's Done
section; 02-db-init-runner.md's Done section captures the live-test
results, the corrected docker compose run --entrypoint commands, and
the gotcha about compose env defaults masking missing-env-var tests.

ROADMAP marks 1.2 done; 1.3 next.
2026-05-01 22:35:17 +02:00

302 lines
10 KiB
Bash
Executable File

#!/usr/bin/env bash
# =============================================================================
# apply-db-init.sh — TRM directus db-init runner
#
# Walks db-init/*.sql in numeric-prefix order, applies each file via psql, and
# records successful applications in a migrations_applied guard table so
# re-runs are no-ops.
#
# Usage
# Called automatically from entrypoint.sh (wired in Phase 1 task 1.7).
# Can also be run directly for local debugging:
# DB_HOST=localhost DB_PORT=5432 DB_USER=directus DB_PASSWORD=... \
# DB_DATABASE=directus bash scripts/apply-db-init.sh
#
# Required environment variables
# DB_HOST Postgres hostname
# DB_PORT Postgres port (numeric)
# DB_USER Postgres user
# DB_PASSWORD Postgres password (exported as PGPASSWORD; never logged)
# DB_DATABASE Postgres database name
#
# Optional environment variables
# DB_INIT_DIR Directory containing *.sql files (default: /directus/db-init)
# DB_INIT_TIMEOUT_SECONDS Seconds to wait for Postgres readiness (default: 60)
# DEBUG Set to any non-empty value for verbose psql output
#
# Exit codes
# 0 All files applied or skipped successfully.
# 1 Missing required env var -OR- Postgres readiness timeout.
# 2 Checksum mismatch: a previously-applied file has been modified.
# Migrations are append-only — editing a file after it has been applied is forbidden.
# 3 psql error while applying a migration file.
# 4 Filename collision: two files share the same numeric prefix.
#
# Transaction semantics
# Each migration file is wrapped in an implicit BEGIN/COMMIT by psql -1.
# This means the entire file either fully applies or is fully rolled back.
# Limitation: some DDL statements cannot run inside a transaction:
# - CREATE EXTENSION ... (most extensions are fine; pg_partman with
# parallel_apply is the known exception)
# - CREATE INDEX CONCURRENTLY
# If a migration needs these, split it into its own file and remove the -1
# flag from the psql invocation for that file. Document the exception in
# the migration file's header comment. For Phase 1 all three migrations
# (timescaledb extension, positions hypertable, faulty column) are safe
# inside transactions.
#
# Wired into entrypoint.sh in Phase 1 task 1.7.
# =============================================================================
set -euo pipefail

# -----------------------------------------------------------------------------
# Logging helpers
# Every line this script emits carries a "[db-init]" tag so it can be told
# apart from Directus / psql noise in the combined container log stream.
# -----------------------------------------------------------------------------

# log_error — error message to stderr. Stderr is never redirected elsewhere in
# this script, so errors stay visible even when psql stdout is suppressed.
log_error() {
  printf '[db-init] ERROR: %s\n' "$*" >&2
}

# log_info — informational message to stdout.
log_info() {
  printf '[db-init] %s\n' "$*"
}
# -----------------------------------------------------------------------------
# Step 0 — Validate required environment variables
# -----------------------------------------------------------------------------
# Collect every absent variable first, then fail once with the complete list,
# so the operator fixes all of them in one pass instead of one per run.
missing=()
for required in DB_HOST DB_PORT DB_USER DB_PASSWORD DB_DATABASE; do
  # ${!required} is indirect expansion: the value of the variable named by
  # $required; :- keeps set -u from tripping on unset names.
  [[ -n "${!required:-}" ]] || missing+=("${required}")
done
if (( ${#missing[@]} > 0 )); then
  log_error "missing required environment variable(s): ${missing[*]}"
  log_error "Set all of: DB_HOST, DB_PORT, DB_USER, DB_PASSWORD, DB_DATABASE"
  exit 1
fi

# Optional variables with defaults.
DB_INIT_DIR="${DB_INIT_DIR:-/directus/db-init}"
DB_INIT_TIMEOUT_SECONDS="${DB_INIT_TIMEOUT_SECONDS:-60}"

# Export PGPASSWORD so psql and pg_isready pick it up without a prompt.
# Never print this value.
export PGPASSWORD="${DB_PASSWORD}"

if [[ -n "${DEBUG:-}" ]]; then
  log_info "DEBUG mode enabled — psql output will not be suppressed"
fi
# -----------------------------------------------------------------------------
# Shared psql invocation wrapper
# Passes all caller-supplied flags through to psql.
# Stdout is suppressed unless DEBUG is set; stderr is always visible so errors
# are never silently swallowed.
# -----------------------------------------------------------------------------
# run_psql — invoke psql against the configured database.
# Globals:   DB_HOST, DB_PORT, DB_USER, DB_DATABASE (read), DEBUG (read)
# Arguments: any psql flags/arguments, appended verbatim
# Returns:   psql's exit status
run_psql() {
  # Usage: run_psql [psql args...]
  # The connection flags are built once in an array so the DEBUG and non-DEBUG
  # paths cannot drift apart (previously the five-line invocation was
  # duplicated in both branches, differing only in the stdout redirection).
  local -a conn=(
    --host="${DB_HOST}"
    --port="${DB_PORT}"
    --username="${DB_USER}"
    --dbname="${DB_DATABASE}"
  )
  if [[ -n "${DEBUG:-}" ]]; then
    psql "${conn[@]}" "$@"
  else
    psql "${conn[@]}" "$@" > /dev/null
  fi
}
# -----------------------------------------------------------------------------
# Step 1 — Wait for Postgres readiness
# -----------------------------------------------------------------------------
# Poll pg_isready every 2 seconds until the server accepts connections, giving
# up once the configured timeout has elapsed.
log_info "waiting for Postgres at ${DB_HOST}:${DB_PORT} (timeout: ${DB_INIT_TIMEOUT_SECONDS}s)"
waited=0
while ! pg_isready \
  --host="${DB_HOST}" \
  --port="${DB_PORT}" \
  --username="${DB_USER}" \
  --dbname="${DB_DATABASE}" \
  --quiet; do
  # Timeout check happens before sleeping, so a timeout of 0 fails fast.
  if (( waited >= DB_INIT_TIMEOUT_SECONDS )); then
    log_error "Postgres at ${DB_HOST}:${DB_PORT} did not become ready within ${DB_INIT_TIMEOUT_SECONDS}s"
    exit 1
  fi
  sleep 2
  waited=$(( waited + 2 ))
done
log_info "Postgres is ready"
# -----------------------------------------------------------------------------
# Step 2 — Bootstrap the guard table (idempotent)
# -----------------------------------------------------------------------------
# CREATE TABLE IF NOT EXISTS makes this safe to run on every boot: the first
# run creates the table, every later run is a no-op. Columns:
#   filename   — migration file basename; primary key, one row per file
#   applied_at — server-side timestamp recorded when the row is inserted
#   checksum   — SHA-256 hex digest of the file content at apply time,
#                compared on later runs to detect forbidden edits
log_info "bootstrapping migrations_applied guard table"
run_psql --command="
CREATE TABLE IF NOT EXISTS migrations_applied (
filename TEXT PRIMARY KEY,
applied_at TIMESTAMPTZ NOT NULL DEFAULT now(),
checksum TEXT NOT NULL
);
"
# -----------------------------------------------------------------------------
# Step 3 — Validate filename uniqueness (detect numeric-prefix collisions)
# -----------------------------------------------------------------------------
# Collect all *.sql files in DB_INIT_DIR; proceed even if none exist.
# A bare glob (under nullglob) replaces the previous `ls | sort` pipeline:
# glob expansion is already lexically sorted — the NNN_ prefix therefore
# enforces numeric order — and, unlike parsing ls output (ShellCheck SC2012),
# it is safe for filenames containing whitespace or other special characters.
# nullglob makes an unmatched pattern expand to nothing instead of itself,
# which also covers a missing directory.
declare -a SQL_FILES=()
shopt -s nullglob
SQL_FILES=("${DB_INIT_DIR}"/*.sql)
shopt -u nullglob
if [[ ${#SQL_FILES[@]} -eq 0 ]]; then
  log_info "no *.sql files found in ${DB_INIT_DIR} — nothing to apply"
  log_info "db-init complete: 0 applied, 0 skipped"
  exit 0
fi
# Extract numeric prefix from each filename (NNN_name.sql → NNN).
# Two files with the same prefix are a collision.
declare -A SEEN_PREFIXES=()
for filepath in "${SQL_FILES[@]}"; do
  basename_val="$(basename "${filepath}")"
  # Match leading digits before the first underscore.
  if [[ "${basename_val}" =~ ^([0-9]+)_ ]]; then
    prefix="${BASH_REMATCH[1]}"
  else
    # No numeric prefix — still valid; key on the full name so it cannot
    # collide with any numeric prefix.
    prefix="__noprefix__${basename_val}"
  fi
  if [[ -v "SEEN_PREFIXES[${prefix}]" ]]; then
    log_error "filename collision: '${SEEN_PREFIXES[${prefix}]}' and '${basename_val}' share prefix '${prefix}'"
    log_error "Each numeric prefix must be unique. Rename one of the colliding files."
    exit 4
  fi
  SEEN_PREFIXES["${prefix}"]="${basename_val}"
done
log_info "filename uniqueness check passed (${#SQL_FILES[@]} file(s))"
# -----------------------------------------------------------------------------
# Step 4 — Walk files and apply
# -----------------------------------------------------------------------------
applied=0
skipped=0
# Connection flags shared by every direct psql call in this loop.
PSQL_CONN=(
  --host="${DB_HOST}"
  --port="${DB_PORT}"
  --username="${DB_USER}"
  --dbname="${DB_DATABASE}"
)
for filepath in "${SQL_FILES[@]}"; do
  basename_val="$(basename "${filepath}")"
  # Compute SHA-256 checksum of the file.
  # sha256sum output: "<hex>  <filename>" — take only the hex field.
  checksum="$(sha256sum "${filepath}" | awk '{print $1}')"
  # Query the guard table for an existing row. The filename is bound as a
  # psql variable and expanded with :'fname' (safe literal quoting) instead of
  # being interpolated into the SQL text, so a basename containing a single
  # quote cannot break — or inject into — the statement. The statement is fed
  # via stdin because psql does not interpolate variables inside -c/--command.
  existing_checksum="$(
    psql "${PSQL_CONN[@]}" \
      --no-align \
      --tuples-only \
      --set=fname="${basename_val}" \
      <<< "SELECT checksum FROM migrations_applied WHERE filename = :'fname';"
  )"
  if [[ -n "${existing_checksum}" ]]; then
    # Row exists — compare checksums (strip any surrounding whitespace psql
    # may have emitted).
    existing_checksum="$(printf '%s' "${existing_checksum}" | tr -d '[:space:]')"
    if [[ "${existing_checksum}" == "${checksum}" ]]; then
      log_info "skip ${basename_val}"
      skipped=$(( skipped + 1 ))
      continue
    else
      log_error "checksum mismatch for '${basename_val}'"
      log_error " recorded : ${existing_checksum}"
      log_error " on disk : ${checksum}"
      log_error "Migrations are append-only. Reverting a file that was already applied"
      log_error "is forbidden. To fix: restore the original file content, or create a"
      log_error "new migration file to apply the corrective change."
      exit 2
    fi
  fi
  # No existing row — apply the file.
  log_info "apply ${basename_val}"
  # psql flags:
  #   -v ON_ERROR_STOP=1  abort on first SQL error (prevents partial apply)
  #   -1                  wrap the entire file in a single transaction
  #                       (BEGIN/COMMIT added implicitly)
  # See the transaction-semantics note at the top of this file for the
  # CREATE EXTENSION / CREATE INDEX CONCURRENTLY exception.
  psql_exit=0
  if [[ -n "${DEBUG:-}" ]]; then
    psql "${PSQL_CONN[@]}" -v ON_ERROR_STOP=1 -1 --file="${filepath}" \
      || psql_exit=$?
  else
    psql "${PSQL_CONN[@]}" -v ON_ERROR_STOP=1 -1 --file="${filepath}" \
      > /dev/null || psql_exit=$?
  fi
  if [[ "${psql_exit}" -ne 0 ]]; then
    log_error "psql failed (exit ${psql_exit}) while applying '${basename_val}'"
    log_error "The transaction was rolled back. Fix the SQL error and re-run."
    log_error "The file has NOT been recorded in migrations_applied — it will retry on the next run."
    exit 3
  fi
  # Record successful application. Filename and checksum are bound as psql
  # variables for the same quoting-safety reason as the SELECT above.
  psql "${PSQL_CONN[@]}" \
    --set=fname="${basename_val}" \
    --set=csum="${checksum}" \
    > /dev/null \
    <<< "INSERT INTO migrations_applied (filename, checksum) VALUES (:'fname', :'csum');"
  log_info "done ${basename_val}"
  applied=$(( applied + 1 ))
done
# -----------------------------------------------------------------------------
# Step 5 — Summary
# -----------------------------------------------------------------------------
log_info "db-init complete: ${applied} applied, ${skipped} skipped"