sc-idp/index.js
2026-06-01 16:40:54 -05:00

161 lines
7.3 KiB
JavaScript

// saltcorn-idp — turns Saltcorn into an SSO Identity Provider.
//
// Phase 0 (skeleton): create the _idp_* tables, bootstrap a per-tenant signing
// keypair, register the CSRF bypass for the public /idp/ endpoints, and serve
// the OIDC discovery + JWKS documents. onLoad additionally ensures the SAML
// certificate and starts the LDAP listener (guarded by a self-heal watchdog).
// Token issuance arrives in a later phase. onLoad runs once per tenant schema
// in multi-tenant mode, so the tables and signing key are created per-tenant.
const cluster = require("cluster");
const net = require("net");
const { PLUGIN_NAME, PLUGIN_VERSION, IDP_BASE_PATH, LDAP_PORT_ENV, LDAP_HOST_ENV, LDAP_DEFAULT_HOST } = require("./lib/constants");
const { createAllTables } = require("./lib/schema");
const { initEnvIfMissing, markBootstrapped } = require("./lib/env");
const { ensureActiveKey } = require("./lib/keys");
const { routes } = require("./lib/routes");
const { startLdap, isListening } = require("./lib/ldap/server");
const { ensureSamlCert } = require("./lib/saml/idp");
// Self-heal delay for the intermittent unbound-LDAP heisenbug. ROOT CAUSE
// (captured 2026-06-01): on a flaky PG boot the cluster PRIMARY never runs
// onLoad/startLdap (every startLdap call is isPrimary=false), so nobody binds.
// So the watchdog arms in EVERY process (not just the primary), and on firing
// TCP-probes the port: if nothing is listening anywhere, it force-binds from
// THIS process. The per-worker stagger means the first worker binds and the
// rest probe-find-it-open and skip (no thundering herd). Local consts (not
// constants.js, which another module owns).
// Base delay (ms) between onLoad arming the one-shot watchdog and it firing.
const LDAP_WATCHDOG_MS = 8000;
// Extra delay (ms) added per cluster worker id, so workers fire one after another.
const LDAP_WATCHDOG_STAGGER_MS = 600;
// Socket timeout (ms) for the TCP probe; a probe that times out counts as "not listening".
const LDAP_PROBE_TIMEOUT_MS = 2000;
// Report whether this process is the cluster primary. The cluster API renamed
// isMaster -> isPrimary (matching server.js), so prefer the new property and
// fall back to the legacy one only when the new one is absent.
const isPrimary = () => {
  if (cluster.isPrimary === undefined) {
    return cluster.isMaster;
  }
  return cluster.isPrimary;
};
// Arm the LDAP self-heal watchdog at most once per process. onLoad sets this
// flag the first time it schedules the watchdog timer and skips arming on every
// later call while it is true.
let ldapWatchdogArmed = false;
/**
 * Probe whether something is already accepting TCP connections on host:port
 * (so this process must NOT try to bind). Used cross-process: isListening() is
 * per-process, but a TCP probe sees a listener bound by ANY process (e.g. the
 * primary, or a peer worker that healed first).
 *
 * The returned promise never rejects: a connection error, a timeout, or
 * arguments that net.connect refuses synchronously (for example a port outside
 * 0-65535) all resolve to false.
 *
 * @param {string} host - Host name or address to connect to.
 * @param {number} port - TCP port to probe.
 * @param {number} [timeoutMs=LDAP_PROBE_TIMEOUT_MS] - Socket timeout in ms
 *   after which the probe gives up and reports false.
 * @returns {Promise<boolean>} true if the connection was established.
 */
const probeListening = (host, port, timeoutMs = LDAP_PROBE_TIMEOUT_MS) => {
  return new Promise((resolve) => {
    let sock = null;
    let settled = false;
    // Settle exactly once and always release the socket, whichever event wins.
    const done = (open) => {
      if (settled) {
        return;
      }
      settled = true;
      if (sock) {
        sock.destroy();
      }
      resolve(open);
    };
    try {
      // net.connect validates its arguments synchronously; without this guard
      // a bad port would turn into a rejection the watchdog does not expect.
      sock = net.connect(port, host);
      sock.on("connect", () => done(true));
      sock.on("timeout", () => done(false));
      sock.on("error", () => done(false));
      sock.setTimeout(timeoutMs);
    } catch (e) {
      done(false);
    }
  });
};
// Write an informational message to stdout, prefixed with the plugin name in
// square brackets.
const log = (msg) => {
  const line = `[${PLUGIN_NAME}] ${msg}`;
  // eslint-disable-next-line no-console
  console.log(line);
};
// Make sure the "<IDP_BASE_PATH>/" prefix is listed in Saltcorn's
// comma-separated disable_csrf_routes config, appending it when absent.
// Failures are logged and swallowed; this function does not throw.
const ensureCsrfBypass = async () => {
  // The /idp/ namespace holds only machine/OIDC endpoints (discovery, JWKS,
  // and later authorize/token/userinfo) which are never driven by Saltcorn
  // browser forms; oidc-provider manages its own CSRF/state. Admin pages live
  // under /admin/idp and stay CSRF-protected. Bypass is a global (root-state)
  // config evaluated at startup.
  try {
    const { getState } = require("@saltcorn/data/db/state");
    const configured = getState()
      .getConfig("disable_csrf_routes", "")
      .split(",")
      .map((route) => route.trim())
      .filter(Boolean);
    const bypassPrefix = IDP_BASE_PATH + "/";
    if (configured.includes(bypassPrefix)) {
      return; // already registered, nothing to write
    }
    const updated = [...configured, bypassPrefix];
    await getState().setConfig("disable_csrf_routes", updated.join(","));
  } catch (e) {
    // eslint-disable-next-line no-console
    console.error(`[${PLUGIN_NAME}] failed to register csrf bypass:`, e);
  }
};
/**
 * Plugin onLoad hook. Runs three steps in order:
 *
 *  1. Bootstrap: create tables, initialise the env row, ensure a signing key
 *     and the SAML cert, mark the env bootstrapped on first run, and register
 *     the CSRF bypass. A failure is logged and remembered, not thrown yet.
 *  2. Start the LDAP listener, independently of step 1's outcome.
 *  3. Arm a one-shot, per-process self-heal watchdog (only when the LDAP port
 *     env var is set) that force-binds LDAP if nothing is listening.
 *
 * @param {*} cfg - Plugin configuration argument; not read by this function.
 * @returns {Promise<void>}
 * @throws Rethrows the bootstrap error from step 1, after steps 2 and 3 ran.
 */
const onLoad = async (cfg) => {
  let bootstrapErr = null;
  try {
    await createAllTables();
    const env = await initEnvIfMissing();
    const key = await ensureActiveKey();
    await ensureSamlCert();
    if (!env.bootstrapped_at) {
      await markBootstrapped(env.env_id);
      log(`v${PLUGIN_VERSION} bootstrapped env_id=${env.env_id} signing kid=${key.kid}`);
    } else {
      log(`v${PLUGIN_VERSION} loaded env_id=${env.env_id} signing kid=${key.kid}`);
    }
    await ensureCsrfBypass();
  } catch (err) {
    // eslint-disable-next-line no-console
    console.error(`[${PLUGIN_NAME}] onLoad bootstrap failed:`, err);
    bootstrapErr = err;
  }
  // Bind the LDAP listener INDEPENDENTLY of the bootstrap above. The listener
  // needs none of the OIDC keys / SAML cert / CSRF setup, so a transient failure
  // there (e.g. DB contention during concurrent multi-tenant boot) must not skip
  // the bind -- that was the suspected cause of the intermittent unbound-:1637.
  // Starts only if SALTCORN_IDP_LDAP_PORT is set; the module guard binds once per
  // process despite per-tenant onLoad calls; retry + loud warning live in server.js.
  try {
    await startLdap();
  } catch (e) {
    // eslint-disable-next-line no-console
    console.error(`[${PLUGIN_NAME}] startLdap failed:`, e);
  }
  // Self-heal watchdog for the intermittent silent no-bind. Armed in ANY process
  // that has LDAP configured (NOT just the primary -- the captured root cause is
  // that the primary never runs onLoad on the flaky boot). One-shot, staggered by
  // worker id so the first worker binds and the rest see the port already up.
  if (!ldapWatchdogArmed && process.env[LDAP_PORT_ENV]) {
    ldapWatchdogArmed = true;
    const port = parseInt(process.env[LDAP_PORT_ENV], 10);
    const hostEnv = (process.env[LDAP_HOST_ENV] || "").trim();
    // Probe loopback when bound to a wildcard/loopback interface.
    const probeHost = (!hostEnv || hostEnv === "0.0.0.0" || hostEnv === "::") ? LDAP_DEFAULT_HOST : hostEnv;
    const workerId = (cluster.worker && cluster.worker.id) || 0;
    const delay = LDAP_WATCHDOG_MS + workerId * LDAP_WATCHDOG_STAGGER_MS;
    const timer = setTimeout(async () => {
      // Nothing awaits this timer callback, so anything that escapes it becomes
      // an unhandled rejection. Guard the probe step as well as the bind step:
      // a failed probe is logged and treated as "no listener confirmed".
      let alreadyBound = false;
      try {
        alreadyBound =
          isListening() || // this process already bound it
          // another process (primary, or a worker that healed first) bound it
          (Number.isFinite(port) && (await probeListening(probeHost, port)));
      } catch (e) {
        // eslint-disable-next-line no-console
        console.error(`[${PLUGIN_NAME}] LDAP WATCHDOG probe failed:`, e);
      }
      if (alreadyBound) {
        return;
      }
      // eslint-disable-next-line no-console
      console.error(`[${PLUGIN_NAME}] LDAP WATCHDOG: nothing listening on ${probeHost}:${port} ${delay}ms after onLoad (pid=${process.pid} isPrimary=${isPrimary()}) -- the intermittent unbound-:1637 heisenbug; force-binding from this process`);
      try {
        await startLdap({ force: true });
      } catch (e) {
        // eslint-disable-next-line no-console
        console.error(`[${PLUGIN_NAME}] LDAP WATCHDOG force-bind failed:`, e);
      }
    }, delay);
    // Do not keep the event loop alive solely for this one-shot watchdog.
    if (timer && typeof timer.unref === "function") {
      timer.unref();
    }
  }
  // Preserve the original contract: surface a bootstrap failure to Saltcorn.
  if (bootstrapErr) {
    throw bootstrapErr;
  }
};
module.exports = {
sc_plugin_api_version: 1,
plugin_name: PLUGIN_NAME,
onLoad: onLoad,
routes: routes
};