// saltcorn-idp — turns Saltcorn into an SSO Identity Provider.
//
// Phase 0 (skeleton): create the _idp_* tables, bootstrap a per-tenant signing
// keypair, register the CSRF bypass for the public /idp/ endpoints, and serve
// the OIDC discovery + JWKS documents. Token issuance, LDAP, and SAML were
// planned for later phases; onLoad in this file already ensures the SAML
// certificate and starts the LDAP listener (with a self-heal watchdog).
// onLoad runs once per tenant schema in multi-tenant mode, so the tables and
// signing key are created per-tenant.
|
|
|
|
const cluster = require("cluster");
|
|
const net = require("net");
|
|
|
|
const { PLUGIN_NAME, PLUGIN_VERSION, IDP_BASE_PATH, LDAP_PORT_ENV, LDAP_HOST_ENV, LDAP_DEFAULT_HOST } = require("./lib/constants");
|
|
const { createAllTables } = require("./lib/schema");
|
|
const { initEnvIfMissing, markBootstrapped } = require("./lib/env");
|
|
const { ensureActiveKey } = require("./lib/keys");
|
|
const { routes } = require("./lib/routes");
|
|
const { startLdap, isListening } = require("./lib/ldap/server");
|
|
const { ensureSamlCert } = require("./lib/saml/idp");
|
|
|
|
// Self-heal delay for the intermittent unbound-LDAP heisenbug. ROOT CAUSE
|
|
// (captured 2026-06-01): on a flaky PG boot the cluster PRIMARY never runs
|
|
// onLoad/startLdap (every startLdap call is isPrimary=false), so nobody binds.
|
|
// So the watchdog arms in EVERY process (not just the primary), and on firing
|
|
// TCP-probes the port: if nothing is listening anywhere, it force-binds from
|
|
// THIS process. The per-worker stagger means the first worker binds and the
|
|
// rest probe-find-it-open and skip (no thundering herd). Local consts (not
|
|
// constants.js, which another module owns).
|
|
// Base delay (ms) between onLoad arming the watchdog timer and the timer
// firing; onLoad adds a per-worker stagger on top of this value.
const LDAP_WATCHDOG_MS = 8000;
|
|
// Extra delay (ms) multiplied by the cluster worker id in onLoad, so the
// watchdog timers of different workers fire at different times.
const LDAP_WATCHDOG_STAGGER_MS = 600;
|
|
// Socket timeout (ms) used by probeListening; a probe that times out is
// reported as "nothing listening".
const LDAP_PROBE_TIMEOUT_MS = 2000;
|
|
|
|
// Match server.js: the cluster API renamed isMaster -> isPrimary. Prefer the
// new flag and fall back to the legacy one only when the running Node does not
// define isPrimary at all.
const isPrimary = () => {
  if (cluster.isPrimary === undefined) {
    return cluster.isMaster;
  }
  return cluster.isPrimary;
};
|
|
|
|
// Arm the LDAP self-heal watchdog at most once per process.
|
|
// Module-level flag: onLoad sets it to true the first time it schedules the
// watchdog timer, so later onLoad calls in this process skip re-arming.
let ldapWatchdogArmed = false;
|
|
|
|
|
|
// Resolve true if something is already listening on host:port (so this process
// must NOT try to bind), false otherwise. Used cross-process: isListening() is
// per-process, but a TCP probe sees a listener bound by ANY process (e.g. the
// primary, or a peer worker that healed first).
//
//   host      - hostname or IP address to connect to.
//   port      - TCP port to probe.
//   timeoutMs - optional socket timeout in ms (defaults to
//               LDAP_PROBE_TIMEOUT_MS); a timeout counts as "not listening".
//
// The returned promise never rejects: a refused connection, a timeout, and an
// invalid port all resolve false.
const probeListening = (host, port, timeoutMs = LDAP_PROBE_TIMEOUT_MS) => {
  return new Promise((resolve) => {
    let sock = null;
    let settled = false;
    // Settle exactly once and always release the probe socket.
    const done = (open) => {
      if (settled) {
        return;
      }
      settled = true;
      if (sock) {
        sock.destroy();
      }
      resolve(open);
    };
    try {
      sock = net.connect(port, host);
    } catch (e) {
      // net.connect() validates its arguments synchronously (e.g. a port
      // outside 0-65535 throws). Nothing can be listening on such a port, so
      // report "not listening" instead of rejecting into the caller.
      done(false);
      return;
    }
    sock.setTimeout(timeoutMs);
    sock.once("connect", () => done(true));
    sock.once("timeout", () => done(false));
    // Keep this listener attached (on, not once) so a late socket error can
    // never surface as an unhandled 'error' event.
    sock.on("error", () => done(false));
  });
};
|
|
|
|
|
|
// Write one informational line to stdout, prefixed with the plugin name in
// square brackets so the plugin's output is easy to pick out of the server log.
const log = (msg) => {
  const line = `[${PLUGIN_NAME}] ${msg}`;
  // eslint-disable-next-line no-console
  console.log(line);
};
|
|
|
|
|
|
const ensureCsrfBypass = async () => {
|
|
// The /idp/ namespace holds only machine/OIDC endpoints (discovery, JWKS,
|
|
// and later authorize/token/userinfo) which are never driven by Saltcorn
|
|
// browser forms; oidc-provider manages its own CSRF/state. Admin pages live
|
|
// under /admin/idp and stay CSRF-protected. Bypass is a global (root-state)
|
|
// config evaluated at startup.
|
|
try {
|
|
const { getState } = require("@saltcorn/data/db/state");
|
|
const current = getState().getConfig("disable_csrf_routes", "");
|
|
const want = IDP_BASE_PATH + "/";
|
|
const entries = current.split(",").map((s) => s.trim()).filter(Boolean);
|
|
if (!entries.includes(want)) {
|
|
entries.push(want);
|
|
await getState().setConfig("disable_csrf_routes", entries.join(","));
|
|
}
|
|
} catch (e) {
|
|
// eslint-disable-next-line no-console
|
|
console.error(`[${PLUGIN_NAME}] failed to register csrf bypass:`, e);
|
|
}
|
|
};
|
|
|
|
|
|
// Schedule the one-shot LDAP self-heal watchdog for this process.
//
// The timer fires LDAP_WATCHDOG_MS after arming, plus a stagger of
// LDAP_WATCHDOG_STAGGER_MS per cluster worker id, so the first worker binds and
// the rest see the port already up. When it fires it does nothing if this
// process is already listening or a TCP probe finds a listener; otherwise it
// logs loudly and calls startLdap({ force: true }).
//
// The timer callback never rejects: a failing isListening()/probeListening()
// is logged and treated as "nothing listening" (the same direction the
// watchdog already takes when the port cannot be probed), and a failing
// force-bind is logged.
const armLdapWatchdog = () => {
  const port = parseInt(process.env[LDAP_PORT_ENV], 10);
  const hostEnv = (process.env[LDAP_HOST_ENV] || "").trim();
  // Probe LDAP_DEFAULT_HOST when no host is configured or the listener is
  // bound to a wildcard address (a wildcard is not a connectable target).
  const probeHost = (!hostEnv || hostEnv === "0.0.0.0" || hostEnv === "::") ? LDAP_DEFAULT_HOST : hostEnv;
  const workerId = (cluster.worker && cluster.worker.id) || 0;
  const delay = LDAP_WATCHDOG_MS + workerId * LDAP_WATCHDOG_STAGGER_MS;

  const timer = setTimeout(async () => {
    let alreadyBound = false;
    try {
      // Bound by this process, or by another one (the primary, or a worker
      // that healed first). The probe is skipped when the port is not a number.
      alreadyBound = isListening()
        || (Number.isFinite(port) && (await probeListening(probeHost, port)));
    } catch (e) {
      // Nothing awaits this callback, so an escaping error would become an
      // unhandled promise rejection. Log it and fall through to the force-bind.
      // eslint-disable-next-line no-console
      console.error(`[${PLUGIN_NAME}] LDAP WATCHDOG probe failed (treating as not listening):`, e);
    }
    if (alreadyBound) {
      return;
    }
    // eslint-disable-next-line no-console
    console.error(`[${PLUGIN_NAME}] LDAP WATCHDOG: nothing listening on ${probeHost}:${port} ${delay}ms after onLoad (pid=${process.pid} isPrimary=${isPrimary()}) -- the intermittent unbound-:1637 heisenbug; force-binding from this process`);
    try {
      await startLdap({ force: true });
    } catch (e) {
      // eslint-disable-next-line no-console
      console.error(`[${PLUGIN_NAME}] LDAP WATCHDOG force-bind failed:`, e);
    }
  }, delay);

  // Do not keep the event loop alive solely for this one-shot watchdog.
  if (timer && typeof timer.unref === "function") {
    timer.unref();
  }
};

// Saltcorn plugin load hook; per the file header it runs once per tenant
// schema in multi-tenant mode.
//
// Steps, in order:
//   1. Bootstrap: create the tables, initialise the env row, ensure an active
//      signing key and the SAML certificate, mark the env bootstrapped on first
//      run, and register the CSRF bypass. A failure here is logged and
//      remembered, not thrown yet.
//   2. Start the LDAP listener, independently of step 1.
//   3. Arm the LDAP self-heal watchdog, at most once per process and only when
//      the LDAP port environment variable is set.
//   4. Rethrow the bootstrap error from step 1, if there was one.
//
//   cfg - plugin configuration passed by Saltcorn (not used here).
//
// Rejects with the original bootstrap error when step 1 failed.
const onLoad = async (cfg) => {
  let bootstrapErr = null;
  try {
    await createAllTables();
    const env = await initEnvIfMissing();
    const key = await ensureActiveKey();
    await ensureSamlCert();
    if (!env.bootstrapped_at) {
      await markBootstrapped(env.env_id);
      log(`v${PLUGIN_VERSION} bootstrapped env_id=${env.env_id} signing kid=${key.kid}`);
    } else {
      log(`v${PLUGIN_VERSION} loaded env_id=${env.env_id} signing kid=${key.kid}`);
    }
    await ensureCsrfBypass();
  } catch (err) {
    // eslint-disable-next-line no-console
    console.error(`[${PLUGIN_NAME}] onLoad bootstrap failed:`, err);
    bootstrapErr = err;
  }

  // Bind the LDAP listener INDEPENDENTLY of the bootstrap above. The listener
  // needs none of the OIDC keys / SAML cert / CSRF setup, so a transient failure
  // there (e.g. DB contention during concurrent multi-tenant boot) must not skip
  // the bind -- that was the suspected cause of the intermittent unbound-:1637.
  // Starts only if SALTCORN_IDP_LDAP_PORT is set; the module guard binds once per
  // process despite per-tenant onLoad calls; retry + loud warning live in server.js.
  try {
    await startLdap();
  } catch (e) {
    // eslint-disable-next-line no-console
    console.error(`[${PLUGIN_NAME}] startLdap failed:`, e);
  }

  // Self-heal watchdog for the intermittent silent no-bind. Armed in ANY process
  // that has LDAP configured (NOT just the primary -- the captured root cause is
  // that the primary never runs onLoad on the flaky boot).
  if (!ldapWatchdogArmed && process.env[LDAP_PORT_ENV]) {
    ldapWatchdogArmed = true;
    armLdapWatchdog();
  }

  // Preserve the original contract: surface a bootstrap failure to Saltcorn.
  if (bootstrapErr) {
    throw bootstrapErr;
  }
};
|
|
|
|
|
|
module.exports = {
|
|
sc_plugin_api_version: 1,
|
|
plugin_name: PLUGIN_NAME,
|
|
onLoad: onLoad,
|
|
routes: routes
|
|
};
|