From ec0ed9b2919b07e5fda58bf17799527fa6dd3f8d Mon Sep 17 00:00:00 2001 From: Robert Gingras Date: Mon, 15 Jun 2026 17:00:42 -0400 Subject: [PATCH 01/15] refactor: apply default env vars at configure-time, not at save Admin-defined default environment variables were merged into a container's DB record when the container was created. This froze the defaults to their creation-time values and exposed them in the edit UI. Now the container record stores only user-defined env vars. System defaults (from Settings) and NVIDIA GPU defaults are merged in at configure-time, just before the Proxmox API call, with user-defined values taking precedence. - models/container.js: buildLxcEnvConfig() now merges defaults < NVIDIA defaults < user vars; add getSystemDefaultEnvVars(), parseEnvironmentVars(), and nvidiaDefaultEnvVars() helpers. - bin/create-container.js: merge system defaults at configure-time and stop writing merged env/entrypoint back into the DB record. - bin/reconfigure-container.js: merge system defaults on edit so newly added defaults also reach existing containers. - routers/api/v1/containers.js: stop baking NVIDIA defaults into the saved record on create. Closes #349 --- create-a-container/bin/create-container.js | 100 +++------------- .../bin/reconfigure-container.js | 12 +- create-a-container/models/container.js | 109 ++++++++++++++---- .../routers/api/v1/containers.js | 10 +- 4 files changed, 119 insertions(+), 112 deletions(-) diff --git a/create-a-container/bin/create-container.js b/create-a-container/bin/create-container.js index 766ed815..7cf76351 100755 --- a/create-a-container/bin/create-container.js +++ b/create-a-container/bin/create-container.js @@ -340,62 +340,21 @@ async function main() { console.log('Container configured'); } - // Apply environment variables and entrypoint - // First read defaults from the image, then merge with user-specified values - const defaultConfig = await client.lxcConfig(node.name, vmid); - const defaultEntrypoint = defaultConfig['entrypoint'] || null; - const defaultEnvStr = defaultConfig['env'] || null; - - // Parse default env vars - let mergedEnvVars = {}; - if (defaultEnvStr) { - const pairs = defaultEnvStr.split('\0'); - for (const pair of pairs) { - const eqIndex = pair.indexOf('='); - if (eqIndex > 0) { - mergedEnvVars[pair.substring(0, eqIndex)] = pair.substring(eqIndex + 1); - } - } - } - - // Merge user-specified env vars (user values override defaults) - const userEnvVars = container.environmentVars ? JSON.parse(container.environmentVars) : {}; - - // Load system-wide default env vars from Settings. - // Descriptions are metadata only and are not passed into the container. - let systemDefaultEnvVars = {}; - try { - const entries = await Setting.getDefaultContainerEnvVars(); - for (const entry of entries) { - if (entry.key && entry.key.trim()) { - systemDefaultEnvVars[entry.key.trim()] = entry.value || ''; - } - } - } catch (_) { - console.warn('Could not load default_container_env_vars from settings, skipping'); - } + // Apply environment variables and entrypoint. + // Admin-defined system defaults are merged in here, at configure-time, and are + // intentionally NOT written back into the container's DB record (see below). + // Precedence (lowest to highest): system defaults < NVIDIA defaults < user values. + // Image-provided env/entrypoint defaults are submitted by the UI as user values, + // so they already live in container.environmentVars / container.entrypoint. + const systemDefaultEnvVars = await Container.getSystemDefaultEnvVars(); + const userEnvVars = container.parseEnvironmentVars(); + const envConfig = container.buildLxcEnvConfig(systemDefaultEnvVars); + // buildLxcEnvConfig may add a 'delete' key to unset env/entrypoint; on a fresh + // container there is nothing to delete, so drop it to avoid a no-op API param. + delete envConfig.delete; - // Merge priority: image defaults < system defaults < per-container user values - mergedEnvVars = { ...mergedEnvVars, ...systemDefaultEnvVars, ...userEnvVars }; - - // Use user entrypoint if specified, otherwise keep default - const finalEntrypoint = container.entrypoint || defaultEntrypoint; - - // Build config to apply - const envConfig = {}; - if (finalEntrypoint) { - envConfig.entrypoint = finalEntrypoint; - } - if (Object.keys(mergedEnvVars).length > 0) { - envConfig.env = Object.entries(mergedEnvVars) - .map(([key, value]) => `${key}=${value}`) - .join('\0'); - } - if (Object.keys(envConfig).length > 0) { console.log('Applying environment variables and entrypoint...'); - if (defaultEntrypoint) console.log(`Default entrypoint: ${defaultEntrypoint}`); - if (defaultEnvStr) console.log(`Image default env vars: ${Object.keys(mergedEnvVars).length - Object.keys(userEnvVars).length - Object.keys(systemDefaultEnvVars).length}`); if (Object.keys(systemDefaultEnvVars).length > 0) console.log(`System default env vars: ${Object.keys(systemDefaultEnvVars).length} from settings`); if (Object.keys(userEnvVars).length > 0) console.log(`Per-container env vars: ${Object.keys(userEnvVars).length}`); await client.updateLxcConfig(node.name, vmid, envConfig); @@ -447,11 +406,9 @@ async function main() { throw new Error('Could not extract MAC address from container configuration'); } - // Read back entrypoint and environment variables from config + // Read back configuration from Proxmox. console.log('Querying container configuration...'); const config = await client.lxcConfig(node.name, vmid); - const actualEntrypoint = config['entrypoint'] || null; - const actualEnv = config['env'] || null; // Read back the actual provisioned resources so downstream systems // (e.g. NetBox) mirror what the container really has rather than assuming @@ -459,28 +416,7 @@ async function main() { const actualCores = config['cores'] != null ? parseInt(config['cores'], 10) : null; const actualMemoryMb = config['memory'] != null ? parseInt(config['memory'], 10) : null; const actualDiskGb = parseRootfsSizeGb(config['rootfs']); - - // Parse NUL-separated env string back to JSON object - let environmentVars = {}; - if (actualEnv) { - const pairs = actualEnv.split('\0'); - for (const pair of pairs) { - const eqIndex = pair.indexOf('='); - if (eqIndex > 0) { - const key = pair.substring(0, eqIndex); - const value = pair.substring(eqIndex + 1); - environmentVars[key] = value; - } - } - } - - if (actualEntrypoint) { - console.log(`Entrypoint: ${actualEntrypoint}`); - } - if (Object.keys(environmentVars).length > 0) { - console.log(`Environment variables: ${Object.keys(environmentVars).length} vars`); - } - + // Get IP address from Proxmox interfaces API const ipv4Address = await client.getLxcIpAddress(node.name, vmid); @@ -488,13 +424,15 @@ async function main() { throw new Error('Could not get IP address from Proxmox interfaces API'); } - // Update the container record + // Update the container record. Note: environmentVars and entrypoint are NOT + // written back from the live config — the DB record stores only the values + // the user supplied. Admin-defined system defaults (and NVIDIA defaults) are + // merged in at configure-time and must not be persisted here, otherwise they + // would be frozen into the record and exposed in the edit UI. console.log('Updating container record...'); await container.update({ macAddress, ipv4Address, - entrypoint: actualEntrypoint, - environmentVars: JSON.stringify(environmentVars), status: 'running' }); diff --git a/create-a-container/bin/reconfigure-container.js b/create-a-container/bin/reconfigure-container.js index 5bcebf3a..753ccc94 100644 --- a/create-a-container/bin/reconfigure-container.js +++ b/create-a-container/bin/reconfigure-container.js @@ -80,8 +80,16 @@ async function main() { const client = await node.api(); console.log('Proxmox API client initialized'); - // Build config from environment variables and entrypoint - const lxcConfig = container.buildLxcEnvConfig(); + // Merge admin-defined default environment variables at configure-time rather + // than storing them in the container's DB record. Precedence (lowest to + // highest): system defaults < NVIDIA defaults < user-defined values. + // (NVIDIA and user-defined are applied inside buildLxcEnvConfig.) + // Image-provided defaults are submitted by the UI as user-defined values, so + // they are already part of container.environmentVars and not handled here. + const systemDefaultEnvVars = await Container.getSystemDefaultEnvVars(); + + // Build config from environment variables and entrypoint, merging defaults + const lxcConfig = container.buildLxcEnvConfig(systemDefaultEnvVars); if (Object.keys(lxcConfig).length > 0) { console.log('Applying LXC configuration...'); diff --git a/create-a-container/models/container.js b/create-a-container/models/container.js index e0ae3f18..f9a0b102 100644 --- a/create-a-container/models/container.js +++ b/create-a-container/models/container.js @@ -21,37 +21,100 @@ module.exports = (sequelize, DataTypes) => { } /** - * Build LXC config object for environment variables and entrypoint - * Returns config suitable for Proxmox API updateLxcConfig + * Load the admin-defined system default environment variables from the + * Settings table, flattened to a { KEY: value } object. Descriptions are + * metadata only and are not included. Returns an empty object if the + * setting is missing or malformed. + * @returns {Promise} Flat object of { KEY: value } system defaults + */ + static async getSystemDefaultEnvVars() { + const Setting = this.sequelize.models.Setting; + const defaults = {}; + try { + const entries = await Setting.getDefaultContainerEnvVars(); + for (const entry of entries) { + if (entry.key && entry.key.trim()) { + defaults[entry.key.trim()] = entry.value || ''; + } + } + } catch (_) { + console.warn('Could not load default_container_env_vars from settings, skipping'); + } + return defaults; + } + + /** + * Parse the container's user-defined environment variables. + * The database record only ever stores the variables the user explicitly + * provided — admin/system, image, and NVIDIA defaults are merged in at + * configure-time (see buildLxcEnvConfig) and are intentionally NOT persisted. + * @returns {object} Flat object of { KEY: value } user-defined env vars + */ + parseEnvironmentVars() { + if (!this.environmentVars) return {}; + try { + const parsed = JSON.parse(this.environmentVars); + return parsed && typeof parsed === 'object' ? parsed : {}; + } catch (err) { + console.error('Failed to parse environment variables JSON:', err.message); + return {}; + } + } + + /** + * Environment variables implied by this container's configuration that are + * applied as defaults but never stored in the DB record. Currently this is + * the NVIDIA GPU passthrough defaults, applied when nvidiaRequested is set. + * @returns {object} Flat object of { KEY: value } defaults + */ + nvidiaDefaultEnvVars() { + if (!this.nvidiaRequested) return {}; + return { + NVIDIA_VISIBLE_DEVICES: 'all', + NVIDIA_DRIVER_CAPABILITIES: 'utility compute' + }; + } + + /** + * Build LXC config object for environment variables and entrypoint. + * Returns config suitable for Proxmox API updateLxcConfig. + * + * Default environment variables are merged in here, at configure-time, + * rather than being baked into the container's DB record. User-defined + * variables take precedence over any provided defaults. + * + * @param {object} [defaults={}] - Flat object of default env vars, e.g. the + * admin-defined system defaults. User-defined values and this container's + * own NVIDIA defaults override these. * @returns {object} Config object with 'env' and 'entrypoint' properties */ - buildLxcEnvConfig() { + buildLxcEnvConfig(defaults = {}) { const config = {}; const deleteList = []; - - // Parse environment variables from JSON and format as NUL-separated list - // Format: KEY1=value1\0KEY2=value2\0KEY3=value3 - if (this.environmentVars) { - try { - const envObj = JSON.parse(this.environmentVars); - const envPairs = []; - for (const [key, value] of Object.entries(envObj)) { - if (key && value !== undefined) { - envPairs.push(`${key}=${value}`); - } - } - if (envPairs.length > 0) { - config['env'] = envPairs.join('\0'); - } else { - deleteList.push('env'); - } - } catch (err) { - console.error('Failed to parse environment variables JSON:', err.message); + + // Merge precedence (lowest to highest): + // provided defaults (admin-defined system defaults) < NVIDIA defaults + // < user-defined values + const userEnvVars = this.parseEnvironmentVars(); + const mergedEnvVars = { + ...(defaults && typeof defaults === 'object' ? defaults : {}), + ...this.nvidiaDefaultEnvVars(), + ...userEnvVars + }; + + // Format as NUL-separated list: KEY1=value1\0KEY2=value2\0KEY3=value3 + const envPairs = []; + for (const [key, value] of Object.entries(mergedEnvVars)) { + if (key && value !== undefined) { + envPairs.push(`${key}=${value}`); } + } + if (envPairs.length > 0) { + config['env'] = envPairs.join('\0'); } else { deleteList.push('env'); } - + // Set entrypoint command if (this.entrypoint && this.entrypoint.trim()) { config['entrypoint'] = this.entrypoint.trim(); diff --git a/create-a-container/routers/api/v1/containers.js b/create-a-container/routers/api/v1/containers.js index 2ca64da8..ba62d7a1 100644 --- a/create-a-container/routers/api/v1/containers.js +++ b/create-a-container/routers/api/v1/containers.js @@ -243,6 +243,10 @@ router.post( if (!hostname || !hostname.trim()) throw new ApiError(400, 'invalid_request', 'hostname is required'); const wantsNvidia = !!nvidiaRequested; + // Only the user-defined env vars are persisted on the container record. + // NVIDIA defaults (for GPU passthrough) and admin-defined system defaults + // are merged in at configure-time by the create/reconfigure jobs, so they + // are intentionally not stored here. let envVarsJson = null; if (Array.isArray(environmentVars) && environmentVars.length > 0) { const envObj = {}; @@ -251,12 +255,6 @@ router.post( } if (Object.keys(envObj).length > 0) envVarsJson = JSON.stringify(envObj); } - if (wantsNvidia) { - const obj = envVarsJson ? JSON.parse(envVarsJson) : {}; - if (!obj.NVIDIA_VISIBLE_DEVICES) obj.NVIDIA_VISIBLE_DEVICES = 'all'; - if (!obj.NVIDIA_DRIVER_CAPABILITIES) obj.NVIDIA_DRIVER_CAPABILITIES = 'utility compute'; - envVarsJson = JSON.stringify(obj); - } const imageRef = template === 'custom' ? customTemplate?.trim() : template; if (!imageRef) throw new ApiError(400, 'invalid_request', 'template is required'); From b6537115c29eed198a5cd2487dc477edacd66398 Mon Sep 17 00:00:00 2001 From: Robert Gingras Date: Tue, 16 Jun 2026 08:19:34 -0400 Subject: [PATCH 02/15] refactor: make buildLxcEnvConfig the single env-merge entrypoint Move all environment-variable merge logic into Container#buildLxcEnvConfig so there is exactly one place that determines the env to deploy. The method now loads the admin-defined system defaults itself (it became async) and composes them with the NVIDIA and user-defined values: system defaults < NVIDIA defaults < user-defined values Callers in create-container.js and reconfigure-container.js are reduced to a single `await container.buildLxcEnvConfig()` call and no longer load or pass defaults themselves. getSystemDefaultEnvVars, parseEnvironmentVars and nvidiaDefaultEnvVars are now internal helpers of that method. --- create-a-container/bin/create-container.js | 20 ++++------ .../bin/reconfigure-container.js | 15 +++----- create-a-container/models/container.js | 37 +++++++++++-------- 3 files changed, 34 insertions(+), 38 deletions(-) diff --git a/create-a-container/bin/create-container.js b/create-a-container/bin/create-container.js index 7cf76351..57fd84ba 100755 --- a/create-a-container/bin/create-container.js +++ b/create-a-container/bin/create-container.js @@ -340,23 +340,17 @@ async function main() { console.log('Container configured'); } - // Apply environment variables and entrypoint. - // Admin-defined system defaults are merged in here, at configure-time, and are - // intentionally NOT written back into the container's DB record (see below). - // Precedence (lowest to highest): system defaults < NVIDIA defaults < user values. - // Image-provided env/entrypoint defaults are submitted by the UI as user values, - // so they already live in container.environmentVars / container.entrypoint. - const systemDefaultEnvVars = await Container.getSystemDefaultEnvVars(); - const userEnvVars = container.parseEnvironmentVars(); - const envConfig = container.buildLxcEnvConfig(systemDefaultEnvVars); - // buildLxcEnvConfig may add a 'delete' key to unset env/entrypoint; on a fresh - // container there is nothing to delete, so drop it to avoid a no-op API param. + // Apply environment variables and entrypoint. buildLxcEnvConfig is the single + // source of truth for the effective env: it merges admin-defined system + // defaults and NVIDIA defaults under the user-defined values. None of this is + // written back into the DB record (see below). + const envConfig = await container.buildLxcEnvConfig(); + // On a fresh container there is nothing to unset, so drop any 'delete' key to + // avoid a no-op API param. delete envConfig.delete; if (Object.keys(envConfig).length > 0) { console.log('Applying environment variables and entrypoint...'); - if (Object.keys(systemDefaultEnvVars).length > 0) console.log(`System default env vars: ${Object.keys(systemDefaultEnvVars).length} from settings`); - if (Object.keys(userEnvVars).length > 0) console.log(`Per-container env vars: ${Object.keys(userEnvVars).length}`); await client.updateLxcConfig(node.name, vmid, envConfig); console.log('Environment/entrypoint configuration applied'); } diff --git a/create-a-container/bin/reconfigure-container.js b/create-a-container/bin/reconfigure-container.js index 753ccc94..1a345c8e 100644 --- a/create-a-container/bin/reconfigure-container.js +++ b/create-a-container/bin/reconfigure-container.js @@ -80,16 +80,11 @@ async function main() { const client = await node.api(); console.log('Proxmox API client initialized'); - // Merge admin-defined default environment variables at configure-time rather - // than storing them in the container's DB record. Precedence (lowest to - // highest): system defaults < NVIDIA defaults < user-defined values. - // (NVIDIA and user-defined are applied inside buildLxcEnvConfig.) - // Image-provided defaults are submitted by the UI as user-defined values, so - // they are already part of container.environmentVars and not handled here. - const systemDefaultEnvVars = await Container.getSystemDefaultEnvVars(); - - // Build config from environment variables and entrypoint, merging defaults - const lxcConfig = container.buildLxcEnvConfig(systemDefaultEnvVars); + // Build config from environment variables and entrypoint. buildLxcEnvConfig + // is the single source of truth for the effective env: it merges admin-defined + // system defaults and NVIDIA defaults under the user-defined values, applied + // here at configure-time rather than stored in the DB record. + const lxcConfig = await container.buildLxcEnvConfig(); if (Object.keys(lxcConfig).length > 0) { console.log('Applying LXC configuration...'); diff --git a/create-a-container/models/container.js b/create-a-container/models/container.js index f9a0b102..7bf58264 100644 --- a/create-a-container/models/container.js +++ b/create-a-container/models/container.js @@ -21,6 +21,7 @@ module.exports = (sequelize, DataTypes) => { } /** + * Internal helper for buildLxcEnvConfig. * Load the admin-defined system default environment variables from the * Settings table, flattened to a { KEY: value } object. Descriptions are * metadata only and are not included. Returns an empty object if the @@ -44,6 +45,7 @@ module.exports = (sequelize, DataTypes) => { } /** + * Internal helper for buildLxcEnvConfig. * Parse the container's user-defined environment variables. * The database record only ever stores the variables the user explicitly * provided — admin/system, image, and NVIDIA defaults are merged in at @@ -62,6 +64,7 @@ module.exports = (sequelize, DataTypes) => { } /** + * Internal helper for buildLxcEnvConfig. * Environment variables implied by this container's configuration that are * applied as defaults but never stored in the DB record. Currently this is * the NVIDIA GPU passthrough defaults, applied when nvidiaRequested is set. @@ -76,30 +79,34 @@ module.exports = (sequelize, DataTypes) => { } /** - * Build LXC config object for environment variables and entrypoint. - * Returns config suitable for Proxmox API updateLxcConfig. + * Build the LXC config object for environment variables and entrypoint to + * deploy to Proxmox via updateLxcConfig. * - * Default environment variables are merged in here, at configure-time, - * rather than being baked into the container's DB record. User-defined - * variables take precedence over any provided defaults. + * This is the single entrypoint for determining a container's effective + * environment. It owns the full merge of every env-var source, applied here + * at configure-time rather than being baked into the container's DB record: * - * @param {object} [defaults={}] - Flat object of default env vars, e.g. the - * admin-defined system defaults. User-defined values and this container's - * own NVIDIA defaults override these. - * @returns {object} Config object with 'env' and 'entrypoint' properties + * admin-defined system defaults < NVIDIA defaults < user-defined values + * + * (Image-provided defaults are submitted by the UI as user-defined values, + * so they arrive via parseEnvironmentVars and need no special handling.) + * + * Callers should simply `await container.buildLxcEnvConfig()` — no env-var + * merging belongs anywhere else. + * + * @returns {Promise} Config object with 'env' and 'entrypoint' + * properties (and a 'delete' list for any that should be unset) */ - buildLxcEnvConfig(defaults = {}) { + async buildLxcEnvConfig() { const config = {}; const deleteList = []; // Merge precedence (lowest to highest): - // provided defaults (admin-defined system defaults) < NVIDIA defaults - // < user-defined values - const userEnvVars = this.parseEnvironmentVars(); + // system defaults < NVIDIA defaults < user-defined values const mergedEnvVars = { - ...(defaults && typeof defaults === 'object' ? defaults : {}), + ...(await this.constructor.getSystemDefaultEnvVars()), ...this.nvidiaDefaultEnvVars(), - ...userEnvVars + ...this.parseEnvironmentVars() }; // Format as NUL-separated list: KEY1=value1\0KEY2=value2\0KEY3=value3 From b8230ed36e99cd5066a23833d80e90ca30868820 Mon Sep 17 00:00:00 2001 From: Robert Gingras Date: Tue, 16 Jun 2026 08:53:25 -0400 Subject: [PATCH 03/15] cleanup --- create-a-container/bin/create-container.js | 15 ++------------- create-a-container/bin/reconfigure-container.js | 5 +---- create-a-container/routers/api/v1/containers.js | 4 ---- 3 files changed, 3 insertions(+), 21 deletions(-) diff --git a/create-a-container/bin/create-container.js b/create-a-container/bin/create-container.js index 57fd84ba..b1b81f2b 100755 --- a/create-a-container/bin/create-container.js +++ b/create-a-container/bin/create-container.js @@ -340,15 +340,8 @@ async function main() { console.log('Container configured'); } - // Apply environment variables and entrypoint. buildLxcEnvConfig is the single - // source of truth for the effective env: it merges admin-defined system - // defaults and NVIDIA defaults under the user-defined values. None of this is - // written back into the DB record (see below). + // Apply environment variables and entrypoint const envConfig = await container.buildLxcEnvConfig(); - // On a fresh container there is nothing to unset, so drop any 'delete' key to - // avoid a no-op API param. - delete envConfig.delete; - if (Object.keys(envConfig).length > 0) { console.log('Applying environment variables and entrypoint...'); await client.updateLxcConfig(node.name, vmid, envConfig); @@ -418,11 +411,7 @@ async function main() { throw new Error('Could not get IP address from Proxmox interfaces API'); } - // Update the container record. Note: environmentVars and entrypoint are NOT - // written back from the live config — the DB record stores only the values - // the user supplied. Admin-defined system defaults (and NVIDIA defaults) are - // merged in at configure-time and must not be persisted here, otherwise they - // would be frozen into the record and exposed in the edit UI. + // Update the container record console.log('Updating container record...'); await container.update({ macAddress, diff --git a/create-a-container/bin/reconfigure-container.js b/create-a-container/bin/reconfigure-container.js index 1a345c8e..11d8e031 100644 --- a/create-a-container/bin/reconfigure-container.js +++ b/create-a-container/bin/reconfigure-container.js @@ -80,10 +80,7 @@ async function main() { const client = await node.api(); console.log('Proxmox API client initialized'); - // Build config from environment variables and entrypoint. buildLxcEnvConfig - // is the single source of truth for the effective env: it merges admin-defined - // system defaults and NVIDIA defaults under the user-defined values, applied - // here at configure-time rather than stored in the DB record. + // Build config from environment variables and entrypoint const lxcConfig = await container.buildLxcEnvConfig(); if (Object.keys(lxcConfig).length > 0) { diff --git a/create-a-container/routers/api/v1/containers.js b/create-a-container/routers/api/v1/containers.js index ba62d7a1..a4556ad8 100644 --- a/create-a-container/routers/api/v1/containers.js +++ b/create-a-container/routers/api/v1/containers.js @@ -243,10 +243,6 @@ router.post( if (!hostname || !hostname.trim()) throw new ApiError(400, 'invalid_request', 'hostname is required'); const wantsNvidia = !!nvidiaRequested; - // Only the user-defined env vars are persisted on the container record. - // NVIDIA defaults (for GPU passthrough) and admin-defined system defaults - // are merged in at configure-time by the create/reconfigure jobs, so they - // are intentionally not stored here. let envVarsJson = null; if (Array.isArray(environmentVars) && environmentVars.length > 0) { const envObj = {}; From 8dd152e531f053b5c8c663fce48bec6b126bf8b3 Mon Sep 17 00:00:00 2001 From: Robert Gingras Date: Tue, 16 Jun 2026 09:10:33 -0400 Subject: [PATCH 04/15] fix: validate and normalize env var keys/values to prevent corrupting the Proxmox env string Addresses PR review: env var keys were only trimmed, so a key containing '=' or NUL (or a non-string/array value) could corrupt the NUL-separated KEY=value blob sent to Proxmox (splitting one var into many, breaking parsing, or stringifying objects to "[object Object]"). Add Container.normalizeEnvVars() as the single definition of a valid env var: input must be a plain object; keys are trimmed and must match a conventional env-var name (no '=', NUL, or whitespace); values are coerced to strings, with null/undefined, objects/arrays, and NUL-containing values dropped. Apply it at every ingest/load point: - getSystemDefaultEnvVars() and parseEnvironmentVars() normalize their output. - API create/update routes serialize user env vars via a shared serializeUserEnvVars() helper that normalizes before persisting. buildLxcEnvConfig can now trust its inputs, so the per-pair guards are dropped. --- create-a-container/models/container.js | 78 +++++++++++++++---- .../routers/api/v1/containers.js | 36 +++++---- 2 files changed, 84 insertions(+), 30 deletions(-) diff --git a/create-a-container/models/container.js b/create-a-container/models/container.js index 7bf58264..a715f1b8 100644 --- a/create-a-container/models/container.js +++ b/create-a-container/models/container.js @@ -20,43 +20,87 @@ module.exports = (sequelize, DataTypes) => { Container.belongsTo(models.Job, { foreignKey: 'creationJobId', as: 'creationJob' }); } + /** + * Normalize a set of environment variables into a safe, flat + * { KEY: stringValue } object suitable for building the Proxmox `env` + * string. This is the single place that decides what a valid env var is. + * + * Rules (applied to every source — user input, settings, image defaults): + * - Input that is not a plain object (e.g. an array or null) yields {}. + * - Keys are trimmed. A key is dropped unless it is a non-empty string with + * no whitespace and no `=` or NUL — i.e. it matches a conventional env + * var name. This prevents a key from corrupting the NUL-separated + * `KEY=value` encoding. + * - Values are coerced to strings. Entries whose value is null/undefined or + * a non-primitive (object/array) are dropped rather than stringified to + * something like "[object Object]". Values containing NUL are also + * dropped, since NUL is the pair separator. + * + * @param {*} input - Candidate env vars, ideally a { key: value } object + * @returns {object} Flat object of validated { KEY: stringValue } + */ + static normalizeEnvVars(input) { + const out = {}; + if (!input || typeof input !== 'object' || Array.isArray(input)) return out; + + // Conventional env var name: starts with a letter or underscore, followed + // by letters, digits, or underscores. Excludes `=`, NUL, whitespace, etc. + const validKey = /^[A-Za-z_][A-Za-z0-9_]*$/; + + for (const [rawKey, rawValue] of Object.entries(input)) { + const key = typeof rawKey === 'string' ? rawKey.trim() : ''; + if (!validKey.test(key)) continue; + + // Only primitives (string/number/boolean) become values; skip + // null/undefined and objects/arrays. + if (rawValue === null || rawValue === undefined) continue; + if (typeof rawValue === 'object') continue; + const value = String(rawValue); + if (value.includes('\0')) continue; + + out[key] = value; + } + return out; + } + /** * Internal helper for buildLxcEnvConfig. * Load the admin-defined system default environment variables from the - * Settings table, flattened to a { KEY: value } object. Descriptions are - * metadata only and are not included. Returns an empty object if the - * setting is missing or malformed. + * Settings table, flattened to a validated { KEY: value } object. + * Descriptions are metadata only and are not included. Returns an empty + * object if the setting is missing or malformed. * @returns {Promise} Flat object of { KEY: value } system defaults */ static async getSystemDefaultEnvVars() { const Setting = this.sequelize.models.Setting; - const defaults = {}; + const raw = {}; try { const entries = await Setting.getDefaultContainerEnvVars(); for (const entry of entries) { - if (entry.key && entry.key.trim()) { - defaults[entry.key.trim()] = entry.value || ''; + // getDefaultContainerEnvVars yields { key, value, description }. + if (entry && typeof entry.key === 'string') { + raw[entry.key] = entry.value; } } } catch (_) { console.warn('Could not load default_container_env_vars from settings, skipping'); } - return defaults; + return this.normalizeEnvVars(raw); } /** * Internal helper for buildLxcEnvConfig. - * Parse the container's user-defined environment variables. - * The database record only ever stores the variables the user explicitly - * provided — admin/system, image, and NVIDIA defaults are merged in at - * configure-time (see buildLxcEnvConfig) and are intentionally NOT persisted. - * @returns {object} Flat object of { KEY: value } user-defined env vars + * Parse the container's user-defined environment variables into a validated, + * flat { KEY: value } object. The database record only ever stores the + * variables the user explicitly provided — admin/system, image, and NVIDIA + * defaults are merged in at configure-time (see buildLxcEnvConfig) and are + * intentionally NOT persisted. + * @returns {object} Flat object of validated { KEY: value } user env vars */ parseEnvironmentVars() { if (!this.environmentVars) return {}; try { - const parsed = JSON.parse(this.environmentVars); - return parsed && typeof parsed === 'object' ? parsed : {}; + return this.constructor.normalizeEnvVars(JSON.parse(this.environmentVars)); } catch (err) { console.error('Failed to parse environment variables JSON:', err.message); return {}; @@ -103,6 +147,8 @@ module.exports = (sequelize, DataTypes) => { // Merge precedence (lowest to highest): // system defaults < NVIDIA defaults < user-defined values + // Every source is already normalized to a safe { KEY: stringValue } map + // (see normalizeEnvVars), so the encoding below cannot be corrupted. const mergedEnvVars = { ...(await this.constructor.getSystemDefaultEnvVars()), ...this.nvidiaDefaultEnvVars(), @@ -112,9 +158,7 @@ module.exports = (sequelize, DataTypes) => { // Format as NUL-separated list: KEY1=value1\0KEY2=value2\0KEY3=value3 const envPairs = []; for (const [key, value] of Object.entries(mergedEnvVars)) { - if (key && value !== undefined) { - envPairs.push(`${key}=${value}`); - } + envPairs.push(`${key}=${value}`); } if (envPairs.length > 0) { config['env'] = envPairs.join('\0'); diff --git a/create-a-container/routers/api/v1/containers.js b/create-a-container/routers/api/v1/containers.js index a4556ad8..70c2947d 100644 --- a/create-a-container/routers/api/v1/containers.js +++ b/create-a-container/routers/api/v1/containers.js @@ -27,6 +27,25 @@ const router = express.Router({ mergeParams: true }); router.use(apiAuth); +/** + * Convert the `environmentVars` request payload (an array of { key, value } + * objects) into the JSON string stored on the container record, or null when + * there are no valid vars. Keys/values are validated and normalized via + * Container.normalizeEnvVars so only safe env var names are persisted (keys + * containing `=`/NUL or non-primitive values are dropped at ingest time). + * @param {Array<{key: string, value: *}>} environmentVars + * @returns {string|null} + */ +function serializeUserEnvVars(environmentVars) { + if (!Array.isArray(environmentVars)) return null; + const flat = {}; + for (const e of environmentVars) { + if (e && typeof e.key === 'string') flat[e.key] = e.value; + } + const normalized = Container.normalizeEnvVars(flat); + return Object.keys(normalized).length > 0 ? JSON.stringify(normalized) : null; +} + function normalizeDockerRef(ref) { if (ref.startsWith('http://') || ref.startsWith('https://') || ref.startsWith('git@')) return ref; let tag = 'latest'; @@ -243,14 +262,9 @@ router.post( if (!hostname || !hostname.trim()) throw new ApiError(400, 'invalid_request', 'hostname is required'); const wantsNvidia = !!nvidiaRequested; - let envVarsJson = null; - if (Array.isArray(environmentVars) && environmentVars.length > 0) { - const envObj = {}; - for (const e of environmentVars) { - if (e?.key && e.key.trim()) envObj[e.key.trim()] = e.value || ''; - } - if (Object.keys(envObj).length > 0) envVarsJson = JSON.stringify(envObj); - } + // Only the user-defined env vars are persisted on the container record; + // NVIDIA and admin-defined system defaults are merged in at configure-time. + const envVarsJson = serializeUserEnvVars(environmentVars); const imageRef = template === 'custom' ? customTemplate?.trim() : template; if (!imageRef) throw new ApiError(400, 'invalid_request', 'template is required'); @@ -383,11 +397,7 @@ router.put( let envVarsJson = container.environmentVars; if (!isRestartOnly && Array.isArray(environmentVars)) { - const obj = {}; - for (const e of environmentVars) { - if (e?.key) obj[e.key.trim()] = e.value || ''; - } - envVarsJson = Object.keys(obj).length > 0 ? JSON.stringify(obj) : null; + envVarsJson = serializeUserEnvVars(environmentVars); } else if (!isRestartOnly && !environmentVars) { envVarsJson = null; } From 2f968d0dcb919364b3690c78b175f5438e190f1f Mon Sep 17 00:00:00 2001 From: Robert Gingras Date: Tue, 16 Jun 2026 09:30:06 -0400 Subject: [PATCH 05/15] fix: don't unset template-provided env/entrypoint on container create buildLxcEnvConfig always added env/entrypoint to Proxmox's `delete` list when the container record had no value. On create the container is freshly cloned from a template that may carry its own env/entrypoint, so emitting `delete` actively wiped those template-provided defaults. Add a deleteMissing option (default false). Proxmox's config PUT is a partial update, so omitting a key leaves it untouched: - create-container.js uses the default (false): empty env/entrypoint are omitted, preserving whatever the cloned template provides. - reconfigure-container.js passes deleteMissing:true so clearing the last env var or removing a custom entrypoint still unsets it on the existing container. The reconfigure restart-detection already ignores the `delete` key, so a pure-delete config does not, by itself, trigger a restart. --- create-a-container/bin/create-container.js | 5 +++- .../bin/reconfigure-container.js | 7 +++-- create-a-container/models/container.js | 26 ++++++++++++++----- 3 files changed, 29 insertions(+), 9 deletions(-) diff --git a/create-a-container/bin/create-container.js b/create-a-container/bin/create-container.js index b1b81f2b..81ae0399 100755 --- a/create-a-container/bin/create-container.js +++ b/create-a-container/bin/create-container.js @@ -340,7 +340,10 @@ async function main() { console.log('Container configured'); } - // Apply environment variables and entrypoint + // Apply environment variables and entrypoint. Use the default + // (deleteMissing=false) so that any env/entrypoint provided by the template + // we just cloned is preserved when the user didn't supply their own — only + // explicit values are pushed, nothing is unset. const envConfig = await container.buildLxcEnvConfig(); if (Object.keys(envConfig).length > 0) { console.log('Applying environment variables and entrypoint...'); diff --git a/create-a-container/bin/reconfigure-container.js b/create-a-container/bin/reconfigure-container.js index 11d8e031..ee5ba86e 100644 --- a/create-a-container/bin/reconfigure-container.js +++ b/create-a-container/bin/reconfigure-container.js @@ -80,8 +80,11 @@ async function main() { const client = await node.api(); console.log('Proxmox API client initialized'); - // Build config from environment variables and entrypoint - const lxcConfig = await container.buildLxcEnvConfig(); + // Build config from environment variables and entrypoint. Pass + // deleteMissing so that clearing env vars or removing a custom entrypoint + // actually unsets them on the existing container (vs. create, which must + // preserve template-provided values). + const lxcConfig = await container.buildLxcEnvConfig({ deleteMissing: true }); if (Object.keys(lxcConfig).length > 0) { console.log('Applying LXC configuration...'); diff --git a/create-a-container/models/container.js b/create-a-container/models/container.js index a715f1b8..8f28ae57 100644 --- a/create-a-container/models/container.js +++ b/create-a-container/models/container.js @@ -135,13 +135,27 @@ module.exports = (sequelize, DataTypes) => { * (Image-provided defaults are submitted by the UI as user-defined values, * so they arrive via parseEnvironmentVars and need no special handling.) * - * Callers should simply `await container.buildLxcEnvConfig()` — no env-var - * merging belongs anywhere else. + * Proxmox's config endpoint is a partial update: keys present in the body are + * set, omitted keys are left untouched, and a key is only removed if named in + * the special `delete` parameter. That distinction matters here: * + * - On create, the container has just been cloned from a template that may + * carry its own `env`/`entrypoint`. We must NOT delete those when the user + * didn't provide a value, or we'd wipe the template's defaults. So the + * default (deleteMissing=false) simply omits anything with no value. + * - On reconfigure, the user may have cleared their last env var or removed a + * custom entrypoint, and that change must take effect — so the caller + * passes deleteMissing=true to emit `delete` for the now-empty fields. + * + * @param {object} [options] + * @param {boolean} [options.deleteMissing=false] - When true, env/entrypoint + * that resolve to empty are added to Proxmox's `delete` list (removing any + * existing value). When false, they are simply omitted, preserving whatever + * the container/template already has. * @returns {Promise} Config object with 'env' and 'entrypoint' - * properties (and a 'delete' list for any that should be unset) + * properties (and, when deleteMissing is set, a 'delete' list) */ - async buildLxcEnvConfig() { + async buildLxcEnvConfig({ deleteMissing = false } = {}) { const config = {}; const deleteList = []; @@ -162,14 +176,14 @@ module.exports = (sequelize, DataTypes) => { } if (envPairs.length > 0) { config['env'] = envPairs.join('\0'); - } else { + } else if (deleteMissing) { deleteList.push('env'); } // Set entrypoint command if (this.entrypoint && this.entrypoint.trim()) { config['entrypoint'] = this.entrypoint.trim(); - } else { + } else if (deleteMissing) { deleteList.push('entrypoint'); } From 87875376c003a9b4ebc18828df76344d41453142 Mon Sep 17 00:00:00 2001 From: Robert Gingras Date: Tue, 16 Jun 2026 09:56:05 -0400 Subject: [PATCH 06/15] feat: snapshot template env/entrypoint onto the record at create MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reconfigure uses deleteMissing, so any env/entrypoint that exists only on the cloned template (and was never stored on the container) would be unset on the first reconfigure — surprising the user. We can't re-query the template later: templates are mutable Docker refs that may change or disappear, and we don't cache them. Compromise: at creation, after the template is cloned, read its LXC config and fold its env/entrypoint into the container record as if the user had supplied them (precedence: template < user). This guarantees those values persist across future reconfigures without changing API client expectations. System and NVIDIA defaults are deliberately NOT persisted — they remain configure-time-only (merged by buildLxcEnvConfig) so they aren't frozen into the record or exposed as user values in the edit UI. - models/container.js: add Container.parseLxcEnvString() and Container#persistTemplateDefaults(). - bin/create-container.js: call persistTemplateDefaults() with the cloned template's config before building/applying the LXC env config. --- create-a-container/bin/create-container.js | 16 ++++-- create-a-container/models/container.js | 58 ++++++++++++++++++++++ 2 files changed, 71 insertions(+), 3 deletions(-) diff --git a/create-a-container/bin/create-container.js b/create-a-container/bin/create-container.js index 81ae0399..2ea89ea3 100755 --- a/create-a-container/bin/create-container.js +++ b/create-a-container/bin/create-container.js @@ -340,10 +340,20 @@ async function main() { console.log('Container configured'); } + // Snapshot the template's env/entrypoint onto the container record now, as + // if the user had supplied them (user-supplied values still win). Templates + // are mutable Docker refs we can't re-query on a later reconfigure, so we + // persist them here; otherwise a future reconfigure (which uses + // deleteMissing) would unset template-provided values that were never + // stored. System/NVIDIA defaults are intentionally left out — they stay + // configure-time-only. + const templateConfig = await client.lxcConfig(node.name, vmid); + await container.persistTemplateDefaults(templateConfig); + // Apply environment variables and entrypoint. Use the default - // (deleteMissing=false) so that any env/entrypoint provided by the template - // we just cloned is preserved when the user didn't supply their own — only - // explicit values are pushed, nothing is unset. + // (deleteMissing=false): only explicit values are pushed, nothing is unset. + // The record now already includes the template's values, and system/NVIDIA + // defaults are merged in by buildLxcEnvConfig. const envConfig = await container.buildLxcEnvConfig(); if (Object.keys(envConfig).length > 0) { console.log('Applying environment variables and entrypoint...'); diff --git a/create-a-container/models/container.js b/create-a-container/models/container.js index 8f28ae57..721cebfe 100644 --- a/create-a-container/models/container.js +++ b/create-a-container/models/container.js @@ -63,6 +63,24 @@ module.exports = (sequelize, DataTypes) => { return out; } + /** + * Parse a Proxmox LXC `env` string (NUL-separated `KEY=value` pairs) into a + * normalized, flat { KEY: stringValue } object. Anything malformed is + * dropped via normalizeEnvVars. The inverse of the encoding done in + * buildLxcEnvConfig. + * @param {string|null|undefined} envStr - Raw Proxmox `env` value + * @returns {object} Flat object of validated { KEY: value } + */ + static parseLxcEnvString(envStr) { + if (!envStr || typeof envStr !== 'string') return {}; + const raw = {}; + for (const pair of envStr.split('\0')) { + const eq = pair.indexOf('='); + if (eq > 0) raw[pair.substring(0, eq)] = pair.substring(eq + 1); + } + return this.normalizeEnvVars(raw); + } + /** * Internal helper for buildLxcEnvConfig. * Load the admin-defined system default environment variables from the @@ -122,6 +140,46 @@ module.exports = (sequelize, DataTypes) => { }; } + /** + * Fold a template's environment variables and entrypoint into this + * container's persisted record, as if the user had supplied them. + * + * Called once at creation, after the template has been cloned, with the + * template's own LXC config. We can't recover these values later (templates + * are mutable Docker refs that may change or disappear before a reconfigure, + * and we don't cache them), so we snapshot them onto the record now. This + * keeps env/entrypoint stable across future reconfigures — which use + * deleteMissing and would otherwise unset template-provided values that were + * never stored. + * + * Precedence is template < user (user-supplied values win). System and + * NVIDIA defaults are intentionally NOT folded in here; they remain + * configure-time-only so they are not frozen into the record. + * + * Persists and returns nothing; mutates this instance. + * + * @param {object} templateConfig - The cloned template's LXC config + * @param {string} [templateConfig.env] - Proxmox NUL-separated `env` string + * @param {string} [templateConfig.entrypoint] - Template entrypoint + */ + async persistTemplateDefaults(templateConfig = {}) { + const templateEnv = this.constructor.parseLxcEnvString(templateConfig.env); + // template < user + const mergedEnv = { ...templateEnv, ...this.parseEnvironmentVars() }; + + const templateEntrypoint = + typeof templateConfig.entrypoint === 'string' && templateConfig.entrypoint.trim() + ? templateConfig.entrypoint.trim() + : null; + const userEntrypoint = this.entrypoint && this.entrypoint.trim() ? this.entrypoint.trim() : null; + const mergedEntrypoint = userEntrypoint || templateEntrypoint; + + await this.update({ + environmentVars: Object.keys(mergedEnv).length > 0 ? JSON.stringify(mergedEnv) : null, + entrypoint: mergedEntrypoint + }); + } + /** * Build the LXC config object for environment variables and entrypoint to * deploy to Proxmox via updateLxcConfig. From b2fc09c18a0bed7e2a780dd9d96cb7fb8e06de3c Mon Sep 17 00:00:00 2001 From: Robert Gingras Date: Mon, 15 Jun 2026 17:01:45 -0400 Subject: [PATCH 07/15] feat: compute live container status instead of static DB column Resolves #350. Container status was a static column updated only by the create/reconfigure job scripts, so it drifted from reality whenever Proxmox changed out of band or a job died mid-run. This replaces it with status computed on demand. New utils/container-status.js resolves status from three sources: - Proxmox: does the LXC exist, and is it running/stopped? - Jobs: active create/reconfigure job, or did the last create job fail? - Config: does the live LXC config match what the API server expects? Resolved statuses: running, offline, creating, restarting, failed, missing, out-of-sync (plus unknown when Proxmox is unreachable). New endpoint: GET /api/v1/sites/:siteId/containers/:id/status -> { data: { status } } Non-breaking: the same live status is still embedded on the list/show/create payloads, so existing consumers keep working. The static Containers.status column is dropped (migration + model). All former writers were updated to stop persisting status; templates.js now selects provisioned containers by VMID + IPv4 instead of status='running'. The list page queries the per-container status endpoint (seeded by the list value for instant paint, polling while creating/restarting). OpenAPI and README updated. --- create-a-container/README.md | 18 +- create-a-container/bin/create-container.js | 31 +- create-a-container/bin/json-to-sql.js | 2 - .../bin/reconfigure-container.js | 17 +- create-a-container/client/src/lib/queries.ts | 5 + create-a-container/client/src/lib/types.ts | 22 +- .../pages/containers/ContainersListPage.tsx | 62 ++- .../20260615000000-remove-container-status.js | 26 ++ create-a-container/models/container.js | 5 - create-a-container/openapi.v1.yaml | 44 ++- .../routers/api/v1/containers.js | 60 ++- create-a-container/routers/api/v1/nodes.js | 1 - create-a-container/routers/templates.js | 14 +- create-a-container/utils/container-status.js | 355 ++++++++++++++++++ 14 files changed, 601 insertions(+), 61 deletions(-) create mode 100644 create-a-container/migrations/20260615000000-remove-container-status.js create mode 100644 create-a-container/utils/container-status.js diff --git a/create-a-container/README.md b/create-a-container/README.md index 4c135ef0..31fb3d2f 100644 --- a/create-a-container/README.md +++ b/create-a-container/README.md @@ -22,7 +22,6 @@ erDiagram int id PK string hostname UK "FQDN hostname" string username "Owner username" - string status "pending,creating,running,failed" string template "Template name" int creationJobId FK "References Job" int nodeId FK "References Node" @@ -231,6 +230,23 @@ Delete a container from both Proxmox and the database - `403` - User doesn't own the container - `500` - Proxmox API deletion failed or node not configured +#### `GET /api/v1/sites/:siteId/containers/:id/status` (Auth Required) +Return the **live** status of a container, computed on demand rather than read +from a stored column. The status is resolved by combining the container's +presence/run-state in Proxmox, any active create/reconfigure jobs, and whether +the live LXC config matches what the API server expects. +- **Returns**: `{ data: { status } }` where `status` is one of: + - `running` — online in Proxmox + - `offline` — exists in Proxmox but stopped + - `creating` — no Proxmox VM yet, active create job + - `restarting` — active reconfigure job + - `failed` — no Proxmox VM, last create job failed + - `missing` — no Proxmox VM, create succeeded or no create job found + - `out-of-sync` — exists in Proxmox but its config differs from expectation + - `unknown` — Proxmox unreachable / node has no API credentials +- **Note**: The same `status` value is also embedded on each container returned + by the list and show endpoints, so existing consumers remain non-breaking. + #### `GET /status/:jobId` (Auth Required) View container creation progress page diff --git a/create-a-container/bin/create-container.js b/create-a-container/bin/create-container.js index 2ea89ea3..3a0e1701 100755 --- a/create-a-container/bin/create-container.js +++ b/create-a-container/bin/create-container.js @@ -177,9 +177,14 @@ async function main() { console.error(`Container with ID ${containerId} not found`); process.exit(1); } - - if (container.status !== 'pending') { - console.error(`Container is not in pending status (current: ${container.status})`); + + // Guard against double-provisioning. The status column no longer exists, so + // we use the Proxmox VMID as the signal: once a VMID has been allocated and + // stored, creation has already run for this container. + if (container.containerId) { + console.error( + `Container already has a Proxmox VMID (${container.containerId}); refusing to re-create`, + ); process.exit(1); } @@ -217,10 +222,6 @@ async function main() { console.log(`Template type: ${isDocker ? 'Docker image' : 'Proxmox template'}`); try { - // Update status to 'creating' - await container.update({ status: 'creating' }); - console.log('Status updated to: creating'); - // Get the Proxmox API client const client = await node.api(); console.log('Proxmox API client initialized'); @@ -428,8 +429,7 @@ async function main() { console.log('Updating container record...'); await container.update({ macAddress, - ipv4Address, - status: 'running' + ipv4Address }); console.log('Container creation completed successfully!'); @@ -480,15 +480,10 @@ async function main() { if (err.response?.data) { console.error('API Error Details:', JSON.stringify(err.response.data, null, 2)); } - - // Update status to failed - try { - await container.update({ status: 'failed' }); - console.log('Status updated to: failed'); - } catch (updateErr) { - console.error('Failed to update container status:', updateErr.message); - } - + + // The container status is no longer persisted. Failure is recorded by the + // job-runner marking this job 'failure'; the live status resolver maps a + // missing-in-Proxmox container whose last create job failed to 'failed'. process.exit(1); } } diff --git a/create-a-container/bin/json-to-sql.js b/create-a-container/bin/json-to-sql.js index 94c02243..f55af4d5 100644 --- a/create-a-container/bin/json-to-sql.js +++ b/create-a-container/bin/json-to-sql.js @@ -222,7 +222,6 @@ async function run() { hostname, ipv4Address: obj.ip, username: obj.user || '', - status: 'running', template: obj.template || null, containerId: obj.ctid, macAddress: obj.mac, @@ -234,7 +233,6 @@ async function run() { await container.update({ ipv4Address: obj.ip, username: obj.user || '', - status: container.status || 'running', template: obj.template || container.template, containerId: obj.ctid, macAddress: obj.mac diff --git a/create-a-container/bin/reconfigure-container.js b/create-a-container/bin/reconfigure-container.js index ee5ba86e..6e31a248 100644 --- a/create-a-container/bin/reconfigure-container.js +++ b/create-a-container/bin/reconfigure-container.js @@ -160,14 +160,13 @@ async function main() { throw new Error('Could not get IP address from Proxmox interfaces API'); } - // Update container record with MAC/IP and running status + // Update container record with MAC/IP (status is no longer persisted) await container.update({ - status: 'running', macAddress, ipv4Address }); - console.log('Status updated to: running'); + console.log('Reconfiguration applied; container running'); console.log(` MAC: ${macAddress}`); console.log(` IP: ${ipv4Address}`); } @@ -181,15 +180,9 @@ async function main() { if (err.response?.data) { console.error('API Error Details:', JSON.stringify(err.response.data, null, 2)); } - - // Update status to failed - try { - await container.update({ status: 'failed' }); - console.log('Status updated to: failed'); - } catch (updateErr) { - console.error('Failed to update container status:', updateErr.message); - } - + + // Container status is no longer persisted. The job-runner records this job as + // 'failure'; live status is derived from Proxmox + job state on read. process.exit(1); } } diff --git a/create-a-container/client/src/lib/queries.ts b/create-a-container/client/src/lib/queries.ts index b261f2ec..546db6be 100644 --- a/create-a-container/client/src/lib/queries.ts +++ b/create-a-container/client/src/lib/queries.ts @@ -8,6 +8,7 @@ import type { Container, ContainerMetadata, ContainerNewBootstrap, + ContainerStatusResult, EffectiveResources, ExternalDomain, Group, @@ -29,6 +30,8 @@ export const keys = { containers: (siteId: number | string) => ['sites', String(siteId), 'containers'] as const, container: (siteId: number | string, id: number | string) => ['sites', String(siteId), 'containers', String(id)] as const, + containerStatus: (siteId: number | string, id: number | string) => + ['sites', String(siteId), 'containers', String(id), 'status'] as const, containerBootstrap: (siteId: number | string) => ['sites', String(siteId), 'containers', 'new'] as const, containerMetadata: (image: string) => ['container-metadata', image] as const, @@ -64,6 +67,8 @@ export const queries = { api.get(`/api/v1/sites/${siteId}/containers`), getContainer: (siteId: number | string, id: number | string) => api.get(`/api/v1/sites/${siteId}/containers/${id}`), + getContainerStatus: (siteId: number | string, id: number | string) => + api.get(`/api/v1/sites/${siteId}/containers/${id}/status`), containerBootstrap: (siteId: number | string) => api.get(`/api/v1/sites/${siteId}/containers/new`), containerMetadata: (siteId: number | string, image: string) => diff --git a/create-a-container/client/src/lib/types.ts b/create-a-container/client/src/lib/types.ts index 85701ab3..a32fd6cd 100644 --- a/create-a-container/client/src/lib/types.ts +++ b/create-a-container/client/src/lib/types.ts @@ -75,7 +75,7 @@ export interface Container { hostname: string; ipv4Address: string | null; macAddress: string | null; - status: string; + status: ContainerStatus; template: string | null; creationJobId: number | null; entrypoint: string | null; @@ -90,11 +90,29 @@ export interface Container { createdAt: string; } +/** + * Live container status resolved from Proxmox + job state + config drift. + * Returned by GET /sites/:id/containers/:id/status and embedded on Container. + */ +export type ContainerStatus = + | 'running' + | 'offline' + | 'creating' + | 'restarting' + | 'failed' + | 'missing' + | 'out-of-sync' + | 'unknown'; + +export interface ContainerStatusResult { + status: ContainerStatus; +} + export interface ContainerCreateResult { containerId: number; jobId: number; hostname: string; - status: string; + status: ContainerStatus; } export interface ContainerNewBootstrap { diff --git a/create-a-container/client/src/pages/containers/ContainersListPage.tsx b/create-a-container/client/src/pages/containers/ContainersListPage.tsx index ec5a195f..5f899d29 100644 --- a/create-a-container/client/src/pages/containers/ContainersListPage.tsx +++ b/create-a-container/client/src/pages/containers/ContainersListPage.tsx @@ -34,28 +34,76 @@ import { import { api, ApiError } from '@/lib/api'; import { useSession } from '@/lib/auth'; import { keys, queries } from '@/lib/queries'; -import type { Container } from '@/lib/types'; +import type { Container, ContainerStatus } from '@/lib/types'; type ViewMode = 'cards' | 'table'; const VIEW_STORAGE_KEY = 'containers:view'; -function statusVariant(s: string): 'default' | 'success' | 'warning' | 'danger' | 'secondary' { +function statusVariant( + s: ContainerStatus, +): 'default' | 'success' | 'warning' | 'danger' | 'secondary' { switch (s) { case 'running': return 'success'; - case 'pending': + case 'creating': case 'restarting': return 'warning'; case 'failed': - case 'error': + case 'out-of-sync': return 'danger'; - case 'stopped': + case 'offline': + case 'missing': return 'secondary'; default: return 'default'; } } +// Human-readable labels for the live status values. +const STATUS_LABELS: Record = { + running: 'Running', + offline: 'Offline', + creating: 'Creating', + restarting: 'Restarting', + failed: 'Failed', + missing: 'Missing', + 'out-of-sync': 'Out of sync', + unknown: 'Unknown', +}; + +// Statuses that are transitional and worth polling for. +const TRANSITIONAL_STATUSES: ReadonlySet = new Set([ + 'creating', + 'restarting', +]); + +/** + * Live status badge. Per issue #350 each row queries the dedicated + * /containers/:id/status endpoint. The status already returned by the list + * endpoint seeds initialData so the badge paints instantly, then this query + * keeps it fresh (polling while the container is in a transitional state). + */ +function ContainerStatusBadge({ + siteId, + container, +}: { + siteId: string | undefined; + container: Container; +}) { + const { data } = useQuery({ + queryKey: keys.containerStatus(siteId!, container.id), + queryFn: () => queries.getContainerStatus(siteId!, container.id), + enabled: !!siteId, + initialData: { status: container.status }, + refetchInterval: (query) => { + const status = query.state.data?.status; + return status && TRANSITIONAL_STATUSES.has(status) ? 4000 : false; + }, + }); + const status = data?.status ?? container.status; + return {STATUS_LABELS[status] ?? status}; +} + const linkClass = 'text-(--color-primary,#1d4ed8) hover:underline'; /** Shorten a full image ref to just its name+tag, e.g. ghcr.io/mieweb/base:latest -> base:latest */ @@ -330,7 +378,7 @@ export function ContainersListPage() { {c.hostname} - {c.status} +
@@ -374,7 +422,7 @@ export function ContainersListPage() { {c.hostname} - {c.status} + diff --git a/create-a-container/migrations/20260615000000-remove-container-status.js b/create-a-container/migrations/20260615000000-remove-container-status.js new file mode 100644 index 00000000..cdcdc72c --- /dev/null +++ b/create-a-container/migrations/20260615000000-remove-container-status.js @@ -0,0 +1,26 @@ +'use strict'; +/** @type {import('sequelize-cli').Migration} */ + +/** + * Remove the static `status` column from Containers. + * + * Container status is now computed live from Proxmox + job state + config drift + * (see utils/container-status.js and GET /sites/:id/containers/:id/status), so the + * persisted column is no longer a source of truth and is dropped. + * + * The down migration restores the column (defaulting to 'running') for rollback, + * but the historical per-container value cannot be recovered. + */ +module.exports = { + async up(queryInterface) { + await queryInterface.removeColumn('Containers', 'status'); + }, + + async down(queryInterface, Sequelize) { + await queryInterface.addColumn('Containers', 'status', { + type: Sequelize.STRING(20), + allowNull: false, + defaultValue: 'running', + }); + }, +}; diff --git a/create-a-container/models/container.js b/create-a-container/models/container.js index 721cebfe..b6bfc83e 100644 --- a/create-a-container/models/container.js +++ b/create-a-container/models/container.js @@ -268,11 +268,6 @@ module.exports = (sequelize, DataTypes) => { type: DataTypes.STRING(255), allowNull: false }, - status: { - type: DataTypes.STRING(20), - allowNull: false, - defaultValue: 'pending' - }, template: { type: DataTypes.STRING(255), allowNull: true diff --git a/create-a-container/openapi.v1.yaml b/create-a-container/openapi.v1.yaml index e049dd16..ec476381 100644 --- a/create-a-container/openapi.v1.yaml +++ b/create-a-container/openapi.v1.yaml @@ -89,7 +89,7 @@ components: id: { type: integer } hostname: { type: string } containerId: { type: integer, nullable: true } - status: { type: string } + status: { $ref: '#/components/schemas/ContainerStatus' } template: { type: string } ipv4Address: { type: string, nullable: true } macAddress: { type: string, nullable: true } @@ -104,6 +104,28 @@ components: services: type: array items: { type: object } + ContainerStatus: + type: string + description: >- + Live container status, resolved on read from Proxmox presence/run-state, + active jobs, and configuration drift. + running = online in Proxmox; + offline = exists in Proxmox but stopped; + creating = no Proxmox VM yet, active create job; + restarting = active reconfigure job; + failed = no Proxmox VM, last create job failed; + missing = no Proxmox VM, create succeeded or no create job; + out-of-sync = exists in Proxmox but config differs from expectation; + unknown = Proxmox unreachable / no node credentials. + enum: + - running + - offline + - creating + - restarting + - failed + - missing + - out-of-sync + - unknown Node: type: object properties: @@ -338,6 +360,26 @@ paths: get: { tags: [Containers], responses: { '200': { description: Container } } } put: { tags: [Containers], responses: { '200': { description: Updated, optional restart job } } } delete: { tags: [Containers], responses: { '200': { description: Deleted, with DNS cleanup warnings } } } + /sites/{siteId}/containers/{id}/status: + get: + tags: [Containers] + summary: Live container status (computed from Proxmox + jobs + config drift) + parameters: + - { in: path, name: siteId, required: true, schema: { type: integer } } + - { in: path, name: id, required: true, schema: { type: integer } } + responses: + '200': + description: Resolved container status + content: + application/json: + schema: + type: object + properties: + data: + type: object + properties: + status: { $ref: '#/components/schemas/ContainerStatus' } + '404': { description: Container not found } /sites/{siteId}/nodes: parameters: [{ in: path, name: siteId, required: true, schema: { type: integer } }] diff --git a/create-a-container/routers/api/v1/containers.js b/create-a-container/routers/api/v1/containers.js index 70c2947d..48a9c3bc 100644 --- a/create-a-container/routers/api/v1/containers.js +++ b/create-a-container/routers/api/v1/containers.js @@ -21,6 +21,11 @@ const { const { parseDockerRef, getImageConfig, extractImageMetadata } = require('../../../utils/docker-registry'); const { manageDnsRecords } = require('../../../utils/cloudflare-dns'); const { deleteVirtualMachine, withNetbox } = require('../../../utils/netbox'); +const { + computeContainerStatus, + computeContainerStatuses, + STATUS, +} = require('../../../utils/container-status'); const { apiAuth, asyncHandler, ok, created, ApiError } = require('../../../middlewares/api'); const router = express.Router({ mergeParams: true }); @@ -86,7 +91,7 @@ async function loadSite(req) { return site; } -function serializeContainer(c, site) { +function serializeContainer(c, site, status) { const services = c.services || []; const ssh = services.find( (s) => @@ -110,7 +115,9 @@ function serializeContainer(c, site) { hostname: c.hostname, ipv4Address: c.ipv4Address, macAddress: c.macAddress, - status: c.status, + // Live status computed from Proxmox + jobs + config (see utils/container-status). + // Kept on every container payload so existing consumers remain non-breaking. + status: status || STATUS.UNKNOWN, template: c.template, creationJobId: c.creationJobId, entrypoint: c.entrypoint, @@ -202,10 +209,41 @@ router.get( { association: 'dnsService' }, ], }, - { association: 'node', attributes: ['id', 'name', 'apiUrl'] }, + // Full node record (incl. credentials) is required to query live Proxmox status. + { association: 'node' }, ], }); - return ok(res, rows.map((c) => serializeContainer(c, site))); + // Resolve live statuses for the whole page in one pass: one Proxmox snapshot + // per node (shared), rather than N independent round-trips from the browser. + const statuses = await computeContainerStatuses(rows, Job); + return ok( + res, + rows.map((c) => serializeContainer(c, site, statuses.get(c.id))), + ); + }), +); + +// GET /containers/:id/status — live status for a single container +router.get( + '/:id/status', + asyncHandler(async (req, res) => { + const site = await loadSite(req); + const containerId = parseInt(req.params.id, 10); + if (!Number.isInteger(containerId) || containerId <= 0) { + throw new ApiError(404, 'not_found', 'Container not found'); + } + const c = await Container.findOne({ + where: { id: containerId, username: req.session.user }, + include: [ + { association: 'node' }, + { association: 'creationJob' }, + ], + }); + if (!c || !c.node || c.node.siteId !== site.id) { + throw new ApiError(404, 'not_found', 'Container not found'); + } + const status = await computeContainerStatus({ container: c, Job }); + return ok(res, { status }); }), ); @@ -233,12 +271,14 @@ router.get( ], }, { association: 'node' }, + { association: 'creationJob' }, ], }); if (!c || !c.node || c.node.siteId !== site.id) { throw new ApiError(404, 'not_found', 'Container not found'); } - return ok(res, serializeContainer(c, site)); + const status = await computeContainerStatus({ container: c, Job }); + return ok(res, serializeContainer(c, site, status)); }), ); @@ -296,7 +336,6 @@ router.post( { hostname, username: req.session.user, - status: 'pending', template: templateName, nodeId: node.id, siteId: site.id, @@ -371,7 +410,9 @@ router.post( containerId: container.id, jobId: job.id, hostname: container.hostname, - status: 'pending', + // A create job was just enqueued and there is no Proxmox VMID yet, so the + // live status resolver would report 'creating' — return it directly. + status: STATUS.CREATING, }); } catch (err) { await t.rollback(); @@ -414,16 +455,15 @@ router.put( const dnsWarnings = []; await sequelize.transaction(async (t) => { if (envChanged || entrypointChanged) { + // Persist the new desired config only. The "restarting" state is no + // longer stored — it is derived from the active reconfigure job below. await container.update( { environmentVars: envVarsJson, entrypoint: newEntrypoint, - status: needsRestart && container.containerId ? 'restarting' : container.status, }, { transaction: t }, ); - } else if (forceRestart && container.containerId) { - await container.update({ status: 'restarting' }, { transaction: t }); } if (needsRestart && container.containerId) { restartJob = await Job.create( diff --git a/create-a-container/routers/api/v1/nodes.js b/create-a-container/routers/api/v1/nodes.js index d01e097d..2cbb3303 100644 --- a/create-a-container/routers/api/v1/nodes.js +++ b/create-a-container/routers/api/v1/nodes.js @@ -247,7 +247,6 @@ router.post( containerId: c.vmid, macAddress, ipv4Address, - status: 'running', }; }), ); diff --git a/create-a-container/routers/templates.js b/create-a-container/routers/templates.js index 4793ba3a..4969dde5 100644 --- a/create-a-container/routers/templates.js +++ b/create-a-container/routers/templates.js @@ -1,9 +1,19 @@ const express = require('express'); +const { Op } = require('sequelize'); const { Site, Node, Container, Service, HTTPService, TransportService, ExternalDomain } = require('../models'); const { requireLocalhostOrAdmin } = require('../middlewares'); const router = express.Router(); +// A container is eligible for routing/DNS config once it has been provisioned, +// i.e. it has a Proxmox VMID and an assigned IPv4 address. The old code filtered +// on the (now-removed) static status column; presence of an IP/VMID is the stable, +// Proxmox-independent proxy for "this container is live enough to route to". +const PROVISIONED_CONTAINER_WHERE = { + containerId: { [Op.ne]: null }, + ipv4Address: { [Op.ne]: null }, +}; + async function loadDnsmasqSite(siteId) { return Site.findByPk(siteId, { include: [{ @@ -12,7 +22,7 @@ async function loadDnsmasqSite(siteId) { include: [{ model: Container, as: 'containers', - where: { status: 'running' }, + where: PROVISIONED_CONTAINER_WHERE, required: false, attributes: ['macAddress', 'ipv4Address', 'hostname'], }], @@ -40,7 +50,7 @@ router.get('/sites/:siteId/nginx', requireLocalhostOrAdmin, async (req, res) => include: [{ model: Container, as: 'containers', - where: { status: 'running' }, + where: PROVISIONED_CONTAINER_WHERE, required: false, include: [{ model: Service, diff --git a/create-a-container/utils/container-status.js b/create-a-container/utils/container-status.js new file mode 100644 index 00000000..735ec22b --- /dev/null +++ b/create-a-container/utils/container-status.js @@ -0,0 +1,355 @@ +/** + * container-status.js — resolve the *live* status of a container. + * + * Historically the container "status" was a static column in the database that + * was only mutated by the create/reconfigure job scripts. That value drifts out + * of reality whenever something changes a container directly in Proxmox (or when + * a job dies without updating the row). This module computes the real status on + * demand by combining three sources of truth: + * + * 1. Proxmox — does the LXC exist, and is it running or stopped? + * 2. Jobs — is there an active create/restart job, or did the last create + * job fail? + * 3. Config — does the live LXC config match what the API server expects? + * + * Resolved statuses (a strict superset of the old running/offline values): + * running — exists in Proxmox and is online + * offline — exists in Proxmox but is stopped + * restarting — has an active (pending/running) reconfigure job + * creating — not in Proxmox, but has an active (pending/running) create job + * failed — not in Proxmox, last create job returned failure + * missing — not in Proxmox, create job succeeded or no create job found + * out-of-sync — exists in Proxmox but its config doesn't match expectation + * unknown — Proxmox could not be reached / node has no API credentials + */ + +const { Op } = require('sequelize'); + +const STATUS = Object.freeze({ + RUNNING: 'running', + OFFLINE: 'offline', + CREATING: 'creating', + RESTARTING: 'restarting', + FAILED: 'failed', + MISSING: 'missing', + OUT_OF_SYNC: 'out-of-sync', + UNKNOWN: 'unknown', +}); + +const STATUS_VALUES = Object.freeze(Object.values(STATUS)); + +// Job command fragments that identify create vs reconfigure (restart) jobs. +// Jobs are distinguished solely by their `command` string (no type column). +const CREATE_CMD = 'bin/create-container.js'; +const RECONFIGURE_CMD = 'bin/reconfigure-container.js'; + +const ACTIVE_JOB_STATUSES = ['pending', 'running']; + +function nodeHasCreds(node) { + return !!(node && node.apiUrl && node.tokenId && node.secret); +} + +/** + * Find the most recent create job for a container. + * Prefers the explicit creationJobId FK; falls back to a command-string match. + * @param {object} container - Container instance (with optional creationJob assoc) + * @param {object} Job - Job model + * @returns {Promise} + */ +async function findLatestCreateJob(container, Job) { + if (container.creationJob) return container.creationJob; + if (container.creationJobId) { + return Job.findByPk(container.creationJobId); + } + return Job.findOne({ + where: { command: { [Op.like]: `%${CREATE_CMD} --container-id=${container.id}%` } }, + order: [['createdAt', 'DESC']], + }); +} + +/** + * Find the most recent reconfigure (restart) job for a container, if any. + * @param {object} container - Container instance + * @param {object} Job - Job model + * @returns {Promise} + */ +async function findLatestReconfigureJob(container, Job) { + return Job.findOne({ + where: { command: { [Op.like]: `%${RECONFIGURE_CMD} --container-id=${container.id}%` } }, + order: [['createdAt', 'DESC']], + }); +} + +function isActiveJob(job) { + return !!job && ACTIVE_JOB_STATUSES.includes(job.status); +} + +/** + * Parse a NUL-separated Proxmox env string ("K=V\0K2=V2") into an object. + * @param {string|null|undefined} envStr + * @returns {Object} + */ +function parseEnvString(envStr) { + const out = {}; + if (!envStr) return out; + for (const pair of String(envStr).split('\0')) { + if (!pair) continue; + const eq = pair.indexOf('='); + if (eq > 0) out[pair.substring(0, eq)] = pair.substring(eq + 1); + } + return out; +} + +function shallowEqualMap(a, b) { + const ak = Object.keys(a); + const bk = Object.keys(b); + if (ak.length !== bk.length) return false; + for (const k of ak) { + if (a[k] !== b[k]) return false; + } + return true; +} + +/** + * Compare the env/entrypoint the API server expects against the live LXC config. + * + * The expectation is built the same way the reconfigure job builds it + * (`container.buildLxcEnvConfig()`), so this mirrors exactly what *would* be sent + * to Proxmox. Env is compared as an unordered set of key=value pairs because the + * Proxmox API does not preserve ordering. Returns true when the live config + * matches the expectation (i.e. in sync). + * + * @param {object} container - Container instance (provides buildLxcEnvConfig) + * @param {object} liveConfig - Result of ProxmoxApi.lxcConfig(node, vmid) + * @returns {boolean} true if in sync, false if drifted + */ +function configMatches(container, liveConfig) { + const expected = + typeof container.buildLxcEnvConfig === 'function' ? container.buildLxcEnvConfig() : {}; + const expectedDeletes = new Set( + (expected.delete ? String(expected.delete).split(',') : []).map((s) => s.trim()), + ); + + // --- entrypoint --- + const liveEntrypoint = liveConfig?.entrypoint || null; + if (expectedDeletes.has('entrypoint')) { + if (liveEntrypoint) return false; // expected absent, but present live + } else if ((expected.entrypoint || null) !== liveEntrypoint) { + return false; + } + + // --- env --- + const liveEnv = parseEnvString(liveConfig?.env); + if (expectedDeletes.has('env')) { + if (Object.keys(liveEnv).length > 0) return false; // expected none, but some live + } else { + const expectedEnv = parseEnvString(expected.env); + if (!shallowEqualMap(expectedEnv, liveEnv)) return false; + } + + return true; +} + +/** + * Locate a container in a cluster-resources snapshot by VMID. + * @param {Array|null} snapshot - Result of ProxmoxApi.clusterResources('lxc') + * @param {number} vmid + * @returns {object|null} The matching resource entry or null + */ +function findInSnapshot(snapshot, vmid) { + if (!Array.isArray(snapshot) || vmid == null) return null; + return snapshot.find((r) => Number(r.vmid) === Number(vmid)) || null; +} + +/** + * Resolve status from already-gathered facts. Pure function — no I/O — so it is + * trivial to unit test and is the single source of the decision tree. + * + * @param {object} facts + * @param {boolean} facts.proxmoxReachable - Whether Proxmox was queried OK. + * @param {boolean} facts.inProxmox - Whether the LXC exists in Proxmox. + * @param {boolean} facts.proxmoxRunning - Whether the LXC is running. + * @param {boolean} facts.inSync - Whether the live config matches expectation. + * @param {boolean} facts.hasVmid - Whether the container has a Proxmox VMID. + * @param {boolean} facts.creating - Active create job present. + * @param {boolean} facts.restarting - Active reconfigure job present. + * @param {boolean} facts.createFailed - Latest create job returned failure. + * @param {boolean} facts.hasCreateJob - A create job exists at all. + * @returns {string} STATUS value + */ +function decideStatus(facts) { + if (facts.inProxmox) { + if (facts.restarting) return STATUS.RESTARTING; + if (facts.inSync === false) return STATUS.OUT_OF_SYNC; + return facts.proxmoxRunning ? STATUS.RUNNING : STATUS.OFFLINE; + } + + if (facts.creating) return STATUS.CREATING; + if (facts.restarting) return STATUS.RESTARTING; + + // We had a VMID + creds but couldn't reach Proxmox: don't guess missing. + if (facts.hasVmid && !facts.proxmoxReachable) return STATUS.UNKNOWN; + + if (facts.createFailed) return STATUS.FAILED; + return STATUS.MISSING; +} + +/** + * Compute the live status of a single container. + * + * @param {object} params + * @param {object} params.container - Container instance (with `node` association). + * @param {object} params.Job - Job model (from models). + * @param {object} [params.api] - Optional pre-authenticated ProxmoxApi client for + * the container's node (lets callers reuse one client across many containers). + * @param {{ data: Array, ok: boolean }} [params.snapshot] - Optional + * pre-fetched clusterResources('lxc') snapshot for the node. `ok` indicates the + * query succeeded; `data` is the resource list (used for batching the index). + * @returns {Promise} One of the STATUS values. + */ +async function computeContainerStatus({ container, Job, api, snapshot }) { + const node = container.node; + const hasCreds = nodeHasCreds(node); + const hasVmid = container.containerId != null; + + let client = api || null; + async function getClient() { + if (!client) client = await node.api(); + return client; + } + + // --- Determine Proxmox presence / run state --- + let proxmoxReachable = false; + let inProxmox = false; + let proxmoxRunning = false; + + if (hasVmid && hasCreds) { + try { + let resources; + if (snapshot) { + proxmoxReachable = !!snapshot.ok; + resources = snapshot.ok ? snapshot.data : null; + } else { + const c = await getClient(); + resources = await c.clusterResources('lxc'); + proxmoxReachable = true; + } + if (proxmoxReachable) { + const resource = findInSnapshot(resources, container.containerId); + if (resource) { + inProxmox = true; + proxmoxRunning = resource.status === 'running'; + } + } + } catch (err) { + proxmoxReachable = false; + } + } + + // --- Jobs --- + const reconfigureJob = await findLatestReconfigureJob(container, Job); + const restarting = isActiveJob(reconfigureJob); + + // Drift detection (only meaningful if it exists and isn't already restarting). + let inSync = null; + if (inProxmox && !restarting && hasCreds) { + try { + const c = await getClient(); + const liveConfig = await c.lxcConfig(node.name, container.containerId); + inSync = configMatches(container, liveConfig); + } catch (err) { + inSync = null; // can't assert drift + } + } + + // Only look up the create job when we actually need it (container not running + // in Proxmox), to avoid an extra query on the happy path. + let creating = false; + let createFailed = false; + let hasCreateJob = false; + if (!inProxmox) { + const createJob = await findLatestCreateJob(container, Job); + hasCreateJob = !!createJob; + creating = isActiveJob(createJob); + createFailed = !!createJob && createJob.status === 'failure'; + } + + return decideStatus({ + proxmoxReachable, + inProxmox, + proxmoxRunning, + inSync, + hasVmid, + creating, + restarting, + createFailed, + hasCreateJob, + }); +} + +/** + * Compute live statuses for many containers efficiently. + * + * Groups containers by node so each node's Proxmox `clusterResources('lxc')` + * snapshot is fetched exactly once and a single authenticated client is reused + * for that node's containers (including per-container config reads). This keeps + * total end-user latency low for the list page versus N independent calls. + * + * @param {Array} containers - Container instances (each with `node`). + * @param {object} Job - Job model. + * @returns {Promise>} Map of container.id -> STATUS value. + */ +async function computeContainerStatuses(containers, Job) { + const result = new Map(); + + // Group by node id (containers without a node still get resolved, just without + // any Proxmox facts). + const byNode = new Map(); + for (const container of containers) { + const key = container.node ? container.node.id : `__no_node_${container.id}`; + if (!byNode.has(key)) byNode.set(key, []); + byNode.get(key).push(container); + } + + await Promise.all( + Array.from(byNode.values()).map(async (group) => { + const node = group[0].node; + let api = null; + let snapshot = { ok: false, data: null }; + + if (nodeHasCreds(node)) { + try { + api = await node.api(); + const data = await api.clusterResources('lxc'); + snapshot = { ok: true, data }; + } catch (err) { + snapshot = { ok: false, data: null }; + } + } + + // Resolve each container in the group sequentially per node (shares the + // single authenticated client); different nodes run in parallel. + for (const container of group) { + // eslint-disable-next-line no-await-in-loop + const status = await computeContainerStatus({ container, Job, api, snapshot }); + result.set(container.id, status); + } + }), + ); + + return result; +} + +module.exports = { + STATUS, + STATUS_VALUES, + computeContainerStatus, + computeContainerStatuses, + decideStatus, + // exported for reuse / testing + configMatches, + parseEnvString, + findInSnapshot, + findLatestCreateJob, + findLatestReconfigureJob, +}; From 382365aad7a618f341391f0404c81d0e964c3dc2 Mon Sep 17 00:00:00 2001 From: Robert Gingras Date: Tue, 16 Jun 2026 08:21:59 -0400 Subject: [PATCH 08/15] Drop dedicated /containers/:id/status endpoint Live status is already embedded in the existing list/show/create container payloads, so the dedicated GET /containers/:id/status endpoint is redundant. Per-row polling + hydration on the index page is not needed at this time, so revert the ContainerStatusBadge to a simple presentational badge that renders the live status from the list response. Removes the endpoint, the client query/key/result type, the OpenAPI path, and the README endpoint section (the status-value reference is retained as a field description). The ContainerStatus enum and the live resolver remain. --- create-a-container/README.md | 30 ++++++++-------- create-a-container/client/src/lib/queries.ts | 5 --- create-a-container/client/src/lib/types.ts | 6 +--- .../pages/containers/ContainersListPage.tsx | 36 +++---------------- .../20260615000000-remove-container-status.js | 4 +-- create-a-container/openapi.v1.yaml | 20 ----------- .../routers/api/v1/containers.js | 24 ------------- 7 files changed, 21 insertions(+), 104 deletions(-) diff --git a/create-a-container/README.md b/create-a-container/README.md index 31fb3d2f..6d6fb80c 100644 --- a/create-a-container/README.md +++ b/create-a-container/README.md @@ -230,22 +230,20 @@ Delete a container from both Proxmox and the database - `403` - User doesn't own the container - `500` - Proxmox API deletion failed or node not configured -#### `GET /api/v1/sites/:siteId/containers/:id/status` (Auth Required) -Return the **live** status of a container, computed on demand rather than read -from a stored column. The status is resolved by combining the container's -presence/run-state in Proxmox, any active create/reconfigure jobs, and whether -the live LXC config matches what the API server expects. -- **Returns**: `{ data: { status } }` where `status` is one of: - - `running` — online in Proxmox - - `offline` — exists in Proxmox but stopped - - `creating` — no Proxmox VM yet, active create job - - `restarting` — active reconfigure job - - `failed` — no Proxmox VM, last create job failed - - `missing` — no Proxmox VM, create succeeded or no create job found - - `out-of-sync` — exists in Proxmox but its config differs from expectation - - `unknown` — Proxmox unreachable / node has no API credentials -- **Note**: The same `status` value is also embedded on each container returned - by the list and show endpoints, so existing consumers remain non-breaking. +#### Container status (`status` field) +Every container returned by the list and show endpoints includes a **live** +`status` field, computed on demand rather than read from a stored column. It is +resolved by combining the container's presence/run-state in Proxmox, any active +create/reconfigure jobs, and whether the live LXC config matches what the API +server expects. Possible values: +- `running` — online in Proxmox +- `offline` — exists in Proxmox but stopped +- `creating` — no Proxmox VM yet, active create job +- `restarting` — active reconfigure job +- `failed` — no Proxmox VM, last create job failed +- `missing` — no Proxmox VM, create succeeded or no create job found +- `out-of-sync` — exists in Proxmox but its config differs from expectation +- `unknown` — Proxmox unreachable / node has no API credentials #### `GET /status/:jobId` (Auth Required) View container creation progress page diff --git a/create-a-container/client/src/lib/queries.ts b/create-a-container/client/src/lib/queries.ts index 546db6be..b261f2ec 100644 --- a/create-a-container/client/src/lib/queries.ts +++ b/create-a-container/client/src/lib/queries.ts @@ -8,7 +8,6 @@ import type { Container, ContainerMetadata, ContainerNewBootstrap, - ContainerStatusResult, EffectiveResources, ExternalDomain, Group, @@ -30,8 +29,6 @@ export const keys = { containers: (siteId: number | string) => ['sites', String(siteId), 'containers'] as const, container: (siteId: number | string, id: number | string) => ['sites', String(siteId), 'containers', String(id)] as const, - containerStatus: (siteId: number | string, id: number | string) => - ['sites', String(siteId), 'containers', String(id), 'status'] as const, containerBootstrap: (siteId: number | string) => ['sites', String(siteId), 'containers', 'new'] as const, containerMetadata: (image: string) => ['container-metadata', image] as const, @@ -67,8 +64,6 @@ export const queries = { api.get(`/api/v1/sites/${siteId}/containers`), getContainer: (siteId: number | string, id: number | string) => api.get(`/api/v1/sites/${siteId}/containers/${id}`), - getContainerStatus: (siteId: number | string, id: number | string) => - api.get(`/api/v1/sites/${siteId}/containers/${id}/status`), containerBootstrap: (siteId: number | string) => api.get(`/api/v1/sites/${siteId}/containers/new`), containerMetadata: (siteId: number | string, image: string) => diff --git a/create-a-container/client/src/lib/types.ts b/create-a-container/client/src/lib/types.ts index a32fd6cd..e4e077d3 100644 --- a/create-a-container/client/src/lib/types.ts +++ b/create-a-container/client/src/lib/types.ts @@ -92,7 +92,7 @@ export interface Container { /** * Live container status resolved from Proxmox + job state + config drift. - * Returned by GET /sites/:id/containers/:id/status and embedded on Container. + * Embedded on each Container returned by the list/show/create endpoints. */ export type ContainerStatus = | 'running' @@ -104,10 +104,6 @@ export type ContainerStatus = | 'out-of-sync' | 'unknown'; -export interface ContainerStatusResult { - status: ContainerStatus; -} - export interface ContainerCreateResult { containerId: number; jobId: number; diff --git a/create-a-container/client/src/pages/containers/ContainersListPage.tsx b/create-a-container/client/src/pages/containers/ContainersListPage.tsx index 5f899d29..834bce6a 100644 --- a/create-a-container/client/src/pages/containers/ContainersListPage.tsx +++ b/create-a-container/client/src/pages/containers/ContainersListPage.tsx @@ -71,36 +71,8 @@ const STATUS_LABELS: Record = { unknown: 'Unknown', }; -// Statuses that are transitional and worth polling for. -const TRANSITIONAL_STATUSES: ReadonlySet = new Set([ - 'creating', - 'restarting', -]); - -/** - * Live status badge. Per issue #350 each row queries the dedicated - * /containers/:id/status endpoint. The status already returned by the list - * endpoint seeds initialData so the badge paints instantly, then this query - * keeps it fresh (polling while the container is in a transitional state). - */ -function ContainerStatusBadge({ - siteId, - container, -}: { - siteId: string | undefined; - container: Container; -}) { - const { data } = useQuery({ - queryKey: keys.containerStatus(siteId!, container.id), - queryFn: () => queries.getContainerStatus(siteId!, container.id), - enabled: !!siteId, - initialData: { status: container.status }, - refetchInterval: (query) => { - const status = query.state.data?.status; - return status && TRANSITIONAL_STATUSES.has(status) ? 4000 : false; - }, - }); - const status = data?.status ?? container.status; +/** Status badge. The status is the live value embedded in the list response. */ +function StatusBadge({ status }: { status: ContainerStatus }) { return {STATUS_LABELS[status] ?? status}; } @@ -378,7 +350,7 @@ export function ContainersListPage() { {c.hostname} - +
@@ -422,7 +394,7 @@ export function ContainersListPage() { {c.hostname} - + diff --git a/create-a-container/migrations/20260615000000-remove-container-status.js b/create-a-container/migrations/20260615000000-remove-container-status.js index cdcdc72c..39d7b50e 100644 --- a/create-a-container/migrations/20260615000000-remove-container-status.js +++ b/create-a-container/migrations/20260615000000-remove-container-status.js @@ -5,8 +5,8 @@ * Remove the static `status` column from Containers. * * Container status is now computed live from Proxmox + job state + config drift - * (see utils/container-status.js and GET /sites/:id/containers/:id/status), so the - * persisted column is no longer a source of truth and is dropped. + * (see utils/container-status.js) and embedded on the container API payloads, so + * the persisted column is no longer a source of truth and is dropped. * * The down migration restores the column (defaulting to 'running') for rollback, * but the historical per-container value cannot be recovered. diff --git a/create-a-container/openapi.v1.yaml b/create-a-container/openapi.v1.yaml index ec476381..540fb675 100644 --- a/create-a-container/openapi.v1.yaml +++ b/create-a-container/openapi.v1.yaml @@ -360,26 +360,6 @@ paths: get: { tags: [Containers], responses: { '200': { description: Container } } } put: { tags: [Containers], responses: { '200': { description: Updated, optional restart job } } } delete: { tags: [Containers], responses: { '200': { description: Deleted, with DNS cleanup warnings } } } - /sites/{siteId}/containers/{id}/status: - get: - tags: [Containers] - summary: Live container status (computed from Proxmox + jobs + config drift) - parameters: - - { in: path, name: siteId, required: true, schema: { type: integer } } - - { in: path, name: id, required: true, schema: { type: integer } } - responses: - '200': - description: Resolved container status - content: - application/json: - schema: - type: object - properties: - data: - type: object - properties: - status: { $ref: '#/components/schemas/ContainerStatus' } - '404': { description: Container not found } /sites/{siteId}/nodes: parameters: [{ in: path, name: siteId, required: true, schema: { type: integer } }] diff --git a/create-a-container/routers/api/v1/containers.js b/create-a-container/routers/api/v1/containers.js index 48a9c3bc..84c6003d 100644 --- a/create-a-container/routers/api/v1/containers.js +++ b/create-a-container/routers/api/v1/containers.js @@ -223,30 +223,6 @@ router.get( }), ); -// GET /containers/:id/status — live status for a single container -router.get( - '/:id/status', - asyncHandler(async (req, res) => { - const site = await loadSite(req); - const containerId = parseInt(req.params.id, 10); - if (!Number.isInteger(containerId) || containerId <= 0) { - throw new ApiError(404, 'not_found', 'Container not found'); - } - const c = await Container.findOne({ - where: { id: containerId, username: req.session.user }, - include: [ - { association: 'node' }, - { association: 'creationJob' }, - ], - }); - if (!c || !c.node || c.node.siteId !== site.id) { - throw new ApiError(404, 'not_found', 'Container not found'); - } - const status = await computeContainerStatus({ container: c, Job }); - return ok(res, { status }); - }), -); - // GET /containers/:id router.get( '/:id', From 54f4d6bc9af848e771fdf31832f5e2c67a09b77f Mon Sep 17 00:00:00 2001 From: Robert Gingras Date: Tue, 16 Jun 2026 08:34:02 -0400 Subject: [PATCH 09/15] Address PR review feedback - out-of-sync is a warning, not a danger, badge variant - templates.js routes containers by ipv4Address only (a VMID isn't required to route) --- .../src/pages/containers/ContainersListPage.tsx | 2 +- create-a-container/routers/templates.js | 14 ++++++-------- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/create-a-container/client/src/pages/containers/ContainersListPage.tsx b/create-a-container/client/src/pages/containers/ContainersListPage.tsx index 834bce6a..20f2de07 100644 --- a/create-a-container/client/src/pages/containers/ContainersListPage.tsx +++ b/create-a-container/client/src/pages/containers/ContainersListPage.tsx @@ -47,9 +47,9 @@ function statusVariant( return 'success'; case 'creating': case 'restarting': + case 'out-of-sync': return 'warning'; case 'failed': - case 'out-of-sync': return 'danger'; case 'offline': case 'missing': diff --git a/create-a-container/routers/templates.js b/create-a-container/routers/templates.js index 4969dde5..28dab30a 100644 --- a/create-a-container/routers/templates.js +++ b/create-a-container/routers/templates.js @@ -5,12 +5,10 @@ const { requireLocalhostOrAdmin } = require('../middlewares'); const router = express.Router(); -// A container is eligible for routing/DNS config once it has been provisioned, -// i.e. it has a Proxmox VMID and an assigned IPv4 address. The old code filtered -// on the (now-removed) static status column; presence of an IP/VMID is the stable, -// Proxmox-independent proxy for "this container is live enough to route to". -const PROVISIONED_CONTAINER_WHERE = { - containerId: { [Op.ne]: null }, +// A container is routable once it has an assigned IPv4 address. The old code +// filtered on the (now-removed) static status column; presence of an IP is the +// stable, Proxmox-independent requirement for "this container can be routed to". +const ROUTABLE_CONTAINER_WHERE = { ipv4Address: { [Op.ne]: null }, }; @@ -22,7 +20,7 @@ async function loadDnsmasqSite(siteId) { include: [{ model: Container, as: 'containers', - where: PROVISIONED_CONTAINER_WHERE, + where: ROUTABLE_CONTAINER_WHERE, required: false, attributes: ['macAddress', 'ipv4Address', 'hostname'], }], @@ -50,7 +48,7 @@ router.get('/sites/:siteId/nginx', requireLocalhostOrAdmin, async (req, res) => include: [{ model: Container, as: 'containers', - where: PROVISIONED_CONTAINER_WHERE, + where: ROUTABLE_CONTAINER_WHERE, required: false, include: [{ model: Service, From 3e6991f1e49b0e8da1d2da1e2974716fbfdcbbaa Mon Sep 17 00:00:00 2001 From: Robert Gingras Date: Tue, 16 Jun 2026 08:45:08 -0400 Subject: [PATCH 10/15] Inline routable-container where clause in templates.js --- create-a-container/routers/templates.js | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/create-a-container/routers/templates.js b/create-a-container/routers/templates.js index 28dab30a..fb0bda71 100644 --- a/create-a-container/routers/templates.js +++ b/create-a-container/routers/templates.js @@ -5,13 +5,6 @@ const { requireLocalhostOrAdmin } = require('../middlewares'); const router = express.Router(); -// A container is routable once it has an assigned IPv4 address. The old code -// filtered on the (now-removed) static status column; presence of an IP is the -// stable, Proxmox-independent requirement for "this container can be routed to". -const ROUTABLE_CONTAINER_WHERE = { - ipv4Address: { [Op.ne]: null }, -}; - async function loadDnsmasqSite(siteId) { return Site.findByPk(siteId, { include: [{ @@ -20,7 +13,8 @@ async function loadDnsmasqSite(siteId) { include: [{ model: Container, as: 'containers', - where: ROUTABLE_CONTAINER_WHERE, + // Only containers with an assigned IPv4 are routable. + where: { ipv4Address: { [Op.ne]: null } }, required: false, attributes: ['macAddress', 'ipv4Address', 'hostname'], }], @@ -48,7 +42,8 @@ router.get('/sites/:siteId/nginx', requireLocalhostOrAdmin, async (req, res) => include: [{ model: Container, as: 'containers', - where: ROUTABLE_CONTAINER_WHERE, + // Only containers with an assigned IPv4 are routable. + where: { ipv4Address: { [Op.ne]: null } }, required: false, include: [{ model: Service, From 7d3b818a51480a43ddf1f8a3fe9e753465c1b002 Mon Sep 17 00:00:00 2001 From: Robert Gingras Date: Tue, 16 Jun 2026 08:57:30 -0400 Subject: [PATCH 11/15] cleanup --- create-a-container/bin/create-container.js | 3 --- create-a-container/bin/reconfigure-container.js | 4 +--- create-a-container/routers/api/v1/containers.js | 2 -- create-a-container/routers/templates.js | 2 -- 4 files changed, 1 insertion(+), 10 deletions(-) diff --git a/create-a-container/bin/create-container.js b/create-a-container/bin/create-container.js index 3a0e1701..c4098a45 100755 --- a/create-a-container/bin/create-container.js +++ b/create-a-container/bin/create-container.js @@ -481,9 +481,6 @@ async function main() { console.error('API Error Details:', JSON.stringify(err.response.data, null, 2)); } - // The container status is no longer persisted. Failure is recorded by the - // job-runner marking this job 'failure'; the live status resolver maps a - // missing-in-Proxmox container whose last create job failed to 'failed'. process.exit(1); } } diff --git a/create-a-container/bin/reconfigure-container.js b/create-a-container/bin/reconfigure-container.js index 6e31a248..1f702ce9 100644 --- a/create-a-container/bin/reconfigure-container.js +++ b/create-a-container/bin/reconfigure-container.js @@ -160,7 +160,7 @@ async function main() { throw new Error('Could not get IP address from Proxmox interfaces API'); } - // Update container record with MAC/IP (status is no longer persisted) + // Update container record with MAC/IP await container.update({ macAddress, ipv4Address @@ -181,8 +181,6 @@ async function main() { console.error('API Error Details:', JSON.stringify(err.response.data, null, 2)); } - // Container status is no longer persisted. The job-runner records this job as - // 'failure'; live status is derived from Proxmox + job state on read. process.exit(1); } } diff --git a/create-a-container/routers/api/v1/containers.js b/create-a-container/routers/api/v1/containers.js index 84c6003d..73a698b2 100644 --- a/create-a-container/routers/api/v1/containers.js +++ b/create-a-container/routers/api/v1/containers.js @@ -431,8 +431,6 @@ router.put( const dnsWarnings = []; await sequelize.transaction(async (t) => { if (envChanged || entrypointChanged) { - // Persist the new desired config only. The "restarting" state is no - // longer stored — it is derived from the active reconfigure job below. await container.update( { environmentVars: envVarsJson, diff --git a/create-a-container/routers/templates.js b/create-a-container/routers/templates.js index fb0bda71..938ce756 100644 --- a/create-a-container/routers/templates.js +++ b/create-a-container/routers/templates.js @@ -13,7 +13,6 @@ async function loadDnsmasqSite(siteId) { include: [{ model: Container, as: 'containers', - // Only containers with an assigned IPv4 are routable. where: { ipv4Address: { [Op.ne]: null } }, required: false, attributes: ['macAddress', 'ipv4Address', 'hostname'], @@ -42,7 +41,6 @@ router.get('/sites/:siteId/nginx', requireLocalhostOrAdmin, async (req, res) => include: [{ model: Container, as: 'containers', - // Only containers with an assigned IPv4 are routable. where: { ipv4Address: { [Op.ne]: null } }, required: false, include: [{ From 6e35032b117fc4373c6ae10727bb44eb9246238b Mon Sep 17 00:00:00 2001 From: Robert Gingras Date: Tue, 16 Jun 2026 09:22:58 -0400 Subject: [PATCH 12/15] Address PR review feedback - container-status.js: match --container-id as a whole argument when looking up create/reconfigure jobs, so container 12 no longer matches container 123's jobs (which could misclassify status). LIKE narrows at the DB level; a regex confirms the id is terminated by a space or end-of-string. - resource-requests.js: the applyResourceToExistingContainers query filtered on the now-removed Containers.status column ("Unknown column 'status'" after the migration). Use containerId != null (a VMID is required to reconfigure anyway). - README: note the create endpoint also returns the live status field. --- create-a-container/README.md | 13 +++--- .../routers/api/v1/resource-requests.js | 7 +++- create-a-container/utils/container-status.js | 42 +++++++++++++++---- 3 files changed, 47 insertions(+), 15 deletions(-) diff --git a/create-a-container/README.md b/create-a-container/README.md index 6d6fb80c..3c5f596b 100644 --- a/create-a-container/README.md +++ b/create-a-container/README.md @@ -231,11 +231,11 @@ Delete a container from both Proxmox and the database - `500` - Proxmox API deletion failed or node not configured #### Container status (`status` field) -Every container returned by the list and show endpoints includes a **live** -`status` field, computed on demand rather than read from a stored column. It is -resolved by combining the container's presence/run-state in Proxmox, any active -create/reconfigure jobs, and whether the live LXC config matches what the API -server expects. Possible values: +Every container returned by the list, show, and create endpoints includes a +**live** `status` field, computed on demand rather than read from a stored +column. It is resolved by combining the container's presence/run-state in +Proxmox, any active create/reconfigure jobs, and whether the live LXC config +matches what the API server expects. Possible values: - `running` — online in Proxmox - `offline` — exists in Proxmox but stopped - `creating` — no Proxmox VM yet, active create job @@ -245,6 +245,9 @@ server expects. Possible values: - `out-of-sync` — exists in Proxmox but its config differs from expectation - `unknown` — Proxmox unreachable / node has no API credentials +The create endpoint (`POST /containers`) returns `creating` immediately, since a +create job is enqueued and there is no Proxmox VM yet. + #### `GET /status/:jobId` (Auth Required) View container creation progress page diff --git a/create-a-container/routers/api/v1/resource-requests.js b/create-a-container/routers/api/v1/resource-requests.js index ce047223..5997faef 100644 --- a/create-a-container/routers/api/v1/resource-requests.js +++ b/create-a-container/routers/api/v1/resource-requests.js @@ -217,12 +217,15 @@ router.put( ); /** - * Apply a resource change to existing running containers matching the identity. + * Apply a resource change to existing provisioned containers matching the identity. * This creates a reconfigure job for each matching container. */ async function applyResourceToExistingContainers(siteId, hostname, username, resourceType, value) { const containers = await Container.findAll({ - where: { siteId, hostname, username, status: 'running' }, + // Only containers that exist in Proxmox (have a VMID) can be reconfigured. + // The status column was removed; reconfigure-container.js exits early without + // a containerId, so this is the correct provisioned predicate. + where: { siteId, hostname, username, containerId: { [Op.ne]: null } }, }); if (containers.length === 0) return; diff --git a/create-a-container/utils/container-status.js b/create-a-container/utils/container-status.js index 735ec22b..ba81a0bb 100644 --- a/create-a-container/utils/container-status.js +++ b/create-a-container/utils/container-status.js @@ -49,6 +49,38 @@ function nodeHasCreds(node) { return !!(node && node.apiUrl && node.tokenId && node.secret); } +/** + * Escape a string for safe use inside a RegExp. + * @param {string|number} s + * @returns {string} + */ +function escapeRegExp(s) { + return String(s).replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); +} + +/** + * Find the most recent job for a container whose command contains + * ` --container-id=` as a *whole* argument. + * + * A plain SQL LIKE on `--container-id=` would also match longer ids + * (id 12 matches `--container-id=123`), so we use LIKE only to narrow at the DB + * level (ordered most-recent first) and then confirm each candidate in JS with a + * regex requiring the id to be terminated by a space or end-of-string. + * + * @param {object} Job - Job model + * @param {string} cmdFragment - e.g. CREATE_CMD or RECONFIGURE_CMD + * @param {number} id - Container database id + * @returns {Promise} + */ +async function findLatestJobForContainer(Job, cmdFragment, id) { + const candidates = await Job.findAll({ + where: { command: { [Op.like]: `%${cmdFragment} --container-id=${id}%` } }, + order: [['createdAt', 'DESC']], + }); + const whole = new RegExp(`${escapeRegExp(cmdFragment)} --container-id=${escapeRegExp(id)}(?:\\s|$)`); + return candidates.find((job) => whole.test(job.command)) || null; +} + /** * Find the most recent create job for a container. * Prefers the explicit creationJobId FK; falls back to a command-string match. @@ -61,10 +93,7 @@ async function findLatestCreateJob(container, Job) { if (container.creationJobId) { return Job.findByPk(container.creationJobId); } - return Job.findOne({ - where: { command: { [Op.like]: `%${CREATE_CMD} --container-id=${container.id}%` } }, - order: [['createdAt', 'DESC']], - }); + return findLatestJobForContainer(Job, CREATE_CMD, container.id); } /** @@ -74,10 +103,7 @@ async function findLatestCreateJob(container, Job) { * @returns {Promise} */ async function findLatestReconfigureJob(container, Job) { - return Job.findOne({ - where: { command: { [Op.like]: `%${RECONFIGURE_CMD} --container-id=${container.id}%` } }, - order: [['createdAt', 'DESC']], - }); + return findLatestJobForContainer(Job, RECONFIGURE_CMD, container.id); } function isActiveJob(job) { From 665f7bd708b2c1cfd2d8cdcc0da3c94b91632285 Mon Sep 17 00:00:00 2001 From: Robert Gingras Date: Tue, 16 Jun 2026 09:36:08 -0400 Subject: [PATCH 13/15] Use exact DB match for job id lookup instead of regex The calling convention for create/reconfigure job commands is stable and deterministic, so match the command string exactly in the query rather than fetching LIKE candidates and regex-filtering in JS: - create: exact match on `node bin/create-container.js --container-id=` (create commands never carry trailing args). - reconfigure: exact match OR ` %` (exact base followed by a space), which covers the optional ` --=` flags while still preventing id 12 from matching id 123. --- create-a-container/utils/container-status.js | 66 ++++++++------------ 1 file changed, 27 insertions(+), 39 deletions(-) diff --git a/create-a-container/utils/container-status.js b/create-a-container/utils/container-status.js index ba81a0bb..ccfb31e4 100644 --- a/create-a-container/utils/container-status.js +++ b/create-a-container/utils/container-status.js @@ -38,10 +38,17 @@ const STATUS = Object.freeze({ const STATUS_VALUES = Object.freeze(Object.values(STATUS)); -// Job command fragments that identify create vs reconfigure (restart) jobs. -// Jobs are distinguished solely by their `command` string (no type column). -const CREATE_CMD = 'bin/create-container.js'; -const RECONFIGURE_CMD = 'bin/reconfigure-container.js'; +// Job command builders. Jobs are distinguished solely by their `command` string +// (no type column), and the commands are constructed deterministically by the +// container router / resource-requests, so we match them exactly here. +// create: node bin/create-container.js --container-id= +// reconfigure: node bin/reconfigure-container.js --container-id=[ --=...] +function createCommand(id) { + return `node bin/create-container.js --container-id=${id}`; +} +function reconfigureCommand(id) { + return `node bin/reconfigure-container.js --container-id=${id}`; +} const ACTIVE_JOB_STATUSES = ['pending', 'running']; @@ -49,41 +56,10 @@ function nodeHasCreds(node) { return !!(node && node.apiUrl && node.tokenId && node.secret); } -/** - * Escape a string for safe use inside a RegExp. - * @param {string|number} s - * @returns {string} - */ -function escapeRegExp(s) { - return String(s).replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); -} - -/** - * Find the most recent job for a container whose command contains - * ` --container-id=` as a *whole* argument. - * - * A plain SQL LIKE on `--container-id=` would also match longer ids - * (id 12 matches `--container-id=123`), so we use LIKE only to narrow at the DB - * level (ordered most-recent first) and then confirm each candidate in JS with a - * regex requiring the id to be terminated by a space or end-of-string. - * - * @param {object} Job - Job model - * @param {string} cmdFragment - e.g. CREATE_CMD or RECONFIGURE_CMD - * @param {number} id - Container database id - * @returns {Promise} - */ -async function findLatestJobForContainer(Job, cmdFragment, id) { - const candidates = await Job.findAll({ - where: { command: { [Op.like]: `%${cmdFragment} --container-id=${id}%` } }, - order: [['createdAt', 'DESC']], - }); - const whole = new RegExp(`${escapeRegExp(cmdFragment)} --container-id=${escapeRegExp(id)}(?:\\s|$)`); - return candidates.find((job) => whole.test(job.command)) || null; -} - /** * Find the most recent create job for a container. - * Prefers the explicit creationJobId FK; falls back to a command-string match. + * Prefers the explicit creationJobId FK; falls back to an exact command match. + * Create commands never carry trailing arguments, so the match is exact. * @param {object} container - Container instance (with optional creationJob assoc) * @param {object} Job - Job model * @returns {Promise} @@ -93,17 +69,29 @@ async function findLatestCreateJob(container, Job) { if (container.creationJobId) { return Job.findByPk(container.creationJobId); } - return findLatestJobForContainer(Job, CREATE_CMD, container.id); + return Job.findOne({ + where: { command: createCommand(container.id) }, + order: [['createdAt', 'DESC']], + }); } /** * Find the most recent reconfigure (restart) job for a container, if any. + * A reconfigure command is either exactly ` --container-id=` or that + * followed by ` --=` flags, so we match the exact form OR the + * exact form followed by a space (the latter guards against id 12 matching 123). * @param {object} container - Container instance * @param {object} Job - Job model * @returns {Promise} */ async function findLatestReconfigureJob(container, Job) { - return findLatestJobForContainer(Job, RECONFIGURE_CMD, container.id); + const base = reconfigureCommand(container.id); + return Job.findOne({ + where: { + [Op.or]: [{ command: base }, { command: { [Op.like]: `${base} %` } }], + }, + order: [['createdAt', 'DESC']], + }); } function isActiveJob(job) { From 973a545e8abfb3ff4a0f3dde86a5fc17b63cfced Mon Sep 17 00:00:00 2001 From: Robert Gingras Date: Tue, 16 Jun 2026 10:22:16 -0400 Subject: [PATCH 14/15] Batch job lookups when resolving statuses for the index Resolving the container list status was O(N) DB queries because every container ran its own create/reconfigure job lookup (up to 2 queries each). Add prefetchLatestJobs() which fetches the latest create + reconfigure job for the whole set of containers in a single query (bounded WHERE clauses, independent of N), buckets them per container, and feeds the result into computeContainerStatus via an optional `jobs` param. computeContainerStatuses now issues one job query total instead of ~2N. The single-container show path is unchanged (still queries per container when no prefetched jobs are passed). --- create-a-container/utils/container-status.js | 109 +++++++++++++++++-- 1 file changed, 99 insertions(+), 10 deletions(-) diff --git a/create-a-container/utils/container-status.js b/create-a-container/utils/container-status.js index ccfb31e4..2c7189ce 100644 --- a/create-a-container/utils/container-status.js +++ b/create-a-container/utils/container-status.js @@ -43,11 +43,28 @@ const STATUS_VALUES = Object.freeze(Object.values(STATUS)); // container router / resource-requests, so we match them exactly here. // create: node bin/create-container.js --container-id= // reconfigure: node bin/reconfigure-container.js --container-id=[ --=...] +const CREATE_PREFIX = 'node bin/create-container.js --container-id='; +const RECONFIGURE_PREFIX = 'node bin/reconfigure-container.js --container-id='; + function createCommand(id) { - return `node bin/create-container.js --container-id=${id}`; + return `${CREATE_PREFIX}${id}`; } function reconfigureCommand(id) { - return `node bin/reconfigure-container.js --container-id=${id}`; + return `${RECONFIGURE_PREFIX}${id}`; +} + +// Extract the container id from a create/reconfigure job command, or null if the +// command isn't one of ours. The id is the run of digits immediately after a +// known prefix, terminated by end-of-string or a space (so 12 != 123). +const COMMAND_CONTAINER_ID_RE = new RegExp( + `^node bin/(?:create|reconfigure)-container\\.js --container-id=(\\d+)(?: |$)`, +); +function containerIdFromCommand(command) { + const m = COMMAND_CONTAINER_ID_RE.exec(command || ''); + return m ? parseInt(m[1], 10) : null; +} +function isCreateCommand(command) { + return typeof command === 'string' && command.startsWith(CREATE_PREFIX); } const ACTIVE_JOB_STATUSES = ['pending', 'running']; @@ -94,6 +111,60 @@ async function findLatestReconfigureJob(container, Job) { }); } +/** + * Prefetch the latest create and reconfigure job for many containers in a single + * DB query, so resolving N containers' statuses is O(1) queries instead of O(N). + * + * Jobs are matched by their deterministic command strings (exact create command, + * and reconfigure command optionally followed by resource flags) for the given + * container ids, fetched newest-first, then bucketed per container keeping the + * first (latest) of each kind. + * + * @param {Array} containers - Container instances. + * @param {object} Job - Job model. + * @returns {Promise>} + */ +async function prefetchLatestJobs(containers, Job) { + const byContainer = new Map(); + const ids = []; + for (const c of containers) { + byContainer.set(c.id, { createJob: null, reconfigureJob: null }); + ids.push(c.id); + } + if (ids.length === 0) return byContainer; + + // One query for all relevant jobs, with a bounded number of WHERE clauses + // (independent of N): + // - exact create commands for these ids + // - exact (arg-less) reconfigure commands for these ids + // - any reconfigure command carrying trailing flags (prefix LIKE) + // The prefix LIKE may match reconfigure jobs for containers not on this page; + // those are discarded during bucketing below (byContainer lookup). + const orClauses = [ + { command: { [Op.in]: ids.map(createCommand) } }, + { command: { [Op.in]: ids.map(reconfigureCommand) } }, + { command: { [Op.like]: `${RECONFIGURE_PREFIX}%` } }, + ]; + const jobs = await Job.findAll({ + where: { [Op.or]: orClauses }, + order: [['createdAt', 'DESC']], + }); + + // Bucket newest-first; the first job seen for a (container, kind) is the latest. + for (const job of jobs) { + const id = containerIdFromCommand(job.command); + if (id == null) continue; + const entry = byContainer.get(id); + if (!entry) continue; + if (isCreateCommand(job.command)) { + if (!entry.createJob) entry.createJob = job; + } else if (!entry.reconfigureJob) { + entry.reconfigureJob = job; + } + } + return byContainer; +} + function isActiveJob(job) { return !!job && ACTIVE_JOB_STATUSES.includes(job.status); } @@ -219,9 +290,13 @@ function decideStatus(facts) { * @param {{ data: Array, ok: boolean }} [params.snapshot] - Optional * pre-fetched clusterResources('lxc') snapshot for the node. `ok` indicates the * query succeeded; `data` is the resource list (used for batching the index). + * @param {{ createJob: object|null, reconfigureJob: object|null }} [params.jobs] - + * Optional prefetched latest create/reconfigure jobs for this container (see + * prefetchLatestJobs). When provided, the per-container job DB lookups are + * skipped, making batch resolution O(1) queries instead of O(N). * @returns {Promise} One of the STATUS values. */ -async function computeContainerStatus({ container, Job, api, snapshot }) { +async function computeContainerStatus({ container, Job, api, snapshot, jobs }) { const node = container.node; const hasCreds = nodeHasCreds(node); const hasVmid = container.containerId != null; @@ -261,7 +336,10 @@ async function computeContainerStatus({ container, Job, api, snapshot }) { } // --- Jobs --- - const reconfigureJob = await findLatestReconfigureJob(container, Job); + // Use prefetched jobs when supplied (batch path); otherwise query per container. + const reconfigureJob = jobs + ? jobs.reconfigureJob + : await findLatestReconfigureJob(container, Job); const restarting = isActiveJob(reconfigureJob); // Drift detection (only meaningful if it exists and isn't already restarting). @@ -282,7 +360,7 @@ async function computeContainerStatus({ container, Job, api, snapshot }) { let createFailed = false; let hasCreateJob = false; if (!inProxmox) { - const createJob = await findLatestCreateJob(container, Job); + const createJob = jobs ? jobs.createJob : await findLatestCreateJob(container, Job); hasCreateJob = !!createJob; creating = isActiveJob(createJob); createFailed = !!createJob && createJob.status === 'failure'; @@ -304,10 +382,12 @@ async function computeContainerStatus({ container, Job, api, snapshot }) { /** * Compute live statuses for many containers efficiently. * - * Groups containers by node so each node's Proxmox `clusterResources('lxc')` - * snapshot is fetched exactly once and a single authenticated client is reused - * for that node's containers (including per-container config reads). This keeps - * total end-user latency low for the list page versus N independent calls. + * Two batching strategies keep this fast for the list page: + * - Jobs: a single DB query fetches the latest create/reconfigure job for every + * container up front (O(1) queries instead of O(N)). + * - Proxmox: containers are grouped by node so each node's + * `clusterResources('lxc')` snapshot is fetched exactly once and a single + * authenticated client is reused for that node's containers. * * @param {Array} containers - Container instances (each with `node`). * @param {object} Job - Job model. @@ -316,6 +396,9 @@ async function computeContainerStatus({ container, Job, api, snapshot }) { async function computeContainerStatuses(containers, Job) { const result = new Map(); + // One query for everyone's latest create/reconfigure jobs. + const jobsByContainer = await prefetchLatestJobs(containers, Job); + // Group by node id (containers without a node still get resolved, just without // any Proxmox facts). const byNode = new Map(); @@ -344,8 +427,12 @@ async function computeContainerStatuses(containers, Job) { // Resolve each container in the group sequentially per node (shares the // single authenticated client); different nodes run in parallel. for (const container of group) { + const jobs = jobsByContainer.get(container.id) || { + createJob: null, + reconfigureJob: null, + }; // eslint-disable-next-line no-await-in-loop - const status = await computeContainerStatus({ container, Job, api, snapshot }); + const status = await computeContainerStatus({ container, Job, api, snapshot, jobs }); result.set(container.id, status); } }), @@ -366,4 +453,6 @@ module.exports = { findInSnapshot, findLatestCreateJob, findLatestReconfigureJob, + prefetchLatestJobs, + containerIdFromCommand, }; From e790cfbc22a131d66cb659559affb723778ce91e Mon Sep 17 00:00:00 2001 From: Robert Gingras Date: Tue, 16 Jun 2026 10:48:10 -0400 Subject: [PATCH 15/15] Simplify live status to Proxmox run-state + create job (drop restarting, out-of-sync) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Resolving the container list was still slow because each container did per-item work: a command-string job lookup for 'restarting', and a Proxmox lxcConfig call for 'out-of-sync'. Both are inherently O(N) (Proxmox has no bulk config endpoint), which dominated index latency. Drop both statuses: - 'restarting': not used by known API clients (launchpad only checks == running), and reconfigure jobs have no FK so detecting it required command-string queries. - 'out-of-sync': required one lxcConfig call per in-Proxmox container; no bulk Proxmox config API exists to batch it. Status now derives only from: - Proxmox run-state, via one clusterResources('lxc') snapshot per node, and - the create job, resolved from the eager-loaded creationJob association (or the creationJobId FK) — no command-string queries. Result: the index makes one Proxmox call per node and zero per-container Proxmox or DB job queries (verified at 30 containers). Remaining statuses: running, offline, creating, failed, missing, unknown. PUT still enqueues a reconfigure job and returns its jobId; only the 'restarting' status label is removed. Updates the resolver, containers router (eager-load creationJob on the index), client types/badges, OpenAPI enum, and README. --- create-a-container/README.md | 10 +- create-a-container/client/src/lib/types.ts | 4 +- .../pages/containers/ContainersListPage.tsx | 4 - create-a-container/openapi.v1.yaml | 10 +- .../routers/api/v1/containers.js | 4 +- create-a-container/utils/container-status.js | 319 +++--------------- 6 files changed, 60 insertions(+), 291 deletions(-) diff --git a/create-a-container/README.md b/create-a-container/README.md index 3c5f596b..a9ba5832 100644 --- a/create-a-container/README.md +++ b/create-a-container/README.md @@ -233,16 +233,14 @@ Delete a container from both Proxmox and the database #### Container status (`status` field) Every container returned by the list, show, and create endpoints includes a **live** `status` field, computed on demand rather than read from a stored -column. It is resolved by combining the container's presence/run-state in -Proxmox, any active create/reconfigure jobs, and whether the live LXC config -matches what the API server expects. Possible values: +column. It is resolved by combining the container's run-state in Proxmox (from a +single per-node cluster snapshot) with the state of its create job. Possible +values: - `running` — online in Proxmox - `offline` — exists in Proxmox but stopped - `creating` — no Proxmox VM yet, active create job -- `restarting` — active reconfigure job -- `failed` — no Proxmox VM, last create job failed +- `failed` — no Proxmox VM, create job failed - `missing` — no Proxmox VM, create succeeded or no create job found -- `out-of-sync` — exists in Proxmox but its config differs from expectation - `unknown` — Proxmox unreachable / node has no API credentials The create endpoint (`POST /containers`) returns `creating` immediately, since a diff --git a/create-a-container/client/src/lib/types.ts b/create-a-container/client/src/lib/types.ts index e4e077d3..809db607 100644 --- a/create-a-container/client/src/lib/types.ts +++ b/create-a-container/client/src/lib/types.ts @@ -91,17 +91,15 @@ export interface Container { } /** - * Live container status resolved from Proxmox + job state + config drift. + * Live container status resolved from Proxmox run-state + create-job state. * Embedded on each Container returned by the list/show/create endpoints. */ export type ContainerStatus = | 'running' | 'offline' | 'creating' - | 'restarting' | 'failed' | 'missing' - | 'out-of-sync' | 'unknown'; export interface ContainerCreateResult { diff --git a/create-a-container/client/src/pages/containers/ContainersListPage.tsx b/create-a-container/client/src/pages/containers/ContainersListPage.tsx index 20f2de07..5539ebf6 100644 --- a/create-a-container/client/src/pages/containers/ContainersListPage.tsx +++ b/create-a-container/client/src/pages/containers/ContainersListPage.tsx @@ -46,8 +46,6 @@ function statusVariant( case 'running': return 'success'; case 'creating': - case 'restarting': - case 'out-of-sync': return 'warning'; case 'failed': return 'danger'; @@ -64,10 +62,8 @@ const STATUS_LABELS: Record = { running: 'Running', offline: 'Offline', creating: 'Creating', - restarting: 'Restarting', failed: 'Failed', missing: 'Missing', - 'out-of-sync': 'Out of sync', unknown: 'Unknown', }; diff --git a/create-a-container/openapi.v1.yaml b/create-a-container/openapi.v1.yaml index 540fb675..70e137e1 100644 --- a/create-a-container/openapi.v1.yaml +++ b/create-a-container/openapi.v1.yaml @@ -107,24 +107,20 @@ components: ContainerStatus: type: string description: >- - Live container status, resolved on read from Proxmox presence/run-state, - active jobs, and configuration drift. + Live container status, resolved on read from Proxmox run-state and the + create job. running = online in Proxmox; offline = exists in Proxmox but stopped; creating = no Proxmox VM yet, active create job; - restarting = active reconfigure job; - failed = no Proxmox VM, last create job failed; + failed = no Proxmox VM, create job failed; missing = no Proxmox VM, create succeeded or no create job; - out-of-sync = exists in Proxmox but config differs from expectation; unknown = Proxmox unreachable / no node credentials. enum: - running - offline - creating - - restarting - failed - missing - - out-of-sync - unknown Node: type: object diff --git a/create-a-container/routers/api/v1/containers.js b/create-a-container/routers/api/v1/containers.js index 73a698b2..8f6fbfe4 100644 --- a/create-a-container/routers/api/v1/containers.js +++ b/create-a-container/routers/api/v1/containers.js @@ -211,10 +211,12 @@ router.get( }, // Full node record (incl. credentials) is required to query live Proxmox status. { association: 'node' }, + // Eager-load the create job so status resolution needs no per-container query. + { association: 'creationJob' }, ], }); // Resolve live statuses for the whole page in one pass: one Proxmox snapshot - // per node (shared), rather than N independent round-trips from the browser. + // per node (shared), and no per-container DB queries (create job is loaded above). const statuses = await computeContainerStatuses(rows, Job); return ok( res, diff --git a/create-a-container/utils/container-status.js b/create-a-container/utils/container-status.js index 2c7189ce..0c94c9d5 100644 --- a/create-a-container/utils/container-status.js +++ b/create-a-container/utils/container-status.js @@ -5,68 +5,40 @@ * was only mutated by the create/reconfigure job scripts. That value drifts out * of reality whenever something changes a container directly in Proxmox (or when * a job dies without updating the row). This module computes the real status on - * demand by combining three sources of truth: + * demand by combining two sources of truth: * - * 1. Proxmox — does the LXC exist, and is it running or stopped? - * 2. Jobs — is there an active create/restart job, or did the last create - * job fail? - * 3. Config — does the live LXC config match what the API server expects? + * 1. Proxmox — does the LXC exist, and is it running or stopped? This comes + * from a single per-node `clusterResources('lxc')` snapshot. + * 2. Create job — for containers not yet in Proxmox: is the create job still + * active, did it fail, or is it gone? The create job is linked + * to the container by the `creationJobId` foreign key, so this + * is a cheap primary-key lookup (or free when eager-loaded). * - * Resolved statuses (a strict superset of the old running/offline values): - * running — exists in Proxmox and is online - * offline — exists in Proxmox but is stopped - * restarting — has an active (pending/running) reconfigure job - * creating — not in Proxmox, but has an active (pending/running) create job - * failed — not in Proxmox, last create job returned failure - * missing — not in Proxmox, create job succeeded or no create job found - * out-of-sync — exists in Proxmox but its config doesn't match expectation - * unknown — Proxmox could not be reached / node has no API credentials + * Resolved statuses: + * running — exists in Proxmox and is online + * offline — exists in Proxmox but is stopped + * creating — not in Proxmox, but has an active (pending/running) create job + * failed — not in Proxmox, create job returned failure + * missing — not in Proxmox, create job succeeded or no create job found + * unknown — Proxmox could not be reached / node has no API credentials + * + * Note: there is intentionally no per-container Proxmox config read here. Proxmox + * has no bulk config endpoint, so config-drift detection would be O(N) network + * round-trips for the list page; it was dropped in favour of keeping status + * resolution cheap (one Proxmox call per node, zero per container). */ -const { Op } = require('sequelize'); - const STATUS = Object.freeze({ RUNNING: 'running', OFFLINE: 'offline', CREATING: 'creating', - RESTARTING: 'restarting', FAILED: 'failed', MISSING: 'missing', - OUT_OF_SYNC: 'out-of-sync', UNKNOWN: 'unknown', }); const STATUS_VALUES = Object.freeze(Object.values(STATUS)); -// Job command builders. Jobs are distinguished solely by their `command` string -// (no type column), and the commands are constructed deterministically by the -// container router / resource-requests, so we match them exactly here. -// create: node bin/create-container.js --container-id= -// reconfigure: node bin/reconfigure-container.js --container-id=[ --=...] -const CREATE_PREFIX = 'node bin/create-container.js --container-id='; -const RECONFIGURE_PREFIX = 'node bin/reconfigure-container.js --container-id='; - -function createCommand(id) { - return `${CREATE_PREFIX}${id}`; -} -function reconfigureCommand(id) { - return `${RECONFIGURE_PREFIX}${id}`; -} - -// Extract the container id from a create/reconfigure job command, or null if the -// command isn't one of ours. The id is the run of digits immediately after a -// known prefix, terminated by end-of-string or a space (so 12 != 123). -const COMMAND_CONTAINER_ID_RE = new RegExp( - `^node bin/(?:create|reconfigure)-container\\.js --container-id=(\\d+)(?: |$)`, -); -function containerIdFromCommand(command) { - const m = COMMAND_CONTAINER_ID_RE.exec(command || ''); - return m ? parseInt(m[1], 10) : null; -} -function isCreateCommand(command) { - return typeof command === 'string' && command.startsWith(CREATE_PREFIX); -} - const ACTIVE_JOB_STATUSES = ['pending', 'running']; function nodeHasCreds(node) { @@ -74,167 +46,24 @@ function nodeHasCreds(node) { } /** - * Find the most recent create job for a container. - * Prefers the explicit creationJobId FK; falls back to an exact command match. - * Create commands never carry trailing arguments, so the match is exact. - * @param {object} container - Container instance (with optional creationJob assoc) + * Resolve the create job for a container via its `creationJobId` foreign key. + * Prefers an already-loaded `creationJob` association (zero queries); otherwise + * does a single primary-key lookup. Returns null if the container has no linked + * create job. + * @param {object} container - Container instance (ideally with creationJob assoc) * @param {object} Job - Job model * @returns {Promise} */ -async function findLatestCreateJob(container, Job) { +async function findCreateJob(container, Job) { if (container.creationJob) return container.creationJob; - if (container.creationJobId) { - return Job.findByPk(container.creationJobId); - } - return Job.findOne({ - where: { command: createCommand(container.id) }, - order: [['createdAt', 'DESC']], - }); -} - -/** - * Find the most recent reconfigure (restart) job for a container, if any. - * A reconfigure command is either exactly ` --container-id=` or that - * followed by ` --=` flags, so we match the exact form OR the - * exact form followed by a space (the latter guards against id 12 matching 123). - * @param {object} container - Container instance - * @param {object} Job - Job model - * @returns {Promise} - */ -async function findLatestReconfigureJob(container, Job) { - const base = reconfigureCommand(container.id); - return Job.findOne({ - where: { - [Op.or]: [{ command: base }, { command: { [Op.like]: `${base} %` } }], - }, - order: [['createdAt', 'DESC']], - }); -} - -/** - * Prefetch the latest create and reconfigure job for many containers in a single - * DB query, so resolving N containers' statuses is O(1) queries instead of O(N). - * - * Jobs are matched by their deterministic command strings (exact create command, - * and reconfigure command optionally followed by resource flags) for the given - * container ids, fetched newest-first, then bucketed per container keeping the - * first (latest) of each kind. - * - * @param {Array} containers - Container instances. - * @param {object} Job - Job model. - * @returns {Promise>} - */ -async function prefetchLatestJobs(containers, Job) { - const byContainer = new Map(); - const ids = []; - for (const c of containers) { - byContainer.set(c.id, { createJob: null, reconfigureJob: null }); - ids.push(c.id); - } - if (ids.length === 0) return byContainer; - - // One query for all relevant jobs, with a bounded number of WHERE clauses - // (independent of N): - // - exact create commands for these ids - // - exact (arg-less) reconfigure commands for these ids - // - any reconfigure command carrying trailing flags (prefix LIKE) - // The prefix LIKE may match reconfigure jobs for containers not on this page; - // those are discarded during bucketing below (byContainer lookup). - const orClauses = [ - { command: { [Op.in]: ids.map(createCommand) } }, - { command: { [Op.in]: ids.map(reconfigureCommand) } }, - { command: { [Op.like]: `${RECONFIGURE_PREFIX}%` } }, - ]; - const jobs = await Job.findAll({ - where: { [Op.or]: orClauses }, - order: [['createdAt', 'DESC']], - }); - - // Bucket newest-first; the first job seen for a (container, kind) is the latest. - for (const job of jobs) { - const id = containerIdFromCommand(job.command); - if (id == null) continue; - const entry = byContainer.get(id); - if (!entry) continue; - if (isCreateCommand(job.command)) { - if (!entry.createJob) entry.createJob = job; - } else if (!entry.reconfigureJob) { - entry.reconfigureJob = job; - } - } - return byContainer; + if (container.creationJobId) return Job.findByPk(container.creationJobId); + return null; } function isActiveJob(job) { return !!job && ACTIVE_JOB_STATUSES.includes(job.status); } -/** - * Parse a NUL-separated Proxmox env string ("K=V\0K2=V2") into an object. - * @param {string|null|undefined} envStr - * @returns {Object} - */ -function parseEnvString(envStr) { - const out = {}; - if (!envStr) return out; - for (const pair of String(envStr).split('\0')) { - if (!pair) continue; - const eq = pair.indexOf('='); - if (eq > 0) out[pair.substring(0, eq)] = pair.substring(eq + 1); - } - return out; -} - -function shallowEqualMap(a, b) { - const ak = Object.keys(a); - const bk = Object.keys(b); - if (ak.length !== bk.length) return false; - for (const k of ak) { - if (a[k] !== b[k]) return false; - } - return true; -} - -/** - * Compare the env/entrypoint the API server expects against the live LXC config. - * - * The expectation is built the same way the reconfigure job builds it - * (`container.buildLxcEnvConfig()`), so this mirrors exactly what *would* be sent - * to Proxmox. Env is compared as an unordered set of key=value pairs because the - * Proxmox API does not preserve ordering. Returns true when the live config - * matches the expectation (i.e. in sync). - * - * @param {object} container - Container instance (provides buildLxcEnvConfig) - * @param {object} liveConfig - Result of ProxmoxApi.lxcConfig(node, vmid) - * @returns {boolean} true if in sync, false if drifted - */ -function configMatches(container, liveConfig) { - const expected = - typeof container.buildLxcEnvConfig === 'function' ? container.buildLxcEnvConfig() : {}; - const expectedDeletes = new Set( - (expected.delete ? String(expected.delete).split(',') : []).map((s) => s.trim()), - ); - - // --- entrypoint --- - const liveEntrypoint = liveConfig?.entrypoint || null; - if (expectedDeletes.has('entrypoint')) { - if (liveEntrypoint) return false; // expected absent, but present live - } else if ((expected.entrypoint || null) !== liveEntrypoint) { - return false; - } - - // --- env --- - const liveEnv = parseEnvString(liveConfig?.env); - if (expectedDeletes.has('env')) { - if (Object.keys(liveEnv).length > 0) return false; // expected none, but some live - } else { - const expectedEnv = parseEnvString(expected.env); - if (!shallowEqualMap(expectedEnv, liveEnv)) return false; - } - - return true; -} - /** * Locate a container in a cluster-resources snapshot by VMID. * @param {Array|null} snapshot - Result of ProxmoxApi.clusterResources('lxc') @@ -254,23 +83,17 @@ function findInSnapshot(snapshot, vmid) { * @param {boolean} facts.proxmoxReachable - Whether Proxmox was queried OK. * @param {boolean} facts.inProxmox - Whether the LXC exists in Proxmox. * @param {boolean} facts.proxmoxRunning - Whether the LXC is running. - * @param {boolean} facts.inSync - Whether the live config matches expectation. * @param {boolean} facts.hasVmid - Whether the container has a Proxmox VMID. * @param {boolean} facts.creating - Active create job present. - * @param {boolean} facts.restarting - Active reconfigure job present. - * @param {boolean} facts.createFailed - Latest create job returned failure. - * @param {boolean} facts.hasCreateJob - A create job exists at all. + * @param {boolean} facts.createFailed - Create job returned failure. * @returns {string} STATUS value */ function decideStatus(facts) { if (facts.inProxmox) { - if (facts.restarting) return STATUS.RESTARTING; - if (facts.inSync === false) return STATUS.OUT_OF_SYNC; return facts.proxmoxRunning ? STATUS.RUNNING : STATUS.OFFLINE; } if (facts.creating) return STATUS.CREATING; - if (facts.restarting) return STATUS.RESTARTING; // We had a VMID + creds but couldn't reach Proxmox: don't guess missing. if (facts.hasVmid && !facts.proxmoxReachable) return STATUS.UNKNOWN; @@ -283,30 +106,21 @@ function decideStatus(facts) { * Compute the live status of a single container. * * @param {object} params - * @param {object} params.container - Container instance (with `node` association). + * @param {object} params.container - Container instance (with `node` association, + * and ideally the `creationJob` association eager-loaded). * @param {object} params.Job - Job model (from models). * @param {object} [params.api] - Optional pre-authenticated ProxmoxApi client for * the container's node (lets callers reuse one client across many containers). * @param {{ data: Array, ok: boolean }} [params.snapshot] - Optional * pre-fetched clusterResources('lxc') snapshot for the node. `ok` indicates the * query succeeded; `data` is the resource list (used for batching the index). - * @param {{ createJob: object|null, reconfigureJob: object|null }} [params.jobs] - - * Optional prefetched latest create/reconfigure jobs for this container (see - * prefetchLatestJobs). When provided, the per-container job DB lookups are - * skipped, making batch resolution O(1) queries instead of O(N). * @returns {Promise} One of the STATUS values. */ -async function computeContainerStatus({ container, Job, api, snapshot, jobs }) { +async function computeContainerStatus({ container, Job, api, snapshot }) { const node = container.node; const hasCreds = nodeHasCreds(node); const hasVmid = container.containerId != null; - let client = api || null; - async function getClient() { - if (!client) client = await node.api(); - return client; - } - // --- Determine Proxmox presence / run state --- let proxmoxReachable = false; let inProxmox = false; @@ -319,8 +133,9 @@ async function computeContainerStatus({ container, Job, api, snapshot, jobs }) { proxmoxReachable = !!snapshot.ok; resources = snapshot.ok ? snapshot.data : null; } else { - const c = await getClient(); - resources = await c.clusterResources('lxc'); + // Single-container path (no shared snapshot): fetch this node's once. + const client = api || (await node.api()); + resources = await client.clusterResources('lxc'); proxmoxReachable = true; } if (proxmoxReachable) { @@ -335,33 +150,13 @@ async function computeContainerStatus({ container, Job, api, snapshot, jobs }) { } } - // --- Jobs --- - // Use prefetched jobs when supplied (batch path); otherwise query per container. - const reconfigureJob = jobs - ? jobs.reconfigureJob - : await findLatestReconfigureJob(container, Job); - const restarting = isActiveJob(reconfigureJob); - - // Drift detection (only meaningful if it exists and isn't already restarting). - let inSync = null; - if (inProxmox && !restarting && hasCreds) { - try { - const c = await getClient(); - const liveConfig = await c.lxcConfig(node.name, container.containerId); - inSync = configMatches(container, liveConfig); - } catch (err) { - inSync = null; // can't assert drift - } - } - - // Only look up the create job when we actually need it (container not running - // in Proxmox), to avoid an extra query on the happy path. + // Only inspect the create job when the container isn't in Proxmox, to + // distinguish creating / failed / missing. The create job is linked by FK + // (creationJobId), so this is a cheap PK lookup or free when eager-loaded. let creating = false; let createFailed = false; - let hasCreateJob = false; if (!inProxmox) { - const createJob = jobs ? jobs.createJob : await findLatestCreateJob(container, Job); - hasCreateJob = !!createJob; + const createJob = await findCreateJob(container, Job); creating = isActiveJob(createJob); createFailed = !!createJob && createJob.status === 'failure'; } @@ -370,35 +165,29 @@ async function computeContainerStatus({ container, Job, api, snapshot, jobs }) { proxmoxReachable, inProxmox, proxmoxRunning, - inSync, hasVmid, creating, - restarting, createFailed, - hasCreateJob, }); } /** * Compute live statuses for many containers efficiently. * - * Two batching strategies keep this fast for the list page: - * - Jobs: a single DB query fetches the latest create/reconfigure job for every - * container up front (O(1) queries instead of O(N)). + * No per-container Proxmox calls and no per-container DB queries are issued: * - Proxmox: containers are grouped by node so each node's - * `clusterResources('lxc')` snapshot is fetched exactly once and a single - * authenticated client is reused for that node's containers. + * `clusterResources('lxc')` snapshot is fetched exactly once. + * - Create job: resolved from each container's eager-loaded `creationJob` + * association (or its creationJobId FK). * - * @param {Array} containers - Container instances (each with `node`). + * @param {Array} containers - Container instances (each with `node`, and + * ideally the `creationJob` association loaded to avoid per-container queries). * @param {object} Job - Job model. * @returns {Promise>} Map of container.id -> STATUS value. */ async function computeContainerStatuses(containers, Job) { const result = new Map(); - // One query for everyone's latest create/reconfigure jobs. - const jobsByContainer = await prefetchLatestJobs(containers, Job); - // Group by node id (containers without a node still get resolved, just without // any Proxmox facts). const byNode = new Map(); @@ -411,12 +200,11 @@ async function computeContainerStatuses(containers, Job) { await Promise.all( Array.from(byNode.values()).map(async (group) => { const node = group[0].node; - let api = null; let snapshot = { ok: false, data: null }; if (nodeHasCreds(node)) { try { - api = await node.api(); + const api = await node.api(); const data = await api.clusterResources('lxc'); snapshot = { ok: true, data }; } catch (err) { @@ -424,15 +212,11 @@ async function computeContainerStatuses(containers, Job) { } } - // Resolve each container in the group sequentially per node (shares the - // single authenticated client); different nodes run in parallel. + // Resolution is now CPU-only per container (no Proxmox/DB I/O when the + // create job is eager-loaded), so the snapshot is reused for all of them. for (const container of group) { - const jobs = jobsByContainer.get(container.id) || { - createJob: null, - reconfigureJob: null, - }; // eslint-disable-next-line no-await-in-loop - const status = await computeContainerStatus({ container, Job, api, snapshot, jobs }); + const status = await computeContainerStatus({ container, Job, snapshot }); result.set(container.id, status); } }), @@ -448,11 +232,6 @@ module.exports = { computeContainerStatuses, decideStatus, // exported for reuse / testing - configMatches, - parseEnvString, findInSnapshot, - findLatestCreateJob, - findLatestReconfigureJob, - prefetchLatestJobs, - containerIdFromCommand, + findCreateJob, };