diff --git a/create-a-container/README.md b/create-a-container/README.md index 4c135ef0..a9ba5832 100644 --- a/create-a-container/README.md +++ b/create-a-container/README.md @@ -22,7 +22,6 @@ erDiagram int id PK string hostname UK "FQDN hostname" string username "Owner username" - string status "pending,creating,running,failed" string template "Template name" int creationJobId FK "References Job" int nodeId FK "References Node" @@ -231,6 +230,22 @@ Delete a container from both Proxmox and the database - `403` - User doesn't own the container - `500` - Proxmox API deletion failed or node not configured +#### Container status (`status` field) +Every container returned by the list, show, and create endpoints includes a +**live** `status` field, computed on demand rather than read from a stored +column. It is resolved by combining the container's run-state in Proxmox (from a +single per-node cluster snapshot) with the state of its create job. Possible +values: +- `running` — online in Proxmox +- `offline` — exists in Proxmox but stopped +- `creating` — no Proxmox VM yet, active create job +- `failed` — no Proxmox VM, create job failed +- `missing` — no Proxmox VM, create succeeded or no create job found +- `unknown` — Proxmox unreachable / node has no API credentials + +The create endpoint (`POST /containers`) returns `creating` immediately, since a +create job is enqueued and there is no Proxmox VM yet. + #### `GET /status/:jobId` (Auth Required) View container creation progress page diff --git a/create-a-container/bin/create-container.js b/create-a-container/bin/create-container.js index 766ed815..c4098a45 100755 --- a/create-a-container/bin/create-container.js +++ b/create-a-container/bin/create-container.js @@ -177,9 +177,14 @@ async function main() { console.error(`Container with ID ${containerId} not found`); process.exit(1); } - - if (container.status !== 'pending') { - console.error(`Container is not in pending status (current: ${container.status})`); + + // Guard against double-provisioning. The status column no longer exists, so + // we use the Proxmox VMID as the signal: once a VMID has been allocated and + // stored, creation has already run for this container. + if (container.containerId) { + console.error( + `Container already has a Proxmox VMID (${container.containerId}); refusing to re-create`, + ); process.exit(1); } @@ -217,10 +222,6 @@ async function main() { console.log(`Template type: ${isDocker ? 'Docker image' : 'Proxmox template'}`); try { - // Update status to 'creating' - await container.update({ status: 'creating' }); - console.log('Status updated to: creating'); - // Get the Proxmox API client const client = await node.api(); console.log('Proxmox API client initialized'); @@ -340,64 +341,23 @@ async function main() { console.log('Container configured'); } - // Apply environment variables and entrypoint - // First read defaults from the image, then merge with user-specified values - const defaultConfig = await client.lxcConfig(node.name, vmid); - const defaultEntrypoint = defaultConfig['entrypoint'] || null; - const defaultEnvStr = defaultConfig['env'] || null; - - // Parse default env vars - let mergedEnvVars = {}; - if (defaultEnvStr) { - const pairs = defaultEnvStr.split('\0'); - for (const pair of pairs) { - const eqIndex = pair.indexOf('='); - if (eqIndex > 0) { - mergedEnvVars[pair.substring(0, eqIndex)] = pair.substring(eqIndex + 1); - } - } - } - - // Merge user-specified env vars (user values override defaults) - const userEnvVars = container.environmentVars ? JSON.parse(container.environmentVars) : {}; + // Snapshot the template's env/entrypoint onto the container record now, as + // if the user had supplied them (user-supplied values still win). Templates + // are mutable Docker refs we can't re-query on a later reconfigure, so we + // persist them here; otherwise a future reconfigure (which uses + // deleteMissing) would unset template-provided values that were never + // stored. System/NVIDIA defaults are intentionally left out — they stay + // configure-time-only. + const templateConfig = await client.lxcConfig(node.name, vmid); + await container.persistTemplateDefaults(templateConfig); - // Load system-wide default env vars from Settings. - // Descriptions are metadata only and are not passed into the container. - let systemDefaultEnvVars = {}; - try { - const entries = await Setting.getDefaultContainerEnvVars(); - for (const entry of entries) { - if (entry.key && entry.key.trim()) { - systemDefaultEnvVars[entry.key.trim()] = entry.value || ''; - } - } - } catch (_) { - console.warn('Could not load default_container_env_vars from settings, skipping'); - } - - // Merge priority: image defaults < system defaults < per-container user values - mergedEnvVars = { ...mergedEnvVars, ...systemDefaultEnvVars, ...userEnvVars }; - - // Use user entrypoint if specified, otherwise keep default - const finalEntrypoint = container.entrypoint || defaultEntrypoint; - - // Build config to apply - const envConfig = {}; - if (finalEntrypoint) { - envConfig.entrypoint = finalEntrypoint; - } - if (Object.keys(mergedEnvVars).length > 0) { - envConfig.env = Object.entries(mergedEnvVars) - .map(([key, value]) => `${key}=${value}`) - .join('\0'); - } - + // Apply environment variables and entrypoint. Use the default + // (deleteMissing=false): only explicit values are pushed, nothing is unset. + // The record now already includes the template's values, and system/NVIDIA + // defaults are merged in by buildLxcEnvConfig. + const envConfig = await container.buildLxcEnvConfig(); if (Object.keys(envConfig).length > 0) { console.log('Applying environment variables and entrypoint...'); - if (defaultEntrypoint) console.log(`Default entrypoint: ${defaultEntrypoint}`); - if (defaultEnvStr) console.log(`Image default env vars: ${Object.keys(mergedEnvVars).length - Object.keys(userEnvVars).length - Object.keys(systemDefaultEnvVars).length}`); - if (Object.keys(systemDefaultEnvVars).length > 0) console.log(`System default env vars: ${Object.keys(systemDefaultEnvVars).length} from settings`); - if (Object.keys(userEnvVars).length > 0) console.log(`Per-container env vars: ${Object.keys(userEnvVars).length}`); await client.updateLxcConfig(node.name, vmid, envConfig); console.log('Environment/entrypoint configuration applied'); } @@ -447,11 +407,9 @@ async function main() { throw new Error('Could not extract MAC address from container configuration'); } - // Read back entrypoint and environment variables from config + // Read back configuration from Proxmox. console.log('Querying container configuration...'); const config = await client.lxcConfig(node.name, vmid); - const actualEntrypoint = config['entrypoint'] || null; - const actualEnv = config['env'] || null; // Read back the actual provisioned resources so downstream systems // (e.g. NetBox) mirror what the container really has rather than assuming @@ -459,28 +417,7 @@ async function main() { const actualCores = config['cores'] != null ? parseInt(config['cores'], 10) : null; const actualMemoryMb = config['memory'] != null ? parseInt(config['memory'], 10) : null; const actualDiskGb = parseRootfsSizeGb(config['rootfs']); - - // Parse NUL-separated env string back to JSON object - let environmentVars = {}; - if (actualEnv) { - const pairs = actualEnv.split('\0'); - for (const pair of pairs) { - const eqIndex = pair.indexOf('='); - if (eqIndex > 0) { - const key = pair.substring(0, eqIndex); - const value = pair.substring(eqIndex + 1); - environmentVars[key] = value; - } - } - } - - if (actualEntrypoint) { - console.log(`Entrypoint: ${actualEntrypoint}`); - } - if (Object.keys(environmentVars).length > 0) { - console.log(`Environment variables: ${Object.keys(environmentVars).length} vars`); - } - + // Get IP address from Proxmox interfaces API const ipv4Address = await client.getLxcIpAddress(node.name, vmid); @@ -492,10 +429,7 @@ async function main() { console.log('Updating container record...'); await container.update({ macAddress, - ipv4Address, - entrypoint: actualEntrypoint, - environmentVars: JSON.stringify(environmentVars), - status: 'running' + ipv4Address }); console.log('Container creation completed successfully!'); @@ -546,15 +480,7 @@ async function main() { if (err.response?.data) { console.error('API Error Details:', JSON.stringify(err.response.data, null, 2)); } - - // Update status to failed - try { - await container.update({ status: 'failed' }); - console.log('Status updated to: failed'); - } catch (updateErr) { - console.error('Failed to update container status:', updateErr.message); - } - + process.exit(1); } } diff --git a/create-a-container/bin/json-to-sql.js b/create-a-container/bin/json-to-sql.js index 94c02243..f55af4d5 100644 --- a/create-a-container/bin/json-to-sql.js +++ b/create-a-container/bin/json-to-sql.js @@ -222,7 +222,6 @@ async function run() { hostname, ipv4Address: obj.ip, username: obj.user || '', - status: 'running', template: obj.template || null, containerId: obj.ctid, macAddress: obj.mac, @@ -234,7 +233,6 @@ async function run() { await container.update({ ipv4Address: obj.ip, username: obj.user || '', - status: container.status || 'running', template: obj.template || container.template, containerId: obj.ctid, macAddress: obj.mac diff --git a/create-a-container/bin/reconfigure-container.js b/create-a-container/bin/reconfigure-container.js index 5bcebf3a..1f702ce9 100644 --- a/create-a-container/bin/reconfigure-container.js +++ b/create-a-container/bin/reconfigure-container.js @@ -80,8 +80,11 @@ async function main() { const client = await node.api(); console.log('Proxmox API client initialized'); - // Build config from environment variables and entrypoint - const lxcConfig = container.buildLxcEnvConfig(); + // Build config from environment variables and entrypoint. Pass + // deleteMissing so that clearing env vars or removing a custom entrypoint + // actually unsets them on the existing container (vs. create, which must + // preserve template-provided values). + const lxcConfig = await container.buildLxcEnvConfig({ deleteMissing: true }); if (Object.keys(lxcConfig).length > 0) { console.log('Applying LXC configuration...'); @@ -157,14 +160,13 @@ async function main() { throw new Error('Could not get IP address from Proxmox interfaces API'); } - // Update container record with MAC/IP and running status + // Update container record with MAC/IP await container.update({ - status: 'running', macAddress, ipv4Address }); - console.log('Status updated to: running'); + console.log('Reconfiguration applied; container running'); console.log(` MAC: ${macAddress}`); console.log(` IP: ${ipv4Address}`); } @@ -178,15 +180,7 @@ async function main() { if (err.response?.data) { console.error('API Error Details:', JSON.stringify(err.response.data, null, 2)); } - - // Update status to failed - try { - await container.update({ status: 'failed' }); - console.log('Status updated to: failed'); - } catch (updateErr) { - console.error('Failed to update container status:', updateErr.message); - } - + process.exit(1); } } diff --git a/create-a-container/client/src/lib/types.ts b/create-a-container/client/src/lib/types.ts index 85701ab3..809db607 100644 --- a/create-a-container/client/src/lib/types.ts +++ b/create-a-container/client/src/lib/types.ts @@ -75,7 +75,7 @@ export interface Container { hostname: string; ipv4Address: string | null; macAddress: string | null; - status: string; + status: ContainerStatus; template: string | null; creationJobId: number | null; entrypoint: string | null; @@ -90,11 +90,23 @@ export interface Container { createdAt: string; } +/** + * Live container status resolved from Proxmox run-state + create-job state. + * Embedded on each Container returned by the list/show/create endpoints. + */ +export type ContainerStatus = + | 'running' + | 'offline' + | 'creating' + | 'failed' + | 'missing' + | 'unknown'; + export interface ContainerCreateResult { containerId: number; jobId: number; hostname: string; - status: string; + status: ContainerStatus; } export interface ContainerNewBootstrap { diff --git a/create-a-container/client/src/pages/containers/ContainersListPage.tsx b/create-a-container/client/src/pages/containers/ContainersListPage.tsx index ec5a195f..5539ebf6 100644 --- a/create-a-container/client/src/pages/containers/ContainersListPage.tsx +++ b/create-a-container/client/src/pages/containers/ContainersListPage.tsx @@ -34,28 +34,44 @@ import { import { api, ApiError } from '@/lib/api'; import { useSession } from '@/lib/auth'; import { keys, queries } from '@/lib/queries'; -import type { Container } from '@/lib/types'; +import type { Container, ContainerStatus } from '@/lib/types'; type ViewMode = 'cards' | 'table'; const VIEW_STORAGE_KEY = 'containers:view'; -function statusVariant(s: string): 'default' | 'success' | 'warning' | 'danger' | 'secondary' { +function statusVariant( + s: ContainerStatus, +): 'default' | 'success' | 'warning' | 'danger' | 'secondary' { switch (s) { case 'running': return 'success'; - case 'pending': - case 'restarting': + case 'creating': return 'warning'; case 'failed': - case 'error': return 'danger'; - case 'stopped': + case 'offline': + case 'missing': return 'secondary'; default: return 'default'; } } +// Human-readable labels for the live status values. +const STATUS_LABELS: Record = { + running: 'Running', + offline: 'Offline', + creating: 'Creating', + failed: 'Failed', + missing: 'Missing', + unknown: 'Unknown', +}; + +/** Status badge. The status is the live value embedded in the list response. */ +function StatusBadge({ status }: { status: ContainerStatus }) { + return {STATUS_LABELS[status] ?? status}; +} + const linkClass = 'text-(--color-primary,#1d4ed8) hover:underline'; /** Shorten a full image ref to just its name+tag, e.g. ghcr.io/mieweb/base:latest -> base:latest */ @@ -330,7 +346,7 @@ export function ContainersListPage() { {c.hostname} - {c.status} +
@@ -374,7 +390,7 @@ export function ContainersListPage() { {c.hostname} - {c.status} + diff --git a/create-a-container/migrations/20260615000000-remove-container-status.js b/create-a-container/migrations/20260615000000-remove-container-status.js new file mode 100644 index 00000000..39d7b50e --- /dev/null +++ b/create-a-container/migrations/20260615000000-remove-container-status.js @@ -0,0 +1,26 @@ +'use strict'; +/** @type {import('sequelize-cli').Migration} */ + +/** + * Remove the static `status` column from Containers. + * + * Container status is now computed live from Proxmox + job state + config drift + * (see utils/container-status.js) and embedded on the container API payloads, so + * the persisted column is no longer a source of truth and is dropped. + * + * The down migration restores the column (defaulting to 'running') for rollback, + * but the historical per-container value cannot be recovered. + */ +module.exports = { + async up(queryInterface) { + await queryInterface.removeColumn('Containers', 'status'); + }, + + async down(queryInterface, Sequelize) { + await queryInterface.addColumn('Containers', 'status', { + type: Sequelize.STRING(20), + allowNull: false, + defaultValue: 'running', + }); + }, +}; diff --git a/create-a-container/models/container.js b/create-a-container/models/container.js index e0ae3f18..b6bfc83e 100644 --- a/create-a-container/models/container.js +++ b/create-a-container/models/container.js @@ -21,41 +21,227 @@ module.exports = (sequelize, DataTypes) => { } /** - * Build LXC config object for environment variables and entrypoint - * Returns config suitable for Proxmox API updateLxcConfig - * @returns {object} Config object with 'env' and 'entrypoint' properties + * Normalize a set of environment variables into a safe, flat + * { KEY: stringValue } object suitable for building the Proxmox `env` + * string. This is the single place that decides what a valid env var is. + * + * Rules (applied to every source — user input, settings, image defaults): + * - Input that is not a plain object (e.g. an array or null) yields {}. + * - Keys are trimmed. A key is dropped unless it is a non-empty string with + * no whitespace and no `=` or NUL — i.e. it matches a conventional env + * var name. This prevents a key from corrupting the NUL-separated + * `KEY=value` encoding. + * - Values are coerced to strings. Entries whose value is null/undefined or + * a non-primitive (object/array) are dropped rather than stringified to + * something like "[object Object]". Values containing NUL are also + * dropped, since NUL is the pair separator. + * + * @param {*} input - Candidate env vars, ideally a { key: value } object + * @returns {object} Flat object of validated { KEY: stringValue } */ - buildLxcEnvConfig() { - const config = {}; - const deleteList = []; - - // Parse environment variables from JSON and format as NUL-separated list - // Format: KEY1=value1\0KEY2=value2\0KEY3=value3 - if (this.environmentVars) { - try { - const envObj = JSON.parse(this.environmentVars); - const envPairs = []; - for (const [key, value] of Object.entries(envObj)) { - if (key && value !== undefined) { - envPairs.push(`${key}=${value}`); - } - } - if (envPairs.length > 0) { - config['env'] = envPairs.join('\0'); - } else { - deleteList.push('env'); + static normalizeEnvVars(input) { + const out = {}; + if (!input || typeof input !== 'object' || Array.isArray(input)) return out; + + // Conventional env var name: starts with a letter or underscore, followed + // by letters, digits, or underscores. Excludes `=`, NUL, whitespace, etc. + const validKey = /^[A-Za-z_][A-Za-z0-9_]*$/; + + for (const [rawKey, rawValue] of Object.entries(input)) { + const key = typeof rawKey === 'string' ? rawKey.trim() : ''; + if (!validKey.test(key)) continue; + + // Only primitives (string/number/boolean) become values; skip + // null/undefined and objects/arrays. + if (rawValue === null || rawValue === undefined) continue; + if (typeof rawValue === 'object') continue; + const value = String(rawValue); + if (value.includes('\0')) continue; + + out[key] = value; + } + return out; + } + + /** + * Parse a Proxmox LXC `env` string (NUL-separated `KEY=value` pairs) into a + * normalized, flat { KEY: stringValue } object. Anything malformed is + * dropped via normalizeEnvVars. The inverse of the encoding done in + * buildLxcEnvConfig. + * @param {string|null|undefined} envStr - Raw Proxmox `env` value + * @returns {object} Flat object of validated { KEY: value } + */ + static parseLxcEnvString(envStr) { + if (!envStr || typeof envStr !== 'string') return {}; + const raw = {}; + for (const pair of envStr.split('\0')) { + const eq = pair.indexOf('='); + if (eq > 0) raw[pair.substring(0, eq)] = pair.substring(eq + 1); + } + return this.normalizeEnvVars(raw); + } + + /** + * Internal helper for buildLxcEnvConfig. + * Load the admin-defined system default environment variables from the + * Settings table, flattened to a validated { KEY: value } object. + * Descriptions are metadata only and are not included. Returns an empty + * object if the setting is missing or malformed. + * @returns {Promise} Flat object of { KEY: value } system defaults + */ + static async getSystemDefaultEnvVars() { + const Setting = this.sequelize.models.Setting; + const raw = {}; + try { + const entries = await Setting.getDefaultContainerEnvVars(); + for (const entry of entries) { + // getDefaultContainerEnvVars yields { key, value, description }. + if (entry && typeof entry.key === 'string') { + raw[entry.key] = entry.value; } - } catch (err) { - console.error('Failed to parse environment variables JSON:', err.message); } - } else { + } catch (_) { + console.warn('Could not load default_container_env_vars from settings, skipping'); + } + return this.normalizeEnvVars(raw); + } + + /** + * Internal helper for buildLxcEnvConfig. + * Parse the container's user-defined environment variables into a validated, + * flat { KEY: value } object. The database record only ever stores the + * variables the user explicitly provided — admin/system, image, and NVIDIA + * defaults are merged in at configure-time (see buildLxcEnvConfig) and are + * intentionally NOT persisted. + * @returns {object} Flat object of validated { KEY: value } user env vars + */ + parseEnvironmentVars() { + if (!this.environmentVars) return {}; + try { + return this.constructor.normalizeEnvVars(JSON.parse(this.environmentVars)); + } catch (err) { + console.error('Failed to parse environment variables JSON:', err.message); + return {}; + } + } + + /** + * Internal helper for buildLxcEnvConfig. + * Environment variables implied by this container's configuration that are + * applied as defaults but never stored in the DB record. Currently this is + * the NVIDIA GPU passthrough defaults, applied when nvidiaRequested is set. + * @returns {object} Flat object of { KEY: value } defaults + */ + nvidiaDefaultEnvVars() { + if (!this.nvidiaRequested) return {}; + return { + NVIDIA_VISIBLE_DEVICES: 'all', + NVIDIA_DRIVER_CAPABILITIES: 'utility compute' + }; + } + + /** + * Fold a template's environment variables and entrypoint into this + * container's persisted record, as if the user had supplied them. + * + * Called once at creation, after the template has been cloned, with the + * template's own LXC config. We can't recover these values later (templates + * are mutable Docker refs that may change or disappear before a reconfigure, + * and we don't cache them), so we snapshot them onto the record now. This + * keeps env/entrypoint stable across future reconfigures — which use + * deleteMissing and would otherwise unset template-provided values that were + * never stored. + * + * Precedence is template < user (user-supplied values win). System and + * NVIDIA defaults are intentionally NOT folded in here; they remain + * configure-time-only so they are not frozen into the record. + * + * Persists and returns nothing; mutates this instance. + * + * @param {object} templateConfig - The cloned template's LXC config + * @param {string} [templateConfig.env] - Proxmox NUL-separated `env` string + * @param {string} [templateConfig.entrypoint] - Template entrypoint + */ + async persistTemplateDefaults(templateConfig = {}) { + const templateEnv = this.constructor.parseLxcEnvString(templateConfig.env); + // template < user + const mergedEnv = { ...templateEnv, ...this.parseEnvironmentVars() }; + + const templateEntrypoint = + typeof templateConfig.entrypoint === 'string' && templateConfig.entrypoint.trim() + ? templateConfig.entrypoint.trim() + : null; + const userEntrypoint = this.entrypoint && this.entrypoint.trim() ? this.entrypoint.trim() : null; + const mergedEntrypoint = userEntrypoint || templateEntrypoint; + + await this.update({ + environmentVars: Object.keys(mergedEnv).length > 0 ? JSON.stringify(mergedEnv) : null, + entrypoint: mergedEntrypoint + }); + } + + /** + * Build the LXC config object for environment variables and entrypoint to + * deploy to Proxmox via updateLxcConfig. + * + * This is the single entrypoint for determining a container's effective + * environment. It owns the full merge of every env-var source, applied here + * at configure-time rather than being baked into the container's DB record: + * + * admin-defined system defaults < NVIDIA defaults < user-defined values + * + * (Image-provided defaults are submitted by the UI as user-defined values, + * so they arrive via parseEnvironmentVars and need no special handling.) + * + * Proxmox's config endpoint is a partial update: keys present in the body are + * set, omitted keys are left untouched, and a key is only removed if named in + * the special `delete` parameter. That distinction matters here: + * + * - On create, the container has just been cloned from a template that may + * carry its own `env`/`entrypoint`. We must NOT delete those when the user + * didn't provide a value, or we'd wipe the template's defaults. So the + * default (deleteMissing=false) simply omits anything with no value. + * - On reconfigure, the user may have cleared their last env var or removed a + * custom entrypoint, and that change must take effect — so the caller + * passes deleteMissing=true to emit `delete` for the now-empty fields. + * + * @param {object} [options] + * @param {boolean} [options.deleteMissing=false] - When true, env/entrypoint + * that resolve to empty are added to Proxmox's `delete` list (removing any + * existing value). When false, they are simply omitted, preserving whatever + * the container/template already has. + * @returns {Promise} Config object with 'env' and 'entrypoint' + * properties (and, when deleteMissing is set, a 'delete' list) + */ + async buildLxcEnvConfig({ deleteMissing = false } = {}) { + const config = {}; + const deleteList = []; + + // Merge precedence (lowest to highest): + // system defaults < NVIDIA defaults < user-defined values + // Every source is already normalized to a safe { KEY: stringValue } map + // (see normalizeEnvVars), so the encoding below cannot be corrupted. + const mergedEnvVars = { + ...(await this.constructor.getSystemDefaultEnvVars()), + ...this.nvidiaDefaultEnvVars(), + ...this.parseEnvironmentVars() + }; + + // Format as NUL-separated list: KEY1=value1\0KEY2=value2\0KEY3=value3 + const envPairs = []; + for (const [key, value] of Object.entries(mergedEnvVars)) { + envPairs.push(`${key}=${value}`); + } + if (envPairs.length > 0) { + config['env'] = envPairs.join('\0'); + } else if (deleteMissing) { deleteList.push('env'); } - + // Set entrypoint command if (this.entrypoint && this.entrypoint.trim()) { config['entrypoint'] = this.entrypoint.trim(); - } else { + } else if (deleteMissing) { deleteList.push('entrypoint'); } @@ -82,11 +268,6 @@ module.exports = (sequelize, DataTypes) => { type: DataTypes.STRING(255), allowNull: false }, - status: { - type: DataTypes.STRING(20), - allowNull: false, - defaultValue: 'pending' - }, template: { type: DataTypes.STRING(255), allowNull: true diff --git a/create-a-container/openapi.v1.yaml b/create-a-container/openapi.v1.yaml index e049dd16..70e137e1 100644 --- a/create-a-container/openapi.v1.yaml +++ b/create-a-container/openapi.v1.yaml @@ -89,7 +89,7 @@ components: id: { type: integer } hostname: { type: string } containerId: { type: integer, nullable: true } - status: { type: string } + status: { $ref: '#/components/schemas/ContainerStatus' } template: { type: string } ipv4Address: { type: string, nullable: true } macAddress: { type: string, nullable: true } @@ -104,6 +104,24 @@ components: services: type: array items: { type: object } + ContainerStatus: + type: string + description: >- + Live container status, resolved on read from Proxmox run-state and the + create job. + running = online in Proxmox; + offline = exists in Proxmox but stopped; + creating = no Proxmox VM yet, active create job; + failed = no Proxmox VM, create job failed; + missing = no Proxmox VM, create succeeded or no create job; + unknown = Proxmox unreachable / no node credentials. + enum: + - running + - offline + - creating + - failed + - missing + - unknown Node: type: object properties: diff --git a/create-a-container/routers/api/v1/containers.js b/create-a-container/routers/api/v1/containers.js index 2ca64da8..8f6fbfe4 100644 --- a/create-a-container/routers/api/v1/containers.js +++ b/create-a-container/routers/api/v1/containers.js @@ -21,12 +21,36 @@ const { const { parseDockerRef, getImageConfig, extractImageMetadata } = require('../../../utils/docker-registry'); const { manageDnsRecords } = require('../../../utils/cloudflare-dns'); const { deleteVirtualMachine, withNetbox } = require('../../../utils/netbox'); +const { + computeContainerStatus, + computeContainerStatuses, + STATUS, +} = require('../../../utils/container-status'); const { apiAuth, asyncHandler, ok, created, ApiError } = require('../../../middlewares/api'); const router = express.Router({ mergeParams: true }); router.use(apiAuth); +/** + * Convert the `environmentVars` request payload (an array of { key, value } + * objects) into the JSON string stored on the container record, or null when + * there are no valid vars. Keys/values are validated and normalized via + * Container.normalizeEnvVars so only safe env var names are persisted (keys + * containing `=`/NUL or non-primitive values are dropped at ingest time). + * @param {Array<{key: string, value: *}>} environmentVars + * @returns {string|null} + */ +function serializeUserEnvVars(environmentVars) { + if (!Array.isArray(environmentVars)) return null; + const flat = {}; + for (const e of environmentVars) { + if (e && typeof e.key === 'string') flat[e.key] = e.value; + } + const normalized = Container.normalizeEnvVars(flat); + return Object.keys(normalized).length > 0 ? JSON.stringify(normalized) : null; +} + function normalizeDockerRef(ref) { if (ref.startsWith('http://') || ref.startsWith('https://') || ref.startsWith('git@')) return ref; let tag = 'latest'; @@ -67,7 +91,7 @@ async function loadSite(req) { return site; } -function serializeContainer(c, site) { +function serializeContainer(c, site, status) { const services = c.services || []; const ssh = services.find( (s) => @@ -91,7 +115,9 @@ function serializeContainer(c, site) { hostname: c.hostname, ipv4Address: c.ipv4Address, macAddress: c.macAddress, - status: c.status, + // Live status computed from Proxmox + jobs + config (see utils/container-status). + // Kept on every container payload so existing consumers remain non-breaking. + status: status || STATUS.UNKNOWN, template: c.template, creationJobId: c.creationJobId, entrypoint: c.entrypoint, @@ -183,10 +209,19 @@ router.get( { association: 'dnsService' }, ], }, - { association: 'node', attributes: ['id', 'name', 'apiUrl'] }, + // Full node record (incl. credentials) is required to query live Proxmox status. + { association: 'node' }, + // Eager-load the create job so status resolution needs no per-container query. + { association: 'creationJob' }, ], }); - return ok(res, rows.map((c) => serializeContainer(c, site))); + // Resolve live statuses for the whole page in one pass: one Proxmox snapshot + // per node (shared), and no per-container DB queries (create job is loaded above). + const statuses = await computeContainerStatuses(rows, Job); + return ok( + res, + rows.map((c) => serializeContainer(c, site, statuses.get(c.id))), + ); }), ); @@ -214,12 +249,14 @@ router.get( ], }, { association: 'node' }, + { association: 'creationJob' }, ], }); if (!c || !c.node || c.node.siteId !== site.id) { throw new ApiError(404, 'not_found', 'Container not found'); } - return ok(res, serializeContainer(c, site)); + const status = await computeContainerStatus({ container: c, Job }); + return ok(res, serializeContainer(c, site, status)); }), ); @@ -243,20 +280,9 @@ router.post( if (!hostname || !hostname.trim()) throw new ApiError(400, 'invalid_request', 'hostname is required'); const wantsNvidia = !!nvidiaRequested; - let envVarsJson = null; - if (Array.isArray(environmentVars) && environmentVars.length > 0) { - const envObj = {}; - for (const e of environmentVars) { - if (e?.key && e.key.trim()) envObj[e.key.trim()] = e.value || ''; - } - if (Object.keys(envObj).length > 0) envVarsJson = JSON.stringify(envObj); - } - if (wantsNvidia) { - const obj = envVarsJson ? JSON.parse(envVarsJson) : {}; - if (!obj.NVIDIA_VISIBLE_DEVICES) obj.NVIDIA_VISIBLE_DEVICES = 'all'; - if (!obj.NVIDIA_DRIVER_CAPABILITIES) obj.NVIDIA_DRIVER_CAPABILITIES = 'utility compute'; - envVarsJson = JSON.stringify(obj); - } + // Only the user-defined env vars are persisted on the container record; + // NVIDIA and admin-defined system defaults are merged in at configure-time. + const envVarsJson = serializeUserEnvVars(environmentVars); const imageRef = template === 'custom' ? customTemplate?.trim() : template; if (!imageRef) throw new ApiError(400, 'invalid_request', 'template is required'); @@ -288,7 +314,6 @@ router.post( { hostname, username: req.session.user, - status: 'pending', template: templateName, nodeId: node.id, siteId: site.id, @@ -363,7 +388,9 @@ router.post( containerId: container.id, jobId: job.id, hostname: container.hostname, - status: 'pending', + // A create job was just enqueued and there is no Proxmox VMID yet, so the + // live status resolver would report 'creating' — return it directly. + status: STATUS.CREATING, }); } catch (err) { await t.rollback(); @@ -389,11 +416,7 @@ router.put( let envVarsJson = container.environmentVars; if (!isRestartOnly && Array.isArray(environmentVars)) { - const obj = {}; - for (const e of environmentVars) { - if (e?.key) obj[e.key.trim()] = e.value || ''; - } - envVarsJson = Object.keys(obj).length > 0 ? JSON.stringify(obj) : null; + envVarsJson = serializeUserEnvVars(environmentVars); } else if (!isRestartOnly && !environmentVars) { envVarsJson = null; } @@ -414,12 +437,9 @@ router.put( { environmentVars: envVarsJson, entrypoint: newEntrypoint, - status: needsRestart && container.containerId ? 'restarting' : container.status, }, { transaction: t }, ); - } else if (forceRestart && container.containerId) { - await container.update({ status: 'restarting' }, { transaction: t }); } if (needsRestart && container.containerId) { restartJob = await Job.create( diff --git a/create-a-container/routers/api/v1/nodes.js b/create-a-container/routers/api/v1/nodes.js index d01e097d..2cbb3303 100644 --- a/create-a-container/routers/api/v1/nodes.js +++ b/create-a-container/routers/api/v1/nodes.js @@ -247,7 +247,6 @@ router.post( containerId: c.vmid, macAddress, ipv4Address, - status: 'running', }; }), ); diff --git a/create-a-container/routers/api/v1/resource-requests.js b/create-a-container/routers/api/v1/resource-requests.js index ce047223..5997faef 100644 --- a/create-a-container/routers/api/v1/resource-requests.js +++ b/create-a-container/routers/api/v1/resource-requests.js @@ -217,12 +217,15 @@ router.put( ); /** - * Apply a resource change to existing running containers matching the identity. + * Apply a resource change to existing provisioned containers matching the identity. * This creates a reconfigure job for each matching container. */ async function applyResourceToExistingContainers(siteId, hostname, username, resourceType, value) { const containers = await Container.findAll({ - where: { siteId, hostname, username, status: 'running' }, + // Only containers that exist in Proxmox (have a VMID) can be reconfigured. + // The status column was removed; reconfigure-container.js exits early without + // a containerId, so this is the correct provisioned predicate. + where: { siteId, hostname, username, containerId: { [Op.ne]: null } }, }); if (containers.length === 0) return; diff --git a/create-a-container/routers/templates.js b/create-a-container/routers/templates.js index 4793ba3a..938ce756 100644 --- a/create-a-container/routers/templates.js +++ b/create-a-container/routers/templates.js @@ -1,4 +1,5 @@ const express = require('express'); +const { Op } = require('sequelize'); const { Site, Node, Container, Service, HTTPService, TransportService, ExternalDomain } = require('../models'); const { requireLocalhostOrAdmin } = require('../middlewares'); @@ -12,7 +13,7 @@ async function loadDnsmasqSite(siteId) { include: [{ model: Container, as: 'containers', - where: { status: 'running' }, + where: { ipv4Address: { [Op.ne]: null } }, required: false, attributes: ['macAddress', 'ipv4Address', 'hostname'], }], @@ -40,7 +41,7 @@ router.get('/sites/:siteId/nginx', requireLocalhostOrAdmin, async (req, res) => include: [{ model: Container, as: 'containers', - where: { status: 'running' }, + where: { ipv4Address: { [Op.ne]: null } }, required: false, include: [{ model: Service, diff --git a/create-a-container/utils/container-status.js b/create-a-container/utils/container-status.js new file mode 100644 index 00000000..0c94c9d5 --- /dev/null +++ b/create-a-container/utils/container-status.js @@ -0,0 +1,237 @@ +/** + * container-status.js — resolve the *live* status of a container. + * + * Historically the container "status" was a static column in the database that + * was only mutated by the create/reconfigure job scripts. That value drifts out + * of reality whenever something changes a container directly in Proxmox (or when + * a job dies without updating the row). This module computes the real status on + * demand by combining two sources of truth: + * + * 1. Proxmox — does the LXC exist, and is it running or stopped? This comes + * from a single per-node `clusterResources('lxc')` snapshot. + * 2. Create job — for containers not yet in Proxmox: is the create job still + * active, did it fail, or is it gone? The create job is linked + * to the container by the `creationJobId` foreign key, so this + * is a cheap primary-key lookup (or free when eager-loaded). + * + * Resolved statuses: + * running — exists in Proxmox and is online + * offline — exists in Proxmox but is stopped + * creating — not in Proxmox, but has an active (pending/running) create job + * failed — not in Proxmox, create job returned failure + * missing — not in Proxmox, create job succeeded or no create job found + * unknown — Proxmox could not be reached / node has no API credentials + * + * Note: there is intentionally no per-container Proxmox config read here. Proxmox + * has no bulk config endpoint, so config-drift detection would be O(N) network + * round-trips for the list page; it was dropped in favour of keeping status + * resolution cheap (one Proxmox call per node, zero per container). + */ + +const STATUS = Object.freeze({ + RUNNING: 'running', + OFFLINE: 'offline', + CREATING: 'creating', + FAILED: 'failed', + MISSING: 'missing', + UNKNOWN: 'unknown', +}); + +const STATUS_VALUES = Object.freeze(Object.values(STATUS)); + +const ACTIVE_JOB_STATUSES = ['pending', 'running']; + +function nodeHasCreds(node) { + return !!(node && node.apiUrl && node.tokenId && node.secret); +} + +/** + * Resolve the create job for a container via its `creationJobId` foreign key. + * Prefers an already-loaded `creationJob` association (zero queries); otherwise + * does a single primary-key lookup. Returns null if the container has no linked + * create job. + * @param {object} container - Container instance (ideally with creationJob assoc) + * @param {object} Job - Job model + * @returns {Promise} + */ +async function findCreateJob(container, Job) { + if (container.creationJob) return container.creationJob; + if (container.creationJobId) return Job.findByPk(container.creationJobId); + return null; +} + +function isActiveJob(job) { + return !!job && ACTIVE_JOB_STATUSES.includes(job.status); +} + +/** + * Locate a container in a cluster-resources snapshot by VMID. + * @param {Array|null} snapshot - Result of ProxmoxApi.clusterResources('lxc') + * @param {number} vmid + * @returns {object|null} The matching resource entry or null + */ +function findInSnapshot(snapshot, vmid) { + if (!Array.isArray(snapshot) || vmid == null) return null; + return snapshot.find((r) => Number(r.vmid) === Number(vmid)) || null; +} + +/** + * Resolve status from already-gathered facts. Pure function — no I/O — so it is + * trivial to unit test and is the single source of the decision tree. + * + * @param {object} facts + * @param {boolean} facts.proxmoxReachable - Whether Proxmox was queried OK. + * @param {boolean} facts.inProxmox - Whether the LXC exists in Proxmox. + * @param {boolean} facts.proxmoxRunning - Whether the LXC is running. + * @param {boolean} facts.hasVmid - Whether the container has a Proxmox VMID. + * @param {boolean} facts.creating - Active create job present. + * @param {boolean} facts.createFailed - Create job returned failure. + * @returns {string} STATUS value + */ +function decideStatus(facts) { + if (facts.inProxmox) { + return facts.proxmoxRunning ? STATUS.RUNNING : STATUS.OFFLINE; + } + + if (facts.creating) return STATUS.CREATING; + + // We had a VMID + creds but couldn't reach Proxmox: don't guess missing. + if (facts.hasVmid && !facts.proxmoxReachable) return STATUS.UNKNOWN; + + if (facts.createFailed) return STATUS.FAILED; + return STATUS.MISSING; +} + +/** + * Compute the live status of a single container. + * + * @param {object} params + * @param {object} params.container - Container instance (with `node` association, + * and ideally the `creationJob` association eager-loaded). + * @param {object} params.Job - Job model (from models). + * @param {object} [params.api] - Optional pre-authenticated ProxmoxApi client for + * the container's node (lets callers reuse one client across many containers). + * @param {{ data: Array, ok: boolean }} [params.snapshot] - Optional + * pre-fetched clusterResources('lxc') snapshot for the node. `ok` indicates the + * query succeeded; `data` is the resource list (used for batching the index). + * @returns {Promise} One of the STATUS values. + */ +async function computeContainerStatus({ container, Job, api, snapshot }) { + const node = container.node; + const hasCreds = nodeHasCreds(node); + const hasVmid = container.containerId != null; + + // --- Determine Proxmox presence / run state --- + let proxmoxReachable = false; + let inProxmox = false; + let proxmoxRunning = false; + + if (hasVmid && hasCreds) { + try { + let resources; + if (snapshot) { + proxmoxReachable = !!snapshot.ok; + resources = snapshot.ok ? snapshot.data : null; + } else { + // Single-container path (no shared snapshot): fetch this node's once. + const client = api || (await node.api()); + resources = await client.clusterResources('lxc'); + proxmoxReachable = true; + } + if (proxmoxReachable) { + const resource = findInSnapshot(resources, container.containerId); + if (resource) { + inProxmox = true; + proxmoxRunning = resource.status === 'running'; + } + } + } catch (err) { + proxmoxReachable = false; + } + } + + // Only inspect the create job when the container isn't in Proxmox, to + // distinguish creating / failed / missing. The create job is linked by FK + // (creationJobId), so this is a cheap PK lookup or free when eager-loaded. + let creating = false; + let createFailed = false; + if (!inProxmox) { + const createJob = await findCreateJob(container, Job); + creating = isActiveJob(createJob); + createFailed = !!createJob && createJob.status === 'failure'; + } + + return decideStatus({ + proxmoxReachable, + inProxmox, + proxmoxRunning, + hasVmid, + creating, + createFailed, + }); +} + +/** + * Compute live statuses for many containers efficiently. + * + * No per-container Proxmox calls and no per-container DB queries are issued: + * - Proxmox: containers are grouped by node so each node's + * `clusterResources('lxc')` snapshot is fetched exactly once. + * - Create job: resolved from each container's eager-loaded `creationJob` + * association (or its creationJobId FK). + * + * @param {Array} containers - Container instances (each with `node`, and + * ideally the `creationJob` association loaded to avoid per-container queries). + * @param {object} Job - Job model. + * @returns {Promise>} Map of container.id -> STATUS value. + */ +async function computeContainerStatuses(containers, Job) { + const result = new Map(); + + // Group by node id (containers without a node still get resolved, just without + // any Proxmox facts). + const byNode = new Map(); + for (const container of containers) { + const key = container.node ? container.node.id : `__no_node_${container.id}`; + if (!byNode.has(key)) byNode.set(key, []); + byNode.get(key).push(container); + } + + await Promise.all( + Array.from(byNode.values()).map(async (group) => { + const node = group[0].node; + let snapshot = { ok: false, data: null }; + + if (nodeHasCreds(node)) { + try { + const api = await node.api(); + const data = await api.clusterResources('lxc'); + snapshot = { ok: true, data }; + } catch (err) { + snapshot = { ok: false, data: null }; + } + } + + // Resolution is now CPU-only per container (no Proxmox/DB I/O when the + // create job is eager-loaded), so the snapshot is reused for all of them. + for (const container of group) { + // eslint-disable-next-line no-await-in-loop + const status = await computeContainerStatus({ container, Job, snapshot }); + result.set(container.id, status); + } + }), + ); + + return result; +} + +module.exports = { + STATUS, + STATUS_VALUES, + computeContainerStatus, + computeContainerStatuses, + decideStatus, + // exported for reuse / testing + findInSnapshot, + findCreateJob, +};