08919752e3
Checkpoint multi-chantiers (arbre vert : tsc 0 erreur, 70 tests, build OK). - tâche 1.9 Phase 1 : schéma socle (machine_state/events/reports/raw_artifacts/ hardware/metrics + colonnes étendues) + wiring refresh/execute. Migration 0002. - tâche 1.9 Phase 2 : machine_credentials + machine_host_keys (non destructif, dual-read + backfill). Migration 0003. Fix séquence journal de migration. - tâche 2 : SJ-0 (types étendus rétro-compatibles, réducteur Docker, resolveTemplate), SJ-1 (update-analyze enrichi), SJ-2 (apply + diff dpkg + timeout inactivité SSH), SJ-3 (reboot vérifié boot_id). - WIP parallèle inclus : /api/capabilities, auth/apiTokens/apiClients, system metrics, scaffold app_rust, ajustements frontend. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
95 lines
3.3 KiB
TypeScript
95 lines
3.3 KiB
TypeScript
// server/services/rebootVerify.ts
|
|
import { runPlain, type SshCreds } from "../ssh/client.js";
|
|
import type { RebootResult } from "@shared/types.js";
|
|
|
|
/**
 * Extracts the pre-reboot boot id from raw command output.
 *
 * The id is the text located after the first `===SU:BOOT_ID_BEFORE===` marker
 * and before the next `===SU:REBOOT_NOW===` marker (or the end of the input
 * when that second marker is missing), with surrounding whitespace removed.
 *
 * @param raw - Raw output that may contain the markers.
 * @returns The trimmed id, or `null` when the start marker is absent or the
 *          delimited section is empty.
 */
export function parseBootIdBefore(raw: string): string | null {
  const startMarker = "===SU:BOOT_ID_BEFORE===";
  const endMarker = "===SU:REBOOT_NOW===";

  const markerPos = raw.indexOf(startMarker);
  if (markerPos < 0) {
    return null;
  }

  // Only look for the end marker after the start marker.
  const sectionStart = markerPos + startMarker.length;
  const sectionEnd = raw.indexOf(endMarker, sectionStart);
  const section = sectionEnd < 0 ? raw.slice(sectionStart) : raw.slice(sectionStart, sectionEnd);

  const bootId = section.trim();
  return bootId.length > 0 ? bootId : null;
}
|
|
|
|
/** Observations gathered around a reboot, consumed by `classifyReboot`. */
export interface RebootSignals {
  /** Boot id recorded before the reboot, or `null` when it is not known. */
  beforeBootId: string | null;
  /** Boot id read after the machine answered again, or `null` when none was read. */
  afterBootId: string | null;
  /** Whether the machine was seen as unreachable after the reboot request. */
  wentDown: boolean;
  /** Whether the machine answered again after having gone down. */
  cameBack: boolean;
}
|
|
|
|
/**
 * Derives the status of a verified reboot from the collected signals
 * (pure function, no I/O).
 *
 * Rules, evaluated in order:
 *  1. the machine never went down            -> "ssh_never_went_down"
 *  2. it did not come back, or no boot id
 *     could be read afterwards               -> "machine_did_not_return"
 *  3. the boot id before is known and equals
 *     the one read afterwards                -> "boot_id_unchanged"
 *  4. anything else                          -> "ok"
 *
 * @param s - Signals observed around the reboot.
 * @returns An object carrying the resulting status.
 */
export function classifyReboot(s: RebootSignals): { status: RebootResult["status"] } {
  let status: RebootResult["status"];

  if (!s.wentDown) {
    status = "ssh_never_went_down";
  } else if (!(s.cameBack && s.afterBootId !== null)) {
    status = "machine_did_not_return";
  } else if (s.beforeBootId !== null && s.beforeBootId === s.afterBootId) {
    status = "boot_id_unchanged";
  } else {
    // An unknown "before" id cannot contradict the reboot, so it counts as ok.
    status = "ok";
  }

  return { status };
}
|
|
|
|
/**
 * Reads the remote machine's current boot id over SSH.
 *
 * Best-effort by design: any failure while running the command is reported
 * as `null` rather than thrown, which callers interpret as "machine not
 * reachable right now".
 *
 * @param creds - SSH credentials forwarded to `runPlain`.
 * @returns The trimmed boot id, or `null` when the command fails or prints
 *          nothing.
 */
async function readBootId(creds: SshCreds): Promise<string | null> {
  try {
    const { stdout } = await runPlain(creds, "cat /proc/sys/kernel/random/boot_id");
    const bootId = stdout.trim();
    return bootId === "" ? null : bootId;
  } catch {
    // Connection not possible (machine down).
    return null;
  }
}
|
|
|
|
/** Returns a promise that resolves once a timer of `ms` milliseconds fires. */
const sleep = (ms: number) =>
  new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
|
|
|
|
/** Options accepted by `verifyReboot`. */
export interface VerifyOptions {
  /** Boot id captured before the reboot was requested, or `null` if unknown. */
  beforeBootId: string | null;
  /** When the reboot was requested; copied as-is into the result. */
  requestedAt: string;
  /** How long to wait for the machine to go down, in ms (`verifyReboot` defaults to 60000). */
  downTimeoutMs?: number;
  /** How long to wait for the machine to come back, in ms (`verifyReboot` defaults to 600000). */
  upTimeoutMs?: number;
  /** Delay between two probes, in ms (`verifyReboot` defaults to 5000). */
  pollMs?: number;
}
|
|
|
|
/**
 * Orchestrates a verified reboot: waits for the SSH outage (machine
 * rebooting), then for the machine to come back, re-reads the boot id and
 * classifies the outcome. Performs network I/O; not unit-tested.
 *
 * Phase A polls until a boot-id read fails (machine unreachable) or until
 * `downTimeoutMs` has elapsed. If a poll instead returns a boot id that
 * differs from the known `beforeBootId`, the machine has already restarted
 * between two probes: the reboot is recorded as complete (with
 * `sshWentDownAt` left `null`, since the outage itself was never observed)
 * and Phase B is skipped. Without this, a reboot faster than the polling
 * interval would be reported as "ssh_never_went_down".
 *
 * Phase B runs only when an outage was observed, and polls until a boot id
 * can be read again or until `upTimeoutMs` has elapsed.
 *
 * @param creds - SSH credentials used for every probe.
 * @param opt - Known pre-reboot boot id, request timestamp and optional
 *              timing overrides (defaults: 60000 ms down, 600000 ms up,
 *              5000 ms between probes).
 * @returns The reboot result; the duration and recommended-wait fields are
 *          only set when the status is "ok".
 */
export async function verifyReboot(creds: SshCreds, opt: VerifyOptions): Promise<RebootResult> {
  const downTimeoutMs = opt.downTimeoutMs ?? 60000;
  const upTimeoutMs = opt.upTimeoutMs ?? 600000;
  const pollMs = opt.pollMs ?? 5000;
  const t0 = Date.now();

  let wentDown = false;
  let sshWentDownAt: string | null = null;
  let cameBack = false;
  let sshCameBackAt: string | null = null;
  let afterBootId: string | null = null;

  // Phase A: wait for the machine to become unreachable.
  while (Date.now() - t0 < downTimeoutMs) {
    const id = await readBootId(creds);
    if (id === null) {
      wentDown = true;
      sshWentDownAt = new Date().toISOString();
      break;
    }
    if (opt.beforeBootId !== null && id !== opt.beforeBootId) {
      // The boot id already changed: the machine rebooted between two probes.
      // A new boot id proves the restart, so count it as down-and-back even
      // though the outage was never seen (sshWentDownAt stays null).
      wentDown = true;
      cameBack = true;
      sshCameBackAt = new Date().toISOString();
      afterBootId = id;
      break;
    }
    await sleep(pollMs);
  }

  // Phase B: wait for the return (only if an outage was seen and the
  // machine has not already been observed back with a new boot id).
  if (wentDown && !cameBack) {
    const tB = Date.now();
    while (Date.now() - tB < upTimeoutMs) {
      const id = await readBootId(creds);
      if (id !== null) {
        cameBack = true;
        sshCameBackAt = new Date().toISOString();
        afterBootId = id;
        break;
      }
      await sleep(pollMs);
    }
  }

  const { status } = classifyReboot({ beforeBootId: opt.beforeBootId, afterBootId, wentDown, cameBack });
  // Measured from the start of verification, not from `requestedAt`.
  const waitedSeconds = Math.round((Date.now() - t0) / 1000);
  const succeeded = status === "ok";

  return {
    beforeBootId: opt.beforeBootId,
    afterBootId,
    requestedAt: opt.requestedAt,
    sshWentDownAt,
    sshCameBackAt,
    waitedSeconds,
    status,
    lastRebootDurationSeconds: succeeded ? waitedSeconds : undefined,
    // Next time, wait 50% longer than this reboot took, plus a 30 s margin.
    nextRecommendedWaitSeconds: succeeded ? Math.round(waitedSeconds * 1.5) + 30 : undefined,
  };
}
|