Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions notebook-rerun-determinism-guard/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Notebook rerun determinism guard

This module checks whether hosted notebooks can receive a reproducibility badge after a rerun.

It focuses on one part of Scientific/Engineering Data & Code Hosting: stable notebook outputs. It does not run containers, call external compute, or access live datasets.

## What it checks

- environment digest
- pinned random seed
- raw data digest
- output digest parity
- numeric drift tolerance
- missing figures
- slow reruns
- provenance bundle presence

## Run it

```bash
node notebook-rerun-determinism-guard/test.js
node notebook-rerun-determinism-guard/demo.js
```

The demo writes JSON and Markdown artifacts to `notebook-rerun-determinism-guard/artifacts/` and prints the Markdown report to the console.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Notebook rerun determinism report

## NB-100: RNA velocity notebook
Decision: PUBLISH_REPRODUCIBLE
Next step: publish the rerun badge and output manifest

## NB-210: Climate downscaling notebook
Decision: REVIEW_BEFORE_RELEASE
Next step: ask a reviewer to approve the reproducibility packet
Warnings:
- rerun is slower than expected
- provenance bundle is missing

## NB-404: Tumor segmentation notebook
Decision: HOLD_RELEASE
Next step: hold the notebook until the rerun packet matches the baseline
Blockers:
- missing environment digest
- random seed is not pinned
- output digest changed from baseline
- numeric drift exceeds allowed tolerance
- expected figures are missing
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
[
{
"notebookId": "NB-100",
"title": "RNA velocity notebook",
"decision": "PUBLISH_REPRODUCIBLE",
"blockers": [],
"warnings": [],
"nextStep": "publish the rerun badge and output manifest"
},
{
"notebookId": "NB-210",
"title": "Climate downscaling notebook",
"decision": "REVIEW_BEFORE_RELEASE",
"blockers": [],
"warnings": [
"rerun is slower than expected",
"provenance bundle is missing"
],
"nextStep": "ask a reviewer to approve the reproducibility packet"
},
{
"notebookId": "NB-404",
"title": "Tumor segmentation notebook",
"decision": "HOLD_RELEASE",
"blockers": [
"missing environment digest",
"random seed is not pinned",
"output digest changed from baseline",
"numeric drift exceeds allowed tolerance",
"expected figures are missing"
],
"warnings": [],
"nextStep": "hold the notebook until the rerun packet matches the baseline"
}
]
13 changes: 13 additions & 0 deletions notebook-rerun-determinism-guard/demo.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
const fs = require("fs");
const path = require("path");
const packets = require("./sample-data.json");
const { evaluateNotebooks, renderMarkdownReport } = require("./index");

// Demo driver: evaluates the bundled sample packets, writes the results as
// JSON and Markdown artifacts, and prints the Markdown report.

// Artifacts are written next to this script (via __dirname), not relative to
// the current working directory.
const artifactsDir = path.join(__dirname, "artifacts");
fs.mkdirSync(artifactsDir, { recursive: true });

const results = evaluateNotebooks(packets);

// Render the report once and reuse it, so the file on disk and the console
// output cannot diverge and the work is not repeated.
const report = renderMarkdownReport(results);

fs.writeFileSync(path.join(artifactsDir, "notebook-rerun-results.json"), JSON.stringify(results, null, 2));
fs.writeFileSync(path.join(artifactsDir, "notebook-rerun-report.md"), report);

console.log(report);
67 changes: 67 additions & 0 deletions notebook-rerun-determinism-guard/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
/**
 * Evaluates one notebook rerun packet and decides whether the notebook may be
 * released as reproducible.
 *
 * Decision rules:
 * - any blocker            -> "HOLD_RELEASE"
 * - no blockers, a warning -> "REVIEW_BEFORE_RELEASE"
 * - neither                -> "PUBLISH_REPRODUCIBLE"
 *
 * The drift and figure checks fail closed: if `numericDrift`, `allowedDrift`
 * or `missingFigures` is not a finite number, the check cannot be performed
 * and a blocker is recorded. A plain `a > b` comparison would evaluate to
 * false for missing or NaN values and silently let the packet through.
 *
 * @param {object} packet - Rerun packet. Fields read: id, title,
 *   environmentDigest, randomSeedPinned, rawDataDigest, baselineOutputDigest,
 *   outputDigest, numericDrift, allowedDrift, missingFigures, rerunMinutes,
 *   maxRerunMinutes, provenanceAttached.
 * @returns {{notebookId: *, title: *, decision: string, blockers: string[],
 *   warnings: string[], nextStep: string}} Evaluation result.
 * @throws {TypeError} If `packet` is null or undefined.
 */
function evaluateNotebook(packet) {
  if (packet == null) {
    throw new TypeError("notebook packet must be an object");
  }

  const blockers = [];
  const warnings = [];

  if (!packet.environmentDigest) blockers.push("missing environment digest");
  if (!packet.randomSeedPinned) blockers.push("random seed is not pinned");
  if (!packet.rawDataDigest) blockers.push("missing raw data digest");
  if (!packet.outputDigest) blockers.push("missing output digest");

  // Digest parity is only compared when both digests are present; a packet
  // without a baseline digest skips this comparison.
  if (packet.baselineOutputDigest && packet.outputDigest && packet.baselineOutputDigest !== packet.outputDigest) {
    blockers.push("output digest changed from baseline");
  }

  // Fail closed: an absent or non-numeric measurement must not pass the check.
  if (!Number.isFinite(packet.numericDrift) || !Number.isFinite(packet.allowedDrift)) {
    blockers.push("numeric drift could not be verified");
  } else if (packet.numericDrift > packet.allowedDrift) {
    blockers.push("numeric drift exceeds allowed tolerance");
  }

  if (!Number.isFinite(packet.missingFigures)) {
    blockers.push("figure count could not be verified");
  } else if (packet.missingFigures > 0) {
    blockers.push("expected figures are missing");
  }

  // Timing only produces a warning; missing timing data produces no warning.
  if (packet.rerunMinutes > packet.maxRerunMinutes) warnings.push("rerun is slower than expected");
  if (!packet.provenanceAttached) warnings.push("provenance bundle is missing");

  let decision = "PUBLISH_REPRODUCIBLE";
  if (blockers.length) {
    decision = "HOLD_RELEASE";
  } else if (warnings.length) {
    decision = "REVIEW_BEFORE_RELEASE";
  }

  return {
    notebookId: packet.id,
    title: packet.title,
    decision,
    blockers,
    warnings,
    nextStep: nextStepFor(decision),
  };
}

/**
 * Maps a release decision to the follow-up action shown in reports.
 *
 * @param {string} decision - Decision code produced by the evaluation.
 * @returns {string} Human-readable next step. Any decision other than
 *   "PUBLISH_REPRODUCIBLE" or "REVIEW_BEFORE_RELEASE" yields the hold message.
 */
function nextStepFor(decision) {
  switch (decision) {
    case "PUBLISH_REPRODUCIBLE":
      return "publish the rerun badge and output manifest";
    case "REVIEW_BEFORE_RELEASE":
      return "ask a reviewer to approve the reproducibility packet";
    default:
      return "hold the notebook until the rerun packet matches the baseline";
  }
}

/**
 * Evaluates a list of rerun packets, preserving input order.
 *
 * @param {object[]} packets - Packets to evaluate; `map` is called on this value.
 * @returns {object[]} One evaluation result per packet.
 */
function evaluateNotebooks(packets) {
  return packets.map((packet) => evaluateNotebook(packet));
}

/**
 * Renders evaluation results as a Markdown report.
 *
 * The report starts with a top-level heading, followed by one section per
 * result containing the decision, the next step, and optional "Blockers:" and
 * "Warnings:" bullet lists (each omitted when its list is empty). Every
 * section ends with a blank line.
 *
 * @param {Iterable<object>} results - Results with notebookId, title, decision,
 *   nextStep, blockers and warnings.
 * @returns {string} Report text with lines joined by "\n".
 */
function renderMarkdownReport(results) {
  // Heading plus one bullet per item, or nothing at all for an empty list.
  const section = (heading, items) =>
    items.length ? [heading, ...[...items].map((item) => `- ${item}`)] : [];

  const report = ["# Notebook rerun determinism report", ""];
  for (const { notebookId, title, decision, nextStep, blockers, warnings } of results) {
    report.push(
      `## ${notebookId}: ${title}`,
      `Decision: ${decision}`,
      `Next step: ${nextStep}`,
      ...section("Blockers:", blockers),
      ...section("Warnings:", warnings),
      "",
    );
  }
  return report.join("\n");
}

// Public API of the guard: single-packet evaluation, batch evaluation, and
// Markdown rendering. nextStepFor is not exported.
module.exports = {
evaluateNotebook,
evaluateNotebooks,
renderMarkdownReport,
};
47 changes: 47 additions & 0 deletions notebook-rerun-determinism-guard/sample-data.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
[
{
"id": "NB-100",
"title": "RNA velocity notebook",
"environmentDigest": "env-a12",
"randomSeedPinned": true,
"rawDataDigest": "raw-991",
"baselineOutputDigest": "out-441",
"outputDigest": "out-441",
"numericDrift": 0.002,
"allowedDrift": 0.01,
"missingFigures": 0,
"rerunMinutes": 12,
"maxRerunMinutes": 20,
"provenanceAttached": true
},
{
"id": "NB-210",
"title": "Climate downscaling notebook",
"environmentDigest": "env-b77",
"randomSeedPinned": true,
"rawDataDigest": "raw-103",
"baselineOutputDigest": "out-915",
"outputDigest": "out-915",
"numericDrift": 0.004,
"allowedDrift": 0.01,
"missingFigures": 0,
"rerunMinutes": 42,
"maxRerunMinutes": 30,
"provenanceAttached": false
},
{
"id": "NB-404",
"title": "Tumor segmentation notebook",
"environmentDigest": "",
"randomSeedPinned": false,
"rawDataDigest": "raw-700",
"baselineOutputDigest": "out-201",
"outputDigest": "out-999",
"numericDrift": 0.09,
"allowedDrift": 0.02,
"missingFigures": 2,
"rerunMinutes": 18,
"maxRerunMinutes": 25,
"provenanceAttached": true
}
]
15 changes: 15 additions & 0 deletions notebook-rerun-determinism-guard/test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
const assert = require("assert");
const packets = require("./sample-data.json");
const { evaluateNotebooks } = require("./index");

// Evaluate the bundled sample packets and index the results by notebook id.
const resultsById = new Map(
  evaluateNotebooks(packets).map((result) => [result.notebookId, result]),
);

// NB-100: every check passes, so it is publishable.
const cleanNotebook = resultsById.get("NB-100");
assert.strictEqual(cleanNotebook.decision, "PUBLISH_REPRODUCIBLE");

// NB-210: no blockers, but a slow rerun sends it to review.
const slowNotebook = resultsById.get("NB-210");
assert.strictEqual(slowNotebook.decision, "REVIEW_BEFORE_RELEASE");
assert.ok(slowNotebook.warnings.some((warning) => warning.includes("slower")));

// NB-404: blockers hold the release; spot-check two of them.
const brokenNotebook = resultsById.get("NB-404");
assert.strictEqual(brokenNotebook.decision, "HOLD_RELEASE");
for (const fragment of ["output digest", "random seed"]) {
  assert.ok(brokenNotebook.blockers.some((blocker) => blocker.includes(fragment)));
}

console.log("notebook rerun determinism guard tests passed");