diff --git a/.gitignore b/.gitignore
index a744cfb..801aca5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -19,4 +19,6 @@ Cargo.lock
 
 # Visuals generated by aurora...
 /viz/
-out.txt
+
+# Temporary output files...
+/out*.txt
diff --git a/.zed/settings.json b/.zed/settings.json
new file mode 100644
index 0000000..d229d6c
--- /dev/null
+++ b/.zed/settings.json
@@ -0,0 +1,16 @@
+// Folder-specific settings
+//
+// For a full list of overridable settings, and general information on folder-specific settings,
+// see the documentation: https://zed.dev/docs/configuring-zed#settings-files
+{
+  "terminal": {
+    "detect_venv": {
+      "on": {
+        "directories": [
+          ".venv"
+        ],
+        "activate_script": "default"
+      }
+    }
+  }
+}
diff --git a/Cargo.toml b/Cargo.toml
index fbf7952..1a636d7 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -16,6 +16,7 @@ serde_json = "1.0.93"
 itertools = "0.11.0"
 rayon = "1.8.0"
 base64 = "0.22.1"
+puruspe = "0.4.4"
 
 [target.'cfg(not(target_env = "msvc"))'.dependencies]
 tikv-jemallocator = "0.5"
@@ -29,3 +30,5 @@ lto = "thin"
 codegen-units = 1
 debug = false
 
+[dev-dependencies]
+rand = "0.10.1"
diff --git a/fixtures/soda/table.html b/fixtures/soda/table.html
index 73ccfeb..26ca293 100644
--- a/fixtures/soda/table.html
+++ b/fixtures/soda/table.html
@@ -2,16 +2,18 @@
 <html>
     <head>
         <title>PAGE_TITLE</title>
-        <link rel="icon" type="image/x-icon" href="icon.svg"/>
+        <link rel="icon" type="image/x-icon" href="icon.svg" />
         <style>
             table {
-                border-collapse: collapse; 
+                border-collapse: collapse;
                 border: 2px solid rgb(140 140 140);
-                font-family: sans-serif; font-size: 0.8rem; 
-                letter-spacing: 1px; 
+                font-family: sans-serif;
+                font-size: 0.8rem;
+                letter-spacing: 1px;
             }
-            th, td { 
-                border: 1px solid rgb(160 160 160); 
+            th,
+            td {
+                border: 1px solid rgb(160 160 160);
                 padding: 8px 10px;
             }
 
@@ -34,211 +36,466 @@
             figure {
                 margin: 0;
             }
+
+            th input[type="text"] {
+                width: 6em;
+            }
         </style>
+        <script id="table-data" type="text/tab-separated-values">
+            TSV_TARGET
+        </script>
         <script type="module">
             SODA_TARGET;
-            
+
+            let ELEMS_PER_PAGE = 50;
+
             let d3 = soda.internalD3;
 
-            d3.selectAll("table").each(function(d, i) {
-                let header = d3.select(this).select("thead");
-                let body = d3.select(this).select("tbody");
+            let tsv = d3.tsvParse(
+                d3.select("#table-data").text().trim(),
+                (d) => {
+                    let new_obj = {};
+                    for (let [k, v] of Object.entries(d)) {
+                        let k_parts = k.split("_");
+                        let k_type = k_parts[k_parts.length - 1];
+                        let new_val;
+                        switch (k_type) {
+                            case "boxplot":
+                            case "violin":
+                                new_val = v
+                                    .split(":")
+                                    .map((v) => parseFloat(v));
+                                break;
+                            case "float":
+                                new_val = parseFloat(v);
+                                break;
+                            case "region":
+                            case "int":
+                                new_val = parseInt(v);
+                                break;
+                            case "string":
+                            default:
+                                new_val = v;
+                        }
+
+                        new_obj[k] = new_val;
+                    }
+
+                    return new_obj;
+                },
+            );
+
+            function getCurrentOrder(fallback = [null, "desc"]) {
+                let url = new URL(window.location.href);
+                let order = url.searchParams.get("sortby");
+
+                if (order) {
+                    let [col, ord] = order.split(":");
+                    if (!fallback[0] || col == fallback[0]) {
+                        return [col, ord == "asc" ? "asc" : "desc"];
+                    }
+                }
+                return fallback;
+            }
 
-                function to_value(v) {
-                    let vf = parseFloat(v);
-                    return (vf != vf)? v: vf;
+            function getFilter() {
+                let url = new URL(window.location.href);
+                let filter = url.searchParams.get("filter");
+
+                if (filter) {
+                    let new_obj = {};
+                    for (let item of filter.split(",")) {
+                        let [k, v] = item.split(":");
+                        new_obj[k] = v;
+                    }
+                    return new_obj;
                 }
 
-                header.selectAll("th")
-                    .on("click", function(d, i) {
-                        header.selectAll("th")
-                            .attr("class", function(d2, i2) {
-                                return (i2 != i)? "": ({
-                                    "": "desc",
-                                    "desc": "asc",
-                                    "asc": "desc"
-                                }[this.className] ?? "desc");
-                            });
+                return {};
+            }
+
+            function encodeFilter(filter) {
+                return Object.entries(filter)
+                    .map(([k, v]) => `${k}:${v}`)
+                    .join(",");
+            }
+
+            function updateUrl(data) {
+                let url = new URL(window.location.href);
+                let params = url.searchParams;
+                for (let [k, v] of Object.entries(data)) {
+                    params.set(k, v);
+                }
+                window.history.pushState(null, "", url);
+            }
+
+            function renderHeader() {
+                let header = d3.select("#table-target").select("thead");
+                let row = header.select("tr");
+                let full_filter = getFilter();
+
+                row.selectAll("th")
+                    .data(tsv.columns)
+                    .enter()
+                    .append("th")
+                    .each(function (d) {
+                        let this_sel = d3.select(this);
+                        let k_parts = d.split("_");
+                        let k_type = k_parts[k_parts.length - 1];
+                        let k_name = k_parts.slice(0, -1).join(" ");
+                        this_sel.text(k_name);
+
+                        if (!["boxplot", "violin"].includes(k_type)) {
+                            let [col, direction] = getCurrentOrder();
 
-                        let order = this.className;
+                            if (col == d) {
+                                this_sel.attr("class", direction);
+                            }
 
-                        let ordered_elements = body.selectAll("tr")
-                            .nodes()
-                            .sort((a, b) => {
-                                let a_val = to_value(d3.select(a).selectAll("td").nodes()[i].innerText);
-                                let b_val = to_value(d3.select(b).selectAll("td").nodes()[i].innerText);
-                                return (order === "asc")? d3.ascending(a_val, b_val): d3.descending(a_val, b_val);
+                            this_sel.on("click", function () {
+                                let [col, ord] = getCurrentOrder([d, "asc"]);
+                                let new_ord = ord == "asc" ? "desc" : "asc";
+                                row.selectAll("th").attr(
+                                    "class",
+                                    function (other_d) {
+                                        return other_d == d ? new_ord : "";
+                                    },
+                                );
+                                updateUrl({
+                                    sortby: `${d}:${new_ord}`,
+                                    page: 0,
+                                });
+                                renderData();
                             });
 
-                        for(let elm of ordered_elements) {
-                            let parent = elm.parentElement;
-                            parent.removeChild(elm);
-                            parent.appendChild(elm);
-                        };
+                            this_sel.append("br");
+                            let prior_text = full_filter[d] ?? "";
+                            this_sel
+                                .append("input")
+                                .attr("id", d)
+                                .attr("type", "text")
+                                .attr("value", prior_text)
+                                .on("click", function (d) {
+                                    //d3.event.preventDefault();
+                                    d3.event.stopPropagation();
+                                    return false;
+                                })
+                                .on("input", function () {
+                                    let value = d3.event.target.value;
+                                    if (value == prior_text) {
+                                        return;
+                                    }
+                                    prior_text = value;
+                                    let filter = getFilter();
+                                    if (value) {
+                                        filter[d] = value;
+                                    } else {
+                                        delete filter[d];
+                                    }
+
+                                    updateUrl({
+                                        filter: encodeFilter(filter),
+                                        page: 0,
+                                    });
+                                    renderData();
+                                });
+                        }
                     });
+            }
 
-                header.selectAll("th").append("br");
-                header.selectAll("th")
-                    .append("input")
-                    .attr("type", "text")
-                    .on("click", function(d, i) {
-                        d3.event.stopPropagation();
-                    })
-                    .on("input", function(d, i, sel) {
-                        let filters = sel.map((v) => v.value);
-
-                        body.selectAll("tr")
-                            .style("display", function(d, i) {
-                                let vals = d3.select(this).selectAll("td").nodes().map((v) => v.innerText);
-                                let isValid = vals.every((v, i) => filters[i] == "" || v.includes(filters[i]));
-                                return (isValid)? "table-row": "none";
-                            });
+            function renderData() {
+                let filter = getFilter();
+                let [col, ord] = getCurrentOrder();
+
+                let inner_tsv = tsv;
+
+                if (Object.keys(filter).length > 0) {
+                    inner_tsv = inner_tsv.filter((row) => {
+                        return Object.entries(filter).every(([k, v]) => {
+                            return row[k].toString().includes(v.toString());
+                        });
                     });
-            });
-
-            d3.selectAll("figure.boxplot")
-                .each(function() {
-                    let width = 250;
-                    let height = 70;
-                    let margin = {
-                        top: 10, right: 0, bottom: 30, left: 0
-                    };
-                    let inner_width = width - margin.left - margin.right;
-                    let inner_height = height - margin.bottom - margin.top; 
-
-                    let svg = d3.select(this)
-                        .append("svg")
-                        .attr("width", width)
-                        .attr("height", height)
-                        .append("g")
-                        .attr("transform", "translate(" + margin.left + "," + margin.top + ")");
-                    
-                    let data = this.dataset.samples.split(",").map((v) => parseFloat(v)).sort(d3.ascending);
-
-                    let q1 = d3.quantile(data, 0.25)
-                    let median = d3.quantile(data, 0.5)
-                    let q3 = d3.quantile(data, 0.75)
-                    let min = d3.min(data);
-                    let max = d3.max(data);
-                    let range = max - min;
-                    let offset = Math.max(range * 0.1, 1);
-
-                    let x = d3.scaleLinear()
-                        .domain([min - offset, max + offset])
-                        .range([0, inner_width]);
-                    svg.append("g")
-                        .attr("transform", "translate(0," + inner_height + ")")
-                        .call(d3.axisBottom(x).ticks(5));
-
-                    let y = d3.scaleLinear()
-                        .domain([0, 1])
-                        .range([inner_height, 0]);
-
-                    let center = y(0.5);
-                    let bar_thickness = y(0) - y(0.5)
-                    
-                    svg.append("line")
-                        .attr("x1", x(min))
-                        .attr("x2", x(max))
-                        .attr("y1", center)
-                        .attr("y2", center)
-                        .attr("stroke", "black");
-
-                    svg.append("rect")
-                        .attr("x", x(q1))
-                        .attr("y", center - bar_thickness / 2)
-                        .attr("width", x(q3) - x(q1))
-                        .attr("height", bar_thickness)
-                        .attr("stroke", "black")
-                        .style("fill", "#69b3a2");
-
-                    svg.selectAll("toto")
-                        .data([min, median, max])
-                        .enter()
-                        .append("line")
-                        .attr("x1", (d) => x(d))
-                        .attr("x2", (d) => x(d))
-                        .attr("y1", center - bar_thickness / 2)
-                        .attr("y2", center + bar_thickness / 2)
-                        .attr("stroke", "black")
-                });
+                }
 
-            d3.selectAll("figure.violin")
-                .each(function() {
-                    function kernelDensityEstimator(kernel, X) {
-                        return (V) => {
-                            return X.map((x) => {
-                                return [x, d3.mean(V, (v) => { return kernel(x - v); })];
-                            });
-                        };
-                    }
-                    function kernelEpanechnikov(k) {
-                        return (v) => Math.abs(v /= k) <= 1 ? 0.75 * (1 - v * v) / k : 0;
-                    }
-                    function gaussianKernel(k) {
-                        return (v) => (1 / Math.sqrt(2 * Math.PI)) * Math.exp(-0.5 * (v / k) * (v / k)) / k;
+                inner_tsv.columns = tsv.columns;
+
+                if (inner_tsv.columns.includes(col)) {
+                    if (ord == "asc") {
+                        inner_tsv = inner_tsv.sort((a, b) =>
+                            d3.ascending(a[col], b[col]),
+                        );
+                    } else {
+                        inner_tsv = inner_tsv.sort((a, b) =>
+                            d3.descending(a[col], b[col]),
+                        );
                     }
+                }
+
+                inner_tsv.columns = tsv.columns;
+
+                let url = new URL(window.location.href);
+                let maxPage = Math.ceil(inner_tsv.length / ELEMS_PER_PAGE);
+                let page = Math.min(
+                    maxPage - 1,
+                    Math.max(
+                        0,
+                        Math.floor(parseInt(url.searchParams.get("page") ?? 0)),
+                    ),
+                );
+                let offset = ELEMS_PER_PAGE * page;
+                let offset_end = Math.min(
+                    ELEMS_PER_PAGE * (page + 1),
+                    inner_tsv.length,
+                );
+
+                d3.select("#page-indicator")
+                    .style("white-space", "pre")
+                    .text(
+                        `Page: ${page + 1}/${maxPage}     Families: ${offset + 1}-${offset_end}/${inner_tsv.length}`,
+                    );
+                d3.select("#prior-page").on("click", () => {
+                    updateUrl({ page: Math.max(0, page - 1) });
+                    renderData();
+                });
+                d3.select("#next-page").on("click", () => {
+                    updateUrl({ page: Math.min(maxPage - 1, page + 1) });
+                    renderData();
+                });
+
+                d3.select("#table-target")
+                    .select("tbody")
+                    .selectAll("tr")
+                    .remove();
+
+                let rows = d3
+                    .select("#table-target")
+                    .select("tbody")
+                    .selectAll("tr")
+                    .data(inner_tsv.slice(offset, offset + ELEMS_PER_PAGE))
+                    .enter()
+                    .append("tr");
+
+                let cells = rows
+                    .selectAll("td")
+                    .data((row) => {
+                        return inner_tsv.columns.map((col) => {
+                            return { column: col, value: row[col] };
+                        });
+                    })
+                    .enter()
+                    .append("td")
+                    .each(function (d, i) {
+                        let k_parts = d.column.split("_");
+                        let k_type = k_parts[k_parts.length - 1];
+
+                        switch (k_type) {
+                            case "region":
+                                d3.select(this)
+                                    .append("a")
+                                    .attr("href", `${d.value}/index.html`)
+                                    .text(d.value);
+                                break;
+                            case "boxplot":
+                                renderBoxplot(d3.select(this), d.value);
+                                break;
+                            case "violin":
+                                renderViolin(d3.select(this), d.value);
+                                break;
+                            case "float":
+                            case "int":
+                            case "string":
+                            default:
+                                d3.select(this).text(d.value);
+                        }
+                    });
+            }
+
+            function renderBoxplot(selection, data) {
+                let width = 250;
+                let height = 70;
+                let margin = {
+                    top: 10,
+                    right: 0,
+                    bottom: 30,
+                    left: 0,
+                };
+                let inner_width = width - margin.left - margin.right;
+                let inner_height = height - margin.bottom - margin.top;
+
+                let svg = selection
+                    .append("svg")
+                    .attr("width", width)
+                    .attr("height", height)
+                    .append("g")
+                    .attr(
+                        "transform",
+                        "translate(" + margin.left + "," + margin.top + ")",
+                    );
+
+                data = data.sort(d3.ascending);
+
+                let q1 = d3.quantile(data, 0.25);
+                let median = d3.quantile(data, 0.5);
+                let q3 = d3.quantile(data, 0.75);
+                let min = d3.min(data);
+                let max = d3.max(data);
+                let range = max - min;
+                let offset = Math.max(range * 0.1, 1);
+
+                let x = d3
+                    .scaleLinear()
+                    .domain([min - offset, max + offset])
+                    .range([0, inner_width]);
+                svg.append("g")
+                    .attr("transform", "translate(0," + inner_height + ")")
+                    .call(d3.axisBottom(x).ticks(5));
+
+                let y = d3
+                    .scaleLinear()
+                    .domain([0, 1])
+                    .range([inner_height, 0]);
+
+                let center = y(0.5);
+                let bar_thickness = y(0) - y(0.5);
 
-                    let width = 250;
-                    let height = 70;
-                    let margin = {
-                        top: 10, right: 0, bottom: 30, left: 0
+                svg.append("line")
+                    .attr("x1", x(min))
+                    .attr("x2", x(max))
+                    .attr("y1", center)
+                    .attr("y2", center)
+                    .attr("stroke", "black");
+
+                svg.append("rect")
+                    .attr("x", x(q1))
+                    .attr("y", center - bar_thickness / 2)
+                    .attr("width", x(q3) - x(q1))
+                    .attr("height", bar_thickness)
+                    .attr("stroke", "black")
+                    .style("fill", "#69b3a2");
+
+                svg.selectAll("toto")
+                    .data([min, median, max])
+                    .enter()
+                    .append("line")
+                    .attr("x1", (d) => x(d))
+                    .attr("x2", (d) => x(d))
+                    .attr("y1", center - bar_thickness / 2)
+                    .attr("y2", center + bar_thickness / 2)
+                    .attr("stroke", "black");
+            }
+
+            function renderViolin(selection, data) {
+                function kernelDensityEstimator(kernel, X) {
+                    return (V) => {
+                        return X.map((x) => {
+                            return [
+                                x,
+                                d3.mean(V, (v) => {
+                                    return kernel(x - v);
+                                }),
+                            ];
+                        });
                     };
-                    let inner_width = width - margin.left - margin.right;
-                    let inner_height = height - margin.bottom - margin.top; 
-
-                    let svg = d3.select(this)
-                        .append("svg")
-                        .attr("width", width)
-                        .attr("height", height)
-                        .append("g")
-                        .attr("transform", "translate(" + margin.left + "," + margin.top + ")");
-                    
-                    let data = this.dataset.samples.split(",").map((v) => parseFloat(v)).sort(d3.ascending);
-
-                    let min = d3.min(data);
-                    let max = d3.max(data);
-                    let range = max - min;
-                    let offset = Math.max(range * 0.1, 1);
-
-                    let irq = (d3.quantile(data, 0.75) - d3.quantile(data, 0.25)) / 1.349
-                    let sigma = Math.min(d3.deviation(data), irq);
-                    let est_h = 0.5 * sigma * (data.length ** (-0.2));
-                    est_h = (est_h != est_h)? 0.2: est_h;
-
-                    let x = d3.scaleLinear()
-                        .domain([min - offset, max + offset])
-                        .range([0, inner_width]);
-                    svg.append("g")
-                        .attr("transform", "translate(0," + inner_height + ")")
-                        .call(d3.axisBottom(x).ticks(5));
-
-                    let kde = kernelDensityEstimator(gaussianKernel(est_h), x.ticks(100));
-                    let kde_result = kde(data);
-                    let kde_max = d3.max(kde_result.map((v) => v[1]));
-
-                    let y = d3.scaleLinear()
-                        .domain([0, kde_max])
-                        .range([inner_height, 0]);
-
-                    svg.append("path")
-                        .datum(kde_result)
-                        .style("stroke", "none")
-                        .style("fill","#69b3a2")
-                        .attr("d", d3.area()
+                }
+                function kernelEpanechnikov(k) {
+                    return (v) =>
+                        Math.abs((v /= k)) <= 1 ? (0.75 * (1 - v * v)) / k : 0;
+                }
+                function gaussianKernel(k) {
+                    return (v) =>
+                        ((1 / Math.sqrt(2 * Math.PI)) *
+                            Math.exp(-0.5 * (v / k) * (v / k))) /
+                        k;
+                }
+
+                let width = 250;
+                let height = 70;
+                let margin = {
+                    top: 10,
+                    right: 0,
+                    bottom: 30,
+                    left: 0,
+                };
+                let inner_width = width - margin.left - margin.right;
+                let inner_height = height - margin.bottom - margin.top;
+
+                let svg = selection
+                    .append("svg")
+                    .attr("width", width)
+                    .attr("height", height)
+                    .append("g")
+                    .attr(
+                        "transform",
+                        "translate(" + margin.left + "," + margin.top + ")",
+                    );
+
+                data = data.sort(d3.ascending);
+
+                let min = d3.min(data);
+                let max = d3.max(data);
+                let range = max - min;
+                let offset = Math.max(range * 0.1, 1);
+
+                let irq =
+                    (d3.quantile(data, 0.75) - d3.quantile(data, 0.25)) / 1.349;
+                let sigma = Math.min(d3.deviation(data), irq);
+                let est_h = 0.5 * sigma * data.length ** -0.2;
+                est_h = est_h != est_h ? 0.2 : est_h;
+
+                let x = d3
+                    .scaleLinear()
+                    .domain([min - offset, max + offset])
+                    .range([0, inner_width]);
+                svg.append("g")
+                    .attr("transform", "translate(0," + inner_height + ")")
+                    .call(d3.axisBottom(x).ticks(5));
+
+                let kde = kernelDensityEstimator(
+                    gaussianKernel(est_h),
+                    x.ticks(100),
+                );
+                let kde_result = kde(data);
+                let kde_max = d3.max(kde_result.map((v) => v[1]));
+
+                let y = d3
+                    .scaleLinear()
+                    .domain([0, kde_max])
+                    .range([inner_height, 0]);
+
+                svg.append("path")
+                    .datum(kde_result)
+                    .style("stroke", "none")
+                    .style("fill", "#69b3a2")
+                    .attr(
+                        "d",
+                        d3
+                            .area()
                             .x((d) => x(d[0]))
                             .y1((d) => y(d[1]))
                             .y0((d) => y(0))
-                            .curve(d3.curveCatmullRom)
-                        );
-                })
+                            .curve(d3.curveCatmullRom),
+                    );
+            }
+
+            renderHeader();
+            renderData();
         </script>
     </head>
     <body>
         <h1>PAGE_TITLE</h1>
-        <br>
+        <br />
         <a href="index.html">&lt Back</a>
-        <br>
-        TABLE_TARGET
+        <br />
+        <div style="display: flex; gap: 2em; padding: 2em 0.5em 0.25em 0.5em">
+            <input type="button" id="prior-page" value="Prior Page" />
+            <div id="page-indicator"></div>
+            <input type="button" id="next-page" value="Next Page" />
+        </div>
+        <table id="table-target">
+            <thead>
+                <tr></tr>
+            </thead>
+            <tbody></tbody>
+        </table>
     </body>
-</html>
\ No newline at end of file
+</html>
diff --git a/scripts/plot_consensus_distance_caf.py b/scripts/plot_consensus_distance_caf.py
new file mode 100644
index 0000000..18d4937
--- /dev/null
+++ b/scripts/plot_consensus_distance_caf.py
@@ -0,0 +1,44 @@
+import sys
+
+import matplotlib.pyplot as plt
+import numpy as np
+
+caf_file = sys.argv[1]
+
+gap_info = {}
+prior = {}
+
+with open(caf_file, "r") as f:
+    for line in f:
+        tokens = line.strip().split(",")
+        name = tokens[8].strip()
+        qstart = int(tokens[10])
+        qend = int(tokens[11])
+        tstart = int(tokens[5])
+        is_neg_strand = int(tokens[13])
+
+        if name in prior:
+            pstart, pend, p_is_neg, p_tstart = prior[name]
+
+            if tstart - p_tstart < 10000:
+                if name not in gap_info:
+                    gap_info[name] = []
+
+                if not is_neg_strand and not p_is_neg:
+                    gap = qstart - pend
+                elif is_neg_strand and p_is_neg:
+                    gap = qend - pstart
+                else:
+                    continue
+
+                gap_info[name].append(gap)
+
+        prior[name] = (qstart, qend, is_neg_strand, tstart)
+
+
+for gap_name, gap_vals in sorted(
+    gap_info.items(), key=lambda k: len(k[1]), reverse=True
+):
+    plt.title(gap_name)
+    plt.hist(gap_vals, 500)
+    plt.show()
diff --git a/scripts/plot_consensus_distance_repeatmasker.py b/scripts/plot_consensus_distance_repeatmasker.py
new file mode 100644
index 0000000..23dab81
--- /dev/null
+++ b/scripts/plot_consensus_distance_repeatmasker.py
@@ -0,0 +1,191 @@
+# This file plots histograms of join distance per tandem repeat family given a repeatmasker formatted bed file as an argument.
+
+import sys
+
+import matplotlib.pyplot as plt
+import numpy as np
+
+caf_file = sys.argv[1]
+
+gap_info = {}
+length_estimate = {}
+sequences = []
+
+
+def get_gap(
+    pstart,
+    pend,
+    qstart,
+    qend,
+    qremaining,
+    ppos,
+    is_pos,
+    check_valid: bool = True,
+):
+    c_len = max(qremaining + qend, qremaining + qstart)
+    try:
+        if is_pos and ppos:
+            if check_valid:
+                assert pstart <= qstart <= qend and pstart <= pend <= qend
+            gap = qstart - pend
+            gap /= c_len
+        elif not is_pos and not ppos:
+            if check_valid:
+                assert qstart <= qend <= pend and qstart <= pstart <= pend
+            gap = pstart - qend
+            gap /= c_len
+        else:
+            gap = None
+    except AssertionError:
+        if is_pos and ppos:
+            print(f"Violation: {pstart} => {pend} => {qstart} => {qend}")
+        else:
+            print(f"Violation: {pend} <= {pstart} <= {qend} <= {qstart}")
+
+        print(f"Offending annotation: {' '.join(line.split('\t')[:-1])}")
+        print(f"Offending sequences:\n\t {'\n\t'.join(seqs)}")
+        gap = None
+
+    return gap
+
+
+with open(caf_file, "r") as f:
+    for line in f:
+        shared_name = line.strip().split("\t")[3]  # .split("#")[-1]
+        seqs = line.strip().split("\t")[-1].split(",")
+        sequences.extend(seqs)
+
+        if len(seqs) <= 1:
+            continue
+
+        # Sort by location on the target...
+        seqs = sorted(seqs, key=lambda k: int(k.split(" ")[5]))
+
+        prior = None
+        length = 0
+
+        for seq in seqs:
+            tokens = seq.split(" ")
+            name = f"{tokens[9]}#{tokens[10]}"
+            is_pos = tokens[8] == "+"
+            join_id = int(tokens[14])
+
+            if is_pos:
+                qstart = int(tokens[11])
+                qend = int(tokens[12])
+                qremaining = int(tokens[13].strip("()"))
+            else:
+                qstart = int(tokens[13])
+                qend = int(tokens[12])
+                qremaining = int(tokens[11].strip("()"))
+
+            assert qend >= qstart
+            length += qend - qstart
+
+            if prior is not None:
+                pname, ppos, pstart, pend = prior
+                if pname != name:
+                    gap = None
+                else:
+                    gap = get_gap(pstart, pend, qstart, qend, qremaining, ppos, is_pos)
+
+                if gap is not None:
+                    if shared_name not in gap_info:
+                        gap_info[shared_name] = []
+
+                    if gap > 2:
+                        print(
+                            f"Gap > 2, Gap Value {gap}: {seq}\n\t {'\n\t'.join(seqs)}"
+                        )
+                    gap_info[shared_name].append(gap)
+
+            prior = (name, is_pos, qstart, qend)
+
+        old_length_est = length_estimate.get(shared_name, (0, 0, 0))
+        length_estimate[shared_name] = (
+            old_length_est[0] + length,
+            old_length_est[1] + 1,
+            max(old_length_est[2], length),
+        )
+
+
+sequences.sort(key=lambda seq: int(seq.split(" ")[5]))
+gap_nojoin_info = {}
+other_priors = {}
+target_nojoin_info = {}
+
+for seq in sequences:
+    tokens = seq.split(" ")
+    name = f"{tokens[9]}#{tokens[10]}"
+    is_pos = tokens[8] == "+"
+    join_id = int(tokens[14])
+    tstart = int(tokens[5])
+
+    if is_pos:
+        qstart = int(tokens[11])
+        qend = int(tokens[12])
+        qremaining = int(tokens[13].strip("()"))
+    else:
+        qstart = int(tokens[13])
+        qend = int(tokens[12])
+        qremaining = int(tokens[11].strip("()"))
+
+    random_prior = other_priors.get(name, None)
+    if random_prior is not None:
+        ppos, pstart, pend, pjoin_id, p_tstart = random_prior
+        if pjoin_id == join_id:
+            gap = None
+        else:
+            gap = get_gap(pstart, pend, qstart, qend, qremaining, ppos, is_pos, False)
+
+        target_gap = tstart - p_tstart
+
+        if gap is not None:
+            if name not in target_nojoin_info:
+                target_nojoin_info[name] = []
+            target_nojoin_info[name].append(target_gap)
+
+            if name not in gap_nojoin_info:
+                gap_nojoin_info[name] = []
+            gap_nojoin_info[name].append(gap)
+
+    other_priors[name] = (is_pos, qstart, qend, join_id, tstart)
+
+
+for gap_name, gap_join_vals in sorted(
+    gap_info.items(), key=lambda k: len(k[1]), reverse=True
+):
+    gap_join_vals = np.array(gap_join_vals)
+    # gap_vals = gap_vals[np.abs(gap_vals) > 1]
+
+    avg_len = length_estimate[gap_name][0] / length_estimate[gap_name][1]
+    max_len = length_estimate[gap_name][2]
+
+    avg_pos_d = np.mean(gap_join_vals[gap_join_vals >= 0])
+    avg_neg_d = np.mean(gap_join_vals[gap_join_vals <= 0])
+
+    info = f"Positive Avg: {avg_pos_d:.02f},\n Negative Avg: {avg_neg_d:.02f}, ({avg_len / avg_pos_d:.02f}, {max_len / avg_pos_d:.02f})"
+
+    range = (0, np.max(gap_join_vals))
+    x = np.arange(*range)
+    lamb = 1 / avg_pos_d
+
+    fig, (ax1, ax2, ax3) = plt.subplots(1, 3, squeeze=True)
+    ax1.set_title(f"{gap_name} (Avg Length: {avg_len:.02f}, Max Length: {max_len})")
+
+    ax1.hist(gap_join_vals, 50, label=info)
+    # plt.plot(x, lamb * np.exp(-lamb * x))
+    ax1.legend()
+
+    ax2.set_title(f"{gap_name} Non-Join Consensus Gaps")
+    ax2.hist(gap_nojoin_info[gap_name], 50)
+
+    ax3.set_title(f"{gap_name} Scatterplot")
+    ax3.scatter(target_nojoin_info[gap_name], gap_nojoin_info[gap_name])
+    ax3.set_xlabel("Target Distance")
+    ax3.set_ylabel("Consensus Distance")
+
+    w, h = fig.get_size_inches()
+    fig.set_size_inches(w * 3, h)
+    fig.tight_layout()
+    plt.show()
diff --git a/scripts/plot_distributions_aurora.py b/scripts/plot_distributions_aurora.py
new file mode 100644
index 0000000..dfe8961
--- /dev/null
+++ b/scripts/plot_distributions_aurora.py
@@ -0,0 +1,434 @@
+import sys
+import typing
+from dataclasses import dataclass, fields
+from pathlib import Path
+
+import matplotlib.pyplot as plt
+import numpy as np
+from matplotlib.colors import to_rgba
+from scipy.optimize import curve_fit, minimize
+from scipy.stats import (
+    ecdf,
+    expon,
+    genpareto,
+    gumbel_r,
+    halfnorm,
+    invweibull,
+    laplace_asymmetric,
+    norm,
+    weibull_min,
+)
+
+
+@dataclass
+class AuroraEntry:
+    annotation_count: int
+    target: str
+    target_start: list[int]
+    target_end: list[int]
+    query: list[str]
+    query_start: list[int]
+    query_end: list[int]
+    strand: list[str]
+    score: float
+    kimera80: list[float]
+    join_id: int
+    region: int
+
+    @classmethod
+    def from_line(cls, line: str) -> typing.Self:
+        parts = line.strip().split()
+
+        parsed = []
+        max_list_len = 0
+
+        for field, part in zip(fields(cls), parts):
+            if isinstance(field.type, str):
+                raise ValueError("Can't parse strings!")
+
+            origin = typing.get_origin(field.type)
+            if origin is not None and issubclass(origin, list):
+                tp = typing.get_args(field.type)[0]
+                parsed.append([tp(v) for v in part.split(",")])
+                max_list_len = max(max_list_len, len(parsed[-1]))
+            else:
+                parsed.append(field.type(part))
+
+        if max_list_len > 0:
+            parsed_new = []
+
+            for v in parsed:
+                if isinstance(v, list):
+                    if len(v) == max_list_len:
+                        parsed_new.append(v)
+                    elif len(v) == 1:
+                        parsed_new.append(v * max_list_len)
+                    else:
+                        raise ValueError(
+                            f"One of the elements has and invalid length: {v}"
+                        )
+                else:
+                    parsed_new.append(v)
+
+            parsed = parsed_new
+
+        return cls(*parsed)
+
+    def get_key(self) -> tuple[int, int]:
+        return (self.region, self.join_id)
+
+    def select(self, idx: int) -> typing.Self:
+        return type(self)(
+            self.annotation_count,
+            self.target,
+            [self.target_start[idx]],
+            [self.target_end[idx]],
+            [self.query[idx]],
+            [self.query_start[idx]],
+            [self.query_end[idx]],
+            [self.strand[idx]],
+            self.score,
+            [self.kimera80[idx]],
+            self.join_id,
+            self.region,
+        )
+
+
+if len(sys.argv) not in [2, 3]:
+    print("Usage:")
+    print(f"\t{Path(sys.argv[0]).name} AURORA_OUTPUT_FILE [dist|scatter]")
+    sys.exit(1)
+
+aurora_file = sys.argv[1]
+mode = sys.argv[2] if len(sys.argv) > 2 else "dist"
+
+if mode not in ["scatter", "dist"]:
+    print("Second argument (the mode) must be 'scatter' or 'dist'!")
+    sys.exit(1)
+
+if mode == "dist":
+    print("Generating distributions plots...")
+else:
+    print("Generating scatter plots...")
+
+joined_annots: dict[tuple[int, int], list[AuroraEntry]] = {}
+
+with open(aurora_file, "r") as f:
+    for line in f:
+        entry = AuroraEntry.from_line(line)
+        key = entry.get_key()
+        if key not in joined_annots:
+            joined_annots[key] = []
+        joined_annots[key].append(entry)
+
+for key, annots in joined_annots.items():
+    annots.sort(key=lambda k: min(k.target_start))
+
+
+random_stats = {}
+join_stats = {}
+prior_vals = {}
+seq_size = {}
+
+
+def consensus_dist(a: AuroraEntry, b: AuroraEntry, a_idx: int, b_idx: int):
+    def best(*a):
+        return min(a, key=lambda v: abs(v))
+
+    match (a.strand[a_idx], b.strand[b_idx]):
+        case ("+", "+"):
+            return b.query_start[b_idx] - a.query_end[a_idx] - 1
+        case ("-", "-"):
+            return a.query_end[a_idx] - b.query_start[b_idx] - 1
+        case ("-", "+"):
+            return None
+            return best(
+                a.query_start[a_idx] - b.query_start[b_idx] - 1,
+                b.query_end[b_idx] - a.query_end[a_idx] - 1,
+            )
+        case ("+", "-"):
+            return None
+            return best(
+                b.query_start[b_idx] - a.query_start[a_idx] - 1,
+                a.query_end[a_idx] - b.query_end[b_idx] - 1,
+            )
+        case _:
+            raise ValueError("Unknown stand configuration.")
+
+
+def _target_distance(a: AuroraEntry, b: AuroraEntry, ai: int, bi: int):
+    assert b.target_start[bi] - a.target_end[ai] - 1 >= 0
+    return b.target_start[bi] - a.target_end[ai] - 1
+
+
+def _kimura_dist(a, b, ai, bi):
+    return abs(b.kimera80[bi] - a.kimera80[ai])
+
+
+def _relative_consensus_dist(a, b, ai, bi):
+    d = consensus_dist(a, b, ai, bi)
+    sum_seq_len = max(
+        [
+            abs(v)
+            for v in (
+                a.query_end[ai] - a.query_start[ai],
+                b.query_end[bi] - b.query_start[bi],
+            )
+        ]
+    )
+    if d is None:
+        return None
+    try:
+        return d / sum_seq_len
+    except ZeroDivisionError:
+        return None
+
+
+stats_to_compute = {
+    "Target Distance": _target_distance,
+    "Divergence Change": _kimura_dist,
+    "Relative Consensus Distance": _relative_consensus_dist,
+}
+
+
+class Distribution:
+    def __init__(self, dist, defaults, exclude_location=True):
+        self._dist = dist
+        self.DEFAULTS = list(defaults)
+        self._excl_loc = exclude_location
+        self.NAME = getattr(
+            dist, "__name__", getattr(type(dist), "__qualname__", repr(dist))
+        )
+
+    def pdf(self, x, *args):
+        # print(*args)
+        if self._excl_loc:
+            return self._dist.pdf(x, *args[:-1], 0.0, args[-1])
+        else:
+            return self._dist.pdf(x, *args)
+
+    def cdf(self, x, *args):
+        if self._excl_loc:
+            return self._dist.cdf(x, *args[:-1], 0.0, args[-1])
+        else:
+            return self._dist.cdf(x, *args)
+
+    def logcdf(self, x, *args):
+        print(*args)
+        if self._excl_loc:
+            return self._dist.logcdf(x, *args[:-1], 0.0, args[-1])
+        else:
+            return self._dist.logcdf(x, *args)
+
+
+estimator = {
+    "Relative Consensus Distance": Distribution(
+        invweibull, (1.0, 0.0, 1.0), False
+    ),  # Distribution(invweibull, (1.0, 0.0, 1.0), False), Distribution(laplace_asymmetric, (1.0, 0.0, 1.0), False)
+    "Target Distance": Distribution(
+        genpareto, (0.0, 1.0)
+    ),  # Distribution(expon, (1.0,)),  # Distribution(genpareto, (0.0, 1.0)), Distribution(weibull_min, (1.0, 10000)
+    "Divergence Change": Distribution(halfnorm, (1.0,)),
+}
+
+
+def fit_dist(data, dist):
+    emp_cdf = ecdf(data).cdf
+
+    return curve_fit(
+        dist.cdf,
+        emp_cdf.quantiles,
+        emp_cdf.probabilities,
+        dist.DEFAULTS,
+        full_output=True,
+    )[0]
+
+
+random_idx_reference = {}
+random_is_join = {}
+
+for name in stats_to_compute:
+    join_stats[name] = {}
+    random_stats[name] = {}
+
+all_anots_flat = [ann for annots in joined_annots.values() for ann in annots]
+all_anots_flat.sort(key=lambda v: min(v.target_start))
+
+
+for ann_i, ann in enumerate(all_anots_flat):
+    for i in range(len(ann.query)):
+        name = ann.query[i]
+        (pann, j, pann_i) = prior_vals.get(name, (None, None, None))
+        if pann is not None:
+            # if pann.region == ann.region and pann.join_id == ann.join_id:
+            #    continue
+
+            (ann1, idx1), (ann2, idx2) = sorted(
+                [(pann, j), (ann, i)], key=lambda v: v[0].target_start[v[1]]
+            )
+
+            stats = {
+                stat_name: stats_to_compute[stat_name](ann1, ann2, idx1, idx2)
+                for stat_name in stats_to_compute
+            }
+
+            if all([v is not None for v in stats.values()]):
+                for stat_name, value in stats.items():
+                    values_per_query = random_stats[stat_name]
+
+                    if name not in values_per_query:
+                        values_per_query[name] = []
+                    values_per_query[name].append(value)
+
+                if name not in random_idx_reference:
+                    random_idx_reference[name] = []
+                    random_is_join[name] = []
+
+                random_idx_reference[name].append((ann_i, i, pann_i, j))
+                random_is_join[name].append(
+                    ann in joined_annots.get(pann.get_key(), [])
+                )
+
+        seq_size[name] = max(
+            seq_size.get(name, 1),
+            ann.query_start[i],
+            ann.query_end[i],
+        )
+        prior_vals[name] = (ann, i, ann_i)
+
+for k, annots in joined_annots.items():
+    if len(annots) <= 1:
+        continue
+
+    for a, b in zip(annots[:-1], annots[1:]):
+        for i in range(len(a.query)):
+            name = a.query[i]
+
+            stats = {
+                stat_name: stats_to_compute[stat_name](a, b, i, i)
+                for stat_name in stats_to_compute
+            }
+
+            if all([v is not None for v in stats.values()]):
+                for stat_name, value in stats.items():
+                    values_per_query = join_stats[stat_name]
+
+                    if name not in values_per_query:
+                        values_per_query[name] = []
+                    values_per_query[name].append(value)
+
+
+for query_name, _ in sorted(
+    next(iter(join_stats.values())).items(), key=lambda k: -len(k[1])
+):
+    # if not query_name.startswith("sin"):
+    #    continue
+    join_indexes = np.flatnonzero(random_is_join[query_name])
+    not_join_indexes = np.flatnonzero(~np.array(random_is_join[query_name]))
+
+    if mode == "dist":
+        fig, axs = plt.subplots(3, len(stats_to_compute))
+        axs = axs.T
+
+        fig.suptitle(f"{query_name} (Size: {seq_size.get(query_name, 0)})")
+
+        for name, (ax1, ax2, ax3) in zip(stats_to_compute, axs):
+            est = estimator[name]
+
+            join_samples = np.array(join_stats[name][query_name])
+            sx = np.linspace(join_samples.min(), join_samples.max(), 1000)
+            fit = fit_dist(join_samples, est)
+            ax1.set_title(f"Join {name}")
+            ax1.hist(
+                join_samples,
+                200,
+                density=True,
+                label=f"Mean: {np.mean(join_samples):.02f}\nSTD: {np.std(join_samples):.02f}",
+            )
+            ax1.plot(
+                sx,
+                est.pdf(sx, *fit),
+                label=f"Fit: {', '.join(f'{v:.02f}' for v in fit)}",
+            )
+            ax1.legend(fontsize="xx-small")
+
+            random_samples = np.array(random_stats[name][query_name])
+            sx2 = np.linspace(random_samples.min(), random_samples.max(), 1000)
+            fit2 = fit_dist(random_samples[not_join_indexes], est)
+            ax2.set_title(f"All {name}")
+            ax2.plot(
+                [0],
+                [0],
+                color="black",
+                visible=False,
+                label=f"Mean: {np.mean(random_samples):.02f}\nSTD: {np.std(random_samples):.02f}",
+            )
+            ax2.hist(
+                [random_samples[not_join_indexes], random_samples[join_indexes]],
+                200,
+                label=["Not Joined", "Joined"],
+                density=True,
+                stacked=True,
+                color=[to_rgba(c) for c in ["tab:orange", "tab:blue"]],
+            )
+            ax2.plot(
+                sx2,
+                est.pdf(sx2, *fit2),
+                "black",
+                label=f"Fit: {', '.join(f'{v:.02f}' for v in fit2)}",
+            )
+            ax2.legend(fontsize="xx-small")
+
+            ax3.set_title("CDFs")
+            ax3.ecdf(join_samples, label="Joins CDF")
+            ax3.ecdf(random_samples[not_join_indexes], label="No Joins CDF")
+            ax3.plot(sx, est.cdf(sx, *fit), label="Est. Join CDF")
+            ax3.plot(sx2, est.cdf(sx2, *fit2), label="Est. No Join CDF")
+            ax3.legend(fontsize="xx-small")
+
+        fig.set_size_inches(16, 8)
+        fig.tight_layout()
+        plt.show()
+    else:
+        plt.title(f"{query_name} (Size: {seq_size.get(query_name, 0)})")
+
+        join_art = plt.plot(
+            np.array(random_stats["Target Distance"][query_name])[join_indexes],
+            np.array(random_stats["Consensus Distance"][query_name])[join_indexes],
+            "ro",
+            picker=5,
+            label="Joins",
+        )
+        no_join_art = plt.plot(
+            np.array(random_stats["Target Distance"][query_name])[not_join_indexes],
+            np.array(random_stats["Consensus Distance"][query_name])[not_join_indexes],
+            "bo",
+            picker=5,
+            label="Not Joins",
+        )
+        plt.xlabel("Target Distance")
+        plt.ylabel("Consensus Distance")
+        plt.legend()
+        fig = plt.gcf()
+
+        def on_pick(evt):
+            mask = join_indexes if evt.artist == join_art else not_join_indexes
+
+            for idx in evt.ind:
+                idx = mask[idx]
+                annot_idx, sub_i, pann_idx, p_sub_i = random_idx_reference[query_name][
+                    idx
+                ]
+                print(f"Index: {annot_idx}, Sub-Index: {sub_i}")
+                print(
+                    f"\tTarget Distance: {random_stats['Target Distance'][query_name][idx]}"
+                )
+                print(
+                    f"\tConsensus Distance: {random_stats['Consensus Distance'][query_name][idx]}"
+                )
+                print(f"\tPrior: {all_anots_flat[pann_idx].select(p_sub_i)}")
+                print(f"\tCurrent: {all_anots_flat[annot_idx].select(sub_i)}")
+                print(f"\tIs Joined: {random_is_join[query_name][idx]}")
+
+        fig.canvas.mpl_connect("pick_event", on_pick)
+        plt.show()
diff --git a/scripts/plot_target_distance_distribution.py b/scripts/plot_target_distance_distribution.py
new file mode 100755
index 0000000..c107e36
--- /dev/null
+++ b/scripts/plot_target_distance_distribution.py
@@ -0,0 +1,200 @@
+# Plots distributions of distances between sequences of the same family in the Genome
+# Takes a single bed file as an argument.
+# Results vary between exponential and power-law depending on the family. Basically all look exponential with fatter tails for a good chunk of families.
+#
+# Most likely, based on results on understanding the genome (and shape!), the actual distributions are very likely Hyper-Exponential Distributions.
+# Basically, it means each family is sampled from one of a weighted set of exponential distributions with different rates.
+# This would make sense as TE's have different rates depending on what part of the genome your in (basically, there are functional domains or regions)
+#
+# Testing this though, would require an implementation of Prony's method to determine the fit. I wasn't able to find any implementations so this
+# and it seems implementing such a method would take quite some time. It may be worth eventually adding if better fit's are needed.
+#
+# For now, an exponential distribution seems to provide a good enough approximation for use in aurora. It also is easy to fit well.
+# Here's an interesting paper on the topic that seems to have landed in the same space I've been in: https://www.columbia.edu/~ww2040/FittingMixturesPerfEval98.pdf
+
+bed_file = sys.argv[1]
+
+seq_info = {}
+cs = np.inf
+ce = -np.inf
+
+with open(bed_file, "r") as f:
+    for line in f:
+        tokens = line.strip().split()
+        start = int(tokens[1])
+        end = int(tokens[2])
+        join_id = int(tokens[12])
+        name = str(tokens[3])
+
+        if name.endswith("Simple_repeat"):
+            continue
+
+        name = name.upper()
+
+        if name not in seq_info:
+            seq_info[name] = []
+        seq_info[name].append((start, end, join_id))
+
+        cs = min(cs, start)
+        ce = max(ce, end)
+
+
+class Distribution:
+    def __init__(self, dist, defaults, exclude_location=True):
+        self._dist = dist
+        self.DEFAULTS = list(defaults)
+        self._excl_loc = exclude_location
+        self.NAME = getattr(
+            dist, "__name__", getattr(type(dist), "__qualname__", repr(dist))
+        )
+
+    def pdf(self, x, *args):
+        # print(*args)
+        if self._excl_loc:
+            return self._dist.pdf(x, *args[:-1], 0.0, args[-1])
+        else:
+            return self._dist.pdf(x, *args)
+
+    def cdf(self, x, *args):
+        if self._excl_loc:
+            return self._dist.cdf(x, *args[:-1], 0.0, args[-1])
+        else:
+            return self._dist.cdf(x, *args)
+
+    def logcdf(self, x, *args):
+        print(*args)
+        if self._excl_loc:
+            return self._dist.logcdf(x, *args[:-1], 0.0, args[-1])
+        else:
+            return self._dist.logcdf(x, *args)
+
+
+distributions = [
+    Distribution(expon, (1000.0,)),
+    Distribution(weibull_min, (2.0, 1000.0)),
+    # Distribution(invweibull, (1.0, 20000.0)),
+    Distribution(lomax, (1.0, 1000.0)),
+    # Distribution(burr12, (1.0, 1.0, 1000.0)),
+    # Distribution(lognorm, (1.0, 1000.0)),
+    # Distribution(fisk, (1.0, 1000.0)),
+    Distribution(genpareto, (0.0, 1000.0)),
+    Distribution(betaprime, (1.0, 2.0, 1000.0)),
+]
+
+
+for name, info in sorted(seq_info.items(), key=lambda k: -len(k[1])):
+    # if not name.startswith("CHARLIE1#"):
+    #    continue
+
+    info = np.array(info)
+    print(info.shape)
+    info = info[np.argsort(info[:, 0])]
+    starts = info[:, 0]
+    ends = info[:, 1]
+
+    # Get distances between consecutive sequences...
+    _, indexes = np.unique(info[:, 2], return_index=True)
+    indexes = np.sort(indexes)
+
+    dists = np.diff(starts[indexes])
+
+    counts, bins = np.histogram(dists, "auto", density=True)
+    x = (bins[1:] + bins[:-1]) / 2
+
+    # Get fit for exponential dist...
+    beta = np.mean(dists)  # np.median(dists) / np.log(2)
+
+    # Get fit for weibull dist... (and CDF)...
+
+    valid_values = np.isfinite(wcdfx) & np.isfinite(wcdfy)
+    fit_line = curve_fit(
+        lambda x, m, b: m * x + b,
+        wcdfx[valid_values],
+        wcdfy[valid_values],
+        sigma=wcdfy_err[valid_values],
+    )[0]
+
+    wk = fit_line[0]
+    w_beta = np.exp(-fit_line[1] / wk)
+
+    cdf_fits = [
+        curve_fit(
+            distrib.cdf,
+            cdf.quantiles,
+            cdf.probabilities,
+            distrib.DEFAULTS,
+            full_output=True,
+        )[0]
+        for distrib in distributions
+    ]
+    # print(cdf_fits)
+    # raise ValueError
+
+    # Printout results...
+    print(f"Count: {starts.shape[0]}")
+    print(f"Genome Size: {ce - cs}")
+    est_scale = (ce - cs) / starts.shape[0]
+    print(f"Avg Occurance Rate (in nucleotides): {est_scale}")
+    print(f"Exponential Fit: Scale (beta): {beta}, Rate (lambda): {1 / beta}")
+    print(
+        f"Weibull Line Fit: Shape (k): {wk}, Scale (beta): {w_beta}, Rate (lambda): {1 / w_beta}"
+    )
+    with np.printoptions(suppress=True):
+        for distrib, fit in zip(distributions, cdf_fits):
+            print(f"{distrib.NAME} CDF Fit: {fit}")
+
+    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2)
+    fig.suptitle(name)
+
+    ax1.set_title("Histogram and Predicted PDFs")
+    ax1.set_ylabel("Density")
+    ax1.set_xlabel("Distance (nucleotides)")
+    ax1.stairs(counts, bins, fill=True, color="tan", label="Histogram")
+    ax1.plot(
+        x, weibull_min.pdf(x, wk, 0.0, w_beta), color="orange", label="Weibull Line Fit"
+    )
+    ax1.plot(x, expon.pdf(x, 0.0, est_scale), color="green", label="Naive Fit Exp")
+    for distrib, fit in zip(distributions, cdf_fits):
+        ax1.plot(x, distrib.pdf(x, *fit), label=f"{distrib.NAME} CDF Fit")
+    ax1.legend()
+
+    ax2.set_title("Weibull-Space of EDF")
+    ax2.set_ylabel(r"$ \ln(x) $")
+    ax2.set_xlabel(r"$ \ln(-\ln(1 - F(x))) $")
+    ax2.scatter(wcdfx, wcdfy, label="Empirical Distribution Function")
+    ax2.plot(
+        wcdfx,
+        fit_line[1] + fit_line[0] * wcdfx,
+        label=f"Line: {fit_line[0]:.02f}x + {fit_line[1]:.02f}",
+    )
+    ax2.legend()
+
+    ax3.set_title("Cumulative Distribution Functions")
+    ax3.set_ylabel(r"$ P(X <= x) $")
+    ax3.set_xlabel("Distance (nucleotides)")
+    ax3.scatter(cdf.quantiles, cdf.probabilities, color="tan", label="EDF")
+    ax3.plot(
+        cdf.quantiles,
+        weibull_min.cdf(cdf.quantiles, wk, 0.0, w_beta),
+        color="orange",
+        label="Weibull Line Fit",
+    )
+    ax3.plot(
+        cdf.quantiles,
+        expon.cdf(cdf.quantiles, 0.0, est_scale),
+        color="green",
+        label="Naive Fit Exp",
+    )
+    for distrib, fit in zip(distributions, cdf_fits):
+        ax3.plot(
+            cdf.quantiles,
+            distrib.cdf(cdf.quantiles, *fit),
+            label=f"{distrib.NAME} CDF Fit",
+        )
+    ax3.legend()
+
+    ax4.plot(cdf.quantiles, cdf.probabilities)
+
+    fig.set_size_inches(12, 12)
+    fig.tight_layout()
+    plt.show()
diff --git a/src/alignment.rs b/src/alignment.rs
index d0d073c..a99580e 100644
--- a/src/alignment.rs
+++ b/src/alignment.rs
@@ -1,4 +1,5 @@
-use anyhow::Result;
+use anyhow::{anyhow, Context, Result};
+use itertools::Itertools;
 use serde::Deserialize;
 use std::collections::HashMap;
 use std::io::{BufRead, BufReader, Read};
@@ -520,7 +521,7 @@ impl AlignmentData {
         //   16: ACCT/GGA/TCT/CGTGGCCT/CGGGGGTTGGGGACCCCTG
         //   17: 14p41g.matrix
 
-        let substitution_matrices = VecMap::from(SubstitutionMatrix::parse(matrices));
+        let substitution_matrices = VecMap::from(SubstitutionMatrix::parse(matrices)?);
 
         let mut target_groups: Vec<TargetGroup> = vec![];
         let mut target_name_map: VecMap<String> = VecMap::new();
@@ -531,31 +532,47 @@ impl AlignmentData {
         let caf_lines = BufReader::new(caf).lines();
 
         caf_lines
-            .map(|l| l.expect("failed to read line"))
-            .filter(|l| !l.is_empty())
+            .filter_ok(|l| !l.is_empty())
             .enumerate()
-            .for_each(|(line_num, line)| {
+            .try_for_each(|(line_num, line_unchecked)| {
+                let error_msg =
+                    |msg, col| move || format!("{} at line '{}', column '{}'", msg, line_num, col);
+
+                let error_msg_str = |msg: String, col: usize| {
+                    move || format!("{} at line '{}', column '{}'", msg, line_num, col)
+                };
+
+                let line = line_unchecked?;
                 let tokens: Vec<&str> = line.split(',').collect();
 
+                if tokens.len() < 18 {
+                    return Err(anyhow!(
+                        "line {} does not have at least 18 columns!",
+                        line_num
+                    ));
+                }
+
                 let target_name = tokens[4].to_string();
-                let target_start =
-                    str::parse::<usize>(tokens[5]).expect("failed to parse target start");
-                let target_end =
-                    str::parse::<usize>(tokens[6]).expect("failed to parse target end");
+                let target_start = str::parse::<usize>(tokens[5])
+                    .with_context(error_msg("failed to parse target start", 5))?;
+                let target_end = str::parse::<usize>(tokens[6])
+                    .with_context(error_msg("failed to parse target end", 6))?;
                 let query_name = tokens[8].to_string();
-                let query_start =
-                    str::parse::<usize>(tokens[10]).expect("failed to parse query start");
-                let query_end = str::parse::<usize>(tokens[11]).expect("failed to parse query end");
-                let query_remaining =
-                    str::parse::<usize>(tokens[12]).expect("failed to parse query remaining");
+                let query_start = str::parse::<usize>(tokens[10])
+                    .with_context(error_msg("failed to parse query start", 10))?;
+                let query_end = str::parse::<usize>(tokens[11])
+                    .with_context(error_msg("failed to parse query end", 11))?;
+                let query_remaining = str::parse::<usize>(tokens[12])
+                    .with_context(error_msg("failed to parse query remaining", 12))?;
 
                 let strand = match tokens[13] {
-                    "0" => Strand::Forward,
-                    "1" => Strand::Reverse,
-                    _ => {
-                        panic!()
-                    }
-                };
+                    "0" => Ok(Strand::Forward),
+                    "1" => Ok(Strand::Reverse),
+                    v => Err(anyhow!(error_msg_str(
+                        format!("invalid strand value: '{}'", v),
+                        13
+                    )())),
+                }?;
 
                 let (query_start, query_end) = match strand {
                     Strand::Forward => (query_start, query_end),
@@ -595,7 +612,10 @@ impl AlignmentData {
                     .values()
                     .enumerate()
                     .find(|(_, m)| m.name == substitution_matrix_name)
-                    .expect("unknown substitution matrix")
+                    .with_context(error_msg_str(
+                        format!("unknown substitution matrix '{}'", substitution_matrix_name),
+                        17,
+                    ))?
                     .0;
 
                 target_group.alignments.push(Alignment {
@@ -610,7 +630,9 @@ impl AlignmentData {
                     query_id,
                     substitution_matrix_id,
                 });
-            });
+
+                Ok(())
+            })?;
 
         if let Some(buf) = ultra {
             let buf_reader = BufReader::new(buf);
diff --git a/src/annotation.rs b/src/annotation.rs
index c904a57..0c97aba 100644
--- a/src/annotation.rs
+++ b/src/annotation.rs
@@ -108,7 +108,12 @@ fn get_strings(simple_annotations: &[SimpleAnnotation], simplify: bool) -> [Stri
     [
         get_mutli_option_string(simple_annotations, |v| v.target_start, simplify),
         get_mutli_option_string(simple_annotations, |v| v.target_end, simplify),
-        get_mutli_option_string(simple_annotations, |v| v.query_name.clone(), simplify),
+        get_mutli_option_string(
+            simple_annotations,
+            // Remove spaces as it breaks the format...
+            |v| v.query_name.replace(" ", "-"),
+            simplify,
+        ),
         get_mutli_option_string(simple_annotations, |v| v.query_start, simplify),
         get_mutli_option_string(simple_annotations, |v| v.query_end, simplify),
         get_mutli_option_string(simple_annotations, |v| v.strand, simplify),
@@ -127,7 +132,8 @@ impl AmbiguousAnnotation {
         format!(
             "{:w0$} {:w1$} {:w2$} {:w3$} {:w4$} {:w5$} {:w6$} {:w7$} {:4.3} {:w8$} {:w9$} {}",
             self.annotations.len(),
-            self.target_name,
+            // Remove spaces as it breaks the format...
+            self.target_name.replace(" ", "-"),
             ts,
             te,
             qn,
diff --git a/src/assembly.rs b/src/assembly.rs
index 527e08f..0c0c166 100644
--- a/src/assembly.rs
+++ b/src/assembly.rs
@@ -4,8 +4,10 @@ use itertools::Itertools;
 
 use crate::{
     alignment::{Alignment, Strand},
-    score_params::ScoreParams,
-    segments::SegmentedMatrix,
+    chunks::ProximityGroup,
+    join_estimation::{JoinEstimator, JoinStatisticsCollector, LinkInfo},
+    segments::{Block, InitialSegments, SegmentedMatrix, SegmentedMatrixView},
+    trace_statistics::{calculate_region_statistics, QueryStatistics, RegionStatistics},
     AnnotationArgs,
 };
 
@@ -79,73 +81,280 @@ pub struct Edge {
     pub link_type: LinkType,
 }
 
-fn piecewise_linear_cost(
-    neg_start: f64,
-    pos_start: f64,
-    neg_slope: f64,
-    pos_slope: f64,
-    value: f64,
-) -> f64 {
-    if value < neg_start {
-        (value - neg_start).abs() * neg_slope
-    } else if value > pos_slope {
-        (value - pos_start).abs() * pos_slope
+pub fn block_target_distance(first_block: &Block, second_block: &Block) -> isize {
+    second_block.col_start as isize - first_block.col_end as isize - 1
+}
+
+pub fn block_consensus_distance(first_block: &Block, second_block: &Block) -> (isize, LinkType) {
+    let select_closest = |prop1: (isize, LinkType), prop2: (isize, LinkType)| {
+        if prop1.0.abs() < prop2.0.abs() {
+            prop1
+        } else {
+            prop2
+        }
+    };
+
+    match (first_block.strand, second_block.strand) {
+        (Strand::Forward, Strand::Forward) => (
+            second_block.query_start as isize - first_block.query_end as isize - 1,
+            LinkType::Forward,
+        ),
+        (Strand::Reverse, Strand::Reverse) => (
+            first_block.query_end as isize - second_block.query_start as isize - 1,
+            LinkType::Reverse,
+        ),
+        (Strand::Forward, Strand::Reverse) => select_closest(
+            (
+                first_block.query_start as isize - second_block.query_start as isize - 1,
+                LinkType::FRInversion1,
+            ),
+            (
+                second_block.query_end as isize - first_block.query_end as isize - 1,
+                LinkType::FRInversion2,
+            ),
+        ),
+        (Strand::Reverse, Strand::Forward) => select_closest(
+            (
+                second_block.query_start as isize - first_block.query_start as isize - 1,
+                LinkType::RFInversion1,
+            ),
+            (
+                first_block.query_end as isize - second_block.query_end as isize - 1,
+                LinkType::RFInversion2,
+            ),
+        ),
+        _ => panic!("Invalid strand types!"),
+    }
+}
+
+pub fn block_length_on_query(b: &Block) -> usize {
+    b.query_end.abs_diff(b.query_start) + 1
+}
+
+#[allow(dead_code)]
+pub enum ConsensusDistanceNormalization {
+    Max,
+    Min,
+    Sum,
+    WithLength(usize),
+    WithUBAndLength(usize, usize),
+}
+
+pub fn relative_consensus_distance(
+    first_block: &Block,
+    second_block: &Block,
+    mode: ConsensusDistanceNormalization,
+) -> (f64, LinkType) {
+    let (mut dist, link_type) = block_consensus_distance(first_block, second_block);
+
+    if let ConsensusDistanceNormalization::WithUBAndLength(ub, _length) = mode {
+        dist = if dist > 0 {
+            dist.saturating_sub(ub as isize).max(0)
+        } else {
+            dist
+        }
+    }
+
+    let div = match mode {
+        ConsensusDistanceNormalization::Sum => {
+            block_length_on_query(first_block) + block_length_on_query(second_block)
+        }
+        ConsensusDistanceNormalization::Max => {
+            block_length_on_query(first_block).max(block_length_on_query(second_block))
+        }
+        ConsensusDistanceNormalization::Min => {
+            block_length_on_query(first_block).min(block_length_on_query(second_block))
+        }
+        ConsensusDistanceNormalization::WithLength(length) => length,
+        ConsensusDistanceNormalization::WithUBAndLength(_ub, length) => length,
+    };
+    (dist as f64 / div as f64, link_type)
+}
+
+fn is_joinable(
+    target_distance: isize,
+    consensus_distance: isize,
+    link_type: LinkType,
+    min_block_length: usize,
+    args: &AnnotationArgs,
+) -> bool {
+    let within_target_distance_threshold =
+        target_distance < args.target_join_distance as isize && target_distance >= 0;
+
+    let consensus_is_colinear = if link_type.is_inversion() {
+        consensus_distance.abs() < args.inversion_distance
     } else {
-        0.0
+        consensus_distance > -args.consensus_join_overlap
+            && consensus_distance < args.consensus_join_distance
+    };
+
+    // TODO: Hardcoded, change later...
+    let is_significant =
+        min_block_length >= 10 && -consensus_distance <= ((min_block_length / 2) as isize);
+
+    within_target_distance_threshold && consensus_is_colinear && is_significant
+}
+
+fn new_alignment_to_blocks_map(
+    segments: SegmentedMatrixView,
+    alignments: &[Alignment],
+) -> Vec<Vec<SegmentAndDenseRow>> {
+    let mut alignment_block_map = vec![Vec::<SegmentAndDenseRow>::new(); alignments.len()];
+
+    for (s_idx, segment) in segments.iter().enumerate() {
+        for (b_idx, block) in segment.blocks.iter().enumerate() {
+            if block.row_idx > 0 && block.row_idx <= alignments.len() {
+                alignment_block_map[block.row_idx - 1].push((s_idx, b_idx));
+            }
+        }
     }
+
+    alignment_block_map
+}
+
+fn calculate_unexplained_bases(
+    segments: SegmentedMatrixView,
+    region_statistics: &RegionStatistics,
+    first_block_segment: usize,
+    second_block_segment: usize,
+    second_block_target_start: usize,
+) -> usize {
+    let ub = region_statistics.unexplained_bases[second_block_segment]
+        .abs_diff(region_statistics.unexplained_bases[first_block_segment])
+        + (second_block_target_start - segments[second_block_segment].start_col);
+    ub
 }
 
-fn get_link_cost(
+pub fn gather_join_statistics<T: JoinStatisticsCollector>(
+    group: &ProximityGroup,
+    initial_segments: &InitialSegments,
+    query_lengths: &HashMap<usize, usize>,
     annotation_args: &AnnotationArgs,
-    score_params: &ScoreParams,
-    consensus_gap: f64,
-    target_gap: f64,
-) -> f64 {
-    // Minimum cost (a query loop)
-    let min_value = score_params.query_loop_score;
-    let value_range = (score_params.query_loop_score - score_params.query_jump_score).abs();
-
-    // Get overlap and gap ranges with free areas incorperated in, otherwise math is not quite right.
-    let overlap_range = ((annotation_args.consensus_join_overlap as f64)
-        - (annotation_args.free_join_consensus_overlap as f64))
-        .abs()
-        .max(1.0);
-    let gap_range = ((annotation_args.consensus_join_distance as f64)
-        - (annotation_args.free_join_consensus_gap as f64))
-        .abs()
-        .max(1.0);
-
-    // Compute slopes....
-    let lambda = -value_range
-        * (annotation_args.join_target_gap_penalty
-            / annotation_args.target_join_distance.max(1) as f64)
-            .abs();
-    let alpha =
-        -value_range * (annotation_args.join_consensus_overlap_penalty / overlap_range).abs();
-    let beta = -value_range * (annotation_args.join_consensus_gap_penalty / gap_range).abs();
-
-    // Cost = linear consensus cost + linear target gap cost...
-    min_value
-        + piecewise_linear_cost(
-            -(annotation_args.free_join_consensus_overlap as f64).abs(),
-            (annotation_args.free_join_consensus_gap as f64).abs(),
-            alpha,
-            beta,
-            consensus_gap,
-        )
-        + lambda * target_gap
+) -> Vec<(usize, T)> {
+    let alignments = group.alignments;
+
+    let mut query_ids: Vec<usize> = alignments.iter().map(|a| a.query_id).unique().collect();
+    query_ids.sort();
+
+    let region_stats = calculate_region_statistics(initial_segments);
+    let mut query_stats: Vec<(usize, T)> = Vec::with_capacity(query_ids.len());
+
+    query_ids
+        .iter()
+        // grab the alignments for this ID
+        .map(|id| {
+            (
+                *id,
+                alignments
+                    .iter()
+                    .enumerate()
+                    .filter(|&(_, a)| a.query_id == *id)
+                    .map(|(i, a)| {
+                        let b = Block::from_alignment(a, group.target_start, i, 0.0, 0.0);
+                        let seg_i = initial_segments
+                            .view_segments()
+                            .partition_point(|v| v.start_col <= b.col_start)
+                            .saturating_sub(1);
+                        (seg_i, b)
+                    }),
+            )
+        })
+        .for_each(|(id, compat_alignments)| {
+            let mut new_stats = T::new();
+
+            gather_join_statistics_single_family(
+                compat_alignments,
+                *query_lengths
+                    .get(&id)
+                    .expect("Query length missing for alignment!"),
+                initial_segments.view_segments(),
+                &region_stats,
+                annotation_args,
+                &mut new_stats,
+            );
+
+            query_stats.push((id, new_stats));
+        });
+
+    query_stats
+}
+
+fn link_info(
+    first_block: &Block,
+    second_block: &Block,
+    annotation_args: &AnnotationArgs,
+    unexplained_bases: usize,
+    consensus_length: usize,
+    neighbors: bool,
+) -> LinkInfo {
+    let (consensus_distance, link_type) = block_consensus_distance(first_block, second_block);
+    let joinable = is_joinable(
+        block_target_distance(first_block, second_block),
+        consensus_distance,
+        link_type,
+        block_length_on_query(first_block).min(block_length_on_query(second_block)),
+        annotation_args,
+    );
+
+    LinkInfo {
+        target_distance: block_target_distance(first_block, second_block),
+        consensus_distance,
+        link_type,
+        consensus_length,
+        unexplained_bases,
+        neighbors,
+        joinable,
+    }
+}
+
+fn gather_join_statistics_single_family<'a>(
+    compatable_alignments: impl Iterator<Item = (usize, Block)>,
+    consensus_length: usize,
+    segments: SegmentedMatrixView,
+    region_statistics: &RegionStatistics,
+    args: &AnnotationArgs,
+    join_stats: &mut impl JoinStatisticsCollector,
+) {
+    let compatable_blocks = compatable_alignments
+        .sorted_by_key(|(_u_b, a)| a.col_start)
+        .collect_vec();
+
+    compatable_blocks
+        .iter()
+        .enumerate()
+        .for_each(|(idx, (a_segment_idx, a_block))| {
+            compatable_blocks[idx + 1..].iter().enumerate().for_each(
+                |(idx2, (b_segment_idx, b_block))| {
+                    let link_info = &link_info(
+                        a_block,
+                        b_block,
+                        args,
+                        calculate_unexplained_bases(
+                            segments,
+                            region_statistics,
+                            *a_segment_idx,
+                            *b_segment_idx,
+                            b_block.col_start,
+                        ),
+                        consensus_length,
+                        idx + 1 == idx2,
+                    );
+                    join_stats.add(a_block, b_block, link_info);
+                },
+            )
+        })
 }
 
-fn link_assemblies(
+fn link_assemblies<T: JoinEstimator>(
     graph: &mut HashMap<(SegmentAndDenseRow, SegmentAndDenseRow), Edge>,
     compatable_blocks: impl Iterator<Item = (usize, usize)>,
-    alignments: &[Alignment],
+    consensus_length: usize,
     segments: &SegmentedMatrix,
-    score_params: &ScoreParams,
+    query_statistics: &QueryStatistics<T>,
+    region_statistics: &RegionStatistics,
     args: &AnnotationArgs,
 ) {
     // this relies on the alignments being sorted by target start
-    // note: this assertion iter will only run in debug mode
     let compatable_blocks = compatable_blocks.sorted().collect_vec();
 
     compatable_blocks.iter().enumerate().for_each(|(idx, a)| {
@@ -158,98 +367,43 @@ fn link_assemblies(
             let a_block = &segments[a.0].blocks[a.1];
             let b_block = &segments[b.0].blocks[b.1];
 
-            // We allow this now, otherwise inversions might not properly join...
-            // If same alignment, and neighboring segments, don't join...
-            //if a_block.row_idx == b_block.row_idx && ((b.0 - 1) <= a.0) {
-            //    return;
-            //}
-
-            let target_distance = b_block.col_start as isize - a_block.col_end as isize;
-
-            let a_length = a_block.query_end.abs_diff(a_block.query_start);
-            let b_length = b_block.query_end.abs_diff(b_block.query_start);
-            let min_length = a_length.min(b_length);
-
-            let select_closest = |prop1: (isize, LinkType), prop2: (isize, LinkType)| {
-                if prop1.0.abs() < prop2.0.abs() {
-                    prop1
-                } else {
-                    prop2
-                }
-            };
-
-            // Query bounds are reversed for reverse sequences, so the start is actually greater than the end (Ex. start: 1510 -> end: 105)
-
-            let (consensus_distance, link_type) = match (
-                alignments[a_block.row_idx - 1].strand,
-                alignments[b_block.row_idx - 1].strand,
-            ) {
-                (Strand::Forward, Strand::Forward) => (
-                    b_block.query_start as isize - a_block.query_end as isize,
-                    LinkType::Forward,
-                ),
-                (Strand::Reverse, Strand::Reverse) => (
-                    a_block.query_end as isize - b_block.query_start as isize,
-                    LinkType::Reverse,
-                ),
-                (Strand::Forward, Strand::Reverse) => select_closest(
-                    (
-                        a_block.query_start as isize - b_block.query_start as isize,
-                        LinkType::FRInversion1,
-                    ),
-                    (
-                        b_block.query_end as isize - a_block.query_end as isize,
-                        LinkType::FRInversion2,
-                    ),
-                ),
-                (Strand::Reverse, Strand::Forward) => select_closest(
-                    (
-                        b_block.query_start as isize - a_block.query_start as isize,
-                        LinkType::RFInversion1,
-                    ),
-                    (
-                        a_block.query_end as isize - b_block.query_end as isize,
-                        LinkType::RFInversion2,
-                    ),
+            let link = link_info(
+                a_block,
+                b_block,
+                args,
+                calculate_unexplained_bases(
+                    segments,
+                    region_statistics,
+                    a.0,
+                    b.0,
+                    b_block.col_start,
                 ),
-                _ => panic!("Invalid strand types!"),
-            };
-
-            let within_target_distance_threshold =
-                target_distance < args.target_join_distance as isize;
-
-            let consensus_is_colinear = if link_type.is_inversion() {
-                consensus_distance.abs() < args.inversion_distance
-            } else {
-                consensus_distance > -args.consensus_join_overlap
-                    && consensus_distance < args.consensus_join_distance
-            };
-
-            // TODO: Hardcoded, change later...
-            let is_significant =
-                min_length >= 10 && -consensus_distance <= ((min_length / 2) as isize);
-
-            let weight = if a_block.row_idx == b_block.row_idx && ((b.0 - 1) <= a.0) {
-                score_params.query_loop_score
-            } else {
-                get_link_cost(
-                    args,
-                    score_params,
-                    consensus_distance as f64,
-                    target_distance as f64,
-                )
-            };
-
-            if within_target_distance_threshold && consensus_is_colinear && is_significant {
-                graph.insert(
-                    ((a.0, a_block.row_idx), (b.0, b_block.row_idx)),
-                    Edge {
-                        weight,
-                        first_sparse_row: a.1,
-                        second_sparse_row: b.1,
-                        link_type,
-                    },
-                );
+                consensus_length,
+                a.0 + 1 == b.0,
+            );
+
+            if link.joinable {
+                let join_prob = query_statistics
+                    .estimator
+                    .predict(a_block, b_block, &link, false);
+
+                if join_prob >= args.join_likelihood_threshold {
+                    let weight = if a_block.row_idx == b_block.row_idx && ((b.0 - 1) <= a.0) {
+                        1.0
+                    } else {
+                        join_prob
+                    };
+
+                    graph.insert(
+                        ((a.0, a_block.row_idx), (b.0, b_block.row_idx)),
+                        Edge {
+                            weight,
+                            first_sparse_row: a.1,
+                            second_sparse_row: b.1,
+                            link_type: link.link_type,
+                        },
+                    );
+                }
             }
         });
     });
@@ -266,22 +420,15 @@ pub struct SegmentAssemblyGraph {
 }
 
 impl SegmentAssemblyGraph {
-    pub fn new(
+    pub fn new<T: JoinEstimator>(
         alignments: &[Alignment],
+        query_lengths: &HashMap<usize, usize>,
         segments: &SegmentedMatrix,
-        score_params: &ScoreParams,
+        region_statistics: &RegionStatistics,
+        query_statistics: &[QueryStatistics<T>],
         annotation_args: &AnnotationArgs,
     ) -> Self {
-        let mut alignment_block_map = vec![Vec::<SegmentAndDenseRow>::new(); alignments.len()];
-
-        for (s_idx, segment) in segments.iter().enumerate() {
-            for (b_idx, block) in segment.blocks.iter().enumerate() {
-                if block.row_idx > 0 && block.row_idx <= alignments.len() {
-                    alignment_block_map[block.row_idx - 1].push((s_idx, b_idx));
-                }
-            }
-        }
-
+        let alignment_block_map = new_alignment_to_blocks_map(segments, alignments);
         let mut query_ids: Vec<usize> = alignments.iter().map(|a| a.query_id).unique().collect();
 
         query_ids.sort();
@@ -292,19 +439,25 @@ impl SegmentAssemblyGraph {
             .iter()
             // grab the alignments for this ID
             .map(|id| {
-                alignments
-                    .iter()
-                    .enumerate()
-                    .filter(|&(_, a)| a.query_id == *id)
-                    .flat_map(|(a_idx, _)| alignment_block_map[a_idx].iter().copied())
+                (
+                    *id,
+                    alignments
+                        .iter()
+                        .enumerate()
+                        .filter(|&(_, a)| a.query_id == *id)
+                        .flat_map(|(a_idx, _)| alignment_block_map[a_idx].iter().copied()),
+                )
             })
-            .for_each(|compat_blocks| {
+            .for_each(|(id, compat_blocks)| {
                 link_assemblies(
                     &mut link_graph,
                     compat_blocks,
-                    alignments,
+                    *query_lengths
+                        .get(&id)
+                        .expect("Unable to find query length for alignment!"),
                     segments,
-                    score_params,
+                    &query_statistics[id],
+                    region_statistics,
                     annotation_args,
                 );
             });
diff --git a/src/balanced_tree.rs b/src/balanced_tree.rs
index 603b68d..80629cf 100644
--- a/src/balanced_tree.rs
+++ b/src/balanced_tree.rs
@@ -181,7 +181,7 @@ impl<T: Into<usize> + TryFrom<usize> + Copy + Debug> AVLIndexSet<T> {
     }
 
     /// Iterate over the elements in the tree, in sorted order. Returns the element index and the depth of the element in the tree.
-    pub fn iter(&self) -> AVLInOrderSetIterator<T> {
+    pub fn iter(&self) -> AVLInOrderSetIterator<'_, T> {
         let mut stack = Vec::with_capacity(self.depth() + 1);
         if let Some(root) = self.root {
             stack.push((root, 0_u8));
@@ -190,7 +190,7 @@ impl<T: Into<usize> + TryFrom<usize> + Copy + Debug> AVLIndexSet<T> {
         AVLInOrderSetIterator { tree: self, stack }
     }
 
-    pub fn bfs(&self) -> AVLBFSSetIterator<T> {
+    pub fn bfs(&self) -> AVLBFSSetIterator<'_, T> {
         let mut queue = VecDeque::new();
 
         if let Some(root) = self.root {
diff --git a/src/history_tracing.rs b/src/history_tracing.rs
index 8f39011..a01f220 100644
--- a/src/history_tracing.rs
+++ b/src/history_tracing.rs
@@ -265,7 +265,7 @@ struct JoinLink {
     origin_history: usize,
     linked_history: usize,
     link_side: Side,
-    score: f64,
+    probability: f64,
 }
 
 impl JoinLink {
@@ -273,7 +273,7 @@ impl JoinLink {
         self.origin_history == other.origin_history
             && self.linked_history == other.linked_history
             && self.link_side == other.link_side
-            && ((self.score - other.score).abs() < epsilon)
+            && ((self.probability - other.probability).abs() < epsilon)
     }
 }
 
@@ -297,7 +297,7 @@ impl Ord for JoinLink {
             .cmp(&other.origin_history)
             .then_with(|| self.linked_history.cmp(&other.linked_history))
             .then_with(|| self.link_side.cmp(&other.link_side))
-            .then_with(|| self.score.total_cmp(&other.score))
+            .then_with(|| self.probability.total_cmp(&other.probability))
     }
 }
 
@@ -362,7 +362,7 @@ fn get_valid_joins_for_current_group(
                                         origin_history: prior_origin_history,
                                         linked_history: prior_link_history,
                                         link_side: prior_linkable_side,
-                                        score: weight,
+                                        probability: weight,
                                     });
 
                                 solved_current +=
@@ -659,6 +659,15 @@ fn get_join_endpoints_from_links(
     (left_side, right_side)
 }
 
+fn prior_history_index(entry: &HistoryEntry) -> usize {
+    if let HistoryEntry::Append(info) | HistoryEntry::Join(info) = entry {
+        info.prior_history
+    } else {
+        // 0 for the root of the histories...
+        0
+    }
+}
+
 fn add_single_join(
     histories: &mut Vec<HistoryEntry>,
     segments: &[Segment],
@@ -679,11 +688,13 @@ fn add_single_join(
     let new_group_index =
         segment_groups[segment_idx].add_group(&segments[segment_idx], join_blocks);
 
-    let join_prior_index = match (left_join_link, right_join_link) {
-        (Some(lv), Some(rv)) => lv.origin_history.min(rv.origin_history),
-        (Some(v), None) | (None, Some(v)) => v.origin_history,
-        _ => prior_hist_idx,
-    };
+    let join_prior_index = prior_history_index(
+        &histories[match (left_join_link, right_join_link) {
+            (Some(lv), Some(rv)) => lv.origin_history.min(rv.origin_history),
+            (Some(v), None) | (None, Some(v)) => v.origin_history,
+            _ => panic!("Unreachable branch here, something went really wrong..."),
+        }],
+    );
 
     // Clean expired history entries from the join path....
     let simplified_join_index = remove_expired_history_entries(
@@ -694,8 +705,19 @@ fn add_single_join(
         history_depth,
     );
 
-    let right_score = right_join_link.as_ref().map(|v| v.score).unwrap_or(0.0);
-    let left_score = left_join_link.as_ref().map(|v| v.score).unwrap_or(0.0);
+    let prior_is_skip = match &histories[prior_hist_idx] {
+        HistoryEntry::Append(val) | HistoryEntry::Join(val) => val.group_index == 0,
+        HistoryEntry::Root => true,
+    };
+
+    let right_score = right_join_link
+        .as_ref()
+        .map(|v| score_params.join_transition(prior_is_skip, v.probability))
+        .unwrap_or(0.0);
+    let left_score = left_join_link
+        .as_ref()
+        .map(|v| score_params.join_transition(prior_is_skip, v.probability))
+        .unwrap_or(0.0);
     // If two joins, we incurred a expensive query-to-query jump in the past, so now we undo that cost...
     let bonus = if left_join_link.is_some() && right_join_link.is_some() {
         -score_params.query_jump_score
@@ -1158,6 +1180,7 @@ fn get_joinable_extensions<'a>(
         .collect_vec()
 }
 
+#[derive(Debug)]
 struct JoinStackEntry {
     joined_history_offset: usize,
     trace_segment_offset: usize,
@@ -1170,6 +1193,7 @@ struct AddedBlockInfo {
     join_index: usize,
 }
 
+#[derive(Debug)]
 struct JoinStack {
     pub stack: Vec<JoinStackEntry>,
     pub next_join_index: usize,
@@ -1193,7 +1217,7 @@ impl JoinStack {
     /// Try adding one or two joins to the join stack if this block is a join and has linked edges.
     fn try_push(&mut self, entry: &HistoryEntry, added_block_info: Option<&AddedBlockInfo>) {
         if let (HistoryEntry::Join(val), Some(info)) = (entry, added_block_info) {
-            let mut top_stack_entries = 0;
+            let top_offset = self.stack.len();
 
             if val.join_left_block.caused_by_history != info.history_index {
                 self.stack.push(JoinStackEntry {
@@ -1201,7 +1225,6 @@ impl JoinStack {
                     trace_segment_offset: info.trace_stack_index,
                     join_index: info.join_index,
                 });
-                top_stack_entries += 1;
             }
 
             if val.join_right_block.caused_by_history != info.history_index {
@@ -1210,11 +1233,9 @@ impl JoinStack {
                     trace_segment_offset: info.trace_stack_index,
                     join_index: info.join_index,
                 });
-                top_stack_entries += 1;
             }
 
-            let top_vals_offset = self.stack.len() - top_stack_entries;
-            self.stack[top_vals_offset..].sort_unstable_by_key(|v| v.joined_history_offset);
+            self.stack[top_offset..].sort_unstable_by_key(|v| v.joined_history_offset);
         }
     }
 
@@ -1278,7 +1299,7 @@ fn history_backtrace_append_block(
         .any(|&b| matches!(b.block_type, BlockType::Alignment | BlockType::TandemRepeat))
     {
         match joiner.check_for_join(current_history_index) {
-            // Case 2: Involved in a join, add new block, but don't
+            // Case 2: Involved in a join, add new block.
             BlockAction::Join(join_index, stack_pos) => {
                 let joins = get_joinable_extensions(
                     blocks.iter().copied(),
@@ -1383,6 +1404,13 @@ pub fn backtrace_histories(
         current_entry = &history.entries[current_idx];
     }
 
+    if joiner.stack.len() != 0 {
+        panic!(
+            "Backtrace not done properly, there are {} leftover values on the join stack!",
+            joiner.stack.len()
+        );
+    }
+
     // Reverse so trace segments go from start to end instead of end to start.
     refined_segments.reverse();
     refined_segments
diff --git a/src/join_estimation.rs b/src/join_estimation.rs
new file mode 100644
index 0000000..ea259b3
--- /dev/null
+++ b/src/join_estimation.rs
@@ -0,0 +1,343 @@
+use std::{
+    f64::{self},
+    fmt::Debug,
+    ops,
+};
+
+use crate::{
+    assembly::{relative_consensus_distance, ConsensusDistanceNormalization, LinkType},
+    p2estimator::custom_quantile_estimator::{LomaxQuant, MedianEstimator},
+    segments::Block,
+    statistics::{ln_add_exp, AssymetricLaplace, Distribution, ExponentialEstimator, HalfT, Lomax},
+};
+
+pub trait JoinEstimator: Clone + Default + Debug {
+    fn predict(
+        &self,
+        first_block: &Block,
+        second_block: &Block,
+        link_info: &LinkInfo,
+        log_space: bool,
+    ) -> f64;
+}
+
+pub struct LinkInfo {
+    #[allow(dead_code)]
+    pub target_distance: isize,
+    #[allow(dead_code)]
+    pub consensus_distance: isize,
+    pub link_type: LinkType,
+    pub consensus_length: usize,
+    pub unexplained_bases: usize,
+    #[allow(dead_code)]
+    pub neighbors: bool,
+    pub joinable: bool,
+}
+
+pub trait JoinStatisticsCollector: Clone + Debug {
+    fn new() -> Self;
+    fn new_from_prior(bayesian_prior: &Self, pseudo_count: usize) -> Self;
+    fn combine(&self, other: &Self) -> Self;
+    fn add(&mut self, first_block: &Block, second_block: &Block, link_info: &LinkInfo);
+}
+
+#[derive(Debug, Clone, Default)]
+pub struct BayesianJoinEstimator {
+    target_distance_join: ExponentialEstimator,
+    target_distance_nojoin: ExponentialEstimator,
+    divergence_join: HalfT,
+    divergence_nojoin: HalfT,
+    consensus_distance_join: AssymetricLaplace,
+    consensus_distance_nojoin: AssymetricLaplace,
+    join_prior: f64,
+}
+
+impl JoinEstimator for BayesianJoinEstimator {
+    fn predict(
+        &self,
+        first_block: &Block,
+        second_block: &Block,
+        link_info: &LinkInfo,
+        log_space: bool,
+    ) -> f64 {
+        let target_dist = link_info.unexplained_bases as f64;
+        // Absolute value as t-dist is symmetric and we want to get prob in tail, also, we know the mean is 0...
+        let divergence_diff = (second_block.kimura80 - first_block.kimura80).abs();
+        let (rel_con_dist, _join_type) = relative_consensus_distance(
+            first_block,
+            second_block,
+            ConsensusDistanceNormalization::WithUBAndLength(
+                link_info.unexplained_bases,
+                link_info.consensus_length,
+            ),
+        );
+
+        let join_score = self.join_prior.ln()
+            + self.target_distance_join.logpdf(target_dist)
+            + self.divergence_join.logpdf(divergence_diff)
+            + self.consensus_distance_join.logpdf(rel_con_dist);
+        let nojoin_score = (-self.join_prior).ln_1p()
+            + self.target_distance_nojoin.logpdf(target_dist)
+            + self.divergence_nojoin.logpdf(divergence_diff)
+            + self.consensus_distance_nojoin.logpdf(rel_con_dist);
+
+        let score_norm = ln_add_exp(join_score, nojoin_score);
+        let score = join_score - score_norm;
+
+        if log_space {
+            score
+        } else {
+            score.exp()
+        }
+    }
+}
+
+impl From<BayesianJoinStatistics> for BayesianJoinEstimator {
+    fn from(value: BayesianJoinStatistics) -> Self {
+        Self::from(&value)
+    }
+}
+
+#[derive(Debug, Clone, Copy)]
+struct MomentEstimator {
+    sum_square: f64,
+    sum: f64,
+    samples: usize,
+}
+
+impl MomentEstimator {
+    fn new() -> Self {
+        Self {
+            sum_square: 0.0,
+            sum: 0.0,
+            samples: 0,
+        }
+    }
+
+    fn to_psuedo_count(&self, count: usize) -> Self {
+        Self {
+            sum_square: (self.sum_square / self.samples.max(1) as f64) * count as f64,
+            sum: (self.sum / self.samples.max(1) as f64) * count as f64,
+            samples: count,
+        }
+    }
+
+    fn mean(&self) -> f64 {
+        self.sum / self.samples.max(1) as f64
+    }
+
+    fn variance(&self) -> f64 {
+        // TODO: Use shifted data alg for more accuracy...
+        (self.sum_square - (self.sum * self.sum) / self.samples.max(1) as f64)
+            / (self.samples.max(2) as f64 - 1.0)
+    }
+
+    fn standard_deviation(&self) -> f64 {
+        self.variance().sqrt()
+    }
+
+    fn samples(&self) -> usize {
+        self.samples
+    }
+}
+
+impl Default for MomentEstimator {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl ops::Add<&MomentEstimator> for &MomentEstimator {
+    type Output = MomentEstimator;
+
+    fn add(self, rhs: &MomentEstimator) -> Self::Output {
+        MomentEstimator {
+            sum_square: self.sum_square + rhs.sum_square,
+            sum: self.sum + rhs.sum,
+            samples: self.samples + rhs.samples,
+        }
+    }
+}
+
+impl ops::AddAssign<MomentEstimator> for MomentEstimator {
+    fn add_assign(&mut self, rhs: MomentEstimator) {
+        self.sum_square += rhs.sum_square;
+        self.sum += rhs.sum;
+        self.samples += rhs.samples;
+    }
+}
+
+impl ops::AddAssign<f64> for MomentEstimator {
+    fn add_assign(&mut self, rhs: f64) {
+        self.sum_square += rhs * rhs;
+        self.sum += rhs;
+        self.samples += 1;
+    }
+}
+
+impl From<&MomentEstimator> for ExponentialEstimator {
+    fn from(value: &MomentEstimator) -> Self {
+        Self::new(value.mean(), value.samples().max(1))
+    }
+}
+
+impl From<&MomentEstimator> for HalfT {
+    fn from(value: &MomentEstimator) -> Self {
+        Self::from_sample_mean(value.mean(), value.samples().max(1))
+    }
+}
+
+impl From<&LomaxQuant> for Lomax {
+    fn from(value: &LomaxQuant) -> Self {
+        // Using quantile selection trick originally developed for frechet... (quant ratio formula)
+        // Choose first quantile p, then second such that p2 = 1 - (1 - p)^2 and you get this nice closed form for alpha...
+        let p1 = LomaxQuant::PROB1;
+        let p2 = LomaxQuant::PROB2;
+
+        let v1 = value.ppf(p1);
+        let v2 = value.ppf(p2);
+
+        let a = -(1.0 - p1).ln() / (v2 / v1 - 1.0).ln();
+        let y = v1 / ((1.0 - p1).powf(-1.0 / a) - 1.0);
+        Lomax::new(a, y)
+    }
+}
+
+impl From<&MedianEstimator> for ExponentialEstimator {
+    fn from(value: &MedianEstimator) -> Self {
+        Self::new(value.ppf(0.5) / 2.0_f64.ln(), value.samples())
+    }
+}
+
+impl From<&MedianEstimator> for HalfT {
+    fn from(value: &MedianEstimator) -> Self {
+        Self::new(
+            value.ppf(0.5) / Self::new(1.0, value.samples()).ppf(0.5),
+            value.samples(),
+        )
+    }
+}
+
+impl From<&BayesianJoinStatistics> for BayesianJoinEstimator {
+    fn from(statistics: &BayesianJoinStatistics) -> Self {
+        Self {
+            target_distance_join: (&statistics.joinable_target_distance).into(),
+            target_distance_nojoin: (&statistics.unjoinable_target_distance).into(),
+            divergence_join: (&statistics.joinable_divergence).into(),
+            divergence_nojoin: HalfT::from_sample_mean(
+                statistics
+                    .unjoinable_divergence
+                    .mean()
+                    .max(statistics.joinable_divergence.mean()),
+                statistics.unjoinable_divergence.samples(),
+            ),
+            consensus_distance_join: AssymetricLaplace::from_exponential_halves(
+                0.0,
+                statistics.joinable_consensus_neg.mean(),
+                statistics.joinable_consensus_pos.mean(),
+            ),
+            // TODO: Consider replacing with beta dist, better matches dists we see...
+            consensus_distance_nojoin: AssymetricLaplace::symmetric_from_moments(
+                statistics.unjoinable_consensus.mean(),
+                statistics.unjoinable_consensus.standard_deviation(),
+            ),
+            // We take sqrt since we count all pairs, not just neighbors.
+            join_prior: (statistics.joinable_target_distance.samples() as f64
+                / (statistics.joinable_target_distance.samples()
+                    + statistics.unjoinable_target_distance.samples())
+                .max(1) as f64),
+        }
+    }
+}
+
+#[derive(Debug, Clone, Default)]
+pub struct BayesianJoinStatistics {
+    joinable_target_distance: MomentEstimator,
+    unjoinable_target_distance: MomentEstimator,
+    joinable_divergence: MomentEstimator,
+    unjoinable_divergence: MomentEstimator,
+    joinable_consensus_pos: MomentEstimator,
+    joinable_consensus_neg: MomentEstimator,
+    unjoinable_consensus: MomentEstimator,
+}
+
+impl JoinStatisticsCollector for BayesianJoinStatistics {
+    fn new() -> Self {
+        Self::default()
+    }
+
+    fn new_from_prior(bayesian_prior: &Self, pseudo_count: usize) -> Self {
+        Self {
+            joinable_target_distance: bayesian_prior
+                .joinable_target_distance
+                .to_psuedo_count(pseudo_count),
+            unjoinable_target_distance: bayesian_prior
+                .unjoinable_target_distance
+                .to_psuedo_count(pseudo_count),
+            joinable_divergence: bayesian_prior
+                .joinable_divergence
+                .to_psuedo_count(pseudo_count),
+            unjoinable_divergence: bayesian_prior
+                .unjoinable_divergence
+                .to_psuedo_count(pseudo_count),
+            joinable_consensus_pos: bayesian_prior
+                .joinable_consensus_pos
+                .to_psuedo_count(pseudo_count),
+            joinable_consensus_neg: bayesian_prior
+                .joinable_consensus_neg
+                .to_psuedo_count(pseudo_count),
+            unjoinable_consensus: bayesian_prior
+                .unjoinable_consensus
+                .to_psuedo_count(pseudo_count),
+        }
+    }
+
+    fn add(&mut self, first_block: &Block, second_block: &Block, link_info: &LinkInfo) {
+        //if !link_info.neighbors {
+        //    return;
+        //}
+
+        let target_dist = link_info.unexplained_bases;
+        let divergence_diff = (second_block.kimura80 - first_block.kimura80).abs();
+        let (rel_con_dist, join_type) = relative_consensus_distance(
+            first_block,
+            second_block,
+            ConsensusDistanceNormalization::WithUBAndLength(
+                link_info.unexplained_bases,
+                link_info.consensus_length,
+            ),
+        );
+
+        if link_info.joinable {
+            self.joinable_target_distance += target_dist as f64;
+            self.joinable_divergence += divergence_diff;
+            if matches!(join_type, LinkType::Forward | LinkType::Reverse) {
+                if rel_con_dist >= 0.0 {
+                    self.joinable_consensus_pos += rel_con_dist.abs();
+                } else {
+                    self.joinable_consensus_neg += rel_con_dist.abs();
+                }
+            }
+        } else {
+            self.unjoinable_target_distance += target_dist as f64;
+            self.unjoinable_divergence += divergence_diff;
+            if matches!(join_type, LinkType::Forward | LinkType::Reverse) {
+                self.unjoinable_consensus += rel_con_dist;
+            }
+        }
+    }
+
+    fn combine(&self, other: &Self) -> Self {
+        Self {
+            joinable_target_distance: &self.joinable_target_distance
+                + &other.joinable_target_distance,
+            unjoinable_target_distance: &self.unjoinable_target_distance
+                + &other.unjoinable_target_distance,
+            joinable_divergence: &self.joinable_divergence + &other.joinable_divergence,
+            unjoinable_divergence: &self.unjoinable_divergence + &other.unjoinable_divergence,
+            joinable_consensus_pos: &self.joinable_consensus_pos + &other.joinable_consensus_pos,
+            joinable_consensus_neg: &self.joinable_consensus_neg + &other.joinable_consensus_neg,
+            unjoinable_consensus: &self.unjoinable_consensus + &other.unjoinable_consensus,
+        }
+    }
+}
diff --git a/src/main.rs b/src/main.rs
index 9d6b56b..d4dcbc4 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -6,11 +6,18 @@ mod balanced_tree;
 mod chunks;
 mod confidence;
 mod history_tracing;
+mod join_estimation;
 mod matrix;
+
+// Keeping around for enhanced parameter estimation work...
+#[allow(dead_code)]
+mod p2estimator;
+
 mod pipeline;
 mod score_params;
 mod segment_groups;
 mod segments;
+mod statistics;
 mod substitution_matrix;
 mod support;
 
@@ -18,31 +25,39 @@ mod support;
 #[allow(dead_code)]
 mod union_find;
 
+mod trace_statistics;
 mod util;
 mod viterbi;
 mod viz;
 mod windowed_scores;
 
+use core::ops::RangeBounds;
 use std::{
     collections::HashMap,
+    fmt::Debug,
     fs::{self, create_dir_all, File},
     io::{BufRead, BufReader, BufWriter, Write},
+    ops::Bound,
     path::PathBuf,
 };
 
 use alignment::AlignmentData;
 use chunks::ProximityGroup;
 
-use anyhow::{Ok, Result};
+use anyhow::{anyhow, Context, Ok, Result};
 use clap::{Args, Parser};
 use itertools::Itertools;
 use rayon::prelude::*;
+use std::str::FromStr;
+use thiserror::Error;
 use viz::VizConstraint;
 
 use crate::{
     annotation::AmbiguousAnnotation,
     chunks::validate_groups,
+    join_estimation::{BayesianJoinEstimator, BayesianJoinStatistics},
     pipeline::{run_history_trace, run_naive_trace, NaiveTraceResults},
+    trace_statistics::{trace_statistics, OccuranceCountingMode, TraceStatistics},
     viz::{
         stats::{write_family_statistics, write_inversion_statistics},
         write_index_file, ICON_SVG,
@@ -101,6 +116,31 @@ pub struct PerformanceArgs {
     pub num_threads: usize,
 }
 
+#[derive(Error, Debug)]
+enum ParseRangedError<T: Debug, E> {
+    #[error(transparent)]
+    ParseError(#[from] E),
+    #[error("float value {0:?} is not between {1:?} and {2:?}")]
+    RangeError(T, Bound<T>, Bound<T>),
+}
+
+const fn ranged<F: PartialOrd + Debug + FromStr + Send + Sync + Clone + Copy + 'static>(
+    range: impl RangeBounds<F> + Send + Sync + Clone + 'static,
+) -> impl Fn(&str) -> Result<F, ParseRangedError<F, F::Err>> + Clone + Send + Sync + 'static {
+    move |v: &str| {
+        let f = F::from_str(v)?;
+        if range.contains(&f) {
+            Result::Ok(f)
+        } else {
+            Result::Err(ParseRangedError::RangeError(
+                f,
+                range.start_bound().map(|v| *v),
+                range.end_bound().map(|v| *v),
+            ))
+        }
+    }
+}
+
 #[derive(Args, Debug, Clone, Default)]
 pub struct AnnotationArgs {
     /// The penalty of jumping between query models
@@ -108,7 +148,8 @@ pub struct AnnotationArgs {
         short = 'J',
         long = "query-jump",
         default_value = "-127.0",
-        value_name = "f"
+        value_name = "f",
+        value_parser = ranged::<f64>(..-1.0)
     )]
     pub query_jump_penalty: f64,
 
@@ -118,11 +159,12 @@ pub struct AnnotationArgs {
         short = 'L',
         long = "skip-loop",
         default_value = "30",
-        value_name = "n"
+        value_name = "n",
+        value_parser = ranged::<usize>(1..)
     )]
     pub num_skip_loops_eq_to_jump: usize,
 
-    /// The max distance across unaligned positions
+    /// The max distance across positions
     /// in the target (genome) at which a join is
     /// considered between compatible alignments
     #[arg(
@@ -133,13 +175,24 @@ pub struct AnnotationArgs {
     )]
     pub target_join_distance: usize,
 
-    /// The maximum overlap in the consensus at which
+    /// Removes joins that fall below this threshold of occuring.
+    /// Value can be set between 0 and 1.
+    #[arg(
+        long = "join-likelihood-threshold",
+        default_value = "0.25",
+        value_name = "f",
+        value_parser = ranged::<f64>(0.0..1.0)
+    )]
+    pub join_likelihood_threshold: f64,
+
+    /// The maximum allowed overlap in the consensus at which
     /// a join is considered between compatible alignments.
     #[arg(
         short = 'O',
         long = "consensus-join-overlap",
         default_value = "200",
-        value_name = "n"
+        value_name = "n",
+        value_parser = ranged::<isize>(0..)
     )]
     pub consensus_join_overlap: isize,
 
@@ -148,14 +201,21 @@ pub struct AnnotationArgs {
     #[arg(
         short = 'C',
         long = "consensus-join-distance",
-        default_value = "2000",
-        value_name = "n"
+        default_value = "2500",
+        value_name = "n",
+        value_parser = ranged::<isize>(0..)
     )]
     pub consensus_join_distance: isize,
 
     /// The maximum seperation or overlap in nucleotides on both target and consensus
     /// for a join to be allowed between inverted alignments.
-    #[arg(long = "inversion-distance", default_value = "50", value_name = "n")]
+    #[arg(
+        short = 'I',
+        long = "inversion-distance",
+        default_value = "200",
+        value_name = "n",
+        value_parser = ranged::<isize>(0..)
+    )]
     pub inversion_distance: isize,
 
     /// The size of the window looked at to determine a single alignment score in nucleotides.
@@ -163,7 +223,8 @@ pub struct AnnotationArgs {
         short = 'W',
         long = "window-size",
         default_value = "31",
-        value_name = "n"
+        value_name = "n",
+        value_parser = ranged::<usize>(1..)
     )]
     pub score_window_size: usize,
 
@@ -172,7 +233,8 @@ pub struct AnnotationArgs {
         short = 'B',
         long = "background-window-size",
         default_value = "61",
-        value_name = "n"
+        value_name = "n",
+        value_parser = ranged::<usize>(1..)
     )]
     pub background_window_size: usize,
 
@@ -189,7 +251,8 @@ pub struct AnnotationArgs {
     #[arg(
         long = "min-segment-confidence",
         default_value = "0.1",
-        value_name = "f"
+        value_name = "f",
+        value_parser = ranged::<f64>(0.0..1.0)
     )]
     pub min_block_confidence: f64,
 
@@ -208,42 +271,6 @@ pub struct AnnotationArgs {
     /// Set to 0 or greater to disable.
     #[arg(long = "min-history-score", default_value = "-500.0", value_name = "f")]
     pub min_relative_history_score: f64,
-
-    /// The amount of overlap between two joinable sequences in the consensus
-    /// before a penalty starts being applied to the join.
-    #[arg(long = "free-join-overlap", default_value = "4", value_name = "n")]
-    pub free_join_consensus_overlap: usize,
-
-    /// The amount of gap between two joinable sequences
-    /// before a penalty starts being applied to the join.
-    #[arg(long = "free-join-gap", default_value = "10", value_name = "n")]
-    pub free_join_consensus_gap: usize,
-
-    /// The amount of penalty to apply to a join at the maximum allowed consensus overlap
-    /// A value of 1 means to apply a penalty equal to a query jump.
-    /// The cost grows linearly to this value as the overlap increases.
-    #[arg(
-        long = "consensus-overlap-penalty",
-        default_value = "1.0",
-        value_name = "f"
-    )]
-    pub join_consensus_overlap_penalty: f64,
-
-    /// The amount of penalty to apply to a join at the maximum allowed consensus gap
-    /// A value of 1 means to apply a penalty equal to a query jump.
-    /// The cost grows linearly to this value as the gap increases.
-    #[arg(
-        long = "consensus-gap-penalty",
-        default_value = "0.5",
-        value_name = "f"
-    )]
-    pub join_consensus_gap_penalty: f64,
-
-    /// The amount of penalty to apply to a join at the maximum allowed target gap
-    /// A value of 1 means to apply a penalty equal to a query jump.
-    /// The cost grows linearly to this value as the gap between the sequences in the target space increases.
-    #[arg(long = "target-gap-penalty", default_value = "0.4", value_name = "f")]
-    pub join_target_gap_penalty: f64,
 }
 
 #[derive(Args, Debug, Clone, Default)]
@@ -321,18 +348,18 @@ fn main() -> Result<()> {
         if let Result::Ok(metadata) = fs::metadata(&viz_args.viz_output_path) {
             if metadata.is_dir() {
                 // TODO: real error
-                panic!(
-                    "directory: {} already exists",
-                    viz_args.viz_output_path.to_str().unwrap()
-                )
+                return Result::Err(anyhow!(
+                    "directory: '{}' already exists",
+                    viz_args.viz_output_path.to_str().unwrap_or("?")
+                ));
             }
         }
 
-        create_dir_all(&viz_args.viz_output_path)?;
-        viz_args.viz_output_path = viz_args.viz_output_path.canonicalize()?;
-
         if let Some(path) = &viz_args.viz_reference_bed_path {
-            let file = File::open(path).expect("failed to open viz reference bed file");
+            let file = File::open(path).context(format!(
+                "failed to open viz reference bed file: '{}'",
+                path.to_str().unwrap_or("?")
+            ))?;
             let reader = BufReader::new(file);
 
             let mut chrom_list = vec![String::from("sentinel")];
@@ -342,36 +369,51 @@ fn main() -> Result<()> {
                 .lines()
                 .map(|l| l.unwrap())
                 .enumerate()
-                .for_each(|(line_num, line)| {
+                .try_for_each(|(line_num, line)| {
+                    let line_num_info = || format!("failed to read line {}", line);
+
                     let tokens: Vec<&str> = line.split_whitespace().collect();
                     let chrom = tokens[0].to_string();
-                    let start = tokens[1].parse::<usize>().expect("failed to parse int");
+                    let start = tokens[1].parse::<usize>().with_context(line_num_info)?;
 
-                    let last_chrom = chrom_list.last().expect("chrom list is empty");
+                    let last_chrom = chrom_list.last().context("chrom list is empty")?;
 
                     if chrom == *last_chrom {
                         if prev_start > start {
-                            panic!("bed file is unsorted");
+                            return Result::Err(anyhow!("bed file is unsorted"));
                         }
                     } else if !chrom_list.contains(&chrom) {
                         chrom_list.push(chrom.clone());
                         index.insert(chrom, line_num);
                     } else {
-                        panic!("bed file is unsorted");
+                        return Result::Err(anyhow!("bed file is unsorted"));
                     }
 
                     prev_start = start;
-                });
+
+                    Ok(())
+                })
+                .context(format!(
+                    "failed to parse bed file: '{}'",
+                    path.to_str().unwrap_or("?")
+                ))?;
 
             viz_args.viz_reference_bed_index = index;
         }
     }
 
-    let alignments_file = File::open(&args.alignments)?;
-    let matrices_file = File::open(&args.matrices)?;
+    let alignments_file = File::open(&args.alignments).context(format!(
+        "failed to open alignments file: '{}'",
+        args.alignments
+    ))?;
+    let matrices_file = File::open(&args.matrices)
+        .context(format!("failed to open matrices file: '{}'", args.matrices))?;
 
     let ultra_file = match args.ultra_args.ultra_file_path {
-        Some(ref path) => Some(File::open(path)?),
+        Some(ref path) => Some(File::open(path).context(format!(
+            "failed to open ultra file: '{}'",
+            path.to_str().unwrap_or("?")
+        ))?),
         None => None,
     };
 
@@ -395,22 +437,27 @@ fn main() -> Result<()> {
     if let Some(path) = &args.io_args.regions_path {
         let regions_file = File::create(path).unwrap();
         let mut regions_writer = BufWriter::new(regions_file);
-        proximity_groups.iter().enumerate().for_each(|(idx, g)| {
-            writeln!(
-                &mut regions_writer,
-                "{},{},{}:{},{}:{}",
-                idx,
-                alignment_data.target_name_map.get(g.target_id),
-                g.target_start,
-                g.target_end,
-                g.line_start,
-                g.line_end,
-            )
-            .expect("failed to write to regions file")
-        });
+        proximity_groups
+            .iter()
+            .enumerate()
+            .try_for_each(|(idx, g)| {
+                writeln!(
+                    &mut regions_writer,
+                    "{},{},{}:{},{}:{}",
+                    idx,
+                    alignment_data.target_name_map.get(g.target_id),
+                    g.target_start,
+                    g.target_end,
+                    g.line_start,
+                    g.line_end,
+                )
+                .context("failed to write to regions file")
+            })?;
     }
 
     if viz_args.viz {
+        create_dir_all(&viz_args.viz_output_path)?;
+        viz_args.viz_output_path = viz_args.viz_output_path.canonicalize()?;
         let mut index_file = File::create(viz_args.viz_output_path.join("index.html")).unwrap();
 
         write_index_file(
@@ -419,7 +466,7 @@ fn main() -> Result<()> {
             &proximity_groups,
             &viz_args.viz_constraints,
         )
-        .expect("failed to write to index.html");
+        .context("failed to write to index.html file for visualization")?;
     }
 
     debug_assert!(validate_groups(
@@ -448,22 +495,29 @@ fn main() -> Result<()> {
         .par_iter()
         .panic_fuse()
         .enumerate()
-        .map(|(region_idx, group)| {
-            (
-                region_idx,
-                run_naive_trace(group, &alignment_data, region_idx, &args),
-            )
-        })
-        .collect::<Vec<(usize, NaiveTraceResults)>>();
-    naive_results.sort_by_key(|v| v.0);
+        .map(|(region_idx, group)| run_naive_trace(group, &alignment_data, region_idx, &args))
+        .collect::<Vec<NaiveTraceResults<BayesianJoinStatistics>>>();
+    naive_results.sort_by_key(|v| v.region_index);
+
+    let trace_stats: TraceStatistics<BayesianJoinEstimator> = trace_statistics(
+        &naive_results,
+        &alignment_data,
+        OccuranceCountingMode::Segments,
+    );
 
     let mut results: Vec<(usize, Vec<AmbiguousAnnotation>)> = proximity_groups
         .par_iter()
         .zip(naive_results)
-        .map(|(group, (region_idx, mut naive_trace))| {
+        .map(|(group, mut naive_trace)| {
             (
-                region_idx,
-                run_history_trace(group, &alignment_data, &mut naive_trace, &args),
+                naive_trace.region_index,
+                run_history_trace(
+                    group,
+                    &alignment_data,
+                    &trace_stats,
+                    &mut naive_trace,
+                    &args,
+                ),
             )
         })
         .collect();
@@ -487,7 +541,11 @@ fn main() -> Result<()> {
                 .viz_output_path
                 .join("family_stats.html"),
         )?;
-        write_family_statistics(&mut family_stats_writer, &results)?;
+        write_family_statistics(
+            &mut family_stats_writer,
+            &results,
+            &alignment_data.query_lengths,
+        )?;
         let mut inv_stats_writer = File::create(
             args.visualization_args
                 .viz_output_path
diff --git a/src/p2estimator.rs b/src/p2estimator.rs
new file mode 100644
index 0000000..509d943
--- /dev/null
+++ b/src/p2estimator.rs
@@ -0,0 +1,842 @@
+use std::{
+    cmp::Ordering,
+    ops::{Add, AddAssign},
+    usize,
+};
+
+use crate::statistics::Distribution;
+use itertools::Itertools;
+// Implementation of P2 estimator.
+// See "The P2 Algorithm for Dynamic Statistical Computing Calculation of Quantiles and Histograms Without Storing Observations"
+// at https://www.cse.wustl.edu/~jain/papers/ftp/psqr.pdf
+//
+// We replace the P2 interpolation with PCHIP instead (See paper A Method for Constructing Local Monotone Piecewise Cubic Interpolants by F. N. Fritsch and J. Butland, or https://doi.org/10.1137/0905021)
+
+pub struct QuantileEstimatorData<'a> {
+    ranks: &'a [usize],
+    values: &'a [f64],
+    targets: &'a [f64],
+    observations: &'a usize,
+}
+
+pub struct MutableQuantileEstimatorData<'a> {
+    ranks: &'a mut [usize],
+    values: &'a mut [f64],
+    targets: &'a [f64],
+    observations: &'a mut usize,
+}
+
+fn cubic_hermite_spline(x0: f64, y0: f64, x1: f64, y1: f64, m0: f64, m1: f64, x: f64) -> f64 {
+    let t = (x - x0) / (x1 - x0);
+    let ms0 = (x1 - x0) * m0;
+    let ms1 = (x1 - x0) * m1;
+
+    let h0 = (2.0 * t - 3.0) * t * t + 1.0;
+    let h1 = ((t - 2.0) * t + 1.0) * t;
+    let h2 = (-2.0 * t + 3.0) * t * t;
+    let h3 = (t - 1.0) * t * t;
+
+    h0 * y0 + h1 * ms0 + h2 * y1 + h3 * ms1
+}
+
+fn pchip_point_derivative(dx0: f64, dy0: f64, dx1: f64, dy1: f64) -> f64 {
+    let s0 = if dx0 != 0.0 { dy0 / dx0 } else { 0.0 };
+    let s1 = if dx1 != 0.0 { dy1 / dx1 } else { 0.0 };
+    if s0 * s1 > 0.0 {
+        let alpha = (1.0 / 3.0) * (1.0 + dx1 / (dx0 + dx1));
+        s0 * s1 / (alpha * s1 + (1.0 - alpha) * s0)
+    } else {
+        0.0
+    }
+}
+
+fn pchip_prediction(x_points: &[f64; 4], y_points: &[f64; 4], x: f64) -> f64 {
+    debug_assert!(x_points.is_sorted() && x <= x_points[2] && x >= x_points[1]);
+    let m0 = pchip_point_derivative(
+        x_points[1] - x_points[0],
+        y_points[1] - y_points[0],
+        x_points[2] - x_points[1],
+        y_points[2] - y_points[1],
+    );
+    let m1 = pchip_point_derivative(
+        x_points[2] - x_points[1],
+        y_points[2] - y_points[1],
+        x_points[3] - x_points[2],
+        y_points[3] - y_points[2],
+    );
+
+    cubic_hermite_spline(
+        x_points[1],
+        y_points[1],
+        x_points[2],
+        y_points[2],
+        m0,
+        m1,
+        x,
+    )
+}
+
+fn debug_check_valid_estimator(
+    ranks: &[usize],
+    values: &[f64],
+    targets: &[f64],
+    observations: usize,
+) {
+    debug_assert!(ranks.len() > 2);
+    debug_assert!(ranks.len() == values.len() && ranks.len() == targets.len());
+    debug_assert!(values.is_sorted() && ranks.is_sorted() && targets.is_sorted());
+    debug_assert!(targets.first() == Some(&0.0) && targets.last() == Some(&1.0));
+    debug_assert!(observations >= ranks.len());
+    debug_assert!(ranks.first() == Some(&0) && ranks.last() == Some(&(observations - 1)));
+}
+
+fn debug_check_uninitialized_estimator(ranks: &[usize], values: &[f64], targets: &[f64]) {
+    debug_assert!(ranks.len() > 2);
+    debug_assert!(ranks.len() == values.len() && ranks.len() == targets.len());
+    debug_assert!(targets.is_sorted());
+    debug_assert!(targets.first() == Some(&0.0) && targets.last() == Some(&1.0));
+}
+
+fn _add_sample_to_estimator(data: MutableQuantileEstimatorData, sample: f64) {
+    let MutableQuantileEstimatorData {
+        ranks,
+        values,
+        targets,
+        observations,
+    } = data;
+    debug_check_valid_estimator(ranks, values, targets, *observations);
+    // Find where sample falls within distribution...
+    let p = values.partition_point(|&v| v < sample);
+    let bound_p = p.min(values.len() - 1);
+
+    // Update extremes...
+    if bound_p == 0 {
+        values[bound_p] = values[bound_p].min(sample);
+    } else if bound_p == (values.len() - 1) {
+        values[bound_p] = values[bound_p].max(sample);
+    }
+
+    // Increment ranks of markers above newly inserted sample...
+    for i in bound_p.max(1)..ranks.len() {
+        ranks[i] = ranks[i] + 1;
+    }
+
+    // Adjust inner markers to within 1 of their target quantile using p2 formula...
+    for i in 1..(values.len() - 1) {
+        // Observations hasn't been incremented yet, don't need to subtract 1...
+        let target_rank = (targets[i] * (*observations) as f64) as usize;
+        let current_rank = ranks[i];
+        if current_rank.abs_diff(target_rank) > 1 {
+            //println!("{:?}, {}, {}", ranks, i, ranks[i]);
+            let new_rank: usize = target_rank.clamp(
+                ranks[i - 1].saturating_add(1),
+                ranks[i + 1].saturating_sub(1),
+            );
+            if new_rank == current_rank {
+                continue;
+            }
+
+            let idx_shift = if new_rank > current_rank { 1 } else { 0 };
+
+            let indexes = [
+                (i + idx_shift).saturating_sub(2),
+                (i + idx_shift).saturating_sub(1),
+                (i + idx_shift),
+                (i + idx_shift).saturating_add(1).min(ranks.len() - 1),
+            ];
+
+            values[i] = pchip_prediction(
+                &indexes.map(|i| ranks[i] as f64),
+                &indexes.map(|i| values[i]),
+                new_rank as f64,
+            );
+            ranks[i] = new_rank;
+        }
+    }
+
+    *observations += 1;
+}
+
+fn _merge_estimators(
+    q1: QuantileEstimatorData,
+    q2: QuantileEstimatorData,
+    new_estimator: MutableQuantileEstimatorData,
+) {
+    debug_check_valid_estimator(q1.ranks, q1.values, q1.targets, *q1.observations);
+    debug_check_valid_estimator(q2.ranks, q2.values, q2.targets, *q2.observations);
+    debug_check_uninitialized_estimator(
+        new_estimator.ranks,
+        new_estimator.values,
+        new_estimator.targets,
+    );
+
+    assert!(new_estimator.targets.len() <= (*q1.observations + *q2.observations));
+
+    fn get_at(a: &QuantileEstimatorData, i: usize) -> (usize, f64) {
+        (a.ranks[i], a.values[i])
+    }
+
+    // May eventually replace with algorithm that doesn't use extra memory...
+    // Calculate a "merged" quantiles by linearly iterpolating ranks based on the values we see...
+    let mut dual_est_quants: Vec<(f64, f64)> = Vec::with_capacity(q1.ranks.len() + q2.ranks.len());
+
+    let mut q1_prior: Option<(usize, f64)> = None;
+    let mut q2_prior: Option<(usize, f64)> = None;
+
+    let mut q1_idx = 0;
+    let mut q2_idx = 0;
+
+    loop {
+        let q1_past_end = q1_idx >= q1.values.len();
+        let q2_past_end = q2_idx >= q2.values.len();
+
+        if q1_past_end && q2_past_end {
+            break;
+        } else if q1_past_end {
+            let next = get_at(&q2, q2_idx);
+            dual_est_quants.push((
+                (next.0 + q1_prior.map(|v| v.0 + 1).unwrap_or(0)) as f64,
+                next.1,
+            ));
+            q2_prior = Some(next);
+            q2_idx += 1;
+        } else if q2_past_end {
+            let next = get_at(&q1, q1_idx);
+            dual_est_quants.push((
+                (next.0 + q2_prior.map(|v| v.0 + 1).unwrap_or(0)) as f64,
+                next.1,
+            ));
+            q1_prior = Some(next);
+            q1_idx += 1;
+        } else if q1.values[q1_idx] <= q2.values[q2_idx] {
+            let other_next = get_at(&q2, q2_idx);
+            let next = get_at(&q1, q1_idx);
+            let w = q2_prior
+                .map(|other_prior| (next.1 - other_prior.1) / (other_next.1 - other_prior.1))
+                .unwrap_or(0.0);
+            let other_rank_est = q2_prior
+                .map(|other_prior| {
+                    (other_prior.0 + 1) as f64 * (1.0 - w) + (other_next.0 + 1) as f64 * w
+                })
+                .unwrap_or(0.0);
+            dual_est_quants.push((next.0 as f64 + other_rank_est, next.1));
+            q1_prior = Some(next);
+            q1_idx += 1;
+        } else {
+            let other_next = get_at(&q1, q1_idx);
+            let next = get_at(&q2, q2_idx);
+            let w = q1_prior
+                .map(|other_prior| (next.1 - other_prior.1) / (other_next.1 - other_prior.1))
+                .unwrap_or(0.0);
+            let other_rank_est = q1_prior
+                .map(|other_prior| {
+                    (other_prior.0 + 1) as f64 * (1.0 - w) + (other_next.0 + 1) as f64 * w
+                })
+                .unwrap_or(0.0);
+            dual_est_quants.push((next.0 as f64 + other_rank_est, next.1));
+            q2_prior = Some(next);
+            q2_idx += 1;
+        }
+    }
+
+    // New number of observations is the sum of both...
+    *(new_estimator.observations) = *(q1.observations) + *(q2.observations);
+    let rank_range = *(new_estimator.observations) - 1;
+
+    // Solve all inner quantiles using traditional interpolation...
+    let mut index_between = 0;
+
+    new_estimator.ranks.first_mut().map(|r| *r = 0);
+    new_estimator.ranks.last_mut().map(|r| *r = rank_range);
+    new_estimator
+        .values
+        .first_mut()
+        .map(|v| *v = dual_est_quants[0].1);
+    new_estimator
+        .values
+        .last_mut()
+        .map(|v| *v = dual_est_quants[dual_est_quants.len() - 1].1);
+
+    for ti in 1..new_estimator.targets.len() - 1 {
+        // Calculate new rank...
+        let target = new_estimator.targets[ti];
+        let approx_obs_rank = ((target * rank_range as f64) as usize)
+            .clamp(ti, rank_range - (new_estimator.targets.len() - (ti + 1)));
+
+        // Find where it lands in cdf...
+        while index_between < dual_est_quants.len()
+            && (approx_obs_rank as f64) > dual_est_quants[index_between].0
+        {
+            index_between += 1;
+        }
+
+        // Get pchip estimate for the value...
+        let indexes = [
+            index_between.saturating_sub(2),
+            index_between.saturating_sub(1),
+            index_between.min(dual_est_quants.len() - 1),
+            index_between
+                .saturating_add(1)
+                .min(dual_est_quants.len() - 1),
+        ];
+
+        new_estimator.values[ti] = pchip_prediction(
+            &indexes.map(|i| dual_est_quants[i].0),
+            &indexes.map(|i| dual_est_quants[i].1),
+            approx_obs_rank as f64,
+        );
+        new_estimator.ranks[ti] = approx_obs_rank;
+    }
+}
+
+trait PrimativeCast<T> {
+    fn as_(&self) -> T;
+}
+
+impl PrimativeCast<f64> for f64 {
+    #[inline]
+    fn as_(&self) -> f64 {
+        *self
+    }
+}
+
+impl PrimativeCast<f64> for usize {
+    #[inline]
+    fn as_(&self) -> f64 {
+        *self as f64
+    }
+}
+
+fn _interpolated_value_prediction<
+    I: PartialOrd + PrimativeCast<f64> + Copy,
+    O: PartialOrd + PrimativeCast<f64> + Copy,
+>(
+    xs: &[I],
+    ys: &[O],
+    x: f64,
+    lower_val: f64,
+    upper_val: f64,
+    not_enough_data_value: f64,
+) -> f64 {
+    debug_assert!(xs.is_sorted());
+    debug_assert!(xs.len() == ys.len());
+
+    if xs.len() < 1 {
+        return not_enough_data_value;
+    }
+
+    let idx = xs.partition_point(|&v| v.as_() < x);
+    if idx >= xs.len() {
+        upper_val
+    } else if idx == 0 {
+        lower_val
+    } else {
+        let indexes = [
+            idx.saturating_sub(2),
+            idx.saturating_sub(1),
+            idx,
+            idx.saturating_add(1).min(xs.len() - 1),
+        ];
+
+        pchip_prediction(
+            &indexes.map(|i| xs[i].as_()),
+            &indexes.map(|i| ys[i].as_()),
+            x,
+        )
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct QuantileEstimator<T: QuantileEstimatorRepresentation>(T);
+
+impl<T: QuantileEstimatorRepresentation> QuantileEstimator<T> {
+    pub fn samples(&self) -> usize {
+        *self.0._data().observations
+    }
+
+    pub fn to_psuedo_count(&self, count: usize) -> Self {
+        let prior_data = self.0._data();
+        let mut new_self = Self(T::new_like(&self.0));
+        let new_data = new_self.0._mut_data();
+
+        let new_observations = count.max(1) * prior_data.ranks.len();
+
+        for i in 0..new_data.targets.len() {
+            let closest_rank = ((new_data.targets[i] * (new_observations - 1) as f64) as usize)
+                .clamp(
+                    i,
+                    (new_observations - 1) - (new_data.targets.len() - (i + 1)),
+                );
+
+            new_data.ranks[i] = closest_rank;
+            new_data.values[i] = self.ppf(closest_rank as f64 / (new_observations - 1) as f64)
+        }
+        *new_data.observations = new_observations;
+
+        new_self
+    }
+}
+
+impl QuantileEstimator<VectorQuantileRepresentation> {
+    pub fn new_from_slice(targets: &[f64]) -> Self {
+        Self(VectorQuantileRepresentation::new(targets))
+    }
+}
+
+impl<const N: usize> QuantileEstimator<ArrayQuantileRepresentation<N>> {
+    pub fn new_from_array(targets: &[f64; N]) -> Self {
+        Self(ArrayQuantileRepresentation::<N>::new(targets))
+    }
+}
+
+impl<T: QuantileEstimatorRepresentation + Default> Default for QuantileEstimator<T> {
+    fn default() -> Self {
+        Self(T::default())
+    }
+}
+
+impl<T: QuantileEstimatorRepresentation + Default> QuantileEstimator<T> {
+    fn new() -> Self {
+        Self::default()
+    }
+}
+
+impl<T: QuantileEstimatorRepresentation> AddAssign<f64> for QuantileEstimator<T> {
+    fn add_assign(&mut self, sample: f64) {
+        let data = self.0._mut_data();
+
+        match (*data.observations + 1).cmp(&data.values.len()) {
+            Ordering::Less => {
+                data.values[*data.observations] = sample;
+                *data.observations += 1;
+            }
+            Ordering::Equal => {
+                data.values[*data.observations] = sample;
+                data.values.sort_by(|a, b| a.total_cmp(b));
+                for i in 0..data.ranks.len() {
+                    data.ranks[i] = i;
+                }
+                *data.observations += 1;
+            }
+            Ordering::Greater => {
+                _add_sample_to_estimator(data, sample);
+            }
+        }
+    }
+}
+
+impl<T: QuantileEstimatorRepresentation> AddAssign<&[f64]> for QuantileEstimator<T> {
+    fn add_assign(&mut self, samples: &[f64]) {
+        for &s in samples.iter() {
+            *self += s;
+        }
+    }
+}
+
+impl<T: QuantileEstimatorRepresentation> Add<&QuantileEstimator<T>> for &QuantileEstimator<T> {
+    type Output = QuantileEstimator<T>;
+
+    fn add(self, rhs: &QuantileEstimator<T>) -> Self::Output {
+        match (self.0._is_initialized(), rhs.0._is_initialized()) {
+            (true, true) => {
+                let mut new_quant_est = QuantileEstimator::<T>(T::new_like(&self.0));
+
+                _merge_estimators(self.0._data(), rhs.0._data(), new_quant_est.0._mut_data());
+
+                new_quant_est
+            }
+            (true, false) | (false, false) => {
+                let other_data = rhs.0._data();
+                let mut new_quant_est = self.clone();
+                new_quant_est += &other_data.values[..*other_data.observations];
+                new_quant_est
+            }
+            (false, true) => {
+                let self_data = self.0._data();
+                let mut new_quant_est = rhs.clone();
+                new_quant_est += &self_data.values[..*self_data.observations];
+                new_quant_est
+            }
+        }
+    }
+}
+
+impl<T: QuantileEstimatorRepresentation> Distribution for QuantileEstimator<T> {
+    fn cdf(&self, x: f64) -> f64 {
+        let data = self.0._data();
+        if self.0._is_initialized() {
+            _interpolated_value_prediction(
+                data.values,
+                data.ranks,
+                x,
+                0.0,
+                (*data.observations - 1) as f64,
+                0.0,
+            ) / (*data.observations - 1).max(1) as f64
+        } else {
+            let xs_sorted = data.values[..*data.observations]
+                .iter()
+                .copied()
+                .sorted_by(|a, b| a.total_cmp(b))
+                .collect_vec();
+            let ys = (0..*data.observations).collect_vec();
+            _interpolated_value_prediction(
+                &xs_sorted,
+                &ys,
+                x,
+                0.0,
+                (*data.observations - 1) as f64,
+                0.0,
+            )
+        }
+    }
+
+    fn logcdf(&self, x: f64) -> f64 {
+        self.cdf(x).ln()
+    }
+
+    fn ccdf(&self, x: f64) -> f64 {
+        1.0 - self.cdf(x)
+    }
+
+    fn logccdf(&self, x: f64) -> f64 {
+        (-self.cdf(x)).ln_1p()
+    }
+
+    fn ppf(&self, p: f64) -> f64 {
+        let data = self.0._data();
+        let est_rank = p.clamp(0.0, 1.0) * (*data.observations - 1) as f64;
+
+        let data = self.0._data();
+        let (min_val, max_val) = self.support();
+
+        if self.0._is_initialized() {
+            _interpolated_value_prediction(
+                data.ranks,
+                data.values,
+                est_rank,
+                min_val,
+                max_val,
+                0.0_f64.clamp(min_val, max_val),
+            )
+        } else {
+            let ys_sorted = data.values[..*data.observations]
+                .iter()
+                .copied()
+                .sorted_by(|a, b| a.total_cmp(b))
+                .collect_vec();
+            let xs = (0..*data.observations).collect_vec();
+            _interpolated_value_prediction(
+                &xs,
+                &ys_sorted,
+                est_rank,
+                min_val,
+                max_val,
+                0.0_f64.clamp(min_val, max_val),
+            )
+        }
+    }
+
+    fn pdf(&self, _x: f64) -> f64 {
+        // Will have to calculate derivatives, cache normalization factor (such that area under curve is 1)...
+        // May be worth splitting out into different class to allow pre-processing this stuff...
+        // TODO: Would prefer monotonic quintic splines for this as we can just use the derivative... Allows us to avoid normalization...
+        // Idea: Traditional splines aren't monotonic, but if we make their derivative take the form (p(x))^2 where p(x) is a polynomial
+        // we are guaranteed to have monotonic splines. Final step would be solving for the basis functions.
+        panic!("Currently not supported!");
+    }
+
+    fn logpdf(&self, x: f64) -> f64 {
+        self.pdf(x).ln()
+    }
+
+    fn support(&self) -> (f64, f64) {
+        let data = self.0._data();
+
+        if self.0._is_initialized() {
+            (
+                *data.values.first().unwrap_or(&f64::NEG_INFINITY),
+                *data.values.last().unwrap_or(&f64::INFINITY),
+            )
+        } else {
+            data.values[..*data.observations]
+                .iter()
+                .copied()
+                .minmax()
+                .into_option()
+                .unwrap_or((f64::NEG_INFINITY, f64::INFINITY))
+        }
+    }
+}
+
+pub trait QuantileEstimatorRepresentation: Clone {
+    fn new_like(other: &Self) -> Self;
+    fn _data(&self) -> QuantileEstimatorData<'_>;
+    fn _mut_data(&mut self) -> MutableQuantileEstimatorData<'_>;
+    fn _is_initialized(&self) -> bool {
+        let data = self._data();
+        *data.observations >= data.ranks.len()
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct ArrayQuantileRepresentation<const N: usize> {
+    values: [f64; N],
+    ranks: [usize; N],
+    targets: [f64; N],
+    observations: usize,
+}
+
+impl<const N: usize> ArrayQuantileRepresentation<N> {
+    fn new(targets: &[f64; N]) -> Self {
+        assert!(
+            targets.is_sorted() && targets.first() == Some(&0.0) && targets.last() == Some(&1.0)
+        );
+        Self {
+            values: [0.0; N],
+            ranks: [0; N],
+            targets: targets.clone(),
+            observations: 0,
+        }
+    }
+}
+
+impl<const N: usize> QuantileEstimatorRepresentation for ArrayQuantileRepresentation<N> {
+    fn new_like(other: &Self) -> Self {
+        Self::new(&other.targets)
+    }
+
+    fn _data(&self) -> QuantileEstimatorData<'_> {
+        QuantileEstimatorData {
+            ranks: &self.ranks,
+            values: &self.values,
+            targets: &self.targets,
+            observations: &self.observations,
+        }
+    }
+
+    fn _mut_data(&mut self) -> MutableQuantileEstimatorData<'_> {
+        MutableQuantileEstimatorData {
+            ranks: &mut self.ranks,
+            values: &mut self.values,
+            targets: &self.targets,
+            observations: &mut self.observations,
+        }
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct VectorQuantileRepresentation {
+    values: Vec<f64>,
+    ranks: Vec<usize>,
+    targets: Vec<f64>,
+    observations: usize,
+}
+
+impl VectorQuantileRepresentation {
+    fn new(targets: &[f64]) -> Self {
+        assert!(
+            targets.is_sorted() && targets.first() == Some(&0.0) && targets.last() == Some(&1.0)
+        );
+        Self {
+            values: vec![0.0; targets.len()],
+            ranks: (0..targets.len()).collect_vec(),
+            targets: Vec::from(targets),
+            observations: 0,
+        }
+    }
+}
+
+impl QuantileEstimatorRepresentation for VectorQuantileRepresentation {
+    fn new_like(other: &Self) -> Self {
+        Self::new(&other.targets)
+    }
+
+    fn _data(&self) -> QuantileEstimatorData<'_> {
+        QuantileEstimatorData {
+            ranks: &self.ranks,
+            values: &self.values,
+            targets: &self.targets,
+            observations: &self.observations,
+        }
+    }
+
+    fn _mut_data(&mut self) -> MutableQuantileEstimatorData<'_> {
+        MutableQuantileEstimatorData {
+            ranks: &mut self.ranks,
+            values: &mut self.values,
+            targets: &self.targets,
+            observations: &mut self.observations,
+        }
+    }
+}
+
+pub mod custom_quantile_estimator {
+    use super::*;
+
+    macro_rules! replace_expr {
+        ($_t:tt,$sub:expr) => {
+            $sub
+        };
+    }
+
+    macro_rules! count_exprs {
+        ($($val:expr),+) => {<[()]>::len(&[$(replace_expr!($val,())),+])};
+    }
+
+    macro_rules! implement_fixed_quantile_estimator {
+        ($name:ident, $repr_name:ident, [$($val:expr), +]) => {
+            #[derive(Clone, Debug)]
+            pub struct $repr_name {
+                values: [f64; Self::COUNT],
+                ranks: [usize; Self::COUNT],
+                observations: usize,
+            }
+
+            impl $repr_name {
+                const TARGETS: [f64; count_exprs!($($val),+) + 2] = [0.0, $($val),+, 1.0];
+                const COUNT: usize = Self::TARGETS.len();
+
+                pub fn new() -> Self {
+                    Self {
+                        values: [0.0; Self::COUNT],
+                        ranks: [0; Self::COUNT],
+                        observations: 0
+                    }
+                }
+            }
+
+            impl Default for $repr_name {
+                fn default() -> Self {
+                    Self::new()
+                }
+            }
+
+            impl QuantileEstimatorRepresentation for $repr_name {
+                fn new_like(_other: &Self) -> Self {
+                    Self::default()
+                }
+                fn _data(&self) -> QuantileEstimatorData<'_> {
+                    QuantileEstimatorData {
+                        ranks: &self.ranks,
+                        values: &self.values,
+                        targets: &Self::TARGETS,
+                        observations: &self.observations,
+                    }
+                }
+                fn _mut_data(&mut self) -> MutableQuantileEstimatorData<'_> {
+                    MutableQuantileEstimatorData {
+                        ranks: &mut self.ranks,
+                        values: &mut self.values,
+                        targets: &Self::TARGETS,
+                        observations: &mut self.observations,
+                    }
+                }
+            }
+
+            pub type $name = QuantileEstimator<$repr_name>;
+        };
+    }
+
+    implement_fixed_quantile_estimator!(
+        LomaxQuant,
+        LomaxQuantRepr,
+        [0.18, 0.36, 0.4752, 0.5904, 0.7952]
+    );
+    impl LomaxQuant {
+        pub const PROB1: f64 = 0.36;
+        pub const PROB2: f64 = 0.59;
+    }
+    implement_fixed_quantile_estimator!(MedianEstimator, MedianEstimatorRepr, [0.25, 0.5, 0.75]);
+}
+
+#[cfg(test)]
+mod test {
+    use crate::{
+        p2estimator::QuantileEstimator,
+        statistics::{Distribution, Exponential},
+    };
+    use itertools::Itertools;
+    use rand::{rngs::Xoshiro256PlusPlus, RngExt, SeedableRng};
+
+    pub fn linspace(start: f64, stop: f64, steps: usize) -> impl Iterator<Item = f64> {
+        (0..steps)
+            .map(move |n| n as f64 / (steps as f64 - 1.0))
+            .map(move |n| start * (1.0 - n) + stop * n)
+    }
+
+    fn is_close(a: f64, b: f64) -> bool {
+        let rel_tol = 1e-9;
+        let abs_tol = 0.0;
+        if a == b {
+            true
+        } else {
+            (a - b).abs() <= (rel_tol * (a.abs()).max(b.abs())).max(abs_tol)
+        }
+    }
+
+    #[test]
+    fn quantiles_on_exponential_dist() {
+        let expon = Exponential::new(1.0);
+        let mut estimator =
+            QuantileEstimator::new_from_array(&[0.0, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]);
+
+        let mut rng = Xoshiro256PlusPlus::seed_from_u64(12345654321);
+
+        for _ in 0..10_000 {
+            let sample = expon.ppf(rng.random());
+            estimator += sample;
+        }
+
+        assert!(estimator.samples() == 10_000);
+
+        for val in linspace(0.0, 0.90, 90) {
+            assert!((estimator.ppf(val) - expon.ppf(val)).abs() <= 0.04);
+            let dist_val = expon.ppf(val);
+            assert!((estimator.cdf(dist_val) - expon.cdf(dist_val)).abs() <= 0.04);
+
+            // Basic probability distribution checks...
+            assert!(is_close(
+                estimator.ccdf(dist_val),
+                1.0 - estimator.cdf(dist_val)
+            ));
+
+            assert!(is_close(
+                estimator.logcdf(dist_val),
+                estimator.cdf(dist_val).ln()
+            ));
+            assert!(is_close(
+                estimator.logccdf(dist_val),
+                estimator.ccdf(dist_val).ln()
+            ));
+        }
+    }
+
+    #[test]
+    fn test_quantile_merging() {
+        let expon = Exponential::new(1.0);
+        let mut merged_estimator =
+            QuantileEstimator::new_from_slice(&linspace(0.0, 1.0, 10).collect_vec());
+
+        let mut rng = Xoshiro256PlusPlus::seed_from_u64(12345654321);
+
+        for _ in 0..100 {
+            let targets: Vec<f64> = linspace(0.0, 1.0, rng.random_range(5..15)).collect();
+            let mut estimator = QuantileEstimator::new_from_slice(&targets);
+
+            for _ in 0..100 {
+                estimator += expon.ppf(rng.random());
+            }
+
+            merged_estimator = &merged_estimator + &estimator;
+        }
+
+        assert!(merged_estimator.samples() == 10_000);
+
+        for val in linspace(0.0, 0.75, 75) {
+            assert!((merged_estimator.ppf(val) - expon.ppf(val)).abs() <= 0.1);
+            let dist_val = expon.ppf(val);
+            assert!((merged_estimator.cdf(dist_val) - expon.cdf(dist_val)).abs() <= 0.04)
+        }
+    }
+}
diff --git a/src/pipeline.rs b/src/pipeline.rs
index 64e8d11..bf93a42 100644
--- a/src/pipeline.rs
+++ b/src/pipeline.rs
@@ -5,15 +5,18 @@ use itertools::Itertools;
 use crate::{
     alignment::{AlignmentData, Strand},
     annotation::{AmbiguousAnnotation, SimpleAnnotation},
+    assembly::gather_join_statistics,
     chunks::ProximityGroup,
     confidence::confidence,
     history_tracing::{
         backtrace_histories, history_viterbi_on_segments, History, RefinedTraceSegment,
     },
+    join_estimation::{JoinEstimator, JoinStatisticsCollector},
     matrix::{Matrix, MatrixDef},
     score_params::{approximate_ideal_skip_state_score, ScoreParams},
     segments::{assemble_and_link_segments, segments_from_matrix_trace, InitialSegments},
     support::windowed_confidence,
+    trace_statistics::TraceStatistics,
     viterbi::{trace_segments, traceback, viterbi_collapsed, TraceSegment},
     viz::{
         debug::{dump_debug_history_info, dump_final_trace_statistics},
@@ -59,7 +62,7 @@ pub fn to_annotations(
                                         a.row_idx - proximity_group.alignments.len() - 1;
                                     let repeat = &proximity_group.tandem_repeats[tandem_repeat_idx];
                                     format!(
-                                        "({}:{})#tandem repeat",
+                                        "({}:{})#tandem-repeat",
                                         repeat.period, repeat.consensus_pattern,
                                     )
                                 }
@@ -139,22 +142,25 @@ fn get_active_columns<T: Copy + Default + Display>(matrix: &Matrix<T>) -> Vec<(u
     active_cols
 }
 
-pub struct NaiveTraceResults {
+pub struct NaiveTraceResults<T: JoinStatisticsCollector> {
+    pub target_start: usize,
+    pub target_end: usize,
     pub trace_segments: Vec<TraceSegment>,
     pub segments: InitialSegments,
     pub score_params: ScoreParams,
     pub alignment_confidences: Vec<f64>,
     pub active_columns: Vec<(usize, usize)>,
+    pub query_join_statistics: Vec<(usize, T)>,
     pub viz_writer: AdjudicationSodaWriter,
     pub region_index: usize,
 }
 
-pub fn run_naive_trace(
+pub fn run_naive_trace<T: JoinStatisticsCollector>(
     proximity_group: &ProximityGroup,
     alignment_data: &AlignmentData,
     region_idx: usize,
     args: &AuroraArgs,
-) -> NaiveTraceResults {
+) -> NaiveTraceResults<T> {
     let annot_args = &args.annotation_args;
 
     let score_params = ScoreParams::new(
@@ -199,7 +205,10 @@ pub fn run_naive_trace(
     .unwrap();
 
     confidence(&mut confidence_matrix);
-    let confidence_by_row = windowed_confidence(&mut confidence_matrix);
+    let confidence_by_row = windowed_confidence(
+        &mut confidence_matrix,
+        args.annotation_args.score_window_size,
+    );
 
     let segments;
     let simple_trace;
@@ -246,21 +255,32 @@ pub fn run_naive_trace(
             .expect("Unable to write confidences!!!");
     }
 
+    let query_join_statistics = gather_join_statistics(
+        proximity_group,
+        &segments,
+        &alignment_data.query_lengths,
+        &args.annotation_args,
+    );
+
     NaiveTraceResults {
+        target_start: proximity_group.target_start,
+        target_end: proximity_group.target_end,
         trace_segments: simple_trace,
         segments,
         score_params,
         alignment_confidences: confidence_by_row,
         active_columns: get_active_columns(&confidence_matrix),
+        query_join_statistics,
         viz_writer,
         region_index: region_idx,
     }
 }
 
-pub fn run_history_trace(
+pub fn run_history_trace<T: JoinEstimator, S: JoinStatisticsCollector>(
     proximity_group: &ProximityGroup,
     alignment_data: &AlignmentData,
-    naive_trace: &mut NaiveTraceResults,
+    trace_statistics: &TraceStatistics<T>,
+    naive_trace: &mut NaiveTraceResults<S>,
     args: &AuroraArgs,
 ) -> Vec<AmbiguousAnnotation> {
     let vis_args = &args.visualization_args;
@@ -269,8 +289,11 @@ pub fn run_history_trace(
         proximity_group,
         &mut naive_trace.segments,
         &naive_trace.trace_segments,
+        &trace_statistics.region_statistics[naive_trace.region_index],
+        &trace_statistics.query_statistics,
         &naive_trace.score_params,
         &args.annotation_args,
+        &alignment_data.query_lengths,
     );
 
     let history = history_viterbi_on_segments(
diff --git a/src/score_params.rs b/src/score_params.rs
index 783d0b6..868a948 100644
--- a/src/score_params.rs
+++ b/src/score_params.rs
@@ -69,4 +69,18 @@ impl ScoreParams {
             prior_is_different,
         )
     }
+
+    pub fn join_transition(&self, prior_is_skip: bool, join_probability: f64) -> f64 {
+        let true_join_cost = fast_select(
+            self.query_to_skip_score + (self.query_loop_score - self.query_jump_score).abs(),
+            self.query_loop_score,
+            prior_is_skip,
+        );
+        let false_join_cost = fast_select(
+            self.query_to_skip_score,
+            self.query_jump_score,
+            prior_is_skip,
+        );
+        true_join_cost * join_probability + (1.0 - join_probability) * false_join_cost
+    }
 }
diff --git a/src/segments.rs b/src/segments.rs
index f0914f1..118e13a 100644
--- a/src/segments.rs
+++ b/src/segments.rs
@@ -1,9 +1,17 @@
 use core::f64;
-use std::{cmp::Ordering, fmt::Debug, iter::Fuse};
+use std::{cmp::Ordering, collections::HashMap, fmt::Debug, iter::Fuse};
 
 use crate::{
-    assembly::SegmentAssemblyGraph, chunks::ProximityGroup, matrix::Matrix,
-    score_params::ScoreParams, viterbi::TraceSegment, AnnotationArgs,
+    alignment::{Alignment, Strand},
+    annotation::AmbiguousAnnotation,
+    assembly::SegmentAssemblyGraph,
+    chunks::ProximityGroup,
+    join_estimation::JoinEstimator,
+    matrix::Matrix,
+    score_params::ScoreParams,
+    trace_statistics::{QueryStatistics, RegionStatistics},
+    viterbi::TraceSegment,
+    AnnotationArgs,
 };
 use itertools::Itertools;
 
@@ -53,6 +61,7 @@ pub enum BlockType {
 pub struct Block {
     pub row_idx: usize,
     pub block_type: BlockType,
+    pub strand: Strand,
     pub query_id: Option<usize>,
     pub col_start: usize,
     pub col_end: usize,
@@ -60,6 +69,7 @@ pub struct Block {
     pub query_end: usize,
     pub avg_confidence: f64,
     pub alignment_score: f64,
+    pub kimura80: f64,
     pub can_join_up_to: usize,
 }
 
@@ -90,6 +100,52 @@ impl Block {
     pub fn to_comparable(&self) -> (Option<usize>, usize) {
         (self.query_id, self.row_idx)
     }
+
+    pub fn from_alignment(
+        alignment: &Alignment,
+        group_start: usize,
+        row: usize,
+        confidence: f64,
+        score: f64,
+    ) -> Self {
+        Self {
+            row_idx: row,
+            block_type: BlockType::Alignment,
+            strand: alignment.strand,
+            query_id: Some(alignment.query_id),
+            col_start: alignment.target_start.saturating_sub(group_start),
+            col_end: alignment.target_end.saturating_sub(group_start),
+            query_start: alignment.query_start,
+            query_end: alignment.query_end,
+            avg_confidence: confidence,
+            alignment_score: score,
+            kimura80: alignment.kimura80(alignment.query_start, alignment.query_end),
+            can_join_up_to: 0,
+        }
+    }
+
+    pub fn from_annotation(
+        annotation: &AmbiguousAnnotation,
+        selected_index: usize,
+        row: usize,
+    ) -> Self {
+        let simple_annot = &annotation.annotations[selected_index];
+
+        Self {
+            row_idx: row,
+            block_type: BlockType::Alignment,
+            strand: simple_annot.strand,
+            query_id: Some(simple_annot.query_id),
+            col_start: simple_annot.target_start,
+            col_end: simple_annot.target_end,
+            query_start: simple_annot.query_start,
+            query_end: simple_annot.query_end,
+            avg_confidence: annotation.confidence,
+            alignment_score: annotation.confidence.ln(),
+            kimura80: simple_annot.kimura80,
+            can_join_up_to: 0,
+        }
+    }
 }
 
 #[derive(Debug)]
@@ -102,6 +158,7 @@ pub struct Segment {
 }
 
 pub type SegmentedMatrix = Vec<Segment>;
+pub type SegmentedMatrixView<'a> = &'a [Segment];
 
 #[derive(Copy, Clone, Debug)]
 enum MergeEntry<T> {
@@ -172,6 +229,7 @@ pub struct MergeIterator<
     val1: MergeEntry<I::Item>,
     val2: MergeEntry<I::Item>,
     prior_val: MergeEntry<I::Item>,
+    only_unique: bool,
     comparator: F,
 }
 
@@ -180,13 +238,14 @@ impl<I: Iterator, J: Iterator<Item = I::Item>, F: Fn(&I::Item, &I::Item) -> Orde
 where
     I::Item: Copy,
 {
-    pub fn new(iter1: I, iter2: J, comparator: F) -> Self {
+    pub fn new(iter1: I, iter2: J, comparator: F, only_unique: bool) -> Self {
         Self {
             iter1: iter1.fuse(),
             iter2: iter2.fuse(),
             val1: MergeEntry::Start,
             val2: MergeEntry::Start,
             prior_val: MergeEntry::Start,
+            only_unique,
             comparator,
         }
     }
@@ -204,21 +263,14 @@ pub struct InitialSegments {
     initial_trace_scores: Vec<f64>,
 }
 
-#[allow(dead_code)]
-pub struct SegmentView<'a> {
-    pub start_col: usize,
-    pub end_col: usize,
-    pub blocks: &'a [Block],
-}
-
 #[allow(dead_code)]
 impl InitialSegments {
-    pub fn iter_segments(&self) -> impl Iterator<Item = SegmentView> {
-        self.segments.iter().map(|v| SegmentView {
-            start_col: v.start_col,
-            end_col: v.end_col,
-            blocks: &v.blocks,
-        })
+    pub fn view_segments(&self) -> SegmentedMatrixView<'_> {
+        return &self.segments;
+    }
+
+    pub fn len(&self) -> usize {
+        self.segments.len()
     }
 }
 
@@ -249,6 +301,10 @@ where
                 next_val = self.val2;
                 self.val2 = self.iter2.next().into();
             }
+
+            if !self.only_unique {
+                break;
+            }
         }
 
         self.prior_val = next_val;
@@ -275,7 +331,7 @@ pub fn unique_merging_iterator<I: Iterator, J: Iterator<Item = I::Item>>(
 where
     I::Item: Copy + Ord,
 {
-    MergeIterator::new(list1, list2, |a, b| a.cmp(b))
+    MergeIterator::new(list1, list2, |a, b| a.cmp(b), true)
 }
 
 #[derive(Debug)]
@@ -441,7 +497,13 @@ pub fn segments_from_matrix_trace(
             .collect_vec();
 
         // Compute scores and start/end points for all rows in this segment....
-        // TODO: This isn't fully correct, if we want it to be we need to track if this segment starts in, and if the segment ends in a skip state to compute correct transitions for history tracing...
+        // TODO: This isn't fully correct,
+        // if we want it to be identical to initial viterbi trace,
+        // we need to track if each block in this segment starts in the skip state,
+        // and if each block in each segment ends in a skip state to compute correct
+        // transitions for history tracing...
+        // This would require a few additional booleans/states for each block and adjustments
+        // to the history tracing to incorperate them...
         for column in seg.col_start..=seg.col_end {
             let rows = &matrix_definition.active_rows_by_col[column];
             let row_iter = rows
@@ -524,17 +586,34 @@ pub fn segments_from_matrix_trace(
                         _ => None,
                     };
 
+                    let query_start = confidence_matrix.consensus_position(row_idx, start);
+                    let query_end = confidence_matrix.consensus_position(row_idx, end);
+
+                    let kimura80 = match block_type {
+                        BlockType::Alignment => {
+                            group.alignments[row_idx - 1].kimura80(query_start, query_end)
+                        }
+                        _ => 0.0,
+                    };
+
+                    let strand = match block_type {
+                        BlockType::Alignment => group.alignments[row_idx - 1].strand,
+                        _ => Strand::Forward,
+                    };
+
                     Block {
                         row_idx,
                         block_type,
+                        strand,
                         query_id,
                         col_start: start,
                         col_end: end,
-                        query_start: confidence_matrix.consensus_position(row_idx, start),
-                        query_end: confidence_matrix.consensus_position(row_idx, end),
+                        query_start,
+                        query_end,
                         avg_confidence: row_conf_sum[row_idx]
                             / (row_valid_cell_count[row_idx].max(1) as f64),
                         alignment_score: row_scores[row_idx],
+                        kimura80,
                         can_join_up_to: s_idx,
                     }
                 })
@@ -552,17 +631,22 @@ pub fn segments_from_matrix_trace(
     }
 }
 
-pub fn assemble_and_link_segments<'a>(
+pub fn assemble_and_link_segments<'a, T: JoinEstimator>(
     proximity_group: &ProximityGroup,
     initial_segments: &'a mut InitialSegments,
     trace_segments: &[TraceSegment],
+    region_statistics: &RegionStatistics,
+    query_statistics: &[QueryStatistics<T>],
     score_params: &ScoreParams,
     annotation_args: &AnnotationArgs,
+    query_lengths: &HashMap<usize, usize>,
 ) -> (&'a SegmentedMatrix, SegmentAssemblyGraph) {
     let assembly_graph = SegmentAssemblyGraph::new(
         proximity_group.alignments,
+        query_lengths,
         &initial_segments.segments,
-        score_params,
+        region_statistics,
+        query_statistics,
         annotation_args,
     );
     finalize_segments(
diff --git a/src/statistics.rs b/src/statistics.rs
new file mode 100644
index 0000000..014aac1
--- /dev/null
+++ b/src/statistics.rs
@@ -0,0 +1,462 @@
+use core::f64;
+use puruspe::{beta, betai, invbetai};
+use std::fmt::Debug;
+
+pub fn ln_add_exp(a: f64, b: f64) -> f64 {
+    let max = a.max(b);
+    let min = a.min(b);
+    // TODO: Possibly use more stable ln_1p_exp at https://github.com/JuliaStats/LogExpFunctions.jl/files/8218470/log1pexp.pdf (Implemented at https://github.com/JuliaStats/LogExpFunctions.jl/blob/master/src/basicfuns.jl#L263)
+    max + (min - max).exp().ln_1p()
+}
+
+#[allow(dead_code)]
+pub trait Distribution {
+    fn pdf(&self, x: f64) -> f64;
+    fn cdf(&self, x: f64) -> f64;
+    fn ppf(&self, p: f64) -> f64;
+    fn support(&self) -> (f64, f64);
+    fn ccdf(&self, x: f64) -> f64;
+    fn logpdf(&self, x: f64) -> f64;
+    fn logcdf(&self, x: f64) -> f64;
+    fn logccdf(&self, x: f64) -> f64;
+}
+
+#[allow(dead_code)]
+pub trait ParameterizedDistribution: Distribution + Debug + Default + Clone {
+    fn unit() -> Self {
+        Self::default()
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct Exponential {
+    lambda: f64,
+}
+
+impl ParameterizedDistribution for Exponential {}
+
+impl Exponential {
+    pub fn new(lambda: f64) -> Self {
+        Self { lambda }
+    }
+
+    pub fn from_scale(beta: f64) -> Self {
+        Self::new(1.0 / beta)
+    }
+}
+
+impl Default for Exponential {
+    fn default() -> Self {
+        Self::new(1.0)
+    }
+}
+
+impl Distribution for Exponential {
+    fn pdf(&self, x: f64) -> f64 {
+        self.lambda * (-self.lambda * x).exp()
+    }
+
+    fn cdf(&self, x: f64) -> f64 {
+        1.0 - (-self.lambda * x).exp()
+    }
+
+    fn ppf(&self, p: f64) -> f64 {
+        -(1.0 - p).ln() / self.lambda
+    }
+
+    fn ccdf(&self, x: f64) -> f64 {
+        (-self.lambda * x).exp()
+    }
+
+    fn logpdf(&self, x: f64) -> f64 {
+        self.lambda.ln() - self.lambda * x
+    }
+
+    fn logcdf(&self, x: f64) -> f64 {
+        (-(-self.lambda * x).exp()).ln_1p()
+    }
+
+    fn logccdf(&self, x: f64) -> f64 {
+        -self.lambda * x
+    }
+
+    fn support(&self) -> (f64, f64) {
+        (0.0, f64::INFINITY)
+    }
+}
+
+#[derive(Debug, Clone)]
+pub struct ExponentialEstimator {
+    sample_mean: f64,
+    degrees_of_freedom: usize,
+}
+
+impl ParameterizedDistribution for ExponentialEstimator {}
+
+impl ExponentialEstimator {
+    pub fn new(sample_mean: f64, sample_size: usize) -> Self {
+        Self {
+            sample_mean: sample_mean,
+            degrees_of_freedom: sample_size,
+        }
+    }
+}
+
+impl From<ExponentialEstimator> for Exponential {
+    fn from(value: ExponentialEstimator) -> Self {
+        Self::from_scale(value.sample_mean)
+    }
+}
+
+impl Default for ExponentialEstimator {
+    fn default() -> Self {
+        Self {
+            sample_mean: 1.0,
+            degrees_of_freedom: 1,
+        }
+    }
+}
+
+impl Distribution for ExponentialEstimator {
+    fn logpdf(&self, x: f64) -> f64 {
+        let n = self.degrees_of_freedom as f64;
+        let sm = self.sample_mean;
+        ((n + 1.0) * n.ln() + n * sm.ln()) - ((n + 1.0) * (n * sm + x).ln())
+    }
+
+    fn pdf(&self, x: f64) -> f64 {
+        self.logpdf(x).exp()
+    }
+
+    fn logccdf(&self, x: f64) -> f64 {
+        let n = self.degrees_of_freedom as f64;
+        let sm = self.sample_mean;
+        n * ((n * sm).ln() - (n * sm + x).ln())
+    }
+
+    fn logcdf(&self, x: f64) -> f64 {
+        (-self.logccdf(x).exp()).ln_1p()
+    }
+
+    fn cdf(&self, x: f64) -> f64 {
+        -(self.logccdf(x).exp_m1())
+    }
+
+    fn ccdf(&self, x: f64) -> f64 {
+        self.logccdf(x).exp()
+    }
+
+    fn ppf(&self, p: f64) -> f64 {
+        let n = self.degrees_of_freedom as f64;
+        let sm = self.sample_mean;
+        (n * sm) * ((1.0 - p).powf(-1.0 / n) - 1.0)
+    }
+
+    fn support(&self) -> (f64, f64) {
+        (0.0, f64::INFINITY)
+    }
+}
+
+#[derive(Debug, Clone)]
+pub struct HalfT {
+    standard_deviation: f64,
+    degrees_of_freedom: usize,
+}
+
+impl ParameterizedDistribution for HalfT {}
+
+impl HalfT {
+    #[allow(dead_code)]
+    pub fn new(standard_deviation: f64, degrees_of_freedom: usize) -> Self {
+        Self {
+            standard_deviation,
+            degrees_of_freedom,
+        }
+    }
+
+    pub fn from_sample_mean(mean: f64, degrees_of_freedom: usize) -> Self {
+        Self {
+            standard_deviation: mean * (2.0 / f64::consts::PI).sqrt(),
+            degrees_of_freedom,
+        }
+    }
+}
+
+impl Default for HalfT {
+    fn default() -> Self {
+        Self {
+            standard_deviation: 1.0,
+            degrees_of_freedom: 1,
+        }
+    }
+}
+
+impl Distribution for HalfT {
+    fn logpdf(&self, x: f64) -> f64 {
+        let v = self.degrees_of_freedom as f64;
+        let s = self.standard_deviation;
+        let z = x / s;
+        if z >= 0.0 {
+            let norm = (2.0_f64).ln() - (0.5 * v.ln() + beta(0.5, 0.5 * v).ln() + s.ln());
+            norm - 0.5 * (v + 1.0) * ((z * z) / v).ln_1p()
+        } else {
+            0.0
+        }
+    }
+
+    fn pdf(&self, x: f64) -> f64 {
+        self.logpdf(x).exp()
+    }
+
+    fn cdf(&self, x: f64) -> f64 {
+        let v = self.degrees_of_freedom as f64;
+        let z = x / self.standard_deviation;
+        if z >= 0.0 {
+            1.0 - betai(0.5 * v, 0.5, v / (z * z + v))
+        } else {
+            0.0
+        }
+    }
+
+    fn logcdf(&self, x: f64) -> f64 {
+        self.cdf(x).ln()
+    }
+
+    fn ppf(&self, p: f64) -> f64 {
+        let v = self.degrees_of_freedom as f64;
+        let inv_out = invbetai(1.0 - p, 0.5 * v, 0.5);
+        let x_unit = (v / inv_out - v).sqrt();
+        x_unit * self.standard_deviation
+    }
+
+    fn ccdf(&self, x: f64) -> f64 {
+        1.0 - self.cdf(x)
+    }
+
+    fn logccdf(&self, x: f64) -> f64 {
+        self.ccdf(x).ln()
+    }
+
+    fn support(&self) -> (f64, f64) {
+        (0.0, f64::INFINITY)
+    }
+}
+
+#[derive(Debug, Clone)]
+pub struct Lomax {
+    alpha: f64,
+    lambda: f64,
+}
+
+impl Lomax {
+    pub fn new(alpha: f64, lambda: f64) -> Self {
+        Self { alpha, lambda }
+    }
+}
+
+impl ParameterizedDistribution for Lomax {}
+
+impl Default for Lomax {
+    fn default() -> Self {
+        Self::new(1.0, 1.0)
+    }
+}
+
+impl Distribution for Lomax {
+    fn logpdf(&self, x: f64) -> f64 {
+        let a = self.alpha;
+        let y = self.lambda;
+        (a / y).ln() - (a + 1.0) * (1.0 + x / y).ln()
+    }
+
+    fn pdf(&self, x: f64) -> f64 {
+        self.logpdf(x).exp()
+    }
+
+    fn logccdf(&self, x: f64) -> f64 {
+        let a = self.alpha;
+        let y = self.lambda;
+        -a * (1.0 + x / y).ln()
+    }
+
+    fn ccdf(&self, x: f64) -> f64 {
+        self.logccdf(x).exp()
+    }
+
+    fn cdf(&self, x: f64) -> f64 {
+        -(self.logccdf(x).exp_m1())
+    }
+
+    fn logcdf(&self, x: f64) -> f64 {
+        (-self.ccdf(x)).ln_1p()
+    }
+
+    fn ppf(&self, p: f64) -> f64 {
+        let a = self.alpha;
+        let y = self.lambda;
+        y * ((1.0 - p).powf(-1.0 / a) - 1.0)
+    }
+
+    fn support(&self) -> (f64, f64) {
+        (0.0, f64::INFINITY)
+    }
+}
+
+#[derive(Debug, Clone)]
+pub struct AssymetricLaplace {
+    mode: f64,
+    scale: f64,
+    mode_quantile: f64,
+}
+
+impl ParameterizedDistribution for AssymetricLaplace {}
+
+impl AssymetricLaplace {
+    pub fn new(mode: f64, scale: f64, mode_quantile: f64) -> Self {
+        Self {
+            mode,
+            scale,
+            mode_quantile,
+        }
+    }
+
+    pub fn from_exponential_halves(mode: f64, negative_mean: f64, positive_mean: f64) -> Self {
+        let nm = negative_mean.max(1e-8);
+        let pm = positive_mean.max(1e-8);
+        Self::new(mode, (nm * pm) / (nm + pm), 1.0 / (pm / nm + 1.0))
+    }
+
+    pub fn symmetric_from_moments(mean: f64, standard_deviation: f64) -> Self {
+        Self::new(mean, standard_deviation / (8.0_f64.sqrt()), 0.5)
+    }
+}
+
+impl Default for AssymetricLaplace {
+    fn default() -> Self {
+        Self::symmetric_from_moments(0.0, 1.0)
+    }
+}
+
+impl Distribution for AssymetricLaplace {
+    fn logpdf(&self, x: f64) -> f64 {
+        let m = self.mode;
+        let l = self.scale;
+        let p = self.mode_quantile;
+        let exp_comp = if x <= m {
+            ((1.0 - p) / l) * (x - m)
+        } else {
+            -(p / l) * (x - m)
+        };
+
+        ((p * (1.0 - p)) / l).ln() + exp_comp
+    }
+
+    fn pdf(&self, x: f64) -> f64 {
+        self.logpdf(x).exp()
+    }
+
+    fn cdf(&self, x: f64) -> f64 {
+        let m = self.mode;
+        let l = self.scale;
+        let p = self.mode_quantile;
+        if x <= m {
+            p * (((1.0 - p) / l) * (x - m)).exp()
+        } else {
+            1.0 - (1.0 - p) * (-(p / l) * (x - m)).exp()
+        }
+    }
+
+    fn logcdf(&self, x: f64) -> f64 {
+        self.cdf(x).ln()
+    }
+
+    fn ccdf(&self, x: f64) -> f64 {
+        1.0 - self.cdf(x)
+    }
+
+    fn logccdf(&self, x: f64) -> f64 {
+        self.ccdf(x).ln()
+    }
+
+    fn ppf(&self, p: f64) -> f64 {
+        let m = self.mode;
+        let l = self.scale;
+        let pm = self.mode_quantile;
+        if p <= pm {
+            m + (l / (1.0 - pm)) * (p / pm).ln()
+        } else {
+            m - (l / pm) * ((1.0 - p) / (1.0 - pm)).ln()
+        }
+    }
+
+    fn support(&self) -> (f64, f64) {
+        (f64::NEG_INFINITY, f64::INFINITY)
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+    use std::fmt::Debug;
+
+    pub fn linspace(start: f64, stop: f64, steps: usize) -> impl Iterator<Item = f64> {
+        (0..steps)
+            .map(move |n| n as f64 / (steps as f64 - 1.0))
+            .map(move |n| start * (1.0 - n) + stop * n)
+    }
+
+    // Add debug trait to allow for printout...
+    pub trait TestDistribution: Distribution + Debug {}
+    impl<T: Distribution + Debug> TestDistribution for T {}
+
+    fn as_box<T: TestDistribution + 'static>(d: T) -> Box<dyn TestDistribution> {
+        Box::new(d)
+    }
+
+    use super::{Exponential, ParameterizedDistribution};
+
+    fn get_dists() -> [Box<dyn TestDistribution>; 5] {
+        [
+            as_box(Exponential::unit()),
+            as_box(ExponentialEstimator::unit()),
+            as_box(HalfT::unit()),
+            as_box(AssymetricLaplace::unit()),
+            as_box(Lomax::unit()),
+        ]
+    }
+
+    fn is_close(a: f64, b: f64) -> bool {
+        let rel_tol = 1e-9;
+        let abs_tol = 0.0;
+        (a - b).abs() <= (rel_tol * (a.abs()).max(b.abs())).max(abs_tol)
+    }
+
+    #[test]
+    fn basic_distribution_propery_checks() {
+        for dist in get_dists() {
+            println!("Testing distribution: {:?}", dist);
+            let (mut low, mut high) = dist.support();
+
+            assert!(dist.cdf(low) == 0.0);
+            assert!(dist.cdf(high) == 1.0);
+
+            if high == f64::INFINITY {
+                high = 5.0;
+            }
+            if low == f64::NEG_INFINITY {
+                low = -5.0;
+            }
+
+            for x in linspace(low, high, 100) {
+                // Basic properties...
+                // println!("{x} -> {} vs {}", dist.tpdf(x), dist.tlogpdf(x).exp());
+                assert!(is_close(dist.pdf(x), dist.logpdf(x).exp()));
+                assert!(is_close(dist.cdf(x), dist.logcdf(x).exp()));
+                assert!(is_close(dist.ccdf(x), dist.logccdf(x).exp()));
+                assert!(is_close(dist.ccdf(x), 1.0 - dist.cdf(x)));
+                // println!("{x} -> {}", dist.tppf(dist.tcdf(x)));
+                assert!(is_close(dist.ppf(dist.cdf(x)), x));
+            }
+        }
+    }
+}
diff --git a/src/substitution_matrix.rs b/src/substitution_matrix.rs
index 6364857..901a7d1 100644
--- a/src/substitution_matrix.rs
+++ b/src/substitution_matrix.rs
@@ -1,14 +1,14 @@
-use std::{
-    fs::File,
-    io::{BufRead, BufReader, Read},
-    path::Path,
-};
+use std::io::{BufRead, BufReader, Read};
+
+use anyhow::{anyhow, Context};
+use itertools::Itertools;
 
 use crate::alphabet::{
     ALIGNMENT_ALPHABET_STR, GAP_EXTEND_DIGITAL, GAP_OPEN_DIGITAL, STR_TO_DIGITAL_NUCLEOTIDE,
 };
 
 pub trait AlignmentScore {
+    #[allow(dead_code)]
     fn score(&self, target_char: u8, query_char: u8) -> f64;
     fn score_with_background(&self, target_char: u8, query_char: u8, frequencies: &[f64; 4])
         -> f64;
@@ -193,13 +193,7 @@ impl SubstitutionMatrix {
         }
     }
 
-    #[allow(dead_code)]
-    pub fn from_file(path: impl AsRef<Path>) -> Vec<SubstitutionMatrix> {
-        let file = File::open(path).expect("failed to open matrix file");
-        SubstitutionMatrix::parse(file)
-    }
-
-    pub fn parse<R: Read>(matrix_buf: R) -> Vec<SubstitutionMatrix> {
+    pub fn parse<R: Read>(matrix_buf: R) -> anyhow::Result<Vec<SubstitutionMatrix>> {
         let mut matrices = vec![];
 
         let buf_reader = BufReader::new(matrix_buf);
@@ -207,9 +201,8 @@ impl SubstitutionMatrix {
 
         let mut lines: Vec<String> = buf_reader
             .lines()
-            .map(|l| l.expect("failed to read line"))
-            .filter(|l| !l.is_empty())
-            .collect();
+            .filter_ok(|l| !l.is_empty())
+            .try_collect()?;
 
         // add a single blank line to the
         // end to serve as a sentinel
@@ -231,26 +224,38 @@ impl SubstitutionMatrix {
         line_tokens
             .iter()
             .zip(line_tokens.iter().skip(1))
-            .for_each(|(tokens, next_tokens)| {
+            .enumerate()
+            .try_for_each(|(line_num, (tokens, next_tokens))| {
+                let error_msg = |msg| move || format!("{} at line {}", msg, line_num);
+
                 match state {
                     ParserState::Header => match tokens.first() {
                         Some(&"#matrix") => {
                             name = tokens[1].to_string();
                         }
                         Some(&"#gap-open") => {
-                            gap_open = tokens[1].parse::<f64>().expect("failed to parse float");
+                            gap_open = tokens[1]
+                                .parse::<f64>()
+                                .with_context(error_msg("failed to parse int"))?;
                         }
                         Some(&"#gap-ext") => {
-                            gap_extend = tokens[1].parse::<f64>().expect("failed to parse float");
+                            gap_extend = tokens[1]
+                                .parse::<f64>()
+                                .with_context(error_msg("failed to parse float"))?;
                         }
                         Some(&"#lambda") => {
-                            lambda = tokens[1].parse::<f64>().expect("failed to parse float");
+                            lambda = tokens[1]
+                                .parse::<f64>()
+                                .with_context(error_msg("failed to parse float"))?;
                         }
                         Some(&"#fi") => {
                             let freqs: Vec<f64> = tokens[1..=4]
                                 .iter()
-                                .map(|&f| f.parse::<f64>().expect("failed to parse float"))
-                                .collect();
+                                .map(|&f| {
+                                    f.parse::<f64>()
+                                        .with_context(error_msg("failed to parse float"))
+                                })
+                                .try_collect()?;
                             background_freqs_i
                                 .iter_mut()
                                 .zip(freqs)
@@ -259,14 +264,17 @@ impl SubstitutionMatrix {
                         Some(&"#fj") => {
                             let freqs: Vec<f64> = tokens[1..=4]
                                 .iter()
-                                .map(|&f| f.parse::<f64>().expect("failed to parse float"))
-                                .collect();
+                                .map(|&f| {
+                                    f.parse::<f64>()
+                                        .with_context(error_msg("failed to parse float"))
+                                })
+                                .try_collect()?;
                             background_freqs_j
                                 .iter_mut()
                                 .zip(freqs)
                                 .for_each(|(a, b)| *a = b);
                         }
-                        _ => panic!(),
+                        _ => return Err(anyhow!(error_msg("Unknown matrices file syntax!")())),
                     },
                     ParserState::Chars => {
                         chars = tokens.iter().map(|t| t.to_string()).collect();
@@ -274,8 +282,11 @@ impl SubstitutionMatrix {
                     ParserState::Scores => scores_vec.push(
                         tokens
                             .iter()
-                            .map(|t| t.parse::<f64>().expect("failed to parse float"))
-                            .collect(),
+                            .map(|t| {
+                                t.parse::<f64>()
+                                    .with_context(error_msg("failed to parse float"))
+                            })
+                            .try_collect()?,
                     ),
                 }
 
@@ -287,9 +298,12 @@ impl SubstitutionMatrix {
                     let char_indices: Vec<usize> = chars
                         .iter()
                         .map(|c| {
-                            (*STR_TO_DIGITAL_NUCLEOTIDE.get(c).expect("invalid char")) as usize
+                            STR_TO_DIGITAL_NUCLEOTIDE
+                                .get(c)
+                                .ok_or_else(|| anyhow!(error_msg("invalid char")()))
+                                .map(|v| *v as usize)
                         })
-                        .collect();
+                        .try_collect()?;
 
                     scores_vec
                         .iter()
@@ -320,26 +334,30 @@ impl SubstitutionMatrix {
                         scores,
                     ));
                     scores_vec = vec![];
+
+                    Ok::<_, anyhow::Error>(())
                 };
 
                 state = match next_tokens.first() {
                     Some(token) if token.starts_with('#') => {
                         if let ParserState::Scores = state {
-                            add_matrix()
+                            add_matrix()?;
                         }
                         ParserState::Header
                     }
                     Some(token) if token.parse::<f64>().is_err() => ParserState::Chars,
                     Some(token) if token.parse::<f64>().is_ok() => ParserState::Scores,
                     None => {
-                        add_matrix();
-                        return;
+                        add_matrix()?;
+                        return Ok(());
                     }
-                    _ => panic!(),
+                    _ => return Err(anyhow!(error_msg("Unknown syntax in matrices file!")())),
                 };
-            });
 
-        matrices
+                Ok(())
+            })?;
+
+        Ok(matrices)
     }
 }
 
@@ -431,7 +449,7 @@ mod tests {
             [0.0220, 0.1331, 0.0368, 0.9131],
         ];
 
-        let matrix_vec = SubstitutionMatrix::parse(matrix_buf);
+        let matrix_vec = SubstitutionMatrix::parse(matrix_buf)?;
         let matrix = matrix_vec.first().unwrap();
 
         matrix
@@ -476,7 +494,7 @@ mod tests {
             -30 -30 -30 -30 -30 -30 -30 -30 -30 -30 -30 -30"
             .as_bytes();
 
-        let matrix_vec = SubstitutionMatrix::parse(matrix_buf);
+        let matrix_vec = SubstitutionMatrix::parse(matrix_buf)?;
         let matrix = matrix_vec.first().unwrap();
 
         let correct: [[f64; 12]; 12] = [
@@ -567,7 +585,7 @@ mod tests {
             -30 -30 -30 -30 -30 -30 -30 -30 -30 -30 -30 -30"
             .as_bytes();
 
-        let matrix_vec = SubstitutionMatrix::parse(matrix_buf);
+        let matrix_vec = SubstitutionMatrix::parse(matrix_buf)?;
         let matrix = matrix_vec.first().unwrap();
 
         let mut ali: Vec<Alignment> = [
diff --git a/src/support.rs b/src/support.rs
index 4250363..1a6f9c0 100644
--- a/src/support.rs
+++ b/src/support.rs
@@ -2,10 +2,8 @@ use crate::matrix::Matrix;
 
 /// Smooth the values of a confidence matrix by convolving it with a fixed size rectangular kernel that adds to 1.
 /// This is the same as averaging values over the window range.
-/// The current kernel size is hardcoded to 31.
-pub fn windowed_confidence(matrix: &mut Matrix<f64>) -> Vec<f64> {
-    // TODO: parameterize this
-    let half_window_size = 15usize;
+pub fn windowed_confidence(matrix: &mut Matrix<f64>, window_size: usize) -> Vec<f64> {
+    let half_window_size = (window_size - 1) / 2;
 
     let mut confidence_by_row = vec![0.0; matrix.num_rows()];
 
diff --git a/src/trace_statistics.rs b/src/trace_statistics.rs
new file mode 100644
index 0000000..f71fa39
--- /dev/null
+++ b/src/trace_statistics.rs
@@ -0,0 +1,174 @@
+use std::fmt::Debug;
+
+use itertools::izip;
+
+use crate::{
+    alignment::AlignmentData,
+    join_estimation::{JoinEstimator, JoinStatisticsCollector},
+    pipeline::NaiveTraceResults,
+    segments::{InitialSegments, Segment},
+};
+
+#[derive(Debug)]
+pub struct RegionStatistics {
+    pub total_bases: usize,
+    pub unexplained_bases: Vec<usize>,
+}
+
+#[derive(Debug, Clone)]
+pub struct QueryStatistics<T: JoinEstimator> {
+    pub occurances: usize,
+    pub coverage: usize,
+    pub target_span: usize,
+    pub estimator: T,
+}
+
+#[derive(Debug)]
+pub struct TraceStatistics<T: JoinEstimator> {
+    #[allow(dead_code)]
+    pub total_bases: usize,
+    pub query_statistics: Vec<QueryStatistics<T>>,
+    pub region_statistics: Vec<RegionStatistics>,
+}
+
+pub enum OccuranceCountingMode {
+    Segments,
+    #[allow(dead_code)]
+    Trace,
+}
+
+pub fn calculate_region_statistics(segments: &InitialSegments) -> RegionStatistics {
+    let mut region_stat = RegionStatistics {
+        total_bases: 0,
+        unexplained_bases: Vec::with_capacity(segments.len()),
+    };
+
+    let mut unexplained_bases_up_to: usize = 0;
+    let mut prior_segment: Option<&Segment> = None;
+
+    for seg in segments.view_segments() {
+        if let Some(prior_segment) = prior_segment {
+            // If a skip block was the prior block, add it's bases as unexplained.
+            if prior_segment.blocks.len() == 1 && prior_segment.blocks[0].row_idx == 0 {
+                unexplained_bases_up_to += seg.end_col - seg.start_col + 1;
+            }
+            unexplained_bases_up_to += seg.start_col - prior_segment.end_col - 1;
+            region_stat.total_bases += seg.start_col - prior_segment.end_col - 1;
+        }
+        region_stat.total_bases += seg.end_col - seg.start_col + 1;
+        region_stat.unexplained_bases.push(unexplained_bases_up_to);
+
+        prior_segment = Some(seg);
+    }
+
+    region_stat
+}
+
+pub fn trace_statistics<S: JoinStatisticsCollector + Debug + Into<E>, E: JoinEstimator>(
+    naive_traces: &[NaiveTraceResults<S>],
+    alignment_data: &AlignmentData,
+    count_mode: OccuranceCountingMode,
+) -> TraceStatistics<E> {
+    // Asumption... All regions are sorted, no gaps. At least 1 region expected...
+    debug_assert!(naive_traces.first().map(|v| v.region_index) == Some(0));
+    debug_assert!(naive_traces
+        .iter()
+        .zip(naive_traces.iter().skip(1))
+        .all(|(v1, v2)| v1.region_index + 1 == v2.region_index));
+
+    assert!(naive_traces
+        .iter()
+        .zip(naive_traces.iter().skip(1))
+        .all(|(v1, v2)| v1.region_index + 1 == v2.region_index && v1.target_end < v2.target_start));
+
+    let mut query_stats: Vec<QueryStatistics<E>> = vec![
+        QueryStatistics {
+            occurances: 0,
+            coverage: 0,
+            target_span: 0,
+            estimator: E::default(),
+        };
+        alignment_data.query_name_map.size()
+    ];
+
+    let mut query_span: Vec<Option<(usize, usize)>> =
+        vec![None; alignment_data.query_name_map.size()];
+
+    let mut all_region_stats: Vec<RegionStatistics> = Vec::with_capacity(naive_traces.len());
+    // We combine stats for all families to use as a prior (psuedo-count, single sample) for all stats...
+    let mut all_family_stats: S = S::new();
+
+    for trace_results in naive_traces.iter() {
+        for (_query_id, stats) in trace_results.query_join_statistics.iter() {
+            all_family_stats = all_family_stats.combine(stats);
+        }
+
+        match count_mode {
+            OccuranceCountingMode::Segments => {
+                for seg in trace_results.segments.view_segments().iter() {
+                    for blk in seg.blocks.iter() {
+                        if let Some(query_id) = blk.query_id {
+                            query_stats[query_id].occurances += 1;
+                            query_stats[query_id].coverage += blk.col_end - blk.col_start + 1;
+                            query_span[query_id] = match query_span[query_id] {
+                                None => Some((
+                                    trace_results.target_start + blk.col_start,
+                                    trace_results.target_start + blk.col_end,
+                                )),
+                                Some((start, end)) => Some((
+                                    start.min(trace_results.target_start + blk.col_start),
+                                    end.max(trace_results.target_start + blk.col_end),
+                                )),
+                            }
+                        }
+                    }
+                }
+            }
+            OccuranceCountingMode::Trace => {
+                for trace_blk in trace_results.trace_segments.iter() {
+                    query_stats[trace_blk.query_id].occurances += 1;
+                    query_stats[trace_blk.query_id].coverage +=
+                        trace_blk.col_end - trace_blk.col_start + 1;
+
+                    query_span[trace_blk.query_id] = match query_span[trace_blk.query_id] {
+                        None => Some((
+                            trace_results.target_start + trace_blk.col_start,
+                            trace_results.target_start + trace_blk.col_end,
+                        )),
+                        Some((start, end)) => Some((
+                            start.min(trace_results.target_start + trace_blk.col_start),
+                            end.max(trace_results.target_start + trace_blk.col_end),
+                        )),
+                    }
+                }
+            }
+        }
+
+        all_region_stats.push(calculate_region_statistics(&trace_results.segments));
+    }
+
+    // Calculate join statistics for all families using combined prior as a starting point...
+    let mut all_join_stats: Vec<S> =
+        vec![S::new_from_prior(&all_family_stats, 1); alignment_data.query_name_map.size()];
+
+    for trace_results in naive_traces.iter() {
+        for (query_id, stats) in trace_results.query_join_statistics.iter() {
+            all_join_stats[*query_id] = all_join_stats[*query_id].combine(stats);
+        }
+    }
+
+    for (query_info, query_span, join_stat) in
+        izip!(query_stats.iter_mut(), query_span.iter(), all_join_stats)
+    {
+        if let Some((start, end)) = query_span {
+            query_info.target_span = end - start + 1;
+        }
+        query_info.estimator = join_stat.into();
+    }
+
+    TraceStatistics {
+        total_bases: all_region_stats.iter().map(|v| v.total_bases).sum(),
+        query_statistics: query_stats,
+        region_statistics: all_region_stats,
+    }
+}
diff --git a/src/util.rs b/src/util.rs
index 62cabe0..1c7fe5c 100644
--- a/src/util.rs
+++ b/src/util.rs
@@ -15,7 +15,7 @@ impl<T: std::cmp::PartialEq> VecMap<T> {
         Self { values }
     }
 
-    pub fn values(&self) -> std::slice::Iter<T> {
+    pub fn values(&self) -> std::slice::Iter<'_, T> {
         self.values.iter()
     }
 
diff --git a/src/viz/block.rs b/src/viz/block.rs
index 6005b35..4065e6d 100644
--- a/src/viz/block.rs
+++ b/src/viz/block.rs
@@ -189,7 +189,7 @@ impl BlockGroup {
                     None
                 } else {
                     match elems[8] {
-                        "C" => Some(bed.strand),
+                        "C" => Some(Strand::Reverse),
                         _ => Some(Strand::from_str(elems[8])),
                     }
                 }
diff --git a/src/viz/mod.rs b/src/viz/mod.rs
index ad03f7f..06e12c3 100644
--- a/src/viz/mod.rs
+++ b/src/viz/mod.rs
@@ -48,7 +48,7 @@ pub fn write_index_file(
         .enumerate()
         .for_each(|(idx, c)| {
             index_links.push_str(&format!(
-                "<div class=\"region\" data-target=\"{name}\" data-start=\"{start}\" data-end=\"{end}\"><a href=\"{name}-{start}-{end}.html\">slice {idx} | {name} {start}:{end}</a></div><br>\n",
+                "<div class=\"region\" data-target=\"{name}\" data-start=\"{start}\" data-end=\"{end}\"><a href=\"{name}-{start}-{end}.html\"><h3>slice {idx} | {name} {start}:{end}</h3></a></div>\n",
                 name = c.target_name,
                 start = c.target_start,
                 end = c.target_end,
@@ -58,7 +58,7 @@ pub fn write_index_file(
 
     proximity_groups.iter().enumerate().for_each(|(idx, g)| {
         index_links.push_str(&format!(
-            "<div class=\"region\" data-target=\"{name}\" data-start=\"{start}\" data-end=\"{end}\"><a href=\"{idx}/index.html\"><h3>region {idx} | {name} {start}:{end}</h3></a>\n",
+            "<div class=\"region\" data-target=\"{name}\" data-start=\"{start}\" data-end=\"{end}\"><a href=\"{idx}/index.html\"><h3>region {idx} | {name} {start}:{end}</h3></a></div>\n",
             name = alignment_data.target_name_map.get(g.target_id),
             start = g.target_start,
             end = g.target_end,
@@ -746,7 +746,7 @@ impl<'a> AdjudicationSodaData<'a> {
         let columns = self
             .active_columns
             .iter()
-            .flat_map(|&(start, end)| (start..=end))
+            .flat_map(|&(start, end)| start..=end)
             .collect_vec();
         vec![columns]
     }
diff --git a/src/viz/stats.rs b/src/viz/stats.rs
index 4b74fb1..82f7201 100644
--- a/src/viz/stats.rs
+++ b/src/viz/stats.rs
@@ -1,5 +1,11 @@
 use crate::{
-    alignment::Strand, annotation::AmbiguousAnnotation, segments::Unordered, viz::SODA_JS,
+    alignment::Strand,
+    annotation::AmbiguousAnnotation,
+    assembly::{
+        block_target_distance, relative_consensus_distance, ConsensusDistanceNormalization,
+    },
+    segments::{Block, Unordered},
+    viz::SODA_JS,
 };
 use core::str;
 use itertools::Itertools;
@@ -12,6 +18,12 @@ struct FamilyInfo {
     pub occurrences: usize,
     pub coverage: usize,
     pub kimura80_values: Unordered<Vec<f64>>,
+    pub join_consensus_dist: Unordered<Vec<f64>>,
+    pub nojoin_consensus_dist: Unordered<Vec<f64>>,
+    pub join_target_dist: Unordered<Vec<f64>>,
+    pub nojoin_target_dist: Unordered<Vec<f64>>,
+    pub join_kimura_dist: Unordered<Vec<f64>>,
+    pub nojoin_kimura_dist: Unordered<Vec<f64>>,
 }
 
 #[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
@@ -20,29 +32,17 @@ struct InversionInfo {
     pub normal_joins: usize,
 }
 
-fn write_table<const N: usize, A: std::fmt::Display, B: std::fmt::Display>(
+fn write_tsv<const N: usize, A: std::fmt::Display, B: std::fmt::Display>(
     writer: &mut impl Write,
     header: &[A; N],
     data: &[[B; N]],
 ) -> std::io::Result<()> {
-    writeln!(writer, "<table>\n<thead>")?;
-
-    for heading in header.iter() {
-        writeln!(writer, "<th>{}</th>", heading)?;
-    }
-
-    writeln!(writer, "</thead>\n<tbody>")?;
+    writeln!(writer, "{}", header.iter().join("\t"))?;
 
     for row in data.iter() {
-        write!(writer, "<tr>")?;
-        for entry in row.iter() {
-            write!(writer, "<td>{}</td>", entry)?;
-        }
-        writeln!(writer, "</tr>")?;
+        writeln!(writer, "{}", row.iter().join("\t"))?;
     }
 
-    writeln!(writer, "</tbody>\n</table>")?;
-
     Ok(())
 }
 
@@ -54,13 +54,13 @@ fn write_statistics_table_page<const N: usize, A: std::fmt::Display, B: std::fmt
 ) -> std::io::Result<()> {
     let mut tmp_writer = Vec::<u8>::new();
 
-    write_table(&mut tmp_writer, header, data)?;
+    write_tsv(&mut tmp_writer, header, data)?;
 
     let table_page = TABLE_HTML
         .replace("PAGE_TITLE", title)
         .replace("SODA_TARGET", SODA_JS)
         .replace(
-            "TABLE_TARGET",
+            "TSV_TARGET",
             str::from_utf8(&tmp_writer).expect("UTF8 decoding failed!"),
         );
 
@@ -72,14 +72,39 @@ fn write_statistics_table_page<const N: usize, A: std::fmt::Display, B: std::fmt
 pub fn write_family_statistics(
     stats_writer: &mut impl Write,
     region_annotations: &[(usize, Vec<AmbiguousAnnotation>)],
+    query_lengths: &HashMap<usize, usize>,
 ) -> std::io::Result<()> {
     let mut family_stats = HashMap::<&String, FamilyInfo>::new();
 
     for (_region, annotations) in region_annotations.iter() {
+        let mut prior_elem = HashMap::<usize, (&AmbiguousAnnotation, usize)>::new();
+
         for amb_annot in annotations.iter() {
-            for query in amb_annot.annotations.iter() {
+            for (query_idx, query) in amb_annot.annotations.iter().enumerate() {
                 let name = &query.query_name;
                 let target_range = query.target_end - query.target_start;
+                let (join_cons, nojoin_cons, join_target, nojoin_target, join_div, nojoin_div) =
+                    if let Some(&(prior_annot, prior_query_idx)) = prior_elem.get(&query.query_id) {
+                        let block_c = Block::from_annotation(amb_annot, query_idx, 0);
+                        let block_p = Block::from_annotation(prior_annot, prior_query_idx, 0);
+                        let q_len = *query_lengths.get(&query.query_id).unwrap_or(&1);
+
+                        let (c_dist, _join_type) = relative_consensus_distance(
+                            &block_p,
+                            &block_c,
+                            ConsensusDistanceNormalization::WithLength(q_len),
+                        );
+                        let t_dist = block_target_distance(&block_p, &block_c) as f64;
+                        let d_dist = (block_c.kimura80 - block_p.kimura80).abs();
+
+                        if prior_annot.join_id == amb_annot.join_id {
+                            (Some(c_dist), None, Some(t_dist), None, Some(d_dist), None)
+                        } else {
+                            (None, Some(c_dist), None, Some(t_dist), None, Some(d_dist))
+                        }
+                    } else {
+                        (None, None, None, None, None, None)
+                    };
 
                 family_stats
                     .entry(name)
@@ -87,12 +112,28 @@ pub fn write_family_statistics(
                         f.occurrences += 1;
                         f.coverage += target_range;
                         f.kimura80_values.0.push(query.kimura80);
+                        f.join_consensus_dist.0.extend(join_cons.iter());
+                        f.nojoin_consensus_dist.0.extend(nojoin_cons.iter());
+                        f.join_target_dist.0.extend(join_target.iter());
+                        f.nojoin_target_dist.0.extend(nojoin_target.iter());
+                        f.join_kimura_dist.0.extend(join_div.iter());
+                        f.nojoin_kimura_dist.0.extend(nojoin_div.iter());
                     })
                     .or_insert(FamilyInfo {
                         occurrences: 1,
                         coverage: target_range,
                         kimura80_values: Unordered(vec![query.kimura80]),
+                        join_consensus_dist: Unordered(join_cons.iter().copied().collect()),
+                        nojoin_consensus_dist: Unordered(nojoin_cons.iter().copied().collect()),
+                        join_target_dist: Unordered(join_target.iter().copied().collect()),
+                        nojoin_target_dist: Unordered(nojoin_target.iter().copied().collect()),
+                        join_kimura_dist: Unordered(join_div.iter().copied().collect()),
+                        nojoin_kimura_dist: Unordered(nojoin_div.iter().copied().collect()),
                     });
+
+                prior_elem
+                    .entry(query.query_id)
+                    .insert_entry((amb_annot, query_idx));
             }
         }
     }
@@ -101,11 +142,16 @@ pub fn write_family_statistics(
         stats_writer,
         "Family Statistics",
         &[
-            "Family",
-            "Occurrences",
-            "Coverage",
-            "Kimura80 Boxplot",
-            "Kimura80 KDE",
+            "Family_string",
+            "Occurrences_int",
+            "Coverage_int",
+            "Kimura80_KDE_violin",
+            "Joined_Consensus_Distance_violin",
+            "Unjoined_Consensus_Distance_violin",
+            "Joined_Target_Distance_violin",
+            "Unjoined_Target_Distance_violin",
+            "Joined_Kimura_Difference_violin",
+            "Unjoined_Kimura_Difference_violin",
         ],
         &family_stats
             .iter()
@@ -115,14 +161,13 @@ pub fn write_family_statistics(
                     k.to_string(),
                     v.occurrences.to_string(),
                     v.coverage.to_string(),
-                    format!(
-                        "<figure class=\"boxplot\" data-samples=\"{}\"></figure>",
-                        v.kimura80_values.0.iter().join(",")
-                    ),
-                    format!(
-                        "<figure class=\"violin\" data-samples=\"{}\"></figure>",
-                        v.kimura80_values.0.iter().join(",")
-                    ),
+                    v.kimura80_values.0.iter().join(":"),
+                    v.join_consensus_dist.0.iter().join(":"),
+                    v.nojoin_consensus_dist.0.iter().join(":"),
+                    v.join_target_dist.0.iter().join(":"),
+                    v.nojoin_target_dist.0.iter().join(":"),
+                    v.join_kimura_dist.0.iter().join(":"),
+                    v.nojoin_kimura_dist.0.iter().join(":"),
                 ]
             })
             .collect_vec(),
@@ -165,13 +210,13 @@ pub fn write_inversion_statistics(
     write_statistics_table_page(
         stats_writer,
         "Inversion Statistics",
-        &["Region", "Inversions", "Normal Joins"],
+        &["Region_region", "Inversions_int", "Normal_Joins_int"],
         &inversion_stats
             .iter()
             .sorted_by(|v1, v2| v2.1.cmp(v1.1))
             .map(|(k, v)| {
                 [
-                    format!("<a href=\"{}/index.html\">{}</a>", k, k),
+                    k.to_string(),
                     v.inversions.to_string(),
                     v.normal_joins.to_string(),
                 ]
diff --git a/src/windowed_scores.rs b/src/windowed_scores.rs
index 6044e30..39456b4 100644
--- a/src/windowed_scores.rs
+++ b/src/windowed_scores.rs
@@ -45,6 +45,7 @@ impl BackgroundFrequencies for Background<'_> {
     }
 }
 
+#[allow(dead_code)]
 pub struct DummyBackground {}
 
 impl BackgroundFrequencies for DummyBackground {