From 211f1334d078de97563978df19c63dfa3fe8e605 Mon Sep 17 00:00:00 2001 From: Dave Marion Date: Wed, 17 Jun 2026 19:32:17 +0000 Subject: [PATCH 1/4] Modified the Monitor InformationFetcher class to purge stale data Closes #6390 --- .../monitor/next/InformationFetcher.java | 53 ++++++++++++++----- 1 file changed, 40 insertions(+), 13 deletions(-) diff --git a/server/monitor/src/main/java/org/apache/accumulo/monitor/next/InformationFetcher.java b/server/monitor/src/main/java/org/apache/accumulo/monitor/next/InformationFetcher.java index a6f3dece4fe..a90f94ce568 100644 --- a/server/monitor/src/main/java/org/apache/accumulo/monitor/next/InformationFetcher.java +++ b/server/monitor/src/main/java/org/apache/accumulo/monitor/next/InformationFetcher.java @@ -78,6 +78,7 @@ import org.apache.accumulo.core.tabletscan.thrift.ActiveScan; import org.apache.accumulo.core.tabletscan.thrift.TabletScanClientService; import org.apache.accumulo.core.trace.TraceUtil; +import org.apache.accumulo.core.util.Timer; import org.apache.accumulo.core.util.UtilWaitThread; import org.apache.accumulo.core.util.compaction.ExternalCompactionUtil; import org.apache.accumulo.core.util.threads.ThreadPools; @@ -690,13 +691,12 @@ public void onRemoval(@Nullable ServerId server, @Nullable MetricResponse respon if (server == null) { return; } - try { - getSummary().processError(server); - LOG.info("{} has been evicted", server); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - LOG.warn("{} could not be evicted", server, e); + final SystemInformation currentSummary = summaryRef.get(); + if (currentSummary == null) { + return; } + currentSummary.processError(server); + LOG.info("{} has been evicted", server); } /** @@ -792,26 +792,53 @@ private void fetchTabletInformation(SystemInformation summary, UpdateTasks futur public void run() { long lastRunTime = 0; + final Timer noConnectionTimer = Timer.startNew(); + final int clearStateMins = 10; + final Duration clearStateDuration = Duration.ofMinutes(clearStateMins); + final long minimumRefreshTimeMs = 5000; while (true) { + // Only refresh internal data structure every 5s (old monitor logic). + while (NanoTime.millisElapsed(lastRunTime, NanoTime.now()) < minimumRefreshTimeMs) { + LOG.trace("Waiting for the 5s refresh interval"); + try { + Thread.sleep(250); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new IllegalStateException( + "Thread " + Thread.currentThread().getName() + " interrupted", e); + } + } + // Don't fetch new data if there are no connections. - // On an initial connection, no data may be displayed. - // If a connection has not been made in a while, stale data may be displayed. - // Only refresh every 5s (old monitor logic). - while (!newConnectionEvent.get() && connectionCount.get() == 0 - && NanoTime.millisElapsed(lastRunTime, NanoTime.now()) > 5000) { + // When summaryRef is not set, then the REST endpoint will wait + // until data is retrieved. summaryRef is not set on initial + // connection or when there has been no connection for 5 minutes + noConnectionTimer.restart(); + while (!newConnectionEvent.get() && connectionCount.get() == 0) { + LOG.trace("Waiting for a connection, connections: {}", connectionCount.get()); try { - Thread.sleep(100); + Thread.sleep(250); } catch (InterruptedException e) { Thread.currentThread().interrupt(); throw new IllegalStateException( "Thread " + Thread.currentThread().getName() + " interrupted", e); } + // If a connection has not been made in 5 minutes, + // then clear the summaryRef so that stale data is not displayed. + if (this.summaryRef.get() != null && noConnectionTimer.hasElapsed(clearStateDuration)) { + LOG.debug("Clearing internal summary state due to no connection for {} minutes", + clearStateMins); + SystemInformation oldSummary = summaryRef.getAndSet(null); + if (oldSummary != null) { + oldSummary.clear(); + } + } } // reset the connection event flag newConnectionEvent.compareAndExchange(true, false); - LOG.info("Fetching information from servers"); + LOG.info("Fetching information from servers, connection count: {}", connectionCount.get()); long fetchCycleStart = System.currentTimeMillis(); final UpdateTasks futures = new UpdateTasks(); From 692da2312060a5f929341975a5a402e7f37dd212 Mon Sep 17 00:00:00 2001 From: Dave Marion Date: Thu, 18 Jun 2026 12:36:18 -0400 Subject: [PATCH 2/4] Apply suggestion from @DomGarguilo Co-authored-by: Dom G. --- .../apache/accumulo/monitor/next/InformationFetcher.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/server/monitor/src/main/java/org/apache/accumulo/monitor/next/InformationFetcher.java b/server/monitor/src/main/java/org/apache/accumulo/monitor/next/InformationFetcher.java index a90f94ce568..9199bb8576c 100644 --- a/server/monitor/src/main/java/org/apache/accumulo/monitor/next/InformationFetcher.java +++ b/server/monitor/src/main/java/org/apache/accumulo/monitor/next/InformationFetcher.java @@ -664,7 +664,11 @@ private SystemInformation getSummary() throws InterruptedException { while (summaryRef.get() == null) { Thread.sleep(100); } - return summaryRef.get(); + SystemInformation summary; + while ((summary = summaryRef.get()) == null) { + Thread.sleep(100); + } + return summary; } /** From 3b819b5f8785bb5bcff2ec2757f3216fe8590495 Mon Sep 17 00:00:00 2001 From: Dave Marion Date: Thu, 18 Jun 2026 16:50:56 +0000 Subject: [PATCH 3/4] Implement PR suggestions --- .../apache/accumulo/monitor/next/InformationFetcher.java | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/server/monitor/src/main/java/org/apache/accumulo/monitor/next/InformationFetcher.java b/server/monitor/src/main/java/org/apache/accumulo/monitor/next/InformationFetcher.java index 9199bb8576c..7dd921cb624 100644 --- a/server/monitor/src/main/java/org/apache/accumulo/monitor/next/InformationFetcher.java +++ b/server/monitor/src/main/java/org/apache/accumulo/monitor/next/InformationFetcher.java @@ -661,9 +661,6 @@ public void newConnectionEvent() { // Protect against NPE and wait for initial data gathering private SystemInformation getSummary() throws InterruptedException { - while (summaryRef.get() == null) { - Thread.sleep(100); - } SystemInformation summary; while ((summary = summaryRef.get()) == null) { Thread.sleep(100); @@ -817,7 +814,7 @@ public void run() { // Don't fetch new data if there are no connections. // When summaryRef is not set, then the REST endpoint will wait // until data is retrieved. summaryRef is not set on initial - // connection or when there has been no connection for 5 minutes + // connection or when there has been no connection for the configured duration. noConnectionTimer.restart(); while (!newConnectionEvent.get() && connectionCount.get() == 0) { LOG.trace("Waiting for a connection, connections: {}", connectionCount.get()); @@ -828,7 +825,7 @@ public void run() { throw new IllegalStateException( "Thread " + Thread.currentThread().getName() + " interrupted", e); } - // If a connection has not been made in 5 minutes, + // If a connection has not been made in the configured duration, // then clear the summaryRef so that stale data is not displayed. if (this.summaryRef.get() != null && noConnectionTimer.hasElapsed(clearStateDuration)) { LOG.debug("Clearing internal summary state due to no connection for {} minutes", From 9b5cf1a2a1c72c9638228499567c78c536889207 Mon Sep 17 00:00:00 2001 From: Dave Marion Date: Thu, 18 Jun 2026 17:41:27 +0000 Subject: [PATCH 4/4] Added property for internal state retention duration --- .../java/org/apache/accumulo/core/conf/Property.java | 9 +++++++++ .../apache/accumulo/monitor/next/InformationFetcher.java | 9 +++++---- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/core/src/main/java/org/apache/accumulo/core/conf/Property.java b/core/src/main/java/org/apache/accumulo/core/conf/Property.java index f5f8cde9f38..b148c1a60e5 100644 --- a/core/src/main/java/org/apache/accumulo/core/conf/Property.java +++ b/core/src/main/java/org/apache/accumulo/core/conf/Property.java @@ -951,6 +951,15 @@ Each key is the name of the pool (can be assigned any string). Each value is a J The resources that are used by default can be seen in \ `accumulo/server/monitor/src/main/resources/templates/default.ftl`. """, "2.0.0"), + MONITOR_PURGE_STATE_THRESHOLD("monitor.purge.state.threshold", "10m", PropertyType.TIMEDURATION, + """ + The Monitor contains an internal data structure that contains all of the data used in the UI. \ + This property controls how long the Monitor should retain that data structure after the last \ + connection has disconnected from the Monitor. Keeping the data structure will allow for an \ + immediate display of the last known state. Removing the data structure will cause the UI \ + to wait for new data to be fetched to compute the known state. + """, + "4.0.0"), MONITOR_FETCH_TIMEOUT("monitor.fetch.timeout", "5m", PropertyType.TIMEDURATION, """ The Monitor fetches information for display in a set of background threads. This property \ controls the amount of time that process should wait before cancelling any remaining \ diff --git a/server/monitor/src/main/java/org/apache/accumulo/monitor/next/InformationFetcher.java b/server/monitor/src/main/java/org/apache/accumulo/monitor/next/InformationFetcher.java index 7dd921cb624..91cd5982411 100644 --- a/server/monitor/src/main/java/org/apache/accumulo/monitor/next/InformationFetcher.java +++ b/server/monitor/src/main/java/org/apache/accumulo/monitor/next/InformationFetcher.java @@ -794,8 +794,9 @@ public void run() { long lastRunTime = 0; final Timer noConnectionTimer = Timer.startNew(); - final int clearStateMins = 10; - final Duration clearStateDuration = Duration.ofMinutes(clearStateMins); + final long clearStateThreshold = + ctx.getConfiguration().getTimeInMillis(Property.MONITOR_FETCH_TIMEOUT); + final Duration clearStateDuration = Duration.ofMillis(clearStateThreshold); final long minimumRefreshTimeMs = 5000; while (true) { @@ -828,8 +829,8 @@ public void run() { // If a connection has not been made in the configured duration, // then clear the summaryRef so that stale data is not displayed. if (this.summaryRef.get() != null && noConnectionTimer.hasElapsed(clearStateDuration)) { - LOG.debug("Clearing internal summary state due to no connection for {} minutes", - clearStateMins); + LOG.debug("Clearing internal summary state due to no connection for {} ms", + clearStateThreshold); SystemInformation oldSummary = summaryRef.getAndSet(null); if (oldSummary != null) { oldSummary.clear();