diff --git a/core/src/main/java/org/apache/accumulo/core/conf/Property.java b/core/src/main/java/org/apache/accumulo/core/conf/Property.java index f5f8cde9f38..b148c1a60e5 100644 --- a/core/src/main/java/org/apache/accumulo/core/conf/Property.java +++ b/core/src/main/java/org/apache/accumulo/core/conf/Property.java @@ -951,6 +951,15 @@ Each key is the name of the pool (can be assigned any string). Each value is a J The resources that are used by default can be seen in \ `accumulo/server/monitor/src/main/resources/templates/default.ftl`. """, "2.0.0"), + MONITOR_PURGE_STATE_THRESHOLD("monitor.purge.state.threshold", "10m", PropertyType.TIMEDURATION, + """ + The Monitor contains an internal data structure that contains all of the data used in the UI. \ + This property controls how long the Monitor should retain that data structure after the last \ + connection has disconnected from the Monitor. Keeping the data structure will allow for an \ + immediate display of the last known state. Removing the data structure will cause the UI \ + to wait for new data to be fetched to compute the known state. + """, + "4.0.0"), MONITOR_FETCH_TIMEOUT("monitor.fetch.timeout", "5m", PropertyType.TIMEDURATION, """ The Monitor fetches information for display in a set of background threads. 
This property \ controls the amount of time that process should wait before cancelling any remaining \ diff --git a/server/monitor/src/main/java/org/apache/accumulo/monitor/next/InformationFetcher.java b/server/monitor/src/main/java/org/apache/accumulo/monitor/next/InformationFetcher.java index a6f3dece4fe..91cd5982411 100644 --- a/server/monitor/src/main/java/org/apache/accumulo/monitor/next/InformationFetcher.java +++ b/server/monitor/src/main/java/org/apache/accumulo/monitor/next/InformationFetcher.java @@ -78,6 +78,7 @@ import org.apache.accumulo.core.tabletscan.thrift.ActiveScan; import org.apache.accumulo.core.tabletscan.thrift.TabletScanClientService; import org.apache.accumulo.core.trace.TraceUtil; +import org.apache.accumulo.core.util.Timer; import org.apache.accumulo.core.util.UtilWaitThread; import org.apache.accumulo.core.util.compaction.ExternalCompactionUtil; import org.apache.accumulo.core.util.threads.ThreadPools; @@ -660,10 +661,11 @@ public void newConnectionEvent() { // Protect against NPE and wait for initial data gathering private SystemInformation getSummary() throws InterruptedException { - while (summaryRef.get() == null) { + SystemInformation summary; + while ((summary = summaryRef.get()) == null) { Thread.sleep(100); } - return summaryRef.get(); + return summary; } /** @@ -690,13 +692,12 @@ public void onRemoval(@Nullable ServerId server, @Nullable MetricResponse respon if (server == null) { return; } - try { - getSummary().processError(server); - LOG.info("{} has been evicted", server); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - LOG.warn("{} could not be evicted", server, e); + final SystemInformation currentSummary = summaryRef.get(); + if (currentSummary == null) { + return; } + currentSummary.processError(server); + LOG.info("{} has been evicted", server); } /** @@ -792,26 +793,54 @@ private void fetchTabletInformation(SystemInformation summary, UpdateTasks futur public void run() { long lastRunTime = 
0; + final Timer noConnectionTimer = Timer.startNew(); + final long clearStateThreshold = + ctx.getConfiguration().getTimeInMillis(Property.MONITOR_PURGE_STATE_THRESHOLD); + final Duration clearStateDuration = Duration.ofMillis(clearStateThreshold); + final long minimumRefreshTimeMs = 5000; while (true) { + // Only refresh internal data structure every 5s (old monitor logic). + while (NanoTime.millisElapsed(lastRunTime, NanoTime.now()) < minimumRefreshTimeMs) { + LOG.trace("Waiting for the 5s refresh interval"); + try { + Thread.sleep(250); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new IllegalStateException( + "Thread " + Thread.currentThread().getName() + " interrupted", e); + } + } + // Don't fetch new data if there are no connections. - // On an initial connection, no data may be displayed. - // If a connection has not been made in a while, stale data may be displayed. - // Only refresh every 5s (old monitor logic). - while (!newConnectionEvent.get() && connectionCount.get() == 0 - && NanoTime.millisElapsed(lastRunTime, NanoTime.now()) > 5000) { + // When summaryRef is not set, then the REST endpoint will wait + // until data is retrieved. summaryRef is not set on initial + // connection or when no connection was made within monitor.purge.state.threshold. + noConnectionTimer.restart(); + while (!newConnectionEvent.get() && connectionCount.get() == 0) { + LOG.trace("Waiting for a connection, connections: {}", connectionCount.get()); try { - Thread.sleep(100); + Thread.sleep(250); } catch (InterruptedException e) { Thread.currentThread().interrupt(); throw new IllegalStateException( "Thread " + Thread.currentThread().getName() + " interrupted", e); } + // If a connection has not been made in the configured duration, + // then clear the summaryRef so that stale data is not displayed. 
+ if (this.summaryRef.get() != null && noConnectionTimer.hasElapsed(clearStateDuration)) { + LOG.debug("Clearing internal summary state due to no connection for {} ms", + clearStateThreshold); + SystemInformation oldSummary = summaryRef.getAndSet(null); + if (oldSummary != null) { + oldSummary.clear(); + } + } } // reset the connection event flag newConnectionEvent.compareAndExchange(true, false); - LOG.info("Fetching information from servers"); + LOG.info("Fetching information from servers, connection count: {}", connectionCount.get()); long fetchCycleStart = System.currentTimeMillis(); final UpdateTasks futures = new UpdateTasks();