diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/http/BaseHttpServer.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/http/BaseHttpServer.java index 9b2a0a61fdac..d56b96e2f515 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/http/BaseHttpServer.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/http/BaseHttpServer.java @@ -329,6 +329,7 @@ public void start() throws IOException { .register("prometheus", "Hadoop metrics prometheus exporter", prometheusMetricsSink); } + BuildInfoMetrics.create(name); updateConnectorAddress(); } @@ -341,6 +342,7 @@ private boolean isEnabled() { public void stop() throws Exception { if (httpServer != null) { httpServer.stop(); + BuildInfoMetrics.unregister(); } } diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/http/BuildInfoMetrics.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/http/BuildInfoMetrics.java new file mode 100644 index 000000000000..2eda069b4c20 --- /dev/null +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/http/BuildInfoMetrics.java @@ -0,0 +1,101 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hdds.server.http;
+
+import org.apache.hadoop.hdds.utils.HddsVersionInfo;
+import org.apache.hadoop.hdds.utils.VersionInfo;
+import org.apache.hadoop.metrics2.MetricsCollector;
+import org.apache.hadoop.metrics2.MetricsRecordBuilder;
+import org.apache.hadoop.metrics2.MetricsSource;
+import org.apache.hadoop.metrics2.MetricsSystem;
+import org.apache.hadoop.metrics2.MetricsTag;
+import org.apache.hadoop.metrics2.annotation.Metrics;
+import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
+import org.apache.hadoop.metrics2.lib.Interns;
+import org.apache.hadoop.ozone.OzoneConsts;
+
+/**
+ * Exposes build version and git revision as a Prometheus info metric.
+ *
+ * <p>Follows the OpenMetrics Info pattern: the value is always 1, and the
+ * identifying strings are carried as labels. The metric name uses the
+ * conventional {@code _build_info} suffix.</p>
+ *
+ * <pre>
+ * ozone_build_info{component="OM",version="2.0.0",revision="abc1234"} 1
+ * </pre>
+ */
+@Metrics(about = "Ozone build version info", context = OzoneConsts.OZONE)
+public final class BuildInfoMetrics implements MetricsSource {
+  /** Name under which the shared source is registered with the metrics system. */
+  public static final String METRICS_SOURCE_NAME = "OzoneBuildInfo";
+  /** Record name, picked so that with the {@code BuildInfo} gauge it reads "ozone_build_info". */
+  public static final String RECORD_NAME = "Ozone";
+
+  private final String component;
+  private final String version;
+  private final String revision;
+
+  private BuildInfoMetrics(String component, VersionInfo versionInfo) {
+    this.component = component;
+    this.version = versionInfo.getVersion();
+    this.revision = versionInfo.getRevision();
+  }
+
+  /**
+   * Returns the process-wide build-info source, registering a new one when none
+   * exists yet. Every {@code BaseHttpServer} in the JVM shares it, so a repeat
+   * call returns the registered source instead of registering a duplicate.
+   *
+   * @param component component label; only used when a new source is created
+   * @return the source registered under {@link #METRICS_SOURCE_NAME}
+   */
+  public static synchronized BuildInfoMetrics create(String component) {
+    MetricsSystem metricsSystem = DefaultMetricsSystem.instance();
+    MetricsSource registered = metricsSystem.getSource(METRICS_SOURCE_NAME);
+    if (registered == null) {
+      registered = metricsSystem.register(METRICS_SOURCE_NAME, "Ozone build version info",
+          new BuildInfoMetrics(component, HddsVersionInfo.HDDS_VERSION_INFO));
+    }
+    return (BuildInfoMetrics) registered;
+  }
+
+  /**
+   * Removes the source registered under {@link #METRICS_SOURCE_NAME} from the
+   * default metrics system. The source is looked up by name only, so this
+   * drops it for every server sharing it in the JVM, not just the caller.
+   */
+  public static synchronized void unregister() {
+    MetricsSystem metricsSystem = DefaultMetricsSystem.instance();
+    metricsSystem.unregisterSource(METRICS_SOURCE_NAME);
+  }
+
+  @Override
+  public void getMetrics(MetricsCollector collector, boolean all) {
+    MetricsRecordBuilder record = collector.addRecord(RECORD_NAME);
+    record.add(label("component", "Ozone component name", component))
+        .add(label("revision", "Source control revision", revision))
+        .add(label("version", "Ozone build version", version))
+        .addGauge(Interns.info("BuildInfo", "Always 1; identifying info is in labels"), 1L)
+        .endRecord();
+  }
+
+  private static MetricsTag label(String name, String description, String value) {
+    return new MetricsTag(Interns.info(name, description), value);
+  }
+}
diff --git a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/server/http/TestBuildInfoMetrics.java b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/server/http/TestBuildInfoMetrics.java
new file mode 100644
index 000000000000..d669da1dba53
--- /dev/null
+++ b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/server/http/TestBuildInfoMetrics.java
@@ -0,0 +1,117 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hdds.server.http;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.util.concurrent.TimeoutException;
+import org.apache.hadoop.metrics2.MetricsSystem;
+import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
+import org.apache.ozone.test.GenericTestUtils;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+/**
+ * Tests for {@link BuildInfoMetrics}.
+ */
+public class TestBuildInfoMetrics {
+
+  private MetricsSystem metricsSystem;
+  private PrometheusMetricsSink sink;
+
+  @BeforeEach
+  public void setUp() {
+    metricsSystem = DefaultMetricsSystem.instance();
+    metricsSystem.init("test");
+    sink = new PrometheusMetricsSink("testserver");
+    metricsSystem.register("Prometheus", "Prometheus", sink);
+  }
+
+  @AfterEach
+  public void tearDown() {
+    metricsSystem.unregisterSource(BuildInfoMetrics.METRICS_SOURCE_NAME);
+    metricsSystem.stop();
+    metricsSystem.shutdown();
+  }
+
+  @Test
+  public void testBuildInfoMetricPublished()
+      throws IOException, InterruptedException, TimeoutException {
+    BuildInfoMetrics.create("OM");
+
+    // Info-style metric: the sample value is always 1, the details are in the labels.
+    assertThat(waitForMetricsToPublish("ozone_build_info"))
+        .contains("# TYPE ozone_build_info gauge")
+        .contains("ozone_build_info{")
+        .contains("component=\"OM\"")
+        .contains("version=")
+        .contains("revision=")
+        .containsPattern("ozone_build_info\\{.*\\} 1");
+  }
+
+  @Test
+  public void testBuildInfoMetricOnlyOneTypeComment()
+      throws IOException, InterruptedException, TimeoutException {
+    BuildInfoMetrics.create("SCM");
+
+    String scraped = waitForMetricsToPublish("ozone_build_info");
+    int typeComments = countOccurrences(scraped, "# TYPE ozone_build_info gauge");
+    assertEquals(1, typeComments,
+        "Expected exactly one TYPE comment for ozone_build_info");
+  }
+
+  /** Triggers an immediate publish, then returns the text the sink writes out. */
+  private String publishAndGet() throws IOException {
+    metricsSystem.publishMetricsNow();
+    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
+    OutputStreamWriter out = new OutputStreamWriter(buffer, UTF_8);
+    sink.writeMetrics(out);
+    out.flush();
+    return buffer.toString(UTF_8.name());
+  }
+
+  /** Republishes until the output contains {@code metric}; 500/30000 are presumably interval/timeout ms. */
+  private String waitForMetricsToPublish(String metric)
+      throws InterruptedException, TimeoutException {
+    String[] latest = new String[1];
+    GenericTestUtils.waitFor(() -> {
+      try {
+        latest[0] = publishAndGet();
+        return latest[0].contains(metric);
+      } catch (IOException e) {
+        throw new RuntimeException(e);
+      }
+    }, 500, 30000);
+    return latest[0];
+  }
+
+  /** Counts non-overlapping occurrences of {@code substring} in {@code text}. */
+  private static int countOccurrences(String text, String substring) {
+    int matches = 0;
+    for (int at = text.indexOf(substring); at != -1; at = text.indexOf(substring, at + substring.length())) {
+      matches++;
+    }
+    return matches;
+  }
+}
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/metrics/SCMMetrics.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/metrics/SCMMetrics.java
index d5dab7800c41..3387db5a5ecb 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/metrics/SCMMetrics.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/metrics/SCMMetrics.java
@@ -180,7 +180,6 @@ public void addRatisEvent(String event) {
}
}
- @Metric("Ratis state machine events")
public String getRatisEvents() {
synchronized (ratisEvents) {
return String.join("\n", ratisEvents);
diff --git a/hadoop-ozone/dist/src/main/compose/common/grafana/dashboards/Ozone - Rolling Upgrade.json b/hadoop-ozone/dist/src/main/compose/common/grafana/dashboards/Ozone - Rolling Upgrade.json
new file mode 100644
index 000000000000..00704acbb9ff
--- /dev/null
+++ b/hadoop-ozone/dist/src/main/compose/common/grafana/dashboards/Ozone - Rolling Upgrade.json
@@ -0,0 +1,1124 @@
+{
+ "annotations": {
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": {
+ "type": "grafana",
+ "uid": "-- Grafana --"
+ },
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "type": "dashboard"
+ }
+ ]
+ },
+ "editable": true,
+ "fiscalYearStartMonth": 0,
+ "graphTooltip": 0,
+ "id": 13,
+ "links": [],
+ "panels": [
+ {
+ "collapsed": false,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 0
+ },
+ "id": 17,
+ "panels": [],
+ "title": "Full Cluster Summary",
+ "type": "row"
+ },
+ {
+ "datasource": {
+ "type": "prometheus"
+ },
+ "description": "Number of distinct build versions present across all instances in the cluster. This will be 2 when an upgrade is in progress.",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "decimals": 0,
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": 0
+ },
+ {
+ "color": "yellow",
+ "value": 2
+ },
+ {
+ "color": "red",
+ "value": 3
+ }
+ ]
+ },
+ "unit": "suffix:Active Versions"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 3,
+ "w": 8,
+ "x": 0,
+ "y": 1
+ },
+ "id": 15,
+ "options": {
+ "colorMode": "background",
+ "graphMode": "none",
+ "justifyMode": "center",
+ "orientation": "horizontal",
+ "percentChangeColorMode": "standard",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "showPercentChange": false,
+ "textMode": "value",
+ "wideLayout": true
+ },
+ "pluginVersion": "12.3.1",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "count(count by (version) (ozone_build_info))",
+ "instant": true,
+ "legendFormat": "",
+ "range": false,
+ "refId": "A"
+ }
+ ],
+ "title": "",
+ "type": "stat"
+ },
+ {
+ "datasource": {
+ "type": "prometheus"
+ },
+ "description": "Cluster upgrade status. The upgrade is complete when all instances are running the same version and do not require finalization.",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "mappings": [
+ {
+ "options": {
+ "0": {
+ "color": "orange",
+ "index": 0,
+ "text": "Upgrading"
+ },
+ "1": {
+ "color": "orange",
+ "index": 1,
+ "text": "Finalizing"
+ },
+ "2": {
+ "color": "yellow",
+ "index": 2,
+ "text": "Unfinalized"
+ },
+ "3": {
+ "color": "green",
+ "index": 3,
+ "text": "Complete"
+ }
+ },
+ "type": "value"
+ }
+ ],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "orange",
+ "value": 0
+ },
+ {
+ "color": "orange",
+ "value": 1
+ },
+ {
+ "color": "yellow",
+ "value": 2
+ },
+ {
+ "color": "green",
+ "value": 3
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 3,
+ "w": 8,
+ "x": 8,
+ "y": 1
+ },
+ "id": 14,
+ "options": {
+ "colorMode": "background",
+ "graphMode": "none",
+ "justifyMode": "center",
+ "orientation": "horizontal",
+ "percentChangeColorMode": "standard",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "showPercentChange": false,
+ "textMode": "value",
+ "wideLayout": true
+ },
+ "pluginVersion": "12.3.1",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "(count(count by (version) (ozone_build_info)) == bool 1) * (1 + (1 - max(om_metrics_finalization_in_progress)) * (1 + (sum((component_version_manager_metrics_apparent_version != bool component_version_manager_metrics_software_version) * on(instance) group_left() (ozone_build_info{component!~\"recon\"})) == bool 0)))",
+ "instant": true,
+ "legendFormat": "",
+ "range": false,
+ "refId": "A"
+ }
+ ],
+ "title": "",
+ "type": "stat"
+ },
+ {
+ "datasource": {
+ "type": "prometheus"
+ },
+ "description": "Number of instances currently hidden by the component and instance selectors at the top of the dashboard. 0 means all nodes are visible.",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "decimals": 0,
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": 0
+ },
+ {
+ "color": "red",
+ "value": 1
+ }
+ ]
+ },
+ "unit": "suffix:Instances Hidden"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 3,
+ "w": 8,
+ "x": 16,
+ "y": 1
+ },
+ "id": 16,
+ "options": {
+ "colorMode": "background",
+ "graphMode": "none",
+ "justifyMode": "center",
+ "orientation": "horizontal",
+ "percentChangeColorMode": "standard",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "showPercentChange": false,
+ "textMode": "value",
+ "wideLayout": true
+ },
+ "pluginVersion": "12.3.1",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "count(ozone_build_info) - count(ozone_build_info{component=~\"$component\",instance=~\"$instance\"})",
+ "instant": true,
+ "legendFormat": "",
+ "range": false,
+ "refId": "A"
+ }
+ ],
+ "title": "",
+ "type": "stat"
+ },
+ {
+ "collapsed": false,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 4
+ },
+ "id": 4,
+ "panels": [],
+ "title": "Versions",
+ "type": "row"
+ },
+ {
+ "datasource": {
+ "type": "prometheus"
+ },
+ "description": "Total number of selected instances running each version. The chart fills with a single slice when the upgrade completes.",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "fixedColor": "#354c6f",
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ }
+ },
+ "decimals": 0,
+ "displayName": "${__field.labels.version}",
+ "mappings": [],
+ "unit": "short"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 9,
+ "w": 9,
+ "x": 0,
+ "y": 5
+ },
+ "id": 2,
+ "options": {
+ "displayLabels": [
+ "name"
+ ],
+ "legend": {
+ "calcs": [],
+ "displayMode": "table",
+ "placement": "right",
+ "showLegend": true,
+ "values": [
+ "value",
+ "percent"
+ ]
+ },
+ "pieType": "pie",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "sort": "desc",
+ "tooltip": {
+ "hideZeros": false,
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "pluginVersion": "12.3.1",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "count by (version) (ozone_build_info{component=~\"$component\",instance=~\"$instance\"})",
+ "instant": true,
+ "legendFormat": "{{version}}",
+ "range": false,
+ "refId": "A"
+ }
+ ],
+ "title": "Instances Per Version",
+ "type": "piechart"
+ },
+ {
+ "datasource": {
+ "type": "prometheus"
+ },
+ "description": "Build version of each selected instance. Use this to see which instances still need to be upgraded.",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "custom": {
+ "align": "left",
+ "cellOptions": {
+ "type": "auto"
+ },
+ "footer": {
+ "reducers": []
+ },
+ "inspect": false
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": 0
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 9,
+ "w": 14,
+ "x": 9,
+ "y": 5
+ },
+ "id": 5,
+ "options": {
+ "cellHeight": "sm",
+ "enablePagination": true,
+ "footer": {
+ "countRows": false,
+ "enablePagination": true,
+ "fields": "",
+ "reducer": [
+ "sum"
+ ],
+ "show": false
+ },
+ "showHeader": true,
+ "sortBy": [
+ {
+ "desc": false,
+ "displayName": "Version"
+ }
+ ]
+ },
+ "pluginVersion": "12.3.1",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "ozone_build_info{instance=~\"$instance\"}",
+ "format": "table",
+ "instant": true,
+ "refId": "A"
+ }
+ ],
+ "title": "Versions",
+ "transformations": [
+ {
+ "id": "organize",
+ "options": {
+ "excludeByName": {
+ "Time": true,
+ "Value": true,
+ "__name__": true,
+ "context": true,
+ "exported_component": true,
+ "hostname": true,
+ "job": true,
+ "revision": true
+ },
+ "indexByName": {
+ "component": 0,
+ "instance": 1,
+ "version": 2
+ },
+ "renameByName": {
+ "component": "Component",
+ "instance": "Instance",
+ "version": "Version"
+ }
+ }
+ }
+ ],
+ "type": "table"
+ },
+ {
+ "datasource": {
+ "type": "prometheus"
+ },
+ "description": "Total number of selected instances running each build version over time.",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "Instances",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "barWidthFactor": 0.6,
+ "drawStyle": "line",
+ "fillOpacity": 10,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "stepAfter",
+ "lineWidth": 2,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "showValues": false,
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "decimals": 0,
+ "mappings": [],
+ "min": 0,
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": 0
+ }
+ ]
+ },
+ "unit": "short"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 9,
+ "w": 23,
+ "x": 0,
+ "y": 14
+ },
+ "id": 6,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "hideZeros": false,
+ "mode": "multi",
+ "sort": "none"
+ }
+ },
+ "pluginVersion": "12.3.1",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "count by (version) (ozone_build_info{instance=~\"$instance\"})",
+ "legendFormat": "{{version}}",
+ "range": true,
+ "refId": "A"
+ }
+ ],
+ "title": "Instances Per Version Over Time",
+ "type": "timeseries"
+ },
+ {
+ "collapsed": false,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 23
+ },
+ "id": 9,
+ "panels": [],
+ "title": "Finalization Status",
+ "type": "row"
+ },
+ {
+ "datasource": {
+ "type": "prometheus"
+ },
+ "description": "Number of selected instances that have finalized versus those still unfinalized. Recon and S3 Gateway are excluded.",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ }
+ },
+ "decimals": 0,
+ "mappings": [],
+ "unit": "short"
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "finalized"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "green",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "unfinalized"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "semi-dark-yellow",
+ "mode": "fixed"
+ }
+ }
+ ]
+ }
+ ]
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 9,
+ "x": 0,
+ "y": 24
+ },
+ "id": 3,
+ "options": {
+ "displayLabels": [
+ "name"
+ ],
+ "legend": {
+ "calcs": [],
+ "displayMode": "table",
+ "placement": "right",
+ "showLegend": true,
+ "values": [
+ "value",
+ "percent"
+ ]
+ },
+ "pieType": "pie",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "sort": "desc",
+ "tooltip": {
+ "hideZeros": false,
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "pluginVersion": "12.3.1",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "sum((component_version_manager_metrics_software_version{instance=~\"$instance\"} == bool component_version_manager_metrics_apparent_version{instance=~\"$instance\"}) * on(instance) group_left() (ozone_build_info{component!~\"recon\"}))",
+ "instant": true,
+ "legendFormat": "finalized",
+ "range": false,
+ "refId": "A"
+ },
+ {
+ "datasource": {
+ "type": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "count(component_version_manager_metrics_software_version{instance=~\"$instance\"} * on(instance) group_left() (ozone_build_info{component!~\"recon\"})) - sum((component_version_manager_metrics_software_version{instance=~\"$instance\"} == bool component_version_manager_metrics_apparent_version{instance=~\"$instance\"}) * on(instance) group_left() (ozone_build_info{component!~\"recon\"}))",
+ "instant": true,
+ "legendFormat": "unfinalized",
+ "range": false,
+ "refId": "B"
+ }
+ ],
+ "title": "Finalized vs Total Instances",
+ "type": "piechart"
+ },
+ {
+ "datasource": {
+ "type": "prometheus"
+ },
+ "description": "Finalization status of each selected instance. All instances except S3 Gateway and Recon finalize automatically when a finalize command is given.",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "custom": {
+ "align": "left",
+ "cellOptions": {
+ "type": "auto"
+ },
+ "footer": {
+ "reducers": []
+ },
+ "inspect": false
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": 0
+ }
+ ]
+ }
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Finalized?"
+ },
+ "properties": [
+ {
+ "id": "custom.cellOptions",
+ "value": {
+ "type": "color-text"
+ }
+ },
+ {
+ "id": "mappings",
+ "value": [
+ {
+ "options": {
+ "0": {
+ "color": "red",
+ "index": 0,
+ "text": "false"
+ },
+ "1": {
+ "color": "green",
+ "index": 1,
+ "text": "true"
+ }
+ },
+ "type": "value"
+ }
+ ]
+ },
+ {
+ "id": "custom.width",
+ "value": 244
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Software Version"
+ },
+ "properties": [
+ {
+ "id": "custom.width",
+ "value": 226
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Instance"
+ },
+ "properties": [
+ {
+ "id": "custom.width",
+ "value": 492
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Apparent Version"
+ },
+ "properties": [
+ {
+ "id": "custom.width",
+ "value": 439
+ }
+ ]
+ }
+ ]
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 14,
+ "x": 9,
+ "y": 24
+ },
+ "id": 10,
+ "options": {
+ "cellHeight": "sm",
+ "enablePagination": true,
+ "footer": {
+ "countRows": false,
+ "enablePagination": true,
+ "fields": "",
+ "reducer": [
+ "sum"
+ ],
+ "show": false
+ },
+ "showHeader": true,
+ "sortBy": [
+ {
+ "desc": true,
+ "displayName": "Finalized?"
+ }
+ ]
+ },
+ "pluginVersion": "12.3.1",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "(component_version_manager_metrics_software_version{instance=~\"$instance\"} == bool component_version_manager_metrics_apparent_version{instance=~\"$instance\"}) * on(instance) group_left() (ozone_build_info{component!~\"recon\"})",
+ "format": "table",
+ "instant": true,
+ "refId": "A"
+ }
+ ],
+ "title": "Finalized Instances",
+ "transformations": [
+ {
+ "id": "organize",
+ "options": {
+ "excludeByName": {
+ "Time": true,
+ "__name__": true,
+ "context": true,
+ "exported_component": true,
+ "hostname": true,
+ "job": true,
+ "revision": true,
+ "version": true
+ },
+ "indexByName": {
+ "Value": 2,
+ "component": 0,
+ "instance": 1
+ },
+ "renameByName": {
+ "Value": "Finalized?",
+ "component": "Component",
+ "instance": "Instance"
+ }
+ }
+ }
+ ],
+ "type": "table"
+ },
+ {
+ "datasource": {
+ "type": "prometheus"
+ },
+ "description": "Number of selected instances which have finalized over time (green), stacked with the number still pending finalization (yellow). The top of the stack is the total eligible instance count.",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "Instances",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "barWidthFactor": 0.6,
+ "drawStyle": "line",
+ "fillOpacity": 10,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "stepAfter",
+ "lineWidth": 2,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "showValues": false,
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "decimals": 0,
+ "mappings": [],
+ "min": 0,
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": 0
+ }
+ ]
+ },
+ "unit": "short"
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "finalized"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "green",
+ "mode": "fixed"
+ }
+ },
+ {
+ "id": "custom.fillOpacity",
+ "value": 25
+ },
+ {
+ "id": "custom.stacking",
+ "value": {
+ "group": "A",
+ "mode": "normal"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "remaining"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "yellow",
+ "mode": "fixed"
+ }
+ },
+ {
+ "id": "custom.fillOpacity",
+ "value": 25
+ },
+ {
+ "id": "custom.lineWidth",
+ "value": 0
+ },
+ {
+ "id": "custom.showPoints",
+ "value": "never"
+ },
+ {
+ "id": "custom.stacking",
+ "value": {
+ "group": "A",
+ "mode": "normal"
+ }
+ },
+ {
+ "id": "custom.hideFrom",
+ "value": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ }
+ }
+ ]
+ }
+ ]
+ },
+ "gridPos": {
+ "h": 9,
+ "w": 23,
+ "x": 0,
+ "y": 32
+ },
+ "id": 11,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "hideZeros": false,
+ "mode": "multi",
+ "sort": "none"
+ }
+ },
+ "pluginVersion": "12.3.1",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "count(((component_version_manager_metrics_software_version{instance=~\"$instance\"} == bool component_version_manager_metrics_apparent_version{instance=~\"$instance\"}) * on(instance) group_left() (ozone_build_info{component!~\"recon\"})) == 1) or vector(0)",
+ "legendFormat": "finalized",
+ "range": true,
+ "refId": "A"
+ },
+ {
+ "datasource": {
+ "type": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "count(component_version_manager_metrics_software_version{instance=~\"$instance\"} * on(instance) group_left() (ozone_build_info{component!~\"recon\"})) - (count(((component_version_manager_metrics_software_version{instance=~\"$instance\"} == bool component_version_manager_metrics_apparent_version{instance=~\"$instance\"}) * on(instance) group_left() (ozone_build_info{component!~\"recon\"})) == 1) or vector(0))",
+ "legendFormat": "remaining",
+ "range": true,
+ "refId": "C"
+ }
+ ],
+ "title": "Finalized vs Total Instances Over Time",
+ "type": "timeseries"
+ }
+ ],
+ "preload": false,
+ "refresh": "30s",
+ "schemaVersion": 42,
+ "tags": [
+ "ozone",
+ "upgrade"
+ ],
+ "templating": {
+ "list": [
+ {
+ "current": {
+ "text": "All",
+ "value": "$__all"
+ },
+ "datasource": {
+ "type": "prometheus"
+ },
+ "definition": "label_values(ozone_build_info, component)",
+ "includeAll": true,
+ "label": "component",
+ "multi": true,
+ "name": "component",
+ "options": [],
+ "query": {
+ "query": "label_values(ozone_build_info, component)",
+ "refId": "StandardVariableQuery"
+ },
+ "refresh": 1,
+ "regex": "",
+ "sort": 1,
+ "type": "query"
+ },
+ {
+ "current": {
+ "text": "All",
+ "value": "$__all"
+ },
+ "datasource": {
+ "type": "prometheus"
+ },
+ "definition": "label_values(ozone_build_info{component=~\"$component\"}, instance)",
+ "includeAll": true,
+ "label": "instance",
+ "multi": true,
+ "name": "instance",
+ "options": [],
+ "query": {
+ "query": "label_values(ozone_build_info{component=~\"$component\"}, instance)",
+ "refId": "StandardVariableQuery"
+ },
+ "refresh": 1,
+ "regex": "",
+ "sort": 1,
+ "type": "query"
+ }
+ ]
+ },
+ "time": {
+ "from": "now-30m",
+ "to": "now"
+ },
+ "timepicker": {},
+ "timezone": "",
+ "title": "Ozone - Rolling Upgrade",
+ "uid": "ozone-rolling-upgrade",
+ "version": 1
+}
diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMMetrics.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMMetrics.java
index 5a70483b2bc8..e12fa8316ed2 100644
--- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMMetrics.java
+++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMMetrics.java
@@ -86,6 +86,8 @@ public class OMMetrics implements OmMetadataReaderMetrics {
private @Metric MutableCounterLong numListSnapshotDiffJobs;
private @Metric MutableGaugeInt numSnapshotCacheSize;
+ @Metric("Set to 1 if OM is monitoring ongoing upgrade finalization, 0 otherwise")
+ private MutableGaugeInt finalizationInProgress;
private @Metric MutableCounterLong numGetFileStatus;
private @Metric MutableCounterLong numCreateDirectory;
private @Metric MutableCounterLong numCreateFile;
@@ -616,6 +618,14 @@ public void decNumSnapshotCacheSize() {
numSnapshotCacheSize.decr();
}
+ public void setFinalizationInProgress(boolean inProgress) {
+ finalizationInProgress.set(inProgress ? 1 : 0); // exported as a 0/1 gauge: 1 = finalization in progress
+ }
+
+ public int getFinalizationInProgress() {
+ return finalizationInProgress.value(); // current gauge value; setFinalizationInProgress only writes 1 or 0
+ }
+
public void incNumCompleteMultipartUploadFails() {
numCompleteMultipartUploadFails.incr();
}
diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java
index 8d99f122e49c..ce1529ac05bc 100644
--- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java
+++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java
@@ -45,6 +45,7 @@
import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_SCM_BLOCK_SIZE_DEFAULT;
import static org.apache.hadoop.ozone.OzoneConsts.DB_TRANSIENT_MARKER;
import static org.apache.hadoop.ozone.OzoneConsts.DEFAULT_OM_UPDATE_ID;
+import static org.apache.hadoop.ozone.OzoneConsts.FINALIZATION_IN_PROGRESS_KEY;
import static org.apache.hadoop.ozone.OzoneConsts.OM_DB_NAME;
import static org.apache.hadoop.ozone.OzoneConsts.OM_KEY_PREFIX;
import static org.apache.hadoop.ozone.OzoneConsts.OM_METRICS_FILE;
@@ -955,6 +956,8 @@ private void instantiateServices(boolean withNewSnapshot) throws IOException {
new OmMetadataManagerImpl(configuration, this);
this.metadataManager = metadataManagerImpl;
versionManager.validateDBVersion(metadataManager.getMetaTable());
+ metrics.setFinalizationInProgress(
+ metadataManager.getMetaTable().get(FINALIZATION_IN_PROGRESS_KEY) != null);
LOG.info("S3 Multi-Tenancy is {}",
isS3MultiTenancyEnabled ? "enabled" : "disabled");
if (isS3MultiTenancyEnabled) {
diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/upgrade/OMFinalizeUpgradeRequest.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/upgrade/OMFinalizeUpgradeRequest.java
index 0aab6e4cdc39..06c3a9bcaf58 100644
--- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/upgrade/OMFinalizeUpgradeRequest.java
+++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/upgrade/OMFinalizeUpgradeRequest.java
@@ -99,6 +99,7 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, Execut
// Clear the finalization_in_progress key from the cache
omMetadataManager.getMetaTable().addCacheEntry(
new CacheKey<>(OzoneConsts.FINALIZATION_IN_PROGRESS_KEY), CacheValue.get(context.getIndex()));
+ ozoneManager.getMetrics().setFinalizationInProgress(false);
FinalizeUpgradeResponse omResponse =
FinalizeUpgradeResponse.newBuilder()
diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/upgrade/OMStartFinalizeUpgradeRequest.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/upgrade/OMStartFinalizeUpgradeRequest.java
index aa3849d935ce..ef2e016fbbd5 100644
--- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/upgrade/OMStartFinalizeUpgradeRequest.java
+++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/upgrade/OMStartFinalizeUpgradeRequest.java
@@ -80,6 +80,7 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, Execut
OMMetadataManager omMetadataManager = ozoneManager.getMetadataManager();
omMetadataManager.getMetaTable().addCacheEntry(
new CacheKey<>(OzoneConsts.FINALIZATION_IN_PROGRESS_KEY), CacheValue.get(context.getIndex(), "ignored"));
+ ozoneManager.getMetrics().setFinalizationInProgress(true);
OzoneManagerProtocolProtos.StartFinalizeUpgradeResponse omResponse =
diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerHttpServer.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerHttpServer.java
index a9c1cf2e2055..aac504c9cecf 100644
--- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerHttpServer.java
+++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerHttpServer.java
@@ -96,9 +96,7 @@ public static Collection