From 11de4c512462bf148657781c7f64b677c7ca6945 Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Wed, 23 Jul 2025 09:01:22 +0200 Subject: [PATCH 01/17] WIP: Scrape everything --- .../prometheus-service-monitor.yaml | 215 +++++++++++++++++- 1 file changed, 206 insertions(+), 9 deletions(-) diff --git a/stacks/_templates/prometheus-service-monitor.yaml b/stacks/_templates/prometheus-service-monitor.yaml index 3d597a7b..cdfc2e87 100644 --- a/stacks/_templates/prometheus-service-monitor.yaml +++ b/stacks/_templates/prometheus-service-monitor.yaml @@ -1,39 +1,236 @@ +# ### Products +# +# Use something like this to check for metrics: +# count by(job) ({__name__!=""}) +# +# See https://github.com/stackabletech/issues/issues/735 +# +# - [x] Airflow - exporter +# - [x] Druid - native +# - [x] HBase - native +# - [x] Hadoop HDFS - native +# - [x] Hive - exporter +# - [x] Kafka - exporter +# - [x] NiFi 1 - native +# - [ ] NiFi 2 - native - partially working, needs mTLS +# - [ ] OpenSearch +# - [ ] Spark - native +# - [x] Superset - exporter +# - [x] Trino - native +# - [x] ZooKeeper - native +# - [x] OPA - native --- apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor metadata: - name: scrape-label + name: stackable labels: stackable.tech/vendor: Stackable release: prometheus spec: + namespaceSelector: + any: true + selector: + matchLabels: + stackable.tech/vendor: Stackable + prometheus.io/scrape: "true" + matchExpressions: + - key: app.kubernetes.io/name + operator: In + values: + - airflow + - druid + - hive + - kafka + - nifi # This only works for NiFi 1, NiFi 2 has a special ServiceMonitor below + - opa + - superset + - trino endpoints: - - port: metrics + - scheme: http + port: metrics + path: /metrics + jobLabel: app.kubernetes.io/instance +--- +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: stackable-native-metrics + labels: + stackable.tech/vendor: Stackable + release: prometheus +spec: + namespaceSelector: + any: true 
selector: matchLabels: + stackable.tech/vendor: Stackable prometheus.io/scrape: "true" + matchExpressions: + - key: app.kubernetes.io/name + operator: In + values: + - zookeeper + endpoints: + - scheme: http + port: native-metrics + path: /metrics + jobLabel: app.kubernetes.io/instance +--- +# Kafka is special because the operator sets up its Services inconsistently: +# 1. The metrics Service is missing +# 2. The role level simple-kafka-broker-default has the prometheus.io/scrape label, but exposes no ports +# 3. The role level simple-kafka-broker-default is labeled with app.kubernetes.io/name: listener instead of the expected kafka +# So Kafka gets a dedicated ServiceMonitor +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: stackable-kafka + labels: + stackable.tech/vendor: Stackable + release: prometheus +spec: namespaceSelector: any: true + selector: + matchLabels: + stackable.tech/vendor: Stackable + app.kubernetes.io/name: listener # Unexpected: the operator sets listener here instead of kafka + app.kubernetes.io/component: broker # We need to filter on brokers instead, as app.kubernetes.io/name cannot be used to select Kafka + endpoints: + - scheme: http + port: metrics + path: /metrics jobLabel: app.kubernetes.io/instance --- +# We prefer the native metrics over the statsd-exporter apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor metadata: - name: scrape-minio + name: stackable-hdfs labels: stackable.tech/vendor: Stackable release: prometheus spec: + namespaceSelector: + any: true + selector: + matchLabels: + stackable.tech/vendor: Stackable + prometheus.io/scrape: "true" + app.kubernetes.io/name: hdfs endpoints: - - path: /minio/v2/metrics/node - port: http - scheme: http - - path: /minio/v2/metrics/cluster - port: http - scheme: http + - scheme: http + port: http # Don't use metrics!
+ path: /prom + jobLabel: app.kubernetes.io/instance +--- +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: stackable-hbase + labels: + stackable.tech/vendor: Stackable + release: prometheus +spec: + namespaceSelector: + any: true selector: matchLabels: + stackable.tech/vendor: Stackable + prometheus.io/scrape: "true" + app.kubernetes.io/name: hbase + endpoints: + - scheme: http + port: ui-http + path: /prometheus + jobLabel: app.kubernetes.io/instance +--- +# NiFi 2 is a beast of its own. +# The current state produces "server returned HTTP status 401 Unauthorized", but it's at least a +# good starting point for a working version +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: stackable-nifi-2 + labels: + stackable.tech/vendor: Stackable + release: prometheus +spec: + namespaceSelector: + any: true + selector: + matchLabels: + stackable.tech/vendor: Stackable + prometheus.io/scrape: "true" + matchExpressions: + - key: app.kubernetes.io/name + operator: In + values: + - nifi + # - key: app.kubernetes.io/version + # operator: NotIn + # values: + # - List all 1.x.x version combinations + endpoints: + - scheme: https + port: https + path: /nifi-api/flow/metrics/prometheus + # TODO: Use mTLS + # See https://github.com/stackabletech/demos/pull/260 + # See https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#monitoring.coreos.com/v1.TLSConfig + tlsConfig: + insecureSkipVerify: true + relabelings: + - sourceLabels: + - __meta_kubernetes_pod_name + - __meta_kubernetes_service_name + - __meta_kubernetes_namespace + - __meta_kubernetes_pod_container_port_number + targetLabel: __address__ + replacement: ${1}.${2}-headless.${3}.svc.cluster.local:${4} + regex: (.+);(.+?)(?:-metrics)?;(.+);(.+) + jobLabel: app.kubernetes.io/instance +--- +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: stackable-minio-http + labels: +
stackable.tech/vendor: Stackable + release: prometheus +spec: + namespaceSelector: + any: true + selector: + matchLabels: + stackable.tech/vendor: Stackable app: minio monitoring: "true" + endpoints: + - scheme: http + port: http + path: /minio/v2/metrics/cluster +--- +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: stackable-minio-https + labels: + stackable.tech/vendor: Stackable + release: prometheus +spec: namespaceSelector: any: true + selector: + matchLabels: + stackable.tech/vendor: Stackable + app: minio + monitoring: "true" + endpoints: + - scheme: https + port: https + path: /minio/v2/metrics/cluster + # Prevent "tls: failed to verify certificate: x509: cannot validate certificate for 100.96.234.154 because it doesn't contain any IP SANs" + tlsConfig: + insecureSkipVerify: true From 559a7655fe2f7a65e55a10ab19248f6c08fe1c22 Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Wed, 23 Jul 2025 09:47:08 +0200 Subject: [PATCH 02/17] Move files to monitoring stack folder --- .../prometheus-service-monitors.yaml} | 0 stacks/{_templates => monitoring}/prometheus.yaml | 0 stacks/stacks-v2.yaml | 4 ++-- 3 files changed, 2 insertions(+), 2 deletions(-) rename stacks/{_templates/prometheus-service-monitor.yaml => monitoring/prometheus-service-monitors.yaml} (100%) rename stacks/{_templates => monitoring}/prometheus.yaml (100%) diff --git a/stacks/_templates/prometheus-service-monitor.yaml b/stacks/monitoring/prometheus-service-monitors.yaml similarity index 100% rename from stacks/_templates/prometheus-service-monitor.yaml rename to stacks/monitoring/prometheus-service-monitors.yaml diff --git a/stacks/_templates/prometheus.yaml b/stacks/monitoring/prometheus.yaml similarity index 100% rename from stacks/_templates/prometheus.yaml rename to stacks/monitoring/prometheus.yaml diff --git a/stacks/stacks-v2.yaml b/stacks/stacks-v2.yaml index 8f6c9d07..b2aa0e76 100644 --- a/stacks/stacks-v2.yaml +++ b/stacks/stacks-v2.yaml @@ -12,8 +12,8 
@@ stacks: - grafana manifests: - plainYaml: https://raw.githubusercontent.com/stackabletech/demos/main/stacks/monitoring/grafana-dashboards.yaml - - helmChart: https://raw.githubusercontent.com/stackabletech/demos/main/stacks/_templates/prometheus.yaml - - plainYaml: https://raw.githubusercontent.com/stackabletech/demos/main/stacks/_templates/prometheus-service-monitor.yaml + - helmChart: https://raw.githubusercontent.com/stackabletech/demos/main/stacks/monitoring/prometheus.yaml + - plainYaml: https://raw.githubusercontent.com/stackabletech/demos/main/stacks/monitoring/prometheus-service-monitors.yaml supportedNamespaces: [] resourceRequests: cpu: 1750m From 1f3d2bdf4a23275c7923994610443e2fb84f1ac8 Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Wed, 23 Jul 2025 09:49:33 +0200 Subject: [PATCH 03/17] Update MinIO chart --- stacks/monitoring/grafana-dashboards.yaml | 3900 ++++++++++++--------- 1 file changed, 2202 insertions(+), 1698 deletions(-) diff --git a/stacks/monitoring/grafana-dashboards.yaml b/stacks/monitoring/grafana-dashboards.yaml index 523c2ed7..c20a727e 100644 --- a/stacks/monitoring/grafana-dashboards.yaml +++ b/stacks/monitoring/grafana-dashboards.yaml @@ -3268,9 +3268,8 @@ data: "description": "MinIO Grafana Dashboard - https://min.io/", "editable": true, "fiscalYearStartMonth": 0, - "gnetId": 13502, "graphTooltip": 0, - "id": 29, + "id": 35, "links": [ { "icon": "external link", @@ -3282,7 +3281,6 @@ data: "type": "dashboards" } ], - "liveNow": false, "panels": [ { "datasource": { @@ -3307,8 +3305,7 @@ data: "mode": "percentage", "steps": [ { - "color": "green", - "value": null + "color": "green" } ] }, @@ -3323,13 +3320,13 @@ data: "y": 0 }, "id": 1, - "links": [], "maxDataPoints": 100, "options": { "colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", + "percentChangeColorMode": "standard", "reduceOptions": { "calcs": [ "mean" @@ -3337,10 +3334,12 @@ data: "fields": "", "values": false }, + 
"showPercentChange": false, "text": {}, - "textMode": "auto" + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "9.2.4", + "pluginVersion": "12.0.2", "targets": [ { "datasource": { @@ -3348,7 +3347,7 @@ data: "uid": "prometheus" }, "exemplar": true, - "expr": "time() - max(minio_node_process_starttime_seconds{job=\"$scrape_jobs\"})", + "expr": "time() - max(minio_node_process_starttime_seconds{job=~\"$scrape_jobs\"})", "format": "time_series", "instant": true, "interval": "", @@ -3385,8 +3384,7 @@ data: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" } ] }, @@ -3401,13 +3399,13 @@ data: "y": 0 }, "id": 65, - "links": [], "maxDataPoints": 100, "options": { "colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", + "percentChangeColorMode": "standard", "reduceOptions": { "calcs": [ "last" @@ -3415,19 +3413,20 @@ data: "fields": "", "values": false }, + "showPercentChange": false, "text": {}, - "textMode": "auto" + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "9.2.4", + "pluginVersion": "12.0.2", "targets": [ { "datasource": { "type": "prometheus", "uid": "prometheus" }, - "editorMode": "code", "exemplar": true, - "expr": "sum (minio_s3_traffic_received_bytes{job=\"$scrape_jobs\"})", + "expr": "sum by (instance) (minio_s3_traffic_received_bytes{job=~\"$scrape_jobs\"})", "format": "table", "hide": false, "instant": false, @@ -3439,7 +3438,7 @@ data: "step": 60 } ], - "title": "Total S3 Traffic Inbound", + "title": "Total S3 Ingress", "type": "stat" }, { @@ -3514,7 +3513,6 @@ data: }, "id": 50, "interval": "1m", - "links": [], "maxDataPoints": 100, "options": { "displayLabels": [], @@ -3535,22 +3533,21 @@ data: "values": false }, "tooltip": { + "hideZeros": false, "mode": "single", "sort": "none" } }, - "pluginVersion": "8.2.1", + "pluginVersion": "12.0.2", "targets": [ { "datasource": { "type": "prometheus", "uid": "prometheus" }, - "editorMode": "code", "exemplar": 
true, - "expr": "topk(1, max(sum(minio_cluster_capacity_usable_total_bytes{job=\"$scrape_jobs\"}) by (instance))) - topk(1, max(sum(minio_cluster_capacity_usable_free_bytes{job=\"$scrape_jobs\"}) by (instance)))", + "expr": "topk(1, sum(minio_cluster_capacity_usable_total_bytes{job=~\"$scrape_jobs\"}) by (instance)) - topk(1, sum(minio_cluster_capacity_usable_free_bytes{job=~\"$scrape_jobs\"}) by (instance))", "format": "time_series", - "hide": false, "instant": false, "interval": "1m", "intervalFactor": 1, @@ -3563,13 +3560,11 @@ data: "type": "prometheus", "uid": "prometheus" }, - "editorMode": "code", "exemplar": true, - "expr": "topk(1, max(sum(minio_cluster_capacity_usable_free_bytes{job=\"$scrape_jobs\"}) by (instance)))", + "expr": "topk(1, sum(minio_cluster_capacity_usable_free_bytes{job=~\"$scrape_jobs\"}) by (instance)) ", "hide": false, "interval": "1m", "legendFormat": "Free", - "range": true, "refId": "B" } ], @@ -3587,11 +3582,13 @@ data: "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", @@ -3600,6 +3597,7 @@ data: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -3621,8 +3619,7 @@ data: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -3689,11 +3686,12 @@ data: "showLegend": false }, "tooltip": { + "hideZeros": false, "mode": "single", "sort": "none" } }, - "pluginVersion": "8.2.1", + "pluginVersion": "12.0.2", "targets": [ { "datasource": { @@ -3702,7 +3700,7 @@ data: }, "editorMode": "code", "exemplar": true, - "expr": "max(sum(minio_bucket_usage_total_bytes{job=\"$scrape_jobs\"}) by (instance))", + "expr": "max(minio_cluster_usage_total_bytes{job=~\"$scrape_jobs\"})", "interval": "", 
"legendFormat": "Usage", "range": true, @@ -3719,17 +3717,15 @@ data: }, "fieldConfig": { "defaults": { + "color": { + "mode": "continuous-GrYlRd" + }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { - "color": "green", - "value": null - }, - { - "color": "semi-dark-red", - "value": 80 + "color": "green" } ] } @@ -3743,43 +3739,54 @@ data: "y": 0 }, "id": 52, - "links": [], "options": { - "displayMode": "basic", - "minVizHeight": 10, - "minVizWidth": 0, + "displayMode": "lcd", + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "maxVizHeight": 300, + "minVizHeight": 16, + "minVizWidth": 8, + "namePlacement": "auto", "orientation": "horizontal", "reduceOptions": { "calcs": [ - "mean" + "lastNotNull" ], "fields": "", "values": false }, - "showUnfilled": false, - "text": {} + "showUnfilled": true, + "sizing": "auto", + "valueMode": "color" }, - "pluginVersion": "9.2.4", + "pluginVersion": "12.0.2", "targets": [ { "datasource": { "type": "prometheus", "uid": "prometheus" }, + "disableTextWrap": false, "editorMode": "code", - "exemplar": false, - "expr": "max by (range) (minio_bucket_objects_size_distribution{job=\"$scrape_jobs\"})", + "exemplar": true, + "expr": "minio_cluster_objects_size_distribution{job=~\"$scrape_jobs\"}", "format": "time_series", - "instant": true, + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, "interval": "", "intervalFactor": 1, "legendFormat": "{{range}}", - "range": false, "refId": "A", - "step": 300 + "step": 300, + "useBackend": false } ], - "title": "Object size distribution", + "title": "Object Size Distribution", "type": "bargauge" }, { @@ -3790,31 +3797,55 @@ data: "description": "", "fieldConfig": { "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": 
false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" } - ], + }, + "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", - "value": 2000 + "value": 80 } ] - }, - "unit": "short" + } }, "overrides": [] }, @@ -3825,45 +3856,43 @@ data: "y": 0 }, "id": 61, - "links": [], "maxDataPoints": 100, "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false + "legend": { + "calcs": [], + "displayMode": "hidden", + "placement": "right", + "showLegend": false }, - "text": {}, - "textMode": "auto" + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } }, - "pluginVersion": "9.2.4", + "pluginVersion": "12.0.2", "targets": [ { "datasource": { "type": "prometheus", "uid": "prometheus" }, + "editorMode": "code", "exemplar": true, - "expr": "sum (minio_node_file_descriptor_open_total{job=\"$scrape_jobs\"})", - "format": "table", + "expr": "minio_node_file_descriptor_open_total{job=~\"$scrape_jobs\"}", + "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "", + "legendFormat": "{{server}}", "metric": "process_start_time_seconds", "refId": "A", "step": 60 } ], - "title": "Total Open FDs", - "type": "stat" + "title": "Open FDs ", + "type": "timeseries" }, { "datasource": { @@ -3888,8 +3917,7 @@ 
data: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" } ] }, @@ -3904,13 +3932,13 @@ data: "y": 3 }, "id": 64, - "links": [], "maxDataPoints": 100, "options": { "colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", + "percentChangeColorMode": "standard", "reduceOptions": { "calcs": [ "last" @@ -3918,19 +3946,20 @@ data: "fields": "", "values": false }, + "showPercentChange": false, "text": {}, - "textMode": "auto" + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "9.2.4", + "pluginVersion": "12.0.2", "targets": [ { "datasource": { "type": "prometheus", "uid": "prometheus" }, - "editorMode": "code", "exemplar": true, - "expr": "sum (minio_s3_traffic_sent_bytes{job=\"$scrape_jobs\"})", + "expr": "sum by (instance) (minio_s3_traffic_sent_bytes{job=~\"$scrape_jobs\"})", "format": "table", "hide": false, "instant": false, @@ -3942,7 +3971,7 @@ data: "step": 60 } ], - "title": "Total S3 Traffic Outbound", + "title": "Total S3 Egress", "type": "stat" }, { @@ -3953,31 +3982,55 @@ data: "description": "", "fieldConfig": { "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" } - ], + }, + "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { - "color": 
"green", - "value": null + "color": "green" }, { "color": "red", - "value": 2000 + "value": 80 } ] - }, - "unit": "short" + } }, "overrides": [] }, @@ -3988,24 +4041,21 @@ data: "y": 3 }, "id": 62, - "links": [], "maxDataPoints": 100, "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false + "legend": { + "calcs": [], + "displayMode": "hidden", + "placement": "right", + "showLegend": false }, - "text": {}, - "textMode": "auto" + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } }, - "pluginVersion": "9.2.4", + "pluginVersion": "12.0.2", "targets": [ { "datasource": { @@ -4014,54 +4064,38 @@ data: }, "editorMode": "code", "exemplar": true, - "expr": "sum without (server,instance,pod) (minio_node_go_routine_total{job=\"$scrape_jobs\"})", - "format": "table", + "expr": "minio_node_go_routine_total{job=~\"$scrape_jobs\"}", + "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "", + "legendFormat": "{{server}}", "metric": "process_start_time_seconds", "refId": "A", "step": 60 } ], - "title": "Total Goroutines", - "type": "stat" + "title": "Goroutines", + "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "prometheus" }, - "description": "", "fieldConfig": { "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 + "color": "green" } ] }, - "unit": "short" + "unit": "bool_on_off" }, "overrides": [] }, @@ -4071,45 +4105,45 @@ data: "x": 0, "y": 6 }, - "id": 53, - "links": [], - "maxDataPoints": 100, + "id": 94, "options": { "colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", + 
"percentChangeColorMode": "standard", "reduceOptions": { "calcs": [ - "mean" + "lastNotNull" ], "fields": "", "values": false }, - "text": {}, - "textMode": "auto" + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "9.2.4", + "pluginVersion": "12.0.2", "targets": [ { "datasource": { "type": "prometheus", "uid": "prometheus" }, + "disableTextWrap": false, + "editorMode": "code", "exemplar": true, - "expr": "minio_cluster_nodes_online_total{job=\"$scrape_jobs\"}", - "format": "table", - "hide": false, - "instant": true, + "expr": "minio_cluster_health_status{job=~\"$scrape_jobs\"}", + "fullMetaSearch": false, + "includeNullMetadata": true, "interval": "", - "intervalFactor": 1, - "legendFormat": "", - "metric": "process_start_time_seconds", + "legendFormat": "Pool: {{pool}} Set: {{set}}", + "range": true, "refId": "A", - "step": 60 + "useBackend": false } ], - "title": "Total Online Servers", + "title": "Cluster Health Status", "type": "stat" }, { @@ -4120,59 +4154,42 @@ data: "description": "", "fieldConfig": { "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "mappings": [], "thresholds": { - "mode": "absolute", + "mode": "percentage", "steps": [ { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 + "color": "green" } ] - }, - "unit": "short" + } }, "overrides": [] }, "gridPos": { - "h": 2, + "h": 4, "w": 3, "x": 3, "y": 6 }, - "id": 9, - "links": [], + "id": 78, "maxDataPoints": 100, "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", + "minVizHeight": 75, + "minVizWidth": 75, "orientation": "auto", "reduceOptions": { "calcs": [ - "mean" + "lastNotNull" ], "fields": "", "values": false }, - "text": {}, - "textMode": "auto" + "showThresholdLabels": false, + "showThresholdMarkers": true, + "sizing": "auto" }, - "pluginVersion": "9.2.4", + "pluginVersion": "12.0.2", "targets": [ 
{ "datasource": { @@ -4181,21 +4198,36 @@ data: }, "editorMode": "code", "exemplar": false, - "expr": "min(minio_cluster_disk_online_total{job=\"$scrape_jobs\"})", - "format": "table", + "expr": "max(minio_cluster_drive_online_total{job=~\"$scrape_jobs\"})", + "format": "time_series", "hide": false, "instant": true, "interval": "", "intervalFactor": 1, - "legendFormat": "Total online disks in MinIO Cluster", + "legendFormat": ".", "metric": "process_start_time_seconds", "range": false, "refId": "A", "step": 60 + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(minio_cluster_drive_offline_total{job=~\"$scrape_jobs\"})", + "format": "time_series", + "hide": false, + "instant": true, + "legendFormat": ".", + "range": false, + "refId": "B" } ], - "title": "Total Online Disks", - "type": "stat" + "title": "Total Online/Offline Drives", + "type": "gauge" }, { "datasource": { @@ -4219,8 +4251,7 @@ data: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "dark-yellow", @@ -4243,13 +4274,13 @@ data: "y": 6 }, "id": 66, - "links": [], "maxDataPoints": 100, "options": { "colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "horizontal", + "percentChangeColorMode": "standard", "reduceOptions": { "calcs": [ "lastNotNull" @@ -4257,18 +4288,21 @@ data: "fields": "", "values": false }, + "showPercentChange": false, "text": {}, - "textMode": "auto" + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "9.2.4", + "pluginVersion": "12.0.2", "targets": [ { "datasource": { "type": "prometheus", "uid": "prometheus" }, + "editorMode": "code", "exemplar": true, - "expr": "count(count by (bucket) (minio_bucket_usage_total_bytes{job=\"$scrape_jobs\"}))", + "expr": "max(minio_cluster_bucket_total{job=~\"$scrape_jobs\"})", "format": "time_series", "instant": false, "interval": "1m", @@ -4281,230 +4315,152 @@ data: 
"type": "stat" }, { - "aliasColors": { - "S3 Errors": "light-red", - "S3 Requests": "light-green" - }, - "bars": false, - "dashLength": 10, - "dashes": false, "datasource": { "type": "prometheus", "uid": "prometheus" }, "fieldConfig": { "defaults": { - "unit": "bits" + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "binBps" }, "overrides": [] }, - "fill": 1, - "fillGradient": 0, "gridPos": { "h": 6, "w": 7, "x": 9, "y": 6 }, - "hiddenSeries": false, "id": 63, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "9.2.4", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "exemplar": true, - "expr": "sum by (server) (rate(minio_s3_traffic_received_bytes{job=\"$scrape_jobs\"}[$__rate_interval]))*8", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "Data 
Received [{{server}}]", - "range": true, - "refId": "A" - } - ], - "thresholds": [], - "timeRegions": [], - "title": "S3 API Data Received Rate ", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:331", - "format": "bits", - "logBase": 1, - "show": true + "legend": { + "calcs": [], + "displayMode": "hidden", + "placement": "right", + "showLegend": false }, - { - "$$hashKey": "object:332", - "format": "short", - "logBase": 1, - "show": false + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" } - ], - "yaxis": { - "align": false - } - }, - { - "aliasColors": { - "S3 Errors": "light-red", - "S3 Requests": "light-green" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "unit": "bits" - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 6, - "w": 8, - "x": 16, - "y": 6 }, - "hiddenSeries": false, - "id": 70, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "9.2.4", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": true, - "steppedLine": false, + "pluginVersion": "12.0.2", "targets": [ { "datasource": { "type": "prometheus", "uid": "prometheus" }, - "editorMode": "code", "exemplar": true, - "expr": "sum by (server) (rate(minio_s3_traffic_sent_bytes{job=\"$scrape_jobs\"}[$__rate_interval]))*8", + "expr": "sum by (server) (rate(minio_s3_traffic_received_bytes{job=~\"$scrape_jobs\"}[$__rate_interval]))", "interval": "1m", "intervalFactor": 2, - 
"legendFormat": "Data Sent [{{server}}]", - "range": true, + "legendFormat": "Data Received [{{server}}]", "refId": "A" } ], - "thresholds": [], - "timeRegions": [], - "title": "S3 API Data Sent Rate ", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:331", - "format": "bits", - "logBase": 1, - "show": true - }, - { - "$$hashKey": "object:332", - "format": "short", - "logBase": 1, - "show": false - } - ], - "yaxis": { - "align": false - } + "title": "S3 API Ingress Rate ", + "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "prometheus" }, - "description": "", "fieldConfig": { "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" } - ], + }, + "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -4512,35 +4468,31 @@ data: } ] }, - "unit": "short" + "unit": "binBps" }, "overrides": [] }, "gridPos": { - "h": 2, - "w": 3, - "x": 0, - "y": 8 + "h": 6, + "w": 8, + "x": 16, + "y": 6 }, - "id": 69, - "links": [], - "maxDataPoints": 100, + "id": 70, "options": { - "colorMode": "value", - 
"graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "mean" - ], - "fields": "", - "values": false + "legend": { + "calcs": [], + "displayMode": "hidden", + "placement": "right", + "showLegend": false }, - "text": {}, - "textMode": "auto" + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } }, - "pluginVersion": "9.2.4", + "pluginVersion": "12.0.2", "targets": [ { "datasource": { @@ -4548,20 +4500,15 @@ data: "uid": "prometheus" }, "exemplar": true, - "expr": "minio_cluster_nodes_offline_total{job=\"$scrape_jobs\"}", - "format": "table", - "hide": false, - "instant": true, - "interval": "", - "intervalFactor": 1, - "legendFormat": "", - "metric": "process_start_time_seconds", - "refId": "A", - "step": 60 + "expr": "sum by (server) (rate(minio_s3_traffic_sent_bytes{job=~\"$scrape_jobs\"}[$__rate_interval]))", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "Data Sent [{{server}}]", + "refId": "A" } ], - "title": "Total Offline Servers", - "type": "stat" + "title": "S3 API Egress Rate ", + "type": "timeseries" }, { "datasource": { @@ -4586,8 +4533,7 @@ data: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -4602,17 +4548,17 @@ data: "gridPos": { "h": 2, "w": 3, - "x": 3, + "x": 0, "y": 8 }, - "id": 78, - "links": [], + "id": 53, "maxDataPoints": 100, "options": { "colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", + "percentChangeColorMode": "standard", "reduceOptions": { "calcs": [ "mean" @@ -4620,19 +4566,20 @@ data: "fields": "", "values": false }, + "showPercentChange": false, "text": {}, - "textMode": "auto" + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "9.2.4", + "pluginVersion": "12.0.2", "targets": [ { "datasource": { "type": "prometheus", "uid": "prometheus" }, - "editorMode": "code", "exemplar": true, - "expr": 
"max(minio_cluster_disk_offline_total{job=\"$scrape_jobs\"})", + "expr": "max(minio_cluster_nodes_online_total{job=~\"$scrape_jobs\"})", "format": "table", "hide": false, "instant": true, @@ -4644,7 +4591,7 @@ data: "step": 60 } ], - "title": "Total Offline Disks", + "title": "Total Online Servers", "type": "stat" }, { @@ -4669,8 +4616,7 @@ data: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "dark-yellow", @@ -4693,13 +4639,13 @@ data: "y": 9 }, "id": 44, - "links": [], "maxDataPoints": 100, "options": { "colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "horizontal", + "percentChangeColorMode": "standard", "reduceOptions": { "calcs": [ "lastNotNull" @@ -4707,10 +4653,12 @@ data: "fields": "", "values": false }, + "showPercentChange": false, "text": {}, - "textMode": "auto" + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "9.2.4", + "pluginVersion": "12.0.2", "targets": [ { "datasource": { @@ -4719,7 +4667,7 @@ data: }, "editorMode": "code", "exemplar": true, - "expr": "topk(1, max(sum(minio_bucket_usage_object_total{job=\"$scrape_jobs\"}) by (instance)))", + "expr": "max(minio_cluster_usage_object_total{job=~\"$scrape_jobs\"})", "format": "time_series", "instant": false, "interval": "1m", @@ -4747,8 +4695,7 @@ data: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" } ] }, @@ -4763,13 +4710,13 @@ data: "y": 10 }, "id": 80, - "links": [], "maxDataPoints": 100, "options": { "colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", + "percentChangeColorMode": "standard", "reduceOptions": { "calcs": [ "last" @@ -4777,30 +4724,31 @@ data: "fields": "", "values": false }, + "showPercentChange": false, "text": {}, - "textMode": "auto" + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "9.2.4", + "pluginVersion": "12.0.2", "targets": [ { "datasource": { "type": "prometheus", "uid": 
"prometheus" }, - "editorMode": "code", "exemplar": true, - "expr": "min(minio_heal_time_last_activity_nano_seconds{job=\"$scrape_jobs\"})", + "expr": "max(minio_heal_time_last_activity_nano_seconds{job=~\"$scrape_jobs\"})", "format": "time_series", "instant": true, "interval": "", "intervalFactor": 1, - "legendFormat": "{{server}}", + "legendFormat": "", "metric": "process_start_time_seconds", "refId": "A", "step": 60 } ], - "title": "Time Since Last Heal Activity", + "title": "Time Since Last Heal", "type": "stat" }, { @@ -4819,8 +4767,7 @@ data: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" } ] }, @@ -4835,13 +4782,13 @@ data: "y": 10 }, "id": 81, - "links": [], "maxDataPoints": 100, "options": { "colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", + "percentChangeColorMode": "standard", "reduceOptions": { "calcs": [ "last" @@ -4849,30 +4796,31 @@ data: "fields": "", "values": false }, + "showPercentChange": false, "text": {}, - "textMode": "auto" + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "9.2.4", + "pluginVersion": "12.0.2", "targets": [ { "datasource": { "type": "prometheus", "uid": "prometheus" }, - "editorMode": "code", "exemplar": true, - "expr": "min(minio_usage_last_activity_nano_seconds{job=\"$scrape_jobs\"})", + "expr": "max(minio_usage_last_activity_nano_seconds{job=~\"$scrape_jobs\"})", "format": "time_series", "instant": true, "interval": "", "intervalFactor": 1, - "legendFormat": "{{server}}", + "legendFormat": "", "metric": "process_start_time_seconds", "refId": "A", "step": 60 } ], - "title": "Time Since Last Scan Activity", + "title": "Time Since Last Scan", "type": "stat" }, { @@ -4880,34 +4828,36 @@ data: "type": "prometheus", "uid": "prometheus" }, - "description": "Total s3 bytes sent per bucket", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, 
"axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 5, + "fillOpacity": 10, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, - "lineInterpolation": "smooth", + "insertNulls": false, + "lineInterpolation": "linear", "lineWidth": 1, - "pointSize": 2, + "pointSize": 5, "scaleDistribution": { "type": "linear" }, - "showPoints": "auto", - "spanNulls": true, + "showPoints": "never", + "spanNulls": false, "stacking": { "group": "A", "mode": "none" @@ -4921,8 +4871,7 @@ data: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -4930,48 +4879,78 @@ data: } ] }, - "unit": "binbps" + "unit": "none" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "S3 Errors" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "light-red", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "S3 Requests" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "light-green", + "mode": "fixed" + } + } + ] + } + ] }, "gridPos": { - "h": 10, - "w": 12, + "h": 6, + "w": 9, "x": 0, "y": 12 }, - "id": 90, + "id": 60, "options": { + "alertThreshold": true, "legend": { - "calcs": [ - "mean", - "max" - ], - "displayMode": "table", - "placement": "right", - "showLegend": true, - "sortBy": "Max", - "sortDesc": true + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, "tooltip": { + "hideZeros": false, "mode": "multi", "sort": "none" } }, + "pluginVersion": "12.0.2", "targets": [ { "datasource": { "type": "prometheus", "uid": "prometheus" }, - "editorMode": "code", - "expr": "sum by(bucket) (rate(minio_bucket_traffic_sent_bytes{job=\"$scrape_jobs\"}[$__rate_interval]))*8", - "legendFormat": "__auto", - "range": true, + "exemplar": true, + "expr": "sum by (server,api) 
(increase(minio_s3_requests_total{job=~\"$scrape_jobs\"}[$__rate_interval]))", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{server,api}}", "refId": "A" } ], - "title": "Bucket Traffic Sent", + "title": "S3 API Request Rate", "type": "timeseries" }, { @@ -4979,34 +4958,36 @@ data: "type": "prometheus", "uid": "prometheus" }, - "description": "Total s3 bytes received per bucket", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 5, + "fillOpacity": 10, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, - "lineInterpolation": "smooth", + "insertNulls": false, + "lineInterpolation": "linear", "lineWidth": 1, - "pointSize": 2, + "pointSize": 5, "scaleDistribution": { "type": "linear" }, - "showPoints": "always", - "spanNulls": true, + "showPoints": "never", + "spanNulls": false, "stacking": { "group": "A", "mode": "none" @@ -5020,8 +5001,7 @@ data: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -5029,46 +5009,78 @@ data: } ] }, - "unit": "binbps" + "unit": "none" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "S3 Errors" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "light-red", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "S3 Requests" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "light-green", + "mode": "fixed" + } + } + ] + } + ] }, "gridPos": { - "h": 10, - "w": 12, - "x": 12, + "h": 6, + "w": 7, + "x": 9, "y": 12 }, - "id": 92, + "id": 88, "options": { + "alertThreshold": true, "legend": { - "calcs": [ - "mean", - "max" - ], - "displayMode": "table", - "placement": "right", + 
"calcs": [], + "displayMode": "list", + "placement": "bottom", "showLegend": true }, "tooltip": { + "hideZeros": false, "mode": "multi", "sort": "none" } }, + "pluginVersion": "12.0.2", "targets": [ { "datasource": { "type": "prometheus", "uid": "prometheus" }, - "editorMode": "code", - "expr": "sum by(bucket) (rate(minio_bucket_traffic_received_bytes{job=\"$scrape_jobs\"}[$__rate_interval]))*8", - "legendFormat": "__auto", - "range": true, + "exemplar": true, + "expr": "sum by (server,api) (increase(minio_s3_requests_4xx_errors_total{job=~\"$scrape_jobs\"}[$__rate_interval]))", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{server,api}}", "refId": "A" } ], - "title": "Bucket Traffic Received", + "title": "S3 API Request Error Rate (4xx)", "type": "timeseries" }, { @@ -5076,34 +5088,36 @@ data: "type": "prometheus", "uid": "prometheus" }, - "description": "Total s3 bytes sent per bucket", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 5, + "fillOpacity": 10, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, - "lineInterpolation": "smooth", + "insertNulls": false, + "lineInterpolation": "linear", "lineWidth": 1, - "pointSize": 2, + "pointSize": 5, "scaleDistribution": { "type": "linear" }, - "showPoints": "auto", - "spanNulls": true, + "showPoints": "never", + "spanNulls": false, "stacking": { "group": "A", "mode": "none" @@ -5117,8 +5131,7 @@ data: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -5128,47 +5141,76 @@ data: }, "unit": "none" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "S3 Errors" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": 
"light-red", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "S3 Requests" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "light-green", + "mode": "fixed" + } + } + ] + } + ] }, "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 22 + "h": 6, + "w": 8, + "x": 16, + "y": 12 }, - "id": 93, + "id": 86, "options": { + "alertThreshold": true, "legend": { - "calcs": [ - "max", - "last" - ], - "displayMode": "table", - "placement": "right", - "showLegend": true, - "sortBy": "Max", - "sortDesc": true + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, "tooltip": { + "hideZeros": false, "mode": "multi", "sort": "none" } }, + "pluginVersion": "12.0.2", "targets": [ { "datasource": { "type": "prometheus", "uid": "prometheus" }, - "editorMode": "code", - "expr": "max by (bucket) (minio_bucket_usage_object_total{job=\"$scrape_jobs\"})", - "hide": false, - "legendFormat": "__auto", - "range": true, - "refId": "B" + "exemplar": true, + "expr": "sum by (server,api) (increase(minio_s3_requests_5xx_errors_total{job=~\"$scrape_jobs\"}[$__rate_interval]))", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{server,api}}", + "refId": "A" } ], - "title": "Bucket objects", + "title": "S3 API Request Error Rate (5xx)", "type": "timeseries" }, { @@ -5176,1113 +5218,1616 @@ data: "type": "prometheus", "uid": "prometheus" }, - "description": "Total s3 bytes sent per bucket", "fieldConfig": { "defaults": { "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 5, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "smooth", - "lineWidth": 1, - "pointSize": 2, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": 
true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "mode": "fixed" }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 + "color": "green" } ] - }, - "unit": "bytes" + } }, "overrides": [] }, "gridPos": { - "h": 10, + "h": 8, "w": 12, - "x": 12, - "y": 22 + "x": 0, + "y": 18 }, - "id": 94, + "id": 99, "options": { + "displayMode": "lcd", "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "maxVizHeight": 300, + "minVizHeight": 16, + "minVizWidth": 8, + "namePlacement": "auto", + "orientation": "horizontal", + "reduceOptions": { "calcs": [ - "max", - "last" + "lastNotNull" ], - "displayMode": "table", - "placement": "right", - "showLegend": true, - "sortBy": "Max", - "sortDesc": true + "fields": "", + "values": false }, - "tooltip": { - "mode": "multi", - "sort": "none" - } + "showUnfilled": true, + "sizing": "auto", + "valueMode": "color" }, + "pluginVersion": "12.0.2", "targets": [ { "datasource": { "type": "prometheus", "uid": "prometheus" }, - "editorMode": "code", - "expr": "max by (bucket) (minio_bucket_usage_total_bytes{job=\"$scrape_jobs\"})", + "disableTextWrap": false, + "editorMode": "builder", + "expr": "minio_cluster_health_erasure_set_online_drives{job=~\"$scrape_jobs\"}", + "fullMetaSearch": false, "hide": false, - "legendFormat": "__auto", + "includeNullMetadata": true, + "instant": false, + "legendFormat": "Pool {{pool}} / Set {{set}} - Online Drives", "range": true, - "refId": "B" - } - ], - "title": "Bucket size", - "type": "timeseries" - }, - { - "aliasColors": { - "S3 Errors": "light-red", - "S3 Requests": "light-green" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 32 - }, - 
"hiddenSeries": false, - "id": 60, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "9.2.4", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ + "refId": "A", + "useBackend": false + }, { "datasource": { "type": "prometheus", "uid": "prometheus" }, - "exemplar": true, - "expr": "sum by (server,api) (increase(minio_s3_requests_total{job=\"$scrape_jobs\"}[$__rate_interval]))", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{server,api}}", - "refId": "A" - } - ], - "thresholds": [], - "timeRegions": [], - "title": "S3 API Request Rate", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:331", - "format": "none", - "logBase": 1, - "show": true + "disableTextWrap": false, + "editorMode": "builder", + "expr": "minio_cluster_health_erasure_set_read_quorum{job=~\"$scrape_jobs\"}", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "Pool {{pool}} / Set {{set}} - Read Quorum", + "range": true, + "refId": "B", + "useBackend": false }, - { - "$$hashKey": "object:332", - "format": "short", - "logBase": 1, - "show": false - } - ], - "yaxis": { - "align": false - } - }, - { - "aliasColors": { - "S3 Errors": "light-red", - "S3 Requests": "light-green" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 32 - }, - "hiddenSeries": false, - "id": 
71, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "9.2.4", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ { "datasource": { "type": "prometheus", "uid": "prometheus" }, - "exemplar": true, - "expr": "sum by (server,api) (increase(minio_s3_requests_errors_total{job=\"$scrape_jobs\"}[$__rate_interval]))", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{server,api}}", - "refId": "A" - } - ], - "thresholds": [], - "timeRegions": [], - "title": "S3 API Request Error Rate", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:331", - "format": "none", - "logBase": 1, - "show": true + "disableTextWrap": false, + "editorMode": "builder", + "expr": "minio_cluster_health_erasure_set_write_quorum{job=~\"$scrape_jobs\"}", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "Pool {{pool}} / Set {{set}} - Write Quorum", + "range": true, + "refId": "C", + "useBackend": false }, - { - "$$hashKey": "object:332", - "format": "short", - "logBase": 1, - "show": false - } - ], - "yaxis": { - "align": false - } - }, - { - "aliasColors": { - "S3 Errors": "light-red", - "S3 Requests": "light-green" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 42 - }, - "hiddenSeries": false, - "id": 86, - "legend": { - "avg": false, - "current": false, - 
"max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "9.2.4", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ { "datasource": { "type": "prometheus", "uid": "prometheus" }, - "exemplar": true, - "expr": "sum by (server,api) (increase(minio_s3_requests_5xx_errors_total{job=\"$scrape_jobs\"}[$__rate_interval]))", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{server,api}}", - "refId": "A" - } - ], - "thresholds": [], - "timeRegions": [], - "title": "S3 API Request Error Rate (5xx)", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:331", - "format": "none", - "logBase": 1, - "show": true + "disableTextWrap": false, + "editorMode": "builder", + "expr": "minio_cluster_health_erasure_set_healing_drives{job=~\"$scrape_jobs\"}", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "Pool {{pool}} / Set {{set}} - Healing Drives", + "range": true, + "refId": "D", + "useBackend": false }, { - "$$hashKey": "object:332", - "format": "short", - "logBase": 1, - "show": false + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "minio_cluster_health_erasure_set_status{job=~\"$scrape_jobs\"}", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "Pool {{pool}} / Set {{set}} - Status", + "range": true, + "refId": "E", + "useBackend": false } ], - "yaxis": { - "align": false - } + "title": "Health Breakdown", + "type": 
"bargauge" }, { - "aliasColors": { - "S3 Errors": "light-red", - "S3 Requests": "light-green" - }, - "bars": false, - "dashLength": 10, - "dashes": false, "datasource": { "type": "prometheus", "uid": "prometheus" }, - "fill": 1, - "fillGradient": 0, + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-GrYlRd" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, "gridPos": { - "h": 10, + "h": 8, "w": 12, "x": 12, - "y": 42 + "y": 18 }, - "hiddenSeries": false, - "id": 88, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", + "id": 76, "options": { - "alertThreshold": true + "displayMode": "lcd", + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "maxVizHeight": 300, + "minVizHeight": 16, + "minVizWidth": 8, + "namePlacement": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + "sizing": "auto", + "valueMode": "color" }, - "percentage": false, - "pluginVersion": "9.2.4", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, + "pluginVersion": "12.0.2", "targets": [ { "datasource": { "type": "prometheus", "uid": "prometheus" }, - "exemplar": true, - "expr": "sum by (server,api) (increase(minio_s3_requests_4xx_errors_total{job=\"$scrape_jobs\"}[$__rate_interval]))", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{server,api}}", + "editorMode": "code", + "exemplar": false, + "expr": "minio_node_process_resident_memory_bytes{job=~\"$scrape_jobs\"}", + "format": "time_series", + "instant": false, 
+ "interval": "", + "legendFormat": "{{server}}", + "range": true, "refId": "A" } ], - "thresholds": [], - "timeRegions": [], - "title": "S3 API Request Error Rate (4xx)", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:331", - "format": "none", - "logBase": 1, - "show": true - }, - { - "$$hashKey": "object:332", - "format": "short", - "logBase": 1, - "show": false - } - ], - "yaxis": { - "align": false - } + "title": "Memory Usage ", + "type": "bargauge" }, { - "aliasColors": { - "10.13.1.25:9000 DELETE": "red", - "10.13.1.25:9000 GET": "green", - "10.13.1.25:9000 POST": "blue" - }, - "bars": true, - "dashLength": 10, - "dashes": false, "datasource": { "type": "prometheus", "uid": "prometheus" }, - "description": "Total number of bytes received and sent among all MinIO server instances", "fieldConfig": { "defaults": { - "links": [], - "unit": "binbps" + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "bars", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" }, "overrides": [] }, - "fill": 10, - "fillGradient": 1, "gridPos": { - "h": 9, + "h": 7, "w": 12, "x": 0, - "y": 52 + "y": 26 }, - 
"hiddenSeries": false, - "id": 17, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", + "id": 73, "options": { - "alertThreshold": true + "alertThreshold": true, + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } }, - "percentage": false, - "pluginVersion": "9.2.4", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, + "pluginVersion": "12.0.2", "targets": [ { "datasource": { "type": "prometheus", "uid": "prometheus" }, - "editorMode": "code", "exemplar": true, - "expr": "rate(minio_inter_node_traffic_sent_bytes{job=\"$scrape_jobs\"}[$__rate_interval])*8", + "expr": "rate(minio_node_io_rchar_bytes{job=~\"$scrape_jobs\"}[$__rate_interval])", "format": "time_series", + "instant": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "Internode Bytes Received [{{server}}]", - "metric": "minio_http_requests_duration_seconds_count", - "range": true, - "refId": "A", - "step": 4 + "legendFormat": "Node RChar [{{server}}]", + "refId": "A" }, { "datasource": { "type": "prometheus", "uid": "prometheus" }, - "editorMode": "code", "exemplar": true, - "expr": "rate(minio_inter_node_traffic_received_bytes{job=\"$scrape_jobs\"}[$__rate_interval])*8", + "expr": "rate(minio_node_io_wchar_bytes{job=~\"$scrape_jobs\"}[$__rate_interval])", "interval": "", - "legendFormat": "Internode Bytes Sent [{{server}}]", - "range": true, + "legendFormat": "Node WChar [{{server}}]", "refId": "B" } ], - "thresholds": [], - "timeRegions": [], - "title": "Internode Data Transfer", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - 
"xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:211", - "format": "binbps", - "logBase": 1, - "show": true - }, - { - "$$hashKey": "object:212", - "format": "s", - "logBase": 1, - "show": true - } - ], - "yaxis": { - "align": false - } + "title": "Read, Write I/O", + "type": "timeseries" }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, "datasource": { "type": "prometheus", "uid": "prometheus" }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 52 - }, - "hiddenSeries": false, - "id": 84, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "9.2.4", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "exemplar": true, - "expr": "sum by (instance) (minio_heal_objects_heal_total{job=\"$scrape_jobs\"})", - "interval": "", - "legendFormat": "Objects healed in current self heal run", - "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "exemplar": true, - "expr": "sum by (instance) (minio_heal_objects_error_total{job=\"$scrape_jobs\"})", - "hide": false, - "interval": "", - "legendFormat": "Heal errors in current self heal run", - "refId": "B" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "green" + }, + { + "color": "orange", + "value": 70 + }, + { + "color": "red", + "value": 85 + } + ] }, - "exemplar": true, - "expr": "sum by (instance) 
(minio_heal_objects_total{job=\"$scrape_jobs\"}) ", - "hide": false, - "interval": "", - "legendFormat": "Objects scanned in current self heal run", - "refId": "C" - } - ], - "thresholds": [], - "timeRegions": [], - "title": "Healing", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:846", - "format": "short", - "logBase": 1, - "show": true + "unit": "s" }, - { - "$$hashKey": "object:847", - "format": "short", - "logBase": 1, - "show": true - } - ], - "yaxis": { - "align": false - } - }, - { - "aliasColors": {}, - "bars": true, - "dashLength": 10, - "dashes": false, - "datasource": { - "type": "prometheus", - "uid": "prometheus" + "overrides": [] }, - "fill": 1, - "fillGradient": 0, "gridPos": { - "h": 9, + "h": 7, "w": 12, - "x": 0, - "y": 61 + "x": 12, + "y": 26 }, - "hiddenSeries": false, "id": 77, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", "options": { - "alertThreshold": true + "minVizHeight": 75, + "minVizWidth": 75, + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "sizing": "auto" }, - "percentage": false, - "pluginVersion": "9.2.4", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, + "pluginVersion": "12.0.2", "targets": [ { "datasource": { "type": "prometheus", "uid": "prometheus" }, + "editorMode": "code", "exemplar": true, - "expr": "rate(minio_node_process_cpu_total_seconds{job=\"$scrape_jobs\"}[$__rate_interval])", + "expr": "rate(minio_node_process_cpu_total_seconds{job=~\"$scrape_jobs\"}[$__rate_interval])", 
"interval": "", - "legendFormat": "CPU Usage Rate [{{server}}]", + "legendFormat": "{{server}}", + "range": true, "refId": "A" } ], - "thresholds": [], - "timeRegions": [], - "title": "Node CPU Usage", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:1043", - "format": "none", - "logBase": 1, - "show": true - }, - { - "$$hashKey": "object:1044", - "format": "short", - "logBase": 1, - "show": true - } - ], - "yaxis": { - "align": false - } + "title": "CPU Usage", + "type": "gauge" }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, "datasource": { "type": "prometheus", "uid": "prometheus" }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 61 - }, - "hiddenSeries": false, - "id": 76, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "9.2.4", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" + "description": "Total number of bytes received and sent on MinIO cluster", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" }, - "exemplar": true, - "expr": "minio_node_process_resident_memory_bytes{job=\"$scrape_jobs\"}", - "interval": "", - "legendFormat": "Memory Used [{{server}}]", - "refId": "A" - } - ], - "thresholds": [], - "timeRegions": [], - "title": "Node Memory Usage", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": 
true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:1043", - "format": "bytes", - "logBase": 1, - "show": true + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" }, - { - "$$hashKey": "object:1044", - "format": "short", - "logBase": 1, - "show": true - } - ], - "yaxis": { - "align": false - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": { - "type": "prometheus", - "uid": "prometheus" + "overrides": [] }, - "fill": 1, - "fillGradient": 0, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 70 - }, - "hiddenSeries": false, - "id": 74, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", + "y": 33 + }, + "id": 17, "options": { - "alertThreshold": true + "legend": { + "calcs": [], + "displayMode": "hidden", + "placement": "right", + "showLegend": false + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } }, - "percentage": false, - "pluginVersion": "9.2.4", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - 
"steppedLine": false, + "pluginVersion": "12.0.2", "targets": [ { "datasource": { "type": "prometheus", "uid": "prometheus" }, + "editorMode": "code", "exemplar": true, - "expr": "minio_node_disk_used_bytes{job=\"$scrape_jobs\"}", + "expr": "rate(minio_inter_node_traffic_sent_bytes{job=~\"$scrape_jobs\"}[$__rate_interval])", "format": "time_series", - "instant": false, "interval": "", - "legendFormat": "Used Capacity [{{server}}:{{disk}}]", - "refId": "A" - } - ], - "thresholds": [], - "timeRegions": [], - "title": "Drive Used Capacity", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:381", - "format": "bytes", - "logBase": 1, - "show": true + "intervalFactor": 2, + "legendFormat": "Internode Bytes Received [{{server}}]", + "metric": "minio_http_requests_duration_seconds_count", + "range": true, + "refId": "A", + "step": 4 }, - { - "$$hashKey": "object:382", - "format": "short", - "logBase": 1, - "show": true - } - ], - "yaxis": { - "align": false - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 70 - }, - "hiddenSeries": false, - "id": 82, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "9.2.4", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ { "datasource": { "type": "prometheus", "uid": "prometheus" }, "exemplar": true, - "expr": 
"minio_cluster_disk_free_inodes{job=\"$scrape_jobs\"}", - "format": "time_series", - "instant": false, + "expr": "rate(minio_inter_node_traffic_received_bytes{job=~\"$scrape_jobs\"}[$__rate_interval])", "interval": "", - "legendFormat": "Free Inodes [{{server}}:{{disk}}]", - "refId": "A" - } - ], - "thresholds": [], - "timeRegions": [], - "title": "Drives Free Inodes", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:381", - "format": "none", - "logBase": 1, - "show": true - }, - { - "$$hashKey": "object:382", - "format": "short", - "logBase": 1, - "show": true + "legendFormat": "Internode Bytes Sent [{{server}}]", + "refId": "B" } ], - "yaxis": { - "align": false - } + "title": "Internode Traffic", + "type": "timeseries" }, { - "aliasColors": { - "Offline 10.13.1.25:9000": "dark-red", - "Total 10.13.1.25:9000": "blue" - }, - "bars": true, - "dashLength": 10, - "dashes": false, "datasource": { "type": "prometheus", "uid": "prometheus" }, - "description": "Number of online disks per MinIO Server", + "description": "", "fieldConfig": { "defaults": { - "links": [] - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 9, - "w": 12, + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } 
+ }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "available 10.13.1.25:9000" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "used 10.13.1.25:9000" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "blue", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 33 + }, + "id": 8, + "options": { + "alertThreshold": true, + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "exemplar": true, + "expr": "minio_node_file_descriptor_open_total{job=~\"$scrape_jobs\"}", + "interval": "", + "legendFormat": "Open FDs [{{server}}]", + "refId": "B" + } + ], + "title": "File Descriptors", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "Number of online drives per MinIO Server", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "bars", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": 
false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Offline 10.13.1.25:9000" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "dark-red", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Total 10.13.1.25:9000" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "blue", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 40 + }, + "id": 11, + "options": { + "alertThreshold": true, + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "exemplar": true, + "expr": "rate(minio_node_syscall_read_total{job=~\"$scrape_jobs\"}[$__rate_interval])", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "Read Syscalls [{{server}}]", + "metric": "process_start_time_seconds", + "refId": "A", + "step": 60 + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "exemplar": true, + "expr": "rate(minio_node_syscall_write_total{job=~\"$scrape_jobs\"}[$__rate_interval])", + "interval": "", + "legendFormat": "Write Syscalls [{{server}}]", + "refId": "B" + } + ], + "title": "Syscalls", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": 
false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 40 + }, + "id": 95, + "options": { + "alertThreshold": true, + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "exemplar": true, + "expr": "rate(minio_node_scanner_objects_scanned{job=~\"$scrape_jobs\"}[$__rate_interval])", + "interval": "1m", + "legendFormat": "[{{server}}]", + "refId": "A" + } + ], + "title": "Scanned Objects", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, 
+ "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 47 + }, + "id": 75, + "options": { + "alertThreshold": true, + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "exemplar": true, + "expr": "rate(minio_node_scanner_versions_scanned{job=~\"$scrape_jobs\"}[$__rate_interval])", + "interval": "1m", + "legendFormat": "[{{server}}]", + "refId": "A" + } + ], + "title": "Scanned Versions", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": 
"red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 47 + }, + "id": 96, + "options": { + "alertThreshold": true, + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "exemplar": true, + "expr": "rate(minio_node_scanner_directories_scanned{job=~\"$scrape_jobs\"}[$__rate_interval])", + "interval": "1m", + "legendFormat": "[{{server}}]", + "refId": "A" + } + ], + "title": "Scanned Directories", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "dtdurations" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 0, + "y": 54 + }, + "id": 89, + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "exemplar": true, + "expr": "minio_cluster_kms_uptime{job=~\"$scrape_jobs\"}", + "format": "time_series", + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "metric": "minio_cluster_kms_uptime", + "refId": "A", + "step": 60 + } + ], + "title": "KMS Uptime", 
+ "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "S3 Errors" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "light-red", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "S3 Requests" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "light-green", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 4, + "y": 54 + }, + "id": 91, + "options": { + "alertThreshold": true, + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "exemplar": true, + "expr": "sum by (server) (increase(minio_cluster_kms_request_error{job=~\"$scrape_jobs\"}[$__rate_interval]))", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{server}}", + 
"refId": "A" + } + ], + "title": "KMS Request 4xx Error Rate", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bool_on_off" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 8, + "y": 54 + }, + "id": 90, + "options": { + "alertThreshold": true, + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "exemplar": true, + "expr": "sum by (server) (minio_cluster_kms_online{job=~\"$scrape_jobs\"})", + "interval": "1m", + "legendFormat": "{{server}}", + "refId": "A" + } + ], + "title": "KMS Online(1)/Offline(0)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + 
"axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 54 + }, + "id": 98, + "options": { + "alertThreshold": true, + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "exemplar": true, + "expr": "rate(minio_node_scanner_bucket_scans_finished{job=~\"$scrape_jobs\"}[$__rate_interval])", + "interval": "1m", + "legendFormat": "[{{server}}]", + "refId": "A" + } + ], + "title": "Bucket Scans Finished", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + 
"scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "S3 Errors" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "light-red", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "S3 Requests" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "light-green", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 5, + "w": 6, "x": 0, - "y": 78 + "y": 58 }, - "hiddenSeries": false, - "id": 11, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "total": false, - "values": false - }, - "lines": false, - "linewidth": 1, - "links": [], - "nullPointMode": "null", + "id": 92, "options": { - "alertThreshold": true + "alertThreshold": true, + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } }, - "percentage": false, - "pluginVersion": "9.2.4", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, + "pluginVersion": "12.0.2", "targets": [ { "datasource": { @@ -6290,116 +6835,129 @@ data: "uid": "prometheus" }, "exemplar": true, - "expr": "rate(minio_node_syscall_read_total{job=\"$scrape_jobs\"}[$__rate_interval])", - "format": "time_series", - "interval": "", + "expr": "sum by (server) (increase(minio_cluster_kms_request_failure{job=~\"$scrape_jobs\"}[$__rate_interval]))", + "interval": "1m", "intervalFactor": 2, - 
"legendFormat": "Read Syscalls [{{server}}]", - "metric": "process_start_time_seconds", - "refId": "A", - "step": 60 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "exemplar": true, - "expr": "rate(minio_node_syscall_write_total{job=\"$scrape_jobs\"}[$__rate_interval])", - "interval": "", - "legendFormat": "Write Syscalls [{{server}}]", - "refId": "B" - } - ], - "thresholds": [], - "timeRegions": [], - "title": "Node Syscalls", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:185", - "decimals": 0, - "format": "short", - "logBase": 1, - "min": "0", - "show": true - }, - { - "$$hashKey": "object:186", - "format": "short", - "logBase": 1, - "show": true + "legendFormat": "{{server}}", + "refId": "A" } ], - "yaxis": { - "align": false - } + "title": "KMS Request 5xx Error Rate", + "type": "timeseries" }, { - "aliasColors": { - "available 10.13.1.25:9000": "green", - "used 10.13.1.25:9000": "blue" - }, - "bars": false, - "dashLength": 10, - "dashes": false, "datasource": { "type": "prometheus", "uid": "prometheus" }, - "description": "", "fieldConfig": { "defaults": { - "links": [] + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + 
"mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "S3 Errors" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "light-red", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "S3 Requests" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "light-green", + "mode": "fixed" + } + } + ] + } + ] }, - "fill": 1, - "fillGradient": 0, "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 78 + "h": 5, + "w": 6, + "x": 6, + "y": 58 }, - "hiddenSeries": false, - "id": 8, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", + "id": 93, "options": { - "alertThreshold": true + "alertThreshold": true, + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } }, - "percentage": false, - "pluginVersion": "9.2.4", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, + "pluginVersion": "12.0.2", "targets": [ { "datasource": { @@ -6407,160 +6965,119 @@ data: "uid": "prometheus" }, "exemplar": true, - "expr": "minio_node_file_descriptor_open_total{job=\"$scrape_jobs\"}", - "interval": "", - "legendFormat": "Open FDs [{{server}}]", - "refId": "B" - } - ], - "thresholds": [], - "timeRegions": [], - "title": "Node File Descriptors", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:212", - "format": 
"none", - "logBase": 1, - "min": "0", - "show": true - }, - { - "$$hashKey": "object:213", - "format": "none", - "logBase": 1, - "min": "0", - "show": true + "expr": "sum by (server) (rate(minio_cluster_kms_request_success{job=~\"$scrape_jobs\"}[$__rate_interval]))", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "KMS Request Success [{{server}}]", + "refId": "A" } ], - "yaxis": { - "align": false - } + "title": "KMS Request Success Rate ", + "type": "timeseries" }, { - "aliasColors": {}, - "bars": true, - "dashLength": 10, - "dashes": false, "datasource": { "type": "prometheus", "uid": "prometheus" }, "fieldConfig": { "defaults": { - "unit": "binbps" + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" }, "overrides": [] }, - "fill": 1, - "fillGradient": 0, "gridPos": { - "h": 7, - "w": 24, - "x": 0, - "y": 87 + "h": 9, + "w": 12, + "x": 12, + "y": 61 }, - "hiddenSeries": false, - "id": 73, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", + "id": 97, "options": { - "alertThreshold": true + "alertThreshold": true, + 
"legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } }, - "percentage": false, - "pluginVersion": "9.2.4", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, + "pluginVersion": "12.0.2", "targets": [ { "datasource": { "type": "prometheus", "uid": "prometheus" }, - "editorMode": "code", "exemplar": true, - "expr": "rate(minio_node_io_rchar_bytes{job=\"$scrape_jobs\"}[$__rate_interval])*8", - "format": "time_series", - "instant": false, - "interval": "", - "legendFormat": "Node RChar [{{server}}]", + "expr": "rate(minio_node_scanner_bucket_scans_started{job=~\"$scrape_jobs\"}[$__rate_interval])", + "interval": "1m", + "legendFormat": "[{{server}}]", "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "exemplar": true, - "expr": "rate(minio_node_io_wchar_bytes{job=\"$scrape_jobs\"}[$__rate_interval])*8", - "interval": "", - "legendFormat": "Node WChar [{{server}}]", - "range": true, - "refId": "B" } ], - "thresholds": [], - "timeRegions": [], - "title": "Node IO", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:381", - "format": "binbps", - "logBase": 1, - "show": true - }, - { - "$$hashKey": "object:382", - "format": "short", - "logBase": 1, - "show": true - } - ], - "yaxis": { - "align": false - } + "title": "Bucket Scans Started", + "type": "timeseries" } ], - "refresh": false, - "schemaVersion": 37, - "style": "dark", + "preload": false, + "refresh": "", + "schemaVersion": 41, "tags": [ "minio" ], @@ -6568,35 +7085,34 @@ data: "list": [ { "current": { - "selected": false, - "text": "minio", - "value": "minio" + "text": [ + 
"All" + ], + "value": [ + "$__all" + ] }, "datasource": { "type": "prometheus", "uid": "prometheus" }, - "definition": "label_values(minio_cluster_disk_total,job)", - "hide": 0, - "includeAll": false, - "label": "Instance", - "multi": false, + "definition": "label_values(job)", + "includeAll": true, + "multi": true, "name": "scrape_jobs", "options": [], "query": { - "query": "label_values(minio_cluster_disk_total,job)", + "query": "label_values(job)", "refId": "StandardVariableQuery" }, "refresh": 1, "regex": "", - "skipUrlSync": false, - "sort": 0, "type": "query" } ] }, "time": { - "from": "now-3h", + "from": "now-15m", "to": "now" }, "timepicker": { @@ -6610,24 +7126,12 @@ data: "1h", "2h", "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" ] }, "timezone": "", "title": "MinIO", "uid": "TgmJnqnnk", - "version": 2, - "weekStart": "" + "version": 1 } hdfs.json: | { From 5c232435917a31ae69a9c4964a7504cd5ca7580b Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Wed, 23 Jul 2025 11:18:19 +0200 Subject: [PATCH 04/17] Add metric collection overview dashboard --- stacks/monitoring/grafana-dashboards.yaml | 291 +++++++++++++++++- .../prometheus-service-monitors.yaml | 44 ++- 2 files changed, 322 insertions(+), 13 deletions(-) diff --git a/stacks/monitoring/grafana-dashboards.yaml b/stacks/monitoring/grafana-dashboards.yaml index c20a727e..ef752a09 100644 --- a/stacks/monitoring/grafana-dashboards.yaml +++ b/stacks/monitoring/grafana-dashboards.yaml @@ -5,6 +5,289 @@ kind: ConfigMap metadata: name: stackable-grafana-dashboards data: + metric-collection-overview.json: | + { + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 35, + 
"links": [], + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 6, + "panels": [], + "title": "Stacklets", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 24, + "x": 0, + "y": 1 + }, + "id": 4, + "maxPerRow": 6, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "12.0.2", + "repeat": "product", + "repeatDirection": "h", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "count(count by (app_kubernetes_io_instance) ({app_kubernetes_io_name=\"${product}\"}))", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "${product}", + "type": "stat" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 9 + }, + "id": 5, + "panels": [], + "title": "Pods", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 24, + "x": 0, + "y": 10 + }, + "id": 7, + "maxPerRow": 6, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": 
"auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "12.0.2", + "repeat": "product", + "repeatDirection": "h", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "count(count by (pod) ({app_kubernetes_io_name=\"${product}\",pod!=\"\"}))", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "${product}", + "type": "stat" + } + ], + "preload": false, + "schemaVersion": 41, + "tags": [], + "templating": { + "list": [ + { + "allowCustomValue": true, + "current": { + "text": "$__all", + "value": "$__all" + }, + "description": "", + "includeAll": true, + "multi": true, + "name": "product", + "options": [ + { + "selected": false, + "text": "airflow", + "value": "airflow" + }, + { + "selected": false, + "text": "druid", + "value": "druid" + }, + { + "selected": false, + "text": "hbase", + "value": "hbase" + }, + { + "selected": false, + "text": "hdfs", + "value": "hdfs" + }, + { + "selected": false, + "text": "hive", + "value": "hive" + }, + { + "selected": false, + "text": "nifi", + "value": "nifi" + }, + { + "selected": false, + "text": "opensearch", + "value": "opensearch" + }, + { + "selected": false, + "text": "spark", + "value": "spark" + }, + { + "selected": false, + "text": "superset", + "value": "superset" + }, + { + "selected": false, + "text": "trino", + "value": "trino" + }, + { + "selected": false, + "text": "zookeeper", + "value": "zookeeper" + }, + { + "selected": false, + "text": "opa", + "value": "opa" + } + ], + "query": "airflow,druid,hbase,hdfs,hive,nifi,opensearch,spark-k8s,superset,trino,zookeeper,opa", + "type": "custom" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": {}, + "timezone": "browser", + "title": "Metric collection overview", + 
"uid": "289ad709-e51e-4b91-86a1-6e426300d0eb", + "version": 1 + } trino.json: | { "annotations": { @@ -3275,9 +3558,7 @@ data: "icon": "external link", "includeVars": true, "keepTime": true, - "tags": [ - "minio" - ], + "tags": [], "type": "dashboards" } ], @@ -7078,9 +7359,7 @@ data: "preload": false, "refresh": "", "schemaVersion": 41, - "tags": [ - "minio" - ], + "tags": [], "templating": { "list": [ { diff --git a/stacks/monitoring/prometheus-service-monitors.yaml b/stacks/monitoring/prometheus-service-monitors.yaml index cdfc2e87..9102f020 100644 --- a/stacks/monitoring/prometheus-service-monitors.yaml +++ b/stacks/monitoring/prometheus-service-monitors.yaml @@ -1,7 +1,7 @@ # ### Products # # Use something like this to check for metrics: -# count by(job) ({__name__!=""}) +# count by (app_kubernetes_io_name, app_kubernetes_io_instance, pod) ({app_kubernetes_io_name!="",pod!=""}) # # See https://github.com/stackabletech/issues/issues/735 # @@ -50,7 +50,12 @@ spec: - scheme: http port: metrics path: /metrics - jobLabel: app.kubernetes.io/instance + podTargetLabels: + - app.kubernetes.io/name + - app.kubernetes.io/instance + - app.kubernetes.io/component + - app.kubernetes.io/role-group + - app.kubernetes.io/version --- apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor @@ -75,7 +80,12 @@ spec: - scheme: http port: native-metrics path: /metrics - jobLabel: app.kubernetes.io/instance + podTargetLabels: + - app.kubernetes.io/name + - app.kubernetes.io/instance + - app.kubernetes.io/component + - app.kubernetes.io/role-group + - app.kubernetes.io/version --- # Kafka is special in that the operator totally messes up services: # 1. 
The metrics Service is missing @@ -101,7 +111,12 @@ spec: - scheme: http port: metrics path: /metrics - jobLabel: app.kubernetes.io/instance + podTargetLabels: + - app.kubernetes.io/name + - app.kubernetes.io/instance + - app.kubernetes.io/component + - app.kubernetes.io/role-group + - app.kubernetes.io/version --- # We prefer the native metrics over the statsd-exporter apiVersion: monitoring.coreos.com/v1 @@ -123,7 +138,12 @@ spec: - scheme: http port: http # Don't use metrics! path: /prom - jobLabel: app.kubernetes.io/instance + podTargetLabels: + - app.kubernetes.io/name + - app.kubernetes.io/instance + - app.kubernetes.io/component + - app.kubernetes.io/role-group + - app.kubernetes.io/version --- apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor @@ -144,7 +164,12 @@ spec: - scheme: http port: ui-http path: /prometheus - jobLabel: app.kubernetes.io/instance + podTargetLabels: + - app.kubernetes.io/name + - app.kubernetes.io/instance + - app.kubernetes.io/component + - app.kubernetes.io/role-group + - app.kubernetes.io/version --- # NiFI 2 is a beast on it's own.... 
# The current state produces "server returned HTTP status 401 Unauthorized", but it's at least a @@ -190,7 +215,12 @@ spec: targetLabel: __address__ replacement: ${1}.${2}-headless.${3}.svc.cluster.local:${4} regex: (.+);(.+?)(?:-metrics)?;(.+);(.+) - jobLabel: app.kubernetes.io/instance + podTargetLabels: + - app.kubernetes.io/name + - app.kubernetes.io/instance + - app.kubernetes.io/component + - app.kubernetes.io/role-group + - app.kubernetes.io/version --- apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor From e093bb90e29474f5c3b65045bcbca79a6e9e18bd Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Wed, 23 Jul 2025 13:28:32 +0200 Subject: [PATCH 05/17] Update HBase --- stacks/monitoring/grafana-dashboards.yaml | 227 +++++++++++++--------- 1 file changed, 130 insertions(+), 97 deletions(-) diff --git a/stacks/monitoring/grafana-dashboards.yaml b/stacks/monitoring/grafana-dashboards.yaml index ef752a09..8687b88f 100644 --- a/stacks/monitoring/grafana-dashboards.yaml +++ b/stacks/monitoring/grafana-dashboards.yaml @@ -9504,9 +9504,8 @@ data: "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, - "id": 29, + "id": 3, "links": [], - "liveNow": false, "panels": [ { "datasource": { @@ -9523,8 +9522,7 @@ data: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -9547,6 +9545,7 @@ data: "graphMode": "none", "justifyMode": "auto", "orientation": "auto", + "percentChangeColorMode": "standard", "reduceOptions": { "calcs": [ "lastNotNull" @@ -9554,9 +9553,11 @@ data: "fields": "", "values": false }, - "textMode": "auto" + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "9.2.4", + "pluginVersion": "12.0.2", "targets": [ { "datasource": { @@ -9565,7 +9566,7 @@ data: }, "editorMode": "code", "exemplar": false, - "expr": "count(Hadoop_HBase_numMasterWALs{namespace=~\"$namespace\",job=~\"$cluster\"})", + "expr": 
"count(procedure_num_master_wa_ls{namespace=~\"$namespace\",app_kubernetes_io_instance=~\"$cluster\"})", "instant": true, "legendFormat": "__auto", "range": false, @@ -9590,8 +9591,7 @@ data: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -9614,6 +9614,7 @@ data: "graphMode": "none", "justifyMode": "auto", "orientation": "auto", + "percentChangeColorMode": "standard", "reduceOptions": { "calcs": [ "lastNotNull" @@ -9621,9 +9622,11 @@ data: "fields": "", "values": false }, - "textMode": "auto" + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "9.2.4", + "pluginVersion": "12.0.2", "targets": [ { "datasource": { @@ -9632,7 +9635,7 @@ data: }, "editorMode": "code", "exemplar": false, - "expr": "max(Hadoop_HBase_numRegionServers{namespace=~\"$namespace\",job=~\"$cluster\"})", + "expr": "max(server_num_region_servers{namespace=~\"$namespace\",app_kubernetes_io_instance=~\"$cluster\"})", "instant": true, "legendFormat": "__auto", "range": false, @@ -9657,8 +9660,7 @@ data: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -9681,6 +9683,7 @@ data: "graphMode": "area", "justifyMode": "auto", "orientation": "auto", + "percentChangeColorMode": "standard", "reduceOptions": { "calcs": [ "lastNotNull" @@ -9688,9 +9691,11 @@ data: "fields": "", "values": false }, - "textMode": "auto" + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "9.2.4", + "pluginVersion": "12.0.2", "targets": [ { "datasource": { @@ -9699,7 +9704,7 @@ data: }, "editorMode": "code", "exemplar": false, - "expr": "sum(max by (namespace, pod)(Hadoop_HBase_regionCount{namespace=~\"$namespace\",job=~\"$cluster\"}))", + "expr": "sum(max by (namespace, pod)(server_region_count{namespace=~\"$namespace\",app_kubernetes_io_instance=~\"$cluster\"}))", "instant": false, "legendFormat": "__auto", "range": 
true, @@ -9710,9 +9715,9 @@ data: "type": "stat" }, { - "datasource": { - "type": "prometheus", - "uid": "prometheus" + "fieldConfig": { + "defaults": {}, + "overrides": [] }, "gridPos": { "h": 3, @@ -9730,20 +9735,7 @@ data: "content": "

$cluster

", "mode": "html" }, - "pluginVersion": "9.2.4", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "expr": "", - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], + "pluginVersion": "12.0.2", "title": "Cluster name", "type": "text" }, @@ -9762,8 +9754,7 @@ data: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -9786,6 +9777,7 @@ data: "graphMode": "area", "justifyMode": "auto", "orientation": "auto", + "percentChangeColorMode": "standard", "reduceOptions": { "calcs": [ "lastNotNull" @@ -9793,10 +9785,12 @@ data: "fields": "", "values": false }, + "showPercentChange": false, "text": {}, - "textMode": "auto" + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "9.2.4", + "pluginVersion": "12.0.2", "targets": [ { "datasource": { @@ -9804,7 +9798,7 @@ data: "uid": "prometheus" }, "editorMode": "code", - "expr": "sum(max by (namespace, pod)(Hadoop_HBase_ritCount{namespace=~\"$namespace\",job=~\"$cluster\"}))", + "expr": "sum(max by (namespace, pod)(assignment_manager_rit_count{namespace=~\"$namespace\",app_kubernetes_io_instance=~\"$cluster\"}))", "legendFormat": "__auto", "range": true, "refId": "A" @@ -9829,8 +9823,7 @@ data: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "dark-yellow", @@ -9858,6 +9851,7 @@ data: "graphMode": "area", "justifyMode": "auto", "orientation": "auto", + "percentChangeColorMode": "standard", "reduceOptions": { "calcs": [ "lastNotNull" @@ -9865,10 +9859,12 @@ data: "fields": "", "values": false }, + "showPercentChange": false, "text": {}, - "textMode": "auto" + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "9.2.4", + "pluginVersion": "12.0.2", "targets": [ { "datasource": { @@ -9876,7 +9872,7 @@ data: "uid": "prometheus" }, "editorMode": "code", - "expr": "max(max by 
(pod)(Hadoop_HBase_ritOldestAge{namespace=~\"$namespace\",job=~\"$cluster\"}))", + "expr": "max(max by (pod)(assignment_manager_rit_oldest_age{namespace=~\"$namespace\",app_kubernetes_io_instance=~\"$cluster\"}))", "legendFormat": "__auto", "range": true, "refId": "A" @@ -9896,12 +9892,14 @@ data: "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "axisSoftMin": 0, "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "opacity", @@ -9910,6 +9908,7 @@ data: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineStyle": { "fill": "solid" @@ -9934,8 +9933,7 @@ data: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -10011,10 +10009,12 @@ data: "showLegend": false }, "tooltip": { + "hideZeros": false, "mode": "multi", "sort": "none" } }, + "pluginVersion": "12.0.2", "targets": [ { "datasource": { @@ -10067,12 +10067,14 @@ data: "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "axisSoftMin": 0, "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "opacity", @@ -10081,6 +10083,7 @@ data: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -10102,8 +10105,7 @@ data: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -10180,10 +10182,12 @@ data: "showLegend": false }, "tooltip": { + "hideZeros": false, "mode": "multi", "sort": "none" } }, + "pluginVersion": "12.0.2", "targets": [ { "datasource": { @@ -10224,12 +10228,14 @@ data: "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, 
"axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "axisSoftMin": 0, "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 25, "gradientMode": "opacity", @@ -10238,6 +10244,7 @@ data: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -10259,8 +10266,7 @@ data: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -10335,10 +10341,12 @@ data: "showLegend": true }, "tooltip": { + "hideZeros": false, "mode": "multi", "sort": "none" } }, + "pluginVersion": "12.0.2", "targets": [ { "datasource": { @@ -10379,11 +10387,13 @@ data: "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 50, "gradientMode": "opacity", @@ -10392,6 +10402,7 @@ data: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -10413,8 +10424,7 @@ data: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -10444,10 +10454,12 @@ data: "sortDesc": true }, "tooltip": { + "hideZeros": false, "mode": "single", "sort": "none" } }, + "pluginVersion": "12.0.2", "targets": [ { "datasource": { @@ -10455,7 +10467,7 @@ data: "uid": "prometheus" }, "editorMode": "code", - "expr": "sum(max by (pod)(Hadoop_HBase_numActiveHandler{namespace=~\"$namespace\",job=~\"$cluster\"}))", + "expr": "sum(max by (pod)(master_num_active_handler{namespace=~\"$namespace\",app_kubernetes_io_instance=~\"$cluster\"}))", "legendFormat": "active handlers", "range": true, "refId": "A" @@ -10475,12 +10487,14 @@ data: "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", 
"axisPlacement": "auto", "axisSoftMin": 0, "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 25, "gradientMode": "opacity", @@ -10489,6 +10503,7 @@ data: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -10510,8 +10525,7 @@ data: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -10586,10 +10600,12 @@ data: "showLegend": true }, "tooltip": { + "hideZeros": false, "mode": "multi", "sort": "none" } }, + "pluginVersion": "12.0.2", "targets": [ { "datasource": { @@ -10597,7 +10613,7 @@ data: "uid": "prometheus" }, "editorMode": "code", - "expr": "sum(max by(pod)(rate(Hadoop_HBase_readRequestCount{namespace=~\"$namespace\",job=~\"$cluster\"}[$__rate_interval])))*8", + "expr": "sum(max by(pod)(rate(server_read_request_count{namespace=~\"$namespace\",app_kubernetes_io_instance=~\"$cluster\"}[$__rate_interval])))*8", "hide": false, "legendFormat": "read", "range": true, @@ -10609,7 +10625,7 @@ data: "uid": "prometheus" }, "editorMode": "code", - "expr": "-sum(max by(pod)(rate(Hadoop_HBase_writeRequestCount{namespace=~\"$namespace\",job=~\"$cluster\"}[$__rate_interval])))*8", + "expr": "-sum(max by(pod)(rate(server_write_request_count{namespace=~\"$namespace\",app_kubernetes_io_instance=~\"$cluster\"}[$__rate_interval])))*8", "hide": false, "legendFormat": "write", "range": true, @@ -10630,11 +10646,13 @@ data: "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", @@ -10643,6 +10661,7 @@ data: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -10664,8 +10683,7 @@ data: "mode": "absolute", "steps": [ { - "color": "green", - 
"value": null + "color": "green" }, { "color": "red", @@ -10695,10 +10713,12 @@ data: "sortDesc": true }, "tooltip": { + "hideZeros": false, "mode": "single", "sort": "none" } }, + "pluginVersion": "12.0.2", "targets": [ { "datasource": { @@ -10706,7 +10726,7 @@ data: "uid": "prometheus" }, "editorMode": "code", - "expr": "sum(max by (pod)(rate(Hadoop_HBase_slowGetCount{namespace=~\"$namespace\",job=~\"$cluster\"}[$__rate_interval])))", + "expr": "sum(max by (pod)(rate(server_slow_get_count{namespace=~\"$namespace\",app_kubernetes_io_instance=~\"$cluster\"}[$__rate_interval])))", "legendFormat": "get", "range": true, "refId": "A" @@ -10717,7 +10737,7 @@ data: "uid": "prometheus" }, "editorMode": "code", - "expr": "sum(max by (pod)(rate(Hadoop_HBase_slowPutCount{namespace=~\"$namespace\",job=~\"$cluster\"}[$__rate_interval])))", + "expr": "sum(max by (pod)(rate(server_slow_put_count{namespace=~\"$namespace\",app_kubernetes_io_instance=~\"$cluster\"}[$__rate_interval])))", "hide": false, "legendFormat": "put", "range": true, @@ -10729,7 +10749,7 @@ data: "uid": "prometheus" }, "editorMode": "code", - "expr": "sum(max by (pod)(rate(Hadoop_HBase_slowAppendCount{namespace=~\"$namespace\",job=~\"$cluster\"}[$__rate_interval])))", + "expr": "sum(max by (pod)(rate(server_slow_append_count{namespace=~\"$namespace\",app_kubernetes_io_instance=~\"$cluster\"}[$__rate_interval])))", "hide": false, "legendFormat": "append", "range": true, @@ -10741,7 +10761,7 @@ data: "uid": "prometheus" }, "editorMode": "code", - "expr": "sum(max by (pod)(rate(Hadoop_HBase_slowDeleteCount{namespace=~\"$namespace\",job=~\"$cluster\"}[$__rate_interval])))", + "expr": "sum(max by (pod)(rate(server_slow_delete_count{namespace=~\"$namespace\",app_kubernetes_io_instance=~\"$cluster\"}[$__rate_interval])))", "hide": false, "legendFormat": "delete", "range": true, @@ -10753,7 +10773,7 @@ data: "uid": "prometheus" }, "editorMode": "code", - "expr": "sum(max by 
(pod)(rate(Hadoop_HBase_slowIncrementCount{namespace=~\"$namespace\",job=~\"$cluster\"}[$__rate_interval])))", + "expr": "sum(max by (pod)(rate(server_slow_increment_count{namespace=~\"$namespace\",app_kubernetes_io_instance=~\"$cluster\"}[$__rate_interval])))", "hide": false, "legendFormat": "increment", "range": true, @@ -10774,11 +10794,13 @@ data: "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", @@ -10787,6 +10809,7 @@ data: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -10808,8 +10831,7 @@ data: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -10839,10 +10861,12 @@ data: "sortDesc": true }, "tooltip": { + "hideZeros": false, "mode": "single", "sort": "none" } }, + "pluginVersion": "12.0.2", "targets": [ { "datasource": { @@ -10850,7 +10874,7 @@ data: "uid": "prometheus" }, "editorMode": "code", - "expr": "sum(max by (pod)(Hadoop_HBase_ritCount{namespace=~\"$namespace\",job=~\"$cluster\"}))", + "expr": "sum(max by (pod)(assignment_manager_rit_count{namespace=~\"$namespace\",app_kubernetes_io_instance=~\"$cluster\"}))", "legendFormat": "In transition", "range": true, "refId": "A" @@ -10883,11 +10907,13 @@ data: "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", @@ -10896,6 +10922,7 @@ data: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -10917,8 +10944,7 @@ data: "mode": "absolute", "steps": [ { - "color": "green", - 
"value": null + "color": "green" }, { "color": "red", @@ -10948,10 +10974,12 @@ data: "sortDesc": true }, "tooltip": { + "hideZeros": false, "mode": "single", "sort": "none" } }, + "pluginVersion": "12.0.2", "targets": [ { "datasource": { @@ -10959,7 +10987,7 @@ data: "uid": "prometheus" }, "editorMode": "code", - "expr": "max by (pod)(Hadoop_HBase_compactionQueueLength{namespace=~\"$namespace\",job=~\"$cluster\"})", + "expr": "max by (pod)(server_compaction_queue_length{namespace=~\"$namespace\",app_kubernetes_io_instance=~\"$cluster\"})", "legendFormat": "{{pod}}", "range": true, "refId": "A" @@ -10979,11 +11007,13 @@ data: "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", @@ -10992,6 +11022,7 @@ data: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -11013,8 +11044,7 @@ data: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -11044,10 +11074,12 @@ data: "sortDesc": true }, "tooltip": { + "hideZeros": false, "mode": "single", "sort": "none" } }, + "pluginVersion": "12.0.2", "targets": [ { "datasource": { @@ -11055,7 +11087,7 @@ data: "uid": "prometheus" }, "editorMode": "code", - "expr": "max by (pod)(Hadoop_HBase_flushQueueLength{namespace=~\"$namespace\",job=~\"$cluster\"})", + "expr": "max by (pod)(server_flush_queue_length{namespace=~\"$namespace\",app_kubernetes_io_instance=~\"$cluster\"})", "legendFormat": "{{pod}}", "range": true, "refId": "A" @@ -11075,11 +11107,13 @@ data: "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", 
"fillOpacity": 0, "gradientMode": "none", @@ -11088,6 +11122,7 @@ data: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -11109,8 +11144,7 @@ data: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -11140,10 +11174,12 @@ data: "sortDesc": true }, "tooltip": { + "hideZeros": false, "mode": "single", "sort": "none" } }, + "pluginVersion": "12.0.2", "targets": [ { "datasource": { @@ -11151,7 +11187,7 @@ data: "uid": "prometheus" }, "editorMode": "code", - "expr": "max by (pod)(Hadoop_HBase_splitQueueLength{namespace=~\"$namespace\",job=~\"$cluster\"})", + "expr": "max by (pod)(server_split_queue_length{namespace=~\"$namespace\",app_kubernetes_io_instance=~\"$cluster\"})", "legendFormat": "{{pod}}", "range": true, "refId": "A" @@ -11161,22 +11197,25 @@ data: "type": "timeseries" } ], + "preload": false, "refresh": false, - "schemaVersion": 37, - "style": "dark", + "schemaVersion": 41, "tags": [], "templating": { "list": [ { + "current": { + "text": "hbase", + "value": "hbase" + }, "hide": 2, "name": "product", "query": "hbase", - "skipUrlSync": false, + "skipUrlSync": true, "type": "constant" }, { "current": { - "selected": false, "text": "default", "value": "default" }, @@ -11184,47 +11223,42 @@ data: "type": "prometheus", "uid": "prometheus" }, - "definition": "label_values(Hadoop_HBase_numTables, namespace)", - "hide": 0, + "definition": "label_values(tables_num_tables,namespace)", "includeAll": false, "label": "Namespace", - "multi": false, "name": "namespace", "options": [], "query": { - "query": "label_values(Hadoop_HBase_numTables, namespace)", - "refId": "StandardVariableQuery" + "qryType": 1, + "query": "label_values(tables_num_tables,namespace)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, "regex": "", - "skipUrlSync": false, "sort": 1, "type": "query" }, { "current": { - "selected": false, - 
"text": "hbase", - "value": "hbase" + "text": "simple-hbase", + "value": "simple-hbase" }, "datasource": { "type": "prometheus", "uid": "prometheus" }, - "definition": "label_values(Hadoop_HBase_numTables{namespace=~\"$namespace\"}, job)", - "hide": 0, + "definition": "label_values(tables_num_tables{namespace=~\"$namespace\"},app_kubernetes_io_instance)", "includeAll": false, "label": "Cluster", - "multi": false, "name": "cluster", "options": [], "query": { - "query": "label_values(Hadoop_HBase_numTables{namespace=~\"$namespace\"}, job)", - "refId": "StandardVariableQuery" + "qryType": 1, + "query": "label_values(tables_num_tables{namespace=~\"$namespace\"},app_kubernetes_io_instance)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 2, "regex": "", - "skipUrlSync": false, "sort": 1, "type": "query" } @@ -11238,7 +11272,6 @@ data: "timezone": "", "title": "HBase", "uid": "vVpFeeAVz", - "version": 17, - "weekStart": "" + "version": 6 } # {% endraw %} From 6dac95f588ea9b03e10cc10c0911714458d038ea Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Wed, 23 Jul 2025 13:30:49 +0200 Subject: [PATCH 06/17] Update HDFS --- stacks/monitoring/grafana-dashboards.yaml | 392 ++++++++++++---------- 1 file changed, 213 insertions(+), 179 deletions(-) diff --git a/stacks/monitoring/grafana-dashboards.yaml b/stacks/monitoring/grafana-dashboards.yaml index 8687b88f..de5106b3 100644 --- a/stacks/monitoring/grafana-dashboards.yaml +++ b/stacks/monitoring/grafana-dashboards.yaml @@ -7439,9 +7439,8 @@ data: "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, - "id": 3, + "id": 4, "links": [], - "liveNow": false, "panels": [ { "datasource": { @@ -7459,8 +7458,7 @@ data: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -7483,6 +7481,7 @@ data: "graphMode": "none", "justifyMode": "auto", "orientation": "auto", + "percentChangeColorMode": "standard", "reduceOptions": { "calcs": [ 
"lastNotNull" @@ -7490,9 +7489,11 @@ data: "fields": "", "values": false }, - "textMode": "auto" + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "9.2.4", + "pluginVersion": "12.0.2", "targets": [ { "datasource": { @@ -7525,8 +7526,7 @@ data: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -7549,6 +7549,7 @@ data: "graphMode": "none", "justifyMode": "auto", "orientation": "auto", + "percentChangeColorMode": "standard", "reduceOptions": { "calcs": [ "lastNotNull" @@ -7556,9 +7557,11 @@ data: "fields": "", "values": false }, - "textMode": "auto" + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "9.2.4", + "pluginVersion": "12.0.2", "targets": [ { "datasource": { @@ -7591,8 +7594,7 @@ data: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -7616,6 +7618,7 @@ data: "graphMode": "none", "justifyMode": "auto", "orientation": "auto", + "percentChangeColorMode": "standard", "reduceOptions": { "calcs": [ "lastNotNull" @@ -7623,9 +7626,11 @@ data: "fields": "", "values": false }, - "textMode": "auto" + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "9.2.4", + "pluginVersion": "12.0.2", "targets": [ { "datasource": { @@ -7643,9 +7648,9 @@ data: "type": "stat" }, { - "datasource": { - "type": "prometheus", - "uid": "prometheus" + "fieldConfig": { + "defaults": {}, + "overrides": [] }, "gridPos": { "h": 3, @@ -7663,20 +7668,7 @@ data: "content": "

$cluster

", "mode": "html" }, - "pluginVersion": "9.2.4", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "expr": "", - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], + "pluginVersion": "12.0.2", "title": "Cluster name", "type": "text" }, @@ -7695,8 +7687,7 @@ data: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -7719,6 +7710,7 @@ data: "graphMode": "none", "justifyMode": "auto", "orientation": "auto", + "percentChangeColorMode": "standard", "reduceOptions": { "calcs": [ "lastNotNull" @@ -7726,9 +7718,11 @@ data: "fields": "", "values": false }, - "textMode": "auto" + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "9.2.4", + "pluginVersion": "12.0.2", "targets": [ { "datasource": { @@ -7737,7 +7731,7 @@ data: }, "editorMode": "code", "exemplar": false, - "expr": "count(hadoop_journalnode_version{namespace=~\"$namespace\",job=~\"$cluster\"})", + "expr": "count(journal_node_bytes_written{namespace=~\"$namespace\",app_kubernetes_io_instance=~\"$cluster\"})", "instant": true, "legendFormat": "__auto", "range": false, @@ -7762,8 +7756,7 @@ data: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -7786,6 +7779,7 @@ data: "graphMode": "none", "justifyMode": "auto", "orientation": "auto", + "percentChangeColorMode": "standard", "reduceOptions": { "calcs": [ "lastNotNull" @@ -7793,9 +7787,11 @@ data: "fields": "", "values": false }, - "textMode": "auto" + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "9.2.4", + "pluginVersion": "12.0.2", "targets": [ { "datasource": { @@ -7804,7 +7800,7 @@ data: }, "editorMode": "code", "exemplar": false, - "expr": "count(hadoop_namenode_version{namespace=~\"$namespace\",job=~\"$cluster\"})", + "expr": 
"count(name_node_name_node_state{namespace=~\"$namespace\",app_kubernetes_io_instance=~\"$cluster\"})", "instant": true, "legendFormat": "__auto", "range": false, @@ -7829,8 +7825,7 @@ data: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -7853,6 +7848,7 @@ data: "graphMode": "none", "justifyMode": "auto", "orientation": "auto", + "percentChangeColorMode": "standard", "reduceOptions": { "calcs": [ "lastNotNull" @@ -7860,9 +7856,11 @@ data: "fields": "", "values": false }, - "textMode": "auto" + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "9.2.4", + "pluginVersion": "12.0.2", "targets": [ { "datasource": { @@ -7871,7 +7869,7 @@ data: }, "editorMode": "code", "exemplar": false, - "expr": "count(hadoop_datanode_version{namespace=~\"$namespace\",job=~\"$cluster\"})", + "expr": "count(datanode_data_node_packet_responder_count{namespace=~\"$namespace\",app_kubernetes_io_instance=~\"$cluster\"})", "instant": true, "legendFormat": "__auto", "range": false, @@ -7892,12 +7890,14 @@ data: "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "axisSoftMin": 0, "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "opacity", @@ -7906,6 +7906,7 @@ data: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineStyle": { "fill": "solid" @@ -7930,8 +7931,7 @@ data: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -8007,10 +8007,12 @@ data: "showLegend": false }, "tooltip": { + "hideZeros": false, "mode": "multi", "sort": "none" } }, + "pluginVersion": "12.0.2", "targets": [ { "datasource": { @@ -8063,12 +8065,14 @@ data: "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, 
"axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "axisSoftMin": 0, "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "opacity", @@ -8077,6 +8081,7 @@ data: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -8098,8 +8103,7 @@ data: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -8176,10 +8180,12 @@ data: "showLegend": false }, "tooltip": { + "hideZeros": false, "mode": "multi", "sort": "none" } }, + "pluginVersion": "12.0.2", "targets": [ { "datasource": { @@ -8220,12 +8226,14 @@ data: "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "axisSoftMin": 0, "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 25, "gradientMode": "opacity", @@ -8234,6 +8242,7 @@ data: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -8255,8 +8264,7 @@ data: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -8331,10 +8339,12 @@ data: "showLegend": true }, "tooltip": { + "hideZeros": false, "mode": "multi", "sort": "none" } }, + "pluginVersion": "12.0.2", "targets": [ { "datasource": { @@ -8447,11 +8457,12 @@ data: "values": false }, "tooltip": { + "hideZeros": false, "mode": "multi", "sort": "none" } }, - "pluginVersion": "9.2.4", + "pluginVersion": "12.0.2", "targets": [ { "datasource": { @@ -8459,7 +8470,7 @@ data: "uid": "prometheus" }, "editorMode": "code", - "expr": "min(hadoop_namenode_capacity_remaining{namespace=~\"$namespace\",job=~\"$cluster\"})", + "expr": "min(fs_namesystem_capacity_remaining{namespace=~\"$namespace\",app_kubernetes_io_instance=~\"$cluster\"})", "legendFormat": "remaining", "range": true, 
"refId": "A" @@ -8470,7 +8481,7 @@ data: "uid": "prometheus" }, "editorMode": "code", - "expr": "max(hadoop_namenode_capacity_used{namespace=~\"$namespace\",job=~\"$cluster\"})", + "expr": "max(fs_namesystem_capacity_used{namespace=~\"$namespace\",app_kubernetes_io_instance=~\"$cluster\"})", "hide": false, "legendFormat": "used", "range": true, @@ -8496,8 +8507,7 @@ data: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -8521,6 +8531,7 @@ data: "graphMode": "none", "justifyMode": "auto", "orientation": "auto", + "percentChangeColorMode": "standard", "reduceOptions": { "calcs": [ "lastNotNull" @@ -8528,9 +8539,11 @@ data: "fields": "", "values": false }, - "textMode": "auto" + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "9.2.4", + "pluginVersion": "12.0.2", "targets": [ { "datasource": { @@ -8538,7 +8551,7 @@ data: "uid": "prometheus" }, "editorMode": "code", - "expr": "min(hadoop_namenode_capacity_total{namespace=~\"$namespace\",job=~\"$cluster\"})", + "expr": "min(fs_namesystem_capacity_total{namespace=~\"$namespace\",app_kubernetes_io_instance=~\"$cluster\"})", "legendFormat": "__auto", "range": true, "refId": "A" @@ -8558,12 +8571,14 @@ data: "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "axisSoftMin": 0, "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 25, "gradientMode": "opacity", @@ -8572,6 +8587,7 @@ data: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -8593,8 +8609,7 @@ data: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -8669,10 +8684,12 @@ data: "showLegend": true }, "tooltip": { + "hideZeros": false, "mode": "multi", "sort": "none" } }, + "pluginVersion": 
"12.0.2", "targets": [ { "datasource": { @@ -8680,7 +8697,7 @@ data: "uid": "prometheus" }, "editorMode": "code", - "expr": "sum(max by(pod)(rate(hadoop_datanode_bytes_read{namespace=~\"$namespace\",job=~\"$cluster\"}[$__rate_interval])))*8", + "expr": "sum(max by(pod)(rate(datanode_bytes_read{namespace=~\"$namespace\",app_kubernetes_io_instance=~\"$cluster\"}[$__rate_interval])))*8", "hide": false, "legendFormat": "rx", "range": true, @@ -8692,7 +8709,7 @@ data: "uid": "prometheus" }, "editorMode": "code", - "expr": "-sum(max by(pod)(rate(hadoop_datanode_bytes_written{namespace=~\"$namespace\",job=~\"$cluster\"}[$__rate_interval])))*8", + "expr": "-sum(max by(pod)(rate(datanode_bytes_written{namespace=~\"$namespace\",app_kubernetes_io_instance=~\"$cluster\"}[$__rate_interval])))*8", "hide": false, "legendFormat": "tx", "range": true, @@ -8713,11 +8730,13 @@ data: "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", @@ -8726,6 +8745,7 @@ data: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -8747,8 +8767,7 @@ data: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -8760,7 +8779,7 @@ data: "overrides": [] }, "gridPos": { - "h": 5, + "h": 7, "w": 10, "x": 14, "y": 10 @@ -8777,10 +8796,12 @@ data: "showLegend": true }, "tooltip": { + "hideZeros": false, "mode": "single", "sort": "none" } }, + "pluginVersion": "12.0.2", "targets": [ { "datasource": { @@ -8788,7 +8809,7 @@ data: "uid": "prometheus" }, "editorMode": "code", - "expr": "max(hadoop_namenode_files_total{namespace=~\"$namespace\",job=~\"$cluster\"})", + "expr": "max(fs_namesystem_files_total{namespace=~\"$namespace\",app_kubernetes_io_instance=~\"$cluster\"})", 
"legendFormat": "total", "range": true, "refId": "A" @@ -8799,7 +8820,7 @@ data: "uid": "prometheus" }, "editorMode": "code", - "expr": "max(hadoop_namenode_num_files_under_construction{namespace=~\"$namespace\",job=~\"$cluster\"})", + "expr": "max(fs_namesystem_num_files_under_construction{namespace=~\"$namespace\",app_kubernetes_io_instance=~\"$cluster\"})", "hide": false, "legendFormat": "under construction", "range": true, @@ -8825,8 +8846,7 @@ data: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -8850,6 +8870,7 @@ data: "graphMode": "none", "justifyMode": "auto", "orientation": "auto", + "percentChangeColorMode": "standard", "reduceOptions": { "calcs": [ "lastNotNull" @@ -8857,9 +8878,11 @@ data: "fields": "", "values": false }, - "textMode": "auto" + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "9.2.4", + "pluginVersion": "12.0.2", "targets": [ { "datasource": { @@ -8867,7 +8890,7 @@ data: "uid": "prometheus" }, "editorMode": "code", - "expr": "min(hadoop_namenode_capacity_used{namespace=~\"$namespace\",job=~\"$cluster\"})", + "expr": "min(fs_namesystem_capacity_used{namespace=~\"$namespace\",app_kubernetes_io_instance=~\"$cluster\"})", "legendFormat": "__auto", "range": true, "refId": "A" @@ -8876,6 +8899,75 @@ data: "title": "DFS used", "type": "stat" }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "green", + "mode": "fixed" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 4, + "y": 16 + }, + "id": 31, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + 
"reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "min(fs_namesystem_capacity_remaining{namespace=~\"$namespace\",app_kubernetes_io_instance=~\"$cluster\"})", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "DFS remaining", + "type": "stat" + }, { "datasource": { "type": "prometheus", @@ -8887,11 +8979,13 @@ data: "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", @@ -8900,6 +8994,7 @@ data: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -8921,8 +9016,7 @@ data: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -8995,10 +9089,10 @@ data: ] }, "gridPos": { - "h": 5, + "h": 7, "w": 10, "x": 14, - "y": 15 + "y": 17 }, "id": 35, "options": { @@ -9012,10 +9106,12 @@ data: "showLegend": true }, "tooltip": { + "hideZeros": false, "mode": "single", "sort": "none" } }, + "pluginVersion": "12.0.2", "targets": [ { "datasource": { @@ -9023,7 +9119,7 @@ data: "uid": "prometheus" }, "editorMode": "code", - "expr": "max(hadoop_namenode_blocks_total{namespace=~\"$namespace\",job=~\"$cluster\"})", + "expr": "max(fs_namesystem_blocks_total{namespace=~\"$namespace\",app_kubernetes_io_instance=~\"$cluster\"})", "legendFormat": "total", "range": true, "refId": "A" @@ -9034,7 +9130,7 @@ data: "uid": "prometheus" }, "editorMode": "code", - "expr": "max(hadoop_namenode_missing_blocks{namespace=~\"$namespace\",job=~\"$cluster\"})", + "expr": 
"max(fs_namesystem_missing_blocks{namespace=~\"$namespace\",app_kubernetes_io_instance=~\"$cluster\"})", "hide": false, "legendFormat": "mssing", "range": true, @@ -9046,7 +9142,7 @@ data: "uid": "prometheus" }, "editorMode": "code", - "expr": "max(hadoop_namenode_corrupt_blocks{namespace=~\"$namespace\",job=~\"$cluster\"})", + "expr": "max(fs_namesystem_corrupt_blocks{namespace=~\"$namespace\",app_kubernetes_io_instance=~\"$cluster\"})", "hide": false, "legendFormat": "corrupt", "range": true, @@ -9058,7 +9154,7 @@ data: "uid": "prometheus" }, "editorMode": "code", - "expr": "max(hadoop_namenode_under_replicated_blocks{namespace=~\"$namespace\",job=~\"$cluster\"})", + "expr": "max(fs_namesystem_under_replicated_blocks{namespace=~\"$namespace\",app_kubernetes_io_instance=~\"$cluster\"})", "hide": false, "legendFormat": "underreplicated", "range": true, @@ -9070,7 +9166,7 @@ data: "uid": "prometheus" }, "editorMode": "code", - "expr": "max(hadoop_namenode_excess_blocks{namespace=~\"$namespace\",job=~\"$cluster\"})", + "expr": "max(fs_namesystem_excess_blocks{namespace=~\"$namespace\",app_kubernetes_io_instance=~\"$cluster\"})", "hide": false, "legendFormat": "excess", "range": true, @@ -9080,73 +9176,6 @@ data: "title": "Blocks", "type": "timeseries" }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "color": { - "fixedColor": "green", - "mode": "fixed" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 3, - "w": 3, - "x": 4, - "y": 16 - }, - "id": 31, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "pluginVersion": "9.2.4", - "targets": [ 
- { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "expr": "min(hadoop_namenode_capacity_remaining{namespace=~\"$namespace\",job=~\"$cluster\"})", - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "title": "DFS remaining", - "type": "stat" - }, { "datasource": { "type": "prometheus", @@ -9158,11 +9187,13 @@ data: "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", @@ -9171,6 +9202,7 @@ data: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -9192,8 +9224,7 @@ data: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -9206,7 +9237,7 @@ data: }, "gridPos": { "h": 5, - "w": 6, + "w": 7, "x": 0, "y": 19 }, @@ -9219,10 +9250,12 @@ data: "showLegend": false }, "tooltip": { + "hideZeros": false, "mode": "single", "sort": "none" } }, + "pluginVersion": "12.0.2", "targets": [ { "datasource": { @@ -9230,7 +9263,7 @@ data: "uid": "prometheus" }, "editorMode": "code", - "expr": "max(hadoop_namenode_total_load{namespace=~\"$namespace\",job=~\"$cluster\"})", + "expr": "max(fs_namesystem_total_load{namespace=~\"$namespace\",app_kubernetes_io_instance=~\"$cluster\"})", "legendFormat": "__auto", "range": true, "refId": "A" @@ -9250,12 +9283,14 @@ data: "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "axisSoftMin": 0, "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "opacity", @@ -9264,6 +9299,7 @@ data: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, 
"pointSize": 5, @@ -9285,8 +9321,7 @@ data: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -9350,8 +9385,8 @@ data: }, "gridPos": { "h": 5, - "w": 6, - "x": 6, + "w": 7, + "x": 7, "y": 19 }, "id": 23, @@ -9363,10 +9398,12 @@ data: "showLegend": false }, "tooltip": { + "hideZeros": false, "mode": "multi", "sort": "none" } }, + "pluginVersion": "12.0.2", "targets": [ { "datasource": { @@ -9397,22 +9434,25 @@ data: "type": "timeseries" } ], + "preload": false, "refresh": false, - "schemaVersion": 37, - "style": "dark", + "schemaVersion": 41, "tags": [], "templating": { "list": [ { + "current": { + "text": "hdfs", + "value": "hdfs" + }, "hide": 2, "name": "product", "query": "hdfs", - "skipUrlSync": false, + "skipUrlSync": true, "type": "constant" }, { "current": { - "selected": false, "text": "default", "value": "default" }, @@ -9420,47 +9460,42 @@ data: "type": "prometheus", "uid": "prometheus" }, - "definition": "label_values(hadoop_namenode_blocks_total, namespace)", - "hide": 0, + "definition": "label_values(fs_namesystem_blocks_total,namespace)", "includeAll": false, "label": "Namespace", - "multi": false, "name": "namespace", "options": [], "query": { - "query": "label_values(hadoop_namenode_blocks_total, namespace)", - "refId": "StandardVariableQuery" + "qryType": 1, + "query": "label_values(fs_namesystem_blocks_total,namespace)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, "regex": "", - "skipUrlSync": false, "sort": 1, "type": "query" }, { "current": { - "selected": false, - "text": "hdfs", - "value": "hdfs" + "text": "simple-hdfs", + "value": "simple-hdfs" }, "datasource": { "type": "prometheus", "uid": "prometheus" }, - "definition": "label_values(hadoop_namenode_blocks_total{namespace=~\"$namespace\"}, job)", - "hide": 0, + "definition": "label_values(fs_namesystem_blocks_total{namespace=~\"$namespace\"},app_kubernetes_io_instance)", "includeAll": false, "label": 
"Cluster", - "multi": false, "name": "cluster", "options": [], "query": { - "query": "label_values(hadoop_namenode_blocks_total{namespace=~\"$namespace\"}, job)", - "refId": "StandardVariableQuery" + "qryType": 1, + "query": "label_values(fs_namesystem_blocks_total{namespace=~\"$namespace\"},app_kubernetes_io_instance)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 2, "regex": "", - "skipUrlSync": false, "sort": 1, "type": "query" } @@ -9474,8 +9509,7 @@ data: "timezone": "", "title": "HDFS", "uid": "qjZGZ3AVz", - "version": 4, - "weekStart": "" + "version": 4 } hbase.json: | { From 06722487d06a788e5684a94b27180e890eff344e Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Wed, 23 Jul 2025 13:31:36 +0200 Subject: [PATCH 07/17] Update Kafka --- stacks/monitoring/grafana-dashboards.yaml | 184 ++++++++++++---------- 1 file changed, 104 insertions(+), 80 deletions(-) diff --git a/stacks/monitoring/grafana-dashboards.yaml b/stacks/monitoring/grafana-dashboards.yaml index de5106b3..8a310913 100644 --- a/stacks/monitoring/grafana-dashboards.yaml +++ b/stacks/monitoring/grafana-dashboards.yaml @@ -1981,9 +1981,8 @@ data: "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, - "id": 3, + "id": 5, "links": [], - "liveNow": false, "panels": [ { "datasource": { @@ -2001,8 +2000,7 @@ data: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -2025,6 +2023,7 @@ data: "graphMode": "none", "justifyMode": "auto", "orientation": "auto", + "percentChangeColorMode": "standard", "reduceOptions": { "calcs": [ "lastNotNull" @@ -2032,9 +2031,11 @@ data: "fields": "", "values": false }, - "textMode": "auto" + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "9.2.4", + "pluginVersion": "12.0.2", "targets": [ { "datasource": { @@ -2067,8 +2068,7 @@ data: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { 
"color": "red", @@ -2091,6 +2091,7 @@ data: "graphMode": "none", "justifyMode": "auto", "orientation": "auto", + "percentChangeColorMode": "standard", "reduceOptions": { "calcs": [ "lastNotNull" @@ -2098,9 +2099,11 @@ data: "fields": "", "values": false }, - "textMode": "auto" + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "9.2.4", + "pluginVersion": "12.0.2", "targets": [ { "datasource": { @@ -2133,8 +2136,7 @@ data: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -2158,6 +2160,7 @@ data: "graphMode": "none", "justifyMode": "auto", "orientation": "auto", + "percentChangeColorMode": "standard", "reduceOptions": { "calcs": [ "lastNotNull" @@ -2165,9 +2168,11 @@ data: "fields": "", "values": false }, - "textMode": "auto" + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "9.2.4", + "pluginVersion": "12.0.2", "targets": [ { "datasource": { @@ -2185,9 +2190,9 @@ data: "type": "stat" }, { - "datasource": { - "type": "prometheus", - "uid": "prometheus" + "fieldConfig": { + "defaults": {}, + "overrides": [] }, "gridPos": { "h": 3, @@ -2205,20 +2210,7 @@ data: "content": "

$cluster

", "mode": "html" }, - "pluginVersion": "9.2.4", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "expr": "", - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], + "pluginVersion": "12.0.2", "title": "Cluster name", "type": "text" }, @@ -2237,8 +2229,7 @@ data: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -2261,6 +2252,7 @@ data: "graphMode": "none", "justifyMode": "auto", "orientation": "auto", + "percentChangeColorMode": "standard", "reduceOptions": { "calcs": [ "lastNotNull" @@ -2268,9 +2260,11 @@ data: "fields": "", "values": false }, - "textMode": "auto" + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "9.2.4", + "pluginVersion": "12.0.2", "targets": [ { "datasource": { @@ -2279,7 +2273,7 @@ data: }, "editorMode": "code", "exemplar": false, - "expr": "max(kafka_controller_kafkacontroller_activebrokercount{namespace=~\"$namespace\",job=~\"$cluster\"})", + "expr": "max(kafka_controller_kafkacontroller_activebrokercount{namespace=~\"$namespace\",app_kubernetes_io_instance=~\"$cluster\"})", "instant": true, "legendFormat": "__auto", "range": false, @@ -2304,8 +2298,7 @@ data: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -2328,6 +2321,7 @@ data: "graphMode": "none", "justifyMode": "auto", "orientation": "auto", + "percentChangeColorMode": "standard", "reduceOptions": { "calcs": [ "lastNotNull" @@ -2335,9 +2329,11 @@ data: "fields": "", "values": false }, - "textMode": "auto" + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "9.2.4", + "pluginVersion": "12.0.2", "targets": [ { "datasource": { @@ -2346,7 +2342,7 @@ data: }, "editorMode": "code", "exemplar": false, - "expr": "count(count by (topic) 
(kafka_server_brokertopicmetrics_messagesin_total{namespace=~\"$namespace\",job=~\"$cluster\",topic!=\"\"}))", + "expr": "count(count by (topic) (kafka_server_brokertopicmetrics_messagesin_total{namespace=~\"$namespace\",app_kubernetes_io_instance=~\"$cluster\",topic!=\"\"}))", "instant": true, "legendFormat": "__auto", "range": false, @@ -2367,12 +2363,14 @@ data: "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "axisSoftMin": 0, "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "opacity", @@ -2381,6 +2379,7 @@ data: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineStyle": { "fill": "solid" @@ -2405,8 +2404,7 @@ data: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -2482,10 +2480,12 @@ data: "showLegend": false }, "tooltip": { + "hideZeros": false, "mode": "multi", "sort": "none" } }, + "pluginVersion": "12.0.2", "targets": [ { "datasource": { @@ -2538,12 +2538,14 @@ data: "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "axisSoftMin": 0, "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "opacity", @@ -2552,6 +2554,7 @@ data: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -2573,8 +2576,7 @@ data: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -2651,10 +2653,12 @@ data: "showLegend": false }, "tooltip": { + "hideZeros": false, "mode": "multi", "sort": "none" } }, + "pluginVersion": "12.0.2", "targets": [ { "datasource": { @@ -2695,12 +2699,14 @@ data: "mode": "palette-classic" }, "custom": { + 
"axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "axisSoftMin": 0, "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 25, "gradientMode": "opacity", @@ -2709,6 +2715,7 @@ data: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -2730,8 +2737,7 @@ data: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -2806,10 +2812,12 @@ data: "showLegend": true }, "tooltip": { + "hideZeros": false, "mode": "multi", "sort": "none" } }, + "pluginVersion": "12.0.2", "targets": [ { "datasource": { @@ -2850,11 +2858,13 @@ data: "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", @@ -2863,6 +2873,7 @@ data: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -2884,8 +2895,7 @@ data: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -2916,10 +2926,12 @@ data: "sortDesc": true }, "tooltip": { + "hideZeros": false, "mode": "single", "sort": "none" } }, + "pluginVersion": "12.0.2", "targets": [ { "datasource": { @@ -2927,7 +2939,7 @@ data: "uid": "prometheus" }, "editorMode": "code", - "expr": "sum by(topic)(rate(kafka_server_brokertopicmetrics_messagesin_total{namespace=~\"$namespace\",job=~\"$cluster\",topic!=\"\"}[$__rate_interval]))", + "expr": "sum by(topic)(rate(kafka_server_brokertopicmetrics_messagesin_total{namespace=~\"$namespace\",app_kubernetes_io_instance=~\"$cluster\",topic!=\"\"}[$__rate_interval]))", "legendFormat": "{{topic}}", "range": true, "refId": "A" @@ -2947,11 +2959,13 @@ data: "mode": 
"palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", @@ -2960,6 +2974,7 @@ data: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -2981,8 +2996,7 @@ data: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -3013,10 +3027,12 @@ data: "sortDesc": true }, "tooltip": { + "hideZeros": false, "mode": "single", "sort": "none" } }, + "pluginVersion": "12.0.2", "targets": [ { "datasource": { @@ -3024,7 +3040,7 @@ data: "uid": "prometheus" }, "editorMode": "code", - "expr": "sum by(topic)(rate(kafka_server_brokertopicmetrics_bytesin_total{namespace=~\"$namespace\",job=~\"$cluster\",topic!=\"\"}[$__rate_interval]))*8", + "expr": "sum by(topic)(rate(kafka_server_brokertopicmetrics_bytesin_total{namespace=~\"$namespace\",app_kubernetes_io_instance=~\"$cluster\",topic!=\"\"}[$__rate_interval]))*8", "legendFormat": "{{topic}}", "range": true, "refId": "A" @@ -3044,11 +3060,13 @@ data: "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", @@ -3057,6 +3075,7 @@ data: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -3078,8 +3097,7 @@ data: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -3110,10 +3128,12 @@ data: "sortDesc": true }, "tooltip": { + "hideZeros": false, "mode": "single", "sort": "none" } }, + "pluginVersion": "12.0.2", "targets": [ { "datasource": { @@ -3121,7 +3141,7 @@ data: "uid": 
"prometheus" }, "editorMode": "code", - "expr": "sum by(topic)(kafka_log_log_size{namespace=~\"$namespace\",job=~\"$cluster\",topic!=\"\"})", + "expr": "sum by(topic)(kafka_log_log_size{namespace=~\"$namespace\",app_kubernetes_io_instance=~\"$cluster\",topic!=\"\"})", "legendFormat": "{{topic}}", "range": true, "refId": "A" @@ -3141,12 +3161,14 @@ data: "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "axisSoftMin": 0, "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "opacity", @@ -3155,6 +3177,7 @@ data: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -3176,8 +3199,7 @@ data: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -3254,10 +3276,12 @@ data: "showLegend": false }, "tooltip": { + "hideZeros": false, "mode": "multi", "sort": "none" } }, + "pluginVersion": "12.0.2", "targets": [ { "datasource": { @@ -3298,12 +3322,14 @@ data: "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "axisSoftMin": 0, "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 25, "gradientMode": "opacity", @@ -3312,6 +3338,7 @@ data: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -3333,8 +3360,7 @@ data: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -3410,10 +3436,12 @@ data: "showLegend": true }, "tooltip": { + "hideZeros": false, "mode": "multi", "sort": "none" } }, + "pluginVersion": "12.0.2", "targets": [ { "datasource": { @@ -3421,7 +3449,7 @@ data: "uid": "prometheus" }, "editorMode": "code", - "expr": 
"sum(rate(kafka_server_kafkaserver_linux_disk_read_bytes{namespace=~\"$namespace\",job=~\"$cluster\"}[$__rate_interval]))*8", + "expr": "sum(rate(kafka_server_kafkaserver_linux_disk_read_bytes{namespace=~\"$namespace\",app_kubernetes_io_instance=~\"$cluster\"}[$__rate_interval]))*8", "hide": false, "legendFormat": "rx", "range": true, @@ -3433,7 +3461,7 @@ data: "uid": "prometheus" }, "editorMode": "code", - "expr": "-sum(rate(kafka_server_kafkaserver_linux_disk_write_bytes{namespace=~\"$namespace\",job=~\"$cluster\"}[$__rate_interval]))*8", + "expr": "-sum(rate(kafka_server_kafkaserver_linux_disk_write_bytes{namespace=~\"$namespace\",app_kubernetes_io_instance=~\"$cluster\"}[$__rate_interval]))*8", "hide": false, "legendFormat": "tx", "range": true, @@ -3444,22 +3472,25 @@ data: "type": "timeseries" } ], + "preload": false, "refresh": false, - "schemaVersion": 37, - "style": "dark", + "schemaVersion": 41, "tags": [], "templating": { "list": [ { + "current": { + "text": "kafka", + "value": "kafka" + }, "hide": 2, "name": "product", "query": "kafka", - "skipUrlSync": false, + "skipUrlSync": true, "type": "constant" }, { "current": { - "selected": false, "text": "default", "value": "default" }, @@ -3468,10 +3499,8 @@ data: "uid": "prometheus" }, "definition": "label_values(kafka_server_kafkaserver_brokerstate, namespace)", - "hide": 0, "includeAll": false, "label": "Namespace", - "multi": false, "name": "namespace", "options": [], "query": { @@ -3480,34 +3509,30 @@ data: }, "refresh": 1, "regex": "", - "skipUrlSync": false, "sort": 1, "type": "query" }, { "current": { - "selected": false, - "text": "kafka", - "value": "kafka" + "text": "simple-kafka", + "value": "simple-kafka" }, "datasource": { "type": "prometheus", "uid": "prometheus" }, - "definition": "label_values(kafka_server_kafkaserver_brokerstate{namespace=~\"$namespace\"}, job)", - "hide": 0, + "definition": 
"label_values(kafka_server_kafkaserver_brokerstate{namespace=~\"$namespace\"},app_kubernetes_io_instance)", "includeAll": false, "label": "Cluster", - "multi": false, "name": "cluster", "options": [], "query": { - "query": "label_values(kafka_server_kafkaserver_brokerstate{namespace=~\"$namespace\"}, job)", - "refId": "StandardVariableQuery" + "qryType": 1, + "query": "label_values(kafka_server_kafkaserver_brokerstate{namespace=~\"$namespace\"},app_kubernetes_io_instance)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 2, "regex": "", - "skipUrlSync": false, "sort": 1, "type": "query" } @@ -3521,8 +3546,7 @@ data: "timezone": "", "title": "Kafka", "uid": "C_8qIX04k", - "version": 4, - "weekStart": "" + "version": 3 } minio.json: | { From 758283145a4cc692ecb0b09424371676aee40966 Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Wed, 23 Jul 2025 13:33:24 +0200 Subject: [PATCH 08/17] Update Trino --- stacks/monitoring/grafana-dashboards.yaml | 178 ++++++++++++---------- 1 file changed, 100 insertions(+), 78 deletions(-) diff --git a/stacks/monitoring/grafana-dashboards.yaml b/stacks/monitoring/grafana-dashboards.yaml index 8a310913..91e82d80 100644 --- a/stacks/monitoring/grafana-dashboards.yaml +++ b/stacks/monitoring/grafana-dashboards.yaml @@ -315,9 +315,8 @@ data: "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, - "id": 2, + "id": 8, "links": [], - "liveNow": false, "panels": [ { "datasource": { @@ -335,8 +334,7 @@ data: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -359,6 +357,7 @@ data: "graphMode": "none", "justifyMode": "auto", "orientation": "auto", + "percentChangeColorMode": "standard", "reduceOptions": { "calcs": [ "lastNotNull" @@ -366,9 +365,11 @@ data: "fields": "", "values": false }, - "textMode": "auto" + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "9.2.4", + "pluginVersion": "12.0.2", 
"targets": [ { "datasource": { @@ -401,8 +402,7 @@ data: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -425,6 +425,7 @@ data: "graphMode": "none", "justifyMode": "auto", "orientation": "auto", + "percentChangeColorMode": "standard", "reduceOptions": { "calcs": [ "lastNotNull" @@ -432,9 +433,11 @@ data: "fields": "", "values": false }, - "textMode": "auto" + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "9.2.4", + "pluginVersion": "12.0.2", "targets": [ { "datasource": { @@ -467,8 +470,7 @@ data: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -492,6 +494,7 @@ data: "graphMode": "none", "justifyMode": "auto", "orientation": "auto", + "percentChangeColorMode": "standard", "reduceOptions": { "calcs": [ "lastNotNull" @@ -499,9 +502,11 @@ data: "fields": "", "values": false }, - "textMode": "auto" + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "9.2.4", + "pluginVersion": "12.0.2", "targets": [ { "datasource": { @@ -519,9 +524,9 @@ data: "type": "stat" }, { - "datasource": { - "type": "prometheus", - "uid": "prometheus" + "fieldConfig": { + "defaults": {}, + "overrides": [] }, "gridPos": { "h": 3, @@ -539,20 +544,7 @@ data: "content": "

$cluster

", "mode": "html" }, - "pluginVersion": "9.2.4", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "expr": "", - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], + "pluginVersion": "12.0.2", "title": "Cluster name", "type": "text" }, @@ -571,8 +563,7 @@ data: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -595,6 +586,7 @@ data: "graphMode": "none", "justifyMode": "auto", "orientation": "auto", + "percentChangeColorMode": "standard", "reduceOptions": { "calcs": [ "lastNotNull" @@ -602,9 +594,11 @@ data: "fields": "", "values": false }, - "textMode": "auto" + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "9.2.4", + "pluginVersion": "12.0.2", "targets": [ { "datasource": { @@ -613,7 +607,7 @@ data: }, "editorMode": "code", "exemplar": false, - "expr": "trino_memory_clustermemorypool_nodes{namespace=~\"$namespace\",job=~\"$cluster\"} - 1 # Subtract 1 for coordinator", + "expr": "trino_memory_clustermemorypool_nodes{namespace=~\"$namespace\",app_kubernetes_io_instance=~\"$cluster\"} - 1 # Subtract 1 for coordinator", "instant": true, "legendFormat": "__auto", "range": false, @@ -634,12 +628,14 @@ data: "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "axisSoftMin": 0, "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "opacity", @@ -648,6 +644,7 @@ data: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineStyle": { "fill": "solid" @@ -672,8 +669,7 @@ data: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -749,10 +745,12 @@ data: "showLegend": false }, "tooltip": { + "hideZeros": false, "mode": "multi", "sort": "none" } 
}, + "pluginVersion": "12.0.2", "targets": [ { "datasource": { @@ -805,12 +803,14 @@ data: "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "axisSoftMin": 0, "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "opacity", @@ -819,6 +819,7 @@ data: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -840,8 +841,7 @@ data: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -918,10 +918,12 @@ data: "showLegend": false }, "tooltip": { + "hideZeros": false, "mode": "multi", "sort": "none" } }, + "pluginVersion": "12.0.2", "targets": [ { "datasource": { @@ -962,12 +964,14 @@ data: "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "axisSoftMin": 0, "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 25, "gradientMode": "opacity", @@ -976,6 +980,7 @@ data: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -997,8 +1002,7 @@ data: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1073,10 +1077,12 @@ data: "showLegend": true }, "tooltip": { + "hideZeros": false, "mode": "multi", "sort": "none" } }, + "pluginVersion": "12.0.2", "targets": [ { "datasource": { @@ -1117,11 +1123,13 @@ data: "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 25, "gradientMode": "opacity", @@ -1130,6 +1138,7 @@ data: "tooltip": false, "viz": false }, + 
"insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -1151,8 +1160,7 @@ data: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1227,10 +1235,12 @@ data: "showLegend": true }, "tooltip": { + "hideZeros": false, "mode": "single", "sort": "none" } }, + "pluginVersion": "12.0.2", "targets": [ { "datasource": { @@ -1238,7 +1248,7 @@ data: "uid": "prometheus" }, "editorMode": "code", - "expr": "sum by (namespae, job) (trino_execution_querymanager_runningqueries{namespace=~\"$namespace\",job=~\"$cluster\"})", + "expr": "sum by (namespae, job) (trino_execution_querymanager_runningqueries{namespace=~\"$namespace\",app_kubernetes_io_instance=~\"$cluster\"})", "legendFormat": "running", "range": true, "refId": "A" @@ -1249,7 +1259,7 @@ data: "uid": "prometheus" }, "editorMode": "code", - "expr": "sum by (namespae, job) (trino_execution_querymanager_queuedqueries{namespace=~\"$namespace\",job=~\"$cluster\"})", + "expr": "sum by (namespae, job) (trino_execution_querymanager_queuedqueries{namespace=~\"$namespace\",app_kubernetes_io_instance=~\"$cluster\"})", "hide": false, "legendFormat": "queueud", "range": true, @@ -1270,11 +1280,13 @@ data: "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", @@ -1283,6 +1295,7 @@ data: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -1304,8 +1317,7 @@ data: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1379,10 +1391,12 @@ data: "showLegend": true }, "tooltip": { + "hideZeros": false, "mode": "single", "sort": "none" } }, + "pluginVersion": "12.0.2", "targets": [ { "datasource": { @@ -1390,7 
+1404,7 @@ data: "uid": "prometheus" }, "editorMode": "code", - "expr": "sum by (namespae, job) (trino_execution_querymanager_completedqueries_totalcount{namespace=~\"$namespace\",job=~\"$cluster\"})", + "expr": "sum by (namespae, job) (trino_execution_querymanager_completedqueries_totalcount{namespace=~\"$namespace\",app_kubernetes_io_instance=~\"$cluster\"})", "legendFormat": "completed", "range": true, "refId": "A" @@ -1401,7 +1415,7 @@ data: "uid": "prometheus" }, "editorMode": "code", - "expr": "sum by (namespae, job) (trino_execution_querymanager_failedqueries_totalcount{namespace=~\"$namespace\",job=~\"$cluster\"})", + "expr": "sum by (namespae, job) (trino_execution_querymanager_failedqueries_totalcount{namespace=~\"$namespace\",app_kubernetes_io_instance=~\"$cluster\"})", "hide": false, "legendFormat": "failed", "range": true, @@ -1413,7 +1427,7 @@ data: "uid": "prometheus" }, "editorMode": "code", - "expr": "sum by (namespae, job) (trino_execution_querymanager_abandonedqueries_totalcount{namespace=~\"$namespace\",job=~\"$cluster\"})", + "expr": "sum by (namespae, job) (trino_execution_querymanager_abandonedqueries_totalcount{namespace=~\"$namespace\",app_kubernetes_io_instance=~\"$cluster\"})", "hide": false, "legendFormat": "abandoned", "range": true, @@ -1425,7 +1439,7 @@ data: "uid": "prometheus" }, "editorMode": "code", - "expr": "sum by (namespae, job) (trino_execution_querymanager_canceledqueries_totalcount{namespace=~\"$namespace\",job=~\"$cluster\"})", + "expr": "sum by (namespae, job) (trino_execution_querymanager_canceledqueries_totalcount{namespace=~\"$namespace\",app_kubernetes_io_instance=~\"$cluster\"})", "hide": false, "legendFormat": "canceled", "range": true, @@ -1446,12 +1460,14 @@ data: "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "axisSoftMin": 0, "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", 
"fillOpacity": 0, "gradientMode": "opacity", @@ -1460,6 +1476,7 @@ data: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -1481,8 +1498,7 @@ data: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1559,10 +1575,12 @@ data: "showLegend": false }, "tooltip": { + "hideZeros": false, "mode": "multi", "sort": "none" } }, + "pluginVersion": "12.0.2", "targets": [ { "datasource": { @@ -1570,7 +1588,7 @@ data: "uid": "prometheus" }, "editorMode": "code", - "expr": "trino_memory_clustermemorymanager_clustertotalmemoryreservation{namespace=~\"$namespace\",job=~\"$cluster\"}", + "expr": "trino_memory_clustermemorymanager_clustertotalmemoryreservation{namespace=~\"$namespace\",app_kubernetes_io_instance=~\"$cluster\"}", "hide": false, "legendFormat": "used", "range": true, @@ -1582,7 +1600,7 @@ data: "uid": "prometheus" }, "editorMode": "code", - "expr": "trino_memory_clustermemorymanager_clustermemorybytes{namespace=~\"$namespace\",job=~\"$cluster\"}", + "expr": "trino_memory_clustermemorymanager_clustermemorybytes{namespace=~\"$namespace\",app_kubernetes_io_instance=~\"$cluster\"}", "hide": false, "legendFormat": "limit", "range": true, @@ -1603,12 +1621,14 @@ data: "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "axisSoftMin": 0, "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 25, "gradientMode": "opacity", @@ -1617,6 +1637,7 @@ data: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -1638,8 +1659,7 @@ data: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1712,10 +1732,12 @@ data: "showLegend": false }, "tooltip": { + "hideZeros": false, "mode": "multi", 
"sort": "none" } }, + "pluginVersion": "12.0.2", "targets": [ { "datasource": { @@ -1723,7 +1745,7 @@ data: "uid": "prometheus" }, "editorMode": "code", - "expr": "rate(trino_execution_querymanager_consumedinputrows_totalcount{namespace=~\"$namespace\",job=~\"$cluster\"}[$__rate_interval])", + "expr": "rate(trino_execution_querymanager_consumedinputrows_totalcount{namespace=~\"$namespace\",app_kubernetes_io_instance=~\"$cluster\"}[$__rate_interval])", "hide": false, "legendFormat": "read", "range": true, @@ -1744,12 +1766,14 @@ data: "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "axisSoftMin": 0, "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 25, "gradientMode": "opacity", @@ -1758,6 +1782,7 @@ data: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -1779,8 +1804,7 @@ data: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1853,10 +1877,12 @@ data: "showLegend": false }, "tooltip": { + "hideZeros": false, "mode": "multi", "sort": "none" } }, + "pluginVersion": "12.0.2", "targets": [ { "datasource": { @@ -1864,7 +1890,7 @@ data: "uid": "prometheus" }, "editorMode": "code", - "expr": "rate(trino_execution_querymanager_consumedinputbytes_totalcount{namespace=~\"$namespace\",job=~\"$cluster\"}[$__rate_interval])*8", + "expr": "rate(trino_execution_querymanager_consumedinputbytes_totalcount{namespace=~\"$namespace\",app_kubernetes_io_instance=~\"$cluster\"}[$__rate_interval])*8", "hide": false, "legendFormat": "read", "range": true, @@ -1875,14 +1901,13 @@ data: "type": "timeseries" } ], - "schemaVersion": 37, - "style": "dark", + "preload": false, + "schemaVersion": 41, "tags": [], "templating": { "list": [ { "current": { - "selected": false, "text": "default", "value": "default" }, @@ 
-1891,10 +1916,8 @@ data: "uid": "prometheus" }, "definition": "label_values(trino_memory_clustermemorypool_nodes, namespace)", - "hide": 0, "includeAll": false, "label": "Namespace", - "multi": false, "name": "namespace", "options": [], "query": { @@ -1903,13 +1926,11 @@ data: }, "refresh": 1, "regex": "", - "skipUrlSync": false, "sort": 1, "type": "query" }, { "current": { - "selected": false, "text": "trino", "value": "trino" }, @@ -1917,28 +1938,30 @@ data: "type": "prometheus", "uid": "prometheus" }, - "definition": "label_values(trino_memory_clustermemorypool_nodes{namespace=~\"$namespace\"}, job)", - "hide": 0, + "definition": "label_values(trino_memory_clustermemorypool_nodes{namespace=~\"$namespace\"},app_kubernetes_io_instance)", "includeAll": false, "label": "Cluster", - "multi": false, "name": "cluster", "options": [], "query": { - "query": "label_values(trino_memory_clustermemorypool_nodes{namespace=~\"$namespace\"}, job)", - "refId": "StandardVariableQuery" + "qryType": 1, + "query": "label_values(trino_memory_clustermemorypool_nodes{namespace=~\"$namespace\"},app_kubernetes_io_instance)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 2, "regex": "", - "skipUrlSync": false, "sort": 1, "type": "query" }, { + "current": { + "text": "trino", + "value": "trino" + }, "hide": 2, "name": "product", "query": "trino", - "skipUrlSync": false, + "skipUrlSync": true, "type": "constant" } ] @@ -1951,8 +1974,7 @@ data: "timezone": "", "title": "Trino", "uid": "-mVc4yO4k", - "version": 5, - "weekStart": "" + "version": 3 } kafka.json: | { From 688ebf63d1d5dd50289d9ea0c76be6dcf7d81048 Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Wed, 23 Jul 2025 14:22:37 +0200 Subject: [PATCH 09/17] Scrape NiFi 2 using mTLS --- ...etheus-tls-certificate-serviceaccount.yaml | 56 ++++++++++++++ .../create-prometheus-tls-certificate.yaml | 73 +++++++++++++++++++ .../prometheus-service-monitors.yaml | 58 ++++++++------- stacks/stacks-v2.yaml | 9 ++- 4 
files changed, 165 insertions(+), 31 deletions(-) create mode 100644 stacks/monitoring/create-prometheus-tls-certificate-serviceaccount.yaml create mode 100644 stacks/monitoring/create-prometheus-tls-certificate.yaml diff --git a/stacks/monitoring/create-prometheus-tls-certificate-serviceaccount.yaml b/stacks/monitoring/create-prometheus-tls-certificate-serviceaccount.yaml new file mode 100644 index 00000000..ff153f11 --- /dev/null +++ b/stacks/monitoring/create-prometheus-tls-certificate-serviceaccount.yaml @@ -0,0 +1,56 @@ +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: create-prometheus-tls-certificate-serviceaccount +# --- +# apiVersion: rbac.authorization.k8s.io/v1 +# kind: ClusterRoleBinding +# metadata: +# name: create-prometheus-tls-certificate-clusterrolebinding +# subjects: +# - kind: ServiceAccount +# name: create-prometheus-tls-certificate-serviceaccount +# namespace: {{ NAMESPACE }} +# roleRef: +# kind: ClusterRole +# name: create-prometheus-tls-certificate-clusterrole +# apiGroup: rbac.authorization.k8s.io +# --- +# apiVersion: rbac.authorization.k8s.io/v1 +# kind: ClusterRole +# metadata: +# name: create-prometheus-tls-certificate-clusterrole +# rules: +# - apiGroups: +# - "" +# resources: +# - secret +# verbs: +# - create +# - patch +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: create-prometheus-tls-certificate-rolebinding +subjects: +- kind: ServiceAccount + name: create-prometheus-tls-certificate-serviceaccount + namespace: {{ NAMESPACE }} +roleRef: + kind: Role + name: create-prometheus-tls-certificate-role + apiGroup: rbac.authorization.k8s.io +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: create-prometheus-tls-certificate-role +rules: +- apiGroups: [""] + resources: ["secrets"] + verbs: ["get", "create", "patch"] +- apiGroups: [""] + resources: ["pods"] + verbs: ["delete"] diff --git a/stacks/monitoring/create-prometheus-tls-certificate.yaml 
b/stacks/monitoring/create-prometheus-tls-certificate.yaml new file mode 100644 index 00000000..6eb8f59a --- /dev/null +++ b/stacks/monitoring/create-prometheus-tls-certificate.yaml @@ -0,0 +1,73 @@ +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: create-prometheus-tls-certificate + labels: + app: create-prometheus-tls-certificate +spec: + replicas: 1 + selector: + matchLabels: + app: create-prometheus-tls-certificate + template: + metadata: + labels: + app: create-prometheus-tls-certificate + spec: + serviceAccountName: create-prometheus-tls-certificate-serviceaccount + containers: + - name: create-prometheus-tls-certificate + image: oci.stackable.tech/sdp/tools:1.0.0-stackable0.0.0-dev + env: + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + command: + - bash + - -euo + - pipefail + - -c + - | + # "kubectl create secret" fails on existing Secrets, so we "kubectl apply" instead + kubectl create secret generic prometheus-tls-certificate \ + --from-file=/prometheus-tls-certificate/ca.crt \ + --from-file=/prometheus-tls-certificate/tls.crt \ + --from-file=/prometheus-tls-certificate/tls.key \ + --dry-run=client -o yaml \ + | kubectl apply -f - + + echo Sleeping 6 hours before deleting my own Pod + sleep 21600 # 6 * 60 * 60 + + echo "Deleting our own Pod, so that it gets re-created and secret-operator issues a new certificate (only crash-looping the container is not enough!)" + kubectl --namespace "$POD_NAMESPACE" delete pod "$POD_NAME" + exit 0 + volumeMounts: + - name: prometheus-tls-certificate + mountPath: /prometheus-tls-certificate + volumes: + - name: prometheus-tls-certificate + ephemeral: + volumeClaimTemplate: + metadata: + annotations: + # secrets.stackable.tech/backend.autotls.cert.lifetime: "1d" + secrets.stackable.tech/class: "tls" + secrets.stackable.tech/format: "tls-pem" + secrets.stackable.tech/scope: "service=prometheus" + spec: + 
accessModes: + - ReadWriteOnce + resources: + requests: + storage: "1" + storageClassName: secrets.stackable.tech + volumeMode: Filesystem + securityContext: + fsGroup: 1000 diff --git a/stacks/monitoring/prometheus-service-monitors.yaml b/stacks/monitoring/prometheus-service-monitors.yaml index 9102f020..57868f23 100644 --- a/stacks/monitoring/prometheus-service-monitors.yaml +++ b/stacks/monitoring/prometheus-service-monitors.yaml @@ -1,24 +1,20 @@ -# ### Products -# # Use something like this to check for metrics: # count by (app_kubernetes_io_name, app_kubernetes_io_instance, pod) ({app_kubernetes_io_name!="",pod!=""}) # -# See https://github.com/stackabletech/issues/issues/735 -# -# - [x] Airflow - exporter -# - [x] Druid - native -# - [x] HBase - native -# - [x] Hadoop HDFS - native -# - [x] Hive - exporter -# - [x] Kafka - exporter -# - [x] NiFi 1 - native -# - [ ] NiFi 2 - native - partially working, needs mTLS -# - [ ] OpenSearch -# - [ ] Spark - native -# - [x] Superset - exporter -# - [x] Trino - native -# - [x] ZooKeeper - native -# - [x] OPA - native +# [x] Airflow - exporter +# [x] Druid - native +# [x] HBase - native +# [x] Hadoop HDFS - native +# [x] Hive - exporter +# [x] Kafka - exporter +# [x] NiFi 1 - native +# [x] NiFi 2 - native +# [ ] OpenSearch - not officially part of the platform yet +# [ ] Spark - native - was too lazy, no idea if we even expose metrics +# [x] Superset - exporter +# [x] Trino - native +# [x] ZooKeeper - native +# [x] OPA - native --- apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor @@ -171,9 +167,8 @@ spec: - app.kubernetes.io/role-group - app.kubernetes.io/version --- -# NiFI 2 is a beast on it's own.... -# The current state produces "server returned HTTP status 401 Unauthorized", but it's at least a -# good starting point for a working version +# NiFI 2 is a beast on it's own... 
+# We need to use mTLS (otherwise we get a 401) and can not use the PodIP apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor metadata: @@ -193,19 +188,26 @@ spec: operator: In values: - nifi - # - key: app.kubernetes.io/version - # operator: NotIn - # values: - # - List all 1.x.x version combinations endpoints: - scheme: https port: https path: /nifi-api/flow/metrics/prometheus - # TODO: Use mTLS - # See https://github.com/stackabletech/demos/pull/260 # See https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#monitoring.coreos.com/v1.TLSConfig tlsConfig: - insecureSkipVerify: true + ca: + secret: + name: prometheus-tls-certificate + key: ca.crt + cert: + secret: + name: prometheus-tls-certificate + key: tls.crt + keySecret: + name: prometheus-tls-certificate + key: tls.key + # We need to talk to the Pod via the FQDN of the Pod because of the stupid SNI check of NiFi. + # We can not use the typical PodIP, as it is not contained in the NiFi certificate, + # see https://github.com/stackabletech/secret-operator/issues/620 relabelings: - sourceLabels: - __meta_kubernetes_pod_name diff --git a/stacks/stacks-v2.yaml b/stacks/stacks-v2.yaml index b2aa0e76..9e5a88e6 100644 --- a/stacks/stacks-v2.yaml +++ b/stacks/stacks-v2.yaml @@ -6,14 +6,17 @@ stacks: stackableOperators: - commons - listener + - secret labels: - monitoring - prometheus - grafana manifests: - - plainYaml: https://raw.githubusercontent.com/stackabletech/demos/main/stacks/monitoring/grafana-dashboards.yaml - - helmChart: https://raw.githubusercontent.com/stackabletech/demos/main/stacks/monitoring/prometheus.yaml - - plainYaml: https://raw.githubusercontent.com/stackabletech/demos/main/stacks/monitoring/prometheus-service-monitors.yaml + - plainYaml: stacks/monitoring/grafana-dashboards.yaml + - plainYaml: stacks/monitoring/create-prometheus-tls-certificate-serviceaccount.yaml + - plainYaml: stacks/monitoring/create-prometheus-tls-certificate.yaml + - 
helmChart: stacks/monitoring/prometheus.yaml + - plainYaml: stacks/monitoring/prometheus-service-monitors.yaml supportedNamespaces: [] resourceRequests: cpu: 1750m From 424b5a87b03c5e3ff831c06e4a794d69d926b12a Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Wed, 23 Jul 2025 14:32:28 +0200 Subject: [PATCH 10/17] Remove leftover code --- ...etheus-tls-certificate-serviceaccount.yaml | 26 ------------------- 1 file changed, 26 deletions(-) diff --git a/stacks/monitoring/create-prometheus-tls-certificate-serviceaccount.yaml b/stacks/monitoring/create-prometheus-tls-certificate-serviceaccount.yaml index ff153f11..3b79bba5 100644 --- a/stacks/monitoring/create-prometheus-tls-certificate-serviceaccount.yaml +++ b/stacks/monitoring/create-prometheus-tls-certificate-serviceaccount.yaml @@ -3,32 +3,6 @@ apiVersion: v1 kind: ServiceAccount metadata: name: create-prometheus-tls-certificate-serviceaccount -# --- -# apiVersion: rbac.authorization.k8s.io/v1 -# kind: ClusterRoleBinding -# metadata: -# name: create-prometheus-tls-certificate-clusterrolebinding -# subjects: -# - kind: ServiceAccount -# name: create-prometheus-tls-certificate-serviceaccount -# namespace: {{ NAMESPACE }} -# roleRef: -# kind: ClusterRole -# name: create-prometheus-tls-certificate-clusterrole -# apiGroup: rbac.authorization.k8s.io -# --- -# apiVersion: rbac.authorization.k8s.io/v1 -# kind: ClusterRole -# metadata: -# name: create-prometheus-tls-certificate-clusterrole -# rules: -# - apiGroups: -# - "" -# resources: -# - secret -# verbs: -# - create -# - patch --- apiVersion: rbac.authorization.k8s.io/v1 kind: RoleBinding From 4a2b1f28e35f0bcdd671b0467bab91f0aae09369 Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Wed, 23 Jul 2025 14:33:42 +0200 Subject: [PATCH 11/17] Add comment --- stacks/monitoring/create-prometheus-tls-certificate.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/stacks/monitoring/create-prometheus-tls-certificate.yaml 
b/stacks/monitoring/create-prometheus-tls-certificate.yaml index 6eb8f59a..b69505c8 100644 --- a/stacks/monitoring/create-prometheus-tls-certificate.yaml +++ b/stacks/monitoring/create-prometheus-tls-certificate.yaml @@ -57,6 +57,9 @@ spec: volumeClaimTemplate: metadata: annotations: + # I assume prometheus supports hot-reloading the certificate Secret. + # If not we can try to increase the lifetime here, but watch out that there is an + # upper limit on the SecretClass! # secrets.stackable.tech/backend.autotls.cert.lifetime: "1d" secrets.stackable.tech/class: "tls" secrets.stackable.tech/format: "tls-pem" From 1da9eb3a520a2ca324cf55041b315468d57cdec0 Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Thu, 24 Jul 2025 10:11:06 +0200 Subject: [PATCH 12/17] Add simple NiFi Dashboard --- stacks/monitoring/grafana-dashboards.yaml | 1790 +++++++++++++++++ .../prometheus-service-monitors.yaml | 4 +- 2 files changed, 1792 insertions(+), 2 deletions(-) diff --git a/stacks/monitoring/grafana-dashboards.yaml b/stacks/monitoring/grafana-dashboards.yaml index 91e82d80..f682a463 100644 --- a/stacks/monitoring/grafana-dashboards.yaml +++ b/stacks/monitoring/grafana-dashboards.yaml @@ -11354,4 +11354,1794 @@ data: "uid": "vVpFeeAVz", "version": 6 } + nifi.json: | + { + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 5, + "links": [], + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "green", + "mode": "fixed" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": 
"green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 0, + "y": 0 + }, + "id": 6, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(kube_pod_container_resource_requests{resource=\"cpu\"} * on (namespace, pod) group_left() (max by (namespace, pod) (kube_pod_labels{namespace=~\"$namespace\",label_app_kubernetes_io_name=~\"$product\",label_app_kubernetes_io_component=~\".*\",label_app_kubernetes_io_instance=~\"$cluster\"})))", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "CPU request", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "green", + "mode": "fixed" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 3, + "y": 0 + }, + "id": 7, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(kube_pod_container_resource_limits{resource=\"cpu\"} 
* on (namespace, pod) group_left() (max by (namespace, pod) (kube_pod_labels{namespace=~\"$namespace\",label_app_kubernetes_io_name=~\"$product\",label_app_kubernetes_io_component=~\".*\",label_app_kubernetes_io_instance=~\"$cluster\"})))", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "CPU limit", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "green", + "mode": "fixed" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 6, + "y": 0 + }, + "id": 8, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(kube_pod_container_resource_requests{resource=\"memory\"} * on (namespace, pod) group_left() (max by (namespace, pod) (kube_pod_labels{namespace=~\"$namespace\",label_app_kubernetes_io_name=~\"$product\",label_app_kubernetes_io_component=~\".*\",label_app_kubernetes_io_instance=~\"$cluster\"})))", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Memory limit", + "type": "stat" + }, + { + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 5, + "x": 9, + "y": 0 + }, + "id": 2, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "

$cluster

", + "mode": "html" + }, + "pluginVersion": "12.0.2", + "title": "Cluster name", + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 14, + "y": 0 + }, + "id": 3, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "count(count by (pod)(nifi_amount_bytes_read{namespace=~\"$namespace\",app_kubernetes_io_instance=~\"$cluster\"}))", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Nodes", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + 
"group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "used" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 25 + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 3 + }, + "id": 5, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(max by (namespace, pod)(rate(container_cpu_usage_seconds_total{container!=\"\"}[$__rate_interval]) * on (namespace, pod) group_left() (max by (namespace, pod) (kube_pod_labels{namespace=~\"$namespace\",label_app_kubernetes_io_name=~\"$product\",label_app_kubernetes_io_component=~\".*\",label_app_kubernetes_io_instance=~\"$cluster\"}))))", + "hide": false, + "legendFormat": "used", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(max by (namespace, pod)(kube_pod_container_resource_limits{resource=\"cpu\"} * on (namespace, pod) group_left() (max by (namespace, pod) 
(kube_pod_labels{namespace=~\"$namespace\",label_app_kubernetes_io_name=~\"$product\",label_app_kubernetes_io_component=~\".*\",label_app_kubernetes_io_instance=~\"$cluster\"}))))", + "hide": false, + "legendFormat": "limit", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(max by (namespace, pod)(kube_pod_container_resource_requests{resource=\"cpu\"} * on (namespace, pod) group_left() (max by (namespace, pod) (kube_pod_labels{namespace=~\"$namespace\",label_app_kubernetes_io_name=~\"$product\",label_app_kubernetes_io_component=~\".*\",label_app_kubernetes_io_instance=~\"$cluster\"}))))", + "hide": false, + "legendFormat": "request", + "range": true, + "refId": "C" + } + ], + "title": "CPU usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + }, + { 
+ "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "used" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 25 + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 3 + }, + "id": 9, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(max by (namespace, pod)(container_memory_working_set_bytes{container!=\"\"} * on (namespace, pod) group_left() (max by (namespace, pod) (kube_pod_labels{namespace=~\"$namespace\",label_app_kubernetes_io_name=~\"$product\",label_app_kubernetes_io_component=~\".*\",label_app_kubernetes_io_instance=~\"$cluster\"}))))", + "hide": false, + "legendFormat": "used", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(max by (namespace, pod)(kube_pod_container_resource_requests{resource=\"memory\"} * on (namespace, pod) group_left() (max by (namespace, pod) (kube_pod_labels{namespace=~\"$namespace\",label_app_kubernetes_io_name=~\"$product\",label_app_kubernetes_io_component=~\".*\",label_app_kubernetes_io_instance=~\"$cluster\"}))))", + "hide": false, + "legendFormat": "limit", + "range": true, + "refId": "C" + } + ], + "title": "Memory usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": 
false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "binbps" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "used" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 3 + }, + "id": 15, + "options": { + "legend": { + "calcs": [ + "last" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(max by (namespace, pod)(rate(container_network_receive_bytes_total[$__rate_interval]) * on (namespace, pod) group_left() (max by (namespace, pod) 
(kube_pod_labels{namespace=~\"$namespace\",label_app_kubernetes_io_name=~\"$product\",label_app_kubernetes_io_component=~\".*\",label_app_kubernetes_io_instance=~\"$cluster\"}))))*8", + "hide": false, + "legendFormat": "rx", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "-sum(max by (namespace, pod)(rate(container_network_transmit_bytes_total[$__rate_interval]) * on (namespace, pod) group_left() (max by (namespace, pod) (kube_pod_labels{namespace=~\"$namespace\",label_app_kubernetes_io_name=~\"$product\",label_app_kubernetes_io_component=~\".*\",label_app_kubernetes_io_instance=~\"$cluster\"}))))*8", + "hide": false, + "legendFormat": "tx", + "range": true, + "refId": "B" + } + ], + "title": "Network usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 10 + }, + "id": 24, + "options": { + "legend": { + "calcs": [ + "last" + ], + "displayMode": "table", + "placement": "right", + 
"showLegend": true, + "sortBy": "Last", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "max by(pod)(nifi_amount_threads_active{namespace=~\"$namespace\",app_kubernetes_io_instance=~\"$cluster\"})", + "legendFormat": "{{pod}}", + "range": true, + "refId": "A" + } + ], + "title": "Active threads", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMax": 1, + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "dashed" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 0.9 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 10 + }, + "id": 18, + "options": { + "legend": { + "calcs": [ + "last" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Last", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": 
"code", + "expr": "max by(pod)(nifi_jvm_heap_usage{namespace=~\"$namespace\",app_kubernetes_io_instance=~\"$cluster\"})", + "legendFormat": "{{pod}}", + "range": true, + "refId": "A" + } + ], + "title": "JVM heap usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 18 + }, + "id": 25, + "options": { + "legend": { + "calcs": [ + "last" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Last", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(nifi_amount_bytes_read{namespace=~\"$namespace\",app_kubernetes_io_instance=~\"$cluster\"})", + "legendFormat": "read", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": 
"sum(nifi_amount_bytes_written{namespace=~\"$namespace\",app_kubernetes_io_instance=~\"$cluster\"})", + "hide": false, + "legendFormat": "written", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(nifi_amount_bytes_received{namespace=~\"$namespace\",app_kubernetes_io_instance=~\"$cluster\"})", + "hide": false, + "legendFormat": "received", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(nifi_amount_bytes_sent{namespace=~\"$namespace\",app_kubernetes_io_instance=~\"$cluster\"})", + "hide": false, + "legendFormat": "sent", + "range": true, + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(nifi_amount_bytes_transferred{namespace=~\"$namespace\",app_kubernetes_io_instance=~\"$cluster\"})", + "hide": false, + "legendFormat": "transferred", + "range": true, + "refId": "E" + } + ], + "title": "Amount of bytes", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + 
}, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 18 + }, + "id": 26, + "options": { + "legend": { + "calcs": [ + "last" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Last", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(nifi_amount_items_input{namespace=~\"$namespace\",app_kubernetes_io_instance=~\"$cluster\"})", + "legendFormat": "input", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(nifi_amount_items_output{namespace=~\"$namespace\",app_kubernetes_io_instance=~\"$cluster\"})", + "hide": false, + "legendFormat": "output", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(nifi_amount_items_queued{namespace=~\"$namespace\",app_kubernetes_io_instance=~\"$cluster\"})", + "hide": false, + "legendFormat": "queued", + "range": true, + "refId": "C" + } + ], + "title": "Items", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + 
}, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 9, + "x": 0, + "y": 26 + }, + "id": 27, + "options": { + "legend": { + "calcs": [ + "last" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Last", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "max(nifi_content_repo_used_space_bytes{namespace=~\"$namespace\",app_kubernetes_io_instance=~\"$cluster\"})", + "legendFormat": "content", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "max(nifi_flow_file_repo_used_space_bytes{namespace=~\"$namespace\",app_kubernetes_io_instance=~\"$cluster\"})", + "hide": false, + "legendFormat": "flow file", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "max(nifi_provenance_repo_used_space_bytes{namespace=~\"$namespace\",app_kubernetes_io_instance=~\"$cluster\"})", + "hide": false, + "legendFormat": "provenance", + "range": true, + "refId": "C" + } + ], + "title": "Used repository space", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + 
"barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 9, + "x": 9, + "y": 26 + }, + "id": 28, + "options": { + "legend": { + "calcs": [ + "last" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Last", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "max(nifi_content_repo_free_space_bytes{namespace=~\"$namespace\",app_kubernetes_io_instance=~\"$cluster\"})", + "legendFormat": "content", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "max(nifi_flow_file_repo_free_space_bytes{namespace=~\"$namespace\",app_kubernetes_io_instance=~\"$cluster\"})", + "hide": false, + "legendFormat": "flow file", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "max(nifi_provenance_repo_free_space_bytes{namespace=~\"$namespace\",app_kubernetes_io_instance=~\"$cluster\"})", + "hide": false, + "legendFormat": "provenance", + "range": true, + "refId": "C" + } + ], + "title": "Free repository space", + "type": "timeseries" + }, + { + 
"datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "used" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 25 + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 18, + "y": 26 + }, + "id": 23, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": 
"sum(max by (namespace, persistentvolumeclaim)(kubelet_volume_stats_used_bytes) * on (namespace, persistentvolumeclaim) group_left() (max by (namespace, persistentvolumeclaim) (kube_persistentvolumeclaim_labels{namespace=~\"$namespace\",label_app_kubernetes_io_name=~\"$product\",label_app_kubernetes_io_component=~\".*\",label_app_kubernetes_io_instance=~\"$cluster\"})))", + "hide": false, + "legendFormat": "used", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(max by (namespace, persistentvolumeclaim)(kubelet_volume_stats_capacity_bytes) * on (namespace, persistentvolumeclaim) group_left() (max by (namespace, persistentvolumeclaim) (kube_persistentvolumeclaim_labels{namespace=~\"$namespace\",label_app_kubernetes_io_name=~\"$product\",label_app_kubernetes_io_component=~\".*\",label_app_kubernetes_io_instance=~\"$cluster\"})))", + "hide": false, + "legendFormat": "limit", + "range": true, + "refId": "C" + } + ], + "title": "PVC usage", + "type": "timeseries" + } + ], + "preload": false, + "refresh": false, + "schemaVersion": 41, + "tags": [], + "templating": { + "list": [ + { + "current": { + "text": "nifi", + "value": "nifi" + }, + "hide": 2, + "name": "product", + "query": "nifi", + "skipUrlSync": true, + "type": "constant" + }, + { + "current": { + "text": "default", + "value": "default" + }, + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "definition": "label_values(nifi_amount_bytes_read,namespace)", + "includeAll": false, + "label": "Namespace", + "name": "namespace", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(nifi_amount_bytes_read,namespace)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "sort": 1, + "type": "query" + }, + { + "current": { + "text": "nifi", + "value": "nifi" + }, + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + 
"definition": "label_values(nifi_amount_bytes_read{namespace=~\"$namespace\"},app_kubernetes_io_instance)", + "includeAll": false, + "label": "Cluster", + "name": "cluster", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(nifi_amount_bytes_read{namespace=~\"$namespace\"},app_kubernetes_io_instance)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 2, + "regex": "", + "sort": 1, + "type": "query" + } + ] + }, + "time": { + "from": "now-3h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "NiFi", + "uid": "C_8qIX04k", + "version": 13 + } # {% endraw %} diff --git a/stacks/monitoring/prometheus-service-monitors.yaml b/stacks/monitoring/prometheus-service-monitors.yaml index 57868f23..3a6bb88d 100644 --- a/stacks/monitoring/prometheus-service-monitors.yaml +++ b/stacks/monitoring/prometheus-service-monitors.yaml @@ -236,7 +236,7 @@ spec: any: true selector: matchLabels: - stackable.tech/vendor: Stackable + # stackable.tech/vendor: Stackable # This is not always set, e.g. missing in the nifi-kafka-druid-water-level-data demo app: minio monitoring: "true" endpoints: @@ -256,7 +256,7 @@ spec: any: true selector: matchLabels: - stackable.tech/vendor: Stackable + # stackable.tech/vendor: Stackable # This is not always set, e.g. 
missing in the nifi-kafka-druid-water-level-data demo app: minio monitoring: "true" endpoints: From 71be4a3fc63fc559e5fd609d03a55088fb60dd53 Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Thu, 24 Jul 2025 10:18:10 +0200 Subject: [PATCH 13/17] typos --- stacks/monitoring/create-prometheus-tls-certificate.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stacks/monitoring/create-prometheus-tls-certificate.yaml b/stacks/monitoring/create-prometheus-tls-certificate.yaml index b69505c8..a8af0c02 100644 --- a/stacks/monitoring/create-prometheus-tls-certificate.yaml +++ b/stacks/monitoring/create-prometheus-tls-certificate.yaml @@ -57,8 +57,8 @@ spec: volumeClaimTemplate: metadata: annotations: - # I assume prometheus supports hot-reloading the certificate Secret. - # If not we can try to increase the lifetime here, but watch out that there is an + # I assume Prometheus supports hot-reloading the certificate Secret. + # If not, we can try to increase the lifetime here, but watch out that there is an # upper limit on the SecretClass! # secrets.stackable.tech/backend.autotls.cert.lifetime: "1d" secrets.stackable.tech/class: "tls" From 2ad5dca5af5a375f838e971e8b225bdb6976c2f9 Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Fri, 25 Jul 2025 09:26:12 +0200 Subject: [PATCH 14/17] mention cert rotation --- stacks/monitoring/create-prometheus-tls-certificate.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/stacks/monitoring/create-prometheus-tls-certificate.yaml b/stacks/monitoring/create-prometheus-tls-certificate.yaml index a8af0c02..faecd89b 100644 --- a/stacks/monitoring/create-prometheus-tls-certificate.yaml +++ b/stacks/monitoring/create-prometheus-tls-certificate.yaml @@ -57,9 +57,9 @@ spec: volumeClaimTemplate: metadata: annotations: - # I assume Prometheus supports hot-reloading the certificate Secret. 
- # If not, we can try to increase the lifetime here, but watch out that there is an - # upper limit on the SecretClass! + # Highly professional tests have shown that Prometheus is able to handle the + # certificate rotation :) + # You can change the certificate lifetime here for easier testing: # secrets.stackable.tech/backend.autotls.cert.lifetime: "1d" secrets.stackable.tech/class: "tls" secrets.stackable.tech/format: "tls-pem" From 8a8b39138d695684b1e3b22b44db45685f703afd Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Fri, 25 Jul 2025 09:26:53 +0200 Subject: [PATCH 15/17] change links --- stacks/stacks-v2.yaml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/stacks/stacks-v2.yaml b/stacks/stacks-v2.yaml index c731414c..9dd32287 100644 --- a/stacks/stacks-v2.yaml +++ b/stacks/stacks-v2.yaml @@ -12,11 +12,11 @@ stacks: - prometheus - grafana manifests: - - plainYaml: stacks/monitoring/grafana-dashboards.yaml - - plainYaml: stacks/monitoring/create-prometheus-tls-certificate-serviceaccount.yaml - - plainYaml: stacks/monitoring/create-prometheus-tls-certificate.yaml - - helmChart: stacks/monitoring/prometheus.yaml - - plainYaml: stacks/monitoring/prometheus-service-monitors.yaml + - plainYaml: https://raw.githubusercontent.com/stackabletech/demos/main/stacks/monitoring/grafana-dashboards.yaml + - plainYaml: https://raw.githubusercontent.com/stackabletech/demos/main/stacks/monitoring/create-prometheus-tls-certificate-serviceaccount.yaml + - plainYaml: https://raw.githubusercontent.com/stackabletech/demos/main/stacks/monitoring/create-prometheus-tls-certificate.yaml + - helmChart: https://raw.githubusercontent.com/stackabletech/demos/main/stacks/monitoring/prometheus.yaml + - plainYaml: https://raw.githubusercontent.com/stackabletech/demos/main/stacks/monitoring/prometheus-service-monitors.yaml supportedNamespaces: [] resourceRequests: cpu: 1750m From 71e8ab12af393c7b6c54b932c9e172fed3d131b9 Mon Sep 17 00:00:00 2001 From: 
Sebastian Bernauer Date: Fri, 25 Jul 2025 09:28:55 +0200 Subject: [PATCH 16/17] give nifi dashboard a different id --- stacks/monitoring/grafana-dashboards.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stacks/monitoring/grafana-dashboards.yaml b/stacks/monitoring/grafana-dashboards.yaml index f682a463..dd8fa0d6 100644 --- a/stacks/monitoring/grafana-dashboards.yaml +++ b/stacks/monitoring/grafana-dashboards.yaml @@ -13141,7 +13141,7 @@ data: "timepicker": {}, "timezone": "", "title": "NiFi", - "uid": "C_8qIX04k", + "uid": "C_8qIX04l", "version": 13 } # {% endraw %} From 6c758505b1eb480dc758743ba481ac26366e17d7 Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Fri, 25 Jul 2025 09:45:34 +0200 Subject: [PATCH 17/17] update nifi dashboard --- stacks/monitoring/grafana-dashboards.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stacks/monitoring/grafana-dashboards.yaml b/stacks/monitoring/grafana-dashboards.yaml index dd8fa0d6..21967e5c 100644 --- a/stacks/monitoring/grafana-dashboards.yaml +++ b/stacks/monitoring/grafana-dashboards.yaml @@ -12856,7 +12856,7 @@ data: "placement": "right", "showLegend": true, "sortBy": "Last", - "sortDesc": true + "sortDesc": false }, "tooltip": { "hideZeros": false,