From 7c73a022059574ac0be1a5deb269d6ea32ccd1dd Mon Sep 17 00:00:00 2001 From: j_r0dd <285797+jr0dd@users.noreply.github.com> Date: Mon, 13 Sep 2021 10:14:11 -0400 Subject: [PATCH] [network-ups-tools] metrics refactor (#1178) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * [network-ups-tools] metrics refactor * add template to rules * Update charts/stable/network-ups-tools/templates/prometheusrules.yaml Co-authored-by: ᗪєνιη ᗷυнʟ * Update charts/stable/network-ups-tools/templates/prometheusrules.yaml Co-authored-by: ᗪєνιη ᗷυнʟ * Update charts/stable/network-ups-tools/templates/prometheusrules.yaml Co-authored-by: ᗪєνιη ᗷυнʟ * Update charts/stable/network-ups-tools/templates/prometheusrules.yaml Co-authored-by: ᗪєνιη ᗷυнʟ Co-authored-by: ᗪєνιη ᗷυнʟ --- charts/stable/network-ups-tools/Chart.yaml | 2 +- charts/stable/network-ups-tools/README.md | 29 ++++++---- .../README_CHANGELOG.md.gotmpl | 7 +++ .../network-ups-tools/templates/common.yaml | 14 ++--- .../templates/prometheusrules.yaml | 55 +++++++++++++++++++ .../templates/servicemonitor.yaml | 8 +-- charts/stable/network-ups-tools/values.yaml | 44 ++++++++++----- 7 files changed, 124 insertions(+), 35 deletions(-) create mode 100644 charts/stable/network-ups-tools/templates/prometheusrules.yaml diff --git a/charts/stable/network-ups-tools/Chart.yaml b/charts/stable/network-ups-tools/Chart.yaml index 7ac20f45..1f403350 100644 --- a/charts/stable/network-ups-tools/Chart.yaml +++ b/charts/stable/network-ups-tools/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 appVersion: v2.7.4-2479-g86a32237 description: Network UPS Tools is a collection of programs which provide a common interface for monitoring and administering UPS, PDU and SCD hardware. name: network-ups-tools -version: 5.1.0 +version: 6.0.0 kubeVersion: ">=1.16.0-0" keywords: - nut diff --git a/charts/stable/network-ups-tools/README.md b/charts/stable/network-ups-tools/README.md index cd1f8c07..b12a12c9 100644 --- a/charts/stable/network-ups-tools/README.md +++ b/charts/stable/network-ups-tools/README.md @@ -1,6 +1,6 @@ # network-ups-tools -![Version: 5.1.0](https://img.shields.io/badge/Version-5.1.0-informational?style=flat-square) ![AppVersion: v2.7.4-2479-g86a32237](https://img.shields.io/badge/AppVersion-v2.7.4--2479--g86a32237-informational?style=flat-square) +![Version: 6.0.0](https://img.shields.io/badge/Version-6.0.0-informational?style=flat-square) ![AppVersion: v2.7.4-2479-g86a32237](https://img.shields.io/badge/AppVersion-v2.7.4--2479--g86a32237-informational?style=flat-square) Network UPS Tools is a collection of programs which provide a common interface for monitoring and administering UPS, PDU and SCD hardware. @@ -79,19 +79,21 @@ N/A | config.mode | string | `"values"` | If set to 'values', the configuration will be read from these values. Otherwise you have to mount a volume to /etc/nut containing the configuration files. | | env | object | See below | environment variables. | | env.TZ | string | `"UTC"` | Set the container timezone | -| exporter.enabled | bool | See values.yaml | Enable and configure prometheus-nut-exporter sidecar and Prometheus serviceMonitor. | -| exporter.env.logLevel | string | `"info"` | log level [info|debug|trace] | -| exporter.env.port | int | `9995` | metrics port | -| exporter.image.pullPolicy | string | `"IfNotPresent"` | image pull policy | -| exporter.image.repository | string | `"hon95/prometheus-nut-exporter"` | image repository | -| exporter.image.tag | string | `"1.1.1"` | image tag | -| exporter.serviceMonitor.interval | string | `"30s"` | | -| exporter.serviceMonitor.labels | object | `{}` | | -| exporter.serviceMonitor.scrapeTimeout | string | `"10s"` | | | image.pullPolicy | string | `"IfNotPresent"` | image pull policy | | image.repository | string | `"ghcr.io/k8s-at-home/network-ups-tools"` | image repository | | image.tag | string | `"v2.7.4-2479-g86a32237"` | image tag | | ingress.main | object | See values.yaml | Enable and configure ingress settings for the chart under this key. | +| metrics.enabled | bool | See values.yaml | Enable and configure prometheus-nut-exporter sidecar and Prometheus serviceMonitor. | +| metrics.exporter.env.logLevel | string | `"info"` | log level [info|debug|trace] | +| metrics.exporter.env.port | int | `9995` | metrics port | +| metrics.exporter.image.pullPolicy | string | `"IfNotPresent"` | image pull policy | +| metrics.exporter.image.repository | string | `"hon95/prometheus-nut-exporter"` | image repository | +| metrics.exporter.image.tag | string | `"1.1.1"` | image tag | +| metrics.prometheusRule | object | See values.yaml | Enable and configure Prometheus Rules for the chart under this key. | +| metrics.prometheusRule.rules | list | See prometheusrules.yaml | Configure additionial rules for the chart under this key. | +| metrics.serviceMonitor.interval | string | `"30s"` | | +| metrics.serviceMonitor.labels | object | `{}` | | +| metrics.serviceMonitor.scrapeTimeout | string | `"10s"` | | | persistence | object | See values.yaml | Configure persistence settings for the chart under this key. | | persistence.ups.hostPath | string | `"/dev/bus/usb/001/001"` | The path of the usb ups on the host. | | persistence.ups.mountPath | string | `"/dev/bus/usb/001/001"` | Optional path for ups to mount in the container. | @@ -104,6 +106,12 @@ All notable changes to this application Helm chart will be documented in this fi The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +### [6.0.0] + +#### Changed + +- **BREAKING**: Refactored Prometheus metrics section to add rules. Enabling metrics automatically enables the serviceMonitor and exporter sidecar. + ### [5.1.0] ### Added @@ -131,6 +139,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Initial version +[6.0.0]: #600 [5.1.0]: #510 [5.0.0]: #500 [4.0.0]: #400 diff --git a/charts/stable/network-ups-tools/README_CHANGELOG.md.gotmpl b/charts/stable/network-ups-tools/README_CHANGELOG.md.gotmpl index f0460d68..13fdec7d 100644 --- a/charts/stable/network-ups-tools/README_CHANGELOG.md.gotmpl +++ b/charts/stable/network-ups-tools/README_CHANGELOG.md.gotmpl @@ -9,6 +9,12 @@ All notable changes to this application Helm chart will be documented in this fi The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +### [6.0.0] + +#### Changed + +- **BREAKING**: Refactored Prometheus metrics section to add rules. Enabling metrics automatically enables the serviceMonitor and exporter sidecar. + ### [5.1.0] ### Added @@ -36,6 +42,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Initial version +[6.0.0]: #600 [5.1.0]: #510 [5.0.0]: #500 [4.0.0]: #400 diff --git a/charts/stable/network-ups-tools/templates/common.yaml b/charts/stable/network-ups-tools/templates/common.yaml index 583d52f8..2a165913 100644 --- a/charts/stable/network-ups-tools/templates/common.yaml +++ b/charts/stable/network-ups-tools/templates/common.yaml @@ -15,22 +15,22 @@ persistence: defaultMode: 256 optional: true -{{ if .Values.exporter.enabled }} +{{ if .Values.metrics.enabled }} additionalContainers: exporter: name: exporter - image: "{{ .Values.exporter.image.repository }}:{{ .Values.exporter.image.tag }}" - imagePullPolicy: {{ .Values.exporter.image.pullPolicy }} + image: "{{ .Values.metrics.exporter.image.repository }}:{{ .Values.metrics.exporter.image.tag }}" + imagePullPolicy: {{ .Values.metrics.exporter.image.pullPolicy }} env: - name: HTTP_PORT - value: "{{ .Values.exporter.env.port }}" + value: "{{ .Values.metrics.exporter.env.port }}" - name: HTTP_PATH value: "/metrics" - name: RUST_LOG - value: "{{ .Values.exporter.env.logLevel }}" + value: "{{ .Values.metrics.exporter.env.logLevel }}" ports: - name: metrics - containerPort: {{ .Values.exporter.env.port }} + containerPort: {{ .Values.metrics.exporter.env.port }} service: metrics: @@ -39,7 +39,7 @@ service: metrics: enabled: true protocol: TCP - port: {{ .Values.exporter.env.port }} + port: {{ .Values.metrics.exporter.env.port }} {{ end }} {{- end -}} {{- $_ := mergeOverwrite .Values (include "network-ups-tools.harcodedValues" . | fromYaml) -}} diff --git a/charts/stable/network-ups-tools/templates/prometheusrules.yaml b/charts/stable/network-ups-tools/templates/prometheusrules.yaml new file mode 100644 index 00000000..b5c561aa --- /dev/null +++ b/charts/stable/network-ups-tools/templates/prometheusrules.yaml @@ -0,0 +1,55 @@ +{{- if and .Values.metrics.enabled .Values.metrics.prometheusRule.enabled }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ include "common.names.fullname" . }} + labels: + {{- include "common.labels" . | nindent 4 }} + {{- with .Values.metrics.prometheusRule.labels }} + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + groups: + - name: {{ include "common.names.fullname" . }} + rules: + - alert: NutExporterAbsent + annotations: + description: NUT Exporter has disappeared from Prometheus target discovery. + summary: NUT Exporter is down. + expr: | + absent(up{job=~".*{{ include "common.names.fullname" . }}.*"} == 1) + for: 5m + labels: + severity: critical + - alert: UpsStatusUnknown + annotations: + description: UPS {{ "{{ $labels.ups }}" }} is reporting a status of unknown. + summary: UPS status unknown. + expr: | + nut_status{job=~".*{{ include "common.names.fullname" . }}.*"} == 0 + for: 10s + labels: + severity: critical + - alert: UpsOnBattery + annotations: + description: UPS {{ "{{ $labels.ups }}" }} has lost power and is running on battery. + summary: UPS is running on battery. + expr: | + nut_status{job=~".*{{ include "common.names.fullname" . }}.*"} == 2 + for: 10s + labels: + severity: critical + - alert: UpsLowBattery + annotations: + description: UPS {{ "{{ $labels.ups }}" }} battery is low and the system is getting + ready to shutdown. + summary: UPS battery is low. + expr: | + nut_status{job=~".*{{ include "common.names.fullname" . }}.*"} == 3 + for: 5s + labels: + severity: critical + {{- with .Values.metrics.prometheusRule.rules }} + {{- toYaml . | nindent 8 }} + {{- end }} +{{- end }} diff --git a/charts/stable/network-ups-tools/templates/servicemonitor.yaml b/charts/stable/network-ups-tools/templates/servicemonitor.yaml index 61a49c12..c8110015 100644 --- a/charts/stable/network-ups-tools/templates/servicemonitor.yaml +++ b/charts/stable/network-ups-tools/templates/servicemonitor.yaml @@ -1,11 +1,11 @@ -{{- if .Values.exporter.enabled }} +{{- if .Values.metrics.enabled }} apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor metadata: name: {{ template "common.names.fullname" . }} labels: {{- include "common.labels" . | nindent 4 }} - {{- with .Values.exporter.serviceMonitor.labels }} + {{- with .Values.metrics.serviceMonitor.labels }} {{- toYaml . | nindent 4 }} {{- end }} spec: @@ -14,10 +14,10 @@ spec: {{- include "common.labels.selectorLabels" . | nindent 6 }} endpoints: - port: metrics - {{- with .Values.exporter.serviceMonitor.interval }} + {{- with .Values.metrics.serviceMonitor.interval }} interval: {{ . }} {{- end }} - {{- with .Values.exporter.serviceMonitor.scrapeTimeout }} + {{- with .Values.metrics.serviceMonitor.scrapeTimeout }} scrapeTimeout: {{ . }} {{- end }} path: /metrics diff --git a/charts/stable/network-ups-tools/values.yaml b/charts/stable/network-ups-tools/values.yaml index 53a2372b..3ce72b87 100644 --- a/charts/stable/network-ups-tools/values.yaml +++ b/charts/stable/network-ups-tools/values.yaml @@ -74,7 +74,7 @@ config: port = dummy-ups.dev desc = "dummy-ups in dummy mode" -exporter: +metrics: # -- Enable and configure prometheus-nut-exporter sidecar and Prometheus serviceMonitor. # @default -- See values.yaml enabled: false @@ -82,15 +82,33 @@ exporter: interval: 30s scrapeTimeout: 10s labels: {} - image: - # -- image repository - repository: hon95/prometheus-nut-exporter - # -- image tag - tag: 1.1.1 - # -- image pull policy - pullPolicy: IfNotPresent - env: - # -- metrics port - port: 9995 - # -- log level [info|debug|trace] - logLevel: info + # -- Enable and configure Prometheus Rules for the chart under this key. + # @default -- See values.yaml + prometheusRule: + enabled: false + labels: {} + # -- Configure additionial rules for the chart under this key. + # @default -- See prometheusrules.yaml + rules: [] + # - alert: UpsStatusUnknown + # annotations: + # description: UPS {{ "{{ $labels.ups }}" }} is reporting a status of unknown. + # summary: UPS status unknown. + # expr: | + # nut_status == 0 + # for: 10s + # labels: + # severity: critical + exporter: + image: + # -- image repository + repository: hon95/prometheus-nut-exporter + # -- image tag + tag: 1.1.1 + # -- image pull policy + pullPolicy: IfNotPresent + env: + # -- metrics port + port: 9995 + # -- log level [info|debug|trace] + logLevel: info