Explorar el Código

Alerts rationalization for Galera

Change-Id: I65eadcf2d54576dce7cb59552d1bb7421d4ab6d6
Closes-Bug: PROD-19880
master
Mateusz Matuszkowiak hace 6 años
padre
commit
cb2bb079d6
Se ha modificado 1 fichero con 30 adiciones y 13 borrados
  1. +30
    -13
      galera/meta/prometheus.yml

+ 30
- 13
galera/meta/prometheus.yml Ver fichero

@@ -1,31 +1,48 @@
-{% raw %}
 server:
   alert:
     GaleraServiceDown:
       if: >-
         mysql_up != 1
+      {%- raw %}
       labels:
-        severity: warning
+        severity: minor
         service: mysql
       annotations:
-        summary: 'Galera service down'
-        description: 'Galera service is down on node {{ $labels.host }}'
+        summary: "Galera service is down"
+        description: "The Galera service on the {{ $labels.host }} node is down."
+      {%- endraw %}
+    GaleraServiceOutage:
+      if: >-
+        count(label_replace(mysql_up, "cluster", "$1", "host", "([^0-9]+).+")) by (cluster) == count(label_replace(mysql_up == 0, "cluster", "$1", "host", "([^0-9]+).+")) by (cluster)
+      {%- raw %}
+      labels:
+        severity: critical
+        service: mysql
+      annotations:
+        summary: "Galera service outage"
+        description: "All Galera services within the {{ $labels.cluster }} cluster are down."
+      {% endraw %}
     GaleraNodeNotReady:
-      if: 'mysql_wsrep_ready != 1'
+      if: >-
+        mysql_wsrep_ready != 1
+      {%- raw %}
       for: 1m
       labels:
-        severity: warning
+        severity: major
         service: mysql
       annotations:
-        summary: 'Galera on {{ $labels.host }} not ready'
-        description: 'The Galera service on {{ $labels.host }} is not ready to serve queries.'
+        summary: "Galera service is not ready"
+        description: "The Galera service on the {{ $labels.host }} node is not ready to serve queries for at least 1 minute."
+      {%- endraw %}
     GaleraNodeNotConnected:
-      if: 'mysql_wsrep_connected != 1'
+      if: >-
+        mysql_wsrep_connected != 1
+      {%- raw %}
       for: 1m
       labels:
-        severity: warning
+        severity: major
         service: mysql
       annotations:
-        summary: 'Galera on {{ $labels.host }} not connected'
-        description: 'The Galera service on {{ $labels.host }} is not connected to the cluster.'
-{% endraw %}
+        summary: "Galera service is not connected"
+        description: "The Galera service on the {{ $labels.host }} node is not connected to the cluster for at least 1 minute."
+      {%- endraw %}

Cargando…
Cancelar
Guardar