Browse Source

Merge "Add variables in prometheus alerts"

pull/92/head
Filip Pytloun 7 years ago
parent
commit
3da8449582
1 changed file with 13 additions and 5 deletions
  1. +13
    -5
      linux/meta/prometheus.yml

+ 13
- 5
linux/meta/prometheus.yml View File

{% raw %}
server: server:
alert: alert:
AvgCPUUsageIdle: AvgCPUUsageIdle:
if: 'avg_over_time(cpu_usage_idle{cpu="cpu-total"}[5m]) < 10'
if: avg_over_time(cpu_usage_idle{cpu="cpu-total"}[5m]) < {{ prometheus_server.get('alert', {}).get('AvgCPUUsageIdle', {}).get('var', {}).get('threshold', 10) }}
{% raw %}
labels: labels:
severity: warning severity: warning
service: system service: system
annotations: annotations:
summary: 'Avarage CPU usage (idle) for node {{ $labels.host }} is low' summary: 'Avarage CPU usage (idle) for node {{ $labels.host }} is low'
description: 'Avarage CPU usage (idle) for node {{ $labels.host }} is low {{ $value }}' description: 'Avarage CPU usage (idle) for node {{ $labels.host }} is low {{ $value }}'
{% endraw %}
PredictLinearDiskFree: PredictLinearDiskFree:
if: 'predict_linear(disk_free[1h], 8*3600) < 0' if: 'predict_linear(disk_free[1h], 8*3600) < 0'
{% raw %}
labels: labels:
severity: warning severity: warning
service: system service: system
annotations: annotations:
summary: 'Disk space ({{ $labels.path }}) is filling on {{ $labels.host }}' summary: 'Disk space ({{ $labels.path }}) is filling on {{ $labels.host }}'
description: 'Disk space ({{ $labels.path }}) will be full in 8h on {{ $labels.host }}' description: 'Disk space ({{ $labels.path }}) will be full in 8h on {{ $labels.host }}'
{% endraw %}
PredictLinearDiskInodesFree: PredictLinearDiskInodesFree:
if: 'predict_linear(disk_inodes_free[1h], 8*3600) < 0' if: 'predict_linear(disk_inodes_free[1h], 8*3600) < 0'
{% raw %}
labels: labels:
severity: warning severity: warning
service: system service: system
annotations: annotations:
summary: 'Disk inodes ({{ $labels.path }}) are filling on {{ $labels.host }}' summary: 'Disk inodes ({{ $labels.path }}) are filling on {{ $labels.host }}'
description: 'Disk inodes ({{ $labels.path }}) will be full in 8h on {{ $labels.host }}' description: 'Disk inodes ({{ $labels.path }}) will be full in 8h on {{ $labels.host }}'
{% endraw %}
AvgMemAvailablePercent: AvgMemAvailablePercent:
if: 'avg_over_time(mem_available_percent[5m]) < 10'
if: avg_over_time(mem_available_percent[5m]) < {{ prometheus_server.get('alert', {}).get('AvgMemAvailablePercent', {}).get('var', {}).get('threshold', 10) }}
{% raw %}
labels: labels:
severity: warning severity: warning
service: system service: system
annotations: annotations:
summary: 'Free memory is low on {{ $labels.host }}' summary: 'Free memory is low on {{ $labels.host }}'
description: 'Free memory percent for node {{ $labels.host }} is low {{ $value }}' description: 'Free memory percent for node {{ $labels.host }} is low {{ $value }}'
{% endraw %}
SystemLoad5: SystemLoad5:
if: 'system_load5 / system_n_cpus > 3'
if: system_load5 / system_n_cpus > {{ prometheus_server.get('alert', {}).get('SystemLoad5', {}).get('var', {}).get('threshold', 3) }}
{% raw %}
labels: labels:
severity: warning severity: warning
service: system service: system
annotations: annotations:
summary: 'High system load (5m) on {{ $labels.host }}' summary: 'High system load (5m) on {{ $labels.host }}'
description: 'High system load (5m) on node {{ $labels.host }}' description: 'High system load (5m) on node {{ $labels.host }}'
{% endraw %}
{% endraw %}

Loading…
Cancel
Save