|
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188 |
{%- from "linux/map.jinja" import monitoring with context %}
# Alarm definitions for Linux system metrics, published under the
# `metric_collector` key.
#
# Layout:
#   trigger - named threshold checks. Each one carries a description, a
#             severity and a list of rules; every rule names a metric, an
#             aggregation function, a window, a comparison operator and a
#             threshold.
#   alarm   - named groups that reference triggers by name and set an
#             alerting mode.
#
# NOTE(review): `window` is presumably expressed in seconds and `periods`
# is presumably the number of previous windows to compare against - confirm
# both against the collector's alarm documentation.
metric_collector:
  trigger:
    # --- CPU ---------------------------------------------------------------
    linux_system_cpu_critical:
      description: 'The CPU usage is too high.'
      severity: critical
      rules:
        - metric: cpu_wait
          relational_operator: '>='
          threshold: 35
          window: 120
          periods: 0
          function: avg
        - metric: cpu_idle
          relational_operator: '<='
          threshold: 5
          window: 120
          # Spelled out so this rule matches every sibling rule in the file.
          periods: 0
          function: avg
    linux_system_cpu_warning:
      description: 'The CPU wait times are high.'
      severity: warning
      rules:
        - metric: cpu_wait
          relational_operator: '>='
          threshold: 15
          window: 120
          periods: 0
          function: avg

    # --- Swap --------------------------------------------------------------
    linux_system_swap_usage_critical:
      description: 'There is no more swap free space'
      severity: critical
      rules:
        # `max` over the window equal to 0 means swap_free never rose
        # above 0 during the whole window.
        - metric: swap_free
          relational_operator: '=='
          threshold: 0
          window: 60
          periods: 0
          function: max
    linux_system_swap_activity_warning:
      description: 'The swap activity is high'
      severity: warning
      rules:
        # 1048576 = 1024 * 1024.
        # NOTE(review): presumably bytes per second, i.e. 1 MiB/s - confirm
        # the unit of swap_io_in / swap_io_out.
        - metric: swap_io_in
          relational_operator: '>='
          threshold: 1048576
          window: 120
          periods: 0
          function: avg
        - metric: swap_io_out
          relational_operator: '>='
          threshold: 1048576
          window: 120
          periods: 0
          function: avg
    linux_system_swap_usage_warning:
      description: 'The swap free space is low'
      severity: warning
      rules:
        # NOTE(review): the threshold looks like a 0-1 ratio (80 % used)
        # despite the "percent" in the metric name - confirm.
        - metric: swap_percent_used
          relational_operator: '>='
          threshold: 0.8
          window: 60
          periods: 0
          function: avg

    # --- Root filesystem ---------------------------------------------------
    linux_system_root_fs_warning:
      description: "The root filesystem's free space is low"
      severity: warning
      rules:
        - metric: fs_space_percent_free
          # Restrict the rule to the series whose `fs` field is '/'.
          field:
            fs: '/'
          relational_operator: '<'
          threshold: 10
          window: 60
          periods: 0
          function: min
    linux_system_root_fs_critical:
      description: "The root filesystem's free space is too low"
      severity: critical
      rules:
        - metric: fs_space_percent_free
          field:
            fs: '/'
          relational_operator: '<'
          threshold: 5
          window: 60
          periods: 0
          function: min

    # --- Network: dropped packets ------------------------------------------
    linux_system_network_warning_dropped_rx:
      description: 'Some received packets have been dropped'
      severity: warning
      rules:
        - metric: if_dropped_rx
          relational_operator: '>'
          threshold: 100
          window: 60
          periods: 0
          function: avg
    linux_system_network_critical_dropped_rx:
      description: 'Too many received packets have been dropped'
      severity: critical
      rules:
        - metric: if_dropped_rx
          relational_operator: '>'
          threshold: 1000
          window: 60
          periods: 0
          function: avg
    linux_system_network_warning_dropped_tx:
      description: 'Some transmitted packets have been dropped'
      severity: warning
      rules:
        - metric: if_dropped_tx
          relational_operator: '>'
          threshold: 100
          window: 60
          periods: 0
          function: avg
    linux_system_network_critical_dropped_tx:
      description: 'Too many transmitted packets have been dropped'
      severity: critical
      rules:
        - metric: if_dropped_tx
          relational_operator: '>'
          threshold: 1000
          window: 60
          # Spelled out so this rule matches its rx counterpart.
          periods: 0
          function: avg

    # --- Hard drives -------------------------------------------------------
    linux_system_hdd_errors_critical:
      description: 'Errors on hard drive(s) have been detected'
      severity: critical
      no_data_policy: okay
      rules:
        - metric: hdd_errors_rate
          group_by: [device]
          relational_operator: '>'
          threshold: 0
          window: 60
          periods: 0
          function: max

    # --- Bonding (only rendered when bond interfaces are listed) -----------
    {%- if monitoring.bond_status.interfaces is defined and monitoring.bond_status.interfaces is list %}
    linux_bond_status_critical:
      description: 'Bond members are down.'
      severity: critical
      rules:
        - metric: bond_status_links_down
          relational_operator: '>'
          threshold: 0
          window: 120
          periods: 0
          function: last
    {%- endif %}

  alarm:
    linux_system_cpu:
      alerting: enabled
      triggers:
        - linux_system_cpu_warning
        - linux_system_cpu_critical
    linux_system_swap:
      alerting: enabled
      triggers:
        - linux_system_swap_usage_critical
        - linux_system_swap_activity_warning
        - linux_system_swap_usage_warning
    linux_system_root_fs:
      alerting: enabled
      triggers:
        - linux_system_root_fs_critical
        - linux_system_root_fs_warning
    linux_system_network_rx:
      alerting: enabled
      triggers:
        - linux_system_network_critical_dropped_rx
        - linux_system_network_warning_dropped_rx
    linux_system_network_tx:
      alerting: enabled
      triggers:
        - linux_system_network_critical_dropped_tx
        - linux_system_network_warning_dropped_tx
    # The only alarm in this file that uses `enabled_with_notification`.
    linux_system_hdd_errors:
      alerting: enabled_with_notification
      triggers:
        - linux_system_hdd_errors_critical
    # Same guard as the bond trigger, so the alarm never references a
    # trigger that was not rendered.
    {%- if monitoring.bond_status.interfaces is defined and monitoring.bond_status.interfaces is list %}
    linux_bond_status:
      alerting: enabled
      triggers:
        - linux_bond_status_critical
    {%- endif %}
|