Browse Source

Merge pull request #33 from elemoine/stacklight-alarm

Add more alarms
tags/2016.12
Éric Lemoine 8 years ago
parent
commit
3e38377ab3
1 changed files with 136 additions and 0 deletions
  1. +136
    -0
      linux/meta/heka.yml

+ 136
- 0
linux/meta/heka.yml View File

@@ -54,9 +54,145 @@ metric_collector:
window: 120
periods: 0
function: avg
linux_system_swap_usage_critical:
description: 'There is no more swap free space'
severity: critical
rules:
- metric: swap_free
relational_operator: '=='
threshold: 0
window: 60
periods: 0
function: max
linux_system_swap_activity_warning:
description: 'The swap activity is high'
severity: warning
rules:
- metric: swap_io_in
relational_operator: '>='
threshold: 1048576 # 1 Mb/s
window: 120
periods: 0
function: avg
- metric: swap_io_out
relational_operator: '>='
threshold: 1048576 # 1 Mb/s
window: 120
periods: 0
function: avg
linux_system_swap_usage_warning:
description: 'The swap free space is low'
severity: warning
rules:
- metric: swap_percent_used
relational_operator: '>='
threshold: 0.8
window: 60
periods: 0
function: avg
linux_system_root_fs_warning:
description: "The root filesystem's free space is low"
severity: warning
rules:
- metric: fs_space_percent_free
field:
fs: '/'
relational_operator: '<'
threshold: 10
window: 60
periods: 0
function: min
linux_system_root_fs_critical:
description: "The root filesystem's free space is too low"
severity: critical
rules:
- metric: fs_space_percent_free
field:
fs: '/'
relational_operator: '<'
threshold: 5
window: 60
periods: 0
function: min
linux_system_network_warning_dropped_rx:
description: 'Some received packets have been dropped'
severity: warning
rules:
- metric: if_dropped_rx
relational_operator: '>'
threshold: 100
window: 60
periods: 0
function: avg
linux_system_network_critical_dropped_rx:
description: 'Too many received packets have been dropped'
severity: critical
rules:
- metric: if_dropped_rx
relational_operator: '>'
threshold: 1000
window: 60
periods: 0
function: avg
linux_system_network_warning_dropped_tx:
description: 'Some transmitted packets have been dropped'
severity: warning
rules:
- metric: if_dropped_tx
relational_operator: '>'
threshold: 100
window: 60
periods: 0
function: avg
linux_system_network_critical_dropped_tx:
description: 'Too many transmitted packets have been dropped'
severity: critical
rules:
- metric: if_dropped_tx
relational_operator: '>'
threshold: 1000
function: avg
window: 60
linux_system_hdd_errors_critical:
description: 'Errors on hard drive(s) have been detected'
severity: critical
no_data_policy: okay
rules:
- metric: hdd_errors_rate
group_by: [device]
relational_operator: '>'
threshold: 0
window: 60
periods: 0
function: max
alarm:
linux_system_cpu:
alerting: enabled
triggers:
- linux_system_cpu_warning
- linux_system_cpu_critical
linux_system_swap:
alerting: enabled
triggers:
- linux_system_swap_usage_critical
- linux_system_swap_activity_warning
- linux_system_swap_usage_warning
linux_system_root_fs:
alerting: enabled
triggers:
- linux_system_root_fs_critical
- linux_system_root_fs_warning
linux_system_network_rx:
alerting: enabled
triggers:
- linux_system_network_critical_dropped_rx
- linux_system_network_warning_dropped_rx
linux_system_network_tx:
alerting: enabled
triggers:
- linux_system_network_critical_dropped_tx
- linux_system_network_warning_dropped_tx
linux_system_hdd_errors:
alerting: enabled_with_notification
triggers:
- linux_system_hdd_errors_critical

Loading…
Cancel
Save