# Collector metadata for the Linux system service.
#
# log_collector    - decoder / input / filter definitions for system logs.
# metric_collector - alarm triggers (threshold rules) and the alarms that
#                    group those triggers.

log_collector:
  decoder:
    # Sandbox decoder backed by generic_syslog.lua.
    system:
      engine: sandbox
      module_file: /usr/share/lma_collector/decoders/generic_syslog.lua
      module_dir: /usr/share/lma_collector/common;/usr/share/heka/lua_modules
      adjust_timezone: true
      config:
        # Single-quoted on purpose: the trailing \n must stay a literal
        # backslash + 'n' for the consumer, not a YAML escape.
        syslog_pattern: '%TIMESTAMP% %HOSTNAME% %syslogtag%%msg:::sp-if-no-1st-sp%%msg%\n'

  input:
    # Logstreamer input over /var/log; the named group "Service" in
    # file_match is reused by the differentiator below.
    linux_log_stream:
      engine: logstreamer
      log_directory: "/var/log"
      file_match: '(?P<Service>daemon\.log|cron\.log|haproxy\.log|kern\.log|auth\.log|syslog|messages|debug)'
      differentiator: ['system.', 'Service']
      decoder: "system_decoder"
      splitter: "TokenSplitter"

  filter:
    # Sandbox filter backed by hdd_errors_counter.lua; it only receives
    # messages selected by message_matcher (kernel log entries).
    linux_hdd_errors:
      engine: sandbox
      module_file: /usr/share/lma_collector/filters/hdd_errors_counter.lua
      module_dir: /usr/share/lma_collector/common;/usr/share/heka/lua_modules
      preserve_data: false
      message_matcher: "Type == 'log' && Logger == 'system.kern'"
      ticker_interval: 10
      config:
        grace_interval: 10
        # Two space-separated /.../ patterns; %s and %d are Lua pattern
        # classes, not printf placeholders.
        patterns: "/error%s.+([sv]d[a-z][a-z]?)%d?/ /([sv]d[a-z][a-z]?)%d?.+%serror/"
        # Rendered by the template engine before YAML parsing; keep quoted.
        hostname: '{{ grains.host }}'

metric_collector:
  trigger:
    # --- CPU ---------------------------------------------------------------
    linux_system_cpu_critical:
      description: 'The CPU usage is too high.'
      severity: critical
      rules:
        - metric: cpu_wait
          relational_operator: '>='
          threshold: 35
          window: 120
          periods: 0
          function: avg
        - metric: cpu_idle
          relational_operator: '<='
          threshold: 5
          window: 120
          # Made explicit to match every sibling rule.
          # NOTE(review): assumes 0 is also the implicit default - confirm.
          periods: 0
          function: avg
    linux_system_cpu_warning:
      description: 'The CPU wait times are high.'
      # Was 'critical', which contradicted the trigger's name and made it
      # indistinguishable from linux_system_cpu_critical in the alarm.
      severity: warning
      rules:
        - metric: cpu_wait
          relational_operator: '>='
          threshold: 15
          window: 120
          periods: 0
          function: avg

    # --- Swap --------------------------------------------------------------
    linux_system_swap_usage_critical:
      description: 'There is no more swap free space'
      severity: critical
      rules:
        - metric: swap_free
          relational_operator: '=='
          threshold: 0
          window: 60
          periods: 0
          function: max
    linux_system_swap_activity_warning:
      description: 'The swap activity is high'
      severity: warning
      rules:
        - metric: swap_io_in
          relational_operator: '>='
          threshold: 1048576  # 1024 * 1024 (original note: 1 Mb/s)
          window: 120
          periods: 0
          function: avg
        - metric: swap_io_out
          relational_operator: '>='
          threshold: 1048576  # 1024 * 1024 (original note: 1 Mb/s)
          window: 120
          periods: 0
          function: avg
    linux_system_swap_usage_warning:
      description: 'The swap free space is low'
      severity: warning
      rules:
        - metric: swap_percent_used
          relational_operator: '>='
          # NOTE(review): 0.8 looks like a ratio rather than a percentage
          # despite the metric name - confirm the unit with the producer.
          threshold: 0.8
          window: 60
          periods: 0
          function: avg

    # --- Root filesystem ---------------------------------------------------
    linux_system_root_fs_warning:
      description: "The root filesystem's free space is low"
      severity: warning
      rules:
        - metric: fs_space_percent_free
          field:
            fs: '/'
          relational_operator: '<'
          threshold: 10
          window: 60
          periods: 0
          function: min
    linux_system_root_fs_critical:
      description: "The root filesystem's free space is too low"
      severity: critical
      rules:
        - metric: fs_space_percent_free
          field:
            fs: '/'
          relational_operator: '<'
          threshold: 5
          window: 60
          periods: 0
          function: min

    # --- Network: dropped packets ------------------------------------------
    linux_system_network_warning_dropped_rx:
      description: 'Some received packets have been dropped'
      severity: warning
      rules:
        - metric: if_dropped_rx
          relational_operator: '>'
          threshold: 100
          window: 60
          periods: 0
          function: avg
    linux_system_network_critical_dropped_rx:
      description: 'Too many received packets have been dropped'
      severity: critical
      rules:
        - metric: if_dropped_rx
          relational_operator: '>'
          threshold: 1000
          window: 60
          periods: 0
          function: avg
    linux_system_network_warning_dropped_tx:
      description: 'Some transmitted packets have been dropped'
      severity: warning
      rules:
        - metric: if_dropped_tx
          relational_operator: '>'
          threshold: 100
          window: 60
          periods: 0
          function: avg
    linux_system_network_critical_dropped_tx:
      description: 'Too many transmitted packets have been dropped'
      severity: critical
      rules:
        - metric: if_dropped_tx
          relational_operator: '>'
          threshold: 1000
          window: 60
          # Made explicit to match the rx counterpart and the tx warning.
          # NOTE(review): assumes 0 is also the implicit default - confirm.
          periods: 0
          function: avg

    # --- Hard drive errors -------------------------------------------------
    linux_system_hdd_errors_critical:
      description: 'Errors on hard drive(s) have been detected'
      severity: critical
      # The only trigger here that sets a no-data policy.
      no_data_policy: okay
      rules:
        - metric: hdd_errors_rate
          group_by: [device]
          relational_operator: '>'
          threshold: 0
          window: 60
          periods: 0
          function: max

  # Each alarm groups the triggers defined above under one name.
  alarm:
    linux_system_cpu:
      alerting: enabled
      triggers:
        - linux_system_cpu_warning
        - linux_system_cpu_critical
    linux_system_swap:
      alerting: enabled
      triggers:
        - linux_system_swap_usage_critical
        - linux_system_swap_activity_warning
        - linux_system_swap_usage_warning
    linux_system_root_fs:
      alerting: enabled
      triggers:
        - linux_system_root_fs_critical
        - linux_system_root_fs_warning
    linux_system_network_rx:
      alerting: enabled
      triggers:
        - linux_system_network_critical_dropped_rx
        - linux_system_network_warning_dropped_rx
    linux_system_network_tx:
      alerting: enabled
      triggers:
        - linux_system_network_critical_dropped_tx
        - linux_system_network_warning_dropped_tx
    linux_system_hdd_errors:
      alerting: enabled_with_notification
      triggers:
        - linux_system_hdd_errors_critical