Browse Source

Merge pull request #39 from tcpcloud/stacklight

Stacklight
tags/2016.12
Filip Pytloun 8 years ago
parent
commit
45236b54cd
13 changed files with 4182 additions and 38 deletions
  1. +2
    -0
      README.rst
  2. +17
    -0
      _modules/linux_netlink.py
  3. +4
    -0
      linux/files/collectd_df.conf
  4. +10
    -0
      linux/files/collectd_netlink.conf
  5. +3928
    -0
      linux/files/grafana_dashboards/system_influxdb.json
  6. +1
    -1
      linux/map.jinja
  7. +9
    -14
      linux/meta/collectd.yml
  8. +4
    -0
      linux/meta/grafana.yml
  9. +198
    -19
      linux/meta/heka.yml
  10. +3
    -1
      linux/network/host.sls
  11. +1
    -0
      linux/network/interface.sls
  12. +2
    -2
      linux/storage/mount.sls
  13. +3
    -1
      metadata/service/support.yml

+ 2
- 0
README.rst View File

@@ -558,6 +558,7 @@ Linux with mounted Samba
enabled: true
mount:
samba1:
- enabled: true
- path: /media/myuser/public/
- device: //192.168.0.1/storage
- file_system: cifs
@@ -599,6 +600,7 @@ LVM group `vg1` with one device and `data` volume mounted into `/mnt/data`
storage:
mount:
data:
enabled: true
device: /dev/vg1/data
file_system: ext4
path: /mnt/data

+ 17
- 0
_modules/linux_netlink.py View File

@@ -0,0 +1,17 @@
# -*- coding: utf-8 -*-

import re

# Interface names made up solely of lowercase letters and digits (e.g. "eth0").
_alphanum_re = re.compile(r'^[a-z0-9]+$')
# The loopback interface, which is always left out of the listing.
_lo_re = re.compile(r'^lo$')


def _filter(interface):
    """
    Tell whether an interface name should be reported.

    :param interface: interface name to test
    :return: True when the name is purely lowercase alphanumeric and is
        not ``lo``, False otherwise
    """
    # bool() keeps the return type a plain boolean instead of leaking a
    # match object (or None) to callers.
    return bool(_alphanum_re.match(interface)) and not _lo_re.match(interface)


def ls():
    """
    Provide a list of network interfaces.

    Names are taken from the keys of the ``ip_interfaces`` grain and kept
    only when they pass ``_filter``.

    :return: list of interface names (empty when the grain is missing)
    """
    interfaces = __salt__['grains.get']('ip_interfaces', {})
    # Build a real list: on Python 3 ``filter()`` returns a lazy iterator,
    # which is not the list this function's docstring promises.
    return [name for name in interfaces.keys() if _filter(name)]

+ 4
- 0
linux/files/collectd_df.conf View File

@@ -7,4 +7,8 @@
FSType {{ fs_type }}
{%- endfor %}
IgnoreSelected {{ plugin.get('ignore_selected', False)|lower }}
ReportByDevice false
ReportInodes true
ValuesAbsolute true
ValuesPercentage true
</Plugin>

+ 10
- 0
linux/files/collectd_netlink.conf View File

@@ -0,0 +1,10 @@
# Load the netlink plugin with Globals set to false.
<LoadPlugin netlink>
Globals false
</LoadPlugin>

# One VerboseInterface line is rendered per name in plugin.interfaces;
# none are rendered when that list is absent or empty.
<Plugin netlink>
{%- for interface_name in plugin.get('interfaces', []) %}
VerboseInterface "{{ interface_name }}"
{%- endfor %}
# Rendered in lowercase; falls back to false when ignore_selected is unset.
IgnoreSelected {{ plugin.get('ignore_selected', False)|lower }}
</Plugin>

+ 3928
- 0
linux/files/grafana_dashboards/system_influxdb.json
File diff suppressed because it is too large
View File


+ 1
- 1
linux/map.jinja View File

@@ -15,7 +15,7 @@
'doc_validity_pkgs': ['python-yaml'],
},
'Debian': {
'pkgs': ['python-apt','vim-nox', 'apt-transport-https'],
'pkgs': ['python-apt','vim-nox', 'apt-transport-https', 'libmnl0'],
'utc': true,
'user': {},
'group': {},

+ 9
- 14
linux/meta/collectd.yml View File

@@ -1,31 +1,28 @@
plugin:
linux_network_interface:
plugin: interface
execution: local
local_plugin:
linux_network_netlink:
plugin: netlink
template: linux/files/collectd_netlink.conf
ignore_selected: false
interfaces:
{%- for interface_name in salt['linux_netlink.ls']() %}
- {{ interface_name }}
{%- endfor %}
linux_system_cpu:
plugin: cpu
execution: local
linux_system_entropy:
plugin: entropy
execution: local
linux_system_load:
plugin: load
execution: local
linux_system_contextswitch:
plugin: contextswitch
execution: local
linux_system_memory:
plugin: memory
execution: local
linux_system_uptime:
plugin: uptime
execution: local
linux_system_users:
plugin: users
execution: local
linux_storage_df:
plugin: df
execution: local
template: linux/files/collectd_df.conf
ignore_selected: True
fs_types:
@@ -39,9 +36,7 @@ plugin:
- cgroup
linux_storage_disk:
plugin: disk
execution: local
template: linux/files/collectd_disk.conf
ignore_selected: True
linux_storage_swap:
plugin: swap
execution: local

+ 4
- 0
linux/meta/grafana.yml View File

@@ -0,0 +1,4 @@
# Grafana metadata for this formula: a "linux" dashboard entry in JSON
# format, pointing at the system_influxdb.json template shipped under
# linux/files/grafana_dashboards/.
dashboard:
linux:
format: json
template: linux/files/grafana_dashboards/system_influxdb.json

+ 198
- 19
linux/meta/heka.yml View File

@@ -1,19 +1,198 @@
input:
linux_rsyslog_syslog:
engine: logstreamer
log_directory: /var/log
file_match: syslog\.?(?P<Index>\d+)?(.gz)?
priority: ["^Index"]
decoder: RsyslogDecoder
linux_rsyslog_auth:
engine: logstreamer
log_directory: /var/log
file_match: auth\.log\.?(?P<Index>\d+)?(.gz)?
priority: ["^Index"]
decoder: RsyslogDecoder
linux_rsyslog_kern:
engine: logstreamer
log_directory: /var/log
file_match: kern\.log\.?(?P<Index>\d+)?(.gz)?
priority: ["^Index"]
decoder: RsyslogDecoder
# Log collection settings: one decoder, one logstreamer input and one filter.
log_collector:
decoder:
# Sandbox decoder implemented by generic_syslog.lua.
system:
engine: sandbox
module_file: /usr/share/lma_collector/decoders/generic_syslog.lua
module_dir: /usr/share/lma_collector/common;/usr/share/heka/lua_modules
adjust_timezone: true
config:
# Line layout handed to the decoder (looks like an rsyslog template - confirm).
syslog_pattern: '%TIMESTAMP% %HOSTNAME% %syslogtag%%msg:::sp-if-no-1st-sp%%msg%\n'
input:
linux_log_stream:
engine: logstreamer
log_directory: "/var/log"
# The named group "Service" records which log file matched; the
# differentiator below refers to it by name.
file_match: '(?P<Service>daemon\.log|cron\.log|haproxy\.log|kern\.log|auth\.log|syslog|messages|debug)'
differentiator: [ 'system.', 'Service' ]
decoder: "system_decoder"
splitter: "TokenSplitter"
filter:
# Sandbox filter implemented by hdd_errors_counter.lua; message_matcher
# restricts it to messages with Type 'log' and Logger 'system.kern'.
linux_hdd_errors:
engine: sandbox
module_file: /usr/share/lma_collector/filters/hdd_errors_counter.lua
module_dir: /usr/share/lma_collector/common;/usr/share/heka/lua_modules
preserve_data: false
message_matcher: "Type == 'log' && Logger == 'system.kern'"
ticker_interval: 10
config:
grace_interval: 10
# Two space-separated /.../ patterns (presumably Lua patterns, given the
# .lua module): each captures a device name such as "sda" or "vdb" that
# appears before or after the word "error".
patterns: "/error%s.+([sv]d[a-z][a-z]?)%d?/ /([sv]d[a-z][a-z]?)%d?.+%serror/"
# Filled in from the minion's "host" grain when the file is rendered.
hostname: '{{ grains.host }}'
metric_collector:
trigger:
linux_system_cpu_critical:
  # Two rules, each averaged over a window of 120:
  # cpu_wait >= 35 and cpu_idle <= 5.
  description: 'The CPU usage is too high.'
  severity: critical
  rules:
    - metric: cpu_wait
      relational_operator: '>='
      threshold: 35
      window: 120
      periods: 0
      function: avg
    - metric: cpu_idle
      # Operator quoted and "periods" spelled out so this rule has the
      # same shape as the cpu_wait rule above.
      relational_operator: '<='
      threshold: 5
      window: 120
      periods: 0
      function: avg
linux_system_cpu_warning:
  description: 'The CPU wait times are high.'
  # Warning tier of the CPU alarm: same cpu_wait metric as the critical
  # trigger but with the lower threshold (15 instead of 35), so it must
  # not be reported as critical.
  severity: warning
  rules:
    - metric: cpu_wait
      relational_operator: '>='
      threshold: 15
      window: 120
      periods: 0
      function: avg
# Swap triggers: one critical (no free swap) and two warnings
# (high swap I/O, high swap usage).

# Rule: max(swap_free) == 0 with window 60.
linux_system_swap_usage_critical:
description: 'There is no more swap free space'
severity: critical
rules:
- metric: swap_free
relational_operator: '=='
threshold: 0
window: 60
periods: 0
function: max
# Rules: avg(swap_io_in) >= 1048576 and avg(swap_io_out) >= 1048576,
# both with window 120.
linux_system_swap_activity_warning:
description: 'The swap activity is high'
severity: warning
rules:
- metric: swap_io_in
relational_operator: '>='
threshold: 1048576 # 1024 * 1024; presumably 1 MiB/s - confirm the metric's unit
window: 120
periods: 0
function: avg
- metric: swap_io_out
relational_operator: '>='
threshold: 1048576 # 1024 * 1024; presumably 1 MiB/s - confirm the metric's unit
window: 120
periods: 0
function: avg
# Rule: avg(swap_percent_used) >= 0.8 with window 60.
linux_system_swap_usage_warning:
description: 'The swap free space is low'
severity: warning
rules:
- metric: swap_percent_used
relational_operator: '>='
# NOTE(review): 0.8 implies the metric is a 0-1 ratio despite the
# "percent" name - confirm it is not reported on a 0-100 scale.
threshold: 0.8
window: 60
periods: 0
function: avg
# Root filesystem triggers. Both use min(fs_space_percent_free) with
# window 60 and select the filesystem through the fs field ('/'):
# warning below 10, critical below 5.
linux_system_root_fs_warning:
description: "The root filesystem's free space is low"
severity: warning
rules:
- metric: fs_space_percent_free
# Restricts the rule to datapoints whose fs field is '/'.
field:
fs: '/'
relational_operator: '<'
threshold: 10
window: 60
periods: 0
function: min
linux_system_root_fs_critical:
description: "The root filesystem's free space is too low"
severity: critical
rules:
- metric: fs_space_percent_free
# Restricts the rule to datapoints whose fs field is '/'.
field:
fs: '/'
relational_operator: '<'
threshold: 5
window: 60
periods: 0
function: min
# Dropped-packet triggers. Each compares avg(if_dropped_rx) or
# avg(if_dropped_tx) with window 60 against a threshold:
# more than 100 is a warning, more than 1000 is critical.
linux_system_network_warning_dropped_rx:
description: 'Some received packets have been dropped'
severity: warning
rules:
- metric: if_dropped_rx
relational_operator: '>'
threshold: 100
window: 60
periods: 0
function: avg
linux_system_network_critical_dropped_rx:
description: 'Too many received packets have been dropped'
severity: critical
rules:
- metric: if_dropped_rx
relational_operator: '>'
threshold: 1000
window: 60
periods: 0
function: avg
# Transmit-side counterpart of the rx warning above (same threshold).
linux_system_network_warning_dropped_tx:
description: 'Some transmitted packets have been dropped'
severity: warning
rules:
- metric: if_dropped_tx
relational_operator: '>'
threshold: 100
window: 60
periods: 0
function: avg
linux_system_network_critical_dropped_tx:
  # Critical tier for dropped transmitted packets:
  # avg(if_dropped_tx) > 1000 with window 60.
  description: 'Too many transmitted packets have been dropped'
  severity: critical
  rules:
    - metric: if_dropped_tx
      relational_operator: '>'
      threshold: 1000
      # Keys ordered and "periods" spelled out to match the other three
      # dropped-packet rules (rx warning/critical, tx warning).
      window: 60
      periods: 0
      function: avg
# Rule: max(hdd_errors_rate) > 0 with window 60, grouped by device.
linux_system_hdd_errors_critical:
description: 'Errors on hard drive(s) have been detected'
severity: critical
# Only trigger in this file that sets no_data_policy; presumably missing
# datapoints are treated as healthy - confirm against the alarm engine.
no_data_policy: okay
rules:
- metric: hdd_errors_rate
group_by: [device]
relational_operator: '>'
threshold: 0
window: 60
periods: 0
function: max
# Alarms: each entry names the triggers (defined in the trigger section
# of this file) that feed it and sets its alerting mode.
alarm:
linux_system_cpu:
alerting: enabled
triggers:
- linux_system_cpu_warning
- linux_system_cpu_critical
linux_system_swap:
alerting: enabled
triggers:
- linux_system_swap_usage_critical
- linux_system_swap_activity_warning
- linux_system_swap_usage_warning
linux_system_root_fs:
alerting: enabled
triggers:
- linux_system_root_fs_critical
- linux_system_root_fs_warning
linux_system_network_rx:
alerting: enabled
triggers:
- linux_system_network_critical_dropped_rx
- linux_system_network_warning_dropped_rx
linux_system_network_tx:
alerting: enabled
triggers:
- linux_system_network_critical_dropped_tx
- linux_system_network_warning_dropped_tx
# The only alarm in this file that uses enabled_with_notification
# rather than plain enabled.
linux_system_hdd_errors:
alerting: enabled_with_notification
triggers:
- linux_system_hdd_errors_critical

+ 3
- 1
linux/network/host.sls View File

@@ -28,6 +28,8 @@ linux_host_{{ name }}_order_fix:
- repl: {{ after }}
- watch:
- host: linux_host_{{ name }}
- onlyif:
- grep -q "{{ before }}" /etc/hosts

{%- endif %}

@@ -35,4 +37,4 @@ linux_host_{{ name }}_order_fix:

{%- endfor %}

{%- endif %}
{%- endif %}

+ 1
- 0
linux/network/interface.sls View File

@@ -89,6 +89,7 @@ linux_interface_{{ interface_name }}:
- type: {{ interface.type }}
{%- if interface.address is defined %}
{%- if grains.os_family == 'Debian' %}
- unless: grep -q "iface {{ interface_name }} " /etc/network/interfaces
- proto: {{ interface.get('proto', 'static') }}
{% endif %}
{%- if grains.os_family == 'RedHat' %}

+ 2
- 2
linux/storage/mount.sls View File

@@ -5,7 +5,7 @@

{%- if mount.enabled %}

{%- if not mount.file_system in ['nfs', 'nfs4', 'cifs'] %}
{%- if not mount.file_system in ['nfs', 'nfs4', 'cifs', 'tmpfs'] %}

mkfs_{{ mount.device}}:
cmd.run:
@@ -50,4 +50,4 @@ xfs_packages_{{ mount.device }}:

{%- endfor %}

{%- endif %}
{%- endif %}

+ 3
- 1
metadata/service/support.yml View File

@@ -4,8 +4,10 @@ parameters:
collectd:
enabled: true
heka:
enabled: false
enabled: true
sensu:
enabled: true
sphinx:
enabled: true
grafana:
enabled: true

Loading…
Cancel
Save