Browse Source

Merge pull request #39 from tcpcloud/stacklight

Stacklight
tags/2016.12
Filip Pytloun 8 years ago
parent
commit
45236b54cd
13 changed files with 4182 additions and 38 deletions
  1. +2
    -0
      README.rst
  2. +17
    -0
      _modules/linux_netlink.py
  3. +4
    -0
      linux/files/collectd_df.conf
  4. +10
    -0
      linux/files/collectd_netlink.conf
  5. +3928
    -0
      linux/files/grafana_dashboards/system_influxdb.json
  6. +1
    -1
      linux/map.jinja
  7. +9
    -14
      linux/meta/collectd.yml
  8. +4
    -0
      linux/meta/grafana.yml
  9. +198
    -19
      linux/meta/heka.yml
  10. +3
    -1
      linux/network/host.sls
  11. +1
    -0
      linux/network/interface.sls
  12. +2
    -2
      linux/storage/mount.sls
  13. +3
    -1
      metadata/service/support.yml

+ 2
- 0
README.rst View File

enabled: true enabled: true
mount: mount:
samba1: samba1:
- enabled: true
- path: /media/myuser/public/ - path: /media/myuser/public/
- device: //192.168.0.1/storage - device: //192.168.0.1/storage
- file_system: cifs - file_system: cifs
storage: storage:
mount: mount:
data: data:
enabled: true
device: /dev/vg1/data device: /dev/vg1/data
file_system: ext4 file_system: ext4
path: /mnt/data path: /mnt/data

+ 17
- 0
_modules/linux_netlink.py View File

# -*- coding: utf-8 -*-

import re

# Matches interface names made only of lowercase ASCII letters and digits;
# a name containing any other character (such as '.', '-' or ':') does not match.
_alphanum_re = re.compile(r'^[a-z0-9]+$')
# Matches the loopback interface name, which is always excluded.
_lo_re = re.compile(r'^lo$')


def _filter(interface):
    """
    Tell whether an interface name should be reported by ``ls``.

    :param interface: interface name (string)
    :return: ``True`` when the name is purely lowercase alphanumeric and is
             not ``lo``, ``False`` otherwise (always a real ``bool``)
    """
    return bool(_alphanum_re.match(interface)) and not _lo_re.match(interface)


def ls():
    """
    Provide a list of network interfaces.

    The names are the keys of the ``ip_interfaces`` grain, restricted to
    those accepted by ``_filter``.

    :return: a sorted ``list`` of interface names; empty when the grain is
             missing or empty
    """
    # ``or {}`` guards against the grain being present but set to None.
    interfaces = __salt__['grains.get']('ip_interfaces', {}) or {}
    # Build a real list: on Python 3 ``filter()`` returns a lazy iterator,
    # which is not the list the docstring promises. Sorting keeps the result
    # (and anything rendered from it) stable between runs.
    return sorted(name for name in interfaces if _filter(name))

+ 4
- 0
linux/files/collectd_df.conf View File

FSType {{ fs_type }} FSType {{ fs_type }}
{%- endfor %} {%- endfor %}
IgnoreSelected {{ plugin.get('ignore_selected', False)|lower }} IgnoreSelected {{ plugin.get('ignore_selected', False)|lower }}
ReportByDevice false
ReportInodes true
ValuesAbsolute true
ValuesPercentage true
</Plugin> </Plugin>

+ 10
- 0
linux/files/collectd_netlink.conf View File

# collectd configuration template for the netlink plugin.
# Rendered with a `plugin` mapping; reads its `interfaces` list (default: empty)
# and its `ignore_selected` flag (default: False).
<LoadPlugin netlink>
Globals false
</LoadPlugin>

# One VerboseInterface line is emitted per entry of plugin.interfaces;
# IgnoreSelected is rendered in lowercase (true/false).
<Plugin netlink>
{%- for interface_name in plugin.get('interfaces', []) %}
VerboseInterface "{{ interface_name }}"
{%- endfor %}
IgnoreSelected {{ plugin.get('ignore_selected', False)|lower }}
</Plugin>

+ 3928
- 0
linux/files/grafana_dashboards/system_influxdb.json
File diff suppressed because it is too large
View File


+ 1
- 1
linux/map.jinja View File

'doc_validity_pkgs': ['python-yaml'], 'doc_validity_pkgs': ['python-yaml'],
}, },
'Debian': { 'Debian': {
'pkgs': ['python-apt','vim-nox', 'apt-transport-https'],
'pkgs': ['python-apt','vim-nox', 'apt-transport-https', 'libmnl0'],
'utc': true, 'utc': true,
'user': {}, 'user': {},
'group': {}, 'group': {},

+ 9
- 14
linux/meta/collectd.yml View File

plugin:
linux_network_interface:
plugin: interface
execution: local
local_plugin:
linux_network_netlink:
plugin: netlink
template: linux/files/collectd_netlink.conf
ignore_selected: false
interfaces:
{%- for interface_name in salt['linux_netlink.ls']() %}
- {{ interface_name }}
{%- endfor %}
linux_system_cpu: linux_system_cpu:
plugin: cpu plugin: cpu
execution: local
linux_system_entropy: linux_system_entropy:
plugin: entropy plugin: entropy
execution: local
linux_system_load: linux_system_load:
plugin: load plugin: load
execution: local
linux_system_contextswitch: linux_system_contextswitch:
plugin: contextswitch plugin: contextswitch
execution: local
linux_system_memory: linux_system_memory:
plugin: memory plugin: memory
execution: local
linux_system_uptime: linux_system_uptime:
plugin: uptime plugin: uptime
execution: local
linux_system_users: linux_system_users:
plugin: users plugin: users
execution: local
linux_storage_df: linux_storage_df:
plugin: df plugin: df
execution: local
template: linux/files/collectd_df.conf template: linux/files/collectd_df.conf
ignore_selected: True ignore_selected: True
fs_types: fs_types:
- cgroup - cgroup
linux_storage_disk: linux_storage_disk:
plugin: disk plugin: disk
execution: local
template: linux/files/collectd_disk.conf template: linux/files/collectd_disk.conf
ignore_selected: True ignore_selected: True
linux_storage_swap: linux_storage_swap:
plugin: swap plugin: swap
execution: local

+ 4
- 0
linux/meta/grafana.yml View File

# Grafana metadata: declares a dashboard entry named `linux`, in JSON format,
# whose content comes from the template path below.
# NOTE(review): nesting is not visible in this view; presumably
# dashboard -> linux -> {format, template} — confirm against the real file.
dashboard:
linux:
format: json
template: linux/files/grafana_dashboards/system_influxdb.json

+ 198
- 19
linux/meta/heka.yml View File

input:
linux_rsyslog_syslog:
engine: logstreamer
log_directory: /var/log
file_match: syslog\.?(?P<Index>\d+)?(.gz)?
priority: ["^Index"]
decoder: RsyslogDecoder
linux_rsyslog_auth:
engine: logstreamer
log_directory: /var/log
file_match: auth\.log\.?(?P<Index>\d+)?(.gz)?
priority: ["^Index"]
decoder: RsyslogDecoder
linux_rsyslog_kern:
engine: logstreamer
log_directory: /var/log
file_match: kern\.log\.?(?P<Index>\d+)?(.gz)?
priority: ["^Index"]
decoder: RsyslogDecoder
# Heka log collector metadata: one decoder, one input and one filter.
log_collector:
decoder:
# Sandbox decoder backed by the generic_syslog.lua module; `syslog_pattern`
# describes the line layout the decoder is given.
system:
engine: sandbox
module_file: /usr/share/lma_collector/decoders/generic_syslog.lua
module_dir: /usr/share/lma_collector/common;/usr/share/heka/lua_modules
adjust_timezone: true
config:
syslog_pattern: '%TIMESTAMP% %HOSTNAME% %syslogtag%%msg:::sp-if-no-1st-sp%%msg%\n'
input:
# Logstreamer input over /var/log. `file_match` captures the file name into
# the `Service` group; `differentiator` combines the literal 'system.' with
# that capture.
# NOTE(review): presumably this yields the Logger value matched by the
# hdd-errors filter below ('system.kern') — verify how '.log' is handled.
linux_log_stream:
engine: logstreamer
log_directory: "/var/log"
file_match: '(?P<Service>daemon\.log|cron\.log|haproxy\.log|kern\.log|auth\.log|syslog|messages|debug)'
differentiator: [ 'system.', 'Service' ]
decoder: "system_decoder"
splitter: "TokenSplitter"
filter:
# Sandbox filter backed by hdd_errors_counter.lua. It only receives messages
# with Type 'log' and Logger 'system.kern'.
# `patterns` holds two space-separated patterns, each capturing a device name
# of the form sd/vd followed by one or two lowercase letters, next to the
# word "error".
# NOTE(review): units of ticker_interval / grace_interval are not shown here
# (presumably seconds) — confirm.
linux_hdd_errors:
engine: sandbox
module_file: /usr/share/lma_collector/filters/hdd_errors_counter.lua
module_dir: /usr/share/lma_collector/common;/usr/share/heka/lua_modules
preserve_data: false
message_matcher: "Type == 'log' && Logger == 'system.kern'"
ticker_interval: 10
config:
grace_interval: 10
patterns: "/error%s.+([sv]d[a-z][a-z]?)%d?/ /([sv]d[a-z][a-z]?)%d?.+%serror/"
hostname: '{{ grains.host }}'
metric_collector:
trigger:
# Critical CPU trigger with two rules, both averaged over a window of 120:
# cpu_wait >= 35 and cpu_idle <= 5.
# The cpu_idle rule now states `periods: 0` and quotes its operator, matching
# every other rule in this file.
# NOTE(review): window is presumably in seconds — confirm.
linux_system_cpu_critical:
  description: 'The CPU usage is too high.'
  severity: critical
  rules:
    - metric: cpu_wait
      relational_operator: '>='
      threshold: 35
      window: 120
      periods: 0
      function: avg
    - metric: cpu_idle
      relational_operator: '<='
      threshold: 5
      window: 120
      periods: 0
      function: avg
# Warning-level CPU trigger: average cpu_wait >= 15 over a window of 120.
# Severity is `warning` so that it is distinguishable from
# linux_system_cpu_critical, which shares the linux_system_cpu alarm and uses
# the higher cpu_wait threshold.
linux_system_cpu_warning:
  description: 'The CPU wait times are high.'
  severity: warning
  rules:
    - metric: cpu_wait
      relational_operator: '>='
      threshold: 15
      window: 120
      periods: 0
      function: avg
# Critical: the maximum of swap_free over a window of 60 equals 0.
# NOTE(review): window values are presumably seconds — confirm.
linux_system_swap_usage_critical:
description: 'There is no more swap free space'
severity: critical
rules:
- metric: swap_free
relational_operator: '=='
threshold: 0
window: 60
periods: 0
function: max
# Warning: average swap_io_in or swap_io_out reaches 1048576 (2^20) over a
# window of 120.
# NOTE(review): the inline "1 Mb/s" label presumably means one mebibyte per
# second — confirm the metric's unit.
linux_system_swap_activity_warning:
description: 'The swap activity is high'
severity: warning
rules:
- metric: swap_io_in
relational_operator: '>='
threshold: 1048576 # 1 Mb/s
window: 120
periods: 0
function: avg
- metric: swap_io_out
relational_operator: '>='
threshold: 1048576 # 1 Mb/s
window: 120
periods: 0
function: avg
# Warning: average swap_percent_used >= 0.8 over a window of 60.
# NOTE(review): a threshold of 0.8 suggests the metric is a 0-1 ratio despite
# the "percent" in its name — confirm, otherwise this fires at 0.8 %.
linux_system_swap_usage_warning:
description: 'The swap free space is low'
severity: warning
rules:
- metric: swap_percent_used
relational_operator: '>='
threshold: 0.8
window: 60
periods: 0
function: avg
# Warning: minimum fs_space_percent_free for fs '/' drops below 10.
linux_system_root_fs_warning:
description: "The root filesystem's free space is low"
severity: warning
rules:
- metric: fs_space_percent_free
field:
fs: '/'
relational_operator: '<'
threshold: 10
window: 60
periods: 0
function: min
# Critical: minimum fs_space_percent_free for fs '/' drops below 5.
linux_system_root_fs_critical:
description: "The root filesystem's free space is too low"
severity: critical
rules:
- metric: fs_space_percent_free
field:
fs: '/'
relational_operator: '<'
threshold: 5
window: 60
periods: 0
function: min
# Dropped-packet triggers: average if_dropped_rx / if_dropped_tx over a window
# of 60, warning above 100 and critical above 1000.
linux_system_network_warning_dropped_rx:
description: 'Some received packets have been dropped'
severity: warning
rules:
- metric: if_dropped_rx
relational_operator: '>'
threshold: 100
window: 60
periods: 0
function: avg
linux_system_network_critical_dropped_rx:
description: 'Too many received packets have been dropped'
severity: critical
rules:
- metric: if_dropped_rx
relational_operator: '>'
threshold: 1000
window: 60
periods: 0
function: avg
linux_system_network_warning_dropped_tx:
description: 'Some transmitted packets have been dropped'
severity: warning
rules:
- metric: if_dropped_tx
relational_operator: '>'
threshold: 100
window: 60
periods: 0
function: avg
# Critical: average if_dropped_tx above 1000 over a window of 60.
# Keys follow the same order as the other dropped-packet triggers, and
# `periods: 0` is stated explicitly as it is for them.
linux_system_network_critical_dropped_tx:
  description: 'Too many transmitted packets have been dropped'
  severity: critical
  rules:
    - metric: if_dropped_tx
      relational_operator: '>'
      threshold: 1000
      window: 60
      periods: 0
      function: avg
# Critical: maximum hdd_errors_rate above 0 over a window of 60, evaluated per
# `device`. `no_data_policy: okay` is set only on this trigger.
# NOTE(review): presumably the metric is produced by the linux_hdd_errors
# filter — confirm.
linux_system_hdd_errors_critical:
description: 'Errors on hard drive(s) have been detected'
severity: critical
no_data_policy: okay
rules:
- metric: hdd_errors_rate
group_by: [device]
relational_operator: '>'
threshold: 0
window: 60
periods: 0
function: max
# Alarms group the triggers defined above by name. Every alarm uses
# `alerting: enabled` except linux_system_hdd_errors, which uses
# `enabled_with_notification`.
alarm:
linux_system_cpu:
alerting: enabled
triggers:
- linux_system_cpu_warning
- linux_system_cpu_critical
linux_system_swap:
alerting: enabled
triggers:
- linux_system_swap_usage_critical
- linux_system_swap_activity_warning
- linux_system_swap_usage_warning
linux_system_root_fs:
alerting: enabled
triggers:
- linux_system_root_fs_critical
- linux_system_root_fs_warning
linux_system_network_rx:
alerting: enabled
triggers:
- linux_system_network_critical_dropped_rx
- linux_system_network_warning_dropped_rx
linux_system_network_tx:
alerting: enabled
triggers:
- linux_system_network_critical_dropped_tx
- linux_system_network_warning_dropped_tx
linux_system_hdd_errors:
alerting: enabled_with_notification
triggers:
- linux_system_hdd_errors_critical

+ 3
- 1
linux/network/host.sls View File

- repl: {{ after }} - repl: {{ after }}
- watch: - watch:
- host: linux_host_{{ name }} - host: linux_host_{{ name }}
- onlyif:
- grep -q "{{ before }}" /etc/hosts


{%- endif %} {%- endif %}




{%- endfor %} {%- endfor %}


{%- endif %}
{%- endif %}

+ 1
- 0
linux/network/interface.sls View File

- type: {{ interface.type }} - type: {{ interface.type }}
{%- if interface.address is defined %} {%- if interface.address is defined %}
{%- if grains.os_family == 'Debian' %} {%- if grains.os_family == 'Debian' %}
- unless: grep -q "iface {{ interface_name }} " /etc/network/interfaces
- proto: {{ interface.get('proto', 'static') }} - proto: {{ interface.get('proto', 'static') }}
{% endif %} {% endif %}
{%- if grains.os_family == 'RedHat' %} {%- if grains.os_family == 'RedHat' %}

+ 2
- 2
linux/storage/mount.sls View File



{%- if mount.enabled %} {%- if mount.enabled %}


{%- if not mount.file_system in ['nfs', 'nfs4', 'cifs'] %}
{%- if not mount.file_system in ['nfs', 'nfs4', 'cifs', 'tmpfs'] %}


mkfs_{{ mount.device}}: mkfs_{{ mount.device}}:
cmd.run: cmd.run:


{%- endfor %} {%- endfor %}


{%- endif %}
{%- endif %}

+ 3
- 1
metadata/service/support.yml View File

collectd: collectd:
enabled: true enabled: true
heka: heka:
enabled: false
enabled: true
sensu: sensu:
enabled: true enabled: true
sphinx: sphinx:
enabled: true enabled: true
grafana:
enabled: true

Loading…
Cancel
Save