SaltStack Official Linux Formula
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

199 lines
5.7KB

  # Log collection pipeline: one decoder, one input and one filter definition.
  log_collector:
    decoder:
      system:
        engine: sandbox
        module_file: /usr/share/lma_collector/decoders/generic_syslog.lua
        module_dir: /usr/share/lma_collector/common;/usr/share/heka/lua_modules
        adjust_timezone: true
        config:
          # Single-quoted on purpose: the trailing \n stays a literal
          # backslash + "n" and is not turned into a newline by YAML.
          syslog_pattern: '%TIMESTAMP% %HOSTNAME% %syslogtag%%msg:::sp-if-no-1st-sp%%msg%\n'
    input:
      linux_log_stream:
        engine: logstreamer
        log_directory: '/var/log'
        # The named group `Service` captured here is the value referenced by
        # the `differentiator` list below.
        file_match: '(?P<Service>daemon\.log|cron\.log|haproxy\.log|kern\.log|auth\.log|syslog|messages|debug)'
        differentiator:
          - 'system.'
          - 'Service'
        decoder: 'system_decoder'
        splitter: 'TokenSplitter'
    filter:
      linux_hdd_errors:
        engine: sandbox
        module_file: /usr/share/lma_collector/filters/hdd_errors_counter.lua
        module_dir: /usr/share/lma_collector/common;/usr/share/heka/lua_modules
        preserve_data: false
        # NOTE(review): this matches Logger 'system.kern', while file_match above
        # captures 'kern.log' for the kernel log. Confirm the Logger value that
        # actually reaches this filter.
        message_matcher: "Type == 'log' && Logger == 'system.kern'"
        ticker_interval: 10
        config:
          grace_interval: 10
          # Two space-separated patterns, each delimited by slashes.
          patterns: '/error%s.+([sv]d[a-z][a-z]?)%d?/ /([sv]d[a-z][a-z]?)%d?.+%serror/'
          # Quoted so the template expression is always read as a string.
          hostname: '{{ grains.host }}'
  # Metric alarms: `trigger` holds the threshold rules, `alarm` groups triggers
  # by name and sets the alerting mode for each group.
  metric_collector:
    trigger:
      linux_system_cpu_critical:
        description: 'The CPU usage is too high.'
        severity: critical
        rules:
          - metric: cpu_wait
            relational_operator: '>='
            threshold: 35
            window: 120
            periods: 0
            function: avg
          # NOTE(review): no `periods` key on this rule, unlike most others in
          # this file. Confirm the collector default is what is wanted.
          - metric: cpu_idle
            relational_operator: '<='
            threshold: 5
            window: 120
            function: avg
      linux_system_cpu_warning:
        description: 'The CPU wait times are high.'
        # Was `critical`, which contradicted the trigger name and made it
        # indistinguishable from linux_system_cpu_critical in the alarm below.
        severity: warning
        rules:
          - metric: cpu_wait
            relational_operator: '>='
            threshold: 15
            window: 120
            periods: 0
            function: avg
      linux_system_swap_usage_critical:
        description: 'There is no more swap free space'
        severity: critical
        rules:
          - metric: swap_free
            relational_operator: '=='
            threshold: 0
            window: 60
            periods: 0
            function: max
      linux_system_swap_activity_warning:
        description: 'The swap activity is high'
        severity: warning
        rules:
          - metric: swap_io_in
            relational_operator: '>='
            # 1048576 = 1024 * 1024; 1 MiB/s assuming the metric is in bytes/s.
            threshold: 1048576
            window: 120
            periods: 0
            function: avg
          - metric: swap_io_out
            relational_operator: '>='
            # 1048576 = 1024 * 1024; 1 MiB/s assuming the metric is in bytes/s.
            threshold: 1048576
            window: 120
            periods: 0
            function: avg
      linux_system_swap_usage_warning:
        description: 'The swap free space is low'
        severity: warning
        rules:
          - metric: swap_percent_used
            relational_operator: '>='
            # NOTE(review): confirm whether swap_percent_used is a 0-1 ratio
            # or a 0-100 percentage; 0.8 only makes sense for the former.
            threshold: 0.8
            window: 60
            periods: 0
            function: avg
      linux_system_root_fs_warning:
        description: "The root filesystem's free space is low"
        severity: warning
        rules:
          - metric: fs_space_percent_free
            field:
              fs: '/'
            relational_operator: '<'
            threshold: 10
            window: 60
            periods: 0
            function: min
      linux_system_root_fs_critical:
        description: "The root filesystem's free space is too low"
        severity: critical
        rules:
          - metric: fs_space_percent_free
            field:
              fs: '/'
            relational_operator: '<'
            threshold: 5
            window: 60
            periods: 0
            function: min
      linux_system_network_warning_dropped_rx:
        description: 'Some received packets have been dropped'
        severity: warning
        rules:
          - metric: if_dropped_rx
            relational_operator: '>'
            threshold: 100
            window: 60
            periods: 0
            function: avg
      linux_system_network_critical_dropped_rx:
        description: 'Too many received packets have been dropped'
        severity: critical
        rules:
          - metric: if_dropped_rx
            relational_operator: '>'
            threshold: 1000
            window: 60
            periods: 0
            function: avg
      linux_system_network_warning_dropped_tx:
        description: 'Some transmitted packets have been dropped'
        severity: warning
        rules:
          - metric: if_dropped_tx
            relational_operator: '>'
            threshold: 100
            window: 60
            periods: 0
            function: avg
      linux_system_network_critical_dropped_tx:
        description: 'Too many transmitted packets have been dropped'
        severity: critical
        rules:
          # NOTE(review): no `periods` key on this rule, unlike its rx and
          # warning siblings. Confirm the collector default is what is wanted.
          - metric: if_dropped_tx
            relational_operator: '>'
            threshold: 1000
            window: 60
            function: avg
      linux_system_hdd_errors_critical:
        description: 'Errors on hard drive(s) have been detected'
        severity: critical
        no_data_policy: okay
        rules:
          - metric: hdd_errors_rate
            group_by: [device]
            relational_operator: '>'
            threshold: 0
            window: 60
            periods: 0
            function: max
    alarm:
      # Every name listed under `triggers` refers to a key defined in the
      # `trigger` mapping above.
      linux_system_cpu:
        alerting: enabled
        triggers:
          - linux_system_cpu_warning
          - linux_system_cpu_critical
      linux_system_swap:
        alerting: enabled
        triggers:
          - linux_system_swap_usage_critical
          - linux_system_swap_activity_warning
          - linux_system_swap_usage_warning
      linux_system_root_fs:
        alerting: enabled
        triggers:
          - linux_system_root_fs_critical
          - linux_system_root_fs_warning
      linux_system_network_rx:
        alerting: enabled
        triggers:
          - linux_system_network_critical_dropped_rx
          - linux_system_network_warning_dropped_rx
      linux_system_network_tx:
        alerting: enabled
        triggers:
          - linux_system_network_critical_dropped_tx
          - linux_system_network_warning_dropped_tx
      linux_system_hdd_errors:
        alerting: enabled_with_notification
        triggers:
          - linux_system_hdd_errors_critical