Saltstack Official Linux Formula
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

189 lines
5.3KB

  1. {%- from "linux/map.jinja" import monitoring with context %}
  2. metric_collector:
  3. trigger:
  4. linux_system_cpu_critical:
  5. description: 'The CPU usage is too high.'
  6. severity: critical
  7. rules:
  8. - metric: cpu_wait
  9. relational_operator: '>='
  10. threshold: 35
  11. window: 120
  12. periods: 0
  13. function: avg
  14. - metric: cpu_idle
  15. relational_operator: <=
  16. threshold: 5
  17. window: 120
  18. function: avg
  19. linux_system_cpu_warning:
  20. description: 'The CPU wait times are high.'
  21. severity: warning
  22. rules:
  23. - metric: cpu_wait
  24. relational_operator: '>='
  25. threshold: 15
  26. window: 120
  27. periods: 0
  28. function: avg
  29. linux_system_swap_usage_critical:
  30. description: 'There is no more swap free space'
  31. severity: critical
  32. rules:
  33. - metric: swap_free
  34. relational_operator: '=='
  35. threshold: 0
  36. window: 60
  37. periods: 0
  38. function: max
  39. linux_system_swap_activity_warning:
  40. description: 'The swap activity is high'
  41. severity: warning
  42. rules:
  43. - metric: swap_io_in
  44. relational_operator: '>='
  45. threshold: 1048576 # 1 Mb/s
  46. window: 120
  47. periods: 0
  48. function: avg
  49. - metric: swap_io_out
  50. relational_operator: '>='
  51. threshold: 1048576 # 1 Mb/s
  52. window: 120
  53. periods: 0
  54. function: avg
  55. linux_system_swap_usage_warning:
  56. description: 'The swap free space is low'
  57. severity: warning
  58. rules:
  59. - metric: swap_percent_used
  60. relational_operator: '>='
  61. threshold: 0.8
  62. window: 60
  63. periods: 0
  64. function: avg
  65. linux_system_root_fs_warning:
  66. description: "The root filesystem's free space is low"
  67. severity: warning
  68. rules:
  69. - metric: fs_space_percent_free
  70. field:
  71. fs: '/'
  72. relational_operator: '<'
  73. threshold: 10
  74. window: 60
  75. periods: 0
  76. function: min
  77. linux_system_root_fs_critical:
  78. description: "The root filesystem's free space is too low"
  79. severity: critical
  80. rules:
  81. - metric: fs_space_percent_free
  82. field:
  83. fs: '/'
  84. relational_operator: '<'
  85. threshold: 5
  86. window: 60
  87. periods: 0
  88. function: min
  89. linux_system_network_warning_dropped_rx:
  90. description: 'Some received packets have been dropped'
  91. severity: warning
  92. rules:
  93. - metric: if_dropped_rx
  94. relational_operator: '>'
  95. threshold: 100
  96. window: 60
  97. periods: 0
  98. function: avg
  99. linux_system_network_critical_dropped_rx:
  100. description: 'Too many received packets have been dropped'
  101. severity: critical
  102. rules:
  103. - metric: if_dropped_rx
  104. relational_operator: '>'
  105. threshold: 1000
  106. window: 60
  107. periods: 0
  108. function: avg
  109. linux_system_network_warning_dropped_tx:
  110. description: 'Some transmitted packets have been dropped'
  111. severity: warning
  112. rules:
  113. - metric: if_dropped_tx
  114. relational_operator: '>'
  115. threshold: 100
  116. window: 60
  117. periods: 0
  118. function: avg
  119. linux_system_network_critical_dropped_tx:
  120. description: 'Too many transmitted packets have been dropped'
  121. severity: critical
  122. rules:
  123. - metric: if_dropped_tx
  124. relational_operator: '>'
  125. threshold: 1000
  126. function: avg
  127. window: 60
  128. linux_system_hdd_errors_critical:
  129. description: 'Errors on hard drive(s) have been detected'
  130. severity: critical
  131. no_data_policy: okay
  132. rules:
  133. - metric: hdd_errors_rate
  134. group_by: [device]
  135. relational_operator: '>'
  136. threshold: 0
  137. window: 60
  138. periods: 0
  139. function: max
  140. {%- if monitoring.bond_status.interfaces is defined and monitoring.bond_status.interfaces is list %}
  141. linux_bond_status_critical:
  142. description: Bond members are down.
  143. rules:
  144. - function: last
  145. metric: bond_status_links_down
  146. periods: 0
  147. relational_operator: '>'
  148. threshold: 0
  149. window: 120
  150. severity: critical
  151. {%- endif %}
  152. alarm:
  153. linux_system_cpu:
  154. alerting: enabled
  155. triggers:
  156. - linux_system_cpu_warning
  157. - linux_system_cpu_critical
  158. linux_system_swap:
  159. alerting: enabled
  160. triggers:
  161. - linux_system_swap_usage_critical
  162. - linux_system_swap_activity_warning
  163. - linux_system_swap_usage_warning
  164. linux_system_root_fs:
  165. alerting: enabled
  166. triggers:
  167. - linux_system_root_fs_critical
  168. - linux_system_root_fs_warning
  169. linux_system_network_rx:
  170. alerting: enabled
  171. triggers:
  172. - linux_system_network_critical_dropped_rx
  173. - linux_system_network_warning_dropped_rx
  174. linux_system_network_tx:
  175. alerting: enabled
  176. triggers:
  177. - linux_system_network_critical_dropped_tx
  178. - linux_system_network_warning_dropped_tx
  179. linux_system_hdd_errors:
  180. alerting: enabled_with_notification
  181. triggers:
  182. - linux_system_hdd_errors_critical
  183. {%- if monitoring.bond_status.interfaces is defined and monitoring.bond_status.interfaces is list %}
  184. linux_bond_status:
  185. alerting: enabled
  186. triggers:
  187. - linux_bond_status_critical
  188. {%- endif %}