Saltstack Official Linux Formula
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

104 lines
4.9KB

  # Prometheus-style alert rules for basic Linux host health: CPU, disk space,
  # inodes, memory, load, dropped packets and swap.
  #
  # Every rule has the same shape: an `if` expression, `severity` and `service`
  # labels, and `summary` / `description` annotations. Warning thresholds are
  # read from the `monitoring` mapping imported from linux/map.jinja.
  #
  # The file is expanded in two passes. Jinja fills in the thresholds first.
  # The dollar-prefixed placeholders ($labels.host, $labels.path,
  # $labels.interface, $value) sit inside Jinja raw/endraw regions so that they
  # pass through untouched and are expanded later, when an alert fires. The
  # label-set variable is spelled $labels (plural) everywhere.
  #
  # Maintenance note: inside a raw region only YAML comments are safe, and
  # outside a raw region comments must not contain Jinja delimiters.
  {%- from "linux/map.jinja" import monitoring with context %}
  server:
    alert:
      # Fires when the 5-minute average of idle CPU (cpu-total series) drops
      # below monitoring.cpu_idle_percentage.warn.
      SystemCpuIdleTooLow:
        {%- set cpu_idle_threshold = monitoring.cpu_idle_percentage.warn|float %}
        if: 'avg_over_time(cpu_usage_idle{cpu="cpu-total"}[5m]) < {{ cpu_idle_threshold }}'
        {% raw %}
        labels:
          severity: warning
          service: system
        annotations:
          summary: 'Idle CPU usage too low on {{ $labels.host }}'
          description: 'The average idle CPU usage is too low on node {{ $labels.host }} (current value={{ $value }}, threshold={% endraw %}{{ cpu_idle_threshold }})'
      # Fires when a linear extrapolation of the last hour of disk_free predicts
      # a value below zero 8 hours (8*3600 seconds) from now.
      SystemDiskSpaceTooLow:
        if: 'predict_linear(disk_free[1h], 8*3600) < 0'
        {% raw %}
        labels:
          severity: warning
          service: system
        annotations:
          summary: 'Free space for {{ $labels.path }} too low on {{ $labels.host }}'
          description: 'The disk partition ({{ $labels.path }}) will be full in less than 8 hours on {{ $labels.host }}'
        {% endraw %}
      # Same extrapolation as the disk space rule, applied to disk_inodes_free.
      SystemDiskInodesTooLow:
        if: 'predict_linear(disk_inodes_free[1h], 8*3600) < 0'
        {% raw %}
        labels:
          severity: warning
          service: system
        annotations:
          summary: 'Free inodes for {{ $labels.path }} too low on {{ $labels.host }}'
          description: 'The disk inodes ({{ $labels.path }}) will be full in less than 8 hours on {{ $labels.host }}'
        {% endraw %}
      # Fires when the 5-minute average of mem_available_percent drops below
      # monitoring.free_memory_percentage.warn.
      SystemMemoryAvailableTooLow:
        {%- set mem_avail_threshold = monitoring.free_memory_percentage.warn|float %}
        if: 'avg_over_time(mem_available_percent[5m]) < {{ mem_avail_threshold }}'
        {% raw %}
        labels:
          severity: warning
          service: system
        annotations:
          summary: 'Free memory too low on {{ $labels.host }}'
          description: 'The percentage of free memory is too low on node {{ $labels.host }} (current value={{ $value }}, threshold={% endraw %}{{ mem_avail_threshold }})'
      # Fires when system_load5 divided by system_n_cpus exceeds
      # monitoring.load_5.warn.
      SystemLoad5TooHigh:
        if: 'system_load5 / system_n_cpus > {{ monitoring.load_5.warn }}'
        {% raw %}
        labels:
          severity: warning
          service: system
        annotations:
          summary: 'High system load (5m) on {{ $labels.host }}'
          description: 'High system load (5m) on node {{ $labels.host }}'
        {% endraw %}
      # Fires when the 1-minute average of net_drop_in exceeds
      # monitoring.rx_packets_dropped_rate.warn.
      SystemRxPacketsDroppedTooHigh:
        {%- set net_rx_dropped_threshold = monitoring.rx_packets_dropped_rate.warn %}
        if: 'avg_over_time(net_drop_in[1m]) > {{ net_rx_dropped_threshold }}'
        {% raw %}
        labels:
          severity: warning
          service: system
        annotations:
          summary: 'Too many received packets dropped on {{ $labels.host }} for interface {{ $labels.interface }}'
          description: 'The average number of received packets which are dropped is too high on node {{ $labels.host }} for interface {{ $labels.interface }} (current value={{ $value }}, threshold={% endraw %}{{ net_rx_dropped_threshold }})'
      # Fires when the 1-minute average of net_drop_out exceeds
      # monitoring.tx_packets_dropped_rate.warn.
      SystemTxPacketsDroppedTooHigh:
        {%- set net_tx_dropped_threshold = monitoring.tx_packets_dropped_rate.warn %}
        if: 'avg_over_time(net_drop_out[1m]) > {{ net_tx_dropped_threshold }}'
        {% raw %}
        labels:
          severity: warning
          service: system
        annotations:
          summary: 'Too many transmitted packets dropped on {{ $labels.host }} for interface {{ $labels.interface }}'
          description: 'The average number of transmitted packets which are dropped is too high on node {{ $labels.host }} for interface {{ $labels.interface }} (current value={{ $value }}, threshold={% endraw %}{{ net_tx_dropped_threshold }})'
      # Fires when the 1-minute average of swap_used_percent exceeds
      # monitoring.swap.warn; any percent sign is stripped from that setting
      # before it is converted to a float.
      SystemSwapUsed:
        {%- set swap_used_threshold = monitoring.swap.warn.strip('%')|float %}
        if: 'avg_over_time(swap_used_percent[1m]) > {{ swap_used_threshold }}'
        {% raw %}
        labels:
          severity: warning
          service: system
        annotations:
          summary: 'Swap usage too high on {{ $labels.host }}'
          description: 'The average percentage of used swap is too high on node {{ $labels.host }} (current value={{ $value }}%, threshold={% endraw %}{{ swap_used_threshold }})'
      # Fires when the per-second rate of swap_in over 2 minutes exceeds
      # monitoring.swap_in_rate.warn.
      SystemSwapIn:
        {%- set swap_in_threshold = monitoring.swap_in_rate.warn %}
        if: 'rate(swap_in[2m]) > {{ swap_in_threshold }}'
        {% raw %}
        labels:
          severity: warning
          service: system
        annotations:
          summary: 'Swap input throughput too high on {{ $labels.host }}'
          description: 'The rate of swap input bytes is too high on node {{ $labels.host }} (current value={{ $value }}b/s, threshold={% endraw %}{{ swap_in_threshold }})'
      # Fires when the per-second rate of swap_out over 2 minutes exceeds
      # monitoring.swap_out_rate.warn.
      SystemSwapOut:
        {%- set swap_out_threshold = monitoring.swap_out_rate.warn %}
        if: 'rate(swap_out[2m]) > {{ swap_out_threshold }}'
        {% raw %}
        labels:
          severity: warning
          service: system
        annotations:
          summary: 'Swap output throughput too high on {{ $labels.host }}'
          description: 'The rate of swap output bytes is too high on node {{ $labels.host }} (current value={{ $value }}b/s, threshold={% endraw %}{{ swap_out_threshold }})'