Saltstack Official Linux Formula
Você não pode selecionar mais de 25 tópicos Os tópicos devem começar com uma letra ou um número, podem incluir traços ('-') e podem ter até 35 caracteres.

125 linhas
5.8KB

  1. {%- from "linux/map.jinja" import monitoring with context %}
  2. server:
  3. alert:
  4. SystemCpuIdleTooLow:
  5. {%- set cpu_idle_threshold = monitoring.cpu_idle_percentage.warn|float %}
  6. if: avg_over_time(cpu_usage_idle{cpu="cpu-total"}[5m]) < {{ cpu_idle_threshold }}
  7. {% raw %}
  8. labels:
  9. severity: warning
  10. service: system
  11. annotations:
  12. summary: 'Idle CPU usage too low on {{ $labels.host }}'
  13. description: 'The average idle CPU usage is too low on node {{ $labels.host }} (current value={{ $value }}%, threshold={% endraw %}{{ cpu_idle_threshold}}%).'
  14. SystemDiskSpaceTooLow:
  15. if: 'predict_linear(disk_free[1h], 8*3600) < 0'
  16. {% raw %}
  17. for: 15m
  18. labels:
  19. severity: warning
  20. service: system
  21. annotations:
  22. summary: 'Free space for {{ $labels.path }} too low on {{ $labels.host }}'
  23. description: 'The disk partition ({{ $labels.path }}) will be full in less than 8 hours on {{ $labels.host }}.'
  24. {% endraw %}
  25. SystemDiskSpaceFull:
  26. if: 'disk_used_percent >= 99 and disk_inodes_total > 0'
  27. {% raw %}
  28. labels:
  29. severity: critical
  30. service: system
  31. annotations:
  32. summary: 'Disk partition {{ $labels.path }} full on {{ $labels.host }}'
  33. description: 'The disk partition ({{ $labels.path }}) is used at {{ $value }}% on {{ $labels.host }}.'
  34. {% endraw %}
  35. SystemDiskInodesTooLow:
  36. if: 'predict_linear(disk_inodes_free[1h], 8*3600) < 0'
  37. {% raw %}
  38. for: 15m
  39. labels:
  40. severity: warning
  41. service: system
  42. annotations:
  43. summary: 'Free inodes for {{ $labels.path }} too low on {{ $labels.host }}'
  44. description: 'The disk inodes ({{ $labels.path }}) will be full in less than 8 hours on {{ $labels.host }}.'
  45. {% endraw %}
  46. SystemDiskInodesFull:
  47. if: 'disk_inodes_used / disk_inodes_total >= 0.99'
  48. {% raw %}
  49. labels:
  50. severity: critical
  51. service: system
  52. annotations:
  53. summary: 'Inodes for {{ $labels.path }} full on {{ $labels.host }}'
  54. description: 'The disk inodes ({{ $labels.path }}) are used at {{ $value }}% on {{ $labels.host }}.'
  55. {% endraw %}
  56. SystemMemoryAvailableLow:
  57. {%- set mem_avail_warn_threshold = monitoring.free_memory_percentage.warn|float %}
  58. if: avg_over_time(mem_available_percent[5m]) < {{ mem_avail_warn_threshold }}
  59. {% raw %}
  60. labels:
  61. severity: warning
  62. service: system
  63. annotations:
  64. summary: 'Free memory low on {{ $labels.host }}'
  65. description: 'The percentage of free memory is low on node {{ $labels.host }} (current value={{ $value }}%, threshold={% endraw %}{{ mem_avail_warn_threshold }}%).'
  66. SystemMemoryAvailableTooLow:
  67. {%- set mem_avail_crit_threshold = monitoring.free_memory_percentage.crit|float %}
  68. if: avg_over_time(mem_available_percent[5m]) < {{ mem_avail_crit_threshold }}
  69. {% raw %}
  70. labels:
  71. severity: critical
  72. service: system
  73. annotations:
  74. summary: 'Free memory too low on {{ $labels.host }}'
  75. description: 'The percentage of free memory is too low on node {{ $labels.host }} (current value={{ $value }}%, threshold={% endraw %}{{ mem_avail_crit_threshold }}%).'
  76. SystemLoad5TooHigh:
  77. if: system_load5 / system_n_cpus > {{ monitoring.load_5.warn }}
  78. {% raw %}
  79. labels:
  80. severity: warning
  81. service: system
  82. annotations:
  83. summary: 'High system load (5m) on {{ $labels.host }}'
  84. description: 'The 5-minutes system load is too high on node {{ $labels.host }} (current value={{ $value }}, threshold={% endraw %}{{ monitoring.load_5.warn }}).'
  85. SystemRxPacketsDroppedTooHigh:
  86. {%- set net_rx_dropped_threshold = monitoring.rx_packets_dropped_rate.warn %}
  87. if: rate(net_drop_in[1m]) > {{ net_rx_dropped_threshold }}
  88. {% raw %}
  89. labels:
  90. severity: critical
  91. service: system
  92. annotations:
  93. summary: 'Too many received packets dropped on {{ $labels.host }} for interface {{ $labels.interface }}'
  94. description: 'The rate of received packets which are dropped is too high on node {{ $labels.host }} for interface {{ $labels.interface }} (current value={{ $value }}/sec, threshold={% endraw %}{{ net_rx_dropped_threshold }}/sec)'
  95. SystemTxPacketsDroppedTooHigh:
  96. {%- set net_tx_dropped_threshold = monitoring.tx_packets_dropped_rate.warn %}
  97. if: rate(net_drop_out[1m]) > {{ net_tx_dropped_threshold }}
  98. {% raw %}
  99. labels:
  100. severity: critical
  101. service: system
  102. annotations:
  103. summary: 'Too many transmitted packets dropped on {{ $labels.host }} for interface {{ $labels.interface }}'
  104. description: 'The rate of transmitted packets which are dropped is too high on node {{ $labels.host }} for interface {{ $labels.interface }} (current value={{ $value }}/sec, threshold={% endraw %}{{ net_tx_dropped_threshold }}/sec)'
  105. SystemSwapIn:
  106. {%- set swap_in_threshold = monitoring.swap_in_rate.warn %}
  107. if: rate(swap_in[2m]) > {{ swap_in_threshold }}
  108. {% raw %}
  109. labels:
  110. severity: warning
  111. service: system
  112. annotations:
  113. summary: 'Swap input throughput too high on {{ $labels.host }}'
  114. description: 'The rate of swap input bytes is too high on node {{ $labels.host }} (current value={{ $value }}b/s, threshold={% endraw %}{{ swap_in_threshold }}b/s).'
  115. SystemSwapOut:
  116. {%- set swap_out_threshold = monitoring.swap_out_rate.warn %}
  117. if: rate(swap_out[2m]) > {{ swap_out_threshold }}
  118. {% raw %}
  119. labels:
  120. severity: warning
  121. service: system
  122. annotations:
  123. summary: 'Swap output throughput too high on {{ $labels.host }}'
  124. description: 'The rate of swap output bytes is too high on node {{ $labels.host }} (current value={{ $value }}b/s, threshold={% endraw %}{{ swap_out_threshold }}b/s).'