Saltstack Official Linux Formula
Du kan inte välja fler än 25 ämnen. Ämnen måste starta med en bokstav eller siffra, kan innehålla bindestreck ('-') och vara max 35 tecken långa.

73 lines
3.7KB

  # Parent keys for the alert rule definitions that follow; each later
  # "<Name>:" key introduces one alert (nesting presumed - the indentation is
  # not visible in this listing, confirm against the real file).
  # NOTE(review): the Jinja variable prometheus_server used by the threshold
  # lookups is not defined in the visible part of the file.
  1. server:
  2. alert:
  # AvgCPUUsageIdle: fires when the 5-minute average of cpu_usage_idle for the
  # cpu="cpu-total" series drops below the threshold.
  # The threshold is looked up in prometheus_server under
  # alert -> AvgCPUUsageIdle -> var -> threshold and falls back to 10 when any
  # level of that path is missing.
  # The raw/endraw pair stops Jinja from evaluating the $labels / $value
  # placeholders so they pass through unrendered; endraw is closed inside the
  # description string so the Jinja threshold variable after it is still
  # substituted.
  3. AvgCPUUsageIdle:
  4. {%- set cpu_idle_threshold = prometheus_server.get('alert', {}).get('AvgCPUUsageIdle', {}).get('var', {}).get('threshold', 10) %}
  5. if: avg_over_time(cpu_usage_idle{cpu="cpu-total"}[5m]) < {{ cpu_idle_threshold }}
  6. {% raw %}
  7. labels:
  8. severity: warning
  9. service: system
  10. annotations:
  11. summary: 'Idle CPU usage too low on {{ $labels.host }}'
  12. description: 'The average idle CPU usage is too low on node {{ $labels.host }} (current value={{ $value }}, threshold={% endraw %}{{ cpu_idle_threshold}})'
  # PredictLinearDiskFree: fires when predict_linear, fed the last hour of
  # disk_free samples, projects a value below 0 at 8*3600 seconds (8 hours)
  # ahead. The expression is fixed; no threshold is read from
  # prometheus_server for this alert.
  # The raw/endraw pair wraps labels and annotations so the $labels
  # placeholders are not evaluated by Jinja.
  13. PredictLinearDiskFree:
  14. if: 'predict_linear(disk_free[1h], 8*3600) < 0'
  15. {% raw %}
  16. labels:
  17. severity: warning
  18. service: system
  19. annotations:
  20. summary: 'Free space for {{ $labels.path }} too low on {{ $labels.host }}'
  21. description: 'The disk partition ({{ $labels.path }}) will be full in less than 8 hours on {{ $labels.host }}'
  22. {% endraw %}
  # PredictLinearDiskInodesFree: fires when predict_linear, fed the last hour
  # of disk_inodes_free samples, projects a value below 0 at 8*3600 seconds
  # (8 hours) ahead. The expression is fixed; no threshold is read from
  # prometheus_server for this alert.
  # The raw/endraw pair wraps labels and annotations so the $labels
  # placeholders are not evaluated by Jinja.
  23. PredictLinearDiskInodesFree:
  24. if: 'predict_linear(disk_inodes_free[1h], 8*3600) < 0'
  25. {% raw %}
  26. labels:
  27. severity: warning
  28. service: system
  29. annotations:
  30. summary: 'Free inodes for {{ $labels.path }} too low on {{ $labels.host }}'
  31. description: 'The disk inodes ({{ $labels.path }}) will be full in less than 8 hours on {{ $labels.host }}'
  32. {% endraw %}
  # AvgMemAvailablePercent: fires when the 5-minute average of
  # mem_available_percent drops below the threshold.
  # The threshold is looked up in prometheus_server under
  # alert -> AvgMemAvailablePercent -> var -> threshold and falls back to 10
  # when any level of that path is missing.
  # endraw is closed inside the description string so the Jinja threshold
  # variable after it is still substituted, while the $labels / $value
  # placeholders before it pass through unrendered.
  33. AvgMemAvailablePercent:
  34. {%- set mem_avail_threshold = prometheus_server.get('alert', {}).get('AvgMemAvailablePercent', {}).get('var', {}).get('threshold', 10) %}
  35. if: avg_over_time(mem_available_percent[5m]) < {{ mem_avail_threshold }}
  36. {% raw %}
  37. labels:
  38. severity: warning
  39. service: system
  40. annotations:
  41. summary: 'Free memory too low on {{ $labels.host }}'
  42. description: 'The percentage of free memory is too low on node {{ $labels.host }} (current value={{ $value }}, threshold={% endraw %}{{ mem_avail_threshold }})'
  # SystemLoad5: fires when system_load5 divided by system_n_cpus exceeds the
  # threshold.
  # The threshold is looked up inline in prometheus_server under
  # alert -> SystemLoad5 -> var -> threshold and falls back to 3 when any
  # level of that path is missing. Unlike the other thresholded alerts it is
  # not stored in a Jinja variable and is not repeated in the description.
  # The raw/endraw pair wraps labels and annotations so the $labels
  # placeholders are not evaluated by Jinja.
  43. SystemLoad5:
  44. if: system_load5 / system_n_cpus > {{ prometheus_server.get('alert', {}).get('SystemLoad5', {}).get('var', {}).get('threshold', 3) }}
  45. {% raw %}
  46. labels:
  47. severity: warning
  48. service: system
  49. annotations:
  50. summary: 'High system load (5m) on {{ $labels.host }}'
  51. description: 'High system load (5m) on node {{ $labels.host }}'
  52. {% endraw %}
  # NetworkRxPacketsDropped: fires when the 1-minute average of net_drop_in
  # exceeds the threshold.
  # The threshold is looked up in prometheus_server under
  # alert -> NetworkRxPacketsDropped -> var -> threshold and falls back to 100
  # when any level of that path is missing.
  # endraw is closed inside the description string so the Jinja threshold
  # variable after it is still substituted, while the $labels / $value
  # placeholders before it pass through unrendered.
  # The description reads the interface from $labels, the same variable the
  # summary uses; the singular form is not a defined template variable.
  53. NetworkRxPacketsDropped:
  54. {%- set net_rx_dropped_threshold = prometheus_server.get('alert', {}).get('NetworkRxPacketsDropped', {}).get('var', {}).get('threshold', 100) %}
  55. if: avg_over_time(net_drop_in[1m]) > {{ net_rx_dropped_threshold }}
  56. {% raw %}
  57. labels:
  58. severity: warning
  59. service: system
  60. annotations:
  61. summary: 'Too many received packets dropped on {{ $labels.host }} for interface {{ $labels.interface }}'
  62. description: 'The average number of received packets which are dropped is too high on node {{ $labels.host }} for interface {{ $labels.interface }} (current value={{ $value }}, threshold={% endraw %}{{ net_rx_dropped_threshold }})'
  # NetworkTxPacketsDropped: fires when the 1-minute average of net_drop_out
  # exceeds the threshold.
  # The threshold is looked up in prometheus_server under
  # alert -> NetworkTxPacketsDropped -> var -> threshold and falls back to 100
  # when any level of that path is missing.
  # endraw is closed inside the description string so the Jinja threshold
  # variable after it is still substituted, while the $labels / $value
  # placeholders before it pass through unrendered.
  # The description reads the interface from $labels, the same variable the
  # summary uses; the singular form is not a defined template variable.
  63. NetworkTxPacketsDropped:
  64. {%- set net_tx_dropped_threshold = prometheus_server.get('alert', {}).get('NetworkTxPacketsDropped', {}).get('var', {}).get('threshold', 100) %}
  65. if: avg_over_time(net_drop_out[1m]) > {{ net_tx_dropped_threshold }}
  66. {% raw %}
  67. labels:
  68. severity: warning
  69. service: system
  70. annotations:
  71. summary: 'Too many transmitted packets dropped on {{ $labels.host }} for interface {{ $labels.interface }}'
  72. description: 'The average number of transmitted packets which are dropped is too high on node {{ $labels.host }} for interface {{ $labels.interface }} (current value={{ $value }}, threshold={% endraw %}{{ net_tx_dropped_threshold }})'