|
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199 |
- {%- from "linux/map.jinja" import system with context %}
- {%- set mcelog = system.mcelog %}
- #
- # Example config file for mcelog
- # mcelog is the user space backend that decodes and processes machine check events
- # (cpu hardware errors) reported by the CPU to the kernel
- #
-
- # general format
- #optionname = value
- # white space is not allowed in value currently, except at the end where it is dropped
- #
-
- # In general all command line options that are not commands work here.
- # See man mcelog or mcelog --help for a list.
- # e.g. to enable the --no-syslog option use
- #no-syslog = yes (or no to disable)
- # when the option has an argument
- #logfile = /tmp/logfile
- # below are the options which are not command line options.
-
- # Set CPU type for which mcelog decodes events:
- #cpu = type
- # For valid values for type please see mcelog --help.
- # If this value is set incorrectly the decoded output will be likely incorrect.
- # By default, when this parameter is not set, mcelog uses the CPU it is running on.
- # On very new kernels the mcelog events reported by the kernel also carry
- # the CPU type, which is used too when available and not overridden.
-
- # Enable daemon mode:
- #daemon = yes
- # By default mcelog just processes the currently pending events and exits.
- # In daemon mode it will keep running as a daemon in the background and poll
- # the kernel for events and then decode them.
-
- # Filter out known broken events by default.
- filter = yes
- # Don't log memory errors individually.
- # They still get accounted if that is enabled.
- #filter-memory-errors = yes
-
- # output in undecoded raw format to be easier machine readable
- # (default is decoded).
- #raw = yes
-
- # Set CPU MHz to decode uptime from the time stamp counter (output
- # unreliable, not needed on new kernels which report the event time
- # directly). A lot of systems don't have a linear time stamp clock
- # and the output is wrong then.
- # Normally mcelog tries to figure out if the TSC is reliable
- # and only uses the current frequency then.
- # Setting a frequency forces timestamp decoding.
- # This setting is obsolete with modern kernels which report the time
- # directly.
- #cpumhz = 1800.00
-
- # log output options
- # Log decoded machine checks in syslog (default stdout or syslog for daemon)
- #syslog = yes
- # Log decoded machine checks in syslog with error level
- #syslog-error = yes
- # Never log anything to syslog
- #no-syslog = yes
- # Append log output to logfile instead of stdout. Only when no syslog logging is active
- #logfile = filename
-
- {%- if mcelog.logging is defined %}
-
- {%- if mcelog.logging.syslog is defined %}
- syslog = {{ 'yes' if mcelog.logging.syslog else 'no' }}
- {%- endif %}
- {%- if mcelog.logging.syslog_error is defined %}
- syslog-error = {{ 'yes' if mcelog.logging.syslog_error else 'no' }}
- {%- endif %}
- {%- if mcelog.logging.no_syslog is defined %}
- no-syslog = {{ 'yes' if mcelog.logging.no_syslog else 'no' }}
- {%- endif %}
- {%- if mcelog.logging.logfile is defined %}
- logfile = {{ mcelog.logging.logfile }}
- {%- endif %}
-
- {%- endif %}
- # Use SMBIOS information to decode DIMMs (needs root).
- # This function is not recommended to use right now and generally not needed.
- # The exception is memdb prepopulation, which is configured separately below.
- #dmi = no
-
- # When in daemon mode run as this user after set up.
- # Note that the triggers will run as this user too.
- # Setting this to non root will mean that triggers cannot take some corrective
- # action, like offlining objects.
- #run-credentials-user = root
-
- # group to run as daemon with
- # default to the group of the run-credentials-user
- #run-credentials-group = nobody
-
- [server]
- # user allowed to access client socket.
- # when set to * match any
- # root is always allowed to access.
- # default: root only
- client-user = root
- # group allowed to access mcelog
- # When no group is configured any group matches (but still user checking).
- # when set to * match any
- #client-group = root
- # Path to the unix socket for client<->server communication.
- # When no socket-path is configured the server will not start
- #socket-path = /var/run/mcelog-client
- # When mcelog starts it checks if a server is already running. This configures the timeout
- # for this check.
- #initial-ping-timeout = 2
- #
- [dimm]
- # Is the in memory DIMM error tracking enabled?
- # Only works on systems with integrated memory controller and
- # which are supported.
- # Only takes effect in daemon mode.
- dimm-tracking-enabled = yes
- # Use DMI information from the BIOS to prepopulate DIMM database.
- # Note this might not work with all BIOS and requires mcelog to run as root.
- # Alternative is to let mcelog create DIMM objects on demand.
- dmi-prepopulate = yes
- #
- # Execute these triggers when the rate of corrected or uncorrected
- # Errors per DIMM exceeds the threshold.
- # Note when the hardware does not report DIMMs this might also
- # be per channel.
- # The default of 10/24h is reasonable for server quality
- # DDR3 DIMMs as of 2009/10.
- #uc-error-trigger = dimm-error-trigger
- uc-error-threshold = 1 / 24h
- #ce-error-trigger = dimm-error-trigger
- ce-error-threshold = 10 / 24h
-
- [socket]
- # Enable memory error accounting per socket.
- socket-tracking-enabled = yes
-
- # Threshold and trigger for uncorrected memory errors on a socket.
- # mem-uc-error-trigger = socket-memory-error-trigger
-
- mem-uc-error-threshold = 100 / 24h
-
- # Trigger script for corrected memory errors on a socket.
- mem-ce-error-trigger = socket-memory-error-trigger
-
- # Threshold on when to trigger a corrected error for the socket.
-
- mem-ce-error-threshold = 100 / 24h
-
- # Log socket error threshold explicitly?
- mem-ce-error-log = yes
-
- # Trigger script for uncorrected bus error events
- bus-uc-threshold-trigger = bus-error-trigger
-
- # Trigger script for uncorrected IOMCA errors
- iomca-threshold-trigger = iomca-error-trigger
-
- # Trigger script for other uncategorized errors
- unknown-threshold-trigger = unknown-error-trigger
-
- [cache]
- # Processing of cache error thresholds reported by Intel CPUs.
- cache-threshold-trigger = cache-error-trigger
-
- # Should cache threshold events be logged explicitly?
- cache-threshold-log = yes
-
- [page]
- # Memory error accounting per 4K memory page.
- # Threshold for the corrected memory errors trigger script.
- memory-ce-threshold = 10 / 24h
-
- # Trigger script for corrected errors.
- # memory-ce-trigger = page-error-trigger
-
- # Should page threshold events be logged explicitly?
- memory-ce-log = yes
-
- # Specify the internal action mcelog takes when a page error threshold is exceeded.
- # This is done in addition to executing the trigger script, if available.
- # off             no action
- # account         only account errors
- # soft            try to soft-offline the page without killing any processes
- #                 This requires an up-to-date kernel. Might not be successful.
- # hard            try to hard-offline the page by killing processes
- #                 Requires an up-to-date kernel. Might not be successful.
- # soft-then-hard  first try to soft-offline, then try hard-offlining
- #memory-ce-action = off|account|soft|hard|soft-then-hard
- memory-ce-action = soft
-
- [trigger]
- # Maximum number of running triggers
- children-max = 2
- # execute triggers in this directory
- directory = /etc/mcelog
|