summaryrefslogtreecommitdiffstats
path: root/etc
diff options
context:
space:
mode:
Diffstat (limited to 'etc')
-rwxr-xr-x etc/munin/plugins/cpu 246
1 files changed, 246 insertions, 0 deletions
diff --git a/etc/munin/plugins/cpu b/etc/munin/plugins/cpu
new file mode 100755
index 00000000..1708bcf6
--- /dev/null
+++ b/etc/munin/plugins/cpu
@@ -0,0 +1,246 @@
+#!/bin/sh
+#
+: <<=cut
+
+=head1 NAME
+
+cpu - Plugin to monitor CPU usage.
+
+=head1 APPLICABLE SYSTEMS
+
+All Linux systems
+
+=head1 CONFIGURATION
+
+The following is the default configuration:
+
+ [cpu]
+ env.HZ 100
+
+See "BUGS" for an explanation of this setting.
+
+=head2 EXAMPLE WARNING AND CRITICAL SETTINGS
+
+You can also set warning and critical levels for each of the data
+series the plugin reports. The following environment variables are
+used as default for all fields:
+
+ env.warning
+ env.critical
+
+But each field can be controlled separately:
+
+ env.system_warning
+ env.system_critical
+
+ env.user_warning
+ env.user_critical
+
+ env.nice_warning
+ env.nice_critical
+
+ env.idle_warning
+ env.idle_critical
+
+For some kernels there are also the following settings:
+
+ env.iowait_warning
+ env.iowait_critical
+ env.irq_warning
+ env.irq_critical
+ env.softirq_warning
+ env.softirq_critical
+ env.steal_warning
+ env.steal_critical
+ env.guest_warning
+ env.guest_critical
+
+=head1 INTERPRETATION
+
+The plugin shows cpu usage in percent. In case of more than one core
+it displays 100% for each core.
+
+If a core is 100% busy, no "iowait" will be shown; iowait is only
+counted while the CPU has nothing else to do and is waiting on IO.
+Therefore a 100% busy core can hide a lot of iowait. Please refer to
+the IO latency and other disk related graphs for further information
+about IO performance.
+
+=head1 MAGIC MARKERS
+
+ #%# family=auto
+ #%# capabilities=autoconf
+
+
+=head1 VERSION
+
+ $Id$
+
+=head1 BUGS
+
+Some combinations of hardware and Linux (probably only 2.4 kernels)
+use 1000 units/second in /proc/stat, corresponding to the system's
+HZ (see /usr/src/linux/include/asm/param.h). But almost all systems
+use 100 units/second, and this is our default, even if
+Documentation/proc.txt in the kernel source says otherwise. - Finding
+and fix by dz@426.ch
+
+Otherwise none known
+
+=head1 AUTHOR
+
+Unknown
+
+=head1 LICENSE
+
+GPLv2
+
+=cut
+
+
+. $MUNIN_LIBDIR/plugins/plugin.sh
+
+if [ "$1" = "autoconf" ]; then
+ if [ -r /proc/stat ]; then
+ echo yes
+ exit 0
+ else
+ echo no
+ exit 0
+ fi
+fi
+
+# Units per second of the /proc/stat counters; env.HZ overrides the 100.
+HZ=${HZ:-100}
+
+# Work out which optional columns the aggregate "cpu" row provides.  All
+# three flags are reset first so inherited environment values are ignored.
+# grep -E is used because the egrep wrapper is obsolescent.
+extinfo="" extextinfo="" extextextinfo=""
+cpu7_re='^cpu +[0-9]+ +[0-9]+ +[0-9]+ +[0-9]+ +[0-9]+ +[0-9]+ +[0-9]+'
+if grep -Eq "$cpu7_re" /proc/stat; then
+  extinfo="iowait irq softirq"
+  # An 8th numeric column adds "steal"; a 9th adds "guest".
+  if grep -Eq "$cpu7_re +[0-9]+" /proc/stat; then extextinfo="steal"; fi
+  if grep -Eq "$cpu7_re +[0-9]+ +[0-9]+" /proc/stat; then extextextinfo="guest"; fi
+fi
+
+# config: describe the graph and its data series to Munin, then exit 0.
+# scaleto100=yes caps the graph at 100 and divides each series by NCPU.
+if [ "$1" = "config" ]; then
+  # Count the per-CPU "cpuN" rows.  Fall back to 1 so the cdef divisions
+  # below can never divide by zero.
+  NCPU=$(grep -Ec '^cpu[0-9]+ ' /proc/stat)
+  [ "${NCPU:-0}" -gt 0 ] || NCPU=1
+  if [ "$scaleto100" = "yes" ]; then
+    graphlimit=100
+  else
+    graphlimit=$((NCPU * 100))
+  fi
+  echo 'graph_title CPU usage'
+  # $extinfo stays unquoted so that an empty value adds nothing at all.
+  echo "graph_order system user nice idle" $extinfo
+  echo "graph_args --base 1000 -r --lower-limit 0 --upper-limit $graphlimit"
+  echo 'graph_vlabel %'
+  echo 'graph_scale no'
+  echo 'graph_info This graph shows how CPU time is spent.'
+  echo 'graph_category system'
+  echo 'graph_period second'
+  echo 'system.label system'
+  echo 'system.draw AREA'
+  echo 'system.min 0'
+  echo 'system.type DERIVE'
+  echo "system.info CPU time spent by the kernel in system activities"
+  echo 'user.label user'
+  echo 'user.draw STACK'
+  echo 'user.min 0'
+  echo 'user.type DERIVE'
+  echo 'user.info CPU time spent by normal programs and daemons'
+  echo 'nice.label nice'
+  echo 'nice.draw STACK'
+  echo 'nice.min 0'
+  echo 'nice.type DERIVE'
+  echo 'nice.info CPU time spent by nice(1)d programs'
+  echo 'idle.label idle'
+  echo 'idle.draw STACK'
+  echo 'idle.min 0'
+  echo 'idle.type DERIVE'
+  echo 'idle.info Idle CPU time'
+  # Warning/critical limits; the helper is defined outside this file.
+  for field in system user nice idle; do
+    print_adjusted_thresholds "$field" "$graphlimit"
+  done
+  if [ "$scaleto100" = "yes" ]; then
+    echo "system.cdef system,$NCPU,/"
+    echo "user.cdef user,$NCPU,/"
+    echo "nice.cdef nice,$NCPU,/"
+    echo "idle.cdef idle,$NCPU,/"
+  fi
+
+  # Counters present only when the "cpu" row has at least 7 columns.
+  if [ -n "$extinfo" ]; then
+    echo 'iowait.label iowait'
+    echo 'iowait.draw STACK'
+    echo 'iowait.min 0'
+    echo 'iowait.type DERIVE'
+    echo 'iowait.info CPU time spent waiting for I/O operations to finish when there is nothing else to do.'
+    echo 'irq.label irq'
+    echo 'irq.draw STACK'
+    echo 'irq.min 0'
+    echo 'irq.type DERIVE'
+    echo 'irq.info CPU time spent handling interrupts'
+    echo 'softirq.label softirq'
+    echo 'softirq.draw STACK'
+    echo 'softirq.min 0'
+    echo 'softirq.type DERIVE'
+    echo 'softirq.info CPU time spent handling "batched" interrupts'
+    if [ "$scaleto100" = "yes" ]; then
+      echo "iowait.cdef iowait,$NCPU,/"
+      echo "irq.cdef irq,$NCPU,/"
+      echo "softirq.cdef softirq,$NCPU,/"
+    fi
+    for field in iowait irq softirq; do
+      print_adjusted_thresholds "$field" "$graphlimit"
+    done
+  fi
+
+  # "steal" is declared only when an 8th column was detected.
+  if [ -n "$extextinfo" ]; then
+    echo 'steal.label steal'
+    echo 'steal.draw STACK'
+    echo 'steal.min 0'
+    echo 'steal.type DERIVE'
+    echo 'steal.info The time that a virtual CPU had runnable tasks, but the virtual CPU itself was not running'
+    if [ "$scaleto100" = "yes" ]; then
+      echo "steal.cdef steal,$NCPU,/"
+    fi
+    print_adjusted_thresholds steal "$graphlimit"
+  fi
+
+  # "guest" is declared only when a 9th column was detected.
+  if [ -n "$extextextinfo" ]; then
+    echo 'guest.label guest'
+    echo 'guest.draw STACK'
+    echo 'guest.min 0'
+    echo 'guest.type DERIVE'
+    echo 'guest.info The time spent running a virtual CPU for guest operating systems under the control of the Linux kernel.'
+    if [ "$scaleto100" = "yes" ]; then
+      echo "guest.cdef guest,$NCPU,/"
+    fi
+    print_adjusted_thresholds guest "$graphlimit"
+  fi
+  exit 0
+fi
+
+# fetch: emit "<field>.value <epoch>:<n>" from the aggregate "cpu" row,
+# rescaled from HZ to 100 units/s.  DERIVE needs integers without 10e+09
+# notation, hence %.0f.  Only series announced by "config" are printed.
+if [ -n "$extextextinfo" ]; then nfields=9
+elif [ -n "$extextinfo" ]; then nfields=8
+elif [ -n "$extinfo" ]; then nfields=7
+else nfields=4
+fi
+awk -v hz="$HZ" -v n="$nfields" '/^cpu / {
+  split("user nice system idle iowait irq softirq steal guest", name, " "); epoch = systime()
+  for (i = 1; i <= n; i++) printf "%s.value %d:%.0f\n", name[i], epoch, $(i + 1) * 100 / hz
+}' < /proc/stat