From 315ead533e3d4e67ce3908a13ebe5b75ef9060c4 Mon Sep 17 00:00:00 2001 From: Holger Levsen Date: Mon, 27 Jul 2015 14:31:57 +0200 Subject: move etc to hosts/jenkins/etc --- hosts/jenkins/etc/munin/plugins/cpu | 250 +++++++++++++++++++++ hosts/jenkins/etc/munin/plugins/iostat_ios | 220 ++++++++++++++++++ hosts/jenkins/etc/munin/plugins/jenkins_builds | 64 ++++++ .../etc/munin/plugins/jenkins_builds_results | 75 +++++++ .../munin/plugins/jenkins_builds_results_summary | 70 ++++++ .../etc/munin/plugins/jenkins_builds_running | 43 ++++ hosts/jenkins/etc/munin/plugins/jenkins_jobs | 64 ++++++ hosts/jenkins/etc/munin/plugins/munin_stats | 112 +++++++++ 8 files changed, 898 insertions(+) create mode 100755 hosts/jenkins/etc/munin/plugins/cpu create mode 100755 hosts/jenkins/etc/munin/plugins/iostat_ios create mode 100755 hosts/jenkins/etc/munin/plugins/jenkins_builds create mode 100755 hosts/jenkins/etc/munin/plugins/jenkins_builds_results create mode 100755 hosts/jenkins/etc/munin/plugins/jenkins_builds_results_summary create mode 100755 hosts/jenkins/etc/munin/plugins/jenkins_builds_running create mode 100755 hosts/jenkins/etc/munin/plugins/jenkins_jobs create mode 100755 hosts/jenkins/etc/munin/plugins/munin_stats (limited to 'hosts/jenkins/etc/munin/plugins') diff --git a/hosts/jenkins/etc/munin/plugins/cpu b/hosts/jenkins/etc/munin/plugins/cpu new file mode 100755 index 00000000..b66ed381 --- /dev/null +++ b/hosts/jenkins/etc/munin/plugins/cpu @@ -0,0 +1,250 @@ +#!/bin/sh +# +# -- unfinished WORK IN PROGRESS -- +# +# includes modifications trying to fix #767100 +# +: <<=cut + +=head1 NAME + +cpu - Plugin to monitor CPU usage. + +=head1 APPLICABLE SYSTEMS + +All Linux systems + +=head1 CONFIGURATION + +The following is default configuration + + [cpu] + env.HZ 100 + +See "BUGS" for a explanation of this setting. + +=head2 EXAMPLE WARNING AND CRITICAL SETTINGS + +You can also set warning and critical levels for each of the data +series the plugin reports. 
The following environment variables are +used as default for all fields: + + env.warning + env.critical + +But each field can be controlled separately: + + env.system_warning + env.system_critical + + env.user_warning + env.user_critical + + env.nice_warning + env.nice_critical + + env.idle_warning + env.idle_critical + +For some kernels there is also the following settings: + + env.iowait_warning + env.iowait_critical + env.irq_warning + env.irq_critical + env.softirq_warning + env.softirq_critical + env.steal_warning + env.steal_critical + env.guest_warning + env.guest_critical + +=head1 INTERPRETATION + +The plugin shows cpu usage in percent. In case of more than one core +it displays 100% for each core. + +If a core is 100% busy there will be no "iowait" showing, that only +shows if the CPU has nothing else to do while it waits on IO. +Therefore a 100% busy core can hide a lot of iowait. Please refer to +the IO latency and other disk related graphs for further information +about IO performance. + +=head1 MAGIC MARKERS + + #%# family=auto + #%# capabilities=autoconf + + +=head1 VERSION + + $Id$ + +=head1 BUGS + +Some combinations of hardware and Linux (probably only 2.4 kernels) +use 1000 units/second in /proc/stat corresponding to the systems +HZ. (see /usr/src/linux/include/asm/param.h). But Almost all systems +use 100 units/second and this is our default. Even if +Documentation/proc.txt in the kernel source says otherwise. - Finding +and fix by dz@426.ch + +Otherwise none known + +=head1 AUTHOR + +Unknown + +=head1 LICENSE + +GPLv2 + +=cut + + +. 
$MUNIN_LIBDIR/plugins/plugin.sh
+
+# "autoconf": the plugin is usable wherever /proc/stat can be read.
+if [ "$1" = "autoconf" ]; then
+    if [ -r /proc/stat ]; then
+        echo yes
+    else
+        echo no
+    fi
+    exit 0
+fi
+
+# Units per second assumed for the /proc/stat counters (override: env.HZ).
+HZ=${HZ:-100}
+
+# Optional field groups, detected from the number of numeric columns on the
+# aggregate "cpu" line of /proc/stat.  All three are initialised so that a
+# value inherited from the environment cannot switch a group on by accident.
+extinfo=""
+extextinfo=""
+extextextinfo=""
+
+if grep -Eq '^cpu +[0-9]+ +[0-9]+ +[0-9]+ +[0-9]+ +[0-9]+ +[0-9]+ +[0-9]+' /proc/stat; then
+    extinfo="iowait irq softirq"
+    if grep -Eq '^cpu +[0-9]+ +[0-9]+ +[0-9]+ +[0-9]+ +[0-9]+ +[0-9]+ +[0-9]+ +[0-9]+' /proc/stat; then
+        extextinfo="steal"
+    fi
+    if grep -Eq '^cpu +[0-9]+ +[0-9]+ +[0-9]+ +[0-9]+ +[0-9]+ +[0-9]+ +[0-9]+ +[0-9]+ +[0-9]+' /proc/stat; then
+        extextextinfo="guest"
+    fi
+fi
+
+# print_field NAME DRAW INFO
+#
+# Emit the config attributes of one DERIVE data series, then its
+# warning/critical thresholds (print_adjusted_thresholds is provided by
+# plugin.sh) and, when env.scaleto100 is "yes", the cdef that divides the
+# series by the number of cores.  Reads $graphlimit, $scaleto100 and $NCPU.
+print_field() {
+    echo "$1.label $1"
+    echo "$1.draw $2"
+    echo "$1.min 0"
+    echo "$1.type DERIVE"
+    echo "$1.info $3"
+    print_adjusted_thresholds "$1" "$graphlimit"
+    if [ "$scaleto100" = "yes" ]; then
+        echo "$1.cdef $1,$NCPU,/"
+    fi
+}
+
+if [ "$1" = "config" ]; then
+
+    # Number of per-core "cpuN" lines; the graph tops out at 100% per core
+    # unless env.scaleto100 asks for a single 0-100 scale.
+    NCPU=$(grep -Ec '^cpu[0-9]+ ' /proc/stat)
+    if [ "$scaleto100" = "yes" ]; then
+        graphlimit=100
+    else
+        graphlimit=$(($NCPU * 100))
+    fi
+    echo 'graph_title CPU usage'
+    # List every series that is declared below, including steal and guest.
+    echo "graph_order system user nice idle" $extinfo $extextinfo $extextextinfo
+    echo "graph_args --base 1000 -r --lower-limit 0 --upper-limit $graphlimit"
+    echo 'graph_vlabel %'
+    echo 'graph_scale no'
+    echo 'graph_info This graph shows how CPU time is spent.'
+    echo 'graph_category system'
+    echo 'graph_period second'
+
+    print_field system AREA  'CPU time spent by the kernel in system activities'
+    print_field user   STACK 'CPU time spent by normal programs and daemons'
+    print_field nice   STACK 'CPU time spent by nice(1)d programs'
+    print_field idle   STACK 'Idle CPU time'
+
+    if [ -n "$extinfo" ]; then
+        print_field iowait  STACK 'CPU time spent waiting for I/O operations to finish when there is nothing else to do.'
+        print_field irq     STACK 'CPU time spent handling interrupts'
+        print_field softirq STACK 'CPU time spent handling "batched" interrupts'
+    fi
+
+    if [ -n "$extextinfo" ]; then
+        print_field steal STACK 'The time that a virtual CPU had runnable tasks, but the virtual CPU itself was not running'
+    fi
+
+    if [ -n "$extextextinfo" ]; then
+        print_field guest STACK 'The time spent running a virtual CPU for guest operating systems under the control of the Linux kernel.'
+    fi
+
+    exit 0
+fi
+
+# Note: Counters/derive need to report integer values.  Also we need
+# to avoid 10e+09 and the like %.0f should do this.
+#
+# NOTE(review): only the "guest" branch subtracts guest time from user time
+# and it is also the only branch that reports plain values instead of
+# "epoch:value" pairs.  This looks like the unfinished part of the #767100
+# work mentioned at the top of the file -- confirm before unifying.
+
+if [ -n "$extextextinfo" ]; then
+    awk -v hz="$HZ" '/^cpu / {
+        printf "user.value %.0f\nnice.value %.0f\nsystem.value %.0f\nidle.value %.0f\niowait.value %.0f\nirq.value %.0f\nsoftirq.value %.0f\nsteal.value %.0f\nguest.value %.0f\n",
+            ($2-$10)*100/hz, $3*100/hz, $4*100/hz, $5*100/hz, $6*100/hz, $7*100/hz, $8*100/hz, $9*100/hz, $10*100/hz
+    }' < /proc/stat
+elif [ -n "$extextinfo" ]; then
+    awk -v hz="$HZ" '/^cpu / {
+        epoch = systime()
+        printf "user.value %d:%.0f\nnice.value %d:%.0f\nsystem.value %d:%.0f\nidle.value %d:%.0f\niowait.value %d:%.0f\nirq.value %d:%.0f\nsoftirq.value %d:%.0f\nsteal.value %d:%.0f\n",
+            epoch, $2*100/hz, epoch, $3*100/hz, epoch, $4*100/hz, epoch, $5*100/hz, epoch, $6*100/hz, epoch, $7*100/hz, epoch, $8*100/hz, epoch, $9*100/hz
+    }' < /proc/stat
+elif [ -n "$extinfo" ]; then
+    awk -v hz="$HZ" '/^cpu / {
+        epoch = systime()
+        printf "user.value %d:%.0f\nnice.value %d:%.0f\nsystem.value %d:%.0f\nidle.value %d:%.0f\niowait.value %d:%.0f\nirq.value %d:%.0f\nsoftirq.value %d:%.0f\n",
+            epoch, $2*100/hz, epoch, $3*100/hz, epoch, $4*100/hz, epoch, $5*100/hz, epoch, $6*100/hz, epoch, $7*100/hz, epoch, $8*100/hz
+    }' < /proc/stat
+else
+    # Only the four basic columns exist here, and only those four series are
+    # declared by "config", so nothing else is reported.
+    awk -v hz="$HZ" '/^cpu / {
+        epoch = systime()
+        printf "user.value %d:%.0f\nnice.value %d:%.0f\nsystem.value %d:%.0f\nidle.value %d:%.0f\n",
+            epoch, $2*100/hz, epoch, $3*100/hz, epoch, $4*100/hz, epoch, $5*100/hz
+    }' < /proc/stat
+fi
diff --git a/hosts/jenkins/etc/munin/plugins/iostat_ios b/hosts/jenkins/etc/munin/plugins/iostat_ios
new file mode 100755
index 00000000..94c2272e
--- /dev/null
+++ b/hosts/jenkins/etc/munin/plugins/iostat_ios
@@ -0,0 +1,220 @@
+#!/usr/bin/perl -w
+# -*- cperl -*-
+=head1 NAME
+
+iostat_ios - Show IO-operation latency pr. device.
+
+=head1 APPLICABLE SYSTEMS
+
+Any Linux system
+
+=head1 CONFIGURATION
+
+None needed
+
+=head1 USAGE
+
+Link this into /etc/munin/plugins/ and restart the munin-node. 
+
+=head1 INTERPRETATION
+
+The plugin shows the average time an I/O operation needs to complete for
+each disk or block device on the system.
+
+Simple partitioned disks will only show as the whole disk.  When/if
+you use some device layer over that, such as LVM, then LV devices will
+show up individually.
+
+When the I/O operation time here increases it is to be expected that
+the iowait on the CPU graph increases, but please read about how
+iowait reporting works to fully understand that number.
+
+=head1 MAGIC MARKERS
+
+ #%# family=legacy
+ #%# capabilities=autoconf
+
+=head1 VERSION
+
+ $Id$
+
+=head1 BUGS
+
+Should show more useful device names/labels for device-mapper devices,
+such as those used by LVM and so on.
+
+=head1 AUTHOR
+
+(C) 2004 Per Buer.  Documentation and some modifications by Nicolai
+Langfeldt.
+
+=head1 LICENSE
+
+GPLv2
+
+=cut
+
+use strict;
+use warnings;
+
+use IO::File;
+use Storable qw(store retrieve);
+use Munin::Plugin;
+
+# Counters from the previous run are kept here between invocations.
+use constant STATEFILE => "$ENV{MUNIN_PLUGSTATE}/iostat-ios.state";
+
+
+if (defined($ARGV[0]) and $ARGV[0] eq 'autoconf') {
+    if (-r "/proc/diskstats" or -r "/proc/partitions") {
+        print "yes\n";
+        exit 0;
+    } else {
+        print "no (no /proc/diskstats or /proc/partitions)\n";
+        exit 0;
+    }
+}
+
+if (defined($ARGV[0]) and $ARGV[0] eq 'config') {
+    print_config();
+    exit;
+}
+
+my ($r, $old_r);
+my %name;    # field label ("dev<major>_<minor>") => kernel device name
+
+# Fetch: read the current counters, report the change since the stored
+# snapshot (if there is one), then store the new snapshot.
+$r = get_ios();
+
+($old_r) = get_state();
+
+if ($old_r) {
+    cmp_io($old_r, $r);
+} else {
+    warn "iostat_ios: No historic data present\n";
+}
+
+store_state( $r );
+
+# filter($major, $minor, $name)
+#
+# Returns 0 for devices that must not be graphed (RAM, MD and LVM majors,
+# and names that look like partitions) and 1 for everything else.
+sub filter {
+    my ($major, $minor, $tmpnam) = @_;
+    if (defined($major)) {
+        return 0 if ($major == 1);      # RAM devices
+        return 0 if ($major == 9);      # MD devices
+        return 0 if ($major == 58);     # LVM devices
+        return 0 if ($major == 253);    # LVM2 devices
+    }
+    if (defined($tmpnam)) {
+        return 0 if ($tmpnam =~ /part\d+$/);
+        return 0 if ($tmpnam =~ /^\s*(?:sd|hd|vd)[a-z]\d+\s*$/);
+    }
+
+    return 1;
+}
+
+
+# get_ios($opt)
+#
+# Parses /proc/diskstats and fills %name as a side effect.  Returns a
+# reference to the list of field labels when $opt is true, otherwise a
+# reference to a hash {major}{minor}{rio|rtime|wio|wtime} of raw counters.
+# Dies when /proc/diskstats is missing or cannot be opened.
+sub get_ios {
+    my ($opt) = @_;
+
+    my %R;
+    my @dev;
+
+    # Only the /proc/diskstats layout is implemented.
+    die("kernel 2.4 not supported yet\n") unless -r "/proc/diskstats";
+
+    my $parts = IO::File->new("/proc/diskstats", "r")
+        or die("iostat_ios: cannot open /proc/diskstats: $!\n");
+
+    while (my $line = <$parts>) {
+        # Lines start with blanks, so element 0 of the split is empty.
+        my ($maj, $min, $name, $rio, $rtime, $wio, $wtime) =
+            (split(/\s+/, $line))[1,2,3,4,7,8,11];
+
+        next unless (defined($min) && defined($maj));
+        # Devices with any of the four counters still at zero are skipped.
+        next unless ($wio and $rio and $rtime and $wtime);
+
+        next if (filter($maj, $min, $name) == 0);
+
+        $R{$maj}{$min}{rio}   = $rio;
+        $R{$maj}{$min}{rtime} = $rtime;
+
+        $R{$maj}{$min}{wio}   = $wio;
+        $R{$maj}{$min}{wtime} = $wtime;
+
+        my $label = "dev${maj}_${min}";
+
+        push(@dev, $label);
+
+        $name{$label} = $name;
+    }
+    $parts->close();
+
+    return $opt ? \@dev : \%R;
+}
+
+
+# Prints the graph definition and two series (read, write) per device.
+sub print_config {
+    print("graph_title IO Service time\n",
+          "graph_args --base 1000 --logarithmic\n",
+          "graph_category disk\n",
+          "graph_vlabel seconds\n",
+          "graph_info For each applicable disk device this plugin shows the latency (or delay) for I/O operations on that disk device. The delay is in part made up of waiting for the disk to flush the data, and if data arrives at the disk faster than it can read or write it then the delay time will include the time needed for waiting in the queue.\n");
+
+    for my $d ( @{ get_ios(1) } ) {
+        # The cdef divides the reported value by 1000 for display.
+        print "${d}_rtime.label ",$name{$d}," read\n",
+              "${d}_rtime.type GAUGE\n",
+              "${d}_rtime.cdef ${d}_rtime,1000,/\n",
+              "${d}_wtime.label ",$name{$d}," write\n",
+              "${d}_wtime.type GAUGE\n",
+              "${d}_wtime.cdef ${d}_wtime,1000,/\n";
+        # NOTE(review): thresholds are requested for "$d", but only
+        # "${d}_rtime" and "${d}_wtime" are declared above -- confirm
+        # which field name is intended.
+        print_thresholds($d);
+    }
+}
+
+
+# _avg_latency($old, $new, $ops_key, $time_key)
+#
+# Average time per operation between two counter snapshots of one device.
+# Returns 'U' when there is no previous snapshot or when either counter did
+# not increase (idle device, counter reset or wrap).
+sub _avg_latency {
+    my ($old, $new, $ops_key, $time_key) = @_;
+
+    return 'U' unless ($old
+                       and defined($old->{$ops_key})
+                       and defined($old->{$time_key}));
+
+    my $ops_diff  = $new->{$ops_key}  - $old->{$ops_key};
+    my $time_diff = $new->{$time_key} - $old->{$time_key};
+
+    return ($ops_diff > 0 and $time_diff > 0) ? ($time_diff / $ops_diff) : 'U';
+}
+
+
+# cmp_io($old_io, $new_io)
+#
+# Prints the read and write latency of every device found in $new_io.
+# Devices that are missing from $old_io are reported as 'U' rather than
+# being compared against undefined counters.
+sub cmp_io {
+    my ($old_io, $new_io) = @_;
+
+    for my $maj (sort keys %{$new_io} ) {
+        for my $min (sort keys %{ $new_io->{$maj} } ) {
+            my $new = $new_io->{$maj}{$min};
+            # exists() on the outer level avoids autovivifying $old_io.
+            my $old = exists($old_io->{$maj}) ? $old_io->{$maj}{$min} : undef;
+
+            my $dev = "dev${maj}_${min}";
+
+            print "${dev}_rtime.value ", _avg_latency($old, $new, 'rio', 'rtime'), "\n",
+                  "${dev}_wtime.value ", _avg_latency($old, $new, 'wio', 'wtime'), "\n";
+        }
+    }
+
+}
+
+
+# Saves the counter snapshot for the next run.
+sub store_state {
+    my ($R) = @_;
+    store($R, STATEFILE);
+}
+
+
+# Returns the snapshot saved by the previous run, or nothing when the state
+# file is missing or cannot be decoded.
+sub get_state {
+    return unless ( -r STATEFILE );
+
+    my $R = eval { retrieve( STATEFILE ) };
+    warn "iostat_ios: ignoring unreadable state file: $@" if $@;
+
+    return($R);
+}
+
+
+# vim: ft=perl : ts=4 : sw=4 : et
diff --git a/hosts/jenkins/etc/munin/plugins/jenkins_builds b/hosts/jenkins/etc/munin/plugins/jenkins_builds
new file mode 100755
index 00000000..3010e393
--- /dev/null
+++ b/hosts/jenkins/etc/munin/plugins/jenkins_builds
@@ -0,0 +1,64 @@
+#!/bin/sh
+# -*- sh -*-
+
+: << =cut
+
+=head1 NAME
+
+jenkins_builds - Plugin to measure number of jenkins builds
+
+=head1 AUTHOR
+
+Contributed by Holger Levsen
+
+=head1 LICENSE
+
+GPLv2
+
+=head1 MAGIC MARKERS
+
+ #%# family=auto
+ #%# capabilities=autoconf
+
+=cut
+
+. 
$MUNIN_LIBDIR/plugins/plugin.sh
+
+if [ "$1" = "autoconf" ]; then
+    echo yes
+    exit 0
+fi
+
+JOBS_DIR=/var/lib/jenkins/jobs
+STATEFILE="$MUNIN_PLUGSTATE/$(basename "$0")"
+
+# Delete the statefile if it is older than ${jenkins_update_interval}
+# minutes (set in /etc/munin/plugin-conf.d/jenkins).  The existence test
+# keeps find(1) from failing when there is no statefile yet.
+if [ -f "$STATEFILE" ] && [ -n "$(find "$STATEFILE" -mmin +"${jenkins_update_interval}")" ] ; then
+    rm -f "$STATEFILE"
+fi
+
+# A plain fetch is answered from the cache while it is fresh.
+if [ -f "$STATEFILE" ] && [ "$1" = "" ] ; then
+    cat "$STATEFILE"
+    exit 0
+fi
+
+# A job's category is the part of its name before the first underscore.
+JOB_PREFIXES=$(ls -1 "$JOBS_DIR"/ | cut -d "_" -f1 | sort -f -u)
+if [ "$1" = "config" ]; then
+    echo 'graph_title Jenkins Builds in the last 24h'
+    echo 'graph_args --base 1000 -l 0 '
+    echo 'graph_scale no'
+    echo 'graph_total total'
+    echo 'graph_vlabel Jenkins Builds per category in the last 24h'
+    echo 'graph_category jenkins'
+    # The first series is drawn as AREA, all following ones are stacked.
+    draw=AREA
+    for PREFIX in $JOB_PREFIXES ; do
+        echo "jenkins_builds_$PREFIX.label $PREFIX builds"
+        echo "jenkins_builds_$PREFIX.draw $draw"
+        draw=STACK
+    done
+    exit 0
+fi
+
+# Build the new cache in a temporary file and move it into place, so that a
+# concurrent run never reads a partial file and nothing is ever appended to
+# an existing cache.
+TMPFILE="$STATEFILE.$$"
+for PREFIX in $JOB_PREFIXES ; do
+    # Match "<prefix>" and "<prefix>_*" only; a bare "<prefix>*" would also
+    # count jobs of other categories that share the same leading characters.
+    # A pattern without matches stays literal and makes find(1) complain,
+    # hence the redirect.
+    NR=$(find "$JOBS_DIR/$PREFIX"/builds/ "$JOBS_DIR/$PREFIX"_*/builds/ \
+              -type d -mtime -1 -name "*_*" 2>/dev/null | wc -l)
+    echo "jenkins_builds_$PREFIX.value $NR"
+done | tee "$TMPFILE"
+mv -f "$TMPFILE" "$STATEFILE"
diff --git a/hosts/jenkins/etc/munin/plugins/jenkins_builds_results b/hosts/jenkins/etc/munin/plugins/jenkins_builds_results
new file mode 100755
index 00000000..596281ff
--- /dev/null
+++ b/hosts/jenkins/etc/munin/plugins/jenkins_builds_results
@@ -0,0 +1,75 @@
+#!/bin/sh
+# -*- sh -*-
+
+: << =cut
+
+=head1 NAME
+
+jenkins_builds_results - Plugin to measure results of jenkins builds per category
+
+=head1 AUTHOR
+
+Contributed by Holger Levsen
+
+=head1 LICENSE
+
+GPLv2
+
+=head1 MAGIC MARKERS
+
+ #%# family=auto
+ #%# capabilities=autoconf
+
+=cut
+
+. 
$MUNIN_LIBDIR/plugins/plugin.sh
+
+if [ "$1" = "autoconf" ]; then
+    echo yes
+    exit 0
+fi
+
+JOBS_DIR=/var/lib/jenkins/jobs
+STATEFILE="$MUNIN_PLUGSTATE/$(basename "$0")"
+
+# Delete the statefile if it is older than ${jenkins_update_interval}
+# minutes (set in /etc/munin/plugin-conf.d/jenkins).  The existence test
+# keeps find(1) from failing when there is no statefile yet.
+if [ -f "$STATEFILE" ] && [ -n "$(find "$STATEFILE" -mmin +"${jenkins_update_interval}")" ] ; then
+    rm -f "$STATEFILE"
+fi
+
+# A plain fetch is answered from the cache while it is fresh.
+if [ -f "$STATEFILE" ] && [ "$1" = "" ] ; then
+    cat "$STATEFILE"
+    exit 0
+fi
+
+# A job's category is the part of its name before the first underscore.
+JOB_PREFIXES=$(ls -1 "$JOBS_DIR"/ | cut -d "_" -f1 | sort -f -u)
+if [ "$1" = "config" ]; then
+    echo 'graph_title Jenkins Builds results'
+    echo 'graph_args --base 1000 -l 0 '
+    echo 'graph_scale no'
+    echo 'graph_total total'
+    echo 'graph_vlabel Jenkins Builds results per category'
+    echo 'graph_category jenkins'
+    # The first series is drawn as AREA, all following ones are stacked.
+    draw=AREA
+    for PREFIX in $JOB_PREFIXES ; do
+        for STATE in success unstable failure ; do
+            echo "jenkins_builds_results_${PREFIX}_${STATE}.label ${PREFIX} ${STATE}"
+            echo "jenkins_builds_results_${PREFIX}_${STATE}.draw $draw"
+            draw=STACK
+        done
+    done
+    exit 0
+fi
+
+# Build the new cache in a temporary file and move it into place, so that a
+# concurrent run never reads a partial file and nothing is ever appended to
+# an existing cache.
+TMPFILE="$STATEFILE.$$"
+for PREFIX in $JOB_PREFIXES ; do
+    # Last line of every build log of this category, one per line.  Only
+    # "<prefix>" and "<prefix>_*" are matched, so that categories sharing
+    # leading characters are not mixed.  The bare echo terminates logs that
+    # do not end in a newline; the blank lines it may add never match below.
+    PREFIX_RESULTS=$(for LOG in "$JOBS_DIR/$PREFIX"/builds/*_*/log "$JOBS_DIR/$PREFIX"_*/builds/*_*/log ; do
+                         tail -n 1 "$LOG" 2>/dev/null
+                         echo
+                     done)
+    for STATE in success unstable failure ; do
+        if [ "$STATE" = "failure" ] ; then
+            # count aborted as failed
+            PATTERN="($STATE|aborted)"
+        else
+            PATTERN="$STATE"
+        fi
+        # printf instead of "echo -e": /bin/sh is not required to support -e.
+        NR=$(printf '%s\n' "$PREFIX_RESULTS" | grep -E -i -c "$PATTERN")
+        echo "jenkins_builds_results_${PREFIX}_${STATE}.value $NR"
+    done
+done | tee "$TMPFILE"
+mv -f "$TMPFILE" "$STATEFILE"
diff --git a/hosts/jenkins/etc/munin/plugins/jenkins_builds_results_summary b/hosts/jenkins/etc/munin/plugins/jenkins_builds_results_summary
new file mode 100755
index 00000000..b4f5b540
--- /dev/null
+++ b/hosts/jenkins/etc/munin/plugins/jenkins_builds_results_summary
@@ -0,0 +1,70 @@
+#!/bin/sh
+# -*- sh -*-
+
+: << =cut
+
+=head1 NAME
+
+jenkins_builds_results_summary - Plugin to measure results of all jenkins builds
+
+=head1 AUTHOR
+
+Contributed by Holger Levsen + +=head1 LICENSE + +GPLv2 + +=head1 MAGIC MARKERS + + #%# family=auto + #%# capabilities=autoconf + +=cut + +. $MUNIN_LIBDIR/plugins/plugin.sh + +if [ "$1" = "autoconf" ]; then + echo yes + exit 0 +fi + +STATEFILE=$MUNIN_PLUGSTATE/$(basename $0) + +# delete statefile if it's older than ${jenkins_update_interval} set in /etc/munin/plugin-conf.d/jenkins +if test $(find $STATEFILE -mmin +${jenkins_update_interval}) ; then + rm -f $STATEFILE +fi + +if [ -f $STATEFILE ] && [ "$1" = "" ] ; then + cat $STATEFILE + exit 0 +fi + +if [ "$1" = "config" ]; then + echo 'graph_title Jenkins Builds results summary' + echo 'graph_args --base 1000 -l 0 ' + echo 'graph_scale no' + echo 'graph_total total' + echo 'graph_vlabel Jenkins Builds results summary' + echo 'graph_category jenkins' + draw=AREA + for STATE in success unstable failure ; do + echo "jenkins_builds_results_all_${STATE}.label ${PREFIX} ${STATE}" + echo "jenkins_builds_results_all_${STATE}.draw $draw" + if [ "$draw" = "AREA" ] ; then draw=STACK ; fi + done + exit 0 +fi + +RESULTS=$(for i in /var/lib/jenkins/jobs/*/builds/*_*/log ; do tail -1 $i 2>/dev/null; echo " \n"; done ) +for STATE in success unstable failure ; do + NR=0 + if [ "$STATE" = "failure" ] ; then + # count aborted as failed + NR=$(echo -e "$RESULTS" | egrep -i -c "($STATE|aborted)") + else + NR=$(echo -e "$RESULTS" | grep -i -c $STATE) + fi + echo "jenkins_builds_results_all_${STATE}.value $NR" | tee -a $STATEFILE +done diff --git a/hosts/jenkins/etc/munin/plugins/jenkins_builds_running b/hosts/jenkins/etc/munin/plugins/jenkins_builds_running new file mode 100755 index 00000000..24084f10 --- /dev/null +++ b/hosts/jenkins/etc/munin/plugins/jenkins_builds_running @@ -0,0 +1,43 @@ +#!/bin/sh +# -*- sh -*- + +: << =cut + +=head1 NAME + +jenkins_builds_running - Plugin to measure number of jenkins builds which are currently running + +=head1 AUTHOR + +Contributed by Holger Levsen + +=head1 LICENSE + +GPLv2 + +=head1 MAGIC 
MARKERS + + #%# family=auto + #%# capabilities=autoconf + +=cut + +. $MUNIN_LIBDIR/plugins/plugin.sh + +if [ "$1" = "autoconf" ]; then + echo yes + exit 0 +fi + +if [ "$1" = "config" ]; then + echo 'graph_title Jenkins Builds running' + echo 'graph_args --base 1000 -l 0 ' + echo 'graph_scale no' + echo 'graph_vlabel Jenkins Builds currently running' + echo 'graph_category jenkins' + echo "jenkins_builds_running.label $PREFIX builds_running" + echo "jenkins_builds_running.draw AREA" + exit 0 +fi + +echo "jenkins_builds_running.value $(ps fax|grep /tmp/hudson|grep -v grep|wc -l)" diff --git a/hosts/jenkins/etc/munin/plugins/jenkins_jobs b/hosts/jenkins/etc/munin/plugins/jenkins_jobs new file mode 100755 index 00000000..e7f8e219 --- /dev/null +++ b/hosts/jenkins/etc/munin/plugins/jenkins_jobs @@ -0,0 +1,64 @@ +#!/bin/sh +# -*- sh -*- + +: << =cut + +=head1 NAME + +jenkins_jobs - Plugin to measure number of jenkins jobs + +=head1 AUTHOR + +Contributed by Holger Levsen + +=head1 LICENSE + +GPLv2 + +=head1 MAGIC MARKERS + + #%# family=auto + #%# capabilities=autoconf + +=cut + +. 
$MUNIN_LIBDIR/plugins/plugin.sh
+
+if [ "$1" = "autoconf" ]; then
+    echo yes
+    exit 0
+fi
+
+JOBS_DIR=/var/lib/jenkins/jobs
+STATEFILE="$MUNIN_PLUGSTATE/$(basename "$0")"
+
+# Delete the statefile if it is older than ${jenkins_update_interval}
+# minutes (set in /etc/munin/plugin-conf.d/jenkins).  The existence test
+# keeps find(1) from failing when there is no statefile yet.
+if [ -f "$STATEFILE" ] && [ -n "$(find "$STATEFILE" -mmin +"${jenkins_update_interval}")" ] ; then
+    rm -f "$STATEFILE"
+fi
+
+# A plain fetch is answered from the cache while it is fresh.
+if [ -f "$STATEFILE" ] && [ "$1" = "" ] ; then
+    cat "$STATEFILE"
+    exit 0
+fi
+
+# A job's category is the part of its name before the first underscore.
+JOB_PREFIXES=$(ls -1 "$JOBS_DIR"/ | cut -d "_" -f1 | sort -f -u)
+if [ "$1" = "config" ]; then
+    echo 'graph_title Jenkins Jobs'
+    echo 'graph_args --base 1000 -l 0 '
+    echo 'graph_scale no'
+    echo 'graph_total total'
+    echo 'graph_vlabel Jenkins Jobs per category'
+    echo 'graph_category jenkins'
+    # The first series is drawn as AREA, all following ones are stacked.
+    draw=AREA
+    for PREFIX in $JOB_PREFIXES ; do
+        echo "jenkins_jobs_$PREFIX.label $PREFIX jobs"
+        echo "jenkins_jobs_$PREFIX.draw $draw"
+        draw=STACK
+    done
+    exit 0
+fi
+
+# Build the new cache in a temporary file and move it into place, so that a
+# concurrent run never reads a partial file and nothing is ever appended to
+# an existing cache.
+TMPFILE="$STATEFILE.$$"
+for PREFIX in $JOB_PREFIXES ; do
+    # Match "<prefix>" and "<prefix>_*" only; a bare "<prefix>*" would also
+    # count jobs of other categories that share the same leading characters.
+    NR=$(find "$JOBS_DIR"/ -mindepth 1 -maxdepth 1 -type d \
+              \( -name "$PREFIX" -o -name "${PREFIX}_*" \) | wc -l)
+    echo "jenkins_jobs_$PREFIX.value $NR"
+done | tee "$TMPFILE"
+mv -f "$TMPFILE" "$STATEFILE"
diff --git a/hosts/jenkins/etc/munin/plugins/munin_stats b/hosts/jenkins/etc/munin/plugins/munin_stats
new file mode 100755
index 00000000..8c7c82a2
--- /dev/null
+++ b/hosts/jenkins/etc/munin/plugins/munin_stats
@@ -0,0 +1,112 @@
+#!/usr/bin/perl
+# -*- perl -*-
+# Copyright (C) 2006-2009 Rodolphe Quiedeville
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; version 2 dated June,
+# 1991.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details. 
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# $Id$
+#
+# Magic markers (used by munin-node-configure and some installation scripts):
+#%# family=auto
+#%# capabilities=autoconf
+
+use strict;
+use warnings;
+
+use Munin::Plugin;
+
+# Logs whose "finished (N sec)" lines are turned into data series.
+my @logs = qw/update html limits/;
+use Munin::Master::GraphOld;
+my $conffile = "$Munin::Common::Defaults::MUNIN_CONFDIR/munin.conf";
+
+# The "graph" series (and everything that depends on it below) is enabled
+# when graph_check_cron() returns false.  Evaluated once so that all parts
+# of the plugin agree.
+my $with_graph = !graph_check_cron();
+push(@logs, "graph") if $with_graph;
+
+my $logdir = ($ENV{'logdir'} || $ENV{'MUNIN_LOGDIR'} || '/var/log/munin');
+
+if ($ARGV[0] and $ARGV[0] eq 'autoconf') {
+    my $munin_update_location =
+        "$Munin::Common::Defaults::MUNIN_LIBDIR/munin-update";
+
+    if (! -e $munin_update_location) {
+        print "no (munin-update was not found at $munin_update_location)\n";
+    }
+    elsif (! -x $munin_update_location) {
+        print "no ($munin_update_location was found, but is not executable)\n";
+    }
+    else {
+        print "yes\n";
+    }
+    exit 0;
+}
+
+if ($ARGV[0] and $ARGV[0] eq "config") {
+    print "graph_title Munin processing time\n";
+    if ($with_graph) {
+        print "graph_info This graph shows the run time of the four different processes making up a munin-master run. Munin-master is run from cron every 5 minutes and we want each of the programmes in munin-master to complete before the next instance starts. Especially munin-update and munin-graph are time consuming and their run time bears watching. If munin-update uses too long time to run please see the munin-update graph to determine which host is slowing it down. If munin-graph is running too slow you need to get clever (email the munin-users mailing list) unless you can buy a faster computer with better disks to run munin on.\n";
+    } else {
+        print "graph_info This graph shows the run time of the three different processes making up a munin-master run. Munin-master is run from cron every 5 minutes and we want each of the programmes in munin-master to complete before the next instance starts. Especially munin-update is time consuming and its run time bears watching. If munin-update uses too long time to run please see the munin-update graph to determine which host is slowing it down.\n";
+    }
+    print "graph_args --base 1000 -l 0\n",
+          "graph_scale yes\n",
+          "graph_vlabel seconds\n",
+          "graph_category munin\n";
+    foreach my $log (@logs) {
+        print "$log.label munin $log\n";
+        print "$log.draw AREASTACK\n";
+    }
+    # The graph_info text above says a run starts every 5 minutes (300 s).
+    print "update.warning 240\n";
+    print "update.critical 285\n";
+    if ($with_graph) {
+        print "graph.warning 240\n";
+        print "graph.critical 285\n";
+    }
+    exit 0;
+}
+
+# Per-log read positions saved by the previous run.
+my %positions = restore_state();
+my %times;
+
+foreach my $log (@logs) {
+    my $logfile = "$logdir/munin-$log.log";
+    my $time = 'U';
+
+    if (! -r $logfile) {
+        print "$log.extinfo Can't open $logfile for reading\n";
+        print "$log.value $time\n";
+        next;
+    }
+
+    if (exists $positions{$log}) {
+        # Read only what was appended since the last run; the last
+        # "finished (N sec)" line seen wins.
+        my ($LOGFILE, undef) = tail_open($logfile, $positions{$log});
+        while (<$LOGFILE>) {
+            $time = $1 if (/finished \((\d+\.\d+)\ssec\)$/);
+        }
+        $positions{$log} = tail_close($LOGFILE);
+    }
+    else {
+        # Do nothing on first run except find the current file end.
+        $positions{$log} = (stat $logfile)[7];
+    }
+
+    print "$log.value $time\n";
+}
+
+save_state(%positions);
+
+# vim: ft=perl : ts=4 : expandtab
-- 
cgit v1.2.3-70-g09d2