#!/usr/bin/perl -w
# -*- cperl -*-
=head1 NAME

iostat_ios - Show IO-operation latency per device.

=head1 APPLICABLE SYSTEMS

Any Linux system

=head1 CONFIGURATION

None needed

=head1 USAGE

Link this into /etc/munin/plugins/ and restart the munin-node.

=head1 INTERPRETATION

The plugin shows the average time an IO-operation needs to complete for
each disk or block device on the system.

Simple partitioned disks will only show as the whole disk.  When/if
you use some device layer over that, such as LVM, then LV devices will
show up individually.

When the IO-operation time here increases it is to be expected that
the iowait on the CPU graph increases, but please read about how
iowait reporting works to fully understand that number.

=head1 MAGIC MARKERS

  #%# family=legacy
  #%# capabilities=autoconf

=head1 VERSION

  $Id$

=head1 BUGS

Should show more useful device names/labels for device-mapper devices,
such as those used by LVM and so on.

=head1 AUTHOR

(C) 2004 Per Buer.  Documentation and some modifications by Nicolai
Langfeldt.

=head1 LICENSE

GPLv2

=cut

use strict;
use warnings;

use IO::File;
use Storable qw(store retrieve);
use Munin::Plugin;

use constant STATEFILE => "$ENV{MUNIN_PLUGSTATE}/iostat-ios.state";


# Field prefix ("dev<major>_<minor>") => kernel device name.  Filled in by
# get_ios() and read by print_config().  Declared ahead of every call so
# that no sub runs before this declaration has been executed.
my %name;

# "autoconf": report whether this plugin can run on this host.
if (defined($ARGV[0]) and $ARGV[0] eq 'autoconf') {
    # get_ios() only parses /proc/diskstats and dies when it is missing, so
    # a readable /proc/partitions alone must not be advertised as sufficient.
    if (-r "/proc/diskstats") {
        print "yes\n";
    } else {
        print "no (no /proc/diskstats)\n";
    }
    exit 0;
}

# "config": describe the graph and its fields, then stop.
if (defined($ARGV[0]) and $ARGV[0] eq 'config') {
    print_config();
    exit;
}

# Default action: print values derived from the difference between the
# current counters and the ones saved by the previous run.
my ($r, $old_r);

$r = get_ios();

($old_r) = get_state();

if ($old_r) {
    cmp_io($old_r, $r);
} else {
    # No usable saved state: there is nothing to compare against yet.
    warn "iostat_ios: No historic data present\n";
}

# Save the current counters as the baseline for the next run.
store_state( $r );

# Decide whether a block device should be graphed.
#
# Arguments:
#   $major  - device major number (may be undef)
#   $minor  - device minor number (accepted but not used)
#   $tmpnam - kernel device name, e.g. "sda" (may be undef)
#
# Returns 1 if the device should be included and 0 if it should be skipped.
sub filter {
    my ($major, $minor, $tmpnam) = @_;

    if (defined($major)) {
        return 0 if ($major ==   1); # RAM devices
        return 0 if ($major ==   9); # MD devices
        return 0 if ($major ==  58); # LVM devices
        return 0 if ($major == 253); # LVM2 devices
    }

    if (defined($tmpnam)) {
        # Names ending in "part<N>".
        return 0 if ($tmpnam =~ /part\d+$/);

        # Partitions of sd/hd/vd disks.  The letter part can be longer than
        # one character (the disk after "sdz" is "sdaa", with partitions
        # "sdaa1", ...), so accept one or more letters before the number.
        return 0 if ($tmpnam =~ /^\s*(?:sd|hd|vd)[a-z]+\d+\s*$/);
    }

    return 1;
}


# Read the per-device I/O counters from /proc/diskstats.
#
# Arguments:
#   $opt - when true, return the list of field prefixes instead of the
#          counters.
#
# Returns:
#   $opt false: hashref  { major => { minor => { rio, rtime, wio, wtime } } }
#   $opt true:  arrayref of "dev<major>_<minor>" prefixes, in file order
#
# Side effect: records label => device name in the file-level %name hash.
#
# Dies when /proc/diskstats is not readable or cannot be opened.
sub get_ios {
    my ($opt) = @_;

    my %R;
    my @dev;

    # Only the /proc/diskstats format is parsed; hosts without it are
    # rejected with the same message as before.
    unless (-r "/proc/diskstats") {
        die("kernel 2.4 not supported yet\n");
    }

    my $parts = IO::File->new("/proc/diskstats", "r")
        or die("iostat_ios: Could not open /proc/diskstats: $!\n");

    while (my $line = <$parts>) {
        # split ' ' ignores leading whitespace, so the major number is
        # always field 0 no matter how the line is indented.  The fields
        # picked are: major, minor, name, read count, read time, write
        # count, write time (times are milliseconds according to the
        # kernel's iostats documentation).
        my ($maj, $min, $name, $rio, $rtime, $wio, $wtime) =
          (split(' ', $line))[0,1,2,3,6,7,10];

        next unless (defined($min) && defined($maj));

        # Skip devices for which any of the four counters is missing or 0.
        next unless ($wio and $rio and $rtime and $wtime);

        next if (filter($maj, $min, $name) == 0);

        $R{$maj}{$min}{rio}   = $rio;
        $R{$maj}{$min}{rtime} = $rtime;

        $R{$maj}{$min}{wio}   = $wio;
        $R{$maj}{$min}{wtime} = $wtime;

        my $label = "dev${maj}_${min}";

        push(@dev, $label);

        # Remember the kernel name so that config output can label fields.
        $name{$label} = $name;
    }
    $parts->close();

    return $opt ? \@dev : \%R;
}


# Print the Munin "config" output: the graph-wide settings first, then a
# read-time and a write-time field for every prefix returned by get_ios(1).
sub print_config {
    my @graph_settings = (
        "graph_title IO Service time\n",
        "graph_args --base 1000 --logarithmic\n",
        "graph_category disk\n",
        "graph_vlabel seconds\n",
        "graph_info For each applicable disk device this plugin shows the latency (or delay) for I/O operations on that disk device.  The delay is in part made up of waiting for the disk to flush the data, and if data arrives at the disk faster than it can read or write it then the delay time will include the time needed for waiting in the queue.\n",
    );
    print @graph_settings;

    my $prefixes = get_ios(1);

    foreach my $prefix (@{$prefixes}) {
        my $devname = $name{$prefix};

        # Same three attributes for the read and for the write field; the
        # cdef divides the stored value by 1000.
        foreach my $kind ( [ 'rtime', 'read' ], [ 'wtime', 'write' ] ) {
            my ($suffix, $verb) = @{$kind};
            my $field = "${prefix}_${suffix}";

            print "$field.label ", $devname, " $verb\n",
                  "$field.type GAUGE\n",
                  "$field.cdef $field,1000,/\n";
        }

        # NOTE(review): this passes the device prefix, not one of the
        # "_rtime"/"_wtime" field names printed above -- confirm against
        # the Munin::Plugin documentation that this is what is intended.
        print_thresholds($prefix);
    }
}


# Print one "<field>.value" line for the read time and one for the write
# time of every device in $new_io, computed from the counter differences
# between two samples.
#
# Arguments:
#   $old_io - hashref { major => { minor => { rio, rtime, wio, wtime } } }
#             holding the previous sample
#   $new_io - hashref of the same shape holding the current sample
#
# For each direction the value is (time difference / operation difference).
# 'U' is printed when either difference is not positive, and for devices
# that are not present in $old_io at all.
sub cmp_io {
    my ($old_io, $new_io) = @_;

    for my $maj (sort keys %{$new_io} ) {
        for my $min (sort keys %{ $new_io->{$maj} } ) {
            my $dev = "dev${maj}_${min}";
            my $new = $new_io->{$maj}{$min};

            # Test the major level with exists first so that an unknown
            # device is not autovivified into the caller's old sample.
            my $old = exists $old_io->{$maj} ? $old_io->{$maj}{$min} : undef;

            # No baseline for this device: subtracting from nothing would
            # report the total since the counters started, not this interval.
            unless (defined($old)) {
                print "${dev}_rtime.value U\n",
                      "${dev}_wtime.value U\n";
                next;
            }

            my $rio_diff   = $new->{rio}   - $old->{rio};
            my $rtime_diff = $new->{rtime} - $old->{rtime};

            my $wio_diff   = $new->{wio}   - $old->{wio};
            my $wtime_diff = $new->{wtime} - $old->{wtime};

            my $rvalue = ($rio_diff > 0 and $rtime_diff > 0)
                       ? ($rtime_diff / $rio_diff) : 'U';
            my $wvalue = ($wio_diff > 0 and $wtime_diff > 0)
                       ? ($wtime_diff / $wio_diff) : 'U';

            print "${dev}_rtime.value ", $rvalue, "\n",
                  "${dev}_wtime.value ", $wvalue, "\n";
        }
    }

}


# Save the given counters to STATEFILE for the next run.
#
# Arguments:
#   $R - the data structure to persist (the hashref returned by get_ios()).
#
# Returns the result of Storable's store(): true on success, undef when
# store() reports an I/O error.  Exceptions raised by store() itself are
# not caught here.
sub store_state {
    my ($R) = @_;

    my $ok = store($R, STATEFILE);

    # Without this the next run would silently compare against stale or
    # missing data.
    warn "iostat_ios: Could not write state file " . STATEFILE . ": $!\n"
        unless $ok;

    return $ok;
}


# Load the counters saved by the previous run from STATEFILE.
#
# Returns the stored data structure, or undef when the state file is not
# readable or its contents cannot be retrieved.
sub get_state {
    return(undef) unless ( -r STATEFILE);

    # retrieve() throws on a damaged file.  Letting that propagate would
    # abort every run before store_state() gets the chance to replace the
    # file, so treat it as "no history" instead.
    my $R = eval { retrieve( STATEFILE ) };

    unless (defined($R)) {
        my $why = $@ || $! || 'unknown error';
        chomp($why);
        warn "iostat_ios: Ignoring unusable state file: $why\n";
    }

    return($R);
}


# vim: ft=perl : ts=4 : sw=4 : et