#! /usr/bin/perl -w
# bacula-du 1.0
# Written by Kjetil Torgrim Homme
# Released under GPLv3 or the same terms as Bacula itself

# Print the option summary on stdout and terminate with exit status 64.
sub usage {
    print <<"_END_";
Usage: $0 [OPTIONS] -j JOBID
Summarize disk usage of directories included in the backup JOBID

Options are:
  -a, --all               write counts for all files, not just directories
  -b, --bytes             use size in octets rather than number of blocks
  -B, --block-size=SIZE   report SIZE-byte blocks (default 1Ki)
  -m                      like --block-size=1Mi
  -S, --separate-dirs     do not include size of subdirectories
  -t, --threshold=SIZE    skip output for files or directories with usage below SIZE
  -L, --largest=NUM       only print NUM largest directories/files

SIZE may be (or may be an integer optionally followed by) one of following:
k (1000), Ki (1024), M (1000*1000), Mi (1024*1024), G, Gi, T, Ti, P, Pi.
_END_
    exit(64);
}

use strict;
use warnings;
use DBD::mysql;
use DBI;
use MIME::Base64;
use Getopt::Long qw(:config bundling no_ignore_case);
use Data::Dumper;

my $dbhost = "localhost";
my $db     = "bacula";
my $dsn    = "DBI:mysql:database=$db;mysql_read_default_group=clientp";
my $dbuser = $db;
my $dbpass = undef;

#######################

# Map each character of the base64 alphabet to its 6-bit value.
my $i = 0;
my %base64 = map { $_ => $i++ }
    split("", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/");

# Decode a string of base64 digits into an integer, most significant
# digit first, six bits per character.  No padding is expected.
sub decode_bacula_base64 {
    my $acc = 0;
    for (split("", $_[0])) {
        $acc <<= 6;
        $acc += $base64{$_};
    }
    return $acc;
}

# Return the decoded value of whitespace-separated field 7 of an LStat
# string (presumably st_size, going by the sub name -- verify against the
# catalog format).
sub extract_size_from_lstat {
    return decode_bacula_base64((split(" ", shift))[7]);
}

# Return 512 times the decoded value of field 9 of an LStat string
# (presumably st_blocks counted in 512-byte units -- verify against the
# catalog format).
sub extract_blocks_from_lstat {
    return 512 * decode_bacula_base64((split(" ", shift))[9]);
}

# Convert a SIZE argument into a plain number.
#
# Accepts a bare non-negative integer, a bare unit (meaning one of that
# unit), or an integer followed by a unit; a trailing "B" is allowed after
# a unit.  Dies with "Can't parse: ..." on anything else.
sub convert_units {
    my $num = shift;
    my %units = (
        "k" => 1000**1, "K"  => 1000**1,    # the regex accepts "K", so map it
        "Ki" => 1024**1, "ki" => 1024**1,
        "M" => 1000**2, "Mi" => 1024**2,
        "G" => 1000**3, "Gi" => 1024**3,
        "T" => 1000**4, "Ti" => 1024**4,
        "P" => 1000**5, "Pi" => 1024**5,
    );
    if ($num =~ /^(\d*)([kKMGTP]i?)B?$/) {
        my ($count, $suffix) = ($1, $2);
        # Test the length, not the truth, of the count so that an explicit
        # "0" is honoured instead of being treated as a missing count.
        $num = (length($count) ? $count : 1) * $units{$suffix};
    }
    elsif ($num !~ /^\d+$/) {
        die "Can't parse: $num\n";
    }
    return $num;
}

### main program resumes

my $threshold = 1;    # omit 0 octet sized files/directories by default
my $blocksize = 1024;
my ($jobid, $all, $bytes, $separate_dirs, $largest);

GetOptions(
    "jobid|j=i"       => \$jobid,
    "threshold|t=s"   => \$threshold,
    "separate-dirs|S" => \$separate_dirs,
    "all|a"           => \$all,
    "bytes|b"         => \$bytes,
    "block-size|B=s"  => \$blocksize,
    "largest|L=i"     => \$largest,
    "m"               => sub { $blocksize = "1Mi" },
) || usage();

usage() unless $jobid;

$threshold = convert_units($threshold);
$blocksize = convert_units($blocksize);

# The block size is used as a divisor when printing, so zero is not usable.
die "Block size must be greater than zero\n" unless $blocksize > 0;

my @padding = ("", "A==", "==", "=");

# Alternative size decoder built on MIME::Base64: pads field 7 of the LStat
# string and folds the decoded bytes into an integer, eight bits at a time.
# Not referenced elsewhere in this script.
sub extract_size_from_lstat_foo {
    my ($b64) = (split(" ", shift))[7];
    my $acc = 0;
    for (split("", decode_base64($b64 . $padding[length($b64) % 4]))) {
        $acc <<= 8;
        $acc += ord($_);
    }
    return $acc;
}

# --bytes selects the size field; the default is the block count field.
my $extract_size = $bytes ? \&extract_size_from_lstat
                          : \&extract_blocks_from_lstat;

my $dbh;
unless ($dbh = DBI->connect($dsn, $dbuser, $dbpass, {AutoCommit => 0})) {
    print STDERR "Could not connect to database $db on host $dbhost\n";
    exit 2;
}
print STDERR "DB connect \n";

# The job id is bound as a placeholder rather than interpolated into the SQL.
my $sth = $dbh->prepare("
    SELECT p.Path, fn.Name, LStat
      FROM Path p
      JOIN File f ON f.PathId = p.PathId
      JOIN Filename fn ON f.FilenameId = fn.FilenameId
     WHERE f.JobId = ?")
    or die "Could not prepare query: " . $dbh->errstr . "\n";
$sth->execute($jobid)
    or die "Could not execute query: " . $sth->errstr . "\n";
print STDERR "DB prepare \n";

my %du;
my $rowcount = 0;
while (my ($path, $fname, $lstat) = $sth->fetchrow_array) {
    my $size = $extract_size->($lstat);
    print STDERR "Got '$path' size $size\n";

    # With an empty file name "$path$fname" is the same key as $path, so
    # only add the per-file entry when there is a name; otherwise the entry
    # would be counted twice under that key.
    $du{"$path$fname"} += $size if $all && length($fname);
    $du{$path} += $size;

    unless ($separate_dirs) {
        # Charge the size to every ancestor directory.  The loop is driven
        # by the substitution succeeding, so it also terminates for paths
        # that never reduce to '/' (no leading or no trailing slash).
        while ($path =~ s,[^/]+/$,,) {
            last if $path eq '';
            $du{$path} += $size;
        }
    }

    if ((++$rowcount % 1000) == 0) {
        print STDERR "got $rowcount rows\r";
    }
}
$dbh->disconnect();
print STDERR "done reading database.\n";

if ($largest) {
    # Raise the threshold to the size of the NUM-th largest entry, unless
    # the user-supplied threshold is already stricter.
    my @sizes = sort { $a <=> $b } values %du;
    my $cutoff = $largest < @sizes ? $sizes[-$largest] : 0;
    $threshold = $cutoff unless ($threshold && $threshold > $cutoff);
}

# We add ~ to the filename so that the parent directory is printed
# below the children.  ('~' could be any character which sorts after
# '/')
for my $path (sort { "$a~" cmp "$b~" } keys %du) {
    next if $du{$path} < $threshold;
    # %d truncates, so adding blocksize-1 first rounds up to whole blocks.
    printf("%9d %s\n", ($du{$path} + $blocksize - 1) / $blocksize, $path);
}