Veritas-bu

[Veritas-bu] Splitting large jobs

2005-03-04 13:50:16
Subject: [Veritas-bu] Splitting large jobs
From: hskelly1 AT san.rr DOT com (Hal Skelly)
Date: Fri, 4 Mar 2005 10:50:16 -0800
This is a multi-part message in MIME format.

------=_NextPart_000_0070_01C520A7.F3561760
Content-Type: text/plain;
        charset="US-ASCII"
Content-Transfer-Encoding: 7bit

I started work on this script a while ago and haven't released it into
the wild yet  (I believe this was published in Sysadmin magazine a while
ago).  Thus I'm not going to guarantee it.  BUT, if you are familiar
with Perl, this should get you along the way to scripting how to split a
large file system.

Hal Skelly


-----Original Message-----
From: veritas-bu-admin AT mailman.eng.auburn DOT edu
[mailto:veritas-bu-admin AT mailman.eng.auburn DOT edu] On Behalf Of Thomas
Stewart
Sent: Friday, March 04, 2005 4:35 AM
To: veritas-bu AT mailman.eng.auburn DOT edu
Subject: [Veritas-bu] Splitting large jobs


Hi,
I have a 1TB volume mounted on /big; it mostly contains the users'
mydocs.

At the moment I backup the box nightly to a 6 tape library using the 
ALL_LOCAL_DRIVES directive, which works fine. However it is getting to
the 
stage where the job that does /big is taking too long.

Is there any way to automatically split the job up, so that it can make
use of 
the other tape drives in the library that are idle?

So far my only thought is to split it manually by changing the backup 
selections to something like:
/
NEW_STREAM
/big/mydocs/[0-9A-Za-m]*
NEW_STREAM
/big/mydocs/[n-z]*
NEW_STREAM

However that then leaves out /big/*

Any thoughts?

Regards
-- 
Tom
_______________________________________________
Veritas-bu maillist  -  Veritas-bu AT mailman.eng.auburn DOT edu
http://mailman.eng.auburn.edu/mailman/listinfo/veritas-bu

------=_NextPart_000_0070_01C520A7.F3561760
Content-Type: application/octet-stream;
        name="chopit.pl"
Content-Transfer-Encoding: quoted-printable
Content-Disposition: attachment;
        filename="chopit.pl"

#!/bin/perl=0A=
=0A=
# Copyright 2001, Harold F. Skelly jr.=0A=
=0A=
# To be done, =0A=
#       - format the output to create a NetBackup includes file=0A=
#       - check that the number of streams created is <=3D number of =
streams=0A=
#         requested=0A=
=0A=
# Add proper option parsing for -s chunksize -d starting dir=0A=
=0A=
use File::Find;=0A=
use Getopt::Std;=0A=
=0A=
$i=3D$j=3D0;=0A=
=0A=
# getopts() rather than getopt(): only -c and -d take a value, while=0A=
# -v, -h and -x are plain flags.  getopt('cdv') made -v swallow the=0A=
# next command-line word as its value.=0A=
getopts('c:d:vhx');=0A=
# -c number of streams to divide the tree into=0A=
# -d root directory to split up (default: current directory)=0A=
# -v verbose=0A=
# -h, -x print the usage message and exit=0A=
=0A=
$USAGE =3D "depth2 -c streamCount -d rootdir [-v]erbose \nWhere:\=0A=
    streamcount is the number of streams to divide the filessystem into =
and\=0A=
    rootdir is the mountpoint that must be split up.\n";=0A=
=0A=
if ($opt_h or $opt_x) {print $USAGE; exit 0;}=0A=
=0A=
# The stream count is used as a divisor below, so insist on a=0A=
# positive whole number.=0A=
if (! $opt_c or $opt_c !~ /^\d+$/ or $opt_c < 1) {=0A=
        print $USAGE;=0A=
        exit 1;=0A=
}=0A=
$streamtotal =3D $opt_c;=0A=
=0A=
if ($opt_d) {$starting =3D $opt_d;}=0A=
else {$starting=3D'.';}=0A=
=0A=
# File::Find reports the top directory without a trailing slash, so=0A=
# strip it here too; otherwise $dirs{$starting} is never filled in.=0A=
$starting =3D~ s{([^/])/+$}{$1};=0A=
=0A=
-d $starting or die "$starting is not a directory\n";=0A=
=0A=
# See perl module FILE ( pp 439,440 of 2nd Edition Programming Perl=0A=
finddepth(\&wanted, $starting);=0A=
=0A=
print "Total size to divide up is $dirs{$starting}\n\n";=0A=
=0A=
# Set the chunk size to be the total size of the directory tree=0A=
# (basically $dirs{$starting}) DIVIDED by the number of streams,=0A=
# $streamtotal=0A=
=0A=
$chunksize =3D $dirs{$starting} / $streamtotal;=0A=
=0A=
# Stream slot 0 is counted as existing from the start, so the count=0A=
# begins at 1 even though @STREAMS is still empty.=0A=
$streamct=3D1;=0A=
=0A=
buildstreams( $starting);=0A=
=0A=
printstreams();=0A=
=0A=
exit;=0A=
=0A=
=0A=
#=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=0A=
=0A=
sub wanted {=0A=
        # Callback for File::Find::finddepth (depth first, so the=0A=
        # contents of a directory are seen before the directory).=0A=
        # Accumulates into the global %dirs the total size in bytes=0A=
        # of everything at or below each directory, keyed by the=0A=
        # path name File::Find reports.=0A=
        #=0A=
        # File::Find has chdir()ed into $File::Find::dir when this is=0A=
        # called, so every file test uses $_ (the name relative to=0A=
        # that directory).  Testing $File::Find::name instead gives=0A=
        # wrong answers when the starting directory is a relative=0A=
        # path such as '.'.=0A=
=0A=
        #Get current name of directory/file, w/o path=0A=
        # (kept for compatibility; not read elsewhere in this script)=0A=
        $filename =3D $_;=0A=
=0A=
        #Get current directory name=0A=
        my $currdir =3D $File::Find::dir;=0A=
=0A=
        #Get full pathname of currdir and file=0A=
        my $full =3D $File::Find::name;=0A=
=0A=
        #If the current name is a directory=0A=
        if (-d $_) {=0A=
                #Add size of its own directory entry to this directory=0A=
                $dirs{$full}+=3D (stat $_)[7] || 0;=0A=
                    # we only need to skip when looking at 'myself'=0A=
                    # otherwise we end up doubling the total size=0A=
                return if ( $full eq $starting );=0A=
=0A=
                #Add size of contents of this subdir to this directory=0A=
                $dirs{$currdir}+=3D $dirs{$full};=0A=
        }=0A=
=0A=
        #If not a directory, add its size to directory total=0A=
        else {=0A=
                # -s gives undef for e.g. a dangling symlink: count 0=0A=
                $dirs{$currdir}+=3D (-s $_) || 0;=0A=
        }=0A=
=0A=
}=0A=
=0A=
=0A=
#=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=0A=
=0A=
sub bydepth {=0A=
        # Sort comparator: orders $a and $b by their number of path=0A=
        # elements, fewest first.  The global $SEP is the separator=0A=
        # pattern; it is not set anywhere in this script, so fall=0A=
        # back to '/'.  (With an empty pattern split would count=0A=
        # characters, not path elements.)=0A=
        my $sep =3D (defined $SEP and length $SEP) ? $SEP : '/';=0A=
        my $depth_a =3D split($sep, $a);=0A=
        my $depth_b =3D split($sep, $b);=0A=
        return $depth_a <=3D> $depth_b;=0A=
}=0A=
=0A=
#=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=0A=
=0A=
sub printstreams {=0A=
        # Write every non-empty stream to ./includes as a NEW_STREAM=0A=
        # line followed by one path per line.  With -v the same text,=0A=
        # each stream's size and the grand total go to STDOUT too.=0A=
        # Each entry of the global @STREAMS is a comma separated list=0A=
        # of paths; @STREAMS itself is left unchanged.=0A=
        # Dies if ./includes cannot be opened or written.=0A=
        my $grandsum =3D 0;=0A=
        open (RSLTS, '>', './includes')=0A=
                || die "can't open ./includes: $!\n";=0A=
        print "Created the following streams of less than $chunksize =
size\n";=0A=
        for (my $k=3D0; $k<$streamct; $k++)   {=0A=
                my $listing =3D $STREAMS[$k];=0A=
                $listing =3D '' unless defined $listing;=0A=
                # slot 0 may have been built with a leading comma=0A=
                $listing =3D~ s/^,+//;=0A=
                # nothing was ever placed in this slot=0A=
                next if $listing eq '';=0A=
                $listing =3D~ s/,/\n/g;=0A=
                # plain print: printf would treat a '%' in a path as=0A=
                # a format directive=0A=
                print RSLTS "\nNEW_STREAM\n";=0A=
                print "\nNEW_STREAM\n" if $opt_v;=0A=
                print RSLTS "$listing\n";=0A=
                print "$listing\n" if $opt_v;=0A=
                my $sz =3D strmsize($STREAMS[$k]);=0A=
                print "\tSIZE=3D$sz\n" if $opt_v;=0A=
                $grandsum+=3D$sz;=0A=
                }=0A=
        close (RSLTS) || die "can't write ./includes: $!\n";=0A=
=0A=
        print "\nThe Grand total of all streams is $grandsum\n"=0A=
                if $opt_v;=0A=
}=0A=
=0A=
=0A=
=0A=
#=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=0A=
=0A=
sub placeinstream {=0A=
        # Helper for buildstreams: add $path (of $size bytes) to the=0A=
        # first stream in the global @STREAMS that stays below=0A=
        # $chunksize, or open a new stream if $path fits in an empty=0A=
        # one.  Returns 1 when placed, 0 when $path is too big.=0A=
        my ($path, $size) =3D @_;=0A=
        $size =3D 0 unless defined $size;=0A=
=0A=
        for (my $i=3D0; $i<$streamct; $i++) {=0A=
                next unless=0A=
                    (strmsize($STREAMS[$i]) + $size) < $chunksize;=0A=
                # Slot 0 starts out empty: only put a comma in front=0A=
                # of the path when the stream already has content.=0A=
                if (defined $STREAMS[$i] and length $STREAMS[$i]) {=0A=
                        $STREAMS[$i] .=3D "," . $path;=0A=
                }=0A=
                else {=0A=
                        $STREAMS[$i] =3D $path;=0A=
                }=0A=
                return 1;=0A=
        }=0A=
=0A=
        return 0 unless $size < $chunksize;=0A=
=0A=
        # Note here that $streamct is 1 greater than the index of=0A=
        # the last element of @STREAMS=0A=
        $STREAMS[$streamct++] =3D $path;=0A=
        return 1;=0A=
}=0A=
=0A=
sub buildstreams {=0A=
        # Distribute directory $_[0] over the global @STREAMS list,=0A=
        # where each entry is a comma separated list of paths whose=0A=
        # combined size stays below $chunksize.=0A=
        #=0A=
        # Look at each directory from the top down.  If the whole=0A=
        # directory (and thus all its subdirs and files) fits, it is=0A=
        # added to the first stream with room, or to a new stream.=0A=
        # If it is too large, go down one level: recurse into the=0A=
        # subdirs first, then place plain files and symlinks singly.=0A=
        #=0A=
        # Dies if a directory cannot be opened or if a single file is=0A=
        # not smaller than $chunksize.=0A=
=0A=
        my $indir =3D $_[0];=0A=
=0A=
        # The whole directory fits into some stream: nothing more to do=0A=
        return if placeinstream($indir, $dirs{$indir});=0A=
=0A=
        #go down one level using opendir and readdir till done=0A=
        opendir (THISDIR, $indir)=0A=
                or die "couldn't open $indir to recurse: $!\n";=0A=
        # get rid of . and .. and make all full path names=0A=
        # (the listing is read completely before any recursion, so=0A=
        # reusing the THISDIR handle in nested calls is safe)=0A=
        my @allelems =3D map "$indir/$_",=0A=
                grep !/^\.\.?$/, readdir THISDIR;=0A=
        closedir THISDIR;=0A=
=0A=
        # first pass: recurse on each subdir=0A=
        foreach my $elem (@allelems) {=0A=
                next if (-f $elem);=0A=
                buildstreams($elem) if (-d $elem);=0A=
        }=0A=
=0A=
        # second pass: plain files and symlinks; dirs are done already=0A=
        foreach my $elem (@allelems) {=0A=
                next if (-d $elem);=0A=
                next unless ( -f $elem || -l $elem );=0A=
                my $sz =3D -s $elem;=0A=
                # add to a stream if it will fit, else build a new one=0A=
                next if placeinstream($elem, $sz);=0A=
                die "single file $elem greater than $chunksize \=0A=
             (1/$streamtotal of the size of the root directory)\n";=0A=
        }=0A=
}=0A=
=0A=
#=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=0A=
=0A=
sub strmsize {=0A=
        # Return the total size in bytes of one stream.=0A=
        #   $_[0] - comma separated list of paths (may be undef)=0A=
        # Plain files are sized with -s; every other entry is looked=0A=
        # up in the global %dirs table of directory sizes.=0A=
        # Always returns a number (0 for an empty stream).=0A=
        my $stream =3D $_[0];=0A=
        my $sum =3D 0;=0A=
        return $sum unless defined $stream;=0A=
        foreach my $elm (split ',', $stream) {=0A=
                next if $elm eq '';     # left by a leading comma=0A=
                if ( -f $elm ) { $sum +=3D (-s $elm) || 0 }=0A=
                else { $sum +=3D $dirs{$elm} || 0 }=0A=
        }=0A=
        return $sum;=0A=
}=0A=
=0A=

------=_NextPart_000_0070_01C520A7.F3561760--


<Prev in Thread] Current Thread [Next in Thread>