Automatic digest processor <LISTSERV AT VM.MARIST DOT EDU> writes:
>Date: Mon, 12 May 1997 11:37:44 -0400
>From: "Pittson, Timothy ,HiServ/NA" <tpittson AT HIMAIL.HCC DOT COM>
>Subject: Re: ADSM Recovery Log filling up
>
>Chuck,
> I don't think it's entirely fixed - I'm running ADSM version 2.1.5.12
>and just experienced this problem last week...
>
>Tim Pittson
>tpittson AT himail.hcc DOT com
>
This is very interesting. Have you contacted ADSM support about this and opened
a PMR for it? If so, could I get the PMR number? One of the things I did here
to check for this 'checkpoint' problem was to write a script that checks
for this condition (and, in addition, checks that I can still get an admin
session).
I will include the script at the bottom in case anyone is interested in it. It
uses a program called gscpage to do the paging. You can use the paging program
of your choice.
Chuck T.
#!/afs/prism/@sys/bin/perl5
#
# Description:
# This script checks whether it can get an admin session with the server.
# If it can't, a page is sent out to the who2page list. It also checks the
# Check Point Log size to make sure it does not get too big.
#
# Dependencies:
# - The sysop password is pulled from the /home/root/.passwd.adsm file.
#
# USAGE: ckdsm.session.pl [<server>]
# CRONTAB Entry:
# 0,30 * * * * /home/root/bin/ckdsm.session.pl >/dev/null# Check ADSM session.
#
# ---------------------------------------------------------------------------
# Main program.
#
#   1. Set up configuration, the host name and the ADSM admin password.
#   2. Run "dsmadmc ... show dbv" up to $loop_limit times, each attempt
#      guarded by the alarm_on/alarm_off watchdog.
#   3. If no attempt succeeds: page, mail and exit.
#   4. Otherwise pull CkptLogBytes and CkptInterval out of the command
#      output and mail and/or page if the checkpoint log has grown too big.
#
# Everything here is a package global on purpose: sendpage, sendmail,
# alarm_on and alarm_off read $line, $out, $Mail_to, $who2page_file,
# $hostname, $HOST, $alarm_time and $alarm_pid directly.
# ---------------------------------------------------------------------------
$threshold=500000;                        # Page when CkptLogBytes exceeds this
                                          # (and also exceeds CkptInterval).
$alarm_time=300;                          # Watchdog timeout in seconds (5 mins).
$alarm_pid=0;                             # Pid of the watchdog child process.
$Mail_to="root bob\@host.ibm.com";        # Who to send mail notifications to.
$who2page_file="/home/root/who2page.txt"; # File with the list of ids to page.
$ADMIN="sysop";                           # ADSM Admin Account to use.
$passwd_file="/home/root/.passwd.adsm";   # First line holds the admin password.
$out="";                                  # Output of the last dsmadmc run.

# The page text and the mail subject are prefixed with the host name, so
# work it out once here.  Both spellings are set because sendpage reads
# $hostname and sendmail reads $HOST.
$HOST = eval { require Sys::Hostname; Sys::Hostname::hostname() };
$HOST = "unknown-host" unless defined $HOST && $HOST ne "";
$hostname = $HOST;

# Get the password for the ADSM Admin Account.  Without it every dsmadmc
# call is bound to fail, so report the real cause right away instead of
# retrying and then sending a misleading "no session" alert.
$APWD="";
if ( open(my $pwd_fh, '<', $passwd_file) ){
    $APWD=<$pwd_fh>;
    close($pwd_fh);
    $APWD="" unless defined $APWD;
    chomp($APWD);
}
if ( $APWD eq "" ){
    $line="Unable to read ADSM admin password from $passwd_file";
    print $line,"\n";
    sendpage();
    sendmail();
    exit;
}

# Optional first argument: name of the server stanza to use.
$SERVER="";
if ($ARGV[0]){$SERVER="-se=$ARGV[0]";print "SERVER = $SERVER","\n";}

# Check to see if the TCP/IP/shared memory interface is up by using the
# dsmadmc command.  If it fails retry until $loop_limit attempts have been
# made; $rc holds the exit status of the last attempt (0 means success).
$loop_limit=3;
$loop_count=0;
$rc=999;
while ( $loop_count < $loop_limit && $rc != 0 ){
    alarm_on();                 # Start the watchdog (pages after $alarm_time).
    $out=`/usr/bin/dsmadmc -id=$ADMIN -password=$APWD $SERVER show dbv `;
    $rc=$?;                     # Save the status before anything can reset $?.
    alarm_off();                # Command returned: stop the watchdog.
    ++$loop_count;
    # Only wait if there is another attempt to come; sleeping after the
    # last failure would just delay the alert.
    if ( $rc != 0 && $loop_count < $loop_limit ){
        print "Sleeping for a while and will try again.\n";
        sleep 30;
    }
}

# Check Session access.
if ( $rc != 0 ){
    # Send page and mail about not being able to get a session.
    $line="Unable to get Admin Session.";
    print $line,"\n";
    sendpage();
    sendmail();
    exit;
}

# While I have the data let's check the Checkpoints.
# Get the CheckPoint Log size.
if ( $out =~ /CkptLogBytes=(\w+)/ ) {$ckptlogbyte=$1;}
# Get the Interval at which a CkPt should happen.
if ( $out =~ /CkptInterval=(\w+)/ ) {$ckptinterval=$1;}

if ( defined $ckptlogbyte && defined $ckptinterval ){
    if ( $ckptlogbyte > $ckptinterval ){
        # Send mail if the CkPt log is above the Interval value.  A CkPt
        # should have already happened or is about to happen.
        $line="Ckpt Log Byte ($ckptlogbyte) > Ckpt Interval ($ckptinterval)";
        sendmail();
        if ( $ckptlogbyte > $threshold ){
            # The CkPt Log is also bigger than the threshold: big trouble.
            # PAGE someone and let them know.
            $line="Database Checkpoint Log is too big: $ckptlogbyte";
            sendpage();
        }
    }
} else {
    # Do not let the checkpoint check switch itself off silently when the
    # expected fields are missing from the command output.
    warn "CkptLogBytes/CkptInterval not found in 'show dbv' output; ",
         "checkpoint check skipped.\n";
}
print "CkptLogBytes = $ckptlogbyte \nCkptInterval = $ckptinterval \n";
exit;
# End of main prg.
sub sendpage {
    # Page everyone listed in $who2page_file (one id per line).
    #
    # Inputs (package globals):
    #   $who2page_file - path of the file holding the ids to page.
    #   $line          - text of the page.
    #   $hostname      - host name put in front of the text; looked up via
    #                    Sys::Hostname when it has not been set.
    #   $out           - command output, echoed to STDOUT after paging.
    #
    # Problems (unreadable id file, empty list, failing gscpage) are
    # reported on STDERR; the sub never dies.

    my $page_host = $hostname;
    if ( !defined $page_host || $page_host eq "" ) {
        $page_host = eval { require Sys::Hostname; Sys::Hostname::hostname() };
        $page_host = "unknown-host" unless defined $page_host && $page_host ne "";
    }

    # Read the ids into a private list.  Blank lines are skipped: a blank
    # (or "0") entry must not end the paging run early.
    my @page_ids;
    if ( open(my $page_fh, '<', $who2page_file) ) {
        while ( my $id = <$page_fh> ) {
            chomp $id;
            $id =~ s/^\s+//;
            $id =~ s/\s+$//;
            next if $id eq "";
            push(@page_ids, $id);
        }
        close($page_fh);
    } else {
        warn "sendpage: cannot open $who2page_file: $!\n";
    }
    warn "sendpage: no ids to page in $who2page_file\n" unless @page_ids;

    # Page each id, last entry of the file first.  The list form of system
    # hands the message to gscpage as a single argument without going
    # through the shell, so nothing in the id or the text gets interpreted.
    foreach my $id ( reverse @page_ids ) {
        system('gscpage', $id, "$page_host $line") == 0
            or warn "sendpage: gscpage failed for $id (status $?)\n";
    }

    print $line,"\n";
    print $out;
}
sub sendmail {
    # Mail a notification through "sendmail -t".
    #
    # Inputs (package globals):
    #   $Mail_to - recipient list, used as the To: header.
    #   $line    - one-line summary, used in the Subject: header.
    #   $out     - command output, used as the message body.
    #   $HOST    - host name for the subject; looked up via Sys::Hostname
    #              when it has not been set.
    #
    # Returns 1 if sendmail accepted the message, 0 otherwise.  Failures
    # are reported on STDERR; the sub never dies.

    my $mail_host = $HOST;
    if ( !defined $mail_host || $mail_host eq "" ) {
        $mail_host = eval { require Sys::Hostname; Sys::Hostname::hostname() };
        $mail_host = "unknown-host" unless defined $mail_host && $mail_host ne "";
    }

    # If sendmail goes away early, a write to the pipe would otherwise
    # raise SIGPIPE and kill the whole monitor.
    local $SIG{PIPE} = 'IGNORE';

    my $mail_fh;
    unless ( open($mail_fh, '|-', 'sendmail -t -f adsm-admin') ) {
        warn "sendmail: cannot start sendmail: $!\n";
        return 0;
    }
    print {$mail_fh} "To: $Mail_to\n";
    print {$mail_fh} "From: adsm-admin\n";
    print {$mail_fh} "Reply-To: nobody\@austin.ibm.com\n";
    print {$mail_fh} "Subject: $mail_host: $line \n\n";
    print {$mail_fh} $out,"\n";

    # Closing a pipe waits for the command; a false result means sendmail
    # failed or the data could not be delivered to it.
    unless ( close($mail_fh) ) {
        warn "sendmail: sendmail failed (status $?)\n";
        return 0;
    }
    return 1;
}
sub alarm_on {
    # Start a watchdog for the command that is about to be run.
    #
    # A child process is forked that sleeps for $alarm_time seconds.  If it
    # is still alive after that (alarm_off was never called, so the command
    # did not finish in time) it sends a "timed out" page via sendpage and
    # exits.  The child's pid is stored in the global $alarm_pid so that
    # alarm_off can stop it.
    #
    # Returns the pid of the watchdog, or 0 if it could not be started.

    $alarm_pid = 0;
    my $child = fork;

    if ( !defined $child ) {
        # fork failed.  Carry on without a watchdog rather than letting the
        # main process fall into the child branch below and exit.
        warn "alarm_on: fork failed, running without a timeout alarm: $!\n";
        return 0;
    }

    if ( $child == 0 ) {
        # Child process: this is the alarm itself.
        sleep $alarm_time;
        $line="dsmadmc command timed out";
        sendpage();
        exit;
    } # end of alarm process.

    # Parent process: remember the watchdog so alarm_off can stop it.
    $alarm_pid=$child;
    print "Alarm set : $alarm_pid\n";
    return $alarm_pid;
}
sub alarm_off {
    # Stop the watchdog started by alarm_on before it goes off.
    #
    # Does nothing but print when no watchdog is running.  The guard
    # matters: "kill 9, 0" would signal our whole process group.

    if ( $alarm_pid && $alarm_pid > 0 ) {
        # waitpid overwrites $?; keep the caller's value intact.
        local $?;
        kill 9, $alarm_pid;
        # Reap the child so it does not linger as a zombie.
        waitpid($alarm_pid, 0);
    }
    print "Alarm truned off : $alarm_pid\n";
    $alarm_pid = 0;
}
# End of perl script.
|