Amanda-Users

Stranded on waitq failure (planner: Message too long)

2008-10-31 10:59:29
Subject: Stranded on waitq failure (planner: Message too long)
From: Leon Meßner <l.messner AT physik.tu-berlin DOT de>
To: amanda-users AT amanda DOT org
Date: Fri, 31 Oct 2008 15:01:17 +0100
Hi,
I get the following message with Amanda 2.5.1p3 on FreeBSD 7.0:

        FAILED [hmm, disk was stranded on waitq]

planner: ERROR Request to myhostandserver failed: error sending REQ:
send REQ to myhostandserver failed: Message too long

Machine myhostandserver is both Amanda client and server, tar'ing
to NFS-mounted vtapes.
The problem started after an issue with the NFS vtape storage, which was
unavailable when the backup cron job fired. Since then I have forced a level 0
dump (which ended successfully), made incrementals on the next run (same error
again), and then deleted the corresponding entries in amandates and the
gnutar-lists [1].
Nothing has helped so far.

Below I have pasted parts of amdump.1, planner.`date`.debug, and amanda.conf.
If you need more information, I'm happy to supply it.
TIA
leon

[1]: http://www.mail-archive.com/amanda-users@amanda.org/msg37479.html


FAILURE AND STRANGE DUMP SUMMARY:
myhostandserver  /export/home         lev 0  FAILED [hmm,
disk was stranded on waitq]
myhostandserver  /export/home/z       lev 0  FAILED [hmm,
disk was stranded on waitq]

<more dle's>

myhostandserver  /export/home/a       lev 0  FAILED [hmm,
disk was stranded on waitq]
  planner: ERROR Request to myhostandserver failed: error
sending REQ: send REQ to myhostandserver failed: Message
too long


%cat planner.20081031000002.debug
planner: debug 1 pid 71262 ruid 10 euid 0: start at Fri Oct 31 00:00:02
2008
planner: debug 1 pid 71262 ruid 10 euid 10: rename at Fri Oct 31
00:00:02 2008
security_getdriver(name=BSD) returns 0x800997040
security_handleinit(handle=0x801048080, driver=0x800997040 (BSD))
planner: time 0.194: bind_portrange2: Try  port 720: Available   -
Success
planner: time 0.194: dgram_bind: socket bound to 0.0.0.0.720
planner: dgram_send_addr(addr=0x7fffffffbc90, dgram=0x80099bca8)
planner: time 0.197: (sockaddr_in *)0x7fffffffbc90 = { 2, 10080,
myip }
planner: dgram_send_addr: 0x80099bca8->socket = 3
planner: time 0.495: dgram_recv(dgram=0x80099bca8, timeout=0,
fromaddr=0x8009abca0)
planner: time 0.495: (sockaddr_in *)0x8009abca0 = { 2, 10080,
myip }
planner: dgram_send_addr(addr=0x7fffffffd290, dgram=0x80099bca8)
planner: time 30.826: (sockaddr_in *)0x7fffffffd290 = { 2, 10080,
myip }
planner: dgram_send_addr: 0x80099bca8->socket = 3
planner: time 30.829: dgram_recv(dgram=0x80099bca8, timeout=0,
fromaddr=0x8009abca0)
planner: time 30.829: (sockaddr_in *)0x8009abca0 = { 2, 10080,
myip }
planner: time 30.831: dgram_recv(dgram=0x80099bca8, timeout=0,
fromaddr=0x8009abca0)
planner: time 30.831: (sockaddr_in *)0x8009abca0 = { 2, 10080,
myip }
planner: dgram_send_addr(addr=0x7fffffffd220, dgram=0x80099bca8)
planner: time 30.831: (sockaddr_in *)0x7fffffffd220 = { 2, 10080,
myip }
planner: dgram_send_addr: 0x80099bca8->socket = 3
security_getdriver(name=BSD) returns 0x800997040
security_handleinit(handle=0x801048120, driver=0x800997040 (BSD))
planner: dgram_send_addr(addr=0x7fffffffb8d0, dgram=0x80099bca8)
planner: time 30.834: (sockaddr_in *)0x7fffffffb8d0 = { 2, 10080,
myip }
planner: dgram_send_addr: 0x80099bca8->socket = 3
planner: time 30.834: dgram_send_addr: sendto(myip.10080)
failed: Message too long
security_seterror(handle=0x801048120, driver=0x800997040 (BSD)
error=send REQ to myhostandserver failed: Message too
long)
security_seterror(handle=0x801048120, driver=0x800997040 (BSD)
error=error sending REQ: send REQ to myhostandserver
failed: Message too long)
security_close(handle=0x801048120, driver=0x800997040 (BSD))
security_close(handle=0x801048080, driver=0x800997040 (BSD))
planner: time 30.836: pid 71262 finish time Fri Oct 31 00:00:33 2008

org      "Home"        # your organization name for reports
dumpcycle 2 weeks       # the number of days in the normal dump cycle
runspercycle 14         # the number of amdump runs in dumpcycle days
                        # (4 weeks * 5 amdump runs per week -- just
                        # weekdays)
tapecycle 30 tapes      # the number of tapes in rotation
                        # 4 weeks (dumpcycle) times 5 tapes per week
                        # (just
                        # the weekdays) plus a few to handle errors that
                        # need amflush and so we do not overwrite the
                        # full
                        # backups performed at the beginning of the
                        # previous
                        # cycle

displayunit "M"         # Possible values: "k|m|g|t"
                        # Default: k.
                        # The unit used to print many numbers.
                        # k=kilo, m=mega, g=giga, t=tera


runtapes 3                      # number of tapes to be used in a single run of amdump

tapedev "file:/mnt/amstore/dc_home"      # the no-rewind tape device to be used

changerfile "/usr/local/etc/amanda/Home/changer"

mailto   "mymail"

# If you want Amanda to automatically label any non-Amanda tapes it
# encounters, uncomment the line below. Note that this will ERASE any
# non-Amanda tapes you may have, and may also ERASE any near-failing
# tapes.
# Use with caution.
label_new_tapes "Home-%%"

labelstr "^Home-[0-9][0-9]*$"  # label constraint regex: all tapes must match

# Note that, although the keyword below is infofile, it is only so for
# historic reasons, since now it is supposed to be a directory (unless
# you have selected some database format other than the `text' default)
infofile "/var/log/amanda/home/curinfo"  # database DIRECTORY
logdir   "/var/log/amanda/home"          # log directory
indexdir "/var/log/amanda/home/index"            # index directory

# tapelist is stored, by default, in the directory that contains
# amanda.conf

includefile "/usr/local/etc/amanda/global2.conf"

%cat global2.conf
dumpuser "amanda"       # the user to run dumps under
inparallel 4            # maximum dumpers that will run in parallel (max 63)
dumporder "sssS"
netusage 10000000 Kbps      # maximum net bandwidth for Amanda, in KB per sec
bumpsize 20 Mb          # minimum savings (threshold) to bump level 1 -> 2
bumppercent 20          # minimum savings (threshold) to bump level 1 -> 2
bumpdays 1              # minimum days at each level
bumpmult 4              # threshold = bumpsize * bumpmult^(level-1)
#etimeout 60             # number of seconds per filesystem for estimates.
dtimeout 10800          # number of idle seconds before a dump is aborted.
ctimeout 30             # maximum number of seconds that amcheck waits
tapebufs 20             # A positive integer telling taper how many
                        # 32k buffers to allocate.  The default is 20
                        # (640k).
usetimestamps yes
tpchanger "chg-disk"            # the tape-changer glue script
tapetype NAS            # what kind of tape it is (see tapetypes below)
amrecover_do_fsf yes            # amrecover will call amrestore with the
                                # -f flag for faster positioning of the
                                # tape.
amrecover_check_label yes       # amrecover will call amrestore with the
                                # -l flag to check the label.
define tapetype NAS {
        comment "Dump to nfs-disk"
        length 402400 mbytes
}

define dumptype global {
    comment "Global definitions"
    index yes
    # record no
    # split_diskbuffer "/raid/amanda"
    # fallback_splitsize 64m
}

define dumptype ini {
   global
   program "GNUTAR"
   compress client fast
   estimate server
   #exclude list "/exclude"
}

define dumptype server-ossl {
   global
   program "GNUTAR"
   comment "server symmetric encryption and client compression"
   compress client fast
   index
   encrypt server
   server_encrypt "/usr/local/sbin/amcrypt-ossl"
   server_decrypt_option "-d"
   exclude list "/usr/local/etc/amanda/exclude_list"
}

Attachment: pgpWKSxjU6gWp.pgp
Description: PGP signature

<Prev in Thread] Current Thread [Next in Thread>
  • Stranded on waitq failure (planner: Message too long), Leon Meßner <=