Amanda-Users

Re: discrepancy between amadmin, logs and tape content?

2007-10-30 11:50:47
Subject: Re: discrepancy between amadmin, logs and tape content?
From: Jean-Louis Martineau <martineau AT zmanda DOT com>
To: Jean-Francois Malouin <Jean-Francois.Malouin AT bic.mni.mcgill DOT ca>
Date: Tue, 30 Oct 2007 11:39:59 -0400
This bug is fixed in 2.5.3alpha, but the patch was not backported to 2.5.2p1.

Can you try the attached patch?

Jean-Louis

Jean-Francois Malouin wrote:
Hi,

With amanda-2.5.2p1 I made an archive a few days ago, and trying to
restore it caused a few problems: amfetchdump tells me that
there is no valid data to be restored for that date, and amadmin
reports a connection timeout:

grumpy: /opt/amanda/sbin/amfetchdump -p -d /dev/nst1 archive-nihpd3-right1 \
yorick /data/nihpd/nihpd3/data/mri_processing/1.1 20071022 | tar -tvf -
No matching dumps found

grumpy: su amanda -c "/opt/amanda/sbin/amadmin archive-nihpd3-right1 \
find yorick /data/nihpd/nihpd3/data/mri_processing/1.1"
2007-10-22 yorick /data/nihpd/nihpd3/data/mri_processing/1.1  0
av24-1_archive-nihpd3-right1_T00006L3    0   -- FAILED (dumper) [data
read: recv error: Connection timed out]

However, I was able to manually extract all the chunks on that tape
and, after reassembling them, untar everything without a glitch.
Looking at the logs, I see that the dump was retried successfully:

DISK planner yorick /data/nihpd/nihpd3/data/mri_processing/1.1
FAIL dumper yorick /data/nihpd/nihpd3/data/mri_processing/1.1 20071022
0 [data read: recv error: Connection timed out]
  sendbackup: start [yorick:/data/nihpd/nihpd3/data/mri_processing/1.1
level 0]
PARTIAL chunker yorick /data/nihpd/nihpd3/data/mri_processing/1.1 20071022 0 
[sec 9771.397 kb 105401080 kps 10786.7]
SUCCESS dumper yorick /data/nihpd/nihpd3/data/mri_processing/1.1 20071022 0 
[sec 10432.472 kb 113375650 kps 10867.6 orig-kb 113375650]
SUCCESS chunker yorick /data/nihpd/nihpd3/data/mri_processing/1.1 20071022 0 
[sec 10432.600 kb 113375650 kps 10867.4]
STATS driver estimate yorick /data/nihpd/nihpd3/data/mri_processing/1.1 
20071022 0 [sec 8192 nkb 113375682 ckb 113375712 kps 13840]
CHUNK taper yorick /data/nihpd/nihpd3/data/mri_processing/1.1 20071022 1 0 [sec 
197.311 kb 5242848 kps 26571.5 {wr: writers 163840 rdwait
79.637 wrwait 112.112 filemark 4.930}]
CHUNK taper yorick /data/nihpd/nihpd3/data/mri_processing/1.1 20071022 2 0 [sec 
101.364 kb 5242848 kps 51722.7 {wr: writers 163840 rdwait
42.617 wrwait 57.238 filemark 1.013}]

...

CHUNK taper yorick /data/nihpd/nihpd3/data/mri_processing/1.1 20071022 22 0 
[sec 60.077 kb 3275872 kps 54527.8 {wr: writers 102372 rdwait
3.036 wrwait 55.529 filemark 1.051}]
CHUNKSUCCESS taper yorick /data/nihpd/nihpd3/data/mri_processing/1.1 20071022 0 
[sec 2313.467 kb 113376352 kps 49007.1 {wr: writers 102372
rdwait 3.036 wrwait 55.529 filemark 1.051}]

what gives?
jf

diff -u -r --show-c-function --new-file --exclude-from=/home/martinea/src.orig/amanda.diff --ignore-matching-lines='$Id:' amanda-2.5.2p1/server-src/amadmin.c amanda-2.5.2p1.find/server-src/amadmin.c
--- amanda-2.5.2p1/server-src/amadmin.c 2007-07-05 13:02:02.000000000 -0400
+++ amanda-2.5.2p1.find/server-src/amadmin.c    2007-10-29 13:16:16.000000000 -0400
@@ -1111,7 +1111,7 @@ find(
 
     if(argc < 3) {
        fprintf(stderr,
-               "%s: expecting \"find [--sort <hkdlpb>] [hostname [<disk>]]*\"\n",
+               "%s: expecting \"find [--sort <hkdlpbf>] [hostname [<disk>]]*\"\n",
                get_pname());
        usage();
     }
@@ -1129,6 +1129,8 @@ find(
            case 'K':
            case 'd':
            case 'D':
+           case 'f':
+           case 'F':
            case 'l':
            case 'L':
            case 'p':
diff -u -r --show-c-function --new-file --exclude-from=/home/martinea/src.orig/amanda.diff --ignore-matching-lines='$Id:' amanda-2.5.2p1/server-src/find.c amanda-2.5.2p1.find/server-src/find.c
--- amanda-2.5.2p1/server-src/find.c    2007-05-23 07:56:31.000000000 -0400
+++ amanda-2.5.2p1.find/server-src/find.c       2007-10-29 13:23:48.000000000 -0400
@@ -39,7 +39,6 @@
 int find_match(char *host, char *disk);
 int search_logfile(find_result_t **output_find, char *label, char *datestamp, char *logfile);
 void search_holding_disk(find_result_t **output_find);
-void strip_failed_chunks(find_result_t **output_find);
 char *find_nicedate(char *datestamp);
 static int find_compare(const void *, const void *);
 static int parse_taper_datestamp_log(char *logline, char **datestamp, char **level);
@@ -114,8 +113,6 @@ find_dump(
 
     search_holding_disk(&output_find);
 
-    strip_failed_chunks(&output_find);
-    
     return(output_find);
 }
 
@@ -198,76 +195,6 @@ find_log(void)
     return(output_find_log);
 }
 
-/*
- * Remove CHUNK entries from dumps that ultimately failed from our report.
- */
-void strip_failed_chunks(
-    find_result_t **output_find)
-{
-    find_result_t *cur, *prev = NULL, *failed = NULL, *failures = NULL;
-
-    /* Generate a list of failures */
-    for(cur=*output_find; cur; cur=cur->next) {
-       if(!cur->hostname  || !cur->diskname ||
-          !cur->timestamp || !cur->label)
-           continue;
-
-       if(strcmp(cur->status, "OK")){
-           failed = alloc(SIZEOF(find_result_t));
-           memcpy(failed, cur, SIZEOF(find_result_t));
-           failed->next = failures;
-           failures = failed;
-       }
-    }
-
-    /* Now if a CHUNK matches the parameters of a failed dump, remove it */
-    for(failed=failures; failed; failed=failed->next) {
-       prev = NULL;
-       cur = *output_find;
-       while (cur != NULL) {
-           find_result_t *next = cur->next;
-           if(!cur->hostname  || !cur->diskname || 
-              !cur->timestamp || !cur->label    || !cur->partnum ||
-              !strcmp(cur->partnum, "--") || strcmp(cur->status, "OK")) {
-               prev = cur;
-               cur = next;
-           }
-           else if(!strcmp(cur->hostname, failed->hostname) &&
-                !strcmp(cur->diskname, failed->diskname) &&
-                !strcmp(cur->timestamp, failed->timestamp) &&
-                !strcmp(cur->label, failed->label) &&
-                cur->level == failed->level){
-               amfree(cur->diskname);
-               amfree(cur->hostname);
-               amfree(cur->label);
-               amfree(cur->timestamp);
-               amfree(cur->partnum);
-               amfree(cur->status);
-               cur = next;
-               if (prev) {
-                   amfree(prev->next);
-                   prev->next = next;
-               } else {
-                   amfree(*output_find);
-                   *output_find = next;
-               }
-           }
-            else {
-               prev = cur;
-               cur = next;
-           }
-
-       }
-    }
-
-    for(failed=failures; failed;) {
-       find_result_t *fai = failed->next;
-       fai = failed->next;
-       amfree(failed);
-       failed=fai;
-    }
-}
-
 void
 search_holding_disk(
     find_result_t **output_find)
@@ -685,6 +612,8 @@ search_logfile(
     char *s;
     int ch;
     disk_t *dp;
+    find_result_t *part_find = NULL;  /* List for all part of a DLE */
+    find_result_t *a_part_find;
 
     if((logf = fopen(logfile, "r")) == NULL) {
        error("could not open logfile %s: %s", logfile, strerror(errno));
@@ -736,7 +665,7 @@ search_logfile(
            }
        }
        partnum = "--";
-       if(curlog == L_SUCCESS || curlog == L_PARTIAL || curlog == L_FAIL || curlog == L_CHUNK) {
+       if(curlog == L_SUCCESS || curlog == L_PARTIAL || curlog == L_FAIL || curlog == L_CHUNK || curlog == L_CHUNKSUCCESS) {
            s = curstr;
            ch = *s++;
 
@@ -819,7 +748,6 @@ search_logfile(
                if(curprog == P_TAPER) {
                    find_result_t *new_output_find =
                        (find_result_t *)alloc(SIZEOF(find_result_t));
-                   new_output_find->next=*output_find;
                    new_output_find->timestamp = stralloc(date);
                    new_output_find->hostname=stralloc(host);
                    new_output_find->diskname=stralloc(disk);
@@ -827,13 +755,43 @@ search_logfile(
                    new_output_find->partnum = stralloc(partnum);
                    new_output_find->label=stralloc(label);
                    new_output_find->filenum=filenum;
-                   if(curlog == L_SUCCESS || curlog == L_CHUNK) 
-                       new_output_find->status=stralloc("OK");
-                   else if(curlog == L_PARTIAL)
-                       new_output_find->status=stralloc("PARTIAL");
-                   else
-                       new_output_find->status=stralloc(rest);
-                   *output_find=new_output_find;
+                   new_output_find->next=NULL;
+                   if (curlog == L_SUCCESS) {
+                       new_output_find->status = stralloc("OK");
+                       new_output_find->next = *output_find;
+                       *output_find = new_output_find;
+                   } else if (curlog == L_CHUNKSUCCESS ||
+                              curlog == L_PARTIAL      || curlog == L_FAIL) {
+                       /* result line */
+                       if (curlog == L_PARTIAL || curlog == L_FAIL) {
+                            /* change status of each part */
+                           for (a_part_find = part_find; a_part_find;
+                                a_part_find = a_part_find->next) {
+                               if (curlog == L_PARTIAL)
+                                    a_part_find->status = stralloc("PARTIAL");
+                               else
+                                    a_part_find->status = stralloc(rest);
+                           }
+                       }
+                       if (part_find) { /* find last element */
+                           for (a_part_find = part_find;
+                                a_part_find->next != NULL;
+                                a_part_find=a_part_find->next) {
+                           }
+                           /* merge part_find to *output_find */
+                           a_part_find->next = *output_find;
+                           *output_find = part_find;
+                           part_find = NULL;
+                       }
+                   } else { /* part line */
+                       if (curlog == L_CHUNK)
+                           new_output_find->status=stralloc("OK");
+                       else /* PARTPARTIAL */
+                           new_output_find->status=stralloc("PARTIAL");
+                       /* Add to part_find list */
+                       new_output_find->next = part_find;
+                       part_find = new_output_find;
+                   }
                }
                else if(curlog == L_FAIL) {     /* print other failures too */
                    find_result_t *new_output_find =
@@ -859,6 +817,11 @@ search_logfile(
        }
     }
     afclose(logf);
+
+    if (part_find != NULL) {
+       dbprintf(("part_find not empty\n"));
+    }
+
     return 1;
 }
 
diff -u -r --show-c-function --new-file --exclude-from=/home/martinea/src.orig/amanda.diff --ignore-matching-lines='$Id:' amanda-2.5.2p1/server-src/find.h amanda-2.5.2p1.find/server-src/find.h
--- amanda-2.5.2p1/server-src/find.h    2007-05-04 07:39:04.000000000 -0400
+++ amanda-2.5.2p1.find/server-src/find.h       2007-10-29 13:15:24.000000000 -0400
@@ -3,7 +3,7 @@
 
 #include "diskfile.h"
 
-#define DEFAULT_SORT_ORDER      "hkdlpb"
+#define DEFAULT_SORT_ORDER      "hkdlpbf"
 
 typedef struct find_result_s {
     struct find_result_s *next;
<Prev in Thread] Current Thread [Next in Thread>