Hello,
I see several nsrindex operation. Check the parallism in the NetWorker
server client, increase it to at least 12 if it is not set.
The reason is, that the nsrindex operation cannot become ready due to the
servers client paralillsm.
There are several topics with the nsrjobd in 7.5. So after the next stop of
the Servers NetWorker software, go to /nsr/res
and move the jobsdb directory to jobsdb.old. It will be recreated during the
next NetWorker start.
If you did this you cannot check savegrp results before the restart with the
NMC, but you can still do this with the daemon.raw.
On a high deployed system, the nsrjobd does not have any chance to make a
reorg of the jobs database.
Check although the ulimit -n value, it should be 2048 is possible.
Best regards Rainer
-----Ursprüngliche Nachricht-----
Von: EMC NetWorker discussion [mailto:NETWORKER AT LISTSERV.TEMPLE DOT EDU] Im
Auftrag von jeronimo
Gesendet: Samstag, 24. November 2012 20:20
An: NETWORKER AT LISTSERV.TEMPLE DOT EDU
Betreff: [Networker] all sessions slow at some time during backup
The server OS is 64-bit Linux running on HP Proliant hardware with lots of
cores (yeah, overpowered)
I know nothing of a hardware fault. Anyway, what could it be? It more or
less always behaves that way.
For the details you wanted:
On the server:
/ # ps -ef | grep nsr
root 2496 10671 0 04:02 ? 00:00:30 /opt/nsr/nsrindexd -a ADD
hhh index for backup on hhh
root 3515 10625 0 18:14 ? 00:00:00 /opt/nsr/nsrmmd -n 6
root 4118 10671 0 18:15 ? 00:00:12 /opt/nsr/nsrindexd -a ADD
iii index for backup on iii
root 10618 1 0 Nov18 ? 00:02:31 /opt/nsr/nsrexecd 2>&1
root 10625 1 0 Nov18 ? 00:07:29 /opt/nsr/nsrd 2>&1
root 10658 1 0 Nov18 ? 00:23:13 /opt/nsr/nmc/bin/gstd
root 10662 1 0 Nov18 ? 00:00:04
/opt/nsr/nmc/apache/bin/httpd -f /opt/nsr/nmc/apache/conf/httpd.conf
root 10667 10625 0 Nov18 ? 00:10:34 /opt/nsr/nsrmmdbd
root 10671 10625 0 Nov18 ? 00:00:22 /opt/nsr/nsrindexd
root 10674 10625 0 Nov18 ? 00:09:03 /opt/nsr/nsrjobd
root 10678 10658 0 Nov18 ? 00:02:30
/opt/nsr/nmc/sybasa/bin/dbsrv9 -ct- -x tcpip(ServerPort=2638) -ti 1500 -tl
6000 -sb 0 -Q -gk all -o /opt/nsr/nmc/logs/db_output -n gst_on_serv
/nsr/nmc/lgto_gstdb/lgto_gst.db
root 11476 10625 0 Nov18 ? 00:00:22 /opt/nsr/nsrmmgd
root 11515 10618 0 Nov18 ? 00:00:06 /opt/nsr/nsrlcpd -s serv -N
1 -n 1
root 11517 10618 0 Nov18 ? 00:00:06 /opt/nsr/nsrlcpd -s serv -N
1 -n 2
root 11556 10625 3 Nov18 ? 04:53:36 /opt/nsr/nsrmmd -n 1
root 11561 10625 7 Nov18 ? 11:08:43 /opt/nsr/nsrmmd -n 2
root 11566 10625 0 Nov18 ? 01:05:18 /opt/nsr/nsrmmd -n 3
root 11571 10625 0 Nov18 ? 00:00:00 /opt/nsr/nsrmmd -n 4
root 12039 10625 0 07:29 ? 00:00:00 /opt/nsr/savegrp
WEEKEND-DEV-DB
root 12456 10671 0 07:38 ? 00:00:16 /opt/nsr/nsrindexd -a ADD
xxx index for backup on xxx
root 13723 10671 0 07:55 ? 00:00:11 /opt/nsr/nsrindexd -a ADD
zzz for backup on zzz
root 14068 10671 0 08:03 ? 00:00:00 /opt/nsr/nsrindexd -a ADD
zzz index for backup on zzz
root 15057 10671 0 08:30 ? 00:00:10 /opt/nsr/nsrindexd -a ADD
aaa index for backup on aaa
root 15541 10671 0 08:44 ? 00:00:08 /opt/nsr/nsrindexd -a ADD
bbb index for backup on bbb
root 18646 10671 0 09:48 ? 00:00:00 /opt/nsr/nsrindexd -a ADD
ccc index for backup on ccc
root 18649 10671 0 09:48 ? 00:00:00 /opt/nsr/nsrindexd -a ADD
ddd index for backup on ddd
root 22244 10625 0 Nov23 ? 00:00:01 /opt/nsr/savegrp WEEKEND
nobody 27970 10662 0 Nov23 ? 00:00:00
/opt/nsr/nmc/apache/bin/httpd -f /opt/nsr/nmc/apache/conf/httpd.conf
nobody 27971 10662 0 Nov23 ? 00:00:00
/opt/nsr/nmc/apache/bin/httpd -f /opt/nsr/nmc/apache/conf/httpd.conf
root 31090 10671 0 02:37 ? 00:01:06 /opt/nsr/nsrindexd -a ADD
eee index for backup on eee
root 31325 10671 0 Nov23 ? 00:00:00 /opt/nsr/nsrindexd -a READ
nasndmp1 index for all on serv
root 31327 10625 0 Nov23 ? 00:00:00 /opt/nsr/ansrd nasndmp1:root
recovering
root 32332 10625 0 Nov23 ? 00:00:00 /opt/nsr/nsrmmd -n 5
# strace -p 12456 (nsrindexd)
Process 12456 attached - interrupt to quit select(64, [9], NULL, NULL, NULL
^C
# strace -p 22244 (savegrp WEEKEND)
Process 22244 attached - interrupt to quit select(64, [5], NULL, NULL, {16,
836000}) = 0 (Timeout) select(64, [5], NULL, NULL, {30, 0} ^C
On the client:
# ps -ef | grep nsr
root 5154 1 0 Nov11 ? 00:00:01 /opt/nsr/nsrexecd 2>&1
root 30589 5154 0 07:38 ? 00:02:48 /opt/nsr/savepnpc -s serv -g
WEEKEND -LL -f - -m host -l full -q -W 78 -N /media /media
# strace -p 30589
Process 30589 attached - interrupt to quit writev(5,
[{"QH\23$\217x\324\205\242\256\326\321g`\317\373L\6\266\36"..., 32208}], 1)
= 32208 read(10,
"e<9\240hzY\204\235\274\3173\203\32w\r\t\366\277;j\10#~"..., 65536) = 65536
writev(5, [{"\200\0\0\0\0\1\0x.\re\270\0\0\0\0\0\0\0\2\0\5\363\330\0"...,
116}, {"\0\0\1\0\0\1\0\4\0\0\0\0e<9\240hzY\204\235\274\3173\203"...,
65548}], 2) = 65664 read(10,
"H\r\341>H\16V5\330\261\215E~\310\7I\220\377b\317\362\226"..., 65536) =
65536
close(10) = 0
getuid() = 0
setreuid(65534, 4294967295) = 0
utime("file.pdf", [2012/09/03-15:22:28, 2011/08/10-09:01:20]) = 0
setreuid(0, 4294967295) = 0
writev(5, [{"\200\0\0\0\200\0\0x+\re\270\0\0\0\0\0\0\0\2\0\5\363\330"...,
128}], 1) = 128 lstat("file.zip", {st_mode=S_IFREG|0666, st_size=6853484,
...}) = 0 open("file.zip", O_RDONLY) = 10 fgetxattr(10,
"system.posix_acl_access", 0x0, 0) = -1 ENODATA (No data available)
fgetxattr(10, "system.posix_acl_default", 0x0, 0) = -1 ENODATA (No data
available)
flistxattr(10, (nil), 0) = 17
flistxattr(10, 0x5d4210, 17) = 17
fgetxattr(10, "netware.metadata", 0x0, 0) = 160 fgetxattr(10,
"netware.metadata",
"\xff\xfe\x01\x00\x00\x00\x00\x00\xef\x03\x00\x1a\x00\x00\x00\x00
\x00\x00`\x00\x00\x00\x00\xff\xff\xff\xff\xff\xff\xff\xffS\x12\xd2O\x00\x00\
x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc6\x10\xd2O\x00\x00\x00\x00\x00\x00
\x00\x00\x00\x00\x00\x00\xfa\x10\x9eP\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0
0\x00\x00\x80\xf5\x8cr\xe4\x8a\xdd\x11\xbd\xd1\x00\x11\x85e0\x0f\x00\x00\x00
\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00
\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00
\x00\x00\x00\x00\x00\x00\x00\xff\xff\xff\xff\xff\xff\xff\x7f\xff\xff\xff\xff
\x00\x00\x00", 160) = 160 read(10,
"PK\3\4\24\0\0\0\10\0z\201:@\357\366#\302P\222h\0=\232%"..., 65536) = 65536
read(10, "PK\3\4\24\0\0\0\10\0z\201:@\357\366#\302P\222h\0=\232%"..., 65536)
= 65536 writev(5,
[{"\200\0\0p*\re\270\0\0\0\0\0\0\0\2\0\5\363\330\0\0\0\315"..., 692},
{"\0\0\1\0\0\1\0\4\0\0\0\0PK\3\4\24\0\0\0\10\0z\201:@\357"..., 65548}], 2) =
66240 read(10, "\312\346\276L\27\326\376\24\214\34t=w{\35\345n\370z\264"...,
65536) = 65536 writev(5,
[{"\200\0\0\0\0\1\0x(\re\270\0\0\0\0\0\0\0\2\0\5\363\330\0"..., 116},
{"\0\0\1\0\0\1\0\4\0\0\0\0\312\346\276L\27\326\376\24\214"..., 65548}], 2) =
65664 read(10,
"\357?\216SA\361\227=\307},\264m\feO(\247+\232Q\251\t\253"..., 65536) =
65536
But this is not progressing fast, about one 20 or 30 writev/read every 30
seconds.....
Currently one of the hosts is backing up alone onto one tape and is really
going fast.
While the others (about 10 savesets/sessions) are writing to another tape
which is almost not progressing at all....
+----------------------------------------------------------------------
|This was sent by jm+backupcentral AT roth DOT lu via Backup Central.
|Forward SPAM to abuse AT backupcentral DOT com.
+----------------------------------------------------------------------
|