> It'd be really nice if Bacula could store this information in a more
> query-friendly way.
Just for kicks, I've written an extension to PostgreSQL that lets you
decode the lstat field into a proper `stat' structure you can query,
build indexes on, etc. It's basically just a PostgreSQL function
interface wrapper around the base64 and stat structure decoding routines
from the Bacula sources.
If you're not using PostgreSQL ... well, I guess it could probably be
ported to MySQL's function interface, though you'd be on your own doing
so. If you use SQLite (or, IMO, MySQL) .... time to upgrade?
I've attached the source code, which should build with a simple "make;
make install" on any reasonably sensible UNIX with a gcc compiler. I
didn't use Pg's extension build system (PGXS) since it was so easy to just
write a Makefile, but you could probably adapt the build to PGXS pretty easily.
Example of use:
bacula=# select decode_stat(lstat) from file limit 10;
decode_stat
------------------------------------------------------------------------------------------
(64782,49578,33152,1,105,8,0,8597,24,1231311060,1231311060,1233542770,-1076656616,0)
(64782,-1201252948,33152,1,105,8,0,22845,48,1188919531,1188919531,1233542525,16777216,0)
(64782,-1201252948,33152,1,105,8,0,35196,72,1197340795,1197340795,1233542530,30836799,0)
(64782,-1201252948,33152,1,105,8,0,3383,8,1214876142,1214876142,1233542823,30836799,0)
(64782,-1201252948,33152,1,105,8,0,3004,8,1190704535,1190704535,1233542394,30836799,0)
(64782,-1201252948,33152,1,105,8,0,1428,8,1170831227,1170831227,1233542575,30836799,0)
(64782,-1201252948,33152,1,105,8,0,12073,24,1192006848,1192006848,1233542442,30836799,0)
(64782,-1201252948,33152,1,105,8,0,38046,80,1149858815,1115606509,1233542603,30836799,0)
(64782,-1201252948,33152,1,105,8,0,6511,16,1149858788,1092056820,1233542500,30836799,0)
(64782,-1201252948,33152,1,105,8,0,2243,8,1149858835,1131671960,1233542505,30836799,0)
(10 rows)
The data structure the function returns:
bacula=# \d stat
Composite type "public.stat"
Column | Type
------------+---------
st_dev | integer
st_ino | integer
st_mod | integer
st_nlink | integer
st_uid | integer
st_gid | integer
st_rdev | bigint
st_size | integer
st_blksize | integer
st_blocks | integer
st_atime | integer
st_mtime | integer
st_ctime | integer
linkfi | integer
Files and sizes:
bacula=# SELECT filename.name, x.st_size FROM (select file.*,
(decode_stat(lstat)).st_size FROM file LIMIT 10) AS x INNER JOIN
filename ON x.filenameid = filename.filenameid;
name | st_size
-------+---------
8917. | 59627
9554. | 147590
5750. | 747
6141. | 100195
6209. | 14146
6169. | 5729
272. | 1981
1303. | 1752
9151. | 970
6487. | 448236
(10 rows)
Making an index on file size, then selecting the names and sizes of the
biggest 20 files:
CREATE OR REPLACE FUNCTION decode_lstat_size(text) RETURNS int4 AS $$
SELECT (decode_stat($1)).st_size;
$$ LANGUAGE SQL IMMUTABLE STRICT;
CREATE INDEX file_size ON file (decode_lstat_size(lstat));
SELECT filename.name, decode_lstat_size(lstat)
FROM file INNER JOIN filename ON file.filenameid = filename.filenameid
ORDER BY decode_lstat_size(lstat) DESC
LIMIT 20;
name | decode_lstat_size
--------------------------------+-------------------
class40.img.Bin | 2097152768
ponserverxp.img | 2097152000
ponserverxp.img | 2097152000
ponserverxp.img | 2097152000
ponserverxp.img | 2097152000
ponserverxp.img | 2097152000
Post_Studio_SOE_Aug_2007.dmg | 2053557781
alder-000002.vmdk | 1988952064
alder-000002.vmdk | 1988952064
profiles.star | 1729841152
GarageBandExtraContent.tar | 1199718400
GarageBandExtraContent.tar | 1198376960
xcode_2.4.1_8m1910_6936315.dmg | 968079156
INBOX | 949566630
old photos.psd | 928456141
old photos.psd | 928456141
old photos.psd | 928456141
old photos.psd | 928456141
old photos.psd | 928456141
old photos.psd | 928456141
(20 rows)
Time: 1.617 ms
--
Craig Ringer
/*
Bacula® - The Network Backup Solution
Copyright (C) 2000-2007 Free Software Foundation Europe e.V.
The main author of Bacula is Kern Sibbald, with contributions from
many others, a complete list can be found in the file AUTHORS.
This program is Free Software; you can redistribute it and/or
modify it under the terms of version two of the GNU General Public
License as published by the Free Software Foundation and included
in the file LICENSE.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.
Bacula® is a registered trademark of Kern Sibbald.
The licensor of Bacula is the Free Software Foundation Europe
(FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
Switzerland, email:ftf AT fsfeurope DOT org.
*/
/*
* Generic base 64 input and output routines
*
* Written by Kern E. Sibbald, March MM.
*
* Version $Id: base64.c 8495 2009-02-28 14:52:14Z marcovw $
*/
#ifdef TEST_MODE
#include <glob.h>
#endif
#include <inttypes.h>
#include <cstring>
/* Encoding alphabet: base64_digits[v] is the character used for the
 * 6-bit value v (0..63). */
static uint8_t const base64_digits[64] =
{
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/'
};
/* Set to 1 by base64_init() once base64_map has been filled in. */
static int base64_inited = 0;
/* Reverse lookup filled in by base64_init(): character code -> 6-bit
 * value.  It has only 128 entries, so it covers character codes 0..127. */
static uint8_t base64_map[128];
/*
 * Build the decoding table used by from_base64().
 *
 * Every entry of base64_map is first cleared, then the entry for each
 * character of the alphabet is set to that character's 6-bit value.
 * Finally base64_inited is raised so callers know the table is ready.
 */
void
base64_init(void)
{
   int digit = 0;

   memset(base64_map, 0, sizeof(base64_map));
   while (digit < 64) {
      base64_map[(uint8_t)base64_digits[digit]] = digit;
      digit++;
   }
   base64_inited = 1;
}
/*
 * Convert a value to base64 characters: one digit per 6 bits, most
 * significant digit first, preceded by '-' for negative values.
 *
 *  value - number to encode
 *  where - output buffer; receives the digits followed by a NUL.
 *          It must have room for the longest possible result:
 *          an optional '-', one digit per 6 bits of uintmax_t and
 *          the NUL (13 bytes when intmax_t is 64 bits wide).
 *
 * Returns the number of characters
 *  stored (not including the EOS).
 */
int
to_base64(intmax_t value, char *where)
{
   uintmax_t magnitude;
   uintmax_t val;
   int i = 0;
   int n;

   /* Handle negative values.  The magnitude is computed in unsigned
    * arithmetic because -value overflows (undefined behaviour) when
    * value is the most negative intmax_t. */
   if (value < 0) {
      where[i++] = '-';
      magnitude = (uintmax_t)0 - (uintmax_t)value;
   } else {
      magnitude = (uintmax_t)value;
   }

   /* Determine output size */
   val = magnitude;
   do {
      val >>= 6;
      i++;
   } while (val);
   n = i;

   /* Output characters, least significant digit first, filling the
    * buffer from the end towards the front */
   val = magnitude;
   where[i] = 0;
   do {
      where[--i] = base64_digits[val & (uintmax_t)0x3F];
      val >>= 6;
   } while (val);
   return n;
}
/*
 * Convert the Base 64 characters in where to
 *  a value.  Decoding stops at the first space or NUL.  No checking
 *  is done on the validity of the characters: anything that is not
 *  in the base64 alphabet simply contributes zero bits.
 *
 *  value - receives the decoded number (negated when where starts
 *          with '-')
 *  where - text to decode
 *
 * Returns the number of characters consumed from where (not the
 *  decoded value), so a caller can advance to the next field.
 */
int
from_base64(intmax_t *value, char *where)
{
   uintmax_t val = 0;
   int i, neg;

   if (!base64_inited)
      base64_init();

   /* Check if it is negative */
   i = neg = 0;
   if (where[i] == '-') {
      i++;
      neg = 1;
   }

   /* Construct value */
   while (where[i] != 0 && where[i] != ' ') {
      uint8_t c = (uint8_t)where[i++];

      val <<= 6;
      /* base64_map only covers character codes 0..127.  A byte with
       * the high bit set would index past its end, so treat it like
       * any other character outside the alphabet (value 0). */
      if (c < sizeof(base64_map)) {
         val += base64_map[c];
      }
   }

   /* Negate in unsigned arithmetic so that no signed overflow can occur */
   if (neg) {
      val = (uintmax_t)0 - val;
   }
   *value = (intmax_t)val;
   return i;
}
/*
 * Encode binary data in bin of len bytes into
 * buf as base64 characters.
 *
 * If compatible is true, the bin_to_base64 routine will be compatible
 * with what the rest of the world uses.  If it is false, each input
 * byte is widened as a signed value before being merged into the bit
 * register, and the final partial group is not shifted up.
 * NOTE(review): the non-compatible form is presumably kept so that
 * previously written data still matches -- confirm before changing it.
 *
 *  buf        - output buffer
 *  buflen     - size of buf in bytes, including room for the EOS;
 *               output that does not fit is silently dropped
 *  bin        - input bytes
 *  binlen     - number of input bytes
 *  compatible - non-zero selects the conventional bit layout
 *
 * No '=' padding is ever appended.
 *
 * Returns: the number of characters stored not
 *  including the EOS
 */
int
bin_to_base64(char *buf, int buflen, char *bin, int binlen, int compatible)
{
   uint32_t reg, save, mask;
   int rem, i;
   int j = 0;

   /* Without room for at least the EOS there is nothing we may write */
   if (buflen <= 0) {
      return 0;
   }

   reg = 0;                           /* bit register */
   rem = 0;                           /* number of unconsumed bits in reg */
   buflen--;                          /* allow for storing EOS */
   for (i=0; i < binlen; ) {
      if (rem < 6) {
         /* Not enough bits for a digit: pull in the next input byte */
         reg <<= 8;
         if (compatible) {
            reg |= (uint8_t)bin[i++];
         } else {
            reg |= (int8_t)bin[i++];
         }
         rem += 8;
      }
      /* Emit the top 6 unconsumed bits, then restore the register */
      save = reg;
      reg >>= (rem - 6);
      if (j < buflen) {
         buf[j++] = base64_digits[reg & 0x3F];
      }
      reg = save;
      rem -= 6;
   }
   /* Flush whatever bits are left over after the last input byte */
   if (rem && j < buflen) {
      mask = (1 << rem) - 1;
      if (compatible) {
         buf[j++] = base64_digits[(reg & mask) << (6 - rem)];
      } else {
         buf[j++] = base64_digits[reg & mask];
      }
   }
   buf[j] = 0;
   return j;
}
#ifdef BIN_TEST
#include <stdio.h>
/*
 * Stand-alone check of bin_to_base64(), built only when BIN_TEST is
 * defined.  It fills junk[] with a descending byte sequence starting
 * at 0xFF, encodes the first 16 bytes in compatible mode and prints
 * the resulting length and text.
 */
int main(int argc, char *argv[])
{
   int xx = 0;
   int len;
   char buf[100];
   char junk[100];
   int i;

#ifdef xxxx
   for (i=0; i < 1000; i++) {
      bin_to_base64(buf, sizeof(buf), (char *)&xx, 4, true);
      printf("xx=%s\n", buf);
      xx++;
   }
#endif
   junk[0] = 0xFF;
   for (i=1; i<100; i++) {
      junk[i] = junk[i-1]-1;
   }
   /* The comma after sizeof(buf) was missing, which made this a syntax error */
   len = bin_to_base64(buf, sizeof(buf), junk, 16, true);
   printf("len=%d junk=%s\n", len, buf);
   return 0;
}
#endif
#ifdef TEST_MODE
/*
 * Error callback passed to glob(): announces that it was called and
 * returns 1.  Neither argument is examined.
 */
static int errfunc(const char *epath, int eernoo)
{
   (void)epath;
   (void)eernoo;
   fputs("in errfunc\n", stdout);
   return 1;
}
/*
 * Test the base64 routines by encoding and decoding
 * lstat() packets.
 *
 * Built only when TEST_MODE is defined.  Every path matched by the
 * glob pattern is lstat()ed, encoded with encode_stat(), decoded
 * again with decode_stat(), and the two stat structures are compared
 * field by field.  Passing -v as the first argument also prints each
 * encoded packet with its length.
 *
 * Returns 0 normally, 1 if the glob pattern could not be expanded.
 */
int main(int argc, char *argv[])
{
   char where[500];
   int i;
   glob_t my_glob;
   char *fname;
   struct stat statp;
   struct stat statn;
   int32_t LinkFI;
   int debug_level = 0;
   char *p;
   time_t t = 1028712799;

   if (argc > 1 && strcmp(argv[1], "-v") == 0)
      debug_level++;

   base64_init();

   my_glob.gl_offs = 0;
   /* When glob() fails (including "no match") gl_pathv is not
    * guaranteed to be a usable list, so do not walk it. */
   if (glob("/etc/grub.conf", GLOB_MARK, errfunc, &my_glob) != 0) {
      printf("glob of /etc/grub.conf failed or matched nothing\n");
      return 1;
   }

   for (i=0; my_glob.gl_pathv[i]; i++) {
      fname = my_glob.gl_pathv[i];
      if (lstat(fname, &statp) < 0) {
         berrno be;
         printf("Cannot stat %s: %s\n", fname, be.bstrerror(errno));
         continue;
      }
      encode_stat(where, &statp, 0, 0);

      printf("Encoded stat=%s\n", where);

#ifdef xxx
      p = where;
      p += to_base64((intmax_t)(statp.st_atime), p);
      *p++ = ' ';
      p += to_base64((intmax_t)t, p);
      printf("%s %s\n", fname, where);

      /* %lld requires a long long argument, which intmax_t need not be */
      printf("%s %lld\n", "st_dev", (long long)statp.st_dev);
      printf("%s %lld\n", "st_ino", (long long)statp.st_ino);
      printf("%s %lld\n", "st_mode", (long long)statp.st_mode);
      printf("%s %lld\n", "st_nlink", (long long)statp.st_nlink);
      printf("%s %lld\n", "st_uid", (long long)statp.st_uid);
      printf("%s %lld\n", "st_gid", (long long)statp.st_gid);
      printf("%s %lld\n", "st_rdev", (long long)statp.st_rdev);
      printf("%s %lld\n", "st_size", (long long)statp.st_size);
      printf("%s %lld\n", "st_blksize", (long long)statp.st_blksize);
      printf("%s %lld\n", "st_blocks", (long long)statp.st_blocks);
      printf("%s %lld\n", "st_atime", (long long)statp.st_atime);
      printf("%s %lld\n", "st_mtime", (long long)statp.st_mtime);
      printf("%s %lld\n", "st_ctime", (long long)statp.st_ctime);
#endif

      if (debug_level)
         printf("%s: len=%d val=%s\n", fname, (int)strlen(where), where);

      /* decode_stat() takes a third argument that receives the
       * hard-link file index; it is not compared here. */
      decode_stat(where, &statn, &LinkFI);

      if (statp.st_dev != statn.st_dev ||
          statp.st_ino != statn.st_ino ||
          statp.st_mode != statn.st_mode ||
          statp.st_nlink != statn.st_nlink ||
          statp.st_uid != statn.st_uid ||
          statp.st_gid != statn.st_gid ||
          statp.st_rdev != statn.st_rdev ||
          statp.st_size != statn.st_size ||
          statp.st_blksize != statn.st_blksize ||
          statp.st_blocks != statn.st_blocks ||
          statp.st_atime != statn.st_atime ||
          statp.st_mtime != statn.st_mtime ||
          statp.st_ctime != statn.st_ctime) {

         printf("%s: %s\n", fname, where);
         encode_stat(where, &statn, 0, 0);
         printf("%s: %s\n", fname, where);
         printf("NOT EQAL\n");
      }

   }
   globfree(&my_glob);

   printf("%d files examined\n", i);

   to_base64(UINT32_MAX, where);
   printf("UINT32_MAX=%s\n", where);

   return 0;
}
#endif
/*
* Generic base 64 input and output routines
*
* Written by Kern E. Sibbald, March MM.
*
* Version $Id: base64.h 7380 2008-07-14 10:42:59Z kerns $
*/
/*
Bacula® - The Network Backup Solution
Copyright (C) 2000-2006 Free Software Foundation Europe e.V.
The main author of Bacula is Kern Sibbald, with contributions from
many others, a complete list can be found in the file AUTHORS.
This program is Free Software; you can redistribute it and/or
modify it under the terms of version two of the GNU General Public
License as published by the Free Software Foundation and included
in the file LICENSE.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.
Bacula® is a registered trademark of Kern Sibbald.
The licensor of Bacula is the Free Software Foundation Europe
(FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
Switzerland, email:ftf AT fsfeurope DOT org.
*/
/* Maximum size of len bytes after base64 encoding.
 * The argument is parenthesized so that expressions such as
 * BASE64_SIZE(a + b) expand correctly.
 * NOTE(review): the trailing + 1 presumably leaves room for the
 * terminating NUL -- confirm against the callers. */
#define BASE64_SIZE(len) ((4 * (len) + 2) / 3 + 1)

/* Decode the base64 number at where into *value; returns the number
 * of characters consumed. */
int
from_base64(intmax_t *value, char *where);

/* Encode binlen bytes of bin into buf (buflen bytes available);
 * returns the number of characters stored, not counting the EOS. */
int
bin_to_base64(char *buf, int buflen, char *bin, int binlen, int compatible);
#include <inttypes.h>
#include "base64.h"
extern "C" {
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
}
/*
 * plug(st, val): store val into the struct member st, converting it to
 * whatever type st happens to have on this platform.
 *
 * Do casting according to unknown type to keep compiler happy.  The
 * macro forms parenthesize their arguments so that they expand safely
 * when given arbitrary expressions.
 */
#ifdef HAVE_TYPEOF
#define plug(st, val) (st) = (typeof(st))(val)
#else
#if !HAVE_GCC && HAVE_SUN_OS
/* Sun compiler does not handle templates correctly */
#define plug(st, val) (st) = (val)
#elif __sgi
#define plug(st, val) (st) = (val)
#else
/* Use templates to do the casting */
template <class T> void plug(T &st, uint64_t val)
{ st = static_cast<T>(val); }
#endif
#endif
extern "C" int decode_stat(char *buf, struct stat *statp, int32_t *LinkFI);

/*
 * Decode the base64 field starting at p into *val, then step over the
 * one separator character that follows it.  If the field ran up to
 * the end of the string, the returned pointer stays on the terminating
 * NUL, so a truncated packet cannot make the caller read past the end
 * of its buffer (any missing fields then decode as 0).
 */
static char *decode_field(char *p, intmax_t *val)
{
   p += from_base64(val, p);
   if (*p != 0) {
      p++;
   }
   return p;
}

/*
 * Decode a stat packet from base64 characters.
 *
 *  buf    - NUL-terminated packet: space-separated base64 numbers in
 *           the order st_dev, st_ino, st_mode, st_nlink, st_uid,
 *           st_gid, st_rdev, st_size, st_blksize, st_blocks,
 *           st_atime, st_mtime, st_ctime, optionally followed by the
 *           hard-link file index, the user flags and a data stream id
 *  statp  - receives the decoded fields
 *  LinkFI - receives the hard-link file index, or 0 when the packet
 *           does not carry one
 *
 * Returns the data stream id, or 0 when the packet has none (which
 * includes every packet that has no hard-link file index).
 */
int decode_stat(char *buf, struct stat *statp, int32_t *LinkFI)
{
   char *p = buf;
   /* from_base64() writes through an intmax_t pointer */
   intmax_t val;

   p = decode_field(p, &val);
   plug(statp->st_dev, val);
   p = decode_field(p, &val);
   plug(statp->st_ino, val);
   p = decode_field(p, &val);
   plug(statp->st_mode, val);
   p = decode_field(p, &val);
   plug(statp->st_nlink, val);
   p = decode_field(p, &val);
   plug(statp->st_uid, val);
   p = decode_field(p, &val);
   plug(statp->st_gid, val);
   p = decode_field(p, &val);
   plug(statp->st_rdev, val);
   p = decode_field(p, &val);
   plug(statp->st_size, val);
#ifndef HAVE_MINGW
   p = decode_field(p, &val);
   plug(statp->st_blksize, val);
   p = decode_field(p, &val);
   plug(statp->st_blocks, val);
#else
   /* The two fields are still present in the packet; skip over them */
   p = decode_field(p, &val);
//   plug(statp->st_blksize, val);
   p = decode_field(p, &val);
//   plug(statp->st_blocks, val);
#endif
   p = decode_field(p, &val);
   plug(statp->st_atime, val);
   p = decode_field(p, &val);
   plug(statp->st_mtime, val);
   /* The separator after st_ctime is not skipped here: its presence
    * is what signals that optional fields follow. */
   p += from_base64(&val, p);
   plug(statp->st_ctime, val);

   /* Optional FileIndex of hard linked file data */
   if (*p == ' ' || (*p != 0 && *(p+1) == ' ')) {
      p++;
      p += from_base64(&val, p);
      *LinkFI = (uint32_t)val;
   } else {
      *LinkFI = 0;
      return 0;
   }

   /* FreeBSD user flags */
   if (*p == ' ' || (*p != 0 && *(p+1) == ' ')) {
      p++;
      p += from_base64(&val, p);
#ifdef HAVE_CHFLAGS
      plug(statp->st_flags, val);
   } else {
      statp->st_flags = 0;
#endif
   }

   /* Look for data stream id */
   if (*p == ' ' || (*p != 0 && *(p+1) == ' ')) {
      p++;
      p += from_base64(&val, p);
   } else {
      val = 0;
   }
   return (int)val;
}
-- Remove any previous installation.  The function is identified by its
-- argument list, which is (text); naming (stat) here would never match it.
DROP FUNCTION IF EXISTS decode_stat(text);
-- CASCADE also removes anything that still depends on the type.
DROP TYPE IF EXISTS stat CASCADE;
-- Composite type returned by decode_stat(text).
-- The columns must stay in this order and keep these types:
-- bacula_decode_stat() fills the result positionally, building every
-- column as a 32-bit integer except st_rdev, which is built as 64-bit.
CREATE TYPE stat AS (
st_dev int4,
st_ino int4,
st_mod int4, -- receives the decoded st_mode value
st_nlink int4,
st_uid int4,
st_gid int4,
st_rdev int8,
st_size int4,
st_blksize int4,
st_blocks int4,
st_atime int4,
st_mtime int4,
st_ctime int4,
-- st_flags int4, -- Unused
LinkFI int4 -- hard-link file index reported by decode_stat()
);
-- SQL entry point: decode_stat(text) returns one "stat" row.  It is
-- bound to the C symbol bacula_decode_stat in decode_stat.so.
-- STRICT: a NULL argument yields NULL without calling the C code.
-- IMMUTABLE: the result depends only on the argument, which is what
-- allows the function to be used in an index expression.
CREATE OR REPLACE FUNCTION decode_stat(text) RETURNS stat
    AS 'decode_stat.so', 'bacula_decode_stat'
    LANGUAGE C
    IMMUTABLE
    STRICT;
# Builds decode_stat.so and installs it into PostgreSQL's package
# library directory (as reported by pg_config).
#
# base64.c and decode_stat.c are compiled as C++ (g++); pgfunc.c is
# plain C and needs the PostgreSQL server headers.
# Recipe lines must begin with a TAB character.

# These targets do not name files
.PHONY: default clean install

default: decode_stat.so

base64.o: Makefile base64.c base64.h
	g++ -g -c -fpic -fno-exceptions -o base64.o base64.c

decode_stat.o: Makefile decode_stat.c base64.h
	g++ -g -c -fpic -fno-exceptions -o decode_stat.o decode_stat.c

pgfunc.o: Makefile pgfunc.c
	gcc -g -c -fpic -o pgfunc.o pgfunc.c -I `pg_config --includedir-server`

decode_stat.so: Makefile decode_stat.o pgfunc.o base64.o
	g++ -fpic -shared -o decode_stat.so base64.o pgfunc.o decode_stat.o

# -f: do not fail when there is nothing to remove
clean:
	rm -f *.o *.so

install: decode_stat.so
	cp decode_stat.so `pg_config --pkglibdir`/decode_stat.so
#include "postgres.h"
#include <string.h>
#include "fmgr.h"
#include "executor/executor.h"
#include "funcapi.h"
#include "sys/types.h"
#include "sys/stat.h"
#include "unistd.h"
/* Stat packet decoder; defined in decode_stat.c, where it is declared
 * extern "C" so that it can be called from this C file. */
int decode_stat(char *buf, struct stat *statp, int32_t *LinkFI);
/* Emit the module magic block when the server headers define the macro */
#ifdef PG_MODULE_MAGIC
PG_MODULE_MAGIC;
#endif
/*
 * bds_text_to_cstring  (modelled on text_to_cstring from proposed Pg patches)
 *
 * Return a freshly palloc'd, NUL-terminated copy of the bytes held in
 * the text value `t'.  The value is run through
 * pg_detoast_datum_packed() first, so compressed or toasted input is
 * accepted as well.  If that step produced a new copy, the copy is
 * released again before returning; `t' itself is never freed.
 */
static char *
bds_text_to_cstring (const text * t)
{
  text *detoasted = pg_detoast_datum_packed ((struct varlena *) t);
  int nbytes = VARSIZE_ANY_EXHDR (detoasted);
  char *cstr = (char *) palloc (nbytes + 1);

  memcpy (cstr, VARDATA_ANY (detoasted), nbytes);
  cstr[nbytes] = '\0';

  /* Only free what detoasting allocated, never the caller's value */
  if (detoasted != t)
    pfree (detoasted);

  return cstr;
}
PG_FUNCTION_INFO_V1(bacula_decode_stat);
Datum
bacula_decode_stat(PG_FUNCTION_ARGS)
{
/* Result tuple storage and interim values */
TupleDesc tupledesc;
Datum values[15];
HeapTuple heaptuple;
int tuplen;
bool *nulls;
/* Text to parse */
text *t;
char *c;
/* Storage for result from decode_stat(...) */
int32_t LinkFI;
struct stat st;
/* obtain a (maybe palloc'd) buffer of the text we'll work on,
zero-terminated. */
t = PG_GETARG_TEXT_P(0);
c = bds_text_to_cstring(t);
PG_FREE_IF_COPY(t, 0);
/* Use Bacula's code to decode the stat string */
decode_stat(c, &st, &LinkFI);
pfree(c);
/* If `t' was palloc'd, free it */
/* Set up to return a tuple, and complain if we're in a context where we
can't */
if( get_call_result_type( fcinfo, NULL, &tupledesc ) != TYPEFUNC_COMPOSITE )
ereport( ERROR,
( errcode( ERRCODE_FEATURE_NOT_SUPPORTED ),
errmsg( "function returning record called in context "
"that cannot accept type record" )));
/* Copy the stat data into a Datum array we can feed to Pg's tuple creation
code */
values[0] = Int32GetDatum( st.st_dev );
values[1] = Int32GetDatum( st.st_ino );
values[2] = Int32GetDatum( st.st_mode );
values[3] = Int32GetDatum( st.st_nlink );
values[4] = Int32GetDatum( st.st_uid );
values[5] = Int32GetDatum( st.st_gid );
values[6] = Int64GetDatum( st.st_rdev );
values[7] = Int32GetDatum( st.st_size );
values[8] = Int32GetDatum( st.st_blksize );
values[9] = Int32GetDatum( st.st_blocks );
values[10] = Int32GetDatum( st.st_atime );
values[11] = Int32GetDatum( st.st_mtime );
values[12] = Int32GetDatum( st.st_ctime );
/* values[13] = Int32GetDatum( st.st_flags ); */ /* fbsd only */
values[13] = Int32GetDatum( LinkFI );
/* Create the return tuple with the datum array */
BlessTupleDesc( tupledesc );
tuplen = tupledesc->natts;
nulls = palloc( tuplen * sizeof( bool ) );
memset( nulls, 0, tuplen * sizeof( bool ) );
heaptuple = heap_form_tuple( tupledesc, values, nulls );
pfree(nulls);
PG_RETURN_DATUM( HeapTupleGetDatum( heaptuple ) );
};
------------------------------------------------------------------------------
_______________________________________________
Bacula-users mailing list
Bacula-users AT lists.sourceforge DOT net
https://lists.sourceforge.net/lists/listinfo/bacula-users
|