circache: use a zlib wrapper instead of direct calls

This commit is contained in:
Jean-Francois Dockes 2017-12-28 15:27:38 +01:00
parent 2210d49d3d
commit da64e3d9e0
4 changed files with 206 additions and 103 deletions

View File

@ -242,6 +242,8 @@ utils/utf8iter.h \
utils/wipedir.cpp \
utils/wipedir.h \
utils/workqueue.h \
utils/zlibut.cpp \
utils/zlibut.h \
xaposix/safefcntl.h \
xaposix/safesysstat.h \
xaposix/safesyswait.h \

View File

@ -18,6 +18,8 @@
#ifndef TEST_CIRCACHE
#include "autoconfig.h"
#include "circache.h"
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
@ -27,11 +29,11 @@
#include "safeunistd.h"
#include <assert.h>
#include <memory.h>
#include <zlib.h>
#include "chrono.h"
#include <memory>
#include "chrono.h"
#include "zlibut.h"
#ifndef _WIN32
#include <sys/uio.h>
@ -96,8 +98,6 @@ struct TempBuf {
char *m_buf;
};
static bool inflateToDynBuf(void *inp, UINT inlen, void **outpp, UINT *outlenp);
/*
* File structure:
* - Starts with a 1-KB header block, with a param dictionary.
@ -631,14 +631,12 @@ public:
if (hd.flags & EFDataCompressed) {
LOGDEB1("Circache:readdicdata: data compressed\n" );
void *uncomp;
unsigned int uncompsize;
if (!inflateToDynBuf(bf, hd.datasize, &uncomp, &uncompsize)) {
ZLibUtBuf buf;
if (!inflateToBuf(bf, hd.datasize, buf)) {
m_reason << "CirCache: decompression failed ";
return false;
}
data->assign((char *)uncomp, uncompsize);
free(uncomp);
data->assign(buf.getBuf(), buf.getCnt());
} else {
LOGDEB1("Circache:readdicdata: data NOT compressed\n" );
data->assign(bf, hd.datasize);
@ -1035,17 +1033,14 @@ bool CirCache::put(const string& udi, const ConfSimple *iconf,
const char *datap = data.c_str();
size_t datalen = data.size();
unsigned short flags = 0;
TempBuf compbuf;
ZLibUtBuf buf;
if (!(iflags & NoCompHint)) {
uLong len = compressBound(static_cast<uLong>(data.size()));
char *bf = compbuf.setsize(len);
if (bf != 0 &&
compress((Bytef*)bf, &len, (Bytef*)data.c_str(),
static_cast<uLong>(data.size())) == Z_OK) {
if (float(len) < 0.9 * float(data.size())) {
// bf is local but it's our static buffer address
datap = bf;
datalen = len;
if (deflateToBuf(data.c_str(), data.size(), buf)) {
// If compression succeeds, and the ratio makes sense,
// store compressed
if (float(buf.getCnt()) < 0.9 * float(data.size())) {
datap = buf.getBuf();
datalen = buf.getCnt();
flags |= EFDataCompressed;
}
}
@ -1268,90 +1263,6 @@ bool CirCache::getCurrent(string& udi, string& dic, string *data)
return true;
}
/*
 * Allocate or grow a heap array which is managed as a count of fixed-size
 * units.
 *
 * On the first call (cp == NULL) 'min' units are allocated. On later calls
 * the unit count doubles until it exceeds 'maxinc', after which 'maxinc'
 * units are added per call.
 *
 * Returns the (possibly moved) array and updates *np to the new unit count.
 * Returns NULL on failure: the previous array, if any, has then been freed
 * and *np is 0, so the caller can overwrite its own pointer with the result
 * without leaking memory or keeping a stale capacity.
 */
static void *allocmem(
    void *cp,    /* The array to grow. may be NULL */
    int sz,      /* Unit size in bytes */
    int *np,     /* Pointer to current allocation number */
    int min,     /* Number to allocate the first time */
    int maxinc)  /* Maximum increment */
{
    const size_t maxbytes = (size_t)-1;

    if (cp == 0) {
        /* Initial allocation. Compute the byte size as a size_t and refuse
         * negative or overflowing requests instead of multiplying ints. */
        if (sz < 0 || min < 0 ||
            (sz > 0 && (size_t)min > maxbytes / (size_t)sz)) {
            *np = 0;
            return 0;
        }
        cp = malloc((size_t)min * (size_t)sz);
        *np = cp ? min : 0;
        return cp;
    }

    int inc = (*np > maxinc) ? maxinc : *np;
    void *grown = 0;
    if (sz >= 0 && *np >= 0 && inc >= 0) {
        size_t count = (size_t)*np + (size_t)inc;
        if (sz == 0 || count == 0) {
            /* Zero byte request: keep the current block, realloc(p, 0) has
             * implementation-defined behaviour and could free it. */
            *np += inc;
            return cp;
        }
        if (count <= maxbytes / (size_t)sz) {
            grown = realloc(cp, count * (size_t)sz);
        }
    }
    if (grown == 0) {
        /* realloc() leaves the old block allocated when it fails: release
         * it here, because callers overwrite their pointer with our result. */
        free(cp);
        *np = 0;
        return 0;
    }
    *np += inc;
    return grown;
}
static bool inflateToDynBuf(void* inp, UINT inlen, void **outpp, UINT *outlenp)
{
z_stream d_stream; /* decompression stream */
LOGDEB0("inflateToDynBuf: inlen " << (inlen) << "\n" );
d_stream.zalloc = (alloc_func)0;
d_stream.zfree = (free_func)0;
d_stream.opaque = (voidpf)0;
// Compression works well on html files, 4-6 is quite common, Otoh we
// maybe passed a big, little if at all compressed image or pdf file,
// So we set the initial allocation at 3 times the input size
const int imul = 3;
const int mxinc = 20;
char *outp = 0;
int alloc = 0;
d_stream.next_in = (Bytef*)inp;
d_stream.avail_in = inlen;
d_stream.next_out = 0;
d_stream.avail_out = 0;
int err;
if ((err = inflateInit(&d_stream)) != Z_OK) {
LOGERR("Inflate: inflateInit: err " << (err) << " msg " << (d_stream.msg) << "\n" );
free(outp);
return false;
}
for (;;) {
LOGDEB2("InflateToDynBuf: avail_in " << (d_stream.avail_in) << " total_in " << (d_stream.total_in) << " avail_out " << (d_stream.avail_out) << " total_out " << (d_stream.total_out) << "\n" );
if (d_stream.avail_out == 0) {
if ((outp = (char*)allocmem(outp, inlen, &alloc,
imul, mxinc)) == 0) {
LOGERR("Inflate: out of memory, current alloc " << (alloc * inlen) << "\n" );
inflateEnd(&d_stream);
return false;
} else {
LOGDEB2("inflateToDynBuf: realloc(" << (alloc * inlen) << ") ok\n" );
}
d_stream.avail_out = alloc * inlen - d_stream.total_out;
d_stream.next_out = (Bytef*)(outp + d_stream.total_out);
}
err = inflate(&d_stream, Z_NO_FLUSH);
if (err == Z_STREAM_END) {
break;
}
if (err != Z_OK) {
LOGERR("Inflate: error " << (err) << " msg " << (d_stream.msg) << "\n" );
inflateEnd(&d_stream);
free(outp);
return false;
}
}
*outlenp = d_stream.total_out;
*outpp = (Bytef *)outp;
if ((err = inflateEnd(&d_stream)) != Z_OK) {
LOGERR("Inflate: inflateEnd error " << (err) << " msg " << (d_stream.msg) << "\n" );
return false;
}
LOGDEB0("inflateToDynBuf: ok, output size " << (d_stream.total_out) << "\n" );
return true;
}
// Copy all entries from occ to ncc. Both are already open.
static bool copyall(std::shared_ptr<CirCache> occ,
std::shared_ptr<CirCache> ncc, int& nentries,

169
src/utils/zlibut.cpp Normal file
View File

@ -0,0 +1,169 @@
/* Copyright (C) 2017 J.F.Dockes
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the
* Free Software Foundation, Inc.,
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#include "zlibut.h"

#include <limits.h>
#include <stdlib.h>
#include <zlib.h>

#include "log.h"
/*
 * Allocate or grow a heap array which is managed as a count of fixed-size
 * units.
 *
 * On the first call (cp == NULL) 'min' units are allocated. On later calls
 * the unit count doubles until it exceeds 'maxinc', after which 'maxinc'
 * units are added per call.
 *
 * Returns the (possibly moved) array and updates *np to the new unit count.
 * Returns NULL on failure: the previous array, if any, has then been freed
 * and *np is 0, so the caller can overwrite its own pointer with the result
 * without leaking memory or keeping a stale capacity.
 */
static void *allocmem(
    void *cp,    /* The array to grow. may be NULL */
    int sz,      /* Unit size in bytes */
    int *np,     /* Pointer to current allocation number */
    int min,     /* Number to allocate the first time */
    int maxinc)  /* Maximum increment */
{
    const size_t maxbytes = (size_t)-1;

    if (cp == 0) {
        /* Initial allocation. Compute the byte size as a size_t and refuse
         * negative or overflowing requests instead of multiplying ints. */
        if (sz < 0 || min < 0 ||
            (sz > 0 && (size_t)min > maxbytes / (size_t)sz)) {
            *np = 0;
            return 0;
        }
        cp = malloc((size_t)min * (size_t)sz);
        *np = cp ? min : 0;
        return cp;
    }

    int inc = (*np > maxinc) ? maxinc : *np;
    void *grown = 0;
    if (sz >= 0 && *np >= 0 && inc >= 0) {
        size_t count = (size_t)*np + (size_t)inc;
        if (sz == 0 || count == 0) {
            /* Zero byte request: keep the current block, realloc(p, 0) has
             * implementation-defined behaviour and could free it. */
            *np += inc;
            return cp;
        }
        if (count <= maxbytes / (size_t)sz) {
            grown = realloc(cp, count * (size_t)sz);
        }
    }
    if (grown == 0) {
        /* realloc() leaves the old block allocated when it fails: release
         * it here, because callers overwrite their pointer with our result. */
        free(cp);
        *np = 0;
        return 0;
    }
    *np += inc;
    return grown;
}
/*
 * Private state of a ZLibUtBuf: a malloc()ed buffer whose capacity is
 * tracked as a count of units (alloc) of a fixed size (initsz), plus the
 * count of valid data bytes stored in it.
 */
class ZLibUtBuf::Internal {
public:
    Internal() {}
    ~Internal() {
        if (buf && dofree) {
            free(buf);
        }
    }

    // The object holds a raw pointer to the buffer: a copy would end up
    // freeing it twice.
    Internal(const Internal&) = delete;
    Internal& operator=(const Internal&) = delete;

    // Allocate the buffer or make it bigger by one allocmem() step. The
    // value of n is only used by the first call on an object where no unit
    // size is set yet: it then becomes the unit size for all further growth.
    // Returns false if the memory could not be obtained.
    bool grow(size_t n) {
        if (!initsz)
            initsz = static_cast<int>(n);
        buf = (char *)allocmem(buf, initsz, &alloc, 1, 20);
        if (nullptr == buf) {
            // No usable buffer any more: do not let getAlloc() go on
            // reporting a capacity which callers would trust and write to.
            alloc = 0;
            return false;
        }
        return true;
    }

    // Current buffer capacity in bytes.
    int getAlloc() {
        return alloc * initsz;
    }

    char *buf{nullptr};
    int initsz{0}; // Set to first alloc size
    int alloc{0}; // Allocation count (allocmem()). Capacity is alloc*initsz
    int datacnt{0}; // Data count
    bool dofree{true}; // Does buffer belong to me ? The destructor only frees it if set

    // Must match the signatures of the actual functions (const input).
    friend bool inflateToBuf(const void* inp, unsigned int inlen, ZLibUtBuf& buf);
    friend bool deflateToBuf(const void* inp, unsigned int inlen, ZLibUtBuf& buf);
};
// Build an empty buffer object: only the private state holder is created
// here.
ZLibUtBuf::ZLibUtBuf()
    : m(new Internal)
{
}

// Dispose of the private state holder. Its own destructor decides whether
// the data buffer is released.
ZLibUtBuf::~ZLibUtBuf()
{
    delete m;
}
// Access the data buffer. The object keeps ownership of the memory. The
// returned pointer is null when the object currently holds no buffer.
char *ZLibUtBuf::getBuf() const
{
    return m->buf;
}

// Transfer ownership of the data buffer to the caller, who becomes
// responsible for releasing it with free().
// The object is detached from the memory, so that using it again for
// another operation allocates a fresh buffer instead of resizing or
// overwriting the one which now belongs to the caller. The data count is
// left alone, so that getCnt() still describes the returned data.
char *ZLibUtBuf::takeBuf()
{
    char *taken = m->buf;
    m->buf = nullptr;
    m->alloc = 0;
    m->initsz = 0;
    return taken;
}

// Count of valid data bytes, as recorded by the last operation which stored
// data into the buffer.
size_t ZLibUtBuf::getCnt()
{
    return m->datacnt;
}
bool inflateToBuf(const void* inp, unsigned int inlen, ZLibUtBuf& buf)
{
LOGDEB0("inflateToBuf: inlen " << inlen << "\n");
z_stream d_stream; /* decompression stream */
d_stream.zalloc = (alloc_func)0;
d_stream.zfree = (free_func)0;
d_stream.opaque = (voidpf)0;
d_stream.next_in = (Bytef*)inp;
d_stream.avail_in = inlen;
d_stream.next_out = 0;
d_stream.avail_out = 0;
int err;
if ((err = inflateInit(&d_stream)) != Z_OK) {
LOGERR("Inflate: inflateInit: err " << err << " msg " <<
d_stream.msg << "\n");
return false;
}
for (;;) {
LOGDEB2("InflateToDynBuf: avail_in " << d_stream.avail_in <<
" total_in " << d_stream.total_in << " avail_out " <<
d_stream.avail_out << " total_out " << d_stream.total_out <<
"\n");
if (d_stream.avail_out == 0) {
if (!buf.m->grow(inlen)) {
LOGERR("Inflate: out of memory, current alloc " <<
buf.m->getAlloc() << "\n");
inflateEnd(&d_stream);
return false;
}
d_stream.avail_out = buf.m->getAlloc() - d_stream.total_out;
d_stream.next_out = (Bytef*)(buf.getBuf() + d_stream.total_out);
}
err = inflate(&d_stream, Z_NO_FLUSH);
if (err == Z_STREAM_END) {
break;
}
if (err != Z_OK) {
LOGERR("Inflate: error " << err << " msg " << d_stream.msg << "\n");
inflateEnd(&d_stream);
return false;
}
}
if ((err = inflateEnd(&d_stream)) != Z_OK) {
LOGERR("Inflate: inflateEnd error " << err << " msg " << d_stream.msg
<< "\n");
return false;
}
buf.m->datacnt = d_stream.total_out;
LOGDEB1("inflateToBuf: ok, output size " << buf.getCnt() << "\n");
return true;
}
bool deflateToBuf(const void* inp, unsigned int inlen, ZLibUtBuf& buf)
{
uLongf len = compressBound(static_cast<uLong>(inlen));
// This needs cleanup: because the buffer is reused inside
// e.g. circache, we want a minimum size in case the 1st doc size,
// which sets the grow increment is small. It would be better to
// let the user set a min size hint.
if (len < 500 *1024)
len = 500 * 1024;
while (buf.m->getAlloc() < int(len)) {
if (!buf.m->grow(len)) {
LOGERR("deflateToBuf: can't get buffer for " << len << " bytes\n");
return false;
}
}
bool ret = compress((Bytef*)buf.getBuf(), &len, (Bytef*)inp,
static_cast<uLong>(inlen)) == Z_OK;
buf.m->datacnt = len;
return ret;
}

21
src/utils/zlibut.h Normal file
View File

@ -0,0 +1,21 @@
#ifndef _ZLIBUT_H_INCLUDED_
#define _ZLIBUT_H_INCLUDED_
#include <sys/types.h>
/**
 * Memory buffer used as the output of inflateToBuf() and deflateToBuf().
 *
 * The implementation details are kept in a private Internal object, which
 * is created by the constructor and deleted by the destructor.
 */
class ZLibUtBuf {
public:
    ZLibUtBuf();
    ~ZLibUtBuf();

    // The object owns its private state through a raw pointer: an implicit
    // copy would share it and delete it twice, so copying is disabled.
    ZLibUtBuf(const ZLibUtBuf&) = delete;
    ZLibUtBuf& operator=(const ZLibUtBuf&) = delete;

    /** Pointer to the data. The memory stays owned by this object. */
    char *getBuf() const;

    /** Pointer to the data, with ownership transferred to the caller: this
     *  object will not free the memory, the caller must release it with
     *  free(). */
    char *takeBuf();

    /** Count of data bytes stored in the buffer by the last operation. */
    size_t getCnt();

    class Internal;
    Internal *m;
};
/** Decompress the inlen bytes of zlib data found at inp into buf.
 *  @return true on success, after which buf.getBuf() and buf.getCnt() give
 *    access to the decompressed data. false on error (zlib initialisation
 *    or decompression error, or memory allocation failure). */
bool inflateToBuf(const void* inp, unsigned int inlen, ZLibUtBuf& buf);
/** Compress the inlen bytes found at inp into buf, using zlib compress().
 *  @return true if the compression succeeded, after which buf.getBuf() and
 *    buf.getCnt() give access to the compressed data. false if the output
 *    buffer could not be allocated or if compress() failed. */
bool deflateToBuf(const void* inp, unsigned int inlen, ZLibUtBuf& buf);
#endif /* _ZLIBUT_H_INCLUDED_ */