diff --git a/src/Makefile.am b/src/Makefile.am index 53b0a08d..f4294f41 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -242,6 +242,8 @@ utils/utf8iter.h \ utils/wipedir.cpp \ utils/wipedir.h \ utils/workqueue.h \ +utils/zlibut.cpp \ +utils/zlibut.h \ xaposix/safefcntl.h \ xaposix/safesysstat.h \ xaposix/safesyswait.h \ diff --git a/src/utils/circache.cpp b/src/utils/circache.cpp index 505b3492..6a736887 100644 --- a/src/utils/circache.cpp +++ b/src/utils/circache.cpp @@ -18,6 +18,8 @@ #ifndef TEST_CIRCACHE #include "autoconfig.h" +#include "circache.h" + #include #include #include @@ -27,11 +29,11 @@ #include "safeunistd.h" #include #include -#include -#include "chrono.h" #include +#include "chrono.h" +#include "zlibut.h" #ifndef _WIN32 #include @@ -96,8 +98,6 @@ struct TempBuf { char *m_buf; }; -static bool inflateToDynBuf(void *inp, UINT inlen, void **outpp, UINT *outlenp); - /* * File structure: * - Starts with a 1-KB header block, with a param dictionary. @@ -631,14 +631,12 @@ public: if (hd.flags & EFDataCompressed) { LOGDEB1("Circache:readdicdata: data compressed\n" ); - void *uncomp; - unsigned int uncompsize; - if (!inflateToDynBuf(bf, hd.datasize, &uncomp, &uncompsize)) { + ZLibUtBuf buf; + if (!inflateToBuf(bf, hd.datasize, buf)) { m_reason << "CirCache: decompression failed "; return false; } - data->assign((char *)uncomp, uncompsize); - free(uncomp); + data->assign(buf.getBuf(), buf.getCnt()); } else { LOGDEB1("Circache:readdicdata: data NOT compressed\n" ); data->assign(bf, hd.datasize); @@ -1035,17 +1033,14 @@ bool CirCache::put(const string& udi, const ConfSimple *iconf, const char *datap = data.c_str(); size_t datalen = data.size(); unsigned short flags = 0; - TempBuf compbuf; + ZLibUtBuf buf; if (!(iflags & NoCompHint)) { - uLong len = compressBound(static_cast(data.size())); - char *bf = compbuf.setsize(len); - if (bf != 0 && - compress((Bytef*)bf, &len, (Bytef*)data.c_str(), - static_cast(data.size())) == Z_OK) { - if (float(len) < 0.9 * float(data.size())) { - // bf is local but it's our static buffer address - datap = bf; - datalen = len; + if (deflateToBuf(data.c_str(), data.size(), buf)) { + // If compression succeeds, and the ratio makes sense, + // store compressed + if (float(buf.getCnt()) < 0.9 * float(data.size())) { + datap = buf.getBuf(); + datalen = buf.getCnt(); flags |= EFDataCompressed; } } @@ -1268,90 +1263,6 @@ bool CirCache::getCurrent(string& udi, string& dic, string *data) return true; } -static void *allocmem( - void *cp, /* The array to grow. may be NULL */ - int sz, /* Unit size in bytes */ - int *np, /* Pointer to current allocation number */ - int min, /* Number to allocate the first time */ - int maxinc) /* Maximum increment */ -{ - if (cp == 0) { - cp = malloc(min * sz); - *np = cp ? min : 0; - return cp; - } - - int inc = (*np > maxinc) ? maxinc : *np; - if ((cp = realloc(cp, (*np + inc) * sz)) != 0) { - *np += inc; - } - return cp; -} - -static bool inflateToDynBuf(void* inp, UINT inlen, void **outpp, UINT *outlenp) -{ - z_stream d_stream; /* decompression stream */ - - LOGDEB0("inflateToDynBuf: inlen " << (inlen) << "\n" ); - - d_stream.zalloc = (alloc_func)0; - d_stream.zfree = (free_func)0; - d_stream.opaque = (voidpf)0; - // Compression works well on html files, 4-6 is quite common, Otoh we - // maybe passed a big, little if at all compressed image or pdf file, - // So we set the initial allocation at 3 times the input size - const int imul = 3; - const int mxinc = 20; - char *outp = 0; - int alloc = 0; - d_stream.next_in = (Bytef*)inp; - d_stream.avail_in = inlen; - d_stream.next_out = 0; - d_stream.avail_out = 0; - - int err; - if ((err = inflateInit(&d_stream)) != Z_OK) { - LOGERR("Inflate: inflateInit: err " << (err) << " msg " << (d_stream.msg) << "\n" ); - free(outp); - return false; - } - - for (;;) { - LOGDEB2("InflateToDynBuf: avail_in " << (d_stream.avail_in) << " total_in " << (d_stream.total_in) << " avail_out " << (d_stream.avail_out) << " total_out " << (d_stream.total_out) << "\n" ); - if (d_stream.avail_out == 0) { - if ((outp = (char*)allocmem(outp, inlen, &alloc, - imul, mxinc)) == 0) { - LOGERR("Inflate: out of memory, current alloc " << (alloc * inlen) << "\n" ); - inflateEnd(&d_stream); - return false; - } else { - LOGDEB2("inflateToDynBuf: realloc(" << (alloc * inlen) << ") ok\n" ); - } - d_stream.avail_out = alloc * inlen - d_stream.total_out; - d_stream.next_out = (Bytef*)(outp + d_stream.total_out); - } - err = inflate(&d_stream, Z_NO_FLUSH); - if (err == Z_STREAM_END) { - break; - } - if (err != Z_OK) { - LOGERR("Inflate: error " << (err) << " msg " << (d_stream.msg) << "\n" ); - inflateEnd(&d_stream); - free(outp); - return false; - } - } - *outlenp = d_stream.total_out; - *outpp = (Bytef *)outp; - - if ((err = inflateEnd(&d_stream)) != Z_OK) { - LOGERR("Inflate: inflateEnd error " << (err) << " msg " << (d_stream.msg) << "\n" ); - return false; - } - LOGDEB0("inflateToDynBuf: ok, output size " << (d_stream.total_out) << "\n" ); - return true; -} - // Copy all entries from occ to ncc. Both are already open. static bool copyall(std::shared_ptr occ, std::shared_ptr ncc, int& nentries, diff --git a/src/utils/zlibut.cpp b/src/utils/zlibut.cpp new file mode 100644 index 00000000..a5a11e64 --- /dev/null +++ b/src/utils/zlibut.cpp @@ -0,0 +1,169 @@ +/* Copyright (C) 2017 J.F.Dockes + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the + * Free Software Foundation, Inc., + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#include "zlibut.h" + +#include + +#include "log.h" + +static void *allocmem( + void *cp, /* The array to grow. may be NULL */ + int sz, /* Unit size in bytes */ + int *np, /* Pointer to current allocation number */ + int min, /* Number to allocate the first time */ + int maxinc) /* Maximum increment */ +{ + if (cp == 0) { + cp = malloc(min * sz); + *np = cp ? min : 0; + return cp; + } + + int inc = (*np > maxinc) ? maxinc : *np; + if ((cp = realloc(cp, (*np + inc) * sz)) != 0) { + *np += inc; + } + return cp; +} + +class ZLibUtBuf::Internal { +public: + Internal() {} + ~Internal() { + if (buf && dofree) { + free(buf); + } + } + bool grow(size_t n) { + if (!initsz) + initsz = n; + buf = (char *)allocmem(buf, initsz, &alloc, 1, 20); + return nullptr != buf; + } + int getAlloc() { + return alloc * initsz; + } + char *buf{nullptr}; + int initsz{0}; // Set to first alloc size + int alloc{0}; // Allocation count (allocmem()). Capa is alloc*inisz + int datacnt{0}; // Data count + bool dofree{true}; // Does buffer belong to me ? + friend bool inflateToBuf(void* inp, unsigned int inlen, ZLibUtBuf& buf); +}; + +ZLibUtBuf::ZLibUtBuf() +{ + m = new Internal; +} +ZLibUtBuf::~ZLibUtBuf() +{ + delete m; +} + +char *ZLibUtBuf::getBuf() const +{ + return m->buf; +} +char *ZLibUtBuf::takeBuf() +{ + m->dofree = false; + return m->buf; +} +size_t ZLibUtBuf::getCnt() +{ + return m->datacnt; +} + +bool inflateToBuf(const void* inp, unsigned int inlen, ZLibUtBuf& buf) +{ + LOGDEB0("inflateToBuf: inlen " << inlen << "\n"); + + z_stream d_stream; /* decompression stream */ + + d_stream.zalloc = (alloc_func)0; + d_stream.zfree = (free_func)0; + d_stream.opaque = (voidpf)0; + d_stream.next_in = (Bytef*)inp; + d_stream.avail_in = inlen; + d_stream.next_out = 0; + d_stream.avail_out = 0; + + int err; + if ((err = inflateInit(&d_stream)) != Z_OK) { + LOGERR("Inflate: inflateInit: err " << err << " msg " << + d_stream.msg << "\n"); + return false; + } + + for (;;) { + LOGDEB2("InflateToDynBuf: avail_in " << d_stream.avail_in << + " total_in " << d_stream.total_in << " avail_out " << + d_stream.avail_out << " total_out " << d_stream.total_out << + "\n"); + if (d_stream.avail_out == 0) { + if (!buf.m->grow(inlen)) { + LOGERR("Inflate: out of memory, current alloc " << + buf.m->getAlloc() << "\n"); + inflateEnd(&d_stream); + return false; + } + d_stream.avail_out = buf.m->getAlloc() - d_stream.total_out; + d_stream.next_out = (Bytef*)(buf.getBuf() + d_stream.total_out); + } + err = inflate(&d_stream, Z_NO_FLUSH); + if (err == Z_STREAM_END) { + break; + } + if (err != Z_OK) { + LOGERR("Inflate: error " << err << " msg " << d_stream.msg << "\n"); + inflateEnd(&d_stream); + return false; + } + } + if ((err = inflateEnd(&d_stream)) != Z_OK) { + LOGERR("Inflate: inflateEnd error " << err << " msg " << d_stream.msg + << "\n"); + return false; + } + buf.m->datacnt = d_stream.total_out; + LOGDEB1("inflateToBuf: ok, output size " << buf.getCnt() << "\n"); + return true; +} + + +bool deflateToBuf(const void* inp, unsigned int inlen, ZLibUtBuf& buf) +{ + uLongf len = compressBound(static_cast(inlen)); + // This needs cleanup: because the buffer is reused inside + // e.g. circache, we want a minimum size in case the 1st doc size, + // which sets the grow increment is small. It would be better to + // let the user set a min size hint. + if (len < 500 *1024) + len = 500 * 1024; + + while (buf.m->getAlloc() < int(len)) { + if (!buf.m->grow(len)) { + LOGERR("deflateToBuf: can't get buffer for " << len << " bytes\n"); + return false; + } + } + bool ret = compress((Bytef*)buf.getBuf(), &len, (Bytef*)inp, + static_cast(inlen)) == Z_OK; + buf.m->datacnt = len; + return ret; +} diff --git a/src/utils/zlibut.h b/src/utils/zlibut.h new file mode 100644 index 00000000..4e9016c8 --- /dev/null +++ b/src/utils/zlibut.h @@ -0,0 +1,21 @@ +#ifndef _ZLIBUT_H_INCLUDED_ +#define _ZLIBUT_H_INCLUDED_ + +#include + +class ZLibUtBuf { +public: + ZLibUtBuf(); + ~ZLibUtBuf(); + char *getBuf() const; + char *takeBuf(); + size_t getCnt(); + + class Internal; + Internal *m; +}; + +bool inflateToBuf(const void* inp, unsigned int inlen, ZLibUtBuf& buf); +bool deflateToBuf(const void* inp, unsigned int inlen, ZLibUtBuf& buf); + +#endif /* _ZLIBUT_H_INCLUDED_ */