From bc8eee622eac94595bbc95f7469e162ecf1e982b Mon Sep 17 00:00:00 2001 From: Jean-Francois Dockes Date: Wed, 24 Mar 2021 17:22:35 +0100 Subject: [PATCH] circache: add function to extract all entries as file pairs in target directory --- src/common/webstore.h | 1 - src/testmains/trcircache.cpp | 33 +++++--- src/utils/circache.cpp | 156 ++++++++++++++++++++++------------- src/utils/circache.h | 5 +- src/utils/md5.cpp | 8 ++ src/utils/md5.h | 2 +- 6 files changed, 130 insertions(+), 75 deletions(-) diff --git a/src/common/webstore.h b/src/common/webstore.h index 70386035..9127c9ee 100644 --- a/src/common/webstore.h +++ b/src/common/webstore.h @@ -21,7 +21,6 @@ class RclConfig; namespace Rcl { -class Db; class Doc; } class CirCache; diff --git a/src/testmains/trcircache.cpp b/src/testmains/trcircache.cpp index 73f0e945..7f3500c4 100644 --- a/src/testmains/trcircache.cpp +++ b/src/testmains/trcircache.cpp @@ -46,17 +46,17 @@ Usage(FILE *fp = stderr) } static int op_flags; -#define OPT_MOINS 0x1 -#define OPT_c 0x2 -#define OPT_p 0x8 -#define OPT_g 0x10 -#define OPT_d 0x20 -#define OPT_i 0x40 -#define OPT_D 0x80 -#define OPT_u 0x100 -#define OPT_e 0x200 -#define OPT_a 0x800 -#define OPT_C 0x1000 +#define OPT_a 0x1 +#define OPT_b 0x2 +#define OPT_C 0x4 +#define OPT_c 0x8 +#define OPT_D 0x10 +#define OPT_d 0x20 +#define OPT_e 0x40 +#define OPT_g 0x80 +#define OPT_i 0x100 +#define OPT_p 0x200 +#define OPT_u 0x400 bool storeFile(CirCache& cc, const std::string fn); @@ -78,6 +78,7 @@ int main(int argc, char **argv) while (**argv) switch (*(*argv)++) { case 'a': op_flags |= OPT_a; break; + case 'b': op_flags |= OPT_b; break; case 'C': op_flags |= OPT_C; break; case 'c': op_flags |= OPT_c; break; case 'D': op_flags |= OPT_D; break; @@ -149,6 +150,16 @@ b1: } argc--; } + } else if (op_flags & OPT_b) { + if (argc < 1) { + Usage(); + } + std::string ddir = *argv++; argc--; + string reason; + if (!CirCache::burst(dir, ddir, &reason)) { + cerr << reason << endl; + return 1; + } } else if (op_flags & OPT_p) { if (argc < 1) { Usage(); diff --git a/src/utils/circache.cpp b/src/utils/circache.cpp index dabe7ea3..1b7c2bd4 100644 --- a/src/utils/circache.cpp +++ b/src/utils/circache.cpp @@ -14,8 +14,6 @@ * Free Software Foundation, Inc., * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ -#define LOGGER_LOCAL_LOGINC 4 - #include "autoconfig.h" #include "circache.h" @@ -30,15 +28,12 @@ #include #include #include - #include - -#include "chrono.h" -#include "zlibut.h" -#include "smallut.h" -#include "pathut.h" -#include "wipedir.h" -#include "copyfile.h" +#include +#include +#include +#include +#include #ifndef _WIN32 #include @@ -64,41 +59,19 @@ static ssize_t writev(int fd, const struct iovec *iov, int iovcnt) } #endif - -#include -#include -#include - -#include "cstr.h" -#include "circache.h" +#include "chrono.h" #include "conftree.h" +#include "copyfile.h" +#include "cstr.h" #include "log.h" -#include "smallut.h" #include "md5.h" +#include "pathut.h" +#include "smallut.h" +#include "wipedir.h" +#include "zlibut.h" using namespace std; - -/** Temp buffer with automatic deallocation */ -struct TempBuf { - TempBuf() - : m_buf(0) { - } - TempBuf(int n) { - m_buf = (char *)malloc(n); - } - ~TempBuf() { - if (m_buf) { - free(m_buf); - } - } - char *setsize(int n) { - return (m_buf = (char *)realloc(m_buf, n)); - } - char *buf() { - return m_buf; - } - char *m_buf; -}; +using namespace std::placeholders; /* * File structure: @@ -1325,10 +1298,10 @@ bool CirCache::getCurrent(string& udi, string& dic, string *data) return true; } -// Copy all entries from occ to ncc. Both are already open. +// Send all entries from occ to callback. occ is already open. static bool copyall(std::shared_ptr occ, - std::shared_ptr ncc, int& nentries, - ostringstream& msg) + std::function cb, + int& nentries, ostringstream& msg) { bool eof = false; if (!occ->rewind(eof)) { @@ -1356,9 +1329,10 @@ static bool copyall(std::shared_ptr occ, return false; } //cerr << "UDI: " << udi << endl; - if (!ncc->put(udi, &dic, data)) { - msg << "put failed: " << ncc->getReason() << " sdic [" << sdic << - "]" << endl; + if (!cb(udi, &dic, data)) { + string err; + catstrerror(&err, "", errno); + msg << "put failed: errno " << err << " for [" << sdic << "]" << endl; return false; } nentries++; @@ -1427,7 +1401,11 @@ int CirCache::appendCC(const string& ddir, const string& sdir, string *reason) } int nentries; - if (!copyall(occ, ncc, nentries, msg)) { + std::function cb = + std::bind(&CirCache::put, ncc, _1, _2, _3, 0); + if (!copyall(occ, cb, nentries, msg)) { + msg << " " << ncc->getReason() << "\n"; + LOGERR(msg.str()); if (reason) { *reason = msg.str(); } @@ -1440,10 +1418,11 @@ int CirCache::appendCC(const string& ddir, const string& sdir, string *reason) bool CirCache::compact(const std::string& dir, std::string *reason) { ostringstream msg; + msg << "CirCache::compact: "; // Open source file std::shared_ptr occ(new CirCache(dir)); if (!occ->open(CirCache::CC_OPREAD)) { - msg << "CirCache::compact: open failed in " << dir << " : " << occ->getReason() << "\n"; + msg << "open failed in " << dir << " : " << occ->getReason() << "\n"; LOGERR(msg.str()); if (reason) { *reason = msg.str(); @@ -1452,7 +1431,7 @@ bool CirCache::compact(const std::string& dir, std::string *reason) } long long avmbs; if (fsocc(dir, nullptr, &avmbs) && avmbs * 1024 * 1024 < 1.2 * occ->size()) { - msg << "CirCache::compact: not enough space on file system"; + msg << "not enough space on file system"; LOGERR(msg.str() <<"\n"); if (reason) { *reason = msg.str(); @@ -1461,7 +1440,7 @@ bool CirCache::compact(const std::string& dir, std::string *reason) } std::string ndir = path_cat(dir, "tmpcopy"); if (!path_makepath(dir, 0700)) { - msg << "CirCache::compact: path_makepath failed with errno " << errno; + msg << "path_makepath failed with errno " << errno; LOGERR(msg.str() << "\n"); if (reason) { *reason = msg.str(); @@ -1471,7 +1450,7 @@ bool CirCache::compact(const std::string& dir, std::string *reason) std::shared_ptr ncc(new CirCache(ndir)); if (!ncc->create(occ->size(), occ->uniquentries() ? CC_CRUNIQUE : CC_CRNONE)) { - msg << "CirCache::compact: Open failed in " << ndir << " : " << ncc->getReason(); + msg << "open failed in " << ndir << " : " << ncc->getReason(); LOGERR(msg.str() << "\n"); if (reason) { *reason = msg.str(); @@ -1479,7 +1458,10 @@ bool CirCache::compact(const std::string& dir, std::string *reason) return false; } int nentries; - if (!copyall(occ, ncc, nentries, msg)) { + std::function cb = + std::bind(&CirCache::put, ncc, _1, _2, _3, 0); + if (!copyall(occ, cb, nentries, msg)) { + msg << " " << ncc->getReason(); LOGERR(msg.str() << "\n"); if (reason) { *reason = msg.str(); @@ -1495,7 +1477,7 @@ bool CirCache::compact(const std::string& dir, std::string *reason) std::string nfile = path_cat(ndir, "circache.crch").c_str(); std::string ofile = path_cat(dir, "circache.crch").c_str(); if (!renameormove(nfile.c_str(), ofile.c_str(), r)) { - msg << "CirCache::compact: rename: " << r; + msg << "rename: " << r; LOGERR(msg.str() << "\n"); if (reason) { *reason = msg.str(); @@ -1508,13 +1490,58 @@ bool CirCache::compact(const std::string& dir, std::string *reason) return true; } +class CCDataToFile { +public: + CCDataToFile(const std::string dd) + : m_dir(dd) {} + bool putFile(const std::string& udi, const ConfSimple *dicp, const std::string& data); + std::string& getReason() {return m_reason;} +private: + std::string m_dir; + std::string m_reason; +}; + +bool CCDataToFile::putFile(const std::string& udi, const ConfSimple *dicp, const std::string& data) +{ +#if 0 + std::ostringstream deb; + dicp->write(deb); + LOGDEB("CCDataToFile::putFile: udi " << udi << " dic " << deb.str() << + " datasize " << data.size() << "\n"); +#endif + std::string hash = MD5Hex(udi); + std::string dsuff; + std::string mt; + dicp->get("mimetype", mt); + if (mt == "text/html") { + dsuff = ".html"; + } else if (mt == "application/pdf") { + dsuff = ".pdf"; + } else { + dsuff = ".xxx"; + } + + std::string fn = path_cat(m_dir, "circache-" + hash + dsuff); + if (!stringtofile(data, fn.c_str(), m_reason)) { + return false; + } + fn = path_cat(m_dir, "circache-" + hash + ".dic"); + std::ostringstream str; + dicp->write(str); + if (!stringtofile(str.str(), fn.c_str(), m_reason)) { + return false; + } + return true; +} + bool CirCache::burst(const std::string& ccdir, const std::string destdir, std::string *reason) { ostringstream msg; + msg << "CirCache::burst: "; // Open source file std::shared_ptr occ(new CirCache(ccdir)); if (!occ->open(CirCache::CC_OPREAD)) { - msg << "CirCache::burst: open failed in " << dir << " : " << occ->getReason() << "\n"; + msg << "open failed in " << ccdir << " : " << occ->getReason() << "\n"; LOGERR(msg.str()); if (reason) { *reason = msg.str(); @@ -1522,8 +1549,8 @@ bool CirCache::burst(const std::string& ccdir, const std::string destdir, std::s return -1; } long long avmbs; - if (fsocc(dir, nullptr, &avmbs) && avmbs * 1024 * 1024 < 1.2 * occ->size()) { - msg << "CirCache::burst: not enough space on file system"; + if (fsocc(destdir, nullptr, &avmbs) && avmbs * 1024 * 1024 < 1.2 * occ->size()) { + msg << "not enough space on file system"; LOGERR(msg.str() <<"\n"); if (reason) { *reason = msg.str(); @@ -1531,7 +1558,19 @@ bool CirCache::burst(const std::string& ccdir, const std::string destdir, std::s return false; } if (!path_makepath(destdir, 0700)) { - msg << "CirCache::burst: path_makepath failed with errno " << errno; + msg << "path_makepath failed with errno " << errno; + LOGERR(msg.str() << "\n"); + if (reason) { + *reason = msg.str(); + } + return false; + } + int nentries; + CCDataToFile copier(destdir); + std::function cb = + std::bind(&CCDataToFile::putFile, copier, _1, _2, _3); + if (!copyall(occ, cb, nentries, msg)) { + msg << " " << copier.getReason(); LOGERR(msg.str() << "\n"); if (reason) { *reason = msg.str(); @@ -1539,7 +1578,6 @@ bool CirCache::burst(const std::string& ccdir, const std::string destdir, std::s return false; } - m_d->scan(CIRCACHE_FIRSTBLOCK_SIZE, &rec, false); - + return true; } diff --git a/src/utils/circache.h b/src/utils/circache.h index 1fd62e93..b13e6394 100644 --- a/src/utils/circache.h +++ b/src/utils/circache.h @@ -32,11 +32,10 @@ * Inside the file. the UDIs are stored inside the entry dictionary * under the key "udi". * - * It is assumed that the dictionary are small (they are routinely read/parsed) - * + * It is assumed that the dictionaries are small (they are routinely + * read/parsed) */ -#include #include #include diff --git a/src/utils/md5.cpp b/src/utils/md5.cpp index f326c201..f84d8dc1 100644 --- a/src/utils/md5.cpp +++ b/src/utils/md5.cpp @@ -281,6 +281,14 @@ string& MD5HexPrint(const string& digest, string &out) return out; } +std::string MD5Hex(const std::string& data) +{ + std::string digest, out; + MD5String(data, digest); + MD5HexPrint(digest, out); + return out; +} + string& MD5HexScan(const string& xdigest, string& digest) { digest.erase(); diff --git a/src/utils/md5.h b/src/utils/md5.h index b9532442..6f86440e 100644 --- a/src/utils/md5.h +++ b/src/utils/md5.h @@ -38,5 +38,5 @@ extern void MD5Final(std::string& digest, MD5_CTX *); extern std::string& MD5String(const std::string& data, std::string& digest); extern std::string& MD5HexPrint(const std::string& digest, std::string& out); extern std::string& MD5HexScan(const std::string& xdigest, std::string& digest); - +extern std::string MD5Hex(const std::string& data); #endif /* _MD5_H_ */