circache: append: resize dest to avoid recycling while appending. Clarifications
This commit is contained in:
parent
5e82af9798
commit
9eac638bb9
@ -1289,7 +1289,6 @@ void RclMain::applyStyleSheet()
|
||||
{
|
||||
::applyStyleSheet(prefs.qssFile);
|
||||
if (m_source) {
|
||||
std::cerr << "applyStyleSheet emit docSourceChanged\n";
|
||||
emit docSourceChanged(m_source);
|
||||
emit sortDataChanged(m_sortspec);
|
||||
} else {
|
||||
|
||||
@ -146,9 +146,10 @@ string WebcacheModel::getData(unsigned int idx)
|
||||
return string();
|
||||
}
|
||||
string udi = m->all[allidx].udi;
|
||||
// Compute the instance for this udi (in case we are not erasing older instances).
|
||||
// Compute the instance for this udi (in case we are configured to
|
||||
// not erase older instances). Valid instance values begin at 1
|
||||
int instance = 0;
|
||||
for (int i = 0; i < allidx; i++) {
|
||||
for (unsigned int i = 0; i <= idx; i++) {
|
||||
if (m->all[i].udi == udi) {
|
||||
instance++;
|
||||
}
|
||||
@ -345,6 +346,8 @@ void WebcacheEdit::saveToFile()
|
||||
return;
|
||||
string data = m_model->getData(selection[0].row());
|
||||
QString qfn = myGetFileName(false, "Saving webcache data");
|
||||
if (qfn.isEmpty())
|
||||
return;
|
||||
string reason;
|
||||
if (!stringtofile(data, qs2utf8s(qfn).c_str(), reason)) {
|
||||
QMessageBox::warning(0, "Recoll", tr("File creation failed: ") + u8s2qs(reason));
|
||||
|
||||
@ -23,13 +23,15 @@ using namespace std;
|
||||
static char *thisprog;
|
||||
|
||||
static char usage [] =
|
||||
" -c [-u] <dirname> <sizekbs>: create\n"
|
||||
" -c [-u] <dirname> <sizekbs>: create new store or possibly resize existing one\n"
|
||||
" -u: set the 'unique' flag (else unset it)\n"
|
||||
" None of this changes the existing data\n"
|
||||
" -p <dirname> <apath> [apath ...] : put files\n"
|
||||
" -d <dirname> : dump\n"
|
||||
" -g [-i instance] [-D] <dirname> <udi>: get\n"
|
||||
" -D: also dump data\n"
|
||||
" -e <dirname> <udi> : erase\n"
|
||||
" -a <targetdir> <dir> [<dir> ...]: append old content to target\n"
|
||||
" -a <targetdir> <dir> [<dir> ...]: append content from existing cache(s) to target\n"
|
||||
" The target should be first resized to hold all the data, else only\n"
|
||||
" as many entries as capacity permit will be retained\n"
|
||||
;
|
||||
@ -53,6 +55,8 @@ static int op_flags;
|
||||
#define OPT_e 0x200
|
||||
#define OPT_a 0x800
|
||||
|
||||
bool storeFile(CirCache& cc, const std::string fn);
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
int instance = -1;
|
||||
@ -143,7 +147,7 @@ b1:
|
||||
}
|
||||
while (argc) {
|
||||
string reason;
|
||||
if (CirCache::append(dir, *argv++, &reason) < 0) {
|
||||
if (CirCache::appendCC(dir, *argv++, &reason) < 0) {
|
||||
cerr << reason << endl;
|
||||
return 1;
|
||||
}
|
||||
@ -160,42 +164,8 @@ b1:
|
||||
while (argc) {
|
||||
string fn = *argv++;
|
||||
argc--;
|
||||
char dic[1000];
|
||||
string data, reason;
|
||||
if (!file_to_string(fn, data, &reason)) {
|
||||
cerr << "File_to_string: " << reason << endl;
|
||||
exit(1);
|
||||
}
|
||||
string udi;
|
||||
make_udi(fn, "", udi);
|
||||
string cmd("xdg-mime query filetype ");
|
||||
// Should do more quoting here...
|
||||
cmd += "'" + fn + "'";
|
||||
FILE *fp = popen(cmd.c_str(), "r");
|
||||
char* buf=0;
|
||||
size_t sz = 0;
|
||||
if (::getline(&buf, &sz, fp) -1) {
|
||||
cerr << "Could not read from xdg-mime output\n";
|
||||
exit(1);
|
||||
}
|
||||
pclose(fp);
|
||||
string mimetype(buf);
|
||||
free(buf);
|
||||
trimstring(mimetype, "\n\r");
|
||||
cout << "Got [" << mimetype << "]\n";
|
||||
|
||||
string s;
|
||||
ConfSimple conf(s);
|
||||
conf.set("udi", udi);
|
||||
conf.set("mimetype", mimetype);
|
||||
//ostringstream str; conf.write(str); cout << str.str() << endl;
|
||||
|
||||
if (!cc.put(udi, &conf, data, 0)) {
|
||||
cerr << "Put failed: " << cc.getReason() << endl;
|
||||
cerr << "conf: [";
|
||||
conf.write(cerr);
|
||||
cerr << "]" << endl;
|
||||
exit(1);
|
||||
if (!storeFile(cc, fn)) {
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
cc.open(CirCache::CC_OPREAD);
|
||||
@ -243,3 +213,46 @@ b1:
|
||||
|
||||
exit(0);
|
||||
}
|
||||
|
||||
|
||||
// Store the contents of one file as a cache entry.
//
// The entry udi is computed from the file name by make_udi(). The MIME type
// is obtained by running "xdg-mime query filetype" on the file and reading
// the first line of its output. Both values are recorded in the entry
// dictionary under the "udi" and "mimetype" keys before calling cc.put().
//
// @param cc cache which receives the new entry.
// @param fn path of the file to store.
// @return true if the entry was stored, false on any failure (a diagnostic
//     is printed to stderr).
bool storeFile(CirCache& cc, const std::string fn)
{
    std::string data, reason;
    if (!file_to_string(fn, data, &reason)) {
        std::cerr << "File_to_string: " << reason << "\n";
        return false;
    }
    std::string udi;
    make_udi(fn, "", udi);

    // Build the shell command with the file name inside single quotes. An
    // embedded single quote is written as '\'' (close the quoting, emit an
    // escaped quote, reopen) so that the name cannot break out of the quoting.
    std::string cmd("xdg-mime query filetype '");
    for (const char c : fn) {
        if (c == '\'') {
            cmd += "'\\''";
        } else {
            cmd += c;
        }
    }
    cmd += "'";

    FILE *fp = popen(cmd.c_str(), "r");
    if (fp == nullptr) {
        std::cerr << "Could not execute [" << cmd << "]\n";
        return false;
    }
    char *buf = nullptr;
    size_t sz = 0;
    // getline() returns -1 on error or end of file; any other value means
    // that a line was read.
    const bool gotline = ::getline(&buf, &sz, fp) != -1;
    // Always release the pipe, whether the read succeeded or not.
    pclose(fp);
    std::string mimetype;
    if (gotline) {
        mimetype = buf;
    }
    // getline() may have allocated the buffer even when it fails.
    free(buf);
    if (!gotline) {
        std::cerr << "Could not read from xdg-mime output\n";
        return false;
    }
    trimstring(mimetype, "\n\r");
    //std::cerr << "Got [" << mimetype << "]\n";

    // Entry dictionary, built from an empty configuration string.
    std::string s;
    ConfSimple conf(s);
    conf.set("udi", udi);
    conf.set("mimetype", mimetype);
    //ostringstream str; conf.write(str); cout << str.str() << endl;

    if (!cc.put(udi, &conf, data, 0)) {
        std::cerr << "Put failed: " << cc.getReason() << "\n";
        std::cerr << "conf: [";
        conf.write(std::cerr);
        std::cerr << "]" << "\n";
        return false;
    }
    return true;
}
|
||||
|
||||
@ -14,6 +14,7 @@
|
||||
* Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*/
|
||||
#define LOGGER_LOCAL_LOGINC 4
|
||||
|
||||
#include "autoconfig.h"
|
||||
|
||||
@ -34,6 +35,7 @@
|
||||
|
||||
#include "chrono.h"
|
||||
#include "zlibut.h"
|
||||
#include "smallut.h"
|
||||
|
||||
#ifndef _WIN32
|
||||
#include <sys/uio.h>
|
||||
@ -255,10 +257,8 @@ public:
|
||||
return true;
|
||||
}
|
||||
void khDump() {
|
||||
for (kh_type::const_iterator it = m_ofskh.begin();
|
||||
it != m_ofskh.end(); it++) {
|
||||
LOGDEB("Circache::KHDUMP: " << it->first.asHexString() << " " <<
|
||||
it->second << "\n");
|
||||
for (const auto& e : m_ofskh) {
|
||||
LOGDEB("Circache::KHDUMP: " << e.first.asHexString() << " " << e.second << "\n");
|
||||
}
|
||||
}
|
||||
|
||||
@ -270,8 +270,7 @@ public:
|
||||
|
||||
UdiH h(udi);
|
||||
|
||||
LOGDEB2("Circache::khFind: h " << h.asHexString() << " udi [" << udi <<
|
||||
"]\n");
|
||||
LOGDEB2("Circache::khFind: h " << h.asHexString() << " udi [" << udi << "]\n");
|
||||
|
||||
pair<kh_type::iterator, kh_type::iterator> p = m_ofskh.equal_range(h);
|
||||
|
||||
@ -312,9 +311,8 @@ public:
|
||||
}
|
||||
// Clear entries for vector of udi/offs
|
||||
bool khClear(const vector<pair<string, int64_t> >& udis) {
|
||||
for (vector<pair<string, int64_t> >::const_iterator it = udis.begin();
|
||||
it != udis.end(); it++) {
|
||||
khClear(*it);
|
||||
for (const auto& udioffs : udis) {
|
||||
khClear(udioffs);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
@ -589,15 +587,13 @@ public:
|
||||
return true;
|
||||
}
|
||||
|
||||
bool readDicData(int64_t hoffs, EntryHeaderData& hd, string& dic,
|
||||
string* data) {
|
||||
bool readDicData(int64_t hoffs, EntryHeaderData& hd, string& dic, string* data) {
|
||||
int64_t offs = hoffs + CIRCACHE_HEADER_SIZE;
|
||||
// This syscall could be avoided in some cases if we saved the offset
|
||||
// at each seek. In most cases, we just read the header and we are
|
||||
// at the right position
|
||||
if (lseek(m_fd, offs, 0) != offs) {
|
||||
m_reason << "CirCache::get: lseek(" << offs << ") failed: " <<
|
||||
errno;
|
||||
m_reason << "CirCache::get: lseek(" << offs << ") failed: " << errno;
|
||||
return false;
|
||||
}
|
||||
char *bf = 0;
|
||||
@ -863,9 +859,9 @@ public:
|
||||
|
||||
virtual status takeone(int64_t offs, const string& udi,
|
||||
const EntryHeaderData& d) {
|
||||
LOGDEB2("Circache:Scan: off " << offs << " udi [" << udi << "] dcsz " <<
|
||||
d.dicsize << " dtsz " << d.datasize <<
|
||||
" pdsz " << d.padsize << " flgs " << d.flags << "\n");
|
||||
LOGDEB1("Circache:Scan: off " << offs << " udi [" << udi << "] dcsz " <<
|
||||
d.dicsize << " dtsz " << d.datasize << " pdsz " << d.padsize <<
|
||||
" flgs " << d.flags << " previnst " << m_instance << "\n");
|
||||
if (!m_udi.compare(udi)) {
|
||||
m_instance++;
|
||||
m_offs = offs;
|
||||
@ -878,7 +874,6 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
// instance == -1 means get latest. Otherwise specify from 1+
|
||||
bool CirCache::get(const string& udi, string& dic, string *data, int instance)
|
||||
{
|
||||
Chrono chron;
|
||||
@ -899,12 +894,11 @@ bool CirCache::get(const string& udi, string& dic, string *data, int instance)
|
||||
int finst = 1;
|
||||
EntryHeaderData d_good;
|
||||
int64_t o_good = 0;
|
||||
for (vector<int64_t>::iterator it = ofss.begin();
|
||||
it != ofss.end(); it++) {
|
||||
LOGDEB1("Circache::get: trying offs " << *it << "\n");
|
||||
for (const auto& offset : ofss) {
|
||||
LOGDEB1("Circache::get: trying offs " << offset << "\n");
|
||||
EntryHeaderData d;
|
||||
string fudi;
|
||||
if (!m_d->readHUdi(*it, d, fudi)) {
|
||||
if (!m_d->readHUdi(offset, d, fudi)) {
|
||||
return false;
|
||||
}
|
||||
if (!fudi.compare(udi)) {
|
||||
@ -912,7 +906,7 @@ bool CirCache::get(const string& udi, string& dic, string *data, int instance)
|
||||
// matches, else go on. If instance is -1 need to
|
||||
// go to the end anyway
|
||||
d_good = d;
|
||||
o_good = *it;
|
||||
o_good = offset;
|
||||
if (finst == instance) {
|
||||
break;
|
||||
} else {
|
||||
@ -946,6 +940,10 @@ bool CirCache::get(const string& udi, string& dic, string *data, int instance)
|
||||
return bret;
|
||||
}
|
||||
|
||||
// It would be possible to have an option to only erase if this is the
|
||||
// last entry in the file, by comparing the offsets from khFind() with
|
||||
// m_oheadoffs. Read the last offset < m_oheadoffs and check that
|
||||
// offset+sizes == oheadoffs
|
||||
bool CirCache::erase(const string& udi, bool reallyclear)
|
||||
{
|
||||
if (m_d == 0) {
|
||||
@ -977,22 +975,22 @@ bool CirCache::erase(const string& udi, bool reallyclear)
|
||||
return true;
|
||||
}
|
||||
|
||||
for (vector<int64_t>::iterator it = ofss.begin(); it != ofss.end(); it++) {
|
||||
LOGDEB2("CirCache::erase: reading at " << *it << "\n");
|
||||
for (const auto& offset : ofss) {
|
||||
LOGDEB2("CirCache::erase: reading at " << offset << "\n");
|
||||
EntryHeaderData d;
|
||||
string fudi;
|
||||
if (!m_d->readHUdi(*it, d, fudi)) {
|
||||
if (!m_d->readHUdi(offset, d, fudi)) {
|
||||
return false;
|
||||
}
|
||||
LOGDEB2("CirCache::erase: found fudi [" << fudi << "]\n");
|
||||
if (!fudi.compare(udi)) {
|
||||
EntryHeaderData nd;
|
||||
nd.padsize = d.dicsize + d.datasize + d.padsize;
|
||||
LOGDEB2("CirCache::erase: rewrite at " << *it << "\n");
|
||||
if (*it == m_d->m_nheadoffs) {
|
||||
LOGDEB2("CirCache::erase: rewrite at " << offset << "\n");
|
||||
if (offset == m_d->m_nheadoffs) {
|
||||
m_d->m_npadsize = nd.padsize;
|
||||
}
|
||||
if (!m_d->writeEntryHeader(*it, nd, reallyclear)) {
|
||||
if (!m_d->writeEntryHeader(offset, nd, reallyclear)) {
|
||||
LOGERR("CirCache::erase: write header failed\n");
|
||||
return false;
|
||||
}
|
||||
@ -1339,26 +1337,60 @@ static bool copyall(std::shared_ptr<CirCache> occ,
|
||||
return true;
|
||||
}
|
||||
|
||||
// Append all entries from sdir to ddir
|
||||
int CirCache::append(const string ddir, const string& sdir, string *reason)
|
||||
int CirCache::appendCC(const string ddir, const string& sdir, string *reason)
|
||||
{
|
||||
ostringstream msg;
|
||||
// Open source file
|
||||
std::shared_ptr<CirCache> occ(new CirCache(sdir));
|
||||
if (!occ->open(CirCache::CC_OPREAD)) {
|
||||
if (reason) {
|
||||
msg << "Open failed in " << sdir << " : " <<
|
||||
occ->getReason() << endl;
|
||||
msg << "Open failed in " << sdir << " : " << occ->getReason() << endl;
|
||||
*reason = msg.str();
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Possibly resize dest. If the dest is currently recycling, it
|
||||
// will keep on. This only avoids erasing entries in dest if it is
|
||||
// currently writing at EOF (growing), which will be the case if
|
||||
// we are using this to compact an existing file (the dest was just
|
||||
// created for the purpose).
|
||||
int64_t dstavail{0}, dstmaxsize{0};
|
||||
bool isunique;
|
||||
// Check dest size
|
||||
{
|
||||
std::shared_ptr<CirCache> ncc(new CirCache(ddir));
|
||||
if (!ncc->open(CirCache::CC_OPREAD)) {
|
||||
if (reason) {
|
||||
msg << "Open failed in " << ddir << " : " << ncc->getReason() << endl;
|
||||
*reason = msg.str();
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
dstmaxsize = ncc->m_d->m_maxsize;
|
||||
dstavail = dstmaxsize - ncc->m_d->m_nheadoffs;
|
||||
isunique = ncc->m_d->m_uniquentries;
|
||||
}
|
||||
if (dstavail < occ->size()) {
|
||||
std::shared_ptr<CirCache> ncc(new CirCache(ddir));
|
||||
auto nsize = dstmaxsize + (occ->size() - dstavail) + 5*1000*1000;
|
||||
LOGDEB1("CirCache::appendCC: Dstmaxsize " << displayableBytes(dstmaxsize) << " dstavail "<<
|
||||
displayableBytes(dstavail) << " org size " << displayableBytes(occ->size()) <<
|
||||
" nsize " << displayableBytes(nsize) << "\n");
|
||||
if (!ncc->create(nsize, isunique ? CC_CRUNIQUE : CC_CRNONE)) {
|
||||
if (reason) {
|
||||
msg << "Open failed in " << ddir << " : " << ncc->getReason() << endl;
|
||||
*reason = msg.str();
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
// Open dest file
|
||||
std::shared_ptr<CirCache> ncc(new CirCache(ddir));
|
||||
if (!ncc->open(CirCache::CC_OPWRITE)) {
|
||||
if (reason) {
|
||||
msg << "Open failed in " << ddir << " : " <<
|
||||
ncc->getReason() << endl;
|
||||
msg << "Open failed in " << ddir << " : " << ncc->getReason() << endl;
|
||||
*reason = msg.str();
|
||||
}
|
||||
return -1;
|
||||
|
||||
@ -67,11 +67,14 @@ public:
|
||||
virtual std::string getpath();
|
||||
|
||||
// Set data to 0 if you just want the header
|
||||
// instance == -1 means get latest. Otherwise specify from 1+
|
||||
virtual bool get(const std::string& udi, std::string& dic,
|
||||
std::string *data = 0, int instance = -1);
|
||||
|
||||
// Note: the dicp MUST have an udi entry
|
||||
enum PutFlags {NoCompHint = 1};
|
||||
enum PutFlags {
|
||||
NoCompHint = 1, // Do not attempt compression.
|
||||
};
|
||||
virtual bool put(const std::string& udi, const ConfSimple *dicp,
|
||||
const std::string& data, unsigned int flags = 0);
|
||||
|
||||
@ -99,15 +102,27 @@ public:
|
||||
|
||||
/* Utility: append all entries from sdir to ddir.
|
||||
*
|
||||
* This does not need to be a member at all, just using the namespace here.
|
||||
* ddir must already exist. It will be appropriately resized if
|
||||
* needed to avoid recycling while writing the new entries.
|
||||
* ** Note that if dest is not currently growing, this action
|
||||
* will recycle old dest entries between the current write
|
||||
* point and EOF (or up to wherever we need to write to store
|
||||
* the source data) **
|
||||
* Also note that if the objective is just to compact (reuse the erased
|
||||
* entries space) you should first create the new circache with the
|
||||
* same maxsize as the old one, else the new maxsize will be the
|
||||
* current file size (current erased+active entries, with
|
||||
* available space corresponding to the old erased entries).
|
||||
*
|
||||
* @param ddir destination circache (must be previously created
|
||||
* with appropriate size)
|
||||
* This method does not need to be a member at all, just using the
|
||||
* namespace here.
|
||||
*
|
||||
* @param ddir destination circache (must exist)
|
||||
* @param sdir source circache
|
||||
* @ret number of entries copied or -1 on error
|
||||
*/
|
||||
static int append(const std::string ddir, const std::string& sdir,
|
||||
std::string *reason = 0);
|
||||
static int appendCC(const std::string ddir, const std::string& sdir,
|
||||
std::string *reason = 0);
|
||||
|
||||
protected:
|
||||
CirCacheInternal *m_d;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user