diff --git a/src/testmains/Makefile.am b/src/testmains/Makefile.am
index 7e54f4a2..df933aab 100644
--- a/src/testmains/Makefile.am
+++ b/src/testmains/Makefile.am
@@ -80,5 +80,5 @@ wipedir_LDADD = ../librecoll.la
 x11mon_SOURCES = trx11mon.cpp
 x11mon_LDADD = ../utils/x11mon.o ../librecoll.la -lX11
 
-rclqdocmem_SOURCES = rclqdocmem.cpp
+rclqdocmem_SOURCES = rclqdocmem.cpp qresultstore.cpp
 rclqdocmem_LDADD = ../librecoll.la
diff --git a/src/testmains/qresultstore.cpp b/src/testmains/qresultstore.cpp
new file mode 100644
index 00000000..56e59569
--- /dev/null
+++ b/src/testmains/qresultstore.cpp
@@ -0,0 +1,245 @@
+/* Copyright (C) 2017-2020 J.F.Dockes
+ *
+ * License: GPL 2.1
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the
+ * Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include "qresultstore.h"
+
+#include <iostream>
+#include <map>
+#include <set>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "rcldoc.h"
+#include "rclquery.h"
+
+// Number of fields which are copied from dedicated Rcl::Doc members (url,
+// mimetype, fmtime, dmtime, fbytes, dbytes) instead of the meta map. They
+// use the first indexes of every offsets array.
+static const int nfixedfields = 6;
+
+// Copy s, terminating null byte included, to cp. Return the address
+// following the copied bytes.
+static char *appendstring(char *cp, const std::string& s)
+{
+    memcpy(cp, s.c_str(), s.size() + 1);
+    return cp + s.size() + 1;
+}
+
+class QResultStore::Internal {
+public:
+    // A meta entry is stored if its value is not empty and its name is
+    // not in the exclusion list.
+    bool testentry(const std::string& name, const std::string& value) const {
+        return !value.empty() &&
+            excludedfields.find(name) == excludedfields.end();
+    }
+
+    // Field name to index in the docoffs::offsets arrays
+    std::map<std::string, int> keyidx;
+    int ndocs{0};
+
+    // Storage for one document: a single malloc'd buffer holding the
+    // null-terminated field values and, for each index in keyidx, the
+    // offset of the value inside the buffer.
+    // Notes: offsets[0] is always 0, not really useful, simpler this
+    // way. Also could use simple C array instead of c++ vector...
+    struct docoffs {
+        docoffs() = default;
+        // The buffer has a single owner: copying would free it twice.
+        // Moving is needed for storing the objects in a vector.
+        docoffs(const docoffs&) = delete;
+        docoffs& operator=(const docoffs&) = delete;
+        docoffs(docoffs&& o) noexcept
+            : base(o.base), offsets(std::move(o.offsets)) {
+            o.base = nullptr;
+        }
+        docoffs& operator=(docoffs&& o) noexcept {
+            if (this != &o) {
+                free(base);
+                base = o.base;
+                o.base = nullptr;
+                offsets = std::move(o.offsets);
+            }
+            return *this;
+        }
+        ~docoffs() {
+            free(base);
+        }
+        char *base{nullptr};
+        std::vector<int> offsets;
+    };
+    std::vector<docoffs> docs;
+    std::set<std::string> excludedfields;
+};
+
+QResultStore::QResultStore()
+{
+    m = new Internal;
+}
+QResultStore::~QResultStore()
+{
+    delete m;
+}
+
+
+//{"author", "ipath", "rcludi", "relevancyrating", "sig", "abstract", "caption",
+// "filename", "origcharset", "sig"};
+
+
+// Copy the fields of all the query results to compact per-document
+// buffers. The result list is walked twice: first to assign an array
+// index to every field name, then to copy the values. Meta fields with
+// an empty value or with a name listed in excl are not stored. The data
+// from a previous call is discarded. Always returns true, and aborts
+// if memory can't be allocated.
+bool QResultStore::storeQuery(Rcl::Query& query, std::set<std::string> excl)
+{
+    m->excludedfields = std::move(excl);
+    // Release the buffers from a previous call: resizing the array over
+    // them would leak or double-free them.
+    m->docs.clear();
+
+    /////////////
+    // Enumerate all existing keys and assign array indexes for
+    // them. Count documents while we are at it.
+    m->keyidx = {{"url",0},
+                 {"mimetype", 1},
+                 {"fmtime", 2},
+                 {"dmtime", 3},
+                 {"fbytes", 4},
+                 {"dbytes", 5}
+    };
+    m->ndocs = 0;
+    for (;;m->ndocs++) {
+        Rcl::Doc doc;
+        if (!query.getDoc(m->ndocs, doc, false)) {
+            break;
+        }
+        for (const auto& entry : doc.meta) {
+            if (m->testentry(entry.first, entry.second)) {
+                // No effect if the key already has an index
+                const int idx = static_cast<int>(m->keyidx.size());
+                m->keyidx.insert({entry.first, idx});
+            }
+        }
+    }
+
+    ///////
+    // Populate the main array with doc-equivalent structures.
+
+    m->docs.resize(m->ndocs);
+
+    // Meta entries of the current document which need a copy, as
+    // (array index, value) pairs. Reused for all documents.
+    std::vector<std::pair<int, const std::string*>> extra;
+    for (int i = 0; i < m->ndocs; i++) {
+        Rcl::Doc doc;
+        if (!query.getDoc(i, doc, false)) {
+            break;
+        }
+        auto& vdoc = m->docs[i];
+        vdoc.offsets.assign(m->keyidx.size(), 0);
+        const std::string *fixed[nfixedfields] = {
+            &doc.url, &doc.mimetype, &doc.fmtime,
+            &doc.dmtime, &doc.fbytes, &doc.dbytes};
+
+        // Compute the buffer size, remembering what has to be copied
+        // so that the size and the copy can't disagree.
+        size_t nbytes = 0;
+        for (const std::string *s : fixed) {
+            nbytes += s->size() + 1;
+        }
+        extra.clear();
+        for (const auto& entry : doc.meta) {
+            if (!m->testentry(entry.first, entry.second)) {
+                continue;
+            }
+            auto it = m->keyidx.find(entry.first);
+            if (it == m->keyidx.end()) {
+                // Only possible if the results changed between the two
+                // passes. There is no array slot for the value: skip it.
+                std::cerr << "Unknown key: " << entry.first << "\n";
+                continue;
+            }
+            if (it->second < nfixedfields) {
+                // Already done ! The value comes from the Rcl::Doc
+                // member, storing another copy would be wasteful.
+                continue;
+            }
+            extra.push_back({it->second, &entry.second});
+            nbytes += entry.second.size() + 1;
+        }
+
+        char *cp = static_cast<char*>(malloc(nbytes));
+        if (nullptr == cp) {
+            abort();
+        }
+        vdoc.base = cp;
+        for (int j = 0; j < nfixedfields; j++) {
+            vdoc.offsets[j] = static_cast<int>(cp - vdoc.base);
+            cp = appendstring(cp, *fixed[j]);
+        }
+        for (const auto& fld : extra) {
+            vdoc.offsets[fld.first] = static_cast<int>(cp - vdoc.base);
+            cp = appendstring(cp, *fld.second);
+        }
+        // The fields which this document does not have still have a 0
+        // offset, which would yield the url. Point them to the final
+        // null byte (empty string) instead.
+        const int emptyoff = static_cast<int>(cp - 1 - vdoc.base);
+        for (size_t j = nfixedfields; j < vdoc.offsets.size(); j++) {
+            if (vdoc.offsets[j] == 0) {
+                vdoc.offsets[j] = emptyoff;
+            }
+        }
+    }
+    return true;
+}
+
+// Return the value of field fldname for the document at docindex in the
+// stored results. The value is an empty string if the document does not
+// have the field. Return nullptr if docindex is out of range, if the
+// document could not be copied, or if fldname is not a stored field name.
+// The pointer is valid until the next storeQuery() call or the
+// destruction of the store.
+const char *QResultStore::fieldvalue(int docindex, const std::string& fldname)
+{
+    if (docindex < 0 || docindex >= int(m->docs.size())) {
+        return nullptr;
+    }
+    const auto& vdoc = m->docs[docindex];
+    if (nullptr == vdoc.base) {
+        // The document could not be fetched during the copy pass
+        return nullptr;
+    }
+
+    auto it = m->keyidx.find(fldname);
+    if (it == m->keyidx.end()) {
+        return nullptr;
+    }
+    if (it->second < 0 || it->second >= int(vdoc.offsets.size())) {
+        return nullptr;
+    }
+    return vdoc.base + vdoc.offsets[it->second];
+}
diff --git a/src/testmains/qresultstore.h b/src/testmains/qresultstore.h
new file mode 100644
index 00000000..6a599eed
--- /dev/null
+++ b/src/testmains/qresultstore.h
@@ -0,0 +1,46 @@
+/* Copyright (C) 2017-2020 J.F.Dockes
+ *
+ * License: GPL 2.1
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the
+ * Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef _QRESULTSTORE_H_INCLUDED_
+#define _QRESULTSTORE_H_INCLUDED_
+
+#include <set>
+#include <string>
+
+namespace Rcl {
+class Query;
+}
+// In-memory store for the text fields of the documents in a query result list.
+class QResultStore {
+public:
+    QResultStore();
+    ~QResultStore();
+    // storeQuery() copies the results except the excluded meta fields, fieldvalue() reads them.
+    bool storeQuery(Rcl::Query& q, std::set<std::string> excluded = {});
+    const char *fieldvalue(int docindex, const std::string& fldname);
+    // Not copyable: the object owns the Internal data, deleted by the destructor.
+    QResultStore(const QResultStore&) = delete;
+    QResultStore& operator=(const QResultStore&) = delete;
+    class Internal;
+private:
+    Internal *m{nullptr};
+};
+
+#endif /* _QRESULTSTORE_H_INCLUDED_ */
diff --git a/src/testmains/rclqdocmem.cpp b/src/testmains/rclqdocmem.cpp
index 516eeb40..a0c00eec 100644
--- a/src/testmains/rclqdocmem.cpp
+++ b/src/testmains/rclqdocmem.cpp
@@ -44,7 +44,7 @@
 #include "plaintorich.h"
 #include "hldata.h"
 #include "smallut.h"
-
+#include "qresultstore.h"
 
 //const std::string confdir{"/home/dockes/.recoll-prod"};
 const std::string confdir{"/var/cache/upmpdcli/uprcl"};
@@ -265,7 +265,6 @@ int main(int argc, char *argv[])
     }
 
 #elif defined(STORE_ARRAYS)
-
     //
     // Each result stored as a vector with a shared
     // key->intidx map to store the key name to index mapping, and and
@@ -302,6 +301,28 @@ int main(int argc, char *argv[])
     // performance impact which should be quite modest.
     // ** This supposes that we don't use obstack though, as obstack
     // placement is unpredictable.
+    //
+    // This is the solution now implemented: no obstack, use struct with offsets
+    // This uses 19 MB of storage for the audio index, and 72 MB for
+    // the main one (less keys->less gain)
+{
+#if 1
+    QResultStore store;
+    bool result = store.storeQuery(
+        query, {"author", "ipath", "rcludi", "relevancyrating",
+                "sig", "abstract", "caption", "filename", "origcharset"});
+    if (!result) {
+        std::cerr << "storeQuery failed\n";
+        return 1;
+    }
+    meminfo("After storing");
+    const char *url20 = store.fieldvalue(20, "url");
+    // fieldvalue() returns nullptr if there are less than 21 results
+    std::cerr << "url 20 " << (url20 ? url20 : "(none)") << "\n";
+#else
+    /////////////
+    // Enumerate all existing keys and assign array indexes for
+    // them. Count documents while we are at it.
     std::map<std::string, int> keyidx {
         {"url",0},
         {"mimetype", 1},
@@ -310,7 +331,6 @@ int main(int argc, char *argv[])
         {"fbytes", 4},
         {"dbytes", 5},
     };
-
     int ndocs = 0;
     for (;;ndocs++) {
         Rcl::Doc doc;
@@ -327,10 +347,22 @@ int main(int argc, char *argv[])
             }
         }
     }
-    // 49 keys !
+    // The audio db has 49 keys !
     std::cerr << "Found " << keyidx.size() << " different keys\n";
 
-    std::vector<std::vector<char*>> docs;
+    ///////
+    // Populate the main array with doc-equivalent structures.
+
+    // Notes: offsets[0] is always 0, not really useful, simpler this way. Also
+    // could use simple C array instead of c++ vector...
+    struct docoffs {
+        ~docoffs() {
+            free(base);
+        }
+        char *base{nullptr};
+        std::vector<int> offsets;
+    };
+    std::vector<docoffs> docs;
     docs.resize(ndocs);
     meminfo("After resize");
 
@@ -340,7 +372,7 @@ int main(int argc, char *argv[])
             break;
         }
         auto& vdoc = docs[i];
-        vdoc.resize(keyidx.size());
+        vdoc.offsets.resize(keyidx.size());
         int nbytes =
             doc.url.size() + 1 +
             doc.mimetype.size() + 1 +
@@ -362,21 +394,25 @@ int main(int argc, char *argv[])
         if (nullptr == cp) {
             abort();
         }
-        if (i < 2) {
-            std::cerr << "malloc returned " << (void*)cp << "\n";
-        }
 
 #define STRINGCPCOPY(CHARP, S) do { \
         memcpy(CHARP, S.c_str(), S.size()+1); \
         CHARP += S.size()+1; \
     } while (false);
 
-        vdoc[0] = cp; STRINGCPCOPY(cp, doc.url);
-        vdoc[1] = cp; STRINGCPCOPY(cp, doc.mimetype);
-        vdoc[2] = cp; STRINGCPCOPY(cp, doc.fmtime);
-        vdoc[3] = cp; STRINGCPCOPY(cp, doc.dmtime);
-        vdoc[4] = cp; STRINGCPCOPY(cp, doc.fbytes);
-        vdoc[5] = cp; STRINGCPCOPY(cp, doc.dbytes);
+        vdoc.base = cp;
+        vdoc.offsets[0] = cp - vdoc.base;
+        STRINGCPCOPY(cp, doc.url);
+        vdoc.offsets[1] = cp - vdoc.base;
+        STRINGCPCOPY(cp, doc.mimetype);
+        vdoc.offsets[2] = cp - vdoc.base;
+        STRINGCPCOPY(cp, doc.fmtime);
+        vdoc.offsets[3] = cp - vdoc.base;
+        STRINGCPCOPY(cp, doc.dmtime);
+        vdoc.offsets[4] = cp - vdoc.base;
+        STRINGCPCOPY(cp, doc.fbytes);
+        vdoc.offsets[5] = cp - vdoc.base;
+        STRINGCPCOPY(cp, doc.dbytes);
         for (const auto& entry : doc.meta) {
             if (testentry(entry)) {
                 auto it = keyidx.find(entry.first);
@@ -385,26 +421,19 @@ int main(int argc, char *argv[])
                     abort();
                 }
                 if (it->second <= 5) {
-                    // Already done !
+                    // Already done ! Storing another address would be
+                    // wasteful and crash when freeing...
                     continue;
                 }
-                vdoc[it->second] = cp; STRINGCPCOPY(cp, entry.second);
+                vdoc.offsets[it->second] = cp - vdoc.base;
+                STRINGCPCOPY(cp, entry.second);
             }
         }
-        if (i < 2) {
-            std::cerr << "vdoc[0] " << (void*)vdoc[0] << "\n";
-        }
     }
     meminfo("After storing");
 
-    for (auto& vdoc : docs) {
-        if (!vdoc.empty()) {
-            //std::cerr << "Freeing " << (void*)(vdoc[0]) << "\n";
-            free(vdoc[0]);
-        }
-    }
-    meminfo("After free");
-
+#endif
+}
 #elif defined(STORE_ALLOBSTACK)
 
 //