result storing experiments
This commit is contained in:
parent
5c2f62ae00
commit
b7f0654526
@ -80,5 +80,5 @@ wipedir_LDADD = ../librecoll.la
|
||||
x11mon_SOURCES = trx11mon.cpp
|
||||
x11mon_LDADD = ../utils/x11mon.o ../librecoll.la -lX11
|
||||
|
||||
rclqdocmem_SOURCES = rclqdocmem.cpp
|
||||
rclqdocmem_SOURCES = rclqdocmem.cpp qresultstore.cpp
|
||||
rclqdocmem_LDADD = ../librecoll.la
|
||||
|
||||
187
src/testmains/qresultstore.cpp
Normal file
187
src/testmains/qresultstore.cpp
Normal file
@ -0,0 +1,187 @@
|
||||
/* Copyright (C) 2017-2020 J.F.Dockes
|
||||
*
|
||||
* License: GPL 2.1
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2.1 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*/
|
||||
|
||||
#include "qresultstore.h"
|
||||
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
#include <malloc.h>
|
||||
#include <unistd.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "rcldoc.h"
|
||||
#include "rclquery.h"
|
||||
|
||||
// Private data for QResultStore: the field name to array index map and the
// per-document string storage.
class QResultStore::Internal {
public:
    // Tell if a metadata entry must be stored: its value must not be
    // empty and its name must not belong to the excluded set.
    //
    // This is a template so that a container element (typically a
    // std::pair<const std::string, std::string>) binds directly to the
    // parameter. With a std::pair<std::string, std::string> parameter,
    // every call would build a temporary pair and copy both strings.
    template <class Entry>
    bool testentry(const Entry& entry) const {
        return !entry.second.empty() &&
            excludedfields.find(entry.first) == excludedfields.end();
    }

    // Field name -> index into docoffs::offsets. Shared by all documents.
    std::map<std::string, int> keyidx;
    // Number of stored documents.
    int ndocs{0};

    // Storage for one document: a single heap buffer holding all the
    // field values as consecutive C strings, and, for each field index,
    // the offset of its value inside the buffer.
    // Notes: offsets[0] is always 0, not really useful, simpler this
    // way. Also could use simple C array instead of c++ vector...
    struct docoffs {
        docoffs() = default;
        // The buffer has a single owner: copying would end in a double
        // free(), so only moves are allowed. The moves are noexcept so
        // that std::vector can relocate elements when it grows.
        docoffs(const docoffs&) = delete;
        docoffs& operator=(const docoffs&) = delete;
        docoffs(docoffs&& other) noexcept
            : base(other.base) {
            other.base = nullptr;
            offsets.swap(other.offsets);
        }
        docoffs& operator=(docoffs&& other) noexcept {
            if (this != &other) {
                free(base);
                base = other.base;
                other.base = nullptr;
                offsets.swap(other.offsets);
                // Do not leave offsets behind in an object which has no
                // buffer any more.
                other.offsets.clear();
            }
            return *this;
        }
        ~docoffs() {
            // free(nullptr) is a no-op, no need to test.
            free(base);
        }
        // Released with free(): must be null or come from malloc().
        char *base{nullptr};
        std::vector<int> offsets;
    };
    std::vector<struct docoffs> docs;
    // Names of the metadata fields which testentry() rejects.
    std::set<std::string> excludedfields;
};
|
||||
|
||||
// Create an empty store: only the private implementation object is allocated.
QResultStore::QResultStore()
    : m(new Internal)
{
}
|
||||
// Destroy the private implementation object. The per-document buffers are
// released by the destructors of the elements it holds.
QResultStore::~QResultStore()
{
    delete m;
}
|
||||
|
||||
|
||||
//{"author", "ipath", "rcludi", "relevancyrating", "sig", "abstract", "caption",
|
||||
// "filename", "origcharset", "sig"};
|
||||
|
||||
|
||||
// Copy the results of a query into compact per-document storage.
//
// The result list is walked twice: once to count the documents and to
// assign an array index to every field name, and once to copy the field
// values of each document into a single malloc()ed buffer.
//
// @param query the query to read results from, through getDoc().
// @param excl names of the doc.meta fields which must not be stored.
// @return true on success, false if memory could not be allocated (the
//   store is then left empty).
//
// Any data stored by a previous call is discarded. A field which a given
// document does not have is stored as an empty string.
bool QResultStore::storeQuery(Rcl::Query& query, std::set<std::string> excl)
{
    // The first slots of the offsets array always hold these Rcl::Doc
    // members, in this order.
    static const int nfixed = 6;

    // Forget the results of a previous call before anything else:
    // overwriting the buffer pointers would leak the old buffers.
    m->docs.clear();
    m->ndocs = 0;
    m->excludedfields.swap(excl);

    /////////////
    // Enumerate all existing keys and assign array indexes for
    // them. Count documents while we are at it.
    m->keyidx = {{"url", 0},
                 {"mimetype", 1},
                 {"fmtime", 2},
                 {"dmtime", 3},
                 {"fbytes", 4},
                 {"dbytes", 5}
    };
    for (;; m->ndocs++) {
        Rcl::Doc doc;
        if (!query.getDoc(m->ndocs, doc, false)) {
            break;
        }
        for (const auto& entry : doc.meta) {
            if (m->testentry(entry)) {
                // insert() does nothing if the key is already known, so
                // an index is only ever assigned once.
                m->keyidx.insert(
                    {entry.first, static_cast<int>(m->keyidx.size())});
            }
        }
    }

    ///////
    // Populate the main array with doc-equivalent structures.
    m->docs.resize(m->ndocs);

    // (index, value) for the doc.meta entries of the current document
    // which have to be copied. Declared here so that its capacity is
    // reused from one document to the next.
    std::vector<std::pair<int, const std::string*>> extras;

    for (int i = 0; i < m->ndocs; i++) {
        Rcl::Doc doc;
        if (!query.getDoc(i, doc, false)) {
            // Fewer results than during the first pass. The remaining
            // entries stay empty and fieldvalue() returns nullptr for them.
            break;
        }
        auto& vdoc = m->docs[i];

        const std::string* const fixed[nfixed] = {
            &doc.url, &doc.mimetype, &doc.fmtime,
            &doc.dmtime, &doc.fbytes, &doc.dbytes
        };

        // Compute the buffer size. One extra byte holds an empty string
        // shared by all the fields which this document does not have.
        size_t nbytes = 1;
        for (const std::string *value : fixed) {
            nbytes += value->size() + 1;
        }
        extras.clear();
        for (const auto& entry : doc.meta) {
            if (!m->testentry(entry)) {
                continue;
            }
            auto it = m->keyidx.find(entry.first);
            if (it == m->keyidx.end()) {
                // Only possible if the results changed between the two
                // passes. Skip the entry: there is no slot for it.
                std::cerr << "Unknown key: " << entry.first << "\n";
                continue;
            }
            if (it->second < nfixed) {
                // Same name as one of the fixed fields, which are taken
                // from the Rcl::Doc members: do not store it twice.
                continue;
            }
            extras.push_back({it->second, &entry.second});
            nbytes += entry.second.size() + 1;
        }

        char *cp = static_cast<char*>(malloc(nbytes));
        if (nullptr == cp) {
            m->docs.clear();
            m->ndocs = 0;
            return false;
        }
        vdoc.base = cp;
        // By default every field designates the final empty string. Else
        // the missing ones would keep offset 0 and read as the URL.
        vdoc.offsets.assign(m->keyidx.size(), static_cast<int>(nbytes - 1));

        // Copy a value, including the final null byte, at the current
        // position and record where it starts.
        auto store = [&cp, &vdoc](int slot, const std::string& value) {
            vdoc.offsets[slot] = static_cast<int>(cp - vdoc.base);
            memcpy(cp, value.c_str(), value.size() + 1);
            cp += value.size() + 1;
        };
        for (int slot = 0; slot < nfixed; slot++) {
            store(slot, *fixed[slot]);
        }
        for (const auto& extra : extras) {
            store(extra.first, *extra.second);
        }
        // cp is now on the last byte of the buffer: the shared empty string.
        *cp = 0;
    }
    return true;
}
|
||||
|
||||
const char *QResultStore::fieldvalue(int docindex, const std::string& fldname)
|
||||
{
|
||||
if (docindex < 0 || docindex >= m->ndocs) {
|
||||
return nullptr;
|
||||
}
|
||||
auto& vdoc = m->docs[docindex];
|
||||
|
||||
auto it = m->keyidx.find(fldname);
|
||||
if (it == m->keyidx.end()) {
|
||||
return nullptr;
|
||||
}
|
||||
if (it->second < 0 || it->second >= int(vdoc.offsets.size())) {
|
||||
//??
|
||||
return nullptr;
|
||||
}
|
||||
return vdoc.base + vdoc.offsets[it->second];
|
||||
}
|
||||
46
src/testmains/qresultstore.h
Normal file
46
src/testmains/qresultstore.h
Normal file
@ -0,0 +1,46 @@
|
||||
/* Copyright (C) 2017-2020 J.F.Dockes
|
||||
*
|
||||
* License: GPL 2.1
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2.1 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*/
|
||||
|
||||
#ifndef _QRESULTSTORE_H_INCLUDED_
|
||||
#define _QRESULTSTORE_H_INCLUDED_
|
||||
|
||||
#include <string>
|
||||
#include <set>
|
||||
|
||||
namespace Rcl {
|
||||
class Query;
|
||||
}
|
||||
|
||||
class QResultStore {
|
||||
public:
|
||||
QResultStore();
|
||||
~QResultStore();
|
||||
|
||||
bool storeQuery(Rcl::Query& q, std::set<std::string> excluded = {});
|
||||
const char *fieldvalue(int docindex, const std::string& fldname);
|
||||
|
||||
QResultStore(const QResultStore&) = delete;
|
||||
QResultStore& operator=(const QResultStore&) = delete;
|
||||
class Internal;
|
||||
private:
|
||||
Internal *m{nullptr};
|
||||
};
|
||||
|
||||
#endif /* _QRESULTSTORE_H_INCLUDED_ */
|
||||
@ -44,7 +44,7 @@
|
||||
#include "plaintorich.h"
|
||||
#include "hldata.h"
|
||||
#include "smallut.h"
|
||||
|
||||
#include "qresultstore.h"
|
||||
|
||||
//const std::string confdir{"/home/dockes/.recoll-prod"};
|
||||
const std::string confdir{"/var/cache/upmpdcli/uprcl"};
|
||||
@ -265,7 +265,6 @@ int main(int argc, char *argv[])
|
||||
}
|
||||
|
||||
#elif defined(STORE_ARRAYS)
|
||||
|
||||
//
|
||||
// Each result stored as a vector<const char*> with a shared
|
||||
// key->intidx map to store the key name to index mapping, and
|
||||
@ -302,6 +301,26 @@ int main(int argc, char *argv[])
|
||||
// performance impact which should be quite modest.
|
||||
// ** This supposes that we don't use obstack though, as obstack
|
||||
// placement is unpredictable.
|
||||
//
|
||||
// This is the solution now implemented: no obstack, use struct with offsets.
|
||||
// This uses 19 MB of storage for the audio index, and 72 MB for
|
||||
// the main one (less keys->less gain)
|
||||
{
|
||||
#if 1
|
||||
QResultStore store;
|
||||
bool result = store.storeQuery(
|
||||
query, {"author", "ipath", "rcludi", "relevancyrating",
|
||||
"sig","abstract", "caption", "filename", "origcharset", "sig"});
|
||||
if (!result) {
|
||||
std::cerr << "storeQuery failed\n";
|
||||
return 1;
|
||||
}
|
||||
meminfo("After storing");
|
||||
std::cerr << "url 20 " << store.fieldvalue(20, "url") << "\n";
|
||||
#else
|
||||
/////////////
|
||||
// Enumerate all existing keys and assign array indexes for
|
||||
// them. Count documents while we are at it.
|
||||
std::map<std::string, int> keyidx {
|
||||
{"url",0},
|
||||
{"mimetype", 1},
|
||||
@ -310,7 +329,6 @@ int main(int argc, char *argv[])
|
||||
{"fbytes", 4},
|
||||
{"dbytes", 5},
|
||||
};
|
||||
|
||||
int ndocs = 0;
|
||||
for (;;ndocs++) {
|
||||
Rcl::Doc doc;
|
||||
@ -327,10 +345,22 @@ int main(int argc, char *argv[])
|
||||
}
|
||||
}
|
||||
}
|
||||
// 49 keys !
|
||||
// The audio db has 49 keys !
|
||||
std::cerr << "Found " << keyidx.size() << " different keys\n";
|
||||
|
||||
std::vector<std::vector<char*>> docs;
|
||||
///////
|
||||
// Populate the main array with doc-equivalent structures.
|
||||
|
||||
// Notes: offsets[0] is always 0, not really useful, simpler this way. Also
|
||||
// could use simple C array instead of c++ vector...
|
||||
struct docoffs {
|
||||
~docoffs() {
|
||||
free(base);
|
||||
}
|
||||
char *base{nullptr};
|
||||
std::vector<int> offsets;
|
||||
};
|
||||
std::vector<struct docoffs> docs;
|
||||
docs.resize(ndocs);
|
||||
meminfo("After resize");
|
||||
|
||||
@ -340,7 +370,7 @@ int main(int argc, char *argv[])
|
||||
break;
|
||||
}
|
||||
auto& vdoc = docs[i];
|
||||
vdoc.resize(keyidx.size());
|
||||
vdoc.offsets.resize(keyidx.size());
|
||||
int nbytes =
|
||||
doc.url.size() + 1 +
|
||||
doc.mimetype.size() + 1 +
|
||||
@ -362,21 +392,25 @@ int main(int argc, char *argv[])
|
||||
if (nullptr == cp) {
|
||||
abort();
|
||||
}
|
||||
if (i < 2) {
|
||||
std::cerr << "malloc returned " << (void*)cp << "\n";
|
||||
}
|
||||
|
||||
#define STRINGCPCOPY(CHARP, S) do { \
|
||||
memcpy(CHARP, S.c_str(), S.size()+1); \
|
||||
CHARP += S.size()+1; \
|
||||
} while (false);
|
||||
|
||||
vdoc[0] = cp; STRINGCPCOPY(cp, doc.url);
|
||||
vdoc[1] = cp; STRINGCPCOPY(cp, doc.mimetype);
|
||||
vdoc[2] = cp; STRINGCPCOPY(cp, doc.fmtime);
|
||||
vdoc[3] = cp; STRINGCPCOPY(cp, doc.dmtime);
|
||||
vdoc[4] = cp; STRINGCPCOPY(cp, doc.fbytes);
|
||||
vdoc[5] = cp; STRINGCPCOPY(cp, doc.dbytes);
|
||||
vdoc.base = cp;
|
||||
vdoc.offsets[0] = cp - vdoc.base;
|
||||
STRINGCPCOPY(cp, doc.url);
|
||||
vdoc.offsets[1] = cp - vdoc.base;
|
||||
STRINGCPCOPY(cp, doc.mimetype);
|
||||
vdoc.offsets[2] = cp - vdoc.base;
|
||||
STRINGCPCOPY(cp, doc.fmtime);
|
||||
vdoc.offsets[3] = cp - vdoc.base;
|
||||
STRINGCPCOPY(cp, doc.dmtime);
|
||||
vdoc.offsets[4] = cp - vdoc.base;
|
||||
STRINGCPCOPY(cp, doc.fbytes);
|
||||
vdoc.offsets[5] = cp - vdoc.base;
|
||||
STRINGCPCOPY(cp, doc.dbytes);
|
||||
for (const auto& entry : doc.meta) {
|
||||
if (testentry(entry)) {
|
||||
auto it = keyidx.find(entry.first);
|
||||
@ -385,26 +419,19 @@ int main(int argc, char *argv[])
|
||||
abort();
|
||||
}
|
||||
if (it->second <= 5) {
|
||||
// Already done !
|
||||
// Already done ! Storing another address would be
|
||||
// wasteful and crash when freeing...
|
||||
continue;
|
||||
}
|
||||
vdoc[it->second] = cp; STRINGCPCOPY(cp, entry.second);
|
||||
vdoc.offsets[it->second] = cp - vdoc.base;
|
||||
STRINGCPCOPY(cp, entry.second);
|
||||
}
|
||||
}
|
||||
if (i < 2) {
|
||||
std::cerr << "vdoc[0] " << (void*)vdoc[0] << "\n";
|
||||
}
|
||||
}
|
||||
|
||||
meminfo("After storing");
|
||||
for (auto& vdoc : docs) {
|
||||
if (!vdoc.empty()) {
|
||||
//std::cerr << "Freeing " << (void*)(vdoc[0]) << "\n";
|
||||
free(vdoc[0]);
|
||||
}
|
||||
}
|
||||
meminfo("After free");
|
||||
|
||||
#endif
|
||||
}
|
||||
#elif defined(STORE_ALLOBSTACK)
|
||||
|
||||
//
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user