result storing experiments
This commit is contained in:
parent
5c2f62ae00
commit
b7f0654526
@ -80,5 +80,5 @@ wipedir_LDADD = ../librecoll.la
|
|||||||
x11mon_SOURCES = trx11mon.cpp
|
x11mon_SOURCES = trx11mon.cpp
|
||||||
x11mon_LDADD = ../utils/x11mon.o ../librecoll.la -lX11
|
x11mon_LDADD = ../utils/x11mon.o ../librecoll.la -lX11
|
||||||
|
|
||||||
rclqdocmem_SOURCES = rclqdocmem.cpp
|
rclqdocmem_SOURCES = rclqdocmem.cpp qresultstore.cpp
|
||||||
rclqdocmem_LDADD = ../librecoll.la
|
rclqdocmem_LDADD = ../librecoll.la
|
||||||
|
|||||||
187
src/testmains/qresultstore.cpp
Normal file
187
src/testmains/qresultstore.cpp
Normal file
@ -0,0 +1,187 @@
|
|||||||
|
/* Copyright (C) 2017-2020 J.F.Dockes
|
||||||
|
*
|
||||||
|
* License: GPL 2.1
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation; either version 2.1 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public License
|
||||||
|
* along with this program; if not, write to the
|
||||||
|
* Free Software Foundation, Inc.,
|
||||||
|
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "qresultstore.h"
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <iostream>
|
||||||
|
#include <map>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include <malloc.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
#include "rcldoc.h"
|
||||||
|
#include "rclquery.h"
|
||||||
|
|
||||||
|
/**
 * Private data for QResultStore.
 *
 * Holds the field name to array index map which is shared by all stored
 * documents, and, for each document, one malloc'd buffer containing the
 * field values as consecutive NUL-terminated strings, with an array of
 * offsets into that buffer (one per known field name).
 */
class QResultStore::Internal {
public:
    /**
     * Decide if a metadata entry should be stored.
     *
     * @param entry a (field name, field value) pair, typically an element
     *   of a document metadata map. Taken through a template parameter so
     *   that map elements (pair<const string, string>) bind directly
     *   instead of being converted to a temporary pair of copied strings.
     * @return true if the value is not empty and the field name is not in
     *   excludedfields.
     */
    template <class Entry>
    bool testentry(const Entry& entry) const {
        return !entry.second.empty() &&
            excludedfields.find(entry.first) == excludedfields.end();
    }

    // Field name -> index of the field in each document's offsets array.
    std::map<std::string, int> keyidx;
    // Number of documents counted by storeQuery().
    int ndocs{0};

    // Storage for one document.
    // Notes: offsets[0] is always 0, not really useful, simpler this
    // way. Also could use simple C array instead of c++ vector...
    struct docoffs {
        docoffs() = default;
        ~docoffs() {
            free(base);
        }

        // The struct owns the buffer pointed to by base: a memberwise
        // copy would end up freeing it twice, so copying is forbidden.
        docoffs(const docoffs&) = delete;
        docoffs& operator=(const docoffs&) = delete;

        // Moving transfers the buffer. The moves are noexcept so that
        // std::vector uses them when it has to relocate its elements.
        docoffs(docoffs&& other) noexcept
            : base(other.base) {
            other.base = nullptr;
            offsets.swap(other.offsets);
        }
        docoffs& operator=(docoffs&& other) noexcept {
            if (this != &other) {
                // Exchange contents: our previous buffer is freed when
                // 'other' is destroyed.
                char *previous = base;
                base = other.base;
                other.base = previous;
                offsets.swap(other.offsets);
            }
            return *this;
        }

        // Start of the malloc'd buffer holding the field values.
        char *base{nullptr};
        // offsets[i] is the position in base of the value for the field
        // which has index i in keyidx.
        std::vector<int> offsets;
    };
    // One element per stored document, indexed by result rank.
    std::vector<struct docoffs> docs;
    // Names of the fields which storeQuery() must not store.
    std::set<std::string> excludedfields;
};
|
||||||
|
|
||||||
|
// Allocate the private data block. It is released by the destructor.
QResultStore::QResultStore()
    : m(new Internal)
{
}

// Release the private data, and with it all the stored document buffers
// (each one is freed by the destructor of its docoffs element).
QResultStore::~QResultStore()
{
    delete m;
}
|
||||||
|
|
||||||
|
|
||||||
|
//{"author", "ipath", "rcludi", "relevancyrating", "sig", "abstract", "caption",
|
||||||
|
// "filename", "origcharset", "sig"};
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Walk the query result list and store the field values in compact form.
 *
 * Each document is stored as a single malloc'd buffer of consecutive
 * NUL-terminated strings, with an array of offsets indexed through the
 * shared field name to index map. Results from a previous call are
 * discarded.
 *
 * @param query the query to read the documents from. Documents are
 *   fetched with getDoc() from index 0 until the first failure.
 * @param excl names of the metadata fields which should not be stored.
 * @return always true at the moment (a failed buffer allocation aborts
 *   the process).
 */
bool QResultStore::storeQuery(Rcl::Query& query, std::set<std::string> excl)
{
    // excl is a by-value parameter, so it is our own copy: take its
    // contents instead of copying them once more.
    m->excludedfields.swap(excl);

    // Get rid of what a previous call may have stored. Without this, the
    // old buffer pointers would be overwritten (and leaked) below.
    m->docs.clear();

    // The first 6 array slots are for values which are Rcl::Doc members,
    // not entries in the doc.meta map.
    const int nfixed = 6;

    /////////////
    // Enumerate all existing keys and assign array indexes for
    // them. Count documents while we are at it.
    m->keyidx = {{"url", 0},
                 {"mimetype", 1},
                 {"fmtime", 2},
                 {"dmtime", 3},
                 {"fbytes", 4},
                 {"dbytes", 5}
    };
    m->ndocs = 0;
    for (;; m->ndocs++) {
        Rcl::Doc doc;
        if (!query.getDoc(m->ndocs, doc, false)) {
            break;
        }
        for (const auto& entry : doc.meta) {
            if (m->testentry(entry) &&
                m->keyidx.find(entry.first) == m->keyidx.end()) {
                int idx = static_cast<int>(m->keyidx.size());
                m->keyidx.insert({entry.first, idx});
            }
        }
    }

    ///////
    // Populate the main array with doc-equivalent structures.

    m->docs.resize(m->ndocs);

    for (int i = 0; i < m->ndocs; i++) {
        Rcl::Doc doc;
        if (!query.getDoc(i, doc, false)) {
            // The remaining elements keep an empty offsets array, for
            // which fieldvalue() returns nullptr.
            break;
        }
        auto& vdoc = m->docs[i];
        vdoc.offsets.resize(m->keyidx.size());

        // Compute the buffer size: each stored value plus its final NUL.
        size_t nbytes =
            doc.url.size() + 1 +
            doc.mimetype.size() + 1 +
            doc.fmtime.size() + 1 +
            doc.dmtime.size() + 1 +
            doc.fbytes.size() + 1 +
            doc.dbytes.size() + 1;
        for (const auto& entry : doc.meta) {
            if (!m->testentry(entry)) {
                continue;
            }
            auto it = m->keyidx.find(entry.first);
            // Same conditions as in the copy loop below: unknown keys
            // and keys using one of the fixed slots are not stored.
            if (it == m->keyidx.end() || it->second < nfixed) {
                continue;
            }
            nbytes += entry.second.size() + 1;
        }

        char *cp = static_cast<char*>(malloc(nbytes));
        if (nullptr == cp) {
            abort();
        }
        vdoc.base = cp;

        // Copy a value, including the final NUL, at the current position
        // in the buffer. Returns the offset where the value was stored.
        auto store = [&cp, &vdoc](const std::string& value) {
            const int offset = static_cast<int>(cp - vdoc.base);
            memcpy(cp, value.c_str(), value.size() + 1);
            cp += value.size() + 1;
            return offset;
        };

        vdoc.offsets[0] = store(doc.url);
        vdoc.offsets[1] = store(doc.mimetype);
        vdoc.offsets[2] = store(doc.fmtime);
        vdoc.offsets[3] = store(doc.dmtime);
        vdoc.offsets[4] = store(doc.fbytes);
        vdoc.offsets[5] = store(doc.dbytes);

        for (const auto& entry : doc.meta) {
            if (!m->testentry(entry)) {
                continue;
            }
            auto it = m->keyidx.find(entry.first);
            if (it == m->keyidx.end()) {
                // Can only happen if the results changed between the two
                // passes. The iterator can't be dereferenced: skip.
                std::cerr << "Unknown key: " << entry.first << "\n";
                continue;
            }
            if (it->second < nfixed) {
                // Already done ! Storing another address would be
                // wasteful and crash when freeing...
                continue;
            }
            vdoc.offsets[it->second] = store(entry.second);
        }

        // The fields which this document does not have still have a 0
        // offset, which is where the url is stored. Make them point to
        // the NUL byte ending the last stored value instead, so that
        // they read as empty strings. No real value except the url can
        // be at offset 0, and at least the 6 fixed values were stored,
        // so that cp - 1 is inside the buffer.
        const int emptyoffset = static_cast<int>(cp - 1 - vdoc.base);
        for (size_t k = 1; k < vdoc.offsets.size(); k++) {
            if (vdoc.offsets[k] == 0) {
                vdoc.offsets[k] = emptyoffset;
            }
        }
    }
    return true;
}
|
||||||
|
|
||||||
|
const char *QResultStore::fieldvalue(int docindex, const std::string& fldname)
|
||||||
|
{
|
||||||
|
if (docindex < 0 || docindex >= m->ndocs) {
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
auto& vdoc = m->docs[docindex];
|
||||||
|
|
||||||
|
auto it = m->keyidx.find(fldname);
|
||||||
|
if (it == m->keyidx.end()) {
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
if (it->second < 0 || it->second >= int(vdoc.offsets.size())) {
|
||||||
|
//??
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
return vdoc.base + vdoc.offsets[it->second];
|
||||||
|
}
|
||||||
46
src/testmains/qresultstore.h
Normal file
46
src/testmains/qresultstore.h
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
/* Copyright (C) 2017-2020 J.F.Dockes
|
||||||
|
*
|
||||||
|
* License: GPL 2.1
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation; either version 2.1 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public License
|
||||||
|
* along with this program; if not, write to the
|
||||||
|
* Free Software Foundation, Inc.,
|
||||||
|
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef _QRESULTSTORE_H_INCLUDED_
|
||||||
|
#define _QRESULTSTORE_H_INCLUDED_
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <set>
|
||||||
|
|
||||||
|
namespace Rcl {
|
||||||
|
class Query;
|
||||||
|
}
|
||||||
|
|
||||||
|
class QResultStore {
|
||||||
|
public:
|
||||||
|
QResultStore();
|
||||||
|
~QResultStore();
|
||||||
|
|
||||||
|
bool storeQuery(Rcl::Query& q, std::set<std::string> excluded = {});
|
||||||
|
const char *fieldvalue(int docindex, const std::string& fldname);
|
||||||
|
|
||||||
|
QResultStore(const QResultStore&) = delete;
|
||||||
|
QResultStore& operator=(const QResultStore&) = delete;
|
||||||
|
class Internal;
|
||||||
|
private:
|
||||||
|
Internal *m{nullptr};
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif /* _QRESULTSTORE_H_INCLUDED_ */
|
||||||
@ -44,7 +44,7 @@
|
|||||||
#include "plaintorich.h"
|
#include "plaintorich.h"
|
||||||
#include "hldata.h"
|
#include "hldata.h"
|
||||||
#include "smallut.h"
|
#include "smallut.h"
|
||||||
|
#include "qresultstore.h"
|
||||||
|
|
||||||
//const std::string confdir{"/home/dockes/.recoll-prod"};
|
//const std::string confdir{"/home/dockes/.recoll-prod"};
|
||||||
const std::string confdir{"/var/cache/upmpdcli/uprcl"};
|
const std::string confdir{"/var/cache/upmpdcli/uprcl"};
|
||||||
@ -265,7 +265,6 @@ int main(int argc, char *argv[])
|
|||||||
}
|
}
|
||||||
|
|
||||||
#elif defined(STORE_ARRAYS)
|
#elif defined(STORE_ARRAYS)
|
||||||
|
|
||||||
//
|
//
|
||||||
// Each result stored as a vector<const char*> with a shared
|
// Each result stored as a vector<const char*> with a shared
|
||||||
// key->intidx map to store the key name to index mapping, and and
|
// key->intidx map to store the key name to index mapping, and and
|
||||||
@ -302,6 +301,26 @@ int main(int argc, char *argv[])
|
|||||||
// performance impact which should be quite modest.
|
// performance impact which should be quite modest.
|
||||||
// ** This supposes that we don't use obstack though, as obstack
|
// ** This supposes that we don't use obstack though, as obstack
|
||||||
// placement is unpredictable.
|
// placement is unpredictable.
|
||||||
|
//
|
||||||
|
// This is the solution now implemented: no obstack, use struct with offsets
|
||||||
|
// This uses 19 MB of storage for the audio index, and 72 MB for
|
||||||
|
// the main one (less keys->less gain)
|
||||||
|
{
|
||||||
|
#if 1
|
||||||
|
QResultStore store;
|
||||||
|
bool result = store.storeQuery(
|
||||||
|
query, {"author", "ipath", "rcludi", "relevancyrating",
|
||||||
|
"sig","abstract", "caption", "filename", "origcharset", "sig"});
|
||||||
|
if (!result) {
|
||||||
|
std::cerr << "storeQuery failed\n";
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
meminfo("After storing");
|
||||||
|
std::cerr << "url 20 " << store.fieldvalue(20, "url") << "\n";
|
||||||
|
#else
|
||||||
|
/////////////
|
||||||
|
// Enumerate all existing keys and assign array indexes for
|
||||||
|
// them. Count documents while we are at it.
|
||||||
std::map<std::string, int> keyidx {
|
std::map<std::string, int> keyidx {
|
||||||
{"url",0},
|
{"url",0},
|
||||||
{"mimetype", 1},
|
{"mimetype", 1},
|
||||||
@ -310,7 +329,6 @@ int main(int argc, char *argv[])
|
|||||||
{"fbytes", 4},
|
{"fbytes", 4},
|
||||||
{"dbytes", 5},
|
{"dbytes", 5},
|
||||||
};
|
};
|
||||||
|
|
||||||
int ndocs = 0;
|
int ndocs = 0;
|
||||||
for (;;ndocs++) {
|
for (;;ndocs++) {
|
||||||
Rcl::Doc doc;
|
Rcl::Doc doc;
|
||||||
@ -327,10 +345,22 @@ int main(int argc, char *argv[])
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// 49 keys !
|
// The audio db has 49 keys !
|
||||||
std::cerr << "Found " << keyidx.size() << " different keys\n";
|
std::cerr << "Found " << keyidx.size() << " different keys\n";
|
||||||
|
|
||||||
std::vector<std::vector<char*>> docs;
|
///////
|
||||||
|
// Populate the main array with doc-equivalent structures.
|
||||||
|
|
||||||
|
// Notes: offsets[0] is always 0, not really useful, simpler this way. Also
|
||||||
|
// could use simple C array instead of c++ vector...
|
||||||
|
struct docoffs {
|
||||||
|
~docoffs() {
|
||||||
|
free(base);
|
||||||
|
}
|
||||||
|
char *base{nullptr};
|
||||||
|
std::vector<int> offsets;
|
||||||
|
};
|
||||||
|
std::vector<struct docoffs> docs;
|
||||||
docs.resize(ndocs);
|
docs.resize(ndocs);
|
||||||
meminfo("After resize");
|
meminfo("After resize");
|
||||||
|
|
||||||
@ -340,7 +370,7 @@ int main(int argc, char *argv[])
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
auto& vdoc = docs[i];
|
auto& vdoc = docs[i];
|
||||||
vdoc.resize(keyidx.size());
|
vdoc.offsets.resize(keyidx.size());
|
||||||
int nbytes =
|
int nbytes =
|
||||||
doc.url.size() + 1 +
|
doc.url.size() + 1 +
|
||||||
doc.mimetype.size() + 1 +
|
doc.mimetype.size() + 1 +
|
||||||
@ -362,21 +392,25 @@ int main(int argc, char *argv[])
|
|||||||
if (nullptr == cp) {
|
if (nullptr == cp) {
|
||||||
abort();
|
abort();
|
||||||
}
|
}
|
||||||
if (i < 2) {
|
|
||||||
std::cerr << "malloc returned " << (void*)cp << "\n";
|
|
||||||
}
|
|
||||||
|
|
||||||
#define STRINGCPCOPY(CHARP, S) do { \
|
#define STRINGCPCOPY(CHARP, S) do { \
|
||||||
memcpy(CHARP, S.c_str(), S.size()+1); \
|
memcpy(CHARP, S.c_str(), S.size()+1); \
|
||||||
CHARP += S.size()+1; \
|
CHARP += S.size()+1; \
|
||||||
} while (false);
|
} while (false);
|
||||||
|
|
||||||
vdoc[0] = cp; STRINGCPCOPY(cp, doc.url);
|
vdoc.base = cp;
|
||||||
vdoc[1] = cp; STRINGCPCOPY(cp, doc.mimetype);
|
vdoc.offsets[0] = cp - vdoc.base;
|
||||||
vdoc[2] = cp; STRINGCPCOPY(cp, doc.fmtime);
|
STRINGCPCOPY(cp, doc.url);
|
||||||
vdoc[3] = cp; STRINGCPCOPY(cp, doc.dmtime);
|
vdoc.offsets[1] = cp - vdoc.base;
|
||||||
vdoc[4] = cp; STRINGCPCOPY(cp, doc.fbytes);
|
STRINGCPCOPY(cp, doc.mimetype);
|
||||||
vdoc[5] = cp; STRINGCPCOPY(cp, doc.dbytes);
|
vdoc.offsets[2] = cp - vdoc.base;
|
||||||
|
STRINGCPCOPY(cp, doc.fmtime);
|
||||||
|
vdoc.offsets[3] = cp - vdoc.base;
|
||||||
|
STRINGCPCOPY(cp, doc.dmtime);
|
||||||
|
vdoc.offsets[4] = cp - vdoc.base;
|
||||||
|
STRINGCPCOPY(cp, doc.fbytes);
|
||||||
|
vdoc.offsets[5] = cp - vdoc.base;
|
||||||
|
STRINGCPCOPY(cp, doc.dbytes);
|
||||||
for (const auto& entry : doc.meta) {
|
for (const auto& entry : doc.meta) {
|
||||||
if (testentry(entry)) {
|
if (testentry(entry)) {
|
||||||
auto it = keyidx.find(entry.first);
|
auto it = keyidx.find(entry.first);
|
||||||
@ -385,26 +419,19 @@ int main(int argc, char *argv[])
|
|||||||
abort();
|
abort();
|
||||||
}
|
}
|
||||||
if (it->second <= 5) {
|
if (it->second <= 5) {
|
||||||
// Already done !
|
// Already done ! Storing another address would be
|
||||||
|
// wasteful and crash when freeing...
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
vdoc[it->second] = cp; STRINGCPCOPY(cp, entry.second);
|
vdoc.offsets[it->second] = cp - vdoc.base;
|
||||||
|
STRINGCPCOPY(cp, entry.second);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (i < 2) {
|
|
||||||
std::cerr << "vdoc[0] " << (void*)vdoc[0] << "\n";
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
meminfo("After storing");
|
meminfo("After storing");
|
||||||
for (auto& vdoc : docs) {
|
#endif
|
||||||
if (!vdoc.empty()) {
|
}
|
||||||
//std::cerr << "Freeing " << (void*)(vdoc[0]) << "\n";
|
|
||||||
free(vdoc[0]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
meminfo("After free");
|
|
||||||
|
|
||||||
#elif defined(STORE_ALLOBSTACK)
|
#elif defined(STORE_ALLOBSTACK)
|
||||||
|
|
||||||
//
|
//
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user