indents etc.

This commit is contained in:
Jean-Francois Dockes 2021-04-22 09:03:36 +02:00
parent eca2f47a2a
commit d207d4f1c5
3 changed files with 82 additions and 93 deletions

View File

@ -1,4 +1,4 @@
/* Copyright (C) 2005 J.F.Dockes
/* Copyright (C) 2005-2021 J.F.Dockes
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
@ -24,69 +24,64 @@ using std::string;
// Decide whether a document is accepted by the filter specification fs.
// The criteria are alternatives (logical OR): the first one which accepts
// the document ends the evaluation. Returns false when no criterion
// accepted the document, which includes the case where fs has no criteria.
static bool filter(const DocSeqFiltSpec& fs, const Rcl::Doc *x)
{
    const auto ncrits = fs.crits.size();
    LOGDEB2(" Filter: ncrits " << ncrits << "\n");
    for (unsigned int idx = 0; idx < ncrits; idx++) {
        const auto crit = fs.crits[idx];
        if (crit == DocSeqFiltSpec::DSFS_PASSALL) {
            // Unconditional acceptance
            return true;
        }
        if (crit == DocSeqFiltSpec::DSFS_MIMETYPE) {
            const auto& wanted = fs.values[idx];
            LOGDEB2(" filter: MIMETYPE: me [" << wanted << "] doc [" << x->mimetype << "]\n");
            if (x->mimetype == wanted) {
                return true;
            }
        } else if (crit == DocSeqFiltSpec::DSFS_QLANG) {
            // No comparison is performed here for this kind of criterion:
            // it is only reported in the log.
            LOGDEB(" filter: QLANG [" << fs.values[idx] << "]!!\n");
        }
    }
    // None of the criteria accepted the document
    return false;
}
// Build a filtered sequence: iseq is handed to the DocSeqModifier base,
// conf is kept in m_config, and filtspec is then applied by calling
// setFiltSpec().
DocSeqFiltered::DocSeqFiltered(RclConfig *conf,
                               std::shared_ptr<DocSequence> iseq,
                               DocSeqFiltSpec &filtspec)
    : DocSeqModifier(iseq),
      m_config(conf)
{
    this->setFiltSpec(filtspec);
}
bool DocSeqFiltered::setFiltSpec(const DocSeqFiltSpec &filtspec)
{
LOGDEB0("DocSeqFiltered::setFiltSpec\n" );
LOGDEB0("DocSeqFiltered::setFiltSpec\n");
for (unsigned int i = 0; i < filtspec.crits.size(); i++) {
switch (filtspec.crits[i]) {
case DocSeqFiltSpec::DSFS_MIMETYPE:
m_spec.orCrit(filtspec.crits[i], filtspec.values[i]);
break;
case DocSeqFiltSpec::DSFS_QLANG:
{
// There are very few lang constructs that we can
// interpret. The default config uses rclcat:value
// only. That will be all for now...
string val = filtspec.values[i];
if (val.find("rclcat:") == 0) {
string catg = val.substr(7);
vector<string> tps;
m_config->getMimeCatTypes(catg, tps);
for (vector<string>::const_iterator it = tps.begin();
it != tps.end(); it++) {
LOGDEB2("Adding mime: [" << *it << "]\n");
m_spec.orCrit(DocSeqFiltSpec::DSFS_MIMETYPE, *it);
switch (filtspec.crits[i]) {
case DocSeqFiltSpec::DSFS_MIMETYPE:
m_spec.orCrit(filtspec.crits[i], filtspec.values[i]);
break;
case DocSeqFiltSpec::DSFS_QLANG: {
// There are very few lang constructs that we can interpret. The
// default config uses rclcat:value only. That will be all for now...
string val = filtspec.values[i];
if (val.find("rclcat:") == 0) {
string catg = val.substr(7);
vector<string> tps;
m_config->getMimeCatTypes(catg, tps);
for (const auto& mime : tps) {
LOGDEB2("Adding mime: [" << mime << "]\n");
m_spec.orCrit(DocSeqFiltSpec::DSFS_MIMETYPE, mime);
}
}
}
}
}
break;
default:
break;
}
default:
break;
}
}
// If m_spec ends up empty, pass everything, better than filtering all.
if (m_spec.crits.empty()) {
m_spec.orCrit(DocSeqFiltSpec::DSFS_PASSALL, "");
m_spec.orCrit(DocSeqFiltSpec::DSFS_PASSALL, "");
}
m_dbindices.clear();
return true;
@ -94,31 +89,31 @@ bool DocSeqFiltered::setFiltSpec(const DocSeqFiltSpec &filtspec)
// Fetch the document at position idx of the filtered sequence.
//
// m_dbindices holds, for each filtered position already resolved, the index
// of the matching document in the underlying sequence (m_seq). When idx is
// beyond what was resolved so far, documents are fetched from m_seq and run
// through filter() until enough of them have been accepted.
//
// @param idx position in the filtered sequence.
// @param doc output: the document found.
// @return false if idx is negative or if the underlying sequence could not
//    supply a needed document, else true.
bool DocSeqFiltered::getDoc(int idx, Rcl::Doc &doc, string *)
{
    LOGDEB2("DocSeqFiltered::getDoc() fetching " << idx << "\n");

    // A negative position can't designate a document. Without this check it
    // would take the "already known" branch below and be used to index
    // m_dbindices.
    if (idx < 0) {
        return false;
    }

    if (idx < static_cast<int>(m_dbindices.size())) {
        // The corresponding backend index is already known
        if (!m_seq->getDoc(m_dbindices[idx], doc))
            return false;
        return true;
    }

    // Have to fetch docs and filter until we get enough or fail
    m_dbindices.reserve(idx + 1);

    // First backend seq doc we fetch is the one after last stored
    int backend_idx = m_dbindices.empty() ? 0 : m_dbindices.back() + 1;

    // Loop until we get enough docs
    Rcl::Doc tdoc;
    while (idx >= static_cast<int>(m_dbindices.size())) {
        if (!m_seq->getDoc(backend_idx, tdoc))
            return false;
        if (filter(m_spec, &tdoc)) {
            m_dbindices.push_back(backend_idx);
        }
        backend_idx++;
    }
    // The loop ends right after the accepted document which made
    // m_dbindices large enough: this is the one stored at position idx.
    doc = tdoc;
    return true;
}

View File

@ -1,4 +1,4 @@
/* Copyright (C) 2005 J.F.Dockes
/* Copyright (C) 2005-2021 J.F.Dockes
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
@ -16,10 +16,9 @@
*/
#include "autoconfig.h"
#include "sortseq.h"
#include <algorithm>
#include "sortseq.h"
#include "log.h"
using std::string;
@ -31,15 +30,14 @@ public:
// It's not too clear in the std::sort doc what this should do. This
// behaves as operator<
// Ordering predicate: non-zero if x must be placed before y according to
// the ss.field metadata value, reversed when ss.desc is set.
//
// Sorting needs a strict weak ordering. Answering "not less" in both
// directions as soon as one side lacks the field would make such a
// document equivalent to every other one, while the others remain ordered
// between themselves, which is not a consistent ordering. So documents
// lacking the field are ranked as a group: below any document having a
// value, which puts them first when ascending and last when descending.
int operator()(const Rcl::Doc *x, const Rcl::Doc *y) {
    LOGDEB1("Comparing .. \n" );

    const auto xit = x->meta.find(ss.field);
    const auto yit = y->meta.find(ss.field);
    const bool xhas = xit != x->meta.end();
    const bool yhas = yit != y->meta.end();
    if (!xhas || !yhas) {
        // With the field missing on both sides neither is less than the
        // other. Else the one lacking the field is the lesser one.
        const bool xless = !xhas && yhas;
        const bool yless = xhas && !yhas;
        return ss.desc ? yless : xless;
    }
    return ss.desc ? yit->second < xit->second : xit->second < yit->second;
}
};
@ -52,16 +50,16 @@ bool DocSeqSorted::setSortSpec(const DocSeqSortSpec &sortspec)
m_docs.resize(count);
int i;
for (i = 0; i < count; i++) {
if (!m_seq->getDoc(i, m_docs[i])) {
LOGERR("DocSeqSorted: getDoc failed for doc " << (i) << "\n" );
count = i;
break;
}
if (!m_seq->getDoc(i, m_docs[i])) {
LOGERR("DocSeqSorted: getDoc failed for doc " << i << "\n");
count = i;
break;
}
}
m_docs.resize(count);
m_docsp.resize(count);
for (i = 0; i < count; i++)
m_docsp[i] = &m_docs[i];
m_docsp[i] = &m_docs[i];
CompareDocs cmp(sortspec);
sort(m_docsp.begin(), m_docsp.end(), cmp);
@ -70,10 +68,9 @@ bool DocSeqSorted::setSortSpec(const DocSeqSortSpec &sortspec)
bool DocSeqSorted::getDoc(int num, Rcl::Doc &doc, string *)
{
LOGDEB("DocSeqSorted::getDoc(" << (num) << ")\n" );
LOGDEB("DocSeqSorted::getDoc(" << num << ")\n");
if (num < 0 || num >= int(m_docsp.size()))
return false;
return false;
doc = *m_docsp[num];
return true;
}

View File

@ -1570,8 +1570,7 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi, Doc &doc)
// There is no way in hell we could have an idea of the
// charset here, so let's hope it's ascii or utf-8. We call
// transcode to strip the bad chars and pray
if (transcode(path_getsimple(doc.ipath), utf8ipathlast,
"UTF-8", "UTF-8")) {
if (transcode(path_getsimple(doc.ipath), utf8ipathlast, "UTF-8", "UTF-8")) {
splitter.text_to_words(utf8ipathlast);
}
}
@ -1596,14 +1595,12 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi, Doc &doc)
splitter.curpos = 0;
newdocument.add_posting(wrap_prefix(pathelt_prefix),
splitter.basepos + splitter.curpos++);
for (vector<string>::iterator it = vpath.begin();
it != vpath.end(); it++){
if (it->length() > 230) {
// Just truncate it. May still be useful because
// of wildcards
*it = it->substr(0, 230);
for (auto& elt : vpath) {
if (elt.length() > 230) {
// Just truncate it. May still be useful because of wildcards
elt = elt.substr(0, 230);
}
newdocument.add_posting(wrap_prefix(pathelt_prefix) + *it,
newdocument.add_posting(wrap_prefix(pathelt_prefix) + elt,
splitter.basepos + splitter.curpos++);
}
splitter.basepos += splitter.curpos + 100;