Add recollindex option to write file not indexed reasons to diagnostics file

This commit is contained in:
Jean-Francois Dockes 2021-04-01 10:32:04 +02:00
parent 485a0fc650
commit 4756b1252b
18 changed files with 288 additions and 81 deletions

View File

@ -107,6 +107,8 @@ index/fsindexer.cpp \
index/fsindexer.h \
index/idxstatus.h \
index/idxstatus.cpp \
index/idxdiags.h \
index/idxdiags.cpp \
index/mimetype.cpp \
index/mimetype.h \
index/rclmon.h \

View File

@ -54,6 +54,7 @@
#include "cpuconf.h"
#include "execmd.h"
#include "md5.h"
#include "idxdiags.h"
using namespace std;
@ -754,6 +755,7 @@ bool RclConfig::inStopSuffixes(const string& fni)
if (it != STOPSUFFIXES->end()) {
LOGDEB2("RclConfig::inStopSuffixes: Found (" << fni << ") [" <<
((*it).m_str) << "]\n");
IdxDiags::theDiags().record(IdxDiags::NoContentSuffix, fni);
return true;
} else {
LOGDEB2("RclConfig::inStopSuffixes: not found [" << fni << "]\n");
@ -822,35 +824,38 @@ bool RclConfig::getMimeCatTypes(const string& cat, vector<string>& tps) const
return true;
}
string RclConfig::getMimeHandlerDef(const string &mtype, bool filtertypes)
string RclConfig::getMimeHandlerDef(const string &mtype, bool filtertypes, const std::string& fn)
{
string hs;
if (filtertypes) {
if(m_rmtstate.needrecompute()) {
m_restrictMTypes.clear();
stringToStrings(stringtolower((const string&)m_rmtstate.getvalue()),
m_restrictMTypes);
stringToStrings(stringtolower((const string&)m_rmtstate.getvalue()), m_restrictMTypes);
}
if (m_xmtstate.needrecompute()) {
m_excludeMTypes.clear();
stringToStrings(stringtolower((const string&)m_xmtstate.getvalue()),
m_excludeMTypes);
stringToStrings(stringtolower((const string&)m_xmtstate.getvalue()), m_excludeMTypes);
}
if (!m_restrictMTypes.empty() &&
!m_restrictMTypes.count(stringtolower(mtype))) {
LOGDEB2("RclConfig::getMimeHandlerDef: not in mime type list\n");
if (!m_restrictMTypes.empty() && !m_restrictMTypes.count(stringtolower(mtype))) {
IdxDiags::theDiags().record(IdxDiags::NotIncludedMime, fn, mtype);
LOGDEB1("RclConfig::getMimeHandlerDef: " << mtype << " not in mime type list\n");
return hs;
}
if (!m_excludeMTypes.empty() &&
m_excludeMTypes.count(stringtolower(mtype))) {
LOGDEB2("RclConfig::getMimeHandlerDef: in excluded mime list\n");
if (!m_excludeMTypes.empty() && m_excludeMTypes.count(stringtolower(mtype))) {
IdxDiags::theDiags().record(IdxDiags::ExcludedMime, fn, mtype);
LOGDEB1("RclConfig::getMimeHandlerDef: " << mtype << " in excluded mime list (fn " <<
fn << ")\n");
return hs;
}
}
if (!mimeconf->get(mtype, hs, "index")) {
LOGDEB1("getMimeHandlerDef: no handler for '" << mtype << "'\n");
if (mtype != "inode/directory") {
IdxDiags::theDiags().record(IdxDiags::NoHandler, fn, mtype);
LOGDEB1("getMimeHandlerDef: no handler for '" << mtype << "' (fn " <<
fn << ")\n");
}
}
return hs;
}

View File

@ -248,7 +248,8 @@ public:
string getSuffixFromMimeType(const string &mt) const;
/** mimeconf: get input filter for mimetype */
string getMimeHandlerDef(const string &mimetype, bool filtertypes=false);
string getMimeHandlerDef(const string &mimetype, bool filtertypes=false,
const std::string& fn = std::string());
/** For lines like: "name = some value; attr1 = value1; attr2 = val2"
* Separate the value and store the attributes in a ConfSimple

View File

@ -15,6 +15,9 @@ recollindex \- indexing command for the Recoll full text search system
[
.B \-k
]
[
.B \--diagfile
<diagpath> ]
.br
.B recollindex
[
@ -93,6 +96,12 @@ pattern
<cfdir>]
.B \--webcache-burst
<destdir>
.B recollindex
[
.B \-c
<cfdir>]
.B \--notindexed
[path [path ...]]
.SH DESCRIPTION
The
@ -142,7 +151,44 @@ will try again to process all failed files. Please note that
.B recollindex
may also decide to retry failed files if the auxiliary checking script
defined by the "checkneedretryindexscript" configuration variable indicates
that this should happen.
that this should happen.
.PP
If option
.B \--diagfile
is given, the path given as parameter will be truncated and indexing
diagnostics will be written to it. Each line in the file will have a
diagnostic type (reason for the file not to be indexed), the file path, and
a possible additional piece of information, which can be the MIME type or
the archive internal path depending on the issue. The following diagnostic
types are currently defined:
.IP
.B Skipped
: the path matches an element of
.B skippedPaths
or
.B skippedNames.
.IP
.B NoContentSuffix
: the file name suffix is found in the
.B noContentSuffixes
list.
.IP
.B MissingHelper
: a helper program is missing.
.IP
.B Error
: general error (see the log).
.IP
.B NoHandler
: no handler is defined for the MIME type.
.IP
.B ExcludedMime
: the MIME type is part of the
.B excludedmimetypes
list.
.IP
.B NotIncludedMime
: the
.B onlymimetypes
list is not empty and the MIME type is not in it.
.PP
If option
.B
@ -297,7 +343,12 @@ cache.
.B recollindex \--webcache-burst <destdir>
will extract all entries from the Web cache to files created inside
<destdir>. Each cache entry is extracted as two files, for the data and metadata.
.PP
.B recollindex \--notindexed [path [path ...]]
will check each path and print out those which are absent from the index
(with an "ABSENT" prefix), or caused an indexing error (with an "ERROR"
prefix). If no paths are given on the command line, the command will read
them, one per line, from stdin.
.SH SEE ALSO
.PP

View File

@ -19,6 +19,8 @@
*/
#include "autoconfig.h"
#include "checkindexed.h"
#include <stdio.h>
#include <iostream>

View File

@ -47,6 +47,7 @@
#include "rclinit.h"
#include "extrameta.h"
#include "utf8fn.h"
#include "idxdiags.h"
#if defined(HAVE_POSIX_FADVISE)
#include <unistd.h>
#include <fcntl.h>
@ -397,8 +398,7 @@ bool FsIndexer::indexFiles(list<string>& files, int flags)
continue;
}
}
if (processone(*it, &stb, FsTreeWalker::FtwRegular) !=
FsTreeWalker::FtwOk) {
if (processone(*it, &stb, FsTreeWalker::FtwRegular) != FsTreeWalker::FtwOk) {
LOGERR("FsIndexer::indexFiles: processone failed\n");
goto out;
}
@ -560,9 +560,8 @@ void *FsIndexerInternfileWorker(void * fsp)
return (void*)1;
}
LOGDEB0("FsIndexerInternfileWorker: task fn " << tsk->fn << "\n");
if (fip->processonefile(&myconf, tsk->fn, &tsk->statbuf,
tsk->localfields) !=
FsTreeWalker::FtwOk) {
if (fip->processonefile(
&myconf, tsk->fn, &tsk->statbuf, tsk->localfields) != FsTreeWalker::FtwOk) {
LOGERR("FsIndexerInternfileWorker: processone failed\n");
tqp->workerExit();
return (void*)0;
@ -584,9 +583,8 @@ void *FsIndexerInternfileWorker(void * fsp)
/// Accent and majuscule handling are performed by the db module when doing
/// the actual indexing work. The Rcl::Doc created by internfile()
/// mostly contains pretty raw utf8 data.
FsTreeWalker::Status
FsIndexer::processone(const std::string &fn, const struct PathStat *stp,
FsTreeWalker::CbFlag flg)
FsTreeWalker::Status FsIndexer::processone(
const std::string &fn, const struct PathStat *stp, FsTreeWalker::CbFlag flg)
{
if (m_updater) {
#ifdef IDX_THREADS
@ -610,7 +608,10 @@ FsIndexer::processone(const std::string &fn, const struct PathStat *stp,
if (flg == FsTreeWalker::FtwDirReturn)
return FsTreeWalker::FtwOk;
}
if (flg == FsTreeWalker::FtwSkipped) {
IdxDiags::theDiags().record(IdxDiags::Skipped, fn);
return FsTreeWalker::FtwOk;
}
#ifdef IDX_THREADS
if (m_haveInternQ) {
InternfileTask *tp = new InternfileTask(fn, stp, m_localfields);
@ -644,10 +645,9 @@ bool FsIndexer::launchAddOrUpdate(const string& udi, const string& parent_udi,
return m_db->addOrUpdate(udi, parent_udi, doc);
}
FsTreeWalker::Status
FsIndexer::processonefile(RclConfig *config,
const std::string &fn, const struct PathStat *stp,
const map<string, string>& localfields)
FsTreeWalker::Status FsIndexer::processonefile(
RclConfig *config, const std::string &fn, const struct PathStat *stp,
const map<string, string>& localfields)
{
////////////////////
// Check db up to date ? Doing this before file type
@ -693,7 +693,7 @@ FsIndexer::processonefile(RclConfig *config,
// If noretryfailed is set, check for a file which previously
// failed to index, and avoid re-processing it
if (needupdate && m_noretryfailed && existingDoc &&
!oldsig.empty() && *oldsig.rbegin() == '+') {
!oldsig.empty() && oldsig.back() == '+') {
// Check that the sigs are the same except for the '+'. If the file
// actually changed, we always retry (maybe it was fixed)
string nold = oldsig.substr(0, oldsig.size()-1);
@ -720,8 +720,7 @@ FsIndexer::processonefile(RclConfig *config,
return FsTreeWalker::FtwOk;
}
LOGDEB0("processone: processing: [" <<
displayableBytes(stp->pst_size) << "] " << fn << "\n");
LOGDEB0("processone: processing: [" << displayableBytes(stp->pst_size) << "] " << fn << "\n");
// Note that we used to do the full path here, but I ended up
// believing that it made more sense to use only the file name
@ -813,6 +812,7 @@ FsIndexer::processonefile(RclConfig *config,
// myriads of such files, the ext script is executed for them
// and fails every time)
if (fis == FileInterner::FIError) {
IdxDiags::theDiags().record(IdxDiags::Error, fn, doc.ipath);
doc.sig += cstr_plus;
}
@ -822,8 +822,7 @@ FsIndexer::processonefile(RclConfig *config,
// Add document to database. If there is an ipath, add it
// as a child of the file document.
if (!launchAddOrUpdate(udi, doc.ipath.empty() ?
cstr_null : parent_udi, doc)) {
if (!launchAddOrUpdate(udi, doc.ipath.empty() ? cstr_null : parent_udi, doc)) {
return FsTreeWalker::FtwError;
}

98
src/index/idxdiags.cpp Normal file
View File

@ -0,0 +1,98 @@
/* Copyright (C) 2021 J.F.Dockes
*
* License: GPL 2.1
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2.1 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the
* Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include "autoconfig.h"
#include <stdio.h>
#include <mutex>
#include "idxdiags.h"
static std::mutex diagmutex;
/// Private state of IdxDiags: the stdio stream receiving the diagnostics.
class IdxDiags::Internal {
public:
    /// Output stream for the diagnostics lines. Stays null until
    /// IdxDiags::init() successfully opens the output file.
    FILE *fp = nullptr;

    /// Close the output stream if one was opened.
    ~Internal() {
        if (nullptr != fp) {
            fclose(fp);
        }
    }
};
/// Build the recorder with a fresh private state object. No output file is
/// opened here: that only happens in init().
IdxDiags::IdxDiags()
    : m(new Internal)
{
}
/// Release the private state. Internal's destructor closes the output
/// stream if one was opened.
IdxDiags::~IdxDiags()
{
delete m;
}
/// Flush buffered diagnostics to the output file.
/// @return true if there was nothing to flush (no output file open) or if
///   fflush() succeeded, false if fflush() reported an error.
bool IdxDiags::flush()
{
    // Serialize with record(), which writes to the same stream.
    std::lock_guard<std::mutex> guard(diagmutex);
    if (nullptr == m || nullptr == m->fp) {
        return true;
    }
    return fflush(m->fp) == 0;
}
/// Return the single process-wide diagnostics recorder, creating it on
/// first use.
///
/// The instance is held through a function-local static pointer: C++11
/// guarantees that its initialization runs exactly once even if several
/// threads make the first call concurrently (the previous unsynchronized
/// "test then new" could create two instances). As before, the object is
/// never deleted, so the reference stays valid for the whole life of the
/// process, including exit-time cleanup code which calls flush().
IdxDiags& IdxDiags::theDiags()
{
    static IdxDiags *const theInstance = new IdxDiags;
    return *theInstance;
}
/// Open (and truncate) the diagnostics output file.
///
/// Performs no locking: call it before any thread which may use record()
/// or flush() is started.
/// If a file was already opened by a previous call, it is closed first so
/// that the stream is not leaked.
/// @param outpath path of the file to write the diagnostics to.
/// @return true if the file could be opened for writing. On failure, no
///   output file is set and record()/flush() do nothing.
bool IdxDiags::init(const std::string& outpath)
{
    if (nullptr == m) {
        return false;
    }
    // Re-initialization: do not leak the previously opened stream.
    if (nullptr != m->fp) {
        fclose(m->fp);
        m->fp = nullptr;
    }
    m->fp = fopen(outpath.c_str(), "w");
    return nullptr != m->fp;
}
/// Append one diagnostic line to the output file, formatted as
/// "<kind> <path> | <detail>".
///
/// @param diag reason why the document was not indexed.
/// @param path file path the diagnostic applies to.
/// @param detail additional information (callers pass the MIME type or the
///   internal path), may be empty.
/// @return true if recording is disabled (no output file open), if both
///   path and detail are empty (nothing to report), or if the line was
///   written; false if the write failed.
bool IdxDiags::record(DiagKind diag, const std::string& path, const std::string& detail)
{
    // Checked without taking the lock, so that the common case of disabled
    // diagnostics stays cheap.
    if (nullptr == m || nullptr == m->fp || (path.empty() && detail.empty())) {
        return true;
    }
    // No default label on purpose: compilers can then warn when a new
    // DiagKind value is not handled here. "Unknown" covers stray values.
    const char *skind = "Unknown";
    switch (diag) {
    case Ok: skind = "Ok";break;
    case Skipped: skind = "Skipped";break;
    case NoContentSuffix: skind = "NoContentSuffix";break;
    case MissingHelper: skind = "MissingHelper";break;
    case Error: skind = "Error";break;
    case NoHandler: skind = "NoHandler";break;
    case ExcludedMime: skind = "ExcludedMime";break;
    case NotIncludedMime: skind = "NotIncludedMime";break;
    }
    // Serialize writers so that lines from different threads do not mix.
    std::unique_lock<std::mutex> lock(diagmutex);
    // fprintf() returns a negative value on output error: report it instead
    // of unconditionally claiming success.
    return fprintf(m->fp, "%s %s | %s\n", skind, path.c_str(), detail.c_str()) >= 0;
}

50
src/index/idxdiags.h Normal file
View File

@ -0,0 +1,50 @@
/* Copyright (C) 2021 J.F.Dockes
*
* License: GPL 2.1
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2.1 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the
* Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#ifndef _IDXDIAGS_H_INCLUDED_
#define _IDXDIAGS_H_INCLUDED_
#include <string>
/// Recorder for indexing diagnostics: writes the reasons why files were
/// not indexed to a diagnostics file, one line per event.
/// Single-instance class: obtain the object through theDiags().
class IdxDiags {
public:
    /// Reason for a document not being indexed.
    enum DiagKind {
        Ok,
        Skipped,          ///< Path matches skippedPaths or skippedNames.
        NoContentSuffix,  ///< File name suffix is in the noContentSuffixes list.
        MissingHelper,    ///< A helper program is missing.
        Error,            ///< General error (see the log).
        NoHandler,        ///< No handler is defined for the MIME type.
        ExcludedMime,     ///< MIME type is part of the excludedmimetypes list.
        NotIncludedMime   ///< onlymimetypes is not empty and the MIME type is not in it.
    };

    // Retrieve a reference to the single instance.
    static IdxDiags& theDiags();

    // Initialize, setting the output file path. outpath will be truncated.
    // No locking: this must be called from the main thread, before going multithread.
    // If init is never called, further calls to record() or flush() will be noops.
    bool init(const std::string& outpath);

    // Record a reason for a document not to be indexed.
    bool record(DiagKind diag, const std::string& path, const std::string& detail = std::string());

    // Flush pending output to the diagnostics file.
    bool flush();

    // The object owns its private state through a raw pointer: copying would
    // make two objects share and later double-delete it, so it is forbidden.
    IdxDiags(const IdxDiags&) = delete;
    IdxDiags& operator=(const IdxDiags&) = delete;

    class Internal;
private:
    Internal *m;
    // Construction and destruction are private: the only instance is the
    // one handed out by theDiags().
    IdxDiags();
    ~IdxDiags();
};
#endif /* _IDXDIAGS_H_INCLUDED_ */

View File

@ -66,20 +66,16 @@ static RclMonitor *makeMonitor();
*/
class WalkCB : public FsTreeWalkerCB {
public:
WalkCB(RclConfig *conf, RclMonitor *mon, RclMonEventQueue *queue,
FsTreeWalker& walker)
: m_config(conf), m_mon(mon), m_queue(queue), m_walker(walker)
{}
WalkCB(RclConfig *conf, RclMonitor *mon, RclMonEventQueue *queue, FsTreeWalker& walker)
: m_config(conf), m_mon(mon), m_queue(queue), m_walker(walker) {}
virtual ~WalkCB() {}
virtual FsTreeWalker::Status
processone(const string &fn, const struct PathStat *st,
FsTreeWalker::CbFlag flg) {
virtual FsTreeWalker::Status processone(
const string &fn, const struct PathStat *st, FsTreeWalker::CbFlag flg) {
MONDEB("rclMonRcvRun: processone " << fn << " m_mon " << m_mon <<
" m_mon->ok " << (m_mon ? m_mon->ok() : false) << std::endl);
if (flg == FsTreeWalker::FtwDirEnter ||
flg == FsTreeWalker::FtwDirReturn) {
if (flg == FsTreeWalker::FtwDirEnter || flg == FsTreeWalker::FtwDirReturn) {
m_config->setKeyDir(fn);
// Set up skipped patterns for this subtree.
m_walker.setSkippedNames(m_config->getSkippedNames());
@ -106,8 +102,7 @@ public:
m_mon->saved_errno != ENOENT)
return FsTreeWalker::FtwError;
}
} else if (!m_mon->generatesExist() &&
flg == FsTreeWalker::FtwRegular) {
} else if (!m_mon->generatesExist() && flg == FsTreeWalker::FtwRegular) {
// Have to synthetize events for regular files existence
// at startup because the monitor does not do it
// Note 2011-09-29: no sure this is actually needed. We just ran

View File

@ -62,6 +62,7 @@ using namespace std;
#include "checkretryfailed.h"
#include "idxstatus.h"
#include "circache.h"
#include "idxdiags.h"
// Command line options
static int op_flags;
@ -93,11 +94,13 @@ static int op_flags;
#define OPTVAL_WEBCACHE_COMPACT 1000
#define OPTVAL_WEBCACHE_BURST 1001
#define OPTVAL_DIAGS_NOTINDEXED 1002
#define OPTVAL_DIAGS_DIAGFILE 1003
static struct option long_options[] = {
{"webcache-compact", 0, 0, OPTVAL_WEBCACHE_COMPACT},
{"webcache-burst", required_argument, 0, OPTVAL_WEBCACHE_BURST},
{"notindexed", 0, 0, OPTVAL_DIAGS_NOTINDEXED},
{"diagfile", required_argument, 0, OPTVAL_DIAGS_DIAGFILE},
{0, 0, 0, 0}
};
@ -110,6 +113,7 @@ static ConfIndexer *confindexer;
static void cleanup()
{
deleteZ(confindexer);
IdxDiags::theDiags().flush();
recoll_exitready();
}
@ -274,20 +278,15 @@ static void setMyPriority(const RclConfig *config)
class MakeListWalkerCB : public FsTreeWalkerCB {
public:
MakeListWalkerCB(list<string>& files, const vector<string>& selpats)
: m_files(files), m_pats(selpats)
{
}
virtual FsTreeWalker::Status
processone(const string& fn, const struct PathStat *,
FsTreeWalker::CbFlag flg) {
: m_files(files), m_pats(selpats) {}
virtual FsTreeWalker::Status processone(
const string& fn, const struct PathStat *, FsTreeWalker::CbFlag flg) {
if (flg== FsTreeWalker::FtwDirEnter || flg == FsTreeWalker::FtwRegular){
if (m_pats.empty()) {
cerr << "Selecting " << fn << endl;
m_files.push_back(fn);
} else {
for (vector<string>::const_iterator it = m_pats.begin();
it != m_pats.end(); it++) {
if (fnmatch(it->c_str(), fn.c_str(), 0) == 0) {
for (const auto& pat : m_pats) {
if (fnmatch(pat.c_str(), fn.c_str(), 0) == 0) {
m_files.push_back(fn);
break;
}
@ -451,6 +450,8 @@ static const char usage [] =
" -Z : in place reset: consider all documents as changed. Can also\n"
" be combined with -i or -r but not -m\n"
" -k : retry files on which we previously failed\n"
" --diagfile <outputpath> : list skipped or otherwise not indexed documents to <outputpath>\n"
" <outputpath> will be truncated\n"
#ifdef RCL_MONITOR
"recollindex -m [-w <secs>] -x [-D] [-C]\n"
" Perform real time indexing. Don't become a daemon if -D is set.\n"
@ -636,6 +637,7 @@ int main(int argc, char *argv[])
bool diags_notindexed{false};
std::string burstdir;
std::string diagfile;
while ((ret = getopt_long(argc, (char *const*)&args[0], "c:CDdEefhikKlmnPp:rR:sS:w:xZz",
long_options, NULL)) != -1) {
switch (ret) {
@ -676,7 +678,7 @@ int main(int argc, char *argv[])
case OPTVAL_WEBCACHE_COMPACT: webcache_compact = true; break;
case OPTVAL_WEBCACHE_BURST: burstdir = optarg; webcache_burst = true;break;
case OPTVAL_DIAGS_NOTINDEXED: diags_notindexed = true;break;
case OPTVAL_DIAGS_DIAGFILE: diagfile = optarg;break;
default: Usage(); break;
}
}
@ -790,6 +792,12 @@ int main(int argc, char *argv[])
}
}
if (!diagfile.empty()) {
if (!IdxDiags::theDiags().init(diagfile)) {
std::cerr << "Could not initialize diags file " << diagfile << "\n";
LOGERR("recollindex: Could not initialize diags file " << diagfile << "\n");
}
}
bool rezero((op_flags & OPT_z) != 0);
bool inPlaceReset((op_flags & OPT_Z) != 0);

View File

@ -411,8 +411,7 @@ WebQueueIndexer::processone(
if (flg != FsTreeWalker::FtwRegular)
return FsTreeWalker::FtwOk;
string dotpath = path_cat(path_getfather(path),
string(DOTFILEPREFIX) + path_getsimple(path));
string dotpath = path_cat(path_getfather(path), string(DOTFILEPREFIX) + path_getsimple(path));
LOGDEB("WebQueueIndexer: prc1: [" << path << "]\n");
WebQueueDotFile dotfile(m_config, dotpath);

View File

@ -240,17 +240,15 @@ void FileInterner::init(const string &f, const struct PathStat *stp,
m_mimetype = l_mime;
// Look for appropriate handler (might still return empty)
RecollFilter *df = getMimeHandler(l_mime, m_cfg, !m_forPreview);
RecollFilter *df = getMimeHandler(l_mime, m_cfg, !m_forPreview, f);
if (!df || df->is_unknown()) {
// No real handler for this type, for now :(
LOGDEB("FileInterner:: unprocessed mime: [" << l_mime << "] [" << f <<
"]\n");
LOGDEB("FileInterner:: unprocessed mime: [" << l_mime << "] [" << f << "]\n");
if (!df)
return;
}
df->set_property(Dijon::Filter::OPERATING_MODE,
m_forPreview ? "view" : "index");
df->set_property(Dijon::Filter::OPERATING_MODE, m_forPreview ? "view" : "index");
df->set_property(Dijon::Filter::DJF_UDI, udi);
df->set_docsize(docsize);
@ -271,8 +269,7 @@ FileInterner::FileInterner(const string &data, RclConfig *cnf,
init(data, cnf, flags, imime);
}
void FileInterner::init(const string &data, RclConfig *,
int, const string& imime)
void FileInterner::init(const string &data, RclConfig *, int, const string& imime)
{
if (imime.empty()) {
LOGERR("FileInterner: inmemory constructor needs input mime type\n");
@ -281,7 +278,7 @@ void FileInterner::init(const string &data, RclConfig *,
m_mimetype = imime;
// Look for appropriate handler (might still return empty)
RecollFilter *df = getMimeHandler(m_mimetype, m_cfg, !m_forPreview);
RecollFilter *df = getMimeHandler(m_mimetype, m_cfg, !m_forPreview, m_fn);
if (!df) {
// No handler for this type, for now :( if indexallfilenames
@ -289,8 +286,7 @@ void FileInterner::init(const string &data, RclConfig *,
LOGDEB("FileInterner:: unprocessed mime [" << m_mimetype << "]\n");
return;
}
df->set_property(Dijon::Filter::OPERATING_MODE,
m_forPreview ? "view" : "index");
df->set_property(Dijon::Filter::OPERATING_MODE, m_forPreview ? "view" : "index");
df->set_docsize(data.length());
if (df->is_data_input_ok(Dijon::Filter::DOCUMENT_STRING)) {
@ -741,12 +737,11 @@ int FileInterner::addHandler()
getKeyValue(docdata, cstr_dj_keyipath, ipathel);
bool dofilter = !m_forPreview &&
(mimetype.compare(cstr_texthtml) || !ipathel.empty());
RecollFilter *newflt = getMimeHandler(mimetype, m_cfg, dofilter);
RecollFilter *newflt = getMimeHandler(mimetype, m_cfg, dofilter, m_fn);
if (!newflt) {
// If we can't find a handler, this doc can't be handled
// but there can be other ones so we go on
LOGINFO("FileInterner::addHandler: no filter for [" << mimetype <<
"]\n");
LOGINFO("FileInterner::addHandler: no filter for [" << mimetype << "]\n");
return ADD_CONTINUE;
}
newflt->set_property(Dijon::Filter::OPERATING_MODE,

View File

@ -29,6 +29,7 @@
#include "smallut.h"
#include "md5ut.h"
#include "rclconfig.h"
#include "idxdiags.h"
using namespace std;
@ -186,6 +187,7 @@ bool MimeHandlerExec::next_document()
missingHelper = true;
m_reason = string("RECFILTERROR HELPERNOTFOUND ") + cmd;
whatHelper = m_reason;
IdxDiags::theDiags().record(IdxDiags::MissingHelper, m_fn);
} else if (output.find("RECFILTERROR") == 0) {
// If the output string begins with RECFILTERROR, then it's
// interpretable error information out from a recoll script
@ -193,6 +195,7 @@ bool MimeHandlerExec::next_document()
std::string::size_type pos;
if ((pos = output.find("RECFILTERROR ")) == 0) {
if (output.find("HELPERNOTFOUND") != string::npos) {
IdxDiags::theDiags().record(IdxDiags::MissingHelper, m_fn);
missingHelper = true;
whatHelper = output.substr(pos);
}

View File

@ -33,6 +33,7 @@
#include "mimetype.h"
#include "idfile.h"
#include "rclutil.h"
#include "idxdiags.h"
using namespace std;
@ -72,6 +73,7 @@ bool MimeHandlerExecMultiple::startCmd()
vector<string>myparams(params.begin() + 1, params.end());
if (m_cmd.startExec(cmd, myparams, 1, 1) < 0) {
IdxDiags::theDiags().record(IdxDiags::MissingHelper, m_fn);
m_reason = string("RECFILTERROR HELPERNOTFOUND ") + cmd;
missingHelper = true;
whatHelper = cmd;
@ -113,6 +115,7 @@ bool MimeHandlerExecMultiple::readDataElement(string& name, string &data)
if ((pos = ibuf.find("RECFILTERROR ")) == 0) {
m_reason = ibuf;
if (ibuf.find("HELPERNOTFOUND") != string::npos) {
IdxDiags::theDiags().record(IdxDiags::MissingHelper, m_fn);
missingHelper = true;
whatHelper = ibuf.substr(pos);
}

View File

@ -256,8 +256,8 @@ MimeHandlerExec *mhExecFactory(RclConfig *cfg, const string& mtype, string& hs,
}
/* Get handler/filter object for given mime type: */
RecollFilter *getMimeHandler(const string &mtype, RclConfig *cfg,
bool filtertypes)
RecollFilter *getMimeHandler(const string &mtype, RclConfig *cfg,
bool filtertypes, const std::string& fn)
{
LOGDEB("getMimeHandler: mtype [" << mtype << "] filtertypes " <<
filtertypes << "\n");
@ -270,7 +270,7 @@ RecollFilter *getMimeHandler(const string &mtype, RclConfig *cfg,
// indexedmimetypes but an html handler could still be in the
// cache because it was needed by some other interning stack).
string hs;
hs = cfg->getMimeHandlerDef(mtype, filtertypes);
hs = cfg->getMimeHandlerDef(mtype, filtertypes, fn);
string id;
if (!hs.empty()) {

View File

@ -169,7 +169,7 @@ protected:
* indexedmimetypes (if this is set at all).
*/
extern RecollFilter *getMimeHandler(const std::string &mtyp, RclConfig *cfg,
bool filtertypes);
bool filtertypes, const std::string& fn = std::string());
/// Free up filter for reuse (you can also delete it)
extern void returnMimeHandler(RecollFilter *);

View File

@ -2401,7 +2401,7 @@ bool Db::dbStats(DbStats& res, bool listfailed)
try {
Xapian::Document doc = xdb.get_document(docid);
string sig = doc.get_value(VALUE_SIG);
if (sig.empty() || sig[sig.size()-1] != '+') {
if (sig.empty() || sig.back() != '+') {
continue;
}
string data = doc.get_data();

View File

@ -411,9 +411,7 @@ FsTreeWalker::Status FsTreeWalker::iwalk(const string &top,
// Skipped file names match ?
if (!data->skippedNames.empty()) {
if (inSkippedNames(dname)) {
if (data->options & FtwOnlySkipped) {
cb.processone(path_cat(top, dname), nullptr, FtwSkipped);
}
cb.processone(path_cat(top, dname), nullptr, FtwSkipped);
continue;
}
}
@ -428,9 +426,7 @@ FsTreeWalker::Status FsTreeWalker::iwalk(const string &top,
// this was broken by 1.13.00 and the systematic use of
// FNM_LEADING_DIR
if (inSkippedPaths(fn, false)) {
if (data->options & FtwOnlySkipped) {
cb.processone(fn, nullptr, FtwSkipped);
}
cb.processone(fn, nullptr, FtwSkipped);
continue;
}
}