Add recollindex option to write file not indexed reasons to diagnostics file
This commit is contained in:
parent
485a0fc650
commit
4756b1252b
@ -107,6 +107,8 @@ index/fsindexer.cpp \
|
||||
index/fsindexer.h \
|
||||
index/idxstatus.h \
|
||||
index/idxstatus.cpp \
|
||||
index/idxdiags.h \
|
||||
index/idxdiags.cpp \
|
||||
index/mimetype.cpp \
|
||||
index/mimetype.h \
|
||||
index/rclmon.h \
|
||||
|
||||
@ -54,6 +54,7 @@
|
||||
#include "cpuconf.h"
|
||||
#include "execmd.h"
|
||||
#include "md5.h"
|
||||
#include "idxdiags.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
@ -754,6 +755,7 @@ bool RclConfig::inStopSuffixes(const string& fni)
|
||||
if (it != STOPSUFFIXES->end()) {
|
||||
LOGDEB2("RclConfig::inStopSuffixes: Found (" << fni << ") [" <<
|
||||
((*it).m_str) << "]\n");
|
||||
IdxDiags::theDiags().record(IdxDiags::NoContentSuffix, fni);
|
||||
return true;
|
||||
} else {
|
||||
LOGDEB2("RclConfig::inStopSuffixes: not found [" << fni << "]\n");
|
||||
@ -822,35 +824,38 @@ bool RclConfig::getMimeCatTypes(const string& cat, vector<string>& tps) const
|
||||
return true;
|
||||
}
|
||||
|
||||
string RclConfig::getMimeHandlerDef(const string &mtype, bool filtertypes)
|
||||
string RclConfig::getMimeHandlerDef(const string &mtype, bool filtertypes, const std::string& fn)
|
||||
{
|
||||
string hs;
|
||||
|
||||
if (filtertypes) {
|
||||
if(m_rmtstate.needrecompute()) {
|
||||
m_restrictMTypes.clear();
|
||||
stringToStrings(stringtolower((const string&)m_rmtstate.getvalue()),
|
||||
m_restrictMTypes);
|
||||
stringToStrings(stringtolower((const string&)m_rmtstate.getvalue()), m_restrictMTypes);
|
||||
}
|
||||
if (m_xmtstate.needrecompute()) {
|
||||
m_excludeMTypes.clear();
|
||||
stringToStrings(stringtolower((const string&)m_xmtstate.getvalue()),
|
||||
m_excludeMTypes);
|
||||
stringToStrings(stringtolower((const string&)m_xmtstate.getvalue()), m_excludeMTypes);
|
||||
}
|
||||
if (!m_restrictMTypes.empty() &&
|
||||
!m_restrictMTypes.count(stringtolower(mtype))) {
|
||||
LOGDEB2("RclConfig::getMimeHandlerDef: not in mime type list\n");
|
||||
if (!m_restrictMTypes.empty() && !m_restrictMTypes.count(stringtolower(mtype))) {
|
||||
IdxDiags::theDiags().record(IdxDiags::NotIncludedMime, fn, mtype);
|
||||
LOGDEB1("RclConfig::getMimeHandlerDef: " << mtype << " not in mime type list\n");
|
||||
return hs;
|
||||
}
|
||||
if (!m_excludeMTypes.empty() &&
|
||||
m_excludeMTypes.count(stringtolower(mtype))) {
|
||||
LOGDEB2("RclConfig::getMimeHandlerDef: in excluded mime list\n");
|
||||
if (!m_excludeMTypes.empty() && m_excludeMTypes.count(stringtolower(mtype))) {
|
||||
IdxDiags::theDiags().record(IdxDiags::ExcludedMime, fn, mtype);
|
||||
LOGDEB1("RclConfig::getMimeHandlerDef: " << mtype << " in excluded mime list (fn " <<
|
||||
fn << ")\n");
|
||||
return hs;
|
||||
}
|
||||
}
|
||||
|
||||
if (!mimeconf->get(mtype, hs, "index")) {
|
||||
LOGDEB1("getMimeHandlerDef: no handler for '" << mtype << "'\n");
|
||||
if (mtype != "inode/directory") {
|
||||
IdxDiags::theDiags().record(IdxDiags::NoHandler, fn, mtype);
|
||||
LOGDEB1("getMimeHandlerDef: no handler for '" << mtype << "' (fn " <<
|
||||
fn << ")\n");
|
||||
}
|
||||
}
|
||||
return hs;
|
||||
}
|
||||
|
||||
@ -248,7 +248,8 @@ public:
|
||||
string getSuffixFromMimeType(const string &mt) const;
|
||||
|
||||
/** mimeconf: get input filter for mimetype */
|
||||
string getMimeHandlerDef(const string &mimetype, bool filtertypes=false);
|
||||
string getMimeHandlerDef(const string &mimetype, bool filtertypes=false,
|
||||
const std::string& fn = std::string());
|
||||
|
||||
/** For lines like: "name = some value; attr1 = value1; attr2 = val2"
|
||||
* Separate the value and store the attributes in a ConfSimple
|
||||
|
||||
@ -15,6 +15,9 @@ recollindex \- indexing command for the Recoll full text search system
|
||||
[
|
||||
.B \-k
|
||||
]
|
||||
[
|
||||
.B \--diagfile
|
||||
<diagpath> ]
|
||||
.br
|
||||
.B recollindex
|
||||
[
|
||||
@ -93,6 +96,12 @@ pattern
|
||||
<cfdir>]
|
||||
.B \--webcache-burst
|
||||
<destdir>
|
||||
.B recollindex
|
||||
[
|
||||
.B \-c
|
||||
<cfdir>]
|
||||
.B \--notindexed
|
||||
[path [path ...]]
|
||||
|
||||
.SH DESCRIPTION
|
||||
The
|
||||
@ -142,7 +151,44 @@ will try again to process all failed files. Please note that
|
||||
.B recollindex
|
||||
may also decide to retry failed files if the auxiliary checking script
|
||||
defined by the "checkneedretryindexscript" configuration variable indicates
|
||||
that this should happen.
|
||||
that this should happen.
|
||||
.PP
|
||||
If option
|
||||
.B \--diagfile
|
||||
is given, the path given as parameter will be truncated and indexing
|
||||
diagnostics will be written to it. Each line in the file will have a
|
||||
diagnostic type (reason for the file not to be indexed), the file path, and
|
||||
a possible additional piece of information, which can be the MIME type or
|
||||
the archive internal path depending on the issue. The following diagnostic
|
||||
types are currently defined:
|
||||
.IP
|
||||
.B Skipped
|
||||
: the path matches an element of
|
||||
.B skippedPaths
|
||||
or
|
||||
.B skippedNames.
|
||||
.IP
|
||||
.B NoContentSuffix
|
||||
: the file name suffix is found in the
|
||||
.B noContentSuffixes
|
||||
list.
|
||||
.IP
|
||||
.B MissingHelper
|
||||
: a helper program is missing.
|
||||
.IP
|
||||
.B Error
|
||||
: general error (see the log).
|
||||
.IP
|
||||
.B NoHandler
|
||||
: no handler is defined for the MIME type.
|
||||
.IP
|
||||
.B ExcludedMime
|
||||
: the MIME type is part of the
|
||||
.B excludedmimetypes
|
||||
list.
|
||||
.IP
|
||||
.B NotIncludedMime
|
||||
: the
|
||||
.B onlymimetypes
|
||||
list is not empty and the MIME type is not in it.
|
||||
.PP
|
||||
If option
|
||||
.B
|
||||
@ -297,7 +343,12 @@ cache.
|
||||
.B recollindex \--webcache-burst <destdir>
|
||||
will extract all entries from the Web cache to files created inside
|
||||
<destdir>. Each cache entry is extracted as two files, for the data and metadata.
|
||||
|
||||
.PP
|
||||
.B recollindex \--notindexed [path [path ...]]
|
||||
will check each path and print out those which are absent from the index
|
||||
(with an "ABSENT" prefix), or caused an indexing error (with an "ERROR"
|
||||
prefix). If no paths are given on the command line, the command will read
|
||||
them, one per line, from stdin.
|
||||
|
||||
.SH SEE ALSO
|
||||
.PP
|
||||
|
||||
@ -19,6 +19,8 @@
|
||||
*/
|
||||
#include "autoconfig.h"
|
||||
|
||||
#include "checkindexed.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <iostream>
|
||||
|
||||
|
||||
@ -47,6 +47,7 @@
|
||||
#include "rclinit.h"
|
||||
#include "extrameta.h"
|
||||
#include "utf8fn.h"
|
||||
#include "idxdiags.h"
|
||||
#if defined(HAVE_POSIX_FADVISE)
|
||||
#include <unistd.h>
|
||||
#include <fcntl.h>
|
||||
@ -397,8 +398,7 @@ bool FsIndexer::indexFiles(list<string>& files, int flags)
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if (processone(*it, &stb, FsTreeWalker::FtwRegular) !=
|
||||
FsTreeWalker::FtwOk) {
|
||||
if (processone(*it, &stb, FsTreeWalker::FtwRegular) != FsTreeWalker::FtwOk) {
|
||||
LOGERR("FsIndexer::indexFiles: processone failed\n");
|
||||
goto out;
|
||||
}
|
||||
@ -560,9 +560,8 @@ void *FsIndexerInternfileWorker(void * fsp)
|
||||
return (void*)1;
|
||||
}
|
||||
LOGDEB0("FsIndexerInternfileWorker: task fn " << tsk->fn << "\n");
|
||||
if (fip->processonefile(&myconf, tsk->fn, &tsk->statbuf,
|
||||
tsk->localfields) !=
|
||||
FsTreeWalker::FtwOk) {
|
||||
if (fip->processonefile(
|
||||
&myconf, tsk->fn, &tsk->statbuf, tsk->localfields) != FsTreeWalker::FtwOk) {
|
||||
LOGERR("FsIndexerInternfileWorker: processone failed\n");
|
||||
tqp->workerExit();
|
||||
return (void*)0;
|
||||
@ -584,9 +583,8 @@ void *FsIndexerInternfileWorker(void * fsp)
|
||||
/// Accent and majuscule handling are performed by the db module when doing
|
||||
/// the actual indexing work. The Rcl::Doc created by internfile()
|
||||
/// mostly contains pretty raw utf8 data.
|
||||
FsTreeWalker::Status
|
||||
FsIndexer::processone(const std::string &fn, const struct PathStat *stp,
|
||||
FsTreeWalker::CbFlag flg)
|
||||
FsTreeWalker::Status FsIndexer::processone(
|
||||
const std::string &fn, const struct PathStat *stp, FsTreeWalker::CbFlag flg)
|
||||
{
|
||||
if (m_updater) {
|
||||
#ifdef IDX_THREADS
|
||||
@ -610,7 +608,10 @@ FsIndexer::processone(const std::string &fn, const struct PathStat *stp,
|
||||
if (flg == FsTreeWalker::FtwDirReturn)
|
||||
return FsTreeWalker::FtwOk;
|
||||
}
|
||||
|
||||
if (flg == FsTreeWalker::FtwSkipped) {
|
||||
IdxDiags::theDiags().record(IdxDiags::Skipped, fn);
|
||||
return FsTreeWalker::FtwOk;
|
||||
}
|
||||
#ifdef IDX_THREADS
|
||||
if (m_haveInternQ) {
|
||||
InternfileTask *tp = new InternfileTask(fn, stp, m_localfields);
|
||||
@ -644,10 +645,9 @@ bool FsIndexer::launchAddOrUpdate(const string& udi, const string& parent_udi,
|
||||
return m_db->addOrUpdate(udi, parent_udi, doc);
|
||||
}
|
||||
|
||||
FsTreeWalker::Status
|
||||
FsIndexer::processonefile(RclConfig *config,
|
||||
const std::string &fn, const struct PathStat *stp,
|
||||
const map<string, string>& localfields)
|
||||
FsTreeWalker::Status FsIndexer::processonefile(
|
||||
RclConfig *config, const std::string &fn, const struct PathStat *stp,
|
||||
const map<string, string>& localfields)
|
||||
{
|
||||
////////////////////
|
||||
// Check db up to date ? Doing this before file type
|
||||
@ -693,7 +693,7 @@ FsIndexer::processonefile(RclConfig *config,
|
||||
// If noretryfailed is set, check for a file which previously
|
||||
// failed to index, and avoid re-processing it
|
||||
if (needupdate && m_noretryfailed && existingDoc &&
|
||||
!oldsig.empty() && *oldsig.rbegin() == '+') {
|
||||
!oldsig.empty() && oldsig.back() == '+') {
|
||||
// Check that the sigs are the same except for the '+'. If the file
|
||||
// actually changed, we always retry (maybe it was fixed)
|
||||
string nold = oldsig.substr(0, oldsig.size()-1);
|
||||
@ -720,8 +720,7 @@ FsIndexer::processonefile(RclConfig *config,
|
||||
return FsTreeWalker::FtwOk;
|
||||
}
|
||||
|
||||
LOGDEB0("processone: processing: [" <<
|
||||
displayableBytes(stp->pst_size) << "] " << fn << "\n");
|
||||
LOGDEB0("processone: processing: [" << displayableBytes(stp->pst_size) << "] " << fn << "\n");
|
||||
|
||||
// Note that we used to do the full path here, but I ended up
|
||||
// believing that it made more sense to use only the file name
|
||||
@ -813,6 +812,7 @@ FsIndexer::processonefile(RclConfig *config,
|
||||
// myriads of such files, the ext script is executed for them
|
||||
// and fails every time)
|
||||
if (fis == FileInterner::FIError) {
|
||||
IdxDiags::theDiags().record(IdxDiags::Error, fn, doc.ipath);
|
||||
doc.sig += cstr_plus;
|
||||
}
|
||||
|
||||
@ -822,8 +822,7 @@ FsIndexer::processonefile(RclConfig *config,
|
||||
|
||||
// Add document to database. If there is an ipath, add it
|
||||
// as a child of the file document.
|
||||
if (!launchAddOrUpdate(udi, doc.ipath.empty() ?
|
||||
cstr_null : parent_udi, doc)) {
|
||||
if (!launchAddOrUpdate(udi, doc.ipath.empty() ? cstr_null : parent_udi, doc)) {
|
||||
return FsTreeWalker::FtwError;
|
||||
}
|
||||
|
||||
|
||||
98
src/index/idxdiags.cpp
Normal file
98
src/index/idxdiags.cpp
Normal file
@ -0,0 +1,98 @@
|
||||
/* Copyright (C) 2021 J.F.Dockes
|
||||
*
|
||||
* License: GPL 2.1
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2.1 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*/
|
||||
|
||||
#include "autoconfig.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <mutex>
|
||||
|
||||
#include "idxdiags.h"
|
||||
|
||||
static std::mutex diagmutex;
|
||||
|
||||
// Private implementation data for IdxDiags: holds the diagnostics output
// stream and closes it on destruction.
class IdxDiags::Internal {
public:
    Internal() = default;
    // This object owns fp. A copy would share the same FILE* and both
    // destructors would fclose() it, so copying is disabled.
    Internal(const Internal&) = delete;
    Internal& operator=(const Internal&) = delete;

    ~Internal() {
        if (fp) {
            fclose(fp);
        }
    }

    // Diagnostics output stream. Null as long as no output file was opened.
    FILE *fp{nullptr};
};
|
||||
|
||||
// Build the diagnostics object: allocate the private implementation data.
IdxDiags::IdxDiags()
    : m(new Internal)
{
}
|
||||
|
||||
// Dispose of the private implementation data. The Internal destructor
// closes the output stream if one is open.
IdxDiags::~IdxDiags()
{
    delete m;
}
|
||||
|
||||
bool IdxDiags::flush()
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(diagmutex);
|
||||
if (m && m->fp) {
|
||||
return fflush(m->fp) ? false : true;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Return a reference to the single IdxDiags instance, creating it on the
// first call.
//
// The instance pointer is a function-local static: since C++11 its
// initialization is guaranteed to run exactly once, even when several
// threads make the first call at the same time. An unsynchronized
// "if null then new" test would be a data race in that situation.
// The object is allocated with new and never deleted, so the returned
// reference stays valid for the rest of the process lifetime.
IdxDiags& IdxDiags::theDiags()
{
    static IdxDiags *theInstance = new IdxDiags;
    return *theInstance;
}
|
||||
|
||||
bool IdxDiags::init(const std::string& outpath)
|
||||
{
|
||||
m->fp = fopen(outpath.c_str(), "w");
|
||||
if (nullptr == m->fp) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Append one diagnostic line to the output file.
//
// The line format is "<kind> <path> | <detail>\n", where <kind> is the
// textual name of the DiagKind value ("Unknown" for a value not listed
// in the switch below).
//
// @param diag   reason why the document was not indexed.
// @param path   file path the diagnostic applies to.
// @param detail optional extra data written after the " | " separator.
// @return true if nothing had to be written (no output file, or both path
//     and detail empty) or if the line was written; false if fprintf()
//     reported an output error.
bool IdxDiags::record(DiagKind diag, const std::string& path, const std::string& detail)
{
    // No output file open, or nothing to report: succeed without writing.
    if (nullptr == m || nullptr == m->fp || (path.empty() && detail.empty())) {
        return true;
    }

    const char *skind = "Unknown";
    switch (diag) {
    case Ok: skind = "Ok"; break;
    case Skipped: skind = "Skipped"; break;
    case NoContentSuffix: skind = "NoContentSuffix"; break;
    case MissingHelper: skind = "MissingHelper"; break;
    case Error: skind = "Error"; break;
    case NoHandler: skind = "NoHandler"; break;
    case ExcludedMime: skind = "ExcludedMime"; break;
    case NotIncludedMime: skind = "NotIncludedMime"; break;
    }

    // Serialize writers so that lines from different threads do not mix.
    std::unique_lock<std::mutex> lock(diagmutex);
    // fprintf() returns a negative value on output error: report it to the
    // caller instead of unconditionally claiming success.
    return fprintf(m->fp, "%s %s | %s\n", skind, path.c_str(), detail.c_str()) >= 0;
}
|
||||
50
src/index/idxdiags.h
Normal file
50
src/index/idxdiags.h
Normal file
@ -0,0 +1,50 @@
|
||||
/* Copyright (C) 2021 J.F.Dockes
|
||||
*
|
||||
* License: GPL 2.1
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2.1 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*/
|
||||
|
||||
#ifndef _IDXDIAGS_H_INCLUDED_
|
||||
#define _IDXDIAGS_H_INCLUDED_
|
||||
|
||||
#include <string>
|
||||
|
||||
// Recorder for the reasons why documents were not indexed. A single
// instance exists, obtained through theDiags().
class IdxDiags {
public:
    // Reason for a document not being indexed. record() writes the
    // enumerator name as the first field of each output line.
    enum DiagKind {Ok, Skipped, NoContentSuffix, MissingHelper, Error, NoHandler,
                   ExcludedMime, NotIncludedMime};

    // Retrieve a reference to the single instance.
    static IdxDiags& theDiags();

    // Initialize, setting the output file path. outpath will be truncated.
    // No locking: this must be called from the main thread, before going multithread.
    // If init is never called, further calls to record() or flush() will be noops.
    // Returns false if the output file could not be opened.
    bool init(const std::string& outpath);

    // Record a reason for a document not to be indexed.
    // path is the file path, detail is optional additional information.
    bool record(DiagKind diag, const std::string& path, const std::string& detail = std::string());

    // Flush the output file. Returns false if the flush failed.
    bool flush();

    // The object holds an owning raw pointer to its private data: a copy
    // would share it and free it twice, so copying is disabled.
    IdxDiags(const IdxDiags&) = delete;
    IdxDiags& operator=(const IdxDiags&) = delete;

    class Internal;
private:
    // Private implementation data, owned by this object.
    Internal *m;
    // Construction and destruction are private: use theDiags().
    IdxDiags();
    ~IdxDiags();
};
|
||||
|
||||
#endif /* _IDXDIAGS_H_INCLUDED_ */
|
||||
@ -66,20 +66,16 @@ static RclMonitor *makeMonitor();
|
||||
*/
|
||||
class WalkCB : public FsTreeWalkerCB {
|
||||
public:
|
||||
WalkCB(RclConfig *conf, RclMonitor *mon, RclMonEventQueue *queue,
|
||||
FsTreeWalker& walker)
|
||||
: m_config(conf), m_mon(mon), m_queue(queue), m_walker(walker)
|
||||
{}
|
||||
WalkCB(RclConfig *conf, RclMonitor *mon, RclMonEventQueue *queue, FsTreeWalker& walker)
|
||||
: m_config(conf), m_mon(mon), m_queue(queue), m_walker(walker) {}
|
||||
virtual ~WalkCB() {}
|
||||
|
||||
virtual FsTreeWalker::Status
|
||||
processone(const string &fn, const struct PathStat *st,
|
||||
FsTreeWalker::CbFlag flg) {
|
||||
virtual FsTreeWalker::Status processone(
|
||||
const string &fn, const struct PathStat *st, FsTreeWalker::CbFlag flg) {
|
||||
MONDEB("rclMonRcvRun: processone " << fn << " m_mon " << m_mon <<
|
||||
" m_mon->ok " << (m_mon ? m_mon->ok() : false) << std::endl);
|
||||
|
||||
if (flg == FsTreeWalker::FtwDirEnter ||
|
||||
flg == FsTreeWalker::FtwDirReturn) {
|
||||
if (flg == FsTreeWalker::FtwDirEnter || flg == FsTreeWalker::FtwDirReturn) {
|
||||
m_config->setKeyDir(fn);
|
||||
// Set up skipped patterns for this subtree.
|
||||
m_walker.setSkippedNames(m_config->getSkippedNames());
|
||||
@ -106,8 +102,7 @@ public:
|
||||
m_mon->saved_errno != ENOENT)
|
||||
return FsTreeWalker::FtwError;
|
||||
}
|
||||
} else if (!m_mon->generatesExist() &&
|
||||
flg == FsTreeWalker::FtwRegular) {
|
||||
} else if (!m_mon->generatesExist() && flg == FsTreeWalker::FtwRegular) {
|
||||
// Have to synthetize events for regular files existence
|
||||
// at startup because the monitor does not do it
|
||||
// Note 2011-09-29: no sure this is actually needed. We just ran
|
||||
|
||||
@ -62,6 +62,7 @@ using namespace std;
|
||||
#include "checkretryfailed.h"
|
||||
#include "idxstatus.h"
|
||||
#include "circache.h"
|
||||
#include "idxdiags.h"
|
||||
|
||||
// Command line options
|
||||
static int op_flags;
|
||||
@ -93,11 +94,13 @@ static int op_flags;
|
||||
#define OPTVAL_WEBCACHE_COMPACT 1000
|
||||
#define OPTVAL_WEBCACHE_BURST 1001
|
||||
#define OPTVAL_DIAGS_NOTINDEXED 1002
|
||||
#define OPTVAL_DIAGS_DIAGFILE 1003
|
||||
|
||||
// Table of long-only command line options passed to getopt_long().
// Each entry is {name, has_arg, flag, val}: the flag pointer is 0 in all
// entries, so getopt_long() returns the OPTVAL_xxx value in the last field
// when the option is found. The all-zero entry terminates the table.
static struct option long_options[] = {
|
||||
{"webcache-compact", 0, 0, OPTVAL_WEBCACHE_COMPACT},
|
||||
{"webcache-burst", required_argument, 0, OPTVAL_WEBCACHE_BURST},
|
||||
{"notindexed", 0, 0, OPTVAL_DIAGS_NOTINDEXED},
|
||||
// --diagfile <path>: takes the diagnostics output file path as argument.
{"diagfile", required_argument, 0, OPTVAL_DIAGS_DIAGFILE},
|
||||
{0, 0, 0, 0}
|
||||
};
|
||||
|
||||
@ -110,6 +113,7 @@ static ConfIndexer *confindexer;
|
||||
// Cleanup routine: dispose of the global ConfIndexer through deleteZ(),
// flush the indexing diagnostics output, then call recoll_exitready().
// NOTE(review): presumably run at process exit -- confirm where it is registered.
static void cleanup()
|
||||
{
|
||||
deleteZ(confindexer);
|
||||
// Make sure buffered diagnostics reach the output file. flush() does
// nothing if no diagnostics file was opened.
IdxDiags::theDiags().flush();
|
||||
recoll_exitready();
|
||||
}
|
||||
|
||||
@ -274,20 +278,15 @@ static void setMyPriority(const RclConfig *config)
|
||||
class MakeListWalkerCB : public FsTreeWalkerCB {
|
||||
public:
|
||||
MakeListWalkerCB(list<string>& files, const vector<string>& selpats)
|
||||
: m_files(files), m_pats(selpats)
|
||||
{
|
||||
}
|
||||
virtual FsTreeWalker::Status
|
||||
processone(const string& fn, const struct PathStat *,
|
||||
FsTreeWalker::CbFlag flg) {
|
||||
: m_files(files), m_pats(selpats) {}
|
||||
virtual FsTreeWalker::Status processone(
|
||||
const string& fn, const struct PathStat *, FsTreeWalker::CbFlag flg) {
|
||||
if (flg== FsTreeWalker::FtwDirEnter || flg == FsTreeWalker::FtwRegular){
|
||||
if (m_pats.empty()) {
|
||||
cerr << "Selecting " << fn << endl;
|
||||
m_files.push_back(fn);
|
||||
} else {
|
||||
for (vector<string>::const_iterator it = m_pats.begin();
|
||||
it != m_pats.end(); it++) {
|
||||
if (fnmatch(it->c_str(), fn.c_str(), 0) == 0) {
|
||||
for (const auto& pat : m_pats) {
|
||||
if (fnmatch(pat.c_str(), fn.c_str(), 0) == 0) {
|
||||
m_files.push_back(fn);
|
||||
break;
|
||||
}
|
||||
@ -451,6 +450,8 @@ static const char usage [] =
|
||||
" -Z : in place reset: consider all documents as changed. Can also\n"
|
||||
" be combined with -i or -r but not -m\n"
|
||||
" -k : retry files on which we previously failed\n"
|
||||
" --diagfile <outputpath> : list skipped or otherwise not indexed documents to <outputpath>\n"
|
||||
" <outputpath> will be truncated\n"
|
||||
#ifdef RCL_MONITOR
|
||||
"recollindex -m [-w <secs>] -x [-D] [-C]\n"
|
||||
" Perform real time indexing. Don't become a daemon if -D is set.\n"
|
||||
@ -636,6 +637,7 @@ int main(int argc, char *argv[])
|
||||
bool diags_notindexed{false};
|
||||
|
||||
std::string burstdir;
|
||||
std::string diagfile;
|
||||
while ((ret = getopt_long(argc, (char *const*)&args[0], "c:CDdEefhikKlmnPp:rR:sS:w:xZz",
|
||||
long_options, NULL)) != -1) {
|
||||
switch (ret) {
|
||||
@ -676,7 +678,7 @@ int main(int argc, char *argv[])
|
||||
case OPTVAL_WEBCACHE_COMPACT: webcache_compact = true; break;
|
||||
case OPTVAL_WEBCACHE_BURST: burstdir = optarg; webcache_burst = true;break;
|
||||
case OPTVAL_DIAGS_NOTINDEXED: diags_notindexed = true;break;
|
||||
|
||||
case OPTVAL_DIAGS_DIAGFILE: diagfile = optarg;break;
|
||||
default: Usage(); break;
|
||||
}
|
||||
}
|
||||
@ -790,6 +792,12 @@ int main(int argc, char *argv[])
|
||||
}
|
||||
}
|
||||
|
||||
if (!diagfile.empty()) {
|
||||
if (!IdxDiags::theDiags().init(diagfile)) {
|
||||
std::cerr << "Could not initialize diags file " << diagfile << "\n";
|
||||
LOGERR("recollindex: Could not initialize diags file " << diagfile << "\n");
|
||||
}
|
||||
}
|
||||
bool rezero((op_flags & OPT_z) != 0);
|
||||
bool inPlaceReset((op_flags & OPT_Z) != 0);
|
||||
|
||||
|
||||
@ -411,8 +411,7 @@ WebQueueIndexer::processone(
|
||||
if (flg != FsTreeWalker::FtwRegular)
|
||||
return FsTreeWalker::FtwOk;
|
||||
|
||||
string dotpath = path_cat(path_getfather(path),
|
||||
string(DOTFILEPREFIX) + path_getsimple(path));
|
||||
string dotpath = path_cat(path_getfather(path), string(DOTFILEPREFIX) + path_getsimple(path));
|
||||
LOGDEB("WebQueueIndexer: prc1: [" << path << "]\n");
|
||||
|
||||
WebQueueDotFile dotfile(m_config, dotpath);
|
||||
|
||||
@ -240,17 +240,15 @@ void FileInterner::init(const string &f, const struct PathStat *stp,
|
||||
m_mimetype = l_mime;
|
||||
|
||||
// Look for appropriate handler (might still return empty)
|
||||
RecollFilter *df = getMimeHandler(l_mime, m_cfg, !m_forPreview);
|
||||
RecollFilter *df = getMimeHandler(l_mime, m_cfg, !m_forPreview, f);
|
||||
|
||||
if (!df || df->is_unknown()) {
|
||||
// No real handler for this type, for now :(
|
||||
LOGDEB("FileInterner:: unprocessed mime: [" << l_mime << "] [" << f <<
|
||||
"]\n");
|
||||
LOGDEB("FileInterner:: unprocessed mime: [" << l_mime << "] [" << f << "]\n");
|
||||
if (!df)
|
||||
return;
|
||||
}
|
||||
df->set_property(Dijon::Filter::OPERATING_MODE,
|
||||
m_forPreview ? "view" : "index");
|
||||
df->set_property(Dijon::Filter::OPERATING_MODE, m_forPreview ? "view" : "index");
|
||||
df->set_property(Dijon::Filter::DJF_UDI, udi);
|
||||
|
||||
df->set_docsize(docsize);
|
||||
@ -271,8 +269,7 @@ FileInterner::FileInterner(const string &data, RclConfig *cnf,
|
||||
init(data, cnf, flags, imime);
|
||||
}
|
||||
|
||||
void FileInterner::init(const string &data, RclConfig *,
|
||||
int, const string& imime)
|
||||
void FileInterner::init(const string &data, RclConfig *, int, const string& imime)
|
||||
{
|
||||
if (imime.empty()) {
|
||||
LOGERR("FileInterner: inmemory constructor needs input mime type\n");
|
||||
@ -281,7 +278,7 @@ void FileInterner::init(const string &data, RclConfig *,
|
||||
m_mimetype = imime;
|
||||
|
||||
// Look for appropriate handler (might still return empty)
|
||||
RecollFilter *df = getMimeHandler(m_mimetype, m_cfg, !m_forPreview);
|
||||
RecollFilter *df = getMimeHandler(m_mimetype, m_cfg, !m_forPreview, m_fn);
|
||||
|
||||
if (!df) {
|
||||
// No handler for this type, for now :( if indexallfilenames
|
||||
@ -289,8 +286,7 @@ void FileInterner::init(const string &data, RclConfig *,
|
||||
LOGDEB("FileInterner:: unprocessed mime [" << m_mimetype << "]\n");
|
||||
return;
|
||||
}
|
||||
df->set_property(Dijon::Filter::OPERATING_MODE,
|
||||
m_forPreview ? "view" : "index");
|
||||
df->set_property(Dijon::Filter::OPERATING_MODE, m_forPreview ? "view" : "index");
|
||||
|
||||
df->set_docsize(data.length());
|
||||
if (df->is_data_input_ok(Dijon::Filter::DOCUMENT_STRING)) {
|
||||
@ -741,12 +737,11 @@ int FileInterner::addHandler()
|
||||
getKeyValue(docdata, cstr_dj_keyipath, ipathel);
|
||||
bool dofilter = !m_forPreview &&
|
||||
(mimetype.compare(cstr_texthtml) || !ipathel.empty());
|
||||
RecollFilter *newflt = getMimeHandler(mimetype, m_cfg, dofilter);
|
||||
RecollFilter *newflt = getMimeHandler(mimetype, m_cfg, dofilter, m_fn);
|
||||
if (!newflt) {
|
||||
// If we can't find a handler, this doc can't be handled
|
||||
// but there can be other ones so we go on
|
||||
LOGINFO("FileInterner::addHandler: no filter for [" << mimetype <<
|
||||
"]\n");
|
||||
LOGINFO("FileInterner::addHandler: no filter for [" << mimetype << "]\n");
|
||||
return ADD_CONTINUE;
|
||||
}
|
||||
newflt->set_property(Dijon::Filter::OPERATING_MODE,
|
||||
|
||||
@ -29,6 +29,7 @@
|
||||
#include "smallut.h"
|
||||
#include "md5ut.h"
|
||||
#include "rclconfig.h"
|
||||
#include "idxdiags.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
@ -186,6 +187,7 @@ bool MimeHandlerExec::next_document()
|
||||
missingHelper = true;
|
||||
m_reason = string("RECFILTERROR HELPERNOTFOUND ") + cmd;
|
||||
whatHelper = m_reason;
|
||||
IdxDiags::theDiags().record(IdxDiags::MissingHelper, m_fn);
|
||||
} else if (output.find("RECFILTERROR") == 0) {
|
||||
// If the output string begins with RECFILTERROR, then it's
|
||||
// interpretable error information out from a recoll script
|
||||
@ -193,6 +195,7 @@ bool MimeHandlerExec::next_document()
|
||||
std::string::size_type pos;
|
||||
if ((pos = output.find("RECFILTERROR ")) == 0) {
|
||||
if (output.find("HELPERNOTFOUND") != string::npos) {
|
||||
IdxDiags::theDiags().record(IdxDiags::MissingHelper, m_fn);
|
||||
missingHelper = true;
|
||||
whatHelper = output.substr(pos);
|
||||
}
|
||||
|
||||
@ -33,6 +33,7 @@
|
||||
#include "mimetype.h"
|
||||
#include "idfile.h"
|
||||
#include "rclutil.h"
|
||||
#include "idxdiags.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
@ -72,6 +73,7 @@ bool MimeHandlerExecMultiple::startCmd()
|
||||
vector<string>myparams(params.begin() + 1, params.end());
|
||||
|
||||
if (m_cmd.startExec(cmd, myparams, 1, 1) < 0) {
|
||||
IdxDiags::theDiags().record(IdxDiags::MissingHelper, m_fn);
|
||||
m_reason = string("RECFILTERROR HELPERNOTFOUND ") + cmd;
|
||||
missingHelper = true;
|
||||
whatHelper = cmd;
|
||||
@ -113,6 +115,7 @@ bool MimeHandlerExecMultiple::readDataElement(string& name, string &data)
|
||||
if ((pos = ibuf.find("RECFILTERROR ")) == 0) {
|
||||
m_reason = ibuf;
|
||||
if (ibuf.find("HELPERNOTFOUND") != string::npos) {
|
||||
IdxDiags::theDiags().record(IdxDiags::MissingHelper, m_fn);
|
||||
missingHelper = true;
|
||||
whatHelper = ibuf.substr(pos);
|
||||
}
|
||||
|
||||
@ -256,8 +256,8 @@ MimeHandlerExec *mhExecFactory(RclConfig *cfg, const string& mtype, string& hs,
|
||||
}
|
||||
|
||||
/* Get handler/filter object for given mime type: */
|
||||
RecollFilter *getMimeHandler(const string &mtype, RclConfig *cfg,
|
||||
bool filtertypes)
|
||||
RecollFilter *getMimeHandler(const string &mtype, RclConfig *cfg,
|
||||
bool filtertypes, const std::string& fn)
|
||||
{
|
||||
LOGDEB("getMimeHandler: mtype [" << mtype << "] filtertypes " <<
|
||||
filtertypes << "\n");
|
||||
@ -270,7 +270,7 @@ RecollFilter *getMimeHandler(const string &mtype, RclConfig *cfg,
|
||||
// indexedmimetypes but an html handler could still be in the
|
||||
// cache because it was needed by some other interning stack).
|
||||
string hs;
|
||||
hs = cfg->getMimeHandlerDef(mtype, filtertypes);
|
||||
hs = cfg->getMimeHandlerDef(mtype, filtertypes, fn);
|
||||
string id;
|
||||
|
||||
if (!hs.empty()) {
|
||||
|
||||
@ -169,7 +169,7 @@ protected:
|
||||
* indexedmimetypes (if this is set at all).
|
||||
*/
|
||||
extern RecollFilter *getMimeHandler(const std::string &mtyp, RclConfig *cfg,
|
||||
bool filtertypes);
|
||||
bool filtertypes, const std::string& fn = std::string());
|
||||
|
||||
/// Free up filter for reuse (you can also delete it)
|
||||
extern void returnMimeHandler(RecollFilter *);
|
||||
|
||||
@ -2401,7 +2401,7 @@ bool Db::dbStats(DbStats& res, bool listfailed)
|
||||
try {
|
||||
Xapian::Document doc = xdb.get_document(docid);
|
||||
string sig = doc.get_value(VALUE_SIG);
|
||||
if (sig.empty() || sig[sig.size()-1] != '+') {
|
||||
if (sig.empty() || sig.back() != '+') {
|
||||
continue;
|
||||
}
|
||||
string data = doc.get_data();
|
||||
|
||||
@ -411,9 +411,7 @@ FsTreeWalker::Status FsTreeWalker::iwalk(const string &top,
|
||||
// Skipped file names match ?
|
||||
if (!data->skippedNames.empty()) {
|
||||
if (inSkippedNames(dname)) {
|
||||
if (data->options & FtwOnlySkipped) {
|
||||
cb.processone(path_cat(top, dname), nullptr, FtwSkipped);
|
||||
}
|
||||
cb.processone(path_cat(top, dname), nullptr, FtwSkipped);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
@ -428,9 +426,7 @@ FsTreeWalker::Status FsTreeWalker::iwalk(const string &top,
|
||||
// this was broken by 1.13.00 and the systematic use of
|
||||
// FNM_LEADING_DIR
|
||||
if (inSkippedPaths(fn, false)) {
|
||||
if (data->options & FtwOnlySkipped) {
|
||||
cb.processone(fn, nullptr, FtwSkipped);
|
||||
}
|
||||
cb.processone(fn, nullptr, FtwSkipped);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user