Disable filters with missing helpers for the whole indexing pass

This commit is contained in:
dockes 2008-10-06 06:22:47 +00:00
parent 9082f3bf65
commit 0a04919f5a
4 changed files with 52 additions and 16 deletions

View File

@ -1,5 +1,5 @@
#ifndef lint #ifndef lint
static char rcsid[] = "@(#$Id: mh_exec.cpp,v 1.12 2008-10-04 14:26:59 dockes Exp $ (C) 2005 J.F.Dockes"; static char rcsid[] = "@(#$Id: mh_exec.cpp,v 1.13 2008-10-06 06:22:46 dockes Exp $ (C) 2005 J.F.Dockes";
#endif #endif
/* /*
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
@ -23,6 +23,10 @@ static char rcsid[] = "@(#$Id: mh_exec.cpp,v 1.12 2008-10-04 14:26:59 dockes Exp
#include "mh_html.h" #include "mh_html.h"
#include "debuglog.h" #include "debuglog.h"
#include "cancelcheck.h" #include "cancelcheck.h"
#include "smallut.h"
#include <sys/types.h>
#include <sys/wait.h>
#ifndef NO_NAMESPACES #ifndef NO_NAMESPACES
using namespace std; using namespace std;
@ -42,6 +46,10 @@ bool MimeHandlerExec::next_document()
if (m_havedoc == false) if (m_havedoc == false)
return false; return false;
m_havedoc = false; m_havedoc = false;
if (missingHelper) {
LOGDEB(("MimeHandlerExec::next_document(): helper known missing\n"));
return false;
}
if (params.empty()) { if (params.empty()) {
// Hu ho // Hu ho
LOGERR(("MimeHandlerExec::mkDoc: empty params\n")); LOGERR(("MimeHandlerExec::mkDoc: empty params\n"));
@ -59,7 +67,7 @@ bool MimeHandlerExec::next_document()
if (!m_ipath.empty()) if (!m_ipath.empty())
myparams.push_back(m_ipath); myparams.push_back(m_ipath);
// Execute command and store the result text // Execute command, store the output
string& output = m_metaData["content"]; string& output = m_metaData["content"];
output.erase(); output.erase();
ExecCmd mexec; ExecCmd mexec;
@ -68,15 +76,37 @@ bool MimeHandlerExec::next_document()
mexec.putenv(m_forPreview ? "RECOLL_FILTER_FORPREVIEW=yes" : mexec.putenv(m_forPreview ? "RECOLL_FILTER_FORPREVIEW=yes" :
"RECOLL_FILTER_FORPREVIEW=no"); "RECOLL_FILTER_FORPREVIEW=no");
int status = mexec.doexec(cmd, myparams, 0, &output); int status = mexec.doexec(cmd, myparams, 0, &output);
if (status) { if (status) {
LOGERR(("MimeHandlerExec: command status 0x%x: %s\n", LOGERR(("MimeHandlerExec: command status 0x%x for %s\n",
status, cmd.c_str())); status, cmd.c_str()));
// If the output string begins with RECFILTERROR, then it's if (WIFEXITED(status) && WEXITSTATUS(status) == 127) {
// interpretable error information // That's how execmd signals a failed exec (most probably
if (output.find("RECFILTERROR") == 0) // a missing command). Let'hope no filter uses the same value as
// an exit status... Disable myself permanently and signal the
// missing cmd.
missingHelper = true;
m_reason = string("RECFILTERROR HELPERNOTFOUND ") + cmd;
} else if (output.find("RECFILTERROR") == 0) {
// If the output string begins with RECFILTERROR, then it's
// interpretable error information out from a recoll script
m_reason = output; m_reason = output;
list<string> lerr;
stringToStrings(output, lerr);
if (lerr.size() > 2) {
list<string>::iterator it = lerr.begin();
it++;
if (*it == "HELPERNOTFOUND") {
// No use trying again and again to execute this filter,
// it won't work.
missingHelper = true;
}
}
}
return false; return false;
} }
// Success. Store some external metadata
m_metaData["origcharset"] = m_defcharset; m_metaData["origcharset"] = m_defcharset;
// Default charset: all recoll filters output utf-8, but this // Default charset: all recoll filters output utf-8, but this
// could still be overridden by the content-type meta tag. // could still be overridden by the content-type meta tag.

View File

@ -16,7 +16,7 @@
*/ */
#ifndef _MH_EXEC_H_INCLUDED_ #ifndef _MH_EXEC_H_INCLUDED_
#define _MH_EXEC_H_INCLUDED_ #define _MH_EXEC_H_INCLUDED_
/* @(#$Id: mh_exec.h,v 1.7 2008-10-04 14:26:59 dockes Exp $ (C) 2004 J.F.Dockes */ /* @(#$Id: mh_exec.h,v 1.8 2008-10-06 06:22:46 dockes Exp $ (C) 2004 J.F.Dockes */
#include <string> #include <string>
#include <list> #include <list>
@ -33,8 +33,10 @@ using std::string;
*/ */
class MimeHandlerExec : public RecollFilter { class MimeHandlerExec : public RecollFilter {
public: public:
// params, cfgMtype and chgCharset do not get reset by // Members not reset by clear(). params, cfgMtype and chgCharset
// clear(). They define what I am // actually define what I am. missingHelper is a permanent error
// (no use to try and execute over and over something that's not
// here).
list<string> params; list<string> params;
// The defaults for external filters is to output html except if defined // The defaults for external filters is to output html except if defined
// otherwise in the config. // otherwise in the config.
@ -42,8 +44,10 @@ class MimeHandlerExec : public RecollFilter {
// For ext programs which don't output html, the output charset // For ext programs which don't output html, the output charset
// has to be known: ie they have a --charset utf-8 like option. // has to be known: ie they have a --charset utf-8 like option.
string cfgCharset; string cfgCharset;
bool missingHelper;
MimeHandlerExec(const string& mt) : RecollFilter(mt) {} MimeHandlerExec(const string& mt) : RecollFilter(mt), missingHelper(false)
{}
virtual ~MimeHandlerExec() {} virtual ~MimeHandlerExec() {}
virtual bool set_document_file(const string &file_path) { virtual bool set_document_file(const string &file_path) {
m_fn = file_path; m_fn = file_path;

View File

@ -1,5 +1,5 @@
#ifndef lint #ifndef lint
static char rcsid[] = "@(#$Id: mimehandler.cpp,v 1.23 2008-10-04 14:26:59 dockes Exp $ (C) 2004 J.F.Dockes"; static char rcsid[] = "@(#$Id: mimehandler.cpp,v 1.24 2008-10-06 06:22:46 dockes Exp $ (C) 2004 J.F.Dockes";
#endif #endif
/* /*
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
@ -35,8 +35,9 @@ using namespace std;
#include "mh_text.h" #include "mh_text.h"
#include "mh_unknown.h" #include "mh_unknown.h"
// Pool of already known and created handlers // Pool of already known and created handlers. There can be several instances
static map<string, Dijon::Filter*> o_handlers; // for a given mime type (think email attachment in email message)
static multimap<string, Dijon::Filter*> o_handlers;
/** Create internal handler object appropriate for given mime type */ /** Create internal handler object appropriate for given mime type */
static Dijon::Filter *mhFactory(const string &mime) static Dijon::Filter *mhFactory(const string &mime)
@ -127,9 +128,10 @@ MimeHandlerExec *mhExecFactory(RclConfig *cfg, const string& mtype, string& hs)
/* Return mime handler to pool */ /* Return mime handler to pool */
void returnMimeHandler(Dijon::Filter *handler) void returnMimeHandler(Dijon::Filter *handler)
{ {
typedef multimap<string, Dijon::Filter*>::value_type value_type;
if (handler) { if (handler) {
handler->clear(); handler->clear();
o_handlers[handler->get_mime_type()] = handler; o_handlers.insert(value_type(handler->get_mime_type(), handler));
} }
} }

View File

@ -1,5 +1,5 @@
#ifndef lint #ifndef lint
static char rcsid[] = "@(#$Id: execmd.cpp,v 1.26 2007-11-08 09:34:40 dockes Exp $ (C) 2004 J.F.Dockes"; static char rcsid[] = "@(#$Id: execmd.cpp,v 1.27 2008-10-06 06:22:47 dockes Exp $ (C) 2004 J.F.Dockes";
#endif #endif
/* /*
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
@ -374,7 +374,7 @@ int ExecCmd::doexec(const string &cmd, const list<string>& args,
// Hu ho // Hu ho
LOGERR(("ExecCmd::doexec: execvp(%s) failed. errno %d\n", cmd.c_str(), LOGERR(("ExecCmd::doexec: execvp(%s) failed. errno %d\n", cmd.c_str(),
errno)); errno));
_exit(128); _exit(127);
} }
/* This cant be reached: to make cc happy */ /* This cant be reached: to make cc happy */
return -1; return -1;