defined data access interface for external indexers

This commit is contained in:
Jean-Francois Dockes 2016-06-01 09:46:47 +02:00
parent 03063f58dc
commit 1aea57fcb2
7 changed files with 197 additions and 10 deletions

View File

@ -81,6 +81,8 @@ index/bglfetcher.cpp \
index/bglfetcher.h \
index/checkretryfailed.cpp \
index/checkretryfailed.h \
index/exefetcher.cpp \
index/exefetcher.h \
index/fetcher.cpp \
index/fetcher.h \
index/fsfetcher.cpp \

129
src/index/exefetcher.cpp Normal file
View File

@ -0,0 +1,129 @@
/* Copyright (C) 2016 J.F.Dockes
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the
* Free Software Foundation, Inc.,
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#include "autoconfig.h"
#include <string>
#include <vector>
#include <iostream>
#include "exefetcher.h"
#include "debuglog.h"
#include "pathut.h"
#include "rclconfig.h"
#include "execmd.h"
#include "rcldoc.h"
using namespace std;
/**
 * Private data and helper for EXEDocFetcher: the backend identifier and the
 * two external command lines, each stored as an argument vector.
 */
class EXEDocFetcher::Internal {
public:
    // Backend identifier (only used in error messages here).
    string bckid;
    // Command line run by EXEDocFetcher::fetch().
    vector<string> sfetch;
    // Command line run by EXEDocFetcher::makesig().
    vector<string> smkid;

    /**
     * Execute an external command for a document and collect its output.
     *
     * The command is run with three extra trailing arguments, in this
     * order: the document udi, its url and its ipath.
     *
     * @param cmd  command name followed by its fixed leading arguments.
     * @param idoc document for which the command is run.
     * @param out  receives the data produced by the command (passed to
     *             ExecCmd::doexec1() as the output string).
     * @return true if doexec1() returned 0, false otherwise (an error is
     *         logged in that case).
     */
    bool docmd(const vector<string>& cmd, const Rcl::Doc& idoc, string& out) {
        ExecCmd ecmd;
        // We're always called for preview (or Open)
        ecmd.putenv("RECOLL_FILTER_FORPREVIEW=yes");

        string udi;
        idoc.getmeta(Rcl::Doc::keyudi, &udi);

        vector<string> args(cmd);
        args.push_back(udi);
        args.push_back(idoc.url);
        args.push_back(idoc.ipath);

        const int status = ecmd.doexec1(args, 0, &out);
        if (status == 0) {
            LOGDEB(("EXEDocFetcher::Internal: got [%s]\n", out.c_str()));
            return true;
        }

        // The status must actually be passed: the format string has a
        // conversion for it, and a missing variadic argument is undefined
        // behaviour. It is printed in hex to match the "0x" prefix.
        LOGERR(("EXEDOcFetcher::fetch: %s: %s failed for %s %s %s 0x%x\n",
                bckid.c_str(), stringsToString(cmd).c_str(), udi.c_str(),
                idoc.url.c_str(), idoc.ipath.c_str(),
                static_cast<unsigned int>(status)));
        return false;
    }
};
// Build a fetcher holding its own heap-allocated copy of the supplied
// internal data, and trace the fetch command which will be used.
EXEDocFetcher::EXEDocFetcher(const EXEDocFetcher::Internal& _m)
    : m(new Internal(_m))
{
    LOGDEB(("EXEDocFetcher::EXEDocFetcher: fetch is %s\n",
            stringsToString(m->sfetch).c_str()));
}
// Retrieve the document data by running the configured 'fetch' command.
// The result is flagged RDK_DATADIRECT and the command output is stored in
// out.data. Returns the success status of the command. cnf is not used.
bool EXEDocFetcher::fetch(RclConfig* cnf, const Rcl::Doc& idoc, RawDoc& out)
{
    out.kind = RawDoc::RDK_DATADIRECT;
    const bool fetched = m->docmd(m->sfetch, idoc, out.data);
    return fetched;
}
// Compute the document signature by running the configured 'makesig'
// command; its output is stored in sig. Returns the success status of the
// command. cnf is not used.
bool EXEDocFetcher::makesig(RclConfig* cnf, const Rcl::Doc& idoc, string& sig)
{
    const bool signedok = m->docmd(m->smkid, idoc, sig);
    return signedok;
}
// Lookup bckid in the config and create an appropriate fetcher.
EXEDocFetcher *exeDocFetcherMake(RclConfig *config, const string& bckid)
{
EXEDocFetcher *fetcher = 0;
// The config we only read once, not gonna change.
static ConfSimple *bconf;
if (!bconf) {
string bconfname = path_cat(config->getConfDir(), "backends");
LOGDEB(("exeDocFetcherMake: using config in %s\n", bconfname.c_str()));
bconf = new ConfSimple(bconfname.c_str(), true);
if (!bconf->ok()) {
delete bconf;
bconf = 0;
LOGDEB(("exeDocFetcherMake: bad/no config: %s\n",
bconfname.c_str()));
return 0;
}
}
EXEDocFetcher::Internal m;
m.bckid = bckid;
string sfetch;
if (!bconf->get("fetch", sfetch, bckid) || sfetch.empty()) {
LOGERR(("exeDocFetcherMake: no 'fetch' for [%s]\n", bckid.c_str()));
return 0;
}
stringToStrings(sfetch, m.sfetch);
// We look up the command as we do for filters for now
m.sfetch[0] = config->findFilter(m.sfetch[0]);
if (!path_isabsolute(m.sfetch[0])) {
LOGERR(("exeDocFetcherMake: %s not found in exec path or filters dir\n",
m.sfetch[0].c_str()));
return 0;
}
string smkid;
if (!bconf->get("makesig", smkid, bckid) || smkid.empty()) {
LOGDEB(("exeDocFetcherMake: no 'makesig' for [%s]\n", bckid.c_str()));
return 0;
}
stringToStrings(smkid, m.smkid);
m.smkid[0] = config->findFilter(m.smkid[0]);
if (!path_isabsolute(m.smkid[0])) {
LOGERR(("exeDocFetcherMake: %s not found in exec path or filters dir\n",
m.smkid[0].c_str()));
return 0;
}
return new EXEDocFetcher(m);
}

44
src/index/exefetcher.h Normal file
View File

@ -0,0 +1,44 @@
/* Copyright (C) 2012 J.F.Dockes
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the
* Free Software Foundation, Inc.,
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#ifndef _EXEFETCHER_H_INCLUDED_
#define _EXEFETCHER_H_INCLUDED_
#include "fetcher.h"
class RclConfig;
/**
 * A fetcher which works by executing external programs, defined in a
 * configuration file.
 *
 * Instances are created by exeDocFetcherMake(). No access specifier
 * precedes the constructor and methods below, so they are private: the
 * friend factory is the only outside code which can construct an object.
 */
class EXEDocFetcher : public DocFetcher {
// Implementation data, only forward-declared here.
class Internal;
// Build a fetcher from already prepared internal data.
EXEDocFetcher(const Internal&);
// NOTE(review): this does not delete m, which is declared below as a raw
// pointer. If m owns its object this is a leak, and as Internal is an
// incomplete type here, the delete would have to live in the .cpp file.
// To be confirmed.
virtual ~EXEDocFetcher() {}
/** Fetch the document data for idoc into out. */
virtual bool fetch(RclConfig* cnf, const Rcl::Doc& idoc, RawDoc& out);
/** Compute the signature for idoc and store it in sig. */
virtual bool makesig(RclConfig* cnf, const Rcl::Doc& idoc, std::string& sig);
friend EXEDocFetcher *exeDocFetcherMake(RclConfig *, const std::string&);
private:
// Pointer to the implementation data.
Internal *m;
};
// Lookup bckid in the config and create an appropriate fetcher.
EXEDocFetcher *exeDocFetcherMake(RclConfig *config, const std::string& bckid);
#endif /* _EXEFETCHER_H_INCLUDED_ */

View File

@ -18,12 +18,14 @@
#include "debuglog.h"
#include "rclconfig.h"
#include "fetcher.h"
#include "fsfetcher.h"
#include "bglfetcher.h"
#include "exefetcher.h"
DocFetcher *docFetcherMake(const Rcl::Doc& idoc)
DocFetcher *docFetcherMake(RclConfig *config, const Rcl::Doc& idoc)
{
if (idoc.url.empty()) {
LOGERR(("docFetcherMakeg:: no url in doc!\n"));
@ -38,7 +40,11 @@ DocFetcher *docFetcherMake(const Rcl::Doc& idoc)
return new BGLDocFetcher;
#endif
} else {
LOGERR(("DocFetcherFactory: unknown backend [%s]\n", backend.c_str()));
return 0;
DocFetcher *f = exeDocFetcherMake(config, backend);
if (!f) {
LOGERR(("DocFetcherFactory: unknown backend [%s]\n",
backend.c_str()));
}
return f;
}
}

View File

@ -42,7 +42,7 @@ public:
/** A RawDoc is the data for a document-holding entity either as a
memory block, or pointed to by a file name */
struct RawDoc {
enum RawDocKind {RDK_FILENAME, RDK_DATA};
enum RawDocKind {RDK_FILENAME, RDK_DATA, RDK_DATADIRECT};
RawDocKind kind;
std::string data; // Doc data or file name
struct stat st; // Only used if RDK_FILENAME
@ -71,6 +71,6 @@ public:
};
/** Return an appropriate fetcher object given the backend string identifier */
DocFetcher *docFetcherMake(const Rcl::Doc& idoc);
DocFetcher *docFetcherMake(RclConfig *config, const Rcl::Doc& idoc);
#endif /* _FETCHER_H_INCLUDED_ */

View File

@ -319,6 +319,7 @@ void FileInterner::initcommon(RclConfig *cnf, int flags)
m_tmpflgs[i] = false;
m_targetMType = cstr_textplain;
m_cfg->getConfParam("noxattrfields", &m_noxattrs);
m_direct = false;
}
FileInterner::FileInterner(const Rcl::Doc& idoc, RclConfig *cnf, int flags)
@ -327,7 +328,7 @@ FileInterner::FileInterner(const Rcl::Doc& idoc, RclConfig *cnf, int flags)
LOGDEB0(("FileInterner::FileInterner(idoc)\n"));
initcommon(cnf, flags);
DocFetcher *fetcher = docFetcherMake(idoc);
DocFetcher *fetcher = docFetcherMake(cnf, idoc);
if (fetcher == 0) {
LOGERR(("FileInterner:: no backend\n"));
return;
@ -344,6 +345,10 @@ FileInterner::FileInterner(const Rcl::Doc& idoc, RclConfig *cnf, int flags)
case DocFetcher::RawDoc::RDK_DATA:
init(rawdoc.data, cnf, flags, idoc.mimetype);
break;
case DocFetcher::RawDoc::RDK_DATADIRECT:
init(rawdoc.data, cnf, flags, idoc.mimetype);
m_direct = true;
break;
default:
LOGERR(("FileInterner::FileInterner(idoc): bad rawdoc kind ??\n"));
}
@ -352,7 +357,7 @@ FileInterner::FileInterner(const Rcl::Doc& idoc, RclConfig *cnf, int flags)
bool FileInterner::makesig(RclConfig *cnf, const Rcl::Doc& idoc, string& sig)
{
DocFetcher *fetcher = docFetcherMake(idoc);
DocFetcher *fetcher = docFetcherMake(cnf, idoc);
if (fetcher == 0) {
LOGERR(("FileInterner::makesig no backend for doc\n"));
return false;
@ -763,7 +768,7 @@ FileInterner::Status FileInterner::internfile(Rcl::Doc& doc, const string& ipath
// We set the ipath for the first handler here, others are set
// when they're pushed on the stack
vector<string> vipath;
if (!ipath.empty()) {
if (!ipath.empty() && !m_direct) {
vector<string> lipath;
stringToTokens(ipath, lipath, cstr_isep, true);
for (vector<string>::iterator it = lipath.begin();
@ -946,7 +951,7 @@ bool FileInterner::idocToFile(TempFile& otemp, const string& tofile,
bool FileInterner::topdocToFile(TempFile& otemp, const string& tofile,
RclConfig *cnf, const Rcl::Doc& idoc)
{
DocFetcher *fetcher = docFetcherMake(idoc);
DocFetcher *fetcher = docFetcherMake(cnf, idoc);
if (fetcher == 0) {
LOGERR(("FileInterner::idocToFile no backend\n"));
return false;

View File

@ -275,7 +275,8 @@ class FileInterner {
Uncomp m_uncomp;
bool m_noxattrs; // disable xattrs usage
bool m_direct; // External app did the extraction
// Pseudo-constructors
void init(const string &fn, const struct stat *stp,
RclConfig *cnf, int flags, const string *mtype = 0);