defined data access interface for external indexers
This commit is contained in:
parent
03063f58dc
commit
1aea57fcb2
@ -81,6 +81,8 @@ index/bglfetcher.cpp \
|
||||
index/bglfetcher.h \
|
||||
index/checkretryfailed.cpp \
|
||||
index/checkretryfailed.h \
|
||||
index/exefetcher.cpp \
|
||||
index/exefetcher.h \
|
||||
index/fetcher.cpp \
|
||||
index/fetcher.h \
|
||||
index/fsfetcher.cpp \
|
||||
|
||||
129
src/index/exefetcher.cpp
Normal file
129
src/index/exefetcher.cpp
Normal file
@ -0,0 +1,129 @@
|
||||
/* Copyright (C) 2016 J.F.Dockes
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc.,
|
||||
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
#include "autoconfig.h"
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
|
||||
#include "exefetcher.h"
|
||||
|
||||
#include "debuglog.h"
|
||||
#include "pathut.h"
|
||||
#include "rclconfig.h"
|
||||
#include "execmd.h"
|
||||
#include "rcldoc.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
/**
 * Private implementation data and helper for EXEDocFetcher.
 *
 * Holds the backend identifier and the two command lines (fetch and
 * makesig), and knows how to run one of them for a given document.
 */
class EXEDocFetcher::Internal {
public:
    // Backend identifier, only used here for error messages.
    string bckid;
    // Command line (program + leading arguments) used by fetch().
    vector<string> sfetch;
    // Command line (program + leading arguments) used by makesig().
    vector<string> smkid;

    /**
     * Execute an external command for a document and collect its output.
     *
     * The document udi, url and ipath are appended, in this order, to a
     * copy of the command line before execution.
     *
     * @param cmd  command line to run (program followed by its arguments).
     * @param idoc document for which the command is run.
     * @param out  receives the data produced by the command.
     * @return true if the command returned a zero status, else false.
     */
    bool docmd(const vector<string>& cmd, const Rcl::Doc& idoc, string& out) {
        ExecCmd ecmd;
        // We're always called for preview (or Open)
        ecmd.putenv("RECOLL_FILTER_FORPREVIEW=yes");

        string udi;
        idoc.getmeta(Rcl::Doc::keyudi, &udi);

        vector<string> args(cmd);
        args.push_back(udi);
        args.push_back(idoc.url);
        args.push_back(idoc.ipath);

        // Second argument is 0: presumably no input data is sent to the
        // command -- confirm against the ExecCmd::doexec1() declaration.
        int status = ecmd.doexec1(args, 0, &out);
        if (status == 0) {
            LOGDEB(("EXEDocFetcher::Internal: got [%s]\n", out.c_str()));
            return true;
        } else {
            // The status value must be passed explicitly: the format has
            // six conversions, one per argument below.
            LOGERR(("EXEDOcFetcher::fetch: %s: %s failed for %s %s %s 0x%x\n",
                    bckid.c_str(), stringsToString(cmd).c_str(), udi.c_str(),
                    idoc.url.c_str(), idoc.ipath.c_str(),
                    static_cast<unsigned int>(status)));
            return false;
        }
    }
};
|
||||
|
||||
/**
 * Build a fetcher from prepared implementation data.
 *
 * A private heap copy of the supplied data is made, so the argument only
 * needs to live for the duration of the call.
 */
EXEDocFetcher::EXEDocFetcher(const EXEDocFetcher::Internal& _m)
    : m(new Internal(_m))
{
    LOGDEB(("EXEDocFetcher::EXEDocFetcher: fetch is %s\n",
            stringsToString(m->sfetch).c_str()));
}
|
||||
|
||||
/**
 * Retrieve the document data by running the backend 'fetch' command.
 *
 * The result kind is always set to RDK_DATADIRECT, whether or not the
 * command succeeds; the command output is stored in out.data.
 *
 * @param cnf  unused here.
 * @param idoc document to fetch.
 * @param out  receives the kind and the data.
 * @return true if the external command succeeded.
 */
bool EXEDocFetcher::fetch(RclConfig* cnf, const Rcl::Doc& idoc, RawDoc& out)
{
    out.kind = RawDoc::RDK_DATADIRECT;
    const bool fetched = m->docmd(m->sfetch, idoc, out.data);
    return fetched;
}
|
||||
|
||||
/**
 * Compute the document signature by running the backend 'makesig' command.
 *
 * @param cnf  unused here.
 * @param idoc document for which the signature is wanted.
 * @param sig  receives the output of the command.
 * @return true if the external command succeeded.
 */
bool EXEDocFetcher::makesig(RclConfig* cnf, const Rcl::Doc& idoc, string& sig)
{
    const bool computed = m->docmd(m->smkid, idoc, sig);
    return computed;
}
|
||||
|
||||
// Lookup bckid in the config and create an appropriate fetcher.
|
||||
EXEDocFetcher *exeDocFetcherMake(RclConfig *config, const string& bckid)
|
||||
{
|
||||
EXEDocFetcher *fetcher = 0;
|
||||
|
||||
// The config we only read once, not gonna change.
|
||||
static ConfSimple *bconf;
|
||||
if (!bconf) {
|
||||
string bconfname = path_cat(config->getConfDir(), "backends");
|
||||
LOGDEB(("exeDocFetcherMake: using config in %s\n", bconfname.c_str()));
|
||||
bconf = new ConfSimple(bconfname.c_str(), true);
|
||||
if (!bconf->ok()) {
|
||||
delete bconf;
|
||||
bconf = 0;
|
||||
LOGDEB(("exeDocFetcherMake: bad/no config: %s\n",
|
||||
bconfname.c_str()));
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
EXEDocFetcher::Internal m;
|
||||
m.bckid = bckid;
|
||||
|
||||
string sfetch;
|
||||
if (!bconf->get("fetch", sfetch, bckid) || sfetch.empty()) {
|
||||
LOGERR(("exeDocFetcherMake: no 'fetch' for [%s]\n", bckid.c_str()));
|
||||
return 0;
|
||||
}
|
||||
stringToStrings(sfetch, m.sfetch);
|
||||
// We look up the command as we do for filters for now
|
||||
m.sfetch[0] = config->findFilter(m.sfetch[0]);
|
||||
if (!path_isabsolute(m.sfetch[0])) {
|
||||
LOGERR(("exeDocFetcherMake: %s not found in exec path or filters dir\n",
|
||||
m.sfetch[0].c_str()));
|
||||
return 0;
|
||||
}
|
||||
|
||||
string smkid;
|
||||
if (!bconf->get("makesig", smkid, bckid) || smkid.empty()) {
|
||||
LOGDEB(("exeDocFetcherMake: no 'makesig' for [%s]\n", bckid.c_str()));
|
||||
return 0;
|
||||
}
|
||||
stringToStrings(smkid, m.smkid);
|
||||
m.smkid[0] = config->findFilter(m.smkid[0]);
|
||||
if (!path_isabsolute(m.smkid[0])) {
|
||||
LOGERR(("exeDocFetcherMake: %s not found in exec path or filters dir\n",
|
||||
m.smkid[0].c_str()));
|
||||
return 0;
|
||||
}
|
||||
return new EXEDocFetcher(m);
|
||||
}
|
||||
44
src/index/exefetcher.h
Normal file
44
src/index/exefetcher.h
Normal file
@ -0,0 +1,44 @@
|
||||
/* Copyright (C) 2012 J.F.Dockes
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc.,
|
||||
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
#ifndef _EXEFETCHER_H_INCLUDED_
|
||||
#define _EXEFETCHER_H_INCLUDED_
|
||||
|
||||
#include "fetcher.h"
|
||||
|
||||
class RclConfig;
|
||||
|
||||
/**
|
||||
* A fetcher which works by executing external programs, defined in a
|
||||
* configuration file:
|
||||
*/
|
||||
class EXEDocFetcher : public DocFetcher {
|
||||
class Internal;
|
||||
EXEDocFetcher(const Internal&);
|
||||
virtual ~EXEDocFetcher() {}
|
||||
|
||||
virtual bool fetch(RclConfig* cnf, const Rcl::Doc& idoc, RawDoc& out);
|
||||
/** Calls stat to retrieve file signature data */
|
||||
virtual bool makesig(RclConfig* cnf, const Rcl::Doc& idoc, std::string& sig);
|
||||
friend EXEDocFetcher *exeDocFetcherMake(RclConfig *, const std::string&);
|
||||
private:
|
||||
Internal *m;
|
||||
};
|
||||
|
||||
// Lookup bckid in the config and create an appropriate fetcher.
|
||||
EXEDocFetcher *exeDocFetcherMake(RclConfig *config, const std::string& bckid);
|
||||
|
||||
#endif /* _EXEFETCHER_H_INCLUDED_ */
|
||||
@ -18,12 +18,14 @@
|
||||
|
||||
|
||||
#include "debuglog.h"
|
||||
#include "rclconfig.h"
|
||||
|
||||
#include "fetcher.h"
|
||||
#include "fsfetcher.h"
|
||||
#include "bglfetcher.h"
|
||||
#include "exefetcher.h"
|
||||
|
||||
DocFetcher *docFetcherMake(const Rcl::Doc& idoc)
|
||||
DocFetcher *docFetcherMake(RclConfig *config, const Rcl::Doc& idoc)
|
||||
{
|
||||
if (idoc.url.empty()) {
|
||||
LOGERR(("docFetcherMakeg:: no url in doc!\n"));
|
||||
@ -38,7 +40,11 @@ DocFetcher *docFetcherMake(const Rcl::Doc& idoc)
|
||||
return new BGLDocFetcher;
|
||||
#endif
|
||||
} else {
|
||||
LOGERR(("DocFetcherFactory: unknown backend [%s]\n", backend.c_str()));
|
||||
return 0;
|
||||
DocFetcher *f = exeDocFetcherMake(config, backend);
|
||||
if (!f) {
|
||||
LOGERR(("DocFetcherFactory: unknown backend [%s]\n",
|
||||
backend.c_str()));
|
||||
}
|
||||
return f;
|
||||
}
|
||||
}
|
||||
|
||||
@ -42,7 +42,7 @@ public:
|
||||
/** A RawDoc is the data for a document-holding entity either as a
|
||||
memory block, or pointed to by a file name */
|
||||
struct RawDoc {
|
||||
enum RawDocKind {RDK_FILENAME, RDK_DATA};
|
||||
enum RawDocKind {RDK_FILENAME, RDK_DATA, RDK_DATADIRECT};
|
||||
RawDocKind kind;
|
||||
std::string data; // Doc data or file name
|
||||
struct stat st; // Only used if RDK_FILENAME
|
||||
@ -71,6 +71,6 @@ public:
|
||||
};
|
||||
|
||||
/** Return an appropriate fetcher object given the backend string identifier */
|
||||
DocFetcher *docFetcherMake(const Rcl::Doc& idoc);
|
||||
DocFetcher *docFetcherMake(RclConfig *config, const Rcl::Doc& idoc);
|
||||
|
||||
#endif /* _FETCHER_H_INCLUDED_ */
|
||||
|
||||
@ -319,6 +319,7 @@ void FileInterner::initcommon(RclConfig *cnf, int flags)
|
||||
m_tmpflgs[i] = false;
|
||||
m_targetMType = cstr_textplain;
|
||||
m_cfg->getConfParam("noxattrfields", &m_noxattrs);
|
||||
m_direct = false;
|
||||
}
|
||||
|
||||
FileInterner::FileInterner(const Rcl::Doc& idoc, RclConfig *cnf, int flags)
|
||||
@ -327,7 +328,7 @@ FileInterner::FileInterner(const Rcl::Doc& idoc, RclConfig *cnf, int flags)
|
||||
LOGDEB0(("FileInterner::FileInterner(idoc)\n"));
|
||||
initcommon(cnf, flags);
|
||||
|
||||
DocFetcher *fetcher = docFetcherMake(idoc);
|
||||
DocFetcher *fetcher = docFetcherMake(cnf, idoc);
|
||||
if (fetcher == 0) {
|
||||
LOGERR(("FileInterner:: no backend\n"));
|
||||
return;
|
||||
@ -344,6 +345,10 @@ FileInterner::FileInterner(const Rcl::Doc& idoc, RclConfig *cnf, int flags)
|
||||
case DocFetcher::RawDoc::RDK_DATA:
|
||||
init(rawdoc.data, cnf, flags, idoc.mimetype);
|
||||
break;
|
||||
case DocFetcher::RawDoc::RDK_DATADIRECT:
|
||||
init(rawdoc.data, cnf, flags, idoc.mimetype);
|
||||
m_direct = true;
|
||||
break;
|
||||
default:
|
||||
LOGERR(("FileInterner::FileInterner(idoc): bad rawdoc kind ??\n"));
|
||||
}
|
||||
@ -352,7 +357,7 @@ FileInterner::FileInterner(const Rcl::Doc& idoc, RclConfig *cnf, int flags)
|
||||
|
||||
bool FileInterner::makesig(RclConfig *cnf, const Rcl::Doc& idoc, string& sig)
|
||||
{
|
||||
DocFetcher *fetcher = docFetcherMake(idoc);
|
||||
DocFetcher *fetcher = docFetcherMake(cnf, idoc);
|
||||
if (fetcher == 0) {
|
||||
LOGERR(("FileInterner::makesig no backend for doc\n"));
|
||||
return false;
|
||||
@ -763,7 +768,7 @@ FileInterner::Status FileInterner::internfile(Rcl::Doc& doc, const string& ipath
|
||||
// We set the ipath for the first handler here, others are set
|
||||
// when they're pushed on the stack
|
||||
vector<string> vipath;
|
||||
if (!ipath.empty()) {
|
||||
if (!ipath.empty() && !m_direct) {
|
||||
vector<string> lipath;
|
||||
stringToTokens(ipath, lipath, cstr_isep, true);
|
||||
for (vector<string>::iterator it = lipath.begin();
|
||||
@ -946,7 +951,7 @@ bool FileInterner::idocToFile(TempFile& otemp, const string& tofile,
|
||||
bool FileInterner::topdocToFile(TempFile& otemp, const string& tofile,
|
||||
RclConfig *cnf, const Rcl::Doc& idoc)
|
||||
{
|
||||
DocFetcher *fetcher = docFetcherMake(idoc);
|
||||
DocFetcher *fetcher = docFetcherMake(cnf, idoc);
|
||||
if (fetcher == 0) {
|
||||
LOGERR(("FileInterner::idocToFile no backend\n"));
|
||||
return false;
|
||||
|
||||
@ -275,7 +275,8 @@ class FileInterner {
|
||||
Uncomp m_uncomp;
|
||||
|
||||
bool m_noxattrs; // disable xattrs usage
|
||||
|
||||
bool m_direct; // External app did the extraction
|
||||
|
||||
// Pseudo-constructors
|
||||
void init(const string &fn, const struct stat *stp,
|
||||
RclConfig *cnf, int flags, const string *mtype = 0);
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user