defined data access interface for external indexers
This commit is contained in:
parent
03063f58dc
commit
1aea57fcb2
@ -81,6 +81,8 @@ index/bglfetcher.cpp \
|
|||||||
index/bglfetcher.h \
|
index/bglfetcher.h \
|
||||||
index/checkretryfailed.cpp \
|
index/checkretryfailed.cpp \
|
||||||
index/checkretryfailed.h \
|
index/checkretryfailed.h \
|
||||||
|
index/exefetcher.cpp \
|
||||||
|
index/exefetcher.h \
|
||||||
index/fetcher.cpp \
|
index/fetcher.cpp \
|
||||||
index/fetcher.h \
|
index/fetcher.h \
|
||||||
index/fsfetcher.cpp \
|
index/fsfetcher.cpp \
|
||||||
|
|||||||
129
src/index/exefetcher.cpp
Normal file
129
src/index/exefetcher.cpp
Normal file
@ -0,0 +1,129 @@
|
|||||||
|
/* Copyright (C) 2016 J.F.Dockes
|
||||||
|
* This program is free software; you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation; either version 2 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program; if not, write to the
|
||||||
|
* Free Software Foundation, Inc.,
|
||||||
|
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
|
*/
|
||||||
|
#include "autoconfig.h"
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
|
#include "exefetcher.h"
|
||||||
|
|
||||||
|
#include "debuglog.h"
|
||||||
|
#include "pathut.h"
|
||||||
|
#include "rclconfig.h"
|
||||||
|
#include "execmd.h"
|
||||||
|
#include "rcldoc.h"
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
/**
 * Private data and command runner for EXEDocFetcher.
 *
 * Holds the backend identifier and the two command lines (already split
 * into words) which the fetcher runs: one to retrieve the document data,
 * one to compute the document signature.
 */
class EXEDocFetcher::Internal {
public:
    // Backend identifier. Only used here to label error messages.
    string bckid;
    // Command line run by EXEDocFetcher::fetch()
    vector<string> sfetch;
    // Command line run by EXEDocFetcher::makesig()
    vector<string> smkid;

    /**
     * Execute an external command for a document and collect its output.
     *
     * The document's udi, url and ipath are appended, in this order, as
     * the last three arguments of the command.
     *
     * @param cmd command line (program followed by its fixed arguments).
     * @param idoc document for which the command is run.
     * @param out receives the data which ExecCmd::doexec1() stores through
     *     its output parameter.
     * @return true if doexec1() returned 0, false otherwise.
     */
    bool docmd(const vector<string>& cmd, const Rcl::Doc& idoc, string& out) {
        ExecCmd ecmd;
        // We're always called for preview (or Open)
        ecmd.putenv("RECOLL_FILTER_FORPREVIEW=yes");

        string udi;
        idoc.getmeta(Rcl::Doc::keyudi, &udi);

        vector<string> args(cmd);
        args.push_back(udi);
        args.push_back(idoc.url);
        args.push_back(idoc.ipath);

        int status = ecmd.doexec1(args, 0, &out);
        if (status == 0) {
            LOGDEB(("EXEDocFetcher::Internal: got [%s]\n", out.c_str()));
            return true;
        } else {
            // The status must be passed explicitly: the format has six
            // conversions, and it is printed in hex to match the 0x prefix.
            LOGERR(("EXEDOcFetcher::fetch: %s: %s failed for %s %s %s 0x%x\n",
                    bckid.c_str(), stringsToString(cmd).c_str(), udi.c_str(),
                    idoc.url.c_str(), idoc.ipath.c_str(),
                    static_cast<unsigned int>(status)));
            return false;
        }
    }
};
|
||||||
|
|
||||||
|
// Build a fetcher from a prepared set of commands. The argument is copied
// into a heap-allocated Internal, so the caller's object does not need to
// outlive the fetcher.
EXEDocFetcher::EXEDocFetcher(const EXEDocFetcher::Internal& _m)
    : m(new Internal(_m))
{
    LOGDEB(("EXEDocFetcher::EXEDocFetcher: fetch is %s\n",
            stringsToString(m->sfetch).c_str()));
}
|
||||||
|
|
||||||
|
// Retrieve the document data by running the backend 'fetch' command.
// The command output is stored in out.data and the result is flagged as
// RDK_DATADIRECT. The flag is set before the command runs, so it is set
// even when the command fails. The cnf parameter is not used.
bool EXEDocFetcher::fetch(RclConfig* cnf, const Rcl::Doc& idoc, RawDoc& out)
{
    out.kind = RawDoc::RDK_DATADIRECT;
    const bool fetched = m->docmd(m->sfetch, idoc, out.data);
    return fetched;
}
|
||||||
|
|
||||||
|
// Compute the document signature by running the backend 'makesig' command:
// the command output is stored in sig. The cnf parameter is not used.
bool EXEDocFetcher::makesig(RclConfig* cnf, const Rcl::Doc& idoc, string& sig)
{
    const bool computed = m->docmd(m->smkid, idoc, sig);
    return computed;
}
|
||||||
|
|
||||||
|
// Lookup bckid in the config and create an appropriate fetcher.
|
||||||
|
EXEDocFetcher *exeDocFetcherMake(RclConfig *config, const string& bckid)
|
||||||
|
{
|
||||||
|
EXEDocFetcher *fetcher = 0;
|
||||||
|
|
||||||
|
// The config we only read once, not gonna change.
|
||||||
|
static ConfSimple *bconf;
|
||||||
|
if (!bconf) {
|
||||||
|
string bconfname = path_cat(config->getConfDir(), "backends");
|
||||||
|
LOGDEB(("exeDocFetcherMake: using config in %s\n", bconfname.c_str()));
|
||||||
|
bconf = new ConfSimple(bconfname.c_str(), true);
|
||||||
|
if (!bconf->ok()) {
|
||||||
|
delete bconf;
|
||||||
|
bconf = 0;
|
||||||
|
LOGDEB(("exeDocFetcherMake: bad/no config: %s\n",
|
||||||
|
bconfname.c_str()));
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
EXEDocFetcher::Internal m;
|
||||||
|
m.bckid = bckid;
|
||||||
|
|
||||||
|
string sfetch;
|
||||||
|
if (!bconf->get("fetch", sfetch, bckid) || sfetch.empty()) {
|
||||||
|
LOGERR(("exeDocFetcherMake: no 'fetch' for [%s]\n", bckid.c_str()));
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
stringToStrings(sfetch, m.sfetch);
|
||||||
|
// We look up the command as we do for filters for now
|
||||||
|
m.sfetch[0] = config->findFilter(m.sfetch[0]);
|
||||||
|
if (!path_isabsolute(m.sfetch[0])) {
|
||||||
|
LOGERR(("exeDocFetcherMake: %s not found in exec path or filters dir\n",
|
||||||
|
m.sfetch[0].c_str()));
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
string smkid;
|
||||||
|
if (!bconf->get("makesig", smkid, bckid) || smkid.empty()) {
|
||||||
|
LOGDEB(("exeDocFetcherMake: no 'makesig' for [%s]\n", bckid.c_str()));
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
stringToStrings(smkid, m.smkid);
|
||||||
|
m.smkid[0] = config->findFilter(m.smkid[0]);
|
||||||
|
if (!path_isabsolute(m.smkid[0])) {
|
||||||
|
LOGERR(("exeDocFetcherMake: %s not found in exec path or filters dir\n",
|
||||||
|
m.smkid[0].c_str()));
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
return new EXEDocFetcher(m);
|
||||||
|
}
|
||||||
44
src/index/exefetcher.h
Normal file
44
src/index/exefetcher.h
Normal file
@ -0,0 +1,44 @@
|
|||||||
|
/* Copyright (C) 2012 J.F.Dockes
|
||||||
|
* This program is free software; you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation; either version 2 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program; if not, write to the
|
||||||
|
* Free Software Foundation, Inc.,
|
||||||
|
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
|
*/
|
||||||
|
#ifndef _EXEFETCHER_H_INCLUDED_
|
||||||
|
#define _EXEFETCHER_H_INCLUDED_
|
||||||
|
|
||||||
|
#include "fetcher.h"
|
||||||
|
|
||||||
|
class RclConfig;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A fetcher which works by executing external programs, defined in a
|
||||||
|
* configuration file:
|
||||||
|
*/
|
||||||
|
class EXEDocFetcher : public DocFetcher {
|
||||||
|
class Internal;
|
||||||
|
EXEDocFetcher(const Internal&);
|
||||||
|
virtual ~EXEDocFetcher() {}
|
||||||
|
|
||||||
|
virtual bool fetch(RclConfig* cnf, const Rcl::Doc& idoc, RawDoc& out);
|
||||||
|
/** Calls stat to retrieve file signature data */
|
||||||
|
virtual bool makesig(RclConfig* cnf, const Rcl::Doc& idoc, std::string& sig);
|
||||||
|
friend EXEDocFetcher *exeDocFetcherMake(RclConfig *, const std::string&);
|
||||||
|
private:
|
||||||
|
Internal *m;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Lookup bckid in the config and create an appropriate fetcher.
|
||||||
|
EXEDocFetcher *exeDocFetcherMake(RclConfig *config, const std::string& bckid);
|
||||||
|
|
||||||
|
#endif /* _EXEFETCHER_H_INCLUDED_ */
|
||||||
@ -18,12 +18,14 @@
|
|||||||
|
|
||||||
|
|
||||||
#include "debuglog.h"
|
#include "debuglog.h"
|
||||||
|
#include "rclconfig.h"
|
||||||
|
|
||||||
#include "fetcher.h"
|
#include "fetcher.h"
|
||||||
#include "fsfetcher.h"
|
#include "fsfetcher.h"
|
||||||
#include "bglfetcher.h"
|
#include "bglfetcher.h"
|
||||||
|
#include "exefetcher.h"
|
||||||
|
|
||||||
DocFetcher *docFetcherMake(const Rcl::Doc& idoc)
|
DocFetcher *docFetcherMake(RclConfig *config, const Rcl::Doc& idoc)
|
||||||
{
|
{
|
||||||
if (idoc.url.empty()) {
|
if (idoc.url.empty()) {
|
||||||
LOGERR(("docFetcherMakeg:: no url in doc!\n"));
|
LOGERR(("docFetcherMakeg:: no url in doc!\n"));
|
||||||
@ -38,7 +40,11 @@ DocFetcher *docFetcherMake(const Rcl::Doc& idoc)
|
|||||||
return new BGLDocFetcher;
|
return new BGLDocFetcher;
|
||||||
#endif
|
#endif
|
||||||
} else {
|
} else {
|
||||||
LOGERR(("DocFetcherFactory: unknown backend [%s]\n", backend.c_str()));
|
DocFetcher *f = exeDocFetcherMake(config, backend);
|
||||||
return 0;
|
if (!f) {
|
||||||
|
LOGERR(("DocFetcherFactory: unknown backend [%s]\n",
|
||||||
|
backend.c_str()));
|
||||||
|
}
|
||||||
|
return f;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -42,7 +42,7 @@ public:
|
|||||||
/** A RawDoc is the data for a document-holding entity either as a
|
/** A RawDoc is the data for a document-holding entity either as a
|
||||||
memory block, or pointed to by a file name */
|
memory block, or pointed to by a file name */
|
||||||
struct RawDoc {
|
struct RawDoc {
|
||||||
enum RawDocKind {RDK_FILENAME, RDK_DATA};
|
enum RawDocKind {RDK_FILENAME, RDK_DATA, RDK_DATADIRECT};
|
||||||
RawDocKind kind;
|
RawDocKind kind;
|
||||||
std::string data; // Doc data or file name
|
std::string data; // Doc data or file name
|
||||||
struct stat st; // Only used if RDK_FILENAME
|
struct stat st; // Only used if RDK_FILENAME
|
||||||
@ -71,6 +71,6 @@ public:
|
|||||||
};
|
};
|
||||||
|
|
||||||
/** Return an appropriate fetcher object given the backend string identifier */
|
/** Return an appropriate fetcher object given the backend string identifier */
|
||||||
DocFetcher *docFetcherMake(const Rcl::Doc& idoc);
|
DocFetcher *docFetcherMake(RclConfig *config, const Rcl::Doc& idoc);
|
||||||
|
|
||||||
#endif /* _FETCHER_H_INCLUDED_ */
|
#endif /* _FETCHER_H_INCLUDED_ */
|
||||||
|
|||||||
@ -319,6 +319,7 @@ void FileInterner::initcommon(RclConfig *cnf, int flags)
|
|||||||
m_tmpflgs[i] = false;
|
m_tmpflgs[i] = false;
|
||||||
m_targetMType = cstr_textplain;
|
m_targetMType = cstr_textplain;
|
||||||
m_cfg->getConfParam("noxattrfields", &m_noxattrs);
|
m_cfg->getConfParam("noxattrfields", &m_noxattrs);
|
||||||
|
m_direct = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
FileInterner::FileInterner(const Rcl::Doc& idoc, RclConfig *cnf, int flags)
|
FileInterner::FileInterner(const Rcl::Doc& idoc, RclConfig *cnf, int flags)
|
||||||
@ -327,7 +328,7 @@ FileInterner::FileInterner(const Rcl::Doc& idoc, RclConfig *cnf, int flags)
|
|||||||
LOGDEB0(("FileInterner::FileInterner(idoc)\n"));
|
LOGDEB0(("FileInterner::FileInterner(idoc)\n"));
|
||||||
initcommon(cnf, flags);
|
initcommon(cnf, flags);
|
||||||
|
|
||||||
DocFetcher *fetcher = docFetcherMake(idoc);
|
DocFetcher *fetcher = docFetcherMake(cnf, idoc);
|
||||||
if (fetcher == 0) {
|
if (fetcher == 0) {
|
||||||
LOGERR(("FileInterner:: no backend\n"));
|
LOGERR(("FileInterner:: no backend\n"));
|
||||||
return;
|
return;
|
||||||
@ -344,6 +345,10 @@ FileInterner::FileInterner(const Rcl::Doc& idoc, RclConfig *cnf, int flags)
|
|||||||
case DocFetcher::RawDoc::RDK_DATA:
|
case DocFetcher::RawDoc::RDK_DATA:
|
||||||
init(rawdoc.data, cnf, flags, idoc.mimetype);
|
init(rawdoc.data, cnf, flags, idoc.mimetype);
|
||||||
break;
|
break;
|
||||||
|
case DocFetcher::RawDoc::RDK_DATADIRECT:
|
||||||
|
init(rawdoc.data, cnf, flags, idoc.mimetype);
|
||||||
|
m_direct = true;
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
LOGERR(("FileInterner::FileInterner(idoc): bad rawdoc kind ??\n"));
|
LOGERR(("FileInterner::FileInterner(idoc): bad rawdoc kind ??\n"));
|
||||||
}
|
}
|
||||||
@ -352,7 +357,7 @@ FileInterner::FileInterner(const Rcl::Doc& idoc, RclConfig *cnf, int flags)
|
|||||||
|
|
||||||
bool FileInterner::makesig(RclConfig *cnf, const Rcl::Doc& idoc, string& sig)
|
bool FileInterner::makesig(RclConfig *cnf, const Rcl::Doc& idoc, string& sig)
|
||||||
{
|
{
|
||||||
DocFetcher *fetcher = docFetcherMake(idoc);
|
DocFetcher *fetcher = docFetcherMake(cnf, idoc);
|
||||||
if (fetcher == 0) {
|
if (fetcher == 0) {
|
||||||
LOGERR(("FileInterner::makesig no backend for doc\n"));
|
LOGERR(("FileInterner::makesig no backend for doc\n"));
|
||||||
return false;
|
return false;
|
||||||
@ -763,7 +768,7 @@ FileInterner::Status FileInterner::internfile(Rcl::Doc& doc, const string& ipath
|
|||||||
// We set the ipath for the first handler here, others are set
|
// We set the ipath for the first handler here, others are set
|
||||||
// when they're pushed on the stack
|
// when they're pushed on the stack
|
||||||
vector<string> vipath;
|
vector<string> vipath;
|
||||||
if (!ipath.empty()) {
|
if (!ipath.empty() && !m_direct) {
|
||||||
vector<string> lipath;
|
vector<string> lipath;
|
||||||
stringToTokens(ipath, lipath, cstr_isep, true);
|
stringToTokens(ipath, lipath, cstr_isep, true);
|
||||||
for (vector<string>::iterator it = lipath.begin();
|
for (vector<string>::iterator it = lipath.begin();
|
||||||
@ -946,7 +951,7 @@ bool FileInterner::idocToFile(TempFile& otemp, const string& tofile,
|
|||||||
bool FileInterner::topdocToFile(TempFile& otemp, const string& tofile,
|
bool FileInterner::topdocToFile(TempFile& otemp, const string& tofile,
|
||||||
RclConfig *cnf, const Rcl::Doc& idoc)
|
RclConfig *cnf, const Rcl::Doc& idoc)
|
||||||
{
|
{
|
||||||
DocFetcher *fetcher = docFetcherMake(idoc);
|
DocFetcher *fetcher = docFetcherMake(cnf, idoc);
|
||||||
if (fetcher == 0) {
|
if (fetcher == 0) {
|
||||||
LOGERR(("FileInterner::idocToFile no backend\n"));
|
LOGERR(("FileInterner::idocToFile no backend\n"));
|
||||||
return false;
|
return false;
|
||||||
|
|||||||
@ -275,6 +275,7 @@ class FileInterner {
|
|||||||
Uncomp m_uncomp;
|
Uncomp m_uncomp;
|
||||||
|
|
||||||
bool m_noxattrs; // disable xattrs usage
|
bool m_noxattrs; // disable xattrs usage
|
||||||
|
bool m_direct; // External app did the extraction
|
||||||
|
|
||||||
// Pseudo-constructors
|
// Pseudo-constructors
|
||||||
void init(const string &fn, const struct stat *stp,
|
void init(const string &fn, const struct stat *stp,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user