diff --git a/src/Makefile.am b/src/Makefile.am
index a5fb88f5..0d362d60 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -81,6 +81,8 @@ index/bglfetcher.cpp \
 index/bglfetcher.h \
 index/checkretryfailed.cpp \
 index/checkretryfailed.h \
+index/exefetcher.cpp \
+index/exefetcher.h \
 index/fetcher.cpp \
 index/fetcher.h \
 index/fsfetcher.cpp \
diff --git a/src/index/exefetcher.cpp b/src/index/exefetcher.cpp
new file mode 100644
index 00000000..2ee8bfde
--- /dev/null
+++ b/src/index/exefetcher.cpp
@@ -0,0 +1,161 @@
+/* Copyright (C) 2016 J.F.Dockes
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the
+ * Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+#include "autoconfig.h"
+
+#include <string>
+#include <vector>
+#include <iostream>
+
+#include "exefetcher.h"
+
+#include "debuglog.h"
+#include "pathut.h"
+#include "rclconfig.h"
+#include "execmd.h"
+#include "rcldoc.h"
+
+using namespace std;
+
+// Private data and command runner for EXEDocFetcher.
+class EXEDocFetcher::Internal {
+public:
+    // Backend identifier, shown in the error message below
+    string bckid;
+    // Command line run by EXEDocFetcher::fetch()
+    vector<string> sfetch;
+    // Command line run by EXEDocFetcher::makesig()
+    vector<string> smkid;
+
+    // Run cmd with the document udi, url and ipath appended as arguments,
+    // passing out to ExecCmd::doexec1() to receive the result.
+    // Returns true if the status is 0, else logs an error and returns false.
+    bool docmd(const vector<string>& cmd, const Rcl::Doc& idoc, string& out) {
+        ExecCmd ecmd;
+        // We're always called for preview (or Open)
+        ecmd.putenv("RECOLL_FILTER_FORPREVIEW=yes");
+        string udi;
+        idoc.getmeta(Rcl::Doc::keyudi, &udi);
+        vector<string> args(cmd);
+        args.push_back(udi);
+        args.push_back(idoc.url);
+        args.push_back(idoc.ipath);
+        int status = ecmd.doexec1(args, 0, &out);
+        if (status == 0) {
+            LOGDEB(("EXEDocFetcher::Internal: got [%s]\n", out.c_str()));
+            return true;
+        } else {
+            // Every conversion needs its argument: status goes with 0x%x
+            LOGERR(("EXEDOcFetcher::fetch: %s: %s failed for %s %s %s 0x%x\n",
+                    bckid.c_str(), stringsToString(cmd).c_str(), udi.c_str(),
+                    idoc.url.c_str(), idoc.ipath.c_str(),
+                    static_cast<unsigned int>(status)));
+            return false;
+        }
+    }
+};
+
+EXEDocFetcher::EXEDocFetcher(const EXEDocFetcher::Internal& _m)
+{
+    m = new Internal(_m);
+    LOGDEB(("EXEDocFetcher::EXEDocFetcher: fetch is %s\n",
+            stringsToString(m->sfetch).c_str()));
+}
+
+EXEDocFetcher::~EXEDocFetcher()
+{
+    // m was allocated by the constructor and belongs to this object
+    delete m;
+}
+
+// Run the 'fetch' command: what it returns is the document data.
+bool EXEDocFetcher::fetch(RclConfig* cnf, const Rcl::Doc& idoc, RawDoc& out)
+{
+    out.kind = RawDoc::RDK_DATADIRECT;
+    return m->docmd(m->sfetch, idoc, out.data);
+}
+
+// Run the 'makesig' command: what it returns is the document signature.
+bool EXEDocFetcher::makesig(RclConfig* cnf, const Rcl::Doc& idoc, string& sig)
+{
+    return m->docmd(m->smkid, idoc, sig);
+}
+
+// Split a configured command line into cmd and replace the program name
+// with what RclConfig::findFilter() returns for it. Logs and returns
+// false if the line holds no words or the result is not an absolute path.
+static bool resolveCommand(RclConfig *config, const string& cmdline,
+                           vector<string>& cmd)
+{
+    stringToStrings(cmdline, cmd);
+    if (cmd.empty()) {
+        // Nothing to run (e.g. blank value): cmd[0] must not be touched
+        LOGERR(("exeDocFetcherMake: no command in [%s]\n", cmdline.c_str()));
+        return false;
+    }
+    // We look up the command as we do for filters for now
+    cmd[0] = config->findFilter(cmd[0]);
+    if (!path_isabsolute(cmd[0])) {
+        LOGERR(("exeDocFetcherMake: %s not found in exec path or filters dir\n",
+                cmd[0].c_str()));
+        return false;
+    }
+    return true;
+}
+
+// Lookup bckid in the config and create an appropriate fetcher.
+// Returns 0 if the "backends" file can't be read, if 'fetch' or 'makesig'
+// is missing or empty for bckid, or if a program can't be found.
+// The result is allocated with new and not referenced from here afterwards.
+EXEDocFetcher *exeDocFetcherMake(RclConfig *config, const string& bckid)
+{
+    // The config we only read once, not gonna change.
+    static ConfSimple *bconf;
+    if (!bconf) {
+        string bconfname = path_cat(config->getConfDir(), "backends");
+        LOGDEB(("exeDocFetcherMake: using config in %s\n", bconfname.c_str()));
+        bconf = new ConfSimple(bconfname.c_str(), true);
+        if (!bconf->ok()) {
+            delete bconf;
+            bconf = 0;
+            LOGDEB(("exeDocFetcherMake: bad/no config: %s\n",
+                    bconfname.c_str()));
+            return 0;
+        }
+    }
+
+    EXEDocFetcher::Internal m;
+    m.bckid = bckid;
+
+    string sfetch;
+    if (!bconf->get("fetch", sfetch, bckid) || sfetch.empty()) {
+        LOGERR(("exeDocFetcherMake: no 'fetch' for [%s]\n", bckid.c_str()));
+        return 0;
+    }
+    if (!resolveCommand(config, sfetch, m.sfetch)) {
+        return 0;
+    }
+
+    string smkid;
+    if (!bconf->get("makesig", smkid, bckid) || smkid.empty()) {
+        LOGDEB(("exeDocFetcherMake: no 'makesig' for [%s]\n", bckid.c_str()));
+        return 0;
+    }
+    if (!resolveCommand(config, smkid, m.smkid)) {
+        return 0;
+    }
+    return new EXEDocFetcher(m);
+}
diff --git a/src/index/exefetcher.h b/src/index/exefetcher.h
new file mode 100644
index 00000000..30e43d3d
--- /dev/null
+++ b/src/index/exefetcher.h
@@ -0,0 +1,50 @@
+/* Copyright (C) 2012 J.F.Dockes
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the
+ * Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+#ifndef _EXEFETCHER_H_INCLUDED_
+#define _EXEFETCHER_H_INCLUDED_
+
+#include "fetcher.h"
+
+class RclConfig;
+
+/**
+ * A fetcher which works by executing external programs. The command lines
+ * come from the 'fetch' and 'makesig' entries of the "backends" file in the
+ * configuration directory, looked up under the backend identifier.
+ * Instances are only created by exeDocFetcherMake().
+ */
+class EXEDocFetcher : public DocFetcher {
+    class Internal;
+    EXEDocFetcher(const Internal&);
+    virtual ~EXEDocFetcher();
+    // Not copyable: the object owns the Internal pointed to by m
+    EXEDocFetcher(const EXEDocFetcher&);
+    EXEDocFetcher& operator=(const EXEDocFetcher&);
+
+    /** Runs the 'fetch' command, its output becomes the document data */
+    virtual bool fetch(RclConfig* cnf, const Rcl::Doc& idoc, RawDoc& out);
+    /** Runs the 'makesig' command, its output becomes the signature */
+    virtual bool makesig(RclConfig* cnf, const Rcl::Doc& idoc, std::string& sig);
+    friend EXEDocFetcher *exeDocFetcherMake(RclConfig *, const std::string&);
+private:
+    Internal *m;
+};
+
+// Lookup bckid in the config and create an appropriate fetcher.
+EXEDocFetcher *exeDocFetcherMake(RclConfig *config, const std::string& bckid); + +#endif /* _EXEFETCHER_H_INCLUDED_ */ diff --git a/src/index/fetcher.cpp b/src/index/fetcher.cpp index 5afc77fc..db277437 100644 --- a/src/index/fetcher.cpp +++ b/src/index/fetcher.cpp @@ -18,12 +18,14 @@ #include "debuglog.h" +#include "rclconfig.h" #include "fetcher.h" #include "fsfetcher.h" #include "bglfetcher.h" +#include "exefetcher.h" -DocFetcher *docFetcherMake(const Rcl::Doc& idoc) +DocFetcher *docFetcherMake(RclConfig *config, const Rcl::Doc& idoc) { if (idoc.url.empty()) { LOGERR(("docFetcherMakeg:: no url in doc!\n")); @@ -38,7 +40,11 @@ DocFetcher *docFetcherMake(const Rcl::Doc& idoc) return new BGLDocFetcher; #endif } else { - LOGERR(("DocFetcherFactory: unknown backend [%s]\n", backend.c_str())); - return 0; + DocFetcher *f = exeDocFetcherMake(config, backend); + if (!f) { + LOGERR(("DocFetcherFactory: unknown backend [%s]\n", + backend.c_str())); + } + return f; } } diff --git a/src/index/fetcher.h b/src/index/fetcher.h index e741d53d..418aa604 100644 --- a/src/index/fetcher.h +++ b/src/index/fetcher.h @@ -42,7 +42,7 @@ public: /** A RawDoc is the data for a document-holding entity either as a memory block, or pointed to by a file name */ struct RawDoc { - enum RawDocKind {RDK_FILENAME, RDK_DATA}; + enum RawDocKind {RDK_FILENAME, RDK_DATA, RDK_DATADIRECT}; RawDocKind kind; std::string data; // Doc data or file name struct stat st; // Only used if RDK_FILENAME @@ -71,6 +71,6 @@ public: }; /** Return an appropriate fetcher object given the backend string identifier */ -DocFetcher *docFetcherMake(const Rcl::Doc& idoc); +DocFetcher *docFetcherMake(RclConfig *config, const Rcl::Doc& idoc); #endif /* _FETCHER_H_INCLUDED_ */ diff --git a/src/internfile/internfile.cpp b/src/internfile/internfile.cpp index 107d697d..89e2273f 100644 --- a/src/internfile/internfile.cpp +++ b/src/internfile/internfile.cpp @@ -319,6 +319,7 @@ void FileInterner::initcommon(RclConfig *cnf, int flags) 
m_tmpflgs[i] = false; m_targetMType = cstr_textplain; m_cfg->getConfParam("noxattrfields", &m_noxattrs); + m_direct = false; } FileInterner::FileInterner(const Rcl::Doc& idoc, RclConfig *cnf, int flags) @@ -327,7 +328,7 @@ FileInterner::FileInterner(const Rcl::Doc& idoc, RclConfig *cnf, int flags) LOGDEB0(("FileInterner::FileInterner(idoc)\n")); initcommon(cnf, flags); - DocFetcher *fetcher = docFetcherMake(idoc); + DocFetcher *fetcher = docFetcherMake(cnf, idoc); if (fetcher == 0) { LOGERR(("FileInterner:: no backend\n")); return; @@ -344,6 +345,10 @@ FileInterner::FileInterner(const Rcl::Doc& idoc, RclConfig *cnf, int flags) case DocFetcher::RawDoc::RDK_DATA: init(rawdoc.data, cnf, flags, idoc.mimetype); break; + case DocFetcher::RawDoc::RDK_DATADIRECT: + init(rawdoc.data, cnf, flags, idoc.mimetype); + m_direct = true; + break; default: LOGERR(("FileInterner::FileInterner(idoc): bad rawdoc kind ??\n")); } @@ -352,7 +357,7 @@ FileInterner::FileInterner(const Rcl::Doc& idoc, RclConfig *cnf, int flags) bool FileInterner::makesig(RclConfig *cnf, const Rcl::Doc& idoc, string& sig) { - DocFetcher *fetcher = docFetcherMake(idoc); + DocFetcher *fetcher = docFetcherMake(cnf, idoc); if (fetcher == 0) { LOGERR(("FileInterner::makesig no backend for doc\n")); return false; @@ -763,7 +768,7 @@ FileInterner::Status FileInterner::internfile(Rcl::Doc& doc, const string& ipath // We set the ipath for the first handler here, others are set // when they're pushed on the stack vector vipath; - if (!ipath.empty()) { + if (!ipath.empty() && !m_direct) { vector lipath; stringToTokens(ipath, lipath, cstr_isep, true); for (vector::iterator it = lipath.begin(); @@ -946,7 +951,7 @@ bool FileInterner::idocToFile(TempFile& otemp, const string& tofile, bool FileInterner::topdocToFile(TempFile& otemp, const string& tofile, RclConfig *cnf, const Rcl::Doc& idoc) { - DocFetcher *fetcher = docFetcherMake(idoc); + DocFetcher *fetcher = docFetcherMake(cnf, idoc); if (fetcher == 0) { 
LOGERR(("FileInterner::idocToFile no backend\n")); return false; diff --git a/src/internfile/internfile.h b/src/internfile/internfile.h index d2392036..5e7c6c4b 100644 --- a/src/internfile/internfile.h +++ b/src/internfile/internfile.h @@ -275,7 +275,8 @@ class FileInterner { Uncomp m_uncomp; bool m_noxattrs; // disable xattrs usage - + bool m_direct; // External app did the extraction + // Pseudo-constructors void init(const string &fn, const struct stat *stp, RclConfig *cnf, int flags, const string *mtype = 0);