diff --git a/src/index/exefetcher.cpp b/src/index/exefetcher.cpp index bd3afa39..9e34c83e 100644 --- a/src/index/exefetcher.cpp +++ b/src/index/exefetcher.cpp @@ -77,7 +77,8 @@ bool EXEDocFetcher::makesig(RclConfig* cnf, const Rcl::Doc& idoc, string& sig) } // Lookup bckid in the config and create an appropriate fetcher. -EXEDocFetcher *exeDocFetcherMake(RclConfig *config, const string& bckid) +std::unique_ptr exeDocFetcherMake(RclConfig *config, + const string& bckid) { // The config we only read once, not gonna change. static ConfSimple *bconf; @@ -122,5 +123,5 @@ EXEDocFetcher *exeDocFetcherMake(RclConfig *config, const string& bckid) " not found in exec path or filters dir\n"); return 0; } - return new EXEDocFetcher(m); + return std::unique_ptr(new EXEDocFetcher(m)); } diff --git a/src/index/exefetcher.h b/src/index/exefetcher.h index 3c92fc47..0af151b7 100644 --- a/src/index/exefetcher.h +++ b/src/index/exefetcher.h @@ -17,6 +17,7 @@ #ifndef _EXEFETCHER_H_INCLUDED_ #define _EXEFETCHER_H_INCLUDED_ +#include #include "fetcher.h" class RclConfig; @@ -35,6 +36,7 @@ class RclConfig; * query time for previewing and opening the document. */ class EXEDocFetcher : public DocFetcher { +public: class Internal; EXEDocFetcher(const Internal&); virtual ~EXEDocFetcher() {} @@ -42,12 +44,14 @@ class EXEDocFetcher : public DocFetcher { virtual bool fetch(RclConfig* cnf, const Rcl::Doc& idoc, RawDoc& out); /** Calls stat to retrieve file signature data */ virtual bool makesig(RclConfig* cnf, const Rcl::Doc& idoc,std::string& sig); - friend EXEDocFetcher *exeDocFetcherMake(RclConfig *, const std::string&); + friend std::unique_ptr + exeDocFetcherMake(RclConfig *, const std::string&); private: Internal *m; }; // Lookup bckid in the config and create an appropriate fetcher. 
-EXEDocFetcher *exeDocFetcherMake(RclConfig *config, const std::string& bckid); +std::unique_ptr exeDocFetcherMake(RclConfig *config, + const std::string& bckid); #endif /* _EXEFETCHER_H_INCLUDED_ */ diff --git a/src/index/fetcher.cpp b/src/index/fetcher.cpp index f643d069..f5cc4dd0 100644 --- a/src/index/fetcher.cpp +++ b/src/index/fetcher.cpp @@ -16,6 +16,8 @@ */ #include "autoconfig.h" +#include + #include "log.h" #include "rclconfig.h" #include "fetcher.h" @@ -23,22 +25,23 @@ #include "webqueuefetcher.h" #include "exefetcher.h" -DocFetcher *docFetcherMake(RclConfig *config, const Rcl::Doc& idoc) +std::unique_ptr docFetcherMake(RclConfig *config, + const Rcl::Doc& idoc) { if (idoc.url.empty()) { LOGERR("docFetcherMakeg:: no url in doc!\n" ); - return 0; + return std::unique_ptr(); } string backend; idoc.getmeta(Rcl::Doc::keybcknd, &backend); if (backend.empty() || !backend.compare("FS")) { - return new FSDocFetcher; + return std::unique_ptr(new FSDocFetcher); #ifndef DISABLE_WEB_INDEXER } else if (!backend.compare("BGL")) { - return new WQDocFetcher; + return std::unique_ptr(new WQDocFetcher); #endif } else { - DocFetcher *f = exeDocFetcherMake(config, backend); + std::unique_ptr f(exeDocFetcherMake(config, backend)); if (!f) { LOGERR("DocFetcherFactory: unknown backend [" << backend << "]\n"); } diff --git a/src/index/fetcher.h b/src/index/fetcher.h index 63e13345..92cf38d9 100644 --- a/src/index/fetcher.h +++ b/src/index/fetcher.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2012 J.F.Dockes +/* Copyright (C) 2012-2019 J.F.Dockes * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -19,6 +19,7 @@ #include "safesysstat.h" #include +#include #include "rcldoc.h" @@ -45,12 +46,12 @@ class RclConfig; class DocFetcher { public: /** A RawDoc is the data for a document-holding entity either as a - memory block, or pointed 
to by a file name */ + memory block, or pointed to by a file name */ struct RawDoc { - enum RawDocKind {RDK_FILENAME, RDK_DATA, RDK_DATADIRECT}; - RawDocKind kind; - std::string data; // Doc data or file name - struct stat st; // Only used if RDK_FILENAME + enum RawDocKind {RDK_FILENAME, RDK_DATA, RDK_DATADIRECT}; + RawDocKind kind; + std::string data; // Doc data or file name + struct stat st; // Only used if RDK_FILENAME }; /** @@ -73,11 +74,16 @@ public: */ virtual bool makesig(RclConfig* cnf, const Rcl::Doc& idoc, std::string& sig) = 0; + enum Reason{FetchOk, FetchNotExist, FetchNoPerm, FetchOther}; + virtual Reason testAccess(RclConfig* cnf, const Rcl::Doc& idoc) { + return FetchOther; + } virtual ~DocFetcher() {} }; /** Return an appropriate fetcher object given the backend string * identifier inside idoc*/ -DocFetcher *docFetcherMake(RclConfig *config, const Rcl::Doc& idoc); +std::unique_ptr docFetcherMake(RclConfig *config, + const Rcl::Doc& idoc); #endif /* _FETCHER_H_INCLUDED_ */ diff --git a/src/index/fsfetcher.cpp b/src/index/fsfetcher.cpp index 5dd6dabf..667d1780 100644 --- a/src/index/fsfetcher.cpp +++ b/src/index/fsfetcher.cpp @@ -1,4 +1,4 @@ -/* Copyright (C) 2012 J.F.Dockes +/* Copyright (C) 2012-2019 J.F.Dockes * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -28,31 +28,32 @@ using std::string; -static bool urltopath(RclConfig* cnf, - const Rcl::Doc& idoc, string& fn, struct stat& st) +static DocFetcher::Reason urltopath(RclConfig* cnf, const Rcl::Doc& idoc, + string& fn, struct stat& st) { // The url has to be like file:// fn = fileurltolocalpath(idoc.url); if (fn.empty()) { - LOGERR("FSDocFetcher::fetch/sig: non fs url: [" << (idoc.url) << "]\n" ); - return false; + LOGERR("FSDocFetcher::fetch/sig: non fs url: [" << idoc.url << "]\n"); + return DocFetcher::FetchOther; } 
cnf->setKeyDir(path_getfather(fn)); bool follow = false; cnf->getConfParam("followLinks", &follow); if (path_fileprops(fn, &st, follow) < 0) { - LOGERR("FSDocFetcher::fetch: stat errno " << (errno) << " for [" << (fn) << "]\n" ); - return false; + LOGERR("FSDocFetcher::fetch: stat errno " << errno << " for [" << fn + << "]\n"); + return DocFetcher::FetchNotExist; } - return true; + return DocFetcher::FetchOk; } bool FSDocFetcher::fetch(RclConfig* cnf, const Rcl::Doc& idoc, RawDoc& out) { string fn; - if (!urltopath(cnf, idoc, fn, out.st)) - return false; + if (urltopath(cnf, idoc, fn, out.st) != DocFetcher::FetchOk) + return false; out.kind = RawDoc::RDK_FILENAME; out.data = fn; return true; @@ -62,10 +63,24 @@ bool FSDocFetcher::makesig(RclConfig* cnf, const Rcl::Doc& idoc, string& sig) { string fn; struct stat st; - if (!urltopath(cnf, idoc, fn, st)) - return false; + if (urltopath(cnf, idoc, fn, st) != DocFetcher::FetchOk) + return false; FsIndexer::makesig(&st, sig); return true; } - +DocFetcher::Reason FSDocFetcher::testAccess(RclConfig* cnf, const Rcl::Doc& idoc) +{ + string fn; + struct stat st; + DocFetcher::Reason reason = urltopath(cnf, idoc, fn, st); + if (reason != DocFetcher::FetchOk) { + return reason; + } + if (!path_readable(fn)) { + return DocFetcher::FetchNoPerm; + } + // We have no way to know if the file is fully readable without + // trying (local Windows locks), which would take too much time. 
+ return DocFetcher::FetchOther; +} diff --git a/src/index/fsfetcher.h b/src/index/fsfetcher.h index 0719a2f7..f7ebb862 100644 --- a/src/index/fsfetcher.h +++ b/src/index/fsfetcher.h @@ -28,6 +28,7 @@ class FSDocFetcher : public DocFetcher{ /** Calls stat to retrieve file signature data */ virtual bool makesig(RclConfig* cnf,const Rcl::Doc& idoc, std::string& sig); + virtual DocFetcher::Reason testAccess(RclConfig* cnf, const Rcl::Doc& idoc); virtual ~FSDocFetcher() {} }; diff --git a/src/internfile/internfile.cpp b/src/internfile/internfile.cpp index 4eccb50e..df7442be 100644 --- a/src/internfile/internfile.cpp +++ b/src/internfile/internfile.cpp @@ -1,4 +1,4 @@ -/* Copyright (C) 2004 J.F.Dockes +/* Copyright (C) 2004-2019 J.F.Dockes * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -15,7 +15,6 @@ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ -#ifndef TEST_INTERNFILE #include "autoconfig.h" #include @@ -29,6 +28,7 @@ #include #include #include +#include using namespace std; @@ -61,7 +61,7 @@ static string colon_hide(const string& in) { string out; for (string::const_iterator it = in.begin(); it != in.end(); it++) { - out += *it == ':' ? cchar_colon_repl : *it; + out += *it == ':' ? cchar_colon_repl : *it; } return out; } @@ -69,7 +69,7 @@ static string colon_restore(const string& in) { string out; for (string::const_iterator it = in.begin(); it != in.end(); it++) { - out += *it == cchar_colon_repl ? ':' : *it; + out += *it == cchar_colon_repl ? 
':' : *it; } return out; } @@ -83,11 +83,11 @@ bool FileInterner::getEnclosingUDI(const Rcl::Doc &doc, string& udi) string eipath = doc.ipath; string::size_type colon; if (eipath.empty()) - return false; + return false; if ((colon = eipath.find_last_of(cstr_isep)) != string::npos) { - eipath.erase(colon); + eipath.erase(colon); } else { - eipath.erase(); + eipath.erase(); } make_udi(url_gpath(doc.idxurl.empty() ? doc.url : doc.idxurl), eipath, udi); @@ -98,9 +98,9 @@ string FileInterner::getLastIpathElt(const string& ipath) { string::size_type sep; if ((sep = ipath.find_last_of(cstr_isep)) != string::npos) { - return ipath.substr(sep + 1); + return ipath.substr(sep + 1); } else { - return ipath; + return ipath; } } @@ -118,12 +118,12 @@ bool FileInterner::ipathContains(const string& parent, const string& child) // processed by the first call to internfile(). // Split into "constructor calls init()" to allow use from other constructor FileInterner::FileInterner(const string &fn, const struct stat *stp, - RclConfig *cnf, int flags, const string *imime) + RclConfig *cnf, int flags, const string *imime) { LOGDEB0("FileInterner::FileInterner(fn=" << fn << ")\n"); if (fn.empty()) { - LOGERR("FileInterner::FileInterner: empty file name!\n"); - return; + LOGERR("FileInterner::FileInterner: empty file name!\n"); + return; } initcommon(cnf, flags); init(fn, stp, cnf, flags, imime); @@ -141,8 +141,8 @@ void FileInterner::init(const string &f, const struct stat *stp, RclConfig *cnf, int flags, const string *imime) { if (f.empty()) { - LOGERR("FileInterner::init: empty file name!\n"); - return; + LOGERR("FileInterner::init: empty file name!\n"); + return; } m_fn = f; @@ -173,7 +173,7 @@ void FileInterner::init(const string &f, const struct stat *stp, RclConfig *cnf, } else { LOGDEB("FileInterner::init fn [" << f << "] mime [" << (imime ? 
imime->c_str() : "(null)") << "] preview " << - m_forPreview << "\n"); + m_forPreview << "\n"); // Run mime type identification in any case (see comment above). l_mime = mimetype(m_fn, stp, m_cfg, usfci); @@ -188,52 +188,52 @@ void FileInterner::init(const string &f, const struct stat *stp, RclConfig *cnf, int64_t docsize = stp->st_size; if (!l_mime.empty()) { - // Has mime: check for a compressed file. If so, create a - // temporary uncompressed file, and rerun the mime type - // identification, then do the rest with the temp file. - vectorucmd; - if (m_cfg->getUncompressor(l_mime, ucmd)) { - // Check for compressed size limit - int maxkbs = -1; - if (!m_cfg->getConfParam("compressedfilemaxkbs", &maxkbs) || - maxkbs < 0 || !stp || int(stp->st_size / 1024) < maxkbs) { - if (!m_uncomp->uncompressfile(m_fn, ucmd, m_tfile)) { + // Has mime: check for a compressed file. If so, create a + // temporary uncompressed file, and rerun the mime type + // identification, then do the rest with the temp file. 
+ vectorucmd; + if (m_cfg->getUncompressor(l_mime, ucmd)) { + // Check for compressed size limit + int maxkbs = -1; + if (!m_cfg->getConfParam("compressedfilemaxkbs", &maxkbs) || + maxkbs < 0 || !stp || int(stp->st_size / 1024) < maxkbs) { + if (!m_uncomp->uncompressfile(m_fn, ucmd, m_tfile)) { m_ok = true; - return; - } - LOGDEB1("FileInterner:: after ucomp: tfile " << m_tfile <<"\n"); - m_fn = m_tfile; - // Stat the uncompressed file, mainly to get the size - struct stat ucstat; - if (path_fileprops(m_fn, &ucstat) != 0) { - LOGERR("FileInterner: can't stat the uncompressed file[" << + return; + } + LOGDEB1("FileInterner:: after ucomp: tfile " << m_tfile <<"\n"); + m_fn = m_tfile; + // Stat the uncompressed file, mainly to get the size + struct stat ucstat; + if (path_fileprops(m_fn, &ucstat) != 0) { + LOGERR("FileInterner: can't stat the uncompressed file[" << m_fn << "] errno " << errno << "\n"); m_ok = true; - return; - } else { - docsize = ucstat.st_size; - } - l_mime = mimetype(m_fn, &ucstat, m_cfg, usfci); - if (l_mime.empty() && imime) - l_mime = *imime; - } else { - LOGINFO("FileInterner:: " << m_fn << " over size limit " << + return; + } else { + docsize = ucstat.st_size; + } + l_mime = mimetype(m_fn, &ucstat, m_cfg, usfci); + if (l_mime.empty() && imime) + l_mime = *imime; + } else { + LOGINFO("FileInterner:: " << m_fn << " over size limit " << maxkbs << " kbs\n"); - } - } + } + } } if (l_mime.empty()) { - // No mime type. We let it through as config may warrant that - // we index all file names - LOGDEB0("FileInterner:: no mime: [" << m_fn << "]\n"); + // No mime type. We let it through as config may warrant that + // we index all file names + LOGDEB0("FileInterner:: no mime: [" << m_fn << "]\n"); } // Get fields computed from extended attributes. 
We use the // original file, not the m_fn which may be the uncompressed temp // file if (!m_noxattrs) - reapXAttrs(m_cfg, f, m_XAttrsFields); + reapXAttrs(m_cfg, f, m_XAttrsFields); // Gather metadata from external commands as configured. reapMetaCmds(m_cfg, f, m_cmdFields); @@ -244,14 +244,14 @@ void FileInterner::init(const string &f, const struct stat *stp, RclConfig *cnf, RecollFilter *df = getMimeHandler(l_mime, m_cfg, !m_forPreview); if (!df || df->is_unknown()) { - // No real handler for this type, for now :( - LOGDEB("FileInterner:: unprocessed mime: [" << l_mime << "] [" << f << + // No real handler for this type, for now :( + LOGDEB("FileInterner:: unprocessed mime: [" << l_mime << "] [" << f << "]\n"); - if (!df) - return; + if (!df) + return; } df->set_property(Dijon::Filter::OPERATING_MODE, - m_forPreview ? "view" : "index"); + m_forPreview ? "view" : "index"); df->set_property(Dijon::Filter::DJF_UDI, udi); df->set_docsize(docsize); @@ -276,7 +276,7 @@ void FileInterner::init(const string &data, RclConfig *cnf, int flags, const string& imime) { if (imime.empty()) { - LOGERR("FileInterner: inmemory constructor needs input mime type\n"); + LOGERR("FileInterner: inmemory constructor needs input mime type\n"); return; } m_mimetype = imime; @@ -285,26 +285,26 @@ void FileInterner::init(const string &data, RclConfig *cnf, RecollFilter *df = getMimeHandler(m_mimetype, m_cfg, !m_forPreview); if (!df) { - // No handler for this type, for now :( if indexallfilenames - // is set in the config, this normally wont happen (we get mh_unknown) - LOGDEB("FileInterner:: unprocessed mime [" << m_mimetype << "]\n"); - return; + // No handler for this type, for now :( if indexallfilenames + // is set in the config, this normally wont happen (we get mh_unknown) + LOGDEB("FileInterner:: unprocessed mime [" << m_mimetype << "]\n"); + return; } df->set_property(Dijon::Filter::OPERATING_MODE, - m_forPreview ? "view" : "index"); + m_forPreview ? 
"view" : "index"); df->set_docsize(data.length()); if (df->is_data_input_ok(Dijon::Filter::DOCUMENT_STRING)) { - (void)df->set_document_string(m_mimetype, data); + (void)df->set_document_string(m_mimetype, data); } else if (df->is_data_input_ok(Dijon::Filter::DOCUMENT_DATA)) { - (void)df->set_document_data(m_mimetype, data.c_str(), data.length()); + (void)df->set_document_data(m_mimetype, data.c_str(), data.length()); } else if (df->is_data_input_ok(Dijon::Filter::DOCUMENT_FILE_NAME)) { - TempFile temp = dataToTempFile(data, m_mimetype); - if (temp.ok()) { - (void)df->set_document_file(m_mimetype, temp.filename()); - m_tmpflgs[m_handlers.size()] = true; - m_tempfiles.push_back(temp); - } + TempFile temp = dataToTempFile(data, m_mimetype); + if (temp.ok()) { + (void)df->set_document_file(m_mimetype, temp.filename()); + m_tmpflgs[m_handlers.size()] = true; + m_tempfiles.push_back(temp); + } } // Don't process init errors here: doing it later allows indexing // the file name of even a totally unparsable file @@ -320,7 +320,7 @@ void FileInterner::initcommon(RclConfig *cnf, int flags) // Initialize handler stack. 
m_handlers.reserve(MAXHANDLERS); for (unsigned int i = 0; i < MAXHANDLERS; i++) - m_tmpflgs[i] = false; + m_tmpflgs[i] = false; m_targetMType = cstr_textplain; m_cfg->getConfParam("noxattrfields", &m_noxattrs); m_direct = false; @@ -331,23 +331,23 @@ FileInterner::FileInterner(const Rcl::Doc& idoc, RclConfig *cnf, int flags) LOGDEB0("FileInterner::FileInterner(idoc)\n"); initcommon(cnf, flags); - DocFetcher *fetcher = docFetcherMake(cnf, idoc); - if (fetcher == 0) { + std::unique_ptr fetcher(docFetcherMake(cnf, idoc)); + if (!fetcher) { LOGERR("FileInterner:: no backend\n"); return; } DocFetcher::RawDoc rawdoc; if (!fetcher->fetch(cnf, idoc, rawdoc)) { - LOGERR("FileInterner:: fetcher failed\n"); - return; + LOGERR("FileInterner:: fetcher failed\n"); + return; } switch (rawdoc.kind) { case DocFetcher::RawDoc::RDK_FILENAME: init(rawdoc.data, &rawdoc.st, cnf, flags, &idoc.mimetype); - break; + break; case DocFetcher::RawDoc::RDK_DATA: init(rawdoc.data, cnf, flags, idoc.mimetype); - break; + break; case DocFetcher::RawDoc::RDK_DATADIRECT: // Note: only used for demo with the sample python external // mbox indexer at this point. 
The external program is @@ -356,22 +356,38 @@ FileInterner::FileInterner(const Rcl::Doc& idoc, RclConfig *cnf, int flags) m_direct = true; break; default: - LOGERR("FileInterner::FileInterner(idoc): bad rawdoc kind ??\n"); + LOGERR("FileInterner::FileInterner(idoc): bad rawdoc kind ??\n"); } return; } +FileInterner::ErrorPossibleCause FileInterner::tryGetReason(RclConfig *cnf, + const Rcl::Doc& idoc) +{ + LOGDEB0("FileInterner::tryGetReason(idoc)\n"); + + std::unique_ptr fetcher(docFetcherMake(cnf, idoc)); + if (!fetcher) { + LOGERR("FileInterner:: no backend\n"); + return FileInterner::FetchNoBackend; + } + DocFetcher::Reason fetchreason = fetcher->testAccess(cnf, idoc); + switch (fetchreason) { + case DocFetcher::FetchNotExist: return FileInterner::FetchMissing; + case DocFetcher::FetchNoPerm: return FileInterner::FetchPerm; + default: return FileInterner::InternfileOther; + } +} + bool FileInterner::makesig(RclConfig *cnf, const Rcl::Doc& idoc, string& sig) { - DocFetcher *fetcher = docFetcherMake(cnf, idoc); - if (fetcher == 0) { + std::unique_ptr fetcher(docFetcherMake(cnf, idoc)); + if (!fetcher) { LOGERR("FileInterner::makesig no backend for doc\n"); return false; } bool ret = fetcher->makesig(cnf, idoc, sig); - - delete fetcher; return ret; } @@ -392,14 +408,14 @@ TempFile FileInterner::dataToTempFile(const string& dt, const string& mt) // Create temp file with appropriate suffix for mime type TempFile temp(m_cfg->getSuffixFromMimeType(mt)); if (!temp.ok()) { - LOGERR("FileInterner::dataToTempFile: cant create tempfile: " << + LOGERR("FileInterner::dataToTempFile: cant create tempfile: " << temp.getreason() << "\n"); - return TempFile(); + return TempFile(); } string reason; if (!stringtofile(dt, temp.filename(), reason)) { - LOGERR("FileInterner::dataToTempFile: stringtofile: " < verr; - stringToStrings(msg, verr); - if (verr.size() > 2) { - vector::iterator it = verr.begin(); - it++; - if (*it == "HELPERNOTFOUND") { - it++; - for (; it != verr.end(); 
it++) { - m_missingdatap->addMissing(*it, mt); - } - } - } + vector verr; + stringToStrings(msg, verr); + if (verr.size() > 2) { + vector::iterator it = verr.begin(); + it++; + if (*it == "HELPERNOTFOUND") { + it++; + for (; it != verr.end(); it++) { + m_missingdatap->addMissing(*it, mt); + } + } + } } } void FIMissingStore::getMissingExternal(string& out) { for (map >::const_iterator it = - m_typesForMissing.begin(); it != m_typesForMissing.end(); it++) { - out += string(" ") + it->first; + m_typesForMissing.begin(); it != m_typesForMissing.end(); it++) { + out += string(" ") + it->first; } trimstring(out); } @@ -440,16 +456,16 @@ void FIMissingStore::getMissingDescription(string& out) out.erase(); for (map >::const_iterator it = - m_typesForMissing.begin(); it != m_typesForMissing.end(); it++) { - out += it->first + " ("; - set::const_iterator it3; - for (it3 = it->second.begin(); - it3 != it->second.end(); it3++) { - out += *it3 + " "; - } - trimstring(out); - out += ")"; - out += "\n"; + m_typesForMissing.begin(); it != m_typesForMissing.end(); it++) { + out += it->first + " ("; + set::const_iterator it3; + for (it3 = it->second.begin(); + it3 != it->second.end(); it3++) { + out += *it3 + " "; + } + trimstring(out); + out += ")"; + out += "\n"; } } @@ -463,45 +479,45 @@ FIMissingStore::FIMissingStore(const string& in) stringToTokens(in, lines, "\n"); for (vector::const_iterator it = lines.begin(); - it != lines.end(); it++) { - // Lines from the file are like: - // - // filter name string (mime1 mime2) - // - // We can't be too sure that there will never be a parenthesis - // inside the filter string as this comes from the filter - // itself. The list part is safer, so we start from the end. 
- const string& line = *it; - string::size_type lastopen = line.find_last_of("("); - if (lastopen == string::npos) - continue; - string::size_type lastclose = line.find_last_of(")"); - if (lastclose == string::npos || lastclose <= lastopen + 1) - continue; - string smtypes = line.substr(lastopen+1, lastclose - lastopen - 1); - vector mtypes; - stringToTokens(smtypes, mtypes); - string filter = line.substr(0, lastopen); - trimstring(filter); - if (filter.empty()) - continue; + it != lines.end(); it++) { + // Lines from the file are like: + // + // filter name string (mime1 mime2) + // + // We can't be too sure that there will never be a parenthesis + // inside the filter string as this comes from the filter + // itself. The list part is safer, so we start from the end. + const string& line = *it; + string::size_type lastopen = line.find_last_of("("); + if (lastopen == string::npos) + continue; + string::size_type lastclose = line.find_last_of(")"); + if (lastclose == string::npos || lastclose <= lastopen + 1) + continue; + string smtypes = line.substr(lastopen+1, lastclose - lastopen - 1); + vector mtypes; + stringToTokens(smtypes, mtypes); + string filter = line.substr(0, lastopen); + trimstring(filter); + if (filter.empty()) + continue; - for (vector::const_iterator itt = mtypes.begin(); - itt != mtypes.end(); itt++) { - m_typesForMissing[filter].insert(*itt); - } + for (vector::const_iterator itt = mtypes.begin(); + itt != mtypes.end(); itt++) { + m_typesForMissing[filter].insert(*itt); + } } } // Helper for extracting a value from a map. 
static inline bool getKeyValue(const map& docdata, - const string& key, string& value) + const string& key, string& value) { auto it = docdata.find(key); if (it != docdata.end()) { - value = it->second; - LOGDEB2("getKeyValue: [" << key << "]->[" << value << "]\n"); - return true; + value = it->second; + LOGDEB2("getKeyValue: [" << key << "]->[" << value << "]\n"); + return true; } LOGDEB2("getKeyValue: no value for [" << key << "]\n"); return false; @@ -515,55 +531,55 @@ bool FileInterner::dijontorcl(Rcl::Doc& doc) { RecollFilter *df = m_handlers.back(); if (df == 0) { - //?? - LOGERR("FileInterner::dijontorcl: null top handler ??\n"); - return false; + //?? + LOGERR("FileInterner::dijontorcl: null top handler ??\n"); + return false; } for (const auto& ent : df->get_meta_data()) { - if (ent.first == cstr_dj_keycontent) { - doc.text = ent.second; - if (doc.fbytes.empty()) { - // It's normally set by walking the filter stack, in - // collectIpathAndMt, which was called before us. It - // can happen that the doc size is still empty at this - // point if the last container filter is directly - // returning text/plain content, so that there is no - // ipath-less filter at the top + if (ent.first == cstr_dj_keycontent) { + doc.text = ent.second; + if (doc.fbytes.empty()) { + // It's normally set by walking the filter stack, in + // collectIpathAndMt, which was called before us. 
It + // can happen that the doc size is still empty at this + // point if the last container filter is directly + // returning text/plain content, so that there is no + // ipath-less filter at the top lltodecstr(doc.text.length(), doc.fbytes); LOGDEB("FileInterner::dijontorcl: fbytes->" << doc.fbytes << endl); - } - } else if (ent.first == cstr_dj_keymd) { - doc.dmtime = ent.second; - } else if (ent.first == cstr_dj_keyanc) { - doc.haschildren = true; - } else if (ent.first == cstr_dj_keyorigcharset) { - doc.origcharset = ent.second; - } else if (ent.first == cstr_dj_keyfn) { - // Only if not set during the stack walk - const string *fnp = 0; - if (!doc.peekmeta(Rcl::Doc::keyfn, &fnp) || fnp->empty()) - doc.meta[Rcl::Doc::keyfn] = ent.second; - } else if (ent.first == cstr_dj_keymt || - ent.first == cstr_dj_keycharset) { - // don't need/want these. - } else { + } + } else if (ent.first == cstr_dj_keymd) { + doc.dmtime = ent.second; + } else if (ent.first == cstr_dj_keyanc) { + doc.haschildren = true; + } else if (ent.first == cstr_dj_keyorigcharset) { + doc.origcharset = ent.second; + } else if (ent.first == cstr_dj_keyfn) { + // Only if not set during the stack walk + const string *fnp = 0; + if (!doc.peekmeta(Rcl::Doc::keyfn, &fnp) || fnp->empty()) + doc.meta[Rcl::Doc::keyfn] = ent.second; + } else if (ent.first == cstr_dj_keymt || + ent.first == cstr_dj_keycharset) { + // don't need/want these. 
+ } else { LOGDEB2("dijontorcl: " << m_cfg->fieldCanon(ent.first) << " -> " << ent.second << endl); - doc.addmeta(m_cfg->fieldCanon(ent.first), ent.second); - } + doc.addmeta(m_cfg->fieldCanon(ent.first), ent.second); + } } if (doc.meta[Rcl::Doc::keyabs].empty() && - !doc.meta[cstr_dj_keyds].empty()) { - doc.meta[Rcl::Doc::keyabs] = doc.meta[cstr_dj_keyds]; - doc.meta.erase(cstr_dj_keyds); + !doc.meta[cstr_dj_keyds].empty()) { + doc.meta[Rcl::Doc::keyabs] = doc.meta[cstr_dj_keyds]; + doc.meta.erase(cstr_dj_keyds); } return true; } const set nocopyfields{cstr_dj_keycontent, cstr_dj_keymd, cstr_dj_keyanc, cstr_dj_keyorigcharset, cstr_dj_keyfn, - cstr_dj_keymt, cstr_dj_keycharset, cstr_dj_keyds}; + cstr_dj_keymt, cstr_dj_keycharset, cstr_dj_keyds}; static void copymeta(const RclConfig *cfg,Rcl::Doc& doc, const RecollFilter* hp) { @@ -603,7 +619,7 @@ void FileInterner::collectIpathAndMT(Rcl::Doc& doc) const bool hasipath = false; if (!m_noxattrs) { - docFieldsFromXattrs(m_cfg, m_XAttrsFields, doc); + docFieldsFromXattrs(m_cfg, m_XAttrsFields, doc); } docFieldsFromMetaCmds(m_cfg, m_cmdFields, doc); @@ -614,9 +630,9 @@ void FileInterner::collectIpathAndMT(Rcl::Doc& doc) const string pathelprev; for (unsigned int i = 0; i < m_handlers.size(); i++) { - const map& docdata = m_handlers[i]->get_meta_data(); + const map& docdata = m_handlers[i]->get_meta_data(); string ipathel; - getKeyValue(docdata, cstr_dj_keyipath, ipathel); + getKeyValue(docdata, cstr_dj_keyipath, ipathel); if (!ipathel.empty()) { // Non-empty ipath. This stack element is for an // actual embedded document, not a format translation. 
@@ -624,7 +640,7 @@ void FileInterner::collectIpathAndMT(Rcl::Doc& doc) const doc.ipath += colon_hide(ipathel) + cstr_isep; getKeyValue(docdata, cstr_dj_keymt, doc.mimetype); getKeyValue(docdata, cstr_dj_keyfn, doc.meta[Rcl::Doc::keyfn]); - } else { + } else { // We copy all the metadata from the topmost actual // document: either the first if it has no ipath, or the // last one with an ipath (before pure format @@ -656,7 +672,7 @@ void FileInterner::collectIpathAndMT(Rcl::Doc& doc) const if (hasipath) { // Trim ending ipath separator - LOGDEB2("IPATH [" << doc.ipath << "]\n"); + LOGDEB2("IPATH [" << doc.ipath << "]\n"); if (doc.ipath.back() == cstr_isep[0]) { doc.ipath.erase(doc.ipath.end()-1); } @@ -667,11 +683,11 @@ void FileInterner::collectIpathAndMT(Rcl::Doc& doc) const void FileInterner::popHandler() { if (m_handlers.empty()) - return; + return; size_t i = m_handlers.size() - 1; if (m_tmpflgs[i]) { - m_tempfiles.pop_back(); - m_tmpflgs[i] = false; + m_tempfiles.pop_back(); + m_tmpflgs[i] = false; } returnMimeHandler(m_handlers.back()); m_handlers.pop_back(); @@ -695,18 +711,18 @@ int FileInterner::addHandler() // general), we're done decoding. If we hit text/plain, we're done // in any case if (!stringicmp(mimetype, m_targetMType) || - !stringicmp(mimetype, cstr_textplain)) { - m_reachedMType = mimetype; - LOGDEB1("FileInterner::addHandler: target reached\n"); - return ADD_BREAK; + !stringicmp(mimetype, cstr_textplain)) { + m_reachedMType = mimetype; + LOGDEB1("FileInterner::addHandler: target reached\n"); + return ADD_BREAK; } // We need to stack another handler. Check stack size if (m_handlers.size() >= MAXHANDLERS) { - // Stack too big. Skip this and go on to check if there is - // something else in the current back() - LOGERR("FileInterner::addHandler: stack too high\n"); - return ADD_CONTINUE; + // Stack too big. 
Skip this and go on to check if there is + // something else in the current back() + LOGERR("FileInterner::addHandler: stack too high\n"); + return ADD_CONTINUE; } // We must not filter out HTML when it is an intermediate @@ -721,49 +737,49 @@ int FileInterner::addHandler() (mimetype.compare(cstr_texthtml) || !ipathel.empty()); RecollFilter *newflt = getMimeHandler(mimetype, m_cfg, dofilter); if (!newflt) { - // If we can't find a handler, this doc can't be handled - // but there can be other ones so we go on - LOGINFO("FileInterner::addHandler: no filter for [" << mimetype << + // If we can't find a handler, this doc can't be handled + // but there can be other ones so we go on + LOGINFO("FileInterner::addHandler: no filter for [" << mimetype << "]\n"); - return ADD_CONTINUE; + return ADD_CONTINUE; } newflt->set_property(Dijon::Filter::OPERATING_MODE, - m_forPreview ? "view" : "index"); + m_forPreview ? "view" : "index"); if (!charset.empty()) - newflt->set_property(Dijon::Filter::DEFAULT_CHARSET, charset); + newflt->set_property(Dijon::Filter::DEFAULT_CHARSET, charset); // Get current content: we don't use getkeyvalue() here to avoid // copying the text, which may be big. 
string ns; const string *txt = &ns; { - map::const_iterator it; - it = docdata.find(cstr_dj_keycontent); - if (it != docdata.end()) - txt = &it->second; + map::const_iterator it; + it = docdata.find(cstr_dj_keycontent); + if (it != docdata.end()) + txt = &it->second; } bool setres = false; newflt->set_docsize(txt->length()); if (newflt->is_data_input_ok(Dijon::Filter::DOCUMENT_STRING)) { - setres = newflt->set_document_string(mimetype, *txt); + setres = newflt->set_document_string(mimetype, *txt); } else if (newflt->is_data_input_ok(Dijon::Filter::DOCUMENT_DATA)) { - setres = newflt->set_document_data(mimetype,txt->c_str(),txt->length()); + setres = newflt->set_document_data(mimetype,txt->c_str(),txt->length()); } else if (newflt->is_data_input_ok(Dijon::Filter::DOCUMENT_FILE_NAME)) { - TempFile temp = dataToTempFile(*txt, mimetype); - if (temp.ok() && - (setres = newflt->set_document_file(mimetype, temp.filename()))) { - m_tmpflgs[m_handlers.size()] = true; - m_tempfiles.push_back(temp); - // Hack here, but really helps perfs: if we happen to - // create a temp file for, ie, an image attachment, keep - // it around for preview to use it through get_imgtmp() - if (!mimetype.compare(0, 6, "image/")) { - m_imgtmp = m_tempfiles.back(); - } - } + TempFile temp = dataToTempFile(*txt, mimetype); + if (temp.ok() && + (setres = newflt->set_document_file(mimetype, temp.filename()))) { + m_tmpflgs[m_handlers.size()] = true; + m_tempfiles.push_back(temp); + // Hack here, but really helps perfs: if we happen to + // create a temp file for, ie, an image attachment, keep + // it around for preview to use it through get_imgtmp() + if (!mimetype.compare(0, 6, "image/")) { + m_imgtmp = m_tempfiles.back(); + } + } } if (!setres) { - LOGINFO("FileInterner::addHandler: set_doc failed inside [" << m_fn << + LOGINFO("FileInterner::addHandler: set_doc failed inside [" << m_fn << "] for mtype " << mimetype << "\n"); } // Add handler and go on, maybe this one will give us text... 
@@ -791,22 +807,22 @@ FileInterner::Status FileInterner::internfile(Rcl::Doc& doc,const string& ipath) m_imgtmp = TempFile(); if (m_handlers.size() < 1) { - // Just means the constructor failed - LOGDEB("FileInterner::internfile: no handler: constructor failed\n"); - return FIError; + // Just means the constructor failed + LOGDEB("FileInterner::internfile: no handler: constructor failed\n"); + return FIError; } // Input Ipath vector when retrieving a given subdoc for previewing vector vipath; if (!ipath.empty() && !m_direct) { - stringToTokens(ipath, vipath, cstr_isep, true); - for (auto& entry: vipath) { - entry = colon_restore(entry); - } - if (!m_handlers.back()->skip_to_document(vipath[m_handlers.size()-1])){ - LOGERR("FileInterner::internfile: can't skip\n"); - return FIError; - } + stringToTokens(ipath, vipath, cstr_isep, true); + for (auto& entry: vipath) { + entry = colon_restore(entry); + } + if (!m_handlers.back()->skip_to_document(vipath[m_handlers.size()-1])){ + LOGERR("FileInterner::internfile: can't skip\n"); + return FIError; + } } // Try to get doc from the topmost handler @@ -819,88 +835,88 @@ FileInterner::Status FileInterner::internfile(Rcl::Doc& doc,const string& ipath) int loop = 0; while (!m_handlers.empty()) { CancelCheck::instance().checkCancel(); - if (loop++ > 1000) { - LOGERR("FileInterner:: looping!\n"); - return FIError; - } - // If there are no more docs at the current top level we pop and - // see if there is something at the previous one - if (!m_handlers.back()->has_documents()) { + if (loop++ > 1000) { + LOGERR("FileInterner:: looping!\n"); + return FIError; + } + // If there are no more docs at the current top level we pop and + // see if there is something at the previous one + if (!m_handlers.back()->has_documents()) { // If looking for a specific doc, this is an error. 
Happens if // the index is stale, and the ipath points to the wrong message // for exemple (one with less attachments) - if (m_forPreview) { + if (m_forPreview) { m_reason += "Requested document does not exist. "; m_reason += m_handlers.back()->get_error(); LOGERR("FileInterner: requested document does not exist\n"); - return FIError; + return FIError; } - popHandler(); - continue; - } + popHandler(); + continue; + } - // While indexing, don't stop on next_document() error. There - // might be ie an error while decoding an attachment, but we - // still want to process the rest of the mbox! For preview: fatal. - if (!m_handlers.back()->next_document()) { + // While indexing, don't stop on next_document() error. There + // might be ie an error while decoding an attachment, but we + // still want to process the rest of the mbox! For preview: fatal. + if (!m_handlers.back()->next_document()) { // Using a temp doc here because else we'd need to pop the // last ipath element when we do the pophandler (else the // ipath continues to grow in the current doc with each // consecutive error). It would be better to have // something like ipath.pop(). We do need the MIME type Rcl::Doc doc1 = doc; - processNextDocError(doc1); + processNextDocError(doc1); doc.mimetype = doc1.mimetype; - if (m_forPreview) { + if (m_forPreview) { m_reason += "Requested document does not exist. "; m_reason += m_handlers.back()->get_error(); LOGERR("FileInterner: requested document does not exist\n"); - return FIError; + return FIError; } - popHandler(); - continue; - } + popHandler(); + continue; + } - // Look at the type for the next document and possibly add - // handler to stack. 
- switch (addHandler()) { - case ADD_OK: // Just go through: handler has been stacked, use it - LOGDEB2("addHandler returned OK\n"); - break; - case ADD_CONTINUE: - // forget this doc and retrieve next from current handler - // (ipath stays same) - LOGDEB2("addHandler returned CONTINUE\n"); - continue; - case ADD_BREAK: - // Stop looping: doc type ok, need complete its processing - // and return it - LOGDEB2("addHandler returned BREAK\n"); - goto breakloop; // when you have to you have to - case ADD_ERROR: - LOGDEB2("addHandler returned ERROR\n"); - return FIError; - } + // Look at the type for the next document and possibly add + // handler to stack. + switch (addHandler()) { + case ADD_OK: // Just go through: handler has been stacked, use it + LOGDEB2("addHandler returned OK\n"); + break; + case ADD_CONTINUE: + // forget this doc and retrieve next from current handler + // (ipath stays same) + LOGDEB2("addHandler returned CONTINUE\n"); + continue; + case ADD_BREAK: + // Stop looping: doc type ok, need complete its processing + // and return it + LOGDEB2("addHandler returned BREAK\n"); + goto breakloop; // when you have to you have to + case ADD_ERROR: + LOGDEB2("addHandler returned ERROR\n"); + return FIError; + } - // If we have an ipath, meaning that we are seeking a specific - // document (ie: previewing a search result), we may have to - // seek to the correct entry of a compound doc (ie: archive or - // mail). When we are out of ipath entries, we stop seeking, - // the handlers stack may still grow for translation (ie: if - // the target doc is msword, we'll still stack the - // word-to-text translator). 
- if (!ipath.empty()) { - if (m_handlers.size() <= vipath.size() && - !m_handlers.back()->skip_to_document(vipath[m_handlers.size()-1])) { - LOGERR("FileInterner::internfile: can't skip\n"); - return FIError; - } - } + // If we have an ipath, meaning that we are seeking a specific + // document (ie: previewing a search result), we may have to + // seek to the correct entry of a compound doc (ie: archive or + // mail). When we are out of ipath entries, we stop seeking, + // the handlers stack may still grow for translation (ie: if + // the target doc is msword, we'll still stack the + // word-to-text translator). + if (!ipath.empty()) { + if (m_handlers.size() <= vipath.size() && + !m_handlers.back()->skip_to_document(vipath[m_handlers.size()-1])) { + LOGERR("FileInterner::internfile: can't skip\n"); + return FIError; + } + } } - breakloop: +breakloop: if (m_handlers.empty()) { - LOGDEB("FileInterner::internfile: conversion ended with no doc\n"); - return FIError; + LOGDEB("FileInterner::internfile: conversion ended with no doc\n"); + return FIError; } // Compute ipath and significant mimetype. ipath is returned @@ -913,7 +929,7 @@ FileInterner::Status FileInterner::internfile(Rcl::Doc& doc,const string& ipath) // previewing too collectIpathAndMT(doc); if (m_forPreview) { - doc.mimetype = m_reachedMType; + doc.mimetype = m_reachedMType; } // Keep this AFTER collectIpathAndMT dijontorcl(doc); @@ -921,19 +937,19 @@ FileInterner::Status FileInterner::internfile(Rcl::Doc& doc,const string& ipath) // Possibly destack so that we can test for FIDone. 
While doing this // possibly set aside an ancestor html text (for the GUI preview) while (!m_handlers.empty() && !m_handlers.back()->has_documents()) { - if (m_forPreview) { - MimeHandlerHtml *hth = - dynamic_cast(m_handlers.back()); - if (hth) { - m_html = hth->get_html(); - } - } - popHandler(); + if (m_forPreview) { + MimeHandlerHtml *hth = + dynamic_cast(m_handlers.back()); + if (hth) { + m_html = hth->get_html(); + } + } + popHandler(); } if (m_handlers.empty()) - return FIDone; + return FIDone; else - return FIAgain; + return FIAgain; } bool FileInterner::tempFileForMT(TempFile& otemp, RclConfig* cnf, @@ -958,7 +974,7 @@ bool FileInterner::idocToFile( if (idoc.ipath.empty()) { // Because of the mandatory first conversion in the // FileInterner constructor, need to use a specific method. - return topdocToFile(otemp, tofile, cnf, idoc, uncompress); + return topdocToFile(otemp, tofile, cnf, idoc, uncompress); } // We set FIF_forPreview for consistency with the previous version @@ -976,8 +992,8 @@ bool FileInterner::topdocToFile( TempFile& otemp, const string& tofile, RclConfig *cnf, const Rcl::Doc& idoc, bool uncompress) { - DocFetcher *fetcher = docFetcherMake(cnf, idoc); - if (fetcher == 0) { + std::unique_ptr fetcher(docFetcherMake(cnf, idoc)); + if (!fetcher) { LOGERR("FileInterner::topdocToFile no backend\n"); return false; } @@ -1031,17 +1047,17 @@ bool FileInterner::topdocToFile( } bool FileInterner::interntofile(TempFile& otemp, const string& tofile, - const string& ipath, const string& mimetype) + const string& ipath, const string& mimetype) { if (!ok()) { - LOGERR("FileInterner::interntofile: constructor failed\n"); - return false; + LOGERR("FileInterner::interntofile: constructor failed\n"); + return false; } Rcl::Doc doc; Status ret = internfile(doc, ipath); if (ret == FileInterner::FIError) { - LOGERR("FileInterner::interntofile: internfile() failed\n"); - return false; + LOGERR("FileInterner::interntofile: internfile() failed\n"); + return false; } 
// Specialcase text/html. This is to work around a bug that will @@ -1062,18 +1078,18 @@ bool FileInterner::interntofile(TempFile& otemp, const string& tofile, if (!tempFileForMT(temp, m_cfg, mimetype)) { return false; } - filename = temp.filename(); + filename = temp.filename(); } else { - filename = tofile.c_str(); + filename = tofile.c_str(); } string reason; if (!stringtofile(doc.text, filename, reason)) { - LOGERR("FileInterner::interntofile: stringtofile : " << reason << "\n"); - return false; + LOGERR("FileInterner::interntofile: stringtofile : " << reason << "\n"); + return false; } if (tofile.empty()) - otemp = temp; + otemp = temp; return true; } @@ -1151,117 +1167,3 @@ bool FileInterner::maybeUncompressToTemp(TempFile& temp, const string& fn, } return true; } - -#else - -#include -#include -#include -#include -#include "safesysstat.h" - -using namespace std; - -#include "log.h" - -#include "rclinit.h" -#include "internfile.h" -#include "rclconfig.h" -#include "rcldoc.h" - -static string thisprog; - -static string usage = - " internfile [ipath]\n" - " \n\n" - ; - -static void -Usage(void) -{ - cerr << thisprog << ": usage:\n" << usage; - exit(1); -} - -static int op_flags; -#define OPT_q 0x1 - -RclConfig *config; -int main(int argc, char **argv) -{ - thisprog = argv[0]; - argc--; argv++; - - while (argc > 0 && **argv == '-') { - (*argv)++; - if (!(**argv)) - /* Cas du "adb - core" */ - Usage(); - while (**argv) - switch (*(*argv)++) { - default: Usage(); break; - } - argc--; argv++; - } - DebugLog::getdbl()->setloglevel(DEBDEB1); - DebugLog::setfilename("stderr"); - - if (argc < 1) - Usage(); - string fn(*argv++); - argc--; - string ipath; - if (argc >= 1) { - ipath.append(*argv++); - argc--; - } - string reason; - config = recollinit(0, 0, 0, reason); - - if (config == 0 || !config->ok()) { - string str = "Configuration problem: "; - str += reason; - fprintf(stderr, "%s\n", str.c_str()); - exit(1); - } - struct stat st; - if (stat(fn.c_str(), &st)) { - 
perror("stat"); - exit(1); - } - FileInterner interner(fn, &st, config, 0); - Rcl::Doc doc; - FileInterner::Status status = interner.internfile(doc, ipath); - switch (status) { - case FileInterner::FIDone: - case FileInterner::FIAgain: - break; - case FileInterner::FIError: - default: - fprintf(stderr, "internfile failed\n"); - exit(1); - } - - cout << "doc.url [[[[" << doc.url << - "]]]]\n-----------------------------------------------------\n" << - "doc.ipath [[[[" << doc.ipath << - "]]]]\n-----------------------------------------------------\n" << - "doc.mimetype [[[[" << doc.mimetype << - "]]]]\n-----------------------------------------------------\n" << - "doc.fmtime [[[[" << doc.fmtime << - "]]]]\n-----------------------------------------------------\n" << - "doc.dmtime [[[[" << doc.dmtime << - "]]]]\n-----------------------------------------------------\n" << - "doc.origcharset [[[[" << doc.origcharset << - "]]]]\n-----------------------------------------------------\n" << - "doc.meta[title] [[[[" << doc.meta["title"] << - "]]]]\n-----------------------------------------------------\n" << - "doc.meta[keywords] [[[[" << doc.meta["keywords"] << - "]]]]\n-----------------------------------------------------\n" << - "doc.meta[abstract] [[[[" << doc.meta["abstract"] << - "]]]]\n-----------------------------------------------------\n" << - "doc.text [[[[" << doc.text << "]]]]\n"; -} - -#endif // TEST_INTERNFILE - diff --git a/src/internfile/internfile.h b/src/internfile/internfile.h index 22bb5021..21543125 100644 --- a/src/internfile/internfile.h +++ b/src/internfile/internfile.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2004 J.F.Dockes +/* Copyright (C) 2004-2019 J.F.Dockes * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -33,7 +33,7 @@ using std::set; class RclConfig; namespace Rcl { - class Doc; +class Doc; 
} class Uncomp; @@ -51,9 +51,9 @@ public: FIMissingStore(const string& in); virtual ~FIMissingStore() {} virtual void addMissing(const string& prog, const string& mt) - { - m_typesForMissing[prog].insert(mt); - } + { + m_typesForMissing[prog].insert(mt); + } // Get simple progs list string virtual void getMissingExternal(string& out); // Get progs + assoc mtypes description string @@ -87,7 +87,7 @@ public: * */ class FileInterner { - public: +public: /** Operation modifier flags */ enum Flags {FIF_none, FIF_forPreview, FIF_doUseInputMimetype}; /** Return values for internfile() */ @@ -115,7 +115,7 @@ class FileInterner { * mime type for the uncompressed version. */ FileInterner(const string &fn, const struct stat *stp, - RclConfig *cnf, int flags, const string *mtype = 0); + RclConfig *cnf, int flags, const string *mtype = 0); /** * Alternate constructor for the case where the data is in memory. @@ -138,9 +138,8 @@ class FileInterner { ~FileInterner(); - void setMissingStore(FIMissingStore *st) - { - m_missingdatap = st; + void setMissingStore(FIMissingStore *st) { + m_missingdatap = st; } /** @@ -160,9 +159,9 @@ class FileInterner { Status internfile(Rcl::Doc& doc, const string &ipath = ""); /** Extract subdoc defined by ipath in idoc to file. 
See params for - idocToFile() */ + idocToFile() */ bool interntofile(TempFile& otemp, const string& tofile, - const string& ipath, const string& mimetype); + const string& ipath, const string& mimetype); /** Return the file's (top level object) mimetype (useful for * creating the pseudo-doc for container files) @@ -181,17 +180,17 @@ class FileInterner { const string& get_html() {return m_html;} /** If we happen to be processing an image file and need a temp file, - we keep it around to save work for our caller, which can get it here */ + we keep it around to save work for our caller, which can get it here */ TempFile get_imgtmp() {return m_imgtmp;} const string& getReason() const - { - return m_reason; - } + { + return m_reason; + } bool ok() const - { - return m_ok; - } + { + return m_ok; + } /** * Get UDI for immediate parent for document. @@ -234,7 +233,7 @@ class FileInterner { * anything for a top level document. */ static bool idocToFile(TempFile& temp, const string& tofile, - RclConfig *cnf, const Rcl::Doc& doc, + RclConfig *cnf, const Rcl::Doc& doc, bool uncompress = true); /** Does file appear to be the compressed version of a document? */ @@ -248,7 +247,14 @@ class FileInterner { static bool maybeUncompressToTemp(TempFile& temp, const string& fn, RclConfig *cnf, const Rcl::Doc& doc); - private: + /** Try to get a top level reason after an operation failed. This + * is just for "simple" issues, like file missing, permissions, + * etc. 
*/ + enum ErrorPossibleCause{FetchMissing, FetchPerm, FetchNoBackend, + InternfileOther}; + static ErrorPossibleCause tryGetReason(RclConfig *, const Rcl::Doc&); + +private: static const unsigned int MAXHANDLERS = 20; RclConfig *m_cfg; string m_fn; @@ -287,7 +293,7 @@ class FileInterner { void init(const string &fn, const struct stat *stp, RclConfig *cnf, int flags, const string *mtype = 0); void init(const string &data, RclConfig *cnf, int flags, - const string& mtype); + const string& mtype); void initcommon(RclConfig *cnf, int flags); bool dijontorcl(Rcl::Doc&); @@ -298,7 +304,7 @@ class FileInterner { void checkExternalMissing(const string& msg, const string& mt); void processNextDocError(Rcl::Doc &doc); static bool tempFileForMT(TempFile& otemp, RclConfig *cnf, - const std::string& mimetype); + const std::string& mimetype); static bool topdocToFile(TempFile& otemp, const std::string& tofile, RclConfig *cnf, const Rcl::Doc& idoc, bool uncompress); diff --git a/src/qtgui/preview_load.cpp b/src/qtgui/preview_load.cpp index 0a91ccf2..b8b6b009 100644 --- a/src/qtgui/preview_load.cpp +++ b/src/qtgui/preview_load.cpp @@ -62,6 +62,7 @@ void LoadThread::run() } else { fdoc.mimetype = interner.getMimetype(); mst.getMissingExternal(missing); + explain = FileInterner::tryGetReason(&m_config, m_idoc); status = -1; } } catch (CancelExcept) { diff --git a/src/qtgui/preview_load.h b/src/qtgui/preview_load.h index 91a55630..63b9a0e6 100644 --- a/src/qtgui/preview_load.h +++ b/src/qtgui/preview_load.h @@ -25,6 +25,7 @@ #include "pathut.h" #include "rclutil.h" #include "rclconfig.h" +#include "internfile.h" /* * A thread to perform the file reading / format conversion work for preview @@ -48,7 +49,8 @@ public: Rcl::Doc fdoc; TempFile tmpimg; std::string missing; - + FileInterner::ErrorPossibleCause explain{FileInterner::InternfileOther}; + private: Rcl::Doc m_idoc; bool m_previewHtml; diff --git a/src/qtgui/preview_w.cpp b/src/qtgui/preview_w.cpp index 6613394d..00d0d681 100644 
--- a/src/qtgui/preview_w.cpp +++ b/src/qtgui/preview_w.cpp @@ -643,7 +643,6 @@ bool Preview::loadDocInCurrentTab(const Rcl::Doc &idoc, int docnum) if (CancelCheck::instance().cancelState()) return false; if (lthr.status != 0) { - progress.close(); QString explain; if (!lthr.missing.empty()) { explain = QString::fromUtf8("
") + @@ -655,13 +654,40 @@ bool Preview::loadDocInCurrentTab(const Rcl::Doc &idoc, int docnum) lthr.fdoc.mimetype.c_str() + explain); } else { if (progress.wasCanceled()) { - //QMessageBox::warning(0, "Recoll", tr("Canceled")); + QMessageBox::warning(0, "Recoll", tr("Canceled")); } else { - QMessageBox::warning(0, "Recoll", - tr("Error while loading file")); + progress.close(); + // Note that we can't easily check for a readable file + // because it's possible that only a region is locked + // (e.g. on Windows for an ost file the first block is + // readable even if Outlook is running). + QString msg; + switch (lthr.explain) { + case FileInterner::FetchMissing: + msg = tr("Error loading the document: file missing"); + break; + case FileInterner::FetchPerm: + msg = tr("Error loading the document: no permission"); + break; + case FileInterner::FetchNoBackend: + msg = + tr("Error loading the document: backend not configured"); + break; + case FileInterner::InternfileOther: +#ifdef _WIN32 + msg = tr("Error loading the document: " + "other handler error
" + "Maybe the application is locking the file ?"); +#else + msg = tr("Error loading the document: other handler error"); +#endif + break; + } + QMessageBox::warning(0, "Recoll", msg); } } + progress.close(); return false; } // Reset config just in case. @@ -894,7 +920,7 @@ PreviewTextEdit::PreviewTextEdit(QWidget* parent, const char* nm, Preview *pv) void PreviewTextEdit::onAnchorClicked(const QUrl& url) { LOGDEB("PreviewTextEdit::onAnchorClicked: " << qs2utf8s(url.toString()) - << std::endl); + << std::endl); if (prefs.previewActiveLinks && m_preview->m_rclmain) { Rcl::Doc doc; doc.url = qs2utf8s(url.toString()).c_str(); @@ -1016,4 +1042,3 @@ void PreviewTextEdit::print() QTextEdit::print(&printer); #endif } - diff --git a/src/testmains/trinternfile.cpp b/src/testmains/trinternfile.cpp new file mode 100644 index 00000000..0511455f --- /dev/null +++ b/src/testmains/trinternfile.cpp @@ -0,0 +1,129 @@ +/* Copyright (C) 2017-2019 J.F.Dockes + * + * License: GPL 2.1 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the + * Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */ + + +#include +#include +#include +#include +#include "safesysstat.h" + +using namespace std; + +#include "log.h" + +#include "rclinit.h" +#include "internfile.h" +#include "rclconfig.h" +#include "rcldoc.h" + +static string thisprog; + +static string usage = + " internfile [ipath]\n" + " \n\n" + ; + +static void +Usage(void) /* Print the usage text to stderr and exit with status 1 (never returns) */ +{ + cerr << thisprog << ": usage:\n" << usage; + exit(1); +} + +static int op_flags; /* NOTE(review): not referenced anywhere in the visible code */ +#define OPT_q 0x1 + +RclConfig *config; +int main(int argc, char **argv) /* Test driver: interns the file named by the first argument (optional second argument: ipath) and dumps the resulting doc fields to stdout */ +{ + thisprog = argv[0]; + argc--; argv++; + + while (argc > 0 && **argv == '-') { + (*argv)++; + if (!(**argv)) + /* Lone "-" argument (the "adb - core" case): rejected */ + Usage(); + while (**argv) + switch (*(*argv)++) { + default: Usage(); break; /* no option letter is accepted: any one ends in Usage() */ + } + argc--; argv++; + } + DebugLog::getdbl()->setloglevel(DEBDEB1); + DebugLog::setfilename("stderr"); + + if (argc < 1) + Usage(); + string fn(*argv++); + argc--; + string ipath; + if (argc >= 1) { + ipath.append(*argv++); + argc--; + } + string reason; + config = recollinit(0, 0, 0, reason); + + if (config == 0 || !config->ok()) { + string str = "Configuration problem: "; + str += reason; + fprintf(stderr, "%s\n", str.c_str()); + exit(1); + } + struct stat st; + if (stat(fn.c_str(), &st)) { + perror("stat"); + exit(1); + } + FileInterner interner(fn, &st, config, 0); + Rcl::Doc doc; + FileInterner::Status status = interner.internfile(doc, ipath); + switch (status) { + case FileInterner::FIDone: + case FileInterner::FIAgain: + break; + case FileInterner::FIError: + default: + fprintf(stderr, "internfile failed\n"); + exit(1); + } + + cout << "doc.url [[[[" << doc.url << + "]]]]\n-----------------------------------------------------\n" << + "doc.ipath [[[[" << doc.ipath << + "]]]]\n-----------------------------------------------------\n" << + "doc.mimetype [[[[" << doc.mimetype << + "]]]]\n-----------------------------------------------------\n" << + "doc.fmtime [[[[" << doc.fmtime << + "]]]]\n-----------------------------------------------------\n" << + 
"doc.dmtime [[[[" << doc.dmtime << + "]]]]\n-----------------------------------------------------\n" << + "doc.origcharset [[[[" << doc.origcharset << + "]]]]\n-----------------------------------------------------\n" << + "doc.meta[title] [[[[" << doc.meta["title"] << + "]]]]\n-----------------------------------------------------\n" << + "doc.meta[keywords] [[[[" << doc.meta["keywords"] << + "]]]]\n-----------------------------------------------------\n" << + "doc.meta[abstract] [[[[" << doc.meta["abstract"] << + "]]]]\n-----------------------------------------------------\n" << + "doc.text [[[[" << doc.text << "]]]]\n"; +}