This commit is contained in:
Jean-Francois Dockes 2020-07-15 10:47:18 +02:00
parent 3cf7fb3b65
commit 96ba5acd32
14 changed files with 505 additions and 533 deletions

View File

@ -1,18 +0,0 @@
# Build rules for the small standalone programs of this directory. Each
# one is compiled from the corresponding source file with a -DTEST_xxx
# define (presumably enabling a test driver embedded in that source) and
# linked with $(LIBRECOLL).
PROGS = subtreelist mimetype

all: $(PROGS)

SUBTREELIST_OBJS= subtreelist.o
subtreelist : $(SUBTREELIST_OBJS)
	$(CXX) $(ALL_CXXFLAGS) -o subtreelist $(SUBTREELIST_OBJS) \
		$(LIBRECOLL)
subtreelist.o : subtreelist.cpp
	$(CXX) $(ALL_CXXFLAGS) -DTEST_SUBTREELIST -c subtreelist.cpp

MIMETYPE_OBJS= trmimetype.o
mimetype : $(MIMETYPE_OBJS)
	$(CXX) $(ALL_CXXFLAGS) -o mimetype $(MIMETYPE_OBJS) $(LIBRECOLL)
# The object is named trmimetype.o rather than mimetype.o, presumably so
# that it does not clash with the regular build of mimetype.cpp.
trmimetype.o : mimetype.cpp
	$(CXX) $(ALL_CXXFLAGS) -DTEST_MIMETYPE -c -o trmimetype.o \
		mimetype.cpp

# Shared definitions (provides ALL_CXXFLAGS, LIBRECOLL, ... -- TODO confirm)
include ../utils/utmkdefs.mk

View File

@ -35,17 +35,17 @@ std::unique_ptr<DocFetcher> docFetcherMake(RclConfig *config,
string backend; string backend;
idoc.getmeta(Rcl::Doc::keybcknd, &backend); idoc.getmeta(Rcl::Doc::keybcknd, &backend);
if (backend.empty() || !backend.compare("FS")) { if (backend.empty() || !backend.compare("FS")) {
return std::unique_ptr<DocFetcher>(new FSDocFetcher); return std::unique_ptr<DocFetcher>(new FSDocFetcher);
#ifndef DISABLE_WEB_INDEXER #ifndef DISABLE_WEB_INDEXER
} else if (!backend.compare("BGL")) { } else if (!backend.compare("BGL")) {
return std::unique_ptr<DocFetcher>(new WQDocFetcher); return std::unique_ptr<DocFetcher>(new WQDocFetcher);
#endif #endif
} else { } else {
std::unique_ptr<DocFetcher> f(exeDocFetcherMake(config, backend)); std::unique_ptr<DocFetcher> f(exeDocFetcherMake(config, backend));
if (!f) { if (!f) {
LOGERR("DocFetcherFactory: unknown backend [" << backend << "]\n"); LOGERR("DocFetcherFactory: unknown backend [" << backend << "]\n");
} }
return f; return f;
} }
} }

View File

@ -23,11 +23,11 @@
// Current status of an indexing operation. This is updated in // Current status of an indexing operation. This is updated in
// $RECOLL_CONFDIR/idxstatus.txt // $RECOLL_CONFDIR/idxstatus.txt
class DbIxStatus { class DbIxStatus {
public: public:
enum Phase {DBIXS_NONE, enum Phase {DBIXS_NONE,
DBIXS_FILES, DBIXS_PURGE, DBIXS_STEMDB, DBIXS_CLOSING, DBIXS_FILES, DBIXS_PURGE, DBIXS_STEMDB, DBIXS_CLOSING,
DBIXS_MONITOR, DBIXS_MONITOR,
DBIXS_DONE}; DBIXS_DONE};
Phase phase; Phase phase;
std::string fn; // Last file processed std::string fn; // Last file processed
int docsdone; // Documents actually updated int docsdone; // Documents actually updated
@ -43,9 +43,9 @@ class DbIxStatus {
bool hasmonitor{false}; bool hasmonitor{false};
void reset() { void reset() {
phase = DBIXS_FILES; phase = DBIXS_FILES;
fn.erase(); fn.erase();
docsdone = filesdone = fileerrors = dbtotdocs = totfiles = 0; docsdone = filesdone = fileerrors = dbtotdocs = totfiles = 0;
} }
DbIxStatus() {reset();} DbIxStatus() {reset();}
}; };

View File

@ -117,9 +117,9 @@ ConfIndexer::ConfIndexer(RclConfig *cnf, DbIxStatusUpdater *updfunc)
/** Dispose of the indexer objects still held by this ConfIndexer. */
ConfIndexer::~ConfIndexer()
{
    deleteZ(m_fsindexer);
    // The web queue indexer is only handled when the web indexer has
    // not been compiled out.
#ifndef DISABLE_WEB_INDEXER
    deleteZ(m_webindexer);
#endif
}
@ -132,14 +132,14 @@ bool ConfIndexer::runFirstIndexing()
{ {
// Indexing status file existing and not empty ? // Indexing status file existing and not empty ?
if (path_filesize(m_config->getIdxStatusFile()) > 0) { if (path_filesize(m_config->getIdxStatusFile()) > 0) {
LOGDEB0("ConfIndexer::runFirstIndexing: no: status file not empty\n"); LOGDEB0("ConfIndexer::runFirstIndexing: no: status file not empty\n");
return false; return false;
} }
// And only do this if the user has kept the default topdirs (~). // And only do this if the user has kept the default topdirs (~).
vector<string> tdl = m_config->getTopdirs(); vector<string> tdl = m_config->getTopdirs();
if (tdl.size() != 1 || tdl[0].compare(path_canon(path_tildexpand("~")))) { if (tdl.size() != 1 || tdl[0].compare(path_canon(path_tildexpand("~")))) {
LOGDEB0("ConfIndexer::runFirstIndexing: no: not home only\n"); LOGDEB0("ConfIndexer::runFirstIndexing: no: not home only\n");
return false; return false;
} }
return true; return true;
} }
@ -150,7 +150,7 @@ bool ConfIndexer::firstFsIndexingSequence()
deleteZ(m_fsindexer); deleteZ(m_fsindexer);
m_fsindexer = new FsIndexer(m_config, &m_db, m_updater); m_fsindexer = new FsIndexer(m_config, &m_db, m_updater);
if (!m_fsindexer) { if (!m_fsindexer) {
return false; return false;
} }
int flushmb = m_db.getFlushMb(); int flushmb = m_db.getFlushMb();
m_db.setFlushMb(2); m_db.setFlushMb(2);
@ -164,17 +164,17 @@ bool ConfIndexer::index(bool resetbefore, ixType typestorun, int flags)
{ {
Rcl::Db::OpenMode mode = resetbefore ? Rcl::Db::DbTrunc : Rcl::Db::DbUpd; Rcl::Db::OpenMode mode = resetbefore ? Rcl::Db::DbTrunc : Rcl::Db::DbUpd;
if (!m_db.open(mode)) { if (!m_db.open(mode)) {
LOGERR("ConfIndexer: error opening database " << m_config->getDbDir() << LOGERR("ConfIndexer: error opening database " << m_config->getDbDir() <<
" : " << m_db.getReason() << "\n"); " : " << m_db.getReason() << "\n");
addIdxReason("indexer", m_db.getReason()); addIdxReason("indexer", m_db.getReason());
return false; return false;
} }
m_config->setKeyDir(cstr_null); m_config->setKeyDir(cstr_null);
if (typestorun & IxTFs) { if (typestorun & IxTFs) {
if (runFirstIndexing()) { if (runFirstIndexing()) {
firstFsIndexingSequence(); firstFsIndexingSequence();
} }
deleteZ(m_fsindexer); deleteZ(m_fsindexer);
m_fsindexer = new FsIndexer(m_config, &m_db, m_updater); m_fsindexer = new FsIndexer(m_config, &m_db, m_updater);
if (!m_fsindexer || !m_fsindexer->index(flags)) { if (!m_fsindexer || !m_fsindexer->index(flags)) {
@ -183,7 +183,7 @@ bool ConfIndexer::index(bool resetbefore, ixType typestorun, int flags)
} else { } else {
addIdxReason("indexer", "Index creation failed. See log."); addIdxReason("indexer", "Index creation failed. See log.");
} }
m_db.close(); m_db.close();
return false; return false;
} }
} }
@ -193,7 +193,7 @@ bool ConfIndexer::index(bool resetbefore, ixType typestorun, int flags)
deleteZ(m_webindexer); deleteZ(m_webindexer);
m_webindexer = new WebQueueIndexer(m_config, &m_db, m_updater); m_webindexer = new WebQueueIndexer(m_config, &m_db, m_updater);
if (!m_webindexer || !m_webindexer->index()) { if (!m_webindexer || !m_webindexer->index()) {
m_db.close(); m_db.close();
addIdxReason("indexer", "Web index creation failed. See log"); addIdxReason("indexer", "Web index creation failed. See log");
return false; return false;
} }
@ -203,10 +203,10 @@ bool ConfIndexer::index(bool resetbefore, ixType typestorun, int flags)
// Get rid of all database entries that don't exist in the // Get rid of all database entries that don't exist in the
// filesystem anymore. Only if all *configured* indexers ran. // filesystem anymore. Only if all *configured* indexers ran.
if (m_updater && !m_updater->update(DbIxStatus::DBIXS_PURGE, "")) { if (m_updater && !m_updater->update(DbIxStatus::DBIXS_PURGE, "")) {
m_db.close(); m_db.close();
addIdxReason("indexer", "Index purge failed. See log"); addIdxReason("indexer", "Index purge failed. See log");
return false; return false;
} }
m_db.purge(); m_db.purge();
} }
@ -214,22 +214,22 @@ bool ConfIndexer::index(bool resetbefore, ixType typestorun, int flags)
// here. Makes no sense to check for cancel, we'll have to close // here. Makes no sense to check for cancel, we'll have to close
// anyway // anyway
if (m_updater) if (m_updater)
m_updater->update(DbIxStatus::DBIXS_CLOSING, string()); m_updater->update(DbIxStatus::DBIXS_CLOSING, string());
if (!m_db.close()) { if (!m_db.close()) {
LOGERR("ConfIndexer::index: error closing database in " << LOGERR("ConfIndexer::index: error closing database in " <<
m_config->getDbDir() << "\n"); m_config->getDbDir() << "\n");
addIdxReason("indexer", "Index close/flush failed. See log"); addIdxReason("indexer", "Index close/flush failed. See log");
return false; return false;
} }
if (m_updater && !m_updater->update(DbIxStatus::DBIXS_CLOSING, string())) if (m_updater && !m_updater->update(DbIxStatus::DBIXS_CLOSING, string()))
return false; return false;
bool ret = true; bool ret = true;
if (!createStemmingDatabases()) { if (!createStemmingDatabases()) {
ret = false; ret = false;
} }
if (m_updater && !m_updater->update(DbIxStatus::DBIXS_CLOSING, string())) if (m_updater && !m_updater->update(DbIxStatus::DBIXS_CLOSING, string()))
return false; return false;
// Don't fail indexing because of an aspell issue: we ignore the status. // Don't fail indexing because of an aspell issue: we ignore the status.
// Messages were written to the reasons output // Messages were written to the reasons output
@ -237,7 +237,7 @@ bool ConfIndexer::index(bool resetbefore, ixType typestorun, int flags)
clearMimeHandlerCache(); clearMimeHandlerCache();
if (m_updater) if (m_updater)
m_updater->update(DbIxStatus::DBIXS_DONE, string()); m_updater->update(DbIxStatus::DBIXS_DONE, string());
return ret; return ret;
} }
@ -246,14 +246,14 @@ bool ConfIndexer::indexFiles(list<string>& ifiles, int flag)
list<string> myfiles; list<string> myfiles;
string origcwd = m_config->getOrigCwd(); string origcwd = m_config->getOrigCwd();
for (const auto& entry : ifiles) { for (const auto& entry : ifiles) {
myfiles.push_back(path_canon(entry, &origcwd)); myfiles.push_back(path_canon(entry, &origcwd));
} }
myfiles.sort(); myfiles.sort();
if (!m_db.open(Rcl::Db::DbUpd)) { if (!m_db.open(Rcl::Db::DbUpd)) {
LOGERR("ConfIndexer: indexFiles error opening database " << LOGERR("ConfIndexer: indexFiles error opening database " <<
m_config->getDbDir() << "\n"); m_config->getDbDir() << "\n");
return false; return false;
} }
m_config->setKeyDir(cstr_null); m_config->setKeyDir(cstr_null);
bool ret = false; bool ret = false;
@ -280,9 +280,9 @@ bool ConfIndexer::indexFiles(list<string>& ifiles, int flag)
} }
// The close would be done in our destructor, but we want status here // The close would be done in our destructor, but we want status here
if (!m_db.close()) { if (!m_db.close()) {
LOGERR("ConfIndexer::index: error closing database in " << LOGERR("ConfIndexer::index: error closing database in " <<
m_config->getDbDir() << "\n"); m_config->getDbDir() << "\n");
return false; return false;
} }
ifiles = myfiles; ifiles = myfiles;
clearMimeHandlerCache(); clearMimeHandlerCache();
@ -297,7 +297,7 @@ bool ConfIndexer::updateDocs(vector<Rcl::Doc> &docs, IxFlag flag)
docsToPaths(docs, paths); docsToPaths(docs, paths);
list<string> files(paths.begin(), paths.end()); list<string> files(paths.begin(), paths.end());
if (!files.empty()) { if (!files.empty()) {
return indexFiles(files, flag); return indexFiles(files, flag);
} }
return true; return true;
} }
@ -307,14 +307,14 @@ bool ConfIndexer::purgeFiles(list<string> &files, int flag)
list<string> myfiles; list<string> myfiles;
string origcwd = m_config->getOrigCwd(); string origcwd = m_config->getOrigCwd();
for (const auto& entry : files) { for (const auto& entry : files) {
myfiles.push_back(path_canon(entry, &origcwd)); myfiles.push_back(path_canon(entry, &origcwd));
} }
myfiles.sort(); myfiles.sort();
if (!m_db.open(Rcl::Db::DbUpd)) { if (!m_db.open(Rcl::Db::DbUpd)) {
LOGERR("ConfIndexer: purgeFiles error opening database " << LOGERR("ConfIndexer: purgeFiles error opening database " <<
m_config->getDbDir() << "\n"); m_config->getDbDir() << "\n");
return false; return false;
} }
bool ret = false; bool ret = false;
m_config->setKeyDir(cstr_null); m_config->setKeyDir(cstr_null);
@ -337,9 +337,9 @@ bool ConfIndexer::purgeFiles(list<string> &files, int flag)
// The close would be done in our destructor, but we want status here // The close would be done in our destructor, but we want status here
if (!m_db.close()) { if (!m_db.close()) {
LOGERR("ConfIndexer::purgefiles: error closing database in " << LOGERR("ConfIndexer::purgefiles: error closing database in " <<
m_config->getDbDir() << "\n"); m_config->getDbDir() << "\n");
return false; return false;
} }
return ret; return ret;
} }
@ -356,18 +356,18 @@ bool ConfIndexer::createStemmingDatabases()
addIdxReason("stemming", "could not open db"); addIdxReason("stemming", "could not open db");
return false; return false;
} }
vector<string> langs; vector<string> langs;
stringToStrings(slangs, langs); stringToStrings(slangs, langs);
// Get the list of existing stem dbs from the database (some may have // Get the list of existing stem dbs from the database (some may have
// been manually created, we just keep those from the config) // been manually created, we just keep those from the config)
vector<string> dblangs = m_db.getStemLangs(); vector<string> dblangs = m_db.getStemLangs();
vector<string>::const_iterator it; vector<string>::const_iterator it;
for (it = dblangs.begin(); it != dblangs.end(); it++) { for (it = dblangs.begin(); it != dblangs.end(); it++) {
if (find(langs.begin(), langs.end(), *it) == langs.end()) if (find(langs.begin(), langs.end(), *it) == langs.end())
m_db.deleteStemDb(*it); m_db.deleteStemDb(*it);
} }
ret = ret && m_db.createStemDbs(langs); ret = ret && m_db.createStemDbs(langs);
if (!ret) { if (!ret) {
addIdxReason("stemming", "stem db creation failed"); addIdxReason("stemming", "stem db creation failed");
} }
@ -379,7 +379,7 @@ bool ConfIndexer::createStemmingDatabases()
bool ConfIndexer::createStemDb(const string &lang) bool ConfIndexer::createStemDb(const string &lang)
{ {
if (!m_db.open(Rcl::Db::DbUpd)) if (!m_db.open(Rcl::Db::DbUpd))
return false; return false;
vector<string> langs; vector<string> langs;
stringToStrings(lang, langs); stringToStrings(lang, langs);
return m_db.createStemDbs(langs); return m_db.createStemDbs(langs);
@ -397,32 +397,32 @@ bool ConfIndexer::createAspellDict()
// it forever. // it forever.
static int noaspell = -12345; static int noaspell = -12345;
if (noaspell == -12345) { if (noaspell == -12345) {
noaspell = false; noaspell = false;
m_config->getConfParam("noaspell", &noaspell); m_config->getConfParam("noaspell", &noaspell);
} }
if (noaspell) if (noaspell)
return true; return true;
if (!m_db.open(Rcl::Db::DbRO)) { if (!m_db.open(Rcl::Db::DbRO)) {
LOGERR("ConfIndexer::createAspellDict: could not open db\n"); LOGERR("ConfIndexer::createAspellDict: could not open db\n");
return false; return false;
} }
Aspell aspell(m_config); Aspell aspell(m_config);
string reason; string reason;
if (!aspell.init(reason)) { if (!aspell.init(reason)) {
LOGERR("ConfIndexer::createAspellDict: aspell init failed: " << LOGERR("ConfIndexer::createAspellDict: aspell init failed: " <<
reason << "\n"); reason << "\n");
noaspell = true; noaspell = true;
return false; return false;
} }
LOGDEB("ConfIndexer::createAspellDict: creating dictionary\n"); LOGDEB("ConfIndexer::createAspellDict: creating dictionary\n");
if (!aspell.buildDict(m_db, reason)) { if (!aspell.buildDict(m_db, reason)) {
LOGERR("ConfIndexer::createAspellDict: aspell buildDict failed: " << LOGERR("ConfIndexer::createAspellDict: aspell buildDict failed: " <<
reason << "\n"); reason << "\n");
addIdxReason("aspell", reason); addIdxReason("aspell", reason);
noaspell = true; noaspell = true;
return false; return false;
} }
#endif #endif
return true; return true;
@ -432,4 +432,3 @@ vector<string> ConfIndexer::getStemmerNames()
{ {
return Rcl::Db::getStemmerNames(); return Rcl::Db::getStemmerNames();
} }

View File

@ -34,7 +34,7 @@ class WebQueueIndexer;
/** Callback to say what we're doing. If the update func returns false, we /** Callback to say what we're doing. If the update func returns false, we
* stop as soon as possible without corrupting state */ * stop as soon as possible without corrupting state */
class DbIxStatusUpdater { class DbIxStatusUpdater {
public: public:
#ifdef IDX_THREADS #ifdef IDX_THREADS
std::mutex m_mutex; std::mutex m_mutex;
#endif #endif
@ -43,14 +43,14 @@ class DbIxStatusUpdater {
// Convenience: change phase/fn and update // Convenience: change phase/fn and update
virtual bool update(DbIxStatus::Phase phase, const string& fn) virtual bool update(DbIxStatus::Phase phase, const string& fn)
{ {
#ifdef IDX_THREADS #ifdef IDX_THREADS
std::unique_lock<std::mutex> lock(m_mutex); std::unique_lock<std::mutex> lock(m_mutex);
#endif #endif
status.phase = phase; status.phase = phase;
status.fn = fn; status.fn = fn;
return update(); return update();
} }
// To be implemented by user for sending info somewhere // To be implemented by user for sending info somewhere
virtual bool update() = 0; virtual bool update() = 0;
@ -62,7 +62,7 @@ class DbIxStatusUpdater {
* database(s). * database(s).
*/ */
class ConfIndexer { class ConfIndexer {
public: public:
enum runStatus {IndexerOk, IndexerError}; enum runStatus {IndexerOk, IndexerError};
ConfIndexer(RclConfig *cnf, DbIxStatusUpdater *updfunc = 0); ConfIndexer(RclConfig *cnf, DbIxStatusUpdater *updfunc = 0);
virtual ~ConfIndexer(); virtual ~ConfIndexer();
@ -113,7 +113,7 @@ class ConfIndexer {
/** Set in place reset mode */ /** Set in place reset mode */
void setInPlaceReset() {m_db.setInPlaceReset();} void setInPlaceReset() {m_db.setInPlaceReset();}
private: private:
RclConfig *m_config; RclConfig *m_config;
Rcl::Db m_db; Rcl::Db m_db;
FsIndexer *m_fsindexer; FsIndexer *m_fsindexer;

View File

@ -55,11 +55,11 @@ static string mimetypefromdata(RclConfig *cfg, const string &fn, bool usfc)
#ifdef USE_SYSTEM_FILE_COMMAND #ifdef USE_SYSTEM_FILE_COMMAND
if (usfc && mime.empty()) { if (usfc && mime.empty()) {
// Last resort: use "file -i", or its configured replacement. // Last resort: use "file -i", or its configured replacement.
// 'file' fallback if the configured command (default: // 'file' fallback if the configured command (default:
// xdg-mime) is not found // xdg-mime) is not found
static const vector<string> tradfilecmd = {{FILE_PROG}, {"-i"}}; static const vector<string> tradfilecmd = {{FILE_PROG}, {"-i"}};
vector<string> cmd; vector<string> cmd;
string scommand; string scommand;
@ -81,55 +81,55 @@ static string mimetypefromdata(RclConfig *cfg, const string &fn, bool usfc)
cmd = tradfilecmd; cmd = tradfilecmd;
} }
string result; string result;
LOGDEB2("mimetype: executing: [" << stringsToString(cmd) << "]\n"); LOGDEB2("mimetype: executing: [" << stringsToString(cmd) << "]\n");
if (!ExecCmd::backtick(cmd, result)) { if (!ExecCmd::backtick(cmd, result)) {
LOGERR("mimetypefromdata: exec " << LOGERR("mimetypefromdata: exec " <<
stringsToString(cmd) << " failed\n"); stringsToString(cmd) << " failed\n");
return string(); return string();
} }
trimstring(result, " \t\n\r"); trimstring(result, " \t\n\r");
LOGDEB2("mimetype: systemfilecommand output [" << result << "]\n"); LOGDEB2("mimetype: systemfilecommand output [" << result << "]\n");
// The normal output from "file -i" looks like the following: // The normal output from "file -i" looks like the following:
// thefilename.xxx: text/plain; charset=us-ascii // thefilename.xxx: text/plain; charset=us-ascii
// Sometimes the semi-colon is missing like in: // Sometimes the semi-colon is missing like in:
// mimetype.cpp: text/x-c charset=us-ascii // mimetype.cpp: text/x-c charset=us-ascii
// And sometimes we only get the mime type. This apparently happens // And sometimes we only get the mime type. This apparently happens
// when 'file' believes that the file name is binary // when 'file' believes that the file name is binary
// xdg-mime only outputs the MIME type. // xdg-mime only outputs the MIME type.
// If there is no colon and there is a slash, this is hopefully // If there is no colon and there is a slash, this is hopefully
// the mime type // the mime type
if (result.find_first_of(":") == string::npos && if (result.find_first_of(":") == string::npos &&
result.find_first_of("/") != string::npos) { result.find_first_of("/") != string::npos) {
return result; return result;
} }
// Else the result should begin with the file name. Get rid of it: // Else the result should begin with the file name. Get rid of it:
if (result.find(fn) != 0) { if (result.find(fn) != 0) {
// Garbage "file" output. Maybe the result of a charset // Garbage "file" output. Maybe the result of a charset
// conversion attempt? // conversion attempt?
LOGERR("mimetype: can't interpret output from [" << LOGERR("mimetype: can't interpret output from [" <<
stringsToString(cmd) << "] : [" << result << "]\n"); stringsToString(cmd) << "] : [" << result << "]\n");
return string(); return string();
} }
result = result.substr(fn.size()); result = result.substr(fn.size());
// Now should look like ": text/plain; charset=us-ascii" // Now should look like ": text/plain; charset=us-ascii"
// Split it, and take second field // Split it, and take second field
list<string> res; list<string> res;
stringToStrings(result, res); stringToStrings(result, res);
if (res.size() <= 1) if (res.size() <= 1)
return string(); return string();
list<string>::iterator it = res.begin(); list<string>::iterator it = res.begin();
mime = *++it; mime = *++it;
// Remove possible semi-colon at the end // Remove possible semi-colon at the end
trimstring(mime, " \t;"); trimstring(mime, " \t;");
// File -i will sometimes return strange stuff (ie: "very small file") // File -i will sometimes return strange stuff (ie: "very small file")
if(mime.find("/") == string::npos) if(mime.find("/") == string::npos)
mime.clear(); mime.clear();
} }
#endif //USE_SYSTEM_FILE_COMMAND #endif //USE_SYSTEM_FILE_COMMAND
@ -139,24 +139,24 @@ static string mimetypefromdata(RclConfig *cfg, const string &fn, bool usfc)
/// Guess mime type, first from suffix, then from file data. We also /// Guess mime type, first from suffix, then from file data. We also
/// have a list of suffixes that we don't touch at all. /// have a list of suffixes that we don't touch at all.
string mimetype(const string &fn, const struct PathStat *stp, string mimetype(const string &fn, const struct PathStat *stp,
RclConfig *cfg, bool usfc) RclConfig *cfg, bool usfc)
{ {
// Use stat data if available to check for non regular files // Use stat data if available to check for non regular files
if (stp) { if (stp) {
// Note: the value used for directories is different from what // Note: the value used for directories is different from what
// file -i would print on Linux (inode/directory). Probably // file -i would print on Linux (inode/directory). Probably
// comes from bsd. This may surprise a user trying to use a // comes from bsd. This may surprise a user trying to use a
// 'mime:' filter with the query language, but it's not worth // 'mime:' filter with the query language, but it's not worth
// changing (would force a reindex). // changing (would force a reindex).
if (stp->pst_type == PathStat::PST_DIR) if (stp->pst_type == PathStat::PST_DIR)
return "inode/directory"; return "inode/directory";
if (stp->pst_type == PathStat::PST_SYMLINK) if (stp->pst_type == PathStat::PST_SYMLINK)
return "inode/symlink"; return "inode/symlink";
if (stp->pst_type != PathStat::PST_REGULAR) if (stp->pst_type != PathStat::PST_REGULAR)
return "inode/x-fsspecial"; return "inode/x-fsspecial";
// Empty files are just this: avoid further errors with actual filters. // Empty files are just this: avoid further errors with actual filters.
if (stp->pst_size == 0) if (stp->pst_size == 0)
return "inode/x-empty"; return "inode/x-empty";
} }
string mtype; string mtype;
@ -165,40 +165,40 @@ string mimetype(const string &fn, const struct PathStat *stp,
// Extended attribute has priority on everything, as per: // Extended attribute has priority on everything, as per:
// http://freedesktop.org/wiki/CommonExtendedAttributes // http://freedesktop.org/wiki/CommonExtendedAttributes
if (pxattr::get(fn, "mime_type", &mtype)) { if (pxattr::get(fn, "mime_type", &mtype)) {
LOGDEB0("Mimetype: 'mime_type' xattr : [" << mtype << "]\n"); LOGDEB0("Mimetype: 'mime_type' xattr : [" << mtype << "]\n");
if (mtype.empty()) { if (mtype.empty()) {
LOGDEB0("Mimetype: getxattr() returned empty mime type !\n"); LOGDEB0("Mimetype: getxattr() returned empty mime type !\n");
} else { } else {
return mtype; return mtype;
} }
} }
#endif #endif
if (cfg == 0) { if (cfg == 0) {
LOGERR("Mimetype: null config ??\n"); LOGERR("Mimetype: null config ??\n");
return mtype; return mtype;
} }
if (cfg->inStopSuffixes(fn)) { if (cfg->inStopSuffixes(fn)) {
LOGDEB("mimetype: fn [" << fn << "] in stopsuffixes\n"); LOGDEB("mimetype: fn [" << fn << "] in stopsuffixes\n");
return mtype; return mtype;
} }
// Compute file name suffix and search the mimetype map // Compute file name suffix and search the mimetype map
string::size_type dot = fn.find_first_of("."); string::size_type dot = fn.find_first_of(".");
while (dot != string::npos) { while (dot != string::npos) {
string suff = stringtolower(fn.substr(dot)); string suff = stringtolower(fn.substr(dot));
mtype = cfg->getMimeTypeFromSuffix(suff); mtype = cfg->getMimeTypeFromSuffix(suff);
if (!mtype.empty() || dot >= fn.size() - 1) if (!mtype.empty() || dot >= fn.size() - 1)
break; break;
dot = fn.find_first_of(".", dot + 1); dot = fn.find_first_of(".", dot + 1);
} }
// If type was not determined from suffix, examine file data. Can // If type was not determined from suffix, examine file data. Can
// only do this if we have an actual file (as opposed to a pure // only do this if we have an actual file (as opposed to a pure
// name). // name).
if (mtype.empty() && stp) if (mtype.empty() && stp)
mtype = mimetypefromdata(cfg, fn, usfc); mtype = mimetypefromdata(cfg, fn, usfc);
return mtype; return mtype;
} }

View File

@ -45,9 +45,9 @@ using std::multimap;
* Monitoring event: something changed in the filesystem * Monitoring event: something changed in the filesystem
*/ */
class RclMonEvent { class RclMonEvent {
public: public:
enum EvType {RCLEVT_NONE= 0, RCLEVT_MODIFY=1, RCLEVT_DELETE=2, enum EvType {RCLEVT_NONE= 0, RCLEVT_MODIFY=1, RCLEVT_DELETE=2,
RCLEVT_DIRCREATE=3, RCLEVT_ISDIR=0x10}; RCLEVT_DIRCREATE=3, RCLEVT_ISDIR=0x10};
string m_path; string m_path;
// Type and flags // Type and flags
int m_etyp; int m_etyp;
@ -61,13 +61,13 @@ class RclMonEvent {
bool m_needidx; bool m_needidx;
RclMonEvent() : m_etyp(RCLEVT_NONE), RclMonEvent() : m_etyp(RCLEVT_NONE),
m_itvsecs(0), m_minclock(0), m_needidx(false) {} m_itvsecs(0), m_minclock(0), m_needidx(false) {}
EvType evtype() {return EvType(m_etyp & 0xf);} EvType evtype() {return EvType(m_etyp & 0xf);}
int evflags() {return m_etyp & 0xf0;} int evflags() {return m_etyp & 0xf0;}
}; };
/** Option values for the monitor. The non-zero values are distinct
 *  single bits. */
enum RclMonitorOption {
    RCLMON_NONE = 0,
    RCLMON_NOFORK = 1,
    RCLMON_NOX11 = 2,
    RCLMON_NOCONFCHECK = 4
};
/** /**
* Monitoring event queue. This is the shared object between the main thread * Monitoring event queue. This is the shared object between the main thread
@ -76,7 +76,7 @@ enum RclMonitorOption {RCLMON_NONE=0, RCLMON_NOFORK=1, RCLMON_NOX11=2,
*/ */
class RclEQData; class RclEQData;
class RclMonEventQueue { class RclMonEventQueue {
public: public:
RclMonEventQueue(); RclMonEventQueue();
~RclMonEventQueue(); ~RclMonEventQueue();
/** Wait for event or timeout. Returns with the queue locked */ /** Wait for event or timeout. Returns with the queue locked */
@ -94,7 +94,7 @@ class RclMonEventQueue {
void setConfig(RclConfig *conf); void setConfig(RclConfig *conf);
RclConfig *getConfig(); RclConfig *getConfig();
private: private:
RclEQData *m_data; RclEQData *m_data;
}; };

View File

@ -142,50 +142,50 @@ public:
std::condition_variable m_cond; std::condition_variable m_cond;
RclEQData() RclEQData()
: m_config(0), m_ok(true) : m_config(0), m_ok(true)
{ {
} }
void readDelayPats(int dfltsecs); void readDelayPats(int dfltsecs);
// Look up path in the delay pattern list. Returns a copy of the first
// entry whose pattern matches path according to fnmatch() (no flags),
// or a default-constructed DelayPat when nothing matches.
DelayPat searchDelayPats(const string& path)
{
    for (const auto& delaypat : m_delaypats) {
        if (fnmatch(delaypat.pattern.c_str(), path.c_str(), 0) == 0) {
            return delaypat;
        }
    }
    return DelayPat();
}
void delayInsert(const queue_type::iterator &qit); void delayInsert(const queue_type::iterator &qit);
}; };
void RclEQData::readDelayPats(int dfltsecs) void RclEQData::readDelayPats(int dfltsecs)
{ {
if (m_config == 0) if (m_config == 0)
return; return;
string patstring; string patstring;
if (!m_config->getConfParam("mondelaypatterns", patstring) || if (!m_config->getConfParam("mondelaypatterns", patstring) ||
patstring.empty()) patstring.empty())
return; return;
vector<string> dplist; vector<string> dplist;
if (!stringToStrings(patstring, dplist)) { if (!stringToStrings(patstring, dplist)) {
LOGERR("rclEQData: bad pattern list: [" << (patstring) << "]\n" ); LOGERR("rclEQData: bad pattern list: [" << (patstring) << "]\n" );
return; return;
} }
for (vector<string>::iterator it = dplist.begin(); for (vector<string>::iterator it = dplist.begin();
it != dplist.end(); it++) { it != dplist.end(); it++) {
string::size_type pos = it->find_last_of(":"); string::size_type pos = it->find_last_of(":");
DelayPat dp; DelayPat dp;
dp.pattern = it->substr(0, pos); dp.pattern = it->substr(0, pos);
if (pos != string::npos && pos != it->size()-1) { if (pos != string::npos && pos != it->size()-1) {
dp.seconds = atoi(it->substr(pos+1).c_str()); dp.seconds = atoi(it->substr(pos+1).c_str());
} else { } else {
dp.seconds = dfltsecs; dp.seconds = dfltsecs;
} }
m_delaypats.push_back(dp); m_delaypats.push_back(dp);
LOGDEB2("rclmon::readDelayPats: add [" << (dp.pattern) << "] " << (dp.seconds) << "\n" ); LOGDEB2("rclmon::readDelayPats: add [" << (dp.pattern) << "] " << (dp.seconds) << "\n" );
} }
} }
@ -197,12 +197,12 @@ void RclEQData::delayInsert(const queue_type::iterator &qit)
MONDEB("RclEQData::delayInsert: minclock " << qit->second.m_minclock << MONDEB("RclEQData::delayInsert: minclock " << qit->second.m_minclock <<
std::endl); std::endl);
for (delays_type::iterator dit = m_delays.begin(); for (delays_type::iterator dit = m_delays.begin();
dit != m_delays.end(); dit++) { dit != m_delays.end(); dit++) {
queue_type::iterator qit1 = *dit; queue_type::iterator qit1 = *dit;
if ((*qit1).second.m_minclock > qit->second.m_minclock) { if ((*qit1).second.m_minclock > qit->second.m_minclock) {
m_delays.insert(dit, qit); m_delays.insert(dit, qit);
return; return;
} }
} }
m_delays.push_back(qit); m_delays.push_back(qit);
} }
@ -220,7 +220,7 @@ RclMonEventQueue::~RclMonEventQueue()
void RclMonEventQueue::setopts(int opts) void RclMonEventQueue::setopts(int opts)
{ {
if (m_data) if (m_data)
m_data->m_opts = opts; m_data->m_opts = opts;
} }
/** Wait until there is something to process on the queue, or timeout. /** Wait until there is something to process on the queue, or timeout.
@ -232,22 +232,22 @@ std::unique_lock<std::mutex> RclMonEventQueue::wait(int seconds, bool *top)
MONDEB("RclMonEventQueue::wait, seconds: " << seconds << std::endl); MONDEB("RclMonEventQueue::wait, seconds: " << seconds << std::endl);
if (!empty()) { if (!empty()) {
MONDEB("RclMonEventQueue:: immediate return\n"); MONDEB("RclMonEventQueue:: immediate return\n");
return lock; return lock;
} }
int err; int err;
if (seconds > 0) { if (seconds > 0) {
if (top) if (top)
*top = false; *top = false;
if (m_data->m_cond.wait_for(lock, std::chrono::seconds(seconds)) == if (m_data->m_cond.wait_for(lock, std::chrono::seconds(seconds)) ==
std::cv_status::timeout) { std::cv_status::timeout) {
*top = true; *top = true;
MONDEB("RclMonEventQueue:: timeout\n"); MONDEB("RclMonEventQueue:: timeout\n");
return lock; return lock;
} }
} else { } else {
m_data->m_cond.wait(lock); m_data->m_cond.wait(lock);
} }
MONDEB("RclMonEventQueue:: non-timeout return\n"); MONDEB("RclMonEventQueue:: non-timeout return\n");
return lock; return lock;
@ -269,16 +269,16 @@ RclConfig *RclMonEventQueue::getConfig()
bool RclMonEventQueue::ok() bool RclMonEventQueue::ok()
{ {
if (m_data == 0) { if (m_data == 0) {
LOGINFO("RclMonEventQueue: not ok: bad state\n" ); LOGINFO("RclMonEventQueue: not ok: bad state\n" );
return false; return false;
} }
if (stopindexing) { if (stopindexing) {
LOGINFO("RclMonEventQueue: not ok: stop request\n" ); LOGINFO("RclMonEventQueue: not ok: stop request\n" );
return false; return false;
} }
if (!m_data->m_ok) { if (!m_data->m_ok) {
LOGINFO("RclMonEventQueue: not ok: queue terminated\n" ); LOGINFO("RclMonEventQueue: not ok: queue terminated\n" );
return false; return false;
} }
return true; return true;
} }
@ -295,24 +295,24 @@ void RclMonEventQueue::setTerminate()
bool RclMonEventQueue::empty() bool RclMonEventQueue::empty()
{ {
if (m_data == 0) { if (m_data == 0) {
MONDEB("RclMonEventQueue::empty(): true (m_data==0)\n"); MONDEB("RclMonEventQueue::empty(): true (m_data==0)\n");
return true; return true;
} }
if (!m_data->m_iqueue.empty()) { if (!m_data->m_iqueue.empty()) {
MONDEB("RclMonEventQueue::empty(): false (m_iqueue not empty)\n"); MONDEB("RclMonEventQueue::empty(): false (m_iqueue not empty)\n");
return true; return true;
} }
if (m_data->m_dqueue.empty()) { if (m_data->m_dqueue.empty()) {
MONDEB("RclMonEventQueue::empty(): true (m_Xqueue both empty)\n"); MONDEB("RclMonEventQueue::empty(): true (m_Xqueue both empty)\n");
return true; return true;
} }
// Only dqueue has events. Have to check the delays (only the // Only dqueue has events. Have to check the delays (only the
// first, earliest one): // first, earliest one):
queue_type::iterator qit = *(m_data->m_delays.begin()); queue_type::iterator qit = *(m_data->m_delays.begin());
if (qit->second.m_minclock > time(0)) { if (qit->second.m_minclock > time(0)) {
MONDEB("RclMonEventQueue::empty(): true (no delay ready " << MONDEB("RclMonEventQueue::empty(): true (no delay ready " <<
qit->second.m_minclock << ")\n"); qit->second.m_minclock << ")\n");
return true; return true;
} }
MONDEB("RclMonEventQueue::empty(): returning false (delay expired)\n"); MONDEB("RclMonEventQueue::empty(): returning false (delay expired)\n");
return false; return false;
@ -329,36 +329,36 @@ RclMonEvent RclMonEventQueue::pop()
// Look at the delayed events, get rid of the expired/unactive // Look at the delayed events, get rid of the expired/unactive
// ones, possibly return an expired/needidx one. // ones, possibly return an expired/needidx one.
while (!m_data->m_delays.empty()) { while (!m_data->m_delays.empty()) {
delays_type::iterator dit = m_data->m_delays.begin(); delays_type::iterator dit = m_data->m_delays.begin();
queue_type::iterator qit = *dit; queue_type::iterator qit = *dit;
MONDEB("RclMonEventQueue::pop(): in delays: evt minclock " << MONDEB("RclMonEventQueue::pop(): in delays: evt minclock " <<
qit->second.m_minclock << std::endl); qit->second.m_minclock << std::endl);
if (qit->second.m_minclock <= now) { if (qit->second.m_minclock <= now) {
if (qit->second.m_needidx) { if (qit->second.m_needidx) {
RclMonEvent ev = qit->second; RclMonEvent ev = qit->second;
qit->second.m_minclock = time(0) + qit->second.m_itvsecs; qit->second.m_minclock = time(0) + qit->second.m_itvsecs;
qit->second.m_needidx = false; qit->second.m_needidx = false;
m_data->m_delays.erase(dit); m_data->m_delays.erase(dit);
m_data->delayInsert(qit); m_data->delayInsert(qit);
return ev; return ev;
} else {
// Delay elapsed without new update, get rid of event.
m_data->m_dqueue.erase(qit);
m_data->m_delays.erase(dit);
}
} else { } else {
// Delay elapsed without new update, get rid of event. // This and following events are for later processing, we
m_data->m_dqueue.erase(qit); // are done with the delayed event list.
m_data->m_delays.erase(dit); break;
} }
} else {
// This and following events are for later processing, we
// are done with the delayed event list.
break;
}
} }
// Look for non-delayed event // Look for non-delayed event
if (!m_data->m_iqueue.empty()) { if (!m_data->m_iqueue.empty()) {
queue_type::iterator qit = m_data->m_iqueue.begin(); queue_type::iterator qit = m_data->m_iqueue.begin();
RclMonEvent ev = qit->second; RclMonEvent ev = qit->second;
m_data->m_iqueue.erase(qit); m_data->m_iqueue.erase(qit);
return ev; return ev;
} }
return RclMonEvent(); return RclMonEvent();
@ -376,32 +376,32 @@ bool RclMonEventQueue::pushEvent(const RclMonEvent &ev)
DelayPat pat = m_data->searchDelayPats(ev.m_path); DelayPat pat = m_data->searchDelayPats(ev.m_path);
if (pat.seconds != 0) { if (pat.seconds != 0) {
// Using delayed reindex queue. Need to take care of minclock and also // Using delayed reindex queue. Need to take care of minclock and also
// insert into the in-minclock-order list // insert into the in-minclock-order list
queue_type::iterator qit = m_data->m_dqueue.find(ev.m_path); queue_type::iterator qit = m_data->m_dqueue.find(ev.m_path);
if (qit == m_data->m_dqueue.end()) { if (qit == m_data->m_dqueue.end()) {
// Not there yet, insert new // Not there yet, insert new
qit = qit =
m_data->m_dqueue.insert(queue_type::value_type(ev.m_path, ev)).first; m_data->m_dqueue.insert(queue_type::value_type(ev.m_path, ev)).first;
// Set the time to next index to "now" as it has not been // Set the time to next index to "now" as it has not been
// indexed recently (otherwise it would still be in the // indexed recently (otherwise it would still be in the
// queue), and add the iterator to the delay queue. // queue), and add the iterator to the delay queue.
qit->second.m_minclock = time(0); qit->second.m_minclock = time(0);
qit->second.m_needidx = true; qit->second.m_needidx = true;
qit->second.m_itvsecs = pat.seconds; qit->second.m_itvsecs = pat.seconds;
m_data->delayInsert(qit); m_data->delayInsert(qit);
} else {
// Already in queue. Possibly update type but save minclock
// (so no need to touch m_delays). Flag as needing indexing
time_t saved_clock = qit->second.m_minclock;
qit->second = ev;
qit->second.m_minclock = saved_clock;
qit->second.m_needidx = true;
}
} else { } else {
// Already in queue. Possibly update type but save minclock // Immediate event: just insert it, erasing any previously
// (so no need to touch m_delays). Flag as needing indexing // existing entry
time_t saved_clock = qit->second.m_minclock; m_data->m_iqueue[ev.m_path] = ev;
qit->second = ev;
qit->second.m_minclock = saved_clock;
qit->second.m_needidx = true;
}
} else {
// Immediate event: just insert it, erasing any previously
// existing entry
m_data->m_iqueue[ev.m_path] = ev;
} }
m_data->m_cond.notify_all(); m_data->m_cond.notify_all();
@ -429,19 +429,19 @@ static bool expeditedIndexingRequested(RclConfig *conf)
{ {
static vector<string> rqfiles; static vector<string> rqfiles;
if (rqfiles.empty()) { if (rqfiles.empty()) {
rqfiles.push_back(path_cat(conf->getConfDir(), "rclmonixnow")); rqfiles.push_back(path_cat(conf->getConfDir(), "rclmonixnow"));
const char *cp; const char *cp;
if ((cp = getenv("RECOLL_CONFTOP"))) { if ((cp = getenv("RECOLL_CONFTOP"))) {
rqfiles.push_back(path_cat(cp, "rclmonixnow")); rqfiles.push_back(path_cat(cp, "rclmonixnow"));
} }
if ((cp = getenv("RECOLL_CONFMID"))) { if ((cp = getenv("RECOLL_CONFMID"))) {
rqfiles.push_back(path_cat(cp, "rclmonixnow")); rqfiles.push_back(path_cat(cp, "rclmonixnow"));
} }
} }
bool found = false; bool found = false;
for (vector<string>::const_iterator it = rqfiles.begin(); for (vector<string>::const_iterator it = rqfiles.begin();
it != rqfiles.end(); it++) { it != rqfiles.end(); it++) {
found = found || checkfileanddelete(*it); found = found || checkfileanddelete(*it);
} }
return found; return found;
} }
@ -449,9 +449,9 @@ static bool expeditedIndexingRequested(RclConfig *conf)
bool startMonitor(RclConfig *conf, int opts) bool startMonitor(RclConfig *conf, int opts)
{ {
if (!conf->getConfParam("monauxinterval", &auxinterval)) if (!conf->getConfParam("monauxinterval", &auxinterval))
auxinterval = dfltauxinterval; auxinterval = dfltauxinterval;
if (!conf->getConfParam("monixinterval", &ixinterval)) if (!conf->getConfParam("monixinterval", &ixinterval))
ixinterval = dfltixinterval; ixinterval = dfltixinterval;
rclEQ.setConfig(conf); rclEQ.setConfig(conf);
rclEQ.setopts(opts); rclEQ.setopts(opts);
@ -533,14 +533,14 @@ bool startMonitor(RclConfig *conf, int opts)
} }
now = time(0); now = time(0);
// Process. We don't do this every time but let the lists accumulate // Process. We don't do this every time but let the lists accumulate
// a little, this saves processing. Start at once if list is big. // a little, this saves processing. Start at once if list is big.
if (expeditedIndexingRequested(conf) || if (expeditedIndexingRequested(conf) ||
(now - lastixtime > ixinterval) || (now - lastixtime > ixinterval) ||
(deleted.size() + modified.size() > 20)) { (deleted.size() + modified.size() > 20)) {
lastixtime = now; lastixtime = now;
// Used to do the modified list first, but it does seem // Used to do the modified list first, but it does seem
// smarter to make room first... // smarter to make room first...
if (!deleted.empty()) { if (!deleted.empty()) {
deleted.sort(); deleted.sort();
deleted.unique(); deleted.unique();
@ -559,28 +559,28 @@ bool startMonitor(RclConfig *conf, int opts)
} }
} }
// Recreate the auxiliary dbs every hour at most. // Recreate the auxiliary dbs every hour at most.
now = time(0); now = time(0);
if (didsomething && now - lastauxtime > auxinterval) { if (didsomething && now - lastauxtime > auxinterval) {
lastauxtime = now; lastauxtime = now;
didsomething = false; didsomething = false;
if (!createAuxDbs(conf)) { if (!createAuxDbs(conf)) {
// We used to bail out on error here. Not anymore, // We used to bail out on error here. Not anymore,
// because this is most of the time due to a failure // because this is most of the time due to a failure
// of aspell dictionary generation, which is not // of aspell dictionary generation, which is not
// critical. // critical.
}
} }
}
// Check for a config change // Check for a config change
if (!(opts & RCLMON_NOCONFCHECK) && o_reexec && conf->sourceChanged()) { if (!(opts & RCLMON_NOCONFCHECK) && o_reexec && conf->sourceChanged()) {
LOGDEB("Rclmonprc: config changed, reexecuting myself\n" ); LOGDEB("Rclmonprc: config changed, reexecuting myself\n" );
// We never want to have a -n option after a config // We never want to have a -n option after a config
// change. -n was added by the reexec after the initial // change. -n was added by the reexec after the initial
// pass even if it was not given on the command line // pass even if it was not given on the command line
o_reexec->removeArg("-n"); o_reexec->removeArg("-n");
o_reexec->reexec(); o_reexec->reexec();
} }
} }
LOGDEB("Rclmonprc: calling queue setTerminate\n" ); LOGDEB("Rclmonprc: calling queue setTerminate\n" );
rclEQ.setTerminate(); rclEQ.setTerminate();

View File

@ -120,51 +120,51 @@ public:
} }
virtual bool update() virtual bool update()
{ {
// Update the status file. Avoid doing it too often. Always do // Update the status file. Avoid doing it too often. Always do
// it at the end (status DONE) // it at the end (status DONE)
if (status.phase == DbIxStatus::DBIXS_DONE || if (status.phase == DbIxStatus::DBIXS_DONE ||
status.phase != m_prevphase || m_chron.millis() > 300) { status.phase != m_prevphase || m_chron.millis() > 300) {
if (status.totfiles < status.filesdone || if (status.totfiles < status.filesdone ||
status.phase == DbIxStatus::DBIXS_DONE) { status.phase == DbIxStatus::DBIXS_DONE) {
status.totfiles = status.filesdone; status.totfiles = status.filesdone;
}
m_prevphase = status.phase;
m_chron.restart();
m_file.holdWrites(true);
m_file.set("phase", int(status.phase));
m_file.set("docsdone", status.docsdone);
m_file.set("filesdone", status.filesdone);
m_file.set("fileerrors", status.fileerrors);
m_file.set("dbtotdocs", status.dbtotdocs);
m_file.set("totfiles", status.totfiles);
m_file.set("fn", status.fn);
m_file.set("hasmonitor", status.hasmonitor);
m_file.holdWrites(false);
}
if (path_exists(m_stopfilename)) {
LOGINF("recollindex: asking indexer to stop because " <<
m_stopfilename << " exists\n");
path_unlink(m_stopfilename);
stopindexing = true;
}
if (stopindexing) {
return false;
} }
m_prevphase = status.phase;
m_chron.restart();
m_file.holdWrites(true);
m_file.set("phase", int(status.phase));
m_file.set("docsdone", status.docsdone);
m_file.set("filesdone", status.filesdone);
m_file.set("fileerrors", status.fileerrors);
m_file.set("dbtotdocs", status.dbtotdocs);
m_file.set("totfiles", status.totfiles);
m_file.set("fn", status.fn);
m_file.set("hasmonitor", status.hasmonitor);
m_file.holdWrites(false);
}
if (path_exists(m_stopfilename)) {
LOGINF("recollindex: asking indexer to stop because " <<
m_stopfilename << " exists\n");
path_unlink(m_stopfilename);
stopindexing = true;
}
if (stopindexing) {
return false;
}
#ifndef DISABLE_X11MON #ifndef DISABLE_X11MON
// If we are in the monitor, we also need to check X11 status // If we are in the monitor, we also need to check X11 status
// during the initial indexing pass (else the user could log // during the initial indexing pass (else the user could log
// out and the indexing would go on, not good (ie: if the user // out and the indexing would go on, not good (ie: if the user
// logs in again, the new recollindex will fail). // logs in again, the new recollindex will fail).
if ((op_flags & OPT_m) && !(op_flags & OPT_x) && !x11IsAlive()) { if ((op_flags & OPT_m) && !(op_flags & OPT_x) && !x11IsAlive()) {
LOGDEB("X11 session went away during initial indexing pass\n"); LOGDEB("X11 session went away during initial indexing pass\n");
stopindexing = true; stopindexing = true;
return false; return false;
} }
#endif #endif
return true; return true;
} }
private: private:
ConfSimple m_file; ConfSimple m_file;
@ -263,8 +263,8 @@ class MakeListWalkerCB : public FsTreeWalkerCB {
public: public:
// Set up the walker callback with the list passed as 'files' and the file
// name patterns passed as 'selpats'. Nothing else happens at construction.
MakeListWalkerCB(list<string>& files, const vector<string>& selpats)
    : m_files(files),
      m_pats(selpats) {}
virtual FsTreeWalker::Status virtual FsTreeWalker::Status
processone(const string& fn, const struct PathStat *, processone(const string& fn, const struct PathStat *,
FsTreeWalker::CbFlag flg) { FsTreeWalker::CbFlag flg) {
@ -427,103 +427,103 @@ static bool checktopdirs(RclConfig *config, vector<string>& nonexist)
string thisprog; string thisprog;
static const char usage [] = static const char usage [] =
"\n" "\n"
"recollindex [-h] \n" "recollindex [-h] \n"
" Print help\n" " Print help\n"
"recollindex [-z|-Z] [-k]\n" "recollindex [-z|-Z] [-k]\n"
" Index everything according to configuration file\n" " Index everything according to configuration file\n"
" -z : reset database before starting indexing\n" " -z : reset database before starting indexing\n"
" -Z : in place reset: consider all documents as changed. Can also\n" " -Z : in place reset: consider all documents as changed. Can also\n"
" be combined with -i or -r but not -m\n" " be combined with -i or -r but not -m\n"
" -k : retry files on which we previously failed\n" " -k : retry files on which we previously failed\n"
#ifdef RCL_MONITOR #ifdef RCL_MONITOR
"recollindex -m [-w <secs>] -x [-D] [-C]\n" "recollindex -m [-w <secs>] -x [-D] [-C]\n"
" Perform real time indexing. Don't become a daemon if -D is set.\n" " Perform real time indexing. Don't become a daemon if -D is set.\n"
" -w sets number of seconds to wait before starting.\n" " -w sets number of seconds to wait before starting.\n"
" -C disables monitoring config for changes/reexecuting.\n" " -C disables monitoring config for changes/reexecuting.\n"
" -n disables initial incremental indexing (!and purge!).\n" " -n disables initial incremental indexing (!and purge!).\n"
#ifndef DISABLE_X11MON #ifndef DISABLE_X11MON
" -x disables exit on end of x11 session\n" " -x disables exit on end of x11 session\n"
#endif /* DISABLE_X11MON */ #endif /* DISABLE_X11MON */
#endif /* RCL_MONITOR */ #endif /* RCL_MONITOR */
"recollindex -e [<filepath [path ...]>]\n" "recollindex -e [<filepath [path ...]>]\n"
" Purge data for individual files. No stem database updates.\n" " Purge data for individual files. No stem database updates.\n"
" Reads paths on stdin if none is given as argument.\n" " Reads paths on stdin if none is given as argument.\n"
"recollindex -i [-f] [-Z] [<filepath [path ...]>]\n" "recollindex -i [-f] [-Z] [<filepath [path ...]>]\n"
" Index individual files. No database purge or stem database updates\n" " Index individual files. No database purge or stem database updates\n"
" Will read paths on stdin if none is given as argument\n" " Will read paths on stdin if none is given as argument\n"
" -f : ignore skippedPaths and skippedNames while doing this\n" " -f : ignore skippedPaths and skippedNames while doing this\n"
"recollindex -r [-K] [-f] [-Z] [-p pattern] <top> \n" "recollindex -r [-K] [-f] [-Z] [-p pattern] <top> \n"
" Recursive partial reindex. \n" " Recursive partial reindex. \n"
" -p : filter file names, multiple instances are allowed, e.g.: \n" " -p : filter file names, multiple instances are allowed, e.g.: \n"
" -p *.odt -p *.pdf\n" " -p *.odt -p *.pdf\n"
" -K : skip previously failed files (they are retried by default)\n" " -K : skip previously failed files (they are retried by default)\n"
"recollindex -l\n" "recollindex -l\n"
" List available stemming languages\n" " List available stemming languages\n"
"recollindex -s <lang>\n" "recollindex -s <lang>\n"
" Build stem database for additional language <lang>\n" " Build stem database for additional language <lang>\n"
"recollindex -E\n" "recollindex -E\n"
" Check configuration file for topdirs and other paths existence\n" " Check configuration file for topdirs and other paths existence\n"
#ifdef FUTURE_IMPROVEMENT #ifdef FUTURE_IMPROVEMENT
"recollindex -W\n" "recollindex -W\n"
" Process the Web queue\n" " Process the Web queue\n"
#endif #endif
#ifdef RCL_USE_ASPELL #ifdef RCL_USE_ASPELL
"recollindex -S\n" "recollindex -S\n"
" Build aspell spelling dictionary.>\n" " Build aspell spelling dictionary.>\n"
#endif #endif
"Common options:\n" "Common options:\n"
" -c <configdir> : specify config directory, overriding $RECOLL_CONFDIR\n" " -c <configdir> : specify config directory, overriding $RECOLL_CONFDIR\n"
; ;
static void Usage() static void Usage()
{ {
FILE *fp = (op_flags & OPT_h) ? stdout : stderr; FILE *fp = (op_flags & OPT_h) ? stdout : stderr;
fprintf(fp, "%s: Usage: %s", path_getsimple(thisprog).c_str(), usage); fprintf(fp, "%s: Usage: %s", path_getsimple(thisprog).c_str(), usage);
fprintf(fp, "Recoll version: %s\n", Rcl::version_string().c_str()); fprintf(fp, "Recoll version: %s\n", Rcl::version_string().c_str());
exit((op_flags & OPT_h)==0); exit((op_flags & OPT_h)==0);
} }
static RclConfig *config; static RclConfig *config;
// Become the exclusive indexer by taking the pid file, or exit(1).
//
// pidfile->open() returning non-zero is a failure. A positive value is
// reported as the pid of the process presumably holding the lock. In that
// case, except on Windows, the index status file is read: if it says that
// a monitoring indexer is running, recoll.conf in the configuration
// directory is touched so that the monitor starts an incremental pass.
// A failure of pidfile->write_pid() is fatal too.
//
// The function only returns when the lock was taken.
static void lockorexit(Pidfile *pidfile, RclConfig *config)
{
    // config is only used in the non-Windows branch below.
    PRETEND_USE(config);

    const pid_t pid = pidfile->open();
    if (pid != 0) {
        if (pid > 0) {
            cerr << "Can't become exclusive indexer: " << pidfile->getreason()
                 << ". Return (other pid?): " << pid << endl;
#ifndef _WIN32
            // Have a look at the status file. If the other process is
            // a monitor we can tell it to start an incremental pass
            // by touching the configuration file
            DbIxStatus status;
            readIdxStatus(config, status);
            if (status.hasmonitor) {
                const string path =
                    path_cat(config->getConfDir(), "recoll.conf");
                // The command is run by the shell: single-quote the path
                // so that blanks or shell metacharacters in the directory
                // name can neither break nor alter the command. Inside
                // single quotes only ' itself needs care: it is written
                // as '\'' (close quote, escaped quote, reopen quote).
                string cmd("touch '");
                for (char c : path) {
                    if (c == '\'') {
                        cmd += "'\\''";
                    } else {
                        cmd += c;
                    }
                }
                cmd += "'";
                // Named differently from the DbIxStatus above, which the
                // previous code shadowed.
                const int ret = system(cmd.c_str());
                if (ret) {
                    cerr << cmd << " failed with status " << ret << endl;
                } else {
                    cerr << "Monitoring indexer process was notified of "
                        "indexing request\n";
                }
            }
#endif
        } else {
            cerr << "Can't become exclusive indexer: " << pidfile->getreason()
                 << endl;
        }
        exit(1);
    }

    if (pidfile->write_pid() != 0) {
        cerr << "Can't become exclusive indexer: " << pidfile->getreason() <<
            endl;
        exit(1);
    }
}
static string reasonsfile; static string reasonsfile;
@ -559,7 +559,7 @@ static void flushIdxReasons()
static vector<string> argstovector(int argc, wchar_t **argv) static vector<string> argstovector(int argc, wchar_t **argv)
#else #else
#define WARGTOSTRING(w) (w) #define WARGTOSTRING(w) (w)
static vector<string> argstovector(int argc, char **argv) static vector<string> argstovector(int argc, char **argv)
#endif #endif
{ {
thisprog = path_absolute(WARGTOSTRING(argv[0])); thisprog = path_absolute(WARGTOSTRING(argv[0]));
@ -599,7 +599,7 @@ static std::string orig_cwd;
#if USE_WMAIN #if USE_WMAIN
int wmain(int argc, wchar_t *argv[]) int wmain(int argc, wchar_t *argv[])
#else #else
int main(int argc, char *argv[]) int main(int argc, char *argv[])
#endif #endif
{ {
// The reexec struct is used by the daemon to shed memory after // The reexec struct is used by the daemon to shed memory after

View File

@ -15,7 +15,6 @@
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/ */
#ifndef TEST_SUBTREELIST
#include "autoconfig.h" #include "autoconfig.h"
#include <memory> #include <memory>
@ -28,13 +27,14 @@
#include "log.h" #include "log.h"
bool subtreelist(RclConfig *config, const string& top, bool subtreelist(RclConfig *config, const string& top,
vector<string>& paths) vector<string>& paths)
{ {
LOGDEB("subtreelist: top: [" << (top) << "]\n" ); LOGDEB("subtreelist: top: [" << (top) << "]\n" );
Rcl::Db rcldb(config); Rcl::Db rcldb(config);
if (!rcldb.open(Rcl::Db::DbRO)) { if (!rcldb.open(Rcl::Db::DbRO)) {
LOGERR("subtreelist: can't open database in [" << (config->getDbDir()) << "]: " << (rcldb.getReason()) << "\n" ); LOGERR("subtreelist: can't open database in [" << config->getDbDir() <<
return false; "]: " << rcldb.getReason() << "\n");
return false;
} }
Rcl::SearchData *sd = new Rcl::SearchData(Rcl::SCLT_OR, cstr_null); Rcl::SearchData *sd = new Rcl::SearchData(Rcl::SCLT_OR, cstr_null);
@ -47,85 +47,12 @@ bool subtreelist(RclConfig *config, const string& top,
int cnt = query.getResCnt(); int cnt = query.getResCnt();
for (int i = 0; i < cnt; i++) { for (int i = 0; i < cnt; i++) {
Rcl::Doc doc; Rcl::Doc doc;
if (!query.getDoc(i, doc)) if (!query.getDoc(i, doc))
break; break;
string path = fileurltolocalpath(doc.url); string path = fileurltolocalpath(doc.url);
if (!path.empty()) if (!path.empty())
paths.push_back(path); paths.push_back(path);
} }
return true; return true;
} }
#else // TEST
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <string.h>
#include <iostream>
#include <vector>
#include <string>
using namespace std;
#include "subtreelist.h"
#include "rclconfig.h"
#include "rclinit.h"
static char *thisprog;
static char usage [] =
" <path> : list document paths in this tree\n"
;
// Print the program name and the usage text on stderr, then exit with
// status 1. Never returns.
static void Usage(void)
{
    cerr << thisprog;
    cerr << ": usage:" << endl;
    cerr << usage;
    exit(1);
}
static int op_flags;
#define OPT_o 0x2
int main(int argc, char **argv)
{
string top;
thisprog = argv[0];
argc--; argv++;
while (argc > 0 && **argv == '-') {
(*argv)++;
if (!(**argv))
/* Cas du "adb - core" */
Usage();
while (**argv)
switch (*(*argv)++) {
default: Usage(); break;
}
argc--; argv++;
}
if (argc < 1)
Usage();
top = *argv++;argc--;
string reason;
RclConfig *config = recollinit(0, 0, 0, reason, 0);
if (!config || !config->ok()) {
fprintf(stderr, "Recoll init failed: %s\n", reason.c_str());
exit(1);
}
vector<string> paths;
if (!subtreelist(config, top, paths)) {
cerr << "subtreelist failed" << endl;
exit(1);
}
for (vector<string>::const_iterator it = paths.begin();
it != paths.end(); it++) {
cout << *it << endl;
}
exit(0);
}
#endif

View File

@ -28,6 +28,6 @@ class RclConfig;
// the real time indexer to purge entries when a top directory is // the real time indexer to purge entries when a top directory is
// renamed. This is really convoluted, I'd like a better way. // renamed. This is really convoluted, I'd like a better way.
extern bool subtreelist(RclConfig *config, const string& top, extern bool subtreelist(RclConfig *config, const string& top,
std::vector<std::string>& paths); std::vector<std::string>& paths);
#endif /* _SUBTREELIST_H_INCLUDED_ */ #endif /* _SUBTREELIST_H_INCLUDED_ */

View File

@ -35,13 +35,13 @@ class CirCache;
class RclConfig; class RclConfig;
class WebStore; class WebStore;
namespace Rcl { namespace Rcl {
class Db; class Db;
} }
class WebQueueIndexer : public FsTreeWalkerCB { class WebQueueIndexer : public FsTreeWalkerCB {
public: public:
WebQueueIndexer(RclConfig *cnf, Rcl::Db *db, WebQueueIndexer(RclConfig *cnf, Rcl::Db *db,
DbIxStatusUpdater *updfunc = 0); DbIxStatusUpdater *updfunc = 0);
~WebQueueIndexer(); ~WebQueueIndexer();
/** This is called by the top indexer in recollindex. /** This is called by the top indexer in recollindex.

View File

@ -35,23 +35,23 @@ bool WQDocFetcher::fetch(RclConfig* cnf, const Rcl::Doc& idoc, RawDoc& out)
{ {
string udi; string udi;
if (!idoc.getmeta(Rcl::Doc::keyudi, &udi) || udi.empty()) { if (!idoc.getmeta(Rcl::Doc::keyudi, &udi) || udi.empty()) {
LOGERR("WQDocFetcher:: no udi in idoc\n" ); LOGERR("WQDocFetcher:: no udi in idoc\n" );
return false; return false;
} }
Rcl::Doc dotdoc; Rcl::Doc dotdoc;
{ {
std::unique_lock<std::mutex> locker(o_beagler_mutex); std::unique_lock<std::mutex> locker(o_beagler_mutex);
// Retrieve from our webcache (beagle data). The beagler // Retrieve from our webcache (beagle data). The beagler
// object is created at the first call of this routine and // object is created at the first call of this routine and
// deleted when the program exits. // deleted when the program exits.
static WebStore o_beagler(cnf); static WebStore o_beagler(cnf);
if (!o_beagler.getFromCache(udi, dotdoc, out.data)) { if (!o_beagler.getFromCache(udi, dotdoc, out.data)) {
LOGINFO("WQDocFetcher::fetch: failed for [" << udi << "]\n"); LOGINFO("WQDocFetcher::fetch: failed for [" << udi << "]\n");
return false; return false;
} }
} }
if (dotdoc.mimetype.compare(idoc.mimetype)) { if (dotdoc.mimetype.compare(idoc.mimetype)) {
LOGINFO("WQDocFetcher:: udi [" << udi << "], mimetp mismatch: in: [" << LOGINFO("WQDocFetcher:: udi [" << udi << "], mimetp mismatch: in: [" <<
idoc.mimetype << "], bgl [" << dotdoc.mimetype << "]\n"); idoc.mimetype << "], bgl [" << dotdoc.mimetype << "]\n");
} }
out.kind = RawDoc::RDK_DATA; out.kind = RawDoc::RDK_DATA;

View File

@ -0,0 +1,64 @@
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <string.h>
#include <iostream>
#include <vector>
#include <string>
using namespace std;
#include "subtreelist.h"
#include "rclconfig.h"
#include "rclinit.h"
static char *thisprog;
static char usage [] = " <path> : list document paths in this tree\n";
// Print the program name and the usage text on stderr, then exit with
// status 1. Never returns.
static void Usage(void)
{
    std::cerr << thisprog;
    std::cerr << ": usage:" << endl;
    std::cerr << usage;
    exit(1);
}
static int op_flags;
#define OPT_o 0x2
int main(int argc, char **argv)
{
string top;
thisprog = argv[0];
argc--; argv++;
while (argc > 0 && **argv == '-') {
(*argv)++;
if (!(**argv))
/* Cas du "adb - core" */
Usage();
while (**argv)
switch (*(*argv)++) {
default: Usage(); break;
}
argc--; argv++;
}
if (argc < 1)
Usage();
top = *argv++;argc--;
string reason;
RclConfig *config = recollinit(0, 0, 0, reason, 0);
if (!config || !config->ok()) {
fprintf(stderr, "Recoll init failed: %s\n", reason.c_str());
exit(1);
}
vector<string> paths;
if (!subtreelist(config, top, paths)) {
cerr << "subtreelist failed" << endl;
exit(1);
}
for (vector<string>::const_iterator it = paths.begin();
it != paths.end(); it++) {
cout << *it << endl;
}
exit(0);
}