diff --git a/src/common/autoconfig-win.h b/src/common/autoconfig-win.h index d1852cf8..8c589ecc 100644 --- a/src/common/autoconfig-win.h +++ b/src/common/autoconfig-win.h @@ -133,7 +133,7 @@ /* #undef PUTENV_ARG_CONST */ /* Real time monitoring option */ -#undef RCL_MONITOR +#define RCL_MONITOR 1 /* Split camelCase words */ /* #undef RCL_SPLIT_CAMELCASE */ diff --git a/src/index/rclmonprc.cpp b/src/index/rclmonprc.cpp index 56251456..e3a46459 100644 --- a/src/index/rclmonprc.cpp +++ b/src/index/rclmonprc.cpp @@ -512,7 +512,7 @@ bool startMonitor(RclConfig *conf, int opts) modified.push_back(ev.m_path); break; case RclMonEvent::RCLEVT_DELETE: - LOGDEB0("Monitor: Delete on " << (ev.m_path) << "\n" ); + LOGDEB0("Monitor: Delete on " << ev.m_path << "\n"); // If this is for a directory (which the caller should // tell us because he knows), we should purge the db // of all the subtree, because on a directory rename, @@ -522,16 +522,20 @@ bool startMonitor(RclConfig *conf, int opts) // do it, and just wait for a restart to do a full run and // purge. deleted.push_back(ev.m_path); - if (ev.evflags() & RclMonEvent::RCLEVT_ISDIR) { +#ifndef _WIN32 + // We don't know the type of deleted entries on + // win32. So do the subtree things always. + if (ev.evflags() & RclMonEvent::RCLEVT_ISDIR) +#endif + { vector paths; if (subtreelist(conf, ev.m_path, paths)) { - deleted.insert(deleted.end(), - paths.begin(), paths.end()); + deleted.insert(deleted.end(), paths.begin(), paths.end()); } } break; default: - LOGDEB("Monitor: got Other on [" << (ev.m_path) << "]\n" ); + LOGDEB("Monitor: got Other on [" << ev.m_path << "]\n"); } } } diff --git a/src/index/rclmonrcv.cpp b/src/index/rclmonrcv.cpp index 80a1f7e4..dbcbb000 100644 --- a/src/index/rclmonrcv.cpp +++ b/src/index/rclmonrcv.cpp @@ -60,12 +60,10 @@ /** * Recoll real time monitor event receiver. This file has code to interface - * to FAM or inotify and place events on the event queue. + * to FAM, inotify, etc. and place events on the event queue. */ -/** A small virtual interface for monitors. Lets - * either fam/gamin or raw imonitor hide behind - */ +/** Virtual interface for the actual filesystem monitoring module. */ class RclMonitor { public: RclMonitor() {} @@ -75,8 +73,12 @@ public: virtual bool getEvent(RclMonEvent& ev, int msecs = -1) = 0; virtual bool ok() const = 0; // Does this monitor generate 'exist' events at startup? - virtual bool generatesExist() const = 0; - + virtual bool generatesExist() const { + return false; + } + virtual bool isRecursive() const { + return false; + } // Save significant errno after monitor calls int saved_errno{0}; }; @@ -126,9 +128,8 @@ public: if (!m_mon || !m_mon->ok()) return FsTreeWalker::FtwError; // We do nothing special if addWatch fails for a reasonable reason - if (!m_mon->addWatch(fn, true)) { - if (m_mon->saved_errno != EACCES && - m_mon->saved_errno != ENOENT) { + if (!m_mon->isRecursive() && !m_mon->addWatch(fn, true)) { + if (m_mon->saved_errno != EACCES && m_mon->saved_errno != ENOENT) { LOGINF("walkerCB: addWatch failed\n"); return FsTreeWalker::FtwError; } @@ -144,8 +145,8 @@ public: // monitoring ? There should be another way: maybe start // monitoring without actually handling events (just // queue), then run incremental then start handling - // events ? But we also have to do it on a directory - // move! So keep it + // events ? ** But we also have to do it on a directory + // move! So keep it ** We could probably skip it on the initial run though. RclMonEvent ev; ev.m_path = fn; ev.m_etyp = RclMonEvent::RCLEVT_MODIFY; @@ -161,6 +162,96 @@ private: FsTreeWalker& m_walker; }; +static bool rclMonAddTopWatches( + FsTreeWalker& walker, RclConfig& lconfig, RclMonitor *mon, RclMonEventQueue *queue) +{ + // Get top directories from config. Special monitor sublist if + // set, else full list. + vector tdl = lconfig.getTopdirs(true); + if (tdl.empty()) { + LOGERR("rclMonRcvRun:: top directory list (topdirs param.) not found " + "in configuration or topdirs list parse error"); + queue->setTerminate(); + return false; + } + // Walk the directory trees to add watches + WalkCB walkcb(&lconfig, mon, queue, walker); + for (const auto& dir : tdl) { + lconfig.setKeyDir(dir); + // Adjust the follow symlinks options + bool follow; + if (lconfig.getConfParam("followLinks", &follow) && follow) { + walker.setOpts(FsTreeWalker::FtwFollow); + } else { + walker.setOpts(FsTreeWalker::FtwOptNone); + } + if (path_isdir(dir, follow)) { + LOGDEB("rclMonRcvRun: walking " << dir << "\n"); + // If the fs watcher is recursive, we add the watches for the topdirs here, and walk the + // tree just for generating initial events. + if (mon->isRecursive() && !mon->addWatch(dir, true)) { + if (mon->saved_errno != EACCES && mon->saved_errno != ENOENT) { + LOGERR("rclMonAddTopWatches: addWatch failed for [" << dir << "]\n"); + return false; + } + } + if (walker.walk(dir, walkcb) != FsTreeWalker::FtwOk) { + LOGERR("rclMonRcvRun: tree walk failed\n"); + return false; + } + if (walker.getErrCnt() > 0) { + LOGINFO("rclMonRcvRun: fs walker errors: " << walker.getReason() << "\n"); + } + } else { + // We have to special-case regular files which are part of the topdirs list because the + // tree walker only adds watches for directories + if (!mon->addWatch(dir, false)) { + LOGSYSERR("rclMonRcvRun", "addWatch", dir); + } + } + } + + bool doweb = false; + lconfig.getConfParam("processwebqueue", &doweb); + if (doweb) { + string webqueuedir = lconfig.getWebQueueDir(); + if (!mon->addWatch(webqueuedir, true)) { + LOGERR("rclMonRcvRun: addwatch (webqueuedir) failed\n"); + if (mon->saved_errno != EACCES && mon->saved_errno != ENOENT) + return false; + } + } + return true; +} + +static bool rclMonAddSubWatches( + const std::string& path, FsTreeWalker& walker, RclConfig& lconfig, + RclMonitor *mon, RclMonEventQueue *queue) +{ + WalkCB walkcb(&lconfig, mon, queue, walker); + if (walker.walk(path, walkcb) != FsTreeWalker::FtwOk) { + LOGERR("rclMonRcvRun: walking new dir " << path << " : " << walker.getReason() << "\n"); + return false; + } + if (walker.getErrCnt() > 0) { + LOGINFO("rclMonRcvRun: fs walker errors: " << walker.getReason() << "\n"); + } + return true; +} + +// Don't push events for skipped files. This would get filtered on the processing side +// anyway, but causes unnecessary wakeups and messages. Do not test skippedPaths here, +// this would be incorrect (because a topdir can be under a skippedPath and this was +// handled while adding the watches). Also we let the other side process onlyNames. +static bool rclMonShouldSkip(const std::string& path, RclConfig& lconfig, FsTreeWalker& walker) +{ + lconfig.setKeyDir(path_getfather(path)); + walker.setSkippedNames(lconfig.getSkippedNames()); + if (walker.inSkippedNames(path_getsimple(path))) + return true; + return false; +} + // Main thread routine: create watches, then forever wait for and queue events void *rclMonRcvRun(void *q) { @@ -181,100 +272,34 @@ void *rclMonRcvRun(void *q) return 0; } - // Get top directories from config. Special monitor sublist if - // set, else full list. - vector tdl = lconfig.getTopdirs(true); - if (tdl.empty()) { - LOGERR("rclMonRcvRun:: top directory list (topdirs param.) not found " - "in configuration or topdirs list parse error"); - queue->setTerminate(); - return 0; - } - - // Walk the directory trees to add watches FsTreeWalker walker; walker.setSkippedPaths(lconfig.getDaemSkippedPaths()); - WalkCB walkcb(&lconfig, mon, queue, walker); - for (const auto& dir : tdl) { - lconfig.setKeyDir(dir); - // Adjust the follow symlinks options - bool follow; - if (lconfig.getConfParam("followLinks", &follow) && - follow) { - walker.setOpts(FsTreeWalker::FtwFollow); - } else { - walker.setOpts(FsTreeWalker::FtwOptNone); - } - // We have to special-case regular files which are part of the topdirs - // list because we the tree walker only adds watches for directories - if (path_isdir(dir, follow)) { - LOGDEB("rclMonRcvRun: walking " << dir << "\n"); - if (walker.walk(dir, walkcb) != FsTreeWalker::FtwOk) { - LOGERR("rclMonRcvRun: tree walk failed\n"); - goto terminate; - } - if (walker.getErrCnt() > 0) { - LOGINFO("rclMonRcvRun: fs walker errors: " << walker.getReason() << "\n"); - } - } else { - if (!mon->addWatch(dir, false)) { - LOGSYSERR("rclMonRcvRun", "addWatch", dir); - } - } - } - { - bool doweb = false; - lconfig.getConfParam("processwebqueue", &doweb); - if (doweb) { - string webqueuedir = lconfig.getWebQueueDir(); - if (!mon->addWatch(webqueuedir, true)) { - LOGERR("rclMonRcvRun: addwatch (webqueuedir) failed\n"); - if (mon->saved_errno != EACCES && mon->saved_errno != ENOENT) - goto terminate; - } - } + if (!rclMonAddTopWatches(walker, lconfig, mon, queue)) { + LOGERR("rclMonRcvRun: addtopwatches failed\n"); + goto terminate; } // Forever wait for monitoring events and add them to queue: MONDEB("rclMonRcvRun: waiting for events. q->ok(): " << queue->ok() << "\n"); while (queue->ok() && mon->ok()) { RclMonEvent ev; - // Note: I could find no way to get the select - // call to return when a signal is delivered to the process - // (it goes to the main thread, from which I tried to close or - // write to the select fd, with no effect). So set a - // timeout so that an intr will be detected + // Note: I could find no way to get the select call to return when a signal is delivered to + // the process (it goes to the main thread, from which I tried to close or write to the + // select fd, with no effect). So set a timeout so that an intr will be detected if (mon->getEvent(ev, 2000)) { - // Don't push events for skipped files. This would get - // filtered on the processing side anyway, but causes - // unnecessary wakeups and messages. Do not test - // skippedPaths here, this would be incorrect (because a - // topdir can be under a skippedPath and this was handled - // while adding the watches). - // Also we let the other side process onlyNames. - lconfig.setKeyDir(path_getfather(ev.m_path)); - walker.setSkippedNames(lconfig.getSkippedNames()); - if (walker.inSkippedNames(path_getsimple(ev.m_path))) + if (rclMonShouldSkip(ev.m_path, lconfig, walker)) continue; if (ev.m_etyp == RclMonEvent::RCLEVT_DIRCREATE) { - // Recursive addwatch: there may already be stuff - // inside this directory. Ie: files were quickly - // created, or this is actually the target of a - // directory move. This is necessary for inotify, but - // it seems that fam/gamin is doing the job for us so - // that we are generating double events here (no big - // deal as prc will sort/merge). + // Recursive addwatch: there may already be stuff inside this directory. E.g.: files + // were quickly created, or this is actually the target of a directory move. This is + // necessary for inotify, but it seems that fam/gamin is doing the job for us so + // that we are generating double events here (no big deal as prc will sort/merge). LOGDEB("rclMonRcvRun: walking new dir " << ev.m_path << "\n"); - if (walker.walk(ev.m_path, walkcb) != FsTreeWalker::FtwOk) { - LOGERR("rclMonRcvRun: walking new dir " << ev.m_path << - " : " << walker.getReason() << "\n"); + if (!rclMonAddSubWatches(ev.m_path, walker, lconfig, mon, queue)) { goto terminate; } - if (walker.getErrCnt() > 0) { - LOGINFO("rclMonRcvRun: fs walker errors: " << walker.getReason() << "\n"); - } } if (ev.m_etyp != RclMonEvent::RCLEVT_NONE) @@ -298,7 +323,7 @@ bool eraseWatchSubTree(map& idtopath, const string& top) while (it != idtopath.end()) { if (it->second.find(top) == 0) { found = true; - idtopath.erase(it++); + it = idtopath.erase(it); } else { it++; } @@ -557,7 +582,6 @@ public: virtual bool addWatch(const string& path, bool isdir); virtual bool getEvent(RclMonEvent& ev, int msecs = -1); bool ok() const {return m_ok;} - virtual bool generatesExist() const {return false;} private: bool m_ok; @@ -749,22 +773,17 @@ bool RclIntf::getEvent(RclMonEvent& ev, int msecs) #ifdef _WIN32 - /* - * WIN32 VERSION ISSUES: + * WIN32 VERSION NOTES: * - * - It appears that watching a subdirectory of a given directory - * prevents renaming the top directory, Windows says: can't rename - * because open or a file in it is open. This is a major issue of - * course. Check if this can be solved by using a recursive watch - * instead of setting watches on all subdirs. Would need a code - * changes in the "generic" part of course. - * - In general, directory renames need more studying. - * - Otherwise appears to more or less work... + * - When using non-recursive watches (one per dir), it appeared that + * watching a subdirectory of a given directory prevented renaming + * the top directory, Windows says: can't rename because open or a + * file in it is open. This is mostly why we use recursive watches + * on the topdirs only. */ #include #include -#include #include #include @@ -776,33 +795,29 @@ class RclFSWatchWin32; enum class Action {Add = 1, Delete = 2, Modify = 3, Move = 4}; +// Virtual interface for the monitor callback. Note: this for compatibility with the efsw code, as +// rclmon uses a pull, not push interface. The callback pushes the events to a local queue from +// which they are then pulled by the upper level code. class FileWatchListener { public: virtual ~FileWatchListener() {} - - /// Handles the action file action - /// @param watchid The watch id for the directory - /// @param dir The directory - /// @param filename The filename that was accessed (not full path) - /// @param action Action that was performed - /// @param oldFilename The name of the file or directory moved virtual void handleFileAction(WatchID watchid, const std::string& dir, const std::string& fn, Action action, bool isdir, std::string oldfn = "" ) = 0; }; -// Internal watch data +// Internal watch data. This piggy-back our actual data pointer to the MS overlapped pointer. This +// is a bit of a hack, and we could probably use event Ids instead. struct WatcherStructWin32 { OVERLAPPED Overlapped; WatcherWin32 *Watch; }; +// Actual data structure for one directory watch class WatcherWin32 { public: - WatcherWin32() {} - WatchID ID; - FileWatchListener *Listener; + FileWatchListener *Listener{nullptr}; bool Recursive; std::string DirName; std::string OldFileName; @@ -816,6 +831,7 @@ public: RclFSWatchWin32 *Watch{nullptr}; }; +// The efsw top level file system watcher: manages all the directory watches. class RclFSWatchWin32 { public: RclFSWatchWin32(); @@ -850,16 +866,14 @@ private: void removeAllWatches(); }; +// Adapter for the rclmon interface class RclMonitorWin32 : public RclMonitor, public FileWatchListener { public: - RclMonitorWin32() { - MONDEB("RclMonitorWin32::RclMonitorWin32\n"); - } virtual ~RclMonitorWin32() {} virtual bool addWatch(const string& path, bool /*isDir*/) override { MONDEB("RclMonitorWin32::addWatch: " << path << "\n"); - return m_fswatcher.addWatch(path, this, false) != -1; + return m_fswatcher.addWatch(path, this, true) != -1; } virtual bool getEvent(RclMonEvent& ev, int msecs = -1) { @@ -885,6 +899,10 @@ public: virtual bool generatesExist() const override { return false; } + // Can the caller avoid setting watches on subdirs ? + virtual bool isRecursive() const override { + return true; + } virtual void handleFileAction(WatchID watchid, const std::string& dir, const std::string& fn, Action action, bool isdir, std::string oldfn = "") { MONDEB("RclMonitorWin32::handleFileAction: dir [" << dir << "] fn [" << fn << "] act " << @@ -996,8 +1014,6 @@ RclFSWatchWin32::~RclFSWatchWin32() PostQueuedCompletionStatus(mIOCP, 0, reinterpret_cast(this), NULL); } - // delete mThread ?? - removeAllWatches(); CloseHandle(mIOCP); @@ -1010,7 +1026,7 @@ WatchID RclFSWatchWin32::addWatch(const std::string& _dir,FileWatchListener *wat path_slashize(dir); if (!path_isdir(dir)) { LOGDEB("RclFSWatchWin32::addWatch: not a directory: " << dir << "\n"); - return -1; + return 0; } if (!path_readable(dir)) { LOGINF("RclFSWatchWin32::addWatch: not readable: " << dir << "\n"); @@ -1063,7 +1079,7 @@ void RclFSWatchWin32::removeAllWatches() mWatches.clear(); } -/// Unpacks events and passes them to the event processor +// Unpacks events and passes them to the event processor void CALLBACK WatchCallback(DWORD dwNumberOfBytesTransfered, LPOVERLAPPED lpOverlapped) { if (dwNumberOfBytesTransfered == 0 || NULL == lpOverlapped) { @@ -1115,9 +1131,11 @@ void RclFSWatchWin32::run(DWORD msecs) } } -void RclFSWatchWin32::handleAction(WatcherWin32 *watch, const std::string& fn, unsigned long action) +void RclFSWatchWin32::handleAction(WatcherWin32 *watch, const std::string& _fn, unsigned long action) { + std::string fn(_fn); Action fwAction; + path_slashize(fn); MONDEB("handleAction: fn [" << fn << "] action " << action << "\n"); // In case fn is not a simple name but a relative path (probably @@ -1154,8 +1172,9 @@ void RclFSWatchWin32::handleAction(WatcherWin32 *watch, const std::string& fn, u case FILE_ACTION_RENAMED_NEW_NAME: { fwAction = Action::Move; - // If this is a directory, possibly update the watches. - // TBD: this seems wrong because we should process the whole subtree ? + // If this is a directory, possibly update the watches. TBD: this seems wrong because we + // should process the whole subtree ? Also probably not needed at all because we are + // recursive and only set watches on the top directories. if (isdir) { // Update the new directory path std::string oldpath = path_cat(watch->DirName, watch->OldFileName); diff --git a/src/index/subtreelist.cpp b/src/index/subtreelist.cpp index 40018aeb..4c4c2f9c 100644 --- a/src/index/subtreelist.cpp +++ b/src/index/subtreelist.cpp @@ -26,13 +26,18 @@ #include "subtreelist.h" #include "log.h" -bool subtreelist(RclConfig *config, const string& top, - vector& paths) +bool subtreelist(RclConfig *config, const string& _top, vector& paths) { - LOGDEB("subtreelist: top: [" << (top) << "]\n" ); + std::string top(_top); +#ifdef _WIN32 + // Need to convert c:path to /c/path because this is how paths are indexed + top = path_slashdrive(top); +#endif + + LOGDEB("subtreelist: top: [" << top << "]\n"); Rcl::Db rcldb(config); if (!rcldb.open(Rcl::Db::DbRO)) { - LOGERR("subtreelist: can't open database in [" << config->getDbDir() << + LOGERR("subtreelist: can't open index in [" << config->getDbDir() << "]: " << rcldb.getReason() << "\n"); return false; } diff --git a/src/utils/rclutil.cpp b/src/utils/rclutil.cpp index d33f79bb..239f446e 100644 --- a/src/utils/rclutil.cpp +++ b/src/utils/rclutil.cpp @@ -310,23 +310,30 @@ bool printableUrl(const string& fcharset, const string& in, string& out) return true; } +#ifdef _WIN32 +// Convert X:/path to /X/path for path splitting inside the index +string path_slashdrive(const string& path) +{ + string npath; + if (path_hasdrive(path)) { + npath.append(1, '/'); + npath.append(1, path[0]); + if (path_isdriveabs(path)) { + npath.append(path.substr(2)); + } else { + // This should be an error really + npath.append(1, '/'); + npath.append(path.substr(2)); + } + } + return npath; +} +#endif // _WIN32 + string url_gpathS(const string& url) { #ifdef _WIN32 - string u = url_gpath(url); - string nu; - if (path_hasdrive(u)) { - nu.append(1, '/'); - nu.append(1, u[0]); - if (path_isdriveabs(u)) { - nu.append(u.substr(2)); - } else { - // This should be an error really - nu.append(1, '/'); - nu.append(u.substr(2)); - } - } - return nu; + return path_slashdrive(url_gpath(url)); #else return url_gpath(url); #endif diff --git a/src/utils/rclutil.h b/src/utils/rclutil.h index 3187da29..38b4d9d2 100644 --- a/src/utils/rclutil.h +++ b/src/utils/rclutil.h @@ -48,6 +48,9 @@ extern bool printableUrl(const std::string& fcharset, const std::string& in, std::string& out); /// Same but, in the case of a Windows local path, also turn "c:/" into /// "/c/" This should be used only for splitting the path in rcldb. +#ifdef _WIN32 +extern std::string path_slashdrive(const std::string& path); +#endif extern std::string url_gpathS(const std::string& url); /// Like strftime but guaranteed utf-8 output (esp. useful on Windows)