diff --git a/packaging/FreeBSD/recoll/pkg-plist b/packaging/FreeBSD/recoll/pkg-plist index bfcd9397..212639d4 100644 --- a/packaging/FreeBSD/recoll/pkg-plist +++ b/packaging/FreeBSD/recoll/pkg-plist @@ -14,8 +14,8 @@ share/pixmaps/recoll.png %%DATADIR%%/filters/hotrecoll.py %%DATADIR%%/filters/rclabw %%DATADIR%%/filters/rclaptosidman -%%DATADIR%%/filters/rclaudio -%%DATADIR%%/filters/rclchm +%%DATADIR%%/filters/rclaudio.py +%%DATADIR%%/filters/rclchm.py %%DATADIR%%/filters/rcldjvu %%DATADIR%%/filters/rcldoc %%DATADIR%%/filters/rcldvi @@ -23,11 +23,11 @@ share/pixmaps/recoll.png %%DATADIR%%/filters/rclfb2 %%DATADIR%%/filters/rclflac %%DATADIR%%/filters/rclgaim -%%DATADIR%%/filters/rclics +%%DATADIR%%/filters/rclics.py %%DATADIR%%/filters/rclid3 %%DATADIR%%/filters/rclimg -%%DATADIR%%/filters/rclinfo -%%DATADIR%%/filters/rclkar +%%DATADIR%%/filters/rclinfo.py +%%DATADIR%%/filters/rclkar.py %%DATADIR%%/filters/rclkwd %%DATADIR%%/filters/rcllatinclass.py %%DATADIR%%/filters/rcllatinstops.zip @@ -41,7 +41,7 @@ share/pixmaps/recoll.png %%DATADIR%%/filters/rclps %%DATADIR%%/filters/rclpurple %%DATADIR%%/filters/rclpython -%%DATADIR%%/filters/rclrar +%%DATADIR%%/filters/rclrar.py %%DATADIR%%/filters/rclrtf %%DATADIR%%/filters/rclscribus %%DATADIR%%/filters/rclshowinfo @@ -51,11 +51,11 @@ share/pixmaps/recoll.png %%DATADIR%%/filters/rcltex %%DATADIR%%/filters/rcltext %%DATADIR%%/filters/rcluncomp -%%DATADIR%%/filters/rclwar +%%DATADIR%%/filters/rclwar.py %%DATADIR%%/filters/rclwpd %%DATADIR%%/filters/rclxls -%%DATADIR%%/filters/rclzip -%%DATADIR%%/filters/rcl7z +%%DATADIR%%/filters/rclzip.py +%%DATADIR%%/filters/rcl7z.py %%DATADIR%%/filters/xdg-open %%DATADIR%%/images/aptosid-book.png %%DATADIR%%/images/aptosid-manual.png diff --git a/packaging/debian/buildppa.sh b/packaging/debian/buildppa.sh index 310777b8..5b59a6ef 100644 --- a/packaging/debian/buildppa.sh +++ b/packaging/debian/buildppa.sh @@ -85,7 +85,7 @@ done ### KIO. 
-series="bionic focal groovy hirsute impish" +#series="bionic focal hirsute impish" series= debdir=debiankio diff --git a/packaging/homebrew/recoll.rb b/packaging/homebrew/recoll.rb index 20d55b31..a12c5377 100644 --- a/packaging/homebrew/recoll.rb +++ b/packaging/homebrew/recoll.rb @@ -60,40 +60,40 @@ index f41a9f39..dc3085a4 100755 # # This Source Code Form is subject to the terms of the Mozilla Public # License, v. 2.0. If a copy of the MPL was not distributed with this -diff --git filters/rcl7z filters/rcl7z +diff --git filters/rcl7z.py filters/rcl7z.py index c68c8bcb..ac50c4ec 100755 ---- filters/rcl7z -+++ filters/rcl7z +--- filters/rcl7z.py ++++ filters/rcl7z.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python2.7 # 7-Zip file filter for Recoll -diff --git filters/rclaudio filters/rclaudio +diff --git filters/rclaudio.py filters/rclaudio.py index 94ca0be7..08d6375a 100755 ---- filters/rclaudio -+++ filters/rclaudio +--- filters/rclaudio.py ++++ filters/rclaudio.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python2.7 # Audio tag filter for Recoll, using mutagen -diff --git filters/rclchm filters/rclchm +diff --git filters/rclchm.py filters/rclchm.py index f9811c37..3bc9b16d 100755 ---- filters/rclchm -+++ filters/rclchm +--- filters/rclchm.py ++++ filters/rclchm.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python2.7 """Extract Html files from a Microsoft Compiled Html Help file (.chm) Needs at least python 2.2 for HTMLParser (chmlib needs 2.2 too)""" -diff --git filters/rcldia filters/rcldia +diff --git filters/rcldia.py filters/rcldia.py index 282148eb..a480294b 100755 ---- filters/rcldia -+++ filters/rcldia +--- filters/rcldia.py ++++ filters/rcldia.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python2.7 @@ -120,30 +120,30 @@ index e8fa1831..b92b185d 100755 from __future__ import print_function import rclexecm -diff --git filters/rclepub filters/rclepub +diff --git filters/rclepub.py filters/rclepub.py 
index 8042d7f9..51786af1 100755 ---- filters/rclepub -+++ filters/rclepub +--- filters/rclepub.py ++++ filters/rclepub.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python2.7 """Extract Html content from an EPUB file (.epub)""" from __future__ import print_function -diff --git filters/rclepub1 filters/rclepub1 +diff --git filters/rclepub1.py filters/rclepub1.py index bd44f635..a7ea6c06 100755 ---- filters/rclepub1 -+++ filters/rclepub1 +--- filters/rclepub1.py ++++ filters/rclepub1.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python2.7 """Extract Html content from an EPUB file (.chm), concatenating all sections""" from __future__ import print_function -diff --git filters/rclics filters/rclics +diff --git filters/rclics.py filters/rclics.py index 0ef04f2d..de177024 100755 ---- filters/rclics -+++ filters/rclics +--- filters/rclics.py ++++ filters/rclics.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python2.7 @@ -160,20 +160,20 @@ index 7eb1da91..4eb6c9b0 100755 # Python-based Image Tag extractor for Recoll. 
This is less thorough # than the Perl-based rclimg script, but useful if you don't want to -diff --git filters/rclinfo filters/rclinfo +diff --git filters/rclinfo.py filters/rclinfo.py index f353d19e..36cf34e0 100755 ---- filters/rclinfo -+++ filters/rclinfo +--- filters/rclinfo.py ++++ filters/rclinfo.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python2.7 # Read a file in GNU info format and output its nodes as subdocs, # interfacing with recoll execm -diff --git filters/rclkar filters/rclkar +diff --git filters/rclkar.py filters/rclkar.py index d6570dd5..34b8d2a2 100755 ---- filters/rclkar -+++ filters/rclkar +--- filters/rclkar.py ++++ filters/rclkar.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python2.7 @@ -230,10 +230,10 @@ index 615455b3..1e411890 100755 # -*- coding: iso-8859-1 -*- """ MoinMoin - Python source parser and colorizer -diff --git filters/rclrar filters/rclrar +diff --git filters/rclrar.py filters/rclrar.py index 8f723fa5..5f6adfb0 100755 ---- filters/rclrar -+++ filters/rclrar +--- filters/rclrar.py ++++ filters/rclrar.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python2.7 @@ -280,10 +280,10 @@ index 8c1b8aea..cee17324 100755 # Copyright (C) 2014 J.F.Dockes # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by -diff --git filters/rcltar filters/rcltar +diff --git filters/rcltar.py filters/rcltar.py index d8bf100d..ab4b306e 100755 ---- filters/rcltar -+++ filters/rcltar +--- filters/rcltar.py ++++ filters/rcltar.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python2.7 @@ -320,10 +320,10 @@ index 32a11c1a..eab3b257 100644 from __future__ import print_function import rclexecm -diff --git filters/rclwar filters/rclwar +diff --git filters/rclwar.py filters/rclwar.py index b654f3b3..301e28e9 100755 ---- filters/rclwar -+++ filters/rclwar +--- filters/rclwar.py ++++ filters/rclwar.py @@ -1,4 +1,4 @@ 
-#!/usr/bin/env python2 +#!/usr/bin/env python2.7 @@ -360,10 +360,10 @@ index 158e1222..602769af 100755 # Copyright (C) 2016 J.F.Dockes # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by -diff --git filters/rclzip filters/rclzip +diff --git filters/rclzip.py filters/rclzip.py index 35739625..0c597fbd 100755 ---- filters/rclzip -+++ filters/rclzip +--- filters/rclzip.py ++++ filters/rclzip.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python2.7 diff --git a/src/Makefile.am b/src/Makefile.am index de99959e..7ac3d2d1 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -686,31 +686,31 @@ filters/openxml-xls-body.xsl \ filters/openxml-word-body.xsl \ filters/openxml-meta.xsl \ filters/ppt-dump.py \ -filters/rcl7z \ +filters/rcl7z.py \ filters/rclaptosidman \ -filters/rclaudio \ +filters/rclaudio.py \ filters/rclbasehandler.py \ filters/rclbibtex.sh \ filters/rclcheckneedretry.sh \ -filters/rclchm \ -filters/rcldia \ +filters/rclchm.py \ +filters/rcldia.py \ filters/rcldjvu.py \ filters/rcldoc.py \ filters/rcldvi \ -filters/rclepub \ -filters/rclepub1 \ +filters/rclepub.py \ +filters/rclepub1.py \ filters/rclexec1.py \ filters/rclexecm.py \ filters/rclfb2.py \ filters/rclgaim \ filters/rclgenxslt.py \ filters/rclhwp.py \ -filters/rclics \ +filters/rclics.py \ filters/rclimg \ filters/rclimg.py \ -filters/rclinfo \ +filters/rclinfo.py \ filters/rclipynb.py \ -filters/rclkar \ +filters/rclkar.py \ filters/rclkwd \ filters/rcllatinclass.py \ filters/rcllatinstops.zip \ @@ -729,21 +729,21 @@ filters/rclps \ filters/rclpst.py \ filters/rclpurple \ filters/rclpython.py \ -filters/rclrar \ +filters/rclrar.py \ filters/rclrtf.py \ filters/rclscribus \ filters/rclshowinfo \ -filters/rcltar \ +filters/rcltar.py \ filters/rcltex \ filters/rcltext.py \ filters/rcluncomp \ filters/rcluncomp.py \ -filters/rclwar \ +filters/rclwar.py \ filters/rclxls.py \ filters/rclxml.py \ 
filters/rclxmp.py \ filters/rclxslt.py \ -filters/rclzip \ +filters/rclzip.py \ filters/recoll-we-move-files.py \ filters/recollepub.zip \ filters/svg.xsl \ diff --git a/src/README b/src/README index 75b13be0..30a2764e 100644 --- a/src/README +++ b/src/README @@ -2763,8 +2763,8 @@ Chapter 4. Programming interface If you can program and want to write an execm handler, it should not be too difficult to make sense of one of the existing modules. For example, - look at rclzip which uses Zip file paths as identifiers (ipath), and - rclics, which uses an integer index. Also have a look at the comments + look at rclzip.py which uses Zip file paths as identifiers (ipath), and + rclics.py, which uses an integer index. Also have a look at the comments inside the internfile/mh_execm.h file and possibly at the corresponding module. @@ -2819,7 +2819,7 @@ Chapter 4. Programming interface text/rtf = exec unrtf --nopict --html; charset=iso-8859-1; mimetype=text/html - application/x-chm = execm rclchm + application/x-chm = execm rclchm.py The fragment specifies that: diff --git a/src/RECOLL-VERSION.txt b/src/RECOLL-VERSION.txt index deade24a..359c4108 100644 --- a/src/RECOLL-VERSION.txt +++ b/src/RECOLL-VERSION.txt @@ -1 +1 @@ -1.31.6 +1.32.0 diff --git a/src/common/autoconfig-mac.h b/src/common/autoconfig-mac.h index c6baab26..9f172caa 100644 --- a/src/common/autoconfig-mac.h +++ b/src/common/autoconfig-mac.h @@ -125,7 +125,7 @@ #define PACKAGE_NAME "Recoll" /* Define to the full name and version of this package. */ -#define PACKAGE_STRING "Recoll 1.31.6" +#define PACKAGE_STRING "Recoll 1.32.0" /* Define to the one symbol short name of this package. */ #define PACKAGE_TARNAME "recoll" @@ -134,7 +134,7 @@ #define PACKAGE_URL "" /* Define to the version of this package. 
*/ -#define PACKAGE_VERSION "1.31.6" +#define PACKAGE_VERSION "1.32.0" /* putenv parameter is const */ /* #undef PUTENV_ARG_CONST */ diff --git a/src/common/autoconfig-win.h b/src/common/autoconfig-win.h index d1852cf8..2cb98f56 100644 --- a/src/common/autoconfig-win.h +++ b/src/common/autoconfig-win.h @@ -118,7 +118,7 @@ #define PACKAGE_NAME "Recoll" /* Define to the full name and version of this package. */ -#define PACKAGE_STRING "Recoll 1.31.6" +#define PACKAGE_STRING "Recoll 1.32.0" /* Define to the one symbol short name of this package. */ #define PACKAGE_TARNAME "recoll" @@ -127,13 +127,13 @@ #define PACKAGE_URL "" /* Define to the version of this package. */ -#define PACKAGE_VERSION "1.31.6" +#define PACKAGE_VERSION "1.32.0" /* putenv parameter is const */ /* #undef PUTENV_ARG_CONST */ /* Real time monitoring option */ -#undef RCL_MONITOR +#define RCL_MONITOR 1 /* Split camelCase words */ /* #undef RCL_SPLIT_CAMELCASE */ diff --git a/src/common/rclconfig.cpp b/src/common/rclconfig.cpp index e1fd78f0..490d30ea 100644 --- a/src/common/rclconfig.cpp +++ b/src/common/rclconfig.cpp @@ -58,6 +58,16 @@ using namespace std; +// Naming the directory for platform-specific default config files, overriding the top-level ones +// E.g. /usr/share/recoll/examples/windows +#ifdef _WIN32 +static const string confsysdir{"windows"}; +#elif defined(__APPLE__) +static const string confsysdir{"macos"}; +#else +static const string confsysdir; +#endif + // Static, logically const, RclConfig members or module static // variables are initialized once from the first object build during // process initialization. 
@@ -303,8 +313,15 @@ RclConfig::RclConfig(const string *argcnf) m_cdirs.push_back(cp); } - // Base/installation config - m_cdirs.push_back(path_cat(m_datadir, "examples")); + // Base/installation config, and its platform-specific overrides + std::string defaultsdir = path_cat(m_datadir, "examples"); + if (!confsysdir.empty()) { + std::string sdir = path_cat(defaultsdir, confsysdir); + if (path_isdir(sdir)) { + m_cdirs.push_back(sdir); + } + } + m_cdirs.push_back(defaultsdir); string cnferrloc; for (const auto& dir : m_cdirs) { @@ -376,6 +393,7 @@ bool RclConfig::updateMainConfig() { ConfStack *newconf = new ConfStack("recoll.conf", m_cdirs, true); if (newconf == 0 || !newconf->ok()) { + std::cerr << "updateMainConfig: new Confstack not ok\n"; if (m_conf) return false; m_ok = false; @@ -1633,6 +1651,7 @@ vector RclConfig::getDaemSkippedPaths() const // and filtersdir from the config file to the PATH, then use execmd::which() string RclConfig::findFilter(const string &icmd) const { + LOGDEB2("findFilter: " << icmd << "\n"); // If the path is absolute, this is it if (path_isabsolute(icmd)) return icmd; @@ -1680,13 +1699,19 @@ bool RclConfig::processFilterCmd(std::vector& cmd) const LOGDEB0("processFilterCmd: in: " << stringsToString(cmd) << "\n"); auto it = cmd.begin(); - // Special-case python and perl on windows: we need to also locate the - // first argument which is the script name "python somescript.py". - // On Unix, thanks to #!, we usually just run "somescript.py", but need - // the same change if we ever want to use the same cmd line as windows - bool hasinterp = !stringlowercmp("python", *it) || - !stringlowercmp("perl", *it); - +#ifdef _WIN32 + // Special-case interpreters on windows: we used to have an additional 1st argument "python" in + // mimeconf, but we now rely on the .py extension for better sharing of mimeconf. 
+ std::string ext = path_suffix(*it); + if ("py" == ext) { + it = cmd.insert(it, findFilter("python")); + it++; + } else if ("pl" == ext) { + it = cmd.insert(it, findFilter("perl")); + it++; + } +#endif + // Note that, if the cmd vector size is 1, post-incrementing the // iterator in the following statement, which works on x86, leads // to a crash on ARM with gcc 6 and 8 (at least), which does not @@ -1694,25 +1719,15 @@ bool RclConfig::processFilterCmd(std::vector& cmd) const // whatever... We do it later then. *it = findFilter(*it); - if (hasinterp) { - if (cmd.size() < 2) { - LOGERR("processFilterCmd: python/perl cmd: no script?. [" << - stringsToString(cmd) << "]\n"); - return false; - } else { - ++it; - *it = findFilter(*it); - } - } LOGDEB0("processFilterCmd: out: " << stringsToString(cmd) << "\n"); return true; } -bool RclConfig::pythonCmd(const std::string& scriptname, - std::vector& cmd) const +// This now does nothing more than processFilterCmd (after we changed to relying on the py extension) +bool RclConfig::pythonCmd(const std::string& scriptname, std::vector& cmd) const { #ifdef _WIN32 - cmd = {"python", scriptname}; + cmd = {scriptname}; #else cmd = {scriptname}; #endif diff --git a/src/common/textsplit.cpp b/src/common/textsplit.cpp index 49b234c0..be61c917 100644 --- a/src/common/textsplit.cpp +++ b/src/common/textsplit.cpp @@ -596,6 +596,7 @@ bool TextSplit::text_to_words(const string &in) clearsplitstate(); bool pagepending = false; + bool nlpending = false; bool softhyphenpending = false; // Running count of non-alphanum chars. 
Reset when we see one; @@ -705,6 +706,10 @@ bool TextSplit::text_to_words(const string &in) pagepending = false; newpage(m_wordpos); } + if (nlpending) { + nlpending = false; + newline(m_wordpos); + } break; case WILD: @@ -745,6 +750,12 @@ bool TextSplit::text_to_words(const string &in) break; } } else { + // Note about dangling hyphens: we always strip '-' found before whitespace, + // even before a newline, then generate two terms, before and after the line + // break. We have no way to know if '-' is there because a word was broken by + // justification or if it was part of an actual compound word (would need a + // dictionary to check). As soft-hyphen *should* be used if the '-' is not part + // of the text. if (nextc == -1 || isvisiblewhite(nextc)) { goto SPACE; } @@ -844,19 +855,10 @@ bool TextSplit::text_to_words(const string &in) break; case '\n': + nlpending = true; + /* FALLTHROUGH */ case '\r': - if (m_span.length() && *m_span.rbegin() == '-') { - // if '-' is the last char before end of line, we - // strip it. We have no way to know if this is added - // because of the line split or if it was part of an - // actual compound word (would need a dictionary to - // check). As soft-hyphen *should* be used if the '-' - // is not part of the text, it is better to properly - // process a real compound word, and produce wrong - // output from wrong text. The word-emitting routine - // will strip the trailing '-'. - goto SPACE; - } else if (softhyphenpending) { + if (softhyphenpending) { // Don't reset soft-hyphen continue; } else { diff --git a/src/common/textsplit.h b/src/common/textsplit.h index 0821ee04..c09e867f 100644 --- a/src/common/textsplit.h +++ b/src/common/textsplit.h @@ -73,6 +73,9 @@ public: * just don't know about pages. */ virtual void newpage(int /*pos*/) {} + /** Called when we encounter newline \n 0x0a. Override to use the event. 
*/ + virtual void newline(int /*pos*/) {} + // Static utility functions: /** Count words in string, as the splitter would generate them */ diff --git a/src/doc/man/recoll.conf.5 b/src/doc/man/recoll.conf.5 index ff3390a7..5617976f 100644 --- a/src/doc/man/recoll.conf.5 +++ b/src/doc/man/recoll.conf.5 @@ -148,7 +148,7 @@ not set, the daemon uses skippedPaths. .TP .BI "zipUseSkippedNames = "bool Use skippedNames inside Zip archives. Fetched -directly by the rclzip handler. Skip the patterns defined by skippedNames +directly by the rclzip.py handler. Skip the patterns defined by skippedNames inside Zip archives. Can be redefined for subdirectories. See https://www.lesbonscomptes.com/recoll/faqsandhowtos/FilteringOutZipArchiveMembers.html @@ -195,7 +195,7 @@ lets you turn off md5 computation for selected types. It is global (no redefinition for subtrees). At the moment, it only has an effect for external handlers (exec and execm). The file types can be specified by listing either MIME types (e.g. audio/mpeg) or handler names -(e.g. rclaudio). +(e.g. rclaudio.py). .TP .BI "compressedfilemaxkbs = "int Size limit for compressed diff --git a/src/doc/user/recoll.conf.xml b/src/doc/user/recoll.conf.xml index 7fbaec38..be66bd0e 100644 --- a/src/doc/user/recoll.conf.xml +++ b/src/doc/user/recoll.conf.xml @@ -112,7 +112,7 @@ not set, the daemon uses skippedPaths. zipUseSkippedNames Use skippedNames inside Zip archives. Fetched -directly by the rclzip handler. Skip the patterns defined by skippedNames +directly by the rclzip.py handler. Skip the patterns defined by skippedNames inside Zip archives. Can be redefined for subdirectories. See https://www.lesbonscomptes.com/recoll/faqsandhowtos/FilteringOutZipArchiveMembers.html @@ -162,7 +162,7 @@ lets you turn off md5 computation for selected types. It is global (no redefinition for subtrees). At the moment, it only has an effect for external handlers (exec and execm). 
The file types can be specified by listing either MIME types (e.g. audio/mpeg) or handler names -(e.g. rclaudio). +(e.g. rclaudio.py). compressedfilemaxkbs diff --git a/src/doc/user/usermanual.html b/src/doc/user/usermanual.html index c538dfa8..9cb3df43 100644 --- a/src/doc/user/usermanual.html +++ b/src/doc/user/usermanual.html @@ -10,7 +10,7 @@ + "Permission is granted to copy, distribute and/or modify this document under the terms of the GNU Free Documentation License, Version 1.3 or any later version published by the Free Software Foundation; with no Invariant Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license can be found at the following location: GNU web site. This document introduces full text search notions and describes the installation and use of the Recoll application. This version describes Recoll 1.32."> @@ -53,7 +53,7 @@ alink="#0000FF"> and describes the installation and use of the Recoll application. This version describes Recoll 1.31.

+ "application">Recoll 1.32.

@@ -443,7 +443,7 @@ alink="#0000FF">

This document introduces full text search notions and describes the installation and use of the Recoll application. It is updated for - Recoll 1.31.

+ Recoll 1.32.

Recoll was for a long time dedicated to Unix-like systems. It was only lately (2015) ported to MS-Windows. @@ -6581,9 +6581,10 @@ text/html [file:///Users/uncrypted-dockes/projets/bateaux/ilur/factEtCie/r

All the Python handlers share at least the rclexecm.py module, which handles the communication. Have a look at, - for example, rclzip - for a handler which uses rclexecm.py directly.

+ for example, rclzip.py for a handler which + uses rclexecm.py + directly.

  • Most Python handlers which process @@ -6633,7 +6634,7 @@ text/html [file:///Users/uncrypted-dockes/projets/bateaux/ilur/factEtCie/r "_top">Git repository (the sample not in the distributed release at the moment).

    You can also have a look at the slightly more complex - rclzip + rclzip.py which uses Zip file paths as identifiers (ipath).

    execm handlers sometimes @@ -6726,7 +6727,7 @@ text/html [file:///Users/uncrypted-dockes/projets/bateaux/ilur/factEtCie/r text/rtf = exec unrtf --nopict --html; charset=iso-8859-1; mimetype=text/html - application/x-chm = execm rclchm + application/x-chm = execm rclchm.py

    The fragment specifies that:

    @@ -6880,7 +6881,7 @@ text/html [file:///Users/uncrypted-dockes/projets/bateaux/ilur/factEtCie/r "literal">setfield() helper method. This avoids the necessity to produce HTML, and any issue with HTML quoting. See, for example, rclaudio in rclaudio.py in Recoll 1.23 and later for an example of handler which outputs text/plain and uses zipUseSkippedNames

    Use skippedNames inside Zip archives. Fetched - directly by the rclzip handler. Skip the patterns - defined by skippedNames inside Zip archives. Can - be redefined for subdirectories. See + directly by the rclzip.py handler. Skip the + patterns defined by skippedNames inside Zip + archives. Can be redefined for subdirectories. + See https://www.lesbonscomptes.com/recoll/faqsandhowtos/FilteringOutZipArchiveMembers.html

    + audio/mpeg) or handler names (e.g. + rclaudio.py).

    %p. Page index. Only significant for a subset of document types, currently only PDF, - Postscript and DVI files. Can be used to start the - editor at the right page for a match or - snippet.

    + Postscript and DVI files. If it is set, a + significant term will be chosen in the query, and + %p will be substituted with the first page where + the term appears. Can be used to start the editor + at the right page for a match or snippet.

    +
  • +
  • +

    %l. Line number. Only significant + for document types with relevant line breaks, + mostly text/plain and analogs. If it is set, a + significant term will be chosen in the query, and + %l will be substituted with the first line where + the term appears.

  • %s. Search term. The value will only - be set for documents with indexed page numbers (ie: - PDF). The value will be one of the matched search - terms. It would allow pre-setting the value in the - "Find" entry inside Evince for example, for easy - highlighting of the term.

    + be set for documents with indexed page or line + numbers and if %p or %l is also used. The value + will be one of the matched search terms. It would + allow pre-setting the value in the "Find" entry + inside Evince for example, for easy highlighting of + the term.

  • %u. Url.

    diff --git a/src/doc/user/usermanual.xml b/src/doc/user/usermanual.xml index 168a7a4c..8241da85 100644 --- a/src/doc/user/usermanual.xml +++ b/src/doc/user/usermanual.xml @@ -5,7 +5,7 @@ Recoll"> http://www.recoll.org/pages/features.html"> - + Xapian"> Windows"> Unix-like systems"> @@ -4957,7 +4957,7 @@ text/html [file:///Users/uncrypted-dockes/projets/bateaux/ilur/factEtCie/r is). All the Python handlers share at least the rclexecm.py module, which handles the communication. Have a look at, for - example, rclzip for a handler which + example, rclzip.py for a handler which uses rclexecm.py directly. Most Python handlers which process single-document files by executing another command @@ -4994,7 +4994,7 @@ text/html [file:///Users/uncrypted-dockes/projets/bateaux/ilur/factEtCie/r the moment). You can also have a look at the slightly more complex - rclzip which uses Zip + rclzip.py which uses Zip file paths as identifiers (ipath). execm handlers sometimes need to make @@ -5062,7 +5062,7 @@ text/html [file:///Users/uncrypted-dockes/projets/bateaux/ilur/factEtCie/r text/rtf = exec unrtf --nopict --html; charset=iso-8859-1; mimetype=text/html - application/x-chm = execm rclchm + application/x-chm = execm rclchm.py The fragment specifies that: @@ -5205,7 +5205,7 @@ text/html [file:///Users/uncrypted-dockes/projets/bateaux/ilur/factEtCie/r method to produce metadata, by calling the setfield() helper method. This avoids the necessity to produce HTML, and any issue with HTML quoting. See, - for example, rclaudio in &RCL; 1.23 and + for example, rclaudio.py in &RCL; 1.23 and later for an example of handler which outputs text/plain and uses setfield() to produce metadata. @@ -7114,28 +7114,37 @@ other = rclcat:other (possibly a script) to be able to handle it. - %M - MIME type + + %MMIME type - %p - Page index. Only significant for a subset of document - types, currently only PDF, Postscript and DVI files. 
Can be - used to start the editor at the right page for a match or - snippet. + + %pPage index. Only significant for a subset of + document types, currently only PDF, Postscript and DVI files. If it is set, a + significant term will be chosen in the query, and %p will be substituted with the + first page where the term appears. Can be used to start the editor at the right page + for a match or snippet. - %s - Search term. The value will only be set for documents - with indexed page numbers (ie: PDF). The value will be one of - the matched search terms. It would allow pre-setting the - value in the "Find" entry inside Evince for example, for easy - highlighting of the term. + + %lLine number. Only significant for document + types with relevant line breaks, mostly text/plain and analogs. If it is set, a + significant term will be chosen in the query, and %l will be substituted with the + first line where the term appears. - %u - Url. + + %sSearch term. The value will only be set for + documents with indexed page or line numbers and if %p or %l is also used. The value + will be one of the matched search terms. It would allow pre-setting the value in the + "Find" entry inside Evince for example, for easy highlighting of the + term. + + + %uUrl. 
+ + In addition to the predefined values above, all strings like diff --git a/src/filters/kosplitter.py b/src/filters/kosplitter.py index 6c80a0f9..732cd3ed 100755 --- a/src/filters/kosplitter.py +++ b/src/filters/kosplitter.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 ################################# # Copyright (C) 2020 J.F.Dockes # This program is free software; you can redistribute it and/or modify diff --git a/src/filters/rcl7z b/src/filters/rcl7z.py similarity index 98% rename from src/filters/rcl7z rename to src/filters/rcl7z.py index 7ba035ac..96022a30 100755 --- a/src/filters/rcl7z +++ b/src/filters/rcl7z.py @@ -3,7 +3,7 @@ # 7-Zip file filter for Recoll # Thanks to Recoll user Martin Ziegler -# This is a modified version of rclzip, with some help from rcltar +# This is a modified version of rclzip.py, with some help from rcltar.py # # Normally using py7zr https://github.com/miurahr/py7zr # diff --git a/src/filters/rclaudio b/src/filters/rclaudio.py similarity index 100% rename from src/filters/rclaudio rename to src/filters/rclaudio.py diff --git a/src/filters/rclchm b/src/filters/rclchm.py similarity index 100% rename from src/filters/rclchm rename to src/filters/rclchm.py diff --git a/src/filters/rcldia b/src/filters/rcldia.py similarity index 97% rename from src/filters/rcldia rename to src/filters/rcldia.py index 3869bced..45dbeb16 100755 --- a/src/filters/rcldia +++ b/src/filters/rcldia.py @@ -6,7 +6,7 @@ from __future__ import print_function # stefan.friedel@iwr.uni-heidelberg.de 2012 # # add the following to ~/.recoll/mimeconf into the [index] section: -# application/x-dia-diagram = execm rcldia;mimetype=text/plain;charset=utf-8 +# application/x-dia-diagram = execm rcldia.py;mimetype=text/plain;charset=utf-8 # and into the [icons] section: # application/x-dia-diagram = drawing # and finally under [categories]: diff --git a/src/filters/rclepub b/src/filters/rclepub.py similarity index 99% rename from src/filters/rclepub rename to 
src/filters/rclepub.py index e0919a1a..4775c5bd 100755 --- a/src/filters/rclepub +++ b/src/filters/rclepub.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 """Extract Html content from an EPUB file (.epub)""" from __future__ import print_function diff --git a/src/filters/rclepub1 b/src/filters/rclepub1.py similarity index 100% rename from src/filters/rclepub1 rename to src/filters/rclepub1.py diff --git a/src/filters/rclexecm.py b/src/filters/rclexecm.py index 42d2ff76..1a68a4ce 100644 --- a/src/filters/rclexecm.py +++ b/src/filters/rclexecm.py @@ -372,7 +372,7 @@ def main(proto, extract): params = {'filename' : makebytes(path)} - # Some filters (e.g. rclaudio) need/get a MIME type from the indexer. + # Some filters (e.g. rclaudio.py) need/get a MIME type from the indexer. # We make a half-assed attempt to emulate: mimetype = _g_config.mimeType(path) if not mimetype and not _g_mswindows: diff --git a/src/filters/rclhwp.py b/src/filters/rclhwp.py index bb682dd7..b64f67e1 100755 --- a/src/filters/rclhwp.py +++ b/src/filters/rclhwp.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 # Copyright (C) 2020 J.F.Dockes # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/src/filters/rclics b/src/filters/rclics.py similarity index 100% rename from src/filters/rclics rename to src/filters/rclics.py diff --git a/src/filters/rclinfo b/src/filters/rclinfo.py similarity index 99% rename from src/filters/rclinfo rename to src/filters/rclinfo.py index cdc1d4da..ff5d711d 100755 --- a/src/filters/rclinfo +++ b/src/filters/rclinfo.py @@ -141,7 +141,7 @@ class InfoSimpleSplitter: if name == b'File': infofile = value except Exception as err: - print("rclinfo: bad line in %s: [%s] %s\n" % \ + print("rclinfo.py: bad line in %s: [%s] %s\n" % \ (infofile, line, err), file = sys.stderr) nodename = prevnodename node += line diff --git a/src/filters/rclkar 
b/src/filters/rclkar.py similarity index 100% rename from src/filters/rclkar rename to src/filters/rclkar.py diff --git a/src/filters/rclpst.py b/src/filters/rclpst.py index 7b8111bf..42ab7e9a 100755 --- a/src/filters/rclpst.py +++ b/src/filters/rclpst.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 ################################# # Copyright (C) 2019 J.F.Dockes # This program is free software; you can redistribute it and/or modify diff --git a/src/filters/rclpython.py b/src/filters/rclpython.py index 2a4ca490..1d812dcf 100755 --- a/src/filters/rclpython.py +++ b/src/filters/rclpython.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 # Rclpython is based on "colorize.py" from: # http://chrisarndt.de/en/software/python/colorize.html diff --git a/src/filters/rclrar b/src/filters/rclrar.py similarity index 98% rename from src/filters/rclrar rename to src/filters/rclrar.py index e6b38bb1..4c3fa766 100755 --- a/src/filters/rclrar +++ b/src/filters/rclrar.py @@ -60,7 +60,7 @@ except Exception as ex: # (https://www.rarlab.com/rar_add.htm). The unrar-free version fails # with the message "Failed the read enough data" # -# This is identical to rclzip except I did a search/replace from zip +# This is identical to rclzip.py except I did a search/replace from zip # to rar, and changed this comment. class RarExtractor: def __init__(self, em): diff --git a/src/filters/rcltar b/src/filters/rcltar.py similarity index 99% rename from src/filters/rcltar rename to src/filters/rcltar.py index c6f2bf4f..1389c703 100755 --- a/src/filters/rcltar +++ b/src/filters/rcltar.py @@ -2,7 +2,7 @@ # Tar-file filter for Recoll # Thanks to Recoll user Martin Ziegler -# This is a modified version of /usr/share/recoll/filters/rclzip +# This is a modified version of /usr/share/recoll/filters/rclzip.py # It works not only for tar-files, but automatically for gzipped and # bzipped tar-files at well. 
diff --git a/src/filters/rclwar b/src/filters/rclwar.py similarity index 100% rename from src/filters/rclwar rename to src/filters/rclwar.py diff --git a/src/filters/rclzip b/src/filters/rclzip.py similarity index 99% rename from src/filters/rclzip rename to src/filters/rclzip.py index dc046fdf..116609db 100755 --- a/src/filters/rclzip +++ b/src/filters/rclzip.py @@ -51,7 +51,7 @@ if not hasrclconfig: # and stores it in the catalog as an unicode object. Else it uses the # binary string, which it decodes as CP437 (zip standard). # -# When reading the file, the input file name is used by rclzip +# When reading the file, the input file name is used by rclzip.py # directly as an index into the catalog. # # When we send the file name data to the indexer, we have to serialize diff --git a/src/index/rclmonprc.cpp b/src/index/rclmonprc.cpp index a2e54662..3702bb7c 100644 --- a/src/index/rclmonprc.cpp +++ b/src/index/rclmonprc.cpp @@ -1,7 +1,7 @@ #include "autoconfig.h" #ifdef RCL_MONITOR -/* Copyright (C) 2006 J.F.Dockes +/* Copyright (C) 2006-2022 J.F.Dockes * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -122,7 +122,7 @@ struct DelayPat { */ class RclEQData { public: - int m_opts; + int m_opts{0}; // Queue for normal files (unlimited reindex) queue_type m_iqueue; // Queue for delayed reindex files @@ -135,27 +135,21 @@ public: delays_type m_delays; // Configured intervals for path patterns, read from the configuration. 
vector m_delaypats; - RclConfig *m_config; - bool m_ok; + RclConfig *m_config{nullptr}; + bool m_ok{true}; std::mutex m_mutex; std::condition_variable m_cond; - RclEQData() - : m_config(0), m_ok(true) - { - } void readDelayPats(int dfltsecs); - DelayPat searchDelayPats(const string& path) - { - for (vector::iterator it = m_delaypats.begin(); - it != m_delaypats.end(); it++) { - if (fnmatch(it->pattern.c_str(), path.c_str(), 0) == 0) { - return *it; - } + DelayPat searchDelayPats(const string& path) { + for (const auto& dpat: m_delaypats) { + if (fnmatch(dpat.pattern.c_str(), path.c_str(), 0) == 0) { + return dpat; } - return DelayPat(); } + return DelayPat(); + } void delayInsert(const queue_type::iterator &qit); }; @@ -170,22 +164,21 @@ void RclEQData::readDelayPats(int dfltsecs) vector dplist; if (!stringToStrings(patstring, dplist)) { - LOGERR("rclEQData: bad pattern list: [" << (patstring) << "]\n" ); + LOGERR("rclEQData: bad pattern list: [" << patstring << "]\n"); return; } - for (vector::iterator it = dplist.begin(); - it != dplist.end(); it++) { - string::size_type pos = it->find_last_of(":"); + for (const auto& entry : dplist) { + string::size_type pos = entry.find_last_of(":"); DelayPat dp; - dp.pattern = it->substr(0, pos); - if (pos != string::npos && pos != it->size()-1) { - dp.seconds = atoi(it->substr(pos+1).c_str()); + dp.pattern = entry.substr(0, pos); + if (pos != string::npos && pos != entry.size() - 1) { + dp.seconds = atoi(entry.substr(pos+1).c_str()); } else { dp.seconds = dfltsecs; } m_delaypats.push_back(dp); - LOGDEB2("rclmon::readDelayPats: add [" << (dp.pattern) << "] " << (dp.seconds) << "\n" ); + LOGDEB2("rclmon::readDelayPats: add [" << dp.pattern << "] " << dp.seconds << "\n"); } } @@ -194,10 +187,8 @@ void RclEQData::readDelayPats(int dfltsecs) // when necessary. 
void RclEQData::delayInsert(const queue_type::iterator &qit) { - MONDEB("RclEQData::delayInsert: minclock " << qit->second.m_minclock << - std::endl); - for (delays_type::iterator dit = m_delays.begin(); - dit != m_delays.end(); dit++) { + MONDEB("RclEQData::delayInsert: minclock " << qit->second.m_minclock << "\n"); + for (delays_type::iterator dit = m_delays.begin(); dit != m_delays.end(); dit++) { queue_type::iterator qit1 = *dit; if ((*qit1).second.m_minclock > qit->second.m_minclock) { m_delays.insert(dit, qit); @@ -230,7 +221,7 @@ std::unique_lock RclMonEventQueue::wait(int seconds, bool *top) { std::unique_lock lock(m_data->m_mutex); - MONDEB("RclMonEventQueue::wait, seconds: " << seconds << std::endl); + MONDEB("RclMonEventQueue::wait, seconds: " << seconds << "\n"); if (!empty()) { MONDEB("RclMonEventQueue:: immediate return\n"); return lock; @@ -310,7 +301,7 @@ bool RclMonEventQueue::empty() // first, earliest one): queue_type::iterator qit = *(m_data->m_delays.begin()); if (qit->second.m_minclock > time(0)) { - MONDEB("RclMonEventQueue::empty(): true (no delay ready " << + MONDEB("RclMonEventQueue::empty(): true (no delay ready " << qit->second.m_minclock << ")\n"); return true; } @@ -324,7 +315,7 @@ bool RclMonEventQueue::empty() RclMonEvent RclMonEventQueue::pop() { time_t now = time(0); - MONDEB("RclMonEventQueue::pop(), now " << now << std::endl); + MONDEB("RclMonEventQueue::pop(), now " << now << "\n"); // Look at the delayed events, get rid of the expired/unactive // ones, possibly return an expired/needidx one. 
@@ -332,7 +323,7 @@ RclMonEvent RclMonEventQueue::pop() delays_type::iterator dit = m_data->m_delays.begin(); queue_type::iterator qit = *dit; MONDEB("RclMonEventQueue::pop(): in delays: evt minclock " << - qit->second.m_minclock << std::endl); + qit->second.m_minclock << "\n"); if (qit->second.m_minclock <= now) { if (qit->second.m_needidx) { RclMonEvent ev = qit->second; @@ -371,7 +362,7 @@ RclMonEvent RclMonEventQueue::pop() // special processing to limit their reindexing rate. bool RclMonEventQueue::pushEvent(const RclMonEvent &ev) { - MONDEB("RclMonEventQueue::pushEvent for " << ev.m_path << std::endl); + MONDEB("RclMonEventQueue::pushEvent for " << ev.m_path << "\n"); std::unique_lock lock(m_data->m_mutex); DelayPat pat = m_data->searchDelayPats(ev.m_path); @@ -381,8 +372,7 @@ bool RclMonEventQueue::pushEvent(const RclMonEvent &ev) queue_type::iterator qit = m_data->m_dqueue.find(ev.m_path); if (qit == m_data->m_dqueue.end()) { // Not there yet, insert new - qit = - m_data->m_dqueue.insert(queue_type::value_type(ev.m_path, ev)).first; + qit = m_data->m_dqueue.insert(queue_type::value_type(ev.m_path, ev)).first; // Set the time to next index to "now" as it has not been // indexed recently (otherwise it would still be in the // queue), and add the iterator to the delay queue. @@ -452,6 +442,8 @@ bool startMonitor(RclConfig *conf, int opts) auxinterval = dfltauxinterval; if (!conf->getConfParam("monixinterval", &ixinterval)) ixinterval = dfltixinterval; + bool doweb{false}; + conf->getConfParam("processwebqueue", &doweb); rclEQ.setConfig(conf); rclEQ.setopts(opts); @@ -471,11 +463,13 @@ bool startMonitor(RclConfig *conf, int opts) while (true) { time_t now = time(0); - if (now - lastmovetime > ixinterval) { +#ifndef DISABLE_WEB_INDEXER + if (doweb && (now - lastmovetime > ixinterval)) { lastmovetime = now; runWebFilesMoverScript(conf); } - +#endif // DISABLE_WEB_INDEXER + { // Wait for event or timeout. 
// Set a relatively short timeout for better monitoring of @@ -487,7 +481,7 @@ bool startMonitor(RclConfig *conf, int opts) #ifndef _WIN32 bool x11dead = !(opts & RCLMON_NOX11) && !x11IsAlive(); if (x11dead) - LOGDEB("RclMonprc: x11 is dead\n" ); + LOGDEB("RclMonprc: x11 is dead\n"); #else bool x11dead = false; #endif @@ -508,26 +502,27 @@ bool startMonitor(RclConfig *conf, int opts) modified.push_back(ev.m_path); break; case RclMonEvent::RCLEVT_DELETE: - LOGDEB0("Monitor: Delete on " << (ev.m_path) << "\n" ); - // If this is for a directory (which the caller should - // tell us because he knows), we should purge the db - // of all the subtree, because on a directory rename, - // inotify will only generate one event for the - // renamed top, not the subentries. This is relatively - // complicated to do though, and we currently do not - // do it, and just wait for a restart to do a full run and - // purge. + LOGDEB0("Monitor: Delete on " << ev.m_path << "\n"); + // If this is for a directory (which the caller should tell us because he + // knows), we should purge the db of all the subtree entries, because on a + // directory rename, inotify will only generate one event for the renamed top, + // not the subentries. The entries from the new subtree are updated when the + // monitor walks it on the DIRCREATE event. deleted.push_back(ev.m_path); - if (ev.evflags() & RclMonEvent::RCLEVT_ISDIR) { +#ifndef _WIN32 + // We don't know the type of deleted entries on + // win32. So do the subtree things always. 
+ if (ev.evflags() & RclMonEvent::RCLEVT_ISDIR) +#endif + { vector paths; if (subtreelist(conf, ev.m_path, paths)) { - deleted.insert(deleted.end(), - paths.begin(), paths.end()); + deleted.insert(deleted.end(), paths.begin(), paths.end()); } } break; default: - LOGDEB("Monitor: got Other on [" << (ev.m_path) << "]\n" ); + LOGDEB("Monitor: got Other on [" << ev.m_path << "]\n"); } } } @@ -572,17 +567,19 @@ bool startMonitor(RclConfig *conf, int opts) } } +#ifndef _WIN32 // Check for a config change if (!(opts & RCLMON_NOCONFCHECK) && o_reexec && conf->sourceChanged()) { - LOGDEB("Rclmonprc: config changed, reexecuting myself\n" ); + LOGDEB("Rclmonprc: config changed, reexecuting myself\n"); // We never want to have a -n option after a config // change. -n was added by the reexec after the initial // pass even if it was not given on the command line o_reexec->removeArg("-n"); o_reexec->reexec(); } +#endif // ! _WIN32 } - LOGDEB("Rclmonprc: calling queue setTerminate\n" ); + LOGDEB("Rclmonprc: calling queue setTerminate\n"); rclEQ.setTerminate(); // We used to wait for the receiver thread here before returning, @@ -590,9 +587,8 @@ bool startMonitor(RclConfig *conf, int opts) // during our limited time window for exiting. To be reviewed if // we ever need several monitor invocations in the same process // (can't foresee any reason why we'd want to do this). 
- LOGDEB("Monitor: returning\n" ); + LOGDEB("Monitor: returning\n"); return true; } #endif // RCL_MONITOR - diff --git a/src/index/rclmonrcv.cpp b/src/index/rclmonrcv.cpp index e3fc0df1..dbcbb000 100644 --- a/src/index/rclmonrcv.cpp +++ b/src/index/rclmonrcv.cpp @@ -1,6 +1,6 @@ #include "autoconfig.h" #ifdef RCL_MONITOR -/* Copyright (C) 2006 J.F.Dockes +/* Copyright (C) 2006-2021 J.F.Dockes * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -16,6 +16,35 @@ * Free Software Foundation, Inc., * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ + +/* The code for the Win32 version of the monitor was largely copied from efsw: + * https://github.com/SpartanJ/efsw + * LICENSE for the original WIN32 code: + * Copyright (c) 2020 Martn Lucas Golini + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * This software is a fork of the "simplefilewatcher" by James Wynn (james@jameswynn.com) + * http://code.google.com/p/simplefilewatcher/ also MIT licensed. + */ + + #include "autoconfig.h" #include @@ -31,12 +60,10 @@ /** * Recoll real time monitor event receiver. This file has code to interface - * to FAM or inotify and place events on the event queue. + * to FAM, inotify, etc. and place events on the event queue. */ -/** A small virtual interface for monitors. Lets - * either fam/gamin or raw imonitor hide behind - */ +/** Virtual interface for the actual filesystem monitoring module. */ class RclMonitor { public: RclMonitor() {} @@ -46,8 +73,12 @@ public: virtual bool getEvent(RclMonEvent& ev, int msecs = -1) = 0; virtual bool ok() const = 0; // Does this monitor generate 'exist' events at startup? - virtual bool generatesExist() const = 0; - + virtual bool generatesExist() const { + return false; + } + virtual bool isRecursive() const { + return false; + } // Save significant errno after monitor calls int saved_errno{0}; }; @@ -72,8 +103,8 @@ public: virtual FsTreeWalker::Status processone( const string &fn, const struct PathStat *st, FsTreeWalker::CbFlag flg) { - MONDEB("rclMonRcvRun: processone " << fn << " m_mon " << m_mon << - " m_mon->ok " << (m_mon ? m_mon->ok() : false) << std::endl); + MONDEB("walkerCB: processone " << fn << " m_mon " << m_mon << + " m_mon->ok " << (m_mon ? 
m_mon->ok() : false) << "\n"); if (flg == FsTreeWalker::FtwDirEnter || flg == FsTreeWalker::FtwDirReturn) { m_config->setKeyDir(fn); @@ -90,17 +121,18 @@ public: if (ev.m_etyp != RclMonEvent::RCLEVT_NONE) m_queue->pushEvent(ev); } else { - MONDEB("rclMonRcvRun: no event pending\n"); + MONDEB("walkerCB: no event pending\n"); break; } } if (!m_mon || !m_mon->ok()) return FsTreeWalker::FtwError; // We do nothing special if addWatch fails for a reasonable reason - if (!m_mon->addWatch(fn, true)) { - if (m_mon->saved_errno != EACCES && - m_mon->saved_errno != ENOENT) + if (!m_mon->isRecursive() && !m_mon->addWatch(fn, true)) { + if (m_mon->saved_errno != EACCES && m_mon->saved_errno != ENOENT) { + LOGINF("walkerCB: addWatch failed\n"); return FsTreeWalker::FtwError; + } } } else if (!m_mon->generatesExist() && flg == FsTreeWalker::FtwRegular) { // Have to synthetize events for regular files existence @@ -113,8 +145,8 @@ public: // monitoring ? There should be another way: maybe start // monitoring without actually handling events (just // queue), then run incremental then start handling - // events ? But we also have to do it on a directory - // move! So keep it + // events ? ** But we also have to do it on a directory + // move! So keep it ** We could probably skip it on the initial run though. RclMonEvent ev; ev.m_path = fn; ev.m_etyp = RclMonEvent::RCLEVT_MODIFY; @@ -130,6 +162,96 @@ private: FsTreeWalker& m_walker; }; +static bool rclMonAddTopWatches( + FsTreeWalker& walker, RclConfig& lconfig, RclMonitor *mon, RclMonEventQueue *queue) +{ + // Get top directories from config. Special monitor sublist if + // set, else full list. + vector tdl = lconfig.getTopdirs(true); + if (tdl.empty()) { + LOGERR("rclMonRcvRun:: top directory list (topdirs param.) 
not found " + "in configuration or topdirs list parse error"); + queue->setTerminate(); + return false; + } + // Walk the directory trees to add watches + WalkCB walkcb(&lconfig, mon, queue, walker); + for (const auto& dir : tdl) { + lconfig.setKeyDir(dir); + // Adjust the follow symlinks options + bool follow; + if (lconfig.getConfParam("followLinks", &follow) && follow) { + walker.setOpts(FsTreeWalker::FtwFollow); + } else { + walker.setOpts(FsTreeWalker::FtwOptNone); + } + if (path_isdir(dir, follow)) { + LOGDEB("rclMonRcvRun: walking " << dir << "\n"); + // If the fs watcher is recursive, we add the watches for the topdirs here, and walk the + // tree just for generating initial events. + if (mon->isRecursive() && !mon->addWatch(dir, true)) { + if (mon->saved_errno != EACCES && mon->saved_errno != ENOENT) { + LOGERR("rclMonAddTopWatches: addWatch failed for [" << dir << "]\n"); + return false; + } + } + if (walker.walk(dir, walkcb) != FsTreeWalker::FtwOk) { + LOGERR("rclMonRcvRun: tree walk failed\n"); + return false; + } + if (walker.getErrCnt() > 0) { + LOGINFO("rclMonRcvRun: fs walker errors: " << walker.getReason() << "\n"); + } + } else { + // We have to special-case regular files which are part of the topdirs list because the + // tree walker only adds watches for directories + if (!mon->addWatch(dir, false)) { + LOGSYSERR("rclMonRcvRun", "addWatch", dir); + } + } + } + + bool doweb = false; + lconfig.getConfParam("processwebqueue", &doweb); + if (doweb) { + string webqueuedir = lconfig.getWebQueueDir(); + if (!mon->addWatch(webqueuedir, true)) { + LOGERR("rclMonRcvRun: addwatch (webqueuedir) failed\n"); + if (mon->saved_errno != EACCES && mon->saved_errno != ENOENT) + return false; + } + } + return true; +} + +static bool rclMonAddSubWatches( + const std::string& path, FsTreeWalker& walker, RclConfig& lconfig, + RclMonitor *mon, RclMonEventQueue *queue) +{ + WalkCB walkcb(&lconfig, mon, queue, walker); + if (walker.walk(path, walkcb) != 
FsTreeWalker::FtwOk) { + LOGERR("rclMonRcvRun: walking new dir " << path << " : " << walker.getReason() << "\n"); + return false; + } + if (walker.getErrCnt() > 0) { + LOGINFO("rclMonRcvRun: fs walker errors: " << walker.getReason() << "\n"); + } + return true; +} + +// Don't push events for skipped files. This would get filtered on the processing side +// anyway, but causes unnecessary wakeups and messages. Do not test skippedPaths here, +// this would be incorrect (because a topdir can be under a skippedPath and this was +// handled while adding the watches). Also we let the other side process onlyNames. +static bool rclMonShouldSkip(const std::string& path, RclConfig& lconfig, FsTreeWalker& walker) +{ + lconfig.setKeyDir(path_getfather(path)); + walker.setSkippedNames(lconfig.getSkippedNames()); + if (walker.inSkippedNames(path_getsimple(path))) + return true; + return false; +} + // Main thread routine: create watches, then forever wait for and queue events void *rclMonRcvRun(void *q) { @@ -150,104 +272,34 @@ void *rclMonRcvRun(void *q) return 0; } - // Get top directories from config. Special monitor sublist if - // set, else full list. - vector tdl = lconfig.getTopdirs(true); - if (tdl.empty()) { - LOGERR("rclMonRcvRun:: top directory list (topdirs param.) 
not found " - "in configuration or topdirs list parse error"); - queue->setTerminate(); - return 0; - } - - // Walk the directory trees to add watches FsTreeWalker walker; walker.setSkippedPaths(lconfig.getDaemSkippedPaths()); - WalkCB walkcb(&lconfig, mon, queue, walker); - for (auto it = tdl.begin(); it != tdl.end(); it++) { - lconfig.setKeyDir(*it); - // Adjust the follow symlinks options - bool follow; - if (lconfig.getConfParam("followLinks", &follow) && - follow) { - walker.setOpts(FsTreeWalker::FtwFollow); - } else { - walker.setOpts(FsTreeWalker::FtwOptNone); - } - // We have to special-case regular files which are part of the topdirs - // list because we the tree walker only adds watches for directories - if (path_isdir(*it, follow)) { - LOGDEB("rclMonRcvRun: walking " << *it << "\n"); - if (walker.walk(*it, walkcb) != FsTreeWalker::FtwOk) { - LOGERR("rclMonRcvRun: tree walk failed\n"); - goto terminate; - } - if (walker.getErrCnt() > 0) { - LOGINFO("rclMonRcvRun: fs walker errors: " << - walker.getReason() << "\n"); - } - } else { - if (!mon->addWatch(*it, false)) { - LOGERR("rclMonRcvRun: addWatch failed for " << *it << - " errno " << mon->saved_errno << std::endl); - } - } - } - { - bool doweb = false; - lconfig.getConfParam("processwebqueue", &doweb); - if (doweb) { - string webqueuedir = lconfig.getWebQueueDir(); - if (!mon->addWatch(webqueuedir, true)) { - LOGERR("rclMonRcvRun: addwatch (webqueuedir) failed\n"); - if (mon->saved_errno != EACCES && mon->saved_errno != ENOENT) - goto terminate; - } - } + if (!rclMonAddTopWatches(walker, lconfig, mon, queue)) { + LOGERR("rclMonRcvRun: addtopwatches failed\n"); + goto terminate; } // Forever wait for monitoring events and add them to queue: - MONDEB("rclMonRcvRun: waiting for events. q->ok(): " << queue->ok() << - std::endl); + MONDEB("rclMonRcvRun: waiting for events. 
q->ok(): " << queue->ok() << "\n"); while (queue->ok() && mon->ok()) { RclMonEvent ev; - // Note: I could find no way to get the select - // call to return when a signal is delivered to the process - // (it goes to the main thread, from which I tried to close or - // write to the select fd, with no effect). So set a - // timeout so that an intr will be detected + // Note: I could find no way to get the select call to return when a signal is delivered to + // the process (it goes to the main thread, from which I tried to close or write to the + // select fd, with no effect). So set a timeout so that an intr will be detected if (mon->getEvent(ev, 2000)) { - // Don't push events for skipped files. This would get - // filtered on the processing side anyway, but causes - // unnecessary wakeups and messages. Do not test - // skippedPaths here, this would be incorrect (because a - // topdir can be under a skippedPath and this was handled - // while adding the watches). - // Also we let the other side process onlyNames. - lconfig.setKeyDir(path_getfather(ev.m_path)); - walker.setSkippedNames(lconfig.getSkippedNames()); - if (walker.inSkippedNames(path_getsimple(ev.m_path))) + if (rclMonShouldSkip(ev.m_path, lconfig, walker)) continue; if (ev.m_etyp == RclMonEvent::RCLEVT_DIRCREATE) { - // Recursive addwatch: there may already be stuff - // inside this directory. Ie: files were quickly - // created, or this is actually the target of a - // directory move. This is necessary for inotify, but - // it seems that fam/gamin is doing the job for us so - // that we are generating double events here (no big - // deal as prc will sort/merge). + // Recursive addwatch: there may already be stuff inside this directory. E.g.: files + // were quickly created, or this is actually the target of a directory move. This is + // necessary for inotify, but it seems that fam/gamin is doing the job for us so + // that we are generating double events here (no big deal as prc will sort/merge). 
LOGDEB("rclMonRcvRun: walking new dir " << ev.m_path << "\n"); - if (walker.walk(ev.m_path, walkcb) != FsTreeWalker::FtwOk) { - LOGERR("rclMonRcvRun: walking new dir " << ev.m_path << - " : " << walker.getReason() << "\n"); + if (!rclMonAddSubWatches(ev.m_path, walker, lconfig, mon, queue)) { goto terminate; } - if (walker.getErrCnt() > 0) { - LOGINFO("rclMonRcvRun: fs walker errors: " << - walker.getReason() << "\n"); - } } if (ev.m_etyp != RclMonEvent::RCLEVT_NONE) @@ -271,7 +323,7 @@ bool eraseWatchSubTree(map& idtopath, const string& top) while (it != idtopath.end()) { if (it->second.find(top) == 0) { found = true; - idtopath.erase(it++); + it = idtopath.erase(it); } else { it++; } @@ -364,7 +416,7 @@ bool RclFAM::addWatch(const string& path, bool isdir) return false; bool ret = false; - MONDEB("RclFAM::addWatch: adding " << path << std::endl); + MONDEB("RclFAM::addWatch: adding " << path << "\n"); // It happens that the following call block forever. // We'd like to be able to at least terminate on a signal here, but @@ -410,7 +462,7 @@ bool RclFAM::getEvent(RclMonEvent& ev, int msecs) FD_ZERO(&readfds); FD_SET(fam_fd, &readfds); - MONDEB("RclFAM::getEvent: select. fam_fd is " << fam_fd << std::endl); + MONDEB("RclFAM::getEvent: select. fam_fd is " << fam_fd << "\n"); // Fam / gamin is sometimes a bit slow to send events. 
Always add // a little timeout, because if we fail to retrieve enough events, // we risk deadlocking in addwatch() @@ -432,7 +484,7 @@ bool RclFAM::getEvent(RclMonEvent& ev, int msecs) return false; } - MONDEB("RclFAM::getEvent: select returned " << ret << std::endl); + MONDEB("RclFAM::getEvent: select returned " << ret << "\n"); if (!FD_ISSET(fam_fd, &readfds)) return false; @@ -464,8 +516,7 @@ bool RclFAM::getEvent(RclMonEvent& ev, int msecs) ev.m_path = fe.filename; } - MONDEB("RclFAM::getEvent: " << event_name(fe.code) < " " << - ev.m_path << std::endl); + MONDEB("RclFAM::getEvent: " << event_name(fe.code) < " " << ev.m_path << "\n"); switch (fe.code) { case FAMCreated: @@ -517,23 +568,20 @@ bool RclFAM::getEvent(RclMonEvent& ev, int msecs) class RclIntf : public RclMonitor { public: RclIntf() - : m_ok(false), m_fd(-1), m_evp(0), m_ep(0) - { - if ((m_fd = inotify_init()) < 0) { - LOGERR("RclIntf:: inotify_init failed, errno " << errno << "\n"); - return; - } - m_ok = true; - } - virtual ~RclIntf() - { - close(); + : m_ok(false), m_fd(-1), m_evp(0), m_ep(0) { + if ((m_fd = inotify_init()) < 0) { + LOGERR("RclIntf:: inotify_init failed, errno " << errno << "\n"); + return; } + m_ok = true; + } + virtual ~RclIntf() { + close(); + } virtual bool addWatch(const string& path, bool isdir); virtual bool getEvent(RclMonEvent& ev, int msecs = -1); bool ok() const {return m_ok;} - virtual bool generatesExist() const {return false;} private: bool m_ok; @@ -586,7 +634,7 @@ bool RclIntf::addWatch(const string& path, bool) { if (!ok()) return false; - MONDEB("RclIntf::addWatch: adding " << path << std::endl); + MONDEB("RclIntf::addWatch: adding " << path << "\n"); // CLOSE_WRITE is covered through MODIFY. 
CREATE is needed for mkdirs uint32_t mask = IN_MODIFY | IN_CREATE | IN_MOVED_FROM | IN_MOVED_TO | IN_DELETE @@ -636,9 +684,8 @@ bool RclIntf::getEvent(RclMonEvent& ev, int msecs) } int ret; MONDEB("RclIntf::getEvent: select\n"); - if ((ret = select(m_fd + 1, &readfds, 0, 0, msecs >= 0 ? &timeout : 0)) - < 0) { - LOGERR("RclIntf::getEvent: select failed, errno " << errno << "\n"); + if ((ret = select(m_fd + 1, &readfds, 0, 0, msecs >= 0 ? &timeout : 0)) < 0) { + LOGSYSERR("RclIntf::getEvent", "select", ""); close(); return false; } else if (ret == 0) { @@ -652,8 +699,7 @@ bool RclIntf::getEvent(RclMonEvent& ev, int msecs) return false; int rret; if ((rret=read(m_fd, m_evbuf, sizeof(m_evbuf))) <= 0) { - LOGERR("RclIntf::getEvent: read failed, " << sizeof(m_evbuf) << - "->" << rret << " errno " << errno << "\n"); + LOGSYSERR("RclIntf::getEvent", "read", sizeof(m_evbuf)); close(); return false; } @@ -679,8 +725,7 @@ bool RclIntf::getEvent(RclMonEvent& ev, int msecs) ev.m_path = path_cat(ev.m_path, evp->name); } - MONDEB("RclIntf::getEvent: " << event_name(evp->mask) << " " << - ev.m_path << std::endl); + MONDEB("RclIntf::getEvent: " << event_name(evp->mask) << " " << ev.m_path << "\n"); if ((evp->mask & IN_MOVED_FROM) && (evp->mask & IN_ISDIR)) { // We get this when a directory is renamed. Erase the subtree @@ -725,20 +770,472 @@ bool RclIntf::getEvent(RclMonEvent& ev, int msecs) #endif // RCL_USE_INOTIFY + +#ifdef _WIN32 + +/* + * WIN32 VERSION NOTES: + * + * - When using non-recursive watches (one per dir), it appeared that + * watching a subdirectory of a given directory prevented renaming + * the top directory, Windows says: can't rename because open or a + * file in it is open. This is mostly why we use recursive watches + * on the topdirs only. 
+ */ +#include +#include +#include +#include + +#include "safewindows.h" + +typedef long WatchID; +class WatcherWin32; +class RclFSWatchWin32; + +enum class Action {Add = 1, Delete = 2, Modify = 3, Move = 4}; + +// Virtual interface for the monitor callback. Note: this for compatibility with the efsw code, as +// rclmon uses a pull, not push interface. The callback pushes the events to a local queue from +// which they are then pulled by the upper level code. +class FileWatchListener { +public: + virtual ~FileWatchListener() {} + virtual void handleFileAction(WatchID watchid, const std::string& dir, const std::string& fn, + Action action, bool isdir, std::string oldfn = "" ) = 0; +}; + +// Internal watch data. This piggy-back our actual data pointer to the MS overlapped pointer. This +// is a bit of a hack, and we could probably use event Ids instead. +struct WatcherStructWin32 +{ + OVERLAPPED Overlapped; + WatcherWin32 *Watch; +}; + +// Actual data structure for one directory watch +class WatcherWin32 { +public: + WatchID ID; + FileWatchListener *Listener{nullptr}; + bool Recursive; + std::string DirName; + std::string OldFileName; + + HANDLE DirHandle{nullptr}; + // do NOT make this bigger than 64K because it will fail if the folder being watched is on the + // network! (see http://msdn.microsoft.com/en-us/library/windows/desktop/aa365465(v=vs.85).aspx) + BYTE Buffer[8 * 1024]; + DWORD NotifyFilter{0}; + bool StopNow{false}; + RclFSWatchWin32 *Watch{nullptr}; +}; + +// The efsw top level file system watcher: manages all the directory watches. 
+class RclFSWatchWin32 { +public: + RclFSWatchWin32(); + + virtual ~RclFSWatchWin32(); + + // Add a directory watch + // On error returns -1 + WatchID addWatch(const std::string& directory, FileWatchListener *watcher, bool recursive); + + // 2nd stage of action processing (after the static handler which just reads the data) + void handleAction(WatcherWin32 *watch, const std::string& fn, unsigned long action); + + bool ok() const { + return mInitOK; + } + + // Fetch events, with msecs timeout if there are no more + void run(DWORD msecs); + +private: + HANDLE mIOCP; + // Using a vector because we don't remove watches. Change to list if needed. + std::vector mWatches; + bool mInitOK{false}; + WatchID mLastWatchID{0}; + + std::mutex mWatchesLock; + + bool pathInWatches(const std::string& path); + /// Remove all directory watches. + void removeAllWatches(); +}; + +// Adapter for the rclmon interface +class RclMonitorWin32 : public RclMonitor, public FileWatchListener { +public: + virtual ~RclMonitorWin32() {} + + virtual bool addWatch(const string& path, bool /*isDir*/) override { + MONDEB("RclMonitorWin32::addWatch: " << path << "\n"); + return m_fswatcher.addWatch(path, this, true) != -1; + } + + virtual bool getEvent(RclMonEvent& ev, int msecs = -1) { + PRETEND_USE(msecs); + if (!m_events.empty()) { + ev = m_events.front(); + m_events.pop(); + return true; + } + m_fswatcher.run(msecs); + if (!m_events.empty()) { + ev = m_events.front(); + m_events.pop(); + return true; + } + return false; + } + + virtual bool ok() const override { + return m_fswatcher.ok(); + } + // Does this monitor generate 'exist' events at startup? + virtual bool generatesExist() const override { + return false; + } + // Can the caller avoid setting watches on subdirs ? 
+ virtual bool isRecursive() const override { + return true; + } + virtual void handleFileAction(WatchID watchid, const std::string& dir, const std::string& fn, + Action action, bool isdir, std::string oldfn = "") { + MONDEB("RclMonitorWin32::handleFileAction: dir [" << dir << "] fn [" << fn << "] act " << + int(action) << " isdir " << isdir << " oldfn [" << oldfn << "]\n"); + RclMonEvent event; + switch (action) { + case Action::Move: + case Action::Add: event.m_etyp = isdir ? + RclMonEvent::RCLEVT_DIRCREATE : RclMonEvent::RCLEVT_MODIFY; break; + case Action::Delete: + event.m_etyp = RclMonEvent::RCLEVT_DELETE; + if (isdir) { + event.m_etyp |= RclMonEvent::RCLEVT_ISDIR; + } + break; + case Action::Modify: event.m_etyp = RclMonEvent::RCLEVT_MODIFY; break; + } + event.m_path = path_cat(dir, fn); + m_events.push(event); + } + + // Save significant errno after monitor calls + int saved_errno{0}; +private: + std::queue m_events; + RclFSWatchWin32 m_fswatcher; +}; + + +/// Stops monitoring a directory. +void DestroyWatch(WatcherStructWin32 *pWatch) +{ + if (pWatch) { + WatcherWin32 *ww32 = pWatch->Watch; + ww32->StopNow = true; + CancelIoEx(ww32->DirHandle, &pWatch->Overlapped); + CloseHandle(ww32->DirHandle); + delete ww32; + // Shouldn't we call heapfree on the parameter here ?? + } +} + +/// Refreshes the directory monitoring. +bool RefreshWatch(WatcherStructWin32 *pWatch) +{ + WatcherWin32 *ww32 = pWatch->Watch; + return ReadDirectoryChangesW( + ww32->DirHandle, + ww32->Buffer, + sizeof(ww32->Buffer), + ww32->Recursive, + ww32->NotifyFilter, + NULL, + &pWatch->Overlapped, + NULL + ) != 0; +} + +/// Starts monitoring a directory. 
+/// Opens the directory, associates its handle with the completion port and starts the first
+/// asynchronous read. Returns NULL, with everything released, if any step fails.
+WatcherStructWin32 *CreateWatch(LPCWSTR szDirectory, bool recursive, DWORD NotifyFilter, HANDLE iocp)
+{
+    // Zero-filled allocation: the structure holds the OVERLAPPED used for the directory reads.
+    WatcherStructWin32 *wsw32 = static_cast<WatcherStructWin32*>(
+        HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(WatcherStructWin32)));
+    if (nullptr == wsw32) {
+        return NULL;
+    }
+
+    WatcherWin32 *ww32 = new WatcherWin32();
+    wsw32->Watch = ww32;
+
+    ww32->DirHandle = CreateFileW(
+        szDirectory,
+        GENERIC_READ,
+        FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE,
+        NULL,
+        OPEN_EXISTING,
+        FILE_FLAG_BACKUP_SEMANTICS | FILE_FLAG_OVERLAPPED,
+        NULL
+    );
+
+    if (ww32->DirHandle != INVALID_HANDLE_VALUE) {
+        if (CreateIoCompletionPort(ww32->DirHandle, iocp, 0, 1)) {
+            ww32->NotifyFilter = NotifyFilter;
+            ww32->Recursive = recursive;
+
+            if (RefreshWatch(wsw32)) {
+                return wsw32;
+            }
+        }
+        // Only close a handle which was actually opened
+        CloseHandle(ww32->DirHandle);
+    }
+
+    delete ww32;
+    HeapFree(GetProcessHeap(), 0, wsw32);
+    return NULL;
+}
+
+
+// Creates the completion port on which all the directory watches will be queued.
+RclFSWatchWin32::RclFSWatchWin32()
+    : mLastWatchID(0)
+{
+    mIOCP = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 1);
+    if (mIOCP && mIOCP != INVALID_HANDLE_VALUE)
+        mInitOK = true;
+}
+
+// Posts a completion packet keyed with 'this' (run() returns when it sees it), then removes the
+// watches and closes the port.
+RclFSWatchWin32::~RclFSWatchWin32()
+{
+    mInitOK = false;
+
+    const bool haveport = mIOCP && mIOCP != INVALID_HANDLE_VALUE;
+    if (haveport) {
+        PostQueuedCompletionStatus(mIOCP, 0, reinterpret_cast<ULONG_PTR>(this), NULL);
+    }
+
+    removeAllWatches();
+
+    if (haveport) {
+        CloseHandle(mIOCP);
+    }
+}
+
+// Returns the new watch id, -1 if CreateWatch() fails, or 0 if there is nothing to do (not a
+// directory, not readable, already watched). Note that 0 is not an error for callers which
+// test against -1.
+WatchID RclFSWatchWin32::addWatch(const std::string& _dir, FileWatchListener *watcher, bool recursive)
+{
+    LOGDEB("RclFSWatchWin32::addWatch: " << _dir << " recursive " << recursive << "\n");
+    std::string dir(_dir);
+    path_slashize(dir);
+    if (!path_isdir(dir)) {
+        LOGDEB("RclFSWatchWin32::addWatch: not a directory: " << dir << "\n");
+        return 0;
+    }
+    if (!path_readable(dir)) {
+        LOGINF("RclFSWatchWin32::addWatch: not readable: " << dir << "\n");
+        return 0;
+    }
+    // Watched directory names are always stored and compared with a trailing slash
+    path_catslash(dir);
+    auto wdir = utf8towchar(dir);
+
+    std::unique_lock<std::mutex> lock(mWatchesLock);
+
+    if (pathInWatches(dir)) {
+        MONDEB("RclFSWatchWin32::addWatch: already in watches: " << dir << "\n");
+        return 0;
+    }
+
+    WatchID watchid = ++mLastWatchID;
+
+    WatcherStructWin32 *watch = CreateWatch(
+        wdir.get(), recursive,
+        FILE_NOTIFY_CHANGE_CREATION |
+        FILE_NOTIFY_CHANGE_LAST_WRITE |
+        FILE_NOTIFY_CHANGE_FILE_NAME |
+        FILE_NOTIFY_CHANGE_DIR_NAME |
+        FILE_NOTIFY_CHANGE_SIZE,
+        mIOCP
+    );
+
+    if (nullptr == watch) {
+        LOGINF("RclFSWatchWin32::addWatch: CreateWatch failed\n");
+        return -1;
+    }
+
+    // Add the handle to the handles vector
+    watch->Watch->ID = watchid;
+    watch->Watch->Watch = this;
+    watch->Watch->Listener = watcher;
+    watch->Watch->DirName = dir;
+
+    mWatches.push_back(watch);
+
+    return watchid;
+}
+
+void RclFSWatchWin32::removeAllWatches()
+{
+    std::unique_lock<std::mutex> lock(mWatchesLock);
+    for (auto& watchp : mWatches) {
+        DestroyWatch(watchp);
+    }
+    mWatches.clear();
+}
+
+// Unpacks events and passes them to the event processor
+void CALLBACK WatchCallback(DWORD dwNumberOfBytesTransfered, LPOVERLAPPED lpOverlapped)
+{
+    if (NULL == lpOverlapped) {
+        return;
+    }
+
+    // The OVERLAPPED pointer is also the address of the watch structure it belongs to
+    WatcherStructWin32 *wsw32 = (WatcherStructWin32*)lpOverlapped;
+    WatcherWin32 *ww32 = wsw32->Watch;
+
+    if (dwNumberOfBytesTransfered == 0) {
+        // A successful completion with no data means that the notification buffer overflowed
+        // and that the events were discarded. There is nothing to unpack, but the read must be
+        // restarted, else the directory would silently stop being monitored.
+        if (!ww32->StopNow) {
+            RefreshWatch(wsw32);
+        }
+        return;
+    }
+
+    PFILE_NOTIFY_INFORMATION pNotify;
+    size_t offset = 0;
+    do {
+        pNotify = (PFILE_NOTIFY_INFORMATION) &ww32->Buffer[offset];
+        offset += pNotify->NextEntryOffset;
+
+        // FileNameLength is a byte count and the name is not null-terminated
+        std::string sfn;
+        wchartoutf8(pNotify->FileName, sfn, pNotify->FileNameLength / sizeof(WCHAR));
+        ww32->Watch->handleAction(ww32, sfn, pNotify->Action);
+    } while (pNotify->NextEntryOffset != 0);
+
+    if (!ww32->StopNow) {
+        RefreshWatch(wsw32);
+    }
+}
+
+// Process completion packets until none arrives within the timeout (-1: wait without limit)
+// or until the destructor asks us to exit. If there are no watches yet, just sleep (one second
+// if msecs is -1).
+void RclFSWatchWin32::run(DWORD msecs)
+{
+    if (!mWatches.empty()) {
+        DWORD numOfBytes = 0;
+        OVERLAPPED* ov = NULL;
+        ULONG_PTR compKey = 0;
+        BOOL res = FALSE;
+        DWORD ms = msecs == -1 ? INFINITE : msecs;
+        while ((res = GetQueuedCompletionStatus(mIOCP, &numOfBytes, &compKey, &ov, ms))) {
+            if (compKey != 0 && compKey == reinterpret_cast<ULONG_PTR>(this)) {
+                // Called from ~RclFSWatchWin32. Must exit.
+                MONDEB("RclFSWatchWin32::run: queuedcompletion said need exit\n");
+                return;
+            } else {
+                // handleAction() walks and updates mWatches
+                std::unique_lock<std::mutex> lock(mWatchesLock);
+                WatchCallback(numOfBytes, ov);
+            }
+        }
+    } else {
+        // No watches yet.
+        MONDEB("RclFSWatchWin32::run: no watches yet\n");
+        DWORD ms = msecs == -1 ? 1000 : msecs;
+        std::this_thread::sleep_for(std::chrono::milliseconds(ms));
+    }
+}
+
+// Translate a system notification (FILE_ACTION_xxx for fn, relative to the watched directory)
+// into a listener call. Called from WatchCallback(), with mWatchesLock held by run().
+void RclFSWatchWin32::handleAction(WatcherWin32 *watch, const std::string& _fn, unsigned long action)
+{
+    std::string fn(_fn);
+    Action fwAction;
+    path_slashize(fn);
+    MONDEB("handleAction: fn [" << fn << "] action " << action << "\n");
+
+    // In case fn is not a simple name but a relative path (probably
+    // possible/common if recursive is set ?), sort out the directory
+    // path and simple file name.
+    std::string newpath = path_cat(watch->DirName, fn);
+    bool isdir = path_isdir(newpath);
+    std::string simplefn = path_getsimple(newpath);
+    std::string folderPath = path_getfather(newpath);
+
+    switch (action) {
+    case FILE_ACTION_RENAMED_OLD_NAME:
+        // Remember the old name for the RENAMED_NEW_NAME notification, and report the old path
+        // as deleted.
+        watch->OldFileName = fn;
+        /* FALLTHROUGH */
+    case FILE_ACTION_REMOVED:
+        fwAction = Action::Delete;
+        // The system does not tell us if this was a directory, but we
+        // need the info. Check if it was in the watches.
+        // TBD: for a delete, we should delete all watches on the subtree !
+        path_catslash(newpath);
+        for (auto& watchp : mWatches) {
+            if (watchp->Watch->DirName == newpath) {
+                isdir = true;
+                break;
+            }
+        }
+        break;
+    case FILE_ACTION_ADDED:
+        fwAction = Action::Add;
+        break;
+    case FILE_ACTION_MODIFIED:
+        fwAction = Action::Modify;
+        break;
+    case FILE_ACTION_RENAMED_NEW_NAME: {
+        fwAction = Action::Move;
+
+        // If this is a directory, possibly update the watches. TBD: this seems wrong because we
+        // should process the whole subtree ? Also probably not needed at all because we are
+        // recursive and only set watches on the top directories.
+        if (isdir) {
+            // Update the new directory path
+            std::string oldpath = path_cat(watch->DirName, watch->OldFileName);
+            path_catslash(oldpath);
+            // Watch names always end with a slash (see addWatch()): store the new one in the
+            // same form so that the name comparisons keep working after the rename.
+            std::string newdir(newpath);
+            path_catslash(newdir);
+            for (auto& watchp : mWatches) {
+                if (watchp->Watch->DirName == oldpath) {
+                    watchp->Watch->DirName = newdir;
+                    break;
+                }
+            }
+        }
+
+        std::string oldFolderPath = watch->DirName +
+            watch->OldFileName.substr(0, watch->OldFileName.find_last_of("/\\"));
+
+        if (folderPath == oldFolderPath) {
+            watch->Listener->handleFileAction(watch->ID, folderPath, simplefn, fwAction, isdir,
+                                              path_getsimple(watch->OldFileName));
+        } else {
+            // Calling the client with non-simple paths??
+            watch->Listener->handleFileAction(watch->ID, watch->DirName, fn, fwAction, isdir,
+                                              watch->OldFileName);
+        }
+        return;
+    }
+    default:
+        return;
+    };
+
+    watch->Listener->handleFileAction(watch->ID, folderPath, simplefn, fwAction, isdir);
+}
+
+// True if path (with a trailing slash) is the name of one of the watched directories.
+// mWatchesLock must be held by the caller.
+bool RclFSWatchWin32::pathInWatches(const std::string& path)
+{
+    for (const auto& wsw32 : mWatches) {
+        if (wsw32->Watch->DirName == path ) {
+            return true;
+        }
+    }
+    return false;
+}
+
+#endif // _WIN32
+
+
 ///////////////////////////////////////////////////////////////////////
 // The monitor 'factory'
 static RclMonitor *makeMonitor()
 {
-#ifdef RCL_USE_INOTIFY
+#ifdef _WIN32
+    return new RclMonitorWin32;
+#else
+# ifdef RCL_USE_INOTIFY
     return new RclIntf;
-#endif
-#ifndef RCL_USE_INOTIFY
-#ifdef RCL_USE_FAM
+# elif defined(RCL_USE_FAM)
     return new RclFAM;
-#endif
+# endif
 #endif
     LOGINFO("RclMonitor: neither Inotify nor Fam was compiled as file system "
             "change notification interface\n");
     return 0;
 }
+
 #endif // RCL_MONITOR
diff --git a/src/index/recollindex.cpp b/src/index/recollindex.cpp
index d9b8810e..28d3f5b1 100644
--- a/src/index/recollindex.cpp
+++ b/src/index/recollindex.cpp
@@ -103,7 +103,9 @@ static struct option long_options[] = {
     {0, 0, 0, 0}
 };
 
+#ifndef _WIN32
 ReExec *o_reexec;
+#endif
 
 // Globals for atexit cleanup
 static ConfIndexer *confindexer;
@@ -867,11 +869,13 @@ int main(int argc, char
*argv[]) LOGDEB("recollindex: sleeping " << sleepsecs << "\n"); for (int i = 0; i < sleepsecs; i++) { sleep(1); +#ifndef _WIN32 // Check that x11 did not go away while we were sleeping. if (!(op_flags & OPT_x) && !x11IsAlive()) { LOGDEB("X11 session went away during initial sleep period\n"); exit(0); } +#endif } } diff --git a/src/index/subtreelist.cpp b/src/index/subtreelist.cpp index 40018aeb..4c4c2f9c 100644 --- a/src/index/subtreelist.cpp +++ b/src/index/subtreelist.cpp @@ -26,13 +26,18 @@ #include "subtreelist.h" #include "log.h" -bool subtreelist(RclConfig *config, const string& top, - vector& paths) +bool subtreelist(RclConfig *config, const string& _top, vector& paths) { - LOGDEB("subtreelist: top: [" << (top) << "]\n" ); + std::string top(_top); +#ifdef _WIN32 + // Need to convert c:path to /c/path because this is how paths are indexed + top = path_slashdrive(top); +#endif + + LOGDEB("subtreelist: top: [" << top << "]\n"); Rcl::Db rcldb(config); if (!rcldb.open(Rcl::Db::DbRO)) { - LOGERR("subtreelist: can't open database in [" << config->getDbDir() << + LOGERR("subtreelist: can't open index in [" << config->getDbDir() << "]: " << rcldb.getReason() << "\n"); return false; } diff --git a/src/internfile/internfile.cpp b/src/internfile/internfile.cpp index c60e1e21..677a544e 100644 --- a/src/internfile/internfile.cpp +++ b/src/internfile/internfile.cpp @@ -51,7 +51,7 @@ using namespace std; // The internal path element separator. This can't be the same as the rcldb // file to ipath separator : "|" // We replace it with a control char if it comes out of a filter (ie: -// rclzip or rclchm can do this). If you want the SOH control char +// rclzip.py or rclchm.py can do this). If you want the SOH control char // inside an ipath, you're out of luck (and a bit weird). 
static const string cstr_isep(":"); @@ -562,13 +562,13 @@ bool FileInterner::dijontorcl(Rcl::Doc& doc) const string *val = 0; if (!doc.peekmeta(Rcl::Doc::keymd5, &val) || val->empty()) doc.meta[Rcl::Doc::keymd5] = ent.second; - } else if (ent.first == cstr_dj_keymt || - ent.first == cstr_dj_keycharset) { + } else if (ent.first == cstr_dj_keymt || ent.first == cstr_dj_keycharset) { // don't need/want these. } else { - LOGDEB2("dijontorcl: " << m_cfg->fieldCanon(ent.first) << " -> " << - ent.second << endl); - doc.addmeta(m_cfg->fieldCanon(ent.first), ent.second); + LOGDEB2("dijontorcl: " << m_cfg->fieldCanon(ent.first) << " -> " << ent.second << "\n"); + if (!ent.second.empty()) { + doc.meta[m_cfg->fieldCanon(ent.first)] = ent.second; + } } } if (doc.meta[Rcl::Doc::keyabs].empty() && @@ -583,7 +583,7 @@ const set nocopyfields{cstr_dj_keycontent, cstr_dj_keymd, cstr_dj_keyanc, cstr_dj_keyorigcharset, cstr_dj_keyfn, cstr_dj_keymt, cstr_dj_keycharset, cstr_dj_keyds}; -static void copymeta(const RclConfig *cfg,Rcl::Doc& doc, const RecollFilter* hp) +static void copymeta(const RclConfig *cfg, Rcl::Doc& doc, const RecollFilter* hp) { for (const auto& entry : hp->get_meta_data()) { if (nocopyfields.find(entry.first) == nocopyfields.end()) { @@ -650,7 +650,7 @@ void FileInterner::collectIpathAndMT(Rcl::Doc& doc) const // handlers to use setfield() instead of embedding // metadata in the HTML meta tags. if (i == 0 || !pathelprev.empty()) { - copymeta(m_cfg, doc, m_handlers[i]); + copymeta(m_cfg, doc, m_handlers[i == 0 ? 0 : i-1]); } if (doc.fbytes.empty()) { lltodecstr(m_handlers[i]->get_docsize(), doc.fbytes); @@ -744,8 +744,7 @@ int FileInterner::addHandler() LOGINFO("FileInterner::addHandler: no filter for [" << mimetype << "]\n"); return ADD_CONTINUE; } - newflt->set_property(Dijon::Filter::OPERATING_MODE, - m_forPreview ? "view" : "index"); + newflt->set_property(Dijon::Filter::OPERATING_MODE, m_forPreview ? 
"view" : "index"); if (!charset.empty()) newflt->set_property(Dijon::Filter::DEFAULT_CHARSET, charset); diff --git a/src/internfile/mh_exec.cpp b/src/internfile/mh_exec.cpp index 47ad9db7..88097aa6 100644 --- a/src/internfile/mh_exec.cpp +++ b/src/internfile/mh_exec.cpp @@ -82,8 +82,7 @@ bool MimeHandlerExec::set_document_file_impl(const std::string& mt, tpsread = true; if (!nomd5tps.empty()) { if (params.size() && - nomd5tps.find(path_getsimple(params[0])) != - nomd5tps.end()) { + nomd5tps.find(path_getsimple(params[0])) != nomd5tps.end()) { m_handlernomd5 = true; } // On windows the 1st param is often a script interp diff --git a/src/internfile/mh_execm.h b/src/internfile/mh_execm.h index 7956de8e..27fcba20 100644 --- a/src/internfile/mh_execm.h +++ b/src/internfile/mh_execm.h @@ -73,7 +73,7 @@ text/plainData: 10 * time). Absent during indexing (ipaths are generated and sent back from * the script) * - Mimetype: this is the mime type for the (possibly container) file. - * Can be useful to filters which handle multiple types, like rclaudio. + * Can be useful to filters which handle multiple types, like rclaudio.py. * * The script answers with messages having the following fields: * - Document: translated document data. diff --git a/src/qtgui/rclm_view.cpp b/src/qtgui/rclm_view.cpp index d9a75b90..6aa43e00 100644 --- a/src/qtgui/rclm_view.cpp +++ b/src/qtgui/rclm_view.cpp @@ -34,6 +34,7 @@ #include "rclmain_w.h" #include "rclzg.h" #include "pathut.h" +#include "unacpp.h" using namespace std; @@ -42,7 +43,6 @@ static const vector browser_list{ "opera", "google-chrome", "chromium-browser", "palemoon", "iceweasel", "firefox", "konqueror", "epiphany"}; - // Start native viewer or preview for input Doc. This is used to allow // using recoll from another app (e.g. Unity Scope) to view embedded // result docs (docs with an ipath). . 
We act as a proxy to extract @@ -155,13 +155,27 @@ void RclMain::openWith(Rcl::Doc doc, string cmdspec) execViewer(subs, false, execname, lcmd, cmdspec, doc); } -void RclMain::startNativeViewer(Rcl::Doc doc, int pagenum, QString term) +static bool pagenumNeeded(const std::string& cmd) { + return cmd.find("%p") != std::string::npos; +} +static bool linenumNeeded(const std::string& cmd) +{ + return cmd.find("%l") != std::string::npos; +} +static bool termNeeded(const std::string& cmd) +{ + return cmd.find("%s") != std::string::npos; +} + +void RclMain::startNativeViewer(Rcl::Doc doc, int pagenum, QString qterm) +{ + std::string term = qs2utf8s(qterm); string apptag; doc.getmeta(Rcl::Doc::keyapptg, &apptag); LOGDEB("RclMain::startNativeViewer: mtype [" << doc.mimetype << "] apptag [" << apptag << "] page " << pagenum << " term [" << - qs2utf8s(term) << "] url [" << doc.url << "] ipath [" << + term << "] url [" << doc.url << "] ipath [" << doc.ipath << "]\n"); // Look for appropriate viewer @@ -377,19 +391,19 @@ void RclMain::startNativeViewer(Rcl::Doc doc, int pagenum, QString term) // If we are not called with a page number (which would happen for a call // from the snippets window), see if we can compute a page number anyway. 
- if (pagenum == -1) { - pagenum = 1; - string lterm; - if (m_source) - pagenum = m_source->getFirstMatchPage(doc, lterm); + if (m_source && pagenum == -1 && (pagenumNeeded(cmd) || termNeeded(cmd)|| linenumNeeded(cmd))) { + pagenum = m_source->getFirstMatchPage(doc, term); if (pagenum == -1) pagenum = 1; - else // We get the match term used to compute the page - term = QString::fromUtf8(lterm.c_str()); } - char cpagenum[20]; - sprintf(cpagenum, "%d", pagenum); + int line = 1; + if (m_source && !term.empty() && linenumNeeded(cmd)) { + if (doc.text.empty()) { + rcldb->getDocRawText(doc); + } + line = m_source->getFirstMatchLine(doc, term); + } // Substitute %xx inside arguments string efftime; @@ -408,9 +422,10 @@ void RclMain::startNativeViewer(Rcl::Doc doc, int pagenum, QString term) subs["f"] = fn; subs["F"] = fn; subs["i"] = FileInterner::getLastIpathElt(doc.ipath); + subs["l"] = ulltodecstr(line); subs["M"] = doc.mimetype; - subs["p"] = cpagenum; - subs["s"] = (const char*)term.toLocal8Bit(); + subs["p"] = ulltodecstr(pagenum); + subs["s"] = term; subs["U"] = url_encode(url); subs["u"] = url; // Let %(xx) access all metadata. 
diff --git a/src/qtgui/rclmain_w.h b/src/qtgui/rclmain_w.h index 05899cb4..1c00932d 100644 --- a/src/qtgui/rclmain_w.h +++ b/src/qtgui/rclmain_w.h @@ -140,8 +140,7 @@ public slots: virtual void showActionsSearch(); virtual void startPreview(int docnum, Rcl::Doc doc, int keymods); virtual void startPreview(Rcl::Doc); - virtual void startNativeViewer(Rcl::Doc, int pagenum = -1, - QString term = QString()); + virtual void startNativeViewer(Rcl::Doc, int pagenum = -1, QString term = QString()); virtual void openWith(Rcl::Doc, string); virtual void saveDocToFile(Rcl::Doc); virtual void previewNextInTab(Preview *, int sid, int docnum); diff --git a/src/qtgui/recoll-win.pro b/src/qtgui/recoll-win.pro index fa478c4a..39eae455 100644 --- a/src/qtgui/recoll-win.pro +++ b/src/qtgui/recoll-win.pro @@ -117,40 +117,39 @@ INCLUDEPATH += ../common ../index ../internfile ../query ../unac \ ../utils ../aspell ../rcldb ../qtgui ../xaposix \ confgui widgets windows { - DEFINES += PSAPI_VERSION=1 - DEFINES += __WIN32__ - DEFINES += UNICODE - RC_FILE = recoll.rc + DEFINES += PSAPI_VERSION=1 + DEFINES += __WIN32__ + DEFINES += UNICODE + RC_FILE = recoll.rc - HEADERS += \ - winschedtool.h - SOURCES += \ + HEADERS += \ + winschedtool.h + SOURCES += \ winschedtool.cpp - FORMS += \ + FORMS += \ winschedtool.ui - contains(QMAKE_CC, gcc){ - # MingW - QMAKE_CXXFLAGS += -std=c++11 -Wno-unused-parameter - LIBS += C:/recoll/src/windows/build-librecoll-Desktop_Qt_5_8_0_MinGW_32bit-Release/release/librecoll.dll - } + contains(QMAKE_CC, gcc){ + # MingW + QMAKE_CXXFLAGS += -std=c++11 -Wno-unused-parameter + LIBS += \ + C:/recoll/src/windows/build-librecoll-Desktop_Qt_5_8_0_MinGW_32bit-Release/release/librecoll.dll + } contains(QMAKE_CC, cl){ # MSVC RECOLLDEPS = ../../../recolldeps/msvc DEFINES += USING_STATIC_LIBICONV + PRE_TARGETDEPS = \ + ../windows/build-librecoll-Desktop_Qt_5_14_2_MSVC2017_32bit-Release/release/librecoll.lib LIBS += \ - 
-L../windows/build-librecoll-Desktop_Qt_5_14_2_MSVC2017_32bit-Release/release \ - -llibrecoll \ + -L../windows/build-librecoll-Desktop_Qt_5_14_2_MSVC2017_32bit-Release/release -llibrecoll \ $$RECOLLDEPS/libxml2/libxml2-2.9.4+dfsg1/win32/bin.msvc/libxml2.lib \ $$RECOLLDEPS/libxslt/libxslt-1.1.29/win32/bin.msvc/libxslt.lib \ - -L../windows/build-libxapian-Desktop_Qt_5_14_2_MSVC2017_32bit-Release/release \ - -llibxapian \ - -L$$RECOLLDEPS/build-libiconv-Desktop_Qt_5_14_2_MSVC2017_32bit-Release/release \ - -llibiconv \ + -L../windows/build-libxapian-Desktop_Qt_5_14_2_MSVC2017_32bit-Release/release -llibxapian \ + -L$$RECOLLDEPS/build-libiconv-Desktop_Qt_5_14_2_MSVC2017_32bit-Release/release -llibiconv \ $$RECOLLDEPS/zlib-1.2.11/zdll.lib \ - -lrpcrt4 -lws2_32 -luser32 \ - -lshell32 -lshlwapi -lpsapi -lkernel32 + -lrpcrt4 -lws2_32 -luser32 -lshell32 -lshlwapi -lpsapi -lkernel32 } } @@ -170,30 +169,32 @@ mac { rtitool.cpp FORMS += \ - crontool.ui \ - rtitool.ui + crontool.ui \ + rtitool.ui LIBS += \ -../windows/build-librecoll-Desktop_Qt_5_14_2_clang_64bit-Release/liblibrecoll.a \ - ../../../xapian-core-1.4.18/.libs/libxapian.a \ - -lxslt -lxml2 -liconv -lz + ../windows/build-librecoll-Desktop_Qt_5_14_2_clang_64bit-Release/liblibrecoll.a \ + ../../../xapian-core-1.4.18/.libs/libxapian.a \ + -lxslt -lxml2 -liconv -lz ICON = images/recoll.icns - system(cp ../sampleconf/mimeview.mac ../mimeview) - APP_EXAMPLES.files = \ - ../sampleconf/fragment-buttons.xml \ - ../sampleconf/fields \ - ../sampleconf/recoll.conf \ - ../sampleconf/mimeconf \ - ../sampleconf/recoll.qss \ - ../sampleconf/recoll-dark.qss \ - ../sampleconf/recoll-dark.css \ - ../sampleconf/mimemap \ - ../mimeview + ../sampleconf/fragment-buttons.xml \ + ../sampleconf/fields \ + ../sampleconf/recoll.conf \ + ../sampleconf/mimeconf \ + ../sampleconf/mimeview \ + ../sampleconf/mimemap \ + ../sampleconf/recoll.qss \ + ../sampleconf/recoll-dark.qss \ + ../sampleconf/recoll-dark.css APP_EXAMPLES.path = 
Contents/Resources/examples + APP_EXAMPLES_MAC.files = \ + ../sampleconf/macos/mimeview + APP_EXAMPLES_MAC.path = Contents/Resources/examples/macos + APP_FILTERS.files = \ ../filters/abiword.xsl \ ../filters/cmdtalk.py \ @@ -209,30 +210,30 @@ mac { ../filters/openxml-word-body.xsl \ ../filters/openxml-meta.xsl \ ../filters/ppt-dump.py \ - ../filters/rcl7z \ + ../filters/rcl7z.py \ ../filters/rclaptosidman \ - ../filters/rclaudio \ + ../filters/rclaudio.py \ ../filters/rclbasehandler.py \ ../filters/rclbibtex.sh \ ../filters/rclcheckneedretry.sh \ - ../filters/rclchm \ - ../filters/rcldia \ + ../filters/rclchm.py \ + ../filters/rcldia.py \ ../filters/rcldjvu.py \ ../filters/rcldoc.py \ ../filters/rcldvi \ - ../filters/rclepub \ - ../filters/rclepub1 \ + ../filters/rclepub.py \ + ../filters/rclepub1.py \ ../filters/rclexec1.py \ ../filters/rclexecm.py \ ../filters/rclfb2.py \ ../filters/rclgaim \ ../filters/rclgenxslt.py \ ../filters/rclhwp.py \ - ../filters/rclics \ + ../filters/rclics.py \ ../filters/rclimg \ ../filters/rclimg.py \ - ../filters/rclinfo \ - ../filters/rclkar \ + ../filters/rclinfo.py \ + ../filters/rclkar.py \ ../filters/rclkwd \ ../filters/rcllatinclass.py \ ../filters/rcllatinstops.zip \ @@ -250,21 +251,21 @@ mac { ../filters/rclpst.py \ ../filters/rclpurple \ ../filters/rclpython.py \ - ../filters/rclrar \ + ../filters/rclrar.py \ ../filters/rclrtf.py \ ../filters/rclscribus \ ../filters/rclshowinfo \ - ../filters/rcltar \ + ../filters/rcltar.py \ ../filters/rcltex \ ../filters/rcltext.py \ ../filters/rcluncomp \ ../filters/rcluncomp.py \ - ../filters/rclwar \ + ../filters/rclwar.py \ ../filters/rclxls.py \ ../filters/rclxml.py \ ../filters/rclxmp.py \ ../filters/rclxslt.py \ - ../filters/rclzip \ + ../filters/rclzip.py \ ../filters/recoll-we-move-files.py \ ../filters/recollepub.zip \ ../filters/svg.xsl \ diff --git a/src/qtgui/snippets_w.cpp b/src/qtgui/snippets_w.cpp index 0e67a1bc..54c90e47 100644 --- a/src/qtgui/snippets_w.cpp +++ 
b/src/qtgui/snippets_w.cpp @@ -1,4 +1,4 @@ -/* Copyright (C) 2012 J.F.Dockes +/* Copyright (C) 2012-2021 J.F.Dockes * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -67,8 +67,7 @@ using namespace std; class PlainToRichQtSnippets : public PlainToRich { public: virtual string startMatch(unsigned int) { - return string(""); + return string(""); } virtual string endMatch() { return string(""); @@ -82,12 +81,10 @@ void SnippetsW::init() QPushButton *searchButton = new QPushButton(tr("Search")); searchButton->setAutoDefault(false); buttonBox->addButton(searchButton, QDialogButtonBox::ActionRole); -// setWindowFlags(Qt::WindowStaysOnTopHint); searchFM->hide(); onNewShortcuts(); - connect(&SCBase::scBase(), SIGNAL(shortcutsChanged()), - this, SLOT(onNewShortcuts())); + connect(&SCBase::scBase(), SIGNAL(shortcutsChanged()), this, SLOT(onNewShortcuts())); QPushButton *closeButton = buttonBox->button(QDialogButtonBox::Close); if (closeButton) @@ -105,11 +102,9 @@ void SnippetsW::init() browserw = new QWebView(this); verticalLayout->insertWidget(0, browserw); browser->setUrl(QUrl(QString::fromUtf8("about:blank"))); - connect(browser, SIGNAL(linkClicked(const QUrl &)), - this, SLOT(onLinkClicked(const QUrl &))); + connect(browser, SIGNAL(linkClicked(const QUrl &)), this, SLOT(onLinkClicked(const QUrl &))); browser->page()->setLinkDelegationPolicy(QWebPage::DelegateAllLinks); - browser->page()->currentFrame()->setScrollBarPolicy(Qt::Horizontal, - Qt::ScrollBarAlwaysOff); + browser->page()->currentFrame()->setScrollBarPolicy(Qt::Horizontal, Qt::ScrollBarAlwaysOff); QWEBSETTINGS *ws = browser->page()->settings(); if (prefs.reslistfontfamily != "") { ws->setFontFamily(QWEBSETTINGS::StandardFont, prefs.reslistfontfamily); @@ -136,8 +131,7 @@ void SnippetsW::init() #else browserw = new QTextBrowser(this); 
verticalLayout->insertWidget(0, browserw); - connect(browser, SIGNAL(anchorClicked(const QUrl &)), - this, SLOT(onLinkClicked(const QUrl &))); + connect(browser, SIGNAL(anchorClicked(const QUrl &)), this, SLOT(onLinkClicked(const QUrl &))); browser->setReadOnly(true); browser->setUndoRedoEnabled(false); browser->setOpenLinks(false); @@ -183,8 +177,7 @@ void SnippetsW::createPopupMenu(const QPoint& pos) { QMenu *popup = new QMenu(this); if (m_sortingByPage) { - popup->addAction(tr("Sort By Relevance"), this, - SLOT(reloadByRelevance())); + popup->addAction(tr("Sort By Relevance"), this, SLOT(reloadByRelevance())); } else { popup->addAction(tr("Sort By Page"), this, SLOT(reloadByPage())); } @@ -230,29 +223,22 @@ void SnippetsW::onSetDoc(Rcl::Doc doc, std::shared_ptr source) source->getTerms(hdata); ostringstream oss; - oss << - "" - ""; + oss << "" + ""; oss << "\n"; oss << qs2utf8s(prefs.darkreslistheadertext) << qs2utf8s(prefs.reslistheadertext); - oss << - "" - "" - "" - ; + oss << "
    "; g_hiliter.set_inputhtml(false); bool nomatch = true; for (const auto& snippet : vpabs) { if (snippet.page == -1) { - oss << "" << endl; + oss << "" << "\n"; continue; } list lr; @@ -263,13 +249,12 @@ void SnippetsW::onSetDoc(Rcl::Doc doc, std::shared_ptr source) nomatch = false; oss << "" << endl; + oss << "" << "\n"; } - oss << "
    " << - snippet.snippet << "
    " << snippet.snippet << "
    "; if (snippet.page > 0) { - oss << "" - << "P. " << snippet.page << ""; + oss << "" << + "P. " << snippet.page << ""; } - oss << "" << lr.front().c_str() << "
    " << lr.front().c_str() << "
    " << endl; + oss << "" << "\n"; if (nomatch) { oss.str("\n"); oss << qs2utf8s(tr("

    Sorry, no exact match was found within limits. " @@ -278,12 +263,12 @@ void SnippetsW::onSetDoc(Rcl::Doc doc, std::shared_ptr source) } oss << "\n"; #if defined(USING_WEBKIT) || defined(USING_WEBENGINE) - browser->setHtml(QString::fromUtf8(oss.str().c_str())); + browser->setHtml(u8s2qs(oss.str())); #else browser->clear(); browser->append("."); browser->clear(); - browser->insertHtml(QString::fromUtf8(oss.str().c_str())); + browser->insertHtml(u8s2qs(oss.str())); browser->moveCursor (QTextCursor::Start); browser->ensureCursorVisible(); #endif @@ -354,8 +339,7 @@ void SnippetsW::onLinkClicked(const QUrl &url) string term; if (termpos != string::npos) term = ascurl.substr(termpos+1); - emit startNativeViewer(m_doc, page, - QString::fromUtf8(term.c_str())); + emit startNativeViewer(m_doc, page, u8s2qs(term)); return; } } diff --git a/src/query/docseq.h b/src/query/docseq.h index 4dd6f50f..650b9d89 100644 --- a/src/query/docseq.h +++ b/src/query/docseq.h @@ -111,6 +111,9 @@ public: virtual int getFirstMatchPage(Rcl::Doc&, std::string&) { return -1; } + virtual int getFirstMatchLine(const Rcl::Doc&, const std::string&) { + return 1; + } /** Get duplicates. 
*/
     virtual bool docDups(const Rcl::Doc&, std::vector&) {
         return false;
diff --git a/src/query/docseqdb.cpp b/src/query/docseqdb.cpp
index fab028bd..df06c6a3 100644
--- a/src/query/docseqdb.cpp
+++ b/src/query/docseqdb.cpp
@@ -126,6 +126,19 @@ int DocSequenceDb::getFirstMatchPage(Rcl::Doc &doc, string& term)
     return -1;
 }
 
+// Ask the query for the line number of the first match of term in the document. Returns 1 (the
+// value also used when there is no index to ask) if the query can't be set up.
+int DocSequenceDb::getFirstMatchLine(const Rcl::Doc &doc, const string& term)
+{
+    std::unique_lock locker(o_dblock);
+    if (!setQuery())
+        return 1;
+    if (m_q->whatDb()) {
+        return m_q->getFirstMatchLine(doc, term);
+    }
+    return 1;
+}
+
 list DocSequenceDb::expand(Rcl::Doc &doc)
 {
     std::unique_lock locker(o_dblock);
diff --git a/src/query/docseqdb.h b/src/query/docseqdb.h
index 69535d79..b77051b4 100644
--- a/src/query/docseqdb.h
+++ b/src/query/docseqdb.h
@@ -43,6 +43,7 @@ public:
     virtual bool getAbstract(Rcl::Doc &doc, std::vector&) override;
 
     virtual int getFirstMatchPage(Rcl::Doc&, std::string& term) override;
+    virtual int getFirstMatchLine(const Rcl::Doc&, const std::string& term) override;
     virtual bool docDups(const Rcl::Doc& doc, std::vector& dups) override;
     virtual std::string getDescription() override;
 
diff --git a/src/query/plaintorich.h b/src/query/plaintorich.h
index 9118ea5a..b86f649b 100644
--- a/src/query/plaintorich.h
+++ b/src/query/plaintorich.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2004 J.F.Dockes
+/* Copyright (C) 2004-2021 J.F.Dockes
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or
@@ -60,8 +60,7 @@ public:
      * @param in raw text out of internfile.
      * @param out rich text output, divided in chunks (to help our caller
      * avoid inserting half tags into textedit which doesnt like it)
-     * @param in hdata terms and groups to be highlighted. These are
-     * lowercase and unaccented.
+     * @param in hdata terms and groups to be highlighted.
See utils/hldata.h * @param chunksize max size of chunks in output list */ virtual bool plaintorich(const std::string &in, std::list &out, diff --git a/src/rcldb/rclabsfromtext.cpp b/src/rcldb/rclabsfromtext.cpp index 32783b1f..4195b1c4 100644 --- a/src/rcldb/rclabsfromtext.cpp +++ b/src/rcldb/rclabsfromtext.cpp @@ -141,11 +141,9 @@ public: // add/update fragment definition. virtual bool takeword(const std::string& term, int pos, int bts, int bte) { LOGDEB1("takeword: [" << term << "] bytepos: "< maxtermcount) { LOGINF("Rclabsfromtext: stopping because maxtermcount reached: "<< maxtermcount << endl); @@ -154,8 +152,7 @@ public: } // Also limit the number of fragments (just in case safety) if (m_fragments.size() > maxtermcount / 100) { - LOGINF("Rclabsfromtext: stopping because maxfragments reached: "<< - maxtermcount/100 << endl); + LOGINF("Rclabsfromtext: stopping: max fragments count: " << maxtermcount/100 << "\n"); retflags |= ABSRES_TRUNC; return false; } @@ -193,8 +190,7 @@ public: m_curterm = term; m_curtermcoef = coef; } else { - LOGDEB2("Extending current fragment: " << m_remainingWords << - " -> " << m_ctxwords << endl); + LOGDEB2("Extending current fragment: "< "< 5) { - // Limit expansion of contiguous fragments (this is to - // avoid common terms in search causing long - // heavyweight meaningless fragments. Also, limit length). + // Limit expansion of contiguous fragments (this is to avoid common terms in search + // causing long heavyweight meaningless fragments. Also, limit length). m_remainingWords = 1; m_extcount = 0; } @@ -247,18 +242,14 @@ public: LOGDEB1("FRAGMENT: from byte " << m_curfrag.first << " to byte " << m_curfrag.second << endl); LOGDEB1("FRAGMENT TEXT [" << m_rawtext.substr( - m_curfrag.first, m_curfrag.second-m_curfrag.first) - << "]\n"); - // We used to not push weak fragments if we had a lot - // already. This can cause problems if the fragments - // we drop are actually group fragments (which have - // not got their boost yet). 
The right cut value is - // difficult to determine, because the absolute values - // of the coefs depend on many things (index size, - // etc.) The old test was if (m_totalcoef < 5.0 || - // m_curfragcoef >= 1.0) We now just avoid creating a - // monster by testing the current fragments count at - // the top of the function + m_curfrag.first, m_curfrag.second-m_curfrag.first) << "]\n"); + // We used to not push weak fragments if we had a lot already. This can cause + // problems if the fragments we drop are actually group fragments (which have not + // got their boost yet). The right cut value is difficult to determine, because the + // absolute values of the coefs depend on many things (index size, etc.) The old + // test was if (m_totalcoef < 5.0 || m_curfragcoef >= 1.0) We now just avoid + // creating a monster by testing the current fragments count at the top of the + // function m_fragments.push_back(MatchFragment(m_curfrag.first, m_curfrag.second, m_curfragcoef, @@ -298,8 +289,7 @@ public: m_curtermcoef = 0.0; } - LOGDEB("TextSplitABS: stored total " << m_fragments.size() << - " fragments" << endl); + LOGDEB("TextSplitABS: stored total " << m_fragments.size() << " fragments" << endl); vector tboffs; // Look for matches to PHRASE and NEAR term groups and finalize @@ -340,9 +330,8 @@ public: } auto fragit = m_fragments.begin(); for (const auto& grpmatch : tboffs) { - LOGDEB2("LOOKING FOR FRAGMENT: group: " << grpmatch.offs.first << - "-" << grpmatch.offs.second << " curfrag " << - fragit->start << "-" << fragit->stop << endl); + LOGDEB2("LOOKING FOR FRAGMENT: group: " << grpmatch.offs.first << "-" << + grpmatch.offs.second<<" curfrag "<start<<"-"<stop<<"\n"); while (fragit->stop < grpmatch.offs.first) { fragit++; if (fragit == m_fragments.end()) { @@ -417,21 +406,19 @@ int Query::Native::abstractFromText( bool sortbypage ) { - (void)chron; + PRETEND_USE(chron); LOGABS("abstractFromText: entry: " << chron.millis() << "mS\n"); string rawtext; if 
(!ndb->getRawText(docid, rawtext)) { LOGDEB0("abstractFromText: can't fetch text\n"); return ABSRES_ERROR; } - LOGABS("abstractFromText: got raw text: size " << rawtext.size() << " " << - chron.millis() << "mS\n"); + LOGABS("abstractFromText: got raw text: size "<m_snipMaxPosWalk); splitter.text_to_words(rawtext); LOGABS("abstractFromText: text_to_words: " << chron.millis() << "mS\n"); @@ -484,8 +470,7 @@ int Query::Native::abstractFromText( // main term and the page positions. unsigned int count = 0; for (const auto& entry : result) { - string frag( - fixfrag(rawtext.substr(entry.start, entry.stop - entry.start))); + string frag(fixfrag(rawtext.substr(entry.start, entry.stop - entry.start))); #ifdef COMPUTE_HLZONES // This would need to be modified to take tag parameters @@ -506,8 +491,7 @@ int Query::Native::abstractFromText( if (page < 0) page = 0; } - LOGDEB0("=== FRAGMENT: p. " << page << " Coef: " << entry.coef << - ": " << frag << endl); + LOGDEB0("=== FRAGMENT: p. " << page << " Coef: " << entry.coef << ": " << frag << endl); vabs.push_back(Snippet(page, frag).setTerm(entry.term)); if (count++ >= maxtotaloccs) break; @@ -515,4 +499,45 @@ int Query::Native::abstractFromText( return ABSRES_OK | splitter.getretflags(); } +class TermLineSplitter : public TextSplit { +public: + TermLineSplitter(const std::string& term) + : TextSplit(TextSplit::TXTS_NOSPANS), m_term(term) { + } + bool takeword(const std::string& _term, int, int, int) override { + std::string term; + if (o_index_stripchars) { + if (!unacmaybefold(_term, term, "UTF-8", UNACOP_UNACFOLD)) { + LOGINFO("PlainToRich::takeword: unac failed for [" << term << "]\n"); + return true; + } + } + if (term == m_term) { + return false; + } + return true; + } + void newline(int) override { + m_line++; + } + int getline() { + return m_line; + } +private: + int m_line{1}; + std::string m_term; +}; + +int Query::getFirstMatchLine(const Doc &doc, const std::string& term) +{ + int line = 1; + TermLineSplitter 
splitter(term); + bool ret = splitter.text_to_words(doc.text); + // The splitter takeword() breaks by returning false as soon as the term is found + if (ret == false) { + line = splitter.getline(); + } + return line; +} + } diff --git a/src/rcldb/rclabstract.cpp b/src/rcldb/rclabstract.cpp index 311ef760..04811a11 100644 --- a/src/rcldb/rclabstract.cpp +++ b/src/rcldb/rclabstract.cpp @@ -254,7 +254,7 @@ double Query::Native::qualityTerms(Xapian::docid docid, } -// Return page number for first match of "significant" term. +// Choose most interesting term and return the page number for its first match int Query::Native::getFirstMatchPage(Xapian::docid docid, string& term) { LOGDEB("Query::Native::getFirstMatchPage\n"); @@ -286,9 +286,7 @@ int Query::Native::getFirstMatchPage(Xapian::docid docid, string& term) qualityTerms(docid, terms, byQ); for (auto mit = byQ.rbegin(); mit != byQ.rend(); mit++) { - for (vector::const_iterator qit = mit->second.begin(); - qit != mit->second.end(); qit++) { - string qterm = *qit; + for (const auto& qterm : mit->second) { Xapian::PositionIterator pos; string emptys; try { @@ -619,9 +617,8 @@ int Query::Native::abstractFromIndex( // possibly retried by our caller. // // @param[out] vabs the abstract is returned as a vector of snippets. 
-int Query::Native::makeAbstract(Xapian::docid docid, - vector& vabs, - int imaxoccs, int ictxwords, bool sortbypage) +int Query::Native::makeAbstract( + Xapian::docid docid, vector& vabs, int imaxoccs, int ictxwords, bool sortbypage) { chron.restart(); LOGDEB("makeAbstract: docid " << docid << " imaxoccs " << diff --git a/src/rcldb/rcldb.cpp b/src/rcldb/rcldb.cpp index 557affcb..ceaec4e3 100644 --- a/src/rcldb/rcldb.cpp +++ b/src/rcldb/rcldb.cpp @@ -96,10 +96,13 @@ const string pathelt_prefix = "XP"; static const string udi_prefix("Q"); static const string parent_prefix("F"); -// Special terms to mark begin/end of field (for anchored searches), and -// page breaks +// Special terms to mark begin/end of field (for anchored searches). string start_of_field_term; string end_of_field_term; + +// Special term for page breaks. Note that we use a complicated mechanism for multiple page +// breaks at the same position, when it would have been probably simpler to use XXPG/n terms +// instead (did not try to implement though). A change would force users to reindex. const string page_break_term = "XXPG/"; // Special term to mark documents with children. 
@@ -1846,16 +1849,14 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi, Doc &doc) } } - // If empty pages (multiple break at same pos) were recorded, save - // them (this is because we have no way to record them in the - // Xapian list + // If empty pages (multiple break at same pos) were recorded, save them (this is + // because we have no way to record them in the Xapian list) if (!tpidx.m_pageincrvec.empty()) { ostringstream multibreaks; for (unsigned int i = 0; i < tpidx.m_pageincrvec.size(); i++) { if (i != 0) multibreaks << ","; - multibreaks << tpidx.m_pageincrvec[i].first << "," << - tpidx.m_pageincrvec[i].second; + multibreaks << tpidx.m_pageincrvec[i].first << "," << tpidx.m_pageincrvec[i].second; } RECORD_APPEND(record, string(cstr_mbreaks), multibreaks.str()); } diff --git a/src/rcldb/rclquery.cpp b/src/rcldb/rclquery.cpp index 19b88f79..19b50c87 100644 --- a/src/rcldb/rclquery.cpp +++ b/src/rcldb/rclquery.cpp @@ -360,7 +360,6 @@ int Query::getFirstMatchPage(const Doc &doc, string& term) return m_reason.empty() ? 
pagenum : -1; } - // Mset size // Note: times for retrieving (multiple times)all docs from a sample // 25k docs db (q: mime:*) @@ -511,8 +510,7 @@ vector Query::expand(const Doc &doc) Xapian::ESet eset = m_nq->xenquire->get_eset(20, rset, false); LOGDEB("ESet terms:\n"); // We filter out the special terms - for (Xapian::ESetIterator it = eset.begin(); - it != eset.end(); it++) { + for (Xapian::ESetIterator it = eset.begin(); it != eset.end(); it++) { LOGDEB(" [" << (*it) << "]\n"); if ((*it).empty() || has_prefix(*it)) continue; diff --git a/src/rcldb/rclquery.h b/src/rcldb/rclquery.h index cade3650..fd8874d3 100644 --- a/src/rcldb/rclquery.h +++ b/src/rcldb/rclquery.h @@ -115,10 +115,18 @@ public: // Returned as a vector of pair page is 0 if unknown int makeDocAbstract(const Doc &doc, std::vector& abst, int maxoccs= -1, int ctxwords= -1,bool sortbypage=false); - /** Retrieve page number for first match for "significant" query term - * @param term returns the chosen term */ + + /** Choose most interesting term and return the page number for its first match + * @param term returns the chosen term + * @return page number or -1 if term not found or other issue + */ int getFirstMatchPage(const Doc &doc, std::string& term); + /** Compute line number for first match of term. Only works if doc.text has text. + * This uses a text split. Both this and the above getFirstMaxPage() could be done and saved + * while we compute the abstracts, quite a lot of waste here. 
*/ + int getFirstMatchLine(const Doc &doc, const std::string& term); + /** Retrieve a reference to the searchData we are using */ std::shared_ptr getSD() { return m_sd; diff --git a/src/sampleconf/macos/mimeview b/src/sampleconf/macos/mimeview new file mode 100644 index 00000000..6cbe6878 --- /dev/null +++ b/src/sampleconf/macos/mimeview @@ -0,0 +1,9 @@ +# External viewers, launched by the recoll GUI when you click on a result +# 'Open' link - MAC version +# On the MAC, we use "open" for everything, no exceptions at the moment. + +xallexcepts = + +[view] +# Pseudo entry used if the 'use desktop' preference is set in the GUI +application/x-all = open %f diff --git a/src/sampleconf/mimeconf b/src/sampleconf/mimeconf index f41e5d06..39bb54ca 100644 --- a/src/sampleconf/mimeconf +++ b/src/sampleconf/mimeconf @@ -148,25 +148,25 @@ application/vnd.sun.xml.writer.template = \ body content.xml opendoc-body.xsl #application/x-mobipocket-ebook = execm rclmobi -#application/x-tar = execm rcltar +#application/x-tar = execm rcltar.py -application/epub+zip = execm rclepub +application/epub+zip = execm rclepub.py application/x-ipynb+json = exec jupyter nbconvert --to script --stdout ; mimetype = text/plain application/javascript = internal text/plain -application/ogg = execm rclaudio +application/ogg = execm rclaudio.py application/pdf = execm rclpdf.py application/postscript = exec rclps application/sql = internal text/plain application/vnd.wordperfect = exec wpd2html;mimetype=text/html -application/x-7z-compressed = execm rcl7z +application/x-7z-compressed = execm rcl7z.py application/x-abiword = internal xsltproc abiword.xsl application/x-awk = internal text/plain -application/x-chm = execm rclchm -application/x-dia-diagram = execm rcldia;mimetype=text/plain +application/x-chm = execm rclchm.py +application/x-dia-diagram = execm rcldia.py;mimetype=text/plain application/x-dvi = exec rcldvi -application/x-flac = execm rclaudio +application/x-flac = execm rclaudio.py 
application/x-gnote = execm rclxml.py -application/x-gnuinfo = execm rclinfo +application/x-gnuinfo = execm rclinfo.py application/x-gnumeric = internal xsltproc gnumeric.xsl application/x-hwp = execm rclhwp.py application/x-kword = exec rclkwd @@ -175,22 +175,22 @@ application/x-mimehtml = internal message/rfc822 application/x-okular-notes = internal xsltproc okular-note.xsl application/x-perl = internal text/plain application/x-php = internal text/plain -application/x-rar = execm rclrar;charset=default +application/x-rar = execm rclrar.py;charset=default application/x-ruby = internal text/plain application/x-scribus = exec rclscribus application/x-shellscript = internal text/plain application/x-tex = exec rcltex -application/x-webarchive = execm rclwar +application/x-webarchive = execm rclwar.py application/x-zerosize = internal -application/zip = execm rclzip;charset=default -audio/aac = execm rclaudio -audio/ape = execm rclaudio -audio/mp4 = execm rclaudio -audio/mpeg = execm rclaudio -audio/ogg = execm rclaudio -audio/x-karaoke = execm rclkar -audio/x-musepack = execm rclaudio -audio/x-wavpack = execm rclaudio +application/zip = execm rclzip.py;charset=default +audio/aac = execm rclaudio.py +audio/ape = execm rclaudio.py +audio/mp4 = execm rclaudio.py +audio/mpeg = execm rclaudio.py +audio/ogg = execm rclaudio.py +audio/x-karaoke = execm rclkar.py +audio/x-musepack = execm rclaudio.py +audio/x-wavpack = execm rclaudio.py image/gif = execm rclimg image/jp2 = execm rclimg image/jpeg = execm rclimg @@ -203,7 +203,7 @@ image/x-xcf = execm rclimg inode/symlink = internal inode/x-empty = internal application/x-zerosize message/rfc822 = internal -text/calendar = execm rclics;mimetype=text/plain +text/calendar = execm rclics.py;mimetype=text/plain text/css = internal text/plain text/html = internal text/plain = internal @@ -234,7 +234,7 @@ text/x-ruby = internal text/x-shellscript = internal text/plain text/x-srt = internal text/plain text/x-tex = exec rcltex 
-video/mp4 = execm rclaudio +video/mp4 = execm rclaudio.py video/x-msvideo = execm rclimg diff --git a/src/sampleconf/mimemap b/src/sampleconf/mimemap index 4333a12d..bc9ecf95 100644 --- a/src/sampleconf/mimemap +++ b/src/sampleconf/mimemap @@ -15,6 +15,8 @@ .rst = text/plain .md = text/plain .gv = text/plain +.desktop = text/plain +.json = text/plain # .log is in the default noContentSuffixes, so this will also need a recoll.conf setting to do # anything @@ -58,6 +60,7 @@ .ipynb = application/x-ipynb+json .xml = text/xml +.opf = text/xml .note = application/x-gnote @@ -90,17 +93,21 @@ .svg = image/svg+xml .dia = application/x-dia-diagram +# Compressed files .gz = application/x-gzip .Z = application/x-gzip .bz2 = application/x-bzip2 -.rar = application/x-rar -#.Z = application/x-compress -.zip = application/zip -.7z = application/x-7z-compressed -.maff = application/zip +.lzma = application/x-lzma +.xz = application/x-xz .zst = application/x-zstd -# The rcltar module can handle compressed tar formats internally so we +# Archives +.rar = application/x-rar +.zip = application/zip +.maff = application/zip +.7z = application/x-7z-compressed + +# The rcltar.py module can handle compressed tar formats internally so we # use application/x-tar for all tar files compressed or not. Note that tar # file indexing is disabled by default, you'll need to copy and uncomment # the application/x-tar commented line from mimeconf into your personal config @@ -123,7 +130,9 @@ .chm = application/x-chm .epub = application/epub+zip +.kepub = application/epub+zip .mobi = application/x-mobipocket-ebook +.lit = application/x-ms-reader # OpenOffice / opendocument. 
We handle opendocument as old openoffice files # for now @@ -200,6 +209,7 @@ .ogg = application/ogg .ogx = audio/ogg .opus = audio/ogg +.wav = audio/x-wav .wv = audio/x-wavpack .mkv = video/x-matroska diff --git a/src/sampleconf/mimeview.mac b/src/sampleconf/mimeview.mac deleted file mode 100644 index 9677e472..00000000 --- a/src/sampleconf/mimeview.mac +++ /dev/null @@ -1,197 +0,0 @@ -# @(#$Id: mimeview,v 1.16 2008-09-15 08:03:37 dockes Exp $ (C) 2004 J.F.Dockes - -## ########################################## -# External viewers, launched by the recoll GUI when you click on a result -# 'Open' link - MAC version -# On the MAC, we use "open" for everything... - -# Mime types which we should not uncompress if they are found gzipped or -# bzipped because the native viewer knows how to handle. These would be -# exceptions and the list is normally empty -#nouncompforviewmts = - -# For releases 1.18 and later: exceptions when using the x-all entry: these -# types will use their local definition. This is useful, e.g.: -# -# - for pdf, where we can pass additional parameters like page to open and -# search string -# - For pages of CHM and EPUB documents where we can choose to open the -# parent document instead of a temporary html file. 
-#xallexcepts = application/pdf application/postscript application/x-dvi \ -# text/html|gnuinfo text/html|chm text/html|epub - -[view] -# Pseudo entry used if the 'use desktop' preference is set in the GUI -application/x-all = open %f - -application/epub+zip = ebook-viewer %f -# If you want to open the parent epub document for epub parts instead of -# opening them as html documents: -#text/html|epub = ebook-viewer %F;ignoreipath=1 - -application/x-gnote = gnote %f - -application/x-mobipocket-ebook = ebook-viewer %f - -application/x-kword = kword %f -application/x-abiword = abiword %f - -# Note: the Linux Mint evince clones, atril and xread, have the same options -application/pdf = evince --page-index=%p --find=%s %f -# Or: -#application/pdf = qpdfview --search %s %f#%p - -application/postscript = evince --page-index=%p --find=%s %f -application/x-dvi = evince --page-index=%p --find=%s %f - -application/x-lyx = lyx %f -application/x-scribus = scribus %f - -application/msword = libreoffice %f -application/vnd.ms-excel = libreoffice %f -application/vnd.ms-powerpoint = libreoffice %f - -application/vnd.oasis.opendocument.text = libreoffice %f -application/vnd.oasis.opendocument.presentation = libreoffice %f -application/vnd.oasis.opendocument.spreadsheet = libreoffice %f - -application/vnd.openxmlformats-officedocument.wordprocessingml.document = \ - libreoffice %f -application/vnd.openxmlformats-officedocument.wordprocessingml.template = \ - libreoffice %f -application/vnd.openxmlformats-officedocument.presentationml.template = \ - libreoffice %f -application/vnd.openxmlformats-officedocument.presentationml.presentation = \ - libreoffice %f -application/vnd.openxmlformats-officedocument.spreadsheetml.sheet = \ - libreoffice %f -application/vnd.openxmlformats-officedocument.spreadsheetml.template =\ - libreoffice %f -application/vnd.sun.xml.calc = libreoffice %f -application/vnd.sun.xml.calc.template = libreoffice %f -application/vnd.sun.xml.draw = libreoffice %f 
-application/vnd.sun.xml.draw.template = libreoffice %f -application/vnd.sun.xml.impress = libreoffice %f -application/vnd.sun.xml.impress.template = libreoffice %f -application/vnd.sun.xml.math = libreoffice %f -application/vnd.sun.xml.writer = libreoffice %f -application/vnd.sun.xml.writer.global = libreoffice %f -application/vnd.sun.xml.writer.template = libreoffice %f -application/vnd.wordperfect = libreoffice %f -text/rtf = libreoffice %f - -application/x-dia-diagram = dia %f - -application/x-fsdirectory = dolphin %f -inode/directory = dolphin %f - -# Both dolphin and nautilus can pre-select a file inside a -# directory. Thunar can't afaik. xdg-open cant pass an additional -# parameters so these are to be xallexcepts. -application/x-fsdirectory|parentopen = dolphin --select %(childurl) %f -inode/directory|parentopen = dolphin --select %(childurl) %f -#application/x-fsdirectory|parentopen = nautilus %(childurl) -#inode/directory|parentopen = nautilus %(childurl) - -application/x-gnuinfo = xterm -e "info -f %f" -application/x-gnumeric = gnumeric %f - -application/x-flac = rhythmbox %f -audio/mpeg = rhythmbox %f -application/ogg = rhythmbox %f -audio/x-karaoke = kmid %f - -image/jpeg = gwenview %f -image/png = gwenview %f -image/tiff = gwenview %f -image/gif = gwenview %f -image/svg+xml = inkview %f -image/vnd.djvu = djview %f -image/x-xcf = gimp %f -image/bmp = gwenview %f -image/x-ms-bmp = gwenview %f -image/x-xpmi = gwenview %f -image/x-nikon-nef = ufraw %f - -# Opening mail messages: -# - Thunderbird will only open a single-message file if it has an .eml -# extension -# - "sylpheed %f" seems to work ok as of version 3.3 -# - "kmail --view %u" works -# - claws-mail: works using a small intermediary shell-script, which you -# set as the viewer here. You need to have at least one account inside -# claws-mail, so that it creates ~/Mail/inbox. Script contents example -# follows. 
Using 1 is probably not a good idea if this is a real account -# (here I am using a bogus one, so that I can overwrite anything inside -# inbox at will): -# #!/bin/bash -# cp $1 ~/Mail/inbox/1 -# claws-mail --select ~/Mail/inbox/1 -# rm ~/Mail/inbox/1 -message/rfc822 = thunderbird -file %f - -text/x-mail = thunderbird -file %f -application/x-mimehtml = thunderbird -file %f - -text/calendar = evolution %f - -application/x-okular-notes = okular %f - -application/x-rar = ark %f -application/x-tar = ark %f -application/zip = ark %f -application/x-7z-compressed = ark %f - -application/x-awk = emacsclient %f -application/x-perl = emacsclient %f -text/x-perl = emacsclient %f -application/x-shellscript = emacsclient %f -text/x-shellscript = emacsclient %f - -# Or firefox -remote "openFile(%u)" -text/html = firefox %u - -# gnu info nodes are translated to html with a "gnuinfo" -# rclaptg. rclshowinfo knows how to start the info command on the right -# node -text/html|gnuinfo = rclshowinfo %F %(title);ignoreipath=1 - -application/x-webarchive = konqueror %f -text/x-fictionbook = ebook-viewer %f -application/x-tex = emacsclient %f -application/xml = emacsclient %f -text/xml = emacsclient %f -text/x-tex = emacsclient %f -text/plain = emacsclient %f -text/x-awk = emacsclient %f -text/x-c = emacsclient %f -text/x-c+ = emacsclient %f -text/x-c++ = emacsclient %f -text/x-csv = libreoffice %f -text/x-html-sidux-man = konqueror %f -text/x-html-aptosid-man = iceweasel %f - -application/x-chm = kchmviewer %f -# Html pages inside a chm have a chm rclaptg set by the filter. 
Kchmviewer -# knows how to use the ipath (which is the internal chm path) to open the -# file at the right place -text/html|chm = kchmviewer --url %i %F - -text/x-ini = emacsclient %f -text/x-man = xterm -u8 -e "groff -T ascii -man %f | more" -text/x-python = idle %f -text/x-gaim-log = emacsclient %f -text/x-purple-html-log = emacsclient %f -text/x-purple-log = emacsclient %f - -# The video types will usually be handled by the desktop default, but they -# need entries here to get an "Open" link -video/3gpp = open %f -video/mp2p = open %f -video/mp2t = open %f -video/mp4 = open %f -video/mpeg = open %f -video/quicktime = open %f -video/x-matroska = open %f -video/x-ms-asf = open %f -video/x-msvideo = open %f diff --git a/src/sampleconf/recoll.conf b/src/sampleconf/recoll.conf index 63f99a4c..96b4c245 100644 --- a/src/sampleconf/recoll.conf +++ b/src/sampleconf/recoll.conf @@ -88,8 +88,8 @@ onlyNames = # can be redefined for subdirectories. noContentSuffixes = .md5 .map \ .o .lib .dll .a .sys .exe .com \ - .mpp .mpt .vsd \ - .img .img.gz .img.bz2 .img.xz .image .image.gz .image.bz2 .image.xz \ + .mpp .mpt .vsd .sqlite \ + .img .img.gz .img.bz2 .img.xz .image .image.gz .image.bz2 .image.xz .ttf \ .dat .bak .rdf .log.gz .log .db .msf .pid \ ,v ~ # @@ -155,7 +155,7 @@ skippedPaths = /media # # # Use skippedNames inside Zip archives.Fetched -# directly by the rclzip handler. Skip the patterns defined by skippedNames +# directly by the rclzip.py handler. Skip the patterns defined by skippedNames # inside Zip archives. Can be redefined for subdirectories. # See https://www.lesbonscomptes.com/recoll/faqsandhowtos/FilteringOutZipArchiveMembers.html # @@ -206,9 +206,9 @@ skippedPaths = /media # redefinition for subtrees). At the moment, it only has an effect for # external handlers (exec and execm). The file types can be specified by # listing either MIME types (e.g. audio/mpeg) or handler names -# (e.g. rclaudio). +# (e.g. rclaudio.py). 
# -nomd5types = rclaudio +nomd5types = rclaudio.py # Size limit for compressed # files.We need to decompress these in a diff --git a/src/testmains/Makefile.am b/src/testmains/Makefile.am index 6cd274ba..c2d72caa 100644 --- a/src/testmains/Makefile.am +++ b/src/testmains/Makefile.am @@ -38,8 +38,8 @@ AM_CPPFLAGS = -Wall -Wno-unused -std=c++11 \ -D_GNU_SOURCE \ $(DEFS) -noinst_PROGRAMS = plaintorich textsplit utf8iter fstreewalk rclconfig hldata unac mbox \ - circache wipedir mimetype pathut fileudi x11mon trqrstore ecrontab +noinst_PROGRAMS = plaintorich textsplit fstreewalk rclconfig hldata unac mbox \ + circache wipedir mimetype fileudi x11mon trqrstore ecrontab ecrontab_SOURCES = trecrontab.cpp ecrontab_LDADD = ../librecoll.la @@ -62,9 +62,6 @@ mbox_LDADD = ../librecoll.la mimetype_SOURCES = trmimetype.cpp mimetype_LDADD = ../librecoll.la -pathut_SOURCES = trpathut.cpp -pathut_LDADD = ../librecoll.la - rclconfig_SOURCES = trrclconfig.cpp rclconfig_LDADD = ../librecoll.la @@ -77,9 +74,6 @@ plaintorich_LDADD = ../librecoll.la unac_SOURCES = trunac.cpp unac_LDADD = ../librecoll.la -utf8iter_SOURCES = trutf8iter.cpp -utf8iter_LDADD = ../librecoll.la - wipedir_SOURCES = trwipedir.cpp wipedir_LDADD = ../librecoll.la diff --git a/src/testmains/trexecmd.cpp b/src/testmains/trexecmd.cpp deleted file mode 100644 index 94813d38..00000000 --- a/src/testmains/trexecmd.cpp +++ /dev/null @@ -1,384 +0,0 @@ -#include "autoconfig.h" - -#include "execmd.h" - -#include -#include -#include "safeunistd.h" -#include -#ifndef _WIN32 -#include -#endif - -#include -#include -#include -#include - -#include "log.h" -#include "cancelcheck.h" -#include "execmd.h" -#include "smallut.h" - -using namespace std; - -// Testing the rclexecm protocol outside of recoll. Here we use the -// rcldoc.py filter, you can try with rclaudio too, adjust the file -// arg accordingly. 
This simplified driver only really works with -// single-doc files (else it extracts only the first doc, usually the -// empty self-doc). -bool exercise_mhexecm(const string& cmdstr, const string& mimetype, - vector& files) -{ - if (files.empty()) - return false; - - ExecCmd cmd; - vector myparams; - -#ifdef _WIN32 - // Hack for windows: the command is always "Python somescript" - myparams.push_back(files[0]); - files.erase(files.begin()); -#endif - - if (cmd.startExec(cmdstr, myparams, 1, 1) < 0) { - cerr << "startExec " << cmdstr << " failed. Missing command?\n"; - return false; - } - - for (vector::const_iterator it = files.begin(); - it != files.end(); it++) { - // Build request message - ostringstream obuf; - obuf << "Filename: " << (*it).length() << "\n" << (*it); - obuf << "Mimetype: " << mimetype.length() << "\n" << mimetype; - // Bogus parameter should be skipped by filter - obuf << "BogusParam: " << string("bogus").length() << "\n" << "bogus"; - obuf << "\n"; - cerr << "SENDING: [" << obuf.str() << "]\n"; - // Send it - if (cmd.send(obuf.str()) < 0) { - // The real code calls zapchild here, but we don't need it as - // this will be handled by ~ExecCmd - //cmd.zapChild(); - cerr << "send error\n"; - return false; - } - - // Read answer - for (int loop=0;;loop++) { - string name, data; - - // Code from mh_execm.cpp: readDataElement - string ibuf; - // Read name and length - if (cmd.getline(ibuf) <= 0) { - cerr << "getline error\n"; - return false; - } - // Empty line (end of message) - if (!ibuf.compare("\n")) { - cerr << "Got empty line\n"; - name.clear(); - break; - } - - // Filters will sometimes abort before entering the real - // protocol, ie if a module can't be loaded. 
Check the - // special filter error first word: - if (ibuf.find("RECFILTERROR ") == 0) { - cerr << "Got RECFILTERROR\n"; - return false; - } - - // We're expecting something like Name: len\n - vector tokens; - stringToTokens(ibuf, tokens); - if (tokens.size() != 2) { - cerr << "bad line in filter output: [" << ibuf << "]\n"; - return false; - } - vector::iterator it = tokens.begin(); - name = *it++; - string& slen = *it; - int len; - if (sscanf(slen.c_str(), "%d", &len) != 1) { - cerr << "bad line in filter output (no len): [" << - ibuf << "]\n"; - return false; - } - // Read element data - data.erase(); - if (len > 0 && cmd.receive(data, len) != len) { - cerr << "MHExecMultiple: expected " << len << - " bytes of data, got " << data.length() << endl; - return false; - } - - // Empty element: end of message - if (name.empty()) - break; - cerr << "Got name: [" << name << "] data [" << data << "]\n"; - } - } - return true; -} - -static char *thisprog; -static char usage [] = -"trexecmd [-c -r -i -o] [-e ] cmd [arg1 arg2 ...]\n" -" -c : test cancellation (ie: trexecmd -c sleep 1000)\n" -" -r : run reexec. Must be separate option.\n" -" -i : command takes input\n" -" -o : command produces output\n" -" -e : send stderr to file named fn (will truncate it)\n" -" If -i is set, we send /etc/group contents to whatever command is run\n" -" If -o is set, we print whatever comes out\n" -"trexecmd -f bogus filter for testing. 
Uses same options\n" -"trexecmd -m [file ...]: test execm:\n" -" should be the path to an execm filter\n" -" the type of the file parameters\n" -"trexecmd -w cmd : do the 'which' thing\n" - ; - -static void Usage(FILE *fp = stderr) -{ - fprintf(fp, "%s: usage:\n%s", thisprog, usage); - exit(1); -} - -static int op_flags; -#define OPT_MOINS 0x1 -#define OPT_i 0x4 -#define OPT_w 0x8 -#define OPT_c 0x10 -#define OPT_r 0x20 -#define OPT_m 0x40 -#define OPT_o 0x80 -#define OPT_e 0x100 -#define OPT_f 0x200 - -void childfilter() -{ - const int bs = 1024; - char buf[bs]; - if (op_flags & OPT_c) - sleep(2000); - if (op_flags& OPT_i) { - while (read(0, buf, bs) > 0); - } - if (op_flags& OPT_o) { - for (int i = 0; i < 10; i++) { - printf("This is DATA 1 2 3\n"); - } - } - exit(0); -} - -// Data sink for data coming out of the command. We also use it to set -// a cancellation after a moment. -class MEAdv : public ExecCmdAdvise { -public: - void newData(int cnt) { - cerr << "newData(" << cnt << ")" << endl; - if (op_flags & OPT_c) { - static int callcnt; - if (callcnt++ == 5) { - // Just sets the cancellation flag - CancelCheck::instance().setCancel(); - // Would be called from somewhere else and throws an - // exception. 
We call it here for simplicity - cerr << "newData: should throw !\n"; - CancelCheck::instance().checkCancel(); - } - } - } -}; - -// Data provider, used if the -i flag is set -class MEPv : public ExecCmdProvide { -public: - string *m_input; - int m_cnt; - MEPv(string *i) - : m_input(i), m_cnt(0) { - } - ~MEPv() { - } - void newData() { - if (m_cnt++ < 10) { - char num[30]; - sprintf(num, "%d", m_cnt); - *m_input = string("This is an input chunk ") + string(num) + - string("\n"); - } else { - m_input->erase(); - } - } - void reset() { - m_cnt = 0; - } -}; - - - -ReExec reexec; -int main(int argc, char *argv[]) -{ -#ifndef _WIN32 - reexec.init(argc, argv); - - if (0) { - // Disabled: For testing reexec arg handling - vector newargs; - newargs.push_back("newarg"); - newargs.push_back("newarg1"); - newargs.push_back("newarg2"); - newargs.push_back("newarg3"); - newargs.push_back("newarg4"); - reexec.insertArgs(newargs, 2); - } -#endif - - string stderrFile; - thisprog = argv[0]; - argc--; argv++; - - while (argc > 0 && **argv == '-') { - (*argv)++; - if (!(**argv)) - /* Cas du "adb - core" */ - Usage(); - while (**argv) - switch (*(*argv)++) { - case 'c': op_flags |= OPT_c; break; - case 'e': - op_flags |= OPT_e; - if (argc < 2) { - Usage(); - } - stderrFile = *(++argv); argc--; - goto b1; - - case 'f': op_flags |= OPT_f; break; - case 'h': - for (int i = 0; i < 10; i++) { - cout << "MESSAGE " << i << " FROM TREXECMD\n"; - cout.flush(); - //sleep(1); - } - return 0; - case 'i': op_flags |= OPT_i; break; - case 'o': op_flags |= OPT_o; break; - case 'm': op_flags |= OPT_m; break; - case 'r': op_flags |= OPT_r; break; - case 'w': op_flags |= OPT_w; break; - default: Usage(); break; - } - b1: argc--; argv++; - } - - if (op_flags & OPT_f) { - childfilter(); - } - - if (argc < 1) - Usage(); - - string arg1 = *argv++; argc--; - vector l; - while (argc > 0) { - l.push_back(*argv++); argc--; - } - - DebugLog::getdbl()->setloglevel(DEBDEB1); - DebugLog::setfilename("stderr"); 
-#ifndef _WIN32 - signal(SIGPIPE, SIG_IGN); - - if (op_flags & OPT_r) { - // Test reexec. Normally only once, next time we fall through - // because we remove the -r option (only works if it was - // isolated, not like -rc - chdir("/"); - argv[0] = strdup(""); - sleep(1); - cerr << "Calling reexec\n"; - // We remove the -r arg from list, otherwise we are going to - // loop (which you can try by commenting out the following - // line) - reexec.removeArg("-r"); - reexec.reexec(); - } -#endif - - - if (op_flags & OPT_w) { - // Test "which" method - string path; - if (ExecCmd::which(arg1, path)) { - cout << path << endl; - return 0; - } - return 1; - } else if (op_flags & OPT_m) { - if (l.size() < 2) - Usage(); - string mimetype = l[0]; - l.erase(l.begin()); - return exercise_mhexecm(arg1, mimetype, l) ? 0 : 1; - } else { - // Default: execute command line arguments - ExecCmd mexec; - - // Set callback to be called whenever there is new data - // available and at a periodic interval, to check for - // cancellation - MEAdv adv; - mexec.setAdvise(&adv); - //mexec.setTimeout(5); - // Stderr output goes there - if (!stderrFile.empty()) - mexec.setStderr(stderrFile); - - // A few environment variables. 
Check with trexecmd env - mexec.putenv("TESTVARIABLE1=TESTVALUE1"); - mexec.putenv("TESTVARIABLE2=TESTVALUE2"); - mexec.putenv("TESTVARIABLE3=TESTVALUE3"); - - string input, output; - MEPv pv(&input); - - string *ip = 0; - if (op_flags & OPT_i) { - ip = &input; - mexec.setProvide(&pv); - } - string *op = 0; - if (op_flags & OPT_o) { - op = &output; - } - - int status = -1; - for (int i = 0; i < 10; i++) { - output.clear(); - pv.reset(); - try { - status = mexec.doexec(arg1, l, ip, op); - } catch (CancelExcept) { - cerr << "CANCELLED" << endl; - } - //fprintf(stderr, "Status: 0x%x\n", status); - if (op_flags & OPT_o) { - cout << "data received: [" << output << "]\n"; - cerr << "iter " << i << " status " << - status << " bytes received " << output.size() << endl; - } - if (status) - break; - } - return status >> 8; - } -} - diff --git a/src/testmains/trpathut.cpp b/src/testmains/trpathut.cpp deleted file mode 100644 index 8d4b4be4..00000000 --- a/src/testmains/trpathut.cpp +++ /dev/null @@ -1,105 +0,0 @@ -#include "pathut.h" - -#include -#include -#include - -#include -#include - -using namespace std; - -static std::map options { - {"path_home", 0}, - {"path_tildexpand", 0}, - {"listdir", 0}, - {"url_encode", 0}, - }; - -static const char *thisprog; -static void Usage(void) -{ - string sopts; - for (const auto& opt: options) { - sopts += "--" + opt.first + "\n"; - } - fprintf(stderr, "%s: usage: %s\n%s", thisprog, thisprog, sopts.c_str()); - exit(1); -} - -int main(int argc, char **argv) -{ - thisprog = *argv; - std::vector long_options; - - for (auto& entry : options) { - struct option opt; - opt.name = entry.first.c_str(); - opt.has_arg = 0; - opt.flag = &entry.second; - opt.val = 1; - long_options.push_back(opt); - } - long_options.push_back({0, 0, 0, 0}); - - while (getopt_long(argc, argv, "", &long_options[0], nullptr) != -1) { - } - if (options["path_home"]) { - if (optind != argc) { - cerr << "Usage: trsmallut --path_home\n"; - return 1; - } - cout << 
"path_home() -> [" << path_home() << "]\n"; - } else if (options["path_tildexpand"]) { - if (optind >= argc) { - cerr << "Usage: trsmallut --path_tildexpand \n"; - return 1; - } - string s = argv[optind]; - optind++; - if (optind != argc) { - return 1; - } - cout << "path_tildexpand(" << s << ") -> [" << path_tildexpand(s) << "]\n"; - } else if (options["url_encode"]) { - if (optind >= argc) { - cerr << "Usage: trsmallut --url_encode [offs=0]\n"; - return 1; - } - string s = argv[optind]; - optind++; - int offs = 0; - if (optind != argc) { - offs = atoi(argv[optind]); - optind++; - } - if (optind != argc) { - return 1; - } - cout << "url_encode(" << s << ", " << offs << ") -> [" << url_encode(s, offs) << "]\n"; - } else if (options["listdir"]) { - if (optind >= argc) { - cerr << "Usage: trsmallut --listdir \n"; - return 1; - } - std::string path = argv[optind]; - optind++; - if (optind != argc) { - cerr << "Usage: trsmallut --listdir \n"; - return 1; - } - std::string reason; - std::set entries; - if (!listdir(path, reason, entries)) { - std::cerr<< "listdir(" << path << ") failed : " << reason << "\n"; - return 1; - } - for (const auto& entry : entries) { - cout << entry << "\n"; - } - } else { - Usage(); - } - - return 0; -} diff --git a/src/testmains/trutf8iter.cpp b/src/testmains/trutf8iter.cpp deleted file mode 100644 index 8e83374b..00000000 --- a/src/testmains/trutf8iter.cpp +++ /dev/null @@ -1,232 +0,0 @@ -/* Copyright (C) 2005 J.F.Dockes - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the - * Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - */ -#include -#include - -#include -#include -#include - - -#include "log.h" -#include "transcode.h" - -#ifndef NO_NAMESPACES -using namespace std; -#endif /* NO_NAMESPACES */ - -#define UTF8ITER_CHECK -#include "utf8iter.h" -#include "readfile.h" -#include "textsplit.h" - -void tryempty() -{ - Utf8Iter it(""); - cout << "EOF ? " << it.eof() << endl; - TextSplit::isCJK(*it); - exit(0); -} - -const char *thisprog; -static char usage [] = -"utf8iter [opts] infile outfile\n" -" converts infile to 32 bits unicode (processor order), for testing\n" -" -v : print stuff as we go\n" -"-t [-w] [-e] : test truncation\n" -"-c : str must be a single utf-8 char. Convert to code then show character bytes count\n" -; - -void Usage() { - fprintf(stderr, "%s:%s\n", thisprog, usage); - exit(1); -} -static int op_flags; -#define OPT_v 0x2 -#define OPT_t 0x4 -#define OPT_w 0x8 -#define OPT_e 0x10 -#define OPT_c 0x20 - -int trytruncate(std::string s, int maxlen) -{ - int flag = 0; - if (op_flags & OPT_w) - flag |= UTF8T_ATWORD; - if (op_flags & OPT_e) - flag |= UTF8T_ELLIPSIS; - utf8truncate(s, maxlen, flag); - std::cout << "Truncation result:[" << s << "]\n"; - return 0; -} - -FILE *infout = stdout; -int main(int argc, char **argv) -{ - thisprog = argv[0]; - argc--; argv++; - - while (argc > 0 && **argv == '-') { - (*argv)++; - if (!(**argv)) - Usage(); - while (**argv) - switch (*(*argv)++) { - case 'e': op_flags |= OPT_e;break; - case 't': op_flags |= OPT_t;break; - case 'v': op_flags |= OPT_v;break; - case 'w': op_flags |= OPT_w;break; - case 'c': op_flags |= OPT_c;break; - default: Usage(); break; - } - argc--;argv++; - } - - if (op_flags & OPT_c) { - if (argc != 1) - Usage(); - std::string s = *argv++;argc--; - Utf8Iter uit(s); - auto code = *uit; - auto cnt 
= utf8codepointsize(code); - std::cout << "0x" << std::hex << code << std::dec << " : " << cnt << " byte" << - (cnt>1?"s":"") << "\n"; - return 0; - } - - if (op_flags & OPT_t) { - if (argc < 2) - Usage(); - std::string s = *argv++;argc--; - int maxlen = atoi(*argv++);argc--; - return trytruncate(s, maxlen); - } - - string infile, outfile; - if (argc == 2) { - infile = *argv++;argc--; - outfile = *argv++;argc--; - Usage(); - } else if (argc != 0) { - Usage(); - } - string in; - if (!file_to_string(infile, in)) { - cerr << "Cant read file\n" << endl; - exit(1); - } - - vectorucsout1; - string out, out1; - Utf8Iter it(in); - FILE *fp = 0; - if (!outfile.empty()) { - fp = fopen(outfile.c_str(), "w"); - if (fp == 0) { - cerr << "Can't create " << outfile << endl; - exit(1); - } - } - - int nchars = 0; - for (;!it.eof(); it++) { - unsigned int value = *it; - if (value == (unsigned int)-1) { - cerr << "Conversion error occurred at position " << it.getBpos() - << endl; - exit(1); - } - if (op_flags & OPT_v) { - fprintf(infout, "Value: 0x%04x", value); - if (value < 0x7f) - fprintf(stdout, " (%c) ", value); - fprintf(infout, "\n"); - } - // UTF-32LE or BE array - ucsout1.push_back(value); - if (fp) { - // UTF-32LE or BE file - fwrite(&value, 4, 1, fp); - } - - // Reconstructed utf8 strings (2 methods) - if (!it.appendchartostring(out)) - break; - // conversion to string - out1 += it; - - // fprintf(stderr, "%s", string(it).c_str()); - nchars++; - } - if (fp) { - fclose(fp); - } - - fprintf(infout, "Found %d Unicode characters\n", nchars); - if (in.compare(out)) { - fprintf(stderr, "error: out != in\n"); - exit(1); - } - if (in != out1) { - fprintf(stderr, "error: out1 != in\n"); - exit(1); - } - - // Rewind and do it a second time - vectorucsout2; - it.rewind(); - for (int i = 0; ; i++) { - unsigned int value; - if ((value = it[i]) == (unsigned int)-1) { - break; - } - it++; - ucsout2.push_back(value); - } - - if (ucsout1 != ucsout2) { - fprintf(stderr, "error: ucsout1 != 
ucsout2\n"); - exit(1); - } - - ucsout2.clear(); - int ercnt; - const char *encoding = "UTF-32LE"; // note : use BE on high-endian machine - string ucs, ucs1; - for (const unsigned int i : ucsout1) { - ucs.append((const char *)&i, 4); - } - if (!transcode(ucs, ucs1, encoding, encoding, &ercnt) || ercnt) { - fprintf(stderr, "Transcode check failed, ercount: %d\n", ercnt); - exit(1); - } - if (ucs.compare(ucs1)) { - fprintf(stderr, "error: ucsout1 != ucsout2 after iconv\n"); - exit(1); - } - - if (!transcode(ucs, ucs1, encoding, "UTF-8", &ercnt) || ercnt) { - fprintf(stderr, "Transcode back to utf-8 check failed, ercount: %d\n", - ercnt); - exit(1); - } - if (ucs1.compare(in)) { - fprintf(stderr, "Transcode back to utf-8 compare to in failed\n"); - exit(1); - } - exit(0); -} diff --git a/src/utils/conftree.h b/src/utils/conftree.h index d868e436..dc6f86ad 100644 --- a/src/utils/conftree.h +++ b/src/utils/conftree.h @@ -125,8 +125,7 @@ public: * @param readonly if true open readonly, else rw * @param tildexp try tilde (home dir) expansion for subkey values */ - ConfSimple(const char *fname, int readonly = 0, bool tildexp = false, - bool trimvalues = true); + ConfSimple(const char *fname, int readonly = 0, bool tildexp = false, bool trimvalues = true); /** * Build the object by reading content from a string @@ -142,8 +141,7 @@ public: * @param readonly if true open read only, else rw * @param tildexp try tilde (home dir) expansion for subsection names */ - ConfSimple(int readonly = 0, bool tildexp = false, - bool trimvalues = true); + ConfSimple(int readonly = 0, bool tildexp = false, bool trimvalues = true); virtual ~ConfSimple() {}; @@ -184,8 +182,7 @@ public: * Set value for named integer parameter in specified subsection (or global) * @return 0 for error, 1 else */ - virtual int set(const std::string& nm, long long val, - const std::string& sk = std::string()); + virtual int set(const std::string& nm, long long val, const std::string& sk = std::string()); /** * 
Remove name and value from config @@ -388,8 +385,7 @@ public: construct(fns, ro); } /// Construct out of single file name and multiple directories - ConfStack(const std::string& nm, const std::vector& dirs, - bool ro = true) { + ConfStack(const std::string& nm, const std::vector& dirs, bool ro = true) { std::vector fns; for (const auto& dir : dirs) { fns.push_back(path_cat(dir, nm)); @@ -501,8 +497,8 @@ public: const std::string& sk, const char *pattern = 0) const override { return getNames1(sk, pattern, false); } - virtual std::vector getNamesShallow(const std::string& sk, - const char *patt = 0) const { + virtual std::vector getNamesShallow( + const std::string& sk, const char *patt = 0) const { return getNames1(sk, patt, true); } @@ -570,27 +566,30 @@ private: } } - /// Common construct from file names code. We used to be ok even - /// if some files were not readable/parsable. Now fail if any - /// fails. + /// Common construct from file names. + /// Fail if any fails, except for missing files in all but the bottom location, or the + /// top one in rw mode. void construct(const std::vector& fns, bool ro) { bool ok{true}; - bool first{true}; - for (const auto& fn : fns) { + for (unsigned int i = 0; i < fns.size(); i++) { + const auto& fn{fns[i]}; T* p = new T(fn.c_str(), ro); if (p && p->ok()) { m_confs.push_back(p); } else { delete p; - // In ro mode, we accept a non-existing topmost file - // and treat it as an empty one. - if (!(ro && first && !path_exists(fn))) { - ok = false; + // We accept missing files in all but the bottom/ directory. + // In rw mode, the topmost file must be present. 
+ if (!path_exists(fn)) { + // !ro can only be true for i==0 + if (!ro || (i == fns.size() - 1)) { + ok = false; + break; + } } } // Only the first file is opened rw ro = true; - first = false; } m_ok = ok; } diff --git a/src/utils/execmd.cpp b/src/utils/execmd.cpp index eb5a7603..6a6fd510 100644 --- a/src/utils/execmd.cpp +++ b/src/utils/execmd.cpp @@ -39,6 +39,7 @@ #include #include #include +#include #ifdef HAVE_SPAWN_H #ifndef __USE_GNU #define __USE_GNU @@ -994,7 +995,8 @@ int ExecCmd::wait() LOGERR("ExecCmd::waitpid: returned -1 errno " << errno << "\n"); status = -1; } - LOGDEB("ExecCmd::wait: got status 0x" << (status) << "\n"); + LOGDEB("ExecCmd::wait: got status 0x" << std::hex << status << std::dec << ": " << + waitStatusAsString(status) << "\n"); m->m_pid = -1; } // Let the ExecCmdRsrc cleanup, it will do the killing/waiting if needed @@ -1043,6 +1045,23 @@ bool ExecCmd::backtick(const vector cmd, string& out) return status == 0; } +std::string ExecCmd::waitStatusAsString(int wstatus) +{ + std::ostringstream oss; + if (WIFEXITED(wstatus)) { + oss << "Exit status: " << WEXITSTATUS(wstatus); + } else { + if (WIFSIGNALED(wstatus)) { + oss << strsignal(WTERMSIG(wstatus)) << " "; + } + if (WCOREDUMP(wstatus)) { + oss << "(core dumped)"; + } + } + return oss.str(); +} + + /// ReExec class methods /////////////////////////////////////////////////// ReExec::ReExec(int argc, char *args[]) { diff --git a/src/utils/execmd.h b/src/utils/execmd.h index 623569c1..987a9e7f 100644 --- a/src/utils/execmd.h +++ b/src/utils/execmd.h @@ -247,6 +247,8 @@ public: */ static bool backtick(const std::vector cmd, std::string& out); + static std::string waitStatusAsString(int wstatus); + class Internal; private: Internal *m; diff --git a/src/utils/rclutil.cpp b/src/utils/rclutil.cpp index d33f79bb..239f446e 100644 --- a/src/utils/rclutil.cpp +++ b/src/utils/rclutil.cpp @@ -310,23 +310,30 @@ bool printableUrl(const string& fcharset, const string& in, string& out) return true; } 
+#ifdef _WIN32 +// Convert X:/path to /X/path for path splitting inside the index +string path_slashdrive(const string& path) +{ + string npath; + if (path_hasdrive(path)) { + npath.append(1, '/'); + npath.append(1, path[0]); + if (path_isdriveabs(path)) { + npath.append(path.substr(2)); + } else { + // This should be an error really + npath.append(1, '/'); + npath.append(path.substr(2)); + } + } + return npath; +} +#endif // _WIN32 + string url_gpathS(const string& url) { #ifdef _WIN32 - string u = url_gpath(url); - string nu; - if (path_hasdrive(u)) { - nu.append(1, '/'); - nu.append(1, u[0]); - if (path_isdriveabs(u)) { - nu.append(u.substr(2)); - } else { - // This should be an error really - nu.append(1, '/'); - nu.append(u.substr(2)); - } - } - return nu; + return path_slashdrive(url_gpath(url)); #else return url_gpath(url); #endif diff --git a/src/utils/rclutil.h b/src/utils/rclutil.h index 3187da29..38b4d9d2 100644 --- a/src/utils/rclutil.h +++ b/src/utils/rclutil.h @@ -48,6 +48,9 @@ extern bool printableUrl(const std::string& fcharset, const std::string& in, std::string& out); /// Same but, in the case of a Windows local path, also turn "c:/" into /// "/c/" This should be used only for splitting the path in rcldb. +#ifdef _WIN32 +extern std::string path_slashdrive(const std::string& path); +#endif extern std::string url_gpathS(const std::string& url); /// Like strftime but guaranteed utf-8 output (esp. useful on Windows) diff --git a/src/utils/utf8testin.txt b/src/utils/utf8testin.txt deleted file mode 100644 index bfb9ec85..00000000 --- a/src/utils/utf8testin.txt +++ /dev/null @@ -1,212 +0,0 @@ - -UTF-8 encoded sample plain-text file -‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ - -Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 - - -The ASCII compatible UTF-8 encoding used in this plain-text file -is defined in Unicode, ISO 10646-1, and RFC 2279. 
- - -Using Unicode/UTF-8, you can write in emails and source code things such as - -Mathematics and sciences: - - ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ - ⎪⎢⎜│a²+b³ ⎟⎥⎪ - ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ - ⎪⎢⎜⎷ c₈ ⎟⎥⎪ - ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ - ⎪⎢⎜ ∞ ⎟⎥⎪ - ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ - ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ - 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ - -Linguistics and dictionaries: - - ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn - Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] - -APL: - - ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ - -Nicer typography in plain text files: - - ╔══════════════════════════════════════════╗ - ║ ║ - ║ • ‘single’ and “double” quotes ║ - ║ ║ - ║ • Curly apostrophes: “We’ve been here” ║ - ║ ║ - ║ • Latin-1 apostrophe and accents: '´` ║ - ║ ║ - ║ • ‚deutsche‘ „Anführungszeichen“ ║ - ║ ║ - ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ - ║ ║ - ║ • ASCII safety test: 1lI|, 0OD, 8B ║ - ║ ╭─────────╮ ║ - ║ • the euro symbol: │ 14.95 € │ ║ - ║ ╰─────────╯ ║ - ╚══════════════════════════════════════════╝ - -Combining characters: - - STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ - -Greek (in Polytonic): - - The Greek anthem: - - Σὲ γνωρίζω ἀπὸ τὴν κόψη - τοῦ σπαθιοῦ τὴν τρομερή, - σὲ γνωρίζω ἀπὸ τὴν ὄψη - ποὺ μὲ βία μετράει τὴ γῆ. - - ᾿Απ᾿ τὰ κόκκαλα βγαλμένη - τῶν ῾Ελλήνων τὰ ἱερά - καὶ σὰν πρῶτα ἀνδρειωμένη - χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! - - From a speech of Demosthenes in the 4th century BC: - - Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, - ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς - λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ - τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ - εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ - πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν - οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, - οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. 
ἐγὼ δέ, ὅτι μέν - ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον - τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι - γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν - προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους - σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ - τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ - τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς - τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. - - Δημοσθένους, Γ´ ᾿Ολυνθιακὸς - -Georgian: - - From a Unicode conference invitation: - - გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო - კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, - ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს - ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, - ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება - ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, - ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. - -Russian: - - From a Unicode conference invitation: - - Зарегистрируйтесь сейчас на Десятую Международную Конференцию по - Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. - Конференция соберет широкий круг экспертов по вопросам глобального - Интернета и Unicode, локализации и интернационализации, воплощению и - применению Unicode в различных операционных системах и программных - приложениях, шрифтах, верстке и многоязычных компьютерных системах. 
- -Thai (UCS Level 2): - - Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese - classic 'San Gua'): - - [----------------------------|------------------------] - ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ - สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา - ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา - โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ - เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ - ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ - พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ - ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ - - (The above is a two-column text. If combining characters are handled - correctly, the lines of the second column should be aligned with the - | character above.) - -Ethiopian: - - Proverbs in the Amharic language: - - ሰማይ አይታረስ ንጉሥ አይከሰስ። - ብላ ካለኝ እንደአባቴ በቆመጠኝ። - ጌጥ ያለቤቱ ቁምጥና ነው። - ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። - የአፍ ወለምታ በቅቤ አይታሽም። - አይጥ በበላ ዳዋ ተመታ። - ሲተረጉሙ ይደረግሙ። - ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። - ድር ቢያብር አንበሳ ያስር። - ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። - እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። - የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። - ሥራ ከመፍታት ልጄን ላፋታት። - ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። - የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። - ተንጋሎ ቢተፉ ተመልሶ ባፉ። - ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። - እግርህን በፍራሽህ ልክ ዘርጋ። - -Runes: - - ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ - - (Old English, which transcribed into Latin reads 'He cwaeth that he - bude thaem lande northweardum with tha Westsae.' 
and means 'He said - that he lived in the northern land near the Western Sea.') - -Braille: - - ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ - - ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ - ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ - ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ - ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ - ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ - ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ - - ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ - - ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ - ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ - ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ - ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ - ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ - ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ - ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ - ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ - ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ - - (The first couple of paragraphs of "A Christmas Carol" by Dickens) - -Compact font selection example text: - - ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 - abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ - –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд - ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა - -Greetings in various languages: - - Hello world, Καλημέρα κόσμε, コンニチハ - -Box drawing alignment tests: █ - ▉ - ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ - ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ - ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ - ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ - ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ - ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ - ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ - ▝▀▘▙▄▟ diff --git a/src/utils/workqueue.cpp b/src/utils/workqueue.cpp deleted file mode 100644 index aba5fd1e..00000000 --- a/src/utils/workqueue.cpp +++ /dev/null @@ -1,121 +0,0 @@ -/* Copyright (C) 2014 J.F.Dockes - * This program is free software; you can redistribute it and/or modify - 
* it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the - * Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - */ -// Test program for the workqueue module - -#include -#include -#include -#include - -#include "safeunistd.h" - -#include "workqueue.h" - -static char *thisprog; - -static char usage [] = -" \n\n" -; -static void -Usage(void) -{ - fprintf(stderr, "%s: usage:\n%s", thisprog, usage); - exit(1); -} - -static int op_flags; -#define OPT_MOINS 0x1 -#define OPT_s 0x2 -#define OPT_b 0x4 - -class Task { -public: - Task() - : m_id(o_id++) - {} - int m_id; - static int o_id; -}; -int Task::o_id; - -void *worker(void *vtp) -{ - fprintf(stderr, "Worker working\n"); - WorkQueue *tqp = (WorkQueue *)vtp; - Task tsk; - for (;;) { - if (!tqp->take(&tsk)) { - fprintf(stderr, "Worker: take failed\n"); - return (void*)0; - } - fprintf(stderr, "WORKER: got task %d\n", tsk.m_id); - if (tsk.m_id > 20) { - tqp->workerExit(); - break; - } - } - return (void*)1; -} - -int main(int argc, char **argv) -{ - int count = 10; - - thisprog = argv[0]; - argc--; argv++; - - while (argc > 0 && **argv == '-') { - (*argv)++; - if (!(**argv)) - /* Cas du "adb - core" */ - Usage(); - while (**argv) - switch (*(*argv)++) { - case 's': op_flags |= OPT_s; break; - case 'b': op_flags |= OPT_b; if (argc < 2) Usage(); - if ((sscanf(*(++argv), "%d", &count)) != 1) - Usage(); - argc--; - goto b1; - default: Usage(); break; - } - b1: argc--; argv++; - } - - if 
(argc != 0) - Usage(); - - WorkQueue wq("testwq", 10); - - if (!wq.start(2, &worker, &wq)) { - fprintf(stderr, "Start failed\n"); - exit(1); - } - - for (;;) { - Task tsk; - fprintf(stderr, "BOSS: put task %d\n", tsk.m_id); - if (!wq.put(tsk)) { - fprintf(stderr, "Boss: put failed\n"); - exit(1); - } - if ((tsk.m_id % 10) == 0) - sleep(1); - } - exit(0); -} - diff --git a/src/windows/execmd_w.cpp b/src/windows/execmd_w.cpp index 8f140216..8e5ad2a5 100644 --- a/src/windows/execmd_w.cpp +++ b/src/windows/execmd_w.cpp @@ -1100,17 +1100,6 @@ bool ExecCmd::maybereap(int *status) } } -// Static -bool ExecCmd::backtick(const vector cmd, string& out) -{ - vector::const_iterator it = cmd.begin(); - it++; - vector args(it, cmd.end()); - ExecCmd mexec; - int status = mexec.doexec(*cmd.begin(), args, 0, &out); - return status == 0; -} - int ExecCmd::doexec(const string &cmd, const vector& args, const string *input, string *output) { @@ -1159,3 +1148,23 @@ int ExecCmd::doexec(const string &cmd, const vector& args, cleaner.inactivate(); return wait(); } + +// Static +bool ExecCmd::backtick(const vector cmd, string& out) +{ + vector::const_iterator it = cmd.begin(); + it++; + vector args(it, cmd.end()); + ExecCmd mexec; + int status = mexec.doexec(*cmd.begin(), args, 0, &out); + return status == 0; +} + +// Static. Unimplemented on windows for now +std::string ExecCmd::waitStatusAsString(int wstatus) +{ + std::ostringstream oss; + oss << std::hex << "0x" << wstatus << std::dec; + return oss.str(); +} + diff --git a/src/windows/mimeconf b/src/windows/mimeconf index 6bc6545e..19a021d8 100644 --- a/src/windows/mimeconf +++ b/src/windows/mimeconf @@ -1,438 +1,38 @@ -# (C) 2015 J.F.Dockes +# (C) 2015-2022 J.F.Dockes -# This file contains most of the data which determines how we -# handle the different mime types (also see the "mimeview" file). 
# -# This is the version specific to MS-WINDOWS +# MS-WINDOWS specific definitions for mimeconf # -# Sections: -# top-level: Decompression parameters. Should not be at top-level, historical. -# [index] : Associations of mime types to the filters that translate them -# to plain text or html. -# [icons] : Associations of mime types to result list icons (GUI) -# [categories] : groupings of mime types (media, text, message etc.) -# [guifilters] : defines the filtering checkboxes in the GUI. Uses the -# above categories by default -## ####################################### -# Decompression: these types need a first pass to create a temp file to -# work with. We use a script because uncompress utilities usually work in -# place, which is not suitable. -# -# Obviously this should be in a [decompress] section or such, but it was once -# forgotten and remained global for compatibility... -# -# The %t parameter will be substituted to the name of a temporary directory -# by recoll. This directory is guaranteed empty when calling the filter -# -# The %f parameter will be substituted with the input file. -# -# The script (ie: rcluncomp) must output the uncompressed file name on -# stdout. 
Note that the windows version will always use 7z, and ignore -# the decompressor parameter in the following lines -application/gzip = uncompress python rcluncomp.py 7z %f %t -application/x-gzip = uncompress python rcluncomp.py 7z %f %t -application/x-compress = uncompress python rcluncomp.py 7z %f %t -application/x-bzip2 = uncompress python rcluncomp.py 7z %f %t -application/x-xz = uncompress python rcluncomp.py 7z %f %t -application/x-lzma = uncompress python rcluncomp.py 7z %f %t +# Decompression: the windows version always uses 7z, no decompressor parameter is necessary +application/gzip = uncompress rcluncomp.py 7z %f %t +application/x-gzip = uncompress rcluncomp.py 7z %f %t +application/x-compress = uncompress rcluncomp.py 7z %f %t +application/x-bzip2 = uncompress rcluncomp.py 7z %f %t +application/x-xz = uncompress rcluncomp.py 7z %f %t +application/x-lzma = uncompress rcluncomp.py 7z %f %t +application/x-scribus = +application/x-tex = -## ################################### -# Filters for indexing and internal preview. -# The "internal" filters are hardwired in the c++ code. -# The external "exec" filters are typically scripts. By default, they output the -# document in simple html format, have a look at the scripts. -# A different format (ie text/plain), and a character set can be defined for -# each filter, see the exemples below (ie: msword) [index] - -application/msword = execm python rcldoc.py -application/vnd.ms-excel = execm python rclxls.py -application/vnd.ms-outlook = execm python rclpst.py -application/vnd.ms-powerpoint = execm python rclppt.py -# Also Handle the mime type returned by "file -i" for a suffix-less word -# file. This could probably just as well be an excel file, but we have to -# chose one. 
-application/vnd.ms-office = execm python rcldoc.py - -application/vnd.oasis.opendocument.text = \ - internal xsltproc meta meta.xml opendoc-meta.xsl \ - body content.xml opendoc-body.xsl -application/vnd.oasis.opendocument.text-template = \ - internal xsltproc meta meta.xml opendoc-meta.xsl \ - body content.xml opendoc-body.xsl -application/vnd.oasis.opendocument.presentation = \ - internal xsltproc meta meta.xml opendoc-meta.xsl \ - body content.xml opendoc-body.xsl -application/vnd.oasis.opendocument.spreadsheet = \ - internal xsltproc meta meta.xml opendoc-meta.xsl \ - body content.xml opendoc-body.xsl -application/vnd.oasis.opendocument.graphics = \ - internal xsltproc meta meta.xml opendoc-meta.xsl \ - body content.xml opendoc-body.xsl -application/vnd.oasis.opendocument.presentation-flat-xml = \ - internal xsltproc opendoc-flat.xsl -application/vnd.oasis.opendocument.text-flat-xml = \ - internal xsltproc opendoc-flat.xsl -application/vnd.oasis.opendocument.spreadsheet-flat-xml = \ - internal xsltproc opendoc-flat.xsl - -application/vnd.openxmlformats-officedocument.wordprocessingml.document = \ - internal xsltproc meta docProps/core.xml openxml-meta.xsl \ - body word/document.xml openxml-word-body.xsl \ - body word/footnotes.xml openxml-word-body.xsl \ - body word/endnotes.xml openxml-word-body.xsl -application/vnd.openxmlformats-officedocument.wordprocessingml.template = \ - internal xsltproc meta docProps/core.xml openxml-meta.xsl \ - body word/document.xml openxml-word-body.xsl \ - body word/footnotes.xml openxml-word-body.xsl \ - body word/endnotes.xml openxml-word-body.xsl -application/vnd.openxmlformats-officedocument.presentationml.template = \ - execm python rclopxml.py -application/vnd.openxmlformats-officedocument.presentationml.presentation = \ - execm python rclopxml.py -application/vnd.openxmlformats-officedocument.spreadsheetml.sheet = \ - internal xsltproc meta docProps/core.xml openxml-meta.xsl \ - body xl/sharedStrings.xml 
openxml-xls-body.xsl -application/vnd.openxmlformats-officedocument.spreadsheetml.template =\ - internal xsltproc meta docProps/core.xml openxml-meta.xsl \ - body xl/sharedStrings.xml openxml-xls-body.xsl - -application/vnd.sun.xml.calc = \ - internal xsltproc meta meta.xml opendoc-meta.xsl \ - body content.xml opendoc-body.xsl -application/vnd.sun.xml.calc.template = \ - internal xsltproc meta meta.xml opendoc-meta.xsl \ - body content.xml opendoc-body.xsl -application/vnd.sun.xml.draw = \ - internal xsltproc meta meta.xml opendoc-meta.xsl \ - body content.xml opendoc-body.xsl -application/vnd.sun.xml.draw.template = \ - internal xsltproc meta meta.xml opendoc-meta.xsl \ - body content.xml opendoc-body.xsl -application/vnd.sun.xml.impress = \ - internal xsltproc meta meta.xml opendoc-meta.xsl \ - body content.xml opendoc-body.xsl -application/vnd.sun.xml.impress.template = \ - internal xsltproc meta meta.xml opendoc-meta.xsl \ - body content.xml opendoc-body.xsl -application/vnd.sun.xml.math = \ - internal xsltproc meta meta.xml opendoc-meta.xsl \ - body content.xml opendoc-body.xsl -application/vnd.sun.xml.writer = \ - internal xsltproc meta meta.xml opendoc-meta.xsl \ - body content.xml opendoc-body.xsl -application/vnd.sun.xml.writer.global = \ - internal xsltproc meta meta.xml opendoc-meta.xsl \ - body content.xml opendoc-body.xsl -application/vnd.sun.xml.writer.template = \ - internal xsltproc meta meta.xml opendoc-meta.xsl \ - body content.xml opendoc-body.xsl - -#application/postscript = exec rclps -#application/x-gnuinfo = execm python rclinfo -#application/x-tar = execm python rcltar - -application/epub+zip = execm python rclepub -application/x-ipynb+json = execm python rclipynb.py -application/javascript = internal text/plain -application/ogg = execm python rclaudio -application/pdf = execm python rclpdf.py -application/sql = internal text/plain +application/postscript = application/vnd.wordperfect = exec wpd/wpd2html;mimetype=text/html 
-application/x-7z-compressed = execm python rcl7z -application/x-abiword = internal xsltproc abiword.xsl -application/x-awk = internal text/plain -application/x-chm = execm python rclchm -application/x-dia-diagram = execm python rcldia;mimetype=text/plain -application/x-flac = execm python rclaudio -application/x-gnote = execm python rclxml.py -application/x-hwp = execm python rclhwp.py -application/x-mimehtml = internal message/rfc822 -application/x-perl = internal text/plain -application/x-php = internal text/plain -application/x-rar = execm python rclrar;charset=default -application/x-shellscript = internal text/plain -application/x-webarchive = execm python rclwar -application/x-zerosize = internal -application/zip = execm python rclzip;charset=default -audio/aac = execm python rclaudio -audio/mp4 = execm python rclaudio -audio/mpeg = execm python rclaudio -audio/x-karaoke = execm python rclkar +application/x-dvi = +application/x-gnuinfo = +application/x-ipynb+json = execm rclipynb.py +application/x-tar = image/gif = execm rclimg.exe image/jp2 = execm rclimg.exe image/jpeg = execm rclimg.exe image/png = execm rclimg.exe -image/svg+xml = internal xsltproc svg.xsl image/tiff = execm rclimg.exe -image/vnd.djvu = execm python rcldjvu.py -inode/symlink = internal -inode/x-empty = internal application/x-zerosize -message/rfc822 = internal -text/calendar = execm python rclics;mimetype=text/plain -text/css = internal text/plain -text/html = internal -text/plain = internal -text/plain1 = internal -#text/rtf = execm python rclrtf.py -text/rtf = exec unrtf --nopict --html;mimetype=text/html -text/x-c = internal -text/x-c+ = internal -text/x-c++ = internal -text/x-chm-html = internal text/html -text/x-csharp = internal text/plain -text/x-csv = internal text/plain -text/x-fictionbook = internal xsltproc fb2.xsl -text/x-ini = internal text/plain -text/x-mail = internal -text/x-orgmode = execm python rclorgmode.py -text/x-perl = internal text/plain -text/x-python = execm 
python rclpython.py -text/x-shellscript = internal text/plain -text/x-srt = internal text/plain +image/x-nikon-nef = execm rclimg.exe image/x-xcf = execm rclimg.exe - -# Generic XML is best indexed as text, else it generates too many errors -# All parameter and tag names, attribute values etc, are indexed as -# text. rclxml.py tries to just index the text content. -#application/xml = execm rclxml.py -#text/xml = execm rclxml.py -application/xml = internal text/plain -text/xml = internal text/plain - -## ############################################# -# Icons to be used in the result list if required by gui config -[icons] -application/epub+zip = book -application/javascript = source -application/msword = wordprocessing -application/ogg = sownd -application/pdf = pdf -application/postscript = postscript -application/vnd.ms-excel = spreadsheet -application/vnd.ms-powerpoint = presentation -application/vnd.oasis.opendocument.presentation = presentation -application/vnd.oasis.opendocument.spreadsheet = spreadsheet -application/vnd.oasis.opendocument.text = wordprocessing -application/vnd.openxmlformats-officedocument.presentationml.presentation = presentation -application/vnd.openxmlformats-officedocument.presentationml.template = presentation -application/vnd.openxmlformats-officedocument.spreadsheetml.sheet = spreadsheet -application/vnd.openxmlformats-officedocument.spreadsheetml.template = spreadsheet -application/vnd.openxmlformats-officedocument.wordprocessingml.document = wordprocessing -application/vnd.openxmlformats-officedocument.wordprocessingml.template = wordprocessing -application/vnd.sun.xml.calc = spreadsheet -application/vnd.sun.xml.calc.template = spreadsheet -application/vnd.sun.xml.draw = drawing -application/vnd.sun.xml.draw.template = drawing -application/vnd.sun.xml.impress = presentation -application/vnd.sun.xml.impress.template = presentation -application/vnd.sun.xml.math = wordprocessing -application/vnd.sun.xml.writer = wordprocessing 
-application/vnd.sun.xml.writer.global = wordprocessing -application/vnd.sun.xml.writer.template = wordprocessing -application/vnd.wordperfect = wordprocessing -application/x-abiword = wordprocessing -application/x-awk = source -application/x-chm = book -application/x-dia-diagram = drawing -application/x-dvi = document -application/x-flac = sownd -application/x-fsdirectory = folder -application/x-gnote = document -#application/x-gnuinfo = book -application/x-gnumeric = spreadsheet -application/x-ipynb+json = document -application/x-kword = wordprocessing -application/x-lyx = wordprocessing -application/x-mimehtml = message -application/x-mobipocket-ebook = document -application/x-okular-notes = document -application/x-perl = source -application/x-php = source -application/x-rar = archive -application/x-scribus = document -application/x-scribus = wordprocessing -application/x-shellscript = source -application/x-tar = archive -application/x-tex = wordprocessing -application/x-webarchive = archive -application/xml = document -application/zip = archive -application/x-7z-compressed = archive -audio/mpeg = sownd -audio/x-karaoke = sownd -image/bmp = image -image/gif = image -image/jp2 = image -image/jpeg = image -image/png = image -image/svg+xml = drawing -image/tiff = image -image/vnd.djvu = document -image/x-xcf = image -image/x-xpmi = image -inode/directory = folder -inode/symlink = emblem-symbolic-link -message/rfc822 = message -text/html = html -text/html|chm = bookchap -text/html|epub = bookchap -#text/html|gnuinfo = bookchap -text/plain = txt -text/rtf = wordprocessing -text/x-c = source -text/x-c+ = source -text/x-c++ = source -text/x-csv = txt -text/x-fictionbook = document -text/x-html-aptosid-man = aptosid-book -text/x-html-sidux-man = sidux-book -text/x-ini = txt -text/x-mail = message -text/x-man = document -text/x-orgmode = document -text/x-perl = source -text/x-purple-html-log = pidgin -text/x-purple-log = pidgin -text/x-python = text-x-python 
-text/x-shellscript = source -text/x-tex = wordprocessing -text/xml = document -video/3gpp = video -video/mp2p = video -video/mp2t = video -video/mp4 = video -video/mpeg = video -video/quicktime = video -video/x-matroska = video -video/x-ms-asf = video -video/x-msvideo = video - -[categories] -# Categories group mime types by "kind". They can be used from the query -# language as an "rclcat" clause. This is fully dynamic, you can change the -# names and groups as you wish, only the mime types are stored in the index. -# -# If you add/remove categories, you may also want to change the -# "guifilters" section below. -text = \ - application/epub+zip \ - application/msword \ - application/pdf \ - application/postscript \ - application/vnd.oasis.opendocument.text \ - application/vnd.openxmlformats-officedocument.wordprocessingml.document \ - application/vnd.openxmlformats-officedocument.wordprocessingml.template \ - application/vnd.sun.xml.writer \ - application/vnd.sun.xml.writer.global \ - application/vnd.sun.xml.writer.template \ - application/vnd.wordperfect \ - application/x-abiword \ - application/x-awk \ - application/x-chm \ - application/x-dvi \ - application/x-gnote \ - application/x-gnuinfo \ - application/x-ipynb+json \ - application/x-kword \ - application/x-lyx \ - application/x-mobipocket-ebook \ - application/x-okular-notes \ - application/x-perl \ - application/x-scribus \ - application/x-shellscript \ - application/x-tex \ - application/xml \ - text/xml \ - text/x-csv \ - text/x-tex \ - image/vnd.djvu \ - text/calendar \ - text/html \ - text/plain \ - text/rtf \ - text/x-c \ - text/x-c++ \ - text/x-c+ \ - text/x-fictionbook \ - text/x-html-aptosid-man \ - text/x-html-sidux-man \ - text/x-ini \ - text/x-man \ - text/x-orgmode \ - text/x-perl \ - text/x-python \ - text/x-shellscript - -spreadsheet = \ - application/vnd.ms-excel \ - application/vnd.oasis.opendocument.spreadsheet \ - application/vnd.openxmlformats-officedocument.spreadsheetml.sheet \ - 
application/vnd.openxmlformats-officedocument.spreadsheetml.template \ - application/vnd.sun.xml.calc \ - application/vnd.sun.xml.calc.template \ - application/x-gnumeric - -presentation = \ - application/vnd.ms-powerpoint \ - application/vnd.oasis.opendocument.presentation \ - application/vnd.openxmlformats-officedocument.presentationml.presentation \ - application/vnd.openxmlformats-officedocument.presentationml.template \ - application/vnd.sun.xml.impress \ - application/vnd.sun.xml.impress.template - -media = \ - application/ogg \ - application/x-flac \ - audio/* \ - image/* \ - video/* \ - -message = message/rfc822 \ - text/x-gaim-log \ - text/x-mail \ - text/x-purple-log \ - text/x-purple-html-log \ - -other = application/vnd.sun.xml.draw \ - application/vnd.sun.xml.draw.template \ - application/vnd.sun.xml.math \ - application/x-dia-diagram \ - application/x-fsdirectory \ - application/x-mimehtml \ - application/x-rar \ - application/x-tar \ - application/x-webarchive \ - application/zip \ - application/x-7z-compressed \ - inode/directory \ - inode/symlink \ - -[guifilters] -# This defines the top level filters in the GUI (accessed by the the -# radiobuttons above the results area, or a toolbar combobox). -# Each entry defines a label and a query language fragment that will be -# applied to filter the current query if the option is activated. -# -# This does not really belong in mimeconf, but it does belong in the index -# config (not the GUI one), because it's not necessarily the same in all -# configs, it has to go somewhere, and it's not worth a separate config -# file... -# -# By default this filters by document category (see above), but any -# language fragment should be ok. Be aware though that the "document -# history" queries only know about simple "rclcat" filtering. -# -# If you don't want the filter names to be displayed in alphabetic order, -# you can define them with a colon. 
The part before the colon is not -# displayed but used for ordering, ie: a:zzbutshouldbefirst b:aacomeslast -# -text = rclcat:text -spreadsheet = rclcat:spreadsheet -presentation = rclcat:presentation -media = rclcat:media -message = rclcat:message -other = rclcat:other - +text/x-bibtex = +text/x-gaim-log = +text/x-html-aptosid-man = +text/x-man = +text/x-purple-log = +text/x-tex = +video/x-msvideo = execm rclimg.exe diff --git a/src/windows/mimeview b/src/windows/mimeview index dcf97fe3..72dc53f8 100644 --- a/src/windows/mimeview +++ b/src/windows/mimeview @@ -1,21 +1,5 @@ -## ########################################## -# External viewers, launched by the recoll GUI when you click on a result -# 'edit' link -# -# MS WINDOWS VERSION -# -# Mime types which we should not uncompress if they are found gzipped or -# bzipped because the native viewer knows how to handle. These would be -# exceptions and the list is normally empty -#nouncompforviewmts = +# MS WINDOWS system changes for mimeview -# For releases 1.18 and later: exceptions when using the x-all entry: these -# types will use their local definition. This is useful, e.g.: -# -# - for pdf, where we can pass additional parameters like page to open and -# search string -# - For pages of CHM and EPUB documents where we can choose to open the -# parent document instead of a temporary html file. 
xallexcepts = \ text/html|epub \ application/x-fsdirectory|parentopen inode/directory|parentopen @@ -44,157 +28,6 @@ application/pdf = C:/users/bill/appdata/local/apps/evince-2.32.0.145/bin/evince #application/pdf = "C:/Program Files/SumatraPDF/SumatraPDF.exe" -page %p %f #application/pdf = "C:/Program Files (x86)/Foxit Software/Foxit Reader/FoxitReader.exe" %f /A page=%p -###### THE FOLLOWING ARE NOT USED AT ALL ON WINDOWS, but the types need to -###### be listed for an "Open" link to appear in the result list -application/epub+zip = ebook-viewer %f - -application/x-gnote = gnote %f - -application/x-mobipocket-ebook = ebook-viewer %f - -application/x-kword = kword %f -application/x-abiword = abiword %f - - -application/postscript = evince --page-index=%p --find=%s %f -application/x-dvi = evince --page-index=%p --find=%s %f - -application/x-lyx = lyx %f -application/x-scribus = scribus %f - -#application/msword = libreoffice %f -application/msword = \ - "C:/Program Files (x86)/LibreOffice 5/program/soffice.exe" %f - -application/x-hwp = hanword %f - -application/vnd.ms-excel = libreoffice %f -application/vnd.ms-powerpoint = libreoffice %f - -application/vnd.oasis.opendocument.text = libreoffice %f -application/vnd.oasis.opendocument.presentation = libreoffice %f -application/vnd.oasis.opendocument.spreadsheet = libreoffice %f - -application/vnd.openxmlformats-officedocument.wordprocessingml.document = \ - libreoffice %f -application/vnd.openxmlformats-officedocument.wordprocessingml.template = \ - libreoffice %f -application/vnd.openxmlformats-officedocument.presentationml.template = \ - libreoffice %f -application/vnd.openxmlformats-officedocument.presentationml.presentation = \ - libreoffice %f -application/vnd.openxmlformats-officedocument.spreadsheetml.sheet = \ - libreoffice %f -application/vnd.openxmlformats-officedocument.spreadsheetml.template =\ - libreoffice %f -application/vnd.sun.xml.calc = libreoffice %f -application/vnd.sun.xml.calc.template = 
libreoffice %f -application/vnd.sun.xml.draw = libreoffice %f -application/vnd.sun.xml.draw.template = libreoffice %f -application/vnd.sun.xml.impress = libreoffice %f -application/vnd.sun.xml.impress.template = libreoffice %f -application/vnd.sun.xml.math = libreoffice %f -application/vnd.sun.xml.writer = libreoffice %f -application/vnd.sun.xml.writer.global = libreoffice %f -application/vnd.sun.xml.writer.template = libreoffice %f -application/vnd.wordperfect = libreoffice %f -text/rtf = libreoffice %f - -application/x-dia-diagram = dia %f - -application/x-fsdirectory = dolphin %f -inode/directory = dolphin %f - -application/x-gnuinfo = xterm -e "info -f %f" -application/x-gnumeric = gnumeric %f - -application/x-flac = rhythmbox %f -audio/mpeg = rhythmbox %f -application/ogg = rhythmbox %f -audio/x-karaoke = kmid %f - -image/jpeg = gwenview %f -image/png = gwenview %f -image/tiff = gwenview %f -image/gif = gwenview %f -image/svg+xml = inkview %f -image/vnd.djvu = djview %f -image/x-xcf = gimp %f -image/bmp = gwenview %f -image/x-ms-bmp = gwenview %f -image/x-xpmi = gwenview %f - -# Opening mail messages not always works. 
-# - Thunderbird will only open a single-message file if it has an .emf -# extension -# - "sylpheed %f" seems to work ok as of version 3.3 -# - "kmail --view %u" works -message/rfc822 = thunderbird -file %f -text/x-mail = thunderbird -file %f -application/x-mimehtml = thunderbird -file %f - -text/calendar = evolution %f - -application/x-okular-notes = okular %f - -application/x-rar = ark %f -application/x-tar = ark %f -application/zip = ark %f -application/x-7z-compressed = ark %f - -application/x-awk = emacsclient --no-wait %f -application/x-perl = emacsclient --no-wait %f -text/x-perl = emacsclient --no-wait %f -application/x-shellscript = emacsclient --no-wait %f -text/x-shellscript = emacsclient --no-wait %f -text/x-srt = emacsclient --no-wait %f - -# Or firefox -remote "openFile(%u)" -text/html = firefox %u - -# gnu info nodes are translated to html with a "gnuinfo" -# rclaptg. rclshowinfo knows how to start the info command on the right -# node -text/html|gnuinfo = rclshowinfo %F %(title);ignoreipath=1 - -application/x-webarchive = konqueror %f -text/x-fictionbook = ebook-viewer %f -application/x-tex = emacsclient --no-wait %f -application/xml = emacsclient --no-wait %f -text/xml = emacsclient --no-wait %f -text/x-tex = emacsclient --no-wait %f -text/plain = emacsclient --no-wait %f -text/x-awk = emacsclient --no-wait %f -text/x-c = emacsclient --no-wait %f -text/x-c+ = emacsclient --no-wait %f -text/x-c++ = emacsclient --no-wait %f -text/x-csv = libreoffice %f -text/x-html-sidux-man = konqueror %f -text/x-html-aptosid-man = iceweasel %f - -application/x-chm = kchmviewer %f -# Html pages inside a chm have a chm rclaptg set by the filter. 
Kchmviewer -# knows how to use the ipath (which is the internal chm path) to open the -# file at the right place -text/html|chm = kchmviewer --url %i %F - -text/x-ini = emacsclient --no-wait %f -text/x-man = xterm -u8 -e "groff -T ascii -man %f | more" -text/x-python = idle %f -text/x-gaim-log = emacsclient --no-wait %f -text/x-purple-html-log = emacsclient --no-wait %f -text/x-purple-log = emacsclient --no-wait %f - -# The video types will usually be handled by the desktop default, but they -# need entries here to get an "Open" link -video/3gpp = vlc %f -video/mp2p = vlc %f -video/mp2t = vlc %f -video/mp4 = vlc %f -video/mpeg = vlc %f -video/quicktime = vlc %f -video/x-matroska = vlc %f -video/x-ms-asf = vlc %f -video/x-msvideo = vlc %f - - +########## +# Other MIME types have no specializations on Windows, but the types need to be listed for an "Open" +# link to appear in the result list, the listing is in the generic file diff --git a/src/windows/mkinstdir.sh b/src/windows/mkinstdir.sh index f37654c7..f01001ad 100644 --- a/src/windows/mkinstdir.sh +++ b/src/windows/mkinstdir.sh @@ -171,13 +171,15 @@ copyrecoll() chkcp $RCL/doc/user/docbook-xsl.css $DESTDIR/Share/doc mkdir -p $DESTDIR/Share/doc/webhelp rsync -av $RCL/doc/user/webhelp/docs/* $DESTDIR/Share/doc/webhelp || exit 1 - chkcp $RCL/sampleconf/fields $DESTDIR/Share/examples + chkcp $RCL/sampleconf/fields $DESTDIR/Share/examples chkcp $RCL/sampleconf/fragment-buttons.xml $DESTDIR/Share/examples - chkcp $RCL/windows/mimeconf $DESTDIR/Share/examples - chkcp $RCL/sampleconf/mimemap $DESTDIR/Share/examples - chkcp $RCL/windows/mimeview $DESTDIR/Share/examples - chkcp $RCL/sampleconf/recoll.conf $DESTDIR/Share/examples - chkcp $RCL/sampleconf/recoll.qss $DESTDIR/Share/examples + chkcp $RCL/sampleconf/mimeconf $DESTDIR/Share/examples + chkcp $RCL/sampleconf/mimeview $DESTDIR/Share/examples + chkcp $RCL/sampleconf/mimemap $DESTDIR/Share/examples + chkcp $RCL/windows/mimeconf $DESTDIR/Share/examples/windows + chkcp 
$RCL/windows/mimeview $DESTDIR/Share/examples/windows + chkcp $RCL/sampleconf/recoll.conf $DESTDIR/Share/examples + chkcp $RCL/sampleconf/recoll.qss $DESTDIR/Share/examples chkcp $RCL/sampleconf/recoll-dark.qss $DESTDIR/Share/examples chkcp $RCL/sampleconf/recoll-dark.css $DESTDIR/Share/examples @@ -349,7 +351,7 @@ test "$VERSION" = "$CFVERS" || echo Packaging version $CFVERS -for d in doc examples filters images translations; do +for d in doc examples examples/windows filters images translations; do test -d $DESTDIR/Share/$d || mkdir -p $DESTDIR/Share/$d || \ fatal mkdir $d failed done diff --git a/src/windows/qmkrecoll/recollindex.pro b/src/windows/qmkrecoll/recollindex.pro index 9fa47f03..c0b18fdb 100644 --- a/src/windows/qmkrecoll/recollindex.pro +++ b/src/windows/qmkrecoll/recollindex.pro @@ -35,25 +35,23 @@ windows { LIBS += \ ../build-librecoll-Desktop_Qt_5_8_0_MinGW_32bit-Release/release/librecoll.dll \ -lshlwapi -lpsapi -lkernel32 - } + } - contains(QMAKE_CC, cl){ - # MSVC - RECOLLDEPS = ../../../../recolldeps/msvc - DEFINES += USING_STATIC_LIBICONV - LIBS += \ - -L../build-librecoll-Desktop_Qt_5_14_2_MSVC2017_32bit-Release/release \ - -llibrecoll \ - $$RECOLLDEPS/libxml2/libxml2-2.9.4+dfsg1/win32/bin.msvc/libxml2.lib \ - $$RECOLLDEPS/libxslt/libxslt-1.1.29/win32/bin.msvc/libxslt.lib \ - -L../build-libxapian-Desktop_Qt_5_14_2_MSVC2017_32bit-Release/release \ - -llibxapian \ - $$RECOLLDEPS/zlib-1.2.11/zdll.lib \ - -L$$RECOLLDEPS/build-libiconv-Desktop_Qt_5_14_2_MSVC2017_32bit-Release/release \ - -llibiconv -lShell32 \ - -lrpcrt4 -lws2_32 -luser32 \ - -lshlwapi -lpsapi -lkernel32 - } + contains(QMAKE_CC, cl){ + # MSVC + RECOLLDEPS = ../../../../recolldeps/msvc + DEFINES += USING_STATIC_LIBICONV + PRE_TARGETDEPS = \ + ../build-librecoll-Desktop_Qt_5_14_2_MSVC2017_32bit-Release/release/librecoll.lib + LIBS += \ + -L../build-librecoll-Desktop_Qt_5_14_2_MSVC2017_32bit-Release/release -llibrecoll \ + 
$$RECOLLDEPS/libxml2/libxml2-2.9.4+dfsg1/win32/bin.msvc/libxml2.lib \ + $$RECOLLDEPS/libxslt/libxslt-1.1.29/win32/bin.msvc/libxslt.lib \ + -L../build-libxapian-Desktop_Qt_5_14_2_MSVC2017_32bit-Release/release -llibxapian \ + $$RECOLLDEPS/zlib-1.2.11/zdll.lib \ + -L$$RECOLLDEPS/build-libiconv-Desktop_Qt_5_14_2_MSVC2017_32bit-Release/release -llibiconv \ + -lShell32 -lrpcrt4 -lws2_32 -luser32 -lshlwapi -lpsapi -lkernel32 + } INCLUDEPATH += ../../windows SOURCES += ../../windows/getopt.cc diff --git a/src/windows/qmkrecoll/recollq.pro b/src/windows/qmkrecoll/recollq.pro index cc91d078..527d5a1a 100644 --- a/src/windows/qmkrecoll/recollq.pro +++ b/src/windows/qmkrecoll/recollq.pro @@ -29,18 +29,16 @@ windows { contains(QMAKE_CC, cl){ # Visual Studio RECOLLDEPS = ../../../../recolldeps/msvc + PRE_TARGETDEPS = \ + ../build-librecoll-Desktop_Qt_5_14_2_MSVC2017_32bit-Release/release/librecoll.lib LIBS += \ - -L../build-librecoll-Desktop_Qt_5_14_2_MSVC2017_32bit-Release/release \ - -llibrecoll \ + -L../build-librecoll-Desktop_Qt_5_14_2_MSVC2017_32bit-Release/release -llibrecoll \ $$RECOLLDEPS/libxml2/libxml2-2.9.4+dfsg1/win32/bin.msvc/libxml2.lib \ $$RECOLLDEPS/libxslt/libxslt-1.1.29/win32/bin.msvc/libxslt.lib \ - -L../build-libxapian-Desktop_Qt_5_14_2_MSVC2017_32bit-Release/release \ - -llibxapian \ - -L$$RECOLLDEPS/build-libiconv-Desktop_Qt_5_14_2_MSVC2017_32bit-Release/release/ \ - -llibiconv \ + -L../build-libxapian-Desktop_Qt_5_14_2_MSVC2017_32bit-Release/release -llibxapian \ + -L$$RECOLLDEPS/build-libiconv-Desktop_Qt_5_14_2_MSVC2017_32bit-Release/release/ -llibiconv \ $$RECOLLDEPS/zlib-1.2.11/zdll.lib \ - -lrpcrt4 -lws2_32 -luser32 -lshell32 \ - -lshlwapi -lpsapi -lkernel32 + -lrpcrt4 -lws2_32 -luser32 -lshell32 -lshlwapi -lpsapi -lkernel32 } INCLUDEPATH += ../../windows diff --git a/tests/Maildir/Maildir.txt b/tests/Maildir/Maildir.txt index 334fd3e7..49a8343d 100644 --- a/tests/Maildir/Maildir.txt +++ b/tests/Maildir/Maildir.txt @@ -1 +1 @@ -FreqFor 
maildir_uniquexxx : 692 +FreqFor maildir_uniquexxx : 734 diff --git a/tests/config/mimeconf b/tests/config/mimeconf index 974d09ce..b38b5fff 100644 --- a/tests/config/mimeconf +++ b/tests/config/mimeconf @@ -8,4 +8,4 @@ # values is identical. [index] -application/x-tar = execm rcltar +application/x-tar = execm rcltar.py diff --git a/tests/embed/embed.txt b/tests/embed/embed.txt index 4c643b6c..6169d50a 100644 --- a/tests/embed/embed.txt +++ b/tests/embed/embed.txt @@ -1,2 +1,2 @@ 1 results -application/msword [file:///home/dockes/projets/fulltext/testrecoll/embed/thunderbirdlocalfolders.zip] [xingfx1.doc] 24576 bytes +application/msword [file:///home/dockes/projets/fulltext/testrecoll/embed/thunderbirdlocalfolders.zip] [xingfx1.doc (Sending a word document)] 24576 bytes diff --git a/tests/orgmode/orgmode.txt b/tests/orgmode/orgmode.txt index 805cbc48..3348afb1 100644 --- a/tests/orgmode/orgmode.txt +++ b/tests/orgmode/orgmode.txt @@ -1,2 +1,2 @@ 1 results -text/plain [file:///home/dockes/projets/fulltext/testrecoll/orgmode/orgmode-example.org] [law and legal code versioned on github] 370 bytes +text/x-orgmode-sub [file:///home/dockes/projets/fulltext/testrecoll/orgmode/orgmode-example.org] [law and legal code versioned on github] 487 bytes diff --git a/tests/rfc2231/rfc2231.txt b/tests/rfc2231/rfc2231.txt index f1d98a3f..38192a38 100644 --- a/tests/rfc2231/rfc2231.txt +++ b/tests/rfc2231/rfc2231.txt @@ -2,5 +2,5 @@ 0 results 0 results 2 results +application/octet-stream [file:///home/dockes/projets/fulltext/testrecoll/rfc2231/thunder] [épatantuniquefilenameterm.bin (vrai attach)] 5785 bytes message/rfc822 [file:///home/dockes/projets/fulltext/testrecoll/rfc2231/thunder] [vrai attach] 11208 bytes -application/octet-stream [file:///home/dockes/projets/fulltext/testrecoll/rfc2231/thunder] [épatantuniquefilenameterm.bin] 5785 bytes