From 8c122d7de8c75add8c88d5de2a0f729a8a8cb051 Mon Sep 17 00:00:00 2001 From: Jean-Francois Dockes Date: Fri, 7 Jan 2022 10:04:06 +0100 Subject: [PATCH 01/19] buildppa: remove groovy --- packaging/debian/buildppa.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packaging/debian/buildppa.sh b/packaging/debian/buildppa.sh index 310777b8..5b59a6ef 100644 --- a/packaging/debian/buildppa.sh +++ b/packaging/debian/buildppa.sh @@ -85,7 +85,7 @@ done ### KIO. -series="bionic focal groovy hirsute impish" +#series="bionic focal hirsute impish" series= debdir=debiankio From 25d6d78902d65e9b3feda40bfad7a21bdcfcf324 Mon Sep 17 00:00:00 2001 From: Jean-Francois Dockes Date: Fri, 7 Jan 2022 10:44:40 +0100 Subject: [PATCH 02/19] comments and justification --- src/qtgui/rclmain_w.h | 3 +-- src/qtgui/snippets_w.cpp | 52 ++++++++++++++------------------------- src/query/plaintorich.h | 5 ++-- src/rcldb/rclabstract.cpp | 11 +++------ src/rcldb/rcldb.cpp | 15 +++++------ src/rcldb/rclquery.cpp | 4 +-- 6 files changed, 34 insertions(+), 56 deletions(-) diff --git a/src/qtgui/rclmain_w.h b/src/qtgui/rclmain_w.h index 05899cb4..1c00932d 100644 --- a/src/qtgui/rclmain_w.h +++ b/src/qtgui/rclmain_w.h @@ -140,8 +140,7 @@ public slots: virtual void showActionsSearch(); virtual void startPreview(int docnum, Rcl::Doc doc, int keymods); virtual void startPreview(Rcl::Doc); - virtual void startNativeViewer(Rcl::Doc, int pagenum = -1, - QString term = QString()); + virtual void startNativeViewer(Rcl::Doc, int pagenum = -1, QString term = QString()); virtual void openWith(Rcl::Doc, string); virtual void saveDocToFile(Rcl::Doc); virtual void previewNextInTab(Preview *, int sid, int docnum); diff --git a/src/qtgui/snippets_w.cpp b/src/qtgui/snippets_w.cpp index 0e67a1bc..54c90e47 100644 --- a/src/qtgui/snippets_w.cpp +++ b/src/qtgui/snippets_w.cpp @@ -1,4 +1,4 @@ -/* Copyright (C) 2012 J.F.Dockes +/* Copyright (C) 2012-2021 J.F.Dockes * This program is free software; you can 
redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -67,8 +67,7 @@ using namespace std; class PlainToRichQtSnippets : public PlainToRich { public: virtual string startMatch(unsigned int) { - return string(""); + return string(""); } virtual string endMatch() { return string(""); @@ -82,12 +81,10 @@ void SnippetsW::init() QPushButton *searchButton = new QPushButton(tr("Search")); searchButton->setAutoDefault(false); buttonBox->addButton(searchButton, QDialogButtonBox::ActionRole); -// setWindowFlags(Qt::WindowStaysOnTopHint); searchFM->hide(); onNewShortcuts(); - connect(&SCBase::scBase(), SIGNAL(shortcutsChanged()), - this, SLOT(onNewShortcuts())); + connect(&SCBase::scBase(), SIGNAL(shortcutsChanged()), this, SLOT(onNewShortcuts())); QPushButton *closeButton = buttonBox->button(QDialogButtonBox::Close); if (closeButton) @@ -105,11 +102,9 @@ void SnippetsW::init() browserw = new QWebView(this); verticalLayout->insertWidget(0, browserw); browser->setUrl(QUrl(QString::fromUtf8("about:blank"))); - connect(browser, SIGNAL(linkClicked(const QUrl &)), - this, SLOT(onLinkClicked(const QUrl &))); + connect(browser, SIGNAL(linkClicked(const QUrl &)), this, SLOT(onLinkClicked(const QUrl &))); browser->page()->setLinkDelegationPolicy(QWebPage::DelegateAllLinks); - browser->page()->currentFrame()->setScrollBarPolicy(Qt::Horizontal, - Qt::ScrollBarAlwaysOff); + browser->page()->currentFrame()->setScrollBarPolicy(Qt::Horizontal, Qt::ScrollBarAlwaysOff); QWEBSETTINGS *ws = browser->page()->settings(); if (prefs.reslistfontfamily != "") { ws->setFontFamily(QWEBSETTINGS::StandardFont, prefs.reslistfontfamily); @@ -136,8 +131,7 @@ void SnippetsW::init() #else browserw = new QTextBrowser(this); verticalLayout->insertWidget(0, browserw); - connect(browser, SIGNAL(anchorClicked(const QUrl &)), - this, SLOT(onLinkClicked(const QUrl &))); + connect(browser, 
SIGNAL(anchorClicked(const QUrl &)), this, SLOT(onLinkClicked(const QUrl &))); browser->setReadOnly(true); browser->setUndoRedoEnabled(false); browser->setOpenLinks(false); @@ -183,8 +177,7 @@ void SnippetsW::createPopupMenu(const QPoint& pos) { QMenu *popup = new QMenu(this); if (m_sortingByPage) { - popup->addAction(tr("Sort By Relevance"), this, - SLOT(reloadByRelevance())); + popup->addAction(tr("Sort By Relevance"), this, SLOT(reloadByRelevance())); } else { popup->addAction(tr("Sort By Page"), this, SLOT(reloadByPage())); } @@ -230,29 +223,22 @@ void SnippetsW::onSetDoc(Rcl::Doc doc, std::shared_ptr source) source->getTerms(hdata); ostringstream oss; - oss << - "" - ""; + oss << "" + ""; oss << "\n"; oss << qs2utf8s(prefs.darkreslistheadertext) << qs2utf8s(prefs.reslistheadertext); - oss << - "" - "" - "" - ; + oss << "
"; g_hiliter.set_inputhtml(false); bool nomatch = true; for (const auto& snippet : vpabs) { if (snippet.page == -1) { - oss << "" << endl; + oss << "" << "\n"; continue; } list lr; @@ -263,13 +249,12 @@ void SnippetsW::onSetDoc(Rcl::Doc doc, std::shared_ptr source) nomatch = false; oss << "" << endl; + oss << "" << "\n"; } - oss << "
" << - snippet.snippet << "
" << snippet.snippet << "
"; if (snippet.page > 0) { - oss << "" - << "P. " << snippet.page << ""; + oss << "" << + "P. " << snippet.page << ""; } - oss << "" << lr.front().c_str() << "
" << lr.front().c_str() << "
" << endl; + oss << "" << "\n"; if (nomatch) { oss.str("\n"); oss << qs2utf8s(tr("

Sorry, no exact match was found within limits. " @@ -278,12 +263,12 @@ void SnippetsW::onSetDoc(Rcl::Doc doc, std::shared_ptr source) } oss << "\n"; #if defined(USING_WEBKIT) || defined(USING_WEBENGINE) - browser->setHtml(QString::fromUtf8(oss.str().c_str())); + browser->setHtml(u8s2qs(oss.str())); #else browser->clear(); browser->append("."); browser->clear(); - browser->insertHtml(QString::fromUtf8(oss.str().c_str())); + browser->insertHtml(u8s2qs(oss.str())); browser->moveCursor (QTextCursor::Start); browser->ensureCursorVisible(); #endif @@ -354,8 +339,7 @@ void SnippetsW::onLinkClicked(const QUrl &url) string term; if (termpos != string::npos) term = ascurl.substr(termpos+1); - emit startNativeViewer(m_doc, page, - QString::fromUtf8(term.c_str())); + emit startNativeViewer(m_doc, page, u8s2qs(term)); return; } } diff --git a/src/query/plaintorich.h b/src/query/plaintorich.h index 9118ea5a..b86f649b 100644 --- a/src/query/plaintorich.h +++ b/src/query/plaintorich.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2004 J.F.Dockes +/* Copyright (C) 2004-2021 J.F.Dockes * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -60,8 +60,7 @@ public: * @param in raw text out of internfile. * @param out rich text output, divided in chunks (to help our caller * avoid inserting half tags into textedit which doesnt like it) - * @param in hdata terms and groups to be highlighted. These are - * lowercase and unaccented. + * @param in hdata terms and groups to be highlighted. 
See utils/hldata.h * @param chunksize max size of chunks in output list */ virtual bool plaintorich(const std::string &in, std::list &out, diff --git a/src/rcldb/rclabstract.cpp b/src/rcldb/rclabstract.cpp index 311ef760..04811a11 100644 --- a/src/rcldb/rclabstract.cpp +++ b/src/rcldb/rclabstract.cpp @@ -254,7 +254,7 @@ double Query::Native::qualityTerms(Xapian::docid docid, } -// Return page number for first match of "significant" term. +// Choose most interesting term and return the page number for its first match int Query::Native::getFirstMatchPage(Xapian::docid docid, string& term) { LOGDEB("Query::Native::getFirstMatchPage\n"); @@ -286,9 +286,7 @@ int Query::Native::getFirstMatchPage(Xapian::docid docid, string& term) qualityTerms(docid, terms, byQ); for (auto mit = byQ.rbegin(); mit != byQ.rend(); mit++) { - for (vector::const_iterator qit = mit->second.begin(); - qit != mit->second.end(); qit++) { - string qterm = *qit; + for (const auto& qterm : mit->second) { Xapian::PositionIterator pos; string emptys; try { @@ -619,9 +617,8 @@ int Query::Native::abstractFromIndex( // possibly retried by our caller. // // @param[out] vabs the abstract is returned as a vector of snippets. -int Query::Native::makeAbstract(Xapian::docid docid, - vector& vabs, - int imaxoccs, int ictxwords, bool sortbypage) +int Query::Native::makeAbstract( + Xapian::docid docid, vector& vabs, int imaxoccs, int ictxwords, bool sortbypage) { chron.restart(); LOGDEB("makeAbstract: docid " << docid << " imaxoccs " << diff --git a/src/rcldb/rcldb.cpp b/src/rcldb/rcldb.cpp index 557affcb..ceaec4e3 100644 --- a/src/rcldb/rcldb.cpp +++ b/src/rcldb/rcldb.cpp @@ -96,10 +96,13 @@ const string pathelt_prefix = "XP"; static const string udi_prefix("Q"); static const string parent_prefix("F"); -// Special terms to mark begin/end of field (for anchored searches), and -// page breaks +// Special terms to mark begin/end of field (for anchored searches). 
string start_of_field_term; string end_of_field_term; + +// Special term for page breaks. Note that we use a complicated mechanism for multiple page +// breaks at the same position, when it would have been probably simpler to use XXPG/n terms +// instead (did not try to implement though). A change would force users to reindex. const string page_break_term = "XXPG/"; // Special term to mark documents with children. @@ -1846,16 +1849,14 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi, Doc &doc) } } - // If empty pages (multiple break at same pos) were recorded, save - // them (this is because we have no way to record them in the - // Xapian list + // If empty pages (multiple break at same pos) were recorded, save them (this is + // because we have no way to record them in the Xapian list) if (!tpidx.m_pageincrvec.empty()) { ostringstream multibreaks; for (unsigned int i = 0; i < tpidx.m_pageincrvec.size(); i++) { if (i != 0) multibreaks << ","; - multibreaks << tpidx.m_pageincrvec[i].first << "," << - tpidx.m_pageincrvec[i].second; + multibreaks << tpidx.m_pageincrvec[i].first << "," << tpidx.m_pageincrvec[i].second; } RECORD_APPEND(record, string(cstr_mbreaks), multibreaks.str()); } diff --git a/src/rcldb/rclquery.cpp b/src/rcldb/rclquery.cpp index 19b88f79..19b50c87 100644 --- a/src/rcldb/rclquery.cpp +++ b/src/rcldb/rclquery.cpp @@ -360,7 +360,6 @@ int Query::getFirstMatchPage(const Doc &doc, string& term) return m_reason.empty() ? 
pagenum : -1; } - // Mset size // Note: times for retrieving (multiple times)all docs from a sample // 25k docs db (q: mime:*) @@ -511,8 +510,7 @@ vector Query::expand(const Doc &doc) Xapian::ESet eset = m_nq->xenquire->get_eset(20, rset, false); LOGDEB("ESet terms:\n"); // We filter out the special terms - for (Xapian::ESetIterator it = eset.begin(); - it != eset.end(); it++) { + for (Xapian::ESetIterator it = eset.begin(); it != eset.end(); it++) { LOGDEB(" [" << (*it) << "]\n"); if ((*it).empty() || has_prefix(*it)) continue; From 7b4290744130aa5ca179c673cd22bb87cb9f6b58 Mon Sep 17 00:00:00 2001 From: Jean-Francois Dockes Date: Fri, 7 Jan 2022 11:43:46 +0100 Subject: [PATCH 03/19] Add callback for textsplit to report line breaks. Use it to implement looking up the first line where a term appears to use with a %l spec for executing a viewer --- src/common/textsplit.cpp | 26 +++++---- src/common/textsplit.h | 3 + src/qtgui/rclm_view.cpp | 43 +++++++++----- src/query/docseq.h | 3 + src/query/docseqdb.cpp | 11 ++++ src/query/docseqdb.h | 1 + src/rcldb/rclabsfromtext.cpp | 109 +++++++++++++++++++++-------------- src/rcldb/rclquery.h | 12 +++- 8 files changed, 138 insertions(+), 70 deletions(-) diff --git a/src/common/textsplit.cpp b/src/common/textsplit.cpp index 49b234c0..be61c917 100644 --- a/src/common/textsplit.cpp +++ b/src/common/textsplit.cpp @@ -596,6 +596,7 @@ bool TextSplit::text_to_words(const string &in) clearsplitstate(); bool pagepending = false; + bool nlpending = false; bool softhyphenpending = false; // Running count of non-alphanum chars. 
Reset when we see one; @@ -705,6 +706,10 @@ bool TextSplit::text_to_words(const string &in) pagepending = false; newpage(m_wordpos); } + if (nlpending) { + nlpending = false; + newline(m_wordpos); + } break; case WILD: @@ -745,6 +750,12 @@ bool TextSplit::text_to_words(const string &in) break; } } else { + // Note about dangling hyphens: we always strip '-' found before whitespace, + // even before a newline, then generate two terms, before and after the line + // break. We have no way to know if '-' is there because a word was broken by + // justification or if it was part of an actual compound word (would need a + // dictionary to check). As soft-hyphen *should* be used if the '-' is not part + // of the text. if (nextc == -1 || isvisiblewhite(nextc)) { goto SPACE; } @@ -844,19 +855,10 @@ bool TextSplit::text_to_words(const string &in) break; case '\n': + nlpending = true; + /* FALLTHROUGH */ case '\r': - if (m_span.length() && *m_span.rbegin() == '-') { - // if '-' is the last char before end of line, we - // strip it. We have no way to know if this is added - // because of the line split or if it was part of an - // actual compound word (would need a dictionary to - // check). As soft-hyphen *should* be used if the '-' - // is not part of the text, it is better to properly - // process a real compound word, and produce wrong - // output from wrong text. The word-emitting routine - // will strip the trailing '-'. - goto SPACE; - } else if (softhyphenpending) { + if (softhyphenpending) { // Don't reset soft-hyphen continue; } else { diff --git a/src/common/textsplit.h b/src/common/textsplit.h index 0821ee04..c09e867f 100644 --- a/src/common/textsplit.h +++ b/src/common/textsplit.h @@ -73,6 +73,9 @@ public: * just don't know about pages. */ virtual void newpage(int /*pos*/) {} + /** Called when we encounter newline \n 0x0a. Override to use the event. 
*/ + virtual void newline(int /*pos*/) {} + // Static utility functions: /** Count words in string, as the splitter would generate them */ diff --git a/src/qtgui/rclm_view.cpp b/src/qtgui/rclm_view.cpp index d9a75b90..6aa43e00 100644 --- a/src/qtgui/rclm_view.cpp +++ b/src/qtgui/rclm_view.cpp @@ -34,6 +34,7 @@ #include "rclmain_w.h" #include "rclzg.h" #include "pathut.h" +#include "unacpp.h" using namespace std; @@ -42,7 +43,6 @@ static const vector browser_list{ "opera", "google-chrome", "chromium-browser", "palemoon", "iceweasel", "firefox", "konqueror", "epiphany"}; - // Start native viewer or preview for input Doc. This is used to allow // using recoll from another app (e.g. Unity Scope) to view embedded // result docs (docs with an ipath). . We act as a proxy to extract @@ -155,13 +155,27 @@ void RclMain::openWith(Rcl::Doc doc, string cmdspec) execViewer(subs, false, execname, lcmd, cmdspec, doc); } -void RclMain::startNativeViewer(Rcl::Doc doc, int pagenum, QString term) +static bool pagenumNeeded(const std::string& cmd) { + return cmd.find("%p") != std::string::npos; +} +static bool linenumNeeded(const std::string& cmd) +{ + return cmd.find("%l") != std::string::npos; +} +static bool termNeeded(const std::string& cmd) +{ + return cmd.find("%s") != std::string::npos; +} + +void RclMain::startNativeViewer(Rcl::Doc doc, int pagenum, QString qterm) +{ + std::string term = qs2utf8s(qterm); string apptag; doc.getmeta(Rcl::Doc::keyapptg, &apptag); LOGDEB("RclMain::startNativeViewer: mtype [" << doc.mimetype << "] apptag [" << apptag << "] page " << pagenum << " term [" << - qs2utf8s(term) << "] url [" << doc.url << "] ipath [" << + term << "] url [" << doc.url << "] ipath [" << doc.ipath << "]\n"); // Look for appropriate viewer @@ -377,19 +391,19 @@ void RclMain::startNativeViewer(Rcl::Doc doc, int pagenum, QString term) // If we are not called with a page number (which would happen for a call // from the snippets window), see if we can compute a page number 
anyway. - if (pagenum == -1) { - pagenum = 1; - string lterm; - if (m_source) - pagenum = m_source->getFirstMatchPage(doc, lterm); + if (m_source && pagenum == -1 && (pagenumNeeded(cmd) || termNeeded(cmd)|| linenumNeeded(cmd))) { + pagenum = m_source->getFirstMatchPage(doc, term); if (pagenum == -1) pagenum = 1; - else // We get the match term used to compute the page - term = QString::fromUtf8(lterm.c_str()); } - char cpagenum[20]; - sprintf(cpagenum, "%d", pagenum); + int line = 1; + if (m_source && !term.empty() && linenumNeeded(cmd)) { + if (doc.text.empty()) { + rcldb->getDocRawText(doc); + } + line = m_source->getFirstMatchLine(doc, term); + } // Substitute %xx inside arguments string efftime; @@ -408,9 +422,10 @@ void RclMain::startNativeViewer(Rcl::Doc doc, int pagenum, QString term) subs["f"] = fn; subs["F"] = fn; subs["i"] = FileInterner::getLastIpathElt(doc.ipath); + subs["l"] = ulltodecstr(line); subs["M"] = doc.mimetype; - subs["p"] = cpagenum; - subs["s"] = (const char*)term.toLocal8Bit(); + subs["p"] = ulltodecstr(pagenum); + subs["s"] = term; subs["U"] = url_encode(url); subs["u"] = url; // Let %(xx) access all metadata. diff --git a/src/query/docseq.h b/src/query/docseq.h index 4dd6f50f..650b9d89 100644 --- a/src/query/docseq.h +++ b/src/query/docseq.h @@ -111,6 +111,9 @@ public: virtual int getFirstMatchPage(Rcl::Doc&, std::string&) { return -1; } + virtual int getFirstMatchLine(const Rcl::Doc&, const std::string&) { + return 1; + } /** Get duplicates. 
*/ virtual bool docDups(const Rcl::Doc&, std::vector&) { return false; diff --git a/src/query/docseqdb.cpp b/src/query/docseqdb.cpp index fab028bd..df06c6a3 100644 --- a/src/query/docseqdb.cpp +++ b/src/query/docseqdb.cpp @@ -126,6 +126,17 @@ int DocSequenceDb::getFirstMatchPage(Rcl::Doc &doc, string& term) return -1; } +int DocSequenceDb::getFirstMatchLine(const Rcl::Doc &doc, const string& term) +{ + std::unique_lock locker(o_dblock); + if (!setQuery()) + return false; + if (m_q->whatDb()) { + return m_q->getFirstMatchLine(doc, term); + } + return 1; +} + list DocSequenceDb::expand(Rcl::Doc &doc) { std::unique_lock locker(o_dblock); diff --git a/src/query/docseqdb.h b/src/query/docseqdb.h index 69535d79..b77051b4 100644 --- a/src/query/docseqdb.h +++ b/src/query/docseqdb.h @@ -43,6 +43,7 @@ public: virtual bool getAbstract(Rcl::Doc &doc, std::vector&) override; virtual int getFirstMatchPage(Rcl::Doc&, std::string& term) override; + virtual int getFirstMatchLine(const Rcl::Doc&, const std::string& term) override; virtual bool docDups(const Rcl::Doc& doc, std::vector& dups) override; virtual std::string getDescription() override; diff --git a/src/rcldb/rclabsfromtext.cpp b/src/rcldb/rclabsfromtext.cpp index 32783b1f..4195b1c4 100644 --- a/src/rcldb/rclabsfromtext.cpp +++ b/src/rcldb/rclabsfromtext.cpp @@ -141,11 +141,9 @@ public: // add/update fragment definition. 
virtual bool takeword(const std::string& term, int pos, int bts, int bte) { LOGDEB1("takeword: [" << term << "] bytepos: "< maxtermcount) { LOGINF("Rclabsfromtext: stopping because maxtermcount reached: "<< maxtermcount << endl); @@ -154,8 +152,7 @@ public: } // Also limit the number of fragments (just in case safety) if (m_fragments.size() > maxtermcount / 100) { - LOGINF("Rclabsfromtext: stopping because maxfragments reached: "<< - maxtermcount/100 << endl); + LOGINF("Rclabsfromtext: stopping: max fragments count: " << maxtermcount/100 << "\n"); retflags |= ABSRES_TRUNC; return false; } @@ -193,8 +190,7 @@ public: m_curterm = term; m_curtermcoef = coef; } else { - LOGDEB2("Extending current fragment: " << m_remainingWords << - " -> " << m_ctxwords << endl); + LOGDEB2("Extending current fragment: "< "< 5) { - // Limit expansion of contiguous fragments (this is to - // avoid common terms in search causing long - // heavyweight meaningless fragments. Also, limit length). + // Limit expansion of contiguous fragments (this is to avoid common terms in search + // causing long heavyweight meaningless fragments. Also, limit length). m_remainingWords = 1; m_extcount = 0; } @@ -247,18 +242,14 @@ public: LOGDEB1("FRAGMENT: from byte " << m_curfrag.first << " to byte " << m_curfrag.second << endl); LOGDEB1("FRAGMENT TEXT [" << m_rawtext.substr( - m_curfrag.first, m_curfrag.second-m_curfrag.first) - << "]\n"); - // We used to not push weak fragments if we had a lot - // already. This can cause problems if the fragments - // we drop are actually group fragments (which have - // not got their boost yet). The right cut value is - // difficult to determine, because the absolute values - // of the coefs depend on many things (index size, - // etc.) 
The old test was if (m_totalcoef < 5.0 || - // m_curfragcoef >= 1.0) We now just avoid creating a - // monster by testing the current fragments count at - // the top of the function + m_curfrag.first, m_curfrag.second-m_curfrag.first) << "]\n"); + // We used to not push weak fragments if we had a lot already. This can cause + // problems if the fragments we drop are actually group fragments (which have not + // got their boost yet). The right cut value is difficult to determine, because the + // absolute values of the coefs depend on many things (index size, etc.) The old + // test was if (m_totalcoef < 5.0 || m_curfragcoef >= 1.0) We now just avoid + // creating a monster by testing the current fragments count at the top of the + // function m_fragments.push_back(MatchFragment(m_curfrag.first, m_curfrag.second, m_curfragcoef, @@ -298,8 +289,7 @@ public: m_curtermcoef = 0.0; } - LOGDEB("TextSplitABS: stored total " << m_fragments.size() << - " fragments" << endl); + LOGDEB("TextSplitABS: stored total " << m_fragments.size() << " fragments" << endl); vector tboffs; // Look for matches to PHRASE and NEAR term groups and finalize @@ -340,9 +330,8 @@ public: } auto fragit = m_fragments.begin(); for (const auto& grpmatch : tboffs) { - LOGDEB2("LOOKING FOR FRAGMENT: group: " << grpmatch.offs.first << - "-" << grpmatch.offs.second << " curfrag " << - fragit->start << "-" << fragit->stop << endl); + LOGDEB2("LOOKING FOR FRAGMENT: group: " << grpmatch.offs.first << "-" << + grpmatch.offs.second<<" curfrag "<start<<"-"<stop<<"\n"); while (fragit->stop < grpmatch.offs.first) { fragit++; if (fragit == m_fragments.end()) { @@ -417,21 +406,19 @@ int Query::Native::abstractFromText( bool sortbypage ) { - (void)chron; + PRETEND_USE(chron); LOGABS("abstractFromText: entry: " << chron.millis() << "mS\n"); string rawtext; if (!ndb->getRawText(docid, rawtext)) { LOGDEB0("abstractFromText: can't fetch text\n"); return ABSRES_ERROR; } - LOGABS("abstractFromText: got raw text: size " << 
rawtext.size() << " " << - chron.millis() << "mS\n"); + LOGABS("abstractFromText: got raw text: size "<m_snipMaxPosWalk); splitter.text_to_words(rawtext); LOGABS("abstractFromText: text_to_words: " << chron.millis() << "mS\n"); @@ -484,8 +470,7 @@ int Query::Native::abstractFromText( // main term and the page positions. unsigned int count = 0; for (const auto& entry : result) { - string frag( - fixfrag(rawtext.substr(entry.start, entry.stop - entry.start))); + string frag(fixfrag(rawtext.substr(entry.start, entry.stop - entry.start))); #ifdef COMPUTE_HLZONES // This would need to be modified to take tag parameters @@ -506,8 +491,7 @@ int Query::Native::abstractFromText( if (page < 0) page = 0; } - LOGDEB0("=== FRAGMENT: p. " << page << " Coef: " << entry.coef << - ": " << frag << endl); + LOGDEB0("=== FRAGMENT: p. " << page << " Coef: " << entry.coef << ": " << frag << endl); vabs.push_back(Snippet(page, frag).setTerm(entry.term)); if (count++ >= maxtotaloccs) break; @@ -515,4 +499,45 @@ int Query::Native::abstractFromText( return ABSRES_OK | splitter.getretflags(); } +class TermLineSplitter : public TextSplit { +public: + TermLineSplitter(const std::string& term) + : TextSplit(TextSplit::TXTS_NOSPANS), m_term(term) { + } + bool takeword(const std::string& _term, int, int, int) override { + std::string term; + if (o_index_stripchars) { + if (!unacmaybefold(_term, term, "UTF-8", UNACOP_UNACFOLD)) { + LOGINFO("PlainToRich::takeword: unac failed for [" << term << "]\n"); + return true; + } + } + if (term == m_term) { + return false; + } + return true; + } + void newline(int) override { + m_line++; + } + int getline() { + return m_line; + } +private: + int m_line{1}; + std::string m_term; +}; + +int Query::getFirstMatchLine(const Doc &doc, const std::string& term) +{ + int line = 1; + TermLineSplitter splitter(term); + bool ret = splitter.text_to_words(doc.text); + // The splitter takeword() breaks by returning false as soon as the term is found + if (ret == false) { + 
line = splitter.getline(); + } + return line; +} + } diff --git a/src/rcldb/rclquery.h b/src/rcldb/rclquery.h index cade3650..fd8874d3 100644 --- a/src/rcldb/rclquery.h +++ b/src/rcldb/rclquery.h @@ -115,10 +115,18 @@ public: // Returned as a vector of pair page is 0 if unknown int makeDocAbstract(const Doc &doc, std::vector& abst, int maxoccs= -1, int ctxwords= -1,bool sortbypage=false); - /** Retrieve page number for first match for "significant" query term - * @param term returns the chosen term */ + + /** Choose most interesting term and return the page number for its first match + * @param term returns the chosen term + * @return page number or -1 if term not found or other issue + */ int getFirstMatchPage(const Doc &doc, std::string& term); + /** Compute line number for first match of term. Only works if doc.text has text. + * This uses a text split. Both this and the above getFirstMatchPage() could be done and saved + * while we compute the abstracts, quite a lot of waste here. */ + int getFirstMatchLine(const Doc &doc, const std::string& term); + /** Retrieve a reference to the searchData we are using */ std::shared_ptr getSD() { return m_sd; From f8e556fceea75436d1a84e807d4e153be7cbea9c Mon Sep 17 00:00:00 2001 From: Jean-Francois Dockes Date: Fri, 7 Jan 2022 11:52:06 +0100 Subject: [PATCH 04/19] Documentation for %l --- src/doc/user/usermanual.html | 27 +++++++++++++++++------- src/doc/user/usermanual.xml | 41 ++++++++++++++++++++++-------------- 2 files changed, 44 insertions(+), 24 deletions(-) diff --git a/src/doc/user/usermanual.html b/src/doc/user/usermanual.html index c538dfa8..e679932c 100644 --- a/src/doc/user/usermanual.html +++ b/src/doc/user/usermanual.html @@ -10758,17 +10758,28 @@ other = rclcat:other

  • %p. Page index. Only significant for a subset of document types, currently only PDF, - Postscript and DVI files. Can be used to start the - editor at the right page for a match or - snippet.

    + Postscript and DVI files. If it is set, a + significant term will be chosen in the query, and + %p will be substituted with the first page where + the term appears. Can be used to start the editor + at the right page for a match or snippet.

    +
  • +
  • +

    %l. Line number. Only significant + for document types with relevant line breaks, + mostly text/plain and analogs. If it is set, a + significant term will be chosen in the query, and + %l will be substituted with the first line where + the term appears.

  • %s. Search term. The value will only - be set for documents with indexed page numbers (ie: - PDF). The value will be one of the matched search - terms. It would allow pre-setting the value in the - "Find" entry inside Evince for example, for easy - highlighting of the term.

    + be set for documents with indexed page or line + numbers and if %p or %l is also used. The value + will be one of the matched search terms. It would + allow pre-setting the value in the "Find" entry + inside Evince for example, for easy highlighting of + the term.

  • %u. Url.

    diff --git a/src/doc/user/usermanual.xml b/src/doc/user/usermanual.xml index 168a7a4c..cc2e81f7 100644 --- a/src/doc/user/usermanual.xml +++ b/src/doc/user/usermanual.xml @@ -5,7 +5,7 @@ Recoll"> http://www.recoll.org/pages/features.html"> - + Xapian"> Windows"> Unix-like systems"> @@ -7114,28 +7114,37 @@ other = rclcat:other (possibly a script) to be able to handle it. - %M - MIME type + + %MMIME type - %p - Page index. Only significant for a subset of document - types, currently only PDF, Postscript and DVI files. Can be - used to start the editor at the right page for a match or - snippet. + + %pPage index. Only significant for a subset of + document types, currently only PDF, Postscript and DVI files. If it is set, a + significant term will be chosen in the query, and %p will be substituted with the + first page where the term appears. Can be used to start the editor at the right page + for a match or snippet. - %s - Search term. The value will only be set for documents - with indexed page numbers (ie: PDF). The value will be one of - the matched search terms. It would allow pre-setting the - value in the "Find" entry inside Evince for example, for easy - highlighting of the term. + + %lLine number. Only significant for document + types with relevant line breaks, mostly text/plain and analogs. If it is set, a + significant term will be chosen in the query, and %l will be substituted with the + first line where the term appears. - %u - Url. + + %sSearch term. The value will only be set for + documents with indexed page or line numbers and if %p or %l is also used. The value + will be one of the matched search terms. It would allow pre-setting the value in the + "Find" entry inside Evince for example, for easy highlighting of the + term. + + + %uUrl. 
+ + In addition to the predefined values above, all strings like From 9eebfd24ec0e29e8db75c64f35168ab09cd1517e Mon Sep 17 00:00:00 2001 From: Jean-Francois Dockes Date: Sat, 8 Jan 2022 14:54:23 +0100 Subject: [PATCH 05/19] none --- src/testmains/trexecmd.cpp | 384 ------------------------------------- 1 file changed, 384 deletions(-) delete mode 100644 src/testmains/trexecmd.cpp diff --git a/src/testmains/trexecmd.cpp b/src/testmains/trexecmd.cpp deleted file mode 100644 index 94813d38..00000000 --- a/src/testmains/trexecmd.cpp +++ /dev/null @@ -1,384 +0,0 @@ -#include "autoconfig.h" - -#include "execmd.h" - -#include -#include -#include "safeunistd.h" -#include -#ifndef _WIN32 -#include -#endif - -#include -#include -#include -#include - -#include "log.h" -#include "cancelcheck.h" -#include "execmd.h" -#include "smallut.h" - -using namespace std; - -// Testing the rclexecm protocol outside of recoll. Here we use the -// rcldoc.py filter, you can try with rclaudio too, adjust the file -// arg accordingly. This simplified driver only really works with -// single-doc files (else it extracts only the first doc, usually the -// empty self-doc). -bool exercise_mhexecm(const string& cmdstr, const string& mimetype, - vector& files) -{ - if (files.empty()) - return false; - - ExecCmd cmd; - vector myparams; - -#ifdef _WIN32 - // Hack for windows: the command is always "Python somescript" - myparams.push_back(files[0]); - files.erase(files.begin()); -#endif - - if (cmd.startExec(cmdstr, myparams, 1, 1) < 0) { - cerr << "startExec " << cmdstr << " failed. 
Missing command?\n"; - return false; - } - - for (vector::const_iterator it = files.begin(); - it != files.end(); it++) { - // Build request message - ostringstream obuf; - obuf << "Filename: " << (*it).length() << "\n" << (*it); - obuf << "Mimetype: " << mimetype.length() << "\n" << mimetype; - // Bogus parameter should be skipped by filter - obuf << "BogusParam: " << string("bogus").length() << "\n" << "bogus"; - obuf << "\n"; - cerr << "SENDING: [" << obuf.str() << "]\n"; - // Send it - if (cmd.send(obuf.str()) < 0) { - // The real code calls zapchild here, but we don't need it as - // this will be handled by ~ExecCmd - //cmd.zapChild(); - cerr << "send error\n"; - return false; - } - - // Read answer - for (int loop=0;;loop++) { - string name, data; - - // Code from mh_execm.cpp: readDataElement - string ibuf; - // Read name and length - if (cmd.getline(ibuf) <= 0) { - cerr << "getline error\n"; - return false; - } - // Empty line (end of message) - if (!ibuf.compare("\n")) { - cerr << "Got empty line\n"; - name.clear(); - break; - } - - // Filters will sometimes abort before entering the real - // protocol, ie if a module can't be loaded. 
Check the - // special filter error first word: - if (ibuf.find("RECFILTERROR ") == 0) { - cerr << "Got RECFILTERROR\n"; - return false; - } - - // We're expecting something like Name: len\n - vector tokens; - stringToTokens(ibuf, tokens); - if (tokens.size() != 2) { - cerr << "bad line in filter output: [" << ibuf << "]\n"; - return false; - } - vector::iterator it = tokens.begin(); - name = *it++; - string& slen = *it; - int len; - if (sscanf(slen.c_str(), "%d", &len) != 1) { - cerr << "bad line in filter output (no len): [" << - ibuf << "]\n"; - return false; - } - // Read element data - data.erase(); - if (len > 0 && cmd.receive(data, len) != len) { - cerr << "MHExecMultiple: expected " << len << - " bytes of data, got " << data.length() << endl; - return false; - } - - // Empty element: end of message - if (name.empty()) - break; - cerr << "Got name: [" << name << "] data [" << data << "]\n"; - } - } - return true; -} - -static char *thisprog; -static char usage [] = -"trexecmd [-c -r -i -o] [-e ] cmd [arg1 arg2 ...]\n" -" -c : test cancellation (ie: trexecmd -c sleep 1000)\n" -" -r : run reexec. Must be separate option.\n" -" -i : command takes input\n" -" -o : command produces output\n" -" -e : send stderr to file named fn (will truncate it)\n" -" If -i is set, we send /etc/group contents to whatever command is run\n" -" If -o is set, we print whatever comes out\n" -"trexecmd -f bogus filter for testing. 
Uses same options\n" -"trexecmd -m [file ...]: test execm:\n" -" should be the path to an execm filter\n" -" the type of the file parameters\n" -"trexecmd -w cmd : do the 'which' thing\n" - ; - -static void Usage(FILE *fp = stderr) -{ - fprintf(fp, "%s: usage:\n%s", thisprog, usage); - exit(1); -} - -static int op_flags; -#define OPT_MOINS 0x1 -#define OPT_i 0x4 -#define OPT_w 0x8 -#define OPT_c 0x10 -#define OPT_r 0x20 -#define OPT_m 0x40 -#define OPT_o 0x80 -#define OPT_e 0x100 -#define OPT_f 0x200 - -void childfilter() -{ - const int bs = 1024; - char buf[bs]; - if (op_flags & OPT_c) - sleep(2000); - if (op_flags& OPT_i) { - while (read(0, buf, bs) > 0); - } - if (op_flags& OPT_o) { - for (int i = 0; i < 10; i++) { - printf("This is DATA 1 2 3\n"); - } - } - exit(0); -} - -// Data sink for data coming out of the command. We also use it to set -// a cancellation after a moment. -class MEAdv : public ExecCmdAdvise { -public: - void newData(int cnt) { - cerr << "newData(" << cnt << ")" << endl; - if (op_flags & OPT_c) { - static int callcnt; - if (callcnt++ == 5) { - // Just sets the cancellation flag - CancelCheck::instance().setCancel(); - // Would be called from somewhere else and throws an - // exception. 
We call it here for simplicity - cerr << "newData: should throw !\n"; - CancelCheck::instance().checkCancel(); - } - } - } -}; - -// Data provider, used if the -i flag is set -class MEPv : public ExecCmdProvide { -public: - string *m_input; - int m_cnt; - MEPv(string *i) - : m_input(i), m_cnt(0) { - } - ~MEPv() { - } - void newData() { - if (m_cnt++ < 10) { - char num[30]; - sprintf(num, "%d", m_cnt); - *m_input = string("This is an input chunk ") + string(num) + - string("\n"); - } else { - m_input->erase(); - } - } - void reset() { - m_cnt = 0; - } -}; - - - -ReExec reexec; -int main(int argc, char *argv[]) -{ -#ifndef _WIN32 - reexec.init(argc, argv); - - if (0) { - // Disabled: For testing reexec arg handling - vector newargs; - newargs.push_back("newarg"); - newargs.push_back("newarg1"); - newargs.push_back("newarg2"); - newargs.push_back("newarg3"); - newargs.push_back("newarg4"); - reexec.insertArgs(newargs, 2); - } -#endif - - string stderrFile; - thisprog = argv[0]; - argc--; argv++; - - while (argc > 0 && **argv == '-') { - (*argv)++; - if (!(**argv)) - /* Cas du "adb - core" */ - Usage(); - while (**argv) - switch (*(*argv)++) { - case 'c': op_flags |= OPT_c; break; - case 'e': - op_flags |= OPT_e; - if (argc < 2) { - Usage(); - } - stderrFile = *(++argv); argc--; - goto b1; - - case 'f': op_flags |= OPT_f; break; - case 'h': - for (int i = 0; i < 10; i++) { - cout << "MESSAGE " << i << " FROM TREXECMD\n"; - cout.flush(); - //sleep(1); - } - return 0; - case 'i': op_flags |= OPT_i; break; - case 'o': op_flags |= OPT_o; break; - case 'm': op_flags |= OPT_m; break; - case 'r': op_flags |= OPT_r; break; - case 'w': op_flags |= OPT_w; break; - default: Usage(); break; - } - b1: argc--; argv++; - } - - if (op_flags & OPT_f) { - childfilter(); - } - - if (argc < 1) - Usage(); - - string arg1 = *argv++; argc--; - vector l; - while (argc > 0) { - l.push_back(*argv++); argc--; - } - - DebugLog::getdbl()->setloglevel(DEBDEB1); - DebugLog::setfilename("stderr"); 
-#ifndef _WIN32 - signal(SIGPIPE, SIG_IGN); - - if (op_flags & OPT_r) { - // Test reexec. Normally only once, next time we fall through - // because we remove the -r option (only works if it was - // isolated, not like -rc - chdir("/"); - argv[0] = strdup(""); - sleep(1); - cerr << "Calling reexec\n"; - // We remove the -r arg from list, otherwise we are going to - // loop (which you can try by commenting out the following - // line) - reexec.removeArg("-r"); - reexec.reexec(); - } -#endif - - - if (op_flags & OPT_w) { - // Test "which" method - string path; - if (ExecCmd::which(arg1, path)) { - cout << path << endl; - return 0; - } - return 1; - } else if (op_flags & OPT_m) { - if (l.size() < 2) - Usage(); - string mimetype = l[0]; - l.erase(l.begin()); - return exercise_mhexecm(arg1, mimetype, l) ? 0 : 1; - } else { - // Default: execute command line arguments - ExecCmd mexec; - - // Set callback to be called whenever there is new data - // available and at a periodic interval, to check for - // cancellation - MEAdv adv; - mexec.setAdvise(&adv); - //mexec.setTimeout(5); - // Stderr output goes there - if (!stderrFile.empty()) - mexec.setStderr(stderrFile); - - // A few environment variables. 
Check with trexecmd env - mexec.putenv("TESTVARIABLE1=TESTVALUE1"); - mexec.putenv("TESTVARIABLE2=TESTVALUE2"); - mexec.putenv("TESTVARIABLE3=TESTVALUE3"); - - string input, output; - MEPv pv(&input); - - string *ip = 0; - if (op_flags & OPT_i) { - ip = &input; - mexec.setProvide(&pv); - } - string *op = 0; - if (op_flags & OPT_o) { - op = &output; - } - - int status = -1; - for (int i = 0; i < 10; i++) { - output.clear(); - pv.reset(); - try { - status = mexec.doexec(arg1, l, ip, op); - } catch (CancelExcept) { - cerr << "CANCELLED" << endl; - } - //fprintf(stderr, "Status: 0x%x\n", status); - if (op_flags & OPT_o) { - cout << "data received: [" << output << "]\n"; - cerr << "iter " << i << " status " << - status << " bytes received " << output.size() << endl; - } - if (status) - break; - } - return status >> 8; - } -} - From 03378c55a487fdd799452e7b010b2d4dfc60c831 Mon Sep 17 00:00:00 2001 From: Jean-Francois Dockes Date: Sat, 8 Jan 2022 15:33:30 +0100 Subject: [PATCH 06/19] none --- src/testmains/trpathut.cpp | 105 ---------------- src/testmains/trutf8iter.cpp | 232 ----------------------------------- 2 files changed, 337 deletions(-) delete mode 100644 src/testmains/trpathut.cpp delete mode 100644 src/testmains/trutf8iter.cpp diff --git a/src/testmains/trpathut.cpp b/src/testmains/trpathut.cpp deleted file mode 100644 index 8d4b4be4..00000000 --- a/src/testmains/trpathut.cpp +++ /dev/null @@ -1,105 +0,0 @@ -#include "pathut.h" - -#include -#include -#include - -#include -#include - -using namespace std; - -static std::map options { - {"path_home", 0}, - {"path_tildexpand", 0}, - {"listdir", 0}, - {"url_encode", 0}, - }; - -static const char *thisprog; -static void Usage(void) -{ - string sopts; - for (const auto& opt: options) { - sopts += "--" + opt.first + "\n"; - } - fprintf(stderr, "%s: usage: %s\n%s", thisprog, thisprog, sopts.c_str()); - exit(1); -} - -int main(int argc, char **argv) -{ - thisprog = *argv; - std::vector long_options; - - for (auto& entry 
: options) { - struct option opt; - opt.name = entry.first.c_str(); - opt.has_arg = 0; - opt.flag = &entry.second; - opt.val = 1; - long_options.push_back(opt); - } - long_options.push_back({0, 0, 0, 0}); - - while (getopt_long(argc, argv, "", &long_options[0], nullptr) != -1) { - } - if (options["path_home"]) { - if (optind != argc) { - cerr << "Usage: trsmallut --path_home\n"; - return 1; - } - cout << "path_home() -> [" << path_home() << "]\n"; - } else if (options["path_tildexpand"]) { - if (optind >= argc) { - cerr << "Usage: trsmallut --path_tildexpand \n"; - return 1; - } - string s = argv[optind]; - optind++; - if (optind != argc) { - return 1; - } - cout << "path_tildexpand(" << s << ") -> [" << path_tildexpand(s) << "]\n"; - } else if (options["url_encode"]) { - if (optind >= argc) { - cerr << "Usage: trsmallut --url_encode [offs=0]\n"; - return 1; - } - string s = argv[optind]; - optind++; - int offs = 0; - if (optind != argc) { - offs = atoi(argv[optind]); - optind++; - } - if (optind != argc) { - return 1; - } - cout << "url_encode(" << s << ", " << offs << ") -> [" << url_encode(s, offs) << "]\n"; - } else if (options["listdir"]) { - if (optind >= argc) { - cerr << "Usage: trsmallut --listdir \n"; - return 1; - } - std::string path = argv[optind]; - optind++; - if (optind != argc) { - cerr << "Usage: trsmallut --listdir \n"; - return 1; - } - std::string reason; - std::set entries; - if (!listdir(path, reason, entries)) { - std::cerr<< "listdir(" << path << ") failed : " << reason << "\n"; - return 1; - } - for (const auto& entry : entries) { - cout << entry << "\n"; - } - } else { - Usage(); - } - - return 0; -} diff --git a/src/testmains/trutf8iter.cpp b/src/testmains/trutf8iter.cpp deleted file mode 100644 index 8e83374b..00000000 --- a/src/testmains/trutf8iter.cpp +++ /dev/null @@ -1,232 +0,0 @@ -/* Copyright (C) 2005 J.F.Dockes - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General 
Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the - * Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - */ -#include -#include - -#include -#include -#include - - -#include "log.h" -#include "transcode.h" - -#ifndef NO_NAMESPACES -using namespace std; -#endif /* NO_NAMESPACES */ - -#define UTF8ITER_CHECK -#include "utf8iter.h" -#include "readfile.h" -#include "textsplit.h" - -void tryempty() -{ - Utf8Iter it(""); - cout << "EOF ? " << it.eof() << endl; - TextSplit::isCJK(*it); - exit(0); -} - -const char *thisprog; -static char usage [] = -"utf8iter [opts] infile outfile\n" -" converts infile to 32 bits unicode (processor order), for testing\n" -" -v : print stuff as we go\n" -"-t [-w] [-e] : test truncation\n" -"-c : str must be a single utf-8 char. 
Convert to code then show character bytes count\n" -; - -void Usage() { - fprintf(stderr, "%s:%s\n", thisprog, usage); - exit(1); -} -static int op_flags; -#define OPT_v 0x2 -#define OPT_t 0x4 -#define OPT_w 0x8 -#define OPT_e 0x10 -#define OPT_c 0x20 - -int trytruncate(std::string s, int maxlen) -{ - int flag = 0; - if (op_flags & OPT_w) - flag |= UTF8T_ATWORD; - if (op_flags & OPT_e) - flag |= UTF8T_ELLIPSIS; - utf8truncate(s, maxlen, flag); - std::cout << "Truncation result:[" << s << "]\n"; - return 0; -} - -FILE *infout = stdout; -int main(int argc, char **argv) -{ - thisprog = argv[0]; - argc--; argv++; - - while (argc > 0 && **argv == '-') { - (*argv)++; - if (!(**argv)) - Usage(); - while (**argv) - switch (*(*argv)++) { - case 'e': op_flags |= OPT_e;break; - case 't': op_flags |= OPT_t;break; - case 'v': op_flags |= OPT_v;break; - case 'w': op_flags |= OPT_w;break; - case 'c': op_flags |= OPT_c;break; - default: Usage(); break; - } - argc--;argv++; - } - - if (op_flags & OPT_c) { - if (argc != 1) - Usage(); - std::string s = *argv++;argc--; - Utf8Iter uit(s); - auto code = *uit; - auto cnt = utf8codepointsize(code); - std::cout << "0x" << std::hex << code << std::dec << " : " << cnt << " byte" << - (cnt>1?"s":"") << "\n"; - return 0; - } - - if (op_flags & OPT_t) { - if (argc < 2) - Usage(); - std::string s = *argv++;argc--; - int maxlen = atoi(*argv++);argc--; - return trytruncate(s, maxlen); - } - - string infile, outfile; - if (argc == 2) { - infile = *argv++;argc--; - outfile = *argv++;argc--; - Usage(); - } else if (argc != 0) { - Usage(); - } - string in; - if (!file_to_string(infile, in)) { - cerr << "Cant read file\n" << endl; - exit(1); - } - - vectorucsout1; - string out, out1; - Utf8Iter it(in); - FILE *fp = 0; - if (!outfile.empty()) { - fp = fopen(outfile.c_str(), "w"); - if (fp == 0) { - cerr << "Can't create " << outfile << endl; - exit(1); - } - } - - int nchars = 0; - for (;!it.eof(); it++) { - unsigned int value = *it; - if (value == 
(unsigned int)-1) { - cerr << "Conversion error occurred at position " << it.getBpos() - << endl; - exit(1); - } - if (op_flags & OPT_v) { - fprintf(infout, "Value: 0x%04x", value); - if (value < 0x7f) - fprintf(stdout, " (%c) ", value); - fprintf(infout, "\n"); - } - // UTF-32LE or BE array - ucsout1.push_back(value); - if (fp) { - // UTF-32LE or BE file - fwrite(&value, 4, 1, fp); - } - - // Reconstructed utf8 strings (2 methods) - if (!it.appendchartostring(out)) - break; - // conversion to string - out1 += it; - - // fprintf(stderr, "%s", string(it).c_str()); - nchars++; - } - if (fp) { - fclose(fp); - } - - fprintf(infout, "Found %d Unicode characters\n", nchars); - if (in.compare(out)) { - fprintf(stderr, "error: out != in\n"); - exit(1); - } - if (in != out1) { - fprintf(stderr, "error: out1 != in\n"); - exit(1); - } - - // Rewind and do it a second time - vectorucsout2; - it.rewind(); - for (int i = 0; ; i++) { - unsigned int value; - if ((value = it[i]) == (unsigned int)-1) { - break; - } - it++; - ucsout2.push_back(value); - } - - if (ucsout1 != ucsout2) { - fprintf(stderr, "error: ucsout1 != ucsout2\n"); - exit(1); - } - - ucsout2.clear(); - int ercnt; - const char *encoding = "UTF-32LE"; // note : use BE on high-endian machine - string ucs, ucs1; - for (const unsigned int i : ucsout1) { - ucs.append((const char *)&i, 4); - } - if (!transcode(ucs, ucs1, encoding, encoding, &ercnt) || ercnt) { - fprintf(stderr, "Transcode check failed, ercount: %d\n", ercnt); - exit(1); - } - if (ucs.compare(ucs1)) { - fprintf(stderr, "error: ucsout1 != ucsout2 after iconv\n"); - exit(1); - } - - if (!transcode(ucs, ucs1, encoding, "UTF-8", &ercnt) || ercnt) { - fprintf(stderr, "Transcode back to utf-8 check failed, ercount: %d\n", - ercnt); - exit(1); - } - if (ucs1.compare(in)) { - fprintf(stderr, "Transcode back to utf-8 compare to in failed\n"); - exit(1); - } - exit(0); -} From 3a9d7f7cb6397665307d2b1713b0e5346047b015 Mon Sep 17 00:00:00 2001 From: Jean-Francois 
Dockes Date: Sat, 8 Jan 2022 15:44:21 +0100 Subject: [PATCH 07/19] execmd statusAsString --- src/doc/user/usermanual.html | 6 +++--- src/testmains/Makefile.am | 10 ++-------- src/utils/execmd.cpp | 21 ++++++++++++++++++++- src/utils/execmd.h | 2 ++ src/windows/execmd_w.cpp | 31 ++++++++++++++++++++----------- 5 files changed, 47 insertions(+), 23 deletions(-) diff --git a/src/doc/user/usermanual.html b/src/doc/user/usermanual.html index e679932c..389427e1 100644 --- a/src/doc/user/usermanual.html +++ b/src/doc/user/usermanual.html @@ -10,7 +10,7 @@ + "Permission is granted to copy, distribute and/or modify this document under the terms of the GNU Free Documentation License, Version 1.3 or any later version published by the Free Software Foundation; with no Invariant Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license can be found at the following location: GNU web site. This document introduces full text search notions and describes the installation and use of the Recoll application. This version describes Recoll 1.32."> @@ -53,7 +53,7 @@ alink="#0000FF"> and describes the installation and use of the Recoll application. This version describes Recoll 1.31.

    + "application">Recoll 1.32.

    @@ -443,7 +443,7 @@ alink="#0000FF">

    This document introduces full text search notions and describes the installation and use of the Recoll application. It is updated for - Recoll 1.31.

    + Recoll 1.32.

    Recoll was for a long time dedicated to Unix-like systems. It was only lately (2015) ported to MS-Windows. diff --git a/src/testmains/Makefile.am b/src/testmains/Makefile.am index 6cd274ba..c2d72caa 100644 --- a/src/testmains/Makefile.am +++ b/src/testmains/Makefile.am @@ -38,8 +38,8 @@ AM_CPPFLAGS = -Wall -Wno-unused -std=c++11 \ -D_GNU_SOURCE \ $(DEFS) -noinst_PROGRAMS = plaintorich textsplit utf8iter fstreewalk rclconfig hldata unac mbox \ - circache wipedir mimetype pathut fileudi x11mon trqrstore ecrontab +noinst_PROGRAMS = plaintorich textsplit fstreewalk rclconfig hldata unac mbox \ + circache wipedir mimetype fileudi x11mon trqrstore ecrontab ecrontab_SOURCES = trecrontab.cpp ecrontab_LDADD = ../librecoll.la @@ -62,9 +62,6 @@ mbox_LDADD = ../librecoll.la mimetype_SOURCES = trmimetype.cpp mimetype_LDADD = ../librecoll.la -pathut_SOURCES = trpathut.cpp -pathut_LDADD = ../librecoll.la - rclconfig_SOURCES = trrclconfig.cpp rclconfig_LDADD = ../librecoll.la @@ -77,9 +74,6 @@ plaintorich_LDADD = ../librecoll.la unac_SOURCES = trunac.cpp unac_LDADD = ../librecoll.la -utf8iter_SOURCES = trutf8iter.cpp -utf8iter_LDADD = ../librecoll.la - wipedir_SOURCES = trwipedir.cpp wipedir_LDADD = ../librecoll.la diff --git a/src/utils/execmd.cpp b/src/utils/execmd.cpp index eb5a7603..6a6fd510 100644 --- a/src/utils/execmd.cpp +++ b/src/utils/execmd.cpp @@ -39,6 +39,7 @@ #include #include #include +#include #ifdef HAVE_SPAWN_H #ifndef __USE_GNU #define __USE_GNU @@ -994,7 +995,8 @@ int ExecCmd::wait() LOGERR("ExecCmd::waitpid: returned -1 errno " << errno << "\n"); status = -1; } - LOGDEB("ExecCmd::wait: got status 0x" << (status) << "\n"); + LOGDEB("ExecCmd::wait: got status 0x" << std::hex << status << std::dec << ": " << + waitStatusAsString(status) << "\n"); m->m_pid = -1; } // Let the ExecCmdRsrc cleanup, it will do the killing/waiting if needed @@ -1043,6 +1045,23 @@ bool ExecCmd::backtick(const vector cmd, string& out) return status == 0; } +std::string 
ExecCmd::waitStatusAsString(int wstatus) +{ + std::ostringstream oss; + if (WIFEXITED(wstatus)) { + oss << "Exit status: " << WEXITSTATUS(wstatus); + } else { + if (WIFSIGNALED(wstatus)) { + oss << strsignal(WTERMSIG(wstatus)) << " "; + } + if (WCOREDUMP(wstatus)) { + oss << "(core dumped)"; + } + } + return oss.str(); +} + + /// ReExec class methods /////////////////////////////////////////////////// ReExec::ReExec(int argc, char *args[]) { diff --git a/src/utils/execmd.h b/src/utils/execmd.h index 623569c1..987a9e7f 100644 --- a/src/utils/execmd.h +++ b/src/utils/execmd.h @@ -247,6 +247,8 @@ public: */ static bool backtick(const std::vector cmd, std::string& out); + static std::string waitStatusAsString(int wstatus); + class Internal; private: Internal *m; diff --git a/src/windows/execmd_w.cpp b/src/windows/execmd_w.cpp index 8f140216..8e5ad2a5 100644 --- a/src/windows/execmd_w.cpp +++ b/src/windows/execmd_w.cpp @@ -1100,17 +1100,6 @@ bool ExecCmd::maybereap(int *status) } } -// Static -bool ExecCmd::backtick(const vector cmd, string& out) -{ - vector::const_iterator it = cmd.begin(); - it++; - vector args(it, cmd.end()); - ExecCmd mexec; - int status = mexec.doexec(*cmd.begin(), args, 0, &out); - return status == 0; -} - int ExecCmd::doexec(const string &cmd, const vector& args, const string *input, string *output) { @@ -1159,3 +1148,23 @@ int ExecCmd::doexec(const string &cmd, const vector& args, cleaner.inactivate(); return wait(); } + +// Static +bool ExecCmd::backtick(const vector cmd, string& out) +{ + vector::const_iterator it = cmd.begin(); + it++; + vector args(it, cmd.end()); + ExecCmd mexec; + int status = mexec.doexec(*cmd.begin(), args, 0, &out); + return status == 0; +} + +// Static. 
Unimplemented on windows for now +std::string ExecCmd::waitStatusAsString(int wstatus) +{ + std::ostringstream oss; + oss << std::hex << "0x" << wstatus << std::dec; + return oss.str(); +} + From d883500c17789a06aa811e0b644c271913ba6b71 Mon Sep 17 00:00:00 2001 From: Jean-Francois Dockes Date: Sun, 9 Jan 2022 09:30:37 +0100 Subject: [PATCH 08/19] Added a few known suffixes to avoid unnecessary xdg-mime execs --- src/sampleconf/mimemap | 20 +++++++++++++++----- src/sampleconf/recoll.conf | 4 ++-- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/src/sampleconf/mimemap b/src/sampleconf/mimemap index 4333a12d..ced4ecde 100644 --- a/src/sampleconf/mimemap +++ b/src/sampleconf/mimemap @@ -15,6 +15,8 @@ .rst = text/plain .md = text/plain .gv = text/plain +.desktop = text/plain +.json = text/plain # .log is in the default noContentSuffixes, so this will also need a recoll.conf setting to do # anything @@ -58,6 +60,7 @@ .ipynb = application/x-ipynb+json .xml = text/xml +.opf = text/xml .note = application/x-gnote @@ -90,16 +93,20 @@ .svg = image/svg+xml .dia = application/x-dia-diagram +# Compressed files .gz = application/x-gzip .Z = application/x-gzip .bz2 = application/x-bzip2 -.rar = application/x-rar -#.Z = application/x-compress -.zip = application/zip -.7z = application/x-7z-compressed -.maff = application/zip +.lzma = application/x-lzma +.xz = application/x-xz .zst = application/x-zstd +# Archives +.rar = application/x-rar +.zip = application/zip +.maff = application/zip +.7z = application/x-7z-compressed + # The rcltar module can handle compressed tar formats internally so we # use application/x-tar for all tar files compressed or not. Note that tar # file indexing is disabled by default, you'll need to copy and uncomment @@ -123,7 +130,9 @@ .chm = application/x-chm .epub = application/epub+zip +.kepub = application/epub+zip .mobi = application/x-mobipocket-ebook +.lit = application/x-ms-reader # OpenOffice / opendocument.
We handle opendocument as old openoffice files # for now @@ -200,6 +209,7 @@ .ogg = application/ogg .ogx = audio/ogg .opus = audio/ogg +.wav = audio/x-wav .wv = audio/x-wavpack .mkv = video/x-matroska diff --git a/src/sampleconf/recoll.conf b/src/sampleconf/recoll.conf index 63f99a4c..aceba66a 100644 --- a/src/sampleconf/recoll.conf +++ b/src/sampleconf/recoll.conf @@ -88,8 +88,8 @@ onlyNames = # can be redefined for subdirectories. noContentSuffixes = .md5 .map \ .o .lib .dll .a .sys .exe .com \ - .mpp .mpt .vsd \ - .img .img.gz .img.bz2 .img.xz .image .image.gz .image.bz2 .image.xz \ + .mpp .mpt .vsd .sqlite \ + .img .img.gz .img.bz2 .img.xz .image .image.gz .image.bz2 .image.xz .ttf \ .dat .bak .rdf .log.gz .log .db .msf .pid \ ,v ~ # From 4e710f239a7ccc4fcb7a4775b2725cf7483ae351 Mon Sep 17 00:00:00 2001 From: Jean-Francois Dockes Date: Thu, 13 Jan 2022 10:17:59 +0000 Subject: [PATCH 09/19] Initial draft of file system monitoring for windows. Still has issues, not built by default --- src/index/rclmonprc.cpp | 10 +- src/index/rclmonrcv.cpp | 574 ++++++++++++++++++++++++++++++++++---- src/index/recollindex.cpp | 4 + 3 files changed, 538 insertions(+), 50 deletions(-) diff --git a/src/index/rclmonprc.cpp b/src/index/rclmonprc.cpp index a2e54662..56251456 100644 --- a/src/index/rclmonprc.cpp +++ b/src/index/rclmonprc.cpp @@ -452,6 +452,8 @@ bool startMonitor(RclConfig *conf, int opts) auxinterval = dfltauxinterval; if (!conf->getConfParam("monixinterval", &ixinterval)) ixinterval = dfltixinterval; + bool doweb{false}; + conf->getConfParam("processwebqueue", &doweb); rclEQ.setConfig(conf); rclEQ.setopts(opts); @@ -471,11 +473,13 @@ bool startMonitor(RclConfig *conf, int opts) while (true) { time_t now = time(0); - if (now - lastmovetime > ixinterval) { +#ifndef DISABLE_WEB_INDEXER + if (doweb && (now - lastmovetime > ixinterval)) { lastmovetime = now; runWebFilesMoverScript(conf); } - +#endif // DISABLE_WEB_INDEXER + { // Wait for event or timeout.
// Set a relatively short timeout for better monitoring of @@ -572,6 +576,7 @@ bool startMonitor(RclConfig *conf, int opts) } } +#ifndef _WIN32 // Check for a config change if (!(opts & RCLMON_NOCONFCHECK) && o_reexec && conf->sourceChanged()) { LOGDEB("Rclmonprc: config changed, reexecuting myself\n" ); @@ -581,6 +586,7 @@ bool startMonitor(RclConfig *conf, int opts) o_reexec->removeArg("-n"); o_reexec->reexec(); } +#endif // ! _WIN32 } LOGDEB("Rclmonprc: calling queue setTerminate\n" ); rclEQ.setTerminate(); diff --git a/src/index/rclmonrcv.cpp b/src/index/rclmonrcv.cpp index e3fc0df1..80a1f7e4 100644 --- a/src/index/rclmonrcv.cpp +++ b/src/index/rclmonrcv.cpp @@ -1,6 +1,6 @@ #include "autoconfig.h" #ifdef RCL_MONITOR -/* Copyright (C) 2006 J.F.Dockes +/* Copyright (C) 2006-2021 J.F.Dockes * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -16,6 +16,35 @@ * Free Software Foundation, Inc., * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ + +/* The code for the Win32 version of the monitor was largely copied from efsw: + * https://github.com/SpartanJ/efsw + * LICENSE for the original WIN32 code: + * Copyright (c) 2020 Martín Lucas Golini + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * This software is a fork of the "simplefilewatcher" by James Wynn (james@jameswynn.com) + * http://code.google.com/p/simplefilewatcher/ also MIT licensed. + */ + + #include "autoconfig.h" #include @@ -72,8 +101,8 @@ public: virtual FsTreeWalker::Status processone( const string &fn, const struct PathStat *st, FsTreeWalker::CbFlag flg) { - MONDEB("rclMonRcvRun: processone " << fn << " m_mon " << m_mon << - " m_mon->ok " << (m_mon ? m_mon->ok() : false) << std::endl); + MONDEB("walkerCB: processone " << fn << " m_mon " << m_mon << + " m_mon->ok " << (m_mon ? 
m_mon->ok() : false) << "\n"); if (flg == FsTreeWalker::FtwDirEnter || flg == FsTreeWalker::FtwDirReturn) { m_config->setKeyDir(fn); @@ -90,7 +119,7 @@ public: if (ev.m_etyp != RclMonEvent::RCLEVT_NONE) m_queue->pushEvent(ev); } else { - MONDEB("rclMonRcvRun: no event pending\n"); + MONDEB("walkerCB: no event pending\n"); break; } } @@ -99,8 +128,10 @@ public: // We do nothing special if addWatch fails for a reasonable reason if (!m_mon->addWatch(fn, true)) { if (m_mon->saved_errno != EACCES && - m_mon->saved_errno != ENOENT) + m_mon->saved_errno != ENOENT) { + LOGINF("walkerCB: addWatch failed\n"); return FsTreeWalker::FtwError; + } } } else if (!m_mon->generatesExist() && flg == FsTreeWalker::FtwRegular) { // Have to synthetize events for regular files existence @@ -164,8 +195,8 @@ void *rclMonRcvRun(void *q) FsTreeWalker walker; walker.setSkippedPaths(lconfig.getDaemSkippedPaths()); WalkCB walkcb(&lconfig, mon, queue, walker); - for (auto it = tdl.begin(); it != tdl.end(); it++) { - lconfig.setKeyDir(*it); + for (const auto& dir : tdl) { + lconfig.setKeyDir(dir); // Adjust the follow symlinks options bool follow; if (lconfig.getConfParam("followLinks", &follow) && @@ -176,20 +207,18 @@ void *rclMonRcvRun(void *q) } // We have to special-case regular files which are part of the topdirs // list because we the tree walker only adds watches for directories - if (path_isdir(*it, follow)) { - LOGDEB("rclMonRcvRun: walking " << *it << "\n"); - if (walker.walk(*it, walkcb) != FsTreeWalker::FtwOk) { + if (path_isdir(dir, follow)) { + LOGDEB("rclMonRcvRun: walking " << dir << "\n"); + if (walker.walk(dir, walkcb) != FsTreeWalker::FtwOk) { LOGERR("rclMonRcvRun: tree walk failed\n"); goto terminate; } if (walker.getErrCnt() > 0) { - LOGINFO("rclMonRcvRun: fs walker errors: " << - walker.getReason() << "\n"); + LOGINFO("rclMonRcvRun: fs walker errors: " << walker.getReason() << "\n"); } } else { - if (!mon->addWatch(*it, false)) { - LOGERR("rclMonRcvRun: addWatch failed for 
" << *it << - " errno " << mon->saved_errno << std::endl); + if (!mon->addWatch(dir, false)) { + LOGSYSERR("rclMonRcvRun", "addWatch", dir); } } } @@ -208,8 +237,7 @@ void *rclMonRcvRun(void *q) } // Forever wait for monitoring events and add them to queue: - MONDEB("rclMonRcvRun: waiting for events. q->ok(): " << queue->ok() << - std::endl); + MONDEB("rclMonRcvRun: waiting for events. q->ok(): " << queue->ok() << "\n"); while (queue->ok() && mon->ok()) { RclMonEvent ev; // Note: I could find no way to get the select @@ -245,8 +273,7 @@ void *rclMonRcvRun(void *q) goto terminate; } if (walker.getErrCnt() > 0) { - LOGINFO("rclMonRcvRun: fs walker errors: " << - walker.getReason() << "\n"); + LOGINFO("rclMonRcvRun: fs walker errors: " << walker.getReason() << "\n"); } } @@ -364,7 +391,7 @@ bool RclFAM::addWatch(const string& path, bool isdir) return false; bool ret = false; - MONDEB("RclFAM::addWatch: adding " << path << std::endl); + MONDEB("RclFAM::addWatch: adding " << path << "\n"); // It happens that the following call block forever. // We'd like to be able to at least terminate on a signal here, but @@ -410,7 +437,7 @@ bool RclFAM::getEvent(RclMonEvent& ev, int msecs) FD_ZERO(&readfds); FD_SET(fam_fd, &readfds); - MONDEB("RclFAM::getEvent: select. fam_fd is " << fam_fd << std::endl); + MONDEB("RclFAM::getEvent: select. fam_fd is " << fam_fd << "\n"); // Fam / gamin is sometimes a bit slow to send events. 
Always add // a little timeout, because if we fail to retrieve enough events, // we risk deadlocking in addwatch() @@ -432,7 +459,7 @@ bool RclFAM::getEvent(RclMonEvent& ev, int msecs) return false; } - MONDEB("RclFAM::getEvent: select returned " << ret << std::endl); + MONDEB("RclFAM::getEvent: select returned " << ret << "\n"); if (!FD_ISSET(fam_fd, &readfds)) return false; @@ -464,8 +491,7 @@ bool RclFAM::getEvent(RclMonEvent& ev, int msecs) ev.m_path = fe.filename; } - MONDEB("RclFAM::getEvent: " << event_name(fe.code) < " " << - ev.m_path << std::endl); + MONDEB("RclFAM::getEvent: " << event_name(fe.code) < " " << ev.m_path << "\n"); switch (fe.code) { case FAMCreated: @@ -517,18 +543,16 @@ bool RclFAM::getEvent(RclMonEvent& ev, int msecs) class RclIntf : public RclMonitor { public: RclIntf() - : m_ok(false), m_fd(-1), m_evp(0), m_ep(0) - { - if ((m_fd = inotify_init()) < 0) { - LOGERR("RclIntf:: inotify_init failed, errno " << errno << "\n"); - return; - } - m_ok = true; - } - virtual ~RclIntf() - { - close(); + : m_ok(false), m_fd(-1), m_evp(0), m_ep(0) { + if ((m_fd = inotify_init()) < 0) { + LOGERR("RclIntf:: inotify_init failed, errno " << errno << "\n"); + return; } + m_ok = true; + } + virtual ~RclIntf() { + close(); + } virtual bool addWatch(const string& path, bool isdir); virtual bool getEvent(RclMonEvent& ev, int msecs = -1); @@ -586,7 +610,7 @@ bool RclIntf::addWatch(const string& path, bool) { if (!ok()) return false; - MONDEB("RclIntf::addWatch: adding " << path << std::endl); + MONDEB("RclIntf::addWatch: adding " << path << "\n"); // CLOSE_WRITE is covered through MODIFY. CREATE is needed for mkdirs uint32_t mask = IN_MODIFY | IN_CREATE | IN_MOVED_FROM | IN_MOVED_TO | IN_DELETE @@ -636,9 +660,8 @@ bool RclIntf::getEvent(RclMonEvent& ev, int msecs) } int ret; MONDEB("RclIntf::getEvent: select\n"); - if ((ret = select(m_fd + 1, &readfds, 0, 0, msecs >= 0 ? 
&timeout : 0)) - < 0) { - LOGERR("RclIntf::getEvent: select failed, errno " << errno << "\n"); + if ((ret = select(m_fd + 1, &readfds, 0, 0, msecs >= 0 ? &timeout : 0)) < 0) { + LOGSYSERR("RclIntf::getEvent", "select", ""); close(); return false; } else if (ret == 0) { @@ -652,8 +675,7 @@ bool RclIntf::getEvent(RclMonEvent& ev, int msecs) return false; int rret; if ((rret=read(m_fd, m_evbuf, sizeof(m_evbuf))) <= 0) { - LOGERR("RclIntf::getEvent: read failed, " << sizeof(m_evbuf) << - "->" << rret << " errno " << errno << "\n"); + LOGSYSERR("RclIntf::getEvent", "read", sizeof(m_evbuf)); close(); return false; } @@ -679,8 +701,7 @@ bool RclIntf::getEvent(RclMonEvent& ev, int msecs) ev.m_path = path_cat(ev.m_path, evp->name); } - MONDEB("RclIntf::getEvent: " << event_name(evp->mask) << " " << - ev.m_path << std::endl); + MONDEB("RclIntf::getEvent: " << event_name(evp->mask) << " " << ev.m_path << "\n"); if ((evp->mask & IN_MOVED_FROM) && (evp->mask & IN_ISDIR)) { // We get this when a directory is renamed. Erase the subtree @@ -725,20 +746,477 @@ bool RclIntf::getEvent(RclMonEvent& ev, int msecs) #endif // RCL_USE_INOTIFY + +#ifdef _WIN32 + + +/* + * WIN32 VERSION ISSUES: + * + * - It appears that watching a subdirectory of a given directory + * prevents renaming the top directory, Windows says: can't rename + * because open or a file in it is open. This is a major issue of + * course. Check if this can be solved by using a recursive watch + * instead of setting watches on all subdirs. Would need a code + * changes in the "generic" part of course. + * - In general, directory renames need more studying. + * - Otherwise appears to more or less work... 
+ */ +#include +#include +#include +#include +#include + +#include "safewindows.h" + +typedef long WatchID; +class WatcherWin32; +class RclFSWatchWin32; + +enum class Action {Add = 1, Delete = 2, Modify = 3, Move = 4}; + +class FileWatchListener { +public: + virtual ~FileWatchListener() {} + + /// Handles the action file action + /// @param watchid The watch id for the directory + /// @param dir The directory + /// @param filename The filename that was accessed (not full path) + /// @param action Action that was performed + /// @param oldFilename The name of the file or directory moved + virtual void handleFileAction(WatchID watchid, const std::string& dir, const std::string& fn, + Action action, bool isdir, std::string oldfn = "" ) = 0; +}; + +// Internal watch data +struct WatcherStructWin32 +{ + OVERLAPPED Overlapped; + WatcherWin32 *Watch; +}; + +class WatcherWin32 { +public: + WatcherWin32() {} + + WatchID ID; + FileWatchListener *Listener; + bool Recursive; + std::string DirName; + std::string OldFileName; + + HANDLE DirHandle{nullptr}; + // do NOT make this bigger than 64K because it will fail if the folder being watched is on the + // network! (see http://msdn.microsoft.com/en-us/library/windows/desktop/aa365465(v=vs.85).aspx) + BYTE Buffer[8 * 1024]; + DWORD NotifyFilter{0}; + bool StopNow{false}; + RclFSWatchWin32 *Watch{nullptr}; +}; + +class RclFSWatchWin32 { +public: + RclFSWatchWin32(); + + virtual ~RclFSWatchWin32(); + + // Add a directory watch + // On error returns -1 + WatchID addWatch(const std::string& directory, FileWatchListener *watcher, bool recursive); + + // 2nd stage of action processing (after the static handler which just reads the data) + void handleAction(WatcherWin32 *watch, const std::string& fn, unsigned long action); + + bool ok() const { + return mInitOK; + } + + // Fetch events, with msecs timeout if there are no more + void run(DWORD msecs); + +private: + HANDLE mIOCP; + // Using a vector because we don't remove watches. 
Change to list if needed. + std::vector mWatches; + bool mInitOK{false}; + WatchID mLastWatchID{0}; + + std::mutex mWatchesLock; + + bool pathInWatches(const std::string& path); + /// Remove all directory watches. + void removeAllWatches(); +}; + +class RclMonitorWin32 : public RclMonitor, public FileWatchListener { +public: + RclMonitorWin32() { + MONDEB("RclMonitorWin32::RclMonitorWin32\n"); + } + virtual ~RclMonitorWin32() {} + + virtual bool addWatch(const string& path, bool /*isDir*/) override { + MONDEB("RclMonitorWin32::addWatch: " << path << "\n"); + return m_fswatcher.addWatch(path, this, false) != -1; + } + + virtual bool getEvent(RclMonEvent& ev, int msecs = -1) { + PRETEND_USE(msecs); + if (!m_events.empty()) { + ev = m_events.front(); + m_events.pop(); + return true; + } + m_fswatcher.run(msecs); + if (!m_events.empty()) { + ev = m_events.front(); + m_events.pop(); + return true; + } + return false; + } + + virtual bool ok() const override { + return m_fswatcher.ok(); + } + // Does this monitor generate 'exist' events at startup? + virtual bool generatesExist() const override { + return false; + } + virtual void handleFileAction(WatchID watchid, const std::string& dir, const std::string& fn, + Action action, bool isdir, std::string oldfn = "") { + MONDEB("RclMonitorWin32::handleFileAction: dir [" << dir << "] fn [" << fn << "] act " << + int(action) << " isdir " << isdir << " oldfn [" << oldfn << "]\n"); + RclMonEvent event; + switch (action) { + case Action::Move: + case Action::Add: event.m_etyp = isdir ? 
+ RclMonEvent::RCLEVT_DIRCREATE : RclMonEvent::RCLEVT_MODIFY; break; + case Action::Delete: + event.m_etyp = RclMonEvent::RCLEVT_DELETE; + if (isdir) { + event.m_etyp |= RclMonEvent::RCLEVT_ISDIR; + } + break; + case Action::Modify: event.m_etyp = RclMonEvent::RCLEVT_MODIFY; break; + } + event.m_path = path_cat(dir, fn); + m_events.push(event); + } + + // Save significant errno after monitor calls + int saved_errno{0}; +private: + std::queue m_events; + RclFSWatchWin32 m_fswatcher; +}; + + +/// Stops monitoring a directory. +void DestroyWatch(WatcherStructWin32 *pWatch) +{ + if (pWatch) { + WatcherWin32 *ww32 = pWatch->Watch; + ww32->StopNow = true; + CancelIoEx(ww32->DirHandle, &pWatch->Overlapped); + CloseHandle(ww32->DirHandle); + delete ww32; + // Shouldn't we call heapfree on the parameter here ?? + } +} + +/// Refreshes the directory monitoring. +bool RefreshWatch(WatcherStructWin32 *pWatch) +{ + WatcherWin32 *ww32 = pWatch->Watch; + return ReadDirectoryChangesW( + ww32->DirHandle, + ww32->Buffer, + sizeof(ww32->Buffer), + ww32->Recursive, + ww32->NotifyFilter, + NULL, + &pWatch->Overlapped, + NULL + ) != 0; +} + +/// Starts monitoring a directory. 
+WatcherStructWin32 *CreateWatch(LPCWSTR szDirectory, bool recursive, DWORD NotifyFilter, HANDLE iocp) +{ + WatcherStructWin32 *wsw32; + size_t ptrsize = sizeof(*wsw32); + wsw32 =static_cast(HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, ptrsize)); + + WatcherWin32 *ww32 = new WatcherWin32(); + wsw32->Watch = ww32; + + ww32->DirHandle = CreateFileW( + szDirectory, + GENERIC_READ, + FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, + NULL, + OPEN_EXISTING, + FILE_FLAG_BACKUP_SEMANTICS | FILE_FLAG_OVERLAPPED, + NULL + ); + + if (ww32->DirHandle != INVALID_HANDLE_VALUE && + CreateIoCompletionPort(ww32->DirHandle, iocp, 0, 1)) { + ww32->NotifyFilter = NotifyFilter; + ww32->Recursive = recursive; + + if (RefreshWatch(wsw32)) { + return wsw32; + } + } + + CloseHandle(ww32->DirHandle); + delete ww32; + HeapFree(GetProcessHeap(), 0, wsw32); + return NULL; +} + + +RclFSWatchWin32::RclFSWatchWin32() + : mLastWatchID(0) +{ + mIOCP = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 1); + if (mIOCP && mIOCP != INVALID_HANDLE_VALUE) + mInitOK = true; +} + +RclFSWatchWin32::~RclFSWatchWin32() +{ + mInitOK = false; + + if (mIOCP && mIOCP != INVALID_HANDLE_VALUE) { + PostQueuedCompletionStatus(mIOCP, 0, reinterpret_cast(this), NULL); + } + + // delete mThread ?? 
+ + removeAllWatches(); + + CloseHandle(mIOCP); +} + +WatchID RclFSWatchWin32::addWatch(const std::string& _dir,FileWatchListener *watcher,bool recursive) +{ + LOGDEB("RclFSWatchWin32::addWatch: " << _dir << " recursive " << recursive << "\n"); + std::string dir(_dir); + path_slashize(dir); + if (!path_isdir(dir)) { + LOGDEB("RclFSWatchWin32::addWatch: not a directory: " << dir << "\n"); + return -1; + } + if (!path_readable(dir)) { + LOGINF("RclFSWatchWin32::addWatch: not readable: " << dir << "\n"); + return 0; + } + path_catslash(dir); + auto wdir = utf8towchar(dir); + + std::unique_lock lock(mWatchesLock); + + if (pathInWatches(dir)) { + MONDEB("RclFSWatchWin32::addWatch: already in watches: " << dir << "\n"); + return 0; + } + + WatchID watchid = ++mLastWatchID; + + WatcherStructWin32 *watch = CreateWatch( + wdir.get(), recursive, + FILE_NOTIFY_CHANGE_CREATION | + FILE_NOTIFY_CHANGE_LAST_WRITE | + FILE_NOTIFY_CHANGE_FILE_NAME | + FILE_NOTIFY_CHANGE_DIR_NAME | + FILE_NOTIFY_CHANGE_SIZE, + mIOCP + ); + + if (nullptr == watch) { + LOGINF("RclFSWatchWin32::addWatch: CreateWatch failed\n"); + return -1; + } + + // Add the handle to the handles vector + watch->Watch->ID = watchid; + watch->Watch->Watch = this; + watch->Watch->Listener = watcher; + watch->Watch->DirName = dir; + + mWatches.push_back(watch); + + return watchid; +} + +void RclFSWatchWin32::removeAllWatches() +{ + std::unique_lock lock(mWatchesLock); + for( auto& watchp : mWatches) { + DestroyWatch(watchp); + } + mWatches.clear(); +} + +/// Unpacks events and passes them to the event processor +void CALLBACK WatchCallback(DWORD dwNumberOfBytesTransfered, LPOVERLAPPED lpOverlapped) +{ + if (dwNumberOfBytesTransfered == 0 || NULL == lpOverlapped) { + return; + } + + WatcherStructWin32 *wsw32 = (WatcherStructWin32*)lpOverlapped; + WatcherWin32 *ww32 = wsw32->Watch; + + PFILE_NOTIFY_INFORMATION pNotify; + size_t offset = 0; + do { + pNotify = (PFILE_NOTIFY_INFORMATION) &ww32->Buffer[offset]; + offset += 
pNotify->NextEntryOffset; + + std::string sfn; + wchartoutf8(pNotify->FileName, sfn, pNotify->FileNameLength / sizeof(WCHAR)); + ww32->Watch->handleAction(ww32, sfn, pNotify->Action); + } while (pNotify->NextEntryOffset != 0); + + if (!ww32->StopNow) { + RefreshWatch(wsw32); + } +} + +void RclFSWatchWin32::run(DWORD msecs) +{ + if (!mWatches.empty()) { + DWORD numOfBytes = 0; + OVERLAPPED* ov = NULL; + ULONG_PTR compKey = 0; + BOOL res = FALSE; + DWORD ms = msecs == -1 ? INFINITE : msecs; + while ((res = GetQueuedCompletionStatus(mIOCP, &numOfBytes, &compKey, &ov, ms))) { + if (compKey != 0 && compKey == reinterpret_cast(this)) { + // Called from ~RclFSWatchWin32. Must exit. + MONDEB("RclFSWatchWin32::run: queuedcompletion said need exit\n"); + return; + } else { + std::unique_lock lock(mWatchesLock); + WatchCallback(numOfBytes, ov); + } + } + } else { + // No watches yet. + MONDEB("RclFSWatchWin32::run: no watches yet\n"); + DWORD ms = msecs == -1 ? 1000 : msecs; + std::this_thread::sleep_for(std::chrono::milliseconds(ms)); + } +} + +void RclFSWatchWin32::handleAction(WatcherWin32 *watch, const std::string& fn, unsigned long action) +{ + Action fwAction; + MONDEB("handleAction: fn [" << fn << "] action " << action << "\n"); + + // In case fn is not a simple name but a relative path (probably + // possible/common if recursive is set ?), sort out the directory + // path and simple file name. + std::string newpath = path_cat(watch->DirName, fn); + bool isdir = path_isdir(newpath); + std::string simplefn = path_getsimple(newpath); + std::string folderPath = path_getfather(newpath); + + switch (action) { + case FILE_ACTION_RENAMED_OLD_NAME: + watch->OldFileName = fn; + /* FALLTHROUGH */ + case FILE_ACTION_REMOVED: + fwAction = Action::Delete; + // The system does not tell us if this was a directory, but we + // need the info. Check if it was in the watches. + // TBD: for a delete, we should delete all watches on the subtree ! 
+ path_catslash(newpath); + for (auto& watchp : mWatches) { + if (watchp->Watch->DirName == newpath) { + isdir = true; + break; + } + } + break; + case FILE_ACTION_ADDED: + fwAction = Action::Add; + break; + case FILE_ACTION_MODIFIED: + fwAction = Action::Modify; + break; + case FILE_ACTION_RENAMED_NEW_NAME: { + fwAction = Action::Move; + + // If this is a directory, possibly update the watches. + // TBD: this seems wrong because we should process the whole subtree ? + if (isdir) { + // Update the new directory path + std::string oldpath = path_cat(watch->DirName, watch->OldFileName); + path_catslash(oldpath); + for (auto& watchp : mWatches) { + if (watchp->Watch->DirName == oldpath) { + watchp->Watch->DirName = newpath; + break; + } + } + } + + std::string oldFolderPath = watch->DirName + + watch->OldFileName.substr(0, watch->OldFileName.find_last_of("/\\")); + + if (folderPath == oldFolderPath) { + watch->Listener->handleFileAction(watch->ID, folderPath, simplefn, fwAction, isdir, + path_getsimple(watch->OldFileName)); + } else { + // Calling the client with non-simple paths?? 
+ watch->Listener->handleFileAction(watch->ID, watch->DirName, fn, fwAction, isdir, + watch->OldFileName); + } + return; + } + default: + return; + }; + + watch->Listener->handleFileAction(watch->ID, folderPath, simplefn, fwAction, isdir); +} + +bool RclFSWatchWin32::pathInWatches(const std::string& path) +{ + for (const auto& wsw32 : mWatches) { + if (wsw32->Watch->DirName == path ) { + return true; + } + } + return false; +} + +#endif // _WIN32 + + /////////////////////////////////////////////////////////////////////// // The monitor 'factory' static RclMonitor *makeMonitor() { -#ifdef RCL_USE_INOTIFY +#ifdef _WIN32 + return new RclMonitorWin32; +#else +# ifdef RCL_USE_INOTIFY return new RclIntf; -#endif -#ifndef RCL_USE_INOTIFY -#ifdef RCL_USE_FAM +# elif defined(RCL_USE_FAM) return new RclFAM; -#endif +# endif #endif LOGINFO("RclMonitor: neither Inotify nor Fam was compiled as file system " "change notification interface\n"); return 0; } + #endif // RCL_MONITOR diff --git a/src/index/recollindex.cpp b/src/index/recollindex.cpp index d9b8810e..28d3f5b1 100644 --- a/src/index/recollindex.cpp +++ b/src/index/recollindex.cpp @@ -103,7 +103,9 @@ static struct option long_options[] = { {0, 0, 0, 0} }; +#ifndef _WIN32 ReExec *o_reexec; +#endif // Globals for atexit cleanup static ConfIndexer *confindexer; @@ -867,11 +869,13 @@ int main(int argc, char *argv[]) LOGDEB("recollindex: sleeping " << sleepsecs << "\n"); for (int i = 0; i < sleepsecs; i++) { sleep(1); +#ifndef _WIN32 // Check that x11 did not go away while we were sleeping. 
if (!(op_flags & OPT_x) && !x11IsAlive()) { LOGDEB("X11 session went away during initial sleep period\n"); exit(0); } +#endif } } From 6eb4f3681df3fae29ca1d56e69e5a4a0ada295be Mon Sep 17 00:00:00 2001 From: Jean-Francois Dockes Date: Thu, 13 Jan 2022 11:19:16 +0100 Subject: [PATCH 10/19] removed unused files --- src/utils/utf8testin.txt | 212 --------------------------------------- src/utils/workqueue.cpp | 121 ---------------------- 2 files changed, 333 deletions(-) delete mode 100644 src/utils/utf8testin.txt delete mode 100644 src/utils/workqueue.cpp diff --git a/src/utils/utf8testin.txt b/src/utils/utf8testin.txt deleted file mode 100644 index bfb9ec85..00000000 --- a/src/utils/utf8testin.txt +++ /dev/null @@ -1,212 +0,0 @@ - -UTF-8 encoded sample plain-text file -‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ - -Markus Kuhn [ˈmaʳkÊŠs kuËn] — 2002-07-25 - - -The ASCII compatible UTF-8 encoding used in this plain-text file -is defined in Unicode, ISO 10646-1, and RFC 2279. - - -Using Unicode/UTF-8, you can write in emails and source code things such as - -Mathematics and sciences: - - ∮ Eâ‹…da = Q, n → ∞, ∑ f(i) = ∠g(i), ⎧⎡⎛┌─────â”⎞⎤⎫ - ⎪⎢⎜│a²+b³ ⎟⎥⎪ - ∀x∈â„: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ - ⎪⎢⎜⎷ c₈ ⎟⎥⎪ - â„• ⊆ â„•â‚€ ⊂ ℤ ⊂ ℚ ⊂ ℠⊂ â„‚, ⎨⎢⎜ ⎟⎥⎬ - ⎪⎢⎜ ∞ ⎟⎥⎪ - ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ - ⎪⎢⎜ ⎳aâ±-bâ±âŽŸâŽ¥âŽª - 2Hâ‚‚ + Oâ‚‚ ⇌ 2Hâ‚‚O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣âŽi=1 ⎠⎦⎭ - -Linguistics and dictionaries: - - ði ıntəˈnæʃənÉ™l fəˈnÉ›tık É™soÊŠsiˈeıʃn - Y [ˈÊpsilÉ”n], Yen [jÉ›n], Yoga [ˈjoËgÉ‘] - -APL: - - ((Vâ³V)=â³â´V)/Vâ†,V ⌷â†â³â†’â´âˆ†âˆ‡âŠƒâ€¾âŽâ•⌈ - -Nicer typography in plain text files: - - â•”â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•— - â•‘ â•‘ - â•‘ • ‘single’ and “double†quotes â•‘ - â•‘ â•‘ - â•‘ • Curly apostrophes: “We’ve been here†║ - â•‘ â•‘ - â•‘ • Latin-1 apostrophe and accents: '´` â•‘ - â•‘ â•‘ - â•‘ • ‚deutsche‘ „Anführungszeichen“ â•‘ - â•‘ â•‘ - â•‘ • †, ‡, ‰, •, 3–4, —, −5/+5, 
â„¢, … â•‘ - â•‘ â•‘ - â•‘ • ASCII safety test: 1lI|, 0OD, 8B â•‘ - â•‘ ╭─────────╮ â•‘ - â•‘ • the euro symbol: │ 14.95 € │ â•‘ - â•‘ ╰─────────╯ â•‘ - ╚â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â• - -Combining characters: - - STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ - -Greek (in Polytonic): - - The Greek anthem: - - Σὲ γνωÏίζω ἀπὸ τὴν κόψη - τοῦ σπαθιοῦ τὴν Ï„ÏομεÏá½µ, - σὲ γνωÏίζω ἀπὸ τὴν ὄψη - ποὺ μὲ βία μετÏάει τὴ γῆ. - - ᾿Απ᾿ τὰ κόκκαλα βγαλμένη - τῶν ῾Ελλήνων τὰ ἱεÏá½± - καὶ σὰν Ï€Ïῶτα ἀνδÏειωμένη - χαῖÏε, ὦ χαῖÏε, ᾿ΕλευθεÏιά! - - From a speech of Demosthenes in the 4th century BC: - - Οá½Ï‡á½¶ ταá½Ï„á½° παÏίσταταί μοι γιγνώσκειν, ὦ ἄνδÏες ᾿Αθηναῖοι, - ὅταν τ᾿ εἰς τὰ Ï€Ïάγματα ἀποβλέψω καὶ ὅταν Ï€Ïὸς τοὺς - λόγους οὓς ἀκούω· τοὺς μὲν Î³á½°Ï Î»á½¹Î³Î¿Ï…Ï‚ πεÏá½¶ τοῦ - τιμωÏήσασθαι Φίλιππον á½Ïá¿¶ γιγνομένους, τὰ δὲ Ï€Ïάγματ᾿ - εἰς τοῦτο Ï€Ïοήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αá½Ï„οὶ - Ï€ÏότεÏον κακῶς σκέψασθαι δέον. οá½Î´á½³Î½ οὖν ἄλλο μοι δοκοῦσιν - οἱ τὰ τοιαῦτα λέγοντες á¼¢ τὴν ὑπόθεσιν, πεÏá½¶ á¼§Ï‚ βουλεύεσθαι, - οá½Ï‡á½¶ τὴν οὖσαν παÏιστάντες ὑμῖν á¼Î¼Î±Ïτάνειν. á¼Î³á½¼ δέ, ὅτι μέν - ποτ᾿ á¼Î¾á¿†Î½ τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον - τιμωÏήσασθαι, καὶ μάλ᾿ ἀκÏιβῶς οἶδα· á¼Ï€á¾¿ á¼Î¼Î¿á¿¦ γάÏ, οὠπάλαι - γέγονεν ταῦτ᾿ ἀμφότεÏα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν - Ï€Ïολαβεῖν ἡμῖν εἶναι τὴν Ï€Ïώτην, ὅπως τοὺς συμμάχους - σώσομεν. á¼á½°Î½ Î³á½°Ï Ï„Î¿á¿¦Ï„Î¿ βεβαίως ὑπάÏξῃ, τότε καὶ πεÏá½¶ τοῦ - τίνα τιμωÏήσεταί τις καὶ ὃν Ï„Ïόπον á¼Î¾á½³ÏƒÏ„αι σκοπεῖν· Ï€Ïὶν δὲ - τὴν á¼€Ïχὴν á½€Ïθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι πεÏá½¶ τῆς - τελευτῆς á½Î½Ï„ινοῦν ποιεῖσθαι λόγον. - - Δημοσθένους, Γ´ ᾿Ολυνθιακὸς - -Georgian: - - From a Unicode conference invitation: - - გთხáƒáƒ•თ áƒáƒ®áƒšáƒáƒ•ე გáƒáƒ˜áƒáƒ áƒáƒ— რეგისტრáƒáƒªáƒ˜áƒ Unicode-ის მეáƒáƒ—ე სáƒáƒ”რთáƒáƒ¨áƒáƒ áƒ˜áƒ¡áƒ - კáƒáƒœáƒ¤áƒ”რენციáƒáƒ–ე დáƒáƒ¡áƒáƒ¡áƒ¬áƒ áƒ”ბáƒáƒ“, რáƒáƒ›áƒ”ლიც გáƒáƒ˜áƒ›áƒáƒ áƒ—ებრ10-12 მáƒáƒ áƒ¢áƒ¡, - ქ. მáƒáƒ˜áƒœáƒªáƒ¨áƒ˜, გერმáƒáƒœáƒ˜áƒáƒ¨áƒ˜. 
კáƒáƒœáƒ¤áƒ”რენცირშეჰკრებს ერთáƒáƒ“ მსáƒáƒ¤áƒšáƒ˜áƒáƒ¡ - ექსპერტებს ისეთ დáƒáƒ áƒ’ებში რáƒáƒ’áƒáƒ áƒ˜áƒªáƒáƒ ინტერნეტი დრUnicode-ი, - ინტერნáƒáƒªáƒ˜áƒáƒœáƒáƒšáƒ˜áƒ–áƒáƒªáƒ˜áƒ დრლáƒáƒ™áƒáƒšáƒ˜áƒ–áƒáƒªáƒ˜áƒ, Unicode-ის გáƒáƒ›áƒáƒ§áƒ”ნებრ- áƒáƒžáƒ”რáƒáƒªáƒ˜áƒ£áƒš სისტემებსáƒ, დრგáƒáƒ›áƒáƒ§áƒ”ნებით პრáƒáƒ’რáƒáƒ›áƒ”ბში, შრიფტებში, - ტექსტების დáƒáƒ›áƒ£áƒ¨áƒáƒ•ებáƒáƒ¡áƒ დრმრáƒáƒ•áƒáƒšáƒ”ნáƒáƒ•áƒáƒœ კáƒáƒ›áƒžáƒ˜áƒ£áƒ¢áƒ”რულ სისტემებში. - -Russian: - - From a Unicode conference invitation: - - ЗарегиÑтрируйтеÑÑŒ ÑÐµÐ¹Ñ‡Ð°Ñ Ð½Ð° ДеÑÑтую Международную Конференцию по - Unicode, ÐºÐ¾Ñ‚Ð¾Ñ€Ð°Ñ ÑоÑтоитÑÑ 10-12 марта 1997 года в Майнце в Германии. - ÐšÐ¾Ð½Ñ„ÐµÑ€ÐµÐ½Ñ†Ð¸Ñ Ñоберет широкий круг ÑкÑпертов по вопроÑам глобального - Интернета и Unicode, локализации и интернационализации, воплощению и - применению Unicode в различных операционных ÑиÑтемах и программных - приложениÑÑ…, шрифтах, верÑтке и многоÑзычных компьютерных ÑиÑтемах. - -Thai (UCS Level 2): - - Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese - classic 'San Gua'): - - [----------------------------|------------------------] - ๠à¹à¸œà¹ˆà¸™à¸”ินฮั่นเสื่อมโทรมà¹à¸ªà¸™à¸ªà¸±à¸‡à¹€à¸§à¸Š พระปà¸à¹€à¸à¸¨à¸à¸­à¸‡à¸šà¸¹à¹Šà¸à¸¹à¹‰à¸‚ึ้นใหม่ - สิบสองà¸à¸©à¸±à¸•ริย์à¸à¹ˆà¸­à¸™à¸«à¸™à¹‰à¸²à¹à¸¥à¸–ัดไป สององค์ไซร้โง่เขลาเบาปัà¸à¸à¸² - ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนัà¸à¸«à¸™à¸² - โฮจิ๋นเรียà¸à¸—ัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัภ- เหมือนขับไสไล่เสือจาà¸à¹€à¸„หา รับหมาป่าเข้ามาเลยอาสัภ- à¸à¹ˆà¸²à¸¢à¸­à¹‰à¸­à¸‡à¸­à¸¸à¹‰à¸™à¸¢à¸¸à¹à¸¢à¸à¹ƒà¸«à¹‰à¹à¸•à¸à¸à¸±à¸™ ใช้สาวนั้นเป็นชนวนชื่นชวนใจ - พลันลิฉุยà¸à¸¸à¸¢à¸à¸µà¸à¸¥à¸±à¸šà¸à¹ˆà¸­à¹€à¸«à¸•ุ ช่างอาเพศจริงหนาฟ้าร้องไห้ - ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูà¸à¸¹à¹‰à¸šà¸£à¸£à¸¥à¸±à¸‡à¸à¹Œ ฯ - - (The above is a two-column text. If combining characters are handled - correctly, the lines of the second column should be aligned with the - | character above.) 
- -Ethiopian: - - Proverbs in the Amharic language: - - ሰማይ አይታረስ ንጉሥ አይከሰስᢠ- ብላ ካለአእንደአባቴ በቆመጠáŠá¢ - ጌጥ ያለቤቱ á‰áˆáŒ¥áŠ“ áŠá‹á¢ - ደሀ በሕáˆáˆ™ ቅቤ ባይጠጣ ንጣት በገደለá‹á¢ - የአá ወለáˆá‰³ በቅቤ አይታሽáˆá¢ - አይጥ በበላ ዳዋ ተመታᢠ- ሲተረጉሙ ይደረáŒáˆ™á¢ - ቀስ በቀስᥠዕንá‰áˆ‹áˆ በእáŒáˆ© ይሄዳáˆá¢ - ድር ቢያብር አንበሳ ያስርᢠ- ሰዠእንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርáˆá¢ - እáŒá‹œáˆ­ የከáˆá‰°á‹áŠ• ጉሮሮ ሳይዘጋዠአይድርáˆá¢ - የጎረቤት ሌባᥠቢያዩት ይስቅ ባያዩት ያጠáˆá‰…ᢠ- ሥራ ከመáታት áˆáŒ„ን ላá‹á‰³á‰µá¢ - ዓባይ ማደሪያ የለá‹á¥ áŒáŠ•á‹µ ይዞ ይዞራáˆá¢ - የእስላሠአገሩ መካ የአሞራ አገሩ ዋርካᢠ- ተንጋሎ ቢተበተመáˆáˆ¶ ባá‰á¢ - ወዳጅህ ማር ቢሆን ጨርስህ አትላሰá‹á¢ - እáŒáˆ­áˆ…ን በáራሽህ áˆáŠ­ ዘርጋᢠ- -Runes: - - ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛠᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ áš¹á›áš¦ ᚦᚪ ᚹᛖᛥᚫ - - (Old English, which transcribed into Latin reads 'He cwaeth that he - bude thaem lande northweardum with tha Westsae.' and means 'He said - that he lived in the northern land near the Western Sea.') - -Braille: - - ⡌â â §â ‘ â ¼â â ’ â¡â œâ ‡â ‘⠹⠰⠎ ⡣⠕⠌ - - â¡â œâ ‡â ‘â ¹ â ºâ â Ž ⠙⠑â â ™â ’ â žâ • ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ â Šâ Ž â â • ⠙⠳⠃⠞ - â ±â â žâ ‘â §â » â â ƒâ ³â ž â ¹â â žâ ² ⡹⠑ ⠗⠑⠛⠊⠌⠻ â •â ‹ ⠙⠊⠎ ⠃⠥⠗⠊â â ‡ â ºâ â Ž - â Žâ Šâ ›â â « ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹â â â â ‚ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ â ¥â â ™â »â žâ â …⠻⠂ - â â â ™ ⠹⠑ â ¡â Šâ ‘â ‹ â â ³â —â â »â ² ⡎⠊⠗⠕⠕⠛⠑ â Žâ Šâ ›â â « â Šâ žâ ² â¡â â ™ - ⡎⠊⠗⠕⠕⠛⠑⠰⠎ â â â â ‘ â ºâ â Ž ⠛⠕⠕⠙ â ¥â â •â  â °â¡¡â â â ›â ‘â ‚ â ‹â •â — â â â ¹â ¹â ”â › ⠙⠑ - â ¡â •â Žâ ‘ â žâ • â â ¥â ž ⠙⠊⠎ â ™â â â ™ â žâ •â ² - - ⡕⠇⠙ â¡â œâ ‡â ‘â ¹ â ºâ â Ž â â Ž ⠙⠑â â ™ â â Ž â  â ™â •â •â —â ¤â â â Šâ ‡â ² - - â¡â ”⠙⠖ ⡊ ⠙⠕â â °â ž â â ‘â â  â žâ • â Žâ â ¹ â ¹â â ž ⡊ â …â â ªâ ‚ â •â ‹ â â ¹ - â ªâ  â …â â ªâ ‡â «â ›â ‘â ‚ â ±â â ž ⠹⠻⠑ â Šâ Ž â â œâ žâ Šâ Šâ ¥â ‡â œâ ‡â ¹ ⠙⠑â â ™ â â ƒâ ³â ž - â  â ™â •â •â —â ¤â â â Šâ ‡â ² ⡊ â â Šâ £â ž â ™â â §â ‘ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ â â ¹â Žâ ‘⠇⠋⠂ â žâ • - ⠗⠑⠛⠜⠙ â  â Šâ •â ‹â ‹â ”â ¤â â â Šâ ‡ â â Ž ⠹⠑ ⠙⠑â â ™â ‘â Œ â â Šâ ‘â Šâ ‘ â •â ‹ â Šâ —â •â â â •â â ›â »â ¹ - â ” ⠹⠑ â žâ —â â ™â ‘â ² ⡃⠥⠞ ⠹⠑ â ºâ Šâ Žâ ™â •â  â •â ‹ ⠳⠗ â â â Šâ ‘⠌⠕⠗⠎ - â Šâ Ž â ” ⠹⠑ â Žâ Šâ â Šâ ‡â ‘â † â â â ™ â â ¹ â ¥â â ™â â ‡â ‡â ªâ « â 
™â â â ™â Ž - â ©â â ‡â ‡ â â •â ž ⠙⠊⠌⠥⠗⠃ â Šâ žâ ‚ â •â — ⠹⠑ ⡊⠳â â žâ —⠹⠰⠎ ⠙⠕â â ‘ â ‹â •â —â ² ⡹⠳ - ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ â â »â â Šâ ž â â ‘ â žâ • â —â ‘â â ‘â â žâ ‚ â ‘â â â ™â â žâ Šâ Šâ â ‡â ‡â ¹â ‚ â ¹â â ž - â¡â œâ ‡â ‘â ¹ â ºâ â Ž â â Ž ⠙⠑â â ™ â â Ž â  â ™â •â •â —â ¤â â â Šâ ‡â ² - - (The first couple of paragraphs of "A Christmas Carol" by Dickens) - -Compact font selection example text: - - ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 - abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ - –—‘“â€â€žâ€ â€¢â€¦â€°â„¢Å“ŠŸž€ ΑΒΓΔΩαβγδω ÐБВГДабвгд - ∀∂∈â„∧∪≡∞ ↑↗↨↻⇣ â”┼╔╘░►☺♀ ï¬ï¿½â‘€â‚‚ἠḂӥẄÉËâŽ×Աრ- -Greetings in various languages: - - Hello world, ΚαλημέÏα κόσμε, コンニãƒãƒ - -Box drawing alignment tests: â–ˆ - â–‰ - â•”â•â•╦â•â•â•— ┌──┬──┠╭──┬──╮ ╭──┬──╮ â”â”â”┳â”â”┓ ┎┒â”┑ â•· â•» â”┯┓ ┌┰┠▊ ╱╲╱╲╳╳╳ - ║┌─╨─â”â•‘ │╔â•â•§â•╗│ │╒â•╪â•╕│ │╓─â•─╖│ ┃┌─╂─â”┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ â”╋┥ â–‹ ╲╱╲╱╳╳╳ - ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ â•¿ │┃ â”╅╆┓ ╵ ╹ â”—â”·â”› └┸┘ â–Œ ╱╲╱╲╳╳╳ - â• â•¡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┠╎ â”┅┅┓ ┋ ■╲╱╲╱╳╳╳ - ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╠┇ ┋ â–Ž - ║└─╥─┘║ │╚â•╤â•â•│ │╘â•╪â•╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╠┇ ┋ â– - ╚â•â•â•©â•â•╠└──┴──┘ ╰──┴──╯ ╰──┴──╯ â”—â”â”â”»â”â”â”› ▗▄▖▛▀▜ └╌╌┘ ╎ â”—â•â•â”› ┋ â–▂▃▄▅▆▇█ - â–▀▘▙▄▟ diff --git a/src/utils/workqueue.cpp b/src/utils/workqueue.cpp deleted file mode 100644 index aba5fd1e..00000000 --- a/src/utils/workqueue.cpp +++ /dev/null @@ -1,121 +0,0 @@ -/* Copyright (C) 2014 J.F.Dockes - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the - * Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - */ -// Test program for the workqueue module - -#include -#include -#include -#include - -#include "safeunistd.h" - -#include "workqueue.h" - -static char *thisprog; - -static char usage [] = -" \n\n" -; -static void -Usage(void) -{ - fprintf(stderr, "%s: usage:\n%s", thisprog, usage); - exit(1); -} - -static int op_flags; -#define OPT_MOINS 0x1 -#define OPT_s 0x2 -#define OPT_b 0x4 - -class Task { -public: - Task() - : m_id(o_id++) - {} - int m_id; - static int o_id; -}; -int Task::o_id; - -void *worker(void *vtp) -{ - fprintf(stderr, "Worker working\n"); - WorkQueue *tqp = (WorkQueue *)vtp; - Task tsk; - for (;;) { - if (!tqp->take(&tsk)) { - fprintf(stderr, "Worker: take failed\n"); - return (void*)0; - } - fprintf(stderr, "WORKER: got task %d\n", tsk.m_id); - if (tsk.m_id > 20) { - tqp->workerExit(); - break; - } - } - return (void*)1; -} - -int main(int argc, char **argv) -{ - int count = 10; - - thisprog = argv[0]; - argc--; argv++; - - while (argc > 0 && **argv == '-') { - (*argv)++; - if (!(**argv)) - /* Cas du "adb - core" */ - Usage(); - while (**argv) - switch (*(*argv)++) { - case 's': op_flags |= OPT_s; break; - case 'b': op_flags |= OPT_b; if (argc < 2) Usage(); - if ((sscanf(*(++argv), "%d", &count)) != 1) - Usage(); - argc--; - goto b1; - default: Usage(); break; - } - b1: argc--; argv++; - } - - if (argc != 0) - Usage(); - - WorkQueue wq("testwq", 10); - - if (!wq.start(2, &worker, &wq)) { - fprintf(stderr, "Start failed\n"); - exit(1); - } - - for (;;) { - Task tsk; - fprintf(stderr, "BOSS: put task %d\n", tsk.m_id); - if (!wq.put(tsk)) { - fprintf(stderr, "Boss: put failed\n"); - exit(1); - } - if ((tsk.m_id % 10) == 0) - sleep(1); - } - exit(0); -} - From 1acc059eeabbf2a51615bb21e4b406386e8f6a86 Mon Sep 17 00:00:00 2001 
From: Jean-Francois Dockes Date: Thu, 13 Jan 2022 14:33:37 +0100 Subject: [PATCH 11/19] indents etc --- src/index/rclmonprc.cpp | 91 ++++++++++++++++++----------------------- 1 file changed, 39 insertions(+), 52 deletions(-) diff --git a/src/index/rclmonprc.cpp b/src/index/rclmonprc.cpp index 56251456..77ff26ec 100644 --- a/src/index/rclmonprc.cpp +++ b/src/index/rclmonprc.cpp @@ -1,7 +1,7 @@ #include "autoconfig.h" #ifdef RCL_MONITOR -/* Copyright (C) 2006 J.F.Dockes +/* Copyright (C) 2006-2022 J.F.Dockes * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -122,7 +122,7 @@ struct DelayPat { */ class RclEQData { public: - int m_opts; + int m_opts{0}; // Queue for normal files (unlimited reindex) queue_type m_iqueue; // Queue for delayed reindex files @@ -135,27 +135,21 @@ public: delays_type m_delays; // Configured intervals for path patterns, read from the configuration. 
vector m_delaypats; - RclConfig *m_config; - bool m_ok; + RclConfig *m_config{nullptr}; + bool m_ok{true}; std::mutex m_mutex; std::condition_variable m_cond; - RclEQData() - : m_config(0), m_ok(true) - { - } void readDelayPats(int dfltsecs); - DelayPat searchDelayPats(const string& path) - { - for (vector::iterator it = m_delaypats.begin(); - it != m_delaypats.end(); it++) { - if (fnmatch(it->pattern.c_str(), path.c_str(), 0) == 0) { - return *it; - } + DelayPat searchDelayPats(const string& path) { + for (const auto& dpat: m_delaypats) { + if (fnmatch(dpat.pattern.c_str(), path.c_str(), 0) == 0) { + return dpat; } - return DelayPat(); } + return DelayPat(); + } void delayInsert(const queue_type::iterator &qit); }; @@ -170,22 +164,21 @@ void RclEQData::readDelayPats(int dfltsecs) vector dplist; if (!stringToStrings(patstring, dplist)) { - LOGERR("rclEQData: bad pattern list: [" << (patstring) << "]\n" ); + LOGERR("rclEQData: bad pattern list: [" << patstring << "]\n"); return; } - for (vector::iterator it = dplist.begin(); - it != dplist.end(); it++) { - string::size_type pos = it->find_last_of(":"); + for (const auto& entry : dplist) { + string::size_type pos = entry.find_last_of(":"); DelayPat dp; - dp.pattern = it->substr(0, pos); - if (pos != string::npos && pos != it->size()-1) { - dp.seconds = atoi(it->substr(pos+1).c_str()); + dp.pattern = entry.substr(0, pos); + if (pos != string::npos && pos != entry.size() - 1) { + dp.seconds = atoi(entry.substr(pos+1).c_str()); } else { dp.seconds = dfltsecs; } m_delaypats.push_back(dp); - LOGDEB2("rclmon::readDelayPats: add [" << (dp.pattern) << "] " << (dp.seconds) << "\n" ); + LOGDEB2("rclmon::readDelayPats: add [" << dp.pattern << "] " << dp.seconds << "\n"); } } @@ -194,10 +187,8 @@ void RclEQData::readDelayPats(int dfltsecs) // when necessary. 
void RclEQData::delayInsert(const queue_type::iterator &qit) { - MONDEB("RclEQData::delayInsert: minclock " << qit->second.m_minclock << - std::endl); - for (delays_type::iterator dit = m_delays.begin(); - dit != m_delays.end(); dit++) { + MONDEB("RclEQData::delayInsert: minclock " << qit->second.m_minclock << "\n"); + for (delays_type::iterator dit = m_delays.begin(); dit != m_delays.end(); dit++) { queue_type::iterator qit1 = *dit; if ((*qit1).second.m_minclock > qit->second.m_minclock) { m_delays.insert(dit, qit); @@ -230,7 +221,7 @@ std::unique_lock RclMonEventQueue::wait(int seconds, bool *top) { std::unique_lock lock(m_data->m_mutex); - MONDEB("RclMonEventQueue::wait, seconds: " << seconds << std::endl); + MONDEB("RclMonEventQueue::wait, seconds: " << seconds << "\n"); if (!empty()) { MONDEB("RclMonEventQueue:: immediate return\n"); return lock; @@ -310,7 +301,7 @@ bool RclMonEventQueue::empty() // first, earliest one): queue_type::iterator qit = *(m_data->m_delays.begin()); if (qit->second.m_minclock > time(0)) { - MONDEB("RclMonEventQueue::empty(): true (no delay ready " << + MONDEB("RclMonEventQueue::empty(): true (no delay ready " << qit->second.m_minclock << ")\n"); return true; } @@ -324,7 +315,7 @@ bool RclMonEventQueue::empty() RclMonEvent RclMonEventQueue::pop() { time_t now = time(0); - MONDEB("RclMonEventQueue::pop(), now " << now << std::endl); + MONDEB("RclMonEventQueue::pop(), now " << now << "\n"); // Look at the delayed events, get rid of the expired/unactive // ones, possibly return an expired/needidx one. 
@@ -332,7 +323,7 @@ RclMonEvent RclMonEventQueue::pop() delays_type::iterator dit = m_data->m_delays.begin(); queue_type::iterator qit = *dit; MONDEB("RclMonEventQueue::pop(): in delays: evt minclock " << - qit->second.m_minclock << std::endl); + qit->second.m_minclock << "\n"); if (qit->second.m_minclock <= now) { if (qit->second.m_needidx) { RclMonEvent ev = qit->second; @@ -371,7 +362,7 @@ RclMonEvent RclMonEventQueue::pop() // special processing to limit their reindexing rate. bool RclMonEventQueue::pushEvent(const RclMonEvent &ev) { - MONDEB("RclMonEventQueue::pushEvent for " << ev.m_path << std::endl); + MONDEB("RclMonEventQueue::pushEvent for " << ev.m_path << "\n"); std::unique_lock lock(m_data->m_mutex); DelayPat pat = m_data->searchDelayPats(ev.m_path); @@ -381,8 +372,7 @@ bool RclMonEventQueue::pushEvent(const RclMonEvent &ev) queue_type::iterator qit = m_data->m_dqueue.find(ev.m_path); if (qit == m_data->m_dqueue.end()) { // Not there yet, insert new - qit = - m_data->m_dqueue.insert(queue_type::value_type(ev.m_path, ev)).first; + qit = m_data->m_dqueue.insert(queue_type::value_type(ev.m_path, ev)).first; // Set the time to next index to "now" as it has not been // indexed recently (otherwise it would still be in the // queue), and add the iterator to the delay queue. 
@@ -491,7 +481,7 @@ bool startMonitor(RclConfig *conf, int opts) #ifndef _WIN32 bool x11dead = !(opts & RCLMON_NOX11) && !x11IsAlive(); if (x11dead) - LOGDEB("RclMonprc: x11 is dead\n" ); + LOGDEB("RclMonprc: x11 is dead\n"); #else bool x11dead = false; #endif @@ -512,26 +502,24 @@ bool startMonitor(RclConfig *conf, int opts) modified.push_back(ev.m_path); break; case RclMonEvent::RCLEVT_DELETE: - LOGDEB0("Monitor: Delete on " << (ev.m_path) << "\n" ); - // If this is for a directory (which the caller should - // tell us because he knows), we should purge the db - // of all the subtree, because on a directory rename, - // inotify will only generate one event for the - // renamed top, not the subentries. This is relatively - // complicated to do though, and we currently do not - // do it, and just wait for a restart to do a full run and - // purge. + LOGDEB0("Monitor: Delete on " << ev.m_path << "\n"); + // If this is for a directory (which the caller should tell us because he + // knows), we should purge the db of all the subtree entries, because on a + // directory rename, inotify will only generate one event for the renamed top, + // not the subentries. The entries from the new subtree are updated when the + // monitor walks it on the DIRCREATE event. + // If the monitor does not have the ISDIR info, we'll just wait for a restart to + // do a full run and purge, no big deal. 
deleted.push_back(ev.m_path); if (ev.evflags() & RclMonEvent::RCLEVT_ISDIR) { vector paths; if (subtreelist(conf, ev.m_path, paths)) { - deleted.insert(deleted.end(), - paths.begin(), paths.end()); + deleted.insert(deleted.end(), paths.begin(), paths.end()); } } break; default: - LOGDEB("Monitor: got Other on [" << (ev.m_path) << "]\n" ); + LOGDEB("Monitor: got Other on [" << ev.m_path << "]\n"); } } } @@ -579,7 +567,7 @@ bool startMonitor(RclConfig *conf, int opts) #ifndef _WIN32 // Check for a config change if (!(opts & RCLMON_NOCONFCHECK) && o_reexec && conf->sourceChanged()) { - LOGDEB("Rclmonprc: config changed, reexecuting myself\n" ); + LOGDEB("Rclmonprc: config changed, reexecuting myself\n"); // We never want to have a -n option after a config // change. -n was added by the reexec after the initial // pass even if it was not given on the command line @@ -588,7 +576,7 @@ bool startMonitor(RclConfig *conf, int opts) } #endif // ! _WIN32 } - LOGDEB("Rclmonprc: calling queue setTerminate\n" ); + LOGDEB("Rclmonprc: calling queue setTerminate\n"); rclEQ.setTerminate(); // We used to wait for the receiver thread here before returning, @@ -596,9 +584,8 @@ bool startMonitor(RclConfig *conf, int opts) // during our limited time window for exiting. To be reviewed if // we ever need several monitor invocations in the same process // (can't foresee any reason why we'd want to do this). 
- LOGDEB("Monitor: returning\n" ); + LOGDEB("Monitor: returning\n"); return true; } #endif // RCL_MONITOR - From 4c3ef660189505aa1148f62a6b8a9366735bb688 Mon Sep 17 00:00:00 2001 From: Jean-Francois Dockes Date: Thu, 13 Jan 2022 17:33:37 +0000 Subject: [PATCH 12/19] Windows filesystem monitoring appears to be working, run in foreground in a terminal --- src/common/autoconfig-win.h | 2 +- src/index/rclmonprc.cpp | 14 +- src/index/rclmonrcv.cpp | 267 +++++++++++++++++++----------------- src/index/subtreelist.cpp | 13 +- src/utils/rclutil.cpp | 35 +++-- src/utils/rclutil.h | 3 + 6 files changed, 186 insertions(+), 148 deletions(-) diff --git a/src/common/autoconfig-win.h b/src/common/autoconfig-win.h index d1852cf8..8c589ecc 100644 --- a/src/common/autoconfig-win.h +++ b/src/common/autoconfig-win.h @@ -133,7 +133,7 @@ /* #undef PUTENV_ARG_CONST */ /* Real time monitoring option */ -#undef RCL_MONITOR +#define RCL_MONITOR 1 /* Split camelCase words */ /* #undef RCL_SPLIT_CAMELCASE */ diff --git a/src/index/rclmonprc.cpp b/src/index/rclmonprc.cpp index 56251456..e3a46459 100644 --- a/src/index/rclmonprc.cpp +++ b/src/index/rclmonprc.cpp @@ -512,7 +512,7 @@ bool startMonitor(RclConfig *conf, int opts) modified.push_back(ev.m_path); break; case RclMonEvent::RCLEVT_DELETE: - LOGDEB0("Monitor: Delete on " << (ev.m_path) << "\n" ); + LOGDEB0("Monitor: Delete on " << ev.m_path << "\n"); // If this is for a directory (which the caller should // tell us because he knows), we should purge the db // of all the subtree, because on a directory rename, @@ -522,16 +522,20 @@ bool startMonitor(RclConfig *conf, int opts) // do it, and just wait for a restart to do a full run and // purge. deleted.push_back(ev.m_path); - if (ev.evflags() & RclMonEvent::RCLEVT_ISDIR) { +#ifndef _WIN32 + // We don't know the type of deleted entries on + // win32. So do the subtree things always. 
+ if (ev.evflags() & RclMonEvent::RCLEVT_ISDIR) +#endif + { vector paths; if (subtreelist(conf, ev.m_path, paths)) { - deleted.insert(deleted.end(), - paths.begin(), paths.end()); + deleted.insert(deleted.end(), paths.begin(), paths.end()); } } break; default: - LOGDEB("Monitor: got Other on [" << (ev.m_path) << "]\n" ); + LOGDEB("Monitor: got Other on [" << ev.m_path << "]\n"); } } } diff --git a/src/index/rclmonrcv.cpp b/src/index/rclmonrcv.cpp index 80a1f7e4..dbcbb000 100644 --- a/src/index/rclmonrcv.cpp +++ b/src/index/rclmonrcv.cpp @@ -60,12 +60,10 @@ /** * Recoll real time monitor event receiver. This file has code to interface - * to FAM or inotify and place events on the event queue. + * to FAM, inotify, etc. and place events on the event queue. */ -/** A small virtual interface for monitors. Lets - * either fam/gamin or raw imonitor hide behind - */ +/** Virtual interface for the actual filesystem monitoring module. */ class RclMonitor { public: RclMonitor() {} @@ -75,8 +73,12 @@ public: virtual bool getEvent(RclMonEvent& ev, int msecs = -1) = 0; virtual bool ok() const = 0; // Does this monitor generate 'exist' events at startup? - virtual bool generatesExist() const = 0; - + virtual bool generatesExist() const { + return false; + } + virtual bool isRecursive() const { + return false; + } // Save significant errno after monitor calls int saved_errno{0}; }; @@ -126,9 +128,8 @@ public: if (!m_mon || !m_mon->ok()) return FsTreeWalker::FtwError; // We do nothing special if addWatch fails for a reasonable reason - if (!m_mon->addWatch(fn, true)) { - if (m_mon->saved_errno != EACCES && - m_mon->saved_errno != ENOENT) { + if (!m_mon->isRecursive() && !m_mon->addWatch(fn, true)) { + if (m_mon->saved_errno != EACCES && m_mon->saved_errno != ENOENT) { LOGINF("walkerCB: addWatch failed\n"); return FsTreeWalker::FtwError; } @@ -144,8 +145,8 @@ public: // monitoring ? 
There should be another way: maybe start // monitoring without actually handling events (just // queue), then run incremental then start handling - // events ? But we also have to do it on a directory - // move! So keep it + // events ? ** But we also have to do it on a directory + // move! So keep it ** We could probably skip it on the initial run though. RclMonEvent ev; ev.m_path = fn; ev.m_etyp = RclMonEvent::RCLEVT_MODIFY; @@ -161,6 +162,96 @@ private: FsTreeWalker& m_walker; }; +static bool rclMonAddTopWatches( + FsTreeWalker& walker, RclConfig& lconfig, RclMonitor *mon, RclMonEventQueue *queue) +{ + // Get top directories from config. Special monitor sublist if + // set, else full list. + vector tdl = lconfig.getTopdirs(true); + if (tdl.empty()) { + LOGERR("rclMonRcvRun:: top directory list (topdirs param.) not found " + "in configuration or topdirs list parse error"); + queue->setTerminate(); + return false; + } + // Walk the directory trees to add watches + WalkCB walkcb(&lconfig, mon, queue, walker); + for (const auto& dir : tdl) { + lconfig.setKeyDir(dir); + // Adjust the follow symlinks options + bool follow; + if (lconfig.getConfParam("followLinks", &follow) && follow) { + walker.setOpts(FsTreeWalker::FtwFollow); + } else { + walker.setOpts(FsTreeWalker::FtwOptNone); + } + if (path_isdir(dir, follow)) { + LOGDEB("rclMonRcvRun: walking " << dir << "\n"); + // If the fs watcher is recursive, we add the watches for the topdirs here, and walk the + // tree just for generating initial events. 
+ if (mon->isRecursive() && !mon->addWatch(dir, true)) { + if (mon->saved_errno != EACCES && mon->saved_errno != ENOENT) { + LOGERR("rclMonAddTopWatches: addWatch failed for [" << dir << "]\n"); + return false; + } + } + if (walker.walk(dir, walkcb) != FsTreeWalker::FtwOk) { + LOGERR("rclMonRcvRun: tree walk failed\n"); + return false; + } + if (walker.getErrCnt() > 0) { + LOGINFO("rclMonRcvRun: fs walker errors: " << walker.getReason() << "\n"); + } + } else { + // We have to special-case regular files which are part of the topdirs list because the + // tree walker only adds watches for directories + if (!mon->addWatch(dir, false)) { + LOGSYSERR("rclMonRcvRun", "addWatch", dir); + } + } + } + + bool doweb = false; + lconfig.getConfParam("processwebqueue", &doweb); + if (doweb) { + string webqueuedir = lconfig.getWebQueueDir(); + if (!mon->addWatch(webqueuedir, true)) { + LOGERR("rclMonRcvRun: addwatch (webqueuedir) failed\n"); + if (mon->saved_errno != EACCES && mon->saved_errno != ENOENT) + return false; + } + } + return true; +} + +static bool rclMonAddSubWatches( + const std::string& path, FsTreeWalker& walker, RclConfig& lconfig, + RclMonitor *mon, RclMonEventQueue *queue) +{ + WalkCB walkcb(&lconfig, mon, queue, walker); + if (walker.walk(path, walkcb) != FsTreeWalker::FtwOk) { + LOGERR("rclMonRcvRun: walking new dir " << path << " : " << walker.getReason() << "\n"); + return false; + } + if (walker.getErrCnt() > 0) { + LOGINFO("rclMonRcvRun: fs walker errors: " << walker.getReason() << "\n"); + } + return true; +} + +// Don't push events for skipped files. This would get filtered on the processing side +// anyway, but causes unnecessary wakeups and messages. Do not test skippedPaths here, +// this would be incorrect (because a topdir can be under a skippedPath and this was +// handled while adding the watches). Also we let the other side process onlyNames. 
+static bool rclMonShouldSkip(const std::string& path, RclConfig& lconfig, FsTreeWalker& walker) +{ + lconfig.setKeyDir(path_getfather(path)); + walker.setSkippedNames(lconfig.getSkippedNames()); + if (walker.inSkippedNames(path_getsimple(path))) + return true; + return false; +} + // Main thread routine: create watches, then forever wait for and queue events void *rclMonRcvRun(void *q) { @@ -181,100 +272,34 @@ void *rclMonRcvRun(void *q) return 0; } - // Get top directories from config. Special monitor sublist if - // set, else full list. - vector tdl = lconfig.getTopdirs(true); - if (tdl.empty()) { - LOGERR("rclMonRcvRun:: top directory list (topdirs param.) not found " - "in configuration or topdirs list parse error"); - queue->setTerminate(); - return 0; - } - - // Walk the directory trees to add watches FsTreeWalker walker; walker.setSkippedPaths(lconfig.getDaemSkippedPaths()); - WalkCB walkcb(&lconfig, mon, queue, walker); - for (const auto& dir : tdl) { - lconfig.setKeyDir(dir); - // Adjust the follow symlinks options - bool follow; - if (lconfig.getConfParam("followLinks", &follow) && - follow) { - walker.setOpts(FsTreeWalker::FtwFollow); - } else { - walker.setOpts(FsTreeWalker::FtwOptNone); - } - // We have to special-case regular files which are part of the topdirs - // list because we the tree walker only adds watches for directories - if (path_isdir(dir, follow)) { - LOGDEB("rclMonRcvRun: walking " << dir << "\n"); - if (walker.walk(dir, walkcb) != FsTreeWalker::FtwOk) { - LOGERR("rclMonRcvRun: tree walk failed\n"); - goto terminate; - } - if (walker.getErrCnt() > 0) { - LOGINFO("rclMonRcvRun: fs walker errors: " << walker.getReason() << "\n"); - } - } else { - if (!mon->addWatch(dir, false)) { - LOGSYSERR("rclMonRcvRun", "addWatch", dir); - } - } - } - { - bool doweb = false; - lconfig.getConfParam("processwebqueue", &doweb); - if (doweb) { - string webqueuedir = lconfig.getWebQueueDir(); - if (!mon->addWatch(webqueuedir, true)) { - 
LOGERR("rclMonRcvRun: addwatch (webqueuedir) failed\n"); - if (mon->saved_errno != EACCES && mon->saved_errno != ENOENT) - goto terminate; - } - } + if (!rclMonAddTopWatches(walker, lconfig, mon, queue)) { + LOGERR("rclMonRcvRun: addtopwatches failed\n"); + goto terminate; } // Forever wait for monitoring events and add them to queue: MONDEB("rclMonRcvRun: waiting for events. q->ok(): " << queue->ok() << "\n"); while (queue->ok() && mon->ok()) { RclMonEvent ev; - // Note: I could find no way to get the select - // call to return when a signal is delivered to the process - // (it goes to the main thread, from which I tried to close or - // write to the select fd, with no effect). So set a - // timeout so that an intr will be detected + // Note: I could find no way to get the select call to return when a signal is delivered to + // the process (it goes to the main thread, from which I tried to close or write to the + // select fd, with no effect). So set a timeout so that an intr will be detected if (mon->getEvent(ev, 2000)) { - // Don't push events for skipped files. This would get - // filtered on the processing side anyway, but causes - // unnecessary wakeups and messages. Do not test - // skippedPaths here, this would be incorrect (because a - // topdir can be under a skippedPath and this was handled - // while adding the watches). - // Also we let the other side process onlyNames. - lconfig.setKeyDir(path_getfather(ev.m_path)); - walker.setSkippedNames(lconfig.getSkippedNames()); - if (walker.inSkippedNames(path_getsimple(ev.m_path))) + if (rclMonShouldSkip(ev.m_path, lconfig, walker)) continue; if (ev.m_etyp == RclMonEvent::RCLEVT_DIRCREATE) { - // Recursive addwatch: there may already be stuff - // inside this directory. Ie: files were quickly - // created, or this is actually the target of a - // directory move. 
This is necessary for inotify, but - // it seems that fam/gamin is doing the job for us so - // that we are generating double events here (no big - // deal as prc will sort/merge). + // Recursive addwatch: there may already be stuff inside this directory. E.g.: files + // were quickly created, or this is actually the target of a directory move. This is + // necessary for inotify, but it seems that fam/gamin is doing the job for us so + // that we are generating double events here (no big deal as prc will sort/merge). LOGDEB("rclMonRcvRun: walking new dir " << ev.m_path << "\n"); - if (walker.walk(ev.m_path, walkcb) != FsTreeWalker::FtwOk) { - LOGERR("rclMonRcvRun: walking new dir " << ev.m_path << - " : " << walker.getReason() << "\n"); + if (!rclMonAddSubWatches(ev.m_path, walker, lconfig, mon, queue)) { goto terminate; } - if (walker.getErrCnt() > 0) { - LOGINFO("rclMonRcvRun: fs walker errors: " << walker.getReason() << "\n"); - } } if (ev.m_etyp != RclMonEvent::RCLEVT_NONE) @@ -298,7 +323,7 @@ bool eraseWatchSubTree(map& idtopath, const string& top) while (it != idtopath.end()) { if (it->second.find(top) == 0) { found = true; - idtopath.erase(it++); + it = idtopath.erase(it); } else { it++; } @@ -557,7 +582,6 @@ public: virtual bool addWatch(const string& path, bool isdir); virtual bool getEvent(RclMonEvent& ev, int msecs = -1); bool ok() const {return m_ok;} - virtual bool generatesExist() const {return false;} private: bool m_ok; @@ -749,22 +773,17 @@ bool RclIntf::getEvent(RclMonEvent& ev, int msecs) #ifdef _WIN32 - /* - * WIN32 VERSION ISSUES: + * WIN32 VERSION NOTES: * - * - It appears that watching a subdirectory of a given directory - * prevents renaming the top directory, Windows says: can't rename - * because open or a file in it is open. This is a major issue of - * course. Check if this can be solved by using a recursive watch - * instead of setting watches on all subdirs. Would need a code - * changes in the "generic" part of course. 
- * - In general, directory renames need more studying. - * - Otherwise appears to more or less work... + * - When using non-recursive watches (one per dir), it appeared that + * watching a subdirectory of a given directory prevented renaming + * the top directory, Windows says: can't rename because open or a + * file in it is open. This is mostly why we use recursive watches + * on the topdirs only. */ #include #include -#include #include #include @@ -776,33 +795,29 @@ class RclFSWatchWin32; enum class Action {Add = 1, Delete = 2, Modify = 3, Move = 4}; +// Virtual interface for the monitor callback. Note: this for compatibility with the efsw code, as +// rclmon uses a pull, not push interface. The callback pushes the events to a local queue from +// which they are then pulled by the upper level code. class FileWatchListener { public: virtual ~FileWatchListener() {} - - /// Handles the action file action - /// @param watchid The watch id for the directory - /// @param dir The directory - /// @param filename The filename that was accessed (not full path) - /// @param action Action that was performed - /// @param oldFilename The name of the file or directory moved virtual void handleFileAction(WatchID watchid, const std::string& dir, const std::string& fn, Action action, bool isdir, std::string oldfn = "" ) = 0; }; -// Internal watch data +// Internal watch data. This piggy-back our actual data pointer to the MS overlapped pointer. This +// is a bit of a hack, and we could probably use event Ids instead. struct WatcherStructWin32 { OVERLAPPED Overlapped; WatcherWin32 *Watch; }; +// Actual data structure for one directory watch class WatcherWin32 { public: - WatcherWin32() {} - WatchID ID; - FileWatchListener *Listener; + FileWatchListener *Listener{nullptr}; bool Recursive; std::string DirName; std::string OldFileName; @@ -816,6 +831,7 @@ public: RclFSWatchWin32 *Watch{nullptr}; }; +// The efsw top level file system watcher: manages all the directory watches. 
class RclFSWatchWin32 { public: RclFSWatchWin32(); @@ -850,16 +866,14 @@ private: void removeAllWatches(); }; +// Adapter for the rclmon interface class RclMonitorWin32 : public RclMonitor, public FileWatchListener { public: - RclMonitorWin32() { - MONDEB("RclMonitorWin32::RclMonitorWin32\n"); - } virtual ~RclMonitorWin32() {} virtual bool addWatch(const string& path, bool /*isDir*/) override { MONDEB("RclMonitorWin32::addWatch: " << path << "\n"); - return m_fswatcher.addWatch(path, this, false) != -1; + return m_fswatcher.addWatch(path, this, true) != -1; } virtual bool getEvent(RclMonEvent& ev, int msecs = -1) { @@ -885,6 +899,10 @@ public: virtual bool generatesExist() const override { return false; } + // Can the caller avoid setting watches on subdirs ? + virtual bool isRecursive() const override { + return true; + } virtual void handleFileAction(WatchID watchid, const std::string& dir, const std::string& fn, Action action, bool isdir, std::string oldfn = "") { MONDEB("RclMonitorWin32::handleFileAction: dir [" << dir << "] fn [" << fn << "] act " << @@ -996,8 +1014,6 @@ RclFSWatchWin32::~RclFSWatchWin32() PostQueuedCompletionStatus(mIOCP, 0, reinterpret_cast(this), NULL); } - // delete mThread ?? 
- removeAllWatches(); CloseHandle(mIOCP); @@ -1010,7 +1026,7 @@ WatchID RclFSWatchWin32::addWatch(const std::string& _dir,FileWatchListener *wat path_slashize(dir); if (!path_isdir(dir)) { LOGDEB("RclFSWatchWin32::addWatch: not a directory: " << dir << "\n"); - return -1; + return 0; } if (!path_readable(dir)) { LOGINF("RclFSWatchWin32::addWatch: not readable: " << dir << "\n"); @@ -1063,7 +1079,7 @@ void RclFSWatchWin32::removeAllWatches() mWatches.clear(); } -/// Unpacks events and passes them to the event processor +// Unpacks events and passes them to the event processor void CALLBACK WatchCallback(DWORD dwNumberOfBytesTransfered, LPOVERLAPPED lpOverlapped) { if (dwNumberOfBytesTransfered == 0 || NULL == lpOverlapped) { @@ -1115,9 +1131,11 @@ void RclFSWatchWin32::run(DWORD msecs) } } -void RclFSWatchWin32::handleAction(WatcherWin32 *watch, const std::string& fn, unsigned long action) +void RclFSWatchWin32::handleAction(WatcherWin32 *watch, const std::string& _fn, unsigned long action) { + std::string fn(_fn); Action fwAction; + path_slashize(fn); MONDEB("handleAction: fn [" << fn << "] action " << action << "\n"); // In case fn is not a simple name but a relative path (probably @@ -1154,8 +1172,9 @@ void RclFSWatchWin32::handleAction(WatcherWin32 *watch, const std::string& fn, u case FILE_ACTION_RENAMED_NEW_NAME: { fwAction = Action::Move; - // If this is a directory, possibly update the watches. - // TBD: this seems wrong because we should process the whole subtree ? + // If this is a directory, possibly update the watches. TBD: this seems wrong because we + // should process the whole subtree ? Also probably not needed at all because we are + // recursive and only set watches on the top directories. 
if (isdir) { // Update the new directory path std::string oldpath = path_cat(watch->DirName, watch->OldFileName); diff --git a/src/index/subtreelist.cpp b/src/index/subtreelist.cpp index 40018aeb..4c4c2f9c 100644 --- a/src/index/subtreelist.cpp +++ b/src/index/subtreelist.cpp @@ -26,13 +26,18 @@ #include "subtreelist.h" #include "log.h" -bool subtreelist(RclConfig *config, const string& top, - vector& paths) +bool subtreelist(RclConfig *config, const string& _top, vector& paths) { - LOGDEB("subtreelist: top: [" << (top) << "]\n" ); + std::string top(_top); +#ifdef _WIN32 + // Need to convert c:path to /c/path because this is how paths are indexed + top = path_slashdrive(top); +#endif + + LOGDEB("subtreelist: top: [" << top << "]\n"); Rcl::Db rcldb(config); if (!rcldb.open(Rcl::Db::DbRO)) { - LOGERR("subtreelist: can't open database in [" << config->getDbDir() << + LOGERR("subtreelist: can't open index in [" << config->getDbDir() << "]: " << rcldb.getReason() << "\n"); return false; } diff --git a/src/utils/rclutil.cpp b/src/utils/rclutil.cpp index d33f79bb..239f446e 100644 --- a/src/utils/rclutil.cpp +++ b/src/utils/rclutil.cpp @@ -310,23 +310,30 @@ bool printableUrl(const string& fcharset, const string& in, string& out) return true; } +#ifdef _WIN32 +// Convert X:/path to /X/path for path splitting inside the index +string path_slashdrive(const string& path) +{ + string npath; + if (path_hasdrive(path)) { + npath.append(1, '/'); + npath.append(1, path[0]); + if (path_isdriveabs(path)) { + npath.append(path.substr(2)); + } else { + // This should be an error really + npath.append(1, '/'); + npath.append(path.substr(2)); + } + } + return npath; +} +#endif // _WIN32 + string url_gpathS(const string& url) { #ifdef _WIN32 - string u = url_gpath(url); - string nu; - if (path_hasdrive(u)) { - nu.append(1, '/'); - nu.append(1, u[0]); - if (path_isdriveabs(u)) { - nu.append(u.substr(2)); - } else { - // This should be an error really - nu.append(1, '/'); - 
nu.append(u.substr(2)); - } - } - return nu; + return path_slashdrive(url_gpath(url)); #else return url_gpath(url); #endif diff --git a/src/utils/rclutil.h b/src/utils/rclutil.h index 3187da29..38b4d9d2 100644 --- a/src/utils/rclutil.h +++ b/src/utils/rclutil.h @@ -48,6 +48,9 @@ extern bool printableUrl(const std::string& fcharset, const std::string& in, std::string& out); /// Same but, in the case of a Windows local path, also turn "c:/" into /// "/c/" This should be used only for splitting the path in rcldb. +#ifdef _WIN32 +extern std::string path_slashdrive(const std::string& path); +#endif extern std::string url_gpathS(const std::string& url); /// Like strftime but guaranteed utf-8 output (esp. useful on Windows) From 667e661c4655f64dd88a27a65d67cd0d0a1c18de Mon Sep 17 00:00:00 2001 From: Jean-Francois Dockes Date: Fri, 14 Jan 2022 09:27:04 +0100 Subject: [PATCH 13/19] Standardize the shebang line of python scripts to using /usr/bin/env, which was already the vastly dominant choice --- src/filters/kosplitter.py | 2 +- src/filters/rclepub | 2 +- src/filters/rclhwp.py | 2 +- src/filters/rclpst.py | 2 +- src/filters/rclpython.py | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/filters/kosplitter.py b/src/filters/kosplitter.py index 6c80a0f9..732cd3ed 100755 --- a/src/filters/kosplitter.py +++ b/src/filters/kosplitter.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 ################################# # Copyright (C) 2020 J.F.Dockes # This program is free software; you can redistribute it and/or modify diff --git a/src/filters/rclepub b/src/filters/rclepub index e0919a1a..4775c5bd 100755 --- a/src/filters/rclepub +++ b/src/filters/rclepub @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 """Extract Html content from an EPUB file (.epub)""" from __future__ import print_function diff --git a/src/filters/rclhwp.py b/src/filters/rclhwp.py index bb682dd7..b64f67e1 100755 --- a/src/filters/rclhwp.py +++ 
b/src/filters/rclhwp.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 # Copyright (C) 2020 J.F.Dockes # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/src/filters/rclpst.py b/src/filters/rclpst.py index 7b8111bf..42ab7e9a 100755 --- a/src/filters/rclpst.py +++ b/src/filters/rclpst.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 ################################# # Copyright (C) 2019 J.F.Dockes # This program is free software; you can redistribute it and/or modify diff --git a/src/filters/rclpython.py b/src/filters/rclpython.py index 2a4ca490..1d812dcf 100755 --- a/src/filters/rclpython.py +++ b/src/filters/rclpython.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 # Rclpython is based on "colorize.py" from: # http://chrisarndt.de/en/software/python/colorize.html From 8b3792026fd1c3fbe4be09c17653174179420b5e Mon Sep 17 00:00:00 2001 From: Jean-Francois Dockes Date: Fri, 14 Jan 2022 10:39:43 +0100 Subject: [PATCH 14/19] Renamed a few extension-less python handlers with a .py extension for consistency --- packaging/FreeBSD/recoll/pkg-plist | 18 +++---- packaging/homebrew/recoll.rb | 78 +++++++++++++-------------- src/Makefile.am | 26 ++++----- src/README | 6 +-- src/RECOLL-VERSION.txt | 2 +- src/doc/man/recoll.conf.5 | 4 +- src/doc/user/recoll.conf.xml | 4 +- src/doc/user/usermanual.html | 23 ++++---- src/doc/user/usermanual.xml | 8 +-- src/filters/{rcl7z => rcl7z.py} | 2 +- src/filters/{rclaudio => rclaudio.py} | 0 src/filters/{rclchm => rclchm.py} | 0 src/filters/{rcldia => rcldia.py} | 2 +- src/filters/{rclepub => rclepub.py} | 0 src/filters/{rclepub1 => rclepub1.py} | 0 src/filters/rclexecm.py | 2 +- src/filters/{rclics => rclics.py} | 0 src/filters/{rclinfo => rclinfo.py} | 2 +- src/filters/{rclkar => rclkar.py} | 0 src/filters/{rclrar => rclrar.py} | 2 +- src/filters/{rcltar => rcltar.py} | 2 +- src/filters/{rclwar 
=> rclwar.py} | 0 src/filters/{rclzip => rclzip.py} | 2 +- src/internfile/internfile.cpp | 2 +- src/internfile/mh_exec.cpp | 3 +- src/internfile/mh_execm.h | 2 +- src/qtgui/recoll-win.pro | 26 ++++----- src/sampleconf/mimeconf | 42 +++++++-------- src/sampleconf/mimemap | 2 +- src/sampleconf/recoll.conf | 6 +-- src/windows/mimeconf | 32 +++++------ tests/config/mimeconf | 2 +- 32 files changed, 151 insertions(+), 149 deletions(-) rename src/filters/{rcl7z => rcl7z.py} (98%) rename src/filters/{rclaudio => rclaudio.py} (100%) rename src/filters/{rclchm => rclchm.py} (100%) rename src/filters/{rcldia => rcldia.py} (97%) rename src/filters/{rclepub => rclepub.py} (100%) rename src/filters/{rclepub1 => rclepub1.py} (100%) rename src/filters/{rclics => rclics.py} (100%) rename src/filters/{rclinfo => rclinfo.py} (99%) rename src/filters/{rclkar => rclkar.py} (100%) rename src/filters/{rclrar => rclrar.py} (98%) rename src/filters/{rcltar => rcltar.py} (99%) rename src/filters/{rclwar => rclwar.py} (100%) rename src/filters/{rclzip => rclzip.py} (99%) diff --git a/packaging/FreeBSD/recoll/pkg-plist b/packaging/FreeBSD/recoll/pkg-plist index bfcd9397..212639d4 100644 --- a/packaging/FreeBSD/recoll/pkg-plist +++ b/packaging/FreeBSD/recoll/pkg-plist @@ -14,8 +14,8 @@ share/pixmaps/recoll.png %%DATADIR%%/filters/hotrecoll.py %%DATADIR%%/filters/rclabw %%DATADIR%%/filters/rclaptosidman -%%DATADIR%%/filters/rclaudio -%%DATADIR%%/filters/rclchm +%%DATADIR%%/filters/rclaudio.py +%%DATADIR%%/filters/rclchm.py %%DATADIR%%/filters/rcldjvu %%DATADIR%%/filters/rcldoc %%DATADIR%%/filters/rcldvi @@ -23,11 +23,11 @@ share/pixmaps/recoll.png %%DATADIR%%/filters/rclfb2 %%DATADIR%%/filters/rclflac %%DATADIR%%/filters/rclgaim -%%DATADIR%%/filters/rclics +%%DATADIR%%/filters/rclics.py %%DATADIR%%/filters/rclid3 %%DATADIR%%/filters/rclimg -%%DATADIR%%/filters/rclinfo -%%DATADIR%%/filters/rclkar +%%DATADIR%%/filters/rclinfo.py +%%DATADIR%%/filters/rclkar.py %%DATADIR%%/filters/rclkwd 
%%DATADIR%%/filters/rcllatinclass.py %%DATADIR%%/filters/rcllatinstops.zip @@ -41,7 +41,7 @@ share/pixmaps/recoll.png %%DATADIR%%/filters/rclps %%DATADIR%%/filters/rclpurple %%DATADIR%%/filters/rclpython -%%DATADIR%%/filters/rclrar +%%DATADIR%%/filters/rclrar.py %%DATADIR%%/filters/rclrtf %%DATADIR%%/filters/rclscribus %%DATADIR%%/filters/rclshowinfo @@ -51,11 +51,11 @@ share/pixmaps/recoll.png %%DATADIR%%/filters/rcltex %%DATADIR%%/filters/rcltext %%DATADIR%%/filters/rcluncomp -%%DATADIR%%/filters/rclwar +%%DATADIR%%/filters/rclwar.py %%DATADIR%%/filters/rclwpd %%DATADIR%%/filters/rclxls -%%DATADIR%%/filters/rclzip -%%DATADIR%%/filters/rcl7z +%%DATADIR%%/filters/rclzip.py +%%DATADIR%%/filters/rcl7z.py %%DATADIR%%/filters/xdg-open %%DATADIR%%/images/aptosid-book.png %%DATADIR%%/images/aptosid-manual.png diff --git a/packaging/homebrew/recoll.rb b/packaging/homebrew/recoll.rb index 20d55b31..a12c5377 100644 --- a/packaging/homebrew/recoll.rb +++ b/packaging/homebrew/recoll.rb @@ -60,40 +60,40 @@ index f41a9f39..dc3085a4 100755 # # This Source Code Form is subject to the terms of the Mozilla Public # License, v. 2.0. 
If a copy of the MPL was not distributed with this -diff --git filters/rcl7z filters/rcl7z +diff --git filters/rcl7z.py filters/rcl7z.py index c68c8bcb..ac50c4ec 100755 ---- filters/rcl7z -+++ filters/rcl7z +--- filters/rcl7z.py ++++ filters/rcl7z.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python2.7 # 7-Zip file filter for Recoll -diff --git filters/rclaudio filters/rclaudio +diff --git filters/rclaudio.py filters/rclaudio.py index 94ca0be7..08d6375a 100755 ---- filters/rclaudio -+++ filters/rclaudio +--- filters/rclaudio.py ++++ filters/rclaudio.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python2.7 # Audio tag filter for Recoll, using mutagen -diff --git filters/rclchm filters/rclchm +diff --git filters/rclchm.py filters/rclchm.py index f9811c37..3bc9b16d 100755 ---- filters/rclchm -+++ filters/rclchm +--- filters/rclchm.py ++++ filters/rclchm.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python2.7 """Extract Html files from a Microsoft Compiled Html Help file (.chm) Needs at least python 2.2 for HTMLParser (chmlib needs 2.2 too)""" -diff --git filters/rcldia filters/rcldia +diff --git filters/rcldia.py filters/rcldia.py index 282148eb..a480294b 100755 ---- filters/rcldia -+++ filters/rcldia +--- filters/rcldia.py ++++ filters/rcldia.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python2.7 @@ -120,30 +120,30 @@ index e8fa1831..b92b185d 100755 from __future__ import print_function import rclexecm -diff --git filters/rclepub filters/rclepub +diff --git filters/rclepub.py filters/rclepub.py index 8042d7f9..51786af1 100755 ---- filters/rclepub -+++ filters/rclepub +--- filters/rclepub.py ++++ filters/rclepub.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python2.7 """Extract Html content from an EPUB file (.epub)""" from __future__ import print_function -diff --git filters/rclepub1 filters/rclepub1 +diff --git filters/rclepub1.py filters/rclepub1.py index bd44f635..a7ea6c06 100755 ----
filters/rclepub1 -+++ filters/rclepub1 +--- filters/rclepub1.py ++++ filters/rclepub1.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python2.7 """Extract Html content from an EPUB file (.chm), concatenating all sections""" from __future__ import print_function -diff --git filters/rclics filters/rclics +diff --git filters/rclics.py filters/rclics.py index 0ef04f2d..de177024 100755 ---- filters/rclics -+++ filters/rclics +--- filters/rclics.py ++++ filters/rclics.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python2.7 @@ -160,20 +160,20 @@ index 7eb1da91..4eb6c9b0 100755 # Python-based Image Tag extractor for Recoll. This is less thorough # than the Perl-based rclimg script, but useful if you don't want to -diff --git filters/rclinfo filters/rclinfo +diff --git filters/rclinfo.py filters/rclinfo.py index f353d19e..36cf34e0 100755 ---- filters/rclinfo -+++ filters/rclinfo +--- filters/rclinfo.py ++++ filters/rclinfo.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python2.7 # Read a file in GNU info format and output its nodes as subdocs, # interfacing with recoll execm -diff --git filters/rclkar filters/rclkar +diff --git filters/rclkar.py filters/rclkar.py index d6570dd5..34b8d2a2 100755 ---- filters/rclkar -+++ filters/rclkar +--- filters/rclkar.py ++++ filters/rclkar.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python2.7 @@ -230,10 +230,10 @@ index 615455b3..1e411890 100755 # -*- coding: iso-8859-1 -*- """ MoinMoin - Python source parser and colorizer -diff --git filters/rclrar filters/rclrar +diff --git filters/rclrar.py filters/rclrar.py index 8f723fa5..5f6adfb0 100755 ---- filters/rclrar -+++ filters/rclrar +--- filters/rclrar.py ++++ filters/rclrar.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python2.7 @@ -280,10 +280,10 @@ index 8c1b8aea..cee17324 100755 # Copyright (C) 2014 J.F.Dockes # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General
Public License as published by -diff --git filters/rcltar filters/rcltar +diff --git filters/rcltar.py filters/rcltar.py index d8bf100d..ab4b306e 100755 ---- filters/rcltar -+++ filters/rcltar +--- filters/rcltar.py ++++ filters/rcltar.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python2.7 @@ -320,10 +320,10 @@ index 32a11c1a..eab3b257 100644 from __future__ import print_function import rclexecm -diff --git filters/rclwar filters/rclwar +diff --git filters/rclwar.py filters/rclwar.py index b654f3b3..301e28e9 100755 ---- filters/rclwar -+++ filters/rclwar +--- filters/rclwar.py ++++ filters/rclwar.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python2.7 @@ -360,10 +360,10 @@ index 158e1222..602769af 100755 # Copyright (C) 2016 J.F.Dockes # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by -diff --git filters/rclzip filters/rclzip +diff --git filters/rclzip.py filters/rclzip.py index 35739625..0c597fbd 100755 ---- filters/rclzip -+++ filters/rclzip +--- filters/rclzip.py ++++ filters/rclzip.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python2.7 diff --git a/src/Makefile.am b/src/Makefile.am index de99959e..7ac3d2d1 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -686,31 +686,31 @@ filters/openxml-xls-body.xsl \ filters/openxml-word-body.xsl \ filters/openxml-meta.xsl \ filters/ppt-dump.py \ -filters/rcl7z \ +filters/rcl7z.py \ filters/rclaptosidman \ -filters/rclaudio \ +filters/rclaudio.py \ filters/rclbasehandler.py \ filters/rclbibtex.sh \ filters/rclcheckneedretry.sh \ -filters/rclchm \ -filters/rcldia \ +filters/rclchm.py \ +filters/rcldia.py \ filters/rcldjvu.py \ filters/rcldoc.py \ filters/rcldvi \ -filters/rclepub \ -filters/rclepub1 \ +filters/rclepub.py \ +filters/rclepub1.py \ filters/rclexec1.py \ filters/rclexecm.py \ filters/rclfb2.py \ filters/rclgaim \ filters/rclgenxslt.py \ filters/rclhwp.py \ -filters/rclics \ 
+filters/rclics.py \ filters/rclimg \ filters/rclimg.py \ -filters/rclinfo \ +filters/rclinfo.py \ filters/rclipynb.py \ -filters/rclkar \ +filters/rclkar.py \ filters/rclkwd \ filters/rcllatinclass.py \ filters/rcllatinstops.zip \ @@ -729,21 +729,21 @@ filters/rclps \ filters/rclpst.py \ filters/rclpurple \ filters/rclpython.py \ -filters/rclrar \ +filters/rclrar.py \ filters/rclrtf.py \ filters/rclscribus \ filters/rclshowinfo \ -filters/rcltar \ +filters/rcltar.py \ filters/rcltex \ filters/rcltext.py \ filters/rcluncomp \ filters/rcluncomp.py \ -filters/rclwar \ +filters/rclwar.py \ filters/rclxls.py \ filters/rclxml.py \ filters/rclxmp.py \ filters/rclxslt.py \ -filters/rclzip \ +filters/rclzip.py \ filters/recoll-we-move-files.py \ filters/recollepub.zip \ filters/svg.xsl \ diff --git a/src/README b/src/README index 75b13be0..30a2764e 100644 --- a/src/README +++ b/src/README @@ -2763,8 +2763,8 @@ Chapter 4. Programming interface If you can program and want to write an execm handler, it should not be too difficult to make sense of one of the existing modules. For example, - look at rclzip which uses Zip file paths as identifiers (ipath), and - rclics, which uses an integer index. Also have a look at the comments + look at rclzip.py which uses Zip file paths as identifiers (ipath), and + rclics.py, which uses an integer index. Also have a look at the comments inside the internfile/mh_execm.h file and possibly at the corresponding module. @@ -2819,7 +2819,7 @@ Chapter 4. 
Programming interface text/rtf = exec unrtf --nopict --html; charset=iso-8859-1; mimetype=text/html - application/x-chm = execm rclchm + application/x-chm = execm rclchm.py The fragment specifies that: diff --git a/src/RECOLL-VERSION.txt b/src/RECOLL-VERSION.txt index deade24a..359c4108 100644 --- a/src/RECOLL-VERSION.txt +++ b/src/RECOLL-VERSION.txt @@ -1 +1 @@ -1.31.6 +1.32.0 diff --git a/src/doc/man/recoll.conf.5 b/src/doc/man/recoll.conf.5 index ff3390a7..5617976f 100644 --- a/src/doc/man/recoll.conf.5 +++ b/src/doc/man/recoll.conf.5 @@ -148,7 +148,7 @@ not set, the daemon uses skippedPaths. .TP .BI "zipUseSkippedNames = "bool Use skippedNames inside Zip archives. Fetched -directly by the rclzip handler. Skip the patterns defined by skippedNames +directly by the rclzip.py handler. Skip the patterns defined by skippedNames inside Zip archives. Can be redefined for subdirectories. See https://www.lesbonscomptes.com/recoll/faqsandhowtos/FilteringOutZipArchiveMembers.html @@ -195,7 +195,7 @@ lets you turn off md5 computation for selected types. It is global (no redefinition for subtrees). At the moment, it only has an effect for external handlers (exec and execm). The file types can be specified by listing either MIME types (e.g. audio/mpeg) or handler names -(e.g. rclaudio). +(e.g. rclaudio.py). .TP .BI "compressedfilemaxkbs = "int Size limit for compressed diff --git a/src/doc/user/recoll.conf.xml b/src/doc/user/recoll.conf.xml index 7fbaec38..be66bd0e 100644 --- a/src/doc/user/recoll.conf.xml +++ b/src/doc/user/recoll.conf.xml @@ -112,7 +112,7 @@ not set, the daemon uses skippedPaths. zipUseSkippedNames Use skippedNames inside Zip archives. Fetched -directly by the rclzip handler. Skip the patterns defined by skippedNames +directly by the rclzip.py handler. Skip the patterns defined by skippedNames inside Zip archives. Can be redefined for subdirectories. 
See https://www.lesbonscomptes.com/recoll/faqsandhowtos/FilteringOutZipArchiveMembers.html @@ -162,7 +162,7 @@ lets you turn off md5 computation for selected types. It is global (no redefinition for subtrees). At the moment, it only has an effect for external handlers (exec and execm). The file types can be specified by listing either MIME types (e.g. audio/mpeg) or handler names -(e.g. rclaudio). +(e.g. rclaudio.py). compressedfilemaxkbs diff --git a/src/doc/user/usermanual.html b/src/doc/user/usermanual.html index 389427e1..9cb3df43 100644 --- a/src/doc/user/usermanual.html +++ b/src/doc/user/usermanual.html @@ -6581,9 +6581,10 @@ text/html [file:///Users/uncrypted-dockes/projets/bateaux/ilur/factEtCie/r

    All the Python handlers share at least the rclexecm.py module, which handles the communication. Have a look at, - for example, rclzip - for a handler which uses rclexecm.py directly.

    + for example, rclzip.py for a handler which + uses rclexecm.py + directly.

  • Most Python handlers which process @@ -6633,7 +6634,7 @@ text/html [file:///Users/uncrypted-dockes/projets/bateaux/ilur/factEtCie/r "_top">Git repository (the sample not in the distributed release at the moment).

    You can also have a look at the slightly more complex - rclzip + rclzip.py which uses Zip file paths as identifiers (ipath).

    execm handlers sometimes @@ -6726,7 +6727,7 @@ text/html [file:///Users/uncrypted-dockes/projets/bateaux/ilur/factEtCie/r text/rtf = exec unrtf --nopict --html; charset=iso-8859-1; mimetype=text/html - application/x-chm = execm rclchm + application/x-chm = execm rclchm.py

    The fragment specifies that:

    @@ -6880,7 +6881,7 @@ text/html [file:///Users/uncrypted-dockes/projets/bateaux/ilur/factEtCie/r "literal">setfield() helper method. This avoids the necessity to produce HTML, and any issue with HTML quoting. See, for example, rclaudio in rclaudio.py in Recoll 1.23 and later for an example of handler which outputs text/plain and uses zipUseSkippedNames

    Use skippedNames inside Zip archives. Fetched - directly by the rclzip handler. Skip the patterns - defined by skippedNames inside Zip archives. Can - be redefined for subdirectories. See + directly by the rclzip.py handler. Skip the + patterns defined by skippedNames inside Zip + archives. Can be redefined for subdirectories. + See https://www.lesbonscomptes.com/recoll/faqsandhowtos/FilteringOutZipArchiveMembers.html

    + audio/mpeg) or handler names (e.g. + rclaudio.py).

    All the Python handlers share at least the rclexecm.py module, which handles the communication. Have a look at, for - example, rclzip for a handler which + example, rclzip.py for a handler which uses rclexecm.py directly. Most Python handlers which process single-document files by executing another command @@ -4994,7 +4994,7 @@ text/html [file:///Users/uncrypted-dockes/projets/bateaux/ilur/factEtCie/r the moment). You can also have a look at the slightly more complex - rclzip which uses Zip + rclzip.py which uses Zip file paths as identifiers (ipath). execm handlers sometimes need to make @@ -5062,7 +5062,7 @@ text/html [file:///Users/uncrypted-dockes/projets/bateaux/ilur/factEtCie/r text/rtf = exec unrtf --nopict --html; charset=iso-8859-1; mimetype=text/html - application/x-chm = execm rclchm + application/x-chm = execm rclchm.py The fragment specifies that: @@ -5205,7 +5205,7 @@ text/html [file:///Users/uncrypted-dockes/projets/bateaux/ilur/factEtCie/r method to produce metadata, by calling the setfield() helper method. This avoids the necessity to produce HTML, and any issue with HTML quoting. See, - for example, rclaudio in &RCL; 1.23 and + for example, rclaudio.py in &RCL; 1.23 and later for an example of handler which outputs text/plain and uses setfield() to produce metadata. 
diff --git a/src/filters/rcl7z b/src/filters/rcl7z.py similarity index 98% rename from src/filters/rcl7z rename to src/filters/rcl7z.py index 7ba035ac..96022a30 100755 --- a/src/filters/rcl7z +++ b/src/filters/rcl7z.py @@ -3,7 +3,7 @@ # 7-Zip file filter for Recoll # Thanks to Recoll user Martin Ziegler -# This is a modified version of rclzip, with some help from rcltar +# This is a modified version of rclzip.py, with some help from rcltar.py # # Normally using py7zr https://github.com/miurahr/py7zr # diff --git a/src/filters/rclaudio b/src/filters/rclaudio.py similarity index 100% rename from src/filters/rclaudio rename to src/filters/rclaudio.py diff --git a/src/filters/rclchm b/src/filters/rclchm.py similarity index 100% rename from src/filters/rclchm rename to src/filters/rclchm.py diff --git a/src/filters/rcldia b/src/filters/rcldia.py similarity index 97% rename from src/filters/rcldia rename to src/filters/rcldia.py index 3869bced..45dbeb16 100755 --- a/src/filters/rcldia +++ b/src/filters/rcldia.py @@ -6,7 +6,7 @@ from __future__ import print_function # stefan.friedel@iwr.uni-heidelberg.de 2012 # # add the following to ~/.recoll/mimeconf into the [index] section: -# application/x-dia-diagram = execm rcldia;mimetype=text/plain;charset=utf-8 +# application/x-dia-diagram = execm rcldia.py;mimetype=text/plain;charset=utf-8 # and into the [icons] section: # application/x-dia-diagram = drawing # and finally under [categories]: diff --git a/src/filters/rclepub b/src/filters/rclepub.py similarity index 100% rename from src/filters/rclepub rename to src/filters/rclepub.py diff --git a/src/filters/rclepub1 b/src/filters/rclepub1.py similarity index 100% rename from src/filters/rclepub1 rename to src/filters/rclepub1.py diff --git a/src/filters/rclexecm.py b/src/filters/rclexecm.py index 42d2ff76..1a68a4ce 100644 --- a/src/filters/rclexecm.py +++ b/src/filters/rclexecm.py @@ -372,7 +372,7 @@ def main(proto, extract): params = {'filename' : makebytes(path)} - # Some 
filters (e.g. rclaudio) need/get a MIME type from the indexer. + # Some filters (e.g. rclaudio.py) need/get a MIME type from the indexer. # We make a half-assed attempt to emulate: mimetype = _g_config.mimeType(path) if not mimetype and not _g_mswindows: diff --git a/src/filters/rclics b/src/filters/rclics.py similarity index 100% rename from src/filters/rclics rename to src/filters/rclics.py diff --git a/src/filters/rclinfo b/src/filters/rclinfo.py similarity index 99% rename from src/filters/rclinfo rename to src/filters/rclinfo.py index cdc1d4da..ff5d711d 100755 --- a/src/filters/rclinfo +++ b/src/filters/rclinfo.py @@ -141,7 +141,7 @@ class InfoSimpleSplitter: if name == b'File': infofile = value except Exception as err: - print("rclinfo: bad line in %s: [%s] %s\n" % \ + print("rclinfo.py: bad line in %s: [%s] %s\n" % \ (infofile, line, err), file = sys.stderr) nodename = prevnodename node += line diff --git a/src/filters/rclkar b/src/filters/rclkar.py similarity index 100% rename from src/filters/rclkar rename to src/filters/rclkar.py diff --git a/src/filters/rclrar b/src/filters/rclrar.py similarity index 98% rename from src/filters/rclrar rename to src/filters/rclrar.py index e6b38bb1..4c3fa766 100755 --- a/src/filters/rclrar +++ b/src/filters/rclrar.py @@ -60,7 +60,7 @@ except Exception as ex: # (https://www.rarlab.com/rar_add.htm). The unrar-free version fails # with the message "Failed the read enough data" # -# This is identical to rclzip except I did a search/replace from zip +# This is identical to rclzip.py except I did a search/replace from zip # to rar, and changed this comment. 
class RarExtractor: def __init__(self, em): diff --git a/src/filters/rcltar b/src/filters/rcltar.py similarity index 99% rename from src/filters/rcltar rename to src/filters/rcltar.py index c6f2bf4f..1389c703 100755 --- a/src/filters/rcltar +++ b/src/filters/rcltar.py @@ -2,7 +2,7 @@ # Tar-file filter for Recoll # Thanks to Recoll user Martin Ziegler -# This is a modified version of /usr/share/recoll/filters/rclzip +# This is a modified version of /usr/share/recoll/filters/rclzip.py # It works not only for tar-files, but automatically for gzipped and # bzipped tar-files at well. diff --git a/src/filters/rclwar b/src/filters/rclwar.py similarity index 100% rename from src/filters/rclwar rename to src/filters/rclwar.py diff --git a/src/filters/rclzip b/src/filters/rclzip.py similarity index 99% rename from src/filters/rclzip rename to src/filters/rclzip.py index dc046fdf..116609db 100755 --- a/src/filters/rclzip +++ b/src/filters/rclzip.py @@ -51,7 +51,7 @@ if not hasrclconfig: # and stores it in the catalog as an unicode object. Else it uses the # binary string, which it decodes as CP437 (zip standard). # -# When reading the file, the input file name is used by rclzip +# When reading the file, the input file name is used by rclzip.py # directly as an index into the catalog. # # When we send the file name data to the indexer, we have to serialize diff --git a/src/internfile/internfile.cpp b/src/internfile/internfile.cpp index c60e1e21..19c4c44c 100644 --- a/src/internfile/internfile.cpp +++ b/src/internfile/internfile.cpp @@ -51,7 +51,7 @@ using namespace std; // The internal path element separator. This can't be the same as the rcldb // file to ipath separator : "|" // We replace it with a control char if it comes out of a filter (ie: -// rclzip or rclchm can do this). If you want the SOH control char +// rclzip.py or rclchm.py can do this). If you want the SOH control char // inside an ipath, you're out of luck (and a bit weird). 
static const string cstr_isep(":"); diff --git a/src/internfile/mh_exec.cpp b/src/internfile/mh_exec.cpp index 47ad9db7..88097aa6 100644 --- a/src/internfile/mh_exec.cpp +++ b/src/internfile/mh_exec.cpp @@ -82,8 +82,7 @@ bool MimeHandlerExec::set_document_file_impl(const std::string& mt, tpsread = true; if (!nomd5tps.empty()) { if (params.size() && - nomd5tps.find(path_getsimple(params[0])) != - nomd5tps.end()) { + nomd5tps.find(path_getsimple(params[0])) != nomd5tps.end()) { m_handlernomd5 = true; } // On windows the 1st param is often a script interp diff --git a/src/internfile/mh_execm.h b/src/internfile/mh_execm.h index 7956de8e..27fcba20 100644 --- a/src/internfile/mh_execm.h +++ b/src/internfile/mh_execm.h @@ -73,7 +73,7 @@ text/plainData: 10 * time). Absent during indexing (ipaths are generated and sent back from * the script) * - Mimetype: this is the mime type for the (possibly container) file. - * Can be useful to filters which handle multiple types, like rclaudio. + * Can be useful to filters which handle multiple types, like rclaudio.py. * * The script answers with messages having the following fields: * - Document: translated document data. 
diff --git a/src/qtgui/recoll-win.pro b/src/qtgui/recoll-win.pro index fa478c4a..5e18edf2 100644 --- a/src/qtgui/recoll-win.pro +++ b/src/qtgui/recoll-win.pro @@ -209,30 +209,30 @@ mac { ../filters/openxml-word-body.xsl \ ../filters/openxml-meta.xsl \ ../filters/ppt-dump.py \ - ../filters/rcl7z \ + ../filters/rcl7z.py \ ../filters/rclaptosidman \ - ../filters/rclaudio \ + ../filters/rclaudio.py \ ../filters/rclbasehandler.py \ ../filters/rclbibtex.sh \ ../filters/rclcheckneedretry.sh \ - ../filters/rclchm \ - ../filters/rcldia \ + ../filters/rclchm.py \ + ../filters/rcldia.py \ ../filters/rcldjvu.py \ ../filters/rcldoc.py \ ../filters/rcldvi \ - ../filters/rclepub \ - ../filters/rclepub1 \ + ../filters/rclepub.py \ + ../filters/rclepub1.py \ ../filters/rclexec1.py \ ../filters/rclexecm.py \ ../filters/rclfb2.py \ ../filters/rclgaim \ ../filters/rclgenxslt.py \ ../filters/rclhwp.py \ - ../filters/rclics \ + ../filters/rclics.py \ ../filters/rclimg \ ../filters/rclimg.py \ - ../filters/rclinfo \ - ../filters/rclkar \ + ../filters/rclinfo.py \ + ../filters/rclkar.py \ ../filters/rclkwd \ ../filters/rcllatinclass.py \ ../filters/rcllatinstops.zip \ @@ -250,21 +250,21 @@ mac { ../filters/rclpst.py \ ../filters/rclpurple \ ../filters/rclpython.py \ - ../filters/rclrar \ + ../filters/rclrar.py \ ../filters/rclrtf.py \ ../filters/rclscribus \ ../filters/rclshowinfo \ - ../filters/rcltar \ + ../filters/rcltar.py \ ../filters/rcltex \ ../filters/rcltext.py \ ../filters/rcluncomp \ ../filters/rcluncomp.py \ - ../filters/rclwar \ + ../filters/rclwar.py \ ../filters/rclxls.py \ ../filters/rclxml.py \ ../filters/rclxmp.py \ ../filters/rclxslt.py \ - ../filters/rclzip \ + ../filters/rclzip.py \ ../filters/recoll-we-move-files.py \ ../filters/recollepub.zip \ ../filters/svg.xsl \ diff --git a/src/sampleconf/mimeconf b/src/sampleconf/mimeconf index f41e5d06..39bb54ca 100644 --- a/src/sampleconf/mimeconf +++ b/src/sampleconf/mimeconf @@ -148,25 +148,25 @@ 
application/vnd.sun.xml.writer.template = \ body content.xml opendoc-body.xsl #application/x-mobipocket-ebook = execm rclmobi -#application/x-tar = execm rcltar +#application/x-tar = execm rcltar.py -application/epub+zip = execm rclepub +application/epub+zip = execm rclepub.py application/x-ipynb+json = exec jupyter nbconvert --to script --stdout ; mimetype = text/plain application/javascript = internal text/plain -application/ogg = execm rclaudio +application/ogg = execm rclaudio.py application/pdf = execm rclpdf.py application/postscript = exec rclps application/sql = internal text/plain application/vnd.wordperfect = exec wpd2html;mimetype=text/html -application/x-7z-compressed = execm rcl7z +application/x-7z-compressed = execm rcl7z.py application/x-abiword = internal xsltproc abiword.xsl application/x-awk = internal text/plain -application/x-chm = execm rclchm -application/x-dia-diagram = execm rcldia;mimetype=text/plain +application/x-chm = execm rclchm.py +application/x-dia-diagram = execm rcldia.py;mimetype=text/plain application/x-dvi = exec rcldvi -application/x-flac = execm rclaudio +application/x-flac = execm rclaudio.py application/x-gnote = execm rclxml.py -application/x-gnuinfo = execm rclinfo +application/x-gnuinfo = execm rclinfo.py application/x-gnumeric = internal xsltproc gnumeric.xsl application/x-hwp = execm rclhwp.py application/x-kword = exec rclkwd @@ -175,22 +175,22 @@ application/x-mimehtml = internal message/rfc822 application/x-okular-notes = internal xsltproc okular-note.xsl application/x-perl = internal text/plain application/x-php = internal text/plain -application/x-rar = execm rclrar;charset=default +application/x-rar = execm rclrar.py;charset=default application/x-ruby = internal text/plain application/x-scribus = exec rclscribus application/x-shellscript = internal text/plain application/x-tex = exec rcltex -application/x-webarchive = execm rclwar +application/x-webarchive = execm rclwar.py application/x-zerosize = internal 
-application/zip = execm rclzip;charset=default -audio/aac = execm rclaudio -audio/ape = execm rclaudio -audio/mp4 = execm rclaudio -audio/mpeg = execm rclaudio -audio/ogg = execm rclaudio -audio/x-karaoke = execm rclkar -audio/x-musepack = execm rclaudio -audio/x-wavpack = execm rclaudio +application/zip = execm rclzip.py;charset=default +audio/aac = execm rclaudio.py +audio/ape = execm rclaudio.py +audio/mp4 = execm rclaudio.py +audio/mpeg = execm rclaudio.py +audio/ogg = execm rclaudio.py +audio/x-karaoke = execm rclkar.py +audio/x-musepack = execm rclaudio.py +audio/x-wavpack = execm rclaudio.py image/gif = execm rclimg image/jp2 = execm rclimg image/jpeg = execm rclimg @@ -203,7 +203,7 @@ image/x-xcf = execm rclimg inode/symlink = internal inode/x-empty = internal application/x-zerosize message/rfc822 = internal -text/calendar = execm rclics;mimetype=text/plain +text/calendar = execm rclics.py;mimetype=text/plain text/css = internal text/plain text/html = internal text/plain = internal @@ -234,7 +234,7 @@ text/x-ruby = internal text/x-shellscript = internal text/plain text/x-srt = internal text/plain text/x-tex = exec rcltex -video/mp4 = execm rclaudio +video/mp4 = execm rclaudio.py video/x-msvideo = execm rclimg diff --git a/src/sampleconf/mimemap b/src/sampleconf/mimemap index ced4ecde..bc9ecf95 100644 --- a/src/sampleconf/mimemap +++ b/src/sampleconf/mimemap @@ -107,7 +107,7 @@ .maff = application/zip .7z = application/x-7z-compressed -# The rcltar module can handle compressed tar formats internally so we +# The rcltar.py module can handle compressed tar formats internally so we # use application/x-tar for all tar files compressed or not. 
Note that tar # file indexing is disabled by default, you'll need to copy and uncomment # the application/x-tar commented line from mimeconf into your personal config diff --git a/src/sampleconf/recoll.conf b/src/sampleconf/recoll.conf index aceba66a..96b4c245 100644 --- a/src/sampleconf/recoll.conf +++ b/src/sampleconf/recoll.conf @@ -155,7 +155,7 @@ skippedPaths = /media # # # Use skippedNames inside Zip archives.Fetched -# directly by the rclzip handler. Skip the patterns defined by skippedNames +# directly by the rclzip.py handler. Skip the patterns defined by skippedNames # inside Zip archives. Can be redefined for subdirectories. # See https://www.lesbonscomptes.com/recoll/faqsandhowtos/FilteringOutZipArchiveMembers.html # @@ -206,9 +206,9 @@ skippedPaths = /media # redefinition for subtrees). At the moment, it only has an effect for # external handlers (exec and execm). The file types can be specified by # listing either MIME types (e.g. audio/mpeg) or handler names -# (e.g. rclaudio). +# (e.g. rclaudio.py). 
# -nomd5types = rclaudio +nomd5types = rclaudio.py # Size limit for compressed # files.We need to decompress these in a diff --git a/src/windows/mimeconf b/src/windows/mimeconf index 6bc6545e..6af57244 100644 --- a/src/windows/mimeconf +++ b/src/windows/mimeconf @@ -131,36 +131,36 @@ application/vnd.sun.xml.writer.template = \ body content.xml opendoc-body.xsl #application/postscript = exec rclps -#application/x-gnuinfo = execm python rclinfo -#application/x-tar = execm python rcltar +#application/x-gnuinfo = execm python rclinfo.py +#application/x-tar = execm python rcltar.py -application/epub+zip = execm python rclepub +application/epub+zip = execm python rclepub.py application/x-ipynb+json = execm python rclipynb.py application/javascript = internal text/plain -application/ogg = execm python rclaudio +application/ogg = execm python rclaudio.py application/pdf = execm python rclpdf.py application/sql = internal text/plain application/vnd.wordperfect = exec wpd/wpd2html;mimetype=text/html -application/x-7z-compressed = execm python rcl7z +application/x-7z-compressed = execm python rcl7z.py application/x-abiword = internal xsltproc abiword.xsl application/x-awk = internal text/plain -application/x-chm = execm python rclchm -application/x-dia-diagram = execm python rcldia;mimetype=text/plain -application/x-flac = execm python rclaudio +application/x-chm = execm python rclchm.py +application/x-dia-diagram = execm python rcldia.py;mimetype=text/plain +application/x-flac = execm python rclaudio.py application/x-gnote = execm python rclxml.py application/x-hwp = execm python rclhwp.py application/x-mimehtml = internal message/rfc822 application/x-perl = internal text/plain application/x-php = internal text/plain -application/x-rar = execm python rclrar;charset=default +application/x-rar = execm python rclrar.py;charset=default application/x-shellscript = internal text/plain -application/x-webarchive = execm python rclwar +application/x-webarchive = execm python 
rclwar.py application/x-zerosize = internal -application/zip = execm python rclzip;charset=default -audio/aac = execm python rclaudio -audio/mp4 = execm python rclaudio -audio/mpeg = execm python rclaudio -audio/x-karaoke = execm python rclkar +application/zip = execm python rclzip.py;charset=default +audio/aac = execm python rclaudio.py +audio/mp4 = execm python rclaudio.py +audio/mpeg = execm python rclaudio.py +audio/x-karaoke = execm python rclkar.py image/gif = execm rclimg.exe image/jp2 = execm rclimg.exe image/jpeg = execm rclimg.exe @@ -171,7 +171,7 @@ image/vnd.djvu = execm python rcldjvu.py inode/symlink = internal inode/x-empty = internal application/x-zerosize message/rfc822 = internal -text/calendar = execm python rclics;mimetype=text/plain +text/calendar = execm python rclics.py;mimetype=text/plain text/css = internal text/plain text/html = internal text/plain = internal diff --git a/tests/config/mimeconf b/tests/config/mimeconf index 974d09ce..b38b5fff 100644 --- a/tests/config/mimeconf +++ b/tests/config/mimeconf @@ -8,4 +8,4 @@ # values is identical. [index] -application/x-tar = execm rcltar +application/x-tar = execm rcltar.py From 844b4e8b03e9036ba8c721c3fe62e93f98ec747d Mon Sep 17 00:00:00 2001 From: Jean-Francois Dockes Date: Fri, 14 Jan 2022 13:11:24 +0100 Subject: [PATCH 15/19] windows: rely on .py extension instead of special keyword to determine use of interpreter --- src/common/autoconfig-mac.h | 4 +- src/common/autoconfig-win.h | 4 +- src/common/rclconfig.cpp | 37 +++++++++--------- src/windows/mimeconf | 76 ++++++++++++++++++------------------- 4 files changed, 59 insertions(+), 62 deletions(-) diff --git a/src/common/autoconfig-mac.h b/src/common/autoconfig-mac.h index 4d452f7a..4007bd35 100644 --- a/src/common/autoconfig-mac.h +++ b/src/common/autoconfig-mac.h @@ -125,7 +125,7 @@ #define PACKAGE_NAME "Recoll" /* Define to the full name and version of this package. 
*/ -#define PACKAGE_STRING "Recoll 1.31.6" +#define PACKAGE_STRING "Recoll 1.32.0" /* Define to the one symbol short name of this package. */ #define PACKAGE_TARNAME "recoll" @@ -134,7 +134,7 @@ #define PACKAGE_URL "" /* Define to the version of this package. */ -#define PACKAGE_VERSION "1.31.6" +#define PACKAGE_VERSION "1.32.0" /* putenv parameter is const */ /* #undef PUTENV_ARG_CONST */ diff --git a/src/common/autoconfig-win.h b/src/common/autoconfig-win.h index 8c589ecc..2cb98f56 100644 --- a/src/common/autoconfig-win.h +++ b/src/common/autoconfig-win.h @@ -118,7 +118,7 @@ #define PACKAGE_NAME "Recoll" /* Define to the full name and version of this package. */ -#define PACKAGE_STRING "Recoll 1.31.6" +#define PACKAGE_STRING "Recoll 1.32.0" /* Define to the one symbol short name of this package. */ #define PACKAGE_TARNAME "recoll" @@ -127,7 +127,7 @@ #define PACKAGE_URL "" /* Define to the version of this package. */ -#define PACKAGE_VERSION "1.31.6" +#define PACKAGE_VERSION "1.32.0" /* putenv parameter is const */ /* #undef PUTENV_ARG_CONST */ diff --git a/src/common/rclconfig.cpp b/src/common/rclconfig.cpp index e1fd78f0..ef8f6351 100644 --- a/src/common/rclconfig.cpp +++ b/src/common/rclconfig.cpp @@ -1633,6 +1633,7 @@ vector RclConfig::getDaemSkippedPaths() const // and filtersdir from the config file to the PATH, then use execmd::which() string RclConfig::findFilter(const string &icmd) const { + LOGDEB2("findFilter: " << icmd << "\n"); // If the path is absolute, this is it if (path_isabsolute(icmd)) return icmd; @@ -1680,13 +1681,19 @@ bool RclConfig::processFilterCmd(std::vector& cmd) const LOGDEB0("processFilterCmd: in: " << stringsToString(cmd) << "\n"); auto it = cmd.begin(); - // Special-case python and perl on windows: we need to also locate the - // first argument which is the script name "python somescript.py". 
- // On Unix, thanks to #!, we usually just run "somescript.py", but need - // the same change if we ever want to use the same cmd line as windows - bool hasinterp = !stringlowercmp("python", *it) || - !stringlowercmp("perl", *it); - +#ifdef _WIN32 + // Special-case interpreters on windows: we used to have an additional 1st argument "python" in + // mimeconf, but we now rely on the .py extension for better sharing of mimeconf. + std::string ext = path_suffix(*it); + if ("py" == ext) { + it = cmd.insert(it, findFilter("python")); + it++; + } else if ("pl" == ext) { + it = cmd.insert(it, findFilter("perl")); + it++; + } +#endif + // Note that, if the cmd vector size is 1, post-incrementing the // iterator in the following statement, which works on x86, leads // to a crash on ARM with gcc 6 and 8 (at least), which does not @@ -1694,25 +1701,15 @@ bool RclConfig::processFilterCmd(std::vector& cmd) const // whatever... We do it later then. *it = findFilter(*it); - if (hasinterp) { - if (cmd.size() < 2) { - LOGERR("processFilterCmd: python/perl cmd: no script?. [" << - stringsToString(cmd) << "]\n"); - return false; - } else { - ++it; - *it = findFilter(*it); - } - } LOGDEB0("processFilterCmd: out: " << stringsToString(cmd) << "\n"); return true; } -bool RclConfig::pythonCmd(const std::string& scriptname, - std::vector& cmd) const +// This now does nothing more than processFilterCmd (after we changed to relying on the py extension) +bool RclConfig::pythonCmd(const std::string& scriptname, std::vector& cmd) const { #ifdef _WIN32 - cmd = {"python", scriptname}; + cmd = {scriptname}; #else cmd = {scriptname}; #endif diff --git a/src/windows/mimeconf b/src/windows/mimeconf index 6af57244..832afa45 100644 --- a/src/windows/mimeconf +++ b/src/windows/mimeconf @@ -1,4 +1,4 @@ -# (C) 2015 J.F.Dockes +# (C) 2015-2022 J.F.Dockes # This file contains most of the data which determines how we # handle the different mime types (also see the "mimeview" file). 
@@ -30,12 +30,12 @@ # The script (ie: rcluncomp) must output the uncompressed file name on # stdout. Note that the windows version will always use 7z, and ignore # the decompressor parameter in the following lines -application/gzip = uncompress python rcluncomp.py 7z %f %t -application/x-gzip = uncompress python rcluncomp.py 7z %f %t -application/x-compress = uncompress python rcluncomp.py 7z %f %t -application/x-bzip2 = uncompress python rcluncomp.py 7z %f %t -application/x-xz = uncompress python rcluncomp.py 7z %f %t -application/x-lzma = uncompress python rcluncomp.py 7z %f %t +application/gzip = uncompress rcluncomp.py 7z %f %t +application/x-gzip = uncompress rcluncomp.py 7z %f %t +application/x-compress = uncompress rcluncomp.py 7z %f %t +application/x-bzip2 = uncompress rcluncomp.py 7z %f %t +application/x-xz = uncompress rcluncomp.py 7z %f %t +application/x-lzma = uncompress rcluncomp.py 7z %f %t ## ################################### @@ -47,14 +47,14 @@ application/x-lzma = uncompress python rcluncomp.py 7z %f %t # each filter, see the exemples below (ie: msword) [index] -application/msword = execm python rcldoc.py -application/vnd.ms-excel = execm python rclxls.py -application/vnd.ms-outlook = execm python rclpst.py -application/vnd.ms-powerpoint = execm python rclppt.py +application/msword = execm rcldoc.py +application/vnd.ms-excel = execm rclxls.py +application/vnd.ms-outlook = execm rclpst.py +application/vnd.ms-powerpoint = execm rclppt.py # Also Handle the mime type returned by "file -i" for a suffix-less word # file. This could probably just as well be an excel file, but we have to # chose one. 
-application/vnd.ms-office = execm python rcldoc.py +application/vnd.ms-office = execm rcldoc.py application/vnd.oasis.opendocument.text = \ internal xsltproc meta meta.xml opendoc-meta.xsl \ @@ -89,9 +89,9 @@ application/vnd.openxmlformats-officedocument.wordprocessingml.template = \ body word/footnotes.xml openxml-word-body.xsl \ body word/endnotes.xml openxml-word-body.xsl application/vnd.openxmlformats-officedocument.presentationml.template = \ - execm python rclopxml.py + execm rclopxml.py application/vnd.openxmlformats-officedocument.presentationml.presentation = \ - execm python rclopxml.py + execm rclopxml.py application/vnd.openxmlformats-officedocument.spreadsheetml.sheet = \ internal xsltproc meta docProps/core.xml openxml-meta.xsl \ body xl/sharedStrings.xml openxml-xls-body.xsl @@ -131,52 +131,52 @@ application/vnd.sun.xml.writer.template = \ body content.xml opendoc-body.xsl #application/postscript = exec rclps -#application/x-gnuinfo = execm python rclinfo.py -#application/x-tar = execm python rcltar.py +#application/x-gnuinfo = execm rclinfo.py +#application/x-tar = execm rcltar.py -application/epub+zip = execm python rclepub.py -application/x-ipynb+json = execm python rclipynb.py +application/epub+zip = execm rclepub.py +application/x-ipynb+json = execm rclipynb.py application/javascript = internal text/plain -application/ogg = execm python rclaudio.py -application/pdf = execm python rclpdf.py +application/ogg = execm rclaudio.py +application/pdf = execm rclpdf.py application/sql = internal text/plain application/vnd.wordperfect = exec wpd/wpd2html;mimetype=text/html -application/x-7z-compressed = execm python rcl7z.py +application/x-7z-compressed = execm rcl7z.py application/x-abiword = internal xsltproc abiword.xsl application/x-awk = internal text/plain -application/x-chm = execm python rclchm.py -application/x-dia-diagram = execm python rcldia.py;mimetype=text/plain -application/x-flac = execm python rclaudio.py -application/x-gnote = execm 
python rclxml.py -application/x-hwp = execm python rclhwp.py +application/x-chm = execm rclchm.py +application/x-dia-diagram = execm rcldia.py;mimetype=text/plain +application/x-flac = execm rclaudio.py +application/x-gnote = execm rclxml.py +application/x-hwp = execm rclhwp.py application/x-mimehtml = internal message/rfc822 application/x-perl = internal text/plain application/x-php = internal text/plain -application/x-rar = execm python rclrar.py;charset=default +application/x-rar = execm rclrar.py;charset=default application/x-shellscript = internal text/plain -application/x-webarchive = execm python rclwar.py +application/x-webarchive = execm rclwar.py application/x-zerosize = internal -application/zip = execm python rclzip.py;charset=default -audio/aac = execm python rclaudio.py -audio/mp4 = execm python rclaudio.py -audio/mpeg = execm python rclaudio.py -audio/x-karaoke = execm python rclkar.py +application/zip = execm rclzip.py;charset=default +audio/aac = execm rclaudio.py +audio/mp4 = execm rclaudio.py +audio/mpeg = execm rclaudio.py +audio/x-karaoke = execm rclkar.py image/gif = execm rclimg.exe image/jp2 = execm rclimg.exe image/jpeg = execm rclimg.exe image/png = execm rclimg.exe image/svg+xml = internal xsltproc svg.xsl image/tiff = execm rclimg.exe -image/vnd.djvu = execm python rcldjvu.py +image/vnd.djvu = execm rcldjvu.py inode/symlink = internal inode/x-empty = internal application/x-zerosize message/rfc822 = internal -text/calendar = execm python rclics.py;mimetype=text/plain +text/calendar = execm rclics.py;mimetype=text/plain text/css = internal text/plain text/html = internal text/plain = internal text/plain1 = internal -#text/rtf = execm python rclrtf.py +#text/rtf = execm rclrtf.py text/rtf = exec unrtf --nopict --html;mimetype=text/html text/x-c = internal text/x-c+ = internal @@ -187,9 +187,9 @@ text/x-csv = internal text/plain text/x-fictionbook = internal xsltproc fb2.xsl text/x-ini = internal text/plain text/x-mail = internal 
-text/x-orgmode = execm python rclorgmode.py +text/x-orgmode = execm rclorgmode.py text/x-perl = internal text/plain -text/x-python = execm python rclpython.py +text/x-python = execm rclpython.py text/x-shellscript = internal text/plain text/x-srt = internal text/plain image/x-xcf = execm rclimg.exe From 47ea12353af1a7ea7df47eec6f7925a3dad99631 Mon Sep 17 00:00:00 2001 From: Jean-Francois Dockes Date: Fri, 14 Jan 2022 17:18:49 +0100 Subject: [PATCH 16/19] Fix internfile bug which prevented the propagation of a proper title in some cases --- src/internfile/internfile.cpp | 17 ++++++++--------- tests/Maildir/Maildir.txt | 2 +- tests/embed/embed.txt | 2 +- tests/orgmode/orgmode.txt | 2 +- tests/rfc2231/rfc2231.txt | 2 +- 5 files changed, 12 insertions(+), 13 deletions(-) diff --git a/src/internfile/internfile.cpp b/src/internfile/internfile.cpp index 19c4c44c..677a544e 100644 --- a/src/internfile/internfile.cpp +++ b/src/internfile/internfile.cpp @@ -562,13 +562,13 @@ bool FileInterner::dijontorcl(Rcl::Doc& doc) const string *val = 0; if (!doc.peekmeta(Rcl::Doc::keymd5, &val) || val->empty()) doc.meta[Rcl::Doc::keymd5] = ent.second; - } else if (ent.first == cstr_dj_keymt || - ent.first == cstr_dj_keycharset) { + } else if (ent.first == cstr_dj_keymt || ent.first == cstr_dj_keycharset) { // don't need/want these. 
} else { - LOGDEB2("dijontorcl: " << m_cfg->fieldCanon(ent.first) << " -> " << - ent.second << endl); - doc.addmeta(m_cfg->fieldCanon(ent.first), ent.second); + LOGDEB2("dijontorcl: " << m_cfg->fieldCanon(ent.first) << " -> " << ent.second << "\n"); + if (!ent.second.empty()) { + doc.meta[m_cfg->fieldCanon(ent.first)] = ent.second; + } } } if (doc.meta[Rcl::Doc::keyabs].empty() && @@ -583,7 +583,7 @@ const set nocopyfields{cstr_dj_keycontent, cstr_dj_keymd, cstr_dj_keyanc, cstr_dj_keyorigcharset, cstr_dj_keyfn, cstr_dj_keymt, cstr_dj_keycharset, cstr_dj_keyds}; -static void copymeta(const RclConfig *cfg,Rcl::Doc& doc, const RecollFilter* hp) +static void copymeta(const RclConfig *cfg, Rcl::Doc& doc, const RecollFilter* hp) { for (const auto& entry : hp->get_meta_data()) { if (nocopyfields.find(entry.first) == nocopyfields.end()) { @@ -650,7 +650,7 @@ void FileInterner::collectIpathAndMT(Rcl::Doc& doc) const // handlers to use setfield() instead of embedding // metadata in the HTML meta tags. if (i == 0 || !pathelprev.empty()) { - copymeta(m_cfg, doc, m_handlers[i]); + copymeta(m_cfg, doc, m_handlers[i == 0 ? 0 : i-1]); } if (doc.fbytes.empty()) { lltodecstr(m_handlers[i]->get_docsize(), doc.fbytes); @@ -744,8 +744,7 @@ int FileInterner::addHandler() LOGINFO("FileInterner::addHandler: no filter for [" << mimetype << "]\n"); return ADD_CONTINUE; } - newflt->set_property(Dijon::Filter::OPERATING_MODE, - m_forPreview ? "view" : "index"); + newflt->set_property(Dijon::Filter::OPERATING_MODE, m_forPreview ? 
"view" : "index"); if (!charset.empty()) newflt->set_property(Dijon::Filter::DEFAULT_CHARSET, charset); diff --git a/tests/Maildir/Maildir.txt b/tests/Maildir/Maildir.txt index 334fd3e7..49a8343d 100644 --- a/tests/Maildir/Maildir.txt +++ b/tests/Maildir/Maildir.txt @@ -1 +1 @@ -FreqFor maildir_uniquexxx : 692 +FreqFor maildir_uniquexxx : 734 diff --git a/tests/embed/embed.txt b/tests/embed/embed.txt index 4c643b6c..6169d50a 100644 --- a/tests/embed/embed.txt +++ b/tests/embed/embed.txt @@ -1,2 +1,2 @@ 1 results -application/msword [file:///home/dockes/projets/fulltext/testrecoll/embed/thunderbirdlocalfolders.zip] [xingfx1.doc] 24576 bytes +application/msword [file:///home/dockes/projets/fulltext/testrecoll/embed/thunderbirdlocalfolders.zip] [xingfx1.doc (Sending a word document)] 24576 bytes diff --git a/tests/orgmode/orgmode.txt b/tests/orgmode/orgmode.txt index 805cbc48..3348afb1 100644 --- a/tests/orgmode/orgmode.txt +++ b/tests/orgmode/orgmode.txt @@ -1,2 +1,2 @@ 1 results -text/plain [file:///home/dockes/projets/fulltext/testrecoll/orgmode/orgmode-example.org] [law and legal code versioned on github] 370 bytes +text/x-orgmode-sub [file:///home/dockes/projets/fulltext/testrecoll/orgmode/orgmode-example.org] [law and legal code versioned on github] 487 bytes diff --git a/tests/rfc2231/rfc2231.txt b/tests/rfc2231/rfc2231.txt index f1d98a3f..38192a38 100644 --- a/tests/rfc2231/rfc2231.txt +++ b/tests/rfc2231/rfc2231.txt @@ -2,5 +2,5 @@ 0 results 0 results 2 results +application/octet-stream [file:///home/dockes/projets/fulltext/testrecoll/rfc2231/thunder] [épatantuniquefilenameterm.bin (vrai attach)] 5785 bytes message/rfc822 [file:///home/dockes/projets/fulltext/testrecoll/rfc2231/thunder] [vrai attach] 11208 bytes -application/octet-stream [file:///home/dockes/projets/fulltext/testrecoll/rfc2231/thunder] [épatantuniquefilenameterm.bin] 5785 bytes From 265bbd0c6e9284e04ec0cdc94311724127608120 Mon Sep 17 00:00:00 2001 From: Jean-Francois Dockes Date: Sat, 15 Jan 
2022 09:12:39 +0000 Subject: [PATCH 17/19] Define system specific default configuration directory to allow sharing most of the default configuration files contents --- src/common/rclconfig.cpp | 22 +- src/utils/conftree.h | 39 ++-- src/windows/mimeconf | 434 ++------------------------------------- src/windows/mimeview | 175 +--------------- src/windows/mkinstdir.sh | 16 +- 5 files changed, 69 insertions(+), 617 deletions(-) diff --git a/src/common/rclconfig.cpp b/src/common/rclconfig.cpp index ef8f6351..490d30ea 100644 --- a/src/common/rclconfig.cpp +++ b/src/common/rclconfig.cpp @@ -58,6 +58,16 @@ using namespace std; +// Naming the directory for platform-specific default config files, overriding the top-level ones +// E.g. /usr/share/recoll/examples/windows +#ifdef _WIN32 +static const string confsysdir{"windows"}; +#elif defined(__APPLE__) +static const string confsysdir{"macos"}; +#else +static const string confsysdir; +#endif + // Static, logically const, RclConfig members or module static // variables are initialized once from the first object build during // process initialization.
@@ -303,8 +313,15 @@ RclConfig::RclConfig(const string *argcnf) m_cdirs.push_back(cp); } - // Base/installation config - m_cdirs.push_back(path_cat(m_datadir, "examples")); + // Base/installation config, and its platform-specific overrides + std::string defaultsdir = path_cat(m_datadir, "examples"); + if (!confsysdir.empty()) { + std::string sdir = path_cat(defaultsdir, confsysdir); + if (path_isdir(sdir)) { + m_cdirs.push_back(sdir); + } + } + m_cdirs.push_back(defaultsdir); string cnferrloc; for (const auto& dir : m_cdirs) { @@ -376,6 +393,7 @@ bool RclConfig::updateMainConfig() { ConfStack *newconf = new ConfStack("recoll.conf", m_cdirs, true); if (newconf == 0 || !newconf->ok()) { + std::cerr << "updateMainConfig: new Confstack not ok\n"; if (m_conf) return false; m_ok = false; diff --git a/src/utils/conftree.h b/src/utils/conftree.h index d868e436..dc6f86ad 100644 --- a/src/utils/conftree.h +++ b/src/utils/conftree.h @@ -125,8 +125,7 @@ public: * @param readonly if true open readonly, else rw * @param tildexp try tilde (home dir) expansion for subkey values */ - ConfSimple(const char *fname, int readonly = 0, bool tildexp = false, - bool trimvalues = true); + ConfSimple(const char *fname, int readonly = 0, bool tildexp = false, bool trimvalues = true); /** * Build the object by reading content from a string @@ -142,8 +141,7 @@ public: * @param readonly if true open read only, else rw * @param tildexp try tilde (home dir) expansion for subsection names */ - ConfSimple(int readonly = 0, bool tildexp = false, - bool trimvalues = true); + ConfSimple(int readonly = 0, bool tildexp = false, bool trimvalues = true); virtual ~ConfSimple() {}; @@ -184,8 +182,7 @@ public: * Set value for named integer parameter in specified subsection (or global) * @return 0 for error, 1 else */ - virtual int set(const std::string& nm, long long val, - const std::string& sk = std::string()); + virtual int set(const std::string& nm, long long val, const std::string& sk = std::string()); 
/** * Remove name and value from config @@ -388,8 +385,7 @@ public: construct(fns, ro); } /// Construct out of single file name and multiple directories - ConfStack(const std::string& nm, const std::vector& dirs, - bool ro = true) { + ConfStack(const std::string& nm, const std::vector& dirs, bool ro = true) { std::vector fns; for (const auto& dir : dirs) { fns.push_back(path_cat(dir, nm)); @@ -501,8 +497,8 @@ public: const std::string& sk, const char *pattern = 0) const override { return getNames1(sk, pattern, false); } - virtual std::vector getNamesShallow(const std::string& sk, - const char *patt = 0) const { + virtual std::vector getNamesShallow( + const std::string& sk, const char *patt = 0) const { return getNames1(sk, patt, true); } @@ -570,27 +566,30 @@ private: } } - /// Common construct from file names code. We used to be ok even - /// if some files were not readable/parsable. Now fail if any - /// fails. + /// Common construct from file names. + /// Fail if any fails, except for missing files in all but the bottom location, or the + /// top one in rw mode. void construct(const std::vector& fns, bool ro) { bool ok{true}; - bool first{true}; - for (const auto& fn : fns) { + for (unsigned int i = 0; i < fns.size(); i++) { + const auto& fn{fns[i]}; T* p = new T(fn.c_str(), ro); if (p && p->ok()) { m_confs.push_back(p); } else { delete p; - // In ro mode, we accept a non-existing topmost file - // and treat it as an empty one. - if (!(ro && first && !path_exists(fn))) { - ok = false; + // We accept missing files in all but the bottom/ directory. + // In rw mode, the topmost file must be present. 
+ if (!path_exists(fn)) { + // !ro can only be true for i==0 + if (!ro || (i == fns.size() - 1)) { + ok = false; + break; + } } } // Only the first file is opened rw ro = true; - first = false; } m_ok = ok; } diff --git a/src/windows/mimeconf b/src/windows/mimeconf index 832afa45..19a021d8 100644 --- a/src/windows/mimeconf +++ b/src/windows/mimeconf @@ -1,438 +1,38 @@ # (C) 2015-2022 J.F.Dockes -# This file contains most of the data which determines how we -# handle the different mime types (also see the "mimeview" file). # -# This is the version specific to MS-WINDOWS +# MS-WINDOWS specific definitions for mimeconf # -# Sections: -# top-level: Decompression parameters. Should not be at top-level, historical. -# [index] : Associations of mime types to the filters that translate them -# to plain text or html. -# [icons] : Associations of mime types to result list icons (GUI) -# [categories] : groupings of mime types (media, text, message etc.) -# [guifilters] : defines the filtering checkboxes in the GUI. Uses the -# above categories by default -## ####################################### -# Decompression: these types need a first pass to create a temp file to -# work with. We use a script because uncompress utilities usually work in -# place, which is not suitable. -# -# Obviously this should be in a [decompress] section or such, but it was once -# forgotten and remained global for compatibility... -# -# The %t parameter will be substituted to the name of a temporary directory -# by recoll. This directory is guaranteed empty when calling the filter -# -# The %f parameter will be substituted with the input file. -# -# The script (ie: rcluncomp) must output the uncompressed file name on -# stdout. 
Note that the windows version will always use 7z, and ignore -# the decompressor parameter in the following lines +# Decompression: the windows version always uses 7z, no decompressor parameter is necessary application/gzip = uncompress rcluncomp.py 7z %f %t application/x-gzip = uncompress rcluncomp.py 7z %f %t application/x-compress = uncompress rcluncomp.py 7z %f %t application/x-bzip2 = uncompress rcluncomp.py 7z %f %t application/x-xz = uncompress rcluncomp.py 7z %f %t application/x-lzma = uncompress rcluncomp.py 7z %f %t +application/x-scribus = +application/x-tex = -## ################################### -# Filters for indexing and internal preview. -# The "internal" filters are hardwired in the c++ code. -# The external "exec" filters are typically scripts. By default, they output the -# document in simple html format, have a look at the scripts. -# A different format (ie text/plain), and a character set can be defined for -# each filter, see the exemples below (ie: msword) [index] - -application/msword = execm rcldoc.py -application/vnd.ms-excel = execm rclxls.py -application/vnd.ms-outlook = execm rclpst.py -application/vnd.ms-powerpoint = execm rclppt.py -# Also Handle the mime type returned by "file -i" for a suffix-less word -# file. This could probably just as well be an excel file, but we have to -# chose one. 
-application/vnd.ms-office = execm rcldoc.py - -application/vnd.oasis.opendocument.text = \ - internal xsltproc meta meta.xml opendoc-meta.xsl \ - body content.xml opendoc-body.xsl -application/vnd.oasis.opendocument.text-template = \ - internal xsltproc meta meta.xml opendoc-meta.xsl \ - body content.xml opendoc-body.xsl -application/vnd.oasis.opendocument.presentation = \ - internal xsltproc meta meta.xml opendoc-meta.xsl \ - body content.xml opendoc-body.xsl -application/vnd.oasis.opendocument.spreadsheet = \ - internal xsltproc meta meta.xml opendoc-meta.xsl \ - body content.xml opendoc-body.xsl -application/vnd.oasis.opendocument.graphics = \ - internal xsltproc meta meta.xml opendoc-meta.xsl \ - body content.xml opendoc-body.xsl -application/vnd.oasis.opendocument.presentation-flat-xml = \ - internal xsltproc opendoc-flat.xsl -application/vnd.oasis.opendocument.text-flat-xml = \ - internal xsltproc opendoc-flat.xsl -application/vnd.oasis.opendocument.spreadsheet-flat-xml = \ - internal xsltproc opendoc-flat.xsl - -application/vnd.openxmlformats-officedocument.wordprocessingml.document = \ - internal xsltproc meta docProps/core.xml openxml-meta.xsl \ - body word/document.xml openxml-word-body.xsl \ - body word/footnotes.xml openxml-word-body.xsl \ - body word/endnotes.xml openxml-word-body.xsl -application/vnd.openxmlformats-officedocument.wordprocessingml.template = \ - internal xsltproc meta docProps/core.xml openxml-meta.xsl \ - body word/document.xml openxml-word-body.xsl \ - body word/footnotes.xml openxml-word-body.xsl \ - body word/endnotes.xml openxml-word-body.xsl -application/vnd.openxmlformats-officedocument.presentationml.template = \ - execm rclopxml.py -application/vnd.openxmlformats-officedocument.presentationml.presentation = \ - execm rclopxml.py -application/vnd.openxmlformats-officedocument.spreadsheetml.sheet = \ - internal xsltproc meta docProps/core.xml openxml-meta.xsl \ - body xl/sharedStrings.xml openxml-xls-body.xsl 
-application/vnd.openxmlformats-officedocument.spreadsheetml.template =\ - internal xsltproc meta docProps/core.xml openxml-meta.xsl \ - body xl/sharedStrings.xml openxml-xls-body.xsl - -application/vnd.sun.xml.calc = \ - internal xsltproc meta meta.xml opendoc-meta.xsl \ - body content.xml opendoc-body.xsl -application/vnd.sun.xml.calc.template = \ - internal xsltproc meta meta.xml opendoc-meta.xsl \ - body content.xml opendoc-body.xsl -application/vnd.sun.xml.draw = \ - internal xsltproc meta meta.xml opendoc-meta.xsl \ - body content.xml opendoc-body.xsl -application/vnd.sun.xml.draw.template = \ - internal xsltproc meta meta.xml opendoc-meta.xsl \ - body content.xml opendoc-body.xsl -application/vnd.sun.xml.impress = \ - internal xsltproc meta meta.xml opendoc-meta.xsl \ - body content.xml opendoc-body.xsl -application/vnd.sun.xml.impress.template = \ - internal xsltproc meta meta.xml opendoc-meta.xsl \ - body content.xml opendoc-body.xsl -application/vnd.sun.xml.math = \ - internal xsltproc meta meta.xml opendoc-meta.xsl \ - body content.xml opendoc-body.xsl -application/vnd.sun.xml.writer = \ - internal xsltproc meta meta.xml opendoc-meta.xsl \ - body content.xml opendoc-body.xsl -application/vnd.sun.xml.writer.global = \ - internal xsltproc meta meta.xml opendoc-meta.xsl \ - body content.xml opendoc-body.xsl -application/vnd.sun.xml.writer.template = \ - internal xsltproc meta meta.xml opendoc-meta.xsl \ - body content.xml opendoc-body.xsl - -#application/postscript = exec rclps -#application/x-gnuinfo = execm rclinfo.py -#application/x-tar = execm rcltar.py - -application/epub+zip = execm rclepub.py -application/x-ipynb+json = execm rclipynb.py -application/javascript = internal text/plain -application/ogg = execm rclaudio.py -application/pdf = execm rclpdf.py -application/sql = internal text/plain +application/postscript = application/vnd.wordperfect = exec wpd/wpd2html;mimetype=text/html -application/x-7z-compressed = execm rcl7z.py -application/x-abiword 
= internal xsltproc abiword.xsl -application/x-awk = internal text/plain -application/x-chm = execm rclchm.py -application/x-dia-diagram = execm rcldia.py;mimetype=text/plain -application/x-flac = execm rclaudio.py -application/x-gnote = execm rclxml.py -application/x-hwp = execm rclhwp.py -application/x-mimehtml = internal message/rfc822 -application/x-perl = internal text/plain -application/x-php = internal text/plain -application/x-rar = execm rclrar.py;charset=default -application/x-shellscript = internal text/plain -application/x-webarchive = execm rclwar.py -application/x-zerosize = internal -application/zip = execm rclzip.py;charset=default -audio/aac = execm rclaudio.py -audio/mp4 = execm rclaudio.py -audio/mpeg = execm rclaudio.py -audio/x-karaoke = execm rclkar.py +application/x-dvi = +application/x-gnuinfo = +application/x-ipynb+json = execm rclipynb.py +application/x-tar = image/gif = execm rclimg.exe image/jp2 = execm rclimg.exe image/jpeg = execm rclimg.exe image/png = execm rclimg.exe -image/svg+xml = internal xsltproc svg.xsl image/tiff = execm rclimg.exe -image/vnd.djvu = execm rcldjvu.py -inode/symlink = internal -inode/x-empty = internal application/x-zerosize -message/rfc822 = internal -text/calendar = execm rclics.py;mimetype=text/plain -text/css = internal text/plain -text/html = internal -text/plain = internal -text/plain1 = internal -#text/rtf = execm rclrtf.py -text/rtf = exec unrtf --nopict --html;mimetype=text/html -text/x-c = internal -text/x-c+ = internal -text/x-c++ = internal -text/x-chm-html = internal text/html -text/x-csharp = internal text/plain -text/x-csv = internal text/plain -text/x-fictionbook = internal xsltproc fb2.xsl -text/x-ini = internal text/plain -text/x-mail = internal -text/x-orgmode = execm rclorgmode.py -text/x-perl = internal text/plain -text/x-python = execm rclpython.py -text/x-shellscript = internal text/plain -text/x-srt = internal text/plain +image/x-nikon-nef = execm rclimg.exe image/x-xcf = execm 
rclimg.exe - -# Generic XML is best indexed as text, else it generates too many errors -# All parameter and tag names, attribute values etc, are indexed as -# text. rclxml.py tries to just index the text content. -#application/xml = execm rclxml.py -#text/xml = execm rclxml.py -application/xml = internal text/plain -text/xml = internal text/plain - -## ############################################# -# Icons to be used in the result list if required by gui config -[icons] -application/epub+zip = book -application/javascript = source -application/msword = wordprocessing -application/ogg = sownd -application/pdf = pdf -application/postscript = postscript -application/vnd.ms-excel = spreadsheet -application/vnd.ms-powerpoint = presentation -application/vnd.oasis.opendocument.presentation = presentation -application/vnd.oasis.opendocument.spreadsheet = spreadsheet -application/vnd.oasis.opendocument.text = wordprocessing -application/vnd.openxmlformats-officedocument.presentationml.presentation = presentation -application/vnd.openxmlformats-officedocument.presentationml.template = presentation -application/vnd.openxmlformats-officedocument.spreadsheetml.sheet = spreadsheet -application/vnd.openxmlformats-officedocument.spreadsheetml.template = spreadsheet -application/vnd.openxmlformats-officedocument.wordprocessingml.document = wordprocessing -application/vnd.openxmlformats-officedocument.wordprocessingml.template = wordprocessing -application/vnd.sun.xml.calc = spreadsheet -application/vnd.sun.xml.calc.template = spreadsheet -application/vnd.sun.xml.draw = drawing -application/vnd.sun.xml.draw.template = drawing -application/vnd.sun.xml.impress = presentation -application/vnd.sun.xml.impress.template = presentation -application/vnd.sun.xml.math = wordprocessing -application/vnd.sun.xml.writer = wordprocessing -application/vnd.sun.xml.writer.global = wordprocessing -application/vnd.sun.xml.writer.template = wordprocessing -application/vnd.wordperfect = wordprocessing 
-application/x-abiword = wordprocessing -application/x-awk = source -application/x-chm = book -application/x-dia-diagram = drawing -application/x-dvi = document -application/x-flac = sownd -application/x-fsdirectory = folder -application/x-gnote = document -#application/x-gnuinfo = book -application/x-gnumeric = spreadsheet -application/x-ipynb+json = document -application/x-kword = wordprocessing -application/x-lyx = wordprocessing -application/x-mimehtml = message -application/x-mobipocket-ebook = document -application/x-okular-notes = document -application/x-perl = source -application/x-php = source -application/x-rar = archive -application/x-scribus = document -application/x-scribus = wordprocessing -application/x-shellscript = source -application/x-tar = archive -application/x-tex = wordprocessing -application/x-webarchive = archive -application/xml = document -application/zip = archive -application/x-7z-compressed = archive -audio/mpeg = sownd -audio/x-karaoke = sownd -image/bmp = image -image/gif = image -image/jp2 = image -image/jpeg = image -image/png = image -image/svg+xml = drawing -image/tiff = image -image/vnd.djvu = document -image/x-xcf = image -image/x-xpmi = image -inode/directory = folder -inode/symlink = emblem-symbolic-link -message/rfc822 = message -text/html = html -text/html|chm = bookchap -text/html|epub = bookchap -#text/html|gnuinfo = bookchap -text/plain = txt -text/rtf = wordprocessing -text/x-c = source -text/x-c+ = source -text/x-c++ = source -text/x-csv = txt -text/x-fictionbook = document -text/x-html-aptosid-man = aptosid-book -text/x-html-sidux-man = sidux-book -text/x-ini = txt -text/x-mail = message -text/x-man = document -text/x-orgmode = document -text/x-perl = source -text/x-purple-html-log = pidgin -text/x-purple-log = pidgin -text/x-python = text-x-python -text/x-shellscript = source -text/x-tex = wordprocessing -text/xml = document -video/3gpp = video -video/mp2p = video -video/mp2t = video -video/mp4 = video -video/mpeg = 
video -video/quicktime = video -video/x-matroska = video -video/x-ms-asf = video -video/x-msvideo = video - -[categories] -# Categories group mime types by "kind". They can be used from the query -# language as an "rclcat" clause. This is fully dynamic, you can change the -# names and groups as you wish, only the mime types are stored in the index. -# -# If you add/remove categories, you may also want to change the -# "guifilters" section below. -text = \ - application/epub+zip \ - application/msword \ - application/pdf \ - application/postscript \ - application/vnd.oasis.opendocument.text \ - application/vnd.openxmlformats-officedocument.wordprocessingml.document \ - application/vnd.openxmlformats-officedocument.wordprocessingml.template \ - application/vnd.sun.xml.writer \ - application/vnd.sun.xml.writer.global \ - application/vnd.sun.xml.writer.template \ - application/vnd.wordperfect \ - application/x-abiword \ - application/x-awk \ - application/x-chm \ - application/x-dvi \ - application/x-gnote \ - application/x-gnuinfo \ - application/x-ipynb+json \ - application/x-kword \ - application/x-lyx \ - application/x-mobipocket-ebook \ - application/x-okular-notes \ - application/x-perl \ - application/x-scribus \ - application/x-shellscript \ - application/x-tex \ - application/xml \ - text/xml \ - text/x-csv \ - text/x-tex \ - image/vnd.djvu \ - text/calendar \ - text/html \ - text/plain \ - text/rtf \ - text/x-c \ - text/x-c++ \ - text/x-c+ \ - text/x-fictionbook \ - text/x-html-aptosid-man \ - text/x-html-sidux-man \ - text/x-ini \ - text/x-man \ - text/x-orgmode \ - text/x-perl \ - text/x-python \ - text/x-shellscript - -spreadsheet = \ - application/vnd.ms-excel \ - application/vnd.oasis.opendocument.spreadsheet \ - application/vnd.openxmlformats-officedocument.spreadsheetml.sheet \ - application/vnd.openxmlformats-officedocument.spreadsheetml.template \ - application/vnd.sun.xml.calc \ - application/vnd.sun.xml.calc.template \ - application/x-gnumeric - 
-presentation = \ - application/vnd.ms-powerpoint \ - application/vnd.oasis.opendocument.presentation \ - application/vnd.openxmlformats-officedocument.presentationml.presentation \ - application/vnd.openxmlformats-officedocument.presentationml.template \ - application/vnd.sun.xml.impress \ - application/vnd.sun.xml.impress.template - -media = \ - application/ogg \ - application/x-flac \ - audio/* \ - image/* \ - video/* \ - -message = message/rfc822 \ - text/x-gaim-log \ - text/x-mail \ - text/x-purple-log \ - text/x-purple-html-log \ - -other = application/vnd.sun.xml.draw \ - application/vnd.sun.xml.draw.template \ - application/vnd.sun.xml.math \ - application/x-dia-diagram \ - application/x-fsdirectory \ - application/x-mimehtml \ - application/x-rar \ - application/x-tar \ - application/x-webarchive \ - application/zip \ - application/x-7z-compressed \ - inode/directory \ - inode/symlink \ - -[guifilters] -# This defines the top level filters in the GUI (accessed by the the -# radiobuttons above the results area, or a toolbar combobox). -# Each entry defines a label and a query language fragment that will be -# applied to filter the current query if the option is activated. -# -# This does not really belong in mimeconf, but it does belong in the index -# config (not the GUI one), because it's not necessarily the same in all -# configs, it has to go somewhere, and it's not worth a separate config -# file... -# -# By default this filters by document category (see above), but any -# language fragment should be ok. Be aware though that the "document -# history" queries only know about simple "rclcat" filtering. -# -# If you don't want the filter names to be displayed in alphabetic order, -# you can define them with a colon. 
The part before the colon is not -# displayed but used for ordering, ie: a:zzbutshouldbefirst b:aacomeslast -# -text = rclcat:text -spreadsheet = rclcat:spreadsheet -presentation = rclcat:presentation -media = rclcat:media -message = rclcat:message -other = rclcat:other - +text/x-bibtex = +text/x-gaim-log = +text/x-html-aptosid-man = +text/x-man = +text/x-purple-log = +text/x-tex = +video/x-msvideo = execm rclimg.exe diff --git a/src/windows/mimeview b/src/windows/mimeview index dcf97fe3..72dc53f8 100644 --- a/src/windows/mimeview +++ b/src/windows/mimeview @@ -1,21 +1,5 @@ -## ########################################## -# External viewers, launched by the recoll GUI when you click on a result -# 'edit' link -# -# MS WINDOWS VERSION -# -# Mime types which we should not uncompress if they are found gzipped or -# bzipped because the native viewer knows how to handle. These would be -# exceptions and the list is normally empty -#nouncompforviewmts = +# MS WINDOWS system changes for mimeview -# For releases 1.18 and later: exceptions when using the x-all entry: these -# types will use their local definition. This is useful, e.g.: -# -# - for pdf, where we can pass additional parameters like page to open and -# search string -# - For pages of CHM and EPUB documents where we can choose to open the -# parent document instead of a temporary html file. 
xallexcepts = \ text/html|epub \ application/x-fsdirectory|parentopen inode/directory|parentopen @@ -44,157 +28,6 @@ application/pdf = C:/users/bill/appdata/local/apps/evince-2.32.0.145/bin/evince #application/pdf = "C:/Program Files/SumatraPDF/SumatraPDF.exe" -page %p %f #application/pdf = "C:/Program Files (x86)/Foxit Software/Foxit Reader/FoxitReader.exe" %f /A page=%p -###### THE FOLLOWING ARE NOT USED AT ALL ON WINDOWS, but the types need to -###### be listed for an "Open" link to appear in the result list -application/epub+zip = ebook-viewer %f - -application/x-gnote = gnote %f - -application/x-mobipocket-ebook = ebook-viewer %f - -application/x-kword = kword %f -application/x-abiword = abiword %f - - -application/postscript = evince --page-index=%p --find=%s %f -application/x-dvi = evince --page-index=%p --find=%s %f - -application/x-lyx = lyx %f -application/x-scribus = scribus %f - -#application/msword = libreoffice %f -application/msword = \ - "C:/Program Files (x86)/LibreOffice 5/program/soffice.exe" %f - -application/x-hwp = hanword %f - -application/vnd.ms-excel = libreoffice %f -application/vnd.ms-powerpoint = libreoffice %f - -application/vnd.oasis.opendocument.text = libreoffice %f -application/vnd.oasis.opendocument.presentation = libreoffice %f -application/vnd.oasis.opendocument.spreadsheet = libreoffice %f - -application/vnd.openxmlformats-officedocument.wordprocessingml.document = \ - libreoffice %f -application/vnd.openxmlformats-officedocument.wordprocessingml.template = \ - libreoffice %f -application/vnd.openxmlformats-officedocument.presentationml.template = \ - libreoffice %f -application/vnd.openxmlformats-officedocument.presentationml.presentation = \ - libreoffice %f -application/vnd.openxmlformats-officedocument.spreadsheetml.sheet = \ - libreoffice %f -application/vnd.openxmlformats-officedocument.spreadsheetml.template =\ - libreoffice %f -application/vnd.sun.xml.calc = libreoffice %f -application/vnd.sun.xml.calc.template = 
libreoffice %f -application/vnd.sun.xml.draw = libreoffice %f -application/vnd.sun.xml.draw.template = libreoffice %f -application/vnd.sun.xml.impress = libreoffice %f -application/vnd.sun.xml.impress.template = libreoffice %f -application/vnd.sun.xml.math = libreoffice %f -application/vnd.sun.xml.writer = libreoffice %f -application/vnd.sun.xml.writer.global = libreoffice %f -application/vnd.sun.xml.writer.template = libreoffice %f -application/vnd.wordperfect = libreoffice %f -text/rtf = libreoffice %f - -application/x-dia-diagram = dia %f - -application/x-fsdirectory = dolphin %f -inode/directory = dolphin %f - -application/x-gnuinfo = xterm -e "info -f %f" -application/x-gnumeric = gnumeric %f - -application/x-flac = rhythmbox %f -audio/mpeg = rhythmbox %f -application/ogg = rhythmbox %f -audio/x-karaoke = kmid %f - -image/jpeg = gwenview %f -image/png = gwenview %f -image/tiff = gwenview %f -image/gif = gwenview %f -image/svg+xml = inkview %f -image/vnd.djvu = djview %f -image/x-xcf = gimp %f -image/bmp = gwenview %f -image/x-ms-bmp = gwenview %f -image/x-xpmi = gwenview %f - -# Opening mail messages not always works. 
-# - Thunderbird will only open a single-message file if it has an .emf -# extension -# - "sylpheed %f" seems to work ok as of version 3.3 -# - "kmail --view %u" works -message/rfc822 = thunderbird -file %f -text/x-mail = thunderbird -file %f -application/x-mimehtml = thunderbird -file %f - -text/calendar = evolution %f - -application/x-okular-notes = okular %f - -application/x-rar = ark %f -application/x-tar = ark %f -application/zip = ark %f -application/x-7z-compressed = ark %f - -application/x-awk = emacsclient --no-wait %f -application/x-perl = emacsclient --no-wait %f -text/x-perl = emacsclient --no-wait %f -application/x-shellscript = emacsclient --no-wait %f -text/x-shellscript = emacsclient --no-wait %f -text/x-srt = emacsclient --no-wait %f - -# Or firefox -remote "openFile(%u)" -text/html = firefox %u - -# gnu info nodes are translated to html with a "gnuinfo" -# rclaptg. rclshowinfo knows how to start the info command on the right -# node -text/html|gnuinfo = rclshowinfo %F %(title);ignoreipath=1 - -application/x-webarchive = konqueror %f -text/x-fictionbook = ebook-viewer %f -application/x-tex = emacsclient --no-wait %f -application/xml = emacsclient --no-wait %f -text/xml = emacsclient --no-wait %f -text/x-tex = emacsclient --no-wait %f -text/plain = emacsclient --no-wait %f -text/x-awk = emacsclient --no-wait %f -text/x-c = emacsclient --no-wait %f -text/x-c+ = emacsclient --no-wait %f -text/x-c++ = emacsclient --no-wait %f -text/x-csv = libreoffice %f -text/x-html-sidux-man = konqueror %f -text/x-html-aptosid-man = iceweasel %f - -application/x-chm = kchmviewer %f -# Html pages inside a chm have a chm rclaptg set by the filter. 
Kchmviewer -# knows how to use the ipath (which is the internal chm path) to open the -# file at the right place -text/html|chm = kchmviewer --url %i %F - -text/x-ini = emacsclient --no-wait %f -text/x-man = xterm -u8 -e "groff -T ascii -man %f | more" -text/x-python = idle %f -text/x-gaim-log = emacsclient --no-wait %f -text/x-purple-html-log = emacsclient --no-wait %f -text/x-purple-log = emacsclient --no-wait %f - -# The video types will usually be handled by the desktop default, but they -# need entries here to get an "Open" link -video/3gpp = vlc %f -video/mp2p = vlc %f -video/mp2t = vlc %f -video/mp4 = vlc %f -video/mpeg = vlc %f -video/quicktime = vlc %f -video/x-matroska = vlc %f -video/x-ms-asf = vlc %f -video/x-msvideo = vlc %f - - +########## +# Other MIME types have no specializations on Windows, but the types need to be listed for an "Open" +# link to appear in the result list, the listing is in the generic file diff --git a/src/windows/mkinstdir.sh b/src/windows/mkinstdir.sh index f37654c7..f01001ad 100644 --- a/src/windows/mkinstdir.sh +++ b/src/windows/mkinstdir.sh @@ -171,13 +171,15 @@ copyrecoll() chkcp $RCL/doc/user/docbook-xsl.css $DESTDIR/Share/doc mkdir -p $DESTDIR/Share/doc/webhelp rsync -av $RCL/doc/user/webhelp/docs/* $DESTDIR/Share/doc/webhelp || exit 1 - chkcp $RCL/sampleconf/fields $DESTDIR/Share/examples + chkcp $RCL/sampleconf/fields $DESTDIR/Share/examples chkcp $RCL/sampleconf/fragment-buttons.xml $DESTDIR/Share/examples - chkcp $RCL/windows/mimeconf $DESTDIR/Share/examples - chkcp $RCL/sampleconf/mimemap $DESTDIR/Share/examples - chkcp $RCL/windows/mimeview $DESTDIR/Share/examples - chkcp $RCL/sampleconf/recoll.conf $DESTDIR/Share/examples - chkcp $RCL/sampleconf/recoll.qss $DESTDIR/Share/examples + chkcp $RCL/sampleconf/mimeconf $DESTDIR/Share/examples + chkcp $RCL/sampleconf/mimeview $DESTDIR/Share/examples + chkcp $RCL/sampleconf/mimemap $DESTDIR/Share/examples + chkcp $RCL/windows/mimeconf $DESTDIR/Share/examples/windows + chkcp 
$RCL/windows/mimeview $DESTDIR/Share/examples/windows + chkcp $RCL/sampleconf/recoll.conf $DESTDIR/Share/examples + chkcp $RCL/sampleconf/recoll.qss $DESTDIR/Share/examples chkcp $RCL/sampleconf/recoll-dark.qss $DESTDIR/Share/examples chkcp $RCL/sampleconf/recoll-dark.css $DESTDIR/Share/examples @@ -349,7 +351,7 @@ test "$VERSION" = "$CFVERS" || echo Packaging version $CFVERS -for d in doc examples filters images translations; do +for d in doc examples examples/windows filters images translations; do test -d $DESTDIR/Share/$d || mkdir -p $DESTDIR/Share/$d || \ fatal mkdir $d failed done From 87f86ac26857c431e6e90ea32298cd14c07ee41c Mon Sep 17 00:00:00 2001 From: Jean-Francois Dockes Date: Sat, 15 Jan 2022 10:41:41 +0100 Subject: [PATCH 18/19] Adjust config setup for macos --- src/qtgui/recoll-win.pro | 34 +++--- src/sampleconf/macos/mimeview | 9 ++ src/sampleconf/mimeview.mac | 197 ---------------------------------- 3 files changed, 27 insertions(+), 213 deletions(-) create mode 100644 src/sampleconf/macos/mimeview delete mode 100644 src/sampleconf/mimeview.mac diff --git a/src/qtgui/recoll-win.pro b/src/qtgui/recoll-win.pro index 5e18edf2..7a959d35 100644 --- a/src/qtgui/recoll-win.pro +++ b/src/qtgui/recoll-win.pro @@ -170,30 +170,32 @@ mac { rtitool.cpp FORMS += \ - crontool.ui \ - rtitool.ui + crontool.ui \ + rtitool.ui LIBS += \ -../windows/build-librecoll-Desktop_Qt_5_14_2_clang_64bit-Release/liblibrecoll.a \ - ../../../xapian-core-1.4.18/.libs/libxapian.a \ - -lxslt -lxml2 -liconv -lz + ../windows/build-librecoll-Desktop_Qt_5_14_2_clang_64bit-Release/liblibrecoll.a \ + ../../../xapian-core-1.4.18/.libs/libxapian.a \ + -lxslt -lxml2 -liconv -lz ICON = images/recoll.icns - system(cp ../sampleconf/mimeview.mac ../mimeview) - APP_EXAMPLES.files = \ - ../sampleconf/fragment-buttons.xml \ - ../sampleconf/fields \ - ../sampleconf/recoll.conf \ - ../sampleconf/mimeconf \ - ../sampleconf/recoll.qss \ - ../sampleconf/recoll-dark.qss \ - ../sampleconf/recoll-dark.css \ 
- ../sampleconf/mimemap \ - ../mimeview + ../sampleconf/fragment-buttons.xml \ + ../sampleconf/fields \ + ../sampleconf/recoll.conf \ + ../sampleconf/mimeconf \ + ../sampleconf/mimeview \ + ../sampleconf/mimemap \ + ../sampleconf/recoll.qss \ + ../sampleconf/recoll-dark.qss \ + ../sampleconf/recoll-dark.css APP_EXAMPLES.path = Contents/Resources/examples + APP_EXAMPLES_MAC.files = \ + ../sampleconf/macos/mimeview + APP_EXAMPLES_MAC.path = Contents/Resources/examples/macos + APP_FILTERS.files = \ ../filters/abiword.xsl \ ../filters/cmdtalk.py \ diff --git a/src/sampleconf/macos/mimeview b/src/sampleconf/macos/mimeview new file mode 100644 index 00000000..6cbe6878 --- /dev/null +++ b/src/sampleconf/macos/mimeview @@ -0,0 +1,9 @@ +# External viewers, launched by the recoll GUI when you click on a result +# 'Open' link - MAC version +# On the MAC, we use "open" for everything, no exceptions at the moment. + +xallexcepts = + +[view] +# Pseudo entry used if the 'use desktop' preference is set in the GUI +application/x-all = open %f diff --git a/src/sampleconf/mimeview.mac b/src/sampleconf/mimeview.mac deleted file mode 100644 index 9677e472..00000000 --- a/src/sampleconf/mimeview.mac +++ /dev/null @@ -1,197 +0,0 @@ -# @(#$Id: mimeview,v 1.16 2008-09-15 08:03:37 dockes Exp $ (C) 2004 J.F.Dockes - -## ########################################## -# External viewers, launched by the recoll GUI when you click on a result -# 'Open' link - MAC version -# On the MAC, we use "open" for everything... - -# Mime types which we should not uncompress if they are found gzipped or -# bzipped because the native viewer knows how to handle. These would be -# exceptions and the list is normally empty -#nouncompforviewmts = - -# For releases 1.18 and later: exceptions when using the x-all entry: these -# types will use their local definition. 
This is useful, e.g.: -# -# - for pdf, where we can pass additional parameters like page to open and -# search string -# - For pages of CHM and EPUB documents where we can choose to open the -# parent document instead of a temporary html file. -#xallexcepts = application/pdf application/postscript application/x-dvi \ -# text/html|gnuinfo text/html|chm text/html|epub - -[view] -# Pseudo entry used if the 'use desktop' preference is set in the GUI -application/x-all = open %f - -application/epub+zip = ebook-viewer %f -# If you want to open the parent epub document for epub parts instead of -# opening them as html documents: -#text/html|epub = ebook-viewer %F;ignoreipath=1 - -application/x-gnote = gnote %f - -application/x-mobipocket-ebook = ebook-viewer %f - -application/x-kword = kword %f -application/x-abiword = abiword %f - -# Note: the Linux Mint evince clones, atril and xread, have the same options -application/pdf = evince --page-index=%p --find=%s %f -# Or: -#application/pdf = qpdfview --search %s %f#%p - -application/postscript = evince --page-index=%p --find=%s %f -application/x-dvi = evince --page-index=%p --find=%s %f - -application/x-lyx = lyx %f -application/x-scribus = scribus %f - -application/msword = libreoffice %f -application/vnd.ms-excel = libreoffice %f -application/vnd.ms-powerpoint = libreoffice %f - -application/vnd.oasis.opendocument.text = libreoffice %f -application/vnd.oasis.opendocument.presentation = libreoffice %f -application/vnd.oasis.opendocument.spreadsheet = libreoffice %f - -application/vnd.openxmlformats-officedocument.wordprocessingml.document = \ - libreoffice %f -application/vnd.openxmlformats-officedocument.wordprocessingml.template = \ - libreoffice %f -application/vnd.openxmlformats-officedocument.presentationml.template = \ - libreoffice %f -application/vnd.openxmlformats-officedocument.presentationml.presentation = \ - libreoffice %f -application/vnd.openxmlformats-officedocument.spreadsheetml.sheet = \ - libreoffice %f 
-application/vnd.openxmlformats-officedocument.spreadsheetml.template =\ - libreoffice %f -application/vnd.sun.xml.calc = libreoffice %f -application/vnd.sun.xml.calc.template = libreoffice %f -application/vnd.sun.xml.draw = libreoffice %f -application/vnd.sun.xml.draw.template = libreoffice %f -application/vnd.sun.xml.impress = libreoffice %f -application/vnd.sun.xml.impress.template = libreoffice %f -application/vnd.sun.xml.math = libreoffice %f -application/vnd.sun.xml.writer = libreoffice %f -application/vnd.sun.xml.writer.global = libreoffice %f -application/vnd.sun.xml.writer.template = libreoffice %f -application/vnd.wordperfect = libreoffice %f -text/rtf = libreoffice %f - -application/x-dia-diagram = dia %f - -application/x-fsdirectory = dolphin %f -inode/directory = dolphin %f - -# Both dolphin and nautilus can pre-select a file inside a -# directory. Thunar can't afaik. xdg-open cant pass an additional -# parameters so these are to be xallexcepts. -application/x-fsdirectory|parentopen = dolphin --select %(childurl) %f -inode/directory|parentopen = dolphin --select %(childurl) %f -#application/x-fsdirectory|parentopen = nautilus %(childurl) -#inode/directory|parentopen = nautilus %(childurl) - -application/x-gnuinfo = xterm -e "info -f %f" -application/x-gnumeric = gnumeric %f - -application/x-flac = rhythmbox %f -audio/mpeg = rhythmbox %f -application/ogg = rhythmbox %f -audio/x-karaoke = kmid %f - -image/jpeg = gwenview %f -image/png = gwenview %f -image/tiff = gwenview %f -image/gif = gwenview %f -image/svg+xml = inkview %f -image/vnd.djvu = djview %f -image/x-xcf = gimp %f -image/bmp = gwenview %f -image/x-ms-bmp = gwenview %f -image/x-xpmi = gwenview %f -image/x-nikon-nef = ufraw %f - -# Opening mail messages: -# - Thunderbird will only open a single-message file if it has an .eml -# extension -# - "sylpheed %f" seems to work ok as of version 3.3 -# - "kmail --view %u" works -# - claws-mail: works using a small intermediary shell-script, which you -# 
set as the viewer here. You need to have at least one account inside -# claws-mail, so that it creates ~/Mail/inbox. Script contents example -# follows. Using 1 is probably not a good idea if this is a real account -# (here I am using a bogus one, so that I can overwrite anything inside -# inbox at will): -# #!/bin/bash -# cp $1 ~/Mail/inbox/1 -# claws-mail --select ~/Mail/inbox/1 -# rm ~/Mail/inbox/1 -message/rfc822 = thunderbird -file %f - -text/x-mail = thunderbird -file %f -application/x-mimehtml = thunderbird -file %f - -text/calendar = evolution %f - -application/x-okular-notes = okular %f - -application/x-rar = ark %f -application/x-tar = ark %f -application/zip = ark %f -application/x-7z-compressed = ark %f - -application/x-awk = emacsclient %f -application/x-perl = emacsclient %f -text/x-perl = emacsclient %f -application/x-shellscript = emacsclient %f -text/x-shellscript = emacsclient %f - -# Or firefox -remote "openFile(%u)" -text/html = firefox %u - -# gnu info nodes are translated to html with a "gnuinfo" -# rclaptg. rclshowinfo knows how to start the info command on the right -# node -text/html|gnuinfo = rclshowinfo %F %(title);ignoreipath=1 - -application/x-webarchive = konqueror %f -text/x-fictionbook = ebook-viewer %f -application/x-tex = emacsclient %f -application/xml = emacsclient %f -text/xml = emacsclient %f -text/x-tex = emacsclient %f -text/plain = emacsclient %f -text/x-awk = emacsclient %f -text/x-c = emacsclient %f -text/x-c+ = emacsclient %f -text/x-c++ = emacsclient %f -text/x-csv = libreoffice %f -text/x-html-sidux-man = konqueror %f -text/x-html-aptosid-man = iceweasel %f - -application/x-chm = kchmviewer %f -# Html pages inside a chm have a chm rclaptg set by the filter. 
Kchmviewer -# knows how to use the ipath (which is the internal chm path) to open the -# file at the right place -text/html|chm = kchmviewer --url %i %F - -text/x-ini = emacsclient %f -text/x-man = xterm -u8 -e "groff -T ascii -man %f | more" -text/x-python = idle %f -text/x-gaim-log = emacsclient %f -text/x-purple-html-log = emacsclient %f -text/x-purple-log = emacsclient %f - -# The video types will usually be handled by the desktop default, but they -# need entries here to get an "Open" link -video/3gpp = open %f -video/mp2p = open %f -video/mp2t = open %f -video/mp4 = open %f -video/mpeg = open %f -video/quicktime = open %f -video/x-matroska = open %f -video/x-ms-asf = open %f -video/x-msvideo = open %f From b51f9efcf74e683215c2433ad691dbbaad5179d9 Mon Sep 17 00:00:00 2001 From: Jean-Francois Dockes Date: Sat, 15 Jan 2022 09:44:09 +0000 Subject: [PATCH 19/19] Windows qmake files: add deps on librecoll for the exes --- src/qtgui/recoll-win.pro | 41 +++++++++++++-------------- src/windows/qmkrecoll/recollindex.pro | 34 +++++++++++----------- src/windows/qmkrecoll/recollq.pro | 14 ++++----- 3 files changed, 42 insertions(+), 47 deletions(-) diff --git a/src/qtgui/recoll-win.pro b/src/qtgui/recoll-win.pro index 7a959d35..39eae455 100644 --- a/src/qtgui/recoll-win.pro +++ b/src/qtgui/recoll-win.pro @@ -117,40 +117,39 @@ INCLUDEPATH += ../common ../index ../internfile ../query ../unac \ ../utils ../aspell ../rcldb ../qtgui ../xaposix \ confgui widgets windows { - DEFINES += PSAPI_VERSION=1 - DEFINES += __WIN32__ - DEFINES += UNICODE - RC_FILE = recoll.rc + DEFINES += PSAPI_VERSION=1 + DEFINES += __WIN32__ + DEFINES += UNICODE + RC_FILE = recoll.rc - HEADERS += \ - winschedtool.h - SOURCES += \ + HEADERS += \ + winschedtool.h + SOURCES += \ winschedtool.cpp - FORMS += \ + FORMS += \ winschedtool.ui - contains(QMAKE_CC, gcc){ - # MingW - QMAKE_CXXFLAGS += -std=c++11 -Wno-unused-parameter - LIBS += 
C:/recoll/src/windows/build-librecoll-Desktop_Qt_5_8_0_MinGW_32bit-Release/release/librecoll.dll - } + contains(QMAKE_CC, gcc){ + # MingW + QMAKE_CXXFLAGS += -std=c++11 -Wno-unused-parameter + LIBS += \ + C:/recoll/src/windows/build-librecoll-Desktop_Qt_5_8_0_MinGW_32bit-Release/release/librecoll.dll + } contains(QMAKE_CC, cl){ # MSVC RECOLLDEPS = ../../../recolldeps/msvc DEFINES += USING_STATIC_LIBICONV + PRE_TARGETDEPS = \ + ../windows/build-librecoll-Desktop_Qt_5_14_2_MSVC2017_32bit-Release/release/librecoll.lib LIBS += \ - -L../windows/build-librecoll-Desktop_Qt_5_14_2_MSVC2017_32bit-Release/release \ - -llibrecoll \ + -L../windows/build-librecoll-Desktop_Qt_5_14_2_MSVC2017_32bit-Release/release -llibrecoll \ $$RECOLLDEPS/libxml2/libxml2-2.9.4+dfsg1/win32/bin.msvc/libxml2.lib \ $$RECOLLDEPS/libxslt/libxslt-1.1.29/win32/bin.msvc/libxslt.lib \ - -L../windows/build-libxapian-Desktop_Qt_5_14_2_MSVC2017_32bit-Release/release \ - -llibxapian \ - -L$$RECOLLDEPS/build-libiconv-Desktop_Qt_5_14_2_MSVC2017_32bit-Release/release \ - -llibiconv \ + -L../windows/build-libxapian-Desktop_Qt_5_14_2_MSVC2017_32bit-Release/release -llibxapian \ + -L$$RECOLLDEPS/build-libiconv-Desktop_Qt_5_14_2_MSVC2017_32bit-Release/release -llibiconv \ $$RECOLLDEPS/zlib-1.2.11/zdll.lib \ - -lrpcrt4 -lws2_32 -luser32 \ - -lshell32 -lshlwapi -lpsapi -lkernel32 + -lrpcrt4 -lws2_32 -luser32 -lshell32 -lshlwapi -lpsapi -lkernel32 } } diff --git a/src/windows/qmkrecoll/recollindex.pro b/src/windows/qmkrecoll/recollindex.pro index 9fa47f03..c0b18fdb 100644 --- a/src/windows/qmkrecoll/recollindex.pro +++ b/src/windows/qmkrecoll/recollindex.pro @@ -35,25 +35,23 @@ windows { LIBS += \ ../build-librecoll-Desktop_Qt_5_8_0_MinGW_32bit-Release/release/librecoll.dll \ -lshlwapi -lpsapi -lkernel32 - } + } - contains(QMAKE_CC, cl){ - # MSVC - RECOLLDEPS = ../../../../recolldeps/msvc - DEFINES += USING_STATIC_LIBICONV - LIBS += \ - -L../build-librecoll-Desktop_Qt_5_14_2_MSVC2017_32bit-Release/release \ - 
-llibrecoll \ - $$RECOLLDEPS/libxml2/libxml2-2.9.4+dfsg1/win32/bin.msvc/libxml2.lib \ - $$RECOLLDEPS/libxslt/libxslt-1.1.29/win32/bin.msvc/libxslt.lib \ - -L../build-libxapian-Desktop_Qt_5_14_2_MSVC2017_32bit-Release/release \ - -llibxapian \ - $$RECOLLDEPS/zlib-1.2.11/zdll.lib \ - -L$$RECOLLDEPS/build-libiconv-Desktop_Qt_5_14_2_MSVC2017_32bit-Release/release \ - -llibiconv -lShell32 \ - -lrpcrt4 -lws2_32 -luser32 \ - -lshlwapi -lpsapi -lkernel32 - } + contains(QMAKE_CC, cl){ + # MSVC + RECOLLDEPS = ../../../../recolldeps/msvc + DEFINES += USING_STATIC_LIBICONV + PRE_TARGETDEPS = \ + ../build-librecoll-Desktop_Qt_5_14_2_MSVC2017_32bit-Release/release/librecoll.lib + LIBS += \ + -L../build-librecoll-Desktop_Qt_5_14_2_MSVC2017_32bit-Release/release -llibrecoll \ + $$RECOLLDEPS/libxml2/libxml2-2.9.4+dfsg1/win32/bin.msvc/libxml2.lib \ + $$RECOLLDEPS/libxslt/libxslt-1.1.29/win32/bin.msvc/libxslt.lib \ + -L../build-libxapian-Desktop_Qt_5_14_2_MSVC2017_32bit-Release/release -llibxapian \ + $$RECOLLDEPS/zlib-1.2.11/zdll.lib \ + -L$$RECOLLDEPS/build-libiconv-Desktop_Qt_5_14_2_MSVC2017_32bit-Release/release -llibiconv \ + -lShell32 -lrpcrt4 -lws2_32 -luser32 -lshlwapi -lpsapi -lkernel32 + } INCLUDEPATH += ../../windows SOURCES += ../../windows/getopt.cc diff --git a/src/windows/qmkrecoll/recollq.pro b/src/windows/qmkrecoll/recollq.pro index cc91d078..527d5a1a 100644 --- a/src/windows/qmkrecoll/recollq.pro +++ b/src/windows/qmkrecoll/recollq.pro @@ -29,18 +29,16 @@ windows { contains(QMAKE_CC, cl){ # Visual Studio RECOLLDEPS = ../../../../recolldeps/msvc + PRE_TARGETDEPS = \ + ../build-librecoll-Desktop_Qt_5_14_2_MSVC2017_32bit-Release/release/librecoll.lib LIBS += \ - -L../build-librecoll-Desktop_Qt_5_14_2_MSVC2017_32bit-Release/release \ - -llibrecoll \ + -L../build-librecoll-Desktop_Qt_5_14_2_MSVC2017_32bit-Release/release -llibrecoll \ $$RECOLLDEPS/libxml2/libxml2-2.9.4+dfsg1/win32/bin.msvc/libxml2.lib \ $$RECOLLDEPS/libxslt/libxslt-1.1.29/win32/bin.msvc/libxslt.lib \ - 
-L../build-libxapian-Desktop_Qt_5_14_2_MSVC2017_32bit-Release/release \ - -llibxapian \ - -L$$RECOLLDEPS/build-libiconv-Desktop_Qt_5_14_2_MSVC2017_32bit-Release/release/ \ - -llibiconv \ + -L../build-libxapian-Desktop_Qt_5_14_2_MSVC2017_32bit-Release/release -llibxapian \ + -L$$RECOLLDEPS/build-libiconv-Desktop_Qt_5_14_2_MSVC2017_32bit-Release/release/ -llibiconv \ $$RECOLLDEPS/zlib-1.2.11/zdll.lib \ - -lrpcrt4 -lws2_32 -luser32 -lshell32 \ - -lshlwapi -lpsapi -lkernel32 + -lrpcrt4 -lws2_32 -luser32 -lshell32 -lshlwapi -lpsapi -lkernel32 } INCLUDEPATH += ../../windows