diff --git a/.hgtags b/.hgtags index 9060910b..c3ab73e8 100644 --- a/.hgtags +++ b/.hgtags @@ -162,3 +162,4 @@ e546afbecc6bcd2d3af8c083d4fbc2cb345f4108 RECOLL_1_20_0p1 cd9d0513b797337e51f6e41d6919d530829aa4ef RECOLL_1_20_0p2 264635f65a94989088c09aa3becc3bf1bdae99a5 RECOLL_1_20_1 371d5921692ebfd5f64459953f953cdf0024b1fa RECOLL_1_21_0 +aa49378569744bc59162ca28cdce4b54a4ab6fea RECOLL_1_21_1 diff --git a/src/INSTALL b/src/INSTALL index 1b416248..e8ae6a49 100644 --- a/src/INSTALL +++ b/src/INSTALL @@ -1018,6 +1018,14 @@ Chapter 5. Installation and configuration Maximum handler execution time, after which it is aborted. Some postscript programs just loop... + filtermaxmbytes + + Recoll 1.20.7 and later. Maximum handler memory utilisation. This + uses setrlimit(RLIMIT_AS) on most systems (total virtual memory + space size limit). Some programs may start with 500 MBytes of + mapped shared libraries, so take this into account when choosing a + value. The default is a liberal 2000MB. + filtersdir A directory to search for the external input handler scripts used diff --git a/src/README b/src/README index ba729d19..b99332a5 100644 --- a/src/README +++ b/src/README @@ -1858,25 +1858,23 @@ Chapter 3. Searching third option has been available in recent releases and is probably now the best one: use PRE tags with line wrapping. - o Use desktop preferences to choose document editor: if this is checked, - the xdg-open utility will be used to open files when you click the - Open link in the result list, instead of the application defined in - mimeview. xdg-open will in term use your desktop preferences to choose - an appropriate application. + o Choose editor applications: this opens a dialog which allows you to + select the application to be used to open each MIME type. The default + is normally to use the xdg-open utility, but you can override it. 
- o Exceptions: when using the desktop preferences for opening documents, - these are MIME types that will still be opened according to Recoll - preferences. This is useful for passing parameters like page numbers - or search strings to applications that support them (e.g. evince). - This cannot be done with xdg-open which only supports passing one - parameter. + o Exceptions: even when xdg-open is used by default for opening + documents, you can set exceptions for MIME types that will still be + opened according to Recoll preferences. This is useful for passing + parameters like page numbers or search strings to applications that + support them (e.g. evince). This cannot be done with xdg-open which + only supports passing one parameter. - o Choose editor applications this will let you choose the command - started by the Open links inside the result list, for specific - document types. + o Document filter choice style: this will let you choose if the document + categories are displayed as a list or a set of buttons, or a menu. - o Display category filter as toolbar... this will let you choose if the - document categories are displayed as a list or a set of buttons. + o Start with simple search mode: this lets you choose the value of the + simple search type on program startup. Either a fixed value (e.g. + Query Language), or the value in use when the program last exited. o Auto-start simple search on white space entry: if this is checked, a search will be executed each time you enter a space in the simple @@ -2159,7 +2157,10 @@ Chapter 3. Searching recollq is not built by default. You can use the Makefile in the query directory to build it. This is a very simple program, and if you can program a little c++, you may find it useful to taylor its output format - to your needs. + to your needs. Note that recollq is only really useful on systems where the + Qt libraries (or even the X11 ones) are not available. 
Otherwise, just use + recoll -t, which takes the exact same parameters and options which are + described for recollq. recollq has a man page (not installed by default, look in the doc/man directory). The Usage string is as follows: @@ -4286,6 +4287,14 @@ Chapter 5. Installation and configuration Maximum handler execution time, after which it is aborted. Some postscript programs just loop... + filtermaxmbytes + + Recoll 1.20.7 and later. Maximum handler memory utilisation. This + uses setrlimit(RLIMIT_AS) on most systems (total virtual memory + space size limit). Some programs may start with 500 MBytes of + mapped shared libraries, so take this into account when choosing a + value. The default is a liberal 2000MB. + filtersdir A directory to search for the external input handler scripts used diff --git a/src/common/textsplit.cpp b/src/common/textsplit.cpp index 8a677a64..c41c7ce6 100644 --- a/src/common/textsplit.cpp +++ b/src/common/textsplit.cpp @@ -709,6 +709,12 @@ bool TextSplit::text_to_words(const string &in) // confusing. // ie "MySQL manual" is matched by "MySQL manual" and // "my sql manual" but not "mysql manual" + + // A possibility would be to emit both my and sql at the + // same position. All non-phrase searches would work, and + // both "MySQL manual" and "mysql manual" phrases would + // match too. "my sql manual" would not match, but this is + // not an issue. case A_ULETTER: if (m_span.length() && charclasses[(unsigned char)m_span[m_span.length() - 1]] == diff --git a/src/doc/user/usermanual.xml b/src/doc/user/usermanual.xml index 3b8c1227..cbe8c549 100644 --- a/src/doc/user/usermanual.xml +++ b/src/doc/user/usermanual.xml @@ -2917,7 +2917,11 @@ MimeType=*/* use the Makefile in the query directory to build it. This is a very simple program, and if you can program a little c++, you may find it - useful to taylor its output format to your needs. + useful to tailor its output format to your needs. 
Note that recollq is + only really useful on systems where the Qt libraries (or even the X11 + ones) are not available. Otherwise, just use recoll + -t, which takes the exact same parameters and options which + are described for recollq. recollq has a man page (not installed by default, look in the doc/man directory). The diff --git a/src/filters/ppt-dump.py b/src/filters/ppt-dump.py index bb6b7e3c..f05a5789 100755 --- a/src/filters/ppt-dump.py +++ b/src/filters/ppt-dump.py @@ -114,19 +114,26 @@ def main (args): except getopt.GetoptError: error("error parsing input options\n") usage(exname) - return + return false + status = True try: dumper = PPTDumper(args[0], globals.params) if not dumper.dump(): error("ppt-dump: dump error " + args[0] + "\n") + status = False except: error("ppt-dump: FAILURE (bad format?) " + args[0] + "\n") + status = False if globals.params.dumpText: print(globals.textdump.replace("\r", "\n")) - + return(status) + if __name__ == '__main__': - main(sys.argv) + if main(sys.argv): + sys.exit(0) + else: + sys.exit(1) # vim:set filetype=python shiftwidth=4 softtabstop=4 expandtab: diff --git a/src/internfile/internfile.cpp b/src/internfile/internfile.cpp index e34168cb..6a652181 100644 --- a/src/internfile/internfile.cpp +++ b/src/internfile/internfile.cpp @@ -28,9 +28,8 @@ #include #include #include -#ifndef NO_NAMESPACES + using namespace std; -#endif /* NO_NAMESPACES */ #include "cstr.h" #include "internfile.h" @@ -550,6 +549,10 @@ bool FileInterner::dijontorcl(Rcl::Doc& doc) // doc with an ipath, not the last one which is usually text/plain We // also set the author and modification time from the last doc which // has them. +// +// The stack can contain objects with an ipath element (corresponding +// to actual embedded documents), and, at the top, elements without an +// ipath element, corresponding to format translations of the last doc. // // The docsize is fetched from the first element without an ipath // (first non container). 
If the last element directly returns @@ -579,7 +582,8 @@ void FileInterner::collectIpathAndMT(Rcl::Doc& doc) const const map& docdata = (*hit)->get_meta_data(); if (getKeyValue(docdata, cstr_dj_keyipath, ipathel)) { if (!ipathel.empty()) { - // We have a non-empty ipath + // Non-empty ipath. This stack element is for an + // actual embedded document, not a format translation. hasipath = true; getKeyValue(docdata, cstr_dj_keymt, doc.mimetype); getKeyValue(docdata, cstr_dj_keyfn, doc.meta[Rcl::Doc::keyfn]); @@ -593,8 +597,18 @@ void FileInterner::collectIpathAndMT(Rcl::Doc& doc) const getKeyValue(docdata, cstr_dj_keydocsize, doc.fbytes); doc.ipath += cstr_isep; } - getKeyValue(docdata, cstr_dj_keyauthor, doc.meta[Rcl::Doc::keyau]); - getKeyValue(docdata, cstr_dj_keymd, doc.dmtime); + // We set the author field from the innermost doc which has + // one: allows finding, e.g. an image attachment having no + // metadata by a search on the sender name. Only do this for + // actually embedded documents (avoid replacing values from + // metacmds for the topmost one). For a topmost doc, author + // will be merged by dijontorcl() later on. About same for + // dmtime, but an external value will be replaced, not + // augmented if dijontorcl() finds an internal value. + if (hasipath) { + getKeyValue(docdata, cstr_dj_keyauthor, doc.meta[Rcl::Doc::keyau]); + getKeyValue(docdata, cstr_dj_keymd, doc.dmtime); + } } // Trim empty tail elements in ipath. 
@@ -878,12 +892,6 @@ FileInterner::Status FileInterner::internfile(Rcl::Doc& doc, const string& ipath return FIAgain; } -// Temporary while we fix backend things -static string urltolocalpath(string url) -{ - return url.substr(7, string::npos); -} - bool FileInterner::tempFileForMT(TempFile& otemp, RclConfig* cnf, const string& mimetype) { diff --git a/src/qtgui/main.cpp b/src/qtgui/main.cpp index 32e1d5c8..93986ffe 100644 --- a/src/qtgui/main.cpp +++ b/src/qtgui/main.cpp @@ -235,8 +235,11 @@ Usage(void) int main(int argc, char **argv) { - // If "-t" is present at all, we don't do the GUI thing and pass the - // whole to recollq for command line / pipe usage. + // if we are named recollq or option "-t" is present at all, we + // don't do the GUI thing and pass the whole to recollq for + // command line / pipe usage. + if (!strcmp(argv[0], "recollq")) + exit(recollq(&theconfig, argc, argv)); for (int i = 0; i < argc; i++) { if (!strcmp(argv[i], "-t")) { exit(recollq(&theconfig, argc, argv)); diff --git a/src/qtgui/rclm_wins.cpp b/src/qtgui/rclm_wins.cpp index d9cb87d1..7b3250b0 100644 --- a/src/qtgui/rclm_wins.cpp +++ b/src/qtgui/rclm_wins.cpp @@ -85,8 +85,7 @@ void RclMain::showFragButs() connect(fragbuts, SIGNAL(fragmentsChanged()), this, SLOT(onFragmentsChanged())); } else { - delete fragbuts; - fragbuts = 0; + deleteZ(fragbuts); } } else { // Close and reopen, in hope that makes us visible... 
diff --git a/src/qtgui/rclmain_w.cpp b/src/qtgui/rclmain_w.cpp index e9436997..efcc0663 100644 --- a/src/qtgui/rclmain_w.cpp +++ b/src/qtgui/rclmain_w.cpp @@ -279,6 +279,9 @@ void RclMain::init() QKeySequence seq("Ctrl+Shift+s"); QShortcut *sc = new QShortcut(seq, this); connect(sc, SIGNAL (activated()), sSearch, SLOT (takeFocus())); + QKeySequence seql("Ctrl+l"); + sc = new QShortcut(seql, this); + connect(sc, SIGNAL (activated()), sSearch, SLOT (takeFocus())); connect(&m_watcher, SIGNAL(fileChanged(QString)), this, SLOT(idxStatus())); diff --git a/src/query/wasaparse.ypp b/src/query/wasaparse.ypp index 09a583c3..9d88d9c1 100644 --- a/src/query/wasaparse.ypp +++ b/src/query/wasaparse.ypp @@ -12,8 +12,15 @@ using namespace std; +// #define LOG_PARSER +#ifdef LOG_PARSER +#define LOGP(X) {cerr << X;} +#else +#define LOGP(X) +#endif + int yylex(yy::parser::semantic_type *, yy::parser::location_type *, - WasaParserDriver *); + WasaParserDriver *); void yyerror(char const *); static void qualify(Rcl::SearchDataClauseDist *, const string &); @@ -46,8 +53,8 @@ static void addSubQuery(WasaParserDriver *d, %type query %type complexfieldname - /* Non operator tokens need precedence because of the possibility of - concatenation which needs to have lower prec than OR */ + /* Non operator tokens need precedence because of the possibility of + concatenation which needs to have lower prec than OR */ %left WORD %left QUOTED %left QUALIFIERS @@ -60,13 +67,14 @@ static void addSubQuery(WasaParserDriver *d, topquery: query { + LOGP("END PARSING\n"); d->m_result = $1; } query: query query %prec UCONCAT { - //cerr << "q: query query" << endl; + LOGP("q: query query\n"); Rcl::SearchData *sd = new Rcl::SearchData(Rcl::SCLT_AND, d->m_stemlang); addSubQuery(d, sd, $1); addSubQuery(d, sd, $2); @@ -74,7 +82,7 @@ query query %prec UCONCAT } | query AND query { - //cerr << "q: query AND query" << endl; + LOGP("q: query AND query\n"); Rcl::SearchData *sd = new Rcl::SearchData(Rcl::SCLT_AND, 
d->m_stemlang); addSubQuery(d, sd, $1); addSubQuery(d, sd, $3); @@ -82,7 +90,7 @@ query query %prec UCONCAT } | query OR query { - //cerr << "q: query OR query" << endl; + LOGP("q: query OR query\n"); Rcl::SearchData *top = new Rcl::SearchData(Rcl::SCLT_AND, d->m_stemlang); Rcl::SearchData *sd = new Rcl::SearchData(Rcl::SCLT_OR, d->m_stemlang); addSubQuery(d, sd, $1); @@ -92,13 +100,13 @@ query query %prec UCONCAT } | '(' query ')' { - //cerr << "q: ( query )" << endl; + LOGP("q: ( query )\n"); $$ = $2; } | fieldexpr %prec UCONCAT { - //cerr << "q: fieldexpr" << endl; + LOGP("q: fieldexpr\n"); Rcl::SearchData *sd = new Rcl::SearchData(Rcl::SCLT_AND, d->m_stemlang); d->addClause(sd, $1); $$ = sd; @@ -107,12 +115,12 @@ fieldexpr %prec UCONCAT fieldexpr: term { - // cerr << "fe: simple fieldexpr: " << $1->gettext() << endl; + LOGP("fe: simple fieldexpr: " << $1->gettext() << endl); $$ = $1; } | complexfieldname EQUALS term { - // cerr << "fe: " << *$1 << " = " << $3->gettext() << endl; + LOGP("fe: " << *$1 << " = " << $3->gettext() << endl); $3->setfield(*$1); $3->setrel(Rcl::SearchDataClause::REL_EQUALS); $$ = $3; @@ -120,7 +128,7 @@ fieldexpr: term } | complexfieldname CONTAINS term { - // cerr << "fe: " << *$1 << " : " << $3->gettext() << endl; + LOGP("fe: " << *$1 << " : " << $3->gettext() << endl); $3->setfield(*$1); $3->setrel(Rcl::SearchDataClause::REL_CONTAINS); $$ = $3; @@ -128,7 +136,7 @@ fieldexpr: term } | complexfieldname SMALLER term { - // cerr << "fe: " << *$1 << " < " << $3->gettext() << endl; + LOGP(cerr << "fe: " << *$1 << " < " << $3->gettext() << endl); $3->setfield(*$1); $3->setrel(Rcl::SearchDataClause::REL_LT); $$ = $3; @@ -136,7 +144,7 @@ fieldexpr: term } | complexfieldname SMALLEREQ term { - // cerr << "fe: " << *$1 << " <= " << $3->gettext() << endl; + LOGP("fe: " << *$1 << " <= " << $3->gettext() << endl); $3->setfield(*$1); $3->setrel(Rcl::SearchDataClause::REL_LTE); $$ = $3; @@ -144,7 +152,7 @@ fieldexpr: term } | complexfieldname 
GREATER term { - // cerr << "fe: " << *$1 << " > " << $3->gettext() << endl; + LOGP("fe: " << *$1 << " > " << $3->gettext() << endl); $3->setfield(*$1); $3->setrel(Rcl::SearchDataClause::REL_GT); $$ = $3; @@ -152,7 +160,7 @@ fieldexpr: term } | complexfieldname GREATEREQ term { - // cerr << "fe: " << *$1 << " >= " << $3->gettext() << endl; + LOGP("fe: " << *$1 << " >= " << $3->gettext() << endl); $3->setfield(*$1); $3->setrel(Rcl::SearchDataClause::REL_GTE); $$ = $3; @@ -160,7 +168,7 @@ fieldexpr: term } | '-' fieldexpr { - // cerr << "fe: - fieldexpr[" << $2->gettext() << "]" << endl; + LOGP("fe: - fieldexpr[" << $2->gettext() << "]" << endl); $2->setexclude(true); $$ = $2; } @@ -170,13 +178,13 @@ fieldexpr: term complexfieldname: WORD { - // cerr << "cfn: WORD" << endl; + LOGP("cfn: WORD" << endl); $$ = $1; } | complexfieldname CONTAINS WORD { - // cerr << "cfn: complexfieldname ':' WORD" << endl; + LOGP("cfn: complexfieldname ':' WORD" << endl); $$ = new string(*$1 + string(":") + *$3); delete $1; delete $3; @@ -185,7 +193,7 @@ complexfieldname CONTAINS WORD term: WORD { - //cerr << "term[" << *$1 << "]" << endl; + LOGP("term[" << *$1 << "]" << endl); $$ = new Rcl::SearchDataClauseSimple(Rcl::SCLT_AND, *$1); delete $1; } @@ -197,13 +205,13 @@ WORD qualquote: QUOTED { - // cerr << "QUOTED[" << *$1 << "]" << endl; + LOGP("QUOTED[" << *$1 << "]" << endl); $$ = new Rcl::SearchDataClauseDist(Rcl::SCLT_PHRASE, *$1, 0); delete $1; } | QUOTED QUALIFIERS { - // cerr << "QUOTED[" << *$1 << "] QUALIFIERS[" << *$2 << "]" << endl; + LOGP("QUOTED[" << *$1 << "] QUALIFIERS[" << *$2 << "]" << endl); Rcl::SearchDataClauseDist *cl = new Rcl::SearchDataClauseDist(Rcl::SCLT_PHRASE, *$1, 0); qualify(cl, *$2); @@ -318,8 +326,9 @@ static int parseString(WasaParserDriver *d, yy::parser::semantic_type *yylval) break; case '"': /* End of string. 
Look for qualifiers */ - while ((c = d->GETCHAR()) && !isspace(c)) + while ((c = d->GETCHAR()) && (isalnum(c) || c == '.')) d->qualifiers().push_back(c); + d->UNGETCHAR(c); goto out; default: value->push_back(c); diff --git a/src/rcldb/searchdata.cpp b/src/rcldb/searchdata.cpp index 056d200f..3fda7988 100644 --- a/src/rcldb/searchdata.cpp +++ b/src/rcldb/searchdata.cpp @@ -91,11 +91,11 @@ bool SearchData::maybeAddAutoPhrase(Rcl::Db& db, double freqThreshold) string field; vector words; - // Walk the clause list. If we find any non simple clause or different - // field names, bail out. + // Walk the clause list. If this is not an AND list, or we find any + // non simple clause or different field names, bail out. for (qlist_it_t it = m_query.begin(); it != m_query.end(); it++) { SClType tp = (*it)->m_tp; - if (tp != SCLT_AND && tp != SCLT_OR) { + if (tp != SCLT_AND) { LOGDEB2(("SearchData::maybeAddAutoPhrase: wrong tp %d\n", tp)); return false; } diff --git a/website/download.html b/website/download.html index 027ddc75..b1b5b090 100644 --- a/website/download.html +++ b/website/download.html @@ -121,10 +121,10 @@ subdirectory, because of all the places they're referred from

recoll-1.20.6.tar.gz.

-

Release 1.21.0

+

Release 1.21.1

Not the right choice if you are after complete stability: -recoll-1.21.0.tar.gz. See what's +recoll-1.21.1.tar.gz. See what's new in the release notes.