Jean-Francois Dockes 2022-01-13 10:18:22 +00:00
commit d2d2cbff14
33 changed files with 314 additions and 191 deletions

View File

@ -13,7 +13,7 @@
PPA_KEYID=7808CE96D38B9201 PPA_KEYID=7808CE96D38B9201
RCLVERS=1.31.5 RCLVERS=1.31.6
SCOPEVERS=1.20.2.4 SCOPEVERS=1.20.2.4
GSSPVERS=1.1.1 GSSPVERS=1.1.1
PPAVERS=1 PPAVERS=1
@ -85,7 +85,7 @@ done
### KIO. ### KIO.
series="bionic focal groovy hirsute impish" #series="bionic focal hirsute impish"
series= series=
debdir=debiankio debdir=debiankio

View File

@ -1,3 +1,9 @@
recoll (1.31.6-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
* Almost no change: translation files update.
-- Jean-Francois Dockes <jf@dockes.org> Sat, 20 Dec 2021 09:25:00 +0100
recoll (1.31.5-1~ppaPPAVERS~SERIES1) SERIES; urgency=low recoll (1.31.5-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
* Fix annoying bug in tesseract OCR temporary files cleanup. * Fix annoying bug in tesseract OCR temporary files cleanup.

View File

@ -1,3 +1,9 @@
kio-recoll (1.31.6-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
* Follow recoll version
-- Jean-Francois Dockes <jf@dockes.org> Sat, 20 Dec 2021 09:25:00 +0100
kio-recoll (1.31.5-1~ppaPPAVERS~SERIES1) SERIES; urgency=low kio-recoll (1.31.5-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
* Follow recoll version * Follow recoll version

View File

@ -1 +1 @@
1.31.5 1.31.6

View File

@ -125,7 +125,7 @@
#define PACKAGE_NAME "Recoll" #define PACKAGE_NAME "Recoll"
/* Define to the full name and version of this package. */ /* Define to the full name and version of this package. */
#define PACKAGE_STRING "Recoll 1.31.5" #define PACKAGE_STRING "Recoll 1.31.6"
/* Define to the one symbol short name of this package. */ /* Define to the one symbol short name of this package. */
#define PACKAGE_TARNAME "recoll" #define PACKAGE_TARNAME "recoll"
@ -134,7 +134,7 @@
#define PACKAGE_URL "" #define PACKAGE_URL ""
/* Define to the version of this package. */ /* Define to the version of this package. */
#define PACKAGE_VERSION "1.31.5" #define PACKAGE_VERSION "1.31.6"
/* putenv parameter is const */ /* putenv parameter is const */
/* #undef PUTENV_ARG_CONST */ /* #undef PUTENV_ARG_CONST */

View File

@ -118,7 +118,7 @@
#define PACKAGE_NAME "Recoll" #define PACKAGE_NAME "Recoll"
/* Define to the full name and version of this package. */ /* Define to the full name and version of this package. */
#define PACKAGE_STRING "Recoll 1.31.5" #define PACKAGE_STRING "Recoll 1.31.6"
/* Define to the one symbol short name of this package. */ /* Define to the one symbol short name of this package. */
#define PACKAGE_TARNAME "recoll" #define PACKAGE_TARNAME "recoll"
@ -127,7 +127,7 @@
#define PACKAGE_URL "" #define PACKAGE_URL ""
/* Define to the version of this package. */ /* Define to the version of this package. */
#define PACKAGE_VERSION "1.31.5" #define PACKAGE_VERSION "1.31.6"
/* putenv parameter is const */ /* putenv parameter is const */
/* #undef PUTENV_ARG_CONST */ /* #undef PUTENV_ARG_CONST */

View File

@ -596,6 +596,7 @@ bool TextSplit::text_to_words(const string &in)
clearsplitstate(); clearsplitstate();
bool pagepending = false; bool pagepending = false;
bool nlpending = false;
bool softhyphenpending = false; bool softhyphenpending = false;
// Running count of non-alphanum chars. Reset when we see one; // Running count of non-alphanum chars. Reset when we see one;
@ -705,6 +706,10 @@ bool TextSplit::text_to_words(const string &in)
pagepending = false; pagepending = false;
newpage(m_wordpos); newpage(m_wordpos);
} }
if (nlpending) {
nlpending = false;
newline(m_wordpos);
}
break; break;
case WILD: case WILD:
@ -745,6 +750,12 @@ bool TextSplit::text_to_words(const string &in)
break; break;
} }
} else { } else {
// Note about dangling hyphens: we always strip '-' found before whitespace,
// even before a newline, then generate two terms, before and after the line
// break. We have no way to know if '-' is there because a word was broken by
// justification or if it was part of an actual compound word (would need a
// dictionary to check). A soft hyphen *should* be used if the '-' is not part
// of the text.
if (nextc == -1 || isvisiblewhite(nextc)) { if (nextc == -1 || isvisiblewhite(nextc)) {
goto SPACE; goto SPACE;
} }
@ -844,19 +855,10 @@ bool TextSplit::text_to_words(const string &in)
break; break;
case '\n': case '\n':
nlpending = true;
/* FALLTHROUGH */
case '\r': case '\r':
if (m_span.length() && *m_span.rbegin() == '-') { if (softhyphenpending) {
// if '-' is the last char before end of line, we
// strip it. We have no way to know if this is added
// because of the line split or if it was part of an
// actual compound word (would need a dictionary to
// check). As soft-hyphen *should* be used if the '-'
// is not part of the text, it is better to properly
// process a real compound word, and produce wrong
// output from wrong text. The word-emitting routine
// will strip the trailing '-'.
goto SPACE;
} else if (softhyphenpending) {
// Don't reset soft-hyphen // Don't reset soft-hyphen
continue; continue;
} else { } else {

View File

@ -73,6 +73,9 @@ public:
* just don't know about pages. */ * just don't know about pages. */
virtual void newpage(int /*pos*/) {} virtual void newpage(int /*pos*/) {}
/** Called when we encounter newline \n 0x0a. Override to use the event. */
virtual void newline(int /*pos*/) {}
// Static utility functions: // Static utility functions:
/** Count words in string, as the splitter would generate them */ /** Count words in string, as the splitter would generate them */

View File

@ -613,8 +613,7 @@ location before copy, to allow path translation computations. For
example if a dataset originally indexed as '/home/me/mydata/config' has example if a dataset originally indexed as '/home/me/mydata/config' has
been mounted to '/media/me/mydata', and the GUI is running from a copied been mounted to '/media/me/mydata', and the GUI is running from a copied
configuration, orgidxconfdir would be '/home/me/mydata/config', and configuration, orgidxconfdir would be '/home/me/mydata/config', and
curidxconfdir (as set in the copied configuration) would be curidxconfdir (as set in the copied configuration) would be '/media/me/mydata/config'.
'/media/me/mydata/config'.
.TP .TP
.BI "idxrundir = "dfn .BI "idxrundir = "dfn
Indexing process current directory. The input Indexing process current directory. The input

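A minimal sketch of the copied configuration's recoll.conf for the orgidxconfdir/curidxconfdir example above, using the paths from the manual text:

    # Copied configuration: original and current index configuration directories
    orgidxconfdir = /home/me/mydata/config
    curidxconfdir = /media/me/mydata/config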
View File

@ -170,13 +170,12 @@ listing either MIME types (e.g. audio/mpeg) or handler names
files. We need to decompress these in a files. We need to decompress these in a
temporary directory for identification, which can be wasteful in some temporary directory for identification, which can be wasteful in some
cases. Limit the waste. Negative means no limit. 0 results in no cases. Limit the waste. Negative means no limit. 0 results in no
processing of any compressed file. Default 50 MB. processing of any compressed file. Default 100 MB.
</para></listitem></varlistentry> </para></listitem></varlistentry>
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.TEXTFILEMAXMBS"> <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.TEXTFILEMAXMBS">
<term><varname>textfilemaxmbs</varname></term> <term><varname>textfilemaxmbs</varname></term>
<listitem><para>Size limit for text <listitem><para>Size limit for text files. Mostly for skipping monster logs. Default 20 MB. Use a value of -1 to
files. Mostly for skipping monster disable.
logs. Default 20 MB.
</para></listitem></varlistentry> </para></listitem></varlistentry>
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.INDEXALLFILENAMES"> <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.INDEXALLFILENAMES">
<term><varname>indexallfilenames</varname></term> <term><varname>indexallfilenames</varname></term>

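As a quick sketch of the two limits documented above, with values chosen only for illustration, the corresponding recoll.conf lines could read:

    # Negative means no limit, 0 disables processing of compressed files (default 100 MB)
    compressedfilemaxkbs = 100000
    # -1 disables the text file size limit (default 20 MB)
    textfilemaxmbs = -1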
View File

@ -10,7 +10,7 @@
<link rel="stylesheet" type="text/css" href="docbook-xsl.css"> <link rel="stylesheet" type="text/css" href="docbook-xsl.css">
<meta name="generator" content="DocBook XSL Stylesheets V1.79.1"> <meta name="generator" content="DocBook XSL Stylesheets V1.79.1">
<meta name="description" content= <meta name="description" content=
"Permission is granted to copy, distribute and/or modify this document under the terms of the GNU Free Documentation License, Version 1.3 or any later version published by the Free Software Foundation; with no Invariant Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license can be found at the following location: GNU web site. This document introduces full text search notions and describes the installation and use of the Recoll application. This version describes Recoll 1.29."> "Permission is granted to copy, distribute and/or modify this document under the terms of the GNU Free Documentation License, Version 1.3 or any later version published by the Free Software Foundation; with no Invariant Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license can be found at the following location: GNU web site. This document introduces full text search notions and describes the installation and use of the Recoll application. This version describes Recoll 1.31.">
</head> </head>
<body bgcolor="white" text="black" link="#0000FF" vlink="#840084" <body bgcolor="white" text="black" link="#0000FF" vlink="#840084"
alink="#0000FF"> alink="#0000FF">
@ -53,7 +53,7 @@ alink="#0000FF">
and describes the installation and use of the and describes the installation and use of the
<span class="application">Recoll</span> application. <span class="application">Recoll</span> application.
This version describes <span class= This version describes <span class=
"application">Recoll</span> 1.29.</p> "application">Recoll</span> 1.31.</p>
</div> </div>
</div> </div>
</div> </div>
@ -443,7 +443,7 @@ alink="#0000FF">
<p>This document introduces full text search notions and <p>This document introduces full text search notions and
describes the installation and use of the <span class= describes the installation and use of the <span class=
"application">Recoll</span> application. It is updated for "application">Recoll</span> application. It is updated for
<span class="application">Recoll</span> 1.29.</p> <span class="application">Recoll</span> 1.31.</p>
<p><span class="application">Recoll</span> was for a long <p><span class="application">Recoll</span> was for a long
time dedicated to Unix-like systems. It was only lately time dedicated to Unix-like systems. It was only lately
(2015) ported to <span class="application">MS-Windows</span>. (2015) ported to <span class="application">MS-Windows</span>.
@ -9169,14 +9169,15 @@ hasextract = False
identification, which can be wasteful in some identification, which can be wasteful in some
cases. Limit the waste. Negative means no limit. cases. Limit the waste. Negative means no limit.
0 results in no processing of any compressed 0 results in no processing of any compressed
file. Default 50 MB.</p> file. Default 100 MB.</p>
</dd> </dd>
<dt><a name= <dt><a name=
"RCL.INSTALL.CONFIG.RECOLLCONF.TEXTFILEMAXMBS" id= "RCL.INSTALL.CONFIG.RECOLLCONF.TEXTFILEMAXMBS" id=
"RCL.INSTALL.CONFIG.RECOLLCONF.TEXTFILEMAXMBS"></a><span class="term"><code class="varname">textfilemaxmbs</code></span></dt> "RCL.INSTALL.CONFIG.RECOLLCONF.TEXTFILEMAXMBS"></a><span class="term"><code class="varname">textfilemaxmbs</code></span></dt>
<dd> <dd>
<p>Size limit for text files. Mostly for skipping <p>Size limit for text files. Mostly for skipping
monster logs. Default 20 MB.</p> monster logs. Default 20 MB. Use a value of -1 to
disable.</p>
</dd> </dd>
<dt><a name= <dt><a name=
"RCL.INSTALL.CONFIG.RECOLLCONF.INDEXALLFILENAMES" "RCL.INSTALL.CONFIG.RECOLLCONF.INDEXALLFILENAMES"
@ -10757,17 +10758,28 @@ other = rclcat:other
<li class="listitem"> <li class="listitem">
<p><b>%p.&nbsp;</b>Page index. Only significant for <p><b>%p.&nbsp;</b>Page index. Only significant for
a subset of document types, currently only PDF, a subset of document types, currently only PDF,
Postscript and DVI files. Can be used to start the Postscript and DVI files. If it is set, a
editor at the right page for a match or significant term will be chosen in the query, and
snippet.</p> %p will be substituted with the first page where
the term appears. Can be used to start the editor
at the right page for a match or snippet.</p>
</li>
<li class="listitem">
<p><b>%l.&nbsp;</b>Line number. Only significant
for document types with relevant line breaks,
mostly text/plain and analogs. If it is set, a
significant term will be chosen in the query, and
%l will be substituted with the first line where
the term appears.</p>
</li> </li>
<li class="listitem"> <li class="listitem">
<p><b>%s.&nbsp;</b>Search term. The value will only <p><b>%s.&nbsp;</b>Search term. The value will only
be set for documents with indexed page numbers (ie: be set for documents with indexed page or line
PDF). The value will be one of the matched search numbers and if %p or %l is also used. The value
terms. It would allow pre-setting the value in the will be one of the matched search terms. It would
"Find" entry inside Evince for example, for easy allow pre-setting the value in the "Find" entry
highlighting of the term.</p> inside Evince for example, for easy highlighting of
the term.</p>
</li> </li>
<li class="listitem"> <li class="listitem">
<p><b>%u.&nbsp;</b>Url.</p> <p><b>%u.&nbsp;</b>Url.</p>

View File

@ -5,7 +5,7 @@
<!ENTITY RCL "<application>Recoll</application>"> <!ENTITY RCL "<application>Recoll</application>">
<!ENTITY RCLAPPS "<ulink url='http://www.recoll.org/pages/features.html#doctypes'>http://www.recoll.org/pages/features.html</ulink>"> <!ENTITY RCLAPPS "<ulink url='http://www.recoll.org/pages/features.html#doctypes'>http://www.recoll.org/pages/features.html</ulink>">
<!ENTITY RCLVERSION "1.29"> <!ENTITY RCLVERSION "1.32">
<!ENTITY XAP "<application>Xapian</application>"> <!ENTITY XAP "<application>Xapian</application>">
<!ENTITY WIN "<application>Windows</application>"> <!ENTITY WIN "<application>Windows</application>">
<!ENTITY LIN "<application>Unix</application>-like systems"> <!ENTITY LIN "<application>Unix</application>-like systems">
@ -7114,28 +7114,37 @@ other = rclcat:other
(possibly a script) to be able to handle it.</para></formalpara> (possibly a script) to be able to handle it.</para></formalpara>
</listitem> </listitem>
<listitem><formalpara><title>%M</title> <listitem>
<para>MIME type</para></formalpara> <formalpara><title>%M</title><para>MIME type</para></formalpara>
</listitem> </listitem>
<listitem><formalpara><title>%p</title> <listitem>
<para>Page index. Only significant for a subset of document <formalpara><title>%p</title><para>Page index. Only significant for a subset of
types, currently only PDF, Postscript and DVI files. Can be document types, currently only PDF, Postscript and DVI files. If it is set, a
used to start the editor at the right page for a match or significant term will be chosen in the query, and %p will be substituted with the
snippet.</para></formalpara> first page where the term appears. Can be used to start the editor at the right page
for a match or snippet.</para></formalpara>
</listitem> </listitem>
<listitem><formalpara><title>%s</title> <listitem>
<para>Search term. The value will only be set for documents <formalpara><title>%l</title><para>Line number. Only significant for document
with indexed page numbers (ie: PDF). The value will be one of types with relevant line breaks, mostly text/plain and analogs. If it is set, a
the matched search terms. It would allow pre-setting the significant term will be chosen in the query, and %l will be substituted with the
value in the "Find" entry inside Evince for example, for easy first line where the term appears.</para></formalpara>
highlighting of the term.</para></formalpara>
</listitem> </listitem>
<listitem><formalpara><title>%u</title> <listitem>
<para>Url.</para></formalpara> <formalpara><title>%s</title><para>Search term. The value will only be set for
documents with indexed page or line numbers and if %p or %l is also used. The value
will be one of the matched search terms. It would allow pre-setting the value in the
"Find" entry inside Evince for example, for easy highlighting of the
term.</para></formalpara>
</listitem> </listitem>
<listitem>
<formalpara><title>%u</title><para>Url.</para></formalpara>
</listitem>
</itemizedlist> </itemizedlist>
<para>In addition to the predefined values above, all strings like <para>In addition to the predefined values above, all strings like

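To make the %p, %l and %s substitutions described above concrete, here is a sketch of viewer entries as they could appear in the mimeview configuration; the entries are illustrative only (the evince options and the +line editor argument are assumptions, not part of this change):

    [view]
    # Open PDFs at the first matching page, pre-setting the search term
    application/pdf = evince --page-index=%p --find=%s %f
    # Open plain text at the first matching line (assumes an editor accepting +linenumber)
    text/plain = gvim +%l %f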
View File

@ -23,7 +23,7 @@ class OrgModeExtractor:
iseof = rclexecm.RclExecM.noteof iseof = rclexecm.RclExecM.noteof
if self.currentindex >= len(self.docs) -1: if self.currentindex >= len(self.docs) -1:
iseof = rclexecm.RclExecM.eofnext iseof = rclexecm.RclExecM.eofnext
self.em.setmimetype("text/plain") self.em.setmimetype("text/x-orgmode-sub")
try: try:
self.em.setfield("title", docdata.splitlines()[0]) self.em.setfield("title", docdata.splitlines()[0])
except: except:

View File

@ -59,8 +59,9 @@ bool runWebFilesMoverScript(RclConfig *config)
static string downloadsdir; static string downloadsdir;
if (downloadsdir.empty()) { if (downloadsdir.empty()) {
if (!config->getConfParam("webdownloadsdir", downloadsdir)) { if (!config->getConfParam("webdownloadsdir", downloadsdir)) {
downloadsdir = path_tildexpand("~/Downloads"); downloadsdir = "~/Downloads";
} }
downloadsdir = path_tildexpand(downloadsdir);
} }
vector<string> cmdvec; vector<string> cmdvec;
config->pythonCmd("recoll-we-move-files.py", cmdvec); config->pythonCmd("recoll-we-move-files.py", cmdvec);

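With the change above, tilde expansion is applied to the configured value as well as to the built-in default, so a recoll.conf entry like the following (path hypothetical) now resolves under the user's home directory:

    webdownloadsdir = ~/WebDownloads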
View File

@ -304,3 +304,14 @@ def stringsToString(vs):
out.append(s) out.append(s)
return " ".join(out) return " ".join(out)
def valToBool(s):
if not s:
return False
try:
val = int(s)
return val != 0
except:
pass
if type(s) == type(b''):
s = s.decode("UTF-8")
return s[0] in "tTyY"

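A short usage sketch for the new valToBool() helper, restating the behavior visible in the diff; the module name in the import is an assumption, since the file name is not shown in this view:

    # The helper appears to live in conftree.py alongside stringsToString (assumed).
    from conftree import valToBool

    valToBool("")       # False: empty or missing value
    valToBool("0")      # False: parses as integer zero
    valToBool("15")     # True: non-zero integer
    valToBool(b"yes")   # True: bytes are decoded, then the first char is tested against "tTyY"
    valToBool("False")  # False: 'F' is not in "tTyY"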
View File

@ -50,7 +50,7 @@ public:
void startElement(const std::string &nm, void startElement(const std::string &nm,
const std::map<std::string, std::string>&) override { const std::map<std::string, std::string>&) override {
std::cerr << "startElement [" << nm << "]\n"; //std::cerr << "startElement [" << nm << "]\n";
currentText.clear(); currentText.clear();
if (nm == "buttons") { if (nm == "buttons") {
radio = false; radio = false;
@ -67,7 +67,7 @@ public:
} }
} }
void endElement(const std::string& nm) override { void endElement(const std::string& nm) override {
std::cerr << "endElement [" << nm << "]\n"; //std::cerr << "endElement [" << nm << "]\n";
if (nm == "label") { if (nm == "label") {
label = u8s2qs(currentText); label = u8s2qs(currentText);
@ -102,7 +102,7 @@ public:
} }
} }
void characterData(const std::string &str) override { void characterData(const std::string &str) override {
std::cerr << "characterData [" << str << "]\n"; //std::cerr << "characterData [" << str << "]\n";
currentText += str; currentText += str;
} }

View File

@ -34,6 +34,7 @@
#include "rclmain_w.h" #include "rclmain_w.h"
#include "rclzg.h" #include "rclzg.h"
#include "pathut.h" #include "pathut.h"
#include "unacpp.h"
using namespace std; using namespace std;
@ -42,7 +43,6 @@ static const vector<string> browser_list{
"opera", "google-chrome", "chromium-browser", "opera", "google-chrome", "chromium-browser",
"palemoon", "iceweasel", "firefox", "konqueror", "epiphany"}; "palemoon", "iceweasel", "firefox", "konqueror", "epiphany"};
// Start native viewer or preview for input Doc. This is used to allow // Start native viewer or preview for input Doc. This is used to allow
// using recoll from another app (e.g. Unity Scope) to view embedded // using recoll from another app (e.g. Unity Scope) to view embedded
// result docs (docs with an ipath). We act as a proxy to extract // result docs (docs with an ipath). We act as a proxy to extract
@ -155,13 +155,27 @@ void RclMain::openWith(Rcl::Doc doc, string cmdspec)
execViewer(subs, false, execname, lcmd, cmdspec, doc); execViewer(subs, false, execname, lcmd, cmdspec, doc);
} }
void RclMain::startNativeViewer(Rcl::Doc doc, int pagenum, QString term) static bool pagenumNeeded(const std::string& cmd)
{ {
return cmd.find("%p") != std::string::npos;
}
static bool linenumNeeded(const std::string& cmd)
{
return cmd.find("%l") != std::string::npos;
}
static bool termNeeded(const std::string& cmd)
{
return cmd.find("%s") != std::string::npos;
}
void RclMain::startNativeViewer(Rcl::Doc doc, int pagenum, QString qterm)
{
std::string term = qs2utf8s(qterm);
string apptag; string apptag;
doc.getmeta(Rcl::Doc::keyapptg, &apptag); doc.getmeta(Rcl::Doc::keyapptg, &apptag);
LOGDEB("RclMain::startNativeViewer: mtype [" << doc.mimetype << LOGDEB("RclMain::startNativeViewer: mtype [" << doc.mimetype <<
"] apptag [" << apptag << "] page " << pagenum << " term [" << "] apptag [" << apptag << "] page " << pagenum << " term [" <<
qs2utf8s(term) << "] url [" << doc.url << "] ipath [" << term << "] url [" << doc.url << "] ipath [" <<
doc.ipath << "]\n"); doc.ipath << "]\n");
// Look for appropriate viewer // Look for appropriate viewer
@ -377,19 +391,19 @@ void RclMain::startNativeViewer(Rcl::Doc doc, int pagenum, QString term)
// If we are not called with a page number (which would happen for a call // If we are not called with a page number (which would happen for a call
// from the snippets window), see if we can compute a page number anyway. // from the snippets window), see if we can compute a page number anyway.
if (pagenum == -1) { if (m_source && pagenum == -1 && (pagenumNeeded(cmd) || termNeeded(cmd) || linenumNeeded(cmd))) {
pagenum = 1; pagenum = m_source->getFirstMatchPage(doc, term);
string lterm;
if (m_source)
pagenum = m_source->getFirstMatchPage(doc, lterm);
if (pagenum == -1) if (pagenum == -1)
pagenum = 1; pagenum = 1;
else // We get the match term used to compute the page
term = QString::fromUtf8(lterm.c_str());
} }
char cpagenum[20];
sprintf(cpagenum, "%d", pagenum);
int line = 1;
if (m_source && !term.empty() && linenumNeeded(cmd)) {
if (doc.text.empty()) {
rcldb->getDocRawText(doc);
}
line = m_source->getFirstMatchLine(doc, term);
}
// Substitute %xx inside arguments // Substitute %xx inside arguments
string efftime; string efftime;
@ -408,9 +422,10 @@ void RclMain::startNativeViewer(Rcl::Doc doc, int pagenum, QString term)
subs["f"] = fn; subs["f"] = fn;
subs["F"] = fn; subs["F"] = fn;
subs["i"] = FileInterner::getLastIpathElt(doc.ipath); subs["i"] = FileInterner::getLastIpathElt(doc.ipath);
subs["l"] = ulltodecstr(line);
subs["M"] = doc.mimetype; subs["M"] = doc.mimetype;
subs["p"] = cpagenum; subs["p"] = ulltodecstr(pagenum);
subs["s"] = (const char*)term.toLocal8Bit(); subs["s"] = term;
subs["U"] = url_encode(url); subs["U"] = url_encode(url);
subs["u"] = url; subs["u"] = url;
// Let %(xx) access all metadata. // Let %(xx) access all metadata.

View File

@ -140,8 +140,7 @@ public slots:
virtual void showActionsSearch(); virtual void showActionsSearch();
virtual void startPreview(int docnum, Rcl::Doc doc, int keymods); virtual void startPreview(int docnum, Rcl::Doc doc, int keymods);
virtual void startPreview(Rcl::Doc); virtual void startPreview(Rcl::Doc);
virtual void startNativeViewer(Rcl::Doc, int pagenum = -1, virtual void startNativeViewer(Rcl::Doc, int pagenum = -1, QString term = QString());
QString term = QString());
virtual void openWith(Rcl::Doc, string); virtual void openWith(Rcl::Doc, string);
virtual void saveDocToFile(Rcl::Doc); virtual void saveDocToFile(Rcl::Doc);
virtual void previewNextInTab(Preview *, int sid, int docnum); virtual void previewNextInTab(Preview *, int sid, int docnum);

View File

@ -1,4 +1,4 @@
/* Copyright (C) 2012 J.F.Dockes /* Copyright (C) 2012-2021 J.F.Dockes
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or * the Free Software Foundation; either version 2 of the License, or
@ -67,8 +67,7 @@ using namespace std;
class PlainToRichQtSnippets : public PlainToRich { class PlainToRichQtSnippets : public PlainToRich {
public: public:
virtual string startMatch(unsigned int) { virtual string startMatch(unsigned int) {
return string("<span class='rclmatch' style='") return string("<span class='rclmatch' style='") + qs2utf8s(prefs.qtermstyle) + string("'>");
+ qs2utf8s(prefs.qtermstyle) + string("'>");
} }
virtual string endMatch() { virtual string endMatch() {
return string("</span>"); return string("</span>");
@ -82,12 +81,10 @@ void SnippetsW::init()
QPushButton *searchButton = new QPushButton(tr("Search")); QPushButton *searchButton = new QPushButton(tr("Search"));
searchButton->setAutoDefault(false); searchButton->setAutoDefault(false);
buttonBox->addButton(searchButton, QDialogButtonBox::ActionRole); buttonBox->addButton(searchButton, QDialogButtonBox::ActionRole);
// setWindowFlags(Qt::WindowStaysOnTopHint);
searchFM->hide(); searchFM->hide();
onNewShortcuts(); onNewShortcuts();
connect(&SCBase::scBase(), SIGNAL(shortcutsChanged()), connect(&SCBase::scBase(), SIGNAL(shortcutsChanged()), this, SLOT(onNewShortcuts()));
this, SLOT(onNewShortcuts()));
QPushButton *closeButton = buttonBox->button(QDialogButtonBox::Close); QPushButton *closeButton = buttonBox->button(QDialogButtonBox::Close);
if (closeButton) if (closeButton)
@ -105,11 +102,9 @@ void SnippetsW::init()
browserw = new QWebView(this); browserw = new QWebView(this);
verticalLayout->insertWidget(0, browserw); verticalLayout->insertWidget(0, browserw);
browser->setUrl(QUrl(QString::fromUtf8("about:blank"))); browser->setUrl(QUrl(QString::fromUtf8("about:blank")));
connect(browser, SIGNAL(linkClicked(const QUrl &)), connect(browser, SIGNAL(linkClicked(const QUrl &)), this, SLOT(onLinkClicked(const QUrl &)));
this, SLOT(onLinkClicked(const QUrl &)));
browser->page()->setLinkDelegationPolicy(QWebPage::DelegateAllLinks); browser->page()->setLinkDelegationPolicy(QWebPage::DelegateAllLinks);
browser->page()->currentFrame()->setScrollBarPolicy(Qt::Horizontal, browser->page()->currentFrame()->setScrollBarPolicy(Qt::Horizontal, Qt::ScrollBarAlwaysOff);
Qt::ScrollBarAlwaysOff);
QWEBSETTINGS *ws = browser->page()->settings(); QWEBSETTINGS *ws = browser->page()->settings();
if (prefs.reslistfontfamily != "") { if (prefs.reslistfontfamily != "") {
ws->setFontFamily(QWEBSETTINGS::StandardFont, prefs.reslistfontfamily); ws->setFontFamily(QWEBSETTINGS::StandardFont, prefs.reslistfontfamily);
@ -136,8 +131,7 @@ void SnippetsW::init()
#else #else
browserw = new QTextBrowser(this); browserw = new QTextBrowser(this);
verticalLayout->insertWidget(0, browserw); verticalLayout->insertWidget(0, browserw);
connect(browser, SIGNAL(anchorClicked(const QUrl &)), connect(browser, SIGNAL(anchorClicked(const QUrl &)), this, SLOT(onLinkClicked(const QUrl &)));
this, SLOT(onLinkClicked(const QUrl &)));
browser->setReadOnly(true); browser->setReadOnly(true);
browser->setUndoRedoEnabled(false); browser->setUndoRedoEnabled(false);
browser->setOpenLinks(false); browser->setOpenLinks(false);
@ -183,8 +177,7 @@ void SnippetsW::createPopupMenu(const QPoint& pos)
{ {
QMenu *popup = new QMenu(this); QMenu *popup = new QMenu(this);
if (m_sortingByPage) { if (m_sortingByPage) {
popup->addAction(tr("Sort By Relevance"), this, popup->addAction(tr("Sort By Relevance"), this, SLOT(reloadByRelevance()));
SLOT(reloadByRelevance()));
} else { } else {
popup->addAction(tr("Sort By Page"), this, SLOT(reloadByPage())); popup->addAction(tr("Sort By Page"), this, SLOT(reloadByPage()));
} }
@ -230,29 +223,22 @@ void SnippetsW::onSetDoc(Rcl::Doc doc, std::shared_ptr<DocSequence> source)
source->getTerms(hdata); source->getTerms(hdata);
ostringstream oss; ostringstream oss;
oss << oss << "<html><head>"
"<html><head>" "<meta http-equiv=\"content-type\" content=\"text/html; charset=utf-8\">";
"<meta http-equiv=\"content-type\" "
"content=\"text/html; charset=utf-8\">";
oss << "<style type=\"text/css\">\nbody,table,select,input {\n"; oss << "<style type=\"text/css\">\nbody,table,select,input {\n";
oss << "color: " + qs2utf8s(prefs.fontcolor) + ";\n"; oss << "color: " + qs2utf8s(prefs.fontcolor) + ";\n";
oss << "}\n</style>\n"; oss << "}\n</style>\n";
oss << qs2utf8s(prefs.darkreslistheadertext) << qs2utf8s(prefs.reslistheadertext); oss << qs2utf8s(prefs.darkreslistheadertext) << qs2utf8s(prefs.reslistheadertext);
oss << oss << "</head><body><table class=\"snippets\">";
"</head>"
"<body>"
"<table class=\"snippets\">"
;
g_hiliter.set_inputhtml(false); g_hiliter.set_inputhtml(false);
bool nomatch = true; bool nomatch = true;
for (const auto& snippet : vpabs) { for (const auto& snippet : vpabs) {
if (snippet.page == -1) { if (snippet.page == -1) {
oss << "<tr><td colspan=\"2\">" << oss << "<tr><td colspan=\"2\">" << snippet.snippet << "</td></tr>" << "\n";
snippet.snippet << "</td></tr>" << endl;
continue; continue;
} }
list<string> lr; list<string> lr;
@ -263,13 +249,12 @@ void SnippetsW::onSetDoc(Rcl::Doc doc, std::shared_ptr<DocSequence> source)
nomatch = false; nomatch = false;
oss << "<tr><td>"; oss << "<tr><td>";
if (snippet.page > 0) { if (snippet.page > 0) {
oss << "<a href=\"http://h/P" << snippet.page << "T" << oss << "<a href=\"http://h/P" << snippet.page << "T" << snippet.term << "\">" <<
snippet.term << "\">" "P.&nbsp;" << snippet.page << "</a>";
<< "P.&nbsp;" << snippet.page << "</a>";
} }
oss << "</td><td>" << lr.front().c_str() << "</td></tr>" << endl; oss << "</td><td>" << lr.front().c_str() << "</td></tr>" << "\n";
} }
oss << "</table>" << endl; oss << "</table>" << "\n";
if (nomatch) { if (nomatch) {
oss.str("<html><head></head><body>\n"); oss.str("<html><head></head><body>\n");
oss << qs2utf8s(tr("<p>Sorry, no exact match was found within limits. " oss << qs2utf8s(tr("<p>Sorry, no exact match was found within limits. "
@ -278,12 +263,12 @@ void SnippetsW::onSetDoc(Rcl::Doc doc, std::shared_ptr<DocSequence> source)
} }
oss << "\n</body></html>"; oss << "\n</body></html>";
#if defined(USING_WEBKIT) || defined(USING_WEBENGINE) #if defined(USING_WEBKIT) || defined(USING_WEBENGINE)
browser->setHtml(QString::fromUtf8(oss.str().c_str())); browser->setHtml(u8s2qs(oss.str()));
#else #else
browser->clear(); browser->clear();
browser->append("."); browser->append(".");
browser->clear(); browser->clear();
browser->insertHtml(QString::fromUtf8(oss.str().c_str())); browser->insertHtml(u8s2qs(oss.str()));
browser->moveCursor (QTextCursor::Start); browser->moveCursor (QTextCursor::Start);
browser->ensureCursorVisible(); browser->ensureCursorVisible();
#endif #endif
@ -354,8 +339,7 @@ void SnippetsW::onLinkClicked(const QUrl &url)
string term; string term;
if (termpos != string::npos) if (termpos != string::npos)
term = ascurl.substr(termpos+1); term = ascurl.substr(termpos+1);
emit startNativeViewer(m_doc, page, emit startNativeViewer(m_doc, page, u8s2qs(term));
QString::fromUtf8(term.c_str()));
return; return;
} }
} }

View File

@ -111,6 +111,9 @@ public:
virtual int getFirstMatchPage(Rcl::Doc&, std::string&) { virtual int getFirstMatchPage(Rcl::Doc&, std::string&) {
return -1; return -1;
} }
virtual int getFirstMatchLine(const Rcl::Doc&, const std::string&) {
return 1;
}
/** Get duplicates. */ /** Get duplicates. */
virtual bool docDups(const Rcl::Doc&, std::vector<Rcl::Doc>&) { virtual bool docDups(const Rcl::Doc&, std::vector<Rcl::Doc>&) {
return false; return false;

View File

@ -126,6 +126,17 @@ int DocSequenceDb::getFirstMatchPage(Rcl::Doc &doc, string& term)
return -1; return -1;
} }
int DocSequenceDb::getFirstMatchLine(const Rcl::Doc &doc, const string& term)
{
std::unique_lock<std::mutex> locker(o_dblock);
if (!setQuery())
return false;
if (m_q->whatDb()) {
return m_q->getFirstMatchLine(doc, term);
}
return 1;
}
list<string> DocSequenceDb::expand(Rcl::Doc &doc) list<string> DocSequenceDb::expand(Rcl::Doc &doc)
{ {
std::unique_lock<std::mutex> locker(o_dblock); std::unique_lock<std::mutex> locker(o_dblock);

View File

@ -43,6 +43,7 @@ public:
virtual bool getAbstract(Rcl::Doc &doc, std::vector<std::string>&) override; virtual bool getAbstract(Rcl::Doc &doc, std::vector<std::string>&) override;
virtual int getFirstMatchPage(Rcl::Doc&, std::string& term) override; virtual int getFirstMatchPage(Rcl::Doc&, std::string& term) override;
virtual int getFirstMatchLine(const Rcl::Doc&, const std::string& term) override;
virtual bool docDups(const Rcl::Doc& doc, std::vector<Rcl::Doc>& dups) virtual bool docDups(const Rcl::Doc& doc, std::vector<Rcl::Doc>& dups)
override; override;
virtual std::string getDescription() override; virtual std::string getDescription() override;

View File

@ -1,4 +1,4 @@
/* Copyright (C) 2004 J.F.Dockes /* Copyright (C) 2004-2021 J.F.Dockes
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or * the Free Software Foundation; either version 2 of the License, or
@ -60,8 +60,7 @@ public:
* @param in raw text out of internfile. * @param in raw text out of internfile.
* @param out rich text output, divided in chunks (to help our caller * @param out rich text output, divided in chunks (to help our caller
* avoid inserting half tags into textedit which doesnt like it) * avoid inserting half tags into textedit which doesnt like it)
* @param in hdata terms and groups to be highlighted. These are * @param in hdata terms and groups to be highlighted. See utils/hldata.h
* lowercase and unaccented.
* @param chunksize max size of chunks in output list * @param chunksize max size of chunks in output list
*/ */
virtual bool plaintorich(const std::string &in, std::list<std::string> &out, virtual bool plaintorich(const std::string &in, std::list<std::string> &out,

View File

@ -141,11 +141,9 @@ public:
// add/update fragment definition. // add/update fragment definition.
virtual bool takeword(const std::string& term, int pos, int bts, int bte) { virtual bool takeword(const std::string& term, int pos, int bts, int bte) {
LOGDEB1("takeword: [" << term << "] bytepos: "<<bts<<":"<<bte<<endl); LOGDEB1("takeword: [" << term << "] bytepos: "<<bts<<":"<<bte<<endl);
// Limit time taken with monster documents. The resulting // Limit time taken with monster documents. The resulting abstract will be incorrect or
// abstract will be incorrect or inexistent, but this is // inexistent, but this is better than taking forever (the default cutoff value comes from
// better than taking forever (the default cutoff value comes // the snippetMaxPosWalk configuration parameter, and is 10E6)
// from the snippetMaxPosWalk configuration parameter, and is
// 10E6)
if (maxtermcount && termcount++ > maxtermcount) { if (maxtermcount && termcount++ > maxtermcount) {
LOGINF("Rclabsfromtext: stopping because maxtermcount reached: "<< LOGINF("Rclabsfromtext: stopping because maxtermcount reached: "<<
maxtermcount << endl); maxtermcount << endl);
@ -154,8 +152,7 @@ public:
} }
// Also limit the number of fragments (just in case safety) // Also limit the number of fragments (just in case safety)
if (m_fragments.size() > maxtermcount / 100) { if (m_fragments.size() > maxtermcount / 100) {
LOGINF("Rclabsfromtext: stopping because maxfragments reached: "<< LOGINF("Rclabsfromtext: stopping: max fragments count: " << maxtermcount/100 << "\n");
maxtermcount/100 << endl);
retflags |= ABSRES_TRUNC; retflags |= ABSRES_TRUNC;
return false; return false;
} }
@ -193,8 +190,7 @@ public:
m_curterm = term; m_curterm = term;
m_curtermcoef = coef; m_curtermcoef = coef;
} else { } else {
LOGDEB2("Extending current fragment: " << m_remainingWords << LOGDEB2("Extending current fragment: "<<m_remainingWords<<" -> "<<m_ctxwords<< "\n");
" -> " << m_ctxwords << endl);
m_extcount++; m_extcount++;
#ifdef COMPUTE_HLZONES #ifdef COMPUTE_HLZONES
if (m_prevwordhit) { if (m_prevwordhit) {
@ -215,9 +211,8 @@ public:
m_curfragcoef += coef; m_curfragcoef += coef;
m_remainingWords = m_ctxwords + 1; m_remainingWords = m_ctxwords + 1;
if (m_extcount > 5) { if (m_extcount > 5) {
// Limit expansion of contiguous fragments (this is to // Limit expansion of contiguous fragments (this is to avoid common terms in search
// avoid common terms in search causing long // causing long heavyweight meaningless fragments. Also, limit length).
// heavyweight meaningless fragments. Also, limit length).
m_remainingWords = 1; m_remainingWords = 1;
m_extcount = 0; m_extcount = 0;
} }
@ -247,18 +242,14 @@ public:
LOGDEB1("FRAGMENT: from byte " << m_curfrag.first << LOGDEB1("FRAGMENT: from byte " << m_curfrag.first <<
" to byte " << m_curfrag.second << endl); " to byte " << m_curfrag.second << endl);
LOGDEB1("FRAGMENT TEXT [" << m_rawtext.substr( LOGDEB1("FRAGMENT TEXT [" << m_rawtext.substr(
m_curfrag.first, m_curfrag.second-m_curfrag.first) m_curfrag.first, m_curfrag.second-m_curfrag.first) << "]\n");
<< "]\n"); // We used to not push weak fragments if we had a lot already. This can cause
// We used to not push weak fragments if we had a lot // problems if the fragments we drop are actually group fragments (which have not
// already. This can cause problems if the fragments // got their boost yet). The right cut value is difficult to determine, because the
// we drop are actually group fragments (which have // absolute values of the coefs depend on many things (index size, etc.) The old
// not got their boost yet). The right cut value is // test was if (m_totalcoef < 5.0 || m_curfragcoef >= 1.0) We now just avoid
// difficult to determine, because the absolute values // creating a monster by testing the current fragments count at the top of the
// of the coefs depend on many things (index size, // function
// etc.) The old test was if (m_totalcoef < 5.0 ||
// m_curfragcoef >= 1.0) We now just avoid creating a
// monster by testing the current fragments count at
// the top of the function
m_fragments.push_back(MatchFragment(m_curfrag.first, m_fragments.push_back(MatchFragment(m_curfrag.first,
m_curfrag.second, m_curfrag.second,
m_curfragcoef, m_curfragcoef,
@ -298,8 +289,7 @@ public:
m_curtermcoef = 0.0; m_curtermcoef = 0.0;
} }
LOGDEB("TextSplitABS: stored total " << m_fragments.size() << LOGDEB("TextSplitABS: stored total " << m_fragments.size() << " fragments" << endl);
" fragments" << endl);
vector<GroupMatchEntry> tboffs; vector<GroupMatchEntry> tboffs;
// Look for matches to PHRASE and NEAR term groups and finalize // Look for matches to PHRASE and NEAR term groups and finalize
@ -340,9 +330,8 @@ public:
} }
auto fragit = m_fragments.begin(); auto fragit = m_fragments.begin();
for (const auto& grpmatch : tboffs) { for (const auto& grpmatch : tboffs) {
LOGDEB2("LOOKING FOR FRAGMENT: group: " << grpmatch.offs.first << LOGDEB2("LOOKING FOR FRAGMENT: group: " << grpmatch.offs.first << "-" <<
"-" << grpmatch.offs.second << " curfrag " << grpmatch.offs.second<<" curfrag "<<fragit->start<<"-"<<fragit->stop<<"\n");
fragit->start << "-" << fragit->stop << endl);
while (fragit->stop < grpmatch.offs.first) { while (fragit->stop < grpmatch.offs.first) {
fragit++; fragit++;
if (fragit == m_fragments.end()) { if (fragit == m_fragments.end()) {
@ -417,21 +406,19 @@ int Query::Native::abstractFromText(
bool sortbypage bool sortbypage
) )
{ {
(void)chron; PRETEND_USE(chron);
LOGABS("abstractFromText: entry: " << chron.millis() << "mS\n"); LOGABS("abstractFromText: entry: " << chron.millis() << "mS\n");
string rawtext; string rawtext;
if (!ndb->getRawText(docid, rawtext)) { if (!ndb->getRawText(docid, rawtext)) {
LOGDEB0("abstractFromText: can't fetch text\n"); LOGDEB0("abstractFromText: can't fetch text\n");
return ABSRES_ERROR; return ABSRES_ERROR;
} }
LOGABS("abstractFromText: got raw text: size " << rawtext.size() << " " << LOGABS("abstractFromText: got raw text: size "<<rawtext.size()<<" "<<chron.millis()<<"mS\n");
chron.millis() << "mS\n");
#if 0 && ! (XAPIAN_MAJOR_VERSION <= 1 && XAPIAN_MINOR_VERSION <= 2) && \ #if 0 && XAPIAN_AT_LEAST(1,3,5)
(defined(RAWTEXT_IN_DATA))
// Tryout the Xapian internal method. // Tryout the Xapian internal method.
string snippet = xmset.snippet(rawtext); string snippet = xmset.snippet(rawtext, 60);
LOGDEB("SNIPPET: [" << snippet << "] END SNIPPET\n"); std::cerr << "XAPIAN SNIPPET: [" << snippet << "] END SNIPPET\n";
#endif #endif
// We need the q coefs for individual terms // We need the q coefs for individual terms
@ -452,8 +439,7 @@ int Query::Native::abstractFromText(
} }
LOGABS("abstractFromText: getterms: " << chron.millis() << "mS\n"); LOGABS("abstractFromText: getterms: " << chron.millis() << "mS\n");
TextSplitABS splitter(rawtext, matchTerms, hld, wordcoefs, ctxwords, TextSplitABS splitter(rawtext, matchTerms, hld, wordcoefs, ctxwords, TextSplit::TXTS_NONE,
TextSplit::TXTS_NONE,
m_q->m_snipMaxPosWalk); m_q->m_snipMaxPosWalk);
splitter.text_to_words(rawtext); splitter.text_to_words(rawtext);
LOGABS("abstractFromText: text_to_words: " << chron.millis() << "mS\n"); LOGABS("abstractFromText: text_to_words: " << chron.millis() << "mS\n");
@ -484,8 +470,7 @@ int Query::Native::abstractFromText(
// main term and the page positions. // main term and the page positions.
unsigned int count = 0; unsigned int count = 0;
for (const auto& entry : result) { for (const auto& entry : result) {
string frag( string frag(fixfrag(rawtext.substr(entry.start, entry.stop - entry.start)));
fixfrag(rawtext.substr(entry.start, entry.stop - entry.start)));
#ifdef COMPUTE_HLZONES #ifdef COMPUTE_HLZONES
// This would need to be modified to take tag parameters // This would need to be modified to take tag parameters
@ -506,8 +491,7 @@ int Query::Native::abstractFromText(
if (page < 0) if (page < 0)
page = 0; page = 0;
} }
LOGDEB0("=== FRAGMENT: p. " << page << " Coef: " << entry.coef << LOGDEB0("=== FRAGMENT: p. " << page << " Coef: " << entry.coef << ": " << frag << endl);
": " << frag << endl);
vabs.push_back(Snippet(page, frag).setTerm(entry.term)); vabs.push_back(Snippet(page, frag).setTerm(entry.term));
if (count++ >= maxtotaloccs) if (count++ >= maxtotaloccs)
break; break;
@ -515,4 +499,45 @@ int Query::Native::abstractFromText(
return ABSRES_OK | splitter.getretflags(); return ABSRES_OK | splitter.getretflags();
} }
class TermLineSplitter : public TextSplit {
public:
TermLineSplitter(const std::string& term)
: TextSplit(TextSplit::TXTS_NOSPANS), m_term(term) {
}
bool takeword(const std::string& _term, int, int, int) override {
std::string term;
if (o_index_stripchars) {
if (!unacmaybefold(_term, term, "UTF-8", UNACOP_UNACFOLD)) {
LOGINFO("TermLineSplitter::takeword: unac failed for [" << term << "]\n");
return true;
}
}
if (term == m_term) {
return false;
}
return true;
}
void newline(int) override {
m_line++;
}
int getline() {
return m_line;
}
private:
int m_line{1};
std::string m_term;
};
int Query::getFirstMatchLine(const Doc &doc, const std::string& term)
{
int line = 1;
TermLineSplitter splitter(term);
bool ret = splitter.text_to_words(doc.text);
// The splitter takeword() breaks by returning false as soon as the term is found
if (ret == false) {
line = splitter.getline();
}
return line;
}
} }

View File

@ -254,7 +254,7 @@ double Query::Native::qualityTerms(Xapian::docid docid,
} }
// Return page number for first match of "significant" term. // Choose most interesting term and return the page number for its first match
int Query::Native::getFirstMatchPage(Xapian::docid docid, string& term) int Query::Native::getFirstMatchPage(Xapian::docid docid, string& term)
{ {
LOGDEB("Query::Native::getFirstMatchPage\n"); LOGDEB("Query::Native::getFirstMatchPage\n");
@ -286,9 +286,7 @@ int Query::Native::getFirstMatchPage(Xapian::docid docid, string& term)
qualityTerms(docid, terms, byQ); qualityTerms(docid, terms, byQ);
for (auto mit = byQ.rbegin(); mit != byQ.rend(); mit++) { for (auto mit = byQ.rbegin(); mit != byQ.rend(); mit++) {
for (vector<string>::const_iterator qit = mit->second.begin(); for (const auto& qterm : mit->second) {
qit != mit->second.end(); qit++) {
string qterm = *qit;
Xapian::PositionIterator pos; Xapian::PositionIterator pos;
string emptys; string emptys;
try { try {
@ -619,9 +617,8 @@ int Query::Native::abstractFromIndex(
// possibly retried by our caller. // possibly retried by our caller.
// //
// @param[out] vabs the abstract is returned as a vector of snippets. // @param[out] vabs the abstract is returned as a vector of snippets.
int Query::Native::makeAbstract(Xapian::docid docid, int Query::Native::makeAbstract(
vector<Snippet>& vabs, Xapian::docid docid, vector<Snippet>& vabs, int imaxoccs, int ictxwords, bool sortbypage)
int imaxoccs, int ictxwords, bool sortbypage)
{ {
chron.restart(); chron.restart();
LOGDEB("makeAbstract: docid " << docid << " imaxoccs " << LOGDEB("makeAbstract: docid " << docid << " imaxoccs " <<

View File

@ -96,10 +96,13 @@ const string pathelt_prefix = "XP";
static const string udi_prefix("Q"); static const string udi_prefix("Q");
static const string parent_prefix("F"); static const string parent_prefix("F");
// Special terms to mark begin/end of field (for anchored searches), and // Special terms to mark begin/end of field (for anchored searches).
// page breaks
string start_of_field_term; string start_of_field_term;
string end_of_field_term; string end_of_field_term;
// Special term for page breaks. Note that we use a complicated mechanism for multiple page
// breaks at the same position, though it would probably have been simpler to use XXPG/n terms
// instead (did not try to implement though). A change would force users to reindex.
const string page_break_term = "XXPG/"; const string page_break_term = "XXPG/";
// Special term to mark documents with children. // Special term to mark documents with children.
@ -1846,16 +1849,14 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi, Doc &doc)
} }
} }
// If empty pages (multiple break at same pos) were recorded, save // If empty pages (multiple break at same pos) were recorded, save them (this is
// them (this is because we have no way to record them in the // because we have no way to record them in the Xapian list)
// Xapian list
if (!tpidx.m_pageincrvec.empty()) { if (!tpidx.m_pageincrvec.empty()) {
ostringstream multibreaks; ostringstream multibreaks;
for (unsigned int i = 0; i < tpidx.m_pageincrvec.size(); i++) { for (unsigned int i = 0; i < tpidx.m_pageincrvec.size(); i++) {
if (i != 0) if (i != 0)
multibreaks << ","; multibreaks << ",";
multibreaks << tpidx.m_pageincrvec[i].first << "," << multibreaks << tpidx.m_pageincrvec[i].first << "," << tpidx.m_pageincrvec[i].second;
tpidx.m_pageincrvec[i].second;
} }
RECORD_APPEND(record, string(cstr_mbreaks), multibreaks.str()); RECORD_APPEND(record, string(cstr_mbreaks), multibreaks.str());
} }

View File

@ -360,7 +360,6 @@ int Query::getFirstMatchPage(const Doc &doc, string& term)
return m_reason.empty() ? pagenum : -1; return m_reason.empty() ? pagenum : -1;
} }
// Mset size // Mset size
// Note: times for retrieving (multiple times)all docs from a sample // Note: times for retrieving (multiple times)all docs from a sample
// 25k docs db (q: mime:*) // 25k docs db (q: mime:*)
@ -511,8 +510,7 @@ vector<string> Query::expand(const Doc &doc)
Xapian::ESet eset = m_nq->xenquire->get_eset(20, rset, false); Xapian::ESet eset = m_nq->xenquire->get_eset(20, rset, false);
LOGDEB("ESet terms:\n"); LOGDEB("ESet terms:\n");
// We filter out the special terms // We filter out the special terms
for (Xapian::ESetIterator it = eset.begin(); for (Xapian::ESetIterator it = eset.begin(); it != eset.end(); it++) {
it != eset.end(); it++) {
LOGDEB(" [" << (*it) << "]\n"); LOGDEB(" [" << (*it) << "]\n");
if ((*it).empty() || has_prefix(*it)) if ((*it).empty() || has_prefix(*it))
continue; continue;

View File

@ -115,10 +115,18 @@ public:
// Returned as a vector of pair<page,snippet> page is 0 if unknown // Returned as a vector of pair<page,snippet> page is 0 if unknown
int makeDocAbstract(const Doc &doc, std::vector<Snippet>& abst, int makeDocAbstract(const Doc &doc, std::vector<Snippet>& abst,
int maxoccs= -1, int ctxwords= -1,bool sortbypage=false); int maxoccs= -1, int ctxwords= -1,bool sortbypage=false);
/** Retrieve page number for first match for "significant" query term
* @param term returns the chosen term */ /** Choose most interesting term and return the page number for its first match
* @param term returns the chosen term
* @return page number or -1 if term not found or other issue
*/
int getFirstMatchPage(const Doc &doc, std::string& term); int getFirstMatchPage(const Doc &doc, std::string& term);
/** Compute line number for first match of term. Only works if doc.text has text.
* This uses a text split. Both this and the above getFirstMatchPage() could be done and saved
* while we compute the abstracts, quite a lot of waste here. */
int getFirstMatchLine(const Doc &doc, const std::string& term);
/** Retrieve a reference to the searchData we are using */ /** Retrieve a reference to the searchData we are using */
std::shared_ptr<SearchData> getSD() { std::shared_ptr<SearchData> getSD() {
return m_sd; return m_sd;

View File

@ -225,6 +225,7 @@ text/x-lua = internal
text/x-mail = internal text/x-mail = internal
text/x-man = exec rclman;maxseconds=30 text/x-man = exec rclman;maxseconds=30
text/x-orgmode = execm rclorgmode.py text/x-orgmode = execm rclorgmode.py
text/x-orgmode-sub = internal text/plain
text/x-perl = internal text/plain text/x-perl = internal text/plain
text/x-purple-html-log = internal text/html text/x-purple-html-log = internal text/html
text/x-purple-log = exec rclpurple text/x-purple-log = exec rclpurple
@ -359,6 +360,7 @@ text/x-java = source
text/x-lua = source text/x-lua = source
text/x-mail = message text/x-mail = message
text/x-man = document text/x-man = document
text/x-orgmode = document
text/x-perl = source text/x-perl = source
text/x-php = source text/x-php = source
text/x-purple-html-log = pidgin text/x-purple-html-log = pidgin
@ -443,6 +445,7 @@ text = \
text/x-ini \ text/x-ini \
text/x-java \ text/x-java \
text/x-man \ text/x-man \
text/x-orgmode \
text/x-perl \ text/x-perl \
text/x-php \ text/x-php \
text/x-python \ text/x-python \

View File

@ -214,12 +214,13 @@ nomd5types = rclaudio
# files.</brief><descr>We need to decompress these in a # files.</brief><descr>We need to decompress these in a
# temporary directory for identification, which can be wasteful in some # temporary directory for identification, which can be wasteful in some
# cases. Limit the waste. Negative means no limit. 0 results in no # cases. Limit the waste. Negative means no limit. 0 results in no
# processing of any compressed file. Default 50 MB.</descr></var> # processing of any compressed file. Default 100 MB.</descr></var>
compressedfilemaxkbs = 100000 compressedfilemaxkbs = 100000
# <var name="textfilemaxmbs" type="int"><brief>Size limit for text # <var name="textfilemaxmbs" type="int">
# files.</brief><descr>Mostly for skipping monster # <brief>Size limit for text files.</brief>
# logs. Default 20 MB.</descr></var> # <descr>Mostly for skipping monster logs. Default 20 MB. Use a value of -1 to
# disable.</descr></var>
textfilemaxmbs = 20 textfilemaxmbs = 20
# <var name="indexallfilenames" type="bool"><brief>Index the file names of # <var name="indexallfilenames" type="bool"><brief>Index the file names of

View File

@ -619,11 +619,17 @@ static const string& thumbnailsdir()
return thumbnailsd; return thumbnailsd;
} }
// Place for 1024x1024 files
static const string thmbdirxxlarge = "xx-large";
// Place for 512x512 files
static const string thmbdirxlarge = "x-large";
// Place for 256x256 files // Place for 256x256 files
static const string thmbdirlarge = "large"; static const string thmbdirlarge = "large";
// 128x128 // 128x128
static const string thmbdirnormal = "normal"; static const string thmbdirnormal = "normal";
static const vector<string> thmbdirs{thmbdirxxlarge, thmbdirxlarge, thmbdirlarge, thmbdirnormal};
static void thumbname(const string& url, string& name) static void thumbname(const string& url, string& name)
{ {
string digest; string digest;
@ -635,26 +641,47 @@ static void thumbname(const string& url, string& name)
bool thumbPathForUrl(const string& url, int size, string& path) bool thumbPathForUrl(const string& url, int size, string& path)
{ {
string name; string name, path128, path256, path512, path1024;
thumbname(url, name); thumbname(url, name);
if (size <= 128) { if (size <= 128) {
path = path_cat(thumbnailsdir(), thmbdirnormal); path = path_cat(thumbnailsdir(), thmbdirnormal);
path = path_cat(path, name); path = path_cat(path, name);
path128 = path;
} else if (size <= 256) {
path = path_cat(thumbnailsdir(), thmbdirlarge);
path = path_cat(path, name);
path256 = path;
} else if (size <= 512) {
path = path_cat(thumbnailsdir(), thmbdirxlarge);
path = path_cat(path, name);
path512 = path;
} else {
path = path_cat(thumbnailsdir(), thmbdirxxlarge);
path = path_cat(path, name);
path1024 = path;
}
if (access(path.c_str(), R_OK) == 0) {
return true;
}
// Not found in requested size. Try to find any size and return it. Let the client scale.
for (const auto& tdir : thmbdirs) {
path = path_cat(thumbnailsdir(), tdir);
path = path_cat(path, name);
if (access(path.c_str(), R_OK) == 0) { if (access(path.c_str(), R_OK) == 0) {
return true; return true;
} }
} }
path = path_cat(thumbnailsdir(), thmbdirlarge);
path = path_cat(path, name);
if (access(path.c_str(), R_OK) == 0) {
return true;
}
// File does not exist. Path corresponds to the large version at this point, // File does not exist. Return appropriate path anyway.
// fix it if needed.
if (size <= 128) { if (size <= 128) {
path = path_cat(path_home(), thmbdirnormal); path = path128;
path = path_cat(path, name); } else if (size <= 256) {
path = path256;
} else if (size <= 512) {
path = path512;
} else {
path = path1024;
} }
return false; return false;
} }

View File

@ -318,7 +318,7 @@ template <class T> void stringsToCSV(const T& tokens, string& s, char sep)
s.append(1, sep); s.append(1, sep);
} }
// Remove last separator. // Remove last separator.
if (s.size()) if (!s.empty())
s.pop_back(); s.pop_back();
} }
@ -951,7 +951,7 @@ bool parsedateinterval(const string& s, DateInterval *dip)
return false; return false;
} }
vector<string>::const_iterator it = vs.begin(); auto it = vs.cbegin();
if (*it == "P" || *it == "p") { if (*it == "P" || *it == "p") {
it++; it++;
if (!parseperiod(it, vs.end(), &p1)) { if (!parseperiod(it, vs.end(), &p1)) {
@ -1221,7 +1221,7 @@ std::string SimpleRegexp::simpleSub(
const std::string& in, const std::string& repl) const std::string& in, const std::string& repl)
{ {
if (!ok()) { if (!ok()) {
return std::string(); return {};
} }
int err; int err;
@ -1256,7 +1256,7 @@ bool SimpleRegexp::simpleMatch(const string& val) const
string SimpleRegexp::getMatch(const string& val, int i) const string SimpleRegexp::getMatch(const string& val, int i) const
{ {
if (i > m->nmatch) { if (i > m->nmatch) {
return string(); return {};
} }
return val.substr(m->matches[i].rm_so, return val.substr(m->matches[i].rm_so,
m->matches[i].rm_eo - m->matches[i].rm_so); m->matches[i].rm_eo - m->matches[i].rm_so);

View File

@ -187,6 +187,7 @@ text/x-csv = internal text/plain
text/x-fictionbook = internal xsltproc fb2.xsl text/x-fictionbook = internal xsltproc fb2.xsl
text/x-ini = internal text/plain text/x-ini = internal text/plain
text/x-mail = internal text/x-mail = internal
text/x-orgmode = execm python rclorgmode.py
text/x-perl = internal text/plain text/x-perl = internal text/plain
text/x-python = execm python rclpython.py text/x-python = execm python rclpython.py
text/x-shellscript = internal text/plain text/x-shellscript = internal text/plain
@ -291,6 +292,7 @@ text/x-html-sidux-man = sidux-book
text/x-ini = txt text/x-ini = txt
text/x-mail = message text/x-mail = message
text/x-man = document text/x-man = document
text/x-orgmode = document
text/x-perl = source text/x-perl = source
text/x-purple-html-log = pidgin text/x-purple-html-log = pidgin
text/x-purple-log = pidgin text/x-purple-log = pidgin
@ -359,6 +361,7 @@ text = \
text/x-html-sidux-man \ text/x-html-sidux-man \
text/x-ini \ text/x-ini \
text/x-man \ text/x-man \
text/x-orgmode \
text/x-perl \ text/x-perl \
text/x-python \ text/x-python \
text/x-shellscript text/x-shellscript