Merge branch 'master' of https://framagit.org/medoc92/recoll
This commit is contained in:
commit
d2d2cbff14
@ -13,7 +13,7 @@
|
||||
|
||||
PPA_KEYID=7808CE96D38B9201
|
||||
|
||||
RCLVERS=1.31.5
|
||||
RCLVERS=1.31.6
|
||||
SCOPEVERS=1.20.2.4
|
||||
GSSPVERS=1.1.1
|
||||
PPAVERS=1
|
||||
@ -85,7 +85,7 @@ done
|
||||
|
||||
|
||||
### KIO.
|
||||
series="bionic focal groovy hirsute impish"
|
||||
#series="bionic focal hirsute impish"
|
||||
series=
|
||||
|
||||
debdir=debiankio
|
||||
|
||||
@ -1,3 +1,9 @@
|
||||
recoll (1.31.6-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
|
||||
|
||||
* Almost no change: translation files update.
|
||||
|
||||
-- Jean-Francois Dockes <jf@dockes.org> Sat, 20 Dec 2021 09:25:00 +0100
|
||||
|
||||
recoll (1.31.5-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
|
||||
|
||||
* Fix annoying bug in tesseract OCR temporary files cleanup.
|
||||
|
||||
@ -1,3 +1,9 @@
|
||||
kio-recoll (1.31.6-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
|
||||
|
||||
* Follow recoll version
|
||||
|
||||
-- Jean-Francois Dockes <jf@dockes.org> Sat, 20 Dec 2021 09:25:00 +0100
|
||||
|
||||
kio-recoll (1.31.5-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
|
||||
|
||||
* Follow recoll version
|
||||
|
||||
@ -1 +1 @@
|
||||
1.31.5
|
||||
1.31.6
|
||||
|
||||
@ -125,7 +125,7 @@
|
||||
#define PACKAGE_NAME "Recoll"
|
||||
|
||||
/* Define to the full name and version of this package. */
|
||||
#define PACKAGE_STRING "Recoll 1.31.5"
|
||||
#define PACKAGE_STRING "Recoll 1.31.6"
|
||||
|
||||
/* Define to the one symbol short name of this package. */
|
||||
#define PACKAGE_TARNAME "recoll"
|
||||
@ -134,7 +134,7 @@
|
||||
#define PACKAGE_URL ""
|
||||
|
||||
/* Define to the version of this package. */
|
||||
#define PACKAGE_VERSION "1.31.5"
|
||||
#define PACKAGE_VERSION "1.31.6"
|
||||
|
||||
/* putenv parameter is const */
|
||||
/* #undef PUTENV_ARG_CONST */
|
||||
|
||||
@ -118,7 +118,7 @@
|
||||
#define PACKAGE_NAME "Recoll"
|
||||
|
||||
/* Define to the full name and version of this package. */
|
||||
#define PACKAGE_STRING "Recoll 1.31.5"
|
||||
#define PACKAGE_STRING "Recoll 1.31.6"
|
||||
|
||||
/* Define to the one symbol short name of this package. */
|
||||
#define PACKAGE_TARNAME "recoll"
|
||||
@ -127,7 +127,7 @@
|
||||
#define PACKAGE_URL ""
|
||||
|
||||
/* Define to the version of this package. */
|
||||
#define PACKAGE_VERSION "1.31.5"
|
||||
#define PACKAGE_VERSION "1.31.6"
|
||||
|
||||
/* putenv parameter is const */
|
||||
/* #undef PUTENV_ARG_CONST */
|
||||
|
||||
@ -596,6 +596,7 @@ bool TextSplit::text_to_words(const string &in)
|
||||
clearsplitstate();
|
||||
|
||||
bool pagepending = false;
|
||||
bool nlpending = false;
|
||||
bool softhyphenpending = false;
|
||||
|
||||
// Running count of non-alphanum chars. Reset when we see one;
|
||||
@ -705,6 +706,10 @@ bool TextSplit::text_to_words(const string &in)
|
||||
pagepending = false;
|
||||
newpage(m_wordpos);
|
||||
}
|
||||
if (nlpending) {
|
||||
nlpending = false;
|
||||
newline(m_wordpos);
|
||||
}
|
||||
break;
|
||||
|
||||
case WILD:
|
||||
@ -745,6 +750,12 @@ bool TextSplit::text_to_words(const string &in)
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
// Note about dangling hyphens: we always strip '-' found before whitespace,
|
||||
// even before a newline, then generate two terms, before and after the line
|
||||
// break. We have no way to know if '-' is there because a word was broken by
|
||||
// justification or if it was part of an actual compound word (would need a
|
||||
// dictionary to check). As soft-hyphen *should* be used if the '-' is not part
|
||||
// of the text.
|
||||
if (nextc == -1 || isvisiblewhite(nextc)) {
|
||||
goto SPACE;
|
||||
}
|
||||
@ -844,19 +855,10 @@ bool TextSplit::text_to_words(const string &in)
|
||||
break;
|
||||
|
||||
case '\n':
|
||||
nlpending = true;
|
||||
/* FALLTHROUGH */
|
||||
case '\r':
|
||||
if (m_span.length() && *m_span.rbegin() == '-') {
|
||||
// if '-' is the last char before end of line, we
|
||||
// strip it. We have no way to know if this is added
|
||||
// because of the line split or if it was part of an
|
||||
// actual compound word (would need a dictionary to
|
||||
// check). As soft-hyphen *should* be used if the '-'
|
||||
// is not part of the text, it is better to properly
|
||||
// process a real compound word, and produce wrong
|
||||
// output from wrong text. The word-emitting routine
|
||||
// will strip the trailing '-'.
|
||||
goto SPACE;
|
||||
} else if (softhyphenpending) {
|
||||
if (softhyphenpending) {
|
||||
// Don't reset soft-hyphen
|
||||
continue;
|
||||
} else {
|
||||
|
||||
@ -73,6 +73,9 @@ public:
|
||||
* just don't know about pages. */
|
||||
virtual void newpage(int /*pos*/) {}
|
||||
|
||||
/** Called when we encounter newline \n 0x0a. Override to use the event. */
|
||||
virtual void newline(int /*pos*/) {}
|
||||
|
||||
// Static utility functions:
|
||||
|
||||
/** Count words in string, as the splitter would generate them */
|
||||
|
||||
@ -613,8 +613,7 @@ location before copy, to allow path translation computations. For
|
||||
example if a dataset originally indexed as '/home/me/mydata/config' has
|
||||
been mounted to '/media/me/mydata', and the GUI is running from a copied
|
||||
configuration, orgidxconfdir would be '/home/me/mydata/config', and
|
||||
curidxconfdir (as set in the copied configuration) would be
|
||||
'/media/me/mydata/config'.
|
||||
curidxconfdir (as set in the copied configuration) would be '/media/me/mydata/config'.
|
||||
.TP
|
||||
.BI "idxrundir = "dfn
|
||||
Indexing process current directory. The input
|
||||
|
||||
@ -170,13 +170,12 @@ listing either MIME types (e.g. audio/mpeg) or handler names
|
||||
files. We need to decompress these in a
|
||||
temporary directory for identification, which can be wasteful in some
|
||||
cases. Limit the waste. Negative means no limit. 0 results in no
|
||||
processing of any compressed file. Default 50 MB.
|
||||
processing of any compressed file. Default 100 MB.
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.TEXTFILEMAXMBS">
|
||||
<term><varname>textfilemaxmbs</varname></term>
|
||||
<listitem><para>Size limit for text
|
||||
files. Mostly for skipping monster
|
||||
logs. Default 20 MB.
|
||||
<listitem><para>Size limit for text files. Mostly for skipping monster logs. Default 20 MB. Use a value of -1 to
|
||||
disable.
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.INDEXALLFILENAMES">
|
||||
<term><varname>indexallfilenames</varname></term>
|
||||
|
||||
@ -10,7 +10,7 @@
|
||||
<link rel="stylesheet" type="text/css" href="docbook-xsl.css">
|
||||
<meta name="generator" content="DocBook XSL Stylesheets V1.79.1">
|
||||
<meta name="description" content=
|
||||
"Permission is granted to copy, distribute and/or modify this document under the terms of the GNU Free Documentation License, Version 1.3 or any later version published by the Free Software Foundation; with no Invariant Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license can be found at the following location: GNU web site. This document introduces full text search notions and describes the installation and use of the Recoll application. This version describes Recoll 1.29.">
|
||||
"Permission is granted to copy, distribute and/or modify this document under the terms of the GNU Free Documentation License, Version 1.3 or any later version published by the Free Software Foundation; with no Invariant Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license can be found at the following location: GNU web site. This document introduces full text search notions and describes the installation and use of the Recoll application. This version describes Recoll 1.31.">
|
||||
</head>
|
||||
<body bgcolor="white" text="black" link="#0000FF" vlink="#840084"
|
||||
alink="#0000FF">
|
||||
@ -53,7 +53,7 @@ alink="#0000FF">
|
||||
and describes the installation and use of the
|
||||
<span class="application">Recoll</span> application.
|
||||
This version describes <span class=
|
||||
"application">Recoll</span> 1.29.</p>
|
||||
"application">Recoll</span> 1.31.</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
@ -443,7 +443,7 @@ alink="#0000FF">
|
||||
<p>This document introduces full text search notions and
|
||||
describes the installation and use of the <span class=
|
||||
"application">Recoll</span> application. It is updated for
|
||||
<span class="application">Recoll</span> 1.29.</p>
|
||||
<span class="application">Recoll</span> 1.31.</p>
|
||||
<p><span class="application">Recoll</span> was for a long
|
||||
time dedicated to Unix-like systems. It was only lately
|
||||
(2015) ported to <span class="application">MS-Windows</span>.
|
||||
@ -9169,14 +9169,15 @@ hasextract = False
|
||||
identification, which can be wasteful in some
|
||||
cases. Limit the waste. Negative means no limit.
|
||||
0 results in no processing of any compressed
|
||||
file. Default 50 MB.</p>
|
||||
file. Default 100 MB.</p>
|
||||
</dd>
|
||||
<dt><a name=
|
||||
"RCL.INSTALL.CONFIG.RECOLLCONF.TEXTFILEMAXMBS" id=
|
||||
"RCL.INSTALL.CONFIG.RECOLLCONF.TEXTFILEMAXMBS"></a><span class="term"><code class="varname">textfilemaxmbs</code></span></dt>
|
||||
<dd>
|
||||
<p>Size limit for text files. Mostly for skipping
|
||||
monster logs. Default 20 MB.</p>
|
||||
monster logs. Default 20 MB. Use a value of -1 to
|
||||
disable.</p>
|
||||
</dd>
|
||||
<dt><a name=
|
||||
"RCL.INSTALL.CONFIG.RECOLLCONF.INDEXALLFILENAMES"
|
||||
@ -10757,17 +10758,28 @@ other = rclcat:other
|
||||
<li class="listitem">
|
||||
<p><b>%p. </b>Page index. Only significant for
|
||||
a subset of document types, currently only PDF,
|
||||
Postscript and DVI files. Can be used to start the
|
||||
editor at the right page for a match or
|
||||
snippet.</p>
|
||||
Postscript and DVI files. If it is set, a
|
||||
significant term will be chosen in the query, and
|
||||
%p will be substituted with the first page where
|
||||
the term appears. Can be used to start the editor
|
||||
at the right page for a match or snippet.</p>
|
||||
</li>
|
||||
<li class="listitem">
|
||||
<p><b>%l. </b>Line number. Only significant
|
||||
for document types with relevant line breaks,
|
||||
mostly text/plain and analogs. If it is set, a
|
||||
significant term will be chosen in the query, and
|
||||
%l will be substituted with the first line where
|
||||
the term appears.</p>
|
||||
</li>
|
||||
<li class="listitem">
|
||||
<p><b>%s. </b>Search term. The value will only
|
||||
be set for documents with indexed page numbers (ie:
|
||||
PDF). The value will be one of the matched search
|
||||
terms. It would allow pre-setting the value in the
|
||||
"Find" entry inside Evince for example, for easy
|
||||
highlighting of the term.</p>
|
||||
be set for documents with indexed page or line
|
||||
numbers and if %p or %l is also used. The value
|
||||
will be one of the matched search terms. It would
|
||||
allow pre-setting the value in the "Find" entry
|
||||
inside Evince for example, for easy highlighting of
|
||||
the term.</p>
|
||||
</li>
|
||||
<li class="listitem">
|
||||
<p><b>%u. </b>Url.</p>
|
||||
|
||||
@ -5,7 +5,7 @@
|
||||
|
||||
<!ENTITY RCL "<application>Recoll</application>">
|
||||
<!ENTITY RCLAPPS "<ulink url='http://www.recoll.org/pages/features.html#doctypes'>http://www.recoll.org/pages/features.html</ulink>">
|
||||
<!ENTITY RCLVERSION "1.29">
|
||||
<!ENTITY RCLVERSION "1.32">
|
||||
<!ENTITY XAP "<application>Xapian</application>">
|
||||
<!ENTITY WIN "<application>Windows</application>">
|
||||
<!ENTITY LIN "<application>Unix</application>-like systems">
|
||||
@ -7114,28 +7114,37 @@ other = rclcat:other
|
||||
(possibly a script) to be able to handle it.</para></formalpara>
|
||||
</listitem>
|
||||
|
||||
<listitem><formalpara><title>%M</title>
|
||||
<para>MIME type</para></formalpara>
|
||||
<listitem>
|
||||
<formalpara><title>%M</title><para>MIME type</para></formalpara>
|
||||
</listitem>
|
||||
|
||||
<listitem><formalpara><title>%p</title>
|
||||
<para>Page index. Only significant for a subset of document
|
||||
types, currently only PDF, Postscript and DVI files. Can be
|
||||
used to start the editor at the right page for a match or
|
||||
snippet.</para></formalpara>
|
||||
<listitem>
|
||||
<formalpara><title>%p</title><para>Page index. Only significant for a subset of
|
||||
document types, currently only PDF, Postscript and DVI files. If it is set, a
|
||||
significant term will be chosen in the query, and %p will be substituted with the
|
||||
first page where the term appears. Can be used to start the editor at the right page
|
||||
for a match or snippet.</para></formalpara>
|
||||
</listitem>
|
||||
|
||||
<listitem><formalpara><title>%s</title>
|
||||
<para>Search term. The value will only be set for documents
|
||||
with indexed page numbers (ie: PDF). The value will be one of
|
||||
the matched search terms. It would allow pre-setting the
|
||||
value in the "Find" entry inside Evince for example, for easy
|
||||
highlighting of the term.</para></formalpara>
|
||||
<listitem>
|
||||
<formalpara><title>%l</title><para>Line number. Only significant for document
|
||||
types with relevant line breaks, mostly text/plain and analogs. If it is set, a
|
||||
significant term will be chosen in the query, and %l will be substituted with the
|
||||
first line where the term appears.</para></formalpara>
|
||||
</listitem>
|
||||
|
||||
<listitem><formalpara><title>%u</title>
|
||||
<para>Url.</para></formalpara>
|
||||
<listitem>
|
||||
<formalpara><title>%s</title><para>Search term. The value will only be set for
|
||||
documents with indexed page or line numbers and if %p or %l is also used. The value
|
||||
will be one of the matched search terms. It would allow pre-setting the value in the
|
||||
"Find" entry inside Evince for example, for easy highlighting of the
|
||||
term.</para></formalpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<formalpara><title>%u</title><para>Url.</para></formalpara>
|
||||
</listitem>
|
||||
|
||||
</itemizedlist>
|
||||
|
||||
<para>In addition to the predefined values above, all strings like
|
||||
|
||||
@ -23,7 +23,7 @@ class OrgModeExtractor:
|
||||
iseof = rclexecm.RclExecM.noteof
|
||||
if self.currentindex >= len(self.docs) -1:
|
||||
iseof = rclexecm.RclExecM.eofnext
|
||||
self.em.setmimetype("text/plain")
|
||||
self.em.setmimetype("text/x-orgmode-sub")
|
||||
try:
|
||||
self.em.setfield("title", docdata.splitlines()[0])
|
||||
except:
|
||||
|
||||
@ -59,8 +59,9 @@ bool runWebFilesMoverScript(RclConfig *config)
|
||||
static string downloadsdir;
|
||||
if (downloadsdir.empty()) {
|
||||
if (!config->getConfParam("webdownloadsdir", downloadsdir)) {
|
||||
downloadsdir = path_tildexpand("~/Downloads");
|
||||
downloadsdir = "~/Downloads";
|
||||
}
|
||||
downloadsdir = path_tildexpand(downloadsdir);
|
||||
}
|
||||
vector<string> cmdvec;
|
||||
config->pythonCmd("recoll-we-move-files.py", cmdvec);
|
||||
|
||||
@ -304,3 +304,14 @@ def stringsToString(vs):
|
||||
out.append(s)
|
||||
return " ".join(out)
|
||||
|
||||
def valToBool(s):
    """Interpret a value as a boolean.

    An empty or otherwise falsy value yields False. A value that int() can
    parse yields True when it is non-zero. Any other value (decoded as UTF-8
    first if it is a bytes object) yields True when its first character is
    one of 't', 'T', 'y' or 'Y', and False otherwise.
    """
    if not s:
        return False
    try:
        return int(s) != 0
    except (ValueError, TypeError):
        # Not an integer literal: fall back to looking at the first character.
        # Only conversion failures are expected here; anything else (e.g.
        # KeyboardInterrupt) must not be silently swallowed.
        pass
    if isinstance(s, bytes):
        s = s.decode("UTF-8")
    return s[0] in "tTyY"
|
||||
|
||||
@ -50,7 +50,7 @@ public:
|
||||
|
||||
void startElement(const std::string &nm,
|
||||
const std::map<std::string, std::string>&) override {
|
||||
std::cerr << "startElement [" << nm << "]\n";
|
||||
//std::cerr << "startElement [" << nm << "]\n";
|
||||
currentText.clear();
|
||||
if (nm == "buttons") {
|
||||
radio = false;
|
||||
@ -67,7 +67,7 @@ public:
|
||||
}
|
||||
}
|
||||
void endElement(const std::string& nm) override {
|
||||
std::cerr << "endElement [" << nm << "]\n";
|
||||
//std::cerr << "endElement [" << nm << "]\n";
|
||||
|
||||
if (nm == "label") {
|
||||
label = u8s2qs(currentText);
|
||||
@ -102,7 +102,7 @@ public:
|
||||
}
|
||||
}
|
||||
void characterData(const std::string &str) override {
|
||||
std::cerr << "characterData [" << str << "]\n";
|
||||
//std::cerr << "characterData [" << str << "]\n";
|
||||
currentText += str;
|
||||
}
|
||||
|
||||
|
||||
@ -34,6 +34,7 @@
|
||||
#include "rclmain_w.h"
|
||||
#include "rclzg.h"
|
||||
#include "pathut.h"
|
||||
#include "unacpp.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
@ -42,7 +43,6 @@ static const vector<string> browser_list{
|
||||
"opera", "google-chrome", "chromium-browser",
|
||||
"palemoon", "iceweasel", "firefox", "konqueror", "epiphany"};
|
||||
|
||||
|
||||
// Start native viewer or preview for input Doc. This is used to allow
|
||||
// using recoll from another app (e.g. Unity Scope) to view embedded
|
||||
// result docs (docs with an ipath). We act as a proxy to extract
|
||||
@ -155,13 +155,27 @@ void RclMain::openWith(Rcl::Doc doc, string cmdspec)
|
||||
execViewer(subs, false, execname, lcmd, cmdspec, doc);
|
||||
}
|
||||
|
||||
void RclMain::startNativeViewer(Rcl::Doc doc, int pagenum, QString term)
|
||||
// Tell whether the viewer command references the "%p" substitution.
static bool pagenumNeeded(const std::string& cmd)
{
    const std::string::size_type where = cmd.find("%p");
    return std::string::npos != where;
}
|
||||
// Tell whether the viewer command references the "%l" substitution.
static bool linenumNeeded(const std::string& cmd)
{
    const std::string::size_type where = cmd.find("%l");
    return std::string::npos != where;
}
|
||||
// Tell whether the viewer command references the "%s" substitution.
static bool termNeeded(const std::string& cmd)
{
    const std::string::size_type where = cmd.find("%s");
    return std::string::npos != where;
}
|
||||
|
||||
void RclMain::startNativeViewer(Rcl::Doc doc, int pagenum, QString qterm)
|
||||
{
|
||||
std::string term = qs2utf8s(qterm);
|
||||
string apptag;
|
||||
doc.getmeta(Rcl::Doc::keyapptg, &apptag);
|
||||
LOGDEB("RclMain::startNativeViewer: mtype [" << doc.mimetype <<
|
||||
"] apptag [" << apptag << "] page " << pagenum << " term [" <<
|
||||
qs2utf8s(term) << "] url [" << doc.url << "] ipath [" <<
|
||||
term << "] url [" << doc.url << "] ipath [" <<
|
||||
doc.ipath << "]\n");
|
||||
|
||||
// Look for appropriate viewer
|
||||
@ -377,19 +391,19 @@ void RclMain::startNativeViewer(Rcl::Doc doc, int pagenum, QString term)
|
||||
|
||||
// If we are not called with a page number (which would happen for a call
|
||||
// from the snippets window), see if we can compute a page number anyway.
|
||||
if (pagenum == -1) {
|
||||
pagenum = 1;
|
||||
string lterm;
|
||||
if (m_source)
|
||||
pagenum = m_source->getFirstMatchPage(doc, lterm);
|
||||
if (m_source && pagenum == -1 && (pagenumNeeded(cmd) || termNeeded(cmd)|| linenumNeeded(cmd))) {
|
||||
pagenum = m_source->getFirstMatchPage(doc, term);
|
||||
if (pagenum == -1)
|
||||
pagenum = 1;
|
||||
else // We get the match term used to compute the page
|
||||
term = QString::fromUtf8(lterm.c_str());
|
||||
}
|
||||
char cpagenum[20];
|
||||
sprintf(cpagenum, "%d", pagenum);
|
||||
|
||||
int line = 1;
|
||||
if (m_source && !term.empty() && linenumNeeded(cmd)) {
|
||||
if (doc.text.empty()) {
|
||||
rcldb->getDocRawText(doc);
|
||||
}
|
||||
line = m_source->getFirstMatchLine(doc, term);
|
||||
}
|
||||
|
||||
// Substitute %xx inside arguments
|
||||
string efftime;
|
||||
@ -408,9 +422,10 @@ void RclMain::startNativeViewer(Rcl::Doc doc, int pagenum, QString term)
|
||||
subs["f"] = fn;
|
||||
subs["F"] = fn;
|
||||
subs["i"] = FileInterner::getLastIpathElt(doc.ipath);
|
||||
subs["l"] = ulltodecstr(line);
|
||||
subs["M"] = doc.mimetype;
|
||||
subs["p"] = cpagenum;
|
||||
subs["s"] = (const char*)term.toLocal8Bit();
|
||||
subs["p"] = ulltodecstr(pagenum);
|
||||
subs["s"] = term;
|
||||
subs["U"] = url_encode(url);
|
||||
subs["u"] = url;
|
||||
// Let %(xx) access all metadata.
|
||||
|
||||
@ -140,8 +140,7 @@ public slots:
|
||||
virtual void showActionsSearch();
|
||||
virtual void startPreview(int docnum, Rcl::Doc doc, int keymods);
|
||||
virtual void startPreview(Rcl::Doc);
|
||||
virtual void startNativeViewer(Rcl::Doc, int pagenum = -1,
|
||||
QString term = QString());
|
||||
virtual void startNativeViewer(Rcl::Doc, int pagenum = -1, QString term = QString());
|
||||
virtual void openWith(Rcl::Doc, string);
|
||||
virtual void saveDocToFile(Rcl::Doc);
|
||||
virtual void previewNextInTab(Preview *, int sid, int docnum);
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
/* Copyright (C) 2012 J.F.Dockes
|
||||
/* Copyright (C) 2012-2021 J.F.Dockes
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
@ -67,8 +67,7 @@ using namespace std;
|
||||
class PlainToRichQtSnippets : public PlainToRich {
|
||||
public:
|
||||
virtual string startMatch(unsigned int) {
|
||||
return string("<span class='rclmatch' style='")
|
||||
+ qs2utf8s(prefs.qtermstyle) + string("'>");
|
||||
return string("<span class='rclmatch' style='") + qs2utf8s(prefs.qtermstyle) + string("'>");
|
||||
}
|
||||
virtual string endMatch() {
|
||||
return string("</span>");
|
||||
@ -82,12 +81,10 @@ void SnippetsW::init()
|
||||
QPushButton *searchButton = new QPushButton(tr("Search"));
|
||||
searchButton->setAutoDefault(false);
|
||||
buttonBox->addButton(searchButton, QDialogButtonBox::ActionRole);
|
||||
// setWindowFlags(Qt::WindowStaysOnTopHint);
|
||||
searchFM->hide();
|
||||
|
||||
onNewShortcuts();
|
||||
connect(&SCBase::scBase(), SIGNAL(shortcutsChanged()),
|
||||
this, SLOT(onNewShortcuts()));
|
||||
connect(&SCBase::scBase(), SIGNAL(shortcutsChanged()), this, SLOT(onNewShortcuts()));
|
||||
|
||||
QPushButton *closeButton = buttonBox->button(QDialogButtonBox::Close);
|
||||
if (closeButton)
|
||||
@ -105,11 +102,9 @@ void SnippetsW::init()
|
||||
browserw = new QWebView(this);
|
||||
verticalLayout->insertWidget(0, browserw);
|
||||
browser->setUrl(QUrl(QString::fromUtf8("about:blank")));
|
||||
connect(browser, SIGNAL(linkClicked(const QUrl &)),
|
||||
this, SLOT(onLinkClicked(const QUrl &)));
|
||||
connect(browser, SIGNAL(linkClicked(const QUrl &)), this, SLOT(onLinkClicked(const QUrl &)));
|
||||
browser->page()->setLinkDelegationPolicy(QWebPage::DelegateAllLinks);
|
||||
browser->page()->currentFrame()->setScrollBarPolicy(Qt::Horizontal,
|
||||
Qt::ScrollBarAlwaysOff);
|
||||
browser->page()->currentFrame()->setScrollBarPolicy(Qt::Horizontal, Qt::ScrollBarAlwaysOff);
|
||||
QWEBSETTINGS *ws = browser->page()->settings();
|
||||
if (prefs.reslistfontfamily != "") {
|
||||
ws->setFontFamily(QWEBSETTINGS::StandardFont, prefs.reslistfontfamily);
|
||||
@ -136,8 +131,7 @@ void SnippetsW::init()
|
||||
#else
|
||||
browserw = new QTextBrowser(this);
|
||||
verticalLayout->insertWidget(0, browserw);
|
||||
connect(browser, SIGNAL(anchorClicked(const QUrl &)),
|
||||
this, SLOT(onLinkClicked(const QUrl &)));
|
||||
connect(browser, SIGNAL(anchorClicked(const QUrl &)), this, SLOT(onLinkClicked(const QUrl &)));
|
||||
browser->setReadOnly(true);
|
||||
browser->setUndoRedoEnabled(false);
|
||||
browser->setOpenLinks(false);
|
||||
@ -183,8 +177,7 @@ void SnippetsW::createPopupMenu(const QPoint& pos)
|
||||
{
|
||||
QMenu *popup = new QMenu(this);
|
||||
if (m_sortingByPage) {
|
||||
popup->addAction(tr("Sort By Relevance"), this,
|
||||
SLOT(reloadByRelevance()));
|
||||
popup->addAction(tr("Sort By Relevance"), this, SLOT(reloadByRelevance()));
|
||||
} else {
|
||||
popup->addAction(tr("Sort By Page"), this, SLOT(reloadByPage()));
|
||||
}
|
||||
@ -230,29 +223,22 @@ void SnippetsW::onSetDoc(Rcl::Doc doc, std::shared_ptr<DocSequence> source)
|
||||
source->getTerms(hdata);
|
||||
|
||||
ostringstream oss;
|
||||
oss <<
|
||||
"<html><head>"
|
||||
"<meta http-equiv=\"content-type\" "
|
||||
"content=\"text/html; charset=utf-8\">";
|
||||
oss << "<html><head>"
|
||||
"<meta http-equiv=\"content-type\" content=\"text/html; charset=utf-8\">";
|
||||
|
||||
oss << "<style type=\"text/css\">\nbody,table,select,input {\n";
|
||||
oss << "color: " + qs2utf8s(prefs.fontcolor) + ";\n";
|
||||
oss << "}\n</style>\n";
|
||||
oss << qs2utf8s(prefs.darkreslistheadertext) << qs2utf8s(prefs.reslistheadertext);
|
||||
|
||||
oss <<
|
||||
"</head>"
|
||||
"<body>"
|
||||
"<table class=\"snippets\">"
|
||||
;
|
||||
oss << "</head><body><table class=\"snippets\">";
|
||||
|
||||
g_hiliter.set_inputhtml(false);
|
||||
bool nomatch = true;
|
||||
|
||||
for (const auto& snippet : vpabs) {
|
||||
if (snippet.page == -1) {
|
||||
oss << "<tr><td colspan=\"2\">" <<
|
||||
snippet.snippet << "</td></tr>" << endl;
|
||||
oss << "<tr><td colspan=\"2\">" << snippet.snippet << "</td></tr>" << "\n";
|
||||
continue;
|
||||
}
|
||||
list<string> lr;
|
||||
@ -263,13 +249,12 @@ void SnippetsW::onSetDoc(Rcl::Doc doc, std::shared_ptr<DocSequence> source)
|
||||
nomatch = false;
|
||||
oss << "<tr><td>";
|
||||
if (snippet.page > 0) {
|
||||
oss << "<a href=\"http://h/P" << snippet.page << "T" <<
|
||||
snippet.term << "\">"
|
||||
<< "P. " << snippet.page << "</a>";
|
||||
oss << "<a href=\"http://h/P" << snippet.page << "T" << snippet.term << "\">" <<
|
||||
"P. " << snippet.page << "</a>";
|
||||
}
|
||||
oss << "</td><td>" << lr.front().c_str() << "</td></tr>" << endl;
|
||||
oss << "</td><td>" << lr.front().c_str() << "</td></tr>" << "\n";
|
||||
}
|
||||
oss << "</table>" << endl;
|
||||
oss << "</table>" << "\n";
|
||||
if (nomatch) {
|
||||
oss.str("<html><head></head><body>\n");
|
||||
oss << qs2utf8s(tr("<p>Sorry, no exact match was found within limits. "
|
||||
@ -278,12 +263,12 @@ void SnippetsW::onSetDoc(Rcl::Doc doc, std::shared_ptr<DocSequence> source)
|
||||
}
|
||||
oss << "\n</body></html>";
|
||||
#if defined(USING_WEBKIT) || defined(USING_WEBENGINE)
|
||||
browser->setHtml(QString::fromUtf8(oss.str().c_str()));
|
||||
browser->setHtml(u8s2qs(oss.str()));
|
||||
#else
|
||||
browser->clear();
|
||||
browser->append(".");
|
||||
browser->clear();
|
||||
browser->insertHtml(QString::fromUtf8(oss.str().c_str()));
|
||||
browser->insertHtml(u8s2qs(oss.str()));
|
||||
browser->moveCursor (QTextCursor::Start);
|
||||
browser->ensureCursorVisible();
|
||||
#endif
|
||||
@ -354,8 +339,7 @@ void SnippetsW::onLinkClicked(const QUrl &url)
|
||||
string term;
|
||||
if (termpos != string::npos)
|
||||
term = ascurl.substr(termpos+1);
|
||||
emit startNativeViewer(m_doc, page,
|
||||
QString::fromUtf8(term.c_str()));
|
||||
emit startNativeViewer(m_doc, page, u8s2qs(term));
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
@ -111,6 +111,9 @@ public:
|
||||
virtual int getFirstMatchPage(Rcl::Doc&, std::string&) {
|
||||
return -1;
|
||||
}
|
||||
virtual int getFirstMatchLine(const Rcl::Doc&, const std::string&) {
|
||||
return 1;
|
||||
}
|
||||
/** Get duplicates. */
|
||||
virtual bool docDups(const Rcl::Doc&, std::vector<Rcl::Doc>&) {
|
||||
return false;
|
||||
|
||||
@ -126,6 +126,17 @@ int DocSequenceDb::getFirstMatchPage(Rcl::Doc &doc, string& term)
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Return the line number computed by the query object for the first match of
// 'term' inside 'doc'. Whenever the query can't be set up, or it has no
// associated db, the result is 1.
int DocSequenceDb::getFirstMatchLine(const Rcl::Doc &doc, const string& term)
{
    // Hold o_dblock for the duration of the call.
    std::unique_lock<std::mutex> locker(o_dblock);
    // This path used to return 'false', i.e. 0, from a function returning an
    // int line number. Return 1 like the other fallback path below.
    if (!setQuery())
        return 1;
    if (m_q->whatDb()) {
        return m_q->getFirstMatchLine(doc, term);
    }
    return 1;
}
|
||||
|
||||
list<string> DocSequenceDb::expand(Rcl::Doc &doc)
|
||||
{
|
||||
std::unique_lock<std::mutex> locker(o_dblock);
|
||||
|
||||
@ -43,6 +43,7 @@ public:
|
||||
|
||||
virtual bool getAbstract(Rcl::Doc &doc, std::vector<std::string>&) override;
|
||||
virtual int getFirstMatchPage(Rcl::Doc&, std::string& term) override;
|
||||
virtual int getFirstMatchLine(const Rcl::Doc&, const std::string& term) override;
|
||||
virtual bool docDups(const Rcl::Doc& doc, std::vector<Rcl::Doc>& dups)
|
||||
override;
|
||||
virtual std::string getDescription() override;
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
/* Copyright (C) 2004 J.F.Dockes
|
||||
/* Copyright (C) 2004-2021 J.F.Dockes
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
@ -60,8 +60,7 @@ public:
|
||||
* @param in raw text out of internfile.
|
||||
* @param out rich text output, divided in chunks (to help our caller
|
||||
* avoid inserting half tags into textedit which doesn't like it)
|
||||
* @param in hdata terms and groups to be highlighted. These are
|
||||
* lowercase and unaccented.
|
||||
* @param in hdata terms and groups to be highlighted. See utils/hldata.h
|
||||
* @param chunksize max size of chunks in output list
|
||||
*/
|
||||
virtual bool plaintorich(const std::string &in, std::list<std::string> &out,
|
||||
|
||||
@ -141,11 +141,9 @@ public:
|
||||
// add/update fragment definition.
|
||||
virtual bool takeword(const std::string& term, int pos, int bts, int bte) {
|
||||
LOGDEB1("takeword: [" << term << "] bytepos: "<<bts<<":"<<bte<<endl);
|
||||
// Limit time taken with monster documents. The resulting
|
||||
// abstract will be incorrect or inexistent, but this is
|
||||
// better than taking forever (the default cutoff value comes
|
||||
// from the snippetMaxPosWalk configuration parameter, and is
|
||||
// 10E6)
|
||||
// Limit time taken with monster documents. The resulting abstract will be incorrect or
|
||||
// inexistent, but this is better than taking forever (the default cutoff value comes from
|
||||
// the snippetMaxPosWalk configuration parameter, and is 10E6)
|
||||
if (maxtermcount && termcount++ > maxtermcount) {
|
||||
LOGINF("Rclabsfromtext: stopping because maxtermcount reached: "<<
|
||||
maxtermcount << endl);
|
||||
@ -154,8 +152,7 @@ public:
|
||||
}
|
||||
// Also limit the number of fragments (just in case safety)
|
||||
if (m_fragments.size() > maxtermcount / 100) {
|
||||
LOGINF("Rclabsfromtext: stopping because maxfragments reached: "<<
|
||||
maxtermcount/100 << endl);
|
||||
LOGINF("Rclabsfromtext: stopping: max fragments count: " << maxtermcount/100 << "\n");
|
||||
retflags |= ABSRES_TRUNC;
|
||||
return false;
|
||||
}
|
||||
@ -193,8 +190,7 @@ public:
|
||||
m_curterm = term;
|
||||
m_curtermcoef = coef;
|
||||
} else {
|
||||
LOGDEB2("Extending current fragment: " << m_remainingWords <<
|
||||
" -> " << m_ctxwords << endl);
|
||||
LOGDEB2("Extending current fragment: "<<m_remainingWords<<" -> "<<m_ctxwords<< "\n");
|
||||
m_extcount++;
|
||||
#ifdef COMPUTE_HLZONES
|
||||
if (m_prevwordhit) {
|
||||
@ -215,9 +211,8 @@ public:
|
||||
m_curfragcoef += coef;
|
||||
m_remainingWords = m_ctxwords + 1;
|
||||
if (m_extcount > 5) {
|
||||
// Limit expansion of contiguous fragments (this is to
|
||||
// avoid common terms in search causing long
|
||||
// heavyweight meaningless fragments. Also, limit length).
|
||||
// Limit expansion of contiguous fragments (this is to avoid common terms in search
|
||||
// causing long heavyweight meaningless fragments. Also, limit length).
|
||||
m_remainingWords = 1;
|
||||
m_extcount = 0;
|
||||
}
|
||||
@ -247,18 +242,14 @@ public:
|
||||
LOGDEB1("FRAGMENT: from byte " << m_curfrag.first <<
|
||||
" to byte " << m_curfrag.second << endl);
|
||||
LOGDEB1("FRAGMENT TEXT [" << m_rawtext.substr(
|
||||
m_curfrag.first, m_curfrag.second-m_curfrag.first)
|
||||
<< "]\n");
|
||||
// We used to not push weak fragments if we had a lot
|
||||
// already. This can cause problems if the fragments
|
||||
// we drop are actually group fragments (which have
|
||||
// not got their boost yet). The right cut value is
|
||||
// difficult to determine, because the absolute values
|
||||
// of the coefs depend on many things (index size,
|
||||
// etc.) The old test was if (m_totalcoef < 5.0 ||
|
||||
// m_curfragcoef >= 1.0) We now just avoid creating a
|
||||
// monster by testing the current fragments count at
|
||||
// the top of the function
|
||||
m_curfrag.first, m_curfrag.second-m_curfrag.first) << "]\n");
|
||||
// We used to not push weak fragments if we had a lot already. This can cause
|
||||
// problems if the fragments we drop are actually group fragments (which have not
|
||||
// got their boost yet). The right cut value is difficult to determine, because the
|
||||
// absolute values of the coefs depend on many things (index size, etc.) The old
|
||||
// test was if (m_totalcoef < 5.0 || m_curfragcoef >= 1.0) We now just avoid
|
||||
// creating a monster by testing the current fragments count at the top of the
|
||||
// function
|
||||
m_fragments.push_back(MatchFragment(m_curfrag.first,
|
||||
m_curfrag.second,
|
||||
m_curfragcoef,
|
||||
@ -298,8 +289,7 @@ public:
|
||||
m_curtermcoef = 0.0;
|
||||
}
|
||||
|
||||
LOGDEB("TextSplitABS: stored total " << m_fragments.size() <<
|
||||
" fragments" << endl);
|
||||
LOGDEB("TextSplitABS: stored total " << m_fragments.size() << " fragments" << endl);
|
||||
vector<GroupMatchEntry> tboffs;
|
||||
|
||||
// Look for matches to PHRASE and NEAR term groups and finalize
|
||||
@ -340,9 +330,8 @@ public:
|
||||
}
|
||||
auto fragit = m_fragments.begin();
|
||||
for (const auto& grpmatch : tboffs) {
|
||||
LOGDEB2("LOOKING FOR FRAGMENT: group: " << grpmatch.offs.first <<
|
||||
"-" << grpmatch.offs.second << " curfrag " <<
|
||||
fragit->start << "-" << fragit->stop << endl);
|
||||
LOGDEB2("LOOKING FOR FRAGMENT: group: " << grpmatch.offs.first << "-" <<
|
||||
grpmatch.offs.second<<" curfrag "<<fragit->start<<"-"<<fragit->stop<<"\n");
|
||||
while (fragit->stop < grpmatch.offs.first) {
|
||||
fragit++;
|
||||
if (fragit == m_fragments.end()) {
|
||||
@ -417,21 +406,19 @@ int Query::Native::abstractFromText(
|
||||
bool sortbypage
|
||||
)
|
||||
{
|
||||
(void)chron;
|
||||
PRETEND_USE(chron);
|
||||
LOGABS("abstractFromText: entry: " << chron.millis() << "mS\n");
|
||||
string rawtext;
|
||||
if (!ndb->getRawText(docid, rawtext)) {
|
||||
LOGDEB0("abstractFromText: can't fetch text\n");
|
||||
return ABSRES_ERROR;
|
||||
}
|
||||
LOGABS("abstractFromText: got raw text: size " << rawtext.size() << " " <<
|
||||
chron.millis() << "mS\n");
|
||||
LOGABS("abstractFromText: got raw text: size "<<rawtext.size()<<" "<<chron.millis()<<"mS\n");
|
||||
|
||||
#if 0 && ! (XAPIAN_MAJOR_VERSION <= 1 && XAPIAN_MINOR_VERSION <= 2) && \
|
||||
(defined(RAWTEXT_IN_DATA))
|
||||
#if 0 && XAPIAN_AT_LEAST(1,3,5)
|
||||
// Tryout the Xapian internal method.
|
||||
string snippet = xmset.snippet(rawtext);
|
||||
LOGDEB("SNIPPET: [" << snippet << "] END SNIPPET\n");
|
||||
string snippet = xmset.snippet(rawtext, 60);
|
||||
std::cerr << "XAPIAN SNIPPET: [" << snippet << "] END SNIPPET\n";
|
||||
#endif
|
||||
|
||||
// We need the q coefs for individual terms
|
||||
@ -452,8 +439,7 @@ int Query::Native::abstractFromText(
|
||||
}
|
||||
LOGABS("abstractFromText: getterms: " << chron.millis() << "mS\n");
|
||||
|
||||
TextSplitABS splitter(rawtext, matchTerms, hld, wordcoefs, ctxwords,
|
||||
TextSplit::TXTS_NONE,
|
||||
TextSplitABS splitter(rawtext, matchTerms, hld, wordcoefs, ctxwords, TextSplit::TXTS_NONE,
|
||||
m_q->m_snipMaxPosWalk);
|
||||
splitter.text_to_words(rawtext);
|
||||
LOGABS("abstractFromText: text_to_words: " << chron.millis() << "mS\n");
|
||||
@ -484,8 +470,7 @@ int Query::Native::abstractFromText(
|
||||
// main term and the page positions.
|
||||
unsigned int count = 0;
|
||||
for (const auto& entry : result) {
|
||||
string frag(
|
||||
fixfrag(rawtext.substr(entry.start, entry.stop - entry.start)));
|
||||
string frag(fixfrag(rawtext.substr(entry.start, entry.stop - entry.start)));
|
||||
|
||||
#ifdef COMPUTE_HLZONES
|
||||
// This would need to be modified to take tag parameters
|
||||
@ -506,8 +491,7 @@ int Query::Native::abstractFromText(
|
||||
if (page < 0)
|
||||
page = 0;
|
||||
}
|
||||
LOGDEB0("=== FRAGMENT: p. " << page << " Coef: " << entry.coef <<
|
||||
": " << frag << endl);
|
||||
LOGDEB0("=== FRAGMENT: p. " << page << " Coef: " << entry.coef << ": " << frag << endl);
|
||||
vabs.push_back(Snippet(page, frag).setTerm(entry.term));
|
||||
if (count++ >= maxtotaloccs)
|
||||
break;
|
||||
@ -515,4 +499,45 @@ int Query::Native::abstractFromText(
|
||||
return ABSRES_OK | splitter.getretflags();
|
||||
}
|
||||
|
||||
class TermLineSplitter : public TextSplit {
|
||||
public:
|
||||
TermLineSplitter(const std::string& term)
|
||||
: TextSplit(TextSplit::TXTS_NOSPANS), m_term(term) {
|
||||
}
|
||||
bool takeword(const std::string& _term, int, int, int) override {
|
||||
std::string term;
|
||||
if (o_index_stripchars) {
|
||||
if (!unacmaybefold(_term, term, "UTF-8", UNACOP_UNACFOLD)) {
|
||||
LOGINFO("PlainToRich::takeword: unac failed for [" << term << "]\n");
|
||||
return true;
|
||||
}
|
||||
}
|
||||
if (term == m_term) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
void newline(int) override {
|
||||
m_line++;
|
||||
}
|
||||
int getline() {
|
||||
return m_line;
|
||||
}
|
||||
private:
|
||||
int m_line{1};
|
||||
std::string m_term;
|
||||
};
|
||||
|
||||
int Query::getFirstMatchLine(const Doc &doc, const std::string& term)
|
||||
{
|
||||
int line = 1;
|
||||
TermLineSplitter splitter(term);
|
||||
bool ret = splitter.text_to_words(doc.text);
|
||||
// The splitter takeword() breaks by returning false as soon as the term is found
|
||||
if (ret == false) {
|
||||
line = splitter.getline();
|
||||
}
|
||||
return line;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -254,7 +254,7 @@ double Query::Native::qualityTerms(Xapian::docid docid,
|
||||
}
|
||||
|
||||
|
||||
// Return page number for first match of "significant" term.
|
||||
// Choose most interesting term and return the page number for its first match
|
||||
int Query::Native::getFirstMatchPage(Xapian::docid docid, string& term)
|
||||
{
|
||||
LOGDEB("Query::Native::getFirstMatchPage\n");
|
||||
@ -286,9 +286,7 @@ int Query::Native::getFirstMatchPage(Xapian::docid docid, string& term)
|
||||
qualityTerms(docid, terms, byQ);
|
||||
|
||||
for (auto mit = byQ.rbegin(); mit != byQ.rend(); mit++) {
|
||||
for (vector<string>::const_iterator qit = mit->second.begin();
|
||||
qit != mit->second.end(); qit++) {
|
||||
string qterm = *qit;
|
||||
for (const auto& qterm : mit->second) {
|
||||
Xapian::PositionIterator pos;
|
||||
string emptys;
|
||||
try {
|
||||
@ -619,9 +617,8 @@ int Query::Native::abstractFromIndex(
|
||||
// possibly retried by our caller.
|
||||
//
|
||||
// @param[out] vabs the abstract is returned as a vector of snippets.
|
||||
int Query::Native::makeAbstract(Xapian::docid docid,
|
||||
vector<Snippet>& vabs,
|
||||
int imaxoccs, int ictxwords, bool sortbypage)
|
||||
int Query::Native::makeAbstract(
|
||||
Xapian::docid docid, vector<Snippet>& vabs, int imaxoccs, int ictxwords, bool sortbypage)
|
||||
{
|
||||
chron.restart();
|
||||
LOGDEB("makeAbstract: docid " << docid << " imaxoccs " <<
|
||||
|
||||
@ -96,10 +96,13 @@ const string pathelt_prefix = "XP";
|
||||
static const string udi_prefix("Q");
|
||||
static const string parent_prefix("F");
|
||||
|
||||
// Special terms to mark begin/end of field (for anchored searches), and
|
||||
// page breaks
|
||||
// Special terms to mark begin/end of field (for anchored searches).
|
||||
string start_of_field_term;
|
||||
string end_of_field_term;
|
||||
|
||||
// Special term for page breaks. Note that we use a complicated mechanism for multiple page
|
||||
// breaks at the same position, when it would have been probably simpler to use XXPG/n terms
|
||||
// instead (did not try to implement though). A change would force users to reindex.
|
||||
const string page_break_term = "XXPG/";
|
||||
|
||||
// Special term to mark documents with children.
|
||||
@ -1846,16 +1849,14 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi, Doc &doc)
|
||||
}
|
||||
}
|
||||
|
||||
// If empty pages (multiple break at same pos) were recorded, save
|
||||
// them (this is because we have no way to record them in the
|
||||
// Xapian list
|
||||
// If empty pages (multiple break at same pos) were recorded, save them (this is
|
||||
// because we have no way to record them in the Xapian list)
|
||||
if (!tpidx.m_pageincrvec.empty()) {
|
||||
ostringstream multibreaks;
|
||||
for (unsigned int i = 0; i < tpidx.m_pageincrvec.size(); i++) {
|
||||
if (i != 0)
|
||||
multibreaks << ",";
|
||||
multibreaks << tpidx.m_pageincrvec[i].first << "," <<
|
||||
tpidx.m_pageincrvec[i].second;
|
||||
multibreaks << tpidx.m_pageincrvec[i].first << "," << tpidx.m_pageincrvec[i].second;
|
||||
}
|
||||
RECORD_APPEND(record, string(cstr_mbreaks), multibreaks.str());
|
||||
}
|
||||
|
||||
@ -360,7 +360,6 @@ int Query::getFirstMatchPage(const Doc &doc, string& term)
|
||||
return m_reason.empty() ? pagenum : -1;
|
||||
}
|
||||
|
||||
|
||||
// Mset size
|
||||
// Note: times for retrieving (multiple times) all docs from a sample
|
||||
// 25k docs db (q: mime:*)
|
||||
@ -511,8 +510,7 @@ vector<string> Query::expand(const Doc &doc)
|
||||
Xapian::ESet eset = m_nq->xenquire->get_eset(20, rset, false);
|
||||
LOGDEB("ESet terms:\n");
|
||||
// We filter out the special terms
|
||||
for (Xapian::ESetIterator it = eset.begin();
|
||||
it != eset.end(); it++) {
|
||||
for (Xapian::ESetIterator it = eset.begin(); it != eset.end(); it++) {
|
||||
LOGDEB(" [" << (*it) << "]\n");
|
||||
if ((*it).empty() || has_prefix(*it))
|
||||
continue;
|
||||
|
||||
@ -115,10 +115,18 @@ public:
|
||||
// Returned as a vector of pair<page,snippet> page is 0 if unknown
|
||||
int makeDocAbstract(const Doc &doc, std::vector<Snippet>& abst,
|
||||
int maxoccs= -1, int ctxwords= -1,bool sortbypage=false);
|
||||
/** Retrieve page number for first match for "significant" query term
|
||||
* @param term returns the chosen term */
|
||||
|
||||
/** Choose most interesting term and return the page number for its first match
|
||||
* @param term returns the chosen term
|
||||
* @return page number or -1 if term not found or other issue
|
||||
*/
|
||||
int getFirstMatchPage(const Doc &doc, std::string& term);
|
||||
|
||||
/** Compute line number for first match of term. Only works if doc.text has text.
|
||||
* This uses a text split. Both this and the above getFirstMatchPage() could be done and saved
|
||||
* while we compute the abstracts, quite a lot of waste here. */
|
||||
int getFirstMatchLine(const Doc &doc, const std::string& term);
|
||||
|
||||
/** Retrieve a reference to the searchData we are using */
|
||||
std::shared_ptr<SearchData> getSD() {
|
||||
return m_sd;
|
||||
|
||||
@ -225,6 +225,7 @@ text/x-lua = internal
|
||||
text/x-mail = internal
|
||||
text/x-man = exec rclman;maxseconds=30
|
||||
text/x-orgmode = execm rclorgmode.py
|
||||
text/x-orgmode-sub = internal text/plain
|
||||
text/x-perl = internal text/plain
|
||||
text/x-purple-html-log = internal text/html
|
||||
text/x-purple-log = exec rclpurple
|
||||
@ -359,6 +360,7 @@ text/x-java = source
|
||||
text/x-lua = source
|
||||
text/x-mail = message
|
||||
text/x-man = document
|
||||
text/x-orgmode = document
|
||||
text/x-perl = source
|
||||
text/x-php = source
|
||||
text/x-purple-html-log = pidgin
|
||||
@ -443,6 +445,7 @@ text = \
|
||||
text/x-ini \
|
||||
text/x-java \
|
||||
text/x-man \
|
||||
text/x-orgmode \
|
||||
text/x-perl \
|
||||
text/x-php \
|
||||
text/x-python \
|
||||
|
||||
@ -214,12 +214,13 @@ nomd5types = rclaudio
|
||||
# files.</brief><descr>We need to decompress these in a
|
||||
# temporary directory for identification, which can be wasteful in some
|
||||
# cases. Limit the waste. Negative means no limit. 0 results in no
|
||||
# processing of any compressed file. Default 50 MB.</descr></var>
|
||||
# processing of any compressed file. Default 100 MB.</descr></var>
|
||||
compressedfilemaxkbs = 100000
|
||||
|
||||
# <var name="textfilemaxmbs" type="int"><brief>Size limit for text
|
||||
# files.</brief><descr>Mostly for skipping monster
|
||||
# logs. Default 20 MB.</descr></var>
|
||||
# <var name="textfilemaxmbs" type="int">
|
||||
# <brief>Size limit for text files.</brief>
|
||||
# <descr>Mostly for skipping monster logs. Default 20 MB. Use a value of -1 to
|
||||
# disable.</descr></var>
|
||||
textfilemaxmbs = 20
|
||||
|
||||
# <var name="indexallfilenames" type="bool"><brief>Index the file names of
|
||||
|
||||
@ -619,11 +619,17 @@ static const string& thumbnailsdir()
|
||||
return thumbnailsd;
|
||||
}
|
||||
|
||||
// Place for 1024x1024 files
|
||||
static const string thmbdirxxlarge = "xx-large";
|
||||
// Place for 512x512 files
|
||||
static const string thmbdirxlarge = "x-large";
|
||||
// Place for 256x256 files
|
||||
static const string thmbdirlarge = "large";
|
||||
// 128x128
|
||||
static const string thmbdirnormal = "normal";
|
||||
|
||||
static const vector<string> thmbdirs{thmbdirxxlarge, thmbdirxlarge, thmbdirlarge, thmbdirnormal};
|
||||
|
||||
static void thumbname(const string& url, string& name)
|
||||
{
|
||||
string digest;
|
||||
@ -635,26 +641,47 @@ static void thumbname(const string& url, string& name)
|
||||
|
||||
bool thumbPathForUrl(const string& url, int size, string& path)
|
||||
{
|
||||
string name;
|
||||
string name, path128, path256, path512, path1024;
|
||||
thumbname(url, name);
|
||||
if (size <= 128) {
|
||||
path = path_cat(thumbnailsdir(), thmbdirnormal);
|
||||
path = path_cat(path, name);
|
||||
path128 = path;
|
||||
} else if (size <= 256) {
|
||||
path = path_cat(thumbnailsdir(), thmbdirlarge);
|
||||
path = path_cat(path, name);
|
||||
path256 = path;
|
||||
} else if (size <= 512) {
|
||||
path = path_cat(thumbnailsdir(), thmbdirxlarge);
|
||||
path = path_cat(path, name);
|
||||
path512 = path;
|
||||
} else {
|
||||
path = path_cat(thumbnailsdir(), thmbdirxxlarge);
|
||||
path = path_cat(path, name);
|
||||
path1024 = path;
|
||||
}
|
||||
if (access(path.c_str(), R_OK) == 0) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Not found in requested size. Try to find any size and return it. Let the client scale.
|
||||
for (const auto& tdir : thmbdirs) {
|
||||
path = path_cat(thumbnailsdir(), tdir);
|
||||
path = path_cat(path, name);
|
||||
if (access(path.c_str(), R_OK) == 0) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
path = path_cat(thumbnailsdir(), thmbdirlarge);
|
||||
path = path_cat(path, name);
|
||||
if (access(path.c_str(), R_OK) == 0) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// File does not exist. Path corresponds to the large version at this point,
|
||||
// fix it if needed.
|
||||
// File does not exist. Return appropriate path anyway.
|
||||
if (size <= 128) {
|
||||
path = path_cat(path_home(), thmbdirnormal);
|
||||
path = path_cat(path, name);
|
||||
path = path128;
|
||||
} else if (size <= 256) {
|
||||
path = path256;
|
||||
} else if (size <= 512) {
|
||||
path = path512;
|
||||
} else {
|
||||
path = path1024;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -318,7 +318,7 @@ template <class T> void stringsToCSV(const T& tokens, string& s, char sep)
|
||||
s.append(1, sep);
|
||||
}
|
||||
// Remove last separator.
|
||||
if (s.size())
|
||||
if (!s.empty())
|
||||
s.pop_back();
|
||||
}
|
||||
|
||||
@ -951,7 +951,7 @@ bool parsedateinterval(const string& s, DateInterval *dip)
|
||||
return false;
|
||||
}
|
||||
|
||||
vector<string>::const_iterator it = vs.begin();
|
||||
auto it = vs.cbegin();
|
||||
if (*it == "P" || *it == "p") {
|
||||
it++;
|
||||
if (!parseperiod(it, vs.end(), &p1)) {
|
||||
@ -1221,7 +1221,7 @@ std::string SimpleRegexp::simpleSub(
|
||||
const std::string& in, const std::string& repl)
|
||||
{
|
||||
if (!ok()) {
|
||||
return std::string();
|
||||
return {};
|
||||
}
|
||||
|
||||
int err;
|
||||
@ -1256,7 +1256,7 @@ bool SimpleRegexp::simpleMatch(const string& val) const
|
||||
string SimpleRegexp::getMatch(const string& val, int i) const
|
||||
{
|
||||
if (i > m->nmatch) {
|
||||
return string();
|
||||
return {};
|
||||
}
|
||||
return val.substr(m->matches[i].rm_so,
|
||||
m->matches[i].rm_eo - m->matches[i].rm_so);
|
||||
|
||||
@ -187,6 +187,7 @@ text/x-csv = internal text/plain
|
||||
text/x-fictionbook = internal xsltproc fb2.xsl
|
||||
text/x-ini = internal text/plain
|
||||
text/x-mail = internal
|
||||
text/x-orgmode = execm python rclorgmode.py
|
||||
text/x-perl = internal text/plain
|
||||
text/x-python = execm python rclpython.py
|
||||
text/x-shellscript = internal text/plain
|
||||
@ -291,6 +292,7 @@ text/x-html-sidux-man = sidux-book
|
||||
text/x-ini = txt
|
||||
text/x-mail = message
|
||||
text/x-man = document
|
||||
text/x-orgmode = document
|
||||
text/x-perl = source
|
||||
text/x-purple-html-log = pidgin
|
||||
text/x-purple-log = pidgin
|
||||
@ -359,6 +361,7 @@ text = \
|
||||
text/x-html-sidux-man \
|
||||
text/x-ini \
|
||||
text/x-man \
|
||||
text/x-orgmode \
|
||||
text/x-perl \
|
||||
text/x-python \
|
||||
text/x-shellscript
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user