doc
This commit is contained in:
parent
ad89225b24
commit
3ebf1a7db2
@ -17,8 +17,9 @@ XSLDIR="/usr/share/xml/docbook/stylesheet/docbook-xsl/"
|
||||
|
||||
# Options common to the single-file and chunked versions
|
||||
commonoptions=--stringparam section.autolabel 1 \
|
||||
--stringparam section.autolabel.max.depth 3 \
|
||||
--stringparam section.autolabel.max.depth 2 \
|
||||
--stringparam section.label.includes.component.label 1 \
|
||||
--stringparam toc.max.depth 3 \
|
||||
--stringparam autotoc.label.in.hyperlink 0 \
|
||||
--stringparam abstract.notitle.enabled 1 \
|
||||
--stringparam html.stylesheet docbook-xsl.css \
|
||||
|
||||
@ -1429,7 +1429,7 @@ alink="#0000FF">
|
||||
other constraints. Most of the relevant parameters are
|
||||
described in the <a class="link" href=
|
||||
"#RCL.INSTALL.CONFIG.RECOLLCONF.TERMS" title=
|
||||
"6.4.2.2. Parameters affecting how we generate terms and organize the index">
|
||||
"Parameters affecting how we generate terms and organize the index">
|
||||
linked section</a>.</p>
|
||||
<p>The different search interfaces (GUI, command line,
|
||||
...) have different methods to define the set of indexes
|
||||
@ -2362,7 +2362,7 @@ recoll -c <em class=
|
||||
"varname">mondelaypatterns</code> parameter in the
|
||||
<a class="link" href=
|
||||
"#RCL.INSTALL.CONFIG.RECOLLCONF.MISC" title=
|
||||
"6.4.2.5. Miscellaneous parameters">configuration
|
||||
"Miscellaneous parameters">configuration
|
||||
section</a>.</p>
|
||||
</div>
|
||||
</div>
|
||||
@ -2655,8 +2655,7 @@ recoll -c <em class=
|
||||
<p>The format of the result list entries is entirely
|
||||
configurable by using the preference dialog to <a class=
|
||||
"link" href="#RCL.SEARCH.GUI.CUSTOM.RESLIST" title=
|
||||
"3.1.15.1. The result list format">edit an HTML
|
||||
fragment</a>.</p>
|
||||
"The result list format">edit an HTML fragment</a>.</p>
|
||||
<p>You can click on the <code class="literal">Query
|
||||
details</code> link at the top of the results page to see
|
||||
the query actually performed, after stem expansion and
|
||||
@ -2674,8 +2673,8 @@ recoll -c <em class=
|
||||
<div>
|
||||
<h4 class="title"><a name=
|
||||
"RCL.SEARCH.GUI.RESLIST.SUGGS" id=
|
||||
"RCL.SEARCH.GUI.RESLIST.SUGGS"></a>3.1.2.1. No
|
||||
results: the spelling suggestions</h4>
|
||||
"RCL.SEARCH.GUI.RESLIST.SUGGS"></a>No results:
|
||||
the spelling suggestions</h4>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
@ -2696,8 +2695,8 @@ recoll -c <em class=
|
||||
<div>
|
||||
<h4 class="title"><a name=
|
||||
"RCL.SEARCH.GUI.RESULTLIST.MENU" id=
|
||||
"RCL.SEARCH.GUI.RESULTLIST.MENU"></a>3.1.2.2. The
|
||||
result list right-click menu</h4>
|
||||
"RCL.SEARCH.GUI.RESULTLIST.MENU"></a>The result
|
||||
list right-click menu</h4>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
@ -2992,7 +2991,7 @@ recoll -c <em class=
|
||||
<div>
|
||||
<h4 class="title"><a name=
|
||||
"RCL.SEARCH.GUI.PREVIEW.SEARCH" id=
|
||||
"RCL.SEARCH.GUI.PREVIEW.SEARCH"></a>3.1.6.1. Searching
|
||||
"RCL.SEARCH.GUI.PREVIEW.SEARCH"></a>Searching
|
||||
inside the preview</h4>
|
||||
</div>
|
||||
</div>
|
||||
@ -3153,8 +3152,7 @@ recoll -c <em class=
|
||||
<p><span class="application">Recoll</span> keeps a
|
||||
history of searches. See <a class="link" href=
|
||||
"#RCL.SEARCH.GUI.COMPLEX.HISTORY" title=
|
||||
"3.1.8.3. Advanced search history">Advanced search
|
||||
history</a>.</p>
|
||||
"Advanced search history">Advanced search history</a>.</p>
|
||||
<p>The dialog has two tabs:</p>
|
||||
<div class="orderedlist">
|
||||
<ol class="orderedlist" type="1">
|
||||
@ -3184,7 +3182,7 @@ recoll -c <em class=
|
||||
<div>
|
||||
<h4 class="title"><a name=
|
||||
"RCL.SEARCH.GUI.COMPLEX.TERMS" id=
|
||||
"RCL.SEARCH.GUI.COMPLEX.TERMS"></a>3.1.8.1. Advanced
|
||||
"RCL.SEARCH.GUI.COMPLEX.TERMS"></a>Advanced
|
||||
search: the "find" tab</h4>
|
||||
</div>
|
||||
</div>
|
||||
@ -3256,7 +3254,7 @@ recoll -c <em class=
|
||||
<div>
|
||||
<h4 class="title"><a name=
|
||||
"RCL.SEARCH.GUI.COMPLEX.FILTER" id=
|
||||
"RCL.SEARCH.GUI.COMPLEX.FILTER"></a>3.1.8.2. Advanced
|
||||
"RCL.SEARCH.GUI.COMPLEX.FILTER"></a>Advanced
|
||||
search: the "filter" tab</h4>
|
||||
</div>
|
||||
</div>
|
||||
@ -3324,7 +3322,7 @@ recoll -c <em class=
|
||||
<div>
|
||||
<h4 class="title"><a name=
|
||||
"RCL.SEARCH.GUI.COMPLEX.HISTORY" id=
|
||||
"RCL.SEARCH.GUI.COMPLEX.HISTORY"></a>3.1.8.3. Advanced
|
||||
"RCL.SEARCH.GUI.COMPLEX.HISTORY"></a>Advanced
|
||||
search history</h4>
|
||||
</div>
|
||||
</div>
|
||||
@ -3590,8 +3588,8 @@ recoll -c <em class=
|
||||
<div>
|
||||
<h4 class="title"><a name=
|
||||
"RCL.SEARCH.GUI.TIPS.TERMS" id=
|
||||
"RCL.SEARCH.GUI.TIPS.TERMS"></a>3.1.13.1. Terms
|
||||
and search expansion</h4>
|
||||
"RCL.SEARCH.GUI.TIPS.TERMS"></a>Terms and search
|
||||
expansion</h4>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
@ -3654,8 +3652,8 @@ recoll -c <em class=
|
||||
<div>
|
||||
<h4 class="title"><a name=
|
||||
"RCL.SEARCH.GUI.TIPS.PHRASES" id=
|
||||
"RCL.SEARCH.GUI.TIPS.PHRASES"></a>3.1.13.2. Working
|
||||
with phrases and proximity</h4>
|
||||
"RCL.SEARCH.GUI.TIPS.PHRASES"></a>Working with
|
||||
phrases and proximity</h4>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
@ -3711,7 +3709,7 @@ recoll -c <em class=
|
||||
<div>
|
||||
<h4 class="title"><a name=
|
||||
"RCL.SEARCH.GUI.TIPS.MISC" id=
|
||||
"RCL.SEARCH.GUI.TIPS.MISC"></a>3.1.13.3. Others</h4>
|
||||
"RCL.SEARCH.GUI.TIPS.MISC"></a>Others</h4>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
@ -4019,8 +4017,8 @@ recoll -c <em class=
|
||||
presentation of each result list entry. See the
|
||||
<a class="link" href=
|
||||
"#RCL.SEARCH.GUI.CUSTOM.RESLIST" title=
|
||||
"3.1.15.1. The result list format">result list
|
||||
customisation section</a>.</p>
|
||||
"The result list format">result list customisation
|
||||
section</a>.</p>
|
||||
</li>
|
||||
<li class="listitem">
|
||||
<p><a name="RCL.SEARCH.GUI.CUSTOM.RESULTHEAD" id=
|
||||
@ -4030,8 +4028,8 @@ recoll -c <em class=
|
||||
at the end of the result page HTML header. More
|
||||
detail in the <a class="link" href=
|
||||
"#RCL.SEARCH.GUI.CUSTOM.RESLIST" title=
|
||||
"3.1.15.1. The result list format">result list
|
||||
customisation section.</a></p>
|
||||
"The result list format">result list customisation
|
||||
section.</a></p>
|
||||
</li>
|
||||
<li class="listitem">
|
||||
<p><span class="guilabel">Date format</span>:
|
||||
@ -4158,8 +4156,8 @@ recoll -c <em class=
|
||||
<div>
|
||||
<h4 class="title"><a name=
|
||||
"RCL.SEARCH.GUI.CUSTOM.RESLIST" id=
|
||||
"RCL.SEARCH.GUI.CUSTOM.RESLIST"></a>3.1.15.1. The
|
||||
result list format</h4>
|
||||
"RCL.SEARCH.GUI.CUSTOM.RESLIST"></a>The result
|
||||
list format</h4>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
@ -4915,9 +4913,9 @@ recoll -c <em class=
|
||||
for a bug in versions 1.19 to 1.19.11p1). Wildcards
|
||||
will be expanded, but please <a class="link" href=
|
||||
"#RCL.SEARCH.WILDCARDS.PATH" title=
|
||||
"3.8.1.1. Wildcards and path filtering">have a
|
||||
look</a> at an important limitation of wildcards in
|
||||
path filters.</p>
|
||||
"Wildcards and path filtering">have a look</a> at an
|
||||
important limitation of wildcards in path
|
||||
filters.</p>
|
||||
<p>Relative paths also make sense, for example,
|
||||
<code class="literal">dir:share/doc</code> would
|
||||
match either <code class=
|
||||
@ -5365,8 +5363,8 @@ recoll -c <em class=
|
||||
<div>
|
||||
<h4 class="title"><a name=
|
||||
"RCL.SEARCH.WILDCARDS.PATH" id=
|
||||
"RCL.SEARCH.WILDCARDS.PATH"></a>3.8.1.1. Wildcards
|
||||
and path filtering</h4>
|
||||
"RCL.SEARCH.WILDCARDS.PATH"></a>Wildcards and
|
||||
path filtering</h4>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
@ -6382,12 +6380,12 @@ recollindex -c "$confdir"
|
||||
the result list by using the appropriate directive in
|
||||
the definition of the <a class="link" href=
|
||||
"#RCL.SEARCH.GUI.CUSTOM.RESLIST" title=
|
||||
"3.1.15.1. The result list format">result list
|
||||
paragraph format</a>. All fields are displayed on the
|
||||
fields screen of the preview window (which you can
|
||||
reach through the right-click menu). This is
|
||||
independent of the fact that the search which
|
||||
produced the results used the field or not.</p>
|
||||
"The result list format">result list paragraph
|
||||
format</a>. All fields are displayed on the fields
|
||||
screen of the preview window (which you can reach
|
||||
through the right-click menu). This is independent of
|
||||
the fact that the search which produced the results
|
||||
used the field or not.</p>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
@ -6423,14 +6421,16 @@ recollindex -c "$confdir"
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<p><span class="application">Recoll</span> versions after
|
||||
1.11 define a Python programming interface, both for
|
||||
searching and creating/updating an index.</p>
|
||||
<p>The search interface is used in the <span class=
|
||||
"application">Recoll</span> Ubuntu Unity Lens and the
|
||||
<span class="application">Recoll</span> Web UI. It can
|
||||
run queries on any <span class=
|
||||
"application">Recoll</span> configuration.</p>
|
||||
<p>The <span class="application">Recoll</span> Python
|
||||
programming interface can be used both for searching and
|
||||
for creating/updating an index. Bindings exist for
|
||||
Python2 and Python3.</p>
|
||||
<p>The search interface is used in a number of active
|
||||
projects: the <span class="application">Recoll</span>
|
||||
<span class="application">Gnome Shell Search
|
||||
Provider</span>, the <span class=
|
||||
"application">Recoll</span> Web UI, and the upmpdcli UPnP
|
||||
Media Server, in addition to many small scripts.</p>
|
||||
<p>The index update section of the API may be used to
|
||||
create and update <span class="application">Recoll</span>
|
||||
indexes on specific configurations (separate from the
|
||||
@ -6467,6 +6467,23 @@ recollindex -c "$confdir"
|
||||
here. A paragraph at the end of this section will explain
|
||||
a few differences and ways to write code compatible with
|
||||
both versions.</p>
|
||||
<p>The <code class="literal">recoll</code> package now
|
||||
contains two modules:</p>
|
||||
<div class="itemizedlist">
|
||||
<ul class="itemizedlist" style=
|
||||
"list-style-type: disc;">
|
||||
<li class="listitem">
|
||||
<p>The <code class="literal">recoll</code> module
|
||||
contains functions and classes used to query (or
|
||||
update) the index.</p>
|
||||
</li>
|
||||
<li class="listitem">
|
||||
<p>The <code class="literal">rclextract</code>
|
||||
module contains functions and classes used at query
|
||||
time to access document data.</p>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
<p>There is a good chance that your system repository has
|
||||
packages for the Recoll Python API, sometimes in a
|
||||
package separate from the main one (maybe named something
|
||||
@ -6493,15 +6510,17 @@ recollindex -c "$confdir"
|
||||
nres = query.execute("some query")
|
||||
results = query.fetchmany(20)
|
||||
for doc in results:
|
||||
print(doc.url, doc.title)
|
||||
print("%s %s" % (doc.url, doc.title))
|
||||
</pre>
|
||||
<p>You can also take a look at the source for the
|
||||
<a class="ulink" href=
|
||||
"https://github.com/koniu/recoll-webui" target=
|
||||
"_top">Recoll WebUI</a>, or the <a class="ulink" href=
|
||||
"https://opensourceprojects.eu/p/upmpdcli/code/ci/c8c8e75bd181ad9db2df14da05934e53ca867a06/tree/src/mediaserver/cdplugins/uprcl/uprclfolders.py"
|
||||
target="_top">upmpdcli local media server</a>, which are
|
||||
both based on the Python API.</p>
|
||||
"https://opensourceprojects.eu/p/recollwebui/code/ci/78ddb20787b2a894b5e4661a8d5502c4511cf71e/tree/"
|
||||
target="_top">Recoll WebUI</a>, the <a class="ulink"
|
||||
href="https://opensourceprojects.eu/p/upmpdcli/code/ci/c8c8e75bd181ad9db2df14da05934e53ca867a06/tree/src/mediaserver/cdplugins/uprcl/uprclfolders.py"
|
||||
target="_top">upmpdcli local media server</a>, or the
|
||||
<a class="ulink" href=
|
||||
"https://opensourceprojects.eu/p/recollgssp/code/ci/3f120108e099f9d687306c0be61593994326d52d/tree/gssp-recoll.py"
|
||||
target="_top">Gnome Shell Search Provider</a>.</p>
|
||||
</div>
|
||||
<div class="sect2">
|
||||
<div class="titlepage">
|
||||
@ -6604,11 +6623,19 @@ recollindex -c "$confdir"
|
||||
<dt><span class="term">Stored and indexed
|
||||
fields</span></dt>
|
||||
<dd>
|
||||
<p>The <code class="filename">fields</code> file
|
||||
inside the <span class="application">Recoll</span>
|
||||
<p>The <a class="link" href=
|
||||
"#RCL.INSTALL.CONFIG.FIELDS" title=
|
||||
"6.4.3. The fields file"><code class=
|
||||
"filename">fields</code> file</a> inside the
|
||||
<span class="application">Recoll</span>
|
||||
configuration defines which document fields are
|
||||
either "indexed" (searchable), "stored"
|
||||
(retrievable with search results), or both.</p>
|
||||
either <code class="literal">indexed</code>
|
||||
(searchable), <code class="literal">stored</code>
|
||||
(retrievable with search results), or both. Apart
|
||||
from a few standard/internal fields, only the
|
||||
<code class="literal">stored</code> fields are
|
||||
retrievable through the Python search
|
||||
interface.</p>
|
||||
</dd>
|
||||
</dl>
|
||||
</div>
|
||||
@ -6624,113 +6651,64 @@ recollindex -c "$confdir"
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect3">
|
||||
<div class="titlepage">
|
||||
<div>
|
||||
<div>
|
||||
<h4 class="title"><a name=
|
||||
"RCL.PROGRAM.PYTHONAPI.PACKAGE" id=
|
||||
"RCL.PROGRAM.PYTHONAPI.PACKAGE"></a>5.3.3.1. Recoll
|
||||
package</h4>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<p>The <code class="literal">recoll</code> package
|
||||
contains two modules:</p>
|
||||
<div class="itemizedlist">
|
||||
<ul class="itemizedlist" style=
|
||||
"list-style-type: disc;">
|
||||
<li class="listitem">
|
||||
<p>The <code class="literal">recoll</code> module
|
||||
contains functions and classes used to query (or
|
||||
update) the index. This section will only
|
||||
describe the query part, see further for the
|
||||
update part.</p>
|
||||
</li>
|
||||
<li class="listitem">
|
||||
<p>The <code class="literal">rclextract</code>
|
||||
module contains functions and classes used to
|
||||
access document data.</p>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect3">
|
||||
<div class="titlepage">
|
||||
<div>
|
||||
<div>
|
||||
<h4 class="title"><a name=
|
||||
"RCL.PROGRAM.PYTHONAPI.RECOLL" id=
|
||||
"RCL.PROGRAM.PYTHONAPI.RECOLL"></a>5.3.3.2. The
|
||||
recoll module</h4>
|
||||
"RCL.PROGRAM.PYTHONAPI.RECOLL"></a>The recoll
|
||||
module</h4>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect4">
|
||||
<div class="simplesect">
|
||||
<div class="titlepage">
|
||||
<div>
|
||||
<div>
|
||||
<h5 class="title"><a name=
|
||||
"RCL.PROGRAM.PYTHONAPI.RECOLL.FUNCTIONS" id=
|
||||
"RCL.PROGRAM.PYTHONAPI.RECOLL.FUNCTIONS"></a>Functions</h5>
|
||||
"RCL.PROGRAM.PYTHONAPI.RECOLL.CONNECT" id=
|
||||
"RCL.PROGRAM.PYTHONAPI.RECOLL.CONNECT"></a>connect(confdir=None,
|
||||
extra_dbs=None, writable = False)</h5>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="variablelist">
|
||||
<dl class="variablelist">
|
||||
<dt><span class="term">connect(confdir=None,
|
||||
extra_dbs=None, writable = False)</span></dt>
|
||||
<dd>
|
||||
<p>The <code class="literal">connect()</code>
|
||||
function connects to one or several
|
||||
<span class="application">Recoll</span>
|
||||
index(es) and returns a <code class=
|
||||
"literal">Db</code> object.</p>
|
||||
function connects to one or several <span class=
|
||||
"application">Recoll</span> index(es) and returns a
|
||||
<code class="literal">Db</code> object.</p>
|
||||
<p>This call initializes the recoll module, and it
|
||||
should always be performed before any other call or
|
||||
object creation.</p>
|
||||
<div class="itemizedlist">
|
||||
<ul class="itemizedlist" style=
|
||||
"list-style-type: disc;">
|
||||
<li class="listitem">
|
||||
<p><code class="literal">confdir</code>
|
||||
may specify a configuration directory.
|
||||
The usual defaults apply.</p>
|
||||
<p><code class="literal">confdir</code> may
|
||||
specify a configuration directory. The usual
|
||||
defaults apply.</p>
|
||||
</li>
|
||||
<li class="listitem">
|
||||
<p><code class="literal">extra_dbs</code>
|
||||
is a list of additional indexes (Xapian
|
||||
<p><code class="literal">extra_dbs</code> is a
|
||||
list of additional indexes (Xapian
|
||||
directories).</p>
|
||||
</li>
|
||||
<li class="listitem">
|
||||
<p><code class="literal">writable</code>
|
||||
decides if we can index new data through
|
||||
this connection.</p>
|
||||
decides if we can index new data through this
|
||||
connection.</p>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
<p>This call initializes the recoll module, and
|
||||
it should always be performed before any other
|
||||
call or object creation.</p>
|
||||
</dd>
|
||||
</dl>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect4">
|
||||
<div class="simplesect">
|
||||
<div class="titlepage">
|
||||
<div>
|
||||
<div>
|
||||
<h5 class="title"><a name=
|
||||
"RCL.PROGRAM.PYTHONAPI.RECOLL.CLASSES" id=
|
||||
"RCL.PROGRAM.PYTHONAPI.RECOLL.CLASSES"></a>Classes</h5>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect5">
|
||||
<div class="titlepage">
|
||||
<div>
|
||||
<div>
|
||||
<h6 class="title"><a name=
|
||||
"RCL.PROGRAM.PYTHONAPI.RECOLL.CLASSES.DB" id=
|
||||
"RCL.PROGRAM.PYTHONAPI.RECOLL.CLASSES.DB"></a>The
|
||||
Db class</h6>
|
||||
"RCL.PROGRAM.PYTHONAPI.RECOLL.DB" id=
|
||||
"RCL.PROGRAM.PYTHONAPI.RECOLL.DB"></a>The Db
|
||||
class</h5>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
@ -6741,9 +6719,9 @@ recollindex -c "$confdir"
|
||||
<dl class="variablelist">
|
||||
<dt><span class="term">Db.close()</span></dt>
|
||||
<dd>
|
||||
<p>Closes the connection. You can't do
|
||||
anything with the <code class=
|
||||
"literal">Db</code> object after this.</p>
|
||||
<p>Closes the connection. You can't do anything
|
||||
with the <code class="literal">Db</code> object
|
||||
after this.</p>
|
||||
</dd>
|
||||
<dt><span class="term">Db.query(),
|
||||
Db.cursor()</span></dt>
|
||||
@ -6768,9 +6746,9 @@ recollindex -c "$confdir"
|
||||
expr, field='', maxlen=-1, casesens=False,
|
||||
diacsens=False, lang='english')</span></dt>
|
||||
<dd>
|
||||
<p>Expand an expression against the index
|
||||
term list. Performs the basic function from
|
||||
the GUI term explorer tool. <code class=
|
||||
<p>Expand an expression against the index term
|
||||
list. Performs the basic function from the GUI
|
||||
term explorer tool. <code class=
|
||||
"literal">match_type</code> can be either of
|
||||
<code class="literal">wildcard</code>,
|
||||
<code class="literal">regexp</code> or
|
||||
@ -6781,23 +6759,21 @@ recollindex -c "$confdir"
|
||||
</dl>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect5">
|
||||
<div class="simplesect">
|
||||
<div class="titlepage">
|
||||
<div>
|
||||
<div>
|
||||
<h6 class="title"><a name=
|
||||
<h5 class="title"><a name=
|
||||
"RCL.PROGRAM.PYTHONAPI.RECOLL.CLASSES.QUERY"
|
||||
id=
|
||||
"RCL.PROGRAM.PYTHONAPI.RECOLL.CLASSES.QUERY"></a>The
|
||||
Query class</h6>
|
||||
id="RCL.PROGRAM.PYTHONAPI.RECOLL.CLASSES.QUERY">
|
||||
</a>The Query class</h5>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<p>A <code class="literal">Query</code> object
|
||||
(equivalent to a cursor in the Python DB API) is
|
||||
created by a <code class=
|
||||
"literal">Db.query()</code> call. It is used to
|
||||
execute index searches.</p>
|
||||
created by a <code class="literal">Db.query()</code>
|
||||
call. It is used to execute index searches.</p>
|
||||
<div class="variablelist">
|
||||
<dl class="variablelist">
|
||||
<dt><span class="term">Query.sortby(fieldname,
|
||||
@ -6810,14 +6786,13 @@ recollindex -c "$confdir"
|
||||
</dd>
|
||||
<dt><span class=
|
||||
"term">Query.execute(query_string, stemming=1,
|
||||
stemlang="english",
|
||||
fetchtext=False)</span></dt>
|
||||
stemlang="english", fetchtext=False)</span></dt>
|
||||
<dd>
|
||||
<p>Starts a search for <em class=
|
||||
"replaceable"><code>query_string</code></em>,
|
||||
a <span class="application">Recoll</span>
|
||||
search language string. If the index stores
|
||||
the document texts and <code class=
|
||||
"replaceable"><code>query_string</code></em>, a
|
||||
<span class="application">Recoll</span> search
|
||||
language string. If the index stores the
|
||||
document texts and <code class=
|
||||
"literal">fetchtext</code> is True, store the
|
||||
document extracted text in <code class=
|
||||
"literal">doc.text</code>.</p>
|
||||
@ -6826,9 +6801,9 @@ recollindex -c "$confdir"
|
||||
"term">Query.executesd(SearchData,
|
||||
fetchtext=False)</span></dt>
|
||||
<dd>
|
||||
<p>Starts a search for the query defined by
|
||||
the SearchData object. If the index stores
|
||||
the document texts and <code class=
|
||||
<p>Starts a search for the query defined by the
|
||||
SearchData object. If the index stores the
|
||||
document texts and <code class=
|
||||
"literal">fetchtext</code> is True, store the
|
||||
document extracted text in <code class=
|
||||
"literal">doc.text</code>.</p>
|
||||
@ -6838,8 +6813,8 @@ recollindex -c "$confdir"
|
||||
<dd>
|
||||
<p>Fetches the next <code class=
|
||||
"literal">Doc</code> objects in the current
|
||||
search results, and returns them as an array
|
||||
of the required size, which is by default the
|
||||
search results, and returns them as an array of
|
||||
the required size, which is by default the
|
||||
value of the <code class=
|
||||
"literal">arraysize</code> data member.</p>
|
||||
</dd>
|
||||
@ -6851,8 +6826,7 @@ recollindex -c "$confdir"
|
||||
search results. Generates a StopIteration
|
||||
exception if there are no results left.</p>
|
||||
</dd>
|
||||
<dt><span class=
|
||||
"term">Query.close()</span></dt>
|
||||
<dt><span class="term">Query.close()</span></dt>
|
||||
<dd>
|
||||
<p>Closes the query. The object is unusable
|
||||
after the call.</p>
|
||||
@ -6868,14 +6842,13 @@ recollindex -c "$confdir"
|
||||
<dt><span class=
|
||||
"term">Query.getgroups()</span></dt>
|
||||
<dd>
|
||||
<p>Retrieves the expanded query terms as a
|
||||
list of pairs. Meaningful only after
|
||||
executexx. In each pair, the first entry is a
|
||||
list of user terms (of size one for simple
|
||||
terms, or more for group and phrase clauses),
|
||||
the second a list of query terms as derived
|
||||
from the user terms and used in the Xapian
|
||||
Query.</p>
|
||||
<p>Retrieves the expanded query terms as a list
|
||||
of pairs. Meaningful only after executexx. In
|
||||
each pair, the first entry is a list of user
|
||||
terms (of size one for simple terms, or more
|
||||
for group and phrase clauses), the second a
|
||||
list of query terms as derived from the user
|
||||
terms and used in the Xapian Query.</p>
|
||||
</dd>
|
||||
<dt><span class=
|
||||
"term">Query.getxquery()</span></dt>
|
||||
@ -6890,26 +6863,24 @@ recollindex -c "$confdir"
|
||||
<p>Will insert &lt;span class="rclmatch"&gt;,
|
||||
&lt;/span&gt; tags around the match areas in
|
||||
the input text and return the modified text.
|
||||
<code class="literal">ishtml</code> can be
|
||||
set to indicate that the input text is HTML
|
||||
and that HTML special characters should not
|
||||
be escaped. <code class=
|
||||
"literal">methods</code> if set should be an
|
||||
object with methods startMatch(i) and
|
||||
endMatch() which will be called for each
|
||||
match and should return a begin and end
|
||||
tag.</p>
|
||||
<code class="literal">ishtml</code> can be set
|
||||
to indicate that the input text is HTML and
|
||||
that HTML special characters should not be
|
||||
escaped. <code class="literal">methods</code>
|
||||
if set should be an object with methods
|
||||
startMatch(i) and endMatch() which will be
|
||||
called for each match and should return a begin
|
||||
and end tag.</p>
|
||||
</dd>
|
||||
<dt><span class=
|
||||
"term">Query.makedocabstract(doc, methods =
|
||||
object)</span></dt>
|
||||
<dt><span class="term">Query.makedocabstract(doc,
|
||||
methods = object)</span></dt>
|
||||
<dd>
|
||||
<p>Create a snippets abstract for
|
||||
<code class="literal">doc</code> (a
|
||||
<code class="literal">Doc</code> object) by
|
||||
selecting text around the match terms. If
|
||||
methods is set, will also perform
|
||||
highlighting. See the highlight method.</p>
|
||||
<p>Create a snippets abstract for <code class=
|
||||
"literal">doc</code> (a <code class=
|
||||
"literal">Doc</code> object) by selecting text
|
||||
around the match terms. If methods is set, will
|
||||
also perform highlighting. See the highlight
|
||||
method.</p>
|
||||
</dd>
|
||||
<dt><span class="term">Query.__iter__() and
|
||||
Query.next()</span></dt>
|
||||
@ -6928,8 +6899,7 @@ recollindex -c "$confdir"
|
||||
<p>Default number of records processed by
|
||||
fetchmany (r/w).</p>
|
||||
</dd>
|
||||
<dt><span class=
|
||||
"term">Query.rowcount</span></dt>
|
||||
<dt><span class="term">Query.rowcount</span></dt>
|
||||
<dd>
|
||||
<p>Number of records returned by the last
|
||||
execute.</p>
|
||||
@ -6938,39 +6908,38 @@ recollindex -c "$confdir"
|
||||
"term">Query.rownumber</span></dt>
|
||||
<dd>
|
||||
<p>Next index to be fetched from results.
|
||||
Normally increments after each fetchone()
|
||||
call, but can be set/reset before the call to
|
||||
effect seeking (equivalent to using
|
||||
<code class="literal">scroll()</code>).
|
||||
Starts at 0.</p>
|
||||
Normally increments after each fetchone() call,
|
||||
but can be set/reset before the call to effect
|
||||
seeking (equivalent to using <code class=
|
||||
"literal">scroll()</code>). Starts at 0.</p>
|
||||
</dd>
|
||||
</dl>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect5">
|
||||
<div class="simplesect">
|
||||
<div class="titlepage">
|
||||
<div>
|
||||
<div>
|
||||
<h6 class="title"><a name=
|
||||
"RCL.PROGRAM.PYTHONAPI.RECOLL.CLASSES.DOC"
|
||||
id="RCL.PROGRAM.PYTHONAPI.RECOLL.CLASSES.DOC">
|
||||
</a>The Doc class</h6>
|
||||
<h5 class="title"><a name=
|
||||
"RCL.PROGRAM.PYTHONAPI.RECOLL.CLASSES.DOC" id=
|
||||
"RCL.PROGRAM.PYTHONAPI.RECOLL.CLASSES.DOC"></a>The
|
||||
Doc class</h5>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<p>A <code class="literal">Doc</code> object
|
||||
contains index data for a given document. The data
|
||||
is extracted from the index when searching, or set
|
||||
by the indexer program when updating. The Doc
|
||||
object has many attributes to be read or set by its
|
||||
user. It matches exactly the Rcl::Doc C++ object.
|
||||
Some of the attributes are predefined, but,
|
||||
especially when indexing, others can be set, the
|
||||
name of which will be processed as field names by
|
||||
the indexing configuration. Inputs can be specified
|
||||
as Unicode or strings. Outputs are Unicode objects.
|
||||
All dates are specified as Unix timestamps, printed
|
||||
as strings. Please refer to the <code class=
|
||||
<p>A <code class="literal">Doc</code> object contains
|
||||
index data for a given document. The data is
|
||||
extracted from the index when searching, or set by
|
||||
the indexer program when updating. The Doc object has
|
||||
many attributes to be read or set by its user. It
|
||||
mostly matches the Rcl::Doc C++ object. Some of the
|
||||
attributes are predefined, but, especially when
|
||||
indexing, others can be set, the name of which will
|
||||
be processed as field names by the indexing
|
||||
configuration. Inputs can be specified as Unicode or
|
||||
strings. Outputs are Unicode objects. All dates are
|
||||
specified as Unix timestamps, printed as strings.
|
||||
Please refer to the <code class=
|
||||
"filename">rcldb/rcldoc.cpp</code> C++ file for a
|
||||
full description of the predefined attributes. Here
|
||||
follows a short list.</p>
|
||||
@ -6984,23 +6953,21 @@ recollindex -c "$confdir"
|
||||
</li>
|
||||
<li class="listitem">
|
||||
<p><code class="literal">ipath</code> the
|
||||
document <code class="literal">ipath</code>
|
||||
for embedded documents.</p>
|
||||
document <code class="literal">ipath</code> for
|
||||
embedded documents.</p>
|
||||
</li>
|
||||
<li class="listitem">
|
||||
<p><code class="literal">fbytes,
|
||||
dbytes</code> the document file and text
|
||||
sizes.</p>
|
||||
<p><code class="literal">fbytes, dbytes</code>
|
||||
the document file and text sizes.</p>
|
||||
</li>
|
||||
<li class="listitem">
|
||||
<p><code class="literal">fmtime,
|
||||
dmtime</code> the document file and document
|
||||
times.</p>
|
||||
<p><code class="literal">fmtime, dmtime</code>
|
||||
the document file and document times.</p>
|
||||
</li>
|
||||
<li class="listitem">
|
||||
<p><code class="literal">xdocid</code> the
|
||||
document Xapian document ID. This is useful
|
||||
if you want to access the document through a
|
||||
document Xapian document ID. This is useful if
|
||||
you want to access the document through a
|
||||
direct Xapian operation.</p>
|
||||
</li>
|
||||
<li class="listitem">
|
||||
@ -7016,13 +6983,15 @@ recollindex -c "$confdir"
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
<p>At query time, only the fields that are defined
|
||||
as <code class="literal">stored</code> either by
|
||||
default or in the <code class=
|
||||
"filename">fields</code> configuration file will be
|
||||
meaningful in the <code class="literal">Doc</code>
|
||||
object. Especially this will not be the case for
|
||||
the document text. See the <code class=
|
||||
<p>At query time, only the fields that are defined as
|
||||
<code class="literal">stored</code> either by default
|
||||
or in the <code class="filename">fields</code>
|
||||
configuration file will be meaningful in the
|
||||
<code class="literal">Doc</code> object. The document
|
||||
processed text may be present or not, depending if
|
||||
the index stores the text at all, and if it does, on
|
||||
the <code class="literal">fetchtext</code> query
|
||||
execute option. See also the <code class=
|
||||
"literal">rclextract</code> module for accessing
|
||||
document contents.</p>
|
||||
<div class="variablelist">
|
||||
@ -7031,26 +7000,24 @@ recollindex -c "$confdir"
|
||||
operator</span></dt>
|
||||
<dd>
|
||||
<p>Retrieve the named document attribute. You
|
||||
can also use <code class=
|
||||
"literal">getattr(doc, key)</code> or
|
||||
<code class="literal">doc.key</code>.</p>
|
||||
can also use <code class="literal">getattr(doc,
|
||||
key)</code> or <code class=
|
||||
"literal">doc.key</code>.</p>
|
||||
</dd>
|
||||
<dt><span class="term">doc.key =
|
||||
value</span></dt>
|
||||
<dd>
|
||||
<p>Set the named document attribute. You
|
||||
can also use <code class=
|
||||
"literal">setattr(doc, key,
|
||||
value)</code>.</p>
|
||||
can also use <code class="literal">setattr(doc,
|
||||
key, value)</code>.</p>
|
||||
</dd>
|
||||
<dt><span class="term">getbinurl()</span></dt>
|
||||
<dd>
|
||||
<p>Retrieve the URL in byte array format (no
|
||||
transcoding), for use as parameter to a
|
||||
system call.</p>
|
||||
transcoding), for use as parameter to a system
|
||||
call.</p>
|
||||
</dd>
|
||||
<dt><span class=
|
||||
"term">setbinurl(url)</span></dt>
|
||||
<dt><span class="term">setbinurl(url)</span></dt>
|
||||
<dd>
|
||||
<p>Set the URL in byte array format (no
|
||||
transcoding).</p>
|
||||
@ -7068,25 +7035,25 @@ recollindex -c "$confdir"
|
||||
</dl>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect5">
|
||||
<div class="simplesect">
|
||||
<div class="titlepage">
|
||||
<div>
|
||||
<div>
|
||||
<h6 class="title"><a name=
|
||||
<h5 class="title"><a name=
|
||||
"RCL.PROGRAM.PYTHONAPI.RECOLL.CLASSES.SEARCHDATA"
|
||||
id=
|
||||
"RCL.PROGRAM.PYTHONAPI.RECOLL.CLASSES.SEARCHDATA">
|
||||
</a>The SearchData class</h6>
|
||||
</a>The SearchData class</h5>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<p>A <code class="literal">SearchData</code> object
|
||||
allows building a query by combining clauses, for
|
||||
execution by <code class=
|
||||
"literal">Query.executesd()</code>. It can be used
|
||||
in replacement of the query language approach. The
|
||||
interface is going to change a little, so no
|
||||
detailed doc for now...</p>
|
||||
"literal">Query.executesd()</code>. It can be used in
|
||||
replacement of the query language approach. The
|
||||
interface is going to change a little, so no detailed
|
||||
doc for now...</p>
|
||||
<div class="variablelist">
|
||||
<dl class="variablelist">
|
||||
<dt><span class=
|
||||
@ -7098,21 +7065,21 @@ recollindex -c "$confdir"
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect3">
|
||||
<div class="titlepage">
|
||||
<div>
|
||||
<div>
|
||||
<h4 class="title"><a name=
|
||||
"RCL.PROGRAM.PYTHONAPI.RCLEXTRACT" id=
|
||||
"RCL.PROGRAM.PYTHONAPI.RCLEXTRACT"></a>5.3.3.3. The
|
||||
"RCL.PROGRAM.PYTHONAPI.RCLEXTRACT"></a>The
|
||||
rclextract module</h4>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<p>Prior to <span class="application">Recoll</span>
|
||||
1.25, index queries never provide document content
|
||||
because it is not stored. More recent versions usually
|
||||
1.25, index queries could not provide document content
|
||||
because it was never stored. <span class=
|
||||
"application">Recoll</span> 1.25 and later usually
|
||||
store the document text, which can be optionally
|
||||
retrieved when running a query (see <code class=
|
||||
"literal">query.execute()</code> above - the result is
|
||||
@ -7126,7 +7093,7 @@ recollindex -c "$confdir"
|
||||
<p>You need to import the <code class=
|
||||
"literal">recoll</code> module before the <code class=
|
||||
"literal">rclextract</code> module.</p>
|
||||
<div class="sect4">
|
||||
<div class="simplesect">
|
||||
<div class="titlepage">
|
||||
<div>
|
||||
<div>
|
||||
@ -7207,7 +7174,7 @@ not doc.ipath and (not "rclbes" in doc.keys() or doc["rclbes"] == "FS")
|
||||
<div>
|
||||
<h4 class="title"><a name=
|
||||
"RCL.PROGRAM.PYTHONAPI.SEARCH.EXAMPLE" id=
|
||||
"RCL.PROGRAM.PYTHONAPI.SEARCH.EXAMPLE"></a>5.3.3.4. Search
|
||||
"RCL.PROGRAM.PYTHONAPI.SEARCH.EXAMPLE"></a>Search
|
||||
API usage example</h4>
|
||||
</div>
|
||||
</div>
|
||||
@ -7305,7 +7272,7 @@ for i in range(nres):
|
||||
<div>
|
||||
<h4 class="title"><a name=
|
||||
"RCL.PROGRAM.PYTHONAPI.UPDATE.UPDATE" id=
|
||||
"RCL.PROGRAM.PYTHONAPI.UPDATE.UPDATE"></a>5.3.4.1. Python
|
||||
"RCL.PROGRAM.PYTHONAPI.UPDATE.UPDATE"></a>Python
|
||||
update interface</h4>
|
||||
</div>
|
||||
</div>
|
||||
@ -7399,7 +7366,7 @@ for i in range(nres):
|
||||
<div>
|
||||
<h4 class="title"><a name=
|
||||
"RCL.PROGRAM.PYTHONAPI.UPDATE.ACCESS" id=
|
||||
"RCL.PROGRAM.PYTHONAPI.UPDATE.ACCESS"></a>5.3.4.2. Query
|
||||
"RCL.PROGRAM.PYTHONAPI.UPDATE.ACCESS"></a>Query
|
||||
data access for external indexers (1.23)</h4>
|
||||
</div>
|
||||
</div>
|
||||
@ -7449,7 +7416,7 @@ for i in range(nres):
|
||||
<div>
|
||||
<h4 class="title"><a name=
|
||||
"RCL.PROGRAM.PYTHONAPI.UPDATE.SAMPLES" id=
|
||||
"RCL.PROGRAM.PYTHONAPI.UPDATE.SAMPLES"></a>5.3.4.3. External
|
||||
"RCL.PROGRAM.PYTHONAPI.UPDATE.SAMPLES"></a>External
|
||||
indexer samples</h4>
|
||||
</div>
|
||||
</div>
|
||||
@ -8404,7 +8371,7 @@ for i in range(nres):
|
||||
<div>
|
||||
<h4 class="title"><a name=
|
||||
"RCL.INSTALL.CONFIG.RECOLLCONF.WHATDOCS" id=
|
||||
"RCL.INSTALL.CONFIG.RECOLLCONF.WHATDOCS"></a>6.4.2.1. Parameters
|
||||
"RCL.INSTALL.CONFIG.RECOLLCONF.WHATDOCS"></a>Parameters
|
||||
affecting what documents we index</h4>
|
||||
</div>
|
||||
</div>
|
||||
@ -8738,7 +8705,7 @@ for i in range(nres):
|
||||
<div>
|
||||
<h4 class="title"><a name=
|
||||
"RCL.INSTALL.CONFIG.RECOLLCONF.TERMS" id=
|
||||
"RCL.INSTALL.CONFIG.RECOLLCONF.TERMS"></a>6.4.2.2. Parameters
|
||||
"RCL.INSTALL.CONFIG.RECOLLCONF.TERMS"></a>Parameters
|
||||
affecting how we generate terms and organize the
|
||||
index</h4>
|
||||
</div>
|
||||
@ -9008,7 +8975,7 @@ for i in range(nres):
|
||||
<div>
|
||||
<h4 class="title"><a name=
|
||||
"RCL.INSTALL.CONFIG.RECOLLCONF.STORE" id=
|
||||
"RCL.INSTALL.CONFIG.RECOLLCONF.STORE"></a>6.4.2.3. Parameters
|
||||
"RCL.INSTALL.CONFIG.RECOLLCONF.STORE"></a>Parameters
|
||||
affecting where and how we store things</h4>
|
||||
</div>
|
||||
</div>
|
||||
@ -9163,7 +9130,7 @@ for i in range(nres):
|
||||
<div>
|
||||
<h4 class="title"><a name=
|
||||
"RCL.INSTALL.CONFIG.RECOLLCONF.PERFS" id=
|
||||
"RCL.INSTALL.CONFIG.RECOLLCONF.PERFS"></a>6.4.2.4. Parameters
|
||||
"RCL.INSTALL.CONFIG.RECOLLCONF.PERFS"></a>Parameters
|
||||
affecting indexing performance and resource
|
||||
usage</h4>
|
||||
</div>
|
||||
@ -9264,7 +9231,7 @@ for i in range(nres):
|
||||
<div>
|
||||
<h4 class="title"><a name=
|
||||
"RCL.INSTALL.CONFIG.RECOLLCONF.MISC" id=
|
||||
"RCL.INSTALL.CONFIG.RECOLLCONF.MISC"></a>6.4.2.5. Miscellaneous
|
||||
"RCL.INSTALL.CONFIG.RECOLLCONF.MISC"></a>Miscellaneous
|
||||
parameters</h4>
|
||||
</div>
|
||||
</div>
|
||||
@ -9541,7 +9508,7 @@ for i in range(nres):
|
||||
<div>
|
||||
<h4 class="title"><a name=
|
||||
"RCL.INSTALL.CONFIG.RECOLLCONF.QUERY" id=
|
||||
"RCL.INSTALL.CONFIG.RECOLLCONF.QUERY"></a>6.4.2.6. Query-time
|
||||
"RCL.INSTALL.CONFIG.RECOLLCONF.QUERY"></a>Query-time
|
||||
parameters (no impact on the index)</h4>
|
||||
</div>
|
||||
</div>
|
||||
@ -9616,7 +9583,7 @@ for i in range(nres):
|
||||
<div>
|
||||
<h4 class="title"><a name=
|
||||
"RCL.INSTALL.CONFIG.RECOLLCONF.PDF" id=
|
||||
"RCL.INSTALL.CONFIG.RECOLLCONF.PDF"></a>6.4.2.7. Parameters
|
||||
"RCL.INSTALL.CONFIG.RECOLLCONF.PDF"></a>Parameters
|
||||
for the PDF input script</h4>
|
||||
</div>
|
||||
</div>
|
||||
@ -9687,7 +9654,7 @@ for i in range(nres):
|
||||
<div>
|
||||
<h4 class="title"><a name=
|
||||
"RCL.INSTALL.CONFIG.RECOLLCONF.SPECLOCATIONS" id=
|
||||
"RCL.INSTALL.CONFIG.RECOLLCONF.SPECLOCATIONS"></a>6.4.2.8. Parameters
|
||||
"RCL.INSTALL.CONFIG.RECOLLCONF.SPECLOCATIONS"></a>Parameters
|
||||
set for specific locations</h4>
|
||||
</div>
|
||||
</div>
|
||||
@ -9820,7 +9787,7 @@ for i in range(nres):
|
||||
<div>
|
||||
<h4 class="title"><a name=
|
||||
"RCL.INSTALL.CONFIG.FIELDS.XATTR" id=
|
||||
"RCL.INSTALL.CONFIG.FIELDS.XATTR"></a>6.4.3.1. Extended
|
||||
"RCL.INSTALL.CONFIG.FIELDS.XATTR"></a>Extended
|
||||
attributes in the fields file</h4>
|
||||
</div>
|
||||
</div>
|
||||
@ -10150,7 +10117,7 @@ other = rclcat:other
|
||||
<div>
|
||||
<h4 class="title"><a name=
|
||||
"RCL.INSTALL.CONFIG.EXAMPLES.ADDVIEW" id=
|
||||
"RCL.INSTALL.CONFIG.EXAMPLES.ADDVIEW"></a>6.4.8.1. Adding
|
||||
"RCL.INSTALL.CONFIG.EXAMPLES.ADDVIEW"></a>Adding
|
||||
an external viewer for a non-indexed type</h4>
|
||||
</div>
|
||||
</div>
|
||||
@ -10213,7 +10180,7 @@ other = rclcat:other
|
||||
<div>
|
||||
<h4 class="title"><a name=
|
||||
"RCL.INSTALL.CONFIG.EXAMPLES.ADDINDEX" id=
|
||||
"RCL.INSTALL.CONFIG.EXAMPLES.ADDINDEX"></a>6.4.8.2. Adding
|
||||
"RCL.INSTALL.CONFIG.EXAMPLES.ADDINDEX"></a>Adding
|
||||
indexing support for a new file type</h4>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@ -4966,13 +4966,14 @@ recollindex -c "$confdir"
|
||||
<sect2 id="RCL.PROGRAM.PYTHONAPI.INTRO">
|
||||
<title>Introduction</title>
|
||||
|
||||
<para>&RCL; versions after 1.11 define a Python programming
|
||||
interface, both for searching and creating/updating an
|
||||
index.</para>
|
||||
<para>The &RCL; Python programming interface can be used both for
|
||||
searching and for creating/updating an index. Bindings exist for
|
||||
Python2 and Python3.</para>
|
||||
|
||||
<para>The search interface is used in the &RCL; Ubuntu Unity Lens
|
||||
and the &RCL; Web UI. It can run queries on any &RCL;
|
||||
configuration.</para>
|
||||
<para>The search interface is used in a number of active projects:
|
||||
the &RCL; <application>Gnome Shell Search Provider</application>,
|
||||
the &RCL; Web UI, and the upmpdcli UPnP Media Server, in addition
|
||||
to many small scripts.</para>
|
||||
|
||||
<para>The index update section of the API may be used to create and
|
||||
update &RCL; indexes on specific configurations (separate from the
|
||||
@ -4998,6 +4999,19 @@ recollindex -c "$confdir"
|
||||
paragraph at the end of this section will explain a few differences
|
||||
and ways to write code compatible with both versions.</para>
|
||||
|
||||
<para>The <literal>recoll</literal> package now contains two
|
||||
modules:</para>
|
||||
<itemizedlist>
|
||||
<listitem><para>The <literal>recoll</literal> module contains
|
||||
functions and classes used to query (or update) the
|
||||
index.</para></listitem>
|
||||
|
||||
<listitem><para>The <literal>rclextract</literal> module contains
|
||||
functions and classes used at query time to access document
|
||||
data.</para>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
|
||||
<para>There is a good chance that your system repository has
|
||||
packages for the Recoll Python API, sometimes in a package separate
|
||||
from the main one (maybe named something like python-recoll). Else
|
||||
@ -5022,13 +5036,17 @@ recollindex -c "$confdir"
|
||||
nres = query.execute("some query")
|
||||
results = query.fetchmany(20)
|
||||
for doc in results:
|
||||
print(doc.url, doc.title)
|
||||
print("%s %s" % (doc.url, doc.title))
|
||||
]]></programlisting>
|
||||
|
||||
<para>You can also take a look at the source for the <ulink
|
||||
url="https://github.com/koniu/recoll-webui">Recoll
|
||||
WebUI</ulink>, or the <ulink url="https://opensourceprojects.eu/p/upmpdcli/code/ci/c8c8e75bd181ad9db2df14da05934e53ca867a06/tree/src/mediaserver/cdplugins/uprcl/uprclfolders.py">upmpdcli local media server</ulink>, which are both
|
||||
based on the Python API.</para>
|
||||
<para>You can also take a look at the source for the
|
||||
<ulink url="https://opensourceprojects.eu/p/recollwebui/code/ci/78ddb20787b2a894b5e4661a8d5502c4511cf71e/tree/">Recoll
|
||||
WebUI</ulink>, the
|
||||
<ulink url="https://opensourceprojects.eu/p/upmpdcli/code/ci/c8c8e75bd181ad9db2df14da05934e53ca867a06/tree/src/mediaserver/cdplugins/uprcl/uprclfolders.py">upmpdcli
|
||||
local media server</ulink>, or the
|
||||
<ulink
|
||||
url="https://opensourceprojects.eu/p/recollgssp/code/ci/3f120108e099f9d687306c0be61593994326d52d/tree/gssp-recoll.py">Gnome
|
||||
Shell Search Provider</ulink>.</para>
|
||||
|
||||
</sect2>
|
||||
|
||||
@ -5104,10 +5122,14 @@ recollindex -c "$confdir"
|
||||
|
||||
<varlistentry>
|
||||
<term>Stored and indexed fields</term>
|
||||
<listitem><para>The <filename>fields</filename> file inside
|
||||
the &RCL; configuration defines which document fields are
|
||||
either "indexed" (searchable), "stored" (retrievable with
|
||||
search results), or both.</para>
|
||||
<listitem><para>The <link
|
||||
linkend="RCL.INSTALL.CONFIG.FIELDS"><filename>fields</filename>
|
||||
file</link> inside the &RCL; configuration defines which
|
||||
document fields are either <literal>indexed</literal>
|
||||
(searchable), <literal>stored</literal> (retrievable with
|
||||
search results), or both. Apart from a few standard/internal
|
||||
fields, only the <literal>stored</literal> fields are
|
||||
retrievable through the Python search interface.</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
@ -5118,37 +5140,18 @@ recollindex -c "$confdir"
|
||||
<sect2 id="RCL.PROGRAM.PYTHONAPI.SEARCH">
|
||||
<title>Python search interface</title>
|
||||
|
||||
<sect3 id="RCL.PROGRAM.PYTHONAPI.PACKAGE">
|
||||
<title>Recoll package</title>
|
||||
|
||||
<para>The <literal>recoll</literal> package contains two
|
||||
modules:
|
||||
<itemizedlist>
|
||||
<listitem><para>The <literal>recoll</literal> module contains
|
||||
functions and classes used to query (or update) the
|
||||
index. This section will only describe the query part, see
|
||||
further for the update part.</para></listitem>
|
||||
<listitem><para>The <literal>rclextract</literal> module contains
|
||||
functions and classes used to access document
|
||||
data.</para></listitem>
|
||||
</itemizedlist>
|
||||
</para>
|
||||
</sect3>
|
||||
|
||||
<sect3 id="RCL.PROGRAM.PYTHONAPI.RECOLL">
|
||||
<title>The recoll module</title>
|
||||
|
||||
<sect4 id="RCL.PROGRAM.PYTHONAPI.RECOLL.FUNCTIONS">
|
||||
<title>Functions</title>
|
||||
<simplesect id="RCL.PROGRAM.PYTHONAPI.RECOLL.CONNECT">
|
||||
<title>connect(confdir=None, extra_dbs=None, writable = False)</title>
|
||||
|
||||
<variablelist>
|
||||
<varlistentry>
|
||||
<term>connect(confdir=None, extra_dbs=None,
|
||||
writable = False)</term>
|
||||
<listitem>
|
||||
<para>The <literal>connect()</literal> function connects to
|
||||
one or several &RCL; index(es) and returns
|
||||
a <literal>Db</literal> object.</para>
|
||||
<para>This call initializes the recoll module, and it should
|
||||
always be performed before any other call or object
|
||||
creation.</para>
|
||||
<itemizedlist>
|
||||
<listitem><para><literal>confdir</literal> may specify
|
||||
a configuration directory. The usual defaults
|
||||
@ -5159,24 +5162,13 @@ recollindex -c "$confdir"
|
||||
we can index new data through this
|
||||
connection.</para></listitem>
|
||||
</itemizedlist>
|
||||
<para>This call initializes the recoll module, and it should
|
||||
always be performed before any other call or object
|
||||
creation.</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
</variablelist>
|
||||
</sect4>
|
||||
</simplesect>
|
||||
|
||||
|
||||
<sect4 id="RCL.PROGRAM.PYTHONAPI.RECOLL.CLASSES">
|
||||
<title>Classes</title>
|
||||
|
||||
<sect5 id="RCL.PROGRAM.PYTHONAPI.RECOLL.CLASSES.DB">
|
||||
<simplesect id="RCL.PROGRAM.PYTHONAPI.RECOLL.DB">
|
||||
<title>The Db class</title>
|
||||
|
||||
<para>A Db object is created by
|
||||
a <literal>connect()</literal> call and holds a
|
||||
connection to a Recoll index.</para>
|
||||
<para>A Db object is created by a <literal>connect()</literal>
|
||||
call and holds a connection to a Recoll index.</para>
|
||||
<variablelist>
|
||||
<varlistentry>
|
||||
<term>Db.close()</term>
|
||||
@ -5216,10 +5208,8 @@ recollindex -c "$confdir"
|
||||
|
||||
</variablelist>
|
||||
|
||||
</sect5>
|
||||
|
||||
|
||||
<sect5 id="RCL.PROGRAM.PYTHONAPI.RECOLL.CLASSES.QUERY">
|
||||
</simplesect>
|
||||
<simplesect id="RCL.PROGRAM.PYTHONAPI.RECOLL.CLASSES.QUERY">
|
||||
<title>The Query class</title>
|
||||
|
||||
<para>A <literal>Query</literal> object (equivalent to a
|
||||
@ -5355,17 +5345,15 @@ recollindex -c "$confdir"
|
||||
|
||||
</variablelist>
|
||||
|
||||
</sect5>
|
||||
|
||||
|
||||
<sect5 id="RCL.PROGRAM.PYTHONAPI.RECOLL.CLASSES.DOC">
|
||||
</simplesect>
|
||||
<simplesect id="RCL.PROGRAM.PYTHONAPI.RECOLL.CLASSES.DOC">
|
||||
<title>The Doc class</title>
|
||||
|
||||
<para>A <literal>Doc</literal> object contains index data
|
||||
for a given document. The data is extracted from the
|
||||
index when searching, or set by the indexer program when
|
||||
updating. The Doc object has many attributes to be read or
|
||||
set by its user. It matches exactly the Rcl::Doc C++
|
||||
set by its user. It mostly matches the Rcl::Doc C++
|
||||
object. Some of the attributes are predefined, but,
|
||||
especially when indexing, others can be set, the name of
|
||||
which will be processed as field names by the indexing
|
||||
@ -5405,13 +5393,14 @@ recollindex -c "$confdir"
|
||||
</itemizedlist>
|
||||
</para>
|
||||
|
||||
<para>At query time, only the fields that are defined
|
||||
as <literal>stored</literal> either by default or in
|
||||
the <filename>fields</filename> configuration file will be
|
||||
meaningful in the <literal>Doc</literal>
|
||||
object. Especially this will not be the case for the
|
||||
document text. See the <literal>rclextract</literal>
|
||||
module for accessing document contents.</para>
|
||||
<para>At query time, only the fields that are defined as
|
||||
<literal>stored</literal> either by default or in the
|
||||
<filename>fields</filename> configuration file will be meaningful
|
||||
in the <literal>Doc</literal> object. The document processed text
|
||||
may be present or not, depending on whether the index stores the text at
|
||||
all, and if it does, on the <literal>fetchtext</literal> query
|
||||
execute option. See also the <literal>rclextract</literal> module
|
||||
for accessing document contents.</para>
|
||||
|
||||
<variablelist>
|
||||
|
||||
@ -5460,9 +5449,9 @@ recollindex -c "$confdir"
|
||||
</varlistentry>
|
||||
</variablelist>
|
||||
|
||||
</sect5> <!-- Doc -->
|
||||
</simplesect> <!-- Doc -->
|
||||
|
||||
<sect5 id="RCL.PROGRAM.PYTHONAPI.RECOLL.CLASSES.SEARCHDATA">
|
||||
<simplesect id="RCL.PROGRAM.PYTHONAPI.RECOLL.CLASSES.SEARCHDATA">
|
||||
<title>The SearchData class</title>
|
||||
|
||||
<para>A <literal>SearchData</literal> object allows building
|
||||
@ -5482,17 +5471,16 @@ recollindex -c "$confdir"
|
||||
</varlistentry>
|
||||
</variablelist>
|
||||
|
||||
</sect5> <!-- SearchData -->
|
||||
</simplesect> <!-- SearchData -->
|
||||
|
||||
</sect4> <!-- recoll.classes -->
|
||||
</sect3> <!-- Recoll module -->
|
||||
|
||||
<sect3 id="RCL.PROGRAM.PYTHONAPI.RCLEXTRACT">
|
||||
<title>The rclextract module</title>
|
||||
|
||||
|
||||
<para>Prior to &RCL; 1.25, index queries never provide document
|
||||
content because it is not stored. More recent versions usually
|
||||
<para>Prior to &RCL; 1.25, index queries could not provide document
|
||||
content because it was never stored. &RCL; 1.25 and later usually
|
||||
store the document text, which can be optionally retrieved when
|
||||
running a query (see <literal>query.execute()</literal>
|
||||
above - the result is always plain text).</para>
|
||||
@ -5506,7 +5494,7 @@ recollindex -c "$confdir"
|
||||
<para>You need to import the <literal>recoll</literal> module
|
||||
before the <literal>rclextract</literal> module.</para>
|
||||
|
||||
<sect4 id="RCL.PROGRAM.PYTHONAPI.RCLEXTRACT.CLASSES.EXTRACTOR">
|
||||
<simplesect id="RCL.PROGRAM.PYTHONAPI.RCLEXTRACT.CLASSES.EXTRACTOR">
|
||||
<title>The Extractor class</title>
|
||||
|
||||
<variablelist>
|
||||
@ -5565,7 +5553,7 @@ not doc.ipath and (not "rclbes" in doc.keys() or doc["rclbes"] == "FS")
|
||||
|
||||
</variablelist>
|
||||
|
||||
</sect4>
|
||||
</simplesect>
|
||||
</sect3> <!-- rclextract module -->
|
||||
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user