From 51761b7aa6908626c143fe16e2c8caca625bbacd Mon Sep 17 00:00:00 2001 From: Jean-Francois Dockes Date: Fri, 8 Jan 2021 14:34:32 +0100 Subject: [PATCH] python doc update --- src/doc/user/usermanual.html | 67 ++++++++++++-------------- src/doc/user/usermanual.xml | 92 +++++++++++++++++------------------- 2 files changed, 73 insertions(+), 86 deletions(-) diff --git a/src/doc/user/usermanual.html b/src/doc/user/usermanual.html index 2957ed0d..22bf4179 100644 --- a/src/doc/user/usermanual.html +++ b/src/doc/user/usermanual.html @@ -6681,7 +6681,8 @@ text/html [file:///Users/uncrypted-dockes/projets/bateaux/ilur/factEtCie/r

The Recoll Python programming interface can be used both for searching and for creating/updating an index. Bindings exist for - Python2 and Python3.

+ Python2 and Python3 (Jan 2021: Python2 support will be + dropped soon).

The search interface is used in a number of active projects: the

-        #!/usr/bin/env python
+#!/usr/bin/python3
 
-        from recoll import recoll
+from recoll import recoll
 
-        db = recoll.connect()
-        query = db.query()
-        nres = query.execute("some query")
-        results = query.fetchmany(20)
-        for doc in results:
-            print("%s %s" % (doc.url, doc.title))
-        
+db = recoll.connect() +query = db.query() +nres = query.execute("some query") +results = query.fetchmany(20) +for doc in results: + print("%s %s" % (doc.url, doc.title)) +

You can also take a look at the source for the The rclextract module can give access to the original document and to the document text content (if not stored by the index, or - to access an HTML version of the text). Acessing the + to access an HTML version of the text). Accessing the original document is particularly useful if it is embedded (e.g. an email attachment).

You need to import the

-#!/usr/bin/env python
+#!/usr/bin/python3
 
 from recoll import recoll
 
@@ -7455,17 +7456,15 @@ db.setAbstractParams(maxchars=80, contextwords=4)
 
 query = db.query()
 nres = query.execute("some user question")
-print "Result count: ", nres
+print("Result count: %d" % nres)
 if nres > 5:
     nres = 5
 for i in range(nres):
     doc = query.fetchone()
-    print "Result #%d" % (query.rownumber,)
+    print("Result #%d" % (query.rownumber))
     for k in ("title", "size"):
-        print k, ":", getattr(doc, k).encode('utf-8')
-    abs = db.makeDocAbstract(doc, query).encode('utf-8')
-    print abs
-    print
+        print("%s : %s" % (k, getattr(doc, k)))
+    print("%s\n" % db.makeDocAbstract(doc, query))
 
@@ -7651,9 +7650,9 @@ for i in range(nres): Recoll source (which sets rclbes="MBOX"):

[MBOX]
-          fetch = /path/to/recoll/src/python/samples/rclmbox.py fetch
-          makesig = path/to/recoll/src/python/samples/rclmbox.py makesig
-          
+fetch = /path/to/recoll/src/python/samples/rclmbox.py fetch +makesig = /path/to/recoll/src/python/samples/rclmbox.py makesig +

fetch and makesig define two commands to execute to respectively retrieve the document text and compute @@ -7708,27 +7707,21 @@ for i in range(nres): of course).

Adapting to the new package structure:

-          
-                   try:
-                   from recoll import recoll
-                   from recoll import rclextract
-                   hasextract = True
-                   except:
-                   import recoll
-                   hasextract = False
-          
-        
+try:
+    from recoll import recoll
+    from recoll import rclextract
+    hasextract = True
+except:
+    import recoll
+    hasextract = False
+

Adapting to the change of nature of the next Query member. The same test can be used to choose to use the scroll() method (new) or set the next value (old).

-
-          
-                   rownum = query.next if type(query.next) == int else \
-                   query.rownumber
-          
-        
+
rownum = query.next if type(query.next) == int else query.rownumber
diff --git a/src/doc/user/usermanual.xml b/src/doc/user/usermanual.xml index 508dfaf9..3ed1a898 100644 --- a/src/doc/user/usermanual.xml +++ b/src/doc/user/usermanual.xml @@ -5144,7 +5144,8 @@ text/html [file:///Users/uncrypted-dockes/projets/bateaux/ilur/factEtCie/r The &RCL; Python programming interface can be used both for searching and for creating/updating an index. Bindings exist for - Python2 and Python3. + Python2 and Python3 (Jan 2021: python2 support will be dropped + soon). The search interface is used in a number of active projects: the - +db = recoll.connect() +query = db.query() +nres = query.execute("some query") +results = query.fetchmany(20) +for doc in results: + print("%s %s" % (doc.url, doc.title)) +]]> You can also take a look at the source for the The rclextract module can give access to the original document and to the document text content (if not stored by the index, or to access an HTML version of the text). - Acessing the original document is particularly useful if it is + Accessing the original document is particularly useful if it is embedded (e.g. an email attachment). You need to import the recoll module @@ -5703,19 +5704,20 @@ qdoc = query.fetchone() extractor = recoll.Extractor(qdoc) doc = extractor.textextract(qdoc.ipath) # use doc.text, e.g. for previewing - Passing qdoc.ipath to + + Passing qdoc.ipath to textextract() is redundant, but reflects the fact that the Extractor object actually has the capability to access the other entries in a compound document. - - - - Extractor.idoctofile(ipath, targetmtype, outfile='') - Extracts document into an output file, - which can be given explicitly or will be created as a - temporary file to be deleted by the caller. Typical - use: + + + + Extractor.idoctofile(ipath, targetmtype, outfile='') + Extracts document into an output file, + which can be given explicitly or will be created as a + temporary file to be deleted by the caller. 
Typical + use: from recoll import recoll, rclextract @@ -5750,7 +5752,7 @@ not doc.ipath and (not "rclbes" in doc.keys() or doc["rclbes"] == "FS") highlighting and data extraction functions. 5: nres = 5 for i in range(nres): doc = query.fetchone() - print "Result #%d" % (query.rownumber,) + print("Result #%d" % (query.rownumber)) for k in ("title", "size"): - print k, ":", getattr(doc, k).encode('utf-8') - abs = db.makeDocAbstract(doc, query).encode('utf-8') - print abs - print + print("%s : %s" % (k, getattr(doc, k))) + print("%s\n" % db.makeDocAbstract(doc, query)) ]]> @@ -5911,10 +5911,11 @@ for i in range(nres): access data from the specified indexer. Example, for the mbox indexing sample found in the Recoll source (which sets rclbes="MBOX"): - [MBOX] - fetch = /path/to/recoll/src/python/samples/rclmbox.py fetch - makesig = path/to/recoll/src/python/samples/rclmbox.py makesig - +[MBOX] +fetch = /path/to/recoll/src/python/samples/rclmbox.py fetch +makesig = path/to/recoll/src/python/samples/rclmbox.py makesig + + fetch and makesig define two commands to execute to respectively retrieve the document text and compute the document signature (the example @@ -5953,17 +5954,15 @@ for i in range(nres): course). Adapting to the new package structure: - - - + Adapting to the change of nature of the next Query @@ -5971,12 +5970,7 @@ for i in range(nres): the scroll() method (new) or set the next value (old). - - - +