diff --git a/src/doc/user/usermanual.sgml b/src/doc/user/usermanual.sgml
index 48e4b12e..d23d809a 100644
--- a/src/doc/user/usermanual.sgml
+++ b/src/doc/user/usermanual.sgml
@@ -4188,13 +4188,15 @@ or
Extract document defined
by ipath and return
a Doc object. The doc.text field
- has the document text as either text/plain or
+ has the document text converted to either text/plain or
text/html according to doc.mimetype. The typical use
would be as follows:
qdoc = query.fetchone()
extractor = recoll.Extractor(qdoc)
-text = extractor.textextract(qdoc.ipath)
+doc = extractor.textextract(qdoc.ipath)
+# use doc.text, e.g. for previewing
+
diff --git a/tests/pythonapi/extract.py b/tests/pythonapi/extract.py
new file mode 100644
index 00000000..4861fde7
--- /dev/null
+++ b/tests/pythonapi/extract.py
@@ -0,0 +1,45 @@
+import sys
+import hashlib
+from recoll import recoll
+from recoll import rclextract
+
+# True when running under Python 3 or later, False under Python 2.
+ISP3 = False
+if sys.version_info[0] >= 3:
+    ISP3 = True
+
+def utf8string(s):
+    """Return s unchanged on Python 3; return s.encode('utf8') otherwise."""
+    if not ISP3:
+        return s.encode('utf8')
+    return s
+
+db = recoll.connect()
+query = db.query()
+
+# This normally has only one result, a well-known html file
+nres = query.execute("HtmlAttachment_uniqueTerm", stemming=0)
+print("Result count: %d %d" % (nres, query.rowcount))
+hit = query.fetchone()
+# textextract() hands back a Doc object; its text field is what we measure.
+extracted = rclextract.Extractor(hit).textextract(hit.ipath)
+print("Text length: %d" % len(extracted.text))
+
+refdigest = 'bfbb63f7a245c31767585b45014dbd07'
+
+# This normally has 2 results, one of which is a pdf attachment.
+nres = query.execute("population_size_cultural_transmission", stemming=0)
+for doc in query:
+    if doc.mimetype == 'application/pdf':
+        xtrac = rclextract.Extractor(doc)
+        filename = xtrac.idoctofile(doc.ipath, doc.mimetype)
+        # Hash the raw bytes; 'with' closes the file even if read() raises.
+        with open(filename, 'rb') as f:
+            data = f.read()
+        m = hashlib.md5()
+        m.update(data)
+        digest = m.hexdigest()
+        print(digest)
+        if digest != refdigest:
+            print("extract.py: wrong digest for extracted file!")
+
diff --git a/tests/pythonapi/pythonapi.txt b/tests/pythonapi/pythonapi.txt
index b0beb11e..bacfbb95 100644
--- a/tests/pythonapi/pythonapi.txt
+++ b/tests/pythonapi/pythonapi.txt
@@ -53,6 +53,10 @@ User query [title:"été à noël"]
Xapian query: [(10 * (Sete PHRASE 3 Sa PHRASE 3 Snoel))]
nres 1
doc.title: [HTML fields test file: été à noël]
+python extract.py
+Result count: 1 1
+Text length: 3457
+bfbb63f7a245c31767585b45014dbd07
python simple.py
Xapian query: [(huniique:(wqf=11))]
Result count: 2 2