added python sample for extracting doc texts
This commit is contained in:
parent
9e05c167e9
commit
e802fce01e
34
src/python/samples/doctexts.py
Executable file
34
src/python/samples/doctexts.py
Executable file
@ -0,0 +1,34 @@
|
||||
#!/usr/bin/python3
|
||||
'''Show how to extract the document texts from an index which stores them,
which is the default for Recoll versions with Xapian 1.4 support, after 1.24.
Would not work with 1.23 and earlier. This also depends on the
indexStoreDocText configuration variable. The usual RECOLL_CONFDIR can be used
to determine the index we operate on.
Use pyloglevel/pylogfilename or redirect stderr to get rid of the log messages.
'''
|
||||
|
||||
import sys
|
||||
from recoll import recoll
|
||||
|
||||
|
||||
def deb(s):
    """Print *s* on standard error, followed by a newline.

    Diagnostics go to stderr so that they do not mix with the document
    titles and texts which the script prints on stdout.

    Args:
        s: the value to print; it is formatted with ``"%s" % s``.
    """
    print("%s" % s, file=sys.stderr)
|
||||
|
||||
def usage():
    """Print the usage message on standard error and exit with status 1.

    Raises:
        SystemExit: always, through ``sys.exit(1)``.
    """
    deb("Usage doctexts.py")
    sys.exit(1)
|
||||
|
||||
# The script accepts no command line arguments: anything beyond the program
# name is an error and triggers the usage message.
if len(sys.argv) != 1:
    usage()

# Connect with the default parameters (the module docstring notes that
# RECOLL_CONFDIR can be used to select the index).
db = recoll.connect()
q = db.query()
# NOTE(review): "mime:*" is presumably meant to match every document, and
# fetchtext=True to make the stored text available on each result -- confirm
# against the Recoll Python API documentation.
q.execute("mime:*", fetchtext=True)

# Walk the results, printing the title and the text of each document while
# counting them.
ndocs = 0
for doc in q:
    ndocs += 1
    print("TITLE: %s" % doc.title)
    print("TEXT: %s" % doc.get('text'))

print("Got %d documents" % ndocs)
|
||||
Loading…
x
Reference in New Issue
Block a user