35 lines
845 B
Python
Executable File
35 lines
845 B
Python
Executable File
#!/usr/bin/python3
|
|
'''Show how to extract the document texts from an index which stores them,
which is the default for Recoll versions with Xapian 1.4 support, after 1.24.
Would not work with 1.23 and earlier. This also depends on the
indexStoreDocText configuration variable. The usual RECOLL_CONFDIR can be used
to determine the index we operate on.
Use pyloglevel/pylogfilename or redirect stderr to get rid of the log messages.
'''
|
|
|
|
import sys
|
|
from recoll import recoll
|
|
|
|
|
|
def deb(s):
    """Write a diagnostic message to standard error, followed by a newline.

    Args:
        s: The value to print. ``print`` converts it with ``str()``.
    """
    # Hand the value directly to print() instead of formatting with
    # "%s" % s: the %-operator treats a tuple operand as an argument list,
    # so a tuple of any length other than one raises TypeError and a
    # one-element tuple is silently unwrapped.
    print(s, file=sys.stderr)
|
|
|
|
def usage():
    """Print the usage line on stderr and terminate with exit status 1."""
    deb("Usage doctexts.py")
    # Raising SystemExit directly is what sys.exit(1) does.
    raise SystemExit(1)
|
|
|
|
# This script takes no command-line arguments; anything else is a usage error.
if len(sys.argv) != 1:
    usage()

# Open the index and run the query, asking for the stored document text
# to be fetched along with each result (fetchtext=True).
# NOTE(review): "mime:*" presumably matches every document -- confirm.
db = recoll.connect()
q = db.query()
q.execute("mime:*", fetchtext=True)

# Print title and text for each result; enumerate keeps the running count,
# which stays 0 when the query yields nothing.
ndocs = 0
for ndocs, doc in enumerate(q, start=1):
    print("TITLE: %s" % doc.title)
    print("TEXT: %s" % doc.get('text'))

print("Got %d documents" % ndocs)
|