When storing doc text, always use a metadata entry. Get rid of the code to store it in the data record.

Make storing the default. Add a "fetchtext" parameter to getDoc() to fetch the document text and store it in doc.text. Make this accessible from Python. Miscellaneous comment and indentation fixes.
2018-01-25 13:20:02 +01:00 · 2018-01-25 13:20:02 +01:00 · 3d4fd3c62e
commit 3d4fd3c62e
parent 2eaefa2b5d
7 changed files with 233 additions and 292 deletions
--- a/src/python/recoll/pyrecoll.cpp
+++ b/src/python/recoll/pyrecoll.cpp
@ -63,7 +63,8 @@ typedef struct {
 static void 
 SearchData_dealloc(recoll_SearchDataObject *self)
 {
-    LOGDEB("SearchData_dealloc. Releasing. Count before: "  << (self->sd.use_count()) << "\n" );
+    LOGDEB("SearchData_dealloc. Releasing. Count before: " <<
           self->sd.use_count() << "\n");
    self->sd.reset();
    Py_TYPE(self)->tp_free((PyObject*)self);
 }
@ -71,7 +72,7 @@ SearchData_dealloc(recoll_SearchDataObject *self)
 static PyObject *
 SearchData_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
 {
-    LOGDEB("SearchData_new\n" );
+    LOGDEB("SearchData_new\n");
    recoll_SearchDataObject *self;
    self = (recoll_SearchDataObject *)type->tp_alloc(type, 0);
@ -90,7 +91,7 @@ PyDoc_STRVAR(doc_SearchDataObject,
 static int
 SearchData_init(recoll_SearchDataObject *self, PyObject *args, PyObject *kwargs)
 {
-    LOGDEB("SearchData_init\n" );
+    LOGDEB("SearchData_init\n");
    static const char* kwlist[] = {"type", "stemlang", NULL};
    char *stp = 0;
    char *steml = 0;
@ -180,9 +181,9 @@ static PyObject *
 SearchData_addclause(recoll_SearchDataObject* self, PyObject *args, 
 		     PyObject *kwargs)
 {
-    LOGDEB0("SearchData_addclause\n" );
+    LOGDEB0("SearchData_addclause\n");
    if (!self->sd) {
-	LOGERR("SearchData_addclause: not init??\n" );
+	LOGERR("SearchData_addclause: not init??\n");
        PyErr_SetString(PyExc_AttributeError, "sd");
        return 0;
    }
@ -294,7 +295,7 @@ SearchData_addclause(recoll_SearchDataObject* self, PyObject *args,
 static void 
 Doc_dealloc(recoll_DocObject *self)
 {
-    LOGDEB("Doc_dealloc\n" );
+    LOGDEB("Doc_dealloc\n");
    if (self->doc)
 	the_docs.erase(self->doc);
    deleteZ(self->doc);
@ -304,7 +305,7 @@ Doc_dealloc(recoll_DocObject *self)
 static PyObject *
 Doc_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
 {
-    LOGDEB("Doc_new\n" );
+    LOGDEB("Doc_new\n");
    recoll_DocObject *self;
    self = (recoll_DocObject *)type->tp_alloc(type, 0);
@ -318,7 +319,7 @@ Doc_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
 static int
 Doc_init(recoll_DocObject *self, PyObject *, PyObject *)
 {
-    LOGDEB("Doc_init\n" );
+    LOGDEB("Doc_init\n");
    if (self->doc)
 	the_docs.erase(self->doc);
    delete self->doc;
@ -340,7 +341,7 @@ PyDoc_STRVAR(doc_Doc_getbinurl,
 static PyObject *
 Doc_getbinurl(recoll_DocObject *self)
 {
-    LOGDEB0("Doc_getbinurl\n" );
+    LOGDEB0("Doc_getbinurl\n");
    if (self->doc == 0 || 
 	the_docs.find(self->doc) == the_docs.end()) {
        PyErr_SetString(PyExc_AttributeError, "doc");
@ -359,7 +360,7 @@ PyDoc_STRVAR(doc_Doc_setbinurl,
 static PyObject *
 Doc_setbinurl(recoll_DocObject *self, PyObject *value)
 {
-    LOGDEB0("Doc_setbinurl\n" );
+    LOGDEB0("Doc_setbinurl\n");
    if (self->doc == 0 || 
 	the_docs.find(self->doc) == the_docs.end()) {
        PyErr_SetString(PyExc_AttributeError, "doc??");
@ -381,7 +382,7 @@ PyDoc_STRVAR(doc_Doc_keys,
 static PyObject *
 Doc_keys(recoll_DocObject *self)
 {
-    LOGDEB0("Doc_keys\n" );
+    LOGDEB0("Doc_keys\n");
    if (self->doc == 0 || 
 	the_docs.find(self->doc) == the_docs.end()) {
        PyErr_SetString(PyExc_AttributeError, "doc");
@ -406,7 +407,7 @@ PyDoc_STRVAR(doc_Doc_items,
 static PyObject *
 Doc_items(recoll_DocObject *self)
 {
-    LOGDEB0("Doc_items\n" );
+    LOGDEB0("Doc_items\n");
    if (self->doc == 0 || 
 	the_docs.find(self->doc) == the_docs.end()) {
        PyErr_SetString(PyExc_AttributeError, "doc");
@ -516,7 +517,7 @@ PyDoc_STRVAR(doc_Doc_get,
 static PyObject *
 Doc_get(recoll_DocObject *self, PyObject *args)
 {
-    LOGDEB1("Doc_get\n" );
+    LOGDEB1("Doc_get\n");
    if (self->doc == 0 || the_docs.find(self->doc) == the_docs.end()) {
        PyErr_SetString(PyExc_AttributeError, "doc??");
 	return 0;
@ -567,7 +568,7 @@ Doc_getattro(recoll_DocObject *self, PyObject *nameobj)
    if (PyUnicode_Check(nameobj)) {
 	PyObject* utf8o = PyUnicode_AsUTF8String(nameobj);
 	if (utf8o == 0) {
-	    LOGERR("Doc_getattro: encoding name to utf8 failed\n" );
+	    LOGERR("Doc_getattro: encoding name to utf8 failed\n");
 	    PyErr_SetString(PyExc_AttributeError, "name??");
 	    Py_RETURN_NONE;
 	}
@ -583,7 +584,7 @@ Doc_getattro(recoll_DocObject *self, PyObject *nameobj)
    string key = rclconfig->fieldQCanon(string(name));
    string value;
    if (idocget(self, key, value)) {
-	LOGDEB1("Doc_getattro: ["  << key << "] -> ["  << value << "]\n");
+	LOGDEB1("Doc_getattro: [" << key << "] -> [" << value << "]\n");
 	// Return a python unicode object
 	return PyUnicode_Decode(value.c_str(), value.size(), "utf-8","replace");
    }
@ -621,7 +622,7 @@ Doc_setattr(recoll_DocObject *self, char *name, PyObject *value)
    PyObject* putf8 = PyUnicode_AsUTF8String(value);
    if (putf8 == 0) {
-	LOGERR("Doc_setmeta: encoding to utf8 failed\n" );
+	LOGERR("Doc_setmeta: encoding to utf8 failed\n");
 	PyErr_SetString(PyExc_AttributeError, "value??");
 	return -1;
    }
@ -709,7 +710,7 @@ Doc_subscript(recoll_DocObject *self, PyObject *key)
    if (PyUnicode_Check(key)) {
        PyObject* utf8o = PyUnicode_AsUTF8String(key);
 	if (utf8o == 0) {
-	    LOGERR("Doc_getitemo: encoding name to utf8 failed\n" );
+	    LOGERR("Doc_getitemo: encoding name to utf8 failed\n");
 	    PyErr_SetString(PyExc_AttributeError, "name??");
 	    Py_RETURN_NONE;
 	}
@ -830,6 +831,7 @@ typedef struct {
    int         ascending;
    int         arraysize; // Default size for fetchmany
    recoll_DbObject* connection;
    bool        fetchtext;
 } recoll_QueryObject;
 PyDoc_STRVAR(doc_Query_close,
@ -838,7 +840,7 @@ PyDoc_STRVAR(doc_Query_close,
 static PyObject *
 Query_close(recoll_QueryObject *self)
 {
-    LOGDEB("Query_close\n" );
+    LOGDEB("Query_close\n");
    if (self->query) {
 	the_queries.erase(self->query);
        deleteZ(self->query);
@ -854,7 +856,7 @@ Query_close(recoll_QueryObject *self)
 static void 
 Query_dealloc(recoll_QueryObject *self)
 {
-    LOGDEB("Query_dealloc\n" );
+    LOGDEB("Query_dealloc\n");
    PyObject *ret = Query_close(self);
    Py_DECREF(ret);
    Py_TYPE(self)->tp_free((PyObject*)self);
@ -863,7 +865,7 @@ Query_dealloc(recoll_QueryObject *self)
 static PyObject *
 Query_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
 {
-    LOGDEB("Query_new\n" );
+    LOGDEB("Query_new\n");
    recoll_QueryObject *self;
    self = (recoll_QueryObject *)type->tp_alloc(type, 0);
@ -876,6 +878,7 @@ Query_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
    self->ascending = 1;
    self->arraysize = 1;
    self->connection = 0;
    self->fetchtext = false;
    return (PyObject *)self;
 }
@ -885,7 +888,7 @@ Query_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
 static int
 Query_init(recoll_QueryObject *self, PyObject *, PyObject *)
 {
-    LOGDEB("Query_init\n" );
+    LOGDEB("Query_init\n");
    if (self->query)
 	the_queries.erase(self->query);
@ -913,7 +916,7 @@ PyDoc_STRVAR(doc_Query_sortby,
 static PyObject *
 Query_sortby(recoll_QueryObject* self, PyObject *args, PyObject *kwargs)
 {
-    LOGDEB0("Query_sortby\n" );
+    LOGDEB0("Query_sortby\n");
    static const char *kwlist[] = {"field", "ascending", NULL};
    char *sfield = 0;
    PyObject *ascobj = 0;
@ -936,7 +939,8 @@ Query_sortby(recoll_QueryObject* self, PyObject *args, PyObject *kwargs)
 }
 PyDoc_STRVAR(doc_Query_execute,
-"execute(query_string, stemming=1|0, stemlang=\"stemming language\")\n"
+"execute(query_string, stemming=1|0, stemlang=\"stemming language\", "
             "fetchtext=False)\n"
 "\n"
 "Starts a search for query_string, a Recoll search language string\n"
 "(mostly Xesam-compatible).\n"
@ -947,20 +951,28 @@ PyDoc_STRVAR(doc_Query_execute,
 static PyObject *
 Query_execute(recoll_QueryObject* self, PyObject *args, PyObject *kwargs)
 {
-    LOGDEB0("Query_execute\n" );
+    LOGDEB0("Query_execute\n");
-    static const char *kwlist[] = {"query_string", "stemming", "stemlang", NULL};
+    static const char *kwlist[] = {"query_string", "stemming", "stemlang",
                                   "fetchtext", NULL};
    char *sutf8 = 0; // needs freeing
    char *sstemlang = 0;
    int dostem = 1;
    PyObject *dostemobj = 0;
-    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "es|Oes:Query_execute", 
+    PyObject *fetchtextobj = 0;
    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "es|OesO:Query_execute", 
 				     (char**)kwlist, "utf-8", &sutf8,
 				     &dostemobj, 
-				     "utf-8", &sstemlang)) {
+				     "utf-8", &sstemlang, &fetchtextobj)) {
 	return 0;
    }
    bool dostem{true};
    if (dostemobj != 0 && !PyObject_IsTrue(dostemobj))
-	dostem = 0;
+	dostem = false;
    if (fetchtextobj != 0 && PyObject_IsTrue(fetchtextobj)) {
 	self->fetchtext = true;
    } else {
        self->fetchtext = false;
    }
    string utf8(sutf8);
    PyMem_Free(sutf8);
@ -970,7 +982,8 @@ Query_execute(recoll_QueryObject* self, PyObject *args, PyObject *kwargs)
 	PyMem_Free(sstemlang);
    }
-    LOGDEB0("Query_execute: ["  << (utf8) << "] dostem "  << (dostem) << " stemlang ["  << (stemlang) << "]\n" );
+    LOGDEB0("Query_execute: [" << utf8 << "] dostem " << dostem <<
            " stemlang [" << stemlang << "]\n");
    if (self->query == 0 || 
 	the_queries.find(self->query) == the_queries.end()) {
@ -999,7 +1012,7 @@ Query_execute(recoll_QueryObject* self, PyObject *args, PyObject *kwargs)
 }
 PyDoc_STRVAR(doc_Query_executesd,
-"executesd(SearchData)\n"
+"executesd(SearchData, fetchtext=False)\n"
 "\n"
 "Starts a search for the query defined by the SearchData object.\n"
 );
@ -1007,12 +1020,13 @@ PyDoc_STRVAR(doc_Query_executesd,
 static PyObject *
 Query_executesd(recoll_QueryObject* self, PyObject *args, PyObject *kwargs)
 {
-    LOGDEB0("Query_executeSD\n" );
+    LOGDEB0("Query_executeSD\n");
-    static const char *kwlist[] = {"searchdata", NULL};
+    static const char *kwlist[] = {"searchdata", "fetchtext", NULL};
    recoll_SearchDataObject *pysd = 0;
-    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O!:Query_execute", 
+    PyObject *fetchtextobj = 0;
-				     (char **)kwlist,
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O!|O:Query_execute", 
-				     &recoll_SearchDataType, &pysd)) {
+				     (char **)kwlist, &recoll_SearchDataType,
                                     &pysd, &fetchtextobj)) {
 	return 0;
    }
    if (pysd == 0 || self->query == 0 || 
@ -1020,6 +1034,11 @@ Query_executesd(recoll_QueryObject* self, PyObject *args, PyObject *kwargs)
        PyErr_SetString(PyExc_AttributeError, "query");
 	return 0;
    }
    if (fetchtextobj != 0 && PyObject_IsTrue(fetchtextobj)) {
 	self->fetchtext = true;
    } else {
        self->fetchtext = false;
    }
    self->query->setSortBy(*self->sortfield, self->ascending);
    self->query->setQuery(pysd->sd);
    int cnt = self->query->getResCnt();
@ -1049,7 +1068,7 @@ PyDoc_STRVAR(doc_Query_fetchone,
 static PyObject *
 Query_fetchone(PyObject *_self)
 {
-    LOGDEB0("Query_fetchone/next\n" );
+    LOGDEB0("Query_fetchone/next\n");
    recoll_QueryObject* self = (recoll_QueryObject*)_self;
    if (self->query == 0 || 
@ -1072,7 +1091,7 @@ Query_fetchone(PyObject *_self)
    // We used to check against rowcount here, but this was wrong:
    // xapian result count estimate are sometimes wrong, we must go on
    // fetching until we fail
-    if (!self->query->getDoc(self->next, *result->doc)) {
+    if (!self->query->getDoc(self->next, *result->doc, self->fetchtext)) {
        PyErr_SetNone(PyExc_StopIteration);
 	return 0;
    }
@ -1090,7 +1109,7 @@ PyDoc_STRVAR(doc_Query_fetchmany,
 static PyObject *
 Query_fetchmany(recoll_QueryObject* self, PyObject *args, PyObject *kwargs)
 {
-    LOGDEB0("Query_fetchmany\n" );
+    LOGDEB0("Query_fetchmany\n");
    static const char *kwlist[] = {"size", NULL};
    int size = 0;
@ -1120,7 +1139,7 @@ Query_fetchmany(recoll_QueryObject* self, PyObject *args, PyObject *kwargs)
            PyErr_SetString(PyExc_EnvironmentError, "doc create failed");
            return 0;
        }
-        if (!self->query->getDoc(self->next, *docobj->doc)) {
+        if (!self->query->getDoc(self->next, *docobj->doc, self->fetchtext)) {
            PyErr_SetNone(PyExc_StopIteration);
            break;
        }
@ -1140,7 +1159,7 @@ PyDoc_STRVAR(doc_Query_scroll,
 static PyObject *
 Query_scroll(recoll_QueryObject* self, PyObject *args, PyObject *kwargs)
 {
-    LOGDEB0("Query_scroll\n" );
+    LOGDEB0("Query_scroll\n");
    static const char *kwlist[] = {"position", "mode", NULL};
    int pos = 0;
    char *smode = 0;
@ -1227,7 +1246,7 @@ public:
 static PyObject *
 Query_highlight(recoll_QueryObject* self, PyObject *args, PyObject *kwargs)
 {
-    LOGDEB0("Query_highlight\n" );
+    LOGDEB0("Query_highlight\n");
    static const char *kwlist[] = {"text", "ishtml", "eolbr", "methods", NULL};
    char *sutf8 = 0; // needs freeing
    int ishtml = 0;
@ -1249,7 +1268,7 @@ Query_highlight(recoll_QueryObject* self, PyObject *args, PyObject *kwargs)
 	ishtml = 1;
    if (eolbrobj && !PyObject_IsTrue(eolbrobj))
 	eolbr = 0;
-    LOGDEB0("Query_highlight: ishtml "  << (ishtml) << "\n" );
+    LOGDEB0("Query_highlight: ishtml " << ishtml << "\n");
    if (self->query == 0 || 
 	the_queries.find(self->query) == the_queries.end()) {
@ -1287,7 +1306,7 @@ PyDoc_STRVAR(doc_Query_makedocabstract,
 static PyObject *
 Query_makedocabstract(recoll_QueryObject* self, PyObject *args,PyObject *kwargs)
 {
-    LOGDEB0("Query_makeDocAbstract\n" );
+    LOGDEB0("Query_makeDocAbstract\n");
    static const char *kwlist[] = {"doc", "methods", NULL};
    recoll_DocObject *pydoc = 0;
    PyObject *hlmethods = 0;
@ -1299,12 +1318,12 @@ Query_makedocabstract(recoll_QueryObject* self, PyObject *args,PyObject *kwargs)
    }
    if (pydoc->doc == 0 || the_docs.find(pydoc->doc) == the_docs.end()) {
-	LOGERR("Query_makeDocAbstract: doc not found "  << (pydoc->doc) << "\n" );
+	LOGERR("Query_makeDocAbstract: doc not found " << pydoc->doc << "\n");
        PyErr_SetString(PyExc_AttributeError, "doc");
        return 0;
    }
    if (the_queries.find(self->query) == the_queries.end()) {
-	LOGERR("Query_makeDocAbstract: query not found "  << (self->query) << "\n" );
+	LOGERR("Query_makeDocAbstract: query not found " << self->query << "\n");
        PyErr_SetString(PyExc_AttributeError, "query");
        return 0;
    }
@ -1357,7 +1376,7 @@ PyDoc_STRVAR(doc_Query_getxquery,
 static PyObject *
 Query_getxquery(recoll_QueryObject* self, PyObject *, PyObject *)
 {
-    LOGDEB0("Query_getxquery self->query "  << (self->query) << "\n" );
+    LOGDEB0("Query_getxquery self->query " << self->query << "\n");
    if (self->query == 0 || 
 	the_queries.find(self->query) == the_queries.end()) {
@ -1385,7 +1404,7 @@ PyDoc_STRVAR(doc_Query_getgroups,
 static PyObject *
 Query_getgroups(recoll_QueryObject* self, PyObject *, PyObject *)
 {
-    LOGDEB0("Query_getgroups\n" );
+    LOGDEB0("Query_getgroups\n");
    if (self->query == 0 || 
 	the_queries.find(self->query) == the_queries.end()) {
@ -1530,7 +1549,7 @@ typedef struct recoll_DbObject {
 static PyObject *
 Db_close(recoll_DbObject *self)
 {
-    LOGDEB("Db_close. self "  << (self) << "\n" );
+    LOGDEB("Db_close. self " << self << "\n");
    if (self->db) {
 	the_dbs.erase(self->db);
        delete self->db;
@ -1542,7 +1561,7 @@ Db_close(recoll_DbObject *self)
 static void 
 Db_dealloc(recoll_DbObject *self)
 {
-    LOGDEB("Db_dealloc\n" );
+    LOGDEB("Db_dealloc\n");
    PyObject *ret = Db_close(self);
    Py_DECREF(ret);
    Py_TYPE(self)->tp_free((PyObject*)self);
@ -1551,7 +1570,7 @@ Db_dealloc(recoll_DbObject *self)
 static PyObject *
 Db_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
 {
-    LOGDEB2("Db_new\n" );
+    LOGDEB2("Db_new\n");
    recoll_DbObject *self;
    self = (recoll_DbObject *)type->tp_alloc(type, 0);
@ -1583,7 +1602,7 @@ Db_init(recoll_DbObject *self, PyObject *args, PyObject *kwargs)
    } else {
 	rclconfig = recollinit(0, 0, reason, 0);
    }
-    LOGDEB("Db_init\n" );
+    LOGDEB("Db_init\n");
    if (rclconfig == 0) {
 	PyErr_SetString(PyExc_EnvironmentError, reason.c_str());
@ -1599,7 +1618,7 @@ Db_init(recoll_DbObject *self, PyObject *args, PyObject *kwargs)
    delete self->db;
    self->db = new Rcl::Db(rclconfig);
    if (!self->db->open(writable ? Rcl::Db::DbUpd : Rcl::Db::DbRO)) {
-	LOGERR("Db_init: db open error\n" );
+	LOGERR("Db_init: db open error\n");
 	PyErr_SetString(PyExc_EnvironmentError, "Can't open index");
        return -1;
    }
@ -1642,9 +1661,9 @@ Db_init(recoll_DbObject *self, PyObject *args, PyObject *kwargs)
 static PyObject *
 Db_query(recoll_DbObject* self)
 {
-    LOGDEB("Db_query\n" );
+    LOGDEB("Db_query\n");
    if (self->db == 0 || the_dbs.find(self->db) == the_dbs.end()) {
-	LOGERR("Db_query: db not found "  << (self->db) << "\n" );
+	LOGERR("Db_query: db not found " << self->db << "\n");
        PyErr_SetString(PyExc_AttributeError, "db");
        return 0;
    }
@ -1663,18 +1682,19 @@ Db_query(recoll_DbObject* self)
 static PyObject *
 Db_setAbstractParams(recoll_DbObject *self, PyObject *args, PyObject *kwargs)
 {
-    LOGDEB0("Db_setAbstractParams\n" );
+    LOGDEB0("Db_setAbstractParams\n");
    static const char *kwlist[] = {"maxchars", "contextwords", NULL};
    int ctxwords = -1, maxchars = -1;
    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ii", (char**)kwlist,
 				     &maxchars, &ctxwords))
 	return 0;
    if (self->db == 0 || the_dbs.find(self->db) == the_dbs.end()) {
-	LOGERR("Db_query: db not found "  << (self->db) << "\n" );
+	LOGERR("Db_query: db not found " << self->db << "\n");
        PyErr_SetString(PyExc_AttributeError, "db id not found");
        return 0;
    }
-    LOGDEB0("Db_setAbstractParams: mxchrs "  << (maxchars) << ", ctxwrds "  << (ctxwords) << "\n" );
+    LOGDEB0("Db_setAbstractParams: mxchrs " << maxchars << ", ctxwrds " <<
            ctxwords << "\n");
    self->db->setAbstractParams(-1, maxchars, ctxwords);
    Py_RETURN_NONE;
 }
@ -1682,7 +1702,7 @@ Db_setAbstractParams(recoll_DbObject *self, PyObject *args, PyObject *kwargs)
 static PyObject *
 Db_makeDocAbstract(recoll_DbObject* self, PyObject *args)
 {
-    LOGDEB0("Db_makeDocAbstract\n" );
+    LOGDEB0("Db_makeDocAbstract\n");
    recoll_DocObject *pydoc = 0;
    recoll_QueryObject *pyquery = 0;
    if (!PyArg_ParseTuple(args, "O!O!:Db_makeDocAbstract",
@ -1691,18 +1711,18 @@ Db_makeDocAbstract(recoll_DbObject* self, PyObject *args)
 	return 0;
    }
    if (self->db == 0 || the_dbs.find(self->db) == the_dbs.end()) {
-	LOGERR("Db_makeDocAbstract: db not found "  << (self->db) << "\n" );
+	LOGERR("Db_makeDocAbstract: db not found " << self->db << "\n");
        PyErr_SetString(PyExc_AttributeError, "db");
        return 0;
    }
    if (pydoc->doc == 0 || the_docs.find(pydoc->doc) == the_docs.end()) {
-	LOGERR("Db_makeDocAbstract: doc not found "  << (pydoc->doc) << "\n" );
+	LOGERR("Db_makeDocAbstract: doc not found " << pydoc->doc << "\n");
        PyErr_SetString(PyExc_AttributeError, "doc");
        return 0;
    }
    if (pyquery->query == 0 || 
 	the_queries.find(pyquery->query) == the_queries.end()) {
-	LOGERR("Db_makeDocAbstract: query not found "  << (pyquery->query) << "\n" );
+	LOGERR("Db_makeDocAbstract: query not found " << pyquery->query << "\n");
        PyErr_SetString(PyExc_AttributeError, "query");
        return 0;
    }
@ -1727,7 +1747,7 @@ PyDoc_STRVAR(doc_Db_termMatch,
 static PyObject *
 Db_termMatch(recoll_DbObject* self, PyObject *args, PyObject *kwargs)
 {
-    LOGDEB0("Db_termMatch\n" );
+    LOGDEB0("Db_termMatch\n");
    static const char *kwlist[] = {"type", "expr", "field", "maxlen", 
 				   "casesens", "diacsens", "lang", NULL};
    char *tp = 0;
@ -1750,7 +1770,7 @@ Db_termMatch(recoll_DbObject* self, PyObject *args, PyObject *kwargs)
 	return 0;
    if (self->db == 0 || the_dbs.find(self->db) == the_dbs.end()) {
-	LOGERR("Db_termMatch: db not found "  << (self->db) << "\n" );
+	LOGERR("Db_termMatch: db not found " << self->db << "\n");
        PyErr_SetString(PyExc_AttributeError, "db");
 	goto out;
    }
@ -1775,7 +1795,7 @@ Db_termMatch(recoll_DbObject* self, PyObject *args, PyObject *kwargs)
    if (!self->db->termMatch(typ_sens, lang ? lang : "english", 
 			     expr, result, maxlen, field ? field : "")) {
-	LOGERR("Db_termMatch: db termMatch error\n" );
+	LOGERR("Db_termMatch: db termMatch error\n");
        PyErr_SetString(PyExc_AttributeError, "rcldb termMatch error");
 	goto out;
    }
@ -1796,7 +1816,7 @@ out:
 static PyObject *
 Db_needUpdate(recoll_DbObject* self, PyObject *args, PyObject *kwds)
 {
-    LOGDEB0("Db_needUpdate\n" );
+    LOGDEB0("Db_needUpdate\n");
    char *udi = 0; // needs freeing
    char *sig = 0; // needs freeing
    if (!PyArg_ParseTuple(args, "eses:Db_needUpdate", 
@ -1804,7 +1824,7 @@ Db_needUpdate(recoll_DbObject* self, PyObject *args, PyObject *kwds)
 	return 0;
    }
    if (self->db == 0 || the_dbs.find(self->db) == the_dbs.end()) {
-	LOGERR("Db_needUpdate: db not found "  << (self->db) << "\n" );
+	LOGERR("Db_needUpdate: db not found " << self->db << "\n");
        PyErr_SetString(PyExc_AttributeError, "db");
 	PyMem_Free(udi);
 	PyMem_Free(sig);
@ -1819,13 +1839,13 @@ Db_needUpdate(recoll_DbObject* self, PyObject *args, PyObject *kwds)
 static PyObject *
 Db_delete(recoll_DbObject* self, PyObject *args, PyObject *kwds)
 {
-    LOGDEB0("Db_delete\n" );
+    LOGDEB0("Db_delete\n");
    char *udi = 0; // needs freeing
    if (!PyArg_ParseTuple(args, "es:Db_delete", "utf-8", &udi)) {
 	return 0;
    }
    if (self->db == 0 || the_dbs.find(self->db) == the_dbs.end()) {
-	LOGERR("Db_delete: db not found "  << (self->db) << "\n" );
+	LOGERR("Db_delete: db not found " << self->db << "\n");
        PyErr_SetString(PyExc_AttributeError, "db");
 	PyMem_Free(udi);
        return 0;
@ -1838,9 +1858,9 @@ Db_delete(recoll_DbObject* self, PyObject *args, PyObject *kwds)
 static PyObject *
 Db_purge(recoll_DbObject* self)
 {
-    LOGDEB0("Db_purge\n" );
+    LOGDEB0("Db_purge\n");
    if (self->db == 0 || the_dbs.find(self->db) == the_dbs.end()) {
-	LOGERR("Db_purge: db not found "  << (self->db) << "\n" );
+	LOGERR("Db_purge: db not found " << self->db << "\n");
        PyErr_SetString(PyExc_AttributeError, "db");
        return 0;
    }
@ -1851,7 +1871,7 @@ Db_purge(recoll_DbObject* self)
 static PyObject *
 Db_addOrUpdate(recoll_DbObject* self, PyObject *args, PyObject *)
 {
-    LOGDEB0("Db_addOrUpdate\n" );
+    LOGDEB0("Db_addOrUpdate\n");
    char *sudi = 0; // needs freeing
    char *sparent_udi = 0; // needs freeing
    recoll_DocObject *pydoc;
@ -1867,17 +1887,17 @@ Db_addOrUpdate(recoll_DbObject* self, PyObject *args, PyObject *)
    PyMem_Free(sparent_udi);
    if (self->db == 0 || the_dbs.find(self->db) == the_dbs.end()) {
-	LOGERR("Db_addOrUpdate: db not found "  << (self->db) << "\n" );
+	LOGERR("Db_addOrUpdate: db not found " << self->db << "\n");
        PyErr_SetString(PyExc_AttributeError, "db");
        return 0;
    }
    if (pydoc->doc == 0 || the_docs.find(pydoc->doc) == the_docs.end()) {
-	LOGERR("Db_addOrUpdate: doc not found "  << (pydoc->doc) << "\n" );
+	LOGERR("Db_addOrUpdate: doc not found " << pydoc->doc << "\n");
        PyErr_SetString(PyExc_AttributeError, "doc");
        return 0;
    }
    if (!self->db->addOrUpdate(udi, parent_udi, *pydoc->doc)) {
-	LOGERR("Db_addOrUpdate: rcldb error\n" );
+	LOGERR("Db_addOrUpdate: rcldb error\n");
        PyErr_SetString(PyExc_AttributeError, "rcldb error");
        return 0;
    }
@ -1992,7 +2012,7 @@ static PyTypeObject recoll_DbType = {
 static PyObject *
 recoll_connect(PyObject *self, PyObject *args, PyObject *kwargs)
 {
-    LOGDEB2("recoll_connect\n" );
+    LOGDEB2("recoll_connect\n");
    recoll_DbObject *db = (recoll_DbObject *)
 	PyObject_Call((PyObject *)&recoll_DbType, args, kwargs);
    return (PyObject *)db;
--- a/src/rcldb/rclabsfromtext.cpp
+++ b/src/rcldb/rclabsfromtext.cpp
@ -329,41 +329,10 @@ int Query::Native::abstractFromText(
    )
 {
    Xapian::Database& xrdb(ndb->xrdb);
    Xapian::Document xdoc;
-    string reason;
+    string rawtext;
-    XAPTRY(xdoc = xrdb.get_document(docid), xrdb, reason);
+    if (!ndb->getRawText(docid, rawtext)) {
-    if (!reason.empty()) {
+        LOGDEB0("abstractFromText: can't fetch text\n");
        LOGERR("abstractFromText: could not get doc: " << reason << endl);
        return ABSRES_ERROR;
    }
    string rawtext, data;
 #ifdef RAWTEXT_IN_DATA
    XAPTRY(data = xdoc.get_data(), xrdb, reason);
    if (!reason.empty()) {
        LOGERR("abstractFromText: could not get data: " << reason << endl);
        return ABSRES_ERROR;
    }
    Doc doc;
    if (ndb->dbDataToRclDoc(docid, data, doc)) {
        rawtext = doc.meta["RAWTEXT"];
    }
 #endif
 #ifdef RAWTEXT_IN_METADATA
    XAPTRY(rawtext = ndb->xrdb.get_metadata(ndb->rawtextMetaKey(docid)),
           ndb->xrdb, reason);
    if (!reason.empty()) {
        LOGERR("abstractFromText: could not get value: " << reason << endl);
        return ABSRES_ERROR;
    }
    ZLibUtBuf cbuf;
    inflateToBuf(rawtext.c_str(), rawtext.size(), cbuf);
    rawtext.assign(cbuf.getBuf(), cbuf.getCnt());
 #endif
    if (rawtext.empty()) {
        LOGDEB0("abstractFromText: no text\n");
        return ABSRES_ERROR;
    }
--- a/src/rcldb/rcldb.cpp
+++ b/src/rcldb/rcldb.cpp
@ -202,12 +202,8 @@ void *DbUpdWorker(void* vdbp)
 	switch (tsk->op) {
 	case DbUpdTask::AddOrUpdate:
 	    LOGDEB("DbUpdWorker: got add/update task, ql " << qsz << "\n");
-	    status = ndbp->addOrUpdateWrite(tsk->udi, tsk->uniterm, 
+	    status = ndbp->addOrUpdateWrite(
-					    tsk->doc, tsk->txtlen
+                tsk->udi, tsk->uniterm, tsk->doc, tsk->txtlen, tsk->rawztext);
 #ifdef RAWTEXT_IN_METADATA
                          , tsk->rawztext
 #endif
                );
 	    break;
 	case DbUpdTask::Delete:
 	    LOGDEB("DbUpdWorker: got delete task, ql " << qsz << "\n");
@ -267,9 +263,12 @@ void Db::Native::openWrite(const string& dir, Db::OpenMode mode)
        // to force using Chert. No sense in doing this if we are
        // storing the text anyway.
 #if XAPIAN_AT_LEAST(1,3,0) && XAPIAN_HAS_CHERT_BACKEND
-        // New Xapian with Chert support. Use Chert and the old
+        // Xapian with Glass and Chert support. If storedoctext is
-        // abstract generation method, except if told otherwise by the
+        // specified in the configuration, use the default backend
-        // configuration.
+        // (Glass), else force Chert. There might be reasons why
        // someone would want to use Chert and store text anyway, but
        // it's an exotic case, and things are complicated enough
        // already.
        if (o_index_storedoctext) {
            xwdb = Xapian::WritableDatabase(dir, action);
            m_storetext = true;
@ -286,15 +285,13 @@ void Db::Native::openWrite(const string& dir, Db::OpenMode mode)
            xwdb = Xapian::WritableDatabase(stub, action);
            m_storetext = false;
        }
-#elif ! XAPIAN_AT_LEAST(1,3,0)
+#elif (! XAPIAN_AT_LEAST(1,3,0)) || XAPIAN_AT_LEAST(1,5,0)
-        // Old Xapian. Use the default index format and let the user
+        // Old Xapian (chert only) or newer (no chert). Use the
-        // decide of the abstract generation method.
+        // default index backend and let the user decide of the
        // abstract generation method. The configured default is to
        // store the text.
        xwdb = Xapian::WritableDatabase(dir, action);
        m_storetext = o_index_storedoctext;
 #else
        // Newer Xapian with no Chert support. Store the text.
        xwdb = Xapian::WritableDatabase(dir, action);
        m_storetext = true;
 #endif
        // Set the storetext value inside the index descriptor (new
        // with recoll 1.24, maybe we'll have other stuff to store in
@ -533,7 +530,7 @@ Xapian::docid Db::Native::getDoc(const string& udi, int idxi,
 // Turn data record from db into document fields
 bool Db::Native::dbDataToRclDoc(Xapian::docid docid, std::string &data, 
-				Doc &doc)
+				Doc &doc, bool fetchtext)
 {
    LOGDEB2("Db::dbDataToRclDoc: data:\n" << data << "\n");
    ConfSimple parms(data);
@ -593,6 +590,9 @@ bool Db::Native::dbDataToRclDoc(Xapian::docid docid, std::string &data,
    }
    doc.meta[Doc::keyurl] = doc.url;
    doc.meta[Doc::keymt] = doc.dmtime.empty() ? doc.fmtime : doc.dmtime;
    if (fetchtext) {
        getRawText(docid, doc.text);
    }
    return true;
 }
@ -672,16 +672,33 @@ int Db::Native::getPageNumberForPosition(const vector<int>& pbreaks, int pos)
    return int(it - pbreaks.begin() + 1);
 }
 // Fetch the stored text for a document from the index metadata.
 //
 // The text is read from the metadata entry whose key is
 // rawtextMetaKey(docid), then passed through inflateToBuf() and the
 // result is stored back into rawtext.
 //
 // @param docid   Xapian document id used to build the metadata key.
 // @param rawtext Output: receives the inflated text. It is overwritten
 //                with the raw metadata value first, so its previous
 //                content is lost as soon as the read is attempted.
 // @return false if m_storetext is not set or if the metadata read
 //         reported an error through 'reason'; true otherwise. Note that
 //         true is also returned, with an empty rawtext, when the
 //         metadata value is empty.
 bool Db::Native::getRawText(Xapian::docid docid, string& rawtext)
 {
    // Nothing to fetch if this index was not created with text storage.
    if (!m_storetext) {
        LOGDEB("Db::Native::getRawText: document text not stored in index\n");
        return false;
    }
    string reason;
    // XAPTRY presumably runs the statement and reports any Xapian error
    // text in 'reason' -- see the macro definition to confirm.
    XAPTRY(rawtext = xrdb.get_metadata(rawtextMetaKey(docid)), xrdb, reason);
    if (!reason.empty()) {
        LOGERR("Rcl::Db::getRawText: could not get value: " << reason << endl);
        return false;
    }
    // Empty value: nothing to inflate. This is not treated as an error.
    if (rawtext.empty()) {
        return true;
    }
    ZLibUtBuf cbuf;
    // NOTE(review): the result of inflateToBuf() is not checked here. If
    // it can fail, rawtext would be set from whatever cbuf then holds --
    // confirm against the inflateToBuf() contract.
    inflateToBuf(rawtext.c_str(), rawtext.size(), cbuf);
    rawtext.assign(cbuf.getBuf(), cbuf.getCnt());
    return true;
 }
 // Note: we're passed a Xapian::Document* because Xapian
 // reference-counting is not mt-safe. We take ownership and need
 // to delete it before returning.
-bool Db::Native::addOrUpdateWrite(const string& udi, const string& uniterm, 
+bool Db::Native::addOrUpdateWrite(
-				  Xapian::Document *newdocument_ptr, 
+    const string& udi, const string& uniterm, Xapian::Document *newdocument_ptr, 
-                                  size_t textlen
+    size_t textlen, const string& rawztext)
 #ifdef RAWTEXT_IN_METADATA
                          , const string& rawztext
 #endif
    )
 {
 #ifdef IDX_THREADS
    Chrono chron;
@ -738,7 +755,6 @@ bool Db::Native::addOrUpdateWrite(const string& udi, const string& uniterm,
 	}
    }
 #ifdef RAWTEXT_IN_METADATA
    XAPTRY(xwdb.set_metadata(rawtextMetaKey(did), rawztext),
           xwdb, m_rcldb->m_reason);
    if (!m_rcldb->m_reason.empty()) {
@ -746,7 +762,6 @@ bool Db::Native::addOrUpdateWrite(const string& udi, const string& uniterm,
               m_rcldb->m_reason << "\n");
        // This only affects snippets, so let's say not fatal
    }
 #endif
    // Test if we're over the flush threshold (limit memory usage):
    bool ret = m_rcldb->maybeflush(textlen);
@ -1436,9 +1451,7 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi, Doc &doc)
    // Udi unique term: this is used for file existence/uptodate
    // checks, and unique id for the replace_document() call.
    string uniterm = make_uniterm(udi);
-#if defined(RAWTEXT_IN_METADATA)
+    string rawztext; // Doc compressed text
        string rawztext; // Doc compressed text
 #endif
    if (doc.onlyxattr) {
 	// Only updating an existing doc with new extended attributes
@ -1553,13 +1566,11 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi, Doc &doc)
 	if (!splitter.text_to_words(doc.text)) {
 	    LOGDEB("Db::addOrUpdate: split failed for main text\n");
        } else {
 #if defined(RAWTEXT_IN_METADATA)
            if (m_ndb->m_storetext) {
                ZLibUtBuf buf;
                deflateToBuf(doc.text.c_str(), doc.text.size(), buf);
                rawztext.assign(buf.getBuf(), buf.getCnt());
            }
 #endif
        }
 #ifdef TEXTSPLIT_STATS
@ -1771,23 +1782,14 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi, Doc &doc)
 	    newdocument.add_boolean_term(wrap_prefix("XM") + *md5);
 	}
 #ifdef RAWTEXT_IN_DATA
        if (m_ndb->m_storetext) {
            RECORD_APPEND(record, string("RAWTEXT"),
                          neutchars(doc.text, cstr_nc));
        }
 #endif
 	LOGDEB0("Rcl::Db::add: new doc record:\n" << record << "\n");
 	newdocument.set_data(record);
    }
 #ifdef IDX_THREADS
    if (m_ndb->m_havewriteq) {
-	DbUpdTask *tp = new DbUpdTask(DbUpdTask::AddOrUpdate, udi, uniterm, 
+	DbUpdTask *tp = new DbUpdTask(
-				      newdocument_ptr, doc.text.length()
+            DbUpdTask::AddOrUpdate, udi, uniterm, newdocument_ptr,
-#ifdef RAWTEXT_IN_METADATA
+            doc.text.length(), rawztext);
                                      , rawztext
 #endif
            );
 	if (!m_ndb->m_wqueue.put(tp)) {
 	    LOGERR("Db::addOrUpdate:Cant queue task\n");
            delete newdocument_ptr;
@ -1799,11 +1801,7 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi, Doc &doc)
 #endif
    return m_ndb->addOrUpdateWrite(udi, uniterm, newdocument_ptr,
-				   doc.text.length()
+				   doc.text.length(), rawztext);
 #ifdef RAWTEXT_IN_METADATA
                                   , rawztext
 #endif
        );
 }
 bool Db::Native::docToXdocXattrOnly(TextSplitDb *splitter, const string &udi, 
@ -2230,11 +2228,7 @@ bool Db::purgeFile(const string &udi, bool *existed)
    if (m_ndb->m_havewriteq) {
        string rztxt;
 	DbUpdTask *tp = new DbUpdTask(DbUpdTask::Delete, udi, uniterm, 
-				      0, (size_t)-1,
+				      0, (size_t)-1, rztxt);
 #if defined(RAWTEXT_IN_METADATA)
                                      rztxt
 #endif
            );
 	if (!m_ndb->m_wqueue.put(tp)) {
 	    LOGERR("Db::purgeFile:Cant queue task\n");
 	    return false;
@ -2262,11 +2256,7 @@ bool Db::purgeOrphans(const string &udi)
    if (m_ndb->m_havewriteq) {
        string rztxt;
 	DbUpdTask *tp = new DbUpdTask(DbUpdTask::PurgeOrphans, udi, uniterm, 
-				      0, (size_t)-1,
+				      0, (size_t)-1, rztxt);
 #ifdef RAWTEXT_IN_METADATA
                                      rztxt
 #endif
            );
 	if (!m_ndb->m_wqueue.put(tp)) {
 	    LOGERR("Db::purgeFile:Cant queue task\n");
 	    return false;
--- a/src/rcldb/rcldb.h
+++ b/src/rcldb/rcldb.h
@ -63,8 +63,8 @@ namespace Rcl {
 // is incompatible anyway.
 enum value_slot {
    // Omega-compatible values:
-    VALUE_LASTMOD = 0,	// 4 byte big endian value - seconds since 1970.
+    VALUE_LASTMOD = 0,  // 4 byte big endian value - seconds since 1970.
-    VALUE_MD5 = 1,	// 16 byte MD5 checksum of original document.
+    VALUE_MD5 = 1,      // 16 byte MD5 checksum of original document.
    VALUE_SIZE = 2,     // sortable_serialise(<file size in bytes>)
    ////////// Recoll only:
@ -80,24 +80,16 @@ class Query;
 class TermMatchEntry {
 public:
    TermMatchEntry() 
-	: wcf(0) 
+        : wcf(0) {}
    {
    }
    TermMatchEntry(const string& t, int f, int d)
-	: term(t), wcf(f), docs(d) 
+        : term(t), wcf(f), docs(d) {}
    {
    }
    TermMatchEntry(const string& t)
-    : term(t), wcf(0) 
+        : term(t), wcf(0) {}
-    {
+    bool operator==(const TermMatchEntry &o) const { 
        return term == o.term;
    }
-    bool operator==(const TermMatchEntry &o) const 
+    bool operator<(const TermMatchEntry &o) const { 
-    { 
+        return term < o.term;
 	return term == o.term;
    }
    bool operator<(const TermMatchEntry &o) const 
    { 
 	return term < o.term;
    }
    string term;
@ -108,13 +100,11 @@ public:
 /** Term match result list header: statistics and global info */
 class TermMatchResult {
 public:
-    TermMatchResult() 
+    TermMatchResult() {
-    {
+        clear();
 	clear();
    }
-    void clear() 
+    void clear() {
-    {
+        entries.clear(); 
 	entries.clear(); 
    }
    // Term expansion
    vector<TermMatchEntry> entries;
@ -125,7 +115,7 @@ public:
 class DbStats {
 public:
    DbStats()
-	:dbdoccount(0), dbavgdoclen(0), mindoclen(0), maxdoclen(0) { }
+        :dbdoccount(0), dbavgdoclen(0), mindoclen(0), maxdoclen(0) {}
    // Index-wide stats
    unsigned int dbdoccount;
    double       dbavgdoclen;
@ -137,27 +127,27 @@ public:
 inline bool has_prefix(const string& trm)
 {
    if (o_index_stripchars) {
-	return !trm.empty() && 'A' <= trm[0] && trm[0] <= 'Z';
+        return !trm.empty() && 'A' <= trm[0] && trm[0] <= 'Z';
    } else {
-	return !trm.empty() && trm[0] == ':';
+        return !trm.empty() && trm[0] == ':';
    }
 }
 inline string strip_prefix(const string& trm)
 {
    if (trm.empty())
-	return trm;
+        return trm;
    string::size_type st = 0;
    if (o_index_stripchars) {
-	st = trm.find_first_not_of("ABCDEFIJKLMNOPQRSTUVWXYZ");
+        st = trm.find_first_not_of("ABCDEFIJKLMNOPQRSTUVWXYZ");
-	if (st == string::npos)
+        if (st == string::npos)
-	    return string();
+            return string();
    } else {
-	if (has_prefix(trm)) {
+        if (has_prefix(trm)) {
-	    st = trm.find_last_of(":") + 1;
+            st = trm.find_last_of(":") + 1;
-	} else {
+        } else {
-	    return trm;
+            return trm;
-	}
+        }
    }
    return trm.substr(st);
 }
@ -165,9 +155,9 @@ inline string strip_prefix(const string& trm)
 inline string wrap_prefix(const string& pfx) 
 {
    if (o_index_stripchars) {
-	return pfx;
+        return pfx;
    } else {
-	return cstr_colon + pfx + cstr_colon;
+        return cstr_colon + pfx + cstr_colon;
    }
 }
@ -175,7 +165,7 @@ inline string wrap_prefix(const string& pfx)
 * Wrapper class for the native database.
 */
 class Db {
- public:
+public:
    // A place for things we don't want visible here.
    class Native;
    friend class Native;
@ -203,13 +193,11 @@ class Db {
     * special chars... 
     * @param with_aspell test for use with aspell, else for xapian speller
     */
-    static bool isSpellingCandidate(const string& term, bool with_aspell=true)
+    static bool isSpellingCandidate(const string& term, bool with_aspell=true) {
-    {
+        if (term.empty() || term.length() > 50 || has_prefix(term))
-	if (term.empty() || term.length() > 50)
+            return false;
-	    return false;
+
-	if (has_prefix(term))
+        Utf8Iter u8i(term);
 	    return false;
 	Utf8Iter u8i(term);
        if (with_aspell) {
            // If spelling with aspell, neither katakana nor other cjk
            // scripts are candidates
@ -232,10 +220,10 @@ class Db {
            return false;
 #endif
        }
-	if (term.find_first_of(" !\"#$%&()*+,-./0123456789:;<=>?@[\\]^_`{|}~") 
+        if (term.find_first_of(" !\"#$%&()*+,-./0123456789:;<=>?@[\\]^_`{|}~") 
-	    != string::npos)
+            != string::npos)
-	    return false;
+            return false;
-	return true;
+        return true;
    }
    /** Return spelling suggestion */
@ -283,7 +271,7 @@ class Db {
    void setExistingFlags(const string& udi, unsigned int docid);
    /** Indicate if we are doing a systematic reindex. This complements
-	needUpdate() return */
+        needUpdate() return */
    bool inFullReset() {return o_inPlaceReset || m_mode == DbTrunc;}
    /** Add or update document identified by unique identifier.
@ -305,8 +293,8 @@ class Db {
     *   much as possible depending on the document type. 
     *   ** doc will be modified in a destructive way **
     */
-    bool addOrUpdate(const string &udi, 
+    bool addOrUpdate(const string &udi, const string &parent_udi, Doc &doc);
-		     const string &parent_udi, Doc &doc);
+
 #ifdef IDX_THREADS
    void waitUpdIdle();
 #endif
@ -314,8 +302,8 @@ class Db {
    /** Delete document(s) for given UDI, including subdocs */
    bool purgeFile(const string &udi, bool *existed = 0);
    /** Delete subdocs with an out of date sig. We do this to purge
-	obsolete subdocs during a partial update where no general purge
+        obsolete subdocs during a partial update where no general purge
-	will be done */
+        will be done */
    bool purgeOrphans(const string &udi);
    /** Remove documents that no longer exist in the file system. This
@ -377,20 +365,19 @@ class Db {
     *        in the TermMatchResult header
     */
    enum MatchType {ET_NONE=0, ET_WILD=1, ET_REGEXP=2, ET_STEM=3, 
-		    ET_DIACSENS=8, ET_CASESENS=16, ET_SYNEXP=32, ET_PATHELT=64};
+                    ET_DIACSENS=8, ET_CASESENS=16, ET_SYNEXP=32, ET_PATHELT=64};
-    int matchTypeTp(int tp) 
+    int matchTypeTp(int tp) {
-    {
+        return tp & 7;
 	return tp & 7;
    }
    bool termMatch(int typ_sens, const string &lang, const string &term, 
-		   TermMatchResult& result, int max = -1,
+                   TermMatchResult& result, int max = -1,
-		   const string& field = "", vector<string> *multiwords = 0);
+                   const string& field = "", vector<string> *multiwords = 0);
    bool dbStats(DbStats& stats, bool listFailed);
    /** Return min and max years for doc mod times in db */
    bool maxYearSpan(int *minyear, int *maxyear);
    /** Return all mime types in index. This can be different from the
-	ones defined in the config because of 'file' command
+        ones defined in the config because of 'file' command
-	usage. Inserts the types at the end of the parameter */
+        usage. Inserts the types at the end of the parameter */
    bool getAllDbMimeTypes(std::vector<std::string>&);
    /** Wildcard expansion specific to file names. Internal/sdata use only */
@ -398,13 +385,11 @@ class Db {
    /** Set parameters for synthetic abstract generation */
    void setAbstractParams(int idxTrunc, int synthLen, int syntCtxLen);
-    int getAbsCtxLen() const 
+    int getAbsCtxLen() const {
-    {
+        return m_synthAbsWordCtxLen;
 	return m_synthAbsWordCtxLen;
    }
-    int getAbsLen() const
+    int getAbsLen() const {
-    {
+        return m_synthAbsLen;
 	return m_synthAbsLen;
    }
    /** Get document for given udi
     *
@ -453,28 +438,26 @@ class Db {
    bool termExists(const string& term);
    /** Test if terms stem to different roots. */
    bool stemDiffers(const string& lang, const string& term, 
-		     const string& base);
+                     const string& base);
    const RclConfig *getConf() {return m_config;}
    /** 
-	Activate the "in place reset" mode where all documents are
+        Activate the "in place reset" mode where all documents are
-	considered as needing update. This is a global/per-process
+        considered as needing update. This is a global/per-process
-	option, and can't be reset. It should be set at the start of
+        option, and can't be reset. It should be set at the start of
-	the indexing pass. 2012-10: no idea why this is done this way...
+        the indexing pass. 2012-10: no idea why this is done this way...
    */
    static void setInPlaceReset() {o_inPlaceReset = true;}
    /** Flush interval get/set. This is used by the first indexing
-	pass to override the config value and flush more rapidly
+        pass to override the config value and flush more rapidly
-	initially so that the user can quickly play with queries */
+        initially so that the user can quickly play with queries */
-    int getFlushMb() 
+    int getFlushMb() {
-    {
+        return  m_flushMb;
 	return  m_flushMb;
    }
-    void setFlushMb(int mb)
+    void setFlushMb(int mb) {
-    {
+        m_flushMb = mb;
 	m_flushMb = mb;
    }
    bool doFlush();
@ -556,8 +539,8 @@ private:
    // Reinitialize when adding/removing additional dbs
    bool adjustdbs(); 
    bool idxTermMatch(int typ_sens, const string &lang, const string &term, 
-		      TermMatchResult& result, int max = -1, 
+                      TermMatchResult& result, int max = -1, 
-		      const string& field = cstr_null);
+                      const string& field = cstr_null);
    // Flush when idxflushmb is reached
    bool maybeflush(int64_t moretext);
--- a/src/rcldb/rcldb_p.h
+++ b/src/rcldb/rcldb_p.h
@ -30,9 +30,6 @@
 #endif // IDX_THREADS
 #include "xmacros.h"
 // Store raw doc text in data record or metadata ?
 #undef RAWTEXT_IN_DATA
 #define RAWTEXT_IN_METADATA
 namespace Rcl {
@ -55,15 +52,10 @@ public:
    // available on the caller site.
    // Take some care to avoid sharing string data (if string impl is cow)
    DbUpdTask(Op _op, const string& ud, const string& un, 
-	      Xapian::Document *d, size_t tl
+	      Xapian::Document *d, size_t tl, string& rztxt
 #ifdef RAWTEXT_IN_METADATA
              , string& rztxt
 #endif
        ) : op(_op), udi(ud.begin(), ud.end()), uniterm(un.begin(), un.end()), 
            doc(d), txtlen(tl) {
 #ifdef RAWTEXT_IN_METADATA
        rawztext.swap(rztxt);
 #endif
    }
    // Udi and uniterm equivalently designate the doc
    Op op;
@ -74,9 +66,7 @@ public:
    // purge because we actually don't know it, and the code fakes a
    // text length based on the term count.
    size_t txtlen;
 #ifdef RAWTEXT_IN_METADATA
    string rawztext; // Compressed doc text
 #endif
 };
 #endif // IDX_THREADS
@ -119,10 +109,7 @@ class Db::Native {
    // Final steps of doc update, part which need to be single-threaded
    bool addOrUpdateWrite(const string& udi, const string& uniterm, 
 			  Xapian::Document *doc, size_t txtlen
-#ifdef RAWTEXT_IN_METADATA
+                          , const string& rawztext);
                          , const string& rawztext
 #endif
        );
    /** Delete all documents which are contained in the input document, 
     * which must be a file-level one.
@ -141,7 +128,8 @@ class Db::Native {
    bool getPagePositions(Xapian::docid docid, vector<int>& vpos);
    int getPageNumberForPosition(const vector<int>& pbreaks, int pos);
-    bool dbDataToRclDoc(Xapian::docid docid, std::string &data, Doc &doc);
+    bool dbDataToRclDoc(Xapian::docid docid, std::string &data, Doc &doc,
                        bool fetchtext = false);
    size_t whatDbIdx(Xapian::docid id);
@ -193,7 +181,6 @@ class Db::Native {
    /** Check if a page position list is defined */
    bool hasPages(Xapian::docid id);
 #ifdef RAWTEXT_IN_METADATA
    std::string rawtextMetaKey(Xapian::docid did) {
        // Xapian's Olly Betts avises to use a key which will
        // sort the same as the docid (which we do), and to
@ -205,10 +192,10 @@ class Db::Native {
        sprintf(buf, "%010d", did);
        return buf;
    }
-#endif
+
    bool getRawText(Xapian::docid docid, string& rawtext);
    void deleteDocument(Xapian::docid docid) {
 #ifdef RAWTEXT_IN_METADATA
        string metareason;
        XAPTRY(xwdb.set_metadata(rawtextMetaKey(docid), string()),
               xwdb, metareason);
@ -217,7 +204,6 @@ class Db::Native {
                   metareason << "\n");
            // not fatal
        }
 #endif
        xwdb.delete_document(docid);
    }
 };
--- a/src/rcldb/rclquery.cpp
+++ b/src/rcldb/rclquery.cpp
@ -64,14 +64,14 @@ static const string& docfToDatf(const string& df)
 // custom field data will have to be processed before insertion to
 // achieve equivalent results.
 #if XAPIAN_MAJOR_VERSION == 1 && XAPIAN_MINOR_VERSION < 2
-class QSorter : public Xapian::Sorter {
+class QSorter : public Xapian::Sorter
 #else
-class QSorter : public Xapian::KeyMaker {
+class QSorter : public Xapian::KeyMaker
 #endif
 {
 public:
    QSorter(const string& f) 
-        : m_fld(docfToDatf(f) + "=") 
+        : m_fld(docfToDatf(f) + "=") {
    {
        m_ismtime = !m_fld.compare("dmtime=");
        if (m_ismtime)
            m_issize = false;
@ -80,8 +80,7 @@ public:
                !m_fld.compare("pcbytes=");
    }
-    virtual std::string operator()(const Xapian::Document& xdoc) const 
+    virtual std::string operator()(const Xapian::Document& xdoc) const {
    {
        string data = xdoc.get_data();
        // It would be simpler to do the record->Rcl::Doc thing, but
        // hand-doing this will be faster. It makes more assumptions
@ -372,7 +371,7 @@ int Query::getResCnt()
 // Note that as stated by a Xapian developer, Enquire searches from
 // scratch each time get_mset() is called. So the better performance
 // on subsequent calls is probably only due to disk caching.
-bool Query::getDoc(int xapi, Doc &doc)
+bool Query::getDoc(int xapi, Doc &doc, bool fetchtext)
 {
    LOGDEB1("Query::getDoc: xapian enquire index " << xapi << "\n");
    if (ISNULL(m_nq) || !m_nq->xenquire) {
@ -451,7 +450,7 @@ bool Query::getDoc(int xapi, Doc &doc)
    }
    // Parse xapian document's data and populate doc fields
-    return m_db->m_ndb->dbDataToRclDoc(docid, data, doc);
+    return m_db->m_ndb->dbDataToRclDoc(docid, data, doc, fetchtext);
 }
 vector<string> Query::expand(const Doc &doc)
--- a/src/rcldb/rclquery.h
+++ b/src/rcldb/rclquery.h
@ -66,25 +66,21 @@ class Query {
    ~Query();
    /** Get explanation about last error */
-    std::string getReason() const
+    std::string getReason() const {
    {
 	return m_reason;
    }
    /** Choose sort order. Must be called before setQuery */
    void setSortBy(const std::string& fld, bool ascending = true);
-    const std::string& getSortBy() const 
+    const std::string& getSortBy() const {
    {
 	return m_sortField;
    }
-    bool getSortAscending() const 
+    bool getSortAscending() const {
    {
 	return m_sortAscending;
    }
    /** Return or filter results with identical content checksum */
-    void setCollapseDuplicates(bool on) 
+    void setCollapseDuplicates(bool on) {
    {
 	m_collapseDuplicates = on;
    }
@ -98,7 +94,7 @@ class Query {
    int getResCnt();
    /** Get document at rank i in current query results. */
-    bool getDoc(int i, Doc &doc);
+    bool getDoc(int i, Doc &doc, bool fetchtext = false);
    /** Get possibly expanded list of query terms */
    bool getQueryTerms(std::vector<std::string>& terms);
@ -117,8 +113,7 @@ class Query {
    int getFirstMatchPage(const Doc &doc, std::string& term);
    /** Retrieve a reference to the searchData we are using */
-    std::shared_ptr<SearchData> getSD() 
+    std::shared_ptr<SearchData> getSD() {
    {
 	return m_sd;
    }
@ -126,8 +121,7 @@ class Query {
    std::vector<std::string> expand(const Doc &doc);
    /** Return the Db we're set for */
-    Db *whatDb() const
+    Db *whatDb() const {
    {
 	return m_db;
    }