general field name handling cleanup + sort facility in rclquery

2008-09-16 08:18:30 +00:00 · 2008-09-16 08:18:30 +00:00 · 7d30485f87
commit 7d30485f87
parent 5cc1de9aad
18 changed files with 556 additions and 297 deletions
--- a/src/common/rclconfig.cpp
+++ b/src/common/rclconfig.cpp
@ -1,5 +1,5 @@
 #ifndef lint
-static char rcsid[] = "@(#$Id: rclconfig.cpp,v 1.57 2008-09-08 16:49:10 dockes Exp $ (C) 2004 J.F.Dockes";
+static char rcsid[] = "@(#$Id: rclconfig.cpp,v 1.58 2008-09-16 08:18:30 dockes Exp $ (C) 2004 J.F.Dockes";
 #endif
 /*
 *   This program is free software; you can redistribute it and/or modify
@ -467,27 +467,21 @@ bool RclConfig::readFieldsConfig(const string& cnferrloc)

    // Build a direct map avoiding all indirections for field to
    // prefix translation
-    // Add direct prefixes
+    // Add direct prefixes from the [prefixes] section
    list<string>tps = m_fields->getNames("prefixes");
    for (list<string>::const_iterator it = tps.begin(); it != tps.end();it++) {
 	string val;
 	m_fields->get(*it, val, "prefixes");
-	m_fldtopref[*it] = val;
+	m_fldtopfx[stringtolower(*it)] = val;
    }
-    // Add prefixes for aliases:
+    // Add prefixes for aliases (build alias-to-canonic map while we're at it)
    tps = m_fields->getNames("aliases");
    for (list<string>::const_iterator it = tps.begin(); it != tps.end();it++) {
-	string canonic = *it; // canonic name
+	string canonic = stringtolower(*it); // canonic name
 	string pfx;
-	map<string,string>::const_iterator pit = m_fldtopref.find(canonic);
-	if (pit != m_fldtopref.end()) {
+	map<string,string>::const_iterator pit = m_fldtopfx.find(canonic);
+	if (pit != m_fldtopfx.end()) {
 	    pfx = pit->second;
-	} else {
-	    // Note: it's perfectly normal to have no prefix for the canonic
-	    // name, this could be a stored, not indexed field
-	    LOGDEB2(("RclConfig::readFieldsConfig: no pfx for canonic [%s]\n",
-		    canonic.c_str()));
-	    continue;
 	}
 	string aliases;
 	m_fields->get(canonic, aliases, "aliases");
@ -495,12 +489,14 @@ bool RclConfig::readFieldsConfig(const string& cnferrloc)
 	stringToStrings(aliases, l);
 	for (list<string>::const_iterator ait = l.begin();
 	     ait != l.end(); ait++) {
-	    m_fldtopref[*ait] = pfx;
+	    if (!pfx.empty())
+		m_fldtopfx[stringtolower(*ait)] = pfx;
+	    m_aliastocanon[stringtolower(*ait)] = canonic;
 	}
    }
 #if 0
-    for (map<string,string>::const_iterator it = m_fldtopref.begin();
-	 it != m_fldtopref.end(); it++) {
+    for (map<string,string>::const_iterator it = m_fldtopfx.begin();
+	 it != m_fldtopfx.end(); it++) {
 	LOGDEB(("RclConfig::readFieldsConfig: [%s] => [%s]\n",
 		it->first.c_str(), it->second.c_str()));
    }
@ -512,8 +508,9 @@ bool RclConfig::readFieldsConfig(const string& cnferrloc)
 	stringToStrings(ss, sl);
 	for (list<string>::const_iterator it = sl.begin(); 
 	     it != sl.end(); it++) {
-	    LOGDEB(("Inserting [%s] in stored list\n", (*it).c_str()));
-	    m_storedFields.insert(*it);
+	    string fld = fieldCanon(stringtolower(*it));
+	    LOGDEB(("Inserting [%s] in stored list\n", fld.c_str()));
+	    m_storedFields.insert(fld);
 	}
    }

@ -521,10 +518,11 @@ bool RclConfig::readFieldsConfig(const string& cnferrloc)
 }

 // Return term indexing prefix for field name (ie: "filename" -> "XSFN")
+// The input must be a canonical field name (alias translation done already)
 bool RclConfig::getFieldPrefix(const string& fld, string &pfx)
 {
-    map<string,string>::const_iterator pit = m_fldtopref.find(fld);
-    if (pit != m_fldtopref.end()) {
+    map<string,string>::const_iterator pit = m_fldtopfx.find(fld);
+    if (pit != m_fldtopfx.end()) {
 	pfx = pit->second;
 	return true;
    } else {
@ -572,10 +570,13 @@ bool RclConfig::getFieldSpecialisationPrefixes(const string& fld,
    pfxes.unique();
    return true;
 }
-bool RclConfig::fieldIsStored(const string& fld)
+
+string RclConfig::fieldCanon(const string& fld)
 {
-    set<string>::const_iterator it = m_storedFields.find(fld);
-    return it != m_storedFields.end();
+    map<string, string>::const_iterator it = m_aliastocanon.find(fld);
+    if (it != m_aliastocanon.end())
+	return it->second;
+    return fld;
 }

 string RclConfig::getMimeViewerDef(const string &mtype)
--- a/src/common/rclconfig.h
+++ b/src/common/rclconfig.h
@ -16,7 +16,7 @@
 */
 #ifndef _RCLCONFIG_H_INCLUDED_
 #define _RCLCONFIG_H_INCLUDED_
-/* @(#$Id: rclconfig.h,v 1.40 2008-09-08 16:49:10 dockes Exp $  (C) 2004 J.F.Dockes */
+/* @(#$Id: rclconfig.h,v 1.41 2008-09-16 08:18:30 dockes Exp $  (C) 2004 J.F.Dockes */

 #include <list>
 #include <string>
@ -144,7 +144,7 @@ class RclConfig {
    /** mimeconf: get list of mime types for category */
    bool getMimeCatTypes(const string& cat, list<string>&);

-    /** mimeconf: get field prefix from field name */
+    /** fields: get field prefix from field name */
    bool getFieldPrefix(const string& fldname, string &pfx);
    /** Get implied meanings for field name (ie: author->[author, from]) */
    bool getFieldSpecialisations(const string& fld, 
@ -152,8 +152,9 @@ class RclConfig {
    /** Get prefixes for specialisations of field name */
    bool getFieldSpecialisationPrefixes(const string& fld, 
 					list<string>& pfxes);
-    bool fieldIsStored(const string& fld);
    const set<string>& getStoredFields() {return m_storedFields;}
+    /** Get canonic name for possible alias */
+    string fieldCanon(const string& fld);

    /** mimeview: get/set external viewer exec string(s) for mimetype(s) */
    string getMimeViewerDef(const string &mimetype);
@ -196,7 +197,8 @@ class RclConfig {
    ConfStack<ConfSimple> *mimeconf; // but their content may depend on it.
    ConfStack<ConfSimple> *mimeview; // 
    ConfStack<ConfSimple> *m_fields;
-    map<string, string>  m_fldtopref;
+    map<string, string>  m_fldtopfx;
+    map<string, string>  m_aliastocanon;
    set<string>          m_storedFields;

    void        *m_stopsuffixes;
--- a/src/internfile/internfile.cpp
+++ b/src/internfile/internfile.cpp
@ -1,5 +1,5 @@
 #ifndef lint
-static char rcsid[] = "@(#$Id: internfile.cpp,v 1.41 2008-09-08 16:49:10 dockes Exp $ (C) 2004 J.F.Dockes";
+static char rcsid[] = "@(#$Id: internfile.cpp,v 1.42 2008-09-16 08:18:30 dockes Exp $ (C) 2004 J.F.Dockes";
 #endif
 /*
 *   This program is free software; you can redistribute it and/or modify
@ -296,7 +296,6 @@ static const string keyds("description");
 static const string keyfn("filename");
 static const string keymd("modificationdate");
 static const string keymt("mimetype");
-static const string keyoc("origcharset");
 static const string keytt("title");

 bool FileInterner::dijontorcl(Rcl::Doc& doc)
@ -310,7 +309,7 @@ bool FileInterner::dijontorcl(Rcl::Doc& doc)
 	    doc.text = it->second;
 	} else if (it->first == keymd) {
 	    doc.dmtime = it->second;
-	} else if (it->first == keyoc) {
+	} else if (it->first == Rcl::Doc::keyoc) {
 	    doc.origcharset = it->second;
 	} else if (it->first == keymt || it->first == keycs) {
 	    // don't need these.
--- a/src/python/recoll/pyrecoll.cpp
+++ b/src/python/recoll/pyrecoll.cpp
@ -1,5 +1,5 @@
 #ifndef lint
-static char rcsid[] = "@(#$Id: pyrecoll.cpp,v 1.11 2008-09-08 16:49:10 dockes Exp $ (C) 2007 J.F.Dockes";
+static char rcsid[] = "@(#$Id: pyrecoll.cpp,v 1.12 2008-09-16 08:18:30 dockes Exp $ (C) 2007 J.F.Dockes";
 #endif


@ -84,31 +84,33 @@ SearchData_init(recoll_SearchDataObject *self, PyObject *args, PyObject *kwargs)
    return 0;
 }

+/* Note: addClause needs the And/Or type because the query string may hold
+   several words. Should this be moved to the Python interface or not? */
 PyDoc_STRVAR(doc_addClause,
 "addClause(type='and'|'or'|'excl'|'phrase'|'near'|'sub', qstring=string,\n"
-"          slack=int, field=string, subSearch=SearchData,\n"
+"          slack=int, field=string, subSearch=SearchData)\n"
 "Adds a simple clause to the SearchData And/Or chain, or a subquery\n"
 "defined by another SearchData object\n"
 );
-/* Note: necessite And/Or vient du fait que le string peut avoir
-   plusieurs mots. A transferer dans l'i/f Python ou pas ? */

-/* Forward decl, def needs recoll_searchDataTyep */
+/* Forward declaration only, definition needs recoll_searchDataType */
 static PyObject *
 SearchData_addClause(recoll_SearchDataObject* self, PyObject *args, 
 		     PyObject *kwargs);

+
+
 static PyMethodDef SearchData_methods[] = {
    {"addClause", (PyCFunction)SearchData_addClause, METH_VARARGS|METH_KEYWORDS,
-     doc_addClause
-    },
+     doc_addClause},
    {NULL}  /* Sentinel */
 };

 PyDoc_STRVAR(doc_SearchDataObject,
 "SearchData()\n"
 "\n"
-"A SearchData object describes a query.\n"
+"A SearchData object describes a query. It has a number of global parameters\n"
+"and a chain of search clauses.\n"
 );
 static PyTypeObject recoll_SearchDataType = {
    PyObject_HEAD_INIT(NULL)
@ -165,9 +167,9 @@ SearchData_addClause(recoll_SearchDataObject* self, PyObject *args,
    static char *kwlist[] = {"type", "qstring", "slack", "field",
 			     "subsearch", NULL};
    char *tp = 0;
-    char *qs = 0;
+    char *qs = 0; // needs freeing
    int slack = 0;
-    char *fld = 0;
+    char *fld = 0; // needs freeing
    recoll_SearchDataObject *sub = 0;
    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "ses|iesO!", kwlist,
 				     &tp, "utf-8", &qs, &slack,
@ -221,11 +223,13 @@ SearchData_addClause(recoll_SearchDataObject* self, PyObject *args,
        PyErr_SetString(PyExc_AttributeError, "Bad tp arg");
 	return 0;
    }
-
+    PyMem_Free(qs);
+    PyMem_Free(fld);
    self->sd->addClause(cl);
    Py_RETURN_NONE;
 }

+
 ///////////////////////////////////////////////////////////////////////
 ///// Doc code
 typedef struct {
@ -272,38 +276,86 @@ Doc_init(recoll_DocObject *self, PyObject *, PyObject *)
    return 0;
 }

-// The "closure" thing is actually the meta field name. This is how
-// python allows one set of get/set functions to get/set different
-// attributes (pass them an additional parameters as from the
-// getseters table and call it a "closure"
 static PyObject *
-Doc_getmeta(recoll_DocObject *self, void *closure)
+Doc_getattr(recoll_DocObject *self, char *name)
 {
-    LOGDEB0(("Doc_getmeta: [%s]\n", (const char *)closure));
+    LOGDEB(("Doc_getattr: name [%s]\n", name));
    if (self->doc == 0 || 
 	the_docs.find(self->doc) == the_docs.end()) {
        PyErr_SetString(PyExc_AttributeError, "doc");
 	return 0;
    }
-
 #if 0
    for (map<string,string>::const_iterator it = self->doc->meta.begin();
 	 it != self->doc->meta.end(); it++) {
 	LOGDEB(("meta[%s] -> [%s]\n", it->first.c_str(), it->second.c_str()));
    }
 #endif
+    string key = rclconfig->fieldCanon(stringtolower(string(name)));

-    // Retrieve utf-8 coded value for meta field (if it doesnt exist,
-    // this inserts a null value in the array, we could be nicer.
-    string meta = self->doc->meta[(const char *)closure];
+    // Handle special cases, then try retrieving key value from meta 
+    // array
+    string value;
+    switch (key.at(0)) {
+    case 'f':
+	if (!key.compare(Rcl::Doc::keyfs)) {
+	    value = self->doc->fbytes;
+	} else if (!key.compare(Rcl::Doc::keyfn)) {
+	    value = self->doc->utf8fn;
+	} else if (!key.compare(Rcl::Doc::keyfs)) {
+	    value = self->doc->fbytes;
+	} else if (!key.compare(Rcl::Doc::keyfmt)) {
+	    value = self->doc->fmtime;
+	}
+	break;
+    case 'd':
+	if (!key.compare(Rcl::Doc::keyds)) {
+	    value = self->doc->dbytes;
+	} else if (!key.compare(Rcl::Doc::keydmt)) {
+	    value = self->doc->dmtime;
+	}
+	break;
+    case 'i':
+	if (!key.compare(Rcl::Doc::keyipt)) {
+	    value = self->doc->ipath;
+	}
+	break;
+    case 'm':
+	if (!key.compare(Rcl::Doc::keytp)) {
+	    value = self->doc->mimetype;
+	} else if (!key.compare(Rcl::Doc::keymt)) {
+	    value = self->doc->dmtime.empty() ? self->doc->fmtime : 
+		self->doc->dmtime;
+	}
+	break;
+    case 'o':
+	if (!key.compare(Rcl::Doc::keyoc)) {
+	    value = self->doc->origcharset;
+	}
+	break;
+    case 's':
+	if (!key.compare(Rcl::Doc::keysig)) {
+	    value = self->doc->sig;
+	} else 	if (!key.compare(Rcl::Doc::keysz)) {
+	    value = self->doc->dbytes.empty() ? self->doc->fbytes : 
+		self->doc->dbytes;
+	}
+
+	break;
+    default:
+	value = self->doc->meta[key];
+    }
+
+    LOGDEB(("Doc_getattr: [%s] (%s) -> [%s]\n",
+	    name, key.c_str(), value.c_str()));
    // Return a python unicode object
-    PyObject* res = PyUnicode_Decode(meta.c_str(), meta.size(), "UTF-8", 
+    PyObject* res = PyUnicode_Decode(value.c_str(), value.size(), "UTF-8", 
 				     "replace");
    return res;
 }

 static int
-Doc_setmeta(recoll_DocObject *self, PyObject *value, void *closure)
+Doc_setattr(recoll_DocObject *self, char *name, PyObject *value)
 {
    if (self->doc == 0 || 
 	the_docs.find(self->doc) == the_docs.end()) {
@ -330,56 +382,55 @@ Doc_setmeta(recoll_DocObject *self, PyObject *value, void *closure)
    }

    char* uvalue = PyString_AsString(putf8);
-    const char *key = (const char *)closure;
-    if (key == 0) {
-        PyErr_SetString(PyExc_AttributeError, "key??");
+    if (name == 0) {
+        PyErr_SetString(PyExc_AttributeError, "name??");
 	return -1;
    }

-    LOGDEB0(("Doc_setmeta: setting [%s] to [%s]\n", key, uvalue));
-    self->doc->meta[key] = uvalue;
-    switch (key[0]) {
+    LOGDEB0(("Doc_setattr: setting [%s] to [%s]\n", name, uvalue));
+    self->doc->meta[name] = uvalue;
+    switch (name[0]) {
    case 'd':
-	if (!strcmp(key, "dbytes")) {
+	if (!strcmp(name, "dbytes")) {
 	    self->doc->dbytes = uvalue;
 	}
 	break;
    case 'f':
-	if (!strcmp(key, "fbytes")) {
+	if (!strcmp(name, "fbytes")) {
 	    self->doc->fbytes = uvalue;
 	}
 	break;
    case 'i':
-	if (!strcmp(key, "ipath")) {
+	if (!strcmp(name, "ipath")) {
 	    self->doc->ipath = uvalue;
 	}
 	break;
    case 'm':
-	if (!strcmp(key, "mimetype")) {
+	if (!strcmp(name, "mimetype")) {
 	    self->doc->mimetype = uvalue;
-	} else if (!strcmp(key, "mtime")) {
+	} else if (!strcmp(name, "mtime")) {
 	    self->doc->dmtime = uvalue;
 	}
 	break;
    case 's':
-	if (!strcmp(key, "sig")) {
+	if (!strcmp(name, "sig")) {
 	    self->doc->sig = uvalue;
 	}
 	break;
    case 't':
-	if (!strcmp(key, "text")) {
+	if (!strcmp(name, "text")) {
 	    self->doc->text = uvalue;
 	}
 	break;
    case 'u':
-	if (!strcmp(key, "url")) {
+	if (!strcmp(name, "url")) {
 	    self->doc->url = uvalue;
 	}
 	break;
    }
    return 0;
 }
-
+#if 0
 static PyGetSetDef Doc_getseters[] = {
    // Name, get, set, doc, closure
    {"url", (getter)Doc_getmeta, (setter)Doc_setmeta, 
@ -410,6 +461,7 @@ static PyGetSetDef Doc_getseters[] = {
     "sig", (void *)"sig"},
    {NULL}  /* Sentinel */
 };
+#endif

 PyDoc_STRVAR(doc_DocObject,
 "Doc()\n"
@ -427,8 +479,8 @@ static PyTypeObject recoll_DocType = {
    0,                         /*tp_itemsize*/
    (destructor)Doc_dealloc,    /*tp_dealloc*/
    0,                         /*tp_print*/
-    0,                         /*tp_getattr*/
-    0,                         /*tp_setattr*/
+    (getattrfunc)Doc_getattr,  /*tp_getattr*/
+    (setattrfunc)Doc_setattr, /*tp_setattr*/
    0,                         /*tp_compare*/
    0,                         /*tp_repr*/
    0,                         /*tp_as_number*/
@ -450,7 +502,7 @@ static PyTypeObject recoll_DocType = {
    0,		               /* tp_iternext */
    0,                         /* tp_methods */
    0,                         /* tp_members */
-    Doc_getseters,             /* tp_getset */
+    0,                         /* tp_getset */
    0,                         /* tp_base */
    0,                         /* tp_dict */
    0,                         /* tp_descr_get */
@ -470,7 +522,10 @@ typedef struct {
    /* Type-specific fields go here. */
    Rcl::Query *query;
    int         next; // Index of result to be fetched next or -1 if uninit
+    char       *sortfield;
+    int         ascending;
 } recoll_QueryObject;
+
 /////////////////////////////////////////////
 /// Query object 
 static void 
@ -481,6 +536,7 @@ Query_dealloc(recoll_QueryObject *self)
 	the_queries.erase(self->query);
    delete self->query;
    self->query = 0;
+    self->sortfield = 0;
    self->ob_type->tp_free((PyObject*)self);
 }

@ -495,6 +551,7 @@ Query_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
 	return 0;
    self->query = 0;
    self->next = -1;
+    self->sortfield = 0;
    return (PyObject *)self;
 }

@ -511,9 +568,29 @@ Query_init(recoll_QueryObject *self, PyObject *, PyObject *)
    delete self->query;
    self->query = 0;
    self->next = -1;
+    self->sortfield = 0;
+    self->ascending = true;
    return 0;
 }

+PyDoc_STRVAR(doc_Query_sortby,
+"sortby(field=fieldname, ascending=true)\n"
+"Sort results by 'fieldname', in ascending or descending order.\n"
+"Only one field can be used, no subsorts for now.\n"
+);
+
+// Python method Query.sortby(field, ascending): record the sort criterion
+// which execute()/executesd() later hand to SearchData::setSortBy().
+// Returns None, or 0 with a Python exception set on bad arguments or
+// memory exhaustion.
+static PyObject *
+Query_sortby(recoll_QueryObject* self, PyObject *args, PyObject *kwargs)
+{
+    LOGDEB(("Query_sortby\n"));
+    static char *kwlist[] = {"field", "ascending", NULL};
+    // With the "s" format the pointer we get back refers to memory owned
+    // by the argument object: it must not be kept after this call
+    // returns. Parse into locals and store a private copy instead.
+    char *fld = 0; // borrowed, do not free
+    int ascending = self->ascending; // unchanged if not supplied
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s|i", kwlist, 
+				     &fld, &ascending))
+	return 0;
+    size_t len = strlen(fld) + 1;
+    char *copy = (char *)PyMem_Malloc(len);
+    if (copy == 0)
+	return PyErr_NoMemory();
+    memcpy(copy, fld, len);
+    // sortfield is either 0 or a copy made by a previous call here.
+    // NOTE(review): Query_init/Query_dealloc only reset sortfield to 0,
+    // they should PyMem_Free() it as well now that the object owns it.
+    PyMem_Free(self->sortfield);
+    self->sortfield = copy;
+    self->ascending = ascending;
+    Py_RETURN_NONE;
+}
+
 PyDoc_STRVAR(doc_Query_execute,
 "execute(query_string, stemmming=1|0)\n"
 "\n"
@ -527,14 +604,17 @@ Query_execute(recoll_QueryObject* self, PyObject *args, PyObject *kwargs)
 {
    LOGDEB(("Query_execute\n"));
    static char *kwlist[] = {"query_string", "stemming", NULL};
-    char *utf8 = 0;
+    char *sutf8 = 0; // needs freeing
    int dostem = 1;
    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "es|i:Query_execute", 
-				     kwlist, "utf-8", &utf8,
+				     kwlist, "utf-8", &sutf8,
 				     &dostem)) {
 	return 0;
    }
-    LOGDEB(("Query_execute:  [%s]\n", utf8));
+    LOGDEB(("Query_execute:  [%s]\n", sutf8));
+
+    string utf8(sutf8);
+    PyMem_Free(sutf8);
    if (self->query == 0 || 
 	the_queries.find(self->query) == the_queries.end()) {
        PyErr_SetString(PyExc_AttributeError, "query");
@ -542,11 +622,12 @@ Query_execute(recoll_QueryObject* self, PyObject *args, PyObject *kwargs)
    }
    string reason;
    Rcl::SearchData *sd = wasaStringToRcl(utf8, reason);
-    PyMem_Free(utf8);
+
    if (!sd) {
 	PyErr_SetString(PyExc_ValueError, reason.c_str());
 	return 0;
    }
+    // sortfield stays 0 until sortby() has been called: only set a sort
+    // order when one was requested (a null pointer is not a field name).
+    if (self->sortfield)
+	sd->setSortBy(self->sortfield, self->ascending);
    RefCntr<Rcl::SearchData> rq(sd);
    self->query->setQuery(rq, dostem?Rcl::Query::QO_STEM:Rcl::Query::QO_NONE);
    int cnt = self->query->getResCnt();
@ -557,7 +638,7 @@ Query_execute(recoll_QueryObject* self, PyObject *args, PyObject *kwargs)
 PyDoc_STRVAR(doc_Query_executesd,
 "execute(SearchData, stemming=1|0)\n"
 "\n"
-"Starts a search for the query defined by SearchData.\n"
+"Starts a search for the query defined by the SearchData object.\n"
 );

 static PyObject *
@ -576,6 +657,7 @@ Query_executesd(recoll_QueryObject* self, PyObject *args, PyObject *kwargs)
        PyErr_SetString(PyExc_AttributeError, "query");
 	return 0;
    }
+    // sortfield stays 0 until sortby() has been called: only set a sort
+    // order when one was requested (a null pointer is not a field name).
+    // NOTE(review): a SearchData reused from an earlier sorted query then
+    // keeps its previous sort order -- confirm this is acceptable.
+    if (self->sortfield)
+	pysd->sd->setSortBy(self->sortfield, self->ascending);
    self->query->setQuery(pysd->sd, dostem ? Rcl::Query::QO_STEM : 
 			  Rcl::Query::QO_NONE);
    int cnt = self->query->getResCnt();
@ -616,18 +698,22 @@ Query_fetchone(recoll_QueryObject* self, PyObject *, PyObject *)
 	return 0;
    }
    self->next++;
+
    // Move some data from the dedicated fields to the meta array to make 
-    // fetching attributes easier
+    // fetching attributes easier. Is this actually needed ? Useful for
+    // url and relevancy rating which are also formatted .
    Rcl::Doc *doc = result->doc;
-    printableUrl(rclconfig->getDefCharset(), doc->url, doc->meta["url"]);
-    doc->meta["mimetype"] = doc->mimetype;
-    doc->meta["mtime"] = doc->dmtime.empty() ? doc->fmtime : doc->dmtime;
-    doc->meta["ipath"] = doc->ipath;
-    doc->meta["fbytes"] = doc->fbytes;
-    doc->meta["dbytes"] = doc->dbytes;
+    printableUrl(rclconfig->getDefCharset(), doc->url, 
+		 doc->meta[Rcl::Doc::keyurl]);
+    doc->meta[Rcl::Doc::keytp] = doc->mimetype;
+    doc->meta[Rcl::Doc::keymt] = doc->dmtime.empty() ? 
+	doc->fmtime : doc->dmtime;
+    doc->meta[Rcl::Doc::keyipt] = doc->ipath;
+    doc->meta[Rcl::Doc::keyfs] = doc->fbytes;
+    doc->meta[Rcl::Doc::keyds] = doc->dbytes;
    char pc[20];
    sprintf(pc, "%02d %%", percent);
-    doc->meta["relevance"] = pc;
+    doc->meta[Rcl::Doc::keyrr] = pc;

    return (PyObject *)result;
 }
@ -637,7 +723,10 @@ static PyMethodDef Query_methods[] = {
     doc_Query_execute},
    {"executesd", (PyCFunction)Query_executesd, METH_VARARGS|METH_KEYWORDS, 
     doc_Query_executesd},
-    {"fetchone", (PyCFunction)Query_fetchone, METH_VARARGS,doc_Query_fetchone},
+    {"fetchone", (PyCFunction)Query_fetchone, METH_VARARGS,
+     doc_Query_fetchone},
+    {"sortby", (PyCFunction)Query_sortby, METH_VARARGS|METH_KEYWORDS,
+     doc_Query_sortby},
    {NULL}  /* Sentinel */
 };

@ -881,8 +970,8 @@ Db_makeDocAbstract(recoll_DbObject* self, PyObject *args, PyObject *)
 static PyObject *
 Db_needUpdate(recoll_DbObject* self, PyObject *args, PyObject *kwds)
 {
-    char *udi = 0;
-    char *sig = 0;
+    char *udi = 0; // needs freeing
+    char *sig = 0; // needs freeing
    LOGDEB(("Db_needUpdate\n"));
    if (!PyArg_ParseTuple(args, "eses:Db_needUpdate", 
 			  "utf-8", &udi, "utf-8", &sig)) {
@ -891,6 +980,8 @@ Db_needUpdate(recoll_DbObject* self, PyObject *args, PyObject *kwds)
    if (self->db == 0 || the_dbs.find(self->db) == the_dbs.end()) {
 	LOGERR(("Db_needUpdate: db not found %p\n", self->db));
        PyErr_SetString(PyExc_AttributeError, "db");
+	PyMem_Free(udi);
+	PyMem_Free(sig);
        return 0;
    }
    bool result = self->db->needUpdate(udi, sig);
@ -903,16 +994,20 @@ static PyObject *
 Db_addOrUpdate(recoll_DbObject* self, PyObject *args, PyObject *)
 {
    LOGDEB(("Db_addOrUpdate\n"));
-    char *udi = 0;
-    char *parent_udi = 0;
-
+    char *sudi = 0; // needs freeing
+    char *sparent_udi = 0; // needs freeing
    recoll_DocObject *pydoc;

    if (!PyArg_ParseTuple(args, "esO!|es:Db_addOrUpdate",
-			  "utf-8", &udi, &recoll_DocType, &pydoc,
-			  "utf-8", &parent_udi)) {
+			  "utf-8", &sudi, &recoll_DocType, &pydoc,
+			  "utf-8", &sparent_udi)) {
 	return 0;
    }
+    string udi(sudi);
+    string parent_udi(sparent_udi ? sparent_udi : "");
+    PyMem_Free(sudi);
+    PyMem_Free(sparent_udi);
+
    if (self->db == 0 || the_dbs.find(self->db) == the_dbs.end()) {
 	LOGERR(("Db_addOrUpdate: db not found %p\n", self->db));
        PyErr_SetString(PyExc_AttributeError, "db");
@ -923,16 +1018,11 @@ Db_addOrUpdate(recoll_DbObject* self, PyObject *args, PyObject *)
        PyErr_SetString(PyExc_AttributeError, "doc");
        return 0;
    }
-    if (!self->db->addOrUpdate(udi, parent_udi?parent_udi:"", *pydoc->doc)) {
+    if (!self->db->addOrUpdate(udi, parent_udi, *pydoc->doc)) {
 	LOGERR(("Db_addOrUpdate: rcldb error\n"));
        PyErr_SetString(PyExc_AttributeError, "rcldb error");
-	PyMem_Free(udi);
-	PyMem_Free(parent_udi);
        return 0;
    }
-    PyMem_Free(udi);
-    if (parent_udi)
-	PyMem_Free(parent_udi);
    Py_RETURN_NONE;
 }
    
--- a/src/python/recoll/setup.py
+++ b/src/python/recoll/setup.py
@ -27,6 +27,7 @@ module1 = Extension('recoll',
                               top + 'query/wasatorcl.cpp',
                               top + 'rcldb/pathhash.cpp',
                               top + 'rcldb/rcldb.cpp',
+                               top + 'rcldb/rcldoc.cpp',
                               top + 'rcldb/rclquery.cpp',
                               top + 'rcldb/searchdata.cpp',
                               top + 'rcldb/stemdb.cpp',
--- a/src/python/samples/recollqsd.py
+++ b/src/python/samples/recollqsd.py
@ -0,0 +1,33 @@
+#!/usr/bin/env python
+
+import sys
+import recoll
+
+def dotest(db, q):
+    query = db.query()
+    query.sortby("title", 1)
+
+    nres = query.executesd(q, stemming = 1)
+    print "Result count: ", nres
+    if nres > 10:
+        nres = 10
+    while query.next >= 0 and query.next < nres: 
+        doc = query.fetchone()
+        print query.next
+        for k in ("url", "mtime", "title", "author", "abstract"):
+            print k, ":", getattr(doc, k).encode('utf-8')
+            #abs = db.makeDocAbstract(doc, query).encode('utf-8')
+            #print abs
+        print
+# End dotest
+
+sd = recoll.SearchData()
+sd.addClause("and", "essaouira maroc")
+#sd.addClause("and", "dockes", field="author")
+#sd.addClause("phrase", "jean francois", 1)
+#sd.addClause("excl", "plage")
+
+db = recoll.connect()
+dotest(db, sd)
+
+sys.exit(0)
--- a/src/python/xesam/xesam-recoll-service
+++ b/src/python/xesam/xesam-recoll-service
@ -1,24 +1,25 @@
 #!/usr/bin/env python
 """
-Demo implementation of a xesam server. Run it like
+Recoll implementation of a xesam server. 
+Based on the example in the xesam-tools package by:
+ Mikkel Kamstrup Erlandsen <mikkel.kamstrup@gmail.com>

-    demo/xesam-dummy-service [-s|--state-messages]
+Run it like
+
+xesam-recoll-service 

 And launch a search on it via

-    ./xesam-tool search hello
+  xesam-tool search hello

-You can use the -s or --state-messages switch to enable StateChanged
-signal monitoring in xesam-tool as well as in xesam-dummy-service.
 """

-
 # Sets up path to uninstalled xesam module
 import demo

 import xesam
 import xesam.query
-from xesam.server import *
+import xesam.server
 import gobject
 import sys

@ -34,15 +35,16 @@ class RecollServer (xesam.server.Searcher):
    """
    
    def __init__ (self):
-        h_fact = HandleFactory ()
-        fact = ClientFactory (self, h_fact, RecollSession, RecollSearch)
+        h_fact = xesam.server.HandleFactory ()
+        fact = xesam.server.ClientFactory (self, h_fact, 
+                                           RecollSession, RecollSearch)
        xesam.server.Searcher.__init__ (self, h_fact, fact)
        self.set_echo_queries (True)
        self.rcldb = recoll.connect()

    def start (self):
        # Export our selves via a SearchServerStub
-        SearchServerStub(self).start()
+        xesam.server.SearchServerStub(self).start()
    
    def GetProperty (self, shandle, name):
        prop = xesam.server.Searcher.GetProperty(self, shandle, name)
@ -54,33 +56,24 @@ class RecollServer (xesam.server.Searcher):
        xesam.debug ("Set property request for '%s=%s', on session '%s', returning %s" % (name, value, shandle,val))
        return val

-class RecollSession (Session):
+class RecollSession (xesam.server.Session):
    """
    
    """
    def __init__ (self, searcher, session_handle):
-        Session.__init__ (self, searcher, session_handle)
+        xesam.server.Session.__init__ (self, searcher, session_handle)
        self.set_property ("recoll.org", "xesam-recoll-service")
        
-class RecollSearch (Search):
+class RecollSearch (xesam.server.Search):
    """
        
    """
-    # Translation from known xesam/whatever field names to Recoll Doc elements
-    FLDTRANS = \
-        {
-        "xesam:title"   : lambda doc : doc.title,
-        "xesam:summary" : lambda doc : doc.abstract,
-        "xesam:mimeType" : lambda doc : doc.mimetype,
-        "xesam:contentModified" : lambda doc : \
-            timestampToIso8601(doc.dmtime or doc.fmtime),
-        "xesam:url"     : lambda doc : doc.url
-        }
+
    SLICE = 10

    def __init__ (self, searcher, session, search_handle, \
                      query=None, xml=None) :
-        Search.__init__ (self, searcher, session, search_handle, \
+        xesam.server.Search.__init__ (self, searcher, session, search_handle, \
                             query=query, xml=xml)
       
        self._hit_fields = session.get_property (xesam.SESSION_HIT_FIELDS)
@ -88,10 +81,7 @@ class RecollSearch (Search):
            xesam.error ("Got property hit.fields as None."
                             " Setting default xesam:url")
            self._hit_fields = ["xesam:url"]
-        print "RecollSearch: fields:", self._hit_fields
-        # TOBEDONE: if fields includes "snippet" we need to generate
-        # the synthetic abstract for each returned doc
-        # Also relevancyRating, ContentCategory et SourceCategory
+        # The format string needs a conversion for its argument, else the
+        # % operator raises TypeError; the 1-tuple keeps this valid
+        # whatever the type of the field list.
+        xesam.debug("RecollSearch: fields: %s" % (self._hit_fields,))
        xesam.debug ("Created %s with handle %s and query:\n%s" % 
                     (self.__class__, self.get_handle(), self.get_query()))

@ -99,6 +89,21 @@ class RecollSearch (Search):
        if not isinstance(self.get_query(), xesam.query.UserQuery):
            raise Exception ("Only UserQuery supported ATM, sorry.")
        self.rclquery = self._searcher.rcldb.query()
+        
+        # In the latest version (>0.95), primary/secondary is replaced by 
+        # a field list.
+        sortfield = session.get_property(xesam.SESSION_SORT_PRIMARY)
+        order = session.get_property(xesam.SESSION_SORT_ORDER)
+
+        # xesam-tool does not know how to set these for now, so let's
+        # TEST here
+        sortfield = "contentModified"
+        order = "descending"
+        xesam.debug("Session sort primary %s order %s" % (sortfield, order))
+        # END TEST
+
+        if sortfield:
+            self.rclquery.sortby(sortfield, order == "ascending" and 1 or 0)

    def start (self):
        xesam.debug ("RecollSearch '%s' got [%s]" % 
@ -110,10 +115,16 @@ class RecollSearch (Search):
            doc = self.rclquery.fetchone()
            data = []
            for fld in self._hit_fields:
-                if self.FLDTRANS.has_key (fld):
-                    data.append(self.FLDTRANS[fld](doc))
+                # Need to handle ContentCategory and SourceCategory
+                fld = fld.lower().replace("xesam:", "")
+                xesam.debug("Adding data for fld %s" % (fld))
+                if fld == "snippet":
+                    data.append(self._searcher.rcldb.makeDocAbstract(doc, 
+                                                                self.rclquery))
+                elif fld == "contentmodified":
+                    data.append(timestampToIso8601(getattr(doc, "mtime")))
                else:
-                    data.append("")
+                    data.append(getattr(doc, fld, ""))
            self.add_new_hit (self._hit_fields, data)
            hits += 1
            if hits >= self.SLICE:
@ -135,7 +146,7 @@ class RecollSearch (Search):
        xesam.debug ("RecollSearch get_hits")

        if self._stopped:
-            return Search.get_hits(self, num_hits)
+            return xesam.server.Search.get_hits(self, num_hits)

        hits = 0
        done = 0;
@ -163,7 +174,7 @@ class RecollSearch (Search):
            xesam.debug ("Search '%s' emitted 'done'" % self.get_handle())
            self.stop()

-        return Search.get_hits(self, num_hits)
+        return xesam.server.Search.get_hits(self, num_hits)


 if __name__ == "__main__":
--- a/src/query/recollq.cpp
+++ b/src/query/recollq.cpp
@ -1,5 +1,5 @@
 #ifndef lint
-static char rcsid[] = "@(#$Id: recollq.cpp,v 1.14 2008-09-08 16:49:10 dockes Exp $ (C) 2006 J.F.Dockes";
+static char rcsid[] = "@(#$Id: recollq.cpp,v 1.15 2008-09-16 08:18:30 dockes Exp $ (C) 2006 J.F.Dockes";
 #endif
 /*
 *   This program is free software; you can redistribute it and/or modify
@ -42,6 +42,33 @@ using namespace std;
 #include "internfile.h"
 #include "wipedir.h"

+// Print the text of the document designated by doc.url and doc.ipath on
+// stdout, running it through a FileInterner.
+// tmpdir is in/out: if empty or not accessible, a temporary directory is
+// created with maketmpdir() and its name stored there for reuse by later
+// calls.
+// Returns false only when the temporary directory cannot be created. A
+// missing file or a failed conversion is reported on stdout and still
+// returns true.
+bool dump_contents(RclConfig *rclconfig, string& tmpdir, Rcl::Doc& doc)
+{
+    // The file path is what follows the first 7 characters of the url
+    // (presumably the "file://" scheme -- confirm). substr() throws
+    // std::out_of_range when the url is shorter than that, so report such
+    // a url like any other unusable path.
+    static const string::size_type pfxlen = 7;
+    if (doc.url.size() < pfxlen) {
+	cout << "No such file: " << doc.url << endl;
+	return true;
+    }
+    string fn = doc.url.substr(pfxlen);
+    struct stat st;
+    if (stat(fn.c_str(), &st) != 0) {
+	cout << "No such file: " << fn << endl;
+	return true;
+    } 
+    if (tmpdir.empty() || access(tmpdir.c_str(), 0) < 0) {
+	string reason;
+	if (!maketmpdir(tmpdir, reason)) {
+	    cerr << "Cannot create temporary directory: "
+		 << reason << endl;
+	    return false;
+	}
+    }
+    // Called before each conversion; presumably empties the directory
+    // left by a previous one -- see wipedir.h.
+    wipedir(tmpdir);
+    FileInterner interner(fn, &st, rclconfig, tmpdir, &doc.mimetype);
+    if (interner.internfile(doc, doc.ipath)) {
+	cout << doc.text << endl;
+    } else {
+	cout << "Cant intern: " << fn << endl;
+    }
+    return true;
+}
+
+
 static char *thisprog;
 static char usage [] =
 " [-o|-a|-f] <query string>\n"
@ -60,6 +87,8 @@ static char usage [] =
 "    -n <cnt> limit the maximum number of results (0->no limit, default 2000)\n"
 "    -b : basic. Just output urls, no mime types or titles\n"
 "    -m : dump the whole document meta[] array\n"
+"    -S fld : sort by field name\n"
+"    -D : sort descending\n"
 ;
 static void
 Usage(void)
@ -82,10 +111,15 @@ static int     op_flags;
 #define OPT_q     0x200
 #define OPT_t     0x400
 #define OPT_m     0x800
+#define OPT_D     0x1000
+#define OPT_S     0x2000
+

 int recollq(RclConfig **cfp, int argc, char **argv)
 {
    string a_config;
+    string sortfield;
+
    int limit = 2000;
    thisprog = argv[0];
    argc--; argv++;
@ -103,6 +137,7 @@ int recollq(RclConfig **cfp, int argc, char **argv)
 		a_config = *(++argv);
 		argc--; goto b1;
            case 'd':   op_flags |= OPT_d; break;
+            case 'D':   op_flags |= OPT_D; break;
            case 'f':   op_flags |= OPT_f; break;
            case 'l':   op_flags |= OPT_l; break;
            case 'm':   op_flags |= OPT_m; break;
@ -112,6 +147,9 @@ int recollq(RclConfig **cfp, int argc, char **argv)
 		argc--; goto b1;
            case 'o':   op_flags |= OPT_o; break;
            case 'q':   op_flags |= OPT_q; break;
+	    case 'S':	op_flags |= OPT_S; if (argc < 2)  Usage();
+		sortfield = *(++argv);
+		argc--; goto b1;
            case 't':   op_flags |= OPT_t; break;
            default: Usage();   break;
            }
@ -168,6 +206,10 @@ int recollq(RclConfig **cfp, int argc, char **argv)
 	return 1;
    }

+    if (op_flags & OPT_S) {
+	sd->setSortBy(sortfield, (op_flags & OPT_D) ? false : true);
+    }
+
    RefCntr<Rcl::SearchData> rq(sd);
    Rcl::Query query(&rcldb);
    query.setQuery(rq, Rcl::Query::QO_STEM);
@ -197,6 +239,7 @@ int recollq(RclConfig **cfp, int argc, char **argv)
 		<< "[" << doc.url.c_str() << "]" << "\t" 
 		<< "[" << doc.meta[Rcl::Doc::keytt].c_str() << "]" << "\t"
 		<< doc.fbytes.c_str()   << "\tbytes" << "\t"
+		<< doc.dmtime.c_str()   << "\tSecs" << "\t"
 		<<  endl;
 	    if (op_flags & OPT_m) {
 		for (map<string,string>::const_iterator it = doc.meta.begin();
@ -204,32 +247,11 @@ int recollq(RclConfig **cfp, int argc, char **argv)
 		    cout << it->first << " = " << it->second << endl;
 		}
 	    }
-	    cout << endl;
 	}
+
 	if (op_flags & OPT_d) {
-	    string fn = doc.url.substr(7);
-	    struct stat st;
-	    if (stat(fn.c_str(), &st) != 0) {
-		cout << "No such file: " << fn << endl;
-		continue;
-	    } 
-	    if (tmpdir.empty() || access(tmpdir.c_str(), 0) < 0) {
-		string reason;
-		if (!maketmpdir(tmpdir, reason)) {
-		    cerr << "Cannot create temporary directory: "
-			 << reason << endl;
-		    return 1;
-		}
-	    }
-	    wipedir(tmpdir);
-	    FileInterner interner(fn, &st, rclconfig, tmpdir, &doc.mimetype);
-	    if (interner.internfile(doc, doc.ipath)) {
-		cout << doc.text << endl;
-	    } else {
-		cout << "Cant intern: " << fn << endl;
-	    }
-	}
-	
+	    dump_contents(rclconfig, tmpdir, doc);
+	}	
    }

    // Maybe clean up temporary directory
--- a/src/rcldb/rcldb.cpp
+++ b/src/rcldb/rcldb.cpp
@ -1,5 +1,5 @@
 #ifndef lint
-static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.144 2008-09-09 12:58:23 dockes Exp $ (C) 2004 J.F.Dockes";
+static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.145 2008-09-16 08:18:30 dockes Exp $ (C) 2004 J.F.Dockes";
 #endif
 /*
 *   This program is free software; you can redistribute it and/or modify
@ -129,12 +129,10 @@ bool Db::Native::subDocs(const string &udi, vector<Xapian::docid>& docids)
    return false;
 }

+// Only ONE field name inside the index data record differs from the
+// Rcl::Doc ones: caption<->title, for a remnant of compatibility with
+// omega
 static const string keycap("caption");
-static const string keymtp("mtype");
-static const string keyfmt("fmtime");
-static const string keydmt("dmtime");
-static const string keyoc("origcharset");
-static const string keyurl("url");

 // Turn data record from db into document fields
 bool Db::Native::dbDataToRclDoc(Xapian::docid docid, std::string &data, 
@ -144,11 +142,11 @@ bool Db::Native::dbDataToRclDoc(Xapian::docid docid, std::string &data,
    ConfSimple parms(&data);
    if (!parms.ok())
 	return false;
-    parms.get(keyurl, doc.url);
-    parms.get(keymtp, doc.mimetype);
-    parms.get(keyfmt, doc.fmtime);
-    parms.get(keydmt, doc.dmtime);
-    parms.get(keyoc, doc.origcharset);
+    parms.get(Doc::keyurl, doc.url);
+    parms.get(Doc::keytp, doc.mimetype);
+    parms.get(Doc::keyfmt, doc.fmtime);
+    parms.get(Doc::keydmt, doc.dmtime);
+    parms.get(Doc::keyoc, doc.origcharset);
    parms.get(keycap, doc.meta[Doc::keytt]);
    parms.get(Doc::keykw, doc.meta[Doc::keykw]);
    parms.get(Doc::keyabs, doc.meta[Doc::keyabs]);
@ -162,10 +160,10 @@ bool Db::Native::dbDataToRclDoc(Xapian::docid docid, std::string &data,
    char buf[20];
    sprintf(buf,"%.2f", float(percent) / 100.0);
    doc.meta[Doc::keyrr] = buf;
-    parms.get(string("ipath"), doc.ipath);
-    parms.get(string("fbytes"), doc.fbytes);
-    parms.get(string("dbytes"), doc.dbytes);
-    parms.get(string("sig"), doc.sig);
+    parms.get(Doc::keyipt, doc.ipath);
+    parms.get(Doc::keyfs, doc.fbytes);
+    parms.get(Doc::keyds, doc.dbytes);
+    parms.get(Doc::keysig, doc.sig);
    doc.xdocid = docid;

    // Other, not predefined meta fields:
@ -691,24 +689,25 @@ bool Db::isopen()
 // indexed with no prefix (ie: abstract)
 bool Db::fieldToPrefix(const string& fldname, string &pfx)
 {
-    // This is the default table
+    // This is the default table. We prefer the data from rclconfig if 
+    // available
    static map<string, string> fldToPrefs;
    if (fldToPrefs.empty()) {
 	fldToPrefs[Doc::keyabs] = string();
 	fldToPrefs["ext"] = "XE";
-	fldToPrefs["filename"] = "XSFN";
+	fldToPrefs[Doc::keyfn] = "XSFN";

-	fldToPrefs["title"] = "S";
 	fldToPrefs[keycap] = "S";
+	fldToPrefs[Doc::keytt] = "S";
 	fldToPrefs["subject"] = "S";

 	fldToPrefs[Doc::keyau] = "A";
 	fldToPrefs["creator"] = "A";
 	fldToPrefs["from"] = "A";

+	fldToPrefs[Doc::keykw] = "K";
 	fldToPrefs["keyword"] = "K";
 	fldToPrefs["tag"] = "K";
-	fldToPrefs[Doc::keykw] = "K";
 	fldToPrefs["tags"] = "K";
    }

@ -719,6 +718,7 @@ bool Db::fieldToPrefix(const string& fldname, string &pfx)
    if (config && config->getFieldPrefix(fld, pfx))
 	return true;

+    // No data in rclconfig? Check default values
    map<string, string>::const_iterator it = fldToPrefs.find(fld);
    if (it != fldToPrefs.end()) {
 	pfx = it->second;
@ -816,9 +816,17 @@ void Db::setAbstractParams(int idxtrunc, int syntlen, int syntctxlen)
 	m_synthAbsWordCtxLen = syntctxlen;
 }

+// Left-pad s with '0' characters up to a width of len. Empty strings
+// and strings which are already len characters or longer are left
+// untouched.
+static inline void leftzeropad(string& s, unsigned len)
+{
+    const string::size_type cur = s.length();
+    if (cur == 0 || cur >= len)
+	return;
+    s.insert(0, len - cur, '0');
+}
+
 static const int MB = 1024 * 1024;
 static const string nc("\n\r\x0c");

+// Append one "name=value\n" line to the data record string R. NM must
+// be a std::string so that the '+' chain builds a string; VAL may be a
+// std::string or a C string. The expansion is a brace block, not a
+// do/while(0): don't use it as the body of an if which has an else.
+#define RECORD_APPEND(R, NM, VAL) {R += NM + "=" + VAL + "\n";}
+
 // Add document in internal form to the database: index the terms in
 // the title abstract and body and add special terms for file name,
 // date, mime type ... , create the document data record (more
@ -958,39 +966,43 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi,
    // reasonable lengths and suppress newlines (so that the data
    // record can keep a simple syntax)

-    string record = "url=" + doc.url;
-    record += "\nmtype=" + doc.mimetype;
-    record += "\nfmtime=" + doc.fmtime;
+    string record;
+    RECORD_APPEND(record, Doc::keyurl, doc.url);
+    RECORD_APPEND(record, Doc::keytp, doc.mimetype);
+    // We left-zero-pad the times so that they are lexico-sortable
+    leftzeropad(doc.fmtime, 11);
+    RECORD_APPEND(record, Doc::keyfmt, doc.fmtime);
    if (!doc.dmtime.empty()) {
-	record += "\ndmtime=" + doc.dmtime;
+	leftzeropad(doc.dmtime, 11);
+	RECORD_APPEND(record, Doc::keydmt, doc.dmtime);
    }
-    record += "\norigcharset=" + doc.origcharset;
+    RECORD_APPEND(record, Doc::keyoc, doc.origcharset);

    if (!doc.fbytes.empty())
-	record += string("\nfbytes=") + doc.fbytes;
+	RECORD_APPEND(record, Doc::keyfs, doc.fbytes);
    // Note that we add the signature both as a value and in the data record
    if (!doc.sig.empty())
-	record += string("\nsig=") + doc.sig;
+	RECORD_APPEND(record, Doc::keysig, doc.sig);
    newdocument.add_value(VALUE_SIG, doc.sig);

    char sizebuf[30]; 
    sprintf(sizebuf, "%u", (unsigned int)doc.text.length());
-    record += string("\ndbytes=") + sizebuf;
+    RECORD_APPEND(record, Doc::keyds, sizebuf);

    if (!doc.ipath.empty())
-	record += "\nipath=" + doc.ipath;
+	RECORD_APPEND(record, Doc::keyipt, doc.ipath);

    if (doc.meta[Doc::keytt].empty())
 	doc.meta[Doc::keytt] = doc.utf8fn;
    doc.meta[Doc::keytt] = 
 	neutchars(truncate_to_word(doc.meta[Doc::keytt], 150), nc);
    if (!doc.meta[Doc::keytt].empty())
-	record += "\n" + keycap + "=" + doc.meta[Doc::keytt];
+	RECORD_APPEND(record, keycap, doc.meta[Doc::keytt]);

    doc.meta[Doc::keykw] = 
 	neutchars(truncate_to_word(doc.meta[Doc::keykw], 300), nc);
    if (!doc.meta[Doc::keykw].empty())
-	record += "\n" + Doc::keykw + "=" + doc.meta[Doc::keykw];
+	RECORD_APPEND(record, Doc::keykw, doc.meta[Doc::keykw]);

    // If abstract is empty, we make up one with the beginning of the
    // document. This is then not indexed, but part of the doc data so
@ -1010,22 +1022,23 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi,
 		      nc);
    }
    if (!doc.meta[Doc::keyabs].empty())
-	record += "\n" + Doc::keyabs + "=" + doc.meta[Doc::keyabs];
+	RECORD_APPEND(record, Doc::keyabs, doc.meta[Doc::keyabs]);

    RclConfig *config = RclConfig::getMainConfig();
    if (config) {
 	const set<string>& stored = config->getStoredFields();
 	for (set<string>::const_iterator it = stored.begin();
 	     it != stored.end(); it++) {
+	    string nm = stringtolower(config->fieldCanon(*it));
 	    if (!doc.meta[*it].empty()) {
 		string value = 
 		    neutchars(truncate_to_word(doc.meta[*it], 150), nc);
-		record += "\n" + *it + "=" + value;
+		RECORD_APPEND(record, nm, value);
 	    }
 	}
    }
-    record += "\n";
-    LOGDEB0(("Rcl::Db::add: new doc record:\n %s\n", record.c_str()));
+
+    LOGDEB0(("Rcl::Db::add: new doc record:\n%s\n", record.c_str()));
    newdocument.set_data(record);

    const char *fnc = udi.c_str();
@ -1105,21 +1118,6 @@ bool Db::needUpdate(const string &udi, const string& sig)

 	    // Retrieve old file/doc signature from value
 	    string osig = doc.get_value(VALUE_SIG);
-#if 0
-	    // Get old  sig from data record
-	    string data = doc.get_data();
-	    string::size_type i1, i2;
-	    i1 = data.find("sig=");
-	    if (i1 == string::npos) 
-		return true;
-	    i1 += 4;
-	    if (i1 >= data.length())
-		return true;
-	    i2 = data.find_first_of("\n\r", i1);
-	    if (i2 == string::npos)
-		return true;
-	    string osig = data.substr(i1, i2-i1);
-#endif
 	    LOGDEB2(("Db::needUpdate: oldsig [%s] new [%s]\n",
 		     osig.c_str(), sig.c_str()));
 	    // Compare new/old sig
@ -1287,14 +1285,12 @@ bool Db::purgeFile(const string &udi)
    return false;
 }

+// File name wild card expansion. This is a specialisation of termMatch
 bool Db::filenameWildExp(const string& fnexp, list<string>& names)
 {
-    // File name search, with possible wildcards. 
-    // We expand wildcards by scanning the filename terms (prefixed 
-    // with XSFN) from the database. 
-    // We build an OR query with the expanded values if any.
    string pattern;
    dumb_string(fnexp, pattern);
+    names.clear();

    // If pattern is not quoted, and has no wildcards, we add * at
    // each end: match any substring
@ -1303,33 +1299,14 @@ bool Db::filenameWildExp(const string& fnexp, list<string>& names)
    } else if (pattern.find_first_of("*?[") == string::npos) {
 	pattern = "*" + pattern + "*";
    } // else let it be
+    LOGDEB(("Rcl::Db::filenameWildExp: pattern: [%s]\n", pattern.c_str()));

-    LOGDEB((" pattern: [%s]\n", pattern.c_str()));
-
-    // Match pattern against all file names in the db
-    string ermsg;
-    try {
-	Xapian::TermIterator it = m_ndb->db.allterms_begin(); 
-	it.skip_to("XSFN");
-	for (;it != m_ndb->db.allterms_end(); it++) {
-	    if ((*it).find("XSFN") != 0)
-		break;
-	    string fn = (*it).substr(4);
-	    LOGDEB2(("Matching [%s] and [%s]\n", pattern.c_str(), fn.c_str()));
-	    if (fnmatch(pattern.c_str(), fn.c_str(), 0) != FNM_NOMATCH) {
-		names.push_back((*it).c_str());
-	    }
-	    // Limit the match count
-	    if (names.size() > 1000) {
-		LOGERR(("Db::filenameWildExp: too many matched file names\n"));
-		break;
-	    }
-	}
-    } XCATCHERROR(ermsg);
-    if (!ermsg.empty()) {
-	LOGERR(("filenameWildExp: xapian error: %s\n", ermsg.c_str()));
+    list<TermMatchEntry> entries;
+    if (!termMatch(ET_WILD, string(), pattern, entries, 1000, Doc::keyfn))
 	return false;
-    }
+    for (list<TermMatchEntry>::const_iterator it = entries.begin();
+	 it != entries.end(); it++) 
+	names.push_back("XSFN"+it->term);

    if (names.empty()) {
 	// Build an impossible query: we know its impossible because we
@ -1385,11 +1362,11 @@ const string regSpecChars = "(.[{";
 bool Db::termMatch(MatchType typ, const string &lang,
 		   const string &root, 
 		   list<TermMatchEntry>& res,
-		   int max)
+		   int max, 
+		   const string& field)
 {
    if (!m_ndb || !m_ndb->m_isopen)
 	return false;
-
    Xapian::Database db = m_ndb->m_iswritable ? m_ndb->wdb: m_ndb->db;

    res.clear();
@ -1399,6 +1376,11 @@ bool Db::termMatch(MatchType typ, const string &lang,
    dumb_string(root, droot);
    string nochars = typ == ET_WILD ? wildSpecChars : regSpecChars;

+    string prefix;
+    if (!field.empty()) {
+	(void)fieldToPrefix(field, prefix); 
+    }
+
    if (typ == ET_STEM) {
 	if (!stemExpand(lang, root, res, max))
 	    return false;
@ -1429,33 +1411,43 @@ bool Db::termMatch(MatchType typ, const string &lang,
 	string::size_type es = droot.find_first_of(nochars);
 	string is;
 	switch (es) {
-	case string::npos: is = droot;break;
-	case 0: break;
-	default: is = droot.substr(0, es);break;
+	case string::npos: is = prefix + droot; break;
+	case 0: is = prefix; break;
+	default: is = prefix + droot.substr(0, es); break;
 	}
 	LOGDEB(("termMatch: initsec: [%s]\n", is.c_str()));

-	Xapian::TermIterator it = db.allterms_begin(); 
-	if (!is.empty())
-	    it.skip_to(is.c_str());
-	for (int n = 0;it != db.allterms_end(); it++) {
-	    // If we're beyond the terms matching the initial string, end
-	    if (!is.empty() && (*it).find(is) != 0)
-		break;
-	    // Don't match special internal terms beginning with uppercase ascii
-	    if ((*it).at(0) >= 'A' && (*it).at(0) <= 'Z')
-		continue;
-	    if (typ == ET_WILD) {
-		if (fnmatch(droot.c_str(), (*it).c_str(), 0) == FNM_NOMATCH)
-		    continue;
-	    } else {
-		if (regexec(&reg, (*it).c_str(), 0, 0, 0))
-		    continue;
+	string ermsg;
+	try {
+	    Xapian::TermIterator it = db.allterms_begin(); 
+	    if (!is.empty())
+		it.skip_to(is.c_str());
+	    for (int n = 0; it != db.allterms_end(); it++) {
+		// If we're beyond the terms matching the initial string, end
+		if (!is.empty() && (*it).find(is) != 0)
+		    break;
+		string term;
+		if (!prefix.empty())
+		    term = (*it).substr(prefix.length());
+		else
+		    term = *it;
+		if (typ == ET_WILD) {
+		    if (fnmatch(droot.c_str(), term.c_str(), 0) == FNM_NOMATCH)
+			continue;
+		} else {
+		    if (regexec(&reg, term.c_str(), 0, 0, 0))
+			continue;
+		}
+		// Do we want stem expansion here? We don't do it for now
+		res.push_back(TermMatchEntry(term, it.get_termfreq()));
+		++n;
 	    }
-	    // Do we want stem expansion here? We don't do it for now
-	    res.push_back(TermMatchEntry(*it, it.get_termfreq()));
-	    ++n;
+	} XCATCHERROR(ermsg);
+	if (!ermsg.empty()) {
+	    LOGERR(("termMatch: %s\n", ermsg.c_str()));
+	    return false;
 	}
+
 	if (typ == ET_REGEXP) {
 	    regfree(&reg);
 	}
--- a/src/rcldb/rcldb.h
+++ b/src/rcldb/rcldb.h
@ -16,7 +16,7 @@
 */
 #ifndef _DB_H_INCLUDED_
 #define _DB_H_INCLUDED_
-/* @(#$Id: rcldb.h,v 1.61 2008-08-26 07:38:29 dockes Exp $  (C) 2004 J.F.Dockes */
+/* @(#$Id: rcldb.h,v 1.62 2008-09-16 08:18:30 dockes Exp $  (C) 2004 J.F.Dockes */

 #include <string>
 #include <list>
@ -156,9 +156,11 @@ class Db {
     * Stem expansion is performed if lang is not empty */
    enum MatchType {ET_WILD, ET_REGEXP, ET_STEM};
    bool termMatch(MatchType typ, const string &lang, const string &s, 
-		   list<TermMatchEntry>& result, int max = -1);
+		   list<TermMatchEntry>& result, int max = -1, 
+		   const string& field = "");

-    /** Specific filename wildcard expansion */
+    /** Special filename wildcard to XSFN terms expansion.
+	internal/searchdata use only */
    bool filenameWildExp(const string& exp, list<string>& names);

    /** Set parameters for synthetic abstract generation */
--- a/src/rcldb/rcldb_p.h
+++ b/src/rcldb/rcldb_p.h
@ -4,7 +4,7 @@
 #include "xapian.h"

 namespace Rcl {
-/* @(#$Id: rcldb_p.h,v 1.4 2008-09-05 10:34:17 dockes Exp $  (C) 2007 J.F.Dockes */
+/* @(#$Id: rcldb_p.h,v 1.5 2008-09-16 08:18:30 dockes Exp $  (C) 2007 J.F.Dockes */

 // Generic Xapian exception catching code. We do this quite often,
 // and I have no idea how to do this except for a macro
@ -70,5 +70,14 @@ class Db::Native {
    bool subDocs(const string &udi, vector<Xapian::docid>& docids);

 };
+
+// Translate an Rcl::Doc field name into the name used inside the index
+// data record. Only the title differs (it is stored as "caption"), all
+// other names are returned unchanged.
+inline const string& docfToDatf(const string& df)
+{
+    static const string keycap("caption");
+    if (df == Doc::keytt)
+	return keycap;
+    return df;
+}
+
 }
 #endif /* _rcldb_p_h_included_ */
--- a/src/rcldb/rcldoc.cpp
+++ b/src/rcldb/rcldoc.cpp
@ -1,14 +1,25 @@
 #ifndef lint
-static char rcsid[] = "@(#$Id: rcldoc.cpp,v 1.1 2008-09-08 16:49:10 dockes Exp $ (C) 2007 J.F.Dockes";
+static char rcsid[] = "@(#$Id: rcldoc.cpp,v 1.2 2008-09-16 08:18:30 dockes Exp $ (C) 2007 J.F.Dockes";
 #endif


 #include "rcldoc.h"
 namespace Rcl {
-const string Doc::keyabs("abstract");
-const string Doc::keyau("author");
-const string Doc::keyfn("filename");
-const string Doc::keykw("keywords");
-const string Doc::keyrr("relevancyrating");
-const string Doc::keytt("title");
+    const string Doc::keyurl("url");
+    const string Doc::keyfn("filename");
+    const string Doc::keyipt("ipath");
+    const string Doc::keytp("mtype");
+    const string Doc::keyfmt("fmtime");
+    const string Doc::keydmt("dmtime");
+    const string Doc::keymt("mtime");
+    const string Doc::keyoc("origcharset");
+    const string Doc::keyfs("fbytes");
+    const string Doc::keyds("dbytes");
+    const string Doc::keysz("size");
+    const string Doc::keysig("sig");
+    const string Doc::keyrr("relevancyrating");
+    const string Doc::keyabs("abstract");
+    const string Doc::keyau("author");
+    const string Doc::keytt("title");
+    const string Doc::keykw("keywords");
 }
--- a/src/rcldb/rcldoc.h
+++ b/src/rcldb/rcldoc.h
@ -16,7 +16,7 @@
 */
 #ifndef _RCLDOC_H_INCLUDED_
 #define _RCLDOC_H_INCLUDED_
-/* @(#$Id: rcldoc.h,v 1.9 2008-09-08 16:49:10 dockes Exp $  (C) 2006 J.F.Dockes */
+/* @(#$Id: rcldoc.h,v 1.10 2008-09-16 08:18:30 dockes Exp $  (C) 2006 J.F.Dockes */

 #include <string>
 #include <map>
@ -44,9 +44,9 @@ class Doc {
    // can be accessed after a query without fetching the actual document).
    // We indicate the routine that sets them up during indexing
    
-    // This is just "file://" + binary filename. No transcoding: this
-    // is used to access files
-    // Index: computed from fn by Db::add caller. Query: from doc data.
+    // This is just "file://" + binary or url-encoded filename. No
+    // transcoding: this is used to access files. Index: computed from
+    // fn by Db::add caller. Query: from doc data.
    string url;

    // Transcoded version of the simple file name for SFN-prefixed
@ -134,12 +134,29 @@ class Doc {
 	pc = 0;
 	xdocid = 0;
    }
-    static const string keyfn;
-    static const string keyrr;
-    static const string keyabs;
-    static const string keyau;
-    static const string keytt;
-    static const string keykw;
+
+    // The official names for recoll native fields when used in a text
+    // context (ie: the python interface duplicates some of the fixed
+    // fields in the meta array, these are the names used). Defined in
+    // rcldoc.cpp. For fields stored in the meta[] array (ie, title,
+    // author), filters _must_ use these values
+    static const string keyurl; // url
+    static const string keyfn;  // file name
+    static const string keyipt; // ipath
+    static const string keytp;  // mime type
+    static const string keyfmt; // file mtime
+    static const string keydmt; // document mtime
+    static const string keymt;  // mtime dmtime if set else fmtime
+    static const string keyoc;  // original charset
+    static const string keyfs;  // file size
+    static const string keyds;  // document size
+    static const string keysz;  // dbytes if set else fbytes
+    static const string keysig; // sig
+    static const string keyrr;  // relevancy rating
+    static const string keyabs; // abstract
+    static const string keyau;  // author
+    static const string keytt;  // title
+    static const string keykw;  // keywords
 };


--- a/src/rcldb/rclquery.cpp
+++ b/src/rcldb/rclquery.cpp
@ -1,5 +1,5 @@
 #ifndef lint
-static char rcsid[] = "@(#$Id: rclquery.cpp,v 1.5 2008-09-05 11:45:16 dockes Exp $ (C) 2008 J.F.Dockes";
+static char rcsid[] = "@(#$Id: rclquery.cpp,v 1.6 2008-09-16 08:18:30 dockes Exp $ (C) 2008 J.F.Dockes";
 #endif

 #include <stdlib.h>
@ -8,6 +8,8 @@ static char rcsid[] = "@(#$Id: rclquery.cpp,v 1.5 2008-09-05 11:45:16 dockes Exp
 #include <list>
 #include <vector>

+#include "xapian/sorter.h"
+
 #include "rcldb.h"
 #include "rcldb_p.h"
 #include "rclquery.h"
@ -20,6 +22,8 @@ static char rcsid[] = "@(#$Id: rclquery.cpp,v 1.5 2008-09-05 11:45:16 dockes Exp
 #ifndef NO_NAMESPACES
 namespace Rcl {
 #endif
+
+
 class FilterMatcher : public Xapian::MatchDecider {
 public:
    FilterMatcher(const string &topdir)
@ -41,7 +45,7 @@ public:

 	// The only filtering for now is on file path (subtree)
 	string url;
-	parms.get(string("url"), url);
+	parms.get(Doc::keyurl, url);
 	LOGDEB2(("FilterMatcher topdir [%s] url [%s]\n",
 		 m_topdir.c_str(), url.c_str()));
 	if (url.find(m_topdir, 7) == 7) {
@ -55,14 +59,46 @@ private:
    string m_topdir;
 };

+// Sort helper class: computes the sort key for a document by
+// extracting the value of one field from the document data record.
+// An instance is handed to Xapian::Enquire::set_sort_by_key() in
+// Query::setQuery().
+class QSorter : public Xapian::Sorter {
+public:
+    // f is an Rcl::Doc field name. It is translated to the data record
+    // name (title->caption) and stored with the '=' separator appended
+    QSorter(const string& f) : m_fld(docfToDatf(f) + "=") {}
+
+    // Return the field value for xdoc, or an empty string if the field
+    // is absent, has an empty value, or its line is not terminated.
+    virtual std::string operator()(const Xapian::Document& xdoc) const {
+	string data = xdoc.get_data();
+
+	// It would be simpler to do the record->Rcl::Doc thing, but
+	// hand-doing this will be faster. It makes more assumptions
+	// about the format than a ConfTree though:
+	string::size_type i1 = 0;
+	for (;;) {
+	    i1 = data.find(m_fld, i1);
+	    if (i1 == string::npos) 
+		return string();
+	    // Only accept the name at the beginning of a line. Else
+	    // "mtime=" would match inside "fmtime=", or any name
+	    // could match inside another field's value (ie: an url)
+	    if (i1 == 0 || data[i1-1] == '\n' || data[i1-1] == '\r')
+		break;
+	    i1++;
+	}
+	i1 += m_fld.length();
+	if (i1 >= data.length())
+	    return string();
+	string::size_type i2 = data.find_first_of("\n\r", i1);
+	if (i2 == string::npos)
+	    return string();
+	return data.substr(i1, i2-i1);
+    }
+
+private:
+    // Data record field name, including the trailing '='
+    string m_fld;
+};
+
 Query::Query(Db *db)
-    : m_nq(new Native(this)), m_db(db)
+    : m_nq(new Native(this)), m_db(db), m_sorter(0)
 {
 }

 Query::~Query()
 {
    deleteZ(m_nq);
+    if (m_sorter) {
+	delete (QSorter*)m_sorter;
+	m_sorter = 0;
+    }
 }

 string Query::getReason() const
@ -75,6 +111,7 @@ Db *Query::whatDb()
    return m_db;
 }

+
 //#define ISNULL(X) (X).isNull()
 #define ISNULL(X) !(X)

@ -114,6 +151,17 @@ bool Query::setQuery(RefCntr<SearchData> sdata, int opts,
    try {
 	m_nq->enquire = new Xapian::Enquire(m_db->m_ndb->db);
 	m_nq->enquire->set_query(m_nq->query);
+	if (!sdata->getSortBy().empty()) {
+	    if (m_sorter) {
+		delete (QSorter*)m_sorter;
+		m_sorter = 0;
+	    }
+	    m_sorter = new QSorter(sdata->getSortBy());
+	    // It really seems there is a xapian bug about sort order, we 
+	    // invert here.
+	    m_nq->enquire->set_sort_by_key((QSorter*)m_sorter, 
+					   !sdata->getSortAscending());
+	}
 	m_nq->mset = Xapian::MSet();
 	// Get the query description and trim the "Xapian::Query"
 	d = m_nq->query.get_description();
--- a/src/rcldb/rclquery.h
+++ b/src/rcldb/rclquery.h
@ -1,6 +1,6 @@
 #ifndef _rclquery_h_included_
 #define _rclquery_h_included_
-/* @(#$Id: rclquery.h,v 1.2 2008-07-01 08:31:08 dockes Exp $  (C) 2008 J.F.Dockes */
+/* @(#$Id: rclquery.h,v 1.3 2008-09-16 08:18:30 dockes Exp $  (C) 2008 J.F.Dockes */
 /*
 *   This program is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
@ -87,6 +87,7 @@ private:
    string m_filterTopDir; // Current query filter on subtree top directory 
    string m_reason; // Error explanation
    Db    *m_db;
+    void  *m_sorter;
    unsigned int m_qOpts;
    /* Copyconst and assignement private and forbidden */
    Query(const Query &) {}
--- a/src/rcldb/searchdata.cpp
+++ b/src/rcldb/searchdata.cpp
@ -1,5 +1,5 @@
 #ifndef lint
-static char rcsid[] = "@(#$Id: searchdata.cpp,v 1.22 2008-08-28 15:42:43 dockes Exp $ (C) 2006 J.F.Dockes";
+static char rcsid[] = "@(#$Id: searchdata.cpp,v 1.23 2008-09-16 08:18:30 dockes Exp $ (C) 2006 J.F.Dockes";
 #endif
 /*
 *   This program is free software; you can redistribute it and/or modify
@ -153,6 +153,14 @@ void SearchData::erase() {
    m_reason.erase();
 }

+// Choose the field used to sort the results, and the sort direction.
+// fld is lowercased, then translated to its canonic name through the
+// main configuration. An empty resulting name means no sorting (see
+// the getSortBy() test in Query::setQuery()).
+void SearchData::setSortBy(const string& fld, bool ascending) {
+    const string lfld = stringtolower(fld);
+    RclConfig *cfg = RclConfig::getMainConfig();
+    // The main config pointer is tested for null elsewhere before use,
+    // so don't assume it is set here: without a configuration, use the
+    // lowercased name as is.
+    if (cfg)
+	m_sortField = cfg->fieldCanon(lfld);
+    else
+	m_sortField = lfld;
+    m_sortAscending = ascending;
+    LOGDEB0(("SearchData::setSortBy: [%s] %s\n", m_sortField.c_str(),
+	     m_sortAscending ? "ascending" : "descending"));
+}
+
 // Am I a file name only search ? This is to turn off term highlighting
 bool SearchData::fileNameOnly() 
 {
@ -572,9 +580,9 @@ bool SearchDataClauseFilename::toNativeQuery(Rcl::Db &db, void *p,
    list<string> names;
    for (list<string>::iterator it = patterns.begin();
 	 it != patterns.end(); it++) {
-	// This relies on filenameWildExp not resetting and always
-	// adding to the input
-	db.filenameWildExp(*it, names);
+	list<string> more;
+	db.filenameWildExp(*it, more);
+	names.splice(names.end(), more);
    }
    // Build a query out of the matching file name terms.
    *qp = Xapian::Query(Xapian::Query::OP_OR, names.begin(), names.end());
--- a/src/rcldb/searchdata.h
+++ b/src/rcldb/searchdata.h
@ -16,7 +16,7 @@
 */
 #ifndef _SEARCHDATA_H_INCLUDED_
 #define _SEARCHDATA_H_INCLUDED_
-/* @(#$Id: searchdata.h,v 1.17 2008-09-08 15:47:44 dockes Exp $  (C) 2004 J.F.Dockes */
+/* @(#$Id: searchdata.h,v 1.18 2008-09-16 08:18:30 dockes Exp $  (C) 2004 J.F.Dockes */

 /** 
 * Structures to hold data coming almost directly from the gui
@ -110,11 +110,17 @@ public:
    /** Add file type for filtering results */
    void addFiletype(const string& ft) {m_filetypes.push_back(ft);}

+    /** Choose sort order. Should this be in RclQuery instead ? */
+    void setSortBy(const string& fld, bool ascending = true);
+    const string& getSortBy() const {return m_sortField;}
+    bool getSortAscending() const {return m_sortAscending;}
 private:
-    SClType                    m_tp; // Only SCLT_AND or SCLT_OR here
-    vector<SearchDataClause *> m_query;
-    vector<string>             m_filetypes; // Restrict to filetypes if set.
-    string                     m_topdir; // Restrict to subtree.
+    SClType                   m_tp; // Only SCLT_AND or SCLT_OR here
+    vector<SearchDataClause*> m_query;
+    vector<string>            m_filetypes; // Restrict to filetypes if set.
+    string                    m_topdir; // Restrict to subtree.
+    string                    m_sortField;
+    bool                      m_sortAscending;
    // Printable expanded version of the complete query, retrieved/set
    // from rcldb after the Xapian::setQuery() call
    string m_description; 
--- a/src/sampleconf/fields
+++ b/src/sampleconf/fields
@ -1,4 +1,4 @@
-# @(#$Id: fields,v 1.2 2008-09-15 08:03:37 dockes Exp $  (C) 2007 J.F.Dockes
+# @(#$Id: fields,v 1.3 2008-09-16 08:18:30 dockes Exp $  (C) 2007 J.F.Dockes
 # Field names configuration. This defines how one may search ie for 
 # author:Hemingway
 # Important: 
@ -43,13 +43,18 @@ stored = author
 ##########################
 # This section defines field names aliases or synonyms. Any right hand side
 # value will be turned into the lhs canonic name before further treatment
-# Left-hand values must match names in the prefixes section or
-# data-record fields.
-# Note to filter writers: only canonic names should be used when indexing.
+#
+# The left-hand values in the recoll distribution file are well known and
+# must match names used in the c++ code, or even the index data
+# record. They can't change! But you can add others.
+#
+# Filters should only add canonic names to the meta array when indexing,
+# not aliases.
+
 [aliases]
 abstract = summary dc:summary description xesam:description
 author = creator dc:creator xesam:author xesam:creator
-caption = title dc:title subject
+title = title dc:title subject
 # catg = dc:type contentCategory
 dbytes = size xesam:size
 dmtime = date dc:date dc:datemodified datemodified contentmodified \
@ -64,5 +69,6 @@ url = dc:identifier xesam:url
 #########################
 # This section defines a hierarchy for field names. Searching for a lhs
 # ancestor will be expanded to a search for itself and all rhs descendants
+# This is not used for now
 [specialisations]
 author = from