added field/prefixes for author and title + command line query language

2007-01-17 13:53:41 +00:00 · 2007-01-17 13:53:41 +00:00 · 1d683ad411
commit 1d683ad411
parent ee85be5c61
15 changed files with 256 additions and 105 deletions
--- a/src/internfile/internfile.cpp
+++ b/src/internfile/internfile.cpp
@ -1,5 +1,5 @@
 #ifndef lint
-static char rcsid[] = "@(#$Id: internfile.cpp,v 1.24 2007-01-15 13:06:38 dockes Exp $ (C) 2004 J.F.Dockes";
+static char rcsid[] = "@(#$Id: internfile.cpp,v 1.25 2007-01-17 13:53:40 dockes Exp $ (C) 2004 J.F.Dockes";
 #endif
 /*
 *   This program is free software; you can redistribute it and/or modify
@ -236,6 +236,7 @@ static inline bool getKeyValue(const map<string, string>& docdata,
 }
 static const string keyab("abstract");
 static const string keyau("author");
 static const string keycs("charset");
 static const string keyct("content");
 static const string keyfn("filename");
@ -251,6 +252,7 @@ bool FileInterner::dijontorcl(Rcl::Doc& doc)
    Dijon::Filter *df = m_handlers.back();
    const std::map<std::string, std::string>& docdata = df->get_meta_data();
    getKeyValue(docdata, keyau, doc.author);
    getKeyValue(docdata, keyoc, doc.origcharset);
    getKeyValue(docdata, keyct, doc.text);    
    getKeyValue(docdata, keytt, doc.title);
--- a/src/internfile/mh_html.cpp
+++ b/src/internfile/mh_html.cpp
@ -122,6 +122,7 @@ bool MimeHandlerHtml::next_document()
    m_metaData["charset"] = "utf-8";
    m_metaData["title"] = result.title;
    m_metaData["keywords"] = result.keywords;
    m_metaData["author"] = result.author;
    m_metaData["modificationdate"] = result.dmtime;
    m_metaData["sample"] = result.sample;
    m_metaData["mimetype"] = "text/plain";
--- a/src/internfile/mh_mail.cpp
+++ b/src/internfile/mh_mail.cpp
@ -1,5 +1,5 @@
 #ifndef lint
-static char rcsid[] = "@(#$Id: mh_mail.cpp,v 1.28 2007-01-13 10:28:37 dockes Exp $ (C) 2005 J.F.Dockes";
+static char rcsid[] = "@(#$Id: mh_mail.cpp,v 1.29 2007-01-17 13:53:40 dockes Exp $ (C) 2005 J.F.Dockes";
 #endif
 /*
 *   This program is free software; you can redistribute it and/or modify
@ -217,6 +217,9 @@ bool MimeHandlerMail::processMsg(Binc::MimePart *doc, int depth)
    if (doc->h.getFirstHeader("From", hi)) {
 	rfc2047_decode(hi.getValue(), transcoded);
 	text += string("From: ") + transcoded + string("\n");
 	if (depth == 1) {
 	    m_metaData["author"] = transcoded;
 	}
    }
    if (doc->h.getFirstHeader("To", hi)) {
 	rfc2047_decode(hi.getValue(), transcoded);
@ -245,7 +248,7 @@ bool MimeHandlerMail::processMsg(Binc::MimePart *doc, int depth)
    }
    text += '\n';
-    LOGDEB2(("MimeHandlerMail::rocessMsg:ismultipart %d mime subtype '%s'\n",
+    LOGDEB2(("MimeHandlerMail::processMsg:ismultipart %d mime subtype '%s'\n",
 	    doc->isMultipart(), doc->getSubType().c_str()));
    walkmime(doc, depth);
--- a/src/internfile/myhtmlparse.cpp
+++ b/src/internfile/myhtmlparse.cpp
@ -154,6 +154,11 @@ MyHtmlParser::opening_tag(const string &tag, const map<string,string> &p)
 			    string tmp = i->second;
 			    decode_entities(tmp);
 			    keywords += tmp;
 			} else if (name == "author") {
 			    if (!author.empty()) author += ' ';
 			    string tmp = i->second;
 			    decode_entities(tmp);
 			    author += tmp;
 			} else if (name == "date") {
 			    // Yes this doesn't exist. It's output by filters
 			    // And the format isn't even standard http/html
@ -168,19 +173,6 @@ MyHtmlParser::opening_tag(const string &tag, const map<string,string> &p)
 				dmtime = ascuxtime;
 			    }
 			} 
 #if 0 // We're not a robot, so we don't care about robots metainfo
 			else if (name == "robots") {
 			    string val = i->second;
 			    decode_entities(val);
 			    lowercase_term(val);
 			    if (val.find("none") != string::npos ||
 				val.find("noindex") != string::npos) {
 				indexing_allowed = false;
 				LOGDEB1(("myhtmlparse: robots/noindex\n"));
 				throw false;
 			    }
 			}
 #endif // 0
 		    } else if ((j = p.find("http-equiv")) != p.end()) {
 			string hequiv = j->second;
 			lowercase_term(hequiv);
--- a/src/internfile/myhtmlparse.h
+++ b/src/internfile/myhtmlparse.h
@ -37,7 +37,7 @@ class MyHtmlParser : public HtmlParser {
    bool in_body_tag; 
    bool in_pre_tag;
    bool pending_space;
-    string title, sample, keywords, dump, dmtime;
+    string title, sample, keywords, dump, dmtime, author;
    string ocharset; // This is the charset our user thinks the doc was
    string charset; // This is the charset it was supposedly converted to
    string doccharset; // Set this to value of charset parameter in header
--- a/src/lib/Makefile
+++ b/src/lib/Makefile
@ -8,8 +8,8 @@ LIBS = librcl.a
 all: $(LIBS)
-OBJS =  rclaspell.o rclconfig.o rclinit.o textsplit.o unacpp.o csguess.o indexer.o mimetype.o htmlparse.o myhtmlparse.o mimehandler.o internfile.o mh_exec.o mh_html.o mh_mail.o mh_mbox.o mh_text.o docseq.o history.o sortseq.o pathhash.o rcldb.o searchdata.o stemdb.o base64.o conftree.o copyfile.o debuglog.o execmd.o fstreewalk.o idfile.o md5.o mimeparse.o pathut.o readfile.o smallut.o transcode.o wipedir.o x11mon.o
+OBJS =  rclaspell.o rclconfig.o rclinit.o textsplit.o unacpp.o csguess.o indexer.o mimetype.o htmlparse.o myhtmlparse.o mimehandler.o internfile.o mh_exec.o mh_html.o mh_mail.o mh_mbox.o mh_text.o docseq.o history.o sortseq.o wasastringtoquery.o wasatorcl.o pathhash.o rcldb.o searchdata.o stemdb.o base64.o conftree.o copyfile.o debuglog.o execmd.o fstreewalk.o idfile.o md5.o mimeparse.o pathut.o readfile.o smallut.o transcode.o wipedir.o x11mon.o
-DEPS =  rclaspell.dep.stamp rclconfig.dep.stamp rclinit.dep.stamp textsplit.dep.stamp unacpp.dep.stamp csguess.dep.stamp indexer.dep.stamp mimetype.dep.stamp htmlparse.dep.stamp myhtmlparse.dep.stamp mimehandler.dep.stamp internfile.dep.stamp mh_exec.dep.stamp mh_html.dep.stamp mh_mail.dep.stamp mh_mbox.dep.stamp mh_text.dep.stamp docseq.dep.stamp history.dep.stamp sortseq.dep.stamp pathhash.dep.stamp rcldb.dep.stamp searchdata.dep.stamp stemdb.dep.stamp base64.dep.stamp conftree.dep.stamp copyfile.dep.stamp debuglog.dep.stamp execmd.dep.stamp fstreewalk.dep.stamp idfile.dep.stamp md5.dep.stamp mimeparse.dep.stamp pathut.dep.stamp readfile.dep.stamp smallut.dep.stamp transcode.dep.stamp wipedir.dep.stamp x11mon.dep.stamp
+DEPS =  rclaspell.dep.stamp rclconfig.dep.stamp rclinit.dep.stamp textsplit.dep.stamp unacpp.dep.stamp csguess.dep.stamp indexer.dep.stamp mimetype.dep.stamp htmlparse.dep.stamp myhtmlparse.dep.stamp mimehandler.dep.stamp internfile.dep.stamp mh_exec.dep.stamp mh_html.dep.stamp mh_mail.dep.stamp mh_mbox.dep.stamp mh_text.dep.stamp docseq.dep.stamp history.dep.stamp sortseq.dep.stamp wasastringtoquery.dep.stamp wasatorcl.dep.stamp pathhash.dep.stamp rcldb.dep.stamp searchdata.dep.stamp stemdb.dep.stamp base64.dep.stamp conftree.dep.stamp copyfile.dep.stamp debuglog.dep.stamp execmd.dep.stamp fstreewalk.dep.stamp idfile.dep.stamp md5.dep.stamp mimeparse.dep.stamp pathut.dep.stamp readfile.dep.stamp smallut.dep.stamp transcode.dep.stamp wipedir.dep.stamp x11mon.dep.stamp
 librcl.a : $(DEPS) $(OBJS) unac.o
 	ar ru librcl.a $(OBJS) unac.o
@ -57,6 +57,10 @@ history.o : ../query/history.cpp
 	$(CXX) $(ALL_CXXFLAGS) -c ../query/history.cpp
 sortseq.o : ../query/sortseq.cpp
 	$(CXX) $(ALL_CXXFLAGS) -c ../query/sortseq.cpp
 wasastringtoquery.o : ../query/wasastringtoquery.cpp
 	$(CXX) $(ALL_CXXFLAGS) -c ../query/wasastringtoquery.cpp
 wasatorcl.o : ../query/wasatorcl.cpp
 	$(CXX) $(ALL_CXXFLAGS) -c ../query/wasatorcl.cpp
 pathhash.o : ../rcldb/pathhash.cpp
 	$(CXX) $(ALL_CXXFLAGS) -c ../rcldb/pathhash.cpp
 rcldb.o : ../rcldb/rcldb.cpp
@ -161,6 +165,12 @@ history.dep.stamp : ../query/history.cpp
 sortseq.dep.stamp : ../query/sortseq.cpp
 	$(CXX) -M $(ALL_CXXFLAGS) ../query/sortseq.cpp > sortseq.dep
 	touch sortseq.dep.stamp
 wasastringtoquery.dep.stamp : ../query/wasastringtoquery.cpp
 	$(CXX) -M $(ALL_CXXFLAGS) ../query/wasastringtoquery.cpp > wasastringtoquery.dep
 	touch wasastringtoquery.dep.stamp
 wasatorcl.dep.stamp : ../query/wasatorcl.cpp
 	$(CXX) -M $(ALL_CXXFLAGS) ../query/wasatorcl.cpp > wasatorcl.dep
 	touch wasatorcl.dep.stamp
 pathhash.dep.stamp : ../rcldb/pathhash.cpp
 	$(CXX) -M $(ALL_CXXFLAGS) ../rcldb/pathhash.cpp > pathhash.dep
 	touch pathhash.dep.stamp
@ -238,6 +248,8 @@ include mh_text.dep
 include docseq.dep
 include history.dep
 include sortseq.dep
 include wasastringtoquery.dep
 include wasatorcl.dep
 include pathhash.dep
 include rcldb.dep
 include searchdata.dep
--- a/src/lib/mkMake
+++ b/src/lib/mkMake
@ -24,6 +24,8 @@ ${depth}/internfile/mh_text.cpp \
 ${depth}/query/docseq.cpp \
 ${depth}/query/history.cpp \
 ${depth}/query/sortseq.cpp \
 ${depth}/query/wasastringtoquery.cpp \
 ${depth}/query/wasatorcl.cpp \
 ${depth}/rcldb/pathhash.cpp \
 ${depth}/rcldb/rcldb.cpp \
 ${depth}/rcldb/searchdata.cpp \
--- a/src/query/Makefile
+++ b/src/query/Makefile
@ -1,11 +1,11 @@
 depth = ..
 include $(depth)/mk/sysconf
-PROGS = xadump #trhist qtry qxtry 
+PROGS = xadump rclqlang #trhist qtry qxtry 
 all: $(PROGS)
-SRCS = xadump.cpp
+SRCS = xadump.cpp rclqlang.cpp
 .cpp.o : 
 	$(CXX) -c $(ALL_CXXFLAGS) -o $@ $<
@ -14,6 +14,11 @@ xadump : $(XADUMP_OBJS)
 	$(CXX) $(ALL_CXXFLAGS) -o xadump $(XADUMP_OBJS) \
 	       $(LIBICONV) $(LIBXAPIAN) $(LIBSYS)
 RCLQLANG_OBJS= rclqlang.o $(BIGLIB)
 rclqlang : $(RCLQLANG_OBJS)
 	$(CXX) $(ALL_CXXFLAGS) -o rclqlang $(RCLQLANG_OBJS) \
 	       $(LIBICONV) $(LIBXAPIAN) $(LIBSYS)
 HISTORY_OBJS= trhist.o  $(BIGLIB) $(MIMELIB)
 trhist : $(HISTORY_OBJS)
 	$(CXX) $(ALL_CXXFLAGS) -o trhist $(HISTORY_OBJS) \
@ -21,9 +26,19 @@ trhist : $(HISTORY_OBJS)
 trhist.o : history.cpp history.h
 	$(CXX) $(ALL_CXXFLAGS) -DTEST_HISTORY -c -o trhist.o history.cpp
-$(BIGLIB):
+WASASTRINGTOQUERY_OBJS= trwasastrtoq.o  $(BIGLIB) $(MIMELIB)
-	cd $(depth)/lib;make
+trwasastrtoq : $(WASASTRINGTOQUERY_OBJS)
-   
+	$(CXX) $(ALL_CXXFLAGS) -o trwasastrtoq $(WASASTRINGTOQUERY_OBJS) \
 	       $(LIBICONV) $(LIBXAPIAN)
 trwasastrtoq.o : wasastringtoquery.cpp wasastringtoquery.h
 	$(CXX) $(ALL_CXXFLAGS) -DTEST_WASASTRINGTOQUERY -c \
 	       -o trwasastrtoq.o wasastringtoquery.cpp
 $(BIGLIB): force
 	cd $(depth)/lib;$(MAKE)
 force:
 depend: alldeps.stamp
 alldeps.stamp : $(SRCS)
 	$(CXX) -M $(ALL_CXXFLAGS) $(SRCS) > alldeps
--- a/src/query/wasastringtoquery.cpp
+++ b/src/query/wasastringtoquery.cpp
@ -1,5 +1,5 @@
 #ifndef lint
-static char rcsid[] = "@(#$Id: wasastringtoquery.cpp,v 1.3 2006-12-10 17:03:08 dockes Exp $ (C) 2006 J.F.Dockes";
+static char rcsid[] = "@(#$Id: wasastringtoquery.cpp,v 1.4 2007-01-17 13:53:41 dockes Exp $ (C) 2006 J.F.Dockes";
 #endif
 /*
 *   This program is free software; you can redistribute it and/or modify
@ -17,7 +17,7 @@ static char rcsid[] = "@(#$Id: wasastringtoquery.cpp,v 1.3 2006-12-10 17:03:08 d
 *   Free Software Foundation, Inc.,
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 */
-#ifndef TEST_STRINGTOQUERY
+#ifndef TEST_WASASTRINGTOQUERY
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@ -25,6 +25,13 @@ static char rcsid[] = "@(#$Id: wasastringtoquery.cpp,v 1.3 2006-12-10 17:03:08 d
 #include "wasastringtoquery.h"
 //#define DEB_WASASTRINGTOQ 1
 #ifdef DEB_WASASTRINGTOQ
 #define DPRINT(X) fprintf X
 #else
 #define DPRINT(X)
 #endif
 WasaQuery::~WasaQuery()
 {
    for (vector<WasaQuery*>::iterator it = m_subs.begin();
@ -61,16 +68,16 @@ void WasaQuery::describe(string &desc) const
 	desc += ")";
    }
    desc += "(";
    string fieldspec = m_fieldspec.empty() ? "" : m_fieldspec + ": ";
    switch (m_op) {
    case OP_NULL: 
 	desc += "NULL"; 
 	break;
    case OP_LEAF: 
-	desc += m_fieldspec.empty() ?
+	desc += fieldspec + m_value;
 	    m_value : m_fieldspec + ":" + m_value;
 	break;
    case OP_EXCL: 
-	desc += string("NOT (" ) + m_value + ") ";
+	desc += string("NOT (" ) + fieldspec + m_value + ") ";
 	break;
    case OP_OR: 
    case OP_AND:
@ -84,6 +91,8 @@ void WasaQuery::describe(string &desc) const
 	}
 	break;
    }
    if (desc[desc.length() - 1] == ' ')
 	desc.erase(desc.length() - 1);
    desc += ") "; 
 }
@ -111,7 +120,7 @@ void WasaQuery::describe(string &desc) const
 * parenthesis increases the index, but we're not interested in all
 */
 static const char * parserExpr = 
-    "([oO][rR])"                     //1 OR is a special word
+    "([oO][rR])[[:space:]]*"        //1 OR is a special word
    "|"
    "("                              //2
      "([+-])?"                      //3 Force or exclude indicator
@ -125,7 +134,7 @@ static const char * parserExpr =
        "|"
        "([^[:space:]]+)"            //9 ANormalTerm
      ")"
-    ")"
+    ")[[:space:]]*"
 ;
 // For debugging the parser. But see also NMATCH
@ -236,17 +245,18 @@ StringToWasaQuery::Internal::stringToQuery(const string& str, string& reason)
 	    reason = "Internal regular expression handling error";
 	    return 0;
 	}
-#if 0
+
-	if (loop) printf("Next part:\n");
+#ifdef DEB_WASASTRINGTOQ
-	for (i = 0; i < NMATCH; i++) {
+	if (loop) DPRINT((stderr, "Next part:\n"));
 	for (unsigned int i = 0; i < NMATCH; i++) {
 	    if (m_pmatch[i].rm_so == -1) 	continue;
 	    char match[maxmatchlen+1];
 	    memcpy(match, m_cp + m_pmatch[i].rm_so,
 		   m_pmatch[i].rm_eo - m_pmatch[i].rm_so);
 	    match[m_pmatch[i].rm_eo - m_pmatch[i].rm_so] = 0;
 	    if (matchNames[i][0])
-		printf("%10s: [%s] (%d->%d)\n", matchNames[i], match, 
+		DPRINT((stderr, "%10s: [%s] (%d->%d)\n", matchNames[i], match, 
-		       (int)m_pmatch[i].rm_so, (int)m_pmatch[i].rm_eo);
+			(int)m_pmatch[i].rm_so, (int)m_pmatch[i].rm_eo));
 	}
 #endif
 	char match[maxmatchlen+1];
@ -348,14 +358,17 @@ StringToWasaQuery::Internal::stringToQuery(const string& str, string& reason)
 	    if (prev_or) {
 		// We're in an OR subquery, add new subquery
 		orClause->m_subs.push_back(nclause);
 		DPRINT((stderr, "Adding to OR chain\n"));
 	    } else {
 		if (orClause) {
 		    // Getting out of OR. Add the OR subquery to the main one
 		    query->m_subs.push_back(orClause);
 		    DPRINT((stderr, "Adding OR chain to main\n"));
 		    orClause = 0;
 		}
 		// Add new subquery to main one.
 		query->m_subs.push_back(nclause);
 		DPRINT((stderr, "Adding to main chain\n"));
 	    }
 	    prev_or = false;
 	}
@ -369,6 +382,12 @@ StringToWasaQuery::Internal::stringToQuery(const string& str, string& reason)
 	    break;
    }
    if (orClause) {
 	// Getting out of OR. Add the OR subquery to the main one
 	query->m_subs.push_back(orClause);
 	DPRINT((stderr, "Adding OR chain to main\n"));
    }
    regfree(&m_rx);
    m_rxneedsfree = false;
    return query;
@ -404,4 +423,4 @@ int main(int argc, char **argv)
    exit(0);
 }
-#endif // TEST_STRINGTOQUERY
+#endif // TEST_WASASTRINGTOQUERY
--- a/src/query/wasastringtoquery.h
+++ b/src/query/wasastringtoquery.h
@ -1,6 +1,6 @@
 #ifndef _WASASTRINGTOQUERY_H_INCLUDED_
 #define _WASASTRINGTOQUERY_H_INCLUDED_
-/* @(#$Id: wasastringtoquery.h,v 1.3 2006-12-10 17:03:08 dockes Exp $  (C) 2006 J.F.Dockes */
+/* @(#$Id: wasastringtoquery.h,v 1.4 2007-01-17 13:53:41 dockes Exp $  (C) 2006 J.F.Dockes */
 /*
 *   This program is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
@ -40,23 +40,28 @@ public:
    {}
    ~WasaQuery();
-    // Get string describing the query tree from this point
+    /** Get string describing the query tree from this point */
    void describe(string &desc) const;
    /** Op to be performed on either value or subqueries */
    WasaQuery::Op      m_op;
    /** Field specification if any (ie: title, author ...) */
    string             m_fieldspec;
-    /* Valid for op == OP_LEAF */
+
    /* String value. Valid for op == OP_LEAF */
    string             m_value;
-    /* Valid for conjunctions */
+
    /** Subqueries. Valid for conjunctions */
    vector<WasaQuery*> m_subs;
-    /* Restrict results to some file type, defined by either mime, app group, 
+    /** Restrict results to some file type, defined by either mime,
-     * or extension */
+     *  app group, or extension */
    enum TypeKind {WQTK_NONE, WQTK_MIME, WQTK_GROUP, WQTK_EXT};
    TypeKind           m_typeKind;
    vector<string>     m_types;
-    /* Sort on relevance, date, name or group */
+    /** Sort on relevance, date, name or group */
    enum SortKind {WQSK_REL, WQSK_DATE, WQSK_ALPHA, WQSK_GROUP};
    vector<SortKind>   m_sortSpec;
 };
--- a/src/query/wasatorcl.cpp
+++ b/src/query/wasatorcl.cpp
@ -1,5 +1,5 @@
 #ifndef lint
-static char rcsid[] = "@(#$Id: wasatorcl.cpp,v 1.2 2006-12-10 17:03:08 dockes Exp $ (C) 2006 J.F.Dockes";
+static char rcsid[] = "@(#$Id: wasatorcl.cpp,v 1.3 2007-01-17 13:53:41 dockes Exp $ (C) 2006 J.F.Dockes";
 #endif
 #ifndef TEST_WASATORCL
@ -27,11 +27,13 @@ Rcl::SearchData *wasaQueryToRcl(WasaQuery *wasa)
 	    if ((*it)->m_value.find_first_of(" \t\n\r") != string::npos) {
 		sdata->addClause
 		    (new Rcl::SearchDataClauseDist(Rcl::SCLT_PHRASE, 
-						     (*it)->m_value, 0));
+						   (*it)->m_value, 0, 
 						   (*it)->m_fieldspec));
 	    } else {
 		sdata->addClause
 		    (new Rcl::SearchDataClauseSimple(Rcl::SCLT_AND, 
-						     (*it)->m_value));
+						     (*it)->m_value, 
 						     (*it)->m_fieldspec));
 	    }
 	    break;
 	case WasaQuery::OP_EXCL:
@ -41,7 +43,8 @@ Rcl::SearchData *wasaQueryToRcl(WasaQuery *wasa)
 	    sdata->addClause
 		(new Rcl::SearchDataClauseSimple(Rcl::SCLT_EXCL, 
 						 string("\"") + 
-						 (*it)->m_value + "\""));
+						 (*it)->m_value + "\"",
 						 (*it)->m_fieldspec));
 	    break;
 	case WasaQuery::OP_OR:
 	    // Concatenate all OR values as phrases. Hope there are no
@ -55,7 +58,8 @@ Rcl::SearchData *wasaQueryToRcl(WasaQuery *wasa)
 		}
 		sdata->addClause
 		    (new Rcl::SearchDataClauseSimple(Rcl::SCLT_OR, 
-						     orvalue));
+						     orvalue,
 						     (*it)->m_fieldspec));
 	    }
 	}
    }
@ -105,7 +109,7 @@ int main(int argc, char *argv[])
    if (argc != 1) {
 	fprintf(stderr, "need one arg\n");
-	exit(1);
+	return 1;
    }
    const string str = *argv++;argc--;
    string reason;
@ -113,14 +117,12 @@ int main(int argc, char *argv[])
    RclConfig *config = recollinit(RCLINIT_NONE, 0, 0, reason, 0);
    if (config == 0 || !config->ok()) {
        cerr << "Configuration problem: " << reason << endl;
-        exit(1);
+	return 1;
    }
    string dbdir = config->getDbDir();
    if (dbdir.empty()) {
 	// Note: this will have to be replaced by a call to a
 	// configuration buildin dialog for initial configuration
        cerr << "Configuration problem: " << "No dbdir" << endl;
-	exit(1);
+	return 1;
    }
    Rcl::Db rcldb;
    if (!rcldb.open(dbdir, Rcl::Db::DbRO, 0)) {
--- a/src/rcldb/rcldb.cpp
+++ b/src/rcldb/rcldb.cpp
@ -1,5 +1,5 @@
 #ifndef lint
-static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.101 2006-12-19 12:11:21 dockes Exp $ (C) 2004 J.F.Dockes";
+static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.102 2007-01-17 13:53:41 dockes Exp $ (C) 2004 J.F.Dockes";
 #endif
 /*
 *   This program is free software; you can redistribute it and/or modify
@ -80,6 +80,7 @@ namespace Rcl {
 // Synthetic abstract marker (to discriminate from abstract actually
 // found in doc)
 const static string rclSyntAbs = "?!#@";
 const static string emptystring;
 // A class for data and methods that would have to expose
 // Xapian-specific stuff if they were in Rcl::Db. There could actually be
@ -703,15 +704,24 @@ bool Db::isopen()
    return m_ndb->m_isopen;
 }
-// A small class to hold state while splitting text
+// The text splitter callback class which receives words from the
 // splitter and adds postings to the Xapian document.
 class mySplitterCB : public TextSplitCB {
 public:
-    Xapian::Document &doc;
+    Xapian::Document &doc;   // Xapian document 
    Xapian::termpos basepos; // Base for document section
-    Xapian::termpos curpos;  // Last position sent to callback
+    Xapian::termpos curpos;  // Current position. Used to set basepos for the
-    mySplitterCB(Xapian::Document &d) : doc(d), basepos(1), curpos(0)
+                             // following section
    mySplitterCB(Xapian::Document &d) 
 	: doc(d), basepos(1), curpos(0)
    {}
    bool takeword(const std::string &term, int pos, int, int);
    void setprefix(const string& pref) {prefix = pref;}
 private:
    // If prefix is set, we also add a posting for the prefixed terms
    // (ie: for titles, add postings for both "term" and "Sterm")
    string  prefix; 
 };
 // Callback for the document to word splitting class during indexation
@ -731,7 +741,11 @@ bool mySplitterCB::takeword(const std::string &term, int pos, int, int)
 	// be possible to assign different weights to doc parts (ie title)
 	// by using a higher value
 	curpos = pos;
-	doc.add_posting(term, basepos + curpos, 1);
+	pos += basepos;
 	doc.add_posting(term, pos, 1);
 	if (!prefix.empty()) {
 	    doc.add_posting(prefix + term, pos, 1);
 	}
 	return true;
    } catch (const Xapian::Error &e) {
 	ermsg = e.get_msg().c_str();
@ -804,8 +818,9 @@ bool Db::add(const string &fn, const Doc &idoc,
 	doc.abstract = truncate_to_word(doc.abstract, m_idxAbsTruncLen);
    }
    doc.abstract = neutchars(doc.abstract, "\n\r");
-    doc.title = truncate_to_word(doc.title, 100);
+    doc.title = neutchars(truncate_to_word(doc.title, 150), "\n\r");
-    doc.keywords = truncate_to_word(doc.keywords, 300);
+    doc.author = neutchars(truncate_to_word(doc.author, 150), "\n\r");
    doc.keywords = neutchars(truncate_to_word(doc.keywords, 300), "\n\r");
    Xapian::Document newdocument;
@ -824,13 +839,30 @@ bool Db::add(const string &fn, const Doc &idoc,
    }
    // Split and index title
-    LOGDEB2(("Db::add: split title [%s]\n", doc.title.c_str()));
+    if (!doc.title.empty()) {
-    if (!dumb_string(doc.title, noacc)) {
+	LOGDEB2(("Db::add: split title [%s]\n", doc.title.c_str()));
-	LOGERR(("Db::add: dumb_string failed\n"));
+	if (!dumb_string(doc.title, noacc)) {
-	return false;
+	    LOGERR(("Db::add: dumb_string failed\n"));
 	    return false;
 	}
 	splitData.setprefix("S"); // Subject
 	splitter.text_to_words(noacc);
 	splitData.setprefix(emptystring);
 	splitData.basepos += splitData.curpos + 100;
    }
    // Split and index author
    if (!doc.author.empty()) {
 	LOGDEB2(("Db::add: split author [%s]\n", doc.author.c_str()));
 	if (!dumb_string(doc.author, noacc)) {
 	    LOGERR(("Db::add: dumb_string failed\n"));
 	    return false;
 	}
 	splitData.setprefix("A"); 
 	splitter.text_to_words(noacc);
 	splitData.setprefix(emptystring);
 	splitData.basepos += splitData.curpos + 100;
    }
    splitter.text_to_words(noacc);
    splitData.basepos += splitData.curpos + 100;
    // Split and index body
    LOGDEB2(("Db::add: split body\n"));
@ -842,13 +874,17 @@ bool Db::add(const string &fn, const Doc &idoc,
    splitData.basepos += splitData.curpos + 100;
    // Split and index keywords
-    LOGDEB2(("Db::add: split kw [%s]\n", doc.keywords.c_str()));
+    if (!doc.keywords.empty()) {
-    if (!dumb_string(doc.keywords, noacc)) {
+	LOGDEB2(("Db::add: split kw [%s]\n", doc.keywords.c_str()));
-	LOGERR(("Db::add: dumb_string failed\n"));
+	if (!dumb_string(doc.keywords, noacc)) {
-	return false;
+	    LOGERR(("Db::add: dumb_string failed\n"));
 	    return false;
 	}
 	splitData.setprefix("K");
 	splitter.text_to_words(noacc);
 	splitData.setprefix(emptystring);
 	splitData.basepos += splitData.curpos + 100;
    }
    splitter.text_to_words(noacc);
    splitData.basepos += splitData.curpos + 100;
    // Split and index abstract. We don't do this if it is synthetic
    // any more (this used to give a relevance boost to the beginning
@ -946,6 +982,9 @@ bool Db::add(const string &fn, const Doc &idoc,
    record += "\ncaption=" + doc.title;
    record += "\nkeywords=" + doc.keywords;
    record += "\nabstract=" + doc.abstract;
    if (!doc.author.empty()) {
 	record += "\nauthor=" + doc.author;
    }
    record += "\n";
    LOGDEB1(("Newdocument data: %s\n", record.c_str()));
    newdocument.set_data(record);
--- a/src/rcldb/rcldoc.h
+++ b/src/rcldb/rcldoc.h
@ -16,7 +16,7 @@
 */
 #ifndef _RCLDOC_H_INCLUDED_
 #define _RCLDOC_H_INCLUDED_
-/* @(#$Id: rcldoc.h,v 1.1 2006-12-14 14:54:13 dockes Exp $  (C) 2006 J.F.Dockes */
+/* @(#$Id: rcldoc.h,v 1.2 2007-01-17 13:53:41 dockes Exp $  (C) 2006 J.F.Dockes */
 #include <string>
@ -48,6 +48,7 @@ class Doc {
    string origcharset;  // Charset we transcoded from (in case we want back)
                         // Possibly set by handler
    string title;        // Possibly set by handler
    string author;       // Possibly set by handler
    string keywords;     // Possibly set by handler
    string abstract;     // Possibly set by handler
    bool   syntabs;      // true if abstract is just the top of doc, not an 
--- a/src/rcldb/searchdata.cpp
+++ b/src/rcldb/searchdata.cpp
@ -1,5 +1,5 @@
 #ifndef lint
-static char rcsid[] = "@(#$Id: searchdata.cpp,v 1.7 2006-12-19 12:11:21 dockes Exp $ (C) 2006 J.F.Dockes";
+static char rcsid[] = "@(#$Id: searchdata.cpp,v 1.8 2007-01-17 13:53:41 dockes Exp $ (C) 2006 J.F.Dockes";
 #endif
 /*
 *   This program is free software; you can redistribute it and/or modify
@ -154,6 +154,7 @@ public:
    bool translate(const string &iq,
 		   const string &prefix,
 		   string &ermsg,
 		   list<Xapian::Query> &pqueries,
 		   int slack = 0, bool useNear = false);
@ -257,6 +258,14 @@ void multiply_groups(vector<vector<string> >::const_iterator vvit,
    }
 }
 // Prepend 'prefix' to every string of 'terms', modifying the list in
 // place. An empty prefix leaves the list untouched.
 static void addPrefix(list<string>& terms, const string& prefix)
 {
     if (!prefix.empty()) {
 	list<string>::iterator cur = terms.begin();
 	while (cur != terms.end()) {
 	    // Same effect as inserting the prefix at offset 0
 	    *cur = prefix + *cur;
 	    ++cur;
 	}
     }
 }
 /** 
 * Turn string into list of xapian queries. There is little
 * interpretation done on the string (no +term -term or filename:term
@ -271,6 +280,7 @@ void multiply_groups(vector<vector<string> >::const_iterator vvit,
 *   count)
 */
 bool StringToXapianQ::translate(const string &iq,
 				const string &prefix,
 				string &ermsg,
 				list<Xapian::Query> &pqueries,
 				int slack, bool useNear)
@ -301,24 +311,25 @@ bool StringToXapianQ::translate(const string &iq,
 	    splitterS.text_to_words(*it);
 	    TextSplit splitterW(&splitDataW, TextSplit::TXTS_NOSPANS);
 	    splitterW.text_to_words(*it);
-	    wsQData& splitData = splitDataS;
+	    wsQData *splitData = &splitDataS;
-	    if (splitDataS.terms.size() > 1 && splitDataS.terms.size() != 
+	    if (splitDataS.terms.size() > 1 && 
-		splitDataW.terms.size())
+		splitDataS.terms.size() != splitDataW.terms.size())
-		splitData = splitDataW;
+		splitData = &splitDataW;
 	    LOGDEB1(("strToXapianQ: splitter term count: %d\n", 
-		     splitData.terms.size()));
+		     splitData->terms.size()));
-	    switch(splitData.terms.size()) {
+	    switch(splitData->terms.size()) {
 	    case 0: continue;// ??
 	    case 1: // Not a real phrase: one term
 		{
-		    string term = splitData.terms.front();
+		    string term = splitData->terms.front();
 		    list<string> exp;  
 		    maybeStemExp(false, term, exp);
 		    m_terms.insert(m_terms.end(), exp.begin(), exp.end());
 		    // Push either term or OR of stem-expanded set
 		    addPrefix(exp, prefix);
 		    pqueries.push_back(Xapian::Query(Xapian::Query::OP_OR, 
 						     exp.begin(), exp.end()));
 		    m_terms.insert(m_terms.end(), exp.begin(), exp.end());
 		}
 		break;
@ -329,8 +340,8 @@ bool StringToXapianQ::translate(const string &iq,
 		list<Xapian::Query> orqueries;
 		bool hadmultiple = false;
 		vector<vector<string> >groups;
-		for (vector<string>::iterator it = splitData.terms.begin();
+		for (vector<string>::iterator it = splitData->terms.begin();
-		     it != splitData.terms.end(); it++) {
+		     it != splitData->terms.end(); it++) {
 		    // Some version of xapian will accept only one OR clause
 		    // inside NEAR, all others must be leafs
 		    bool nostemexp = 
@ -341,6 +352,7 @@ bool StringToXapianQ::translate(const string &iq,
 		    maybeStemExp(nostemexp, *it, exp);
 		    groups.push_back(vector<string>(exp.begin(), exp.end()));
 		    addPrefix(exp, prefix);
 		    orqueries.push_back(Xapian::Query(Xapian::Query::OP_OR, 
 						      exp.begin(), exp.end()));
 #ifdef XAPIAN_NEAR_EXPAND_SINGLE_BUF
@ -352,7 +364,7 @@ bool StringToXapianQ::translate(const string &iq,
 		pqueries.push_back(Xapian::Query(op,
 						 orqueries.begin(),
 						 orqueries.end(),
-					 splitData.terms.size() + slack));
+					 splitData->terms.size() + slack));
 		// Add NEAR/PHRASE groups to the highlighting data. Must
 		// push all combinations
 		vector<vector<string> > allcombs;
@ -378,6 +390,28 @@ bool StringToXapianQ::translate(const string &iq,
    return true;
 }
 // Map a field specification (as typed in a query, any letter case) to
 // the term prefix used for that field. Returns an empty string when
 // the field name is not one of the known ones. This should probably
 // be an Rcl::Db method and much more configurable (store the prefix
 // translation list in the configuration ?)
 static string fieldToPrefix(const string& i_field)
 {
     // Known field names, each followed by the prefix it maps to
     static const char *const known[][2] = {
 	{"title", "S"},
 	{"caption", "S"},
 	{"subject", "S"},
 	{"author", "A"},
 	{"from", "A"},
 	{"keyword", "K"}
     };
     // Lookup table, filled on first call only
     static map<string, string> prefixOf;
     if (prefixOf.empty()) {
 	const unsigned int nknown = sizeof(known) / sizeof(known[0]);
 	for (unsigned int i = 0; i < nknown; i++)
 	    prefixOf[known[i][0]] = known[i][1];
     }

     // Field names are matched on a lowercased copy of the input
     string lcfield(i_field);
     stringtolower(lcfield);
     map<string, string>::const_iterator found = prefixOf.find(lcfield);
     if (found == prefixOf.end())
 	return "";
     return found->second;
 }
 // Translate a simple OR, AND, or EXCL search clause. 
 bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p, 
 					   const string& stemlang)
@ -397,9 +431,12 @@ bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p,
 	LOGERR(("SearchDataClauseSimple: bad m_tp %d\n", m_tp));
 	return false;
    }
    string prefix;
    if (!m_field.empty())
 	prefix = fieldToPrefix(m_field);
    list<Xapian::Query> pqueries;
    StringToXapianQ tr(db, stemlang);
-    if (!tr.translate(m_text, m_reason, pqueries))
+    if (!tr.translate(m_text, prefix, m_reason, pqueries))
 	return false;
    if (pqueries.empty()) {
 	LOGERR(("SearchDataClauseSimple: resolved to null query\n"));
@ -437,12 +474,16 @@ bool SearchDataClauseDist::toNativeQuery(Rcl::Db &db, void *p,
    list<Xapian::Query> pqueries;
    Xapian::Query nq;
    string prefix;
    if (!m_field.empty())
 	prefix = fieldToPrefix(m_field);
    // Use stringToXapianQueries to lowercase and simplify the phrase
    // terms etc. The result should be a single element list
    string s = string("\"") + m_text + string("\"");
    bool useNear = m_tp == SCLT_NEAR;
    StringToXapianQ tr(db, stemlang);
-    if (!tr.translate(s, m_reason, pqueries, m_slack, useNear))
+    if (!tr.translate(s, prefix, m_reason, pqueries, m_slack, useNear))
 	return false;
    if (pqueries.empty()) {
 	LOGERR(("SearchDataClauseDist: resolved to null query\n"));
--- a/src/rcldb/searchdata.h
+++ b/src/rcldb/searchdata.h
@ -16,7 +16,7 @@
 */
 #ifndef _SEARCHDATA_H_INCLUDED_
 #define _SEARCHDATA_H_INCLUDED_
-/* @(#$Id: searchdata.h,v 1.7 2006-12-05 15:17:13 dockes Exp $  (C) 2004 J.F.Dockes */
+/* @(#$Id: searchdata.h,v 1.8 2007-01-17 13:53:41 dockes Exp $  (C) 2004 J.F.Dockes */
 /** 
 * Structures to hold data coming almost directly from the gui
@ -46,11 +46,22 @@ enum SClType {
 class SearchDataClause;
 /** 
- * Holder for a list of search clauses. Some of the clauses may be be reference
+  Data structure representing A Recoll query.
- * to other subqueries in the future. For now, they just reflect user entry in 
+  This is currently simply a list of search clauses. 
- * a query field: type, some text and possibly a distance. Each clause may
+
- * hold several queries in the Xapian sense, for exemple several terms
+  For now, clauses in the list just reflect user entry in a query
- * and phrases as would result from ["this is a phrase" term1 term2]
+  field: some text, a clause type (AND/OR/NEAR etc.) and possibly a
  distance. Each clause may hold several queries in the Xapian sense,
  for exemple several terms and phrases as would result from 
  ["this is a phrase" term1 term2]
  This means that SearchData will be translated into a Xapian
  Query tree of depth 2.
  The structure might be extended in the future so that some of the
  clauses may be references to other subqueries (there doesn't seem to
  be an urgent need for this)
 */
 class SearchData {
 public:
@ -134,15 +145,19 @@ protected:
 */
 class SearchDataClauseSimple : public SearchDataClause {
 public:
-    SearchDataClauseSimple(SClType tp, string txt)
+    SearchDataClauseSimple(SClType tp, const string& txt, 
-	: SearchDataClause(tp), m_text(txt), m_slack(0) {}
+			   const string& fld = "")
 	: SearchDataClause(tp), m_text(txt), m_field(fld), m_slack(0) {}
    virtual ~SearchDataClauseSimple() {}
    /** Translate to Xapian query */
    virtual bool toNativeQuery(Rcl::Db &db, void *, const string& stemlang);
-    virtual bool getTerms(vector<string>& terms, 
+    /** Retrieve query terms and term groups. This is used for highlighting */
-			  vector<vector<string> >& groups,
+    virtual bool getTerms(vector<string>& terms, /* Single terms */
-			  vector<int>& gslks) const
+			  vector<vector<string> >& groups, /* Prox grps */
 			  vector<int>& gslks) const        /* Prox slacks */
    {
 	terms.insert(terms.end(), m_terms.begin(), m_terms.end());
 	groups.insert(groups.end(), m_groups.begin(), m_groups.end());
@ -151,7 +166,8 @@ public:
    }
 protected:
-    string  m_text;
+    string  m_text;  // Raw user entry text.
    string  m_field; // Field specification if any
    // Single terms and phrases resulting from breaking up m_text;
    // valid after toNativeQuery() call
    vector<string>          m_terms;
@ -161,10 +177,10 @@ protected:
    int m_slack;
 };
-/** Filename search. */
+/** Filename search clause. */
 class SearchDataClauseFilename : public SearchDataClauseSimple {
 public:
-    SearchDataClauseFilename(string txt)
+    SearchDataClauseFilename(const string& txt)
 	: SearchDataClauseSimple(SCLT_FILENAME, txt) {}
    virtual ~SearchDataClauseFilename() {}
    virtual bool toNativeQuery(Rcl::Db &db, void *, const string& stemlang);
@ -176,8 +192,9 @@ public:
 */
 class SearchDataClauseDist : public SearchDataClauseSimple {
 public:
-    SearchDataClauseDist(SClType tp, string txt, int slack) 
+    SearchDataClauseDist(SClType tp, const string& txt, int slack, 
-	: SearchDataClauseSimple(tp, txt) {m_slack = slack;}
+			 const string& fld = "")
 	: SearchDataClauseSimple(tp, txt, fld) {m_slack = slack;}
    virtual ~SearchDataClauseDist() {}
    virtual bool toNativeQuery(Rcl::Db &db, void *, const string& stemlang);