added field/prefixes for author and title + command line query language

2007-01-17 13:53:41 +00:00 · 2007-01-17 13:53:41 +00:00 · 1d683ad411
commit 1d683ad411
parent ee85be5c61
15 changed files with 256 additions and 105 deletions
--- a/src/internfile/internfile.cpp
+++ b/src/internfile/internfile.cpp
@ -1,5 +1,5 @@
 #ifndef lint
-static char rcsid[] = "@(#$Id: internfile.cpp,v 1.24 2007-01-15 13:06:38 dockes Exp $ (C) 2004 J.F.Dockes";
+static char rcsid[] = "@(#$Id: internfile.cpp,v 1.25 2007-01-17 13:53:40 dockes Exp $ (C) 2004 J.F.Dockes";
 #endif
 /*
 *   This program is free software; you can redistribute it and/or modify
@ -236,6 +236,7 @@ static inline bool getKeyValue(const map<string, string>& docdata,
 }

 static const string keyab("abstract");
+static const string keyau("author");
 static const string keycs("charset");
 static const string keyct("content");
 static const string keyfn("filename");
@ -251,6 +252,7 @@ bool FileInterner::dijontorcl(Rcl::Doc& doc)
    Dijon::Filter *df = m_handlers.back();
    const std::map<std::string, std::string>& docdata = df->get_meta_data();

+    getKeyValue(docdata, keyau, doc.author);
    getKeyValue(docdata, keyoc, doc.origcharset);
    getKeyValue(docdata, keyct, doc.text);    
    getKeyValue(docdata, keytt, doc.title);
--- a/src/internfile/mh_html.cpp
+++ b/src/internfile/mh_html.cpp
@ -122,6 +122,7 @@ bool MimeHandlerHtml::next_document()
    m_metaData["charset"] = "utf-8";
    m_metaData["title"] = result.title;
    m_metaData["keywords"] = result.keywords;
+    m_metaData["author"] = result.author;
    m_metaData["modificationdate"] = result.dmtime;
    m_metaData["sample"] = result.sample;
    m_metaData["mimetype"] = "text/plain";
--- a/src/internfile/mh_mail.cpp
+++ b/src/internfile/mh_mail.cpp
@ -1,5 +1,5 @@
 #ifndef lint
-static char rcsid[] = "@(#$Id: mh_mail.cpp,v 1.28 2007-01-13 10:28:37 dockes Exp $ (C) 2005 J.F.Dockes";
+static char rcsid[] = "@(#$Id: mh_mail.cpp,v 1.29 2007-01-17 13:53:40 dockes Exp $ (C) 2005 J.F.Dockes";
 #endif
 /*
 *   This program is free software; you can redistribute it and/or modify
@ -217,6 +217,9 @@ bool MimeHandlerMail::processMsg(Binc::MimePart *doc, int depth)
    if (doc->h.getFirstHeader("From", hi)) {
 	rfc2047_decode(hi.getValue(), transcoded);
 	text += string("From: ") + transcoded + string("\n");
+	if (depth == 1) {
+	    m_metaData["author"] = transcoded;
+	}
    }
    if (doc->h.getFirstHeader("To", hi)) {
 	rfc2047_decode(hi.getValue(), transcoded);
@ -245,7 +248,7 @@ bool MimeHandlerMail::processMsg(Binc::MimePart *doc, int depth)
    }
    text += '\n';

-    LOGDEB2(("MimeHandlerMail::rocessMsg:ismultipart %d mime subtype '%s'\n",
+    LOGDEB2(("MimeHandlerMail::processMsg:ismultipart %d mime subtype '%s'\n",
 	    doc->isMultipart(), doc->getSubType().c_str()));
    walkmime(doc, depth);

--- a/src/internfile/myhtmlparse.cpp
+++ b/src/internfile/myhtmlparse.cpp
@ -154,6 +154,11 @@ MyHtmlParser::opening_tag(const string &tag, const map<string,string> &p)
 			    string tmp = i->second;
 			    decode_entities(tmp);
 			    keywords += tmp;
+			} else if (name == "author") {
+			    if (!author.empty()) author += ' ';
+			    string tmp = i->second;
+			    decode_entities(tmp);
+			    author += tmp;
 			} else if (name == "date") {
 			    // Yes this doesnt exist. It's output by filters
 			    // And the format isn't even standard http/html
@ -168,19 +173,6 @@ MyHtmlParser::opening_tag(const string &tag, const map<string,string> &p)
 				dmtime = ascuxtime;
 			    }
 			} 
-#if 0 // We're not a robot, so we don't care about robots metainfo
-			else if (name == "robots") {
-			    string val = i->second;
-			    decode_entities(val);
-			    lowercase_term(val);
-			    if (val.find("none") != string::npos ||
-				val.find("noindex") != string::npos) {
-				indexing_allowed = false;
-				LOGDEB1(("myhtmlparse: robots/noindex\n"));
-				throw false;
-			    }
-			}
-#endif // 0
 		    } else if ((j = p.find("http-equiv")) != p.end()) {
 			string hequiv = j->second;
 			lowercase_term(hequiv);
--- a/src/internfile/myhtmlparse.h
+++ b/src/internfile/myhtmlparse.h
@ -37,7 +37,7 @@ class MyHtmlParser : public HtmlParser {
    bool in_body_tag; 
    bool in_pre_tag;
    bool pending_space;
-    string title, sample, keywords, dump, dmtime;
+    string title, sample, keywords, dump, dmtime, author;
    string ocharset; // This is the charset our user thinks the doc was
    string charset; // This is the charset it was supposedly converted to
    string doccharset; // Set this to value of charset parameter in header
--- a/src/lib/Makefile
+++ b/src/lib/Makefile
@ -8,8 +8,8 @@ LIBS = librcl.a

 all: $(LIBS)

-OBJS =  rclaspell.o rclconfig.o rclinit.o textsplit.o unacpp.o csguess.o indexer.o mimetype.o htmlparse.o myhtmlparse.o mimehandler.o internfile.o mh_exec.o mh_html.o mh_mail.o mh_mbox.o mh_text.o docseq.o history.o sortseq.o pathhash.o rcldb.o searchdata.o stemdb.o base64.o conftree.o copyfile.o debuglog.o execmd.o fstreewalk.o idfile.o md5.o mimeparse.o pathut.o readfile.o smallut.o transcode.o wipedir.o x11mon.o
-DEPS =  rclaspell.dep.stamp rclconfig.dep.stamp rclinit.dep.stamp textsplit.dep.stamp unacpp.dep.stamp csguess.dep.stamp indexer.dep.stamp mimetype.dep.stamp htmlparse.dep.stamp myhtmlparse.dep.stamp mimehandler.dep.stamp internfile.dep.stamp mh_exec.dep.stamp mh_html.dep.stamp mh_mail.dep.stamp mh_mbox.dep.stamp mh_text.dep.stamp docseq.dep.stamp history.dep.stamp sortseq.dep.stamp pathhash.dep.stamp rcldb.dep.stamp searchdata.dep.stamp stemdb.dep.stamp base64.dep.stamp conftree.dep.stamp copyfile.dep.stamp debuglog.dep.stamp execmd.dep.stamp fstreewalk.dep.stamp idfile.dep.stamp md5.dep.stamp mimeparse.dep.stamp pathut.dep.stamp readfile.dep.stamp smallut.dep.stamp transcode.dep.stamp wipedir.dep.stamp x11mon.dep.stamp
+OBJS =  rclaspell.o rclconfig.o rclinit.o textsplit.o unacpp.o csguess.o indexer.o mimetype.o htmlparse.o myhtmlparse.o mimehandler.o internfile.o mh_exec.o mh_html.o mh_mail.o mh_mbox.o mh_text.o docseq.o history.o sortseq.o wasastringtoquery.o wasatorcl.o pathhash.o rcldb.o searchdata.o stemdb.o base64.o conftree.o copyfile.o debuglog.o execmd.o fstreewalk.o idfile.o md5.o mimeparse.o pathut.o readfile.o smallut.o transcode.o wipedir.o x11mon.o
+DEPS =  rclaspell.dep.stamp rclconfig.dep.stamp rclinit.dep.stamp textsplit.dep.stamp unacpp.dep.stamp csguess.dep.stamp indexer.dep.stamp mimetype.dep.stamp htmlparse.dep.stamp myhtmlparse.dep.stamp mimehandler.dep.stamp internfile.dep.stamp mh_exec.dep.stamp mh_html.dep.stamp mh_mail.dep.stamp mh_mbox.dep.stamp mh_text.dep.stamp docseq.dep.stamp history.dep.stamp sortseq.dep.stamp wasastringtoquery.dep.stamp wasatorcl.dep.stamp pathhash.dep.stamp rcldb.dep.stamp searchdata.dep.stamp stemdb.dep.stamp base64.dep.stamp conftree.dep.stamp copyfile.dep.stamp debuglog.dep.stamp execmd.dep.stamp fstreewalk.dep.stamp idfile.dep.stamp md5.dep.stamp mimeparse.dep.stamp pathut.dep.stamp readfile.dep.stamp smallut.dep.stamp transcode.dep.stamp wipedir.dep.stamp x11mon.dep.stamp

 librcl.a : $(DEPS) $(OBJS) unac.o
 	ar ru librcl.a $(OBJS) unac.o
@ -57,6 +57,10 @@ history.o : ../query/history.cpp
 	$(CXX) $(ALL_CXXFLAGS) -c ../query/history.cpp
 sortseq.o : ../query/sortseq.cpp
 	$(CXX) $(ALL_CXXFLAGS) -c ../query/sortseq.cpp
+wasastringtoquery.o : ../query/wasastringtoquery.cpp
+	$(CXX) $(ALL_CXXFLAGS) -c ../query/wasastringtoquery.cpp
+wasatorcl.o : ../query/wasatorcl.cpp
+	$(CXX) $(ALL_CXXFLAGS) -c ../query/wasatorcl.cpp
 pathhash.o : ../rcldb/pathhash.cpp
 	$(CXX) $(ALL_CXXFLAGS) -c ../rcldb/pathhash.cpp
 rcldb.o : ../rcldb/rcldb.cpp
@ -161,6 +165,12 @@ history.dep.stamp : ../query/history.cpp
 sortseq.dep.stamp : ../query/sortseq.cpp
 	$(CXX) -M $(ALL_CXXFLAGS) ../query/sortseq.cpp > sortseq.dep
 	touch sortseq.dep.stamp
+wasastringtoquery.dep.stamp : ../query/wasastringtoquery.cpp
+	$(CXX) -M $(ALL_CXXFLAGS) ../query/wasastringtoquery.cpp > wasastringtoquery.dep
+	touch wasastringtoquery.dep.stamp
+wasatorcl.dep.stamp : ../query/wasatorcl.cpp
+	$(CXX) -M $(ALL_CXXFLAGS) ../query/wasatorcl.cpp > wasatorcl.dep
+	touch wasatorcl.dep.stamp
 pathhash.dep.stamp : ../rcldb/pathhash.cpp
 	$(CXX) -M $(ALL_CXXFLAGS) ../rcldb/pathhash.cpp > pathhash.dep
 	touch pathhash.dep.stamp
@ -238,6 +248,8 @@ include mh_text.dep
 include docseq.dep
 include history.dep
 include sortseq.dep
+include wasastringtoquery.dep
+include wasatorcl.dep
 include pathhash.dep
 include rcldb.dep
 include searchdata.dep
--- a/src/lib/mkMake
+++ b/src/lib/mkMake
@ -24,6 +24,8 @@ ${depth}/internfile/mh_text.cpp \
 ${depth}/query/docseq.cpp \
 ${depth}/query/history.cpp \
 ${depth}/query/sortseq.cpp \
+${depth}/query/wasastringtoquery.cpp \
+${depth}/query/wasatorcl.cpp \
 ${depth}/rcldb/pathhash.cpp \
 ${depth}/rcldb/rcldb.cpp \
 ${depth}/rcldb/searchdata.cpp \
--- a/src/query/Makefile
+++ b/src/query/Makefile
@ -1,11 +1,11 @@
 depth = ..
 include $(depth)/mk/sysconf

-PROGS = xadump #trhist qtry qxtry 
+PROGS = xadump rclqlang #trhist qtry qxtry 

 all: $(PROGS)

-SRCS = xadump.cpp
+SRCS = xadump.cpp rclqlang.cpp
 .cpp.o : 
 	$(CXX) -c $(ALL_CXXFLAGS) -o $@ $<

@ -14,6 +14,11 @@ xadump : $(XADUMP_OBJS)
 	$(CXX) $(ALL_CXXFLAGS) -o xadump $(XADUMP_OBJS) \
 	       $(LIBICONV) $(LIBXAPIAN) $(LIBSYS)

+RCLQLANG_OBJS= rclqlang.o $(BIGLIB)
+rclqlang : $(RCLQLANG_OBJS)
+	$(CXX) $(ALL_CXXFLAGS) -o rclqlang $(RCLQLANG_OBJS) \
+	       $(LIBICONV) $(LIBXAPIAN) $(LIBSYS)
+
 HISTORY_OBJS= trhist.o  $(BIGLIB) $(MIMELIB)
 trhist : $(HISTORY_OBJS)
 	$(CXX) $(ALL_CXXFLAGS) -o trhist $(HISTORY_OBJS) \
@ -21,9 +26,19 @@ trhist : $(HISTORY_OBJS)
 trhist.o : history.cpp history.h
 	$(CXX) $(ALL_CXXFLAGS) -DTEST_HISTORY -c -o trhist.o history.cpp

-$(BIGLIB):
-	cd $(depth)/lib;make
-   
+WASASTRINGTOQUERY_OBJS= trwasastrtoq.o  $(BIGLIB) $(MIMELIB)
+trwasastrtoq : $(WASASTRINGTOQUERY_OBJS)
+	$(CXX) $(ALL_CXXFLAGS) -o trwasastrtoq $(WASASTRINGTOQUERY_OBJS) \
+	       $(LIBICONV) $(LIBXAPIAN)
+trwasastrtoq.o : wasastringtoquery.cpp wasastringtoquery.h
+	$(CXX) $(ALL_CXXFLAGS) -DTEST_WASASTRINGTOQUERY -c \
+	       -o trwasastrtoq.o wasastringtoquery.cpp
+
+$(BIGLIB): force
+	cd $(depth)/lib;$(MAKE)
+force:
+
+  
 depend: alldeps.stamp
 alldeps.stamp : $(SRCS)
 	$(CXX) -M $(ALL_CXXFLAGS) $(SRCS) > alldeps
--- a/src/query/wasastringtoquery.cpp
+++ b/src/query/wasastringtoquery.cpp
@ -1,5 +1,5 @@
 #ifndef lint
-static char rcsid[] = "@(#$Id: wasastringtoquery.cpp,v 1.3 2006-12-10 17:03:08 dockes Exp $ (C) 2006 J.F.Dockes";
+static char rcsid[] = "@(#$Id: wasastringtoquery.cpp,v 1.4 2007-01-17 13:53:41 dockes Exp $ (C) 2006 J.F.Dockes";
 #endif
 /*
 *   This program is free software; you can redistribute it and/or modify
@ -17,7 +17,7 @@ static char rcsid[] = "@(#$Id: wasastringtoquery.cpp,v 1.3 2006-12-10 17:03:08 d
 *   Free Software Foundation, Inc.,
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 */
-#ifndef TEST_STRINGTOQUERY
+#ifndef TEST_WASASTRINGTOQUERY
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@ -25,6 +25,13 @@ static char rcsid[] = "@(#$Id: wasastringtoquery.cpp,v 1.3 2006-12-10 17:03:08 d

 #include "wasastringtoquery.h"

+//#define DEB_WASASTRINGTOQ 1
+#ifdef DEB_WASASTRINGTOQ
+#define DPRINT(X) fprintf X
+#else
+#define DPRINT(X)
+#endif
+
 WasaQuery::~WasaQuery()
 {
    for (vector<WasaQuery*>::iterator it = m_subs.begin();
@ -61,16 +68,16 @@ void WasaQuery::describe(string &desc) const
 	desc += ")";
    }
    desc += "(";
+    string fieldspec = m_fieldspec.empty() ? "" : m_fieldspec + ": ";
    switch (m_op) {
    case OP_NULL: 
 	desc += "NULL"; 
 	break;
    case OP_LEAF: 
-	desc += m_fieldspec.empty() ?
-	    m_value : m_fieldspec + ":" + m_value;
+	desc += fieldspec + m_value;
 	break;
    case OP_EXCL: 
-	desc += string("NOT (" ) + m_value + ") ";
+	desc += string("NOT (" ) + fieldspec + m_value + ") ";
 	break;
    case OP_OR: 
    case OP_AND:
@ -84,6 +91,8 @@ void WasaQuery::describe(string &desc) const
 	}
 	break;
    }
+    if (desc[desc.length() - 1] == ' ')
+	desc.erase(desc.length() - 1);
    desc += ") "; 
 }

@ -111,7 +120,7 @@ void WasaQuery::describe(string &desc) const
 * parenthesis increases the index, but we're not interested in all
 */
 static const char * parserExpr = 
-    "([oO][rR])"                     //1 OR is a special word
+    "([oO][rR])[[:space:]]*"        //1 OR is a special word
    "|"
    "("                              //2
      "([+-])?"                      //3 Force or exclude indicator
@ -125,7 +134,7 @@ static const char * parserExpr =
        "|"
        "([^[:space:]]+)"            //9 ANormalTerm
      ")"
-    ")"
+    ")[[:space:]]*"
 ;

 // For debugging the parser. But see also NMATCH
@ -236,17 +245,18 @@ StringToWasaQuery::Internal::stringToQuery(const string& str, string& reason)
 	    reason = "Internal regular expression handling error";
 	    return 0;
 	}
-#if 0
-	if (loop) printf("Next part:\n");
-	for (i = 0; i < NMATCH; i++) {
+
+#ifdef DEB_WASASTRINGTOQ
+	if (loop) DPRINT((stderr, "Next part:\n"));
+	for (unsigned int i = 0; i < NMATCH; i++) {
 	    if (m_pmatch[i].rm_so == -1) 	continue;
 	    char match[maxmatchlen+1];
 	    memcpy(match, m_cp + m_pmatch[i].rm_so,
 		   m_pmatch[i].rm_eo - m_pmatch[i].rm_so);
 	    match[m_pmatch[i].rm_eo - m_pmatch[i].rm_so] = 0;
 	    if (matchNames[i][0])
-		printf("%10s: [%s] (%d->%d)\n", matchNames[i], match, 
-		       (int)m_pmatch[i].rm_so, (int)m_pmatch[i].rm_eo);
+		DPRINT((stderr, "%10s: [%s] (%d->%d)\n", matchNames[i], match, 
+			(int)m_pmatch[i].rm_so, (int)m_pmatch[i].rm_eo));
 	}
 #endif
 	char match[maxmatchlen+1];
@ -348,14 +358,17 @@ StringToWasaQuery::Internal::stringToQuery(const string& str, string& reason)
 	    if (prev_or) {
 		// We're in an OR subquery, add new subquery
 		orClause->m_subs.push_back(nclause);
+		DPRINT((stderr, "Adding to OR chain\n"));
 	    } else {
 		if (orClause) {
 		    // Getting out of OR. Add the OR subquery to the main one
 		    query->m_subs.push_back(orClause);
+		    DPRINT((stderr, "Adding OR chain to main\n"));
 		    orClause = 0;
 		}
 		// Add new subquery to main one.
 		query->m_subs.push_back(nclause);
+		DPRINT((stderr, "Adding to main chain\n"));
 	    }
 	    prev_or = false;
 	}
@ -369,6 +382,12 @@ StringToWasaQuery::Internal::stringToQuery(const string& str, string& reason)
 	    break;
    }

+    if (orClause) {
+	// Getting out of OR. Add the OR subquery to the main one
+	query->m_subs.push_back(orClause);
+	DPRINT((stderr, "Adding OR chain to main\n"));
+    }
+
    regfree(&m_rx);
    m_rxneedsfree = false;
    return query;
@ -404,4 +423,4 @@ int main(int argc, char **argv)
    exit(0);
 }

-#endif // TEST_STRINGTOQUERY
+#endif // TEST_WASASTRINGTOQUERY
--- a/src/query/wasastringtoquery.h
+++ b/src/query/wasastringtoquery.h
@ -1,6 +1,6 @@
 #ifndef _WASASTRINGTOQUERY_H_INCLUDED_
 #define _WASASTRINGTOQUERY_H_INCLUDED_
-/* @(#$Id: wasastringtoquery.h,v 1.3 2006-12-10 17:03:08 dockes Exp $  (C) 2006 J.F.Dockes */
+/* @(#$Id: wasastringtoquery.h,v 1.4 2007-01-17 13:53:41 dockes Exp $  (C) 2006 J.F.Dockes */
 /*
 *   This program is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
@ -40,23 +40,28 @@ public:
    {}
    ~WasaQuery();

-    // Get string describing the query tree from this point
+    /** Get string describing the query tree from this point */
    void describe(string &desc) const;

+    /** Op to be performed on either value or subqueries */
    WasaQuery::Op      m_op;
+
+    /** Field specification, if any (e.g. title, author ...) */
    string             m_fieldspec;
-    /* Valid for op == OP_LEAF */
+
+    /** String value. Valid for op == OP_LEAF and op == OP_EXCL */
    string             m_value;
-    /* Valid for conjunctions */
+
+    /** Subqueries. Valid for conjunctions */
    vector<WasaQuery*> m_subs;
    
-    /* Restrict results to some file type, defined by either mime, app group, 
-     * or extension */
+    /** Restrict results to some file type, defined by either mime,
+     *  app group, or extension */
    enum TypeKind {WQTK_NONE, WQTK_MIME, WQTK_GROUP, WQTK_EXT};
    TypeKind           m_typeKind;
    vector<string>     m_types;

-    /* Sort on relevance, date, name or group */
+    /** Sort on relevance, date, name or group */
    enum SortKind {WQSK_REL, WQSK_DATE, WQSK_ALPHA, WQSK_GROUP};
    vector<SortKind>   m_sortSpec;
 };
--- a/src/query/wasatorcl.cpp
+++ b/src/query/wasatorcl.cpp
@ -1,5 +1,5 @@
 #ifndef lint
-static char rcsid[] = "@(#$Id: wasatorcl.cpp,v 1.2 2006-12-10 17:03:08 dockes Exp $ (C) 2006 J.F.Dockes";
+static char rcsid[] = "@(#$Id: wasatorcl.cpp,v 1.3 2007-01-17 13:53:41 dockes Exp $ (C) 2006 J.F.Dockes";
 #endif
 #ifndef TEST_WASATORCL

@ -27,11 +27,13 @@ Rcl::SearchData *wasaQueryToRcl(WasaQuery *wasa)
 	    if ((*it)->m_value.find_first_of(" \t\n\r") != string::npos) {
 		sdata->addClause
 		    (new Rcl::SearchDataClauseDist(Rcl::SCLT_PHRASE, 
-						     (*it)->m_value, 0));
+						   (*it)->m_value, 0, 
+						   (*it)->m_fieldspec));
 	    } else {
 		sdata->addClause
 		    (new Rcl::SearchDataClauseSimple(Rcl::SCLT_AND, 
-						     (*it)->m_value));
+						     (*it)->m_value, 
+						     (*it)->m_fieldspec));
 	    }
 	    break;
 	case WasaQuery::OP_EXCL:
@ -41,7 +43,8 @@ Rcl::SearchData *wasaQueryToRcl(WasaQuery *wasa)
 	    sdata->addClause
 		(new Rcl::SearchDataClauseSimple(Rcl::SCLT_EXCL, 
 						 string("\"") + 
-						 (*it)->m_value + "\""));
+						 (*it)->m_value + "\"",
+						 (*it)->m_fieldspec));
 	    break;
 	case WasaQuery::OP_OR:
 	    // Concatenate all OR values as phrases. Hope there are no
@ -55,7 +58,8 @@ Rcl::SearchData *wasaQueryToRcl(WasaQuery *wasa)
 		}
 		sdata->addClause
 		    (new Rcl::SearchDataClauseSimple(Rcl::SCLT_OR, 
-						     orvalue));
+						     orvalue,
+						     (*it)->m_fieldspec));
 	    }
 	}
    }
@ -105,7 +109,7 @@ int main(int argc, char *argv[])

    if (argc != 1) {
 	fprintf(stderr, "need one arg\n");
-	exit(1);
+	return 1;
    }
    const string str = *argv++;argc--;
    string reason;
@ -113,14 +117,12 @@ int main(int argc, char *argv[])
    RclConfig *config = recollinit(RCLINIT_NONE, 0, 0, reason, 0);
    if (config == 0 || !config->ok()) {
        cerr << "Configuration problem: " << reason << endl;
-        exit(1);
+	return 1;
    }
    string dbdir = config->getDbDir();
    if (dbdir.empty()) {
-	// Note: this will have to be replaced by a call to a
-	// configuration buildin dialog for initial configuration
        cerr << "Configuration problem: " << "No dbdir" << endl;
-	exit(1);
+	return 1;
    }
    Rcl::Db rcldb;
    if (!rcldb.open(dbdir, Rcl::Db::DbRO, 0)) {
--- a/src/rcldb/rcldb.cpp
+++ b/src/rcldb/rcldb.cpp
@ -1,5 +1,5 @@
 #ifndef lint
-static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.101 2006-12-19 12:11:21 dockes Exp $ (C) 2004 J.F.Dockes";
+static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.102 2007-01-17 13:53:41 dockes Exp $ (C) 2004 J.F.Dockes";
 #endif
 /*
 *   This program is free software; you can redistribute it and/or modify
@ -80,6 +80,7 @@ namespace Rcl {
 // Synthetic abstract marker (to discriminate from abstract actually
 // found in doc)
 const static string rclSyntAbs = "?!#@";
+const static string emptystring;

 // A class for data and methods that would have to expose
 // Xapian-specific stuff if they were in Rcl::Db. There could actually be
@ -703,15 +704,24 @@ bool Db::isopen()
    return m_ndb->m_isopen;
 }

-// A small class to hold state while splitting text
+// The text splitter callback class which receives words from the
+// splitter and adds postings to the Xapian document.
 class mySplitterCB : public TextSplitCB {
 public:
-    Xapian::Document &doc;
+    Xapian::Document &doc;   // Xapian document 
    Xapian::termpos basepos; // Base for document section
-    Xapian::termpos curpos;  // Last position sent to callback
-    mySplitterCB(Xapian::Document &d) : doc(d), basepos(1), curpos(0)
+    Xapian::termpos curpos;  // Current position. Used to set basepos for the
+                             // following section
+    mySplitterCB(Xapian::Document &d) 
+	: doc(d), basepos(1), curpos(0)
    {}
    bool takeword(const std::string &term, int pos, int, int);
+    void setprefix(const string& pref) {prefix = pref;}
+
+private:
+    // If prefix is set, we also add a posting for the prefixed terms
+    // (e.g. for titles, add postings for both "term" and "Sterm")
+    string  prefix; 
 };

 // Callback for the document to word splitting class during indexation
@ -731,7 +741,11 @@ bool mySplitterCB::takeword(const std::string &term, int pos, int, int)
 	// be possible to assign different weigths to doc parts (ie title)
 	// by using a higher value
 	curpos = pos;
-	doc.add_posting(term, basepos + curpos, 1);
+	pos += basepos;
+	doc.add_posting(term, pos, 1);
+	if (!prefix.empty()) {
+	    doc.add_posting(prefix + term, pos, 1);
+	}
 	return true;
    } catch (const Xapian::Error &e) {
 	ermsg = e.get_msg().c_str();
@ -804,8 +818,9 @@ bool Db::add(const string &fn, const Doc &idoc,
 	doc.abstract = truncate_to_word(doc.abstract, m_idxAbsTruncLen);
    }
    doc.abstract = neutchars(doc.abstract, "\n\r");
-    doc.title = truncate_to_word(doc.title, 100);
-    doc.keywords = truncate_to_word(doc.keywords, 300);
+    doc.title = neutchars(truncate_to_word(doc.title, 150), "\n\r");
+    doc.author = neutchars(truncate_to_word(doc.author, 150), "\n\r");
+    doc.keywords = neutchars(truncate_to_word(doc.keywords, 300), "\n\r");

    Xapian::Document newdocument;

@ -824,13 +839,30 @@ bool Db::add(const string &fn, const Doc &idoc,
    }

    // Split and index title
-    LOGDEB2(("Db::add: split title [%s]\n", doc.title.c_str()));
-    if (!dumb_string(doc.title, noacc)) {
-	LOGERR(("Db::add: dumb_string failed\n"));
-	return false;
+    if (!doc.title.empty()) {
+	LOGDEB2(("Db::add: split title [%s]\n", doc.title.c_str()));
+	if (!dumb_string(doc.title, noacc)) {
+	    LOGERR(("Db::add: dumb_string failed\n"));
+	    return false;
+	}
+	splitData.setprefix("S"); // Subject
+	splitter.text_to_words(noacc);
+	splitData.setprefix(emptystring);
+	splitData.basepos += splitData.curpos + 100;
+    }
+
+    // Split and index author
+    if (!doc.author.empty()) {
+	LOGDEB2(("Db::add: split author [%s]\n", doc.author.c_str()));
+	if (!dumb_string(doc.author, noacc)) {
+	    LOGERR(("Db::add: dumb_string failed\n"));
+	    return false;
+	}
+	splitData.setprefix("A"); 
+	splitter.text_to_words(noacc);
+	splitData.setprefix(emptystring);
+	splitData.basepos += splitData.curpos + 100;
    }
-    splitter.text_to_words(noacc);
-    splitData.basepos += splitData.curpos + 100;

    // Split and index body
    LOGDEB2(("Db::add: split body\n"));
@ -842,13 +874,17 @@ bool Db::add(const string &fn, const Doc &idoc,
    splitData.basepos += splitData.curpos + 100;

    // Split and index keywords
-    LOGDEB2(("Db::add: split kw [%s]\n", doc.keywords.c_str()));
-    if (!dumb_string(doc.keywords, noacc)) {
-	LOGERR(("Db::add: dumb_string failed\n"));
-	return false;
+    if (!doc.keywords.empty()) {
+	LOGDEB2(("Db::add: split kw [%s]\n", doc.keywords.c_str()));
+	if (!dumb_string(doc.keywords, noacc)) {
+	    LOGERR(("Db::add: dumb_string failed\n"));
+	    return false;
+	}
+	splitData.setprefix("K");
+	splitter.text_to_words(noacc);
+	splitData.setprefix(emptystring);
+	splitData.basepos += splitData.curpos + 100;
    }
-    splitter.text_to_words(noacc);
-    splitData.basepos += splitData.curpos + 100;

    // Split and index abstract. We don't do this if it is synthetic
    // any more (this used to give a relevance boost to the beginning
@ -946,6 +982,9 @@ bool Db::add(const string &fn, const Doc &idoc,
    record += "\ncaption=" + doc.title;
    record += "\nkeywords=" + doc.keywords;
    record += "\nabstract=" + doc.abstract;
+    if (!doc.author.empty()) {
+	record += "\nauthor=" + doc.author;
+    }
    record += "\n";
    LOGDEB1(("Newdocument data: %s\n", record.c_str()));
    newdocument.set_data(record);
--- a/src/rcldb/rcldoc.h
+++ b/src/rcldb/rcldoc.h
@ -16,7 +16,7 @@
 */
 #ifndef _RCLDOC_H_INCLUDED_
 #define _RCLDOC_H_INCLUDED_
-/* @(#$Id: rcldoc.h,v 1.1 2006-12-14 14:54:13 dockes Exp $  (C) 2006 J.F.Dockes */
+/* @(#$Id: rcldoc.h,v 1.2 2007-01-17 13:53:41 dockes Exp $  (C) 2006 J.F.Dockes */

 #include <string>

@ -48,6 +48,7 @@ class Doc {
    string origcharset;  // Charset we transcoded from (in case we want back)
                         // Possibly set by handler
    string title;        // Possibly set by handler
+    string author;       // Possibly set by handler
    string keywords;     // Possibly set by handler
    string abstract;     // Possibly set by handler
    bool   syntabs;      // true if abstract is just the top of doc, not an 
--- a/src/rcldb/searchdata.cpp
+++ b/src/rcldb/searchdata.cpp
@ -1,5 +1,5 @@
 #ifndef lint
-static char rcsid[] = "@(#$Id: searchdata.cpp,v 1.7 2006-12-19 12:11:21 dockes Exp $ (C) 2006 J.F.Dockes";
+static char rcsid[] = "@(#$Id: searchdata.cpp,v 1.8 2007-01-17 13:53:41 dockes Exp $ (C) 2006 J.F.Dockes";
 #endif
 /*
 *   This program is free software; you can redistribute it and/or modify
@ -154,6 +154,7 @@ public:


    bool translate(const string &iq,
+		   const string &prefix,
 		   string &ermsg,
 		   list<Xapian::Query> &pqueries,
 		   int slack = 0, bool useNear = false);
@ -257,6 +258,14 @@ void multiply_groups(vector<vector<string> >::const_iterator vvit,
    }
 }

+static void addPrefix(list<string>& terms, const string& prefix)
+{
+    if (prefix.empty())
+	return;
+    for (list<string>::iterator it = terms.begin(); it != terms.end(); it++)
+	it->insert(0, prefix);
+}
+
 /** 
 * Turn string into list of xapian queries. There is little
 * interpretation done on the string (no +term -term or filename:term
@ -271,6 +280,7 @@ void multiply_groups(vector<vector<string> >::const_iterator vvit,
 *   count)
 */
 bool StringToXapianQ::translate(const string &iq,
+				const string &prefix,
 				string &ermsg,
 				list<Xapian::Query> &pqueries,
 				int slack, bool useNear)
@ -301,24 +311,25 @@ bool StringToXapianQ::translate(const string &iq,
 	    splitterS.text_to_words(*it);
 	    TextSplit splitterW(&splitDataW, TextSplit::TXTS_NOSPANS);
 	    splitterW.text_to_words(*it);
-	    wsQData& splitData = splitDataS;
-	    if (splitDataS.terms.size() > 1 && splitDataS.terms.size() != 
-		splitDataW.terms.size())
-		splitData = splitDataW;
+	    wsQData *splitData = &splitDataS;
+	    if (splitDataS.terms.size() > 1 && 
+		splitDataS.terms.size() != splitDataW.terms.size())
+		splitData = &splitDataW;

 	    LOGDEB1(("strToXapianQ: splitter term count: %d\n", 
-		     splitData.terms.size()));
-	    switch(splitData.terms.size()) {
+		     splitData->terms.size()));
+	    switch(splitData->terms.size()) {
 	    case 0: continue;// ??
 	    case 1: // Not a real phrase: one term
 		{
-		    string term = splitData.terms.front();
+		    string term = splitData->terms.front();
 		    list<string> exp;  
 		    maybeStemExp(false, term, exp);
+		    m_terms.insert(m_terms.end(), exp.begin(), exp.end());
 		    // Push either term or OR of stem-expanded set
+		    addPrefix(exp, prefix);
 		    pqueries.push_back(Xapian::Query(Xapian::Query::OP_OR, 
 						     exp.begin(), exp.end()));
-		    m_terms.insert(m_terms.end(), exp.begin(), exp.end());
 		}
 		break;

@ -329,8 +340,8 @@ bool StringToXapianQ::translate(const string &iq,
 		list<Xapian::Query> orqueries;
 		bool hadmultiple = false;
 		vector<vector<string> >groups;
-		for (vector<string>::iterator it = splitData.terms.begin();
-		     it != splitData.terms.end(); it++) {
+		for (vector<string>::iterator it = splitData->terms.begin();
+		     it != splitData->terms.end(); it++) {
 		    // Some version of xapian will accept only one OR clause
 		    // inside NEAR, all others must be leafs
 		    bool nostemexp = 
@ -341,6 +352,7 @@ bool StringToXapianQ::translate(const string &iq,
 		    maybeStemExp(nostemexp, *it, exp);

 		    groups.push_back(vector<string>(exp.begin(), exp.end()));
+		    addPrefix(exp, prefix);
 		    orqueries.push_back(Xapian::Query(Xapian::Query::OP_OR, 
 						      exp.begin(), exp.end()));
 #ifdef XAPIAN_NEAR_EXPAND_SINGLE_BUF
@ -352,7 +364,7 @@ bool StringToXapianQ::translate(const string &iq,
 		pqueries.push_back(Xapian::Query(op,
 						 orqueries.begin(),
 						 orqueries.end(),
-					 splitData.terms.size() + slack));
+					 splitData->terms.size() + slack));
 		// Add NEAR/PHRASE groups to the highlighting data. Must
 		// push all combinations
 		vector<vector<string> > allcombs;
@ -378,6 +390,28 @@ bool StringToXapianQ::translate(const string &iq,
    return true;
 }

+// Try to translate field specification into field prefix. This should
+// probably be an Rcl::Db method and much more configurable (store
+// prefix translation list in config ?)
+static string fieldToPrefix(const string& i_field)
+{
+    static map<string, string> fldToPrefs;
+    if (fldToPrefs.empty()) {
+	fldToPrefs["title"] = "S";
+	fldToPrefs["caption"] = "S";
+	fldToPrefs["subject"] = "S";
+	fldToPrefs["author"] = "A";
+	fldToPrefs["from"] = "A";
+	fldToPrefs["keyword"] = "K";
+    }
+    string fld(i_field); 
+    stringtolower(fld);
+    map<string, string>::const_iterator it = fldToPrefs.find(fld);
+    if (it != fldToPrefs.end())
+	return it->second;
+    return "";
+}
+
 // Translate a simple OR, AND, or EXCL search clause. 
 bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p, 
 					   const string& stemlang)
@ -397,9 +431,12 @@ bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p,
 	LOGERR(("SearchDataClauseSimple: bad m_tp %d\n", m_tp));
 	return false;
    }
+    string prefix;
+    if (!m_field.empty())
+	prefix = fieldToPrefix(m_field);
    list<Xapian::Query> pqueries;
    StringToXapianQ tr(db, stemlang);
-    if (!tr.translate(m_text, m_reason, pqueries))
+    if (!tr.translate(m_text, prefix, m_reason, pqueries))
 	return false;
    if (pqueries.empty()) {
 	LOGERR(("SearchDataClauseSimple: resolved to null query\n"));
@ -437,12 +474,16 @@ bool SearchDataClauseDist::toNativeQuery(Rcl::Db &db, void *p,
    list<Xapian::Query> pqueries;
    Xapian::Query nq;

+    string prefix;
+    if (!m_field.empty())
+	prefix = fieldToPrefix(m_field);
+
    // Use stringToXapianQueries to lowercase and simplify the phrase
    // terms etc. The result should be a single element list
    string s = string("\"") + m_text + string("\"");
    bool useNear = m_tp == SCLT_NEAR;
    StringToXapianQ tr(db, stemlang);
-    if (!tr.translate(s, m_reason, pqueries, m_slack, useNear))
+    if (!tr.translate(s, prefix, m_reason, pqueries, m_slack, useNear))
 	return false;
    if (pqueries.empty()) {
 	LOGERR(("SearchDataClauseDist: resolved to null query\n"));
--- a/src/rcldb/searchdata.h
+++ b/src/rcldb/searchdata.h
@ -16,7 +16,7 @@
 */
 #ifndef _SEARCHDATA_H_INCLUDED_
 #define _SEARCHDATA_H_INCLUDED_
-/* @(#$Id: searchdata.h,v 1.7 2006-12-05 15:17:13 dockes Exp $  (C) 2004 J.F.Dockes */
+/* @(#$Id: searchdata.h,v 1.8 2007-01-17 13:53:41 dockes Exp $  (C) 2004 J.F.Dockes */

 /** 
 * Structures to hold data coming almost directly from the gui
@ -46,11 +46,22 @@ enum SClType {
 class SearchDataClause;

 /** 
- * Holder for a list of search clauses. Some of the clauses may be be reference
- * to other subqueries in the future. For now, they just reflect user entry in 
- * a query field: type, some text and possibly a distance. Each clause may
- * hold several queries in the Xapian sense, for exemple several terms
- * and phrases as would result from ["this is a phrase" term1 term2]
+  Data structure representing a Recoll query.
+  This is currently simply a list of search clauses. 
+
+  For now, clauses in the list just reflect user entry in a query
+  field: some text, a clause type (AND/OR/NEAR etc.) and possibly a
+  distance. Each clause may hold several queries in the Xapian sense,
+  for example several terms and phrases as would result from 
+  ["this is a phrase" term1 term2]
+
+  This means that SearchData will be translated into a Xapian
+  Query tree of depth 2.
+
+  The structure might be extended in the future so that some of the
+  clauses may be references to other subqueries (there doesn't seem to
+  be an urgent need for this)
+
 */
 class SearchData {
 public:
@ -134,15 +145,19 @@ protected:
 */
 class SearchDataClauseSimple : public SearchDataClause {
 public:
-    SearchDataClauseSimple(SClType tp, string txt)
-	: SearchDataClause(tp), m_text(txt), m_slack(0) {}
+    SearchDataClauseSimple(SClType tp, const string& txt, 
+			   const string& fld = "")
+	: SearchDataClause(tp), m_text(txt), m_field(fld), m_slack(0) {}
+
    virtual ~SearchDataClauseSimple() {}

+    /** Translate to Xapian query */
    virtual bool toNativeQuery(Rcl::Db &db, void *, const string& stemlang);

-    virtual bool getTerms(vector<string>& terms, 
-			  vector<vector<string> >& groups,
-			  vector<int>& gslks) const
+    /** Retrieve query terms and term groups. This is used for highlighting */
+    virtual bool getTerms(vector<string>& terms, /* Single terms */
+			  vector<vector<string> >& groups, /* Prox grps */
+			  vector<int>& gslks) const        /* Prox slacks */
    {
 	terms.insert(terms.end(), m_terms.begin(), m_terms.end());
 	groups.insert(groups.end(), m_groups.begin(), m_groups.end());
@ -151,7 +166,8 @@ public:
    }

 protected:
-    string  m_text;
+    string  m_text;  // Raw user entry text.
+    string  m_field; // Field specification if any
    // Single terms and phrases resulting from breaking up m_text;
    // valid after toNativeQuery() call
    vector<string>          m_terms;
@ -161,10 +177,10 @@ protected:
    int m_slack;
 };

-/** Filename search. */
+/** Filename search clause. */
 class SearchDataClauseFilename : public SearchDataClauseSimple {
 public:
-    SearchDataClauseFilename(string txt)
+    SearchDataClauseFilename(const string& txt)
 	: SearchDataClauseSimple(SCLT_FILENAME, txt) {}
    virtual ~SearchDataClauseFilename() {}
    virtual bool toNativeQuery(Rcl::Db &db, void *, const string& stemlang);
@ -176,8 +192,9 @@ public:
 */
 class SearchDataClauseDist : public SearchDataClauseSimple {
 public:
-    SearchDataClauseDist(SClType tp, string txt, int slack) 
-	: SearchDataClauseSimple(tp, txt) {m_slack = slack;}
+    SearchDataClauseDist(SClType tp, const string& txt, int slack, 
+			 const string& fld = "")
+	: SearchDataClauseSimple(tp, txt, fld) {m_slack = slack;}
    virtual ~SearchDataClauseDist() {}

    virtual bool toNativeQuery(Rcl::Db &db, void *, const string& stemlang);