*** empty log message ***

2005-02-08 14:54:38 +00:00 · 2005-02-08 14:54:38 +00:00 · fe550bf0e8
commit fe550bf0e8
parent 2e35f674a6
8 changed files with 98 additions and 20 deletions
--- a/src/Makefile
+++ b/src/Makefile
@ -1,7 +1,7 @@
 all:
 	cd lib;make
 	cd index;make
-	cd qtgui;rm -f recoll;make
+	cd qtgui;qmake recoll.pro ; rm -f recoll;make
 clean:
 	cd common;make clean
 	cd index;make clean
--- a/src/README
+++ b/src/README
@ -1,4 +1,4 @@
- @(#$Id: README,v 1.1 2005-02-04 14:21:17 dockes Exp $  (C) 2004 J.F.Dockes
+ @(#$Id: README,v 1.2 2005-02-08 14:54:38 dockes Exp $  (C) 2004 J.F.Dockes
 Hello.
@ -13,9 +13,8 @@ It will become much better in the near future.
 What it has:
- - Easy installation. No db, web server or exotic language necessary. The
+ - Easy installation. No db, web server or exotic language necessary. 
-   binary packages are statically linked and should run almost as soon as
+   The idea is that EVERYBODY should index their files because it
   unpacked. The idea is that EVERYBODY should index their files because it
   makes life easier.
 - Indexes text, pdf, html, postscript. Deals with compressed versions of
   same. 
--- a/src/qtgui/recoll.pro
+++ b/src/qtgui/recoll.pro
@ -23,8 +23,7 @@ unix {
  UI_DIR = .ui
  MOC_DIR = .moc
  OBJECTS_DIR = .obj
-  LIBS += ../lib/librcl.a -L/usr/local/lib -lxapian -liconv \
+  LIBS += ../lib/librcl.a -L/usr/local/lib -lxapian -liconv
       -lfontconfig -lfreetype -lexpat -lz
  INCLUDEPATH += ../common ../index ../query ../unac ../utils 
 }
--- a/src/qtgui/recollmain.ui
+++ b/src/qtgui/recollmain.ui
@ -311,6 +311,8 @@
    <include location="local" impldecl="in implementation">recollmain.ui.h</include>
 </includes>
 <variables>
    <variable>std::string stemlang;</variable>
    <variable>bool dostem;</variable>
    <variable>int reslist_current;</variable>
    <variable>int reslist_winfirst;</variable>
 </variables>
--- a/src/qtgui/recollmain.ui.h
+++ b/src/qtgui/recollmain.ui.h
@ -259,14 +259,24 @@ void RecollMain::queryText_returnPressed()
 				     "to complete?");
 	    return;
 	}
    }
    if (stemlang.empty()) {
 	string param;
 	if (rclconfig->getConfParam("querystemming", param))
 	    dostem = ConfTree::stringToBool(param);
 	else
 	    dostem = false;
 	if (!rclconfig->getConfParam("querystemminglanguage", stemlang))
 	    stemlang = "english";
    }
    reslist_current = -1;
    reslist_winfirst = -1;
    QCString u8 =  queryText->text().utf8();
-    if (!rcldb->setQuery(string((const char *)u8)))
+    if (!rcldb->setQuery(string((const char *)u8), dostem ? 
 			 Rcl::Db::QO_STEM : Rcl::Db::QO_NONE, stemlang))
 	return;
    list<string> terms;
    listNextPB_clicked();
--- a/src/rcldb/rcldb.cpp
+++ b/src/rcldb/rcldb.cpp
@ -1,5 +1,5 @@
 #ifndef lint
-static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.22 2005-02-08 11:59:08 dockes Exp $ (C) 2004 J.F.Dockes";
+static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.23 2005-02-08 14:45:54 dockes Exp $ (C) 2004 J.F.Dockes";
 #endif
 #include <stdio.h>
 #include <sys/stat.h>
@ -452,10 +452,49 @@ class wsQData : public TextSplitCB {
    }
 };
 #include <xapian/stem.h>
-bool Rcl::Db::setQuery(const std::string &iqstring)
+// Expand term to the list of all db terms which stem to the same stem.
 // This is currently awfully inefficient as we actually stem the whole
 // db term list ! Need to build an efficient structure when finishing
 // indexing, but good enough for testing
 // Parameters:
 //   ndb  - native db wrapper; its 'db' member supplies the term list to scan
 //   term - the query term to expand
 //   lang - language name handed to the Xapian::Stem constructor
 // Returns the list of index terms whose stem equals the stem of 'term'.
 // The list is never empty: 'term' itself is appended when no index term
 // matches, and also when an exception is caught.
 static list<string> stemexpand(Native *ndb, string term, const string& lang)
 {
-    LOGDEB(("Rcl::Db::setQuery: %s\n", iqstring.c_str()));
+    list<string> explist;
    try {
 	Xapian::Stem stemmer(lang);
 	string stem = stemmer.stem_word(term);
 	LOGDEB(("stemexpand: term '%s' stem '%s'\n", 
 		term.c_str(), stem.c_str()));
 	// Walk every term of the index, stem it, and keep the ones whose
 	// stem is identical to the stem of the query term.
 	Xapian::TermIterator it;
 	for (it = ndb->db.allterms_begin(); 
 	     it != ndb->db.allterms_end(); it++) {
 	    string stem1 = stemmer.stem_word(*it);
 	    if (stem == stem1)
 		explist.push_back(*it);
 	}
 	// Nothing in the index shares the stem: fall back to the raw term
 	if (explist.size() == 0)
 	    explist.push_back(term);
 	// Debug trace of the resulting expansion (unconditionally enabled)
 	if (1) {
 	    string expanded;
 	    for (list<string>::const_iterator it = explist.begin(); 
 		 it != explist.end(); it++) {
 		expanded += *it + " ";
 	    }
 	    LOGDEB(("stemexpand: expanded list: %s\n", expanded.c_str()));
 	}
    } catch (...) {
 	// Any exception from the block above ends up here. The log message
 	// suggests the expected cause is an unknown stemmer language
 	// (NOTE(review): confirm which Xapian calls can throw). explist may
 	// already hold matches collected before the throw; 'term' is
 	// appended to whatever is there.
 	LOGERR(("Stemming failed: no stemmer for %s ? \n", lang.c_str()));
 	explist.push_back(term);
    }
    return explist;
 }
 bool Rcl::Db::setQuery(const std::string &iqstring, QueryOpts opts, 
 		       const string& stemlang)
 {
    LOGDEB(("Rcl::Db::setQuery: q: '%s', opts 0x%x, stemlang %s\n", 
 	    iqstring.c_str(), (unsigned int)opts, stemlang.c_str()));
    Native *ndb = (Native *)pdata;
    if (!ndb)
 	return false;
@ -465,13 +504,14 @@ bool Rcl::Db::setQuery(const std::string &iqstring)
 	return false;
    }
-    // First extract phrases:
+    // First split into (possibly single word) phrases ("this is a phrase"):
    list<string> phrases;
    ConfTree::stringToStrings(qstring, phrases);
    for (list<string>::const_iterator i=phrases.begin();
 	 i != phrases.end();i++) {
 	LOGDEB(("Rcl::Db::setQuery: phrase: '%s'\n", i->c_str()));
    }
    list<Xapian::Query> pqueries;
    for (list<string>::const_iterator it = phrases.begin(); 
 	 it != phrases.end(); it++) {
@ -482,8 +522,16 @@ bool Rcl::Db::setQuery(const std::string &iqstring)
 	LOGDEB(("Splitter term count: %d\n", splitData.terms.size()));
 	switch(splitData.terms.size()) {
 	case 0: continue;// ??
-	case 1:
+	case 1: {
-	    pqueries.push_back(Xapian::Query(splitData.terms.front()));
+	    list<string> exp;  
 	    if (opts & QO_STEM) 
 		exp = stemexpand(ndb, splitData.terms.front(), stemlang);
 	    else
 		exp.push_back(splitData.terms.front());
 	    pqueries.push_back(Xapian::Query(Xapian::Query::OP_OR, 
 					     exp.begin(), 
 					     exp.end()));
 	}
 	    break;
 	default:
 	    LOGDEB(("Pushing phrase: %s\n", splitData.catterms().c_str()));
--- a/src/rcldb/rcldb.h
+++ b/src/rcldb/rcldb.h
@ -1,6 +1,6 @@
 #ifndef _DB_H_INCLUDED_
 #define _DB_H_INCLUDED_
-/* @(#$Id: rcldb.h,v 1.10 2005-02-08 11:59:08 dockes Exp $  (C) 2004 J.F.Dockes */
+/* @(#$Id: rcldb.h,v 1.11 2005-02-08 14:45:54 dockes Exp $  (C) 2004 J.F.Dockes */
 #include <string>
 #include <list>
@ -76,7 +76,9 @@ class Db {
    // Query-related functions
    // Parse query string and initialize query
-    bool setQuery(const string &q);
+    enum QueryOpts {QO_NONE=0, QO_STEM = 1};
    bool setQuery(const string &q, QueryOpts opts = QO_NONE, 
 		  const string& stemlang = "english");
    bool getQueryTerms(std::list<string>& terms);
    // Get document at rank i. This is probably vastly inferior to the type
--- a/src/sampleconf/recoll.conf
+++ b/src/sampleconf/recoll.conf
@ -1,20 +1,38 @@
-# @(#$Id: recoll.conf,v 1.2 2005-02-04 09:30:44 dockes Exp $  (C) 2004 J.F.Dockes
+# @(#$Id: recoll.conf,v 1.3 2005-02-08 14:45:54 dockes Exp $  (C) 2004 J.F.Dockes
-# Recoll default configuration file: this will index your home directory 
+# Recoll default configuration file. This should be copied to
 # ~/.recoll/recoll.conf 
 # Space-separated list of directories to index
 topdirs = ~
 # Use stemming of query terms or not (i.e. expand a search for floors to
 # floor, flooring, etc.). There is currently a serious performance hit for
 # this (at query time), but you can try it; it may be acceptable depending
 # on your database size.
 querystemming = 0
 querystemminglanguage = english
 # Name of file suffix to mime-type map file. 
 mimemapfile = mimemap
 # Name of mime-type to filter type/name map file. 
 mimeconffile = mimeconf
 # Where to store the database.
 dbdir = ~/.recoll/xapiandb
 # Default character set. Values found inside files, ie content tag in html
 # documents, will override this. It can be specified per directory (see
 # below). Used when converting to utf-8 (internal storage format).
 defaultcharset = iso-8859-1
 defaultlanguage = french
 # Guessing charsets usually does not work well
 guesscharset = 0
 # You could specify different parameters for a subdirectory like this: (no
 # tilde substitution there for now, sorry)
 #[/home/me/englishdocs/plain]
-#defaultlanguage = english
+#defaultcharset = iso-8859-2