*** empty log message ***
This commit is contained in:
parent
2e35f674a6
commit
fe550bf0e8
@ -1,7 +1,7 @@
|
||||
all:
|
||||
cd lib;make
|
||||
cd index;make
|
||||
cd qtgui;rm -f recoll;make
|
||||
cd qtgui;qmake recoll.pro ; rm -f recoll;make
|
||||
clean:
|
||||
cd common;make clean
|
||||
cd index;make clean
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
@(#$Id: README,v 1.1 2005-02-04 14:21:17 dockes Exp $ (C) 2004 J.F.Dockes
|
||||
@(#$Id: README,v 1.2 2005-02-08 14:54:38 dockes Exp $ (C) 2004 J.F.Dockes
|
||||
|
||||
Hello.
|
||||
|
||||
@ -13,9 +13,8 @@ It will become much better in the near future.
|
||||
|
||||
What it has:
|
||||
|
||||
- Easy installation. No db, web server or exotic language necessary. The
|
||||
binary packages are statically linked and should run almost as soon as
|
||||
unpacked. The idea is that EVERYBODY should index their files because it
|
||||
- Easy installation. No db, web server or exotic language necessary.
|
||||
The idea is that EVERYBODY should index their files because it
|
||||
makes life easier.
|
||||
- Indexes text, pdf, html, postscript. Deals with compressed versions of
|
||||
same.
|
||||
|
||||
@ -23,8 +23,7 @@ unix {
|
||||
UI_DIR = .ui
|
||||
MOC_DIR = .moc
|
||||
OBJECTS_DIR = .obj
|
||||
LIBS += ../lib/librcl.a -L/usr/local/lib -lxapian -liconv \
|
||||
-lfontconfig -lfreetype -lexpat -lz
|
||||
LIBS += ../lib/librcl.a -L/usr/local/lib -lxapian -liconv
|
||||
INCLUDEPATH += ../common ../index ../query ../unac ../utils
|
||||
}
|
||||
|
||||
|
||||
@ -311,6 +311,8 @@
|
||||
<include location="local" impldecl="in implementation">recollmain.ui.h</include>
|
||||
</includes>
|
||||
<variables>
|
||||
<variable>std::string stemlang;</variable>
|
||||
<variable>bool dostem;</variable>
|
||||
<variable>int reslist_current;</variable>
|
||||
<variable>int reslist_winfirst;</variable>
|
||||
</variables>
|
||||
|
||||
@ -259,14 +259,24 @@ void RecollMain::queryText_returnPressed()
|
||||
"to complete?");
|
||||
return;
|
||||
}
|
||||
|
||||
}
|
||||
if (stemlang.empty()) {
|
||||
string param;
|
||||
if (rclconfig->getConfParam("querystemming", param))
|
||||
dostem = ConfTree::stringToBool(param);
|
||||
else
|
||||
dostem = false;
|
||||
if (!rclconfig->getConfParam("querystemminglanguage", stemlang))
|
||||
stemlang = "english";
|
||||
}
|
||||
|
||||
reslist_current = -1;
|
||||
reslist_winfirst = -1;
|
||||
|
||||
QCString u8 = queryText->text().utf8();
|
||||
|
||||
if (!rcldb->setQuery(string((const char *)u8)))
|
||||
if (!rcldb->setQuery(string((const char *)u8), dostem ?
|
||||
Rcl::Db::QO_STEM : Rcl::Db::QO_NONE, stemlang))
|
||||
return;
|
||||
list<string> terms;
|
||||
listNextPB_clicked();
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.22 2005-02-08 11:59:08 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.23 2005-02-08 14:45:54 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
#endif
|
||||
#include <stdio.h>
|
||||
#include <sys/stat.h>
|
||||
@ -452,10 +452,49 @@ class wsQData : public TextSplitCB {
|
||||
}
|
||||
};
|
||||
|
||||
#include <xapian/stem.h>
|
||||
|
||||
bool Rcl::Db::setQuery(const std::string &iqstring)
|
||||
// Expand term to the list of all db terms which have the same stem.
|
||||
// This is currently awfully inefficient as we actually stem the whole
|
||||
// db term list ! Need to build an efficient structure when finishing
|
||||
// indexing, but good enough for testing
|
||||
static list<string> stemexpand(Native *ndb, string term, const string& lang)
|
||||
{
|
||||
LOGDEB(("Rcl::Db::setQuery: %s\n", iqstring.c_str()));
|
||||
list<string> explist;
|
||||
try {
|
||||
Xapian::Stem stemmer(lang);
|
||||
string stem = stemmer.stem_word(term);
|
||||
LOGDEB(("stemexpand: term '%s' stem '%s'\n",
|
||||
term.c_str(), stem.c_str()));
|
||||
Xapian::TermIterator it;
|
||||
for (it = ndb->db.allterms_begin();
|
||||
it != ndb->db.allterms_end(); it++) {
|
||||
string stem1 = stemmer.stem_word(*it);
|
||||
if (stem == stem1)
|
||||
explist.push_back(*it);
|
||||
}
|
||||
if (explist.size() == 0)
|
||||
explist.push_back(term);
|
||||
if (1) {
|
||||
string expanded;
|
||||
for (list<string>::const_iterator it = explist.begin();
|
||||
it != explist.end(); it++) {
|
||||
expanded += *it + " ";
|
||||
}
|
||||
LOGDEB(("stemexpand: expanded list: %s\n", expanded.c_str()));
|
||||
}
|
||||
} catch (...) {
|
||||
LOGERR(("Stemming failed: no stemmer for %s ? \n", lang.c_str()));
|
||||
explist.push_back(term);
|
||||
}
|
||||
return explist;
|
||||
}
|
||||
|
||||
bool Rcl::Db::setQuery(const std::string &iqstring, QueryOpts opts,
|
||||
const string& stemlang)
|
||||
{
|
||||
LOGDEB(("Rcl::Db::setQuery: q: '%s', opts 0x%x, stemlang %s\n",
|
||||
iqstring.c_str(), (unsigned int)opts, stemlang.c_str()));
|
||||
Native *ndb = (Native *)pdata;
|
||||
if (!ndb)
|
||||
return false;
|
||||
@ -465,13 +504,14 @@ bool Rcl::Db::setQuery(const std::string &iqstring)
|
||||
return false;
|
||||
}
|
||||
|
||||
// First extract phrases:
|
||||
// First split into (possibly single word) phrases ("this is a phrase"):
|
||||
list<string> phrases;
|
||||
ConfTree::stringToStrings(qstring, phrases);
|
||||
for (list<string>::const_iterator i=phrases.begin();
|
||||
i != phrases.end();i++) {
|
||||
LOGDEB(("Rcl::Db::setQuery: phrase: '%s'\n", i->c_str()));
|
||||
}
|
||||
|
||||
list<Xapian::Query> pqueries;
|
||||
for (list<string>::const_iterator it = phrases.begin();
|
||||
it != phrases.end(); it++) {
|
||||
@ -482,8 +522,16 @@ bool Rcl::Db::setQuery(const std::string &iqstring)
|
||||
LOGDEB(("Splitter term count: %d\n", splitData.terms.size()));
|
||||
switch(splitData.terms.size()) {
|
||||
case 0: continue;// ??
|
||||
case 1:
|
||||
pqueries.push_back(Xapian::Query(splitData.terms.front()));
|
||||
case 1: {
|
||||
list<string> exp;
|
||||
if (opts & QO_STEM)
|
||||
exp = stemexpand(ndb, splitData.terms.front(), stemlang);
|
||||
else
|
||||
exp.push_back(splitData.terms.front());
|
||||
pqueries.push_back(Xapian::Query(Xapian::Query::OP_OR,
|
||||
exp.begin(),
|
||||
exp.end()));
|
||||
}
|
||||
break;
|
||||
default:
|
||||
LOGDEB(("Pushing phrase: %s\n", splitData.catterms().c_str()));
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
#ifndef _DB_H_INCLUDED_
|
||||
#define _DB_H_INCLUDED_
|
||||
/* @(#$Id: rcldb.h,v 1.10 2005-02-08 11:59:08 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
/* @(#$Id: rcldb.h,v 1.11 2005-02-08 14:45:54 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
|
||||
#include <string>
|
||||
#include <list>
|
||||
@ -76,7 +76,9 @@ class Db {
|
||||
// Query-related functions
|
||||
|
||||
// Parse query string and initialize query
|
||||
bool setQuery(const string &q);
|
||||
enum QueryOpts {QO_NONE=0, QO_STEM = 1};
|
||||
bool setQuery(const string &q, QueryOpts opts = QO_NONE,
|
||||
const string& stemlang = "english");
|
||||
bool getQueryTerms(std::list<string>& terms);
|
||||
|
||||
// Get document at rank i. This is probably vastly inferior to the type
|
||||
|
||||
@ -1,20 +1,38 @@
|
||||
# @(#$Id: recoll.conf,v 1.2 2005-02-04 09:30:44 dockes Exp $ (C) 2004 J.F.Dockes
|
||||
# @(#$Id: recoll.conf,v 1.3 2005-02-08 14:45:54 dockes Exp $ (C) 2004 J.F.Dockes
|
||||
|
||||
# Recoll default configuration file: this will index your home directory
|
||||
# Recoll default configuration file. This should be copied to
|
||||
# ~/.recoll/recoll.conf
|
||||
|
||||
# Space-separated list of directories to index
|
||||
|
||||
topdirs = ~
|
||||
|
||||
# Use stemming of query terms or not (i.e. expand a search for floors to
|
||||
# floor, flooring, etc.). There is currently a serious performance hit for
|
||||
# this (at query time), but you can try it, it may be acceptable depending
|
||||
# on your database size
|
||||
querystemming = 0
|
||||
querystemminglanguage = english
|
||||
|
||||
# Name of file suffix to mime-type map file.
|
||||
mimemapfile = mimemap
|
||||
# Name of mime-type to filter type/name map file.
|
||||
mimeconffile = mimeconf
|
||||
|
||||
# Where to store the database.
|
||||
dbdir = ~/.recoll/xapiandb
|
||||
|
||||
# Default character set. Values found inside files, e.g. the content tag in html
|
||||
# documents, will override this. It can be specified per directory (see
|
||||
# below). Used when converting to utf-8 (internal storage format).
|
||||
defaultcharset = iso-8859-1
|
||||
defaultlanguage = french
|
||||
|
||||
# Guessing charsets usually does not work well
|
||||
guesscharset = 0
|
||||
|
||||
# You could specify different parameters for a subdirectory like this: (no
|
||||
# tilde substitution there for now, sorry)
|
||||
#[/home/me/englishdocs/plain]
|
||||
#defaultlanguage = english
|
||||
#defaultcharset = iso-8859-2
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user