*** empty log message ***
This commit is contained in:
parent
2e35f674a6
commit
fe550bf0e8
@ -1,7 +1,7 @@
|
|||||||
all:
|
all:
|
||||||
cd lib;make
|
cd lib;make
|
||||||
cd index;make
|
cd index;make
|
||||||
cd qtgui;rm -f recoll;make
|
cd qtgui;qmake recoll.pro ; rm -f recoll;make
|
||||||
clean:
|
clean:
|
||||||
cd common;make clean
|
cd common;make clean
|
||||||
cd index;make clean
|
cd index;make clean
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
@(#$Id: README,v 1.1 2005-02-04 14:21:17 dockes Exp $ (C) 2004 J.F.Dockes
|
@(#$Id: README,v 1.2 2005-02-08 14:54:38 dockes Exp $ (C) 2004 J.F.Dockes
|
||||||
|
|
||||||
Hello.
|
Hello.
|
||||||
|
|
||||||
@ -13,9 +13,8 @@ It will become much better in the near future.
|
|||||||
|
|
||||||
What it has:
|
What it has:
|
||||||
|
|
||||||
- Easy installation. No db, web server or exotic language necessary. The
|
- Easy installation. No db, web server or exotic language necessary.
|
||||||
binary packages are statically linked and should run almost as soon as
|
The idea is that EVERYBODY should index their files because it
|
||||||
unpacked. The idea is that EVERYBODY should index their files because it
|
|
||||||
makes life easier.
|
makes life easier.
|
||||||
- Indexes text, pdf, html, postscript. Deals with compressed versions of
|
- Indexes text, pdf, html, postscript. Deals with compressed versions of
|
||||||
same.
|
same.
|
||||||
|
|||||||
@ -23,8 +23,7 @@ unix {
|
|||||||
UI_DIR = .ui
|
UI_DIR = .ui
|
||||||
MOC_DIR = .moc
|
MOC_DIR = .moc
|
||||||
OBJECTS_DIR = .obj
|
OBJECTS_DIR = .obj
|
||||||
LIBS += ../lib/librcl.a -L/usr/local/lib -lxapian -liconv \
|
LIBS += ../lib/librcl.a -L/usr/local/lib -lxapian -liconv
|
||||||
-lfontconfig -lfreetype -lexpat -lz
|
|
||||||
INCLUDEPATH += ../common ../index ../query ../unac ../utils
|
INCLUDEPATH += ../common ../index ../query ../unac ../utils
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -311,6 +311,8 @@
|
|||||||
<include location="local" impldecl="in implementation">recollmain.ui.h</include>
|
<include location="local" impldecl="in implementation">recollmain.ui.h</include>
|
||||||
</includes>
|
</includes>
|
||||||
<variables>
|
<variables>
|
||||||
|
<variable>std::string stemlang;</variable>
|
||||||
|
<variable>bool dostem;</variable>
|
||||||
<variable>int reslist_current;</variable>
|
<variable>int reslist_current;</variable>
|
||||||
<variable>int reslist_winfirst;</variable>
|
<variable>int reslist_winfirst;</variable>
|
||||||
</variables>
|
</variables>
|
||||||
|
|||||||
@ -259,14 +259,24 @@ void RecollMain::queryText_returnPressed()
|
|||||||
"to complete?");
|
"to complete?");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
if (stemlang.empty()) {
|
||||||
|
string param;
|
||||||
|
if (rclconfig->getConfParam("querystemming", param))
|
||||||
|
dostem = ConfTree::stringToBool(param);
|
||||||
|
else
|
||||||
|
dostem = false;
|
||||||
|
if (!rclconfig->getConfParam("querystemminglanguage", stemlang))
|
||||||
|
stemlang = "english";
|
||||||
|
}
|
||||||
|
|
||||||
reslist_current = -1;
|
reslist_current = -1;
|
||||||
reslist_winfirst = -1;
|
reslist_winfirst = -1;
|
||||||
|
|
||||||
QCString u8 = queryText->text().utf8();
|
QCString u8 = queryText->text().utf8();
|
||||||
|
|
||||||
if (!rcldb->setQuery(string((const char *)u8)))
|
if (!rcldb->setQuery(string((const char *)u8), dostem ?
|
||||||
|
Rcl::Db::QO_STEM : Rcl::Db::QO_NONE, stemlang))
|
||||||
return;
|
return;
|
||||||
list<string> terms;
|
list<string> terms;
|
||||||
listNextPB_clicked();
|
listNextPB_clicked();
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
#ifndef lint
|
#ifndef lint
|
||||||
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.22 2005-02-08 11:59:08 dockes Exp $ (C) 2004 J.F.Dockes";
|
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.23 2005-02-08 14:45:54 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||||
#endif
|
#endif
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <sys/stat.h>
|
#include <sys/stat.h>
|
||||||
@ -452,10 +452,49 @@ class wsQData : public TextSplitCB {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#include <xapian/stem.h>
|
||||||
|
|
||||||
bool Rcl::Db::setQuery(const std::string &iqstring)
|
// Expand term to the list of all db terms which have the same stem as term.
|
||||||
|
// This is currently awfully inefficient as we actually stem the whole
|
||||||
|
// db term list ! Need to build an efficient structure when finishing
|
||||||
|
// indexing, but good enough for testing
|
||||||
|
static list<string> stemexpand(Native *ndb, string term, const string& lang)
|
||||||
{
|
{
|
||||||
LOGDEB(("Rcl::Db::setQuery: %s\n", iqstring.c_str()));
|
list<string> explist;
|
||||||
|
try {
|
||||||
|
Xapian::Stem stemmer(lang);
|
||||||
|
string stem = stemmer.stem_word(term);
|
||||||
|
LOGDEB(("stemexpand: term '%s' stem '%s'\n",
|
||||||
|
term.c_str(), stem.c_str()));
|
||||||
|
Xapian::TermIterator it;
|
||||||
|
for (it = ndb->db.allterms_begin();
|
||||||
|
it != ndb->db.allterms_end(); it++) {
|
||||||
|
string stem1 = stemmer.stem_word(*it);
|
||||||
|
if (stem == stem1)
|
||||||
|
explist.push_back(*it);
|
||||||
|
}
|
||||||
|
if (explist.size() == 0)
|
||||||
|
explist.push_back(term);
|
||||||
|
if (1) {
|
||||||
|
string expanded;
|
||||||
|
for (list<string>::const_iterator it = explist.begin();
|
||||||
|
it != explist.end(); it++) {
|
||||||
|
expanded += *it + " ";
|
||||||
|
}
|
||||||
|
LOGDEB(("stemexpand: expanded list: %s\n", expanded.c_str()));
|
||||||
|
}
|
||||||
|
} catch (...) {
|
||||||
|
LOGERR(("Stemming failed: no stemmer for %s ? \n", lang.c_str()));
|
||||||
|
explist.push_back(term);
|
||||||
|
}
|
||||||
|
return explist;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool Rcl::Db::setQuery(const std::string &iqstring, QueryOpts opts,
|
||||||
|
const string& stemlang)
|
||||||
|
{
|
||||||
|
LOGDEB(("Rcl::Db::setQuery: q: '%s', opts 0x%x, stemlang %s\n",
|
||||||
|
iqstring.c_str(), (unsigned int)opts, stemlang.c_str()));
|
||||||
Native *ndb = (Native *)pdata;
|
Native *ndb = (Native *)pdata;
|
||||||
if (!ndb)
|
if (!ndb)
|
||||||
return false;
|
return false;
|
||||||
@ -465,13 +504,14 @@ bool Rcl::Db::setQuery(const std::string &iqstring)
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// First extract phrases:
|
// First split into (possibly single word) phrases ("this is a phrase"):
|
||||||
list<string> phrases;
|
list<string> phrases;
|
||||||
ConfTree::stringToStrings(qstring, phrases);
|
ConfTree::stringToStrings(qstring, phrases);
|
||||||
for (list<string>::const_iterator i=phrases.begin();
|
for (list<string>::const_iterator i=phrases.begin();
|
||||||
i != phrases.end();i++) {
|
i != phrases.end();i++) {
|
||||||
LOGDEB(("Rcl::Db::setQuery: phrase: '%s'\n", i->c_str()));
|
LOGDEB(("Rcl::Db::setQuery: phrase: '%s'\n", i->c_str()));
|
||||||
}
|
}
|
||||||
|
|
||||||
list<Xapian::Query> pqueries;
|
list<Xapian::Query> pqueries;
|
||||||
for (list<string>::const_iterator it = phrases.begin();
|
for (list<string>::const_iterator it = phrases.begin();
|
||||||
it != phrases.end(); it++) {
|
it != phrases.end(); it++) {
|
||||||
@ -482,8 +522,16 @@ bool Rcl::Db::setQuery(const std::string &iqstring)
|
|||||||
LOGDEB(("Splitter term count: %d\n", splitData.terms.size()));
|
LOGDEB(("Splitter term count: %d\n", splitData.terms.size()));
|
||||||
switch(splitData.terms.size()) {
|
switch(splitData.terms.size()) {
|
||||||
case 0: continue;// ??
|
case 0: continue;// ??
|
||||||
case 1:
|
case 1: {
|
||||||
pqueries.push_back(Xapian::Query(splitData.terms.front()));
|
list<string> exp;
|
||||||
|
if (opts & QO_STEM)
|
||||||
|
exp = stemexpand(ndb, splitData.terms.front(), stemlang);
|
||||||
|
else
|
||||||
|
exp.push_back(splitData.terms.front());
|
||||||
|
pqueries.push_back(Xapian::Query(Xapian::Query::OP_OR,
|
||||||
|
exp.begin(),
|
||||||
|
exp.end()));
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
LOGDEB(("Pushing phrase: %s\n", splitData.catterms().c_str()));
|
LOGDEB(("Pushing phrase: %s\n", splitData.catterms().c_str()));
|
||||||
|
|||||||
@ -1,6 +1,6 @@
|
|||||||
#ifndef _DB_H_INCLUDED_
|
#ifndef _DB_H_INCLUDED_
|
||||||
#define _DB_H_INCLUDED_
|
#define _DB_H_INCLUDED_
|
||||||
/* @(#$Id: rcldb.h,v 1.10 2005-02-08 11:59:08 dockes Exp $ (C) 2004 J.F.Dockes */
|
/* @(#$Id: rcldb.h,v 1.11 2005-02-08 14:45:54 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <list>
|
#include <list>
|
||||||
@ -76,7 +76,9 @@ class Db {
|
|||||||
// Query-related functions
|
// Query-related functions
|
||||||
|
|
||||||
// Parse query string and initialize query
|
// Parse query string and initialize query
|
||||||
bool setQuery(const string &q);
|
enum QueryOpts {QO_NONE=0, QO_STEM = 1};
|
||||||
|
bool setQuery(const string &q, QueryOpts opts = QO_NONE,
|
||||||
|
const string& stemlang = "english");
|
||||||
bool getQueryTerms(std::list<string>& terms);
|
bool getQueryTerms(std::list<string>& terms);
|
||||||
|
|
||||||
// Get document at rank i. This is probably vastly inferior to the type
|
// Get document at rank i. This is probably vastly inferior to the type
|
||||||
|
|||||||
@ -1,20 +1,38 @@
|
|||||||
# @(#$Id: recoll.conf,v 1.2 2005-02-04 09:30:44 dockes Exp $ (C) 2004 J.F.Dockes
|
# @(#$Id: recoll.conf,v 1.3 2005-02-08 14:45:54 dockes Exp $ (C) 2004 J.F.Dockes
|
||||||
|
|
||||||
# Recoll default configuration file: this will index your home directory
|
# Recoll default configuration file. This should be copied to
|
||||||
|
# ~/.recoll/recoll.conf
|
||||||
|
|
||||||
|
# Space-separated list of directories to index
|
||||||
|
|
||||||
topdirs = ~
|
topdirs = ~
|
||||||
|
|
||||||
|
# Use stemming of query terms or not (i.e. expand a search for floors to
|
||||||
|
# floor, flooring, etc.). There is currently a serious performance hit for
|
||||||
|
# this (at query time), but you can try it, it may be acceptable depending
|
||||||
|
# on your database size
|
||||||
|
querystemming = 0
|
||||||
|
querystemminglanguage = english
|
||||||
|
|
||||||
|
# Name of file suffix to mime-type map file.
|
||||||
mimemapfile = mimemap
|
mimemapfile = mimemap
|
||||||
|
# Name of mime-type to filter type/name map file.
|
||||||
mimeconffile = mimeconf
|
mimeconffile = mimeconf
|
||||||
|
|
||||||
|
# Where to store the database.
|
||||||
dbdir = ~/.recoll/xapiandb
|
dbdir = ~/.recoll/xapiandb
|
||||||
|
|
||||||
|
# Default character set. Values found inside files, e.g. content tag in HTML
|
||||||
|
# documents, will override this. It can be specified per directory (see
|
||||||
|
# below). Used when converting to utf-8 (internal storage format).
|
||||||
defaultcharset = iso-8859-1
|
defaultcharset = iso-8859-1
|
||||||
defaultlanguage = french
|
defaultlanguage = french
|
||||||
|
|
||||||
|
# Guessing charsets usually does not work well
|
||||||
guesscharset = 0
|
guesscharset = 0
|
||||||
|
|
||||||
# You could specify different parameters for a subdirectory like this: (no
|
# You could specify different parameters for a subdirectory like this: (no
|
||||||
# tilde substitution there for now, sorry)
|
# tilde substitution there for now, sorry)
|
||||||
#[/home/me/englishdocs/plain]
|
#[/home/me/englishdocs/plain]
|
||||||
#defaultlanguage = english
|
#defaultcharset = iso-8859-2
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user