Allow stem expansion for several (all) stemming languages at a time

This commit is contained in:
dockes 2007-08-01 10:04:53 +00:00
parent 4cffc83104
commit e883b24ffc
8 changed files with 77 additions and 30 deletions

View File

@ -17,7 +17,7 @@
#ifndef _GUIUTILS_H_INCLUDED_
#define _GUIUTILS_H_INCLUDED_
/*
* @(#$Id: guiutils.h,v 1.23 2007-07-12 10:13:37 dockes Exp $ (C) 2005 Jean-Francois Dockes
* @(#$Id: guiutils.h,v 1.24 2007-08-01 10:04:53 dockes Exp $ (C) 2005 Jean-Francois Dockes
* jean-francois.dockes@wanadoo.fr
*
* This program is free software; you can redistribute it and/or modify
@ -144,4 +144,6 @@ extern PrefsPack prefs;
/** Read write settings from disk file */
extern void rwSettings(bool dowrite);
extern QString g_stringAllStem, g_stringNoStem;
#endif /* _GUIUTILS_H_INCLUDED_ */

View File

@ -1,5 +1,5 @@
#ifndef lint
static char rcsid[] = "@(#$Id: rclmain_w.cpp,v 1.37 2007-08-01 07:55:03 dockes Exp $ (C) 2005 J.F.Dockes";
static char rcsid[] = "@(#$Id: rclmain_w.cpp,v 1.38 2007-08-01 10:04:53 dockes Exp $ (C) 2005 J.F.Dockes";
#endif
/*
* This program is free software; you can redistribute it and/or modify
@ -76,6 +76,7 @@ using std::pair;
#include "moc_rclmain_w.cpp"
extern "C" int XFlush(void *);
QString g_stringAllStem, g_stringNoStem;
// Taken from qt designer. Don't know why it's needed.
#if (QT_VERSION < 0x040000)
@ -108,10 +109,13 @@ void RclMain::init()
}
// Stemming language menu
m_idNoStem = preferencesMenu->insertItem(tr("(no stemming)"));
m_stemLangToId[tr("(no stemming)")] = m_idNoStem;
g_stringNoStem = tr("(no stemming)");
g_stringAllStem = tr("(all languages)");
m_idNoStem = preferencesMenu->insertItem(g_stringNoStem);
m_stemLangToId[g_stringNoStem] = m_idNoStem;
m_idAllStem = preferencesMenu->insertItem(g_stringAllStem);
m_stemLangToId[g_stringAllStem] = m_idAllStem;
LOGDEB(("idNoStem: %d\n", m_idNoStem));
// Can't get the stemming languages from the db at this stage as
// db not open yet (the case where it does not even exist makes
// things complicated). So get the languages from the config
@ -124,7 +128,8 @@ void RclMain::init()
QMessageBox::warning(0, "Recoll",
tr("error retrieving stemming languages"));
}
int curid = m_idNoStem, id; // Menu ids are negative integers
int curid = prefs.queryStemLang == "ALL" ? m_idAllStem : m_idNoStem;
int id;
for (list<string>::const_iterator it = langs.begin();
it != langs.end(); it++) {
QString qlang = QString::fromAscii(it->c_str(), it->length());
@ -248,6 +253,8 @@ void RclMain::setStemLang(int id)
QString lang;
if (id == m_idNoStem) {
lang = "";
} else if (id == m_idAllStem) {
lang = "ALL";
} else {
lang = preferencesMenu->text(id);
}
@ -265,6 +272,8 @@ void RclMain::setStemLang(const QString& lang)
int id;
if (lang == "") {
id = m_idNoStem;
} else if (lang == "ALL") {
id = m_idAllStem;
} else {
map<QString, int>::iterator it = m_stemLangToId.find(lang);
if (it == m_stemLangToId.end())
@ -399,7 +408,13 @@ void RclMain::startSearch(RefCntr<Rcl::SearchData> sdata)
if (!prefs.queryStemLang.length() == 0)
qopts |= Rcl::Db::QO_STEM;
QApplication::setOverrideCursor(QCursor(Qt::WaitCursor));
if (!rcldb->setQuery(sdata, qopts, prefs.queryStemLang.ascii())) {
string stemLang = (const char *)prefs.queryStemLang.ascii();
if (stemLang == "ALL") {
rclconfig->getConfParam("indexstemminglanguages", stemLang);
}
if (!rcldb->setQuery(sdata, qopts, stemLang)) {
QMessageBox::warning(0, "Recoll", tr("Cant start query: ") +
QString::fromAscii(rcldb->getReason().c_str()));
return;

View File

@ -126,6 +126,7 @@ private:
int m_searchId;
map<QString, int> m_stemLangToId;
int m_idNoStem;
int m_idAllStem;
virtual void init();
virtual void previewPrevOrNextInTab(Preview *, int sid, int docnum,

View File

@ -1,5 +1,5 @@
#ifndef lint
static char rcsid[] = "@(#$Id: ssearch_w.cpp,v 1.21 2007-06-12 10:33:48 dockes Exp $ (C) 2006 J.F.Dockes";
static char rcsid[] = "@(#$Id: ssearch_w.cpp,v 1.22 2007-08-01 10:04:53 dockes Exp $ (C) 2006 J.F.Dockes";
#endif
/*
* This program is free software; you can redistribute it and/or modify
@ -215,8 +215,12 @@ void SSearch::completion()
// Query database
const int max = 100;
list<Rcl::TermMatchEntry> strs;
if (!rcldb->termMatch(Rcl::Db::ET_WILD, prefs.queryStemLang.ascii(),
s, strs, max) || strs.size() == 0) {
string stemLang = (const char *)prefs.queryStemLang.ascii();
if (stemLang == "ALL") {
rclconfig->getConfParam("indexstemminglanguages", stemLang);
}
if (!rcldb->termMatch(Rcl::Db::ET_WILD, stemLang, s, strs, max) ||
strs.size() == 0) {
QApplication::beep();
return;
}

View File

@ -1,5 +1,5 @@
#ifndef lint
static char rcsid[] = "@(#$Id: uiprefs_w.cpp,v 1.20 2007-08-01 07:55:03 dockes Exp $ (C) 2005 J.F.Dockes";
static char rcsid[] = "@(#$Id: uiprefs_w.cpp,v 1.21 2007-08-01 10:04:53 dockes Exp $ (C) 2005 J.F.Dockes";
#endif
/*
* This program is free software; you can redistribute it and/or modify
@ -121,25 +121,22 @@ void UIPrefsDialog::setFromPrefs()
// Stemming language combobox
stemLangCMB->clear();
stemLangCMB->insertItem(tr("(no stemming)"));
stemLangCMB->insertItem(g_stringNoStem);
stemLangCMB->insertItem(g_stringAllStem);
list<string> langs;
if (!getStemLangs(langs)) {
QMessageBox::warning(0, "Recoll",
tr("error retrieving stemming languages"));
}
int i = 0, cur = -1;
int cur = prefs.queryStemLang == "" ? 0 : 1;
for (list<string>::const_iterator it = langs.begin();
it != langs.end(); it++) {
stemLangCMB->
insertItem(QString::fromAscii(it->c_str(), it->length()));
i++;
if (cur == -1) {
if (!strcmp(prefs.queryStemLang.ascii(), it->c_str()))
cur = i;
if (cur == 0 && !strcmp(prefs.queryStemLang.ascii(), it->c_str())) {
cur = stemLangCMB->count();
}
}
if (cur < 0)
cur = 0;
stemLangCMB->setCurrentItem(cur);
autoPhraseCB->setChecked(prefs.ssearchAutoPhrase);
@ -191,6 +188,8 @@ void UIPrefsDialog::accept()
if (stemLangCMB->currentItem() == 0) {
prefs.queryStemLang = "";
} else if (stemLangCMB->currentItem() == 1) {
prefs.queryStemLang = "ALL";
} else {
prefs.queryStemLang = stemLangCMB->currentText();
}
@ -238,6 +237,8 @@ void UIPrefsDialog::setStemLang(const QString& lang)
int cur = 0;
if (lang == "") {
cur = 0;
} else if (lang == "ALL") {
cur = 1;
} else {
for (int i = 1; i < stemLangCMB->count(); i++) {
if (lang == stemLangCMB->text(i)) {

View File

@ -1,4 +1,4 @@
/* @(#$Id: uiprefs_w.h,v 1.9 2007-08-01 07:55:03 dockes Exp $ (C) 2006 J.F.Dockes */
/* @(#$Id: uiprefs_w.h,v 1.10 2007-08-01 10:04:53 dockes Exp $ (C) 2006 J.F.Dockes */
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@ -17,7 +17,7 @@
*/
#ifndef _UIPREFS_W_H_INCLUDED_
#define _UIPREFS_W_H_INCLUDED_
/* @(#$Id: uiprefs_w.h,v 1.9 2007-08-01 07:55:03 dockes Exp $ (C) 2005 J.F.Dockes */
/* @(#$Id: uiprefs_w.h,v 1.10 2007-08-01 10:04:53 dockes Exp $ (C) 2005 J.F.Dockes */
#include <qvariant.h>
#include <qdialog.h>
@ -89,5 +89,4 @@ private:
};
#endif /* _UIPREFS_W_H_INCLUDED_ */

View File

@ -1,5 +1,5 @@
#ifndef lint
static char rcsid[] = "@(#$Id: stemdb.cpp,v 1.9 2007-05-24 09:35:02 dockes Exp $ (C) 2005 J.F.Dockes";
static char rcsid[] = "@(#$Id: stemdb.cpp,v 1.10 2007-08-01 10:04:53 dockes Exp $ (C) 2005 J.F.Dockes";
#endif
/**
@ -218,12 +218,13 @@ static string stringlistdisp(const list<string>& sl)
}
/**
* Expand term to list of all terms which stem to the same term.
* Expand term to list of all terms which stem to the same term, for one
* expansion language
*/
bool stemExpand(const std::string& dbdir,
const std::string& lang,
const std::string& term,
list<string>& result)
static bool stemExpandOne(const std::string& dbdir,
const std::string& lang,
const std::string& term,
list<string>& result)
{
try {
Xapian::Stem stemmer(lang);
@ -281,6 +282,30 @@ bool stemExpand(const std::string& dbdir,
return true;
}
/**
* Expand term to list of all terms which stem to the same term, add the
* expansion sets for possibly multiple expansion languages
*/
bool stemExpand(const std::string& dbdir,
const std::string& langs,
const std::string& term,
list<string>& result)
{
list<string> llangs;
stringToStrings(langs, llangs);
for (list<string>::const_iterator it = llangs.begin();
it != llangs.end(); it++) {
list<string> oneexp;
stemExpandOne(dbdir, *it, term, oneexp);
result.insert(result.end(), oneexp.begin(), oneexp.end());
}
result.sort();
result.unique();
return true;
}
}
}

View File

@ -1,6 +1,6 @@
#ifndef _STEMDB_H_INCLUDED_
#define _STEMDB_H_INCLUDED_
/* @(#$Id: stemdb.h,v 1.3 2006-12-19 12:11:21 dockes Exp $ (C) 2004 J.F.Dockes */
/* @(#$Id: stemdb.h,v 1.4 2007-08-01 10:04:53 dockes Exp $ (C) 2004 J.F.Dockes */
/// Stem database code
///
/// Stem databases list stems and the set of index terms they expand to. They
@ -30,7 +30,7 @@ extern bool createDb(Xapian::Database& xdb,
const std::string& dbdir, const std::string& lang);
/// Expand term to stem siblings
extern bool stemExpand(const std::string& dbdir,
const std::string& lang,
const std::string& langs,
const std::string& term,
list<string>& result);
#ifndef NO_NAMESPACES