suggest alternate spellings if no results

This commit is contained in:
dockes 2009-11-26 14:03:02 +00:00
parent 203e8a9f9d
commit f554960b9b
12 changed files with 248 additions and 66 deletions

View File

@ -19,25 +19,7 @@ static char rcsid[] = "@(#$Id: rclaspell.cpp,v 1.10 2007-12-13 06:58:21 dockes E
#include "pathut.h"
#include "execmd.h"
#include "rclaspell.h"
// Stuff that we don't wish to see in the .h (possible sysdeps, etc.)
// Private implementation data for Aspell, kept out of the public header.
class AspellData {
public:
// Start with no dynamic library handle.
AspellData() : m_handle(0) {}
// Close the dynamic library handle if one was stored.
~AspellData() {
if (m_handle)
dlclose(m_handle);
}
// Handle handed to dlclose(); 0 while nothing is loaded.
void *m_handle;
// Aspell program location (presumably a path -- confirm); Aspell::init()
// fails with "aspell program not found or not executable" when it is empty.
string m_exec;
};
// Release the private implementation data. No null guard is needed:
// deleting a null pointer is a no-op.
Aspell::~Aspell()
{
    delete m_data;
}
#include "debuglog.h"
// Just a place where we keep the Aspell library entry points together
class AspellApi {
@ -52,6 +34,7 @@ public:
struct AspellConfig * (*aspell_speller_config)(struct AspellSpeller *);
const struct AspellWordList * (*aspell_speller_suggest)
(struct AspellSpeller *, const char *, int);
int (*aspell_speller_check)(struct AspellSpeller *, const char *, int);
struct AspellStringEnumeration * (*aspell_word_list_elements)
(const struct AspellWordList * ths);
const char * (*aspell_string_enumeration_next)
@ -86,10 +69,45 @@ static const char *aspell_lib_suffixes[] = {
};
static const unsigned int nlibsuffs = sizeof(aspell_lib_suffixes) / sizeof(char *);
// Stuff that we don't wish to see in the .h (possible sysdeps, etc.)
// Private implementation data for Aspell: the dynamic library handle, the
// aspell program location and the speller object.
class AspellData {
public:
// Start with no library handle and no speller.
AspellData()
: m_handle(0), m_speller(0)
{}
// Close the library handle and forget the speller pointer. The speller
// object itself is NOT deleted (see the disabled call below).
~AspellData() {
LOGDEB2(("~AspellData\n"));
if (m_handle) {
dlclose(m_handle);
m_handle = 0;
}
if (m_speller) {
// Dumps core if I do this??
// NOTE(review): this disabled call sits after dlclose(m_handle). If the
// aapi entry points live in the library behind m_handle, calling one
// here would jump into unloaded code, which could explain the crash --
// confirm before re-enabling, probably by moving it above dlclose().
//aapi.delete_aspell_speller(m_speller);
m_speller = 0;
LOGDEB2(("~AspellData: speller done\n"));
}
}
// Handle handed to dlclose(); 0 while nothing is loaded.
void *m_handle;
// Aspell program location (presumably a path -- confirm); Aspell::init()
// fails with "aspell program not found or not executable" when it is empty.
string m_exec;
// Speller handed to the aspell_speller_* calls; 0 until make_speller()
// has stored one.
AspellSpeller *m_speller;
};
// Remember the configuration object; the private data (m_data) stays null
// until init() sets it up.
Aspell::Aspell(RclConfig *cnf)
: m_config(cnf), m_data(0)
{
}
// Release the private implementation data.
// deleteZ() is a project helper, presumably "delete and reset to 0" --
// confirm against its definition.
Aspell::~Aspell()
{
deleteZ(m_data);
}
bool Aspell::init(string &reason)
{
delete m_data;
m_data = 0;
deleteZ(m_data);
// Language: we get this from the configuration, else from the NLS
// environment. The aspell language names used for selecting language
// definition files (used to create dictionaries) are like en, fr
@ -114,6 +132,7 @@ bool Aspell::init(string &reason)
}
if (m_data->m_exec.empty()) {
reason = "aspell program not found or not executable";
deleteZ(m_data);
return false;
}
@ -151,8 +170,9 @@ bool Aspell::init(string &reason)
found:
if (m_data->m_handle == 0) {
reason += string(" : ") + dlerror();
return false;
reason += string(" : ") + dlerror();
deleteZ(m_data);
return false;
}
string badnames;
@ -172,6 +192,8 @@ bool Aspell::init(string &reason)
NMTOPTR(aspell_speller_suggest,
(const struct AspellWordList *(*)(struct AspellSpeller *,
const char *, int)));
NMTOPTR(aspell_speller_check,
(int (*)(struct AspellSpeller *, const char *, int)));
NMTOPTR(aspell_word_list_elements,
(struct AspellStringEnumeration *(*)
(const struct AspellWordList *)));
@ -189,6 +211,7 @@ bool Aspell::init(string &reason)
if (!badnames.empty()) {
reason = string("Aspell::init: symbols not found:") + badnames;
deleteZ(m_data);
return false;
}
@ -279,18 +302,16 @@ bool Aspell::buildDict(Rcl::Db &db, string &reason)
}
bool Aspell::suggest(Rcl::Db &db,
string &term, list<string> &suggestions, string &reason)
bool Aspell::make_speller(string& reason)
{
if (!ok())
return false;
if (m_data->m_speller != 0)
return true;
AspellCanHaveError *ret;
AspellSpeller *speller;
AspellConfig *config;
config = aapi.new_aspell_config();
AspellConfig *config = aapi.new_aspell_config();
aapi.aspell_config_replace(config, "lang", m_lang.c_str());
aapi.aspell_config_replace(config, "encoding", "utf-8");
aapi.aspell_config_replace(config, "master", dicPath().c_str());
@ -304,12 +325,48 @@ bool Aspell::suggest(Rcl::Db &db,
aapi.delete_aspell_can_have_error(ret);
return false;
}
speller = aapi.to_aspell_speller(ret);
config = aapi.aspell_speller_config(speller);
m_data->m_speller = aapi.to_aspell_speller(ret);
return true;
}
// Ask the speller whether term is a known word.
// Returns true when the word is accepted (an empty term is accepted too).
// Returns false when the word is rejected, in which case reason is empty,
// or when an error occurred, in which case reason is set by make_speller()
// or to "Aspell error: <message>". With !ok(), reason is left untouched.
// The db parameter is not used here.
bool Aspell::check(Rcl::Db &db, const string &term, string& reason)
{
    LOGDEB2(("Aspell::check [%s]\n", term.c_str()));

    if (!ok())
        return false;
    if (!make_speller(reason))
        return false;
    if (term.empty())
        return true; //??

    const int status = aapi.aspell_speller_check(m_data->m_speller,
                                                 term.c_str(), term.length());
    reason.clear();
    if (status == 1)
        return true;
    if (status == 0)
        return false;

    // Any other value (including -1) is reported as an error
    reason = "Aspell error: ";
    reason += aapi.aspell_speller_error_message(m_data->m_speller);
    return false;
}
bool Aspell::suggest(Rcl::Db &db, const string &term,
list<string>& suggestions, string& reason)
{
if (!ok() || !make_speller(reason))
return false;
if (term.empty())
return true; //??
AspellCanHaveError *ret;
const AspellWordList *wl =
aapi.aspell_speller_suggest(speller, term.c_str(), term.length());
aapi.aspell_speller_suggest(m_data->m_speller,
term.c_str(), term.length());
if (wl == 0) {
reason = aapi.aspell_speller_error_message(speller);
reason = aapi.aspell_speller_error_message(m_data->m_speller);
return false;
}
AspellStringEnumeration *els = aapi.aspell_word_list_elements(wl);
@ -326,8 +383,6 @@ bool Aspell::suggest(Rcl::Db &db,
suggestions.push_back(word);
}
aapi.delete_aspell_string_enumeration(els);
aapi.delete_aspell_speller(speller);
// Config belongs to speller here? aapi.delete_aspell_config(config);
return true;
}

View File

@ -31,8 +31,7 @@ class AspellData;
class Aspell {
public:
Aspell(RclConfig *cnf)
: m_config(cnf), m_data(0) {};
Aspell(RclConfig *cnf);
~Aspell();
/** Check health */
@ -45,8 +44,11 @@ class Aspell {
* of an indexing pass. */
bool buildDict(Rcl::Db &db, string &reason);
/** Check that word is in dictionary. A false return with a non-empty
 * reason signals an error (as opposed to an unknown word). */
bool check(Rcl::Db &db, const string& term, string& reason);
/** Return a list of possible expansions for a given word */
bool suggest(Rcl::Db &db, string &term, list<string> &suggestions,
bool suggest(Rcl::Db &db, const string& term, list<string> &suggestions,
string &reason);
private:
@ -54,6 +56,8 @@ class Aspell {
RclConfig *m_config;
string m_lang;
AspellData *m_data;
bool make_speller(string& reason);
};
#endif /* RCL_USE_ASPELL */

View File

@ -2,6 +2,8 @@
static char rcsid[] = "@(#$Id: reslist.cpp,v 1.52 2008-12-17 15:12:08 dockes Exp $ (C) 2005 J.F.Dockes";
#endif
#include "autoconfig.h"
#include <time.h>
#include <stdlib.h>
@ -46,11 +48,34 @@ static char rcsid[] = "@(#$Id: reslist.cpp,v 1.52 2008-12-17 15:12:08 dockes Exp
#include "reslist.h"
#include "moc_reslist.cpp"
#include "rclhelp.h"
#ifdef RCL_USE_ASPELL
#include "rclaspell.h"
#endif
#ifndef MIN
#define MIN(A,B) ((A) < (B) ? (A) : (B))
#endif
// ResListPager specialization used by the Qt result list widget. It
// overrides the pager's virtual hooks and keeps a pointer back to the
// owning ResList.
class QtGuiResListPager : public ResListPager {
public:
// p: owning result list widget; ps: passed on to the ResListPager
// constructor (presumably the page size -- confirm).
QtGuiResListPager(ResList *p, int ps)
: ResListPager(ps), m_parent(p)
{}
virtual bool append(const string& data);
virtual bool append(const string& data, int idx, const Rcl::Doc& doc);
virtual string trans(const string& in);
virtual string detailsLink();
virtual const string &parFormat();
virtual string nextUrl();
virtual string prevUrl();
virtual string pageTop();
virtual string iconPath(const string& mt);
// Fill sugg with alternate spellings for the user terms. uterms is taken
// by value to match the ResListPager declaration being overridden.
virtual void suggest(const vector<string>uterms, vector<string>&sugg);
private:
// Owning widget, as passed to the constructor.
ResList *m_parent;
};
class PlainToRichQtReslist : public PlainToRich {
public:
virtual ~PlainToRichQtReslist() {}
@ -105,6 +130,7 @@ ResList::~ResList()
QT_TR_NOOP("Preview"),
QT_TR_NOOP("Open"),
QT_TR_NOOP("(show query)"),
QT_TR_NOOP("<p><i>Alternate spellings (accents suppressed): </i>"),
};
}
@ -425,6 +451,39 @@ string QtGuiResListPager::iconPath(const string& mtype)
return iconpath;
}
// Compute alternate spellings for the user query terms.
//
// uterms: user-entered terms (by value, to match the virtual declaration).
// sugg:   output, always cleared first; receives at most one suggestion
//         (the first one aspell returns) per term that aspell does not
//         know.
//
// Does nothing beyond clearing sugg when built without RCL_USE_ASPELL,
// when the "noaspell" configuration parameter is set, or when the global
// aspell object does not exist.
void QtGuiResListPager::suggest(const vector<string>uterms, vector<string>&sugg)
{
    sugg.clear();
#ifdef RCL_USE_ASPELL
    bool noaspell = false;
    rclconfig->getConfParam("noaspell", &noaspell);
    if (noaspell)
        return;
    if (!aspell) {
        LOGERR(("QtGuiResListPager:: aspell not initialized\n"));
        return;
    }
    for (vector<string>::const_iterator uit = uterms.begin();
         uit != uterms.end(); ++uit) {
        list<string> asuggs;
        string reason;
        // Correctly spelled terms need no suggestion
        if (aspell->check(*rcldb, *uit, reason))
            continue;
        // check() returning false with a non-empty reason is an error, not
        // a misspelling. Give up on the remaining terms, but log it instead
        // of failing silently, as is done for suggest() failures below.
        if (!reason.empty()) {
            LOGERR(("QtGuiResListPager::suggest: aspell check failed: %s\n",
                    reason.c_str()));
            return;
        }
        if (!aspell->suggest(*rcldb, *uit, asuggs, reason)) {
            LOGERR(("QtGuiResListPager::suggest: aspell failed: %s\n",
                    reason.c_str()));
            continue;
        }
        // Only keep the first suggestion for each term
        if (!asuggs.empty()) {
            sugg.push_back(*asuggs.begin());
        }
    }
#endif
}
// Fill up result list window with next screen of hits
void ResList::resultPageNext()
{

View File

@ -28,24 +28,7 @@ class Q3PopupMenu;
#include "reslistpager.h"
class ResList;
// ResListPager specialization used by the Qt result list widget. It
// overrides the pager's virtual hooks and keeps a pointer back to the
// owning ResList.
class QtGuiResListPager : public ResListPager {
public:
// p: owning result list widget; ps: passed on to the ResListPager
// constructor (presumably the page size -- confirm).
QtGuiResListPager(ResList *p, int ps)
: ResListPager(ps), m_parent(p)
{}
virtual bool append(const string& data);
virtual bool append(const string& data, int idx, const Rcl::Doc& doc);
virtual string trans(const string& in);
virtual string detailsLink();
virtual const string &parFormat();
virtual string nextUrl();
virtual string prevUrl();
virtual string pageTop();
virtual string iconPath(const string& mt);
private:
// Owning widget, as passed to the constructor.
ResList *m_parent;
};
class QtGuiResListPager;
/**
* Display a list of document records. The data can be out of the history

View File

@ -123,6 +123,11 @@ class DocSequence {
{
terms.clear(); groups.clear(); gslks.clear(); return true;
}
/** Get user-input terms (before stemming etc.).
 * This default implementation has none to report: it just empties the
 * output vector. */
virtual void getUTerms(vector<string>& terms)
{
terms.clear();
}
virtual list<string> expand(Rcl::Doc &) {return list<string>();}
/** Optional functionality. Yeah, not nice */
@ -159,6 +164,10 @@ public:
{
return m_seq->getTerms(terms, groups, gslks);
}
/** Get user-input terms: forwarded unchanged to the wrapped sequence. */
virtual void getUTerms(vector<string>& terms)
{
m_seq->getUTerms(terms);
}
protected:
RefCntr<DocSequence> m_seq;

View File

@ -45,6 +45,11 @@ bool DocSequenceDb::getTerms(vector<string>& terms,
return m_fsdata->getTerms(terms, groups, gslks);
}
// Get the user-input terms from the search data.
// NOTE(review): this reads m_sdata while the neighbouring getTerms() and
// getDescription() read m_fsdata. Presumably deliberate (only the terms of
// the query as typed are wanted) -- confirm.
void DocSequenceDb::getUTerms(vector<string>& terms)
{
m_sdata->getUTerms(terms);
}
string DocSequenceDb::getDescription()
{
return m_fsdata->getDescription();

View File

@ -35,6 +35,7 @@ class DocSequenceDb : public DocSequence {
virtual bool getTerms(vector<string>& terms,
vector<vector<string> >& groups,
vector<int>& gslks);
virtual void getUTerms(vector<string>& terms);
virtual string getAbstract(Rcl::Doc &doc);
virtual bool getEnclosing(Rcl::Doc& doc, Rcl::Doc& pdoc);
virtual string getDescription();

View File

@ -98,6 +98,22 @@ void ResListPager::displayPage()
if (pageEmpty()) {
chunk += trans("<p><b>No results found</b><br>");
vector<string>uterms;
m_docSource->getUTerms(uterms);
if (!uterms.empty()) {
vector<string> spellings;
suggest(uterms, spellings);
if (!spellings.empty()) {
chunk +=
trans("<p><i>Alternate spellings (accents suppressed): </i>");
for (vector<string>::iterator it = spellings.begin();
it != spellings.end(); it++) {
chunk += *it;
chunk += " ";
}
chunk += "</p>";
}
}
} else {
unsigned int resCnt = m_docSource->getResCnt();
if (m_winfirst + m_respage.size() < resCnt) {

View File

@ -67,6 +67,9 @@ public:
virtual string prevUrl();
virtual string pageTop() {return string();}
virtual string iconPath(const string& mtype);
// Spelling suggestion hook. First parameter: user terms; sugg: output.
// This default implementation offers no suggestion: it only empties sugg.
virtual void suggest(const vector<string>, vector<string>&sugg) {
sugg.clear();
}
private:
void initall()
{

View File

@ -196,6 +196,7 @@ static void noPrefixList(const list<string>& in, list<string>& out)
#else
#define LOGABS LOGDEB2
#endif
#if 0
static void listList(const string& what, const list<string>&l)
{
string a;
@ -204,6 +205,7 @@ static void listList(const string& what, const list<string>&l)
}
LOGDEB(("%s: %s\n", what.c_str(), a.c_str()));
}
#endif
// Build a document abstract by extracting text chunks around the query terms
// This uses the db termlists, not the original document.

View File

@ -22,6 +22,7 @@ static char rcsid[] = "@(#$Id: searchdata.cpp,v 1.32 2008-12-19 09:55:36 dockes
#include <string>
#include <vector>
#include <algorithm>
#include "xapian.h"
@ -173,6 +174,15 @@ bool SearchData::getTerms(vector<string>& terms,
(*it)->getTerms(terms, groups, gslks);
return true;
}
// Gather the user terms of every clause of the query by appending them to
// terms, then leave terms sorted and free of duplicates. Anything already
// in terms on entry takes part in the sort and deduplication.
void SearchData::getUTerms(vector<string>& terms) const
{
    qlist_cit_t clause = m_query.begin();
    while (clause != m_query.end()) {
        (*clause)->getUTerms(terms);
        clause++;
    }

    sort(terms.begin(), terms.end());
    terms.erase(unique(terms.begin(), terms.end()), terms.end());
}
// Splitter callback for breaking a user string into simple terms and
// phrases. This is for parts of the user entry which would appear as
@ -249,6 +259,11 @@ public:
groups.insert(groups.end(), m_groups.begin(), m_groups.end());
return true;
}
// Append the recorded user terms (m_uterms) to terms, keeping their order.
// Always returns true.
bool getUTerms(vector<string>& terms)
{
    for (vector<string>::const_iterator ut = m_uterms.begin();
         ut != m_uterms.end(); ++ut) {
        terms.push_back(*ut);
    }
    return true;
}
private:
void expandTerm(bool dont, const string& term, list<string>& exp,
@ -265,10 +280,22 @@ private:
const string& m_stemlang;
bool m_doBoostUserTerms;
// Single terms and phrases resulting from breaking up text;
vector<string> m_uterms;
vector<string> m_terms;
vector<vector<string> > m_groups;
};
#if 0
static void listVector(const string& what, const vector<string>&l)
{
string a;
for (vector<string>::const_iterator it = l.begin(); it != l.end(); it++) {
a = a + *it + " ";
}
LOGDEB(("%s: %s\n", what.c_str(), a.c_str()));
}
#endif
/** Expand stem and wildcards
*
* @param nostemexp don't perform stem expansion. This is mainly used to
@ -300,12 +327,10 @@ void StringToXapianQ::expandTerm(bool nostemexp,
if (haswild || m_stemlang.empty())
nostemexp = true;
if (!nostemexp) {
}
if (nostemexp && !haswild) {
// Neither stemming nor wildcard expansion: just the word
sterm = term;
m_uterms.push_back(sterm);
exp.push_front(term);
exp.resize(1);
} else {
@ -314,6 +339,7 @@ void StringToXapianQ::expandTerm(bool nostemexp,
m_db.termMatch(Rcl::Db::ET_WILD, m_stemlang, term, l);
} else {
sterm = term;
m_uterms.push_back(sterm);
m_db.termMatch(Rcl::Db::ET_STEM, m_stemlang, term, l);
}
for (list<TermMatchEntry>::const_iterator it = l.begin();
@ -321,6 +347,7 @@ void StringToXapianQ::expandTerm(bool nostemexp,
exp.push_back(it->term);
}
}
//listVector("ExpandTerm:uterms now: ", m_uterms);
}
// Do distribution of string vectors: a,b c,d -> a,c a,d b,c b,d
@ -383,10 +410,10 @@ void StringToXapianQ::processSimpleSpan(const string& span, bool nostemexp,
// less wqf). This does not happen if there are wildcards anywhere
// in the search.
if (m_doBoostUserTerms && !sterm.empty()) {
xq = Xapian::Query(Xapian::Query::OP_OR,
xq,
Xapian::Query(m_prefix+sterm,
original_term_wqf_booster));
xq = Xapian::Query(Xapian::Query::OP_OR,
xq,
Xapian::Query(m_prefix+sterm,
original_term_wqf_booster));
}
pqueries.push_back(xq);
}
@ -468,6 +495,7 @@ bool StringToXapianQ::processUserString(const string &iq,
{
LOGDEB(("StringToXapianQ:: query string: [%s]\n", iq.c_str()));
ermsg.erase();
m_uterms.clear();
m_terms.clear();
m_groups.clear();
@ -589,6 +617,8 @@ bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p,
return false;
}
tr.getTerms(m_terms, m_groups);
tr.getUTerms(m_uterms);
//listVector("SearchDataClauseSimple: Uterms: ", m_uterms);
*qp = Xapian::Query(op, pqueries.begin(), pqueries.end());
return true;
}
@ -659,6 +689,7 @@ bool SearchDataClauseDist::toNativeQuery(Rcl::Db &db, void *p,
return true;
}
tr.getTerms(m_terms, m_groups);
tr.getUTerms(m_uterms);
*qp = *pqueries.begin();
return true;
}
@ -675,5 +706,9 @@ bool SearchDataClauseSub::getTerms(vector<string>& terms,
{
return m_sub.getconstptr()->getTerms(terms, groups, gslks);
}
// Get the user terms of a sub-query clause: forwarded to the wrapped
// SearchData held in m_sub.
void SearchDataClauseSub::getUTerms(vector<string>& terms) const
{
m_sub.getconstptr()->getUTerms(terms);
}
} // Namespace Rcl

View File

@ -98,6 +98,9 @@ public:
*/
bool getTerms(vector<string>& terms,
vector<vector<string> >& groups, vector<int>& gslks) const;
/** Get user-input terms (before expansion etc.) */
void getUTerms(vector<string>& terms) const;
/**
* Get/set the description field which is retrieved from xapian after
* initializing the query. It is stored here for usage in the GUI.
@ -140,7 +143,8 @@ public:
virtual string getReason() const {return m_reason;}
virtual bool getTerms(vector<string>&, vector<vector<string> >&,
vector<int>&) const = 0;
// {return true;}
virtual void getUTerms(vector<string>&) const = 0;
SClType getTp() {return m_tp;}
void setParent(SearchData *p) {m_parentSearch = p;}
virtual void setModifiers(Modifier mod) {m_modifiers = mod;}
@ -187,14 +191,19 @@ public:
gslks.insert(gslks.end(), m_groups.size(), m_slack);
return true;
}
// Append this clause's user terms (m_uterms) to terms, keeping their order.
virtual void getUTerms(vector<string>& terms) const
{
    for (vector<string>::size_type i = 0; i < m_uterms.size(); i++) {
        terms.push_back(m_uterms[i]);
    }
}
protected:
string m_text; // Raw user entry text.
string m_field; // Field specification if any
// Single terms and phrases resulting from breaking up m_text;
// valid after toNativeQuery() call
vector<string> m_terms;
vector<vector<string> > m_groups;
vector<vector<string> > m_groups;
// User terms before expansion
vector<string> m_uterms;
// Declare m_slack here. Always 0, but allows getTerms to work for
// SearchDataClauseDist
int m_slack;
@ -249,6 +258,7 @@ public:
virtual bool toNativeQuery(Rcl::Db &db, void *, const string& stemlang);
virtual bool getTerms(vector<string>&, vector<vector<string> >&,
vector<int>&) const;
virtual void getUTerms(vector<string>&) const;
protected:
RefCntr<SearchData> m_sub;
};