wen
This commit is contained in:
parent
9d8ce3df62
commit
9661a4431e
@ -32,6 +32,7 @@ AM_CPPFLAGS = -Wall -Wno-unused -std=c++11 \
|
|||||||
$(X_CFLAGS) \
|
$(X_CFLAGS) \
|
||||||
-DRECOLL_DATADIR=\"${pkgdatadir}\" \
|
-DRECOLL_DATADIR=\"${pkgdatadir}\" \
|
||||||
-D_GNU_SOURCE \
|
-D_GNU_SOURCE \
|
||||||
|
-DTESTING_XAPIAN_SPELL \
|
||||||
$(DEFS)
|
$(DEFS)
|
||||||
|
|
||||||
ACLOCAL_AMFLAGS = -I m4
|
ACLOCAL_AMFLAGS = -I m4
|
||||||
|
|||||||
@ -197,7 +197,9 @@ static inline int whatcc(unsigned int c)
|
|||||||
#define UNICODE_IS_CJK(p) \
|
#define UNICODE_IS_CJK(p) \
|
||||||
((p) > 127 && \
|
((p) > 127 && \
|
||||||
(((p) >= 0x2E80 && (p) <= 0x2EFF) || \
|
(((p) >= 0x2E80 && (p) <= 0x2EFF) || \
|
||||||
((p) >= 0x3000 && (p) <= 0x9FFF) || \
|
((p) >= 0x3000 && (p) <= 0x309F) || \
|
||||||
|
((p) >= 0x3100 && (p) <= 0x31EF) || \
|
||||||
|
((p) >= 0x3200 && (p) <= 0x9FFF) || \
|
||||||
((p) >= 0xA700 && (p) <= 0xA71F) || \
|
((p) >= 0xA700 && (p) <= 0xA71F) || \
|
||||||
((p) >= 0xAC00 && (p) <= 0xD7AF) || \
|
((p) >= 0xAC00 && (p) <= 0xD7AF) || \
|
||||||
((p) >= 0xF900 && (p) <= 0xFAFF) || \
|
((p) >= 0xF900 && (p) <= 0xFAFF) || \
|
||||||
@ -206,10 +208,19 @@ static inline int whatcc(unsigned int c)
|
|||||||
((p) >= 0x20000 && (p) <= 0x2A6DF) || \
|
((p) >= 0x20000 && (p) <= 0x2A6DF) || \
|
||||||
((p) >= 0x2F800 && (p) <= 0x2FA1F)))
|
((p) >= 0x2F800 && (p) <= 0x2FA1F)))
|
||||||
|
|
||||||
|
#define UNICODE_IS_KATAKANA(p) \
|
||||||
|
((p) > 127 && \
|
||||||
|
(((p) >= 0x30A0 && (p) <= 0x30FF) || \
|
||||||
|
((p) >= 0x31F0 && (p) <= 0x31FF)))
|
||||||
|
|
||||||
bool TextSplit::isCJK(int c)
|
bool TextSplit::isCJK(int c)
|
||||||
{
|
{
|
||||||
return UNICODE_IS_CJK(c);
|
return UNICODE_IS_CJK(c);
|
||||||
}
|
}
|
||||||
|
bool TextSplit::isKATAKANA(int c)
|
||||||
|
{
|
||||||
|
return UNICODE_IS_KATAKANA(c);
|
||||||
|
}
|
||||||
|
|
||||||
bool TextSplit::o_processCJK = true;
|
bool TextSplit::o_processCJK = true;
|
||||||
unsigned int TextSplit::o_CJKNgramLen = 2;
|
unsigned int TextSplit::o_CJKNgramLen = 2;
|
||||||
|
|||||||
@ -92,8 +92,7 @@ public:
|
|||||||
/** Called when we encounter formfeed \f 0x0c. Override to use the event.
|
/** Called when we encounter formfeed \f 0x0c. Override to use the event.
|
||||||
* Mostly or exclusively used with pdftoxx output. Other filters mostly
|
* Mostly or exclusively used with pdftoxx output. Other filters mostly
|
||||||
* just don't know about pages. */
|
* just don't know about pages. */
|
||||||
virtual void newpage(int /*pos*/)
|
virtual void newpage(int /*pos*/) {
|
||||||
{
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Static utility functions:
|
// Static utility functions:
|
||||||
@ -111,10 +110,12 @@ public:
|
|||||||
* non-utf-8 input (iso-8859 config files work ok). This hopefully
|
* non-utf-8 input (iso-8859 config files work ok). This hopefully
|
||||||
* handles all Unicode whitespace, but needs correct utf-8 input
|
* handles all Unicode whitespace, but needs correct utf-8 input
|
||||||
*/
|
*/
|
||||||
static bool stringToStrings(const std::string &s, std::vector<std::string> &tokens);
|
static bool stringToStrings(const std::string &s,
|
||||||
|
std::vector<std::string> &tokens);
|
||||||
|
|
||||||
/** Is char CJK ? */
|
/** Is char CJK ? (excluding Katakana) */
|
||||||
static bool isCJK(int c);
|
static bool isCJK(int c);
|
||||||
|
static bool isKATAKANA(int c);
|
||||||
|
|
||||||
/** Statistics about word length (average and dispersion) can
|
/** Statistics about word length (average and dispersion) can
|
||||||
* detect bad data like undecoded base64 or other mis-identified
|
* detect bad data like undecoded base64 or other mis-identified
|
||||||
|
|||||||
@ -40,9 +40,6 @@
|
|||||||
#include "rclmain_w.h"
|
#include "rclmain_w.h"
|
||||||
#include "ssearch_w.h"
|
#include "ssearch_w.h"
|
||||||
#include "guiutils.h"
|
#include "guiutils.h"
|
||||||
#ifdef RCL_USE_ASPELL
|
|
||||||
#include "rclaspell.h"
|
|
||||||
#endif
|
|
||||||
#include "smallut.h"
|
#include "smallut.h"
|
||||||
#include "readfile.h"
|
#include "readfile.h"
|
||||||
|
|
||||||
@ -83,9 +80,6 @@ void deleteAllTempFiles()
|
|||||||
|
|
||||||
Rcl::Db *rcldb;
|
Rcl::Db *rcldb;
|
||||||
|
|
||||||
#ifdef RCL_USE_ASPELL
|
|
||||||
Aspell *aspell;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int recollNeedsExit;
|
int recollNeedsExit;
|
||||||
RclMain *mainWindow;
|
RclMain *mainWindow;
|
||||||
@ -158,10 +152,6 @@ static void recollCleanup()
|
|||||||
|
|
||||||
deleteAllTempFiles();
|
deleteAllTempFiles();
|
||||||
|
|
||||||
#ifdef RCL_USE_ASPELL
|
|
||||||
deleteZ(aspell);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
LOGDEB2("recollCleanup: done\n" );
|
LOGDEB2("recollCleanup: done\n" );
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -322,15 +312,6 @@ int main(int argc, char **argv)
|
|||||||
|
|
||||||
// fprintf(stderr, "Translations installed\n");
|
// fprintf(stderr, "Translations installed\n");
|
||||||
|
|
||||||
#ifdef RCL_USE_ASPELL
|
|
||||||
aspell = new Aspell(theconfig);
|
|
||||||
aspell->init(reason);
|
|
||||||
if (!aspell || !aspell->ok()) {
|
|
||||||
LOGDEB("Aspell speller creation failed " << (reason) << "\n" );
|
|
||||||
aspell = 0;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
string historyfile = path_cat(theconfig->getConfDir(), "history");
|
string historyfile = path_cat(theconfig->getConfDir(), "history");
|
||||||
g_dynconf = new RclDynConf(historyfile);
|
g_dynconf = new RclDynConf(historyfile);
|
||||||
if (!g_dynconf || !g_dynconf->ok()) {
|
if (!g_dynconf || !g_dynconf->ok()) {
|
||||||
|
|||||||
@ -46,11 +46,6 @@ extern void startManual(const string& helpindex);
|
|||||||
|
|
||||||
extern void applyStyleSheet(const QString&);
|
extern void applyStyleSheet(const QString&);
|
||||||
|
|
||||||
#ifdef RCL_USE_ASPELL
|
|
||||||
class Aspell;
|
|
||||||
extern Aspell *aspell;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
inline std::string qs2utf8s(const QString& qs)
|
inline std::string qs2utf8s(const QString& qs)
|
||||||
{
|
{
|
||||||
return std::string((const char *)qs.toUtf8());
|
return std::string((const char *)qs.toUtf8());
|
||||||
|
|||||||
@ -2,7 +2,7 @@ TEMPLATE = app
|
|||||||
LANGUAGE = C++
|
LANGUAGE = C++
|
||||||
|
|
||||||
VPATH = @srcdir@
|
VPATH = @srcdir@
|
||||||
DEFINES += BUILDING_RECOLL
|
DEFINES += BUILDING_RECOLL TESTING_XAPIAN_SPELL
|
||||||
|
|
||||||
@QMAKE_ENABLE_WEBKIT@ QT += webkit
|
@QMAKE_ENABLE_WEBKIT@ QT += webkit
|
||||||
@QMAKE_DISABLE_WEBKIT@ QMAKE_CXXFLAGS += -DRESLIST_TEXTBROWSER -DSNIPPETS_TEXTBROWSER
|
@QMAKE_DISABLE_WEBKIT@ QMAKE_CXXFLAGS += -DRESLIST_TEXTBROWSER -DSNIPPETS_TEXTBROWSER
|
||||||
|
|||||||
@ -55,9 +55,6 @@
|
|||||||
#include "reslist.h"
|
#include "reslist.h"
|
||||||
#include "moc_reslist.cpp"
|
#include "moc_reslist.cpp"
|
||||||
#include "rclhelp.h"
|
#include "rclhelp.h"
|
||||||
#ifdef RCL_USE_ASPELL
|
|
||||||
#include "rclaspell.h"
|
|
||||||
#endif
|
|
||||||
#include "appformime.h"
|
#include "appformime.h"
|
||||||
#include "respopup.h"
|
#include "respopup.h"
|
||||||
|
|
||||||
@ -201,53 +198,36 @@ void QtGuiResListPager::suggest(const vector<string>uterms,
|
|||||||
map<string, vector<string> >& sugg)
|
map<string, vector<string> >& sugg)
|
||||||
{
|
{
|
||||||
sugg.clear();
|
sugg.clear();
|
||||||
#ifdef RCL_USE_ASPELL
|
|
||||||
bool noaspell = false;
|
|
||||||
theconfig->getConfParam("noaspell", &noaspell);
|
|
||||||
if (noaspell)
|
|
||||||
return;
|
|
||||||
if (!aspell) {
|
|
||||||
LOGERR("QtGuiResListPager:: aspell not initialized\n" );
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool issimple = m_reslist && m_reslist->m_rclmain &&
|
bool issimple = m_reslist && m_reslist->m_rclmain &&
|
||||||
m_reslist->m_rclmain->lastSearchSimple();
|
m_reslist->m_rclmain->lastSearchSimple();
|
||||||
|
|
||||||
for (vector<string>::const_iterator uit = uterms.begin();
|
for (const auto& uit : uterms) {
|
||||||
uit != uterms.end(); uit++) {
|
vector<string> tsuggs;
|
||||||
list<string> asuggs;
|
|
||||||
string reason;
|
|
||||||
|
|
||||||
// If the term is in the dictionary, Aspell::suggest won't
|
// If the term is in the dictionary, Aspell::suggest won't
|
||||||
// list alternatives. In fact we may want to check the
|
// list alternatives. In fact we may want to check the
|
||||||
// frequencies and propose something anyway if a possible
|
// frequencies and propose something anyway if a possible
|
||||||
// variation is much more common (as google does) ?
|
// variation is much more common (as google does) ?
|
||||||
if (!aspell->suggest(*rcldb, *uit, asuggs, reason)) {
|
if (!rcldb->getSpellingSuggestions(uit, tsuggs)) {
|
||||||
LOGERR("QtGuiResListPager::suggest: aspell failed: " << (reason) << "\n" );
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// We should check that the term stems differently from the
|
// We should check that the term stems differently from the
|
||||||
// base word (else it's not useful to expand the search). Or
|
// base word (else it's not useful to expand the search). Or
|
||||||
// is it ? This should depend if stemming is turned on or not
|
// is it ? This should depend if stemming is turned on or not
|
||||||
|
|
||||||
if (!asuggs.empty()) {
|
if (!tsuggs.empty()) {
|
||||||
sugg[*uit] = vector<string>(asuggs.begin(), asuggs.end());
|
sugg[uit] = vector<string>(tsuggs.begin(), tsuggs.end());
|
||||||
if (sugg[*uit].size() > 5)
|
if (sugg[uit].size() > 5)
|
||||||
sugg[*uit].resize(5);
|
sugg[uit].resize(5);
|
||||||
// Set up the links as <a href="Sold|new">.
|
// Set up the links as <a href="Sold|new">.
|
||||||
for (vector<string>::iterator it = sugg[*uit].begin();
|
for (auto& it : sugg[uit]) {
|
||||||
it != sugg[*uit].end(); it++) {
|
|
||||||
if (issimple) {
|
if (issimple) {
|
||||||
*it = string("<a href=\"S") + *uit + "|" + *it + "\">" +
|
it = string("<a href=\"S") + uit + "|" + it + "\">" +
|
||||||
*it + "</a>";
|
it + "</a>";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
string QtGuiResListPager::iconUrl(RclConfig *config, Rcl::Doc& doc)
|
string QtGuiResListPager::iconUrl(RclConfig *config, Rcl::Doc& doc)
|
||||||
|
|||||||
@ -47,10 +47,6 @@
|
|||||||
#include "execmd.h"
|
#include "execmd.h"
|
||||||
#include "indexer.h"
|
#include "indexer.h"
|
||||||
|
|
||||||
#ifdef RCL_USE_ASPELL
|
|
||||||
#include "rclaspell.h"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
using std::list;
|
using std::list;
|
||||||
using std::multimap;
|
using std::multimap;
|
||||||
using std::string;
|
using std::string;
|
||||||
@ -64,14 +60,8 @@ void SpellW::init()
|
|||||||
m_c2t.push_back(TYPECMB_REG);
|
m_c2t.push_back(TYPECMB_REG);
|
||||||
expTypeCMB->addItem(tr("Stem expansion"));
|
expTypeCMB->addItem(tr("Stem expansion"));
|
||||||
m_c2t.push_back(TYPECMB_STEM);
|
m_c2t.push_back(TYPECMB_STEM);
|
||||||
#ifdef RCL_USE_ASPELL
|
expTypeCMB->addItem(tr("Spelling/Phonetic"));
|
||||||
bool noaspell = false;
|
m_c2t.push_back(TYPECMB_SPELL);
|
||||||
theconfig->getConfParam("noaspell", &noaspell);
|
|
||||||
if (!noaspell) {
|
|
||||||
expTypeCMB->addItem(tr("Spelling/Phonetic"));
|
|
||||||
m_c2t.push_back(TYPECMB_ASPELL);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
expTypeCMB->addItem(tr("Show index statistics"));
|
expTypeCMB->addItem(tr("Show index statistics"));
|
||||||
m_c2t.push_back(TYPECMB_STATS);
|
m_c2t.push_back(TYPECMB_STATS);
|
||||||
|
|
||||||
@ -189,37 +179,19 @@ void SpellW::doExpand()
|
|||||||
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
#ifdef RCL_USE_ASPELL
|
case TYPECMB_SPELL:
|
||||||
case TYPECMB_ASPELL:
|
|
||||||
{
|
{
|
||||||
LOGDEB("SpellW::doExpand: aspelling\n" );
|
LOGDEB("SpellW::doExpand: spelling [" << expr << "]\n" );
|
||||||
if (!aspell) {
|
vector<string> suggs;
|
||||||
QMessageBox::warning(0, "Recoll",
|
if (!rcldb->getSpellingSuggestions(expr, suggs)) {
|
||||||
tr("Aspell init failed. "
|
QMessageBox::warning(0, "Recoll", tr("Spell expansion error. "));
|
||||||
"Aspell not installed?"));
|
|
||||||
LOGDEB("SpellW::doExpand: aspell init error\n" );
|
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
list<string> suggs;
|
for (const auto& it : suggs) {
|
||||||
if (!aspell->suggest(*rcldb, expr, suggs, reason)) {
|
res.entries.push_back(Rcl::TermMatchEntry(it));
|
||||||
QMessageBox::warning(0, "Recoll",
|
}
|
||||||
tr("Aspell expansion error. "));
|
|
||||||
LOGERR("SpellW::doExpand:suggest failed: " << (reason) << "\n" );
|
|
||||||
}
|
|
||||||
for (list<string>::const_iterator it = suggs.begin();
|
|
||||||
it != suggs.end(); it++)
|
|
||||||
res.entries.push_back(Rcl::TermMatchEntry(*it));
|
|
||||||
#ifdef TESTING_XAPIAN_SPELL
|
|
||||||
string rclsugg = rcldb->getSpellingSuggestion(expr);
|
|
||||||
if (!rclsugg.empty()) {
|
|
||||||
res.entries.push_back(Rcl::TermMatchEntry("Xapian spelling:"));
|
|
||||||
res.entries.push_back(Rcl::TermMatchEntry(rclsugg));
|
|
||||||
}
|
|
||||||
#endif // TESTING_XAPIAN_SPELL
|
|
||||||
statsLBL->setText(tr("%1 results").arg(res.entries.size()));
|
statsLBL->setText(tr("%1 results").arg(res.entries.size()));
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
#endif // RCL_USE_ASPELL
|
|
||||||
|
|
||||||
case TYPECMB_STATS:
|
case TYPECMB_STATS:
|
||||||
{
|
{
|
||||||
@ -229,7 +201,6 @@ void SpellW::doExpand()
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
if (res.entries.empty()) {
|
if (res.entries.empty()) {
|
||||||
resTW->setItem(0, 0, new QTableWidgetItem(tr("No expansion found")));
|
resTW->setItem(0, 0, new QTableWidgetItem(tr("No expansion found")));
|
||||||
} else {
|
} else {
|
||||||
|
|||||||
@ -14,8 +14,8 @@
|
|||||||
* Free Software Foundation, Inc.,
|
* Free Software Foundation, Inc.,
|
||||||
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
*/
|
*/
|
||||||
#ifndef _ASPELL_W_H_INCLUDED_
|
#ifndef _SPELL_W_H_INCLUDED_
|
||||||
#define _ASPELL_W_H_INCLUDED_
|
#define _SPELL_W_H_INCLUDED_
|
||||||
|
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
@ -36,7 +36,7 @@ public:
|
|||||||
virtual bool eventFilter(QObject *target, QEvent *event );
|
virtual bool eventFilter(QObject *target, QEvent *event );
|
||||||
|
|
||||||
enum comboboxchoice {TYPECMB_NONE, TYPECMB_WILD, TYPECMB_REG, TYPECMB_STEM,
|
enum comboboxchoice {TYPECMB_NONE, TYPECMB_WILD, TYPECMB_REG, TYPECMB_STEM,
|
||||||
TYPECMB_ASPELL, TYPECMB_STATS};
|
TYPECMB_SPELL, TYPECMB_STATS};
|
||||||
public slots:
|
public slots:
|
||||||
virtual void doExpand();
|
virtual void doExpand();
|
||||||
virtual void wordChanged(const QString&);
|
virtual void wordChanged(const QString&);
|
||||||
@ -62,4 +62,4 @@ private:
|
|||||||
void setModeCommon(comboboxchoice mode);
|
void setModeCommon(comboboxchoice mode);
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif /* _ASPELL_W_H_INCLUDED_ */
|
#endif /* _SPELL_W_H_INCLUDED_ */
|
||||||
|
|||||||
@ -57,6 +57,9 @@ using namespace std;
|
|||||||
#include "rclinit.h"
|
#include "rclinit.h"
|
||||||
#include "internfile.h"
|
#include "internfile.h"
|
||||||
#include "utf8fn.h"
|
#include "utf8fn.h"
|
||||||
|
#ifdef RCL_USE_ASPELL
|
||||||
|
#include "rclaspell.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
// Recoll index format version is stored in user metadata. When this changes,
|
// Recoll index format version is stored in user metadata. When this changes,
|
||||||
// we can't open the db and will have to reindex.
|
// we can't open the db and will have to reindex.
|
||||||
@ -731,11 +734,13 @@ Db::Db(const RclConfig *cfp)
|
|||||||
|
|
||||||
Db::~Db()
|
Db::~Db()
|
||||||
{
|
{
|
||||||
LOGDEB2("Db::~Db\n" );
|
LOGDEB2("Db::~Db\n");
|
||||||
if (m_ndb == 0)
|
if (m_ndb == 0)
|
||||||
return;
|
return;
|
||||||
LOGDEB("Db::~Db: isopen " << (m_ndb->m_isopen) << " m_iswritable " << (m_ndb->m_iswritable) << "\n" );
|
LOGDEB("Db::~Db: isopen " << m_ndb->m_isopen << " m_iswritable " <<
|
||||||
|
m_ndb->m_iswritable << "\n");
|
||||||
i_close(true);
|
i_close(true);
|
||||||
|
delete m_aspell;
|
||||||
delete m_config;
|
delete m_config;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1055,9 +1060,11 @@ class TextSplitDb : public TextSplitP {
|
|||||||
// gets added to basepos in addition to the inter-section increment
|
// gets added to basepos in addition to the inter-section increment
|
||||||
// to compute the first position of the next section.
|
// to compute the first position of the next section.
|
||||||
Xapian::termpos curpos;
|
Xapian::termpos curpos;
|
||||||
|
Xapian::WritableDatabase& wdb;
|
||||||
|
|
||||||
TextSplitDb(Xapian::Document &d, TermProc *prc)
|
TextSplitDb(Xapian::WritableDatabase& _wdb, Xapian::Document &d,
|
||||||
: TextSplitP(prc), doc(d), basepos(1), curpos(0)
|
TermProc *prc)
|
||||||
|
: TextSplitP(prc), doc(d), basepos(1), curpos(0), wdb(_wdb)
|
||||||
{}
|
{}
|
||||||
|
|
||||||
// Reimplement text_to_words to insert the begin and end anchor terms.
|
// Reimplement text_to_words to insert the begin and end anchor terms.
|
||||||
@ -1132,8 +1139,8 @@ public:
|
|||||||
m_ts->doc.add_posting(term, pos, m_ts->ft.wdfinc);
|
m_ts->doc.add_posting(term, pos, m_ts->ft.wdfinc);
|
||||||
|
|
||||||
#ifdef TESTING_XAPIAN_SPELL
|
#ifdef TESTING_XAPIAN_SPELL
|
||||||
if (Db::isSpellingCandidate(term)) {
|
if (Db::isSpellingCandidate(term, false)) {
|
||||||
m_ts->db.add_spelling(term);
|
m_ts->wdb.add_spelling(term);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
// Index the prefixed term.
|
// Index the prefixed term.
|
||||||
@ -1192,30 +1199,80 @@ public:
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
#ifdef TESTING_XAPIAN_SPELL
|
// At the moment, we normally use the Xapian speller for Katakana and
|
||||||
string Db::getSpellingSuggestion(const string& word)
|
// aspell for everything else
|
||||||
|
bool Db::getSpellingSuggestions(const string& word, vector<string>& suggs)
|
||||||
{
|
{
|
||||||
if (m_ndb == 0)
|
LOGDEB("Db::getSpellingSuggestions:[" << word << "]\n" );
|
||||||
return string();
|
suggs.clear();
|
||||||
|
if (nullptr == m_ndb) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
string term = word;
|
string term = word;
|
||||||
|
|
||||||
if (o_index_stripchars)
|
if (isSpellingCandidate(term, true)) {
|
||||||
if (!unacmaybefold(word, term, "UTF-8", UNACOP_UNACFOLD)) {
|
// Term is candidate for aspell processing
|
||||||
LOGINFO("Db::getSpelling: unac failed for [" << (word) << "]\n" );
|
#ifdef RCL_USE_ASPELL
|
||||||
return string();
|
bool noaspell = false;
|
||||||
}
|
m_config->getConfParam("noaspell", &noaspell);
|
||||||
|
if (noaspell) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (nullptr == m_aspell) {
|
||||||
|
m_aspell = new Aspell(m_config);
|
||||||
|
if (m_aspell) {
|
||||||
|
string reason;
|
||||||
|
m_aspell->init(reason);
|
||||||
|
if (!m_aspell->ok()) {
|
||||||
|
LOGDEB("Aspell speller init failed: " << reason << "\n");
|
||||||
|
delete m_aspell;
|
||||||
|
m_aspell = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (!isSpellingCandidate(term))
|
if (nullptr == m_aspell) {
|
||||||
return string();
|
LOGERR("Db::getSpellingSuggestions: aspell not initialized\n");
|
||||||
return m_ndb->xrdb.get_spelling_suggestion(term);
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
list<string> asuggs;
|
||||||
|
string reason;
|
||||||
|
if (!m_aspell->suggest(*this, term, asuggs, reason)) {
|
||||||
|
LOGERR("Db::getSpellingSuggestions: aspell failed: " << reason <<
|
||||||
|
"\n");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
suggs = vector<string>(asuggs.begin(), asuggs.end());
|
||||||
#endif
|
#endif
|
||||||
|
} else {
|
||||||
|
#ifdef TESTING_XAPIAN_SPELL
|
||||||
|
// Was not aspell candidate (e.g.: katakana). Maybe use Xapian
|
||||||
|
// speller?
|
||||||
|
if (isSpellingCandidate(term, false)) {
|
||||||
|
if (!o_index_stripchars) {
|
||||||
|
if (!unacmaybefold(word, term, "UTF-8", UNACOP_UNACFOLD)) {
|
||||||
|
LOGINFO("Db::getSpelling: unac failed for [" << word <<
|
||||||
|
"]\n");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
string sugg = m_ndb->xrdb.get_spelling_suggestion(term);
|
||||||
|
if (!sugg.empty()) {
|
||||||
|
suggs.push_back(sugg);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
// Let our user set the parameters for abstract processing
|
// Let our user set the parameters for abstract processing
|
||||||
void Db::setAbstractParams(int idxtrunc, int syntlen, int syntctxlen)
|
void Db::setAbstractParams(int idxtrunc, int syntlen, int syntctxlen)
|
||||||
{
|
{
|
||||||
LOGDEB1("Db::setAbstractParams: trunc " << (idxtrunc) << " syntlen " << (syntlen) << " ctxlen " << (syntctxlen) << "\n" );
|
LOGDEB1("Db::setAbstractParams: trunc " << idxtrunc << " syntlen " <<
|
||||||
|
syntlen << " ctxlen " << syntctxlen << "\n");
|
||||||
if (idxtrunc >= 0)
|
if (idxtrunc >= 0)
|
||||||
m_idxAbsTruncLen = idxtrunc;
|
m_idxAbsTruncLen = idxtrunc;
|
||||||
if (syntlen > 0)
|
if (syntlen > 0)
|
||||||
@ -1238,7 +1295,7 @@ static const string cstr_nc("\n\r\x0c\\");
|
|||||||
// metadata), and update database
|
// metadata), and update database
|
||||||
bool Db::addOrUpdate(const string &udi, const string &parent_udi, Doc &doc)
|
bool Db::addOrUpdate(const string &udi, const string &parent_udi, Doc &doc)
|
||||||
{
|
{
|
||||||
LOGDEB("Db::add: udi [" << (udi) << "] parent [" << (parent_udi) << "]\n" );
|
LOGDEB("Db::add: udi [" << udi << "] parent [" << parent_udi << "]\n");
|
||||||
if (m_ndb == 0)
|
if (m_ndb == 0)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
@ -1259,7 +1316,7 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi, Doc &doc)
|
|||||||
if (o_index_stripchars)
|
if (o_index_stripchars)
|
||||||
nxt = &tpprep;
|
nxt = &tpprep;
|
||||||
|
|
||||||
TextSplitDb splitter(newdocument, nxt);
|
TextSplitDb splitter(m_ndb->xwdb, newdocument, nxt);
|
||||||
tpidx.setTSD(&splitter);
|
tpidx.setTSD(&splitter);
|
||||||
|
|
||||||
// Udi unique term: this is used for file existence/uptodate
|
// Udi unique term: this is used for file existence/uptodate
|
||||||
|
|||||||
@ -54,6 +54,7 @@ using std::vector;
|
|||||||
// reasonable)
|
// reasonable)
|
||||||
|
|
||||||
class RclConfig;
|
class RclConfig;
|
||||||
|
class Aspell;
|
||||||
|
|
||||||
namespace Rcl {
|
namespace Rcl {
|
||||||
|
|
||||||
@ -200,26 +201,30 @@ class Db {
|
|||||||
|
|
||||||
/** Test word for spelling correction candidate: not too long, no
|
/** Test word for spelling correction candidate: not too long, no
|
||||||
special chars... */
|
special chars... */
|
||||||
static bool isSpellingCandidate(const string& term)
|
static bool isSpellingCandidate(const string& term, bool aspell=true)
|
||||||
{
|
{
|
||||||
if (term.empty() || term.length() > 50)
|
if (term.empty() || term.length() > 50)
|
||||||
return false;
|
return false;
|
||||||
if (has_prefix(term))
|
if (has_prefix(term))
|
||||||
return false;
|
return false;
|
||||||
Utf8Iter u8i(term);
|
Utf8Iter u8i(term);
|
||||||
if (TextSplit::isCJK(*u8i))
|
if (aspell) {
|
||||||
return false;
|
if (TextSplit::isCJK(*u8i) || TextSplit::isKATAKANA(*u8i))
|
||||||
|
return false;
|
||||||
|
} else {
|
||||||
|
if (!TextSplit::isKATAKANA(*u8i)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
if (term.find_first_of(" !\"#$%&()*+,-./0123456789:;<=>?@[\\]^_`{|}~")
|
if (term.find_first_of(" !\"#$%&()*+,-./0123456789:;<=>?@[\\]^_`{|}~")
|
||||||
!= string::npos)
|
!= string::npos)
|
||||||
return false;
|
return false;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
#ifdef TESTING_XAPIAN_SPELL
|
|
||||||
/** Return spelling suggestion */
|
/** Return spelling suggestions for word in suggs */
|
||||||
string getSpellingSuggestion(const string& word);
|
bool getSpellingSuggestions(const string& word,
|
||||||
#endif
|
std::vector<std::string>& suggs);
|
||||||
|
|
||||||
/* The next two, only for searchdata, should be somehow hidden */
|
/* The next two, only for searchdata, should be somehow hidden */
|
||||||
/* Return configured stop words */
|
/* Return configured stop words */
|
||||||
@ -490,6 +495,9 @@ private:
|
|||||||
// place for this.
|
// place for this.
|
||||||
SynGroups m_syngroups;
|
SynGroups m_syngroups;
|
||||||
|
|
||||||
|
// Aspell object if needed
|
||||||
|
Aspell *m_aspell = nullptr;
|
||||||
|
|
||||||
/***************
|
/***************
|
||||||
* Parameters cached out of the configuration files. Logically const
|
* Parameters cached out of the configuration files. Logically const
|
||||||
* after init */
|
* after init */
|
||||||
|
|||||||
@ -3,6 +3,7 @@ Jean-Francois Dockes <jf at dockes.org>
|
|||||||
:date:
|
:date:
|
||||||
|
|
||||||
:recollversion: 1.23.0-2017-01-07-78b8ad
|
:recollversion: 1.23.0-2017-01-07-78b8ad
|
||||||
|
:windir: downwin-0e7f2
|
||||||
|
|
||||||
image:recoll-windows10-thumb.png[link="recoll-windows10.png"]
|
image:recoll-windows10-thumb.png[link="recoll-windows10.png"]
|
||||||
|
|
||||||
@ -35,7 +36,7 @@ files which would take space for nothing otherwise.
|
|||||||
== Installation
|
== Installation
|
||||||
|
|
||||||
- Download the
|
- Download the
|
||||||
http://www.recoll.org/windows/recoll-setup-{recollversion}.exe[Recoll
|
http://www.recoll.org/{windir}/recoll-setup-{recollversion}.exe[Recoll
|
||||||
setup file].
|
setup file].
|
||||||
|
|
||||||
- Execute the setup file. This is a vanilla installer generated by Inno
|
- Execute the setup file. This is a vanilla installer generated by Inno
|
||||||
@ -50,14 +51,14 @@ files which would take space for nothing otherwise.
|
|||||||
http://www.7-zip.org/. This is only useful if you need to index files
|
http://www.7-zip.org/. This is only useful if you need to index files
|
||||||
compressed with Unix methods (not needed for zip files).
|
compressed with Unix methods (not needed for zip files).
|
||||||
|
|
||||||
NOTE: The installer needs administrator rights in order to install to
|
//NOTE: The installer needs administrator rights in order to install to
|
||||||
`C:\Program Files`. If you want to install on a machine where you have no
|
//`C:\Program Files`. If you want to install on a machine where you have no
|
||||||
administrator rights, you can use the
|
//administrator rights, you can use the
|
||||||
http://www.recoll.org/windows/recoll-{recollversion}.7z[installation
|
//http://www.recoll.org/{windir}/recoll-{recollversion}.7z[installation
|
||||||
directory archive] instead and extract it anywhere, this works just the
|
//directory archive] instead and extract it anywhere, this works just the
|
||||||
same (you will need the free http://www.7-zip.org/[7z] to extract it). If
|
//same (you will need the free http://www.7-zip.org/[7z] to extract it). If
|
||||||
you are in this case, you can ignore the setup-related steps of the
|
//you are in this case, you can ignore the setup-related steps of the
|
||||||
procedure of course.
|
//procedure of course.
|
||||||
|
|
||||||
== Configuration
|
== Configuration
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user