Apply stemming to terms containing a single dash (e.g. thread-safe, thread-safeness)
This commit is contained in:
parent
5087447ef6
commit
b1b0a41973
@ -1,4 +1,4 @@
|
|||||||
/* Copyright (C) 2004-2018 J.F.Dockes
|
/* Copyright (C) 2004-2022 J.F.Dockes
|
||||||
* This program is free software; you can redistribute it and/or modify
|
* This program is free software; you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
* the Free Software Foundation; either version 2 of the License, or
|
* the Free Software Foundation; either version 2 of the License, or
|
||||||
@ -869,6 +869,7 @@ bool Db::Native::purgeFileWrite(bool orphansOnly, const string& udi,
|
|||||||
/* Rcl::Db methods ///////////////////////////////// */
|
/* Rcl::Db methods ///////////////////////////////// */
|
||||||
|
|
||||||
bool Db::o_inPlaceReset;
|
bool Db::o_inPlaceReset;
|
||||||
|
bool Db::o_nospell_chars[256];
|
||||||
|
|
||||||
Db::Db(const RclConfig *cfp)
|
Db::Db(const RclConfig *cfp)
|
||||||
{
|
{
|
||||||
@ -885,6 +886,10 @@ Db::Db(const RclConfig *cfp)
|
|||||||
start_of_field_term = "XXST/";
|
start_of_field_term = "XXST/";
|
||||||
end_of_field_term = "XXND/";
|
end_of_field_term = "XXND/";
|
||||||
}
|
}
|
||||||
|
memset(o_nospell_chars, 0, sizeof(o_nospell_chars));
|
||||||
|
for (unsigned char c : " !\"#$%&()*+,-./0123456789:;<=>?@[\\]^_`{|}~") {
|
||||||
|
o_nospell_chars[(unsigned int)c] = 1;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
m_ndb = new Native(this);
|
m_ndb = new Native(this);
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
/* Copyright (C) 2004 J.F.Dockes
|
/* Copyright (C) 2004-2022 J.F.Dockes
|
||||||
* This program is free software; you can redistribute it and/or modify
|
* This program is free software; you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
* the Free Software Foundation; either version 2 of the License, or
|
* the Free Software Foundation; either version 2 of the License, or
|
||||||
@ -259,22 +259,26 @@ public:
|
|||||||
return false;
|
return false;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
if (term.find_first_of(" !\"#$%&()*+,-./0123456789:;<=>?@[\\]^_`{|}~")
|
|
||||||
!= string::npos)
|
// Most punctuation chars inhibit stemming. We accept one dash. See o_nospell_chars init in
|
||||||
return false;
|
// the rcldb constructor.
|
||||||
|
int ccnt = 0;
|
||||||
|
for (unsigned char c : term) {
|
||||||
|
if (o_nospell_chars[(unsigned int)c] && (c != '-' || ++ccnt > 1))
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Return spelling suggestion */
|
/** Return spelling suggestion */
|
||||||
bool getSpellingSuggestions(const string& word,
|
bool getSpellingSuggestions(const string& word, std::vector<std::string>& suggs);
|
||||||
std::vector<std::string>& suggs);
|
|
||||||
|
|
||||||
/* The next two, only for searchdata, should be somehow hidden */
|
/* The next two, only for searchdata, should be somehow hidden */
|
||||||
/* Return configured stop words */
|
/* Return configured stop words */
|
||||||
const StopList& getStopList() const {return m_stops;}
|
const StopList& getStopList() const {return m_stops;}
|
||||||
/* Field name to prefix translation (ie: author -> 'A') */
|
/* Field name to prefix translation (ie: author -> 'A') */
|
||||||
bool fieldToTraits(const string& fldname, const FieldTraits **ftpp,
|
bool fieldToTraits(const string& fldname, const FieldTraits **ftpp, bool isquery = false);
|
||||||
bool isquery = false);
|
|
||||||
|
|
||||||
/* Update-related methods ******************************************/
|
/* Update-related methods ******************************************/
|
||||||
|
|
||||||
@ -422,9 +426,8 @@ public:
|
|||||||
int matchTypeTp(int tp) {
|
int matchTypeTp(int tp) {
|
||||||
return tp & 7;
|
return tp & 7;
|
||||||
}
|
}
|
||||||
bool termMatch(int typ_sens, const string &lang, const string &term,
|
bool termMatch(int typ_sens, const string &lang, const string &term, TermMatchResult& result,
|
||||||
TermMatchResult& result, int max = -1,
|
int max = -1, const string& field = "", vector<string> *multiwords = 0);
|
||||||
const string& field = "", vector<string> *multiwords = 0);
|
|
||||||
bool dbStats(DbStats& stats, bool listFailed);
|
bool dbStats(DbStats& stats, bool listFailed);
|
||||||
/** Return min and max years for doc mod times in db */
|
/** Return min and max years for doc mod times in db */
|
||||||
bool maxYearSpan(int *minyear, int *maxyear);
|
bool maxYearSpan(int *minyear, int *maxyear);
|
||||||
@ -521,8 +524,7 @@ public:
|
|||||||
/** Test term existence */
|
/** Test term existence */
|
||||||
bool termExists(const string& term);
|
bool termExists(const string& term);
|
||||||
/** Test if terms stem to different roots. */
|
/** Test if terms stem to different roots. */
|
||||||
bool stemDiffers(const string& lang, const string& term,
|
bool stemDiffers(const string& lang, const string& term, const string& base);
|
||||||
const string& base);
|
|
||||||
|
|
||||||
const RclConfig *getConf() {return m_config;}
|
const RclConfig *getConf() {return m_config;}
|
||||||
|
|
||||||
@ -620,6 +622,7 @@ private:
|
|||||||
// beginning, with the advantage that, for small index formats updates,
|
// beginning, with the advantage that, for small index formats updates,
|
||||||
// between releases the index remains available while being recreated.
|
// between releases the index remains available while being recreated.
|
||||||
static bool o_inPlaceReset;
|
static bool o_inPlaceReset;
|
||||||
|
static bool o_nospell_chars[256];
|
||||||
/******* End logical constness */
|
/******* End logical constness */
|
||||||
|
|
||||||
#ifdef IDX_THREADS
|
#ifdef IDX_THREADS
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user