comments
This commit is contained in:
parent
dff7753f7f
commit
c2ec5234df
@ -1,5 +1,5 @@
|
|||||||
#ifndef lint
|
#ifndef lint
|
||||||
static char rcsid[] = "@(#$Id: stemdb.cpp,v 1.3 2006-09-19 14:30:39 dockes Exp $ (C) 2005 J.F.Dockes";
|
static char rcsid[] = "@(#$Id: stemdb.cpp,v 1.4 2006-09-20 06:21:43 dockes Exp $ (C) 2005 J.F.Dockes";
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -67,8 +67,6 @@ class DirWiper {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// Deciding if we try to stem the term. If it has numerals or capitals
|
|
||||||
// we don't
|
|
||||||
inline static bool
|
inline static bool
|
||||||
p_notlowerascii(unsigned int c)
|
p_notlowerascii(unsigned int c)
|
||||||
{
|
{
|
||||||
@ -105,7 +103,11 @@ bool createDb(Xapian::Database& xdb, const string& dbdir, const string& lang)
|
|||||||
Xapian::TermIterator it;
|
Xapian::TermIterator it;
|
||||||
for (it = xdb.allterms_begin();
|
for (it = xdb.allterms_begin();
|
||||||
it != xdb.allterms_end(); it++) {
|
it != xdb.allterms_end(); it++) {
|
||||||
// If it has any non-lowercase 7-bit char, it can't be stemmed
|
// Deciding if we try to stem the term. If it has any
|
||||||
|
// non-lowercase 7-bit char, don't. Note that
|
||||||
|
// as we are dealing with unaccented data, we are still
|
||||||
|
// processing most Western European languages (where
|
||||||
|
// most unaccented letters are ASCII)
|
||||||
string::iterator sit = (*it).begin(), eit = sit + (*it).length();
|
string::iterator sit = (*it).begin(), eit = sit + (*it).length();
|
||||||
if ((sit = find_if(sit, eit, p_notlowerascii)) != eit) {
|
if ((sit = find_if(sit, eit, p_notlowerascii)) != eit) {
|
||||||
++nostem;
|
++nostem;
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user