indents and auto

This commit is contained in:
Jean-Francois Dockes 2022-09-23 17:43:28 +02:00
parent 20c3a7ed12
commit 5087447ef6

View File

@ -1,4 +1,4 @@
/* Copyright (C) 2005 J.F.Dockes /* Copyright (C) 2005-2022 J.F.Dockes
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or * the Free Software Foundation; either version 2 of the License, or
@ -16,8 +16,7 @@
*/ */
/**
 * Management of the auxiliary databases listing stems and their expansion terms
 */
#include "autoconfig.h" #include "autoconfig.h"
@ -44,48 +43,42 @@ namespace Rcl {
/** /**
* Expand for one or several languages * Expand for one or several languages
*/ */
bool StemDb::stemExpand(const std::string& langs, const std::string& _term, bool StemDb::stemExpand(const std::string& langs, const std::string& _term, vector<string>& result)
vector<string>& result)
{ {
vector<string> llangs; vector<string> llangs;
stringToStrings(langs, llangs); stringToStrings(langs, llangs);
// The stemdb keys may have kept their diacritics or not but they // The stemdb keys may have kept their diacritics or not but they are always lower-case. It
// are always lower-case. It would be more logical for the term // would be more logical for the term transformers to perform before doing the stemming, but
// transformers to perform before doing the stemming, but this // this would be inefficient when there are several stemming languages
// would be inefficient when there are several stemming languages
string term; string term;
unacmaybefold(_term, term, "UTF-8", UNACOP_FOLD); unacmaybefold(_term, term, "UTF-8", UNACOP_FOLD);
for (vector<string>::const_iterator it = llangs.begin(); for (const auto& lang : llangs) {
it != llangs.end(); it++) { SynTermTransStem stemmer(lang);
SynTermTransStem stemmer(*it); XapComputableSynFamMember expander(getdb(), synFamStem, lang, &stemmer);
XapComputableSynFamMember expander(getdb(), synFamStem, *it, &stemmer); (void)expander.synExpand(term, result);
(void)expander.synExpand(term, result);
} }
if (!o_index_stripchars) { if (!o_index_stripchars) {
string unac; string unac;
unacmaybefold(term, unac, "UTF-8", UNACOP_UNAC); unacmaybefold(term, unac, "UTF-8", UNACOP_UNAC);
// Expand the unaccented stem, using the unaccented stem // Expand the unaccented stem, using the unaccented stem db. Because it's a different db, We
// db. Because it's a different db, We need to do it even if // need to do it even if the input has no accent (unac == term)
// the input has no accent (unac == term) for (const auto& lang : llangs) {
for (vector<string>::const_iterator it = llangs.begin(); SynTermTransStem stemmer(lang);
it != llangs.end(); it++) { XapComputableSynFamMember expander(getdb(), synFamStemUnac, lang, &stemmer);
SynTermTransStem stemmer(*it); (void)expander.synExpand(unac, result);
XapComputableSynFamMember expander(getdb(), synFamStemUnac, }
*it, &stemmer);
(void)expander.synExpand(unac, result);
}
} }
if (result.empty()) if (result.empty())
result.push_back(term); result.push_back(term);
sort(result.begin(), result.end()); sort(result.begin(), result.end());
vector<string>::iterator uit = unique(result.begin(), result.end()); auto uit = unique(result.begin(), result.end());
result.resize(uit - result.begin()); result.resize(uit - result.begin());
LOGDEB1("stemExpand:" << (langs) << ": " << (term) << " -> " << (stringsToString(result)) << "\n" ); LOGDEB1("stemExpand:" << langs << ": " << term << " -> " << stringsToString(result) << "\n");
return true; return true;
} }