From 53f57e75c6ebd4065643f951537eb8816cd2d0c3 Mon Sep 17 00:00:00 2001 From: Jean-Francois Dockes Date: Sat, 21 Nov 2015 12:48:09 +0100 Subject: [PATCH] rcldb syngroups: allow setting synonyms source after construction --- src/rcldb/rcldb.cpp | 8 +++++--- src/rcldb/rcldb.h | 14 ++++++++++++-- src/rcldb/rclterms.cpp | 8 +------- 3 files changed, 18 insertions(+), 12 deletions(-) diff --git a/src/rcldb/rcldb.cpp b/src/rcldb/rcldb.cpp index 87da3870..c9c412c9 100644 --- a/src/rcldb/rcldb.cpp +++ b/src/rcldb/rcldb.cpp @@ -764,9 +764,6 @@ bool Db::open(OpenMode mode, OpenError *error) } if (!m_config->getStopfile().empty()) m_stops.setFile(m_config->getStopfile()); - // Synonyms are only used at query time for now - if (mode == DbRO) - m_syngroups.setfile(m_config->getSynGroupsFile()); string dir = m_config->getDbDir(); string ermsg; @@ -1241,6 +1238,11 @@ void Db::setAbstractParams(int idxtrunc, int syntlen, int syntctxlen) m_synthAbsWordCtxLen = syntctxlen; } +bool Db::setSynGroupsFile(const string& fn) +{ + return m_syngroups.setfile(fn); +} + static const string cstr_nc("\n\r\x0c\\"); #define RECORD_APPEND(R, NM, VAL) {R += NM + "=" + VAL + "\n";} diff --git a/src/rcldb/rcldb.h b/src/rcldb/rcldb.h index 2d679648..38f34e39 100644 --- a/src/rcldb/rcldb.h +++ b/src/rcldb/rcldb.h @@ -456,6 +456,9 @@ class Db { } bool doFlush(); + // Use empty fn for no synonyms + bool setSynGroupsFile(const std::string& fn); + /* This has to be public for access by embedded Query::Native */ Native *m_ndb; private: @@ -477,13 +480,20 @@ private: // First fs occup check ? int m_occFirstCheck; + // Synonym groups. There is no strict reason that this has to be + // an Rcl::Db member, as it is only used when building each query. It + // could be a SearchData member, or even a parameter to + // Query::setQuery(). 
Otoh, building the syngroups structure from + // a file may be expensive and it's unlikely to change with every + // query, so it makes sense to cache it, and Rcl::Db is not a bad + // place for this. + SynGroups m_syngroups; + /*************** * Parameters cached out of the configuration files. Logically const * after init */ // Stop terms: those don't get indexed. StopList m_stops; - // Synonym groups - SynGroups m_syngroups; // Truncation length for stored meta fields int m_idxMetaStoredLen; diff --git a/src/rcldb/rclterms.cpp b/src/rcldb/rclterms.cpp index bd2eebba..5631fde2 100644 --- a/src/rcldb/rclterms.cpp +++ b/src/rcldb/rclterms.cpp @@ -270,16 +270,10 @@ bool Db::termMatch(int typ_sens, const string &lang, const string &_term, exp1.swap(lexp); sort(lexp.begin(), lexp.end()); lexp.erase(unique(lexp.begin(), lexp.end()), lexp.end()); - LOGDEB(("ExpTerm: stemexp: %s\n", + LOGDEB(("ExpTerm: stemexp: %s\n", stringsToString(lexp).c_str())); } - // Expand the result for synonyms. Note that doing it here - // means that multi-term synonyms will not work - // (e.g. stakhanovist -> "hard at work". We would have to - // separate the multi-word expansions for our caller to - // add them as phrases to the query. Not impossible, but - // let's keep it at single words for now. if (m_syngroups.ok() && (typ_sens & ET_SYNEXP)) { LOGDEB(("ExpTerm: got syngroups\n")); vector exp1(lexp);