From 68b25b750cdb5ce1d50a03bb956661e1c5760895 Mon Sep 17 00:00:00 2001 From: dockes Date: Fri, 2 Feb 2007 10:10:53 +0000 Subject: [PATCH] sort and uniquify termMatch results out of stem expansion --- src/rcldb/rcldb.cpp | 6 ++++-- src/rcldb/rcldb.h | 4 +++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/rcldb/rcldb.cpp b/src/rcldb/rcldb.cpp index a07a8792..7e40d726 100644 --- a/src/rcldb/rcldb.cpp +++ b/src/rcldb/rcldb.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.104 2007-01-25 15:47:23 dockes Exp $ (C) 2004 J.F.Dockes"; +static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.105 2007-02-02 10:10:53 dockes Exp $ (C) 2004 J.F.Dockes"; #endif /* * This program is free software; you can redistribute it and/or modify @@ -1410,10 +1410,12 @@ bool Db::termMatch(MatchType typ, const string &lang, if (typ == ET_STEM) { if (!stemExpand(lang, root, res, max)) return false; + res.sort(); + res.unique(); for (list<TermMatchEntry>::iterator it = res.begin(); it != res.end(); it++) { it->wcf = db.get_collection_freq(it->term); - LOGDEB(("termMatch: %d [%s]\n", it->wcf, it->term.c_str())); + LOGDEB1(("termMatch: %d [%s]\n", it->wcf, it->term.c_str())); } } else { regex_t reg; diff --git a/src/rcldb/rcldb.h b/src/rcldb/rcldb.h index 8b1562e5..4e45fa9a 100644 --- a/src/rcldb/rcldb.h +++ b/src/rcldb/rcldb.h @@ -16,7 +16,7 @@ */ #ifndef _DB_H_INCLUDED_ #define _DB_H_INCLUDED_ -/* @(#$Id: rcldb.h,v 1.45 2006-12-19 12:11:21 dockes Exp $ (C) 2004 J.F.Dockes */ +/* @(#$Id: rcldb.h,v 1.46 2007-02-02 10:10:53 dockes Exp $ (C) 2004 J.F.Dockes */ #include #include @@ -59,6 +59,8 @@ public: TermMatchEntry() : wcf(0) {} TermMatchEntry(const string&t, int f) : term(t), wcf(f) {} TermMatchEntry(const string&t) : term(t), wcf(0) {} + bool operator==(const TermMatchEntry &o) { return term == o.term;} + bool operator<(const TermMatchEntry &o) { return term < o.term;} string term; int wcf; // Within collection frequency };