From 554f75c99cc757b6d47df62ddeccd4d6e09b3759 Mon Sep 17 00:00:00 2001 From: dockes Date: Fri, 8 Dec 2006 07:11:17 +0000 Subject: [PATCH] only autophrase if query has several terms --- src/common/textsplit.cpp | 49 +++++++++++++++++++++++++++++++--------- src/common/textsplit.h | 20 ++++++++++------ src/qtgui/ssearch_w.cpp | 6 +++-- 3 files changed, 55 insertions(+), 20 deletions(-) diff --git a/src/common/textsplit.cpp b/src/common/textsplit.cpp index 488f7af0..24615dc1 100644 --- a/src/common/textsplit.cpp +++ b/src/common/textsplit.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: textsplit.cpp,v 1.26 2006-11-20 11:17:53 dockes Exp $ (C) 2004 J.F.Dockes"; +static char rcsid[] = "@(#$Id: textsplit.cpp,v 1.27 2006-12-08 07:11:17 dockes Exp $ (C) 2004 J.F.Dockes"; #endif /* * This program is free software; you can redistribute it and/or modify @@ -349,6 +349,25 @@ bool TextSplit::text_to_words(const string &in) return true; } +// Callback class for utility function usage +class utSplitterCB : public TextSplitCB { + public: + int wcnt; + utSplitterCB() : wcnt(0) {} + bool takeword(const string &term, int pos, int bs, int be) { + wcnt++; + return true; + } +}; + +int TextSplit::countWords(const string& s, TextSplit::Flags flgs) +{ + utSplitterCB cb; + TextSplit splitter(&cb, flgs); + splitter.text_to_words(s); + return cb.wcnt; +} + #else // TEST driver -> #include @@ -371,7 +390,7 @@ class mySplitterCB : public TextSplitCB { public: mySplitterCB() : first(1), nooutput(false) {} void setNoOut(bool val) {nooutput = val;} - bool takeword(const std::string &term, int pos, int bs, int be) { + bool takeword(const string &term, int pos, int bs, int be) { if (nooutput) return true; if (first) { @@ -403,10 +422,11 @@ static string teststring1 = " nouvel-an "; static string thisprog; static string usage = - " textsplit [opts] [filename]\n" - " -S: no output\n" - " -s: only spans\n" - " -w: only words\n" + " textsplit [opts] [filename]\n" + " -S: no output\n" + 
" -s: only spans\n" + " -w: only words\n" + " -c: just count words\n" " if filename is 'stdin', will read stdin for data (end with ^D)\n" " \n\n" ; @@ -422,6 +442,7 @@ static int op_flags; #define OPT_s 0x1 #define OPT_w 0x2 #define OPT_S 0x4 +#define OPT_c 0x8 int main(int argc, char **argv) { @@ -435,6 +456,7 @@ int main(int argc, char **argv) Usage(); while (**argv) switch (*(*argv)++) { + case 'c': op_flags |= OPT_c; break; case 's': op_flags |= OPT_s; break; case 'S': op_flags |= OPT_S; break; case 'w': op_flags |= OPT_w; break; @@ -455,9 +477,9 @@ int main(int argc, char **argv) flags = TextSplit::TXTS_ONLYSPANS; else if (op_flags&OPT_w) flags = TextSplit::TXTS_NOSPANS; - TextSplit splitter(&cb, flags); + + string data; if (argc == 1) { - string data; const char *filename = *argv++; argc--; if (!strcmp(filename, "stdin")) { char buf[1024]; @@ -467,11 +489,16 @@ int main(int argc, char **argv) } } else if (!file_to_string(filename, data)) exit(1); - splitter.text_to_words(data); } else { cout << endl << teststring << endl << endl; - splitter.text_to_words(teststring); + data = teststring; } - + if (op_flags & OPT_c) { + int n = TextSplit::countWords(data, flags); + cout << n << " words" << endl; + } else { + TextSplit splitter(&cb, flags); + splitter.text_to_words(data); + } } #endif // TEST diff --git a/src/common/textsplit.h b/src/common/textsplit.h index 2d157764..50bcabaf 100644 --- a/src/common/textsplit.h +++ b/src/common/textsplit.h @@ -16,7 +16,7 @@ */ #ifndef _TEXTSPLIT_H_INCLUDED_ #define _TEXTSPLIT_H_INCLUDED_ -/* @(#$Id: textsplit.h,v 1.14 2006-11-20 11:17:53 dockes Exp $ (C) 2004 J.F.Dockes */ +/* @(#$Id: textsplit.h,v 1.15 2006-12-08 07:11:17 dockes Exp $ (C) 2004 J.F.Dockes */ #include #ifndef NO_NAMESPACES @@ -27,9 +27,9 @@ using std::string; * Function class whose takeword method is called for every detected word while * splitting text. 
*/ class TextSplitCB { - public: +public: virtual ~TextSplitCB() {} - virtual bool takeword(const std::string& term, + virtual bool takeword(const string& term, int pos, // term pos int bts, // byte offset of first char in term int bte // byte offset of first char after term @@ -43,7 +43,7 @@ class TextSplitCB { * but 'ts much simpler this way... */ class TextSplit { - public: +public: enum Flags {TXTS_NONE = 0, TXTS_ONLYSPANS = 1, TXTS_NOSPANS = 2}; /** * Constructor: just store callback object @@ -53,9 +53,13 @@ class TextSplit { /** * Split text, emit words and positions. */ - bool text_to_words(const std::string &in); + bool text_to_words(const string &in); - private: + // Utility functions : these do not need the user to set up a callback + // etc. + static int countWords(const string &in, Flags flgs = TXTS_ONLYSPANS); + +private: Flags m_flags; TextSplitCB *cb; int maxWordLength; @@ -72,8 +76,10 @@ class TextSplit { int prevpos; unsigned int prevlen; - bool emitterm(bool isspan, std::string &term, int pos, int bs, int be); + bool emitterm(bool isspan, string &term, int pos, int bs, int be); bool doemit(bool spanerase, int bp); + }; + #endif /* _TEXTSPLIT_H_INCLUDED_ */ diff --git a/src/qtgui/ssearch_w.cpp b/src/qtgui/ssearch_w.cpp index bd82831d..390a138c 100644 --- a/src/qtgui/ssearch_w.cpp +++ b/src/qtgui/ssearch_w.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: ssearch_w.cpp,v 1.14 2006-12-08 06:45:05 dockes Exp $ (C) 2006 J.F.Dockes"; +static char rcsid[] = "@(#$Id: ssearch_w.cpp,v 1.15 2006-12-08 07:11:17 dockes Exp $ (C) 2006 J.F.Dockes"; #endif /* * This program is free software; you can redistribute it and/or modify @@ -33,6 +33,7 @@ static char rcsid[] = "@(#$Id: ssearch_w.cpp,v 1.14 2006-12-08 06:45:05 dockes E #include "searchdata.h" #include "ssearch_w.h" #include "refcntr.h" +#include "textsplit.h" enum SSearchType {SST_ANY = 0, SST_ALL = 1, SST_FNM = 2}; @@ -82,7 +83,8 @@ void SSearch::startSimpleSearch() SSearchType tp = 
(SSearchType)searchTypCMB->currentItem(); if (prefs.ssearchAutoPhrase && (tp == SST_ANY || tp == SST_ALL) && - u8.find_first_of("\"") == string::npos) { + u8.find_first_of("\"") == string::npos && + TextSplit::countWords(u8) > 1) { sdata->addClause(new Rcl::SearchDataClauseDist(Rcl::SCLT_PHRASE, u8, 0)); }