Only autophrase if the query has several terms
This commit is contained in:
parent
b363de63f0
commit
554f75c99c
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: textsplit.cpp,v 1.26 2006-11-20 11:17:53 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: textsplit.cpp,v 1.27 2006-12-08 07:11:17 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
#endif
|
||||
/*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
@ -349,6 +349,25 @@ bool TextSplit::text_to_words(const string &in)
|
||||
return true;
|
||||
}
|
||||
|
||||
// Callback class for utility function usage
|
||||
class utSplitterCB : public TextSplitCB {
|
||||
public:
|
||||
int wcnt;
|
||||
utSplitterCB() : wcnt(0) {}
|
||||
bool takeword(const string &term, int pos, int bs, int be) {
|
||||
wcnt++;
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
int TextSplit::countWords(const string& s, TextSplit::Flags flgs)
|
||||
{
|
||||
utSplitterCB cb;
|
||||
TextSplit splitter(&cb, flgs);
|
||||
splitter.text_to_words(s);
|
||||
return cb.wcnt;
|
||||
}
|
||||
|
||||
#else // TEST driver ->
|
||||
|
||||
#include <unistd.h>
|
||||
@ -371,7 +390,7 @@ class mySplitterCB : public TextSplitCB {
|
||||
public:
|
||||
mySplitterCB() : first(1), nooutput(false) {}
|
||||
void setNoOut(bool val) {nooutput = val;}
|
||||
bool takeword(const std::string &term, int pos, int bs, int be) {
|
||||
bool takeword(const string &term, int pos, int bs, int be) {
|
||||
if (nooutput)
|
||||
return true;
|
||||
if (first) {
|
||||
@ -403,10 +422,11 @@ static string teststring1 = " nouvel-an ";
|
||||
static string thisprog;
|
||||
|
||||
static string usage =
|
||||
" textsplit [opts] [filename]\n"
|
||||
" -S: no output\n"
|
||||
" -s: only spans\n"
|
||||
" -w: only words\n"
|
||||
" textsplit [opts] [filename]\n"
|
||||
" -S: no output\n"
|
||||
" -s: only spans\n"
|
||||
" -w: only words\n"
|
||||
" -c: just count words\n"
|
||||
" if filename is 'stdin', will read stdin for data (end with ^D)\n"
|
||||
" \n\n"
|
||||
;
|
||||
@ -422,6 +442,7 @@ static int op_flags;
|
||||
#define OPT_s 0x1
|
||||
#define OPT_w 0x2
|
||||
#define OPT_S 0x4
|
||||
#define OPT_c 0x8
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
@ -435,6 +456,7 @@ int main(int argc, char **argv)
|
||||
Usage();
|
||||
while (**argv)
|
||||
switch (*(*argv)++) {
|
||||
case 'c': op_flags |= OPT_c; break;
|
||||
case 's': op_flags |= OPT_s; break;
|
||||
case 'S': op_flags |= OPT_S; break;
|
||||
case 'w': op_flags |= OPT_w; break;
|
||||
@ -455,9 +477,9 @@ int main(int argc, char **argv)
|
||||
flags = TextSplit::TXTS_ONLYSPANS;
|
||||
else if (op_flags&OPT_w)
|
||||
flags = TextSplit::TXTS_NOSPANS;
|
||||
TextSplit splitter(&cb, flags);
|
||||
|
||||
string data;
|
||||
if (argc == 1) {
|
||||
string data;
|
||||
const char *filename = *argv++; argc--;
|
||||
if (!strcmp(filename, "stdin")) {
|
||||
char buf[1024];
|
||||
@ -467,11 +489,16 @@ int main(int argc, char **argv)
|
||||
}
|
||||
} else if (!file_to_string(filename, data))
|
||||
exit(1);
|
||||
splitter.text_to_words(data);
|
||||
} else {
|
||||
cout << endl << teststring << endl << endl;
|
||||
splitter.text_to_words(teststring);
|
||||
data = teststring;
|
||||
}
|
||||
|
||||
if (op_flags & OPT_c) {
|
||||
int n = TextSplit::countWords(data, flags);
|
||||
cout << n << " words" << endl;
|
||||
} else {
|
||||
TextSplit splitter(&cb, flags);
|
||||
splitter.text_to_words(data);
|
||||
}
|
||||
}
|
||||
#endif // TEST
|
||||
|
||||
@ -16,7 +16,7 @@
|
||||
*/
|
||||
#ifndef _TEXTSPLIT_H_INCLUDED_
|
||||
#define _TEXTSPLIT_H_INCLUDED_
|
||||
/* @(#$Id: textsplit.h,v 1.14 2006-11-20 11:17:53 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
/* @(#$Id: textsplit.h,v 1.15 2006-12-08 07:11:17 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
|
||||
#include <string>
|
||||
#ifndef NO_NAMESPACES
|
||||
@ -27,9 +27,9 @@ using std::string;
|
||||
* Function class whose takeword method is called for every detected word while splitting text.
|
||||
*/
|
||||
class TextSplitCB {
|
||||
public:
|
||||
public:
|
||||
virtual ~TextSplitCB() {}
|
||||
virtual bool takeword(const std::string& term,
|
||||
virtual bool takeword(const string& term,
|
||||
int pos, // term pos
|
||||
int bts, // byte offset of first char in term
|
||||
int bte // byte offset of first char after term
|
||||
@ -43,7 +43,7 @@ class TextSplitCB {
|
||||
* but 'ts much simpler this way...
|
||||
*/
|
||||
class TextSplit {
|
||||
public:
|
||||
public:
|
||||
enum Flags {TXTS_NONE = 0, TXTS_ONLYSPANS = 1, TXTS_NOSPANS = 2};
|
||||
/**
|
||||
* Constructor: just store callback object
|
||||
@ -53,9 +53,13 @@ class TextSplit {
|
||||
/**
|
||||
* Split text, emit words and positions.
|
||||
*/
|
||||
bool text_to_words(const std::string &in);
|
||||
bool text_to_words(const string &in);
|
||||
|
||||
private:
|
||||
// Utility functions: these do not need the user to set up a callback
|
||||
// etc.
|
||||
static int countWords(const string &in, Flags flgs = TXTS_ONLYSPANS);
|
||||
|
||||
private:
|
||||
Flags m_flags;
|
||||
TextSplitCB *cb;
|
||||
int maxWordLength;
|
||||
@ -72,8 +76,10 @@ class TextSplit {
|
||||
int prevpos;
|
||||
unsigned int prevlen;
|
||||
|
||||
bool emitterm(bool isspan, std::string &term, int pos, int bs, int be);
|
||||
bool emitterm(bool isspan, string &term, int pos, int bs, int be);
|
||||
bool doemit(bool spanerase, int bp);
|
||||
|
||||
};
|
||||
|
||||
|
||||
#endif /* _TEXTSPLIT_H_INCLUDED_ */
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: ssearch_w.cpp,v 1.14 2006-12-08 06:45:05 dockes Exp $ (C) 2006 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: ssearch_w.cpp,v 1.15 2006-12-08 07:11:17 dockes Exp $ (C) 2006 J.F.Dockes";
|
||||
#endif
|
||||
/*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
@ -33,6 +33,7 @@ static char rcsid[] = "@(#$Id: ssearch_w.cpp,v 1.14 2006-12-08 06:45:05 dockes E
|
||||
#include "searchdata.h"
|
||||
#include "ssearch_w.h"
|
||||
#include "refcntr.h"
|
||||
#include "textsplit.h"
|
||||
|
||||
enum SSearchType {SST_ANY = 0, SST_ALL = 1, SST_FNM = 2};
|
||||
|
||||
@ -82,7 +83,8 @@ void SSearch::startSimpleSearch()
|
||||
SSearchType tp = (SSearchType)searchTypCMB->currentItem();
|
||||
|
||||
if (prefs.ssearchAutoPhrase && (tp == SST_ANY || tp == SST_ALL) &&
|
||||
u8.find_first_of("\"") == string::npos) {
|
||||
u8.find_first_of("\"") == string::npos &&
|
||||
TextSplit::countWords(u8) > 1) {
|
||||
sdata->addClause(new Rcl::SearchDataClauseDist(Rcl::SCLT_PHRASE,
|
||||
u8, 0));
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user