only autophrase if query has several terms
This commit is contained in:
parent
b363de63f0
commit
554f75c99c
@ -1,5 +1,5 @@
|
|||||||
#ifndef lint
|
#ifndef lint
|
||||||
static char rcsid[] = "@(#$Id: textsplit.cpp,v 1.26 2006-11-20 11:17:53 dockes Exp $ (C) 2004 J.F.Dockes";
|
static char rcsid[] = "@(#$Id: textsplit.cpp,v 1.27 2006-12-08 07:11:17 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||||
#endif
|
#endif
|
||||||
/*
|
/*
|
||||||
* This program is free software; you can redistribute it and/or modify
|
* This program is free software; you can redistribute it and/or modify
|
||||||
@ -349,6 +349,25 @@ bool TextSplit::text_to_words(const string &in)
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Callback class for utility function usage
|
||||||
|
class utSplitterCB : public TextSplitCB {
|
||||||
|
public:
|
||||||
|
int wcnt;
|
||||||
|
utSplitterCB() : wcnt(0) {}
|
||||||
|
bool takeword(const string &term, int pos, int bs, int be) {
|
||||||
|
wcnt++;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
int TextSplit::countWords(const string& s, TextSplit::Flags flgs)
|
||||||
|
{
|
||||||
|
utSplitterCB cb;
|
||||||
|
TextSplit splitter(&cb, flgs);
|
||||||
|
splitter.text_to_words(s);
|
||||||
|
return cb.wcnt;
|
||||||
|
}
|
||||||
|
|
||||||
#else // TEST driver ->
|
#else // TEST driver ->
|
||||||
|
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
@ -371,7 +390,7 @@ class mySplitterCB : public TextSplitCB {
|
|||||||
public:
|
public:
|
||||||
mySplitterCB() : first(1), nooutput(false) {}
|
mySplitterCB() : first(1), nooutput(false) {}
|
||||||
void setNoOut(bool val) {nooutput = val;}
|
void setNoOut(bool val) {nooutput = val;}
|
||||||
bool takeword(const std::string &term, int pos, int bs, int be) {
|
bool takeword(const string &term, int pos, int bs, int be) {
|
||||||
if (nooutput)
|
if (nooutput)
|
||||||
return true;
|
return true;
|
||||||
if (first) {
|
if (first) {
|
||||||
@ -403,10 +422,11 @@ static string teststring1 = " nouvel-an ";
|
|||||||
static string thisprog;
|
static string thisprog;
|
||||||
|
|
||||||
static string usage =
|
static string usage =
|
||||||
" textsplit [opts] [filename]\n"
|
" textsplit [opts] [filename]\n"
|
||||||
" -S: no output\n"
|
" -S: no output\n"
|
||||||
" -s: only spans\n"
|
" -s: only spans\n"
|
||||||
" -w: only words\n"
|
" -w: only words\n"
|
||||||
|
" -c: just count words\n"
|
||||||
" if filename is 'stdin', will read stdin for data (end with ^D)\n"
|
" if filename is 'stdin', will read stdin for data (end with ^D)\n"
|
||||||
" \n\n"
|
" \n\n"
|
||||||
;
|
;
|
||||||
@ -422,6 +442,7 @@ static int op_flags;
|
|||||||
#define OPT_s 0x1
|
#define OPT_s 0x1
|
||||||
#define OPT_w 0x2
|
#define OPT_w 0x2
|
||||||
#define OPT_S 0x4
|
#define OPT_S 0x4
|
||||||
|
#define OPT_c 0x8
|
||||||
|
|
||||||
int main(int argc, char **argv)
|
int main(int argc, char **argv)
|
||||||
{
|
{
|
||||||
@ -435,6 +456,7 @@ int main(int argc, char **argv)
|
|||||||
Usage();
|
Usage();
|
||||||
while (**argv)
|
while (**argv)
|
||||||
switch (*(*argv)++) {
|
switch (*(*argv)++) {
|
||||||
|
case 'c': op_flags |= OPT_c; break;
|
||||||
case 's': op_flags |= OPT_s; break;
|
case 's': op_flags |= OPT_s; break;
|
||||||
case 'S': op_flags |= OPT_S; break;
|
case 'S': op_flags |= OPT_S; break;
|
||||||
case 'w': op_flags |= OPT_w; break;
|
case 'w': op_flags |= OPT_w; break;
|
||||||
@ -455,9 +477,9 @@ int main(int argc, char **argv)
|
|||||||
flags = TextSplit::TXTS_ONLYSPANS;
|
flags = TextSplit::TXTS_ONLYSPANS;
|
||||||
else if (op_flags&OPT_w)
|
else if (op_flags&OPT_w)
|
||||||
flags = TextSplit::TXTS_NOSPANS;
|
flags = TextSplit::TXTS_NOSPANS;
|
||||||
TextSplit splitter(&cb, flags);
|
|
||||||
|
string data;
|
||||||
if (argc == 1) {
|
if (argc == 1) {
|
||||||
string data;
|
|
||||||
const char *filename = *argv++; argc--;
|
const char *filename = *argv++; argc--;
|
||||||
if (!strcmp(filename, "stdin")) {
|
if (!strcmp(filename, "stdin")) {
|
||||||
char buf[1024];
|
char buf[1024];
|
||||||
@ -467,11 +489,16 @@ int main(int argc, char **argv)
|
|||||||
}
|
}
|
||||||
} else if (!file_to_string(filename, data))
|
} else if (!file_to_string(filename, data))
|
||||||
exit(1);
|
exit(1);
|
||||||
splitter.text_to_words(data);
|
|
||||||
} else {
|
} else {
|
||||||
cout << endl << teststring << endl << endl;
|
cout << endl << teststring << endl << endl;
|
||||||
splitter.text_to_words(teststring);
|
data = teststring;
|
||||||
|
}
|
||||||
|
if (op_flags & OPT_c) {
|
||||||
|
int n = TextSplit::countWords(data, flags);
|
||||||
|
cout << n << " words" << endl;
|
||||||
|
} else {
|
||||||
|
TextSplit splitter(&cb, flags);
|
||||||
|
splitter.text_to_words(data);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
#endif // TEST
|
#endif // TEST
|
||||||
|
|||||||
@ -16,7 +16,7 @@
|
|||||||
*/
|
*/
|
||||||
#ifndef _TEXTSPLIT_H_INCLUDED_
|
#ifndef _TEXTSPLIT_H_INCLUDED_
|
||||||
#define _TEXTSPLIT_H_INCLUDED_
|
#define _TEXTSPLIT_H_INCLUDED_
|
||||||
/* @(#$Id: textsplit.h,v 1.14 2006-11-20 11:17:53 dockes Exp $ (C) 2004 J.F.Dockes */
|
/* @(#$Id: textsplit.h,v 1.15 2006-12-08 07:11:17 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
#ifndef NO_NAMESPACES
|
#ifndef NO_NAMESPACES
|
||||||
@ -27,9 +27,9 @@ using std::string;
|
|||||||
* Function class whose takeword method is called for every detected word while splitting text.
|
* Function class whose takeword method is called for every detected word while splitting text.
|
||||||
*/
|
*/
|
||||||
class TextSplitCB {
|
class TextSplitCB {
|
||||||
public:
|
public:
|
||||||
virtual ~TextSplitCB() {}
|
virtual ~TextSplitCB() {}
|
||||||
virtual bool takeword(const std::string& term,
|
virtual bool takeword(const string& term,
|
||||||
int pos, // term pos
|
int pos, // term pos
|
||||||
int bts, // byte offset of first char in term
|
int bts, // byte offset of first char in term
|
||||||
int bte // byte offset of first char after term
|
int bte // byte offset of first char after term
|
||||||
@ -43,7 +43,7 @@ class TextSplitCB {
|
|||||||
* but it's much simpler this way...
|
* but it's much simpler this way...
|
||||||
*/
|
*/
|
||||||
class TextSplit {
|
class TextSplit {
|
||||||
public:
|
public:
|
||||||
enum Flags {TXTS_NONE = 0, TXTS_ONLYSPANS = 1, TXTS_NOSPANS = 2};
|
enum Flags {TXTS_NONE = 0, TXTS_ONLYSPANS = 1, TXTS_NOSPANS = 2};
|
||||||
/**
|
/**
|
||||||
* Constructor: just store callback object
|
* Constructor: just store callback object
|
||||||
@ -53,9 +53,13 @@ class TextSplit {
|
|||||||
/**
|
/**
|
||||||
* Split text, emit words and positions.
|
* Split text, emit words and positions.
|
||||||
*/
|
*/
|
||||||
bool text_to_words(const std::string &in);
|
bool text_to_words(const string &in);
|
||||||
|
|
||||||
private:
|
// Utility functions: these do not need the user to set up a callback
|
||||||
|
// etc.
|
||||||
|
static int countWords(const string &in, Flags flgs = TXTS_ONLYSPANS);
|
||||||
|
|
||||||
|
private:
|
||||||
Flags m_flags;
|
Flags m_flags;
|
||||||
TextSplitCB *cb;
|
TextSplitCB *cb;
|
||||||
int maxWordLength;
|
int maxWordLength;
|
||||||
@ -72,8 +76,10 @@ class TextSplit {
|
|||||||
int prevpos;
|
int prevpos;
|
||||||
unsigned int prevlen;
|
unsigned int prevlen;
|
||||||
|
|
||||||
bool emitterm(bool isspan, std::string &term, int pos, int bs, int be);
|
bool emitterm(bool isspan, string &term, int pos, int bs, int be);
|
||||||
bool doemit(bool spanerase, int bp);
|
bool doemit(bool spanerase, int bp);
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
#endif /* _TEXTSPLIT_H_INCLUDED_ */
|
#endif /* _TEXTSPLIT_H_INCLUDED_ */
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
#ifndef lint
|
#ifndef lint
|
||||||
static char rcsid[] = "@(#$Id: ssearch_w.cpp,v 1.14 2006-12-08 06:45:05 dockes Exp $ (C) 2006 J.F.Dockes";
|
static char rcsid[] = "@(#$Id: ssearch_w.cpp,v 1.15 2006-12-08 07:11:17 dockes Exp $ (C) 2006 J.F.Dockes";
|
||||||
#endif
|
#endif
|
||||||
/*
|
/*
|
||||||
* This program is free software; you can redistribute it and/or modify
|
* This program is free software; you can redistribute it and/or modify
|
||||||
@ -33,6 +33,7 @@ static char rcsid[] = "@(#$Id: ssearch_w.cpp,v 1.14 2006-12-08 06:45:05 dockes E
|
|||||||
#include "searchdata.h"
|
#include "searchdata.h"
|
||||||
#include "ssearch_w.h"
|
#include "ssearch_w.h"
|
||||||
#include "refcntr.h"
|
#include "refcntr.h"
|
||||||
|
#include "textsplit.h"
|
||||||
|
|
||||||
enum SSearchType {SST_ANY = 0, SST_ALL = 1, SST_FNM = 2};
|
enum SSearchType {SST_ANY = 0, SST_ALL = 1, SST_FNM = 2};
|
||||||
|
|
||||||
@ -82,7 +83,8 @@ void SSearch::startSimpleSearch()
|
|||||||
SSearchType tp = (SSearchType)searchTypCMB->currentItem();
|
SSearchType tp = (SSearchType)searchTypCMB->currentItem();
|
||||||
|
|
||||||
if (prefs.ssearchAutoPhrase && (tp == SST_ANY || tp == SST_ALL) &&
|
if (prefs.ssearchAutoPhrase && (tp == SST_ANY || tp == SST_ALL) &&
|
||||||
u8.find_first_of("\"") == string::npos) {
|
u8.find_first_of("\"") == string::npos &&
|
||||||
|
TextSplit::countWords(u8) > 1) {
|
||||||
sdata->addClause(new Rcl::SearchDataClauseDist(Rcl::SCLT_PHRASE,
|
sdata->addClause(new Rcl::SearchDataClauseDist(Rcl::SCLT_PHRASE,
|
||||||
u8, 0));
|
u8, 0));
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user