Most of the advanced search is working. Subtree and filename filters are still needed.
This commit is contained in:
parent
04a926456a
commit
ce740a26ad
@ -1,5 +1,5 @@
|
|||||||
#ifndef lint
|
#ifndef lint
|
||||||
static char rcsid[] = "@(#$Id: textsplit.cpp,v 1.12 2005-09-22 14:09:04 dockes Exp $ (C) 2004 J.F.Dockes";
|
static char rcsid[] = "@(#$Id: textsplit.cpp,v 1.13 2005-10-19 10:21:48 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||||
#endif
|
#endif
|
||||||
#ifndef TEST_TEXTSPLIT
|
#ifndef TEST_TEXTSPLIT
|
||||||
|
|
||||||
@ -82,11 +82,6 @@ bool TextSplit::emitterm(bool isspan, string &w, int pos,
|
|||||||
{
|
{
|
||||||
LOGDEB2(("TextSplit::emitterm: '%s' pos %d\n", w.c_str(), pos));
|
LOGDEB2(("TextSplit::emitterm: '%s' pos %d\n", w.c_str(), pos));
|
||||||
|
|
||||||
// It may happen that our cleanup would result in emitting the
|
|
||||||
// same term twice. We try to avoid this
|
|
||||||
static string prevterm;
|
|
||||||
static int prevpos = -1;
|
|
||||||
|
|
||||||
if (!cb)
|
if (!cb)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
|||||||
@ -1,6 +1,6 @@
|
|||||||
#ifndef _TEXTSPLIT_H_INCLUDED_
|
#ifndef _TEXTSPLIT_H_INCLUDED_
|
||||||
#define _TEXTSPLIT_H_INCLUDED_
|
#define _TEXTSPLIT_H_INCLUDED_
|
||||||
/* @(#$Id: textsplit.h,v 1.7 2005-10-10 13:25:23 dockes Exp $ (C) 2004 J.F.Dockes */
|
/* @(#$Id: textsplit.h,v 1.8 2005-10-19 10:21:48 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
#ifndef NO_NAMESPACES
|
#ifndef NO_NAMESPACES
|
||||||
@ -28,6 +28,10 @@ class TextSplitCB {
|
|||||||
*/
|
*/
|
||||||
class TextSplit {
|
class TextSplit {
|
||||||
bool fq; // for query: Are we splitting for query or index ?
|
bool fq; // for query: Are we splitting for query or index ?
|
||||||
|
// It may happen that our cleanup would result in emitting the
|
||||||
|
// same term twice. We try to avoid this
|
||||||
|
string prevterm;
|
||||||
|
int prevpos;
|
||||||
TextSplitCB *cb;
|
TextSplitCB *cb;
|
||||||
int maxWordLength;
|
int maxWordLength;
|
||||||
bool emitterm(bool isspan, std::string &term, int pos, int bs, int be);
|
bool emitterm(bool isspan, std::string &term, int pos, int bs, int be);
|
||||||
@ -38,7 +42,7 @@ class TextSplit {
|
|||||||
* Constructor: just store callback object
|
* Constructor: just store callback object
|
||||||
*/
|
*/
|
||||||
TextSplit(TextSplitCB *t, bool forquery = false)
|
TextSplit(TextSplitCB *t, bool forquery = false)
|
||||||
: fq(forquery), cb(t), maxWordLength(40) {}
|
: fq(forquery), prevpos(-1), cb(t), maxWordLength(40) {}
|
||||||
/**
|
/**
|
||||||
* Split text, emit words and positions.
|
* Split text, emit words and positions.
|
||||||
*/
|
*/
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
#!/bin/sh
|
#!/bin/sh
|
||||||
# @(#$Id: rcldoc,v 1.1 2005-02-09 12:07:30 dockes Exp $ (C) 2004 J.F.Dockes
|
# @(#$Id: rcldoc,v 1.2 2005-10-19 10:21:48 dockes Exp $ (C) 2004 J.F.Dockes
|
||||||
# Parts taken from Estraier:
|
# Parts taken from Estraier:
|
||||||
#================================================================
|
#================================================================
|
||||||
# Estraier: a personal full-text search system
|
# Estraier: a personal full-text search system
|
||||||
@ -35,6 +35,31 @@ fi
|
|||||||
|
|
||||||
infile="$1"
|
infile="$1"
|
||||||
|
|
||||||
|
iscmd()
|
||||||
|
{
|
||||||
|
cmd=$1
|
||||||
|
case $cmd in
|
||||||
|
*/*)
|
||||||
|
if test -x $cmd ; then return 0; else return 1; fi ;;
|
||||||
|
*)
|
||||||
|
IFS=: ; set -- $PATH; unset IFS
|
||||||
|
for d in $* ; do test -x $d/$cmd && return 0; done
|
||||||
|
return 1 ;;
|
||||||
|
esac
|
||||||
|
}
|
||||||
|
checkcmds()
|
||||||
|
{
|
||||||
|
for cmd in $*;do
|
||||||
|
if iscmd $cmd
|
||||||
|
then
|
||||||
|
else
|
||||||
|
echo $cmd not found 1>&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
}
|
||||||
|
checkcmds awk antiword iconv
|
||||||
|
|
||||||
# check the input file existence
|
# check the input file existence
|
||||||
if test ! -f "$infile"
|
if test ! -f "$infile"
|
||||||
then
|
then
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
#!/bin/sh
|
#!/bin/sh
|
||||||
# @(#$Id: rclpdf,v 1.1 2005-02-01 17:20:05 dockes Exp $ (C) 2004 J.F.Dockes
|
# @(#$Id: rclpdf,v 1.2 2005-10-19 10:21:48 dockes Exp $ (C) 2004 J.F.Dockes
|
||||||
# This is copied almost verbatim from Estraier:
|
# This is copied almost verbatim from Estraier:
|
||||||
#================================================================
|
#================================================================
|
||||||
# Estraier: a personal full-text search system
|
# Estraier: a personal full-text search system
|
||||||
@ -33,6 +33,31 @@ then
|
|||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
iscmd()
|
||||||
|
{
|
||||||
|
cmd=$1
|
||||||
|
case $cmd in
|
||||||
|
*/*)
|
||||||
|
if test -x $cmd ; then return 0; else return 1; fi ;;
|
||||||
|
*)
|
||||||
|
IFS=: ; set -- $PATH; unset IFS
|
||||||
|
for d in $* ; do test -x $d/$cmd && return 0; done
|
||||||
|
return 1 ;;
|
||||||
|
esac
|
||||||
|
}
|
||||||
|
checkcmds()
|
||||||
|
{
|
||||||
|
for cmd in $*;do
|
||||||
|
if iscmd $cmd
|
||||||
|
then
|
||||||
|
else
|
||||||
|
echo $cmd not found 1>&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
}
|
||||||
|
checkcmds pdftotext iconv awk
|
||||||
|
|
||||||
# output the result
|
# output the result
|
||||||
pdftotext -raw -htmlmeta -enc UTF-8 -eol unix -q "$infile" - |
|
pdftotext -raw -htmlmeta -enc UTF-8 -eol unix -q "$infile" - |
|
||||||
iconv -f UTF-8 -t UTF-8 -c -s |
|
iconv -f UTF-8 -t UTF-8 -c -s |
|
||||||
@ -80,5 +105,3 @@ BEGIN {
|
|||||||
# didn't really understand its use as iconv -c is supposed to fix the
|
# didn't really understand its use as iconv -c is supposed to fix the
|
||||||
# encoding anyway
|
# encoding anyway
|
||||||
|
|
||||||
# exit normally
|
|
||||||
exit 0
|
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
#!/bin/sh
|
#!/bin/sh
|
||||||
# @(#$Id: rclps,v 1.1 2005-02-02 17:57:08 dockes Exp $ (C) 2004 J.F.Dockes
|
# @(#$Id: rclps,v 1.2 2005-10-19 10:21:48 dockes Exp $ (C) 2004 J.F.Dockes
|
||||||
# Parts taken from Estraier:
|
# Parts taken from Estraier:
|
||||||
#================================================================
|
#================================================================
|
||||||
# Estraier: a personal full-text search system
|
# Estraier: a personal full-text search system
|
||||||
@ -34,6 +34,31 @@ fi
|
|||||||
|
|
||||||
infile="$1"
|
infile="$1"
|
||||||
|
|
||||||
|
iscmd()
|
||||||
|
{
|
||||||
|
cmd=$1
|
||||||
|
case $cmd in
|
||||||
|
*/*)
|
||||||
|
if test -x $cmd ; then return 0; else return 1; fi ;;
|
||||||
|
*)
|
||||||
|
IFS=: ; set -- $PATH; unset IFS
|
||||||
|
for d in $* ; do test -x $d/$cmd && return 0; done
|
||||||
|
return 1 ;;
|
||||||
|
esac
|
||||||
|
}
|
||||||
|
checkcmds()
|
||||||
|
{
|
||||||
|
for cmd in $*;do
|
||||||
|
if iscmd $cmd
|
||||||
|
then
|
||||||
|
else
|
||||||
|
echo $cmd not found 1>&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
}
|
||||||
|
checkcmds $decoder iconv awk
|
||||||
|
|
||||||
# check the input file existence
|
# check the input file existence
|
||||||
if test ! -f "$infile"
|
if test ! -f "$infile"
|
||||||
then
|
then
|
||||||
@ -69,5 +94,3 @@ END {
|
|||||||
printf("</p></body></html>\n");
|
printf("</p></body></html>\n");
|
||||||
}' | iconv -f iso-8859-1 -t UTF-8 -c -s
|
}' | iconv -f iso-8859-1 -t UTF-8 -c -s
|
||||||
|
|
||||||
# exit normally
|
|
||||||
exit 0
|
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
#!/bin/sh
|
#!/bin/sh
|
||||||
# @(#$Id: rclsoff,v 1.1 2005-02-09 12:07:30 dockes Exp $ (C) 2004 J.F.Dockes
|
# @(#$Id: rclsoff,v 1.2 2005-10-19 10:21:48 dockes Exp $ (C) 2004 J.F.Dockes
|
||||||
# Parts taken from Estraier:
|
# Parts taken from Estraier:
|
||||||
#================================================================
|
#================================================================
|
||||||
# Estraier: a personal full-text search system
|
# Estraier: a personal full-text search system
|
||||||
@ -28,6 +28,31 @@ fi
|
|||||||
|
|
||||||
infile="$1"
|
infile="$1"
|
||||||
|
|
||||||
|
iscmd()
|
||||||
|
{
|
||||||
|
cmd=$1
|
||||||
|
case $cmd in
|
||||||
|
*/*)
|
||||||
|
if test -x $cmd ; then return 0; else return 1; fi ;;
|
||||||
|
*)
|
||||||
|
IFS=: ; set -- $PATH; unset IFS
|
||||||
|
for d in $* ; do test -x $d/$cmd && return 0; done
|
||||||
|
return 1 ;;
|
||||||
|
esac
|
||||||
|
}
|
||||||
|
checkcmds()
|
||||||
|
{
|
||||||
|
for cmd in $*;do
|
||||||
|
if iscmd $cmd
|
||||||
|
then
|
||||||
|
else
|
||||||
|
echo $cmd not found 1>&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
}
|
||||||
|
checkcmds awk iconv unzip
|
||||||
|
|
||||||
# check the input file existence
|
# check the input file existence
|
||||||
if test ! -f "$infile"
|
if test ! -f "$infile"
|
||||||
then
|
then
|
||||||
|
|||||||
@ -413,7 +413,7 @@
|
|||||||
<include location="local" impldecl="in implementation">advsearch.ui.h</include>
|
<include location="local" impldecl="in implementation">advsearch.ui.h</include>
|
||||||
</includes>
|
</includes>
|
||||||
<signals>
|
<signals>
|
||||||
<signal>startSearch(AdvSearchData)</signal>
|
<signal>startSearch(Rcl::AdvSearchData)</signal>
|
||||||
</signals>
|
</signals>
|
||||||
<slots>
|
<slots>
|
||||||
<slot>delFiltypPB_clicked()</slot>
|
<slot>delFiltypPB_clicked()</slot>
|
||||||
|
|||||||
@ -91,14 +91,14 @@ void advsearch::restrictFtCB_toggled(bool on)
|
|||||||
|
|
||||||
void advsearch::searchPB_clicked()
|
void advsearch::searchPB_clicked()
|
||||||
{
|
{
|
||||||
AdvSearchData mydata;
|
Rcl::AdvSearchData mydata;
|
||||||
mydata.allwords = string((const char*)(andWordsLE->text().utf8()));
|
mydata.allwords = string((const char*)(andWordsLE->text().utf8()));
|
||||||
mydata.phrase = string((const char*)(phraseLE->text().utf8()));
|
mydata.phrase = string((const char*)(phraseLE->text().utf8()));
|
||||||
mydata.orwords = string((const char*)(orWordsLE->text().utf8()));
|
mydata.orwords = string((const char*)(orWordsLE->text().utf8()));
|
||||||
mydata.nowords = string((const char*)(noWordsLE->text().utf8()));
|
mydata.nowords = string((const char*)(noWordsLE->text().utf8()));
|
||||||
if (restrictFtCB->isOn() && noFiltypsLB->count() > 0) {
|
if (restrictFtCB->isOn() && noFiltypsLB->count() > 0) {
|
||||||
for (unsigned int i = 0; i < yesFiltypsLB->count(); i++) {
|
for (unsigned int i = 0; i < yesFiltypsLB->count(); i++) {
|
||||||
QCString ctext = noFiltypsLB->item(i)->text().utf8();
|
QCString ctext = yesFiltypsLB->item(i)->text().utf8();
|
||||||
mydata.filetypes.push_back(string((const char *)ctext));
|
mydata.filetypes.push_back(string((const char *)ctext));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
#ifndef lint
|
#ifndef lint
|
||||||
static char rcsid[] = "@(#$Id: main.cpp,v 1.9 2005-10-10 12:29:42 dockes Exp $ (C) 2005 J.F.Dockes";
|
static char rcsid[] = "@(#$Id: main.cpp,v 1.10 2005-10-19 10:21:48 dockes Exp $ (C) 2005 J.F.Dockes";
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
@ -10,8 +10,9 @@ static char rcsid[] = "@(#$Id: main.cpp,v 1.9 2005-10-10 12:29:42 dockes Exp $ (
|
|||||||
|
|
||||||
#include <qmessagebox.h>
|
#include <qmessagebox.h>
|
||||||
|
|
||||||
#include "recollmain.h"
|
|
||||||
#include "rcldb.h"
|
#include "rcldb.h"
|
||||||
|
using Rcl::AdvSearchData;
|
||||||
|
|
||||||
#include "rclconfig.h"
|
#include "rclconfig.h"
|
||||||
#include "pathut.h"
|
#include "pathut.h"
|
||||||
#include "recoll.h"
|
#include "recoll.h"
|
||||||
@ -19,11 +20,43 @@ static char rcsid[] = "@(#$Id: main.cpp,v 1.9 2005-10-10 12:29:42 dockes Exp $ (
|
|||||||
#include "wipedir.h"
|
#include "wipedir.h"
|
||||||
#include "rclinit.h"
|
#include "rclinit.h"
|
||||||
|
|
||||||
|
#include "recollmain.h"
|
||||||
|
|
||||||
RclConfig *rclconfig;
|
RclConfig *rclconfig;
|
||||||
Rcl::Db *rcldb;
|
Rcl::Db *rcldb;
|
||||||
int recollNeedsExit;
|
int recollNeedsExit;
|
||||||
string tmpdir;
|
string tmpdir;
|
||||||
|
|
||||||
|
void getQueryStemming(bool &dostem, std::string &stemlang)
|
||||||
|
{
|
||||||
|
string param;
|
||||||
|
if (rclconfig->getConfParam("querystemming", param))
|
||||||
|
dostem = ConfTree::stringToBool(param);
|
||||||
|
else
|
||||||
|
dostem = false;
|
||||||
|
if (!rclconfig->getConfParam("querystemminglanguage", stemlang))
|
||||||
|
stemlang = "english";
|
||||||
|
}
|
||||||
|
|
||||||
|
bool maybeOpenDb(string &reason)
|
||||||
|
{
|
||||||
|
if (!rcldb)
|
||||||
|
return false;
|
||||||
|
if (!rcldb->isopen()) {
|
||||||
|
string dbdir;
|
||||||
|
if (rclconfig->getConfParam(string("dbdir"), dbdir) == 0) {
|
||||||
|
reason = "No db directory in configuration";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
dbdir = path_tildexpand(dbdir);
|
||||||
|
if (!rcldb->open(dbdir, Rcl::Db::DbRO)) {
|
||||||
|
reason = "Could not open database in " +
|
||||||
|
dbdir + " wait for indexing to complete?";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
void recollCleanup()
|
void recollCleanup()
|
||||||
{
|
{
|
||||||
@ -86,7 +119,7 @@ int main( int argc, char ** argv )
|
|||||||
|
|
||||||
rcldb = new Rcl::Db;
|
rcldb = new Rcl::Db;
|
||||||
|
|
||||||
if (!rcldb->open(dbdir, Rcl::Db::DbRO)) {
|
if (!rcldb || !rcldb->open(dbdir, Rcl::Db::DbRO)) {
|
||||||
startindexing = 1;
|
startindexing = 1;
|
||||||
QMessageBox::information(0, "Recoll",
|
QMessageBox::information(0, "Recoll",
|
||||||
QString("Could not open database in ") +
|
QString("Could not open database in ") +
|
||||||
|
|||||||
@ -1,3 +1,6 @@
|
|||||||
|
#ifndef lint
|
||||||
|
static char rcsid[] = "@(#$Id: plaintorich.cpp,v 1.3 2005-10-19 10:21:48 dockes Exp $ (C) 2005 J.F.Dockes";
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|||||||
@ -1,29 +1,22 @@
|
|||||||
#ifndef _RECOLL_H_INCLUDED_
|
#ifndef _RECOLL_H_INCLUDED_
|
||||||
#define _RECOLL_H_INCLUDED_
|
#define _RECOLL_H_INCLUDED_
|
||||||
/* @(#$Id: recoll.h,v 1.3 2005-10-17 13:36:53 dockes Exp $ (C) 2004 J.F.Dockes */
|
/* @(#$Id: recoll.h,v 1.4 2005-10-19 10:21:48 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <list>
|
#include <list>
|
||||||
#include "rclconfig.h"
|
#include "rclconfig.h"
|
||||||
#include "rcldb.h"
|
#include "rcldb.h"
|
||||||
#include "idxthread.h"
|
#include "idxthread.h"
|
||||||
|
|
||||||
extern void recollCleanup();
|
|
||||||
|
|
||||||
// Misc declarations in need of sharing between the UI files
|
// Misc declarations in need of sharing between the UI files
|
||||||
|
|
||||||
|
extern void recollCleanup();
|
||||||
|
extern bool maybeOpenDb(std::string &reason);
|
||||||
|
extern void getQueryStemming(bool &dostem, std::string &stemlang);
|
||||||
|
|
||||||
extern RclConfig *rclconfig;
|
extern RclConfig *rclconfig;
|
||||||
extern Rcl::Db *rcldb;
|
extern Rcl::Db *rcldb;
|
||||||
extern string tmpdir;
|
extern std::string tmpdir;
|
||||||
|
|
||||||
extern int recollNeedsExit;
|
extern int recollNeedsExit;
|
||||||
|
|
||||||
// Holder for data collected by the advanced search dialog
|
|
||||||
struct AdvSearchData {
|
|
||||||
std::string allwords;
|
|
||||||
std::string phrase;
|
|
||||||
std::string orwords;
|
|
||||||
std::string nowords;
|
|
||||||
std::list<std::string> filetypes; // restrict to types. Empty if inactive
|
|
||||||
std::string topdir; // restrict to subtree. Empty if inactive
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif /* _RECOLL_H_INCLUDED_ */
|
#endif /* _RECOLL_H_INCLUDED_ */
|
||||||
|
|||||||
@ -282,6 +282,7 @@
|
|||||||
<includes>
|
<includes>
|
||||||
<include location="local" impldecl="in declaration">preview/.ui/preview.h</include>
|
<include location="local" impldecl="in declaration">preview/.ui/preview.h</include>
|
||||||
<include location="local" impldecl="in declaration">recoll.h</include>
|
<include location="local" impldecl="in declaration">recoll.h</include>
|
||||||
|
<include location="local" impldecl="in declaration">advsearch.h</include>
|
||||||
<include location="local" impldecl="in implementation">recollmain.ui.h</include>
|
<include location="local" impldecl="in implementation">recollmain.ui.h</include>
|
||||||
</includes>
|
</includes>
|
||||||
<variables>
|
<variables>
|
||||||
@ -290,6 +291,7 @@
|
|||||||
<variable>bool dostem;</variable>
|
<variable>bool dostem;</variable>
|
||||||
<variable>std::string stemlang;</variable>
|
<variable>std::string stemlang;</variable>
|
||||||
<variable>Preview *curPreview;</variable>
|
<variable>Preview *curPreview;</variable>
|
||||||
|
<variable>advsearch *asearchform;</variable>
|
||||||
</variables>
|
</variables>
|
||||||
<slots>
|
<slots>
|
||||||
<slot>fileExit()</slot>
|
<slot>fileExit()</slot>
|
||||||
@ -304,7 +306,7 @@
|
|||||||
<slot>listNextPB_clicked()</slot>
|
<slot>listNextPB_clicked()</slot>
|
||||||
<slot>advSearchPB_clicked()</slot>
|
<slot>advSearchPB_clicked()</slot>
|
||||||
<slot>previewClosed( Preview * w )</slot>
|
<slot>previewClosed( Preview * w )</slot>
|
||||||
<slot>startAdvSearch( AdvSearchData sdata )</slot>
|
<slot>startAdvSearch( Rcl::AdvSearchData sdata )</slot>
|
||||||
</slots>
|
</slots>
|
||||||
<functions>
|
<functions>
|
||||||
<function access="private">init()</function>
|
<function access="private">init()</function>
|
||||||
|
|||||||
@ -32,6 +32,8 @@ using std::pair;
|
|||||||
#include "smallut.h"
|
#include "smallut.h"
|
||||||
#include "plaintorich.h"
|
#include "plaintorich.h"
|
||||||
#include "unacpp.h"
|
#include "unacpp.h"
|
||||||
|
#include "advsearch.h"
|
||||||
|
|
||||||
|
|
||||||
#ifndef MIN
|
#ifndef MIN
|
||||||
#define MIN(A,B) ((A) < (B) ? (A) : (B))
|
#define MIN(A,B) ((A) < (B) ? (A) : (B))
|
||||||
@ -45,6 +47,7 @@ static const int respagesize = 8;
|
|||||||
void RecollMain::init()
|
void RecollMain::init()
|
||||||
{
|
{
|
||||||
curPreview = 0;
|
curPreview = 0;
|
||||||
|
asearchform = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
// We want to catch ^Q everywhere to mean quit.
|
// We want to catch ^Q everywhere to mean quit.
|
||||||
@ -266,31 +269,13 @@ void RecollMain::reslistTE_clicked(int par, int car)
|
|||||||
void RecollMain::queryText_returnPressed()
|
void RecollMain::queryText_returnPressed()
|
||||||
{
|
{
|
||||||
LOGDEB(("RecollMain::queryText_returnPressed()\n"));
|
LOGDEB(("RecollMain::queryText_returnPressed()\n"));
|
||||||
if (!rcldb->isopen()) {
|
string reason;
|
||||||
string dbdir;
|
if (!maybeOpenDb(reason)) {
|
||||||
if (rclconfig->getConfParam(string("dbdir"), dbdir) == 0) {
|
QMessageBox::critical(0, "Recoll", QString(reason.c_str()));
|
||||||
QMessageBox::critical(0, "Recoll",
|
return;
|
||||||
QString("No db directory in configuration"));
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
dbdir = path_tildexpand(dbdir);
|
|
||||||
if (!rcldb->open(dbdir, Rcl::Db::DbRO)) {
|
|
||||||
QMessageBox::information(0, "Recoll",
|
|
||||||
QString("Could not open database in ") +
|
|
||||||
QString(dbdir) + " wait for indexing " +
|
|
||||||
"to complete?");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (stemlang.empty()) {
|
|
||||||
string param;
|
|
||||||
if (rclconfig->getConfParam("querystemming", param))
|
|
||||||
dostem = ConfTree::stringToBool(param);
|
|
||||||
else
|
|
||||||
dostem = false;
|
|
||||||
if (!rclconfig->getConfParam("querystemminglanguage", stemlang))
|
|
||||||
stemlang = "english";
|
|
||||||
}
|
}
|
||||||
|
if (stemlang.empty())
|
||||||
|
getQueryStemming(dostem, stemlang);
|
||||||
|
|
||||||
reslist_current = -1;
|
reslist_current = -1;
|
||||||
reslist_winfirst = -1;
|
reslist_winfirst = -1;
|
||||||
@ -388,7 +373,7 @@ void RecollMain::listNextPB_clicked()
|
|||||||
strftime(datebuf, 99, "<i>Modified:</i> %F %T", tm);
|
strftime(datebuf, 99, "<i>Modified:</i> %F %T", tm);
|
||||||
}
|
}
|
||||||
string abst = stripMarkup(doc.abstract);
|
string abst = stripMarkup(doc.abstract);
|
||||||
LOGDEB(("Abstract: {%s}\n", abst.c_str()));
|
LOGDEB1(("Abstract: {%s}\n", abst.c_str()));
|
||||||
string result = "<p>" +
|
string result = "<p>" +
|
||||||
string(perbuf) + " <b>" + doc.title + "</b><br>" +
|
string(perbuf) + " <b>" + doc.title + "</b><br>" +
|
||||||
doc.mimetype + " " +
|
doc.mimetype + " " +
|
||||||
@ -433,46 +418,40 @@ void RecollMain::previewClosed(Preview *w)
|
|||||||
delete w;
|
delete w;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Open advanced search dialog.
|
||||||
|
|
||||||
#include "advsearch.h"
|
|
||||||
|
|
||||||
advsearch *asearchform;
|
|
||||||
|
|
||||||
void RecollMain::advSearchPB_clicked()
|
void RecollMain::advSearchPB_clicked()
|
||||||
{
|
{
|
||||||
if (asearchform == 0) {
|
if (asearchform == 0) {
|
||||||
// Couldn't find way to have a normal wm frame
|
|
||||||
asearchform = new advsearch(this, "Advanced search", FALSE,
|
asearchform = new advsearch(this, "Advanced search", FALSE,
|
||||||
WStyle_Customize | WStyle_NormalBorder |
|
WStyle_Customize | WStyle_NormalBorder |
|
||||||
WStyle_Title | WStyle_SysMenu);
|
WStyle_Title | WStyle_SysMenu);
|
||||||
asearchform->setSizeGripEnabled(FALSE);
|
asearchform->setSizeGripEnabled(FALSE);
|
||||||
connect(asearchform, SIGNAL(startSearch(AdvSearchData)),
|
connect(asearchform, SIGNAL(startSearch(Rcl::AdvSearchData)),
|
||||||
this, SLOT(startAdvSearch(AdvSearchData)));
|
this, SLOT(startAdvSearch(Rcl::AdvSearchData)));
|
||||||
asearchform->show();
|
asearchform->show();
|
||||||
} else {
|
} else {
|
||||||
asearchform->show();
|
asearchform->show();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void RecollMain::startAdvSearch(AdvSearchData sdata)
|
// Execute an advanced search query
|
||||||
|
void RecollMain::startAdvSearch(Rcl::AdvSearchData sdata)
|
||||||
{
|
{
|
||||||
LOGDEB(("RecollMain::startAdvSearch\n"));
|
LOGDEB(("RecollMain::startAdvSearch\n"));
|
||||||
LOGDEB((" allwords: %s\n", sdata.allwords.c_str()));
|
string reason;
|
||||||
LOGDEB((" phrase: %s\n", sdata.phrase.c_str()));
|
if (!maybeOpenDb(reason)) {
|
||||||
LOGDEB((" orwords: %s\n", sdata.orwords.c_str()));
|
QMessageBox::critical(0, "Recoll", QString(reason.c_str()));
|
||||||
LOGDEB((" nowords: %s\n", sdata.nowords.c_str()));
|
return;
|
||||||
string ft;
|
|
||||||
for (list<string>::iterator it = sdata.filetypes.begin();
|
|
||||||
it != sdata.filetypes.end(); it++) {
|
|
||||||
ft += *it + " ";
|
|
||||||
}
|
}
|
||||||
if (!ft.empty())
|
|
||||||
LOGDEB(("Searched file types: %s\n", ft.c_str()));
|
|
||||||
if (!sdata.topdir.empty())
|
|
||||||
LOGDEB(("Restricted to: %s\n", sdata.topdir.c_str()));
|
|
||||||
|
|
||||||
|
if (stemlang.empty())
|
||||||
|
getQueryStemming(dostem, stemlang);
|
||||||
|
|
||||||
|
reslist_current = -1;
|
||||||
|
reslist_winfirst = -1;
|
||||||
|
|
||||||
|
if (!rcldb->setQuery(sdata, stemlang))
|
||||||
|
return;
|
||||||
|
curPreview = 0;
|
||||||
|
listNextPB_clicked();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
#ifndef lint
|
#ifndef lint
|
||||||
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.28 2005-04-06 10:20:11 dockes Exp $ (C) 2004 J.F.Dockes";
|
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.29 2005-10-19 10:21:47 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||||
#endif
|
#endif
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <sys/stat.h>
|
#include <sys/stat.h>
|
||||||
@ -37,9 +37,10 @@ class Native {
|
|||||||
|
|
||||||
// Querying
|
// Querying
|
||||||
Xapian::Database db;
|
Xapian::Database db;
|
||||||
Xapian::Query query;
|
Xapian::Query query; // query descriptor: terms and subqueries
|
||||||
|
// joined by operators (or/and etc...)
|
||||||
Xapian::Enquire *enquire;
|
Xapian::Enquire *enquire;
|
||||||
Xapian::MSet mset;
|
Xapian::MSet mset;
|
||||||
|
|
||||||
Native() : isopen(false), iswritable(false), enquire(0) {
|
Native() : isopen(false), iswritable(false), enquire(0) {
|
||||||
}
|
}
|
||||||
@ -206,8 +207,8 @@ bool mySplitterCB::takeword(const std::string &term, int pos, int, int)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Unaccent and lowercase data: use unac
|
// Unaccent and lowercase data: use unac
|
||||||
// for accents, and do it by hand for upper / lower. Note lowercasing is
|
// for accents, and do it by hand for upper / lower.
|
||||||
// only for ascii letters anyway, so it's just A-Z -> a-z
|
// TOBEDONE: lowercasing is done only for ascii letters, just A-Z -> a-z
|
||||||
// Removing crlfs is so that we can use the text in the document data fields.
|
// Removing crlfs is so that we can use the text in the document data fields.
|
||||||
bool Rcl::dumb_string(const string &in, string &out)
|
bool Rcl::dumb_string(const string &in, string &out)
|
||||||
{
|
{
|
||||||
@ -404,15 +405,15 @@ bool Rcl::Db::needUpdate(const string &filename, const struct stat *stp)
|
|||||||
// If the db is up to date, set the update flags for all documents
|
// If the db is up to date, set the update flags for all documents
|
||||||
Xapian::PostingIterator doc;
|
Xapian::PostingIterator doc;
|
||||||
try {
|
try {
|
||||||
Xapian::PostingIterator did0 = ndb->wdb.postlist_begin(pathterm);
|
Xapian::PostingIterator docid0 = ndb->wdb.postlist_begin(pathterm);
|
||||||
for (Xapian::PostingIterator did = did0;
|
for (Xapian::PostingIterator docid = docid0;
|
||||||
did != ndb->wdb.postlist_end(pathterm); did++) {
|
docid != ndb->wdb.postlist_end(pathterm); docid++) {
|
||||||
|
|
||||||
Xapian::Document doc = ndb->wdb.get_document(*did);
|
Xapian::Document doc = ndb->wdb.get_document(*docid);
|
||||||
|
|
||||||
// Check the date once. no need to look at the others if the
|
// Check the date once. no need to look at the others if the
|
||||||
// db needs updating.
|
// db needs updating.
|
||||||
if (did == did0) {
|
if (docid == docid0) {
|
||||||
string data = doc.get_data();
|
string data = doc.get_data();
|
||||||
const char *cp = strstr(data.c_str(), "mtime=");
|
const char *cp = strstr(data.c_str(), "mtime=");
|
||||||
cp += 6;
|
cp += 6;
|
||||||
@ -424,8 +425,8 @@ bool Rcl::Db::needUpdate(const string &filename, const struct stat *stp)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Db is up to date. Make a note that this document exists.
|
// Db is up to date. Make a note that this document exists.
|
||||||
if (*did < ndb->updated.size())
|
if (*docid < ndb->updated.size())
|
||||||
ndb->updated[*did] = true;
|
ndb->updated[*docid] = true;
|
||||||
}
|
}
|
||||||
} catch (...) {
|
} catch (...) {
|
||||||
return true;
|
return true;
|
||||||
@ -596,13 +597,13 @@ bool Rcl::Db::purge()
|
|||||||
// trying to delete a nonexistent document ?
|
// trying to delete a nonexistent document ?
|
||||||
// Flushing before trying the deletes seems to work around the problem
|
// Flushing before trying the deletes seems to work around the problem
|
||||||
ndb->wdb.flush();
|
ndb->wdb.flush();
|
||||||
for (Xapian::docid did = 1; did < ndb->updated.size(); ++did) {
|
for (Xapian::docid docid = 1; docid < ndb->updated.size(); ++docid) {
|
||||||
if (!ndb->updated[did]) {
|
if (!ndb->updated[docid]) {
|
||||||
try {
|
try {
|
||||||
ndb->wdb.delete_document(did);
|
ndb->wdb.delete_document(docid);
|
||||||
LOGDEB(("Rcl::Db::purge: deleted document #%d\n", did));
|
LOGDEB(("Rcl::Db::purge: deleted document #%d\n", docid));
|
||||||
} catch (const Xapian::DocNotFoundError &) {
|
} catch (const Xapian::DocNotFoundError &) {
|
||||||
LOGDEB2(("Rcl::Db::purge: document #%d not found\n", did));
|
LOGDEB2(("Rcl::Db::purge: document #%d not found\n", docid));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -628,6 +629,13 @@ class wsQData : public TextSplitCB {
|
|||||||
terms.push_back(term);
|
terms.push_back(term);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
void dumball() {
|
||||||
|
for (vector<string>::iterator it=terms.begin(); it !=terms.end();it++){
|
||||||
|
string dumb;
|
||||||
|
Rcl::dumb_string(*it, dumb);
|
||||||
|
*it = dumb;
|
||||||
|
}
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
@ -638,11 +646,11 @@ static list<string> stemexpand(Native *ndb, string term, const string& lang)
|
|||||||
try {
|
try {
|
||||||
Xapian::Stem stemmer(lang);
|
Xapian::Stem stemmer(lang);
|
||||||
string stem = stemmer.stem_word(term);
|
string stem = stemmer.stem_word(term);
|
||||||
LOGDEB(("stemexpand: '%s' -> '%s'\n", term.c_str(), stem.c_str()));
|
LOGDEB(("stemexpand: '%s' stem-> '%s'\n", term.c_str(), stem.c_str()));
|
||||||
// Try to fetch the doc from the stem db
|
// Try to fetch the doc from the stem db
|
||||||
string stemdbdir = stemdbname(ndb->basedir, lang);
|
string stemdbdir = stemdbname(ndb->basedir, lang);
|
||||||
Xapian::Database sdb(stemdbdir);
|
Xapian::Database sdb(stemdbdir);
|
||||||
LOGDEB1(("Rcl::Db::stemexpand: %s lastdocid: %d\n",
|
LOGDEB1(("stemexpand: %s lastdocid: %d\n",
|
||||||
stemdbdir.c_str(), sdb.get_lastdocid()));
|
stemdbdir.c_str(), sdb.get_lastdocid()));
|
||||||
if (!sdb.term_exists(stem)) {
|
if (!sdb.term_exists(stem)) {
|
||||||
LOGDEB1(("Rcl::Db::stemexpand: no term for %s\n", stem.c_str()));
|
LOGDEB1(("Rcl::Db::stemexpand: no term for %s\n", stem.c_str()));
|
||||||
@ -651,7 +659,7 @@ static list<string> stemexpand(Native *ndb, string term, const string& lang)
|
|||||||
}
|
}
|
||||||
Xapian::PostingIterator did = sdb.postlist_begin(stem);
|
Xapian::PostingIterator did = sdb.postlist_begin(stem);
|
||||||
if (did == sdb.postlist_end(stem)) {
|
if (did == sdb.postlist_end(stem)) {
|
||||||
LOGDEB1(("Rcl::Db::stemexpand: no term(1) for %s\n",stem.c_str()));
|
LOGDEB1(("stemexpand: no term(1) for %s\n",stem.c_str()));
|
||||||
explist.push_back(term);
|
explist.push_back(term);
|
||||||
return explist;
|
return explist;
|
||||||
}
|
}
|
||||||
@ -669,7 +677,7 @@ static list<string> stemexpand(Native *ndb, string term, const string& lang)
|
|||||||
if (find(explist.begin(), explist.end(), term) == explist.end()) {
|
if (find(explist.begin(), explist.end(), term) == explist.end()) {
|
||||||
explist.push_back(term);
|
explist.push_back(term);
|
||||||
}
|
}
|
||||||
LOGDEB(("Rcl::Db::stemexpand: %s -> %s\n", stem.c_str(),
|
LOGDEB(("stemexpand: %s -> %s\n", stem.c_str(),
|
||||||
stringlistdisp(explist).c_str()));
|
stringlistdisp(explist).c_str()));
|
||||||
} catch (...) {
|
} catch (...) {
|
||||||
LOGERR(("stemexpand: error accessing stem db\n"));
|
LOGERR(("stemexpand: error accessing stem db\n"));
|
||||||
@ -679,6 +687,81 @@ static list<string> stemexpand(Native *ndb, string term, const string& lang)
|
|||||||
return explist;
|
return explist;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Turn string into possibly complex xapian query. There is little
|
||||||
|
// interpretation done on the string (no +term -term or filename:term
|
||||||
|
// stuff). We just separate words and phrases, and interpret
|
||||||
|
// capitalized terms as wanting no stem expansion
|
||||||
|
static void stringToXapianQueries(const string &iq,
|
||||||
|
const string& stemlang,
|
||||||
|
Native *ndb,
|
||||||
|
list<Xapian::Query> &pqueries,
|
||||||
|
Rcl::Db::QueryOpts opts = Rcl::Db::QO_NONE)
|
||||||
|
{
|
||||||
|
string qstring = iq;
|
||||||
|
#if 0
|
||||||
|
// Unaccent and lowercase. Note that lowercasing here may not be
|
||||||
|
// such a good idea because it forbids using capitalized words to
|
||||||
|
// indicate that a term should not use stem expansion, for
|
||||||
|
// example.
|
||||||
|
if (!Rcl::dumb_string(iqstring, qstring))
|
||||||
|
return false;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Split into (possibly single word) phrases ("this is a phrase"):
|
||||||
|
list<string> phrases;
|
||||||
|
ConfTree::stringToStrings(qstring, phrases);
|
||||||
|
|
||||||
|
// Then process each phrase: split into terms and transform into
|
||||||
|
// appropriate Xapian Query
|
||||||
|
|
||||||
|
for (list<string>::iterator it=phrases.begin(); it !=phrases.end(); it++) {
|
||||||
|
LOGDEB(("strToXapianQ: phrase or word: [%s]\n", it->c_str()));
|
||||||
|
|
||||||
|
wsQData splitData;
|
||||||
|
TextSplit splitter(&splitData, true);
|
||||||
|
splitter.text_to_words(*it);
|
||||||
|
LOGDEB(("strToXapianQ: splitter term count: %d\n",
|
||||||
|
splitData.terms.size()));
|
||||||
|
switch(splitData.terms.size()) {
|
||||||
|
case 0: continue;// ??
|
||||||
|
case 1: // Not a real phrase: one term
|
||||||
|
{
|
||||||
|
string term = splitData.terms.front();
|
||||||
|
bool nostemexp = false;
|
||||||
|
// Yes this doesnt work with accented or non-european
|
||||||
|
// majuscules. TOBEDONE: something :)
|
||||||
|
if (term.length() > 0 && term[0] >= 'A' && term[0] <= 'Z')
|
||||||
|
nostemexp = true;
|
||||||
|
|
||||||
|
LOGDEB(("Term: %s\n", term.c_str()));
|
||||||
|
|
||||||
|
// Possibly perform stem compression/expansion
|
||||||
|
list<string> exp;
|
||||||
|
string term1;
|
||||||
|
Rcl::dumb_string(term, term1);
|
||||||
|
if (!nostemexp && (opts & Rcl::Db::QO_STEM)) {
|
||||||
|
exp = stemexpand(ndb, term1, stemlang);
|
||||||
|
} else {
|
||||||
|
exp.push_back(term1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Push either term or stem-expanded set
|
||||||
|
pqueries.push_back(Xapian::Query(Xapian::Query::OP_OR,
|
||||||
|
exp.begin(), exp.end()));
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
// Phrase: no stem expansion
|
||||||
|
splitData.dumball();
|
||||||
|
LOGDEB(("Pushing phrase: [%s]\n", splitData.catterms().c_str()));
|
||||||
|
pqueries.push_back(Xapian::Query(Xapian::Query::OP_PHRASE,
|
||||||
|
splitData.terms.begin(),
|
||||||
|
splitData.terms.end()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
bool Rcl::Db::setQuery(const std::string &iqstring, QueryOpts opts,
|
bool Rcl::Db::setQuery(const std::string &iqstring, QueryOpts opts,
|
||||||
const string& stemlang)
|
const string& stemlang)
|
||||||
{
|
{
|
||||||
@ -688,48 +771,8 @@ bool Rcl::Db::setQuery(const std::string &iqstring, QueryOpts opts,
|
|||||||
if (!ndb)
|
if (!ndb)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
string qstring;;
|
|
||||||
if (!dumb_string(iqstring, qstring)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// First split into (possibly single word) phrases ("this is a phrase"):
|
|
||||||
list<string> phrases;
|
|
||||||
ConfTree::stringToStrings(qstring, phrases);
|
|
||||||
for (list<string>::const_iterator i=phrases.begin();
|
|
||||||
i != phrases.end();i++) {
|
|
||||||
LOGDEB(("Rcl::Db::setQuery: phrase: '%s'\n", i->c_str()));
|
|
||||||
}
|
|
||||||
|
|
||||||
list<Xapian::Query> pqueries;
|
list<Xapian::Query> pqueries;
|
||||||
for (list<string>::const_iterator it = phrases.begin();
|
stringToXapianQueries(iqstring, stemlang, ndb, pqueries, opts);
|
||||||
it != phrases.end(); it++) {
|
|
||||||
|
|
||||||
wsQData splitData;
|
|
||||||
TextSplit splitter(&splitData, true);
|
|
||||||
splitter.text_to_words(*it);
|
|
||||||
LOGDEB1(("Rcl::Db::setquery: splitter term count: %d\n",
|
|
||||||
splitData.terms.size()));
|
|
||||||
switch(splitData.terms.size()) {
|
|
||||||
case 0: continue;// ??
|
|
||||||
case 1: {
|
|
||||||
list<string> exp;
|
|
||||||
if (opts & QO_STEM)
|
|
||||||
exp = stemexpand(ndb, splitData.terms.front(), stemlang);
|
|
||||||
else
|
|
||||||
exp.push_back(splitData.terms.front());
|
|
||||||
pqueries.push_back(Xapian::Query(Xapian::Query::OP_OR,
|
|
||||||
exp.begin(),
|
|
||||||
exp.end()));
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
LOGDEB(("Pushing phrase: %s\n", splitData.catterms().c_str()));
|
|
||||||
pqueries.push_back(Xapian::Query(Xapian::Query::OP_PHRASE,
|
|
||||||
splitData.terms.begin(),
|
|
||||||
splitData.terms.end()));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
ndb->query = Xapian::Query(Xapian::Query::OP_OR, pqueries.begin(),
|
ndb->query = Xapian::Query(Xapian::Query::OP_OR, pqueries.begin(),
|
||||||
pqueries.end());
|
pqueries.end());
|
||||||
delete ndb->enquire;
|
delete ndb->enquire;
|
||||||
@ -739,6 +782,93 @@ bool Rcl::Db::setQuery(const std::string &iqstring, QueryOpts opts,
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Set up the current query from the advanced search data.
//
// The different fields are combined as follows:
//  - allwords:  all elements must match (OP_AND);
//  - orwords:   at least one element must match (OP_OR), ANDed with the rest;
//  - phrase:    the whole field is searched as one quoted phrase, ANDed
//               with the rest;
//  - filetypes: at least one of the 'T' + type terms must match, ANDed
//               with the rest;
//  - nowords:   documents matching any element are removed from the
//               result (OP_AND_NOT). This is applied last, so that the
//               exclusion holds against all the positive criteria,
//               whichever fields they come from.
// No stem expansion is requested for any of the fields (the default
// QO_NONE options are used when building the queries).
// TODO: sdata.topdir is only logged for now, the subtree restriction
// is not applied to the query yet.
//
// @param sdata    the advanced search fields
// @param stemlang stemming language, passed down to the query builder
// @return false if the database is not open, or if the search only has
//         exclusion terms (such a search can't be expressed without
//         at least one positive criterion). True otherwise.
bool Rcl::Db::setQuery(AdvSearchData &sdata, const string& stemlang)
{
    LOGDEB(("Rcl::Db::setQuery: adv:\n"));
    LOGDEB((" allwords: %s\n", sdata.allwords.c_str()));
    LOGDEB((" phrase: %s\n", sdata.phrase.c_str()));
    LOGDEB((" orwords: %s\n", sdata.orwords.c_str()));
    LOGDEB((" nowords: %s\n", sdata.nowords.c_str()));
    string ft;
    for (list<string>::iterator it = sdata.filetypes.begin();
         it != sdata.filetypes.end(); ++it) {
        ft += *it + " ";
    }
    if (!ft.empty()) {
        LOGDEB((" searched file types: %s\n", ft.c_str()));
    }
    if (!sdata.topdir.empty()) {
        LOGDEB((" restricted to: %s\n", sdata.topdir.c_str()));
    }

    Native *ndb = (Native *)pdata;
    if (!ndb)
        return false;

    // Scratch list, reused (and cleared) for each field
    list<Xapian::Query> pqueries;
    // Accumulates the positive part of the query
    Xapian::Query xq;

    if (!sdata.allwords.empty()) {
        stringToXapianQueries(sdata.allwords, stemlang, ndb, pqueries);
        if (!pqueries.empty()) {
            xq = Xapian::Query(Xapian::Query::OP_AND, pqueries.begin(),
                               pqueries.end());
            pqueries.clear();
        }
    }

    if (!sdata.orwords.empty()) {
        stringToXapianQueries(sdata.orwords, stemlang, ndb, pqueries);
        if (!pqueries.empty()) {
            Xapian::Query nq;
            nq = Xapian::Query(Xapian::Query::OP_OR, pqueries.begin(),
                               pqueries.end());
            xq = xq.empty() ? nq :
                Xapian::Query(Xapian::Query::OP_AND, xq, nq);
            pqueries.clear();
        }
    }

    if (!sdata.phrase.empty()) {
        // Quote the whole field so that it is handled as one phrase
        string s = string("\"") + sdata.phrase + string("\"");
        stringToXapianQueries(s, stemlang, ndb, pqueries);
        if (!pqueries.empty()) {
            // There should be a single list element phrase query.
            xq = xq.empty() ? *pqueries.begin() :
                Xapian::Query(Xapian::Query::OP_AND, xq, *pqueries.begin());
            pqueries.clear();
        }
    }

    if (!sdata.filetypes.empty()) {
        Xapian::Query tq;
        for (list<string>::iterator it = sdata.filetypes.begin();
             it != sdata.filetypes.end(); ++it) {
            string term = "T" + *it;
            LOGDEB(("Adding file type term: [%s]\n", term.c_str()));
            tq = tq.empty() ? Xapian::Query(term) :
                Xapian::Query(Xapian::Query::OP_OR, tq, Xapian::Query(term));
        }
        xq = xq.empty() ? tq : Xapian::Query(Xapian::Query::OP_AND, xq, tq);
    }

    // Exclusion terms are handled last. Using them as the query when
    // nothing else is set would return exactly the documents that the
    // user asked to exclude, so this case is rejected instead.
    if (!sdata.nowords.empty()) {
        stringToXapianQueries(sdata.nowords, stemlang, ndb, pqueries);
        if (!pqueries.empty()) {
            if (xq.empty()) {
                LOGERR(("Rcl::Db::setQuery: adv: exclusion needs at least "
                        "one positive search criterion\n"));
                return false;
            }
            Xapian::Query nq;
            nq = Xapian::Query(Xapian::Query::OP_OR, pqueries.begin(),
                               pqueries.end());
            xq = Xapian::Query(Xapian::Query::OP_AND_NOT, xq, nq);
            pqueries.clear();
        }
    }

    ndb->query = xq;
    delete ndb->enquire;
    // Don't keep a pointer to the deleted object around in case the
    // allocation below throws.
    ndb->enquire = 0;
    ndb->enquire = new Xapian::Enquire(ndb->db);
    ndb->enquire->set_query(ndb->query);
    // Reset the result set: it belonged to the previous query
    ndb->mset = Xapian::MSet();
    return true;
}
|
||||||
|
|
||||||
bool Rcl::Db::getQueryTerms(list<string>& terms)
|
bool Rcl::Db::getQueryTerms(list<string>& terms)
|
||||||
{
|
{
|
||||||
Native *ndb = (Native *)pdata;
|
Native *ndb = (Native *)pdata;
|
||||||
@ -766,6 +896,10 @@ int Rcl::Db::getResCnt()
|
|||||||
return ndb->mset.get_matches_lower_bound();
|
return ndb->mset.get_matches_lower_bound();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Get document at rank i in query (i is the index in the whole result
|
||||||
|
// set, as in the enquire class). We check if the current mset has the
|
||||||
|
// doc, else ask for another one. We use msets of 10 documents. Don't
|
||||||
|
// know if the whole thing makes sense at all but it seems to work.
|
||||||
bool Rcl::Db::getDoc(int i, Doc &doc, int *percent)
|
bool Rcl::Db::getDoc(int i, Doc &doc, int *percent)
|
||||||
{
|
{
|
||||||
LOGDEB1(("Rcl::Db::getDoc: %d\n", i));
|
LOGDEB1(("Rcl::Db::getDoc: %d\n", i));
|
||||||
|
|||||||
@ -1,12 +1,13 @@
|
|||||||
#ifndef _DB_H_INCLUDED_
|
#ifndef _DB_H_INCLUDED_
|
||||||
#define _DB_H_INCLUDED_
|
#define _DB_H_INCLUDED_
|
||||||
/* @(#$Id: rcldb.h,v 1.13 2005-03-25 09:40:27 dockes Exp $ (C) 2004 J.F.Dockes */
|
/* @(#$Id: rcldb.h,v 1.14 2005-10-19 10:21:47 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <list>
|
#include <list>
|
||||||
|
|
||||||
#ifndef NO_NAMESPACES
|
#ifndef NO_NAMESPACES
|
||||||
using std::string;
|
using std::string;
|
||||||
|
using std::list;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// rcldb defines an interface for a 'real' text database. The current
|
// rcldb defines an interface for a 'real' text database. The current
|
||||||
@ -24,7 +25,9 @@ using std::string;
|
|||||||
|
|
||||||
struct stat;
|
struct stat;
|
||||||
|
|
||||||
|
#ifndef NO_NAMESPACES
|
||||||
namespace Rcl {
|
namespace Rcl {
|
||||||
|
#endif
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Dumb bunch holder for document attributes and data
|
* Dumb bunch holder for document attributes and data
|
||||||
@ -57,6 +60,19 @@ class Doc {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * Holder for the advanced query data.
 *
 * Plain bunch of values describing an advanced search, handed to
 * Db::setQuery(). An empty field means that the corresponding
 * criterion is not used.
 */
class AdvSearchData {
 public:
    /** Words and phrases which must all be present */
    string allwords;
    /** Text to be searched as one exact phrase */
    string phrase;
    /** Words and phrases of which at least one must be present */
    string orwords;
    /** Words and phrases which must not be present */
    string nowords;
    /** Restrict to these file types. Empty if inactive */
    list<string> filetypes;
    /** Restrict to this subtree. Empty if inactive */
    string topdir;
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Wrapper class for the native database.
|
* Wrapper class for the native database.
|
||||||
*/
|
*/
|
||||||
@ -83,7 +99,8 @@ class Db {
|
|||||||
enum QueryOpts {QO_NONE=0, QO_STEM = 1};
|
enum QueryOpts {QO_NONE=0, QO_STEM = 1};
|
||||||
bool setQuery(const string &q, QueryOpts opts = QO_NONE,
|
bool setQuery(const string &q, QueryOpts opts = QO_NONE,
|
||||||
const string& stemlang = "english");
|
const string& stemlang = "english");
|
||||||
bool getQueryTerms(std::list<string>& terms);
|
bool setQuery(AdvSearchData &q, const string& stemlang = "english");
|
||||||
|
bool getQueryTerms(list<string>& terms);
|
||||||
|
|
||||||
// Get document at rank i. This is probably vastly inferior to the type
|
// Get document at rank i. This is probably vastly inferior to the type
|
||||||
// of interface in Xapian, but we have to start with something simple
|
// of interface in Xapian, but we have to start with something simple
|
||||||
@ -96,6 +113,9 @@ class Db {
|
|||||||
// Unaccent and lowercase data.
|
// Unaccent and lowercase data.
|
||||||
extern bool dumb_string(const string &in, string &out);
|
extern bool dumb_string(const string &in, string &out);
|
||||||
|
|
||||||
|
#ifndef NO_NAMESPACES
|
||||||
}
|
}
|
||||||
|
#endif // NO_NAMESPACES
|
||||||
|
|
||||||
|
|
||||||
#endif /* _DB_H_INCLUDED_ */
|
#endif /* _DB_H_INCLUDED_ */
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user