simple term highlighting in query preview
This commit is contained in:
parent
74434a3b02
commit
2a020407da
@ -1,5 +1,5 @@
|
|||||||
#ifndef lint
|
#ifndef lint
|
||||||
static char rcsid[] = "@(#$Id: textsplit.cpp,v 1.4 2004-12-17 13:01:01 dockes Exp $ (C) 2004 J.F.Dockes";
|
static char rcsid[] = "@(#$Id: textsplit.cpp,v 1.5 2005-02-07 13:17:47 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||||
#endif
|
#endif
|
||||||
#ifndef TEST_TEXTSPLIT
|
#ifndef TEST_TEXTSPLIT
|
||||||
|
|
||||||
@ -7,6 +7,7 @@ static char rcsid[] = "@(#$Id: textsplit.cpp,v 1.4 2004-12-17 13:01:01 dockes Ex
|
|||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
#include "textsplit.h"
|
#include "textsplit.h"
|
||||||
|
#include "debuglog.h"
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
@ -57,9 +58,12 @@ static void setcharclasses()
|
|||||||
init = 1;
|
init = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool TextSplit::emitterm(string &w, int pos, bool doerase = true)
|
bool TextSplit::emitterm(string &w, int pos, bool doerase,
|
||||||
|
int btstart, int btend)
|
||||||
{
|
{
|
||||||
if (!termsink)
|
LOGDEB2(("TextSplit::emitterm: '%s' pos %d\n", w.c_str(), pos));
|
||||||
|
|
||||||
|
if (!cb)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
// Maybe trim end of word. These are chars that we would keep inside
|
// Maybe trim end of word. These are chars that we would keep inside
|
||||||
@ -77,7 +81,7 @@ bool TextSplit::emitterm(string &w, int pos, bool doerase = true)
|
|||||||
}
|
}
|
||||||
breakloop:
|
breakloop:
|
||||||
if (w.length() > 0 && w.length() < (unsigned)maxWordLength) {
|
if (w.length() > 0 && w.length() < (unsigned)maxWordLength) {
|
||||||
bool ret = termsink(cdata, w, pos);
|
bool ret = cb->takeword(w, pos, btstart, btend);
|
||||||
if (doerase)
|
if (doerase)
|
||||||
w.erase();
|
w.erase();
|
||||||
return ret;
|
return ret;
|
||||||
@ -92,14 +96,16 @@ bool TextSplit::emitterm(string &w, int pos, bool doerase = true)
|
|||||||
*/
|
*/
|
||||||
bool TextSplit::text_to_words(const string &in)
|
bool TextSplit::text_to_words(const string &in)
|
||||||
{
|
{
|
||||||
|
LOGDEB2(("TextSplit::text_to_words: cb %p\n", cb));
|
||||||
setcharclasses();
|
setcharclasses();
|
||||||
string span;
|
string span;
|
||||||
string word;
|
string word;
|
||||||
bool number = false;
|
bool number = false;
|
||||||
int wordpos = 0;
|
int wordpos = 0;
|
||||||
int spanpos = 0;
|
int spanpos = 0;
|
||||||
|
unsigned int i;
|
||||||
|
|
||||||
for (unsigned int i = 0; i < in.length(); i++) {
|
for (i = 0; i < in.length(); i++) {
|
||||||
int c = in[i];
|
int c = in[i];
|
||||||
int cc = charclasses[c];
|
int cc = charclasses[c];
|
||||||
switch (cc) {
|
switch (cc) {
|
||||||
@ -107,10 +113,10 @@ bool TextSplit::text_to_words(const string &in)
|
|||||||
SPACE:
|
SPACE:
|
||||||
if (word.length()) {
|
if (word.length()) {
|
||||||
if (span.length() != word.length()) {
|
if (span.length() != word.length()) {
|
||||||
if (!emitterm(span, spanpos))
|
if (!emitterm(span, spanpos, true, i-span.length(), i))
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (!emitterm(word, wordpos++))
|
if (!emitterm(word, wordpos++, true, i-word.length(), i))
|
||||||
return false;
|
return false;
|
||||||
number = false;
|
number = false;
|
||||||
}
|
}
|
||||||
@ -127,10 +133,10 @@ bool TextSplit::text_to_words(const string &in)
|
|||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if (span.length() != word.length()) {
|
if (span.length() != word.length()) {
|
||||||
if (!emitterm(span, spanpos, false))
|
if (!emitterm(span, spanpos, false, i-span.length(), i))
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (!emitterm(word, wordpos++))
|
if (!emitterm(word, wordpos++, true, i-word.length(), i))
|
||||||
return false;
|
return false;
|
||||||
number = false;
|
number = false;
|
||||||
span += c;
|
span += c;
|
||||||
@ -140,10 +146,10 @@ bool TextSplit::text_to_words(const string &in)
|
|||||||
case '@':
|
case '@':
|
||||||
if (word.length()) {
|
if (word.length()) {
|
||||||
if (span.length() != word.length()) {
|
if (span.length() != word.length()) {
|
||||||
if (!emitterm(span, spanpos, false))
|
if (!emitterm(span, spanpos, false, i-span.length(), i))
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (!emitterm(word, wordpos++))
|
if (!emitterm(word, wordpos++, true, i-word.length(), i))
|
||||||
return false;
|
return false;
|
||||||
number = false;
|
number = false;
|
||||||
} else
|
} else
|
||||||
@ -155,7 +161,7 @@ bool TextSplit::text_to_words(const string &in)
|
|||||||
word += c;
|
word += c;
|
||||||
} else {
|
} else {
|
||||||
if (word.length()) {
|
if (word.length()) {
|
||||||
if (!emitterm(word, wordpos++))
|
if (!emitterm(word, wordpos++, true, i-word.length(), i))
|
||||||
return false;
|
return false;
|
||||||
number = false;
|
number = false;
|
||||||
} else
|
} else
|
||||||
@ -202,9 +208,9 @@ bool TextSplit::text_to_words(const string &in)
|
|||||||
}
|
}
|
||||||
if (word.length()) {
|
if (word.length()) {
|
||||||
if (span.length() != word.length())
|
if (span.length() != word.length())
|
||||||
if (!emitterm(span, spanpos))
|
if (!emitterm(span, spanpos, true, i-span.length(), i))
|
||||||
return false;
|
return false;
|
||||||
return emitterm(word, wordpos);
|
return emitterm(word, wordpos, true, i-word.length(), i);
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -222,12 +228,14 @@ bool TextSplit::text_to_words(const string &in)
|
|||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
bool termsink(void *, const string &term, int pos)
|
// A small callback class which just prints each term and its offsets
|
||||||
{
|
class mySplitterCB : public TextSplitCB {
|
||||||
cout << pos << " " << term << endl;
|
public:
|
||||||
return true;
|
bool takeword(const std::string &term, int pos, int bs, int be) {
|
||||||
}
|
cout << pos << " " << term << " bs " << bs << " be " << be << endl;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
static string teststring =
|
static string teststring =
|
||||||
"jfd@okyz.com "
|
"jfd@okyz.com "
|
||||||
@ -241,7 +249,8 @@ static string teststring =
|
|||||||
|
|
||||||
int main(int argc, char **argv)
|
int main(int argc, char **argv)
|
||||||
{
|
{
|
||||||
TextSplit splitter(termsink, 0);
|
mySplitterCB cb;
|
||||||
|
TextSplit splitter(&cb);
|
||||||
if (argc == 2) {
|
if (argc == 2) {
|
||||||
string data;
|
string data;
|
||||||
if (!file_to_string(argv[1], data))
|
if (!file_to_string(argv[1], data))
|
||||||
|
|||||||
@ -1,9 +1,20 @@
|
|||||||
#ifndef _TEXTSPLIT_H_INCLUDED_
|
#ifndef _TEXTSPLIT_H_INCLUDED_
|
||||||
#define _TEXTSPLIT_H_INCLUDED_
|
#define _TEXTSPLIT_H_INCLUDED_
|
||||||
/* @(#$Id: textsplit.h,v 1.3 2005-01-24 13:17:58 dockes Exp $ (C) 2004 J.F.Dockes */
|
/* @(#$Id: textsplit.h,v 1.4 2005-02-07 13:17:47 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
|
// Function class which is called for every detected word
|
||||||
|
class TextSplitCB {
|
||||||
|
public:
|
||||||
|
virtual ~TextSplitCB() {}
|
||||||
|
virtual bool takeword(const std::string& term,
|
||||||
|
int pos, // term pos
|
||||||
|
int bts, // byte offset of first char in term
|
||||||
|
int bte // byte offset of first char after term
|
||||||
|
) = 0;
|
||||||
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Split text into words.
|
* Split text into words.
|
||||||
* See comments at top of .cpp for more explanations.
|
* See comments at top of .cpp for more explanations.
|
||||||
@ -11,19 +22,14 @@
|
|||||||
* but it's much simpler this way...
|
* but it's much simpler this way...
|
||||||
*/
|
*/
|
||||||
class TextSplit {
|
class TextSplit {
|
||||||
public:
|
TextSplitCB *cb;
|
||||||
typedef bool (*TermSink)(void *cdata, const std::string & term, int pos);
|
|
||||||
private:
|
|
||||||
TermSink termsink;
|
|
||||||
void *cdata;
|
|
||||||
int maxWordLength;
|
int maxWordLength;
|
||||||
bool emitterm(std::string &term, int pos, bool doerase);
|
bool emitterm(std::string &term, int pos, bool doerase, int, int);
|
||||||
public:
|
public:
|
||||||
/**
|
/**
|
||||||
* Constructor: just store callback and client data
|
* Constructor: just store the callback object
|
||||||
*/
|
*/
|
||||||
TextSplit(TermSink t, void *c) : termsink(t), cdata(c), maxWordLength(40)
|
TextSplit(TextSplitCB *t) : cb(t), maxWordLength(40) {}
|
||||||
{}
|
|
||||||
/**
|
/**
|
||||||
* Split text, emit words and positions.
|
* Split text, emit words and positions.
|
||||||
*/
|
*/
|
||||||
|
|||||||
@ -15,9 +15,13 @@
|
|||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
#include <fcntl.h>
|
#include <fcntl.h>
|
||||||
|
|
||||||
|
#include <utility>
|
||||||
|
using std::pair;
|
||||||
|
|
||||||
#include <qmessagebox.h>
|
#include <qmessagebox.h>
|
||||||
#include <qcstring.h>
|
#include <qcstring.h>
|
||||||
|
|
||||||
|
|
||||||
#include "rcldb.h"
|
#include "rcldb.h"
|
||||||
#include "rclconfig.h"
|
#include "rclconfig.h"
|
||||||
#include "debuglog.h"
|
#include "debuglog.h"
|
||||||
@ -25,10 +29,12 @@
|
|||||||
#include "pathut.h"
|
#include "pathut.h"
|
||||||
#include "recoll.h"
|
#include "recoll.h"
|
||||||
#include "internfile.h"
|
#include "internfile.h"
|
||||||
|
#include "textsplit.h"
|
||||||
|
#include "smallut.h"
|
||||||
|
|
||||||
void RecollMain::fileExit()
|
void RecollMain::fileExit()
|
||||||
{
|
{
|
||||||
LOGDEB(("RecollMain: fileExit\n"));
|
LOGDEB1(("RecollMain: fileExit\n"));
|
||||||
exit(0);
|
exit(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -52,17 +58,66 @@ void RecollMain::fileStart_IndexingAction_activated()
|
|||||||
startindexing = 1;
|
startindexing = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
static string plaintorich(const string &in)
|
// Text splitter callback used to take note of the query terms byte offsets
|
||||||
|
// inside the text. This is then used to post highlight tags.
|
||||||
|
class myTextSplitCB : public TextSplitCB {
|
||||||
|
public:
|
||||||
|
list<pair<int, int> > tboffs;
|
||||||
|
const list<string> *terms;
|
||||||
|
myTextSplitCB(const list<string>& terms) : terms(&terms) {}
|
||||||
|
virtual bool takeword(const std::string& term, int, int bts, int bte) {
|
||||||
|
for (list<string>::const_iterator it = terms->begin();
|
||||||
|
it != terms->end(); it++) {
|
||||||
|
if (!stringlowercmp(*it, term)) {
|
||||||
|
tboffs.push_back(pair<int, int>(bts, bte));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
static string plaintorich(const string &in, const list<string>& terms,
|
||||||
|
list<pair<int, int> >&termoffsets)
|
||||||
{
|
{
|
||||||
|
#if 0
|
||||||
|
{string t;
|
||||||
|
for (list<string>::const_iterator it = terms.begin();it != terms.end();it++)
|
||||||
|
t += "'" + *it + "' ";
|
||||||
|
LOGDEB(("plaintorich: term: %s\n", t.c_str()));
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
myTextSplitCB cb(terms);
|
||||||
|
TextSplit splitter(&cb);
|
||||||
|
splitter.text_to_words(in);
|
||||||
|
string out1;
|
||||||
|
if (cb.tboffs.empty()) {
|
||||||
|
out1 = in;
|
||||||
|
} else {
|
||||||
|
list<pair<int, int> >::iterator it = cb.tboffs.begin();
|
||||||
|
for (unsigned int i = 0; i < in.length() ; i++) {
|
||||||
|
if (it != cb.tboffs.end()) {
|
||||||
|
if (i == (unsigned int)it->first) {
|
||||||
|
out1 += "<termtag>";
|
||||||
|
} else if (i == (unsigned int)it->second) {
|
||||||
|
if (it != cb.tboffs.end())
|
||||||
|
it++;
|
||||||
|
out1 += "</termtag>";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
out1 += in[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
string out = "<qt><head><title></title></head><body><p>";
|
string out = "<qt><head><title></title></head><body><p>";
|
||||||
for (unsigned int i = 0; i < in.length() ; i++) {
|
for (string::const_iterator it = out1.begin();it != out1.end(); it++) {
|
||||||
if (in[i] == '\n') {
|
if (*it == '\n') {
|
||||||
out += "<br>";
|
out += "<br>";
|
||||||
// out += '\n';
|
// out += '\n';
|
||||||
} else {
|
} else {
|
||||||
out += in[i];
|
out += *it;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
termoffsets = cb.tboffs;
|
||||||
return out;
|
return out;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -137,7 +192,7 @@ void RecollMain::reslistTE_clicked(int par, int car)
|
|||||||
int reldocnum = par - 1;
|
int reldocnum = par - 1;
|
||||||
reslist_current = reldocnum;
|
reslist_current = reldocnum;
|
||||||
previewTextEdit->clear();
|
previewTextEdit->clear();
|
||||||
LOGDEB(("Cleared preview\n"));
|
|
||||||
if (!rcldb->getDoc(reslist_winfirst + reldocnum, doc, 0)) {
|
if (!rcldb->getDoc(reslist_winfirst + reldocnum, doc, 0)) {
|
||||||
QMessageBox::warning(0, "Recoll",
|
QMessageBox::warning(0, "Recoll",
|
||||||
QString("Can't retrieve document from database"));
|
QString("Can't retrieve document from database"));
|
||||||
@ -154,26 +209,28 @@ void RecollMain::reslistTE_clicked(int par, int car)
|
|||||||
doc.mimetype.c_str());
|
doc.mimetype.c_str());
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
list<string> terms;
|
||||||
|
rcldb->getQueryTerms(terms);
|
||||||
|
list<pair<int, int> > termoffsets;
|
||||||
|
string rich = plaintorich(fdoc.text, terms, termoffsets);
|
||||||
|
|
||||||
string rich = plaintorich(fdoc.text);
|
|
||||||
|
|
||||||
#if 0
|
|
||||||
//Highlighting; pass a list of (search term, style name) to plaintorich
|
|
||||||
// and create the corresponding styles with different colors here
|
|
||||||
// We need to :
|
|
||||||
// - Break the query into terms : wait for the query analyzer
|
|
||||||
// - Break the text into words. This should use a version of
|
|
||||||
// textsplit with an option to keep the punctuation (see how to do
|
|
||||||
// this). We do want the same splitter code to be used here and
|
|
||||||
// when indexing.
|
|
||||||
QStyleSheetItem *item =
|
QStyleSheetItem *item =
|
||||||
new QStyleSheetItem( previewTextEdit->styleSheet(), "mytag" );
|
new QStyleSheetItem( previewTextEdit->styleSheet(), "termtag" );
|
||||||
item->setColor("red");
|
item->setColor("blue");
|
||||||
item->setFontWeight(QFont::Bold);
|
item->setFontWeight(QFont::Bold);
|
||||||
#endif
|
|
||||||
|
|
||||||
QString str = QString::fromUtf8(rich.c_str(), rich.length());
|
QString str = QString::fromUtf8(rich.c_str(), rich.length());
|
||||||
previewTextEdit->setText(str);
|
previewTextEdit->setText(str);
|
||||||
|
int para = 0, index = 1;
|
||||||
|
if (!termoffsets.empty()) {
|
||||||
|
index = (termoffsets.begin())->first;
|
||||||
|
LOGDEB1(("Setting cursor position to para %d, index %d\n",para,index));
|
||||||
|
previewTextEdit->setCursorPosition(0, index);
|
||||||
|
}
|
||||||
|
previewTextEdit->ensureCursorVisible();
|
||||||
|
previewTextEdit->getCursorPosition(&para, &index);
|
||||||
|
LOGDEB1(("PREVIEW Paragraphs: %d. Cpos: %d %d\n",
|
||||||
|
previewTextEdit->paragraphs(), para, index));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -181,7 +238,7 @@ void RecollMain::reslistTE_clicked(int par, int car)
|
|||||||
// first page of results
|
// first page of results
|
||||||
void RecollMain::queryText_returnPressed()
|
void RecollMain::queryText_returnPressed()
|
||||||
{
|
{
|
||||||
LOGDEB(("RecollMain::queryText_returnPressed()\n"));
|
LOGDEB1(("RecollMain::queryText_returnPressed()\n"));
|
||||||
if (!rcldb->isopen()) {
|
if (!rcldb->isopen()) {
|
||||||
string dbdir;
|
string dbdir;
|
||||||
if (rclconfig->getConfParam(string("dbdir"), dbdir) == 0) {
|
if (rclconfig->getConfParam(string("dbdir"), dbdir) == 0) {
|
||||||
@ -206,6 +263,7 @@ void RecollMain::queryText_returnPressed()
|
|||||||
|
|
||||||
if (!rcldb->setQuery(string((const char *)u8)))
|
if (!rcldb->setQuery(string((const char *)u8)))
|
||||||
return;
|
return;
|
||||||
|
list<string> terms;
|
||||||
listNextPB_clicked();
|
listNextPB_clicked();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -234,7 +292,7 @@ void RecollMain::listPrevPB_clicked()
|
|||||||
// Fill up result list window with next screen of hits
|
// Fill up result list window with next screen of hits
|
||||||
void RecollMain::listNextPB_clicked()
|
void RecollMain::listNextPB_clicked()
|
||||||
{
|
{
|
||||||
LOGDEB(("listNextPB_clicked: winfirst %d\n", reslist_winfirst));
|
LOGDEB1(("listNextPB_clicked: winfirst %d\n", reslist_winfirst));
|
||||||
|
|
||||||
if (reslist_winfirst < 0)
|
if (reslist_winfirst < 0)
|
||||||
reslist_winfirst = 0;
|
reslist_winfirst = 0;
|
||||||
@ -284,7 +342,7 @@ void RecollMain::listNextPB_clicked()
|
|||||||
struct tm *tm = localtime(&mtime);
|
struct tm *tm = localtime(&mtime);
|
||||||
strftime(datebuf, 99, "<i>Modified:</i> %F %T", tm);
|
strftime(datebuf, 99, "<i>Modified:</i> %F %T", tm);
|
||||||
}
|
}
|
||||||
LOGDEB(("Abstract: %s\n", doc.abstract.c_str()));
|
LOGDEB1(("Abstract: %s\n", doc.abstract.c_str()));
|
||||||
string result = "<p>" +
|
string result = "<p>" +
|
||||||
string(perbuf) + " <b>" + doc.title + "</b><br>" +
|
string(perbuf) + " <b>" + doc.title + "</b><br>" +
|
||||||
doc.mimetype + " " +
|
doc.mimetype + " " +
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
#ifndef lint
|
#ifndef lint
|
||||||
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.18 2005-02-04 14:21:17 dockes Exp $ (C) 2004 J.F.Dockes";
|
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.19 2005-02-07 13:17:47 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||||
#endif
|
#endif
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <sys/stat.h>
|
#include <sys/stat.h>
|
||||||
@ -171,20 +171,19 @@ bool Rcl::Db::isopen()
|
|||||||
}
|
}
|
||||||
|
|
||||||
// A small class to hold state while splitting text
|
// A small class to hold state while splitting text
|
||||||
class wsData {
|
class mySplitterCB : public TextSplitCB {
|
||||||
public:
|
public:
|
||||||
Xapian::Document &doc;
|
Xapian::Document &doc;
|
||||||
Xapian::termpos basepos; // Base for document section
|
Xapian::termpos basepos; // Base for document section
|
||||||
Xapian::termpos curpos; // Last position sent to callback
|
Xapian::termpos curpos; // Last position sent to callback
|
||||||
wsData(Xapian::Document &d) : doc(d), basepos(1), curpos(0)
|
mySplitterCB(Xapian::Document &d) : doc(d), basepos(1), curpos(0)
|
||||||
{}
|
{}
|
||||||
|
bool takeword(const std::string &term, int pos, int, int);
|
||||||
};
|
};
|
||||||
|
|
||||||
// Callback for the document to word splitting class during indexation
|
// Callback for the document to word splitting class during indexation
|
||||||
static bool splitCb(void *cdata, const std::string &term, int pos)
|
bool mySplitterCB::takeword(const std::string &term, int pos, int, int)
|
||||||
{
|
{
|
||||||
wsData *data = (wsData*)cdata;
|
|
||||||
|
|
||||||
// cerr << "splitCb: term " << term << endl;
|
// cerr << "splitCb: term " << term << endl;
|
||||||
//string printable;
|
//string printable;
|
||||||
//transcode(term, printable, "UTF-8", "ISO8859-1");
|
//transcode(term, printable, "UTF-8", "ISO8859-1");
|
||||||
@ -193,8 +192,8 @@ static bool splitCb(void *cdata, const std::string &term, int pos)
|
|||||||
try {
|
try {
|
||||||
// 1 is the value for wdfinc in index_text when called from omindex
|
// 1 is the value for wdfinc in index_text when called from omindex
|
||||||
// TOBEDONE: check what this is used for
|
// TOBEDONE: check what this is used for
|
||||||
data->curpos = pos;
|
curpos = pos;
|
||||||
data->doc.add_posting(term, data->basepos + data->curpos, 1);
|
doc.add_posting(term, basepos + curpos, 1);
|
||||||
} catch (...) {
|
} catch (...) {
|
||||||
LOGERR(("Rcl::Db: Error occurred during xapian add_posting\n"));
|
LOGERR(("Rcl::Db: Error occurred during xapian add_posting\n"));
|
||||||
return false;
|
return false;
|
||||||
@ -281,9 +280,9 @@ bool Rcl::Db::add(const string &fn, const Rcl::Doc &idoc)
|
|||||||
|
|
||||||
Xapian::Document newdocument;
|
Xapian::Document newdocument;
|
||||||
|
|
||||||
wsData splitData(newdocument);
|
mySplitterCB splitData(newdocument);
|
||||||
|
|
||||||
TextSplit splitter(splitCb, &splitData);
|
TextSplit splitter(&splitData);
|
||||||
|
|
||||||
string noacc;
|
string noacc;
|
||||||
if (!unac_cpp(doc.title, noacc)) {
|
if (!unac_cpp(doc.title, noacc)) {
|
||||||
@ -436,18 +435,16 @@ bool Rcl::Db::purge()
|
|||||||
|
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
class wsQData {
|
class wsQData : public TextSplitCB {
|
||||||
public:
|
public:
|
||||||
vector<string> terms;
|
vector<string> terms;
|
||||||
|
|
||||||
|
bool takeword(const std::string &term, int , int, int) {
|
||||||
|
terms.push_back(term);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// Callback for the query-to-words splitting
|
|
||||||
static bool splitQCb(void *cdata, const std::string &term, int )
|
|
||||||
{
|
|
||||||
wsQData *data = (wsQData*)cdata;
|
|
||||||
data->terms.push_back(term);
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool Rcl::Db::setQuery(const std::string &querystring)
|
bool Rcl::Db::setQuery(const std::string &querystring)
|
||||||
{
|
{
|
||||||
@ -457,7 +454,7 @@ bool Rcl::Db::setQuery(const std::string &querystring)
|
|||||||
return false;
|
return false;
|
||||||
|
|
||||||
wsQData splitData;
|
wsQData splitData;
|
||||||
TextSplit splitter(splitQCb, &splitData);
|
TextSplit splitter(&splitData);
|
||||||
|
|
||||||
string noacc;
|
string noacc;
|
||||||
if (!dumb_string(querystring, noacc)) {
|
if (!dumb_string(querystring, noacc)) {
|
||||||
@ -475,6 +472,21 @@ bool Rcl::Db::setQuery(const std::string &querystring)
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool Rcl::Db::getQueryTerms(list<string>& terms)
|
||||||
|
{
|
||||||
|
Native *ndb = (Native *)pdata;
|
||||||
|
if (!ndb)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
terms.clear();
|
||||||
|
Xapian::TermIterator it;
|
||||||
|
for (it = ndb->query.get_terms_begin(); it != ndb->query.get_terms_end();
|
||||||
|
it++) {
|
||||||
|
terms.push_back(*it);
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
int Rcl::Db::getResCnt()
|
int Rcl::Db::getResCnt()
|
||||||
{
|
{
|
||||||
Native *ndb = (Native *)pdata;
|
Native *ndb = (Native *)pdata;
|
||||||
|
|||||||
@ -1,8 +1,9 @@
|
|||||||
#ifndef _DB_H_INCLUDED_
|
#ifndef _DB_H_INCLUDED_
|
||||||
#define _DB_H_INCLUDED_
|
#define _DB_H_INCLUDED_
|
||||||
/* @(#$Id: rcldb.h,v 1.8 2005-01-31 14:31:09 dockes Exp $ (C) 2004 J.F.Dockes */
|
/* @(#$Id: rcldb.h,v 1.9 2005-02-07 13:17:47 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
|
#include <list>
|
||||||
|
|
||||||
// rcldb defines an interface for a 'real' text database. The current
|
// rcldb defines an interface for a 'real' text database. The current
|
||||||
// implementation uses xapian only, and xapian-related code is in rcldb.cpp
|
// implementation uses xapian only, and xapian-related code is in rcldb.cpp
|
||||||
@ -72,6 +73,7 @@ class Db {
|
|||||||
|
|
||||||
// Parse query string and initialize query
|
// Parse query string and initialize query
|
||||||
bool setQuery(const std::string &q);
|
bool setQuery(const std::string &q);
|
||||||
|
bool getQueryTerms(std::list<std::string>& terms);
|
||||||
|
|
||||||
// Get document at rank i. This is probably vastly inferior to the type
|
// Get document at rank i. This is probably vastly inferior to the type
|
||||||
// of interface in Xapian, but we have to start with something simple
|
// of interface in Xapian, but we have to start with something simple
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user