This commit is contained in:
dockes 2005-01-28 15:25:40 +00:00
parent 6d35f5430c
commit 82334f2957
7 changed files with 297 additions and 181 deletions

11
src/Makefile Normal file
View File

@ -0,0 +1,11 @@
# Top-level build driver: each target only delegates to the makefiles of
# the listed sub-directories.
#
# The steps are chained with `&&` so that a failing `cd` aborts the recipe;
# with `;` a missing directory would run make again in this directory and
# recurse. $(MAKE) is used so command-line flags reach the sub-makes.
.PHONY: all clean
all:
	cd lib && $(MAKE)
	cd index && $(MAKE)
	cd qtgui && rm -f recoll && $(MAKE)
clean:
	cd common && $(MAKE) clean
	cd index && $(MAKE) clean
	cd qtgui && rm -f recoll && $(MAKE) clean
	cd query && $(MAKE) clean
	cd utils && $(MAKE) clean

View File

@ -0,0 +1,23 @@
#ifndef _INDEXTEXT_H_INCLUDED_
#define _INDEXTEXT_H_INCLUDED_
/* @(#$Id: indextext.h,v 1.1 2005-01-28 15:25:39 dockes Exp $ (C) 2004 J.F.Dockes */
/* Note: this only exists to help with using myhtmlparse.cc */
// Minimize changes to myhtmlparse.cpp
#include "debuglog.h"
#include <string>
// Lowercase `term` in place. Only the letters 'A'..'Z' are converted; every
// other byte is left untouched, so this relies on the character set being
// ASCII-compatible.
static inline void
lowercase_term(std::string &term)
{
    for (std::string::iterator pos = term.begin(); pos != term.end(); ++pos) {
        const char ch = *pos;
        if (ch < 'A' || ch > 'Z')
            continue;
        *pos = ch - 'A' + 'a';
    }
}
#endif /* _INDEXTEXT_H_INCLUDED_ */

View File

@ -108,7 +108,6 @@ MyHtmlParser::opening_tag(const string &tag, const map<string,string> &p)
break;
case 'm':
if (tag == "meta") {
LOGDEB(("Found META\n"));
map<string, string>::const_iterator i, j;
if ((i = p.find("content")) != p.end()) {
if ((j = p.find("name")) != p.end()) {
@ -135,7 +134,6 @@ MyHtmlParser::opening_tag(const string &tag, const map<string,string> &p)
}
}
} else if ((j = p.find("http-equiv")) != p.end()) {
LOGDEB(("Found http-equiv\n"));
string hequiv = j->second;
lowercase_term(hequiv);
if (hequiv == "content-type") {

View File

@ -23,13 +23,13 @@
<property name="caption">
<string>recoll</string>
</property>
<hbox>
<vbox>
<property name="name">
<cstring>unnamed</cstring>
</property>
<widget class="QLayoutWidget">
<property name="name">
<cstring>layout3</cstring>
<cstring>layout10</cstring>
</property>
<vbox>
<property name="name">
@ -37,7 +37,7 @@
</property>
<widget class="QLayoutWidget">
<property name="name">
<cstring>layout2</cstring>
<cstring>layout8</cstring>
</property>
<hbox>
<property name="name">
@ -62,6 +62,22 @@
<string>Search</string>
</property>
</widget>
<widget class="QPushButton">
<property name="name">
<cstring>listPrevPb</cstring>
</property>
<property name="text">
<string>Previous page</string>
</property>
</widget>
<widget class="QPushButton">
<property name="name">
<cstring>listNextPB</cstring>
</property>
<property name="text">
<string>Next page</string>
</property>
</widget>
<spacer>
<property name="name">
<cstring>spacer1</cstring>
@ -74,7 +90,7 @@
</property>
<property name="sizeHint">
<size>
<width>329</width>
<width>346</width>
<height>20</height>
</size>
</property>
@ -83,21 +99,7 @@
</widget>
<widget class="QSplitter">
<property name="name">
<cstring>splitter9</cstring>
</property>
<property name="sizePolicy">
<sizepolicy>
<hsizetype>7</hsizetype>
<vsizetype>7</vsizetype>
<horstretch>1</horstretch>
<verstretch>1</verstretch>
</sizepolicy>
</property>
<property name="minimumSize">
<size>
<width>0</width>
<height>0</height>
</size>
<cstring>splitter6</cstring>
</property>
<property name="orientation">
<enum>Horizontal</enum>
@ -108,8 +110,8 @@
</property>
<property name="sizePolicy">
<sizepolicy>
<hsizetype>7</hsizetype>
<vsizetype>7</vsizetype>
<hsizetype>5</hsizetype>
<vsizetype>5</vsizetype>
<horstretch>2</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
@ -123,13 +125,13 @@
</widget>
<widget class="QSplitter">
<property name="name">
<cstring>splitter8</cstring>
<cstring>splitter5</cstring>
</property>
<property name="sizePolicy">
<sizepolicy>
<hsizetype>7</hsizetype>
<vsizetype>7</vsizetype>
<horstretch>3</horstretch>
<hsizetype>5</hsizetype>
<vsizetype>5</vsizetype>
<horstretch>5</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property>
@ -145,7 +147,7 @@
<hsizetype>7</hsizetype>
<vsizetype>7</vsizetype>
<horstretch>0</horstretch>
<verstretch>2</verstretch>
<verstretch>4</verstretch>
</sizepolicy>
</property>
<property name="textFormat">
@ -178,7 +180,7 @@
</widget>
</vbox>
</widget>
</hbox>
</vbox>
</widget>
<menubar>
<property name="name">
@ -190,10 +192,7 @@
<action name="fileExitAction"/>
</item>
<item text="&amp;Help" name="helpMenu">
<action name="helpContentsAction"/>
<action name="helpIndexAction"/>
<separator/>
<action name="helpAboutAction"/>
</item>
</menubar>
<toolbars>
@ -213,48 +212,6 @@
<string></string>
</property>
</action>
<action>
<property name="name">
<cstring>helpContentsAction</cstring>
</property>
<property name="text">
<string>Contents</string>
</property>
<property name="menuText">
<string>&amp;Contents...</string>
</property>
<property name="accel">
<string></string>
</property>
</action>
<action>
<property name="name">
<cstring>helpIndexAction</cstring>
</property>
<property name="text">
<string>Index</string>
</property>
<property name="menuText">
<string>&amp;Index...</string>
</property>
<property name="accel">
<string></string>
</property>
</action>
<action>
<property name="name">
<cstring>helpAboutAction</cstring>
</property>
<property name="text">
<string>About</string>
</property>
<property name="menuText">
<string>&amp;About</string>
</property>
<property name="accel">
<string></string>
</property>
</action>
</actions>
<connections>
<connection>
@ -263,24 +220,6 @@
<receiver>RecollMain</receiver>
<slot>fileExit()</slot>
</connection>
<connection>
<sender>helpIndexAction</sender>
<signal>activated()</signal>
<receiver>RecollMain</receiver>
<slot>helpIndex()</slot>
</connection>
<connection>
<sender>helpContentsAction</sender>
<signal>activated()</signal>
<receiver>RecollMain</receiver>
<slot>helpContents()</slot>
</connection>
<connection>
<sender>helpAboutAction</sender>
<signal>activated()</signal>
<receiver>RecollMain</receiver>
<slot>helpAbout()</slot>
</connection>
<connection>
<sender>resTextEdit</sender>
<signal>clicked(int,int)</signal>
@ -305,18 +244,33 @@
<receiver>RecollMain</receiver>
<slot>Search_clicked()</slot>
</connection>
<connection>
<sender>listPrevPb</sender>
<signal>clicked()</signal>
<receiver>RecollMain</receiver>
<slot>listPrevPB_clicked()</slot>
</connection>
<connection>
<sender>listNextPB</sender>
<signal>clicked()</signal>
<receiver>RecollMain</receiver>
<slot>listNextPB_clicked()</slot>
</connection>
</connections>
<includes>
<include location="local" impldecl="in implementation">recollmain.ui.h</include>
</includes>
<variables>
<variable>int reslist_current;</variable>
<variable>int reslist_winfirst;</variable>
</variables>
<slots>
<slot>fileExit()</slot>
<slot>helpIndex()</slot>
<slot>helpContents()</slot>
<slot>helpAbout()</slot>
<slot>resTextEdit_clicked( int par, int car )</slot>
<slot>queryText_returnPressed()</slot>
<slot>Search_clicked()</slot>
<slot>listPrevPB_clicked()</slot>
<slot>listNextPB_clicked()</slot>
</slots>
<pixmapinproject/>
<layoutdefaults spacing="6" margin="11"/>

View File

@ -16,22 +16,6 @@ void RecollMain::fileExit()
}
void RecollMain::helpIndex()
{
}
void RecollMain::helpContents()
{
}
void RecollMain::helpAbout()
{
}
#include <qmessagebox.h>
#include "rcldb.h"
@ -62,12 +46,27 @@ static string plaintorich(const string &in)
return out;
}
void RecollMain::resTextEdit_clicked( int par, int car )
// Click in the result list window: display preview for selected document,
// and highlight entry. The paragraph number is doc number in window + 1
void RecollMain::resTextEdit_clicked(int par, int car)
{
fprintf(stderr, "Clicked at paragraph %d, char %d\n", par, car);
LOGDEB(("RecollMain::resTextEdi_clicked: par %d, char %d\n", par, car));
if (reslist_winfirst == -1)
return;
Rcl::Doc doc;
doc.erase();
if (rcldb->getDoc(par, doc)) {
if (reslist_current != -1) {
QColor color("white");
resTextEdit->setParagraphBackgroundColor(reslist_current+1, color);
}
QColor color("lightblue");
resTextEdit->setParagraphBackgroundColor(par, color);
int reldocnum = par-1;
reslist_current = reldocnum;
previewTextEdit->clear();
if (rcldb->getDoc(reslist_winfirst + reldocnum, doc, 0)) {
// Go to the file system to retrieve / convert the document text
// for preview:
@ -86,7 +85,7 @@ void RecollMain::resTextEdit_clicked( int par, int car )
Rcl::Doc fdoc;
if (!fun(rclconfig, fn, doc.mimetype, fdoc)) {
QMessageBox::warning(0, "Recoll",
QString("Failed to convert document for preview!\n") +
QString("Failed to convert document for preview!\n") +
fn.c_str() + " mimetype " +
doc.mimetype.c_str());
return;
@ -108,46 +107,24 @@ void RecollMain::resTextEdit_clicked( int par, int car )
item->setColor("red");
item->setFontWeight(QFont::Bold);
#endif
QString str = QString::fromUtf8(rich.c_str(), rich.length());
QString str = QString::fromUtf8(rich.c_str(), rich.length());
previewTextEdit->setTextFormat(RichText);
previewTextEdit->setText(str);
}
}
#include "pathut.h"
void RecollMain::queryText_returnPressed()
{
LOGDEB(("RecollMain::queryText_returnPressed()\n"));
resTextEdit->clear();
previewTextEdit->clear();
reslist_current = -1;
reslist_winfirst = -1;
string rawq = queryText->text();
rcldb->setQuery(rawq);
Rcl::Doc doc;
// Insert results if any in result list window
QString result;
resTextEdit->append("<qt><head></head><body>");
for (int i = 0;; i++) {
doc.erase();
if (!rcldb->getDoc(i, doc))
break;
LOGDEB(("Url: %s\n", doc.url.c_str()));
LOGDEB(("Mimetype: \n", doc.mimetype.c_str()));
LOGDEB(("Mtime: \n", doc.mtime.c_str()));
LOGDEB(("Origcharset: \n", doc.origcharset.c_str()));
LOGDEB(("Title: \n", doc.title.c_str()));
LOGDEB(("Text: \n", doc.text.c_str()));
LOGDEB(("Keywords: \n", doc.keywords.c_str()));
LOGDEB(("Abstract: \n", doc.abstract.c_str()));
result = "<p>" + doc.url + "</p>";
resTextEdit->append(result);
}
resTextEdit->append("</body></qt>");
// Display preview for 1st doc in list
resTextEdit_clicked(0, 0);
listNextPB_clicked();
}
@ -155,3 +132,101 @@ void RecollMain::Search_clicked()
{
queryText_returnPressed();
}
// Number of result entries shown per page of the result list; also the
// step by which the window start moves when paging.
static const int respagesize = 10;
// "Previous page" button handler. listNextPB_clicked() moves the window
// start forward by one page before drawing, so back up by two pages and
// let it do the display work.
void RecollMain::listPrevPB_clicked()
{
    const int backstep = 2 * respagesize;
    reslist_winfirst = reslist_winfirst - backstep;
    listNextPB_clicked();
}
// Smaller of two values. Only defined here if no MIN macro is already
// visible. Being a macro, the selected argument is evaluated twice, so do
// not pass expressions with side effects.
#ifndef MIN
#define MIN(A,B) ((A) < (B) ? (A) : (B))
#endif
// "Next page" button handler: display the next page of the result list.
//
// Moves reslist_winfirst forward by one page (or to 0 when it is negative),
// then fetches up to respagesize documents from rcldb and appends one HTML
// paragraph per document to resTextEdit. When at least one document was
// found, the preview for the first entry of the page is displayed.
// Otherwise the window start is moved back so that the page currently on
// screen stays the current one.
void RecollMain::listNextPB_clicked()
{
    LOGDEB(("listNextPB_clicked: winfirst %d\n", reslist_winfirst));

    if (reslist_winfirst < 0)
        reslist_winfirst = 0;
    else
        reslist_winfirst += respagesize;

    // Insert results if any in result list window
    bool gotone = false;
    for (int i = 0; i < respagesize; i++) {
        Rcl::Doc doc;
        doc.erase();
        int percent = 0;
        if (!rcldb->getDoc(reslist_winfirst + i, doc, &percent))
            break;

        if (i == 0) {
            // First hit of the page: reset both windows and emit the page
            // header. The result count is queried after a successful
            // getDoc() call and is only needed once per page.
            int resCnt = rcldb->getResCnt();
            int last = MIN(resCnt, reslist_winfirst+respagesize);
            resTextEdit->clear();
            previewTextEdit->clear();
            resTextEdit->append("<qt><head></head><body><p>");
            char line[80];
            // Bounded write: the buffer is fixed-size
            snprintf(line, sizeof(line),
                     "<p><b>Displaying results %d-%d out of %d</b><br>",
                     reslist_winfirst+1, last, resCnt);
            resTextEdit->append(line);
        }

        gotone = true;

        LOGDEB1(("Url: %s\n", doc.url.c_str()));
        LOGDEB1(("Mimetype: %s\n", doc.mimetype.c_str()));
        LOGDEB1(("Mtime: %s\n", doc.mtime.c_str()));
        LOGDEB1(("Origcharset: %s\n", doc.origcharset.c_str()));
        LOGDEB1(("Title: %s\n", doc.title.c_str()));
        LOGDEB1(("Text: %s\n", doc.text.c_str()));
        LOGDEB1(("Keywords: %s\n", doc.keywords.c_str()));
        LOGDEB1(("Abstract: %s\n", doc.abstract.c_str()));

        // Result list display. Standard Omega includes:
        //  - title or simple file name or url
        //  - abstract and keywords
        //  - url
        //  - relevancy percentage + keywords matched
        //  - modification date
        //  - language
        //  - size
        char perbuf[10];
        // Bounded write: an out-of-range percent value must not overrun
        // the small buffer
        snprintf(perbuf, sizeof(perbuf), "%3d%%", percent);

        if (doc.title.empty())
            doc.title = path_getsimple(doc.url);

        // Modification date, left empty if it is missing or cannot be
        // converted
        char datebuf[100];
        datebuf[0] = 0;
        if (!doc.mtime.empty()) {
            time_t mtime = atol(doc.mtime.c_str());
            struct tm *tm = localtime(&mtime);
            // localtime() may return a null pointer, which must not be
            // handed to strftime()
            if (tm == 0 ||
                strftime(datebuf, sizeof(datebuf),
                         "<i>Modified:</i> %F %T", tm) == 0)
                datebuf[0] = 0;
        }

        // NOTE(review): title, abstract, keywords and url are inserted into
        // the markup unescaped, so '<' or '&' in the stored data will be
        // treated as markup by the rich text widget -- consider escaping.
        string result = "<p>" +
            string(perbuf) + " <b>" + doc.title + "</b><br>" +
            (datebuf[0] ? string(datebuf) + "<br>" : string("")) +
            (!doc.abstract.empty() ? doc.abstract + "<br>" : string("")) +
            (!doc.keywords.empty() ? doc.keywords + "<br>" : string("")) +
            "<i>" + doc.url + "</i><br>" +
            "</p>";

        QString str = QString::fromUtf8(result.c_str(), result.length());
        resTextEdit->append(str);
    }

    if (gotone) {
        resTextEdit->append("</body></qt>");
        resTextEdit->setCursorPosition(0,0);
        resTextEdit->ensureCursorVisible();
        // Display preview for 1st doc in list
        resTextEdit_clicked(1, 0);
    } else {
        // Nothing to show: undo the window start increment done above
        reslist_winfirst -= respagesize;
        if (reslist_winfirst < 0)
            reslist_winfirst = 0;
    }
}

View File

@ -1,5 +1,5 @@
#ifndef lint
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.11 2005-01-28 09:37:37 dockes Exp $ (C) 2004 J.F.Dockes";
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.12 2005-01-28 15:25:40 dockes Exp $ (C) 2004 J.F.Dockes";
#endif
#include <sys/stat.h>
@ -32,8 +32,14 @@ class Native {
// Querying
Xapian::Database db;
Xapian::Query query;
Native() : isopen(false), iswritable(false) {}
Xapian::Enquire *enquire;
Xapian::MSet mset;
Native() : isopen(false), iswritable(false), enquire(0) {
}
~Native() {
delete enquire;
}
};
Rcl::Db::Db()
@ -185,6 +191,7 @@ static bool splitCb(void *cdata, const std::string &term, int pos)
// Unaccent and lowercase data: use unac
// for accents, and do it by hand for upper / lower. Note lowercasing is
// only for ascii letters anyway, so it's just A-Z -> a-z
// Removing crlfs is so that we can use the text in the document data fields.
bool dumb_string(const string &in, string &out)
{
string inter;
@ -193,10 +200,14 @@ bool dumb_string(const string &in, string &out)
return false;
out.reserve(inter.length());
for (unsigned int i = 0; i < inter.length(); i++) {
if (inter[i] >= 'A' && inter[i] <= 'Z')
if (inter[i] >= 'A' && inter[i] <= 'Z') {
out += inter[i] + 'a' - 'A';
else
out += inter[i];
} else {
if (inter[i] == '\n' || inter[i] == '\r')
out += ' ';
else
out += inter[i];
}
}
return true;
}
@ -210,19 +221,6 @@ bool Rcl::Db::add(const string &fn, const Rcl::Doc &doc)
Xapian::Document newdocument;
// Document data record. omindex has the following nl separated fields:
// - url
// - sample
// - caption (title limited to 100 chars)
// - mime type
string record = "url=file:/" + fn;
record += "\nmtime=" + doc.mtime;
record += "\nsample=";
record += "\ncaption=" + doc.title;
record += "\nmtype=" + doc.mimetype;
record += "\n";
newdocument.set_data(record);
wsData splitData(newdocument);
TextSplit splitter(splitCb, &splitData);
@ -260,6 +258,22 @@ bool Rcl::Db::add(const string &fn, const Rcl::Doc &doc)
newdocument.add_term(pathterm);
const char *fnc = fn.c_str();
// Document data record. omindex has the following nl separated fields:
// - url
// - sample
// - caption (title limited to 100 chars)
// - mime type
string record = "url=file:/" + fn;
record += "\nmtype=" + doc.mimetype;
record += "\nmtime=" + doc.mtime;
record += "\norigcharset=" + doc.origcharset;
record += "\ncaption=" + doc.title;
record += "\nkeywords=" + doc.keywords;
record += "\nabstract=" + doc.abstract;
record += "\n";
LOGDEB(("Newdocument data: %s\n", record.c_str()));
newdocument.set_data(record);
// If this document has already been indexed, update the existing
// entry.
try {
@ -268,8 +282,8 @@ bool Rcl::Db::add(const string &fn, const Rcl::Doc &doc)
#endif
ndb->wdb.replace_document(pathterm, newdocument);
#if 0
if (did < updated.size()) {
updated[did] = true;
if (did < ndb->updated.size()) {
ndb->updated[did] = true;
LOGDEB(("%s updated\n", fnc));
} else {
LOGDEB(("%s added\n", fnc));
@ -299,6 +313,9 @@ bool Rcl::Db::needUpdate(const string &filename, const struct stat *stp)
if (did == ndb->wdb.postlist_end(pathterm))
return true;
Xapian::Document doc = ndb->wdb.get_document(*did);
#if 0
ndb->updated[*did] = true;
#endif
string data = doc.get_data();
//cerr << "DOCUMENT EXISTS " << data << endl;
const char *cp = strstr(data.c_str(), "mtime=");
@ -332,6 +349,7 @@ static bool splitQCb(void *cdata, const std::string &term, int )
bool Rcl::Db::setQuery(const std::string &querystring)
{
LOGDEB(("Rcl::Db::setQuery: %s\n", querystring.c_str()));
wsQData splitData;
TextSplit splitter(splitQCb, &splitData);
@ -345,32 +363,64 @@ bool Rcl::Db::setQuery(const std::string &querystring)
ndb->query = Xapian::Query(Xapian::Query::OP_OR, splitData.terms.begin(),
splitData.terms.end());
delete ndb->enquire;
ndb->enquire = new Xapian::Enquire(ndb->db);
ndb->enquire->set_query(ndb->query);
ndb->mset = Xapian::MSet();
return true;
}
bool Rcl::Db::getDoc(int i, Doc &doc)
int Rcl::Db::getResCnt()
{
LOGDEB1(("Rcl::Db::getDoc: %d\n", i));
Native *ndb = (Native *)pdata;
if (!ndb || !ndb->enquire) {
LOGERR(("Rcl::Db::getResCnt: no query opened\n"));
return -1;
}
if (ndb->mset.size() <= 0)
return -1;
return ndb->mset.get_matches_lower_bound();
}
Xapian::Enquire enquire(ndb->db);
enquire.set_query(ndb->query);
Xapian::MSet matches = enquire.get_mset(i, 1);
LOGDEB1(("Rcl::Db::getDoc: Query '%s' Estimated results: %d\n",
ndb->query.get_description(), matches.get_matches_lower_bound()));
if (matches.empty())
bool Rcl::Db::getDoc(int i, Doc &doc, int *percent)
{
LOGDEB(("Rcl::Db::getDoc: %d\n", i));
Native *ndb = (Native *)pdata;
if (!ndb || !ndb->enquire) {
LOGERR(("Rcl::Db::getDoc: no query opened\n"));
return false;
}
Xapian::Document xdoc = matches.begin().get_document();
int first = ndb->mset.get_firstitem();
int last = first + ndb->mset.size() -1;
if (!(i >= first && i <= last)) {
LOGDEB1(("Fetching for first %d, count 10\n", i));
ndb->mset = ndb->enquire->get_mset(i, 10);
if (ndb->mset.empty())
return false;
first = ndb->mset.get_firstitem();
last = first + ndb->mset.size() -1;
}
LOGDEB1(("Rcl::Db::getDoc: Qry '%s' win [%d-%d] Estimated results: %d",
ndb->query.get_description().c_str(),
first, last,
ndb->mset.get_matches_lower_bound()));
Xapian::Document xdoc = ndb->mset[i-first].get_document();
if (percent)
*percent = ndb->mset.convert_to_percent(ndb->mset[i-first]);
// Parse xapian document's data and populate doc fields
string data = xdoc.get_data();
LOGDEB1(("Rcl::Db::getDoc: data: %s\n", data.c_str()));
ConfSimple parms(&data);
parms.get(string("url"), doc.url);
parms.get(string("mtype"), doc.mimetype);
parms.get(string("mtime"), doc.mtime);
parms.get(string("url"), doc.url);
parms.get(string("origcharset"), doc.origcharset);
parms.get(string("caption"), doc.title);
parms.get(string("keywords"), doc.keywords);
parms.get(string("abstract"), doc.abstract);
return true;
}

View File

@ -1,6 +1,6 @@
#ifndef _DB_H_INCLUDED_
#define _DB_H_INCLUDED_
/* @(#$Id: rcldb.h,v 1.5 2005-01-25 14:37:21 dockes Exp $ (C) 2004 J.F.Dockes */
/* @(#$Id: rcldb.h,v 1.6 2005-01-28 15:25:40 dockes Exp $ (C) 2004 J.F.Dockes */
#include <string>
@ -26,23 +26,26 @@ namespace Rcl {
*/
class Doc {
public:
// These fields potentially go into the document data record
std::string url;
std::string mimetype;
std::string mtime; // Modification time as decimal ascii
std::string origcharset;
std::string title;
std::string text;
std::string keywords;
std::string abstract;
std::string text;
void erase() {
url.erase();
mimetype.erase();
mtime.erase();
origcharset.erase();
title.erase();
text.erase();
keywords.erase();
abstract.erase();
text.erase();
}
};
@ -71,7 +74,9 @@ class Db {
// Get document at rank i. This is probably vastly inferior to the type
// of interface in Xapian, but we have to start with something simple
// to experiment with the GUI
bool getDoc(int i, Doc &doc);
bool getDoc(int i, Doc &doc, int *percent = 0);
// Get results count
int getResCnt();
};