added code to specifically index/search file names

This commit is contained in:
dockes 2006-03-20 16:05:41 +00:00
parent f96fcd6dd3
commit d4852f3b0d
10 changed files with 259 additions and 145 deletions

View File

@ -1 +1 @@
1.2.3 1.3.1

View File

@ -1,5 +1,5 @@
#ifndef lint #ifndef lint
static char rcsid[] = "@(#$Id: indexer.cpp,v 1.24 2006-01-26 07:02:06 dockes Exp $ (C) 2004 J.F.Dockes"; static char rcsid[] = "@(#$Id: indexer.cpp,v 1.25 2006-03-20 16:05:41 dockes Exp $ (C) 2004 J.F.Dockes";
#endif #endif
/* /*
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
@ -236,6 +236,11 @@ DbIndexer::processone(const std::string &fn, const struct stat *stp,
// Internal access path for multi-document files // Internal access path for multi-document files
doc.ipath = ipath; doc.ipath = ipath;
// File name transcoded to utf8 for indexation.
// We actually might want a separate param for the filename charset
string charset = config->getDefCharset();
// If this fails, the path won't be indexed, no big deal
transcode(fn, doc.utf8fn, charset, "UTF-8");
// Do database-specific work to update document data // Do database-specific work to update document data
if (!db.add(fn, doc, stp)) if (!db.add(fn, doc, stp))
return FsTreeWalker::FtwError; return FsTreeWalker::FtwError;

View File

@ -1,5 +1,5 @@
#ifndef lint #ifndef lint
static char rcsid[] = "@(#$Id: internfile.cpp,v 1.15 2006-01-23 13:32:28 dockes Exp $ (C) 2004 J.F.Dockes"; static char rcsid[] = "@(#$Id: internfile.cpp,v 1.16 2006-03-20 16:05:41 dockes Exp $ (C) 2004 J.F.Dockes";
#endif #endif
/* /*
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
@ -125,41 +125,42 @@ FileInterner::FileInterner(const std::string &f, RclConfig *cnf,
// for a compressed file. // for a compressed file.
m_mime = mimetype(m_fn, m_cfg, usfci); m_mime = mimetype(m_fn, m_cfg, usfci);
// If identification fails, try to use the input parameter. Note that this // If identification fails, try to use the input parameter. This
// is normally not a compressed type (it's the mime type from the db) // is then normally not a compressed type (it's the mime type from
// the db), and is only set when previewing, not for indexing
if (m_mime.empty() && imime) if (m_mime.empty() && imime)
m_mime = *imime; m_mime = *imime;
if (!m_mime.empty()) {
// Has mime: check for a compressed file. If so, create a
// temporary uncompressed file, and rerun the mime type
// identification, then do the rest with the temp file.
list<string>ucmd;
if (m_cfg->getUncompressor(m_mime, ucmd)) {
if (!uncompressfile(m_cfg, m_fn, ucmd, m_tdir, m_tfile)) {
return;
}
LOGDEB(("internfile: after ucomp: m_tdir %s, tfile %s\n",
m_tdir.c_str(), m_tfile.c_str()));
m_fn = m_tfile;
m_mime = mimetype(m_fn, m_cfg, usfci);
if (m_mime.empty() && imime)
m_mime = *imime;
}
}
if (m_mime.empty()) { if (m_mime.empty()) {
// No mime type: not listed in our map, or present in stop list // No mime type. We let it through as config may warrant that
LOGDEB(("FileInterner::FileInterner: (no mime) [%s]\n", m_fn.c_str())); // we index all file names
return; LOGDEB(("internfile: (no mime) [%s]\n", m_fn.c_str()));
} }
// First check for a compressed file. If so, create a temporary // Look for appropriate handler (might still return empty)
// uncompressed file, and rerun the mime type identification, then do the
// rest with the temp file.
list<string>ucmd;
if (m_cfg->getUncompressor(m_mime, ucmd)) {
if (!uncompressfile(m_cfg, m_fn, ucmd, m_tdir, m_tfile)) {
return;
}
LOGDEB(("internfile: after ucomp: m_tdir %s, tfile %s\n",
m_tdir.c_str(), m_tfile.c_str()));
m_fn = m_tfile;
m_mime = mimetype(m_fn, m_cfg, usfci);
if (m_mime.empty() && imime)
m_mime = *imime;
if (m_mime.empty()) {
// No mime type ?? pass on.
LOGDEB(("internfile: (no mime) [%s]\n", m_fn.c_str()));
return;
}
}
// Look for appropriate handler
m_handler = getMimeHandler(m_mime, m_cfg); m_handler = getMimeHandler(m_mime, m_cfg);
if (!m_handler) { if (!m_handler) {
// No handler for this type, for now :( // No handler for this type, for now :( if indexallfilenames
// is set in the config, this normally won't happen (we get mh_unknown)
LOGDEB(("FileInterner::FileInterner: %s: no handler\n", LOGDEB(("FileInterner::FileInterner: %s: no handler\n",
m_mime.c_str())); m_mime.c_str()));
return; return;

View File

@ -1,5 +1,5 @@
#ifndef lint #ifndef lint
static char rcsid[] = "@(#$Id: mimehandler.cpp,v 1.16 2006-01-23 13:32:28 dockes Exp $ (C) 2004 J.F.Dockes"; static char rcsid[] = "@(#$Id: mimehandler.cpp,v 1.17 2006-03-20 16:05:41 dockes Exp $ (C) 2004 J.F.Dockes";
#endif #endif
/* /*
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
@ -31,6 +31,7 @@ using namespace std;
#include "mh_mail.h" #include "mh_mail.h"
#include "mh_text.h" #include "mh_text.h"
#include "mh_exec.h" #include "mh_exec.h"
#include "mh_unknown.h"
/** Create internal handler object appropriate for given mime type */ /** Create internal handler object appropriate for given mime type */
static MimeHandler *mhFactory(const string &mime) static MimeHandler *mhFactory(const string &mime)
@ -52,35 +53,48 @@ static MimeHandler *mhFactory(const string &mime)
MimeHandler *getMimeHandler(const string &mtype, RclConfig *cfg) MimeHandler *getMimeHandler(const string &mtype, RclConfig *cfg)
{ {
// Get handler definition for mime type // Get handler definition for mime type
string hs = cfg->getMimeHandlerDef(mtype); string hs;
if (hs.empty()) if (!mtype.empty())
return 0; hs = cfg->getMimeHandlerDef(mtype);
// Break definition into type and name if (!hs.empty()) {
list<string> toks; // Break definition into type and name
stringToStrings(hs, toks); list<string> toks;
if (toks.empty()) { stringToStrings(hs, toks);
LOGERR(("getMimeHandler: bad mimeconf line for %s\n", mtype.c_str())); if (toks.empty()) {
return 0; LOGERR(("getMimeHandler: bad mimeconf line for %s\n",
} mtype.c_str()));
// Retrieve handler function according to type
if (!stringlowercmp("internal", toks.front())) {
return mhFactory(mtype);
} else if (!stringlowercmp("dll", toks.front())) {
return 0;
} else if (!stringlowercmp("exec", toks.front())) {
if (toks.size() < 2) {
LOGERR(("getMimeHandler: bad line for %s: %s\n", mtype.c_str(),
hs.c_str()));
return 0; return 0;
} }
MimeHandlerExec *h = new MimeHandlerExec;
list<string>::const_iterator it1 = toks.begin(); // Retrieve handler function according to type
it1++; if (!stringlowercmp("internal", toks.front())) {
for (;it1 != toks.end();it1++) return mhFactory(mtype);
h->params.push_back(*it1); } else if (!stringlowercmp("dll", toks.front())) {
return h; } else if (!stringlowercmp("exec", toks.front())) {
if (toks.size() < 2) {
LOGERR(("getMimeHandler: bad line for %s: %s\n",
mtype.c_str(), hs.c_str()));
return 0;
}
MimeHandlerExec *h = new MimeHandlerExec;
list<string>::const_iterator it1 = toks.begin();
it1++;
for (;it1 != toks.end();it1++)
h->params.push_back(*it1);
return h;
}
}
// We are supposed to get here if there was no specific error, but
// there is no identified mime type, or no handler
// associated. These files are either ignored or their name is
// indexed, depending on configuration
bool indexunknown = false;
cfg->getConfParam("indexallfilenames", &indexunknown);
if (indexunknown) {
return new MimeHandlerUnknown;
} else {
return 0;
} }
return 0;
} }

View File

@ -24,7 +24,7 @@
</property> </property>
<widget class="QLayoutWidget"> <widget class="QLayoutWidget">
<property name="name"> <property name="name">
<cstring>layout13</cstring> <cstring>layout12</cstring>
</property> </property>
<vbox> <vbox>
<property name="name"> <property name="name">
@ -32,18 +32,12 @@
</property> </property>
<widget class="QLayoutWidget"> <widget class="QLayoutWidget">
<property name="name"> <property name="name">
<cstring>layout15</cstring> <cstring>layout11</cstring>
</property> </property>
<hbox> <hbox>
<property name="name"> <property name="name">
<cstring>unnamed</cstring> <cstring>unnamed</cstring>
</property> </property>
<property name="margin">
<number>10</number>
</property>
<property name="spacing">
<number>10</number>
</property>
<widget class="QLabel"> <widget class="QLabel">
<property name="name"> <property name="name">
<cstring>textLabel2</cstring> <cstring>textLabel2</cstring>
@ -54,12 +48,54 @@
</widget> </widget>
<widget class="QLayoutWidget"> <widget class="QLayoutWidget">
<property name="name"> <property name="name">
<cstring>layout14</cstring> <cstring>layout10</cstring>
</property> </property>
<grid> <grid>
<property name="name"> <property name="name">
<cstring>unnamed</cstring> <cstring>unnamed</cstring>
</property> </property>
<widget class="QLabel" row="2" column="0" rowspan="1" colspan="2">
<property name="name">
<cstring>orWordsTL</cstring>
</property>
<property name="text">
<string>Any of these words</string>
</property>
</widget>
<widget class="QLineEdit" row="2" column="2">
<property name="name">
<cstring>orWordsLE</cstring>
</property>
</widget>
<widget class="QLabel" row="4" column="0">
<property name="name">
<cstring>textLabel1_2</cstring>
</property>
<property name="text">
<string>File name</string>
</property>
</widget>
<widget class="QLineEdit" row="4" column="2">
<property name="name">
<cstring>fileNameLE</cstring>
</property>
</widget>
<widget class="QLabel" row="3" column="0" rowspan="1" colspan="2">
<property name="name">
<cstring>noWordsTL</cstring>
</property>
<property name="text">
<string>None of these words</string>
</property>
</widget>
<widget class="QLineEdit" row="3" column="2">
<property name="name">
<cstring>noWordsLE</cstring>
</property>
<property name="text">
<string></string>
</property>
</widget>
<widget class="QLabel" row="0" column="0"> <widget class="QLabel" row="0" column="0">
<property name="name"> <property name="name">
<cstring>andWordsTL</cstring> <cstring>andWordsTL</cstring>
@ -74,7 +110,7 @@
<string>All these words</string> <string>All these words</string>
</property> </property>
</widget> </widget>
<widget class="QLineEdit" row="0" column="1" rowspan="1" colspan="3"> <widget class="QLineEdit" row="0" column="1" rowspan="1" colspan="2">
<property name="name"> <property name="name">
<cstring>andWordsLE</cstring> <cstring>andWordsLE</cstring>
</property> </property>
@ -93,40 +129,11 @@
<string>This exact phrase</string> <string>This exact phrase</string>
</property> </property>
</widget> </widget>
<widget class="QLineEdit" row="1" column="2" rowspan="1" colspan="2"> <widget class="QLineEdit" row="1" column="2">
<property name="name"> <property name="name">
<cstring>phraseLE</cstring> <cstring>phraseLE</cstring>
</property> </property>
</widget> </widget>
<widget class="QLabel" row="2" column="0" rowspan="1" colspan="2">
<property name="name">
<cstring>orWordsTL</cstring>
</property>
<property name="text">
<string>Any of these words</string>
</property>
</widget>
<widget class="QLineEdit" row="2" column="2" rowspan="1" colspan="2">
<property name="name">
<cstring>orWordsLE</cstring>
</property>
</widget>
<widget class="QLabel" row="3" column="0" rowspan="1" colspan="3">
<property name="name">
<cstring>noWordsTL</cstring>
</property>
<property name="text">
<string>None of these words</string>
</property>
</widget>
<widget class="QLineEdit" row="3" column="3">
<property name="name">
<cstring>noWordsLE</cstring>
</property>
<property name="text">
<string></string>
</property>
</widget>
</grid> </grid>
</widget> </widget>
</hbox> </hbox>
@ -353,20 +360,6 @@
</widget> </widget>
</grid> </grid>
</widget> </widget>
<widget class="Line">
<property name="name">
<cstring>line1</cstring>
</property>
<property name="frameShape">
<enum>HLine</enum>
</property>
<property name="frameShadow">
<enum>Sunken</enum>
</property>
<property name="orientation">
<enum>Horizontal</enum>
</property>
</widget>
<widget class="QLayoutWidget"> <widget class="QLayoutWidget">
<property name="name"> <property name="name">
<cstring>layout25</cstring> <cstring>layout25</cstring>
@ -398,6 +391,20 @@
</widget> </widget>
</vbox> </vbox>
</widget> </widget>
<widget class="Line">
<property name="name">
<cstring>line1</cstring>
</property>
<property name="frameShape">
<enum>HLine</enum>
</property>
<property name="frameShadow">
<enum>Sunken</enum>
</property>
<property name="orientation">
<enum>Horizontal</enum>
</property>
</widget>
</vbox> </vbox>
</widget> </widget>
<connections> <connections>

View File

@ -131,6 +131,7 @@ void advsearch::searchPB_clicked()
mydata.phrase = string((const char*)(phraseLE->text().utf8())); mydata.phrase = string((const char*)(phraseLE->text().utf8()));
mydata.orwords = string((const char*)(orWordsLE->text().utf8())); mydata.orwords = string((const char*)(orWordsLE->text().utf8()));
mydata.nowords = string((const char*)(noWordsLE->text().utf8())); mydata.nowords = string((const char*)(noWordsLE->text().utf8()));
mydata.filename = string((const char*)(fileNameLE->text().utf8()));
if (restrictFtCB->isOn() && noFiltypsLB->count() > 0) { if (restrictFtCB->isOn() && noFiltypsLB->count() > 0) {
for (unsigned int i = 0; i < yesFiltypsLB->count(); i++) { for (unsigned int i = 0; i < yesFiltypsLB->count(); i++) {
QCString ctext = yesFiltypsLB->item(i)->text().utf8(); QCString ctext = yesFiltypsLB->item(i)->text().utf8();

View File

@ -75,6 +75,23 @@
<string>If this is set, each returned document will contain all the terms in the query. Else documents will be ordered by relevance, but may not contain all the terms.</string> <string>If this is set, each returned document will contain all the terms in the query. Else documents will be ordered by relevance, but may not contain all the terms.</string>
</property> </property>
</widget> </widget>
<widget class="QCheckBox">
<property name="name">
<cstring>isFNameCB</cstring>
</property>
<property name="text">
<string>&amp;File name</string>
</property>
<property name="accel">
<string>Alt+F</string>
</property>
<property name="toolTip" stdset="0">
<string>Search is on file names only, and may use wildcards.</string>
</property>
<property name="whatsThis" stdset="0">
<string>If this is set, the search will only be performed on file names. Wildcards ? and * can be used and will be matched as in a shell command line.</string>
</property>
</widget>
<widget class="QLineEdit"> <widget class="QLineEdit">
<property name="name"> <property name="name">
<cstring>queryText</cstring> <cstring>queryText</cstring>

View File

@ -44,9 +44,11 @@ void SSearchBase::startSimpleSearch()
LOGDEB(("SSearchBase::startSimpleSearch\n")); LOGDEB(("SSearchBase::startSimpleSearch\n"));
Rcl::AdvSearchData sdata; Rcl::AdvSearchData sdata;
QCString u8 = queryText->text().utf8(); QCString u8 = queryText->text().utf8();
if (allTermsCB->isChecked())
if (isFNameCB->isChecked())
sdata.filename = u8;
else if (allTermsCB->isChecked())
sdata.allwords = u8; sdata.allwords = u8;
else else
sdata.orwords = u8; sdata.orwords = u8;

View File

@ -1,5 +1,5 @@
#ifndef lint #ifndef lint
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.57 2006-02-07 10:26:49 dockes Exp $ (C) 2004 J.F.Dockes"; static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.58 2006-03-20 16:05:41 dockes Exp $ (C) 2004 J.F.Dockes";
#endif #endif
/* /*
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
@ -20,6 +20,7 @@ static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.57 2006-02-07 10:26:49 dockes Exp $
#include <stdio.h> #include <stdio.h>
#include <unistd.h> #include <unistd.h>
#include <sys/stat.h> #include <sys/stat.h>
#include <fnmatch.h>
#include <iostream> #include <iostream>
#include <string> #include <string>
@ -287,6 +288,7 @@ bool Rcl::dumb_string(const string &in, string &out)
if (!unacmaybefold(s1, out, "UTF-8", true)) { if (!unacmaybefold(s1, out, "UTF-8", true)) {
LOGERR(("dumb_string: unac failed for %s\n", in.c_str())); LOGERR(("dumb_string: unac failed for %s\n", in.c_str()));
out.erase(); out.erase();
// See comment at start of func
return true; return true;
} }
return true; return true;
@ -387,11 +389,9 @@ bool Rcl::Db::add(const string &fn, const Rcl::Doc &idoc,
// /////// Split and index terms in document body and auxiliary fields // /////// Split and index terms in document body and auxiliary fields
string noacc; string noacc;
// Split and index file name. This supposes that it's either ascii // Split and index file path. Do we really want to do this? Or do
// or utf-8. If this fails, we just go on. We need a config // it with the simple file name only ?
// parameter for file name charset. if (dumb_string(doc.utf8fn, noacc)) {
// Do we really want to fold case here ?
if (dumb_string(fn, noacc)) {
splitter.text_to_words(noacc); splitter.text_to_words(noacc);
splitData.basepos += splitData.curpos + 100; splitData.basepos += splitData.curpos + 100;
} }
@ -439,6 +439,14 @@ bool Rcl::Db::add(const string &fn, const Rcl::Doc &idoc,
string pathterm = "P" + hash; string pathterm = "P" + hash;
newdocument.add_term(pathterm); newdocument.add_term(pathterm);
// Simple file name. This is used for file name searches only. We index
// it with a term prefix
string sfn = path_getsimple(doc.utf8fn);
if (dumb_string(sfn, noacc) && !noacc.empty()) {
sfn = string("XSFN") + noacc;
newdocument.add_term(sfn);
}
// Internal path: with path, makes unique identifier for documents // Internal path: with path, makes unique identifier for documents
// inside multidocument files. // inside multidocument files.
string uniterm; string uniterm;
@ -992,7 +1000,7 @@ bool Rcl::Db::setQuery(const std::string &iqstring, QueryOpts opts,
Native *ndb = (Native *)pdata; Native *ndb = (Native *)pdata;
if (!ndb) if (!ndb)
return false; return false;
asdata.erase(); m_asdata.erase();
dbindices.clear(); dbindices.clear();
list<Xapian::Query> pqueries; list<Xapian::Query> pqueries;
stringToXapianQueries(iqstring, stemlang, ndb, pqueries, opts); stringToXapianQueries(iqstring, stemlang, ndb, pqueries, opts);
@ -1023,7 +1031,7 @@ bool Rcl::Db::setQuery(AdvSearchData &sdata, QueryOpts opts,
if (!sdata.topdir.empty()) if (!sdata.topdir.empty())
LOGDEB((" restricted to: %s\n", sdata.topdir.c_str())); LOGDEB((" restricted to: %s\n", sdata.topdir.c_str()));
asdata = sdata; m_asdata = sdata;
dbindices.clear(); dbindices.clear();
Native *ndb = (Native *)pdata; Native *ndb = (Native *)pdata;
@ -1031,12 +1039,62 @@ bool Rcl::Db::setQuery(AdvSearchData &sdata, QueryOpts opts,
return false; return false;
list<Xapian::Query> pqueries; list<Xapian::Query> pqueries;
Xapian::Query xq; Xapian::Query xq;
if (!sdata.filename.empty()) {
LOGDEB((" filename search\n"));
// File name search, with possible wildcards.
// We expand wildcards by scanning the filename terms (prefixed
// with XSFN) from the database.
// We build an OR query with the expanded values if any.
string pattern;
// We take the pattern from the dedicated filename field, which avoids
// any interaction with the allwords checkbox
dumb_string(sdata.filename, pattern);
// If pattern is not quoted, we add * at each end: match any
// substring
if (pattern[0] == '"' && pattern[pattern.size()-1] == '"')
pattern = pattern.substr(1, pattern.size() -2);
else
pattern = "*" + pattern + "*";
LOGDEB((" pattern: [%s]\n", pattern.c_str()));
// Match pattern against all file names in the db
Xapian::TermIterator it = ndb->db.allterms_begin();
it.skip_to("XSFN");
list<string> names;
for (;it != ndb->db.allterms_end(); it++) {
if ((*it).find("XSFN") != 0)
break;
string fn = (*it).substr(4);
LOGDEB2(("Matching [%s] and [%s]\n", pattern.c_str(), fn.c_str()));
if (fnmatch(pattern.c_str(), fn.c_str(), 0) != FNM_NOMATCH) {
names.push_back((*it).c_str());
}
// Limit the match count
if (names.size() > 1000) {
LOGERR(("Rcl::Db::SetQuery: too many matched file names\n"));
break;
}
}
if (names.empty()) {
// Build an impossible query: we know it's impossible because we
// control the prefixes!
names.push_back("XIMPOSSIBLE");
}
// Build a query out of the matching file name terms.
xq = Xapian::Query(Xapian::Query::OP_OR, names.begin(), names.end());
}
if (!sdata.allwords.empty()) { if (!sdata.allwords.empty()) {
stringToXapianQueries(sdata.allwords, stemlang, ndb, pqueries, opts); stringToXapianQueries(sdata.allwords, stemlang, ndb, pqueries, opts);
if (!pqueries.empty()) { if (!pqueries.empty()) {
xq = Xapian::Query(Xapian::Query::OP_AND, pqueries.begin(), Xapian::Query nq =
pqueries.end()); Xapian::Query(Xapian::Query::OP_AND, pqueries.begin(),
pqueries.end());
xq = xq.empty() ? nq :
Xapian::Query(Xapian::Query::OP_AND, xq, nq);
pqueries.clear(); pqueries.clear();
} }
} }
@ -1044,8 +1102,8 @@ bool Rcl::Db::setQuery(AdvSearchData &sdata, QueryOpts opts,
if (!sdata.orwords.empty()) { if (!sdata.orwords.empty()) {
stringToXapianQueries(sdata.orwords, stemlang, ndb, pqueries, opts); stringToXapianQueries(sdata.orwords, stemlang, ndb, pqueries, opts);
if (!pqueries.empty()) { if (!pqueries.empty()) {
Xapian::Query nq; Xapian::Query nq =
nq = Xapian::Query(Xapian::Query::OP_OR, pqueries.begin(), Xapian::Query(Xapian::Query::OP_OR, pqueries.begin(),
pqueries.end()); pqueries.end());
xq = xq.empty() ? nq : xq = xq.empty() ? nq :
Xapian::Query(Xapian::Query::OP_AND_MAYBE, xq, nq); Xapian::Query(Xapian::Query::OP_AND_MAYBE, xq, nq);
@ -1157,7 +1215,7 @@ class Rcl::DbPops {
string url; string url;
parms.get(string("url"), url); parms.get(string("url"), url);
url = url.substr(7); url = url.substr(7);
if (url.find(rdb->asdata.topdir) == 0) if (url.find(rdb->m_asdata.topdir) == 0)
return true; return true;
return false; return false;
} }
@ -1215,8 +1273,8 @@ bool Rcl::Db::getDoc(int exti, Doc &doc, int *percent)
} }
// For now the only post-query filter is on dir subtree // For now the only post-query filter is on dir subtree
bool postqfilter = !asdata.topdir.empty(); bool postqfilter = !m_asdata.topdir.empty();
LOGDEB1(("Topdir %s postqflt %d\n", asdata.topdir.c_str(), postqfilter)); LOGDEB1(("Topdir %s postqflt %d\n", m_asdata.topdir.c_str(), postqfilter));
int xapi; int xapi;
if (postqfilter) { if (postqfilter) {

View File

@ -16,7 +16,7 @@
*/ */
#ifndef _DB_H_INCLUDED_ #ifndef _DB_H_INCLUDED_
#define _DB_H_INCLUDED_ #define _DB_H_INCLUDED_
/* @(#$Id: rcldb.h,v 1.25 2006-02-07 10:26:49 dockes Exp $ (C) 2004 J.F.Dockes */ /* @(#$Id: rcldb.h,v 1.26 2006-03-20 16:05:41 dockes Exp $ (C) 2004 J.F.Dockes */
#include <string> #include <string>
#include <list> #include <list>
@ -52,26 +52,33 @@ namespace Rcl {
class Doc { class Doc {
public: public:
// These fields potentially go into the document data record // These fields potentially go into the document data record
string url; // We indicate the routine that sets them up during indexing
string ipath; string url; // Computed from fn by Db::add
string mimetype; string utf8fn; // Transcoded version of the file path.
// Set by DbIndexer::processone
string ipath; // Set by DbIndexer::processone
string mimetype; // Set by FileInterner::internfile
string fmtime; // File modification time as decimal ascii unix time string fmtime; // File modification time as decimal ascii unix time
// Set by DbIndexer::processone
string dmtime; // Data reference date (same format). Ie: mail date string dmtime; // Data reference date (same format). Ie: mail date
string origcharset; // Possibly set by handler
string title; string origcharset; // Charset we transcoded from (in case we want back)
string keywords; // Possibly set by handler
string abstract; string title; // Possibly set by handler
string fbytes; // File size string keywords; // Possibly set by handler
string dbytes; // Doc size string abstract; // Possibly set by handler
string fbytes; // File size. Set by Db::Add
string dbytes; // Doc size. Set by Db::Add from text length
// The following fields don't go to the db. text is only used when // The following fields don't go to the db record
// indexing
string text; string text; // text is split and indexed
int pc; // used by sortseq, convenience int pc; // used by sortseq, convenience
void erase() { void erase() {
url.erase(); url.erase();
utf8fn.erase();
ipath.erase(); ipath.erase();
mimetype.erase(); mimetype.erase();
fmtime.erase(); fmtime.erase();
@ -96,6 +103,7 @@ class AdvSearchData {
string phrase; string phrase;
string orwords; string orwords;
string nowords; string nowords;
string filename;
list<string> filetypes; // restrict to types. Empty if inactive list<string> filetypes; // restrict to types. Empty if inactive
string topdir; // restrict to subtree. Empty if inactive string topdir; // restrict to subtree. Empty if inactive
string description; // Printable expanded version of the complete query string description; // Printable expanded version of the complete query
@ -107,6 +115,7 @@ class AdvSearchData {
nowords.erase(); nowords.erase();
filetypes.clear(); filetypes.clear();
topdir.erase(); topdir.erase();
filename.erase();
description.erase(); description.erase();
} }
}; };
@ -167,7 +176,7 @@ class Db {
private: private:
AdvSearchData asdata; AdvSearchData m_asdata;
vector<int> dbindices; // In case there is a postq filter: sequence of vector<int> dbindices; // In case there is a postq filter: sequence of
// db indices that match // db indices that match
void *pdata; // Pointer to private data. We don't want db(ie void *pdata; // Pointer to private data. We don't want db(ie