added code to specifically index/search file names
This commit is contained in:
parent
f96fcd6dd3
commit
d4852f3b0d
@ -1 +1 @@
|
||||
1.2.3
|
||||
1.3.1
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: indexer.cpp,v 1.24 2006-01-26 07:02:06 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: indexer.cpp,v 1.25 2006-03-20 16:05:41 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
#endif
|
||||
/*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
@ -236,6 +236,11 @@ DbIndexer::processone(const std::string &fn, const struct stat *stp,
|
||||
// Internal access path for multi-document files
|
||||
doc.ipath = ipath;
|
||||
|
||||
// File name transcoded to utf8 for indexation.
|
||||
// We actually might want a separate param for the filename charset
|
||||
string charset = config->getDefCharset();
|
||||
// If this fails, the path won't be indexed, no big deal
|
||||
transcode(fn, doc.utf8fn, charset, "UTF-8");
|
||||
// Do database-specific work to update document data
|
||||
if (!db.add(fn, doc, stp))
|
||||
return FsTreeWalker::FtwError;
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: internfile.cpp,v 1.15 2006-01-23 13:32:28 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: internfile.cpp,v 1.16 2006-03-20 16:05:41 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
#endif
|
||||
/*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
@ -125,41 +125,42 @@ FileInterner::FileInterner(const std::string &f, RclConfig *cnf,
|
||||
// for a compressed file.
|
||||
m_mime = mimetype(m_fn, m_cfg, usfci);
|
||||
|
||||
// If identification fails, try to use the input parameter. Note that this
|
||||
// is normally not a compressed type (it's the mime type from the db)
|
||||
// If identification fails, try to use the input parameter. This
|
||||
// is then normally not a compressed type (it's the mime type from
|
||||
// the db), and is only set when previewing, not for indexing
|
||||
if (m_mime.empty() && imime)
|
||||
m_mime = *imime;
|
||||
|
||||
if (!m_mime.empty()) {
|
||||
// Has mime: check for a compressed file. If so, create a
|
||||
// temporary uncompressed file, and rerun the mime type
|
||||
// identification, then do the rest with the temp file.
|
||||
list<string>ucmd;
|
||||
if (m_cfg->getUncompressor(m_mime, ucmd)) {
|
||||
if (!uncompressfile(m_cfg, m_fn, ucmd, m_tdir, m_tfile)) {
|
||||
return;
|
||||
}
|
||||
LOGDEB(("internfile: after ucomp: m_tdir %s, tfile %s\n",
|
||||
m_tdir.c_str(), m_tfile.c_str()));
|
||||
m_fn = m_tfile;
|
||||
m_mime = mimetype(m_fn, m_cfg, usfci);
|
||||
if (m_mime.empty() && imime)
|
||||
m_mime = *imime;
|
||||
}
|
||||
}
|
||||
|
||||
if (m_mime.empty()) {
|
||||
// No mime type: not listed in our map, or present in stop list
|
||||
LOGDEB(("FileInterner::FileInterner: (no mime) [%s]\n", m_fn.c_str()));
|
||||
return;
|
||||
// No mime type. We let it through as config may warrant that
|
||||
// we index all file names
|
||||
LOGDEB(("internfile: (no mime) [%s]\n", m_fn.c_str()));
|
||||
}
|
||||
|
||||
// First check for a compressed file. If so, create a temporary
|
||||
// uncompressed file, and rerun the mime type identification, then do the
|
||||
// rest with the temp file.
|
||||
list<string>ucmd;
|
||||
if (m_cfg->getUncompressor(m_mime, ucmd)) {
|
||||
if (!uncompressfile(m_cfg, m_fn, ucmd, m_tdir, m_tfile)) {
|
||||
return;
|
||||
}
|
||||
LOGDEB(("internfile: after ucomp: m_tdir %s, tfile %s\n",
|
||||
m_tdir.c_str(), m_tfile.c_str()));
|
||||
m_fn = m_tfile;
|
||||
m_mime = mimetype(m_fn, m_cfg, usfci);
|
||||
if (m_mime.empty() && imime)
|
||||
m_mime = *imime;
|
||||
if (m_mime.empty()) {
|
||||
// No mime type ?? pass on.
|
||||
LOGDEB(("internfile: (no mime) [%s]\n", m_fn.c_str()));
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Look for appropriate handler
|
||||
// Look for appropriate handler (might still return empty)
|
||||
m_handler = getMimeHandler(m_mime, m_cfg);
|
||||
|
||||
if (!m_handler) {
|
||||
// No handler for this type, for now :(
|
||||
// No handler for this type, for now :( if indexallfilenames
|
||||
// is set in the config, this normally won't happen (we get mh_unknown)
|
||||
LOGDEB(("FileInterner::FileInterner: %s: no handler\n",
|
||||
m_mime.c_str()));
|
||||
return;
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: mimehandler.cpp,v 1.16 2006-01-23 13:32:28 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: mimehandler.cpp,v 1.17 2006-03-20 16:05:41 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
#endif
|
||||
/*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
@ -31,6 +31,7 @@ using namespace std;
|
||||
#include "mh_mail.h"
|
||||
#include "mh_text.h"
|
||||
#include "mh_exec.h"
|
||||
#include "mh_unknown.h"
|
||||
|
||||
/** Create internal handler object appropriate for given mime type */
|
||||
static MimeHandler *mhFactory(const string &mime)
|
||||
@ -52,35 +53,48 @@ static MimeHandler *mhFactory(const string &mime)
|
||||
MimeHandler *getMimeHandler(const string &mtype, RclConfig *cfg)
|
||||
{
|
||||
// Get handler definition for mime type
|
||||
string hs = cfg->getMimeHandlerDef(mtype);
|
||||
if (hs.empty())
|
||||
return 0;
|
||||
string hs;
|
||||
if (!mtype.empty())
|
||||
hs = cfg->getMimeHandlerDef(mtype);
|
||||
|
||||
// Break definition into type and name
|
||||
list<string> toks;
|
||||
stringToStrings(hs, toks);
|
||||
if (toks.empty()) {
|
||||
LOGERR(("getMimeHandler: bad mimeconf line for %s\n", mtype.c_str()));
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Retrieve handler function according to type
|
||||
if (!stringlowercmp("internal", toks.front())) {
|
||||
return mhFactory(mtype);
|
||||
} else if (!stringlowercmp("dll", toks.front())) {
|
||||
return 0;
|
||||
} else if (!stringlowercmp("exec", toks.front())) {
|
||||
if (toks.size() < 2) {
|
||||
LOGERR(("getMimeHandler: bad line for %s: %s\n", mtype.c_str(),
|
||||
hs.c_str()));
|
||||
if (!hs.empty()) {
|
||||
// Break definition into type and name
|
||||
list<string> toks;
|
||||
stringToStrings(hs, toks);
|
||||
if (toks.empty()) {
|
||||
LOGERR(("getMimeHandler: bad mimeconf line for %s\n",
|
||||
mtype.c_str()));
|
||||
return 0;
|
||||
}
|
||||
MimeHandlerExec *h = new MimeHandlerExec;
|
||||
list<string>::const_iterator it1 = toks.begin();
|
||||
it1++;
|
||||
for (;it1 != toks.end();it1++)
|
||||
h->params.push_back(*it1);
|
||||
return h;
|
||||
|
||||
// Retrieve handler function according to type
|
||||
if (!stringlowercmp("internal", toks.front())) {
|
||||
return mhFactory(mtype);
|
||||
} else if (!stringlowercmp("dll", toks.front())) {
|
||||
} else if (!stringlowercmp("exec", toks.front())) {
|
||||
if (toks.size() < 2) {
|
||||
LOGERR(("getMimeHandler: bad line for %s: %s\n",
|
||||
mtype.c_str(), hs.c_str()));
|
||||
return 0;
|
||||
}
|
||||
MimeHandlerExec *h = new MimeHandlerExec;
|
||||
list<string>::const_iterator it1 = toks.begin();
|
||||
it1++;
|
||||
for (;it1 != toks.end();it1++)
|
||||
h->params.push_back(*it1);
|
||||
return h;
|
||||
}
|
||||
}
|
||||
|
||||
// We are supposed to get here if there was no specific error, but
|
||||
// there is no identified mime type, or no handler
|
||||
// associated. These files are either ignored or their name is
|
||||
// indexed, depending on configuration
|
||||
bool indexunknown = false;
|
||||
cfg->getConfParam("indexallfilenames", &indexunknown);
|
||||
if (indexunknown) {
|
||||
return new MimeHandlerUnknown;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -24,7 +24,7 @@
|
||||
</property>
|
||||
<widget class="QLayoutWidget">
|
||||
<property name="name">
|
||||
<cstring>layout13</cstring>
|
||||
<cstring>layout12</cstring>
|
||||
</property>
|
||||
<vbox>
|
||||
<property name="name">
|
||||
@ -32,18 +32,12 @@
|
||||
</property>
|
||||
<widget class="QLayoutWidget">
|
||||
<property name="name">
|
||||
<cstring>layout15</cstring>
|
||||
<cstring>layout11</cstring>
|
||||
</property>
|
||||
<hbox>
|
||||
<property name="name">
|
||||
<cstring>unnamed</cstring>
|
||||
</property>
|
||||
<property name="margin">
|
||||
<number>10</number>
|
||||
</property>
|
||||
<property name="spacing">
|
||||
<number>10</number>
|
||||
</property>
|
||||
<widget class="QLabel">
|
||||
<property name="name">
|
||||
<cstring>textLabel2</cstring>
|
||||
@ -54,12 +48,54 @@
|
||||
</widget>
|
||||
<widget class="QLayoutWidget">
|
||||
<property name="name">
|
||||
<cstring>layout14</cstring>
|
||||
<cstring>layout10</cstring>
|
||||
</property>
|
||||
<grid>
|
||||
<property name="name">
|
||||
<cstring>unnamed</cstring>
|
||||
</property>
|
||||
<widget class="QLabel" row="2" column="0" rowspan="1" colspan="2">
|
||||
<property name="name">
|
||||
<cstring>orWordsTL</cstring>
|
||||
</property>
|
||||
<property name="text">
|
||||
<string>Any of these words</string>
|
||||
</property>
|
||||
</widget>
|
||||
<widget class="QLineEdit" row="2" column="2">
|
||||
<property name="name">
|
||||
<cstring>orWordsLE</cstring>
|
||||
</property>
|
||||
</widget>
|
||||
<widget class="QLabel" row="4" column="0">
|
||||
<property name="name">
|
||||
<cstring>textLabel1_2</cstring>
|
||||
</property>
|
||||
<property name="text">
|
||||
<string>File name</string>
|
||||
</property>
|
||||
</widget>
|
||||
<widget class="QLineEdit" row="4" column="2">
|
||||
<property name="name">
|
||||
<cstring>fileNameLE</cstring>
|
||||
</property>
|
||||
</widget>
|
||||
<widget class="QLabel" row="3" column="0" rowspan="1" colspan="2">
|
||||
<property name="name">
|
||||
<cstring>noWordsTL</cstring>
|
||||
</property>
|
||||
<property name="text">
|
||||
<string>None of these words</string>
|
||||
</property>
|
||||
</widget>
|
||||
<widget class="QLineEdit" row="3" column="2">
|
||||
<property name="name">
|
||||
<cstring>noWordsLE</cstring>
|
||||
</property>
|
||||
<property name="text">
|
||||
<string></string>
|
||||
</property>
|
||||
</widget>
|
||||
<widget class="QLabel" row="0" column="0">
|
||||
<property name="name">
|
||||
<cstring>andWordsTL</cstring>
|
||||
@ -74,7 +110,7 @@
|
||||
<string>All these words</string>
|
||||
</property>
|
||||
</widget>
|
||||
<widget class="QLineEdit" row="0" column="1" rowspan="1" colspan="3">
|
||||
<widget class="QLineEdit" row="0" column="1" rowspan="1" colspan="2">
|
||||
<property name="name">
|
||||
<cstring>andWordsLE</cstring>
|
||||
</property>
|
||||
@ -93,40 +129,11 @@
|
||||
<string>This exact phrase</string>
|
||||
</property>
|
||||
</widget>
|
||||
<widget class="QLineEdit" row="1" column="2" rowspan="1" colspan="2">
|
||||
<widget class="QLineEdit" row="1" column="2">
|
||||
<property name="name">
|
||||
<cstring>phraseLE</cstring>
|
||||
</property>
|
||||
</widget>
|
||||
<widget class="QLabel" row="2" column="0" rowspan="1" colspan="2">
|
||||
<property name="name">
|
||||
<cstring>orWordsTL</cstring>
|
||||
</property>
|
||||
<property name="text">
|
||||
<string>Any of these words</string>
|
||||
</property>
|
||||
</widget>
|
||||
<widget class="QLineEdit" row="2" column="2" rowspan="1" colspan="2">
|
||||
<property name="name">
|
||||
<cstring>orWordsLE</cstring>
|
||||
</property>
|
||||
</widget>
|
||||
<widget class="QLabel" row="3" column="0" rowspan="1" colspan="3">
|
||||
<property name="name">
|
||||
<cstring>noWordsTL</cstring>
|
||||
</property>
|
||||
<property name="text">
|
||||
<string>None of these words</string>
|
||||
</property>
|
||||
</widget>
|
||||
<widget class="QLineEdit" row="3" column="3">
|
||||
<property name="name">
|
||||
<cstring>noWordsLE</cstring>
|
||||
</property>
|
||||
<property name="text">
|
||||
<string></string>
|
||||
</property>
|
||||
</widget>
|
||||
</grid>
|
||||
</widget>
|
||||
</hbox>
|
||||
@ -353,20 +360,6 @@
|
||||
</widget>
|
||||
</grid>
|
||||
</widget>
|
||||
<widget class="Line">
|
||||
<property name="name">
|
||||
<cstring>line1</cstring>
|
||||
</property>
|
||||
<property name="frameShape">
|
||||
<enum>HLine</enum>
|
||||
</property>
|
||||
<property name="frameShadow">
|
||||
<enum>Sunken</enum>
|
||||
</property>
|
||||
<property name="orientation">
|
||||
<enum>Horizontal</enum>
|
||||
</property>
|
||||
</widget>
|
||||
<widget class="QLayoutWidget">
|
||||
<property name="name">
|
||||
<cstring>layout25</cstring>
|
||||
@ -398,6 +391,20 @@
|
||||
</widget>
|
||||
</vbox>
|
||||
</widget>
|
||||
<widget class="Line">
|
||||
<property name="name">
|
||||
<cstring>line1</cstring>
|
||||
</property>
|
||||
<property name="frameShape">
|
||||
<enum>HLine</enum>
|
||||
</property>
|
||||
<property name="frameShadow">
|
||||
<enum>Sunken</enum>
|
||||
</property>
|
||||
<property name="orientation">
|
||||
<enum>Horizontal</enum>
|
||||
</property>
|
||||
</widget>
|
||||
</vbox>
|
||||
</widget>
|
||||
<connections>
|
||||
|
||||
@ -131,6 +131,7 @@ void advsearch::searchPB_clicked()
|
||||
mydata.phrase = string((const char*)(phraseLE->text().utf8()));
|
||||
mydata.orwords = string((const char*)(orWordsLE->text().utf8()));
|
||||
mydata.nowords = string((const char*)(noWordsLE->text().utf8()));
|
||||
mydata.filename = string((const char*)(fileNameLE->text().utf8()));
|
||||
if (restrictFtCB->isOn() && noFiltypsLB->count() > 0) {
|
||||
for (unsigned int i = 0; i < yesFiltypsLB->count(); i++) {
|
||||
QCString ctext = yesFiltypsLB->item(i)->text().utf8();
|
||||
|
||||
@ -75,6 +75,23 @@
|
||||
<string>If this is set, each returned document will contain all the terms in the query. Else documents will be ordered by relevance, but may not contain all the terms.</string>
|
||||
</property>
|
||||
</widget>
|
||||
<widget class="QCheckBox">
|
||||
<property name="name">
|
||||
<cstring>isFNameCB</cstring>
|
||||
</property>
|
||||
<property name="text">
|
||||
<string>&File name</string>
|
||||
</property>
|
||||
<property name="accel">
|
||||
<string>Alt+F</string>
|
||||
</property>
|
||||
<property name="toolTip" stdset="0">
|
||||
<string>Search is on file names only, and may use wildcards.</string>
|
||||
</property>
|
||||
<property name="whatsThis" stdset="0">
|
||||
<string>If this is set, the search will only be performed on file names. Wildcards ? and * can be used and will be matched as in a shell command line.</string>
|
||||
</property>
|
||||
</widget>
|
||||
<widget class="QLineEdit">
|
||||
<property name="name">
|
||||
<cstring>queryText</cstring>
|
||||
|
||||
@ -44,9 +44,11 @@ void SSearchBase::startSimpleSearch()
|
||||
LOGDEB(("SSearchBase::startSimpleSearch\n"));
|
||||
|
||||
Rcl::AdvSearchData sdata;
|
||||
|
||||
QCString u8 = queryText->text().utf8();
|
||||
if (allTermsCB->isChecked())
|
||||
|
||||
if (isFNameCB->isChecked())
|
||||
sdata.filename = u8;
|
||||
else if (allTermsCB->isChecked())
|
||||
sdata.allwords = u8;
|
||||
else
|
||||
sdata.orwords = u8;
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.57 2006-02-07 10:26:49 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.58 2006-03-20 16:05:41 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
#endif
|
||||
/*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
@ -20,6 +20,7 @@ static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.57 2006-02-07 10:26:49 dockes Exp $
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/stat.h>
|
||||
#include <fnmatch.h>
|
||||
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
@ -287,6 +288,7 @@ bool Rcl::dumb_string(const string &in, string &out)
|
||||
if (!unacmaybefold(s1, out, "UTF-8", true)) {
|
||||
LOGERR(("dumb_string: unac failed for %s\n", in.c_str()));
|
||||
out.erase();
|
||||
// See comment at start of func
|
||||
return true;
|
||||
}
|
||||
return true;
|
||||
@ -387,11 +389,9 @@ bool Rcl::Db::add(const string &fn, const Rcl::Doc &idoc,
|
||||
// /////// Split and index terms in document body and auxiliary fields
|
||||
string noacc;
|
||||
|
||||
// Split and index file name. This supposes that it's either ascii
|
||||
// or utf-8. If this fails, we just go on. We need a config
|
||||
// parameter for file name charset.
|
||||
// Do we really want to fold case here ?
|
||||
if (dumb_string(fn, noacc)) {
|
||||
// Split and index file path. Do we really want to do this? Or do
|
||||
// it with the simple file name only ?
|
||||
if (dumb_string(doc.utf8fn, noacc)) {
|
||||
splitter.text_to_words(noacc);
|
||||
splitData.basepos += splitData.curpos + 100;
|
||||
}
|
||||
@ -439,6 +439,14 @@ bool Rcl::Db::add(const string &fn, const Rcl::Doc &idoc,
|
||||
string pathterm = "P" + hash;
|
||||
newdocument.add_term(pathterm);
|
||||
|
||||
// Simple file name. This is used for file name searches only. We index
|
||||
// it with a term prefix
|
||||
string sfn = path_getsimple(doc.utf8fn);
|
||||
if (dumb_string(sfn, noacc) && !noacc.empty()) {
|
||||
sfn = string("XSFN") + noacc;
|
||||
newdocument.add_term(sfn);
|
||||
}
|
||||
|
||||
// Internal path: with path, makes unique identifier for documents
|
||||
// inside multidocument files.
|
||||
string uniterm;
|
||||
@ -992,7 +1000,7 @@ bool Rcl::Db::setQuery(const std::string &iqstring, QueryOpts opts,
|
||||
Native *ndb = (Native *)pdata;
|
||||
if (!ndb)
|
||||
return false;
|
||||
asdata.erase();
|
||||
m_asdata.erase();
|
||||
dbindices.clear();
|
||||
list<Xapian::Query> pqueries;
|
||||
stringToXapianQueries(iqstring, stemlang, ndb, pqueries, opts);
|
||||
@ -1023,7 +1031,7 @@ bool Rcl::Db::setQuery(AdvSearchData &sdata, QueryOpts opts,
|
||||
if (!sdata.topdir.empty())
|
||||
LOGDEB((" restricted to: %s\n", sdata.topdir.c_str()));
|
||||
|
||||
asdata = sdata;
|
||||
m_asdata = sdata;
|
||||
dbindices.clear();
|
||||
|
||||
Native *ndb = (Native *)pdata;
|
||||
@ -1031,12 +1039,62 @@ bool Rcl::Db::setQuery(AdvSearchData &sdata, QueryOpts opts,
|
||||
return false;
|
||||
list<Xapian::Query> pqueries;
|
||||
Xapian::Query xq;
|
||||
|
||||
|
||||
if (!sdata.filename.empty()) {
|
||||
LOGDEB((" filename search\n"));
|
||||
// File name search, with possible wildcards.
|
||||
// We expand wildcards by scanning the filename terms (prefixed
|
||||
// with XSFN) from the database.
|
||||
// We build an OR query with the expanded values if any.
|
||||
string pattern;
|
||||
// We take the data from the dedicated filename field (rather than
|
||||
// from allwords or orwords) to avoid interaction with the allwords checkbox
|
||||
dumb_string(sdata.filename, pattern);
|
||||
|
||||
// If pattern is not quoted, we add * at each end: match any
|
||||
// substring
|
||||
if (pattern[0] == '"' && pattern[pattern.size()-1] == '"')
|
||||
pattern = pattern.substr(1, pattern.size() -2);
|
||||
else
|
||||
pattern = "*" + pattern + "*";
|
||||
|
||||
LOGDEB((" pattern: [%s]\n", pattern.c_str()));
|
||||
|
||||
// Match pattern against all file names in the db
|
||||
Xapian::TermIterator it = ndb->db.allterms_begin();
|
||||
it.skip_to("XSFN");
|
||||
list<string> names;
|
||||
for (;it != ndb->db.allterms_end(); it++) {
|
||||
if ((*it).find("XSFN") != 0)
|
||||
break;
|
||||
string fn = (*it).substr(4);
|
||||
LOGDEB2(("Matching [%s] and [%s]\n", pattern.c_str(), fn.c_str()));
|
||||
if (fnmatch(pattern.c_str(), fn.c_str(), 0) != FNM_NOMATCH) {
|
||||
names.push_back((*it).c_str());
|
||||
}
|
||||
// Limit the match count
|
||||
if (names.size() > 1000) {
|
||||
LOGERR(("Rcl::Db::SetQuery: too many matched file names\n"));
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (names.empty()) {
|
||||
// Build an impossible query: we know it's impossible because we
|
||||
// control the prefixes!
|
||||
names.push_back("XIMPOSSIBLE");
|
||||
}
|
||||
// Build a query out of the matching file name terms.
|
||||
xq = Xapian::Query(Xapian::Query::OP_OR, names.begin(), names.end());
|
||||
}
|
||||
|
||||
if (!sdata.allwords.empty()) {
|
||||
stringToXapianQueries(sdata.allwords, stemlang, ndb, pqueries, opts);
|
||||
if (!pqueries.empty()) {
|
||||
xq = Xapian::Query(Xapian::Query::OP_AND, pqueries.begin(),
|
||||
pqueries.end());
|
||||
Xapian::Query nq =
|
||||
Xapian::Query(Xapian::Query::OP_AND, pqueries.begin(),
|
||||
pqueries.end());
|
||||
xq = xq.empty() ? nq :
|
||||
Xapian::Query(Xapian::Query::OP_AND, xq, nq);
|
||||
pqueries.clear();
|
||||
}
|
||||
}
|
||||
@ -1044,8 +1102,8 @@ bool Rcl::Db::setQuery(AdvSearchData &sdata, QueryOpts opts,
|
||||
if (!sdata.orwords.empty()) {
|
||||
stringToXapianQueries(sdata.orwords, stemlang, ndb, pqueries, opts);
|
||||
if (!pqueries.empty()) {
|
||||
Xapian::Query nq;
|
||||
nq = Xapian::Query(Xapian::Query::OP_OR, pqueries.begin(),
|
||||
Xapian::Query nq =
|
||||
Xapian::Query(Xapian::Query::OP_OR, pqueries.begin(),
|
||||
pqueries.end());
|
||||
xq = xq.empty() ? nq :
|
||||
Xapian::Query(Xapian::Query::OP_AND_MAYBE, xq, nq);
|
||||
@ -1157,7 +1215,7 @@ class Rcl::DbPops {
|
||||
string url;
|
||||
parms.get(string("url"), url);
|
||||
url = url.substr(7);
|
||||
if (url.find(rdb->asdata.topdir) == 0)
|
||||
if (url.find(rdb->m_asdata.topdir) == 0)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
@ -1215,8 +1273,8 @@ bool Rcl::Db::getDoc(int exti, Doc &doc, int *percent)
|
||||
}
|
||||
|
||||
// For now the only post-query filter is on dir subtree
|
||||
bool postqfilter = !asdata.topdir.empty();
|
||||
LOGDEB1(("Topdir %s postqflt %d\n", asdata.topdir.c_str(), postqfilter));
|
||||
bool postqfilter = !m_asdata.topdir.empty();
|
||||
LOGDEB1(("Topdir %s postqflt %d\n", m_asdata.topdir.c_str(), postqfilter));
|
||||
|
||||
int xapi;
|
||||
if (postqfilter) {
|
||||
|
||||
@ -16,7 +16,7 @@
|
||||
*/
|
||||
#ifndef _DB_H_INCLUDED_
|
||||
#define _DB_H_INCLUDED_
|
||||
/* @(#$Id: rcldb.h,v 1.25 2006-02-07 10:26:49 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
/* @(#$Id: rcldb.h,v 1.26 2006-03-20 16:05:41 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
|
||||
#include <string>
|
||||
#include <list>
|
||||
@ -52,26 +52,33 @@ namespace Rcl {
|
||||
class Doc {
|
||||
public:
|
||||
// These fields potentially go into the document data record
|
||||
string url;
|
||||
string ipath;
|
||||
string mimetype;
|
||||
// We indicate the routine that sets them up during indexing
|
||||
string url; // Computed from fn by Db::add
|
||||
string utf8fn; // Transcoded version of the file path.
|
||||
// Set by DbIndexer::processone
|
||||
string ipath; // Set by DbIndexer::processone
|
||||
string mimetype; // Set by FileInterner::internfile
|
||||
string fmtime; // File modification time as decimal ascii unix time
|
||||
// Set by DbIndexer::processone
|
||||
string dmtime; // Data reference date (same format). Ie: mail date
|
||||
string origcharset;
|
||||
string title;
|
||||
string keywords;
|
||||
string abstract;
|
||||
string fbytes; // File size
|
||||
string dbytes; // Doc size
|
||||
// Possibly set by handler
|
||||
string origcharset; // Charset we transcoded from (in case we want back)
|
||||
// Possibly set by handler
|
||||
string title; // Possibly set by handler
|
||||
string keywords; // Possibly set by handler
|
||||
string abstract; // Possibly set by handler
|
||||
string fbytes; // File size. Set by Db::Add
|
||||
string dbytes; // Doc size. Set by Db::Add from text length
|
||||
|
||||
// The following fields don't go to the db. text is only used when
|
||||
// indexing
|
||||
string text;
|
||||
// The following fields don't go to the db record
|
||||
|
||||
string text; // text is split and indexed
|
||||
|
||||
int pc; // used by sortseq, convenience
|
||||
|
||||
void erase() {
|
||||
url.erase();
|
||||
utf8fn.erase();
|
||||
ipath.erase();
|
||||
mimetype.erase();
|
||||
fmtime.erase();
|
||||
@ -96,6 +103,7 @@ class AdvSearchData {
|
||||
string phrase;
|
||||
string orwords;
|
||||
string nowords;
|
||||
string filename;
|
||||
list<string> filetypes; // restrict to types. Empty if inactive
|
||||
string topdir; // restrict to subtree. Empty if inactive
|
||||
string description; // Printable expanded version of the complete query
|
||||
@ -107,6 +115,7 @@ class AdvSearchData {
|
||||
nowords.erase();
|
||||
filetypes.clear();
|
||||
topdir.erase();
|
||||
filename.erase();
|
||||
description.erase();
|
||||
}
|
||||
};
|
||||
@ -167,7 +176,7 @@ class Db {
|
||||
|
||||
private:
|
||||
|
||||
AdvSearchData asdata;
|
||||
AdvSearchData m_asdata;
|
||||
vector<int> dbindices; // In case there is a postq filter: sequence of
|
||||
// db indices that match
|
||||
void *pdata; // Pointer to private data. We don't want db(ie
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user