added code to specifically index/search file names
This commit is contained in:
parent
f96fcd6dd3
commit
d4852f3b0d
@ -1 +1 @@
|
|||||||
1.2.3
|
1.3.1
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
#ifndef lint
|
#ifndef lint
|
||||||
static char rcsid[] = "@(#$Id: indexer.cpp,v 1.24 2006-01-26 07:02:06 dockes Exp $ (C) 2004 J.F.Dockes";
|
static char rcsid[] = "@(#$Id: indexer.cpp,v 1.25 2006-03-20 16:05:41 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||||
#endif
|
#endif
|
||||||
/*
|
/*
|
||||||
* This program is free software; you can redistribute it and/or modify
|
* This program is free software; you can redistribute it and/or modify
|
||||||
@ -236,6 +236,11 @@ DbIndexer::processone(const std::string &fn, const struct stat *stp,
|
|||||||
// Internal access path for multi-document files
|
// Internal access path for multi-document files
|
||||||
doc.ipath = ipath;
|
doc.ipath = ipath;
|
||||||
|
|
||||||
|
// File name transcoded to utf8 for indexation.
|
||||||
|
// We actually might want a separate param for the filename charset
|
||||||
|
string charset = config->getDefCharset();
|
||||||
|
// If this fails, the path won't be indexed, no big deal
|
||||||
|
transcode(fn, doc.utf8fn, charset, "UTF-8");
|
||||||
// Do database-specific work to update document data
|
// Do database-specific work to update document data
|
||||||
if (!db.add(fn, doc, stp))
|
if (!db.add(fn, doc, stp))
|
||||||
return FsTreeWalker::FtwError;
|
return FsTreeWalker::FtwError;
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
#ifndef lint
|
#ifndef lint
|
||||||
static char rcsid[] = "@(#$Id: internfile.cpp,v 1.15 2006-01-23 13:32:28 dockes Exp $ (C) 2004 J.F.Dockes";
|
static char rcsid[] = "@(#$Id: internfile.cpp,v 1.16 2006-03-20 16:05:41 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||||
#endif
|
#endif
|
||||||
/*
|
/*
|
||||||
* This program is free software; you can redistribute it and/or modify
|
* This program is free software; you can redistribute it and/or modify
|
||||||
@ -125,41 +125,42 @@ FileInterner::FileInterner(const std::string &f, RclConfig *cnf,
|
|||||||
// for a compressed file.
|
// for a compressed file.
|
||||||
m_mime = mimetype(m_fn, m_cfg, usfci);
|
m_mime = mimetype(m_fn, m_cfg, usfci);
|
||||||
|
|
||||||
// If identification fails, try to use the input parameter. Note that this
|
// If identification fails, try to use the input parameter. This
|
||||||
// is normally not a compressed type (it's the mime type from the db)
|
// is then normally not a compressed type (it's the mime type from
|
||||||
|
// the db), and is only set when previewing, not for indexing
|
||||||
if (m_mime.empty() && imime)
|
if (m_mime.empty() && imime)
|
||||||
m_mime = *imime;
|
m_mime = *imime;
|
||||||
|
|
||||||
|
if (!m_mime.empty()) {
|
||||||
|
// Has mime: check for a compressed file. If so, create a
|
||||||
|
// temporary uncompressed file, and rerun the mime type
|
||||||
|
// identification, then do the rest with the temp file.
|
||||||
|
list<string>ucmd;
|
||||||
|
if (m_cfg->getUncompressor(m_mime, ucmd)) {
|
||||||
|
if (!uncompressfile(m_cfg, m_fn, ucmd, m_tdir, m_tfile)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
LOGDEB(("internfile: after ucomp: m_tdir %s, tfile %s\n",
|
||||||
|
m_tdir.c_str(), m_tfile.c_str()));
|
||||||
|
m_fn = m_tfile;
|
||||||
|
m_mime = mimetype(m_fn, m_cfg, usfci);
|
||||||
|
if (m_mime.empty() && imime)
|
||||||
|
m_mime = *imime;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (m_mime.empty()) {
|
if (m_mime.empty()) {
|
||||||
// No mime type: not listed in our map, or present in stop list
|
// No mime type. We let it through as config may warrant that
|
||||||
LOGDEB(("FileInterner::FileInterner: (no mime) [%s]\n", m_fn.c_str()));
|
// we index all file names
|
||||||
return;
|
LOGDEB(("internfile: (no mime) [%s]\n", m_fn.c_str()));
|
||||||
}
|
}
|
||||||
|
|
||||||
// First check for a compressed file. If so, create a temporary
|
// Look for appropriate handler (might still return empty)
|
||||||
// uncompressed file, and rerun the mime type identification, then do the
|
|
||||||
// rest with the temp file.
|
|
||||||
list<string>ucmd;
|
|
||||||
if (m_cfg->getUncompressor(m_mime, ucmd)) {
|
|
||||||
if (!uncompressfile(m_cfg, m_fn, ucmd, m_tdir, m_tfile)) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
LOGDEB(("internfile: after ucomp: m_tdir %s, tfile %s\n",
|
|
||||||
m_tdir.c_str(), m_tfile.c_str()));
|
|
||||||
m_fn = m_tfile;
|
|
||||||
m_mime = mimetype(m_fn, m_cfg, usfci);
|
|
||||||
if (m_mime.empty() && imime)
|
|
||||||
m_mime = *imime;
|
|
||||||
if (m_mime.empty()) {
|
|
||||||
// No mime type ?? pass on.
|
|
||||||
LOGDEB(("internfile: (no mime) [%s]\n", m_fn.c_str()));
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Look for appropriate handler
|
|
||||||
m_handler = getMimeHandler(m_mime, m_cfg);
|
m_handler = getMimeHandler(m_mime, m_cfg);
|
||||||
|
|
||||||
if (!m_handler) {
|
if (!m_handler) {
|
||||||
// No handler for this type, for now :(
|
// No handler for this type, for now :( if indexallfilenames
|
||||||
|
// is set in the config, this normally won't happen (we get mh_unknown)
|
||||||
LOGDEB(("FileInterner::FileInterner: %s: no handler\n",
|
LOGDEB(("FileInterner::FileInterner: %s: no handler\n",
|
||||||
m_mime.c_str()));
|
m_mime.c_str()));
|
||||||
return;
|
return;
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
#ifndef lint
|
#ifndef lint
|
||||||
static char rcsid[] = "@(#$Id: mimehandler.cpp,v 1.16 2006-01-23 13:32:28 dockes Exp $ (C) 2004 J.F.Dockes";
|
static char rcsid[] = "@(#$Id: mimehandler.cpp,v 1.17 2006-03-20 16:05:41 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||||
#endif
|
#endif
|
||||||
/*
|
/*
|
||||||
* This program is free software; you can redistribute it and/or modify
|
* This program is free software; you can redistribute it and/or modify
|
||||||
@ -31,6 +31,7 @@ using namespace std;
|
|||||||
#include "mh_mail.h"
|
#include "mh_mail.h"
|
||||||
#include "mh_text.h"
|
#include "mh_text.h"
|
||||||
#include "mh_exec.h"
|
#include "mh_exec.h"
|
||||||
|
#include "mh_unknown.h"
|
||||||
|
|
||||||
/** Create internal handler object appropriate for given mime type */
|
/** Create internal handler object appropriate for given mime type */
|
||||||
static MimeHandler *mhFactory(const string &mime)
|
static MimeHandler *mhFactory(const string &mime)
|
||||||
@ -52,35 +53,48 @@ static MimeHandler *mhFactory(const string &mime)
|
|||||||
MimeHandler *getMimeHandler(const string &mtype, RclConfig *cfg)
|
MimeHandler *getMimeHandler(const string &mtype, RclConfig *cfg)
|
||||||
{
|
{
|
||||||
// Get handler definition for mime type
|
// Get handler definition for mime type
|
||||||
string hs = cfg->getMimeHandlerDef(mtype);
|
string hs;
|
||||||
if (hs.empty())
|
if (!mtype.empty())
|
||||||
return 0;
|
hs = cfg->getMimeHandlerDef(mtype);
|
||||||
|
|
||||||
// Break definition into type and name
|
if (!hs.empty()) {
|
||||||
list<string> toks;
|
// Break definition into type and name
|
||||||
stringToStrings(hs, toks);
|
list<string> toks;
|
||||||
if (toks.empty()) {
|
stringToStrings(hs, toks);
|
||||||
LOGERR(("getMimeHandler: bad mimeconf line for %s\n", mtype.c_str()));
|
if (toks.empty()) {
|
||||||
return 0;
|
LOGERR(("getMimeHandler: bad mimeconf line for %s\n",
|
||||||
}
|
mtype.c_str()));
|
||||||
|
|
||||||
// Retrieve handler function according to type
|
|
||||||
if (!stringlowercmp("internal", toks.front())) {
|
|
||||||
return mhFactory(mtype);
|
|
||||||
} else if (!stringlowercmp("dll", toks.front())) {
|
|
||||||
return 0;
|
|
||||||
} else if (!stringlowercmp("exec", toks.front())) {
|
|
||||||
if (toks.size() < 2) {
|
|
||||||
LOGERR(("getMimeHandler: bad line for %s: %s\n", mtype.c_str(),
|
|
||||||
hs.c_str()));
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
MimeHandlerExec *h = new MimeHandlerExec;
|
|
||||||
list<string>::const_iterator it1 = toks.begin();
|
// Retrieve handler function according to type
|
||||||
it1++;
|
if (!stringlowercmp("internal", toks.front())) {
|
||||||
for (;it1 != toks.end();it1++)
|
return mhFactory(mtype);
|
||||||
h->params.push_back(*it1);
|
} else if (!stringlowercmp("dll", toks.front())) {
|
||||||
return h;
|
} else if (!stringlowercmp("exec", toks.front())) {
|
||||||
|
if (toks.size() < 2) {
|
||||||
|
LOGERR(("getMimeHandler: bad line for %s: %s\n",
|
||||||
|
mtype.c_str(), hs.c_str()));
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
MimeHandlerExec *h = new MimeHandlerExec;
|
||||||
|
list<string>::const_iterator it1 = toks.begin();
|
||||||
|
it1++;
|
||||||
|
for (;it1 != toks.end();it1++)
|
||||||
|
h->params.push_back(*it1);
|
||||||
|
return h;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// We are supposed to get here if there was no specific error, but
|
||||||
|
// there is no identified mime type, or no handler
|
||||||
|
// associated. These files are either ignored or their name is
|
||||||
|
// indexed, depending on configuration
|
||||||
|
bool indexunknown = false;
|
||||||
|
cfg->getConfParam("indexallfilenames", &indexunknown);
|
||||||
|
if (indexunknown) {
|
||||||
|
return new MimeHandlerUnknown;
|
||||||
|
} else {
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -24,7 +24,7 @@
|
|||||||
</property>
|
</property>
|
||||||
<widget class="QLayoutWidget">
|
<widget class="QLayoutWidget">
|
||||||
<property name="name">
|
<property name="name">
|
||||||
<cstring>layout13</cstring>
|
<cstring>layout12</cstring>
|
||||||
</property>
|
</property>
|
||||||
<vbox>
|
<vbox>
|
||||||
<property name="name">
|
<property name="name">
|
||||||
@ -32,18 +32,12 @@
|
|||||||
</property>
|
</property>
|
||||||
<widget class="QLayoutWidget">
|
<widget class="QLayoutWidget">
|
||||||
<property name="name">
|
<property name="name">
|
||||||
<cstring>layout15</cstring>
|
<cstring>layout11</cstring>
|
||||||
</property>
|
</property>
|
||||||
<hbox>
|
<hbox>
|
||||||
<property name="name">
|
<property name="name">
|
||||||
<cstring>unnamed</cstring>
|
<cstring>unnamed</cstring>
|
||||||
</property>
|
</property>
|
||||||
<property name="margin">
|
|
||||||
<number>10</number>
|
|
||||||
</property>
|
|
||||||
<property name="spacing">
|
|
||||||
<number>10</number>
|
|
||||||
</property>
|
|
||||||
<widget class="QLabel">
|
<widget class="QLabel">
|
||||||
<property name="name">
|
<property name="name">
|
||||||
<cstring>textLabel2</cstring>
|
<cstring>textLabel2</cstring>
|
||||||
@ -54,12 +48,54 @@
|
|||||||
</widget>
|
</widget>
|
||||||
<widget class="QLayoutWidget">
|
<widget class="QLayoutWidget">
|
||||||
<property name="name">
|
<property name="name">
|
||||||
<cstring>layout14</cstring>
|
<cstring>layout10</cstring>
|
||||||
</property>
|
</property>
|
||||||
<grid>
|
<grid>
|
||||||
<property name="name">
|
<property name="name">
|
||||||
<cstring>unnamed</cstring>
|
<cstring>unnamed</cstring>
|
||||||
</property>
|
</property>
|
||||||
|
<widget class="QLabel" row="2" column="0" rowspan="1" colspan="2">
|
||||||
|
<property name="name">
|
||||||
|
<cstring>orWordsTL</cstring>
|
||||||
|
</property>
|
||||||
|
<property name="text">
|
||||||
|
<string>Any of these words</string>
|
||||||
|
</property>
|
||||||
|
</widget>
|
||||||
|
<widget class="QLineEdit" row="2" column="2">
|
||||||
|
<property name="name">
|
||||||
|
<cstring>orWordsLE</cstring>
|
||||||
|
</property>
|
||||||
|
</widget>
|
||||||
|
<widget class="QLabel" row="4" column="0">
|
||||||
|
<property name="name">
|
||||||
|
<cstring>textLabel1_2</cstring>
|
||||||
|
</property>
|
||||||
|
<property name="text">
|
||||||
|
<string>File name</string>
|
||||||
|
</property>
|
||||||
|
</widget>
|
||||||
|
<widget class="QLineEdit" row="4" column="2">
|
||||||
|
<property name="name">
|
||||||
|
<cstring>fileNameLE</cstring>
|
||||||
|
</property>
|
||||||
|
</widget>
|
||||||
|
<widget class="QLabel" row="3" column="0" rowspan="1" colspan="2">
|
||||||
|
<property name="name">
|
||||||
|
<cstring>noWordsTL</cstring>
|
||||||
|
</property>
|
||||||
|
<property name="text">
|
||||||
|
<string>None of these words</string>
|
||||||
|
</property>
|
||||||
|
</widget>
|
||||||
|
<widget class="QLineEdit" row="3" column="2">
|
||||||
|
<property name="name">
|
||||||
|
<cstring>noWordsLE</cstring>
|
||||||
|
</property>
|
||||||
|
<property name="text">
|
||||||
|
<string></string>
|
||||||
|
</property>
|
||||||
|
</widget>
|
||||||
<widget class="QLabel" row="0" column="0">
|
<widget class="QLabel" row="0" column="0">
|
||||||
<property name="name">
|
<property name="name">
|
||||||
<cstring>andWordsTL</cstring>
|
<cstring>andWordsTL</cstring>
|
||||||
@ -74,7 +110,7 @@
|
|||||||
<string>All these words</string>
|
<string>All these words</string>
|
||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
<widget class="QLineEdit" row="0" column="1" rowspan="1" colspan="3">
|
<widget class="QLineEdit" row="0" column="1" rowspan="1" colspan="2">
|
||||||
<property name="name">
|
<property name="name">
|
||||||
<cstring>andWordsLE</cstring>
|
<cstring>andWordsLE</cstring>
|
||||||
</property>
|
</property>
|
||||||
@ -93,40 +129,11 @@
|
|||||||
<string>This exact phrase</string>
|
<string>This exact phrase</string>
|
||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
<widget class="QLineEdit" row="1" column="2" rowspan="1" colspan="2">
|
<widget class="QLineEdit" row="1" column="2">
|
||||||
<property name="name">
|
<property name="name">
|
||||||
<cstring>phraseLE</cstring>
|
<cstring>phraseLE</cstring>
|
||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
<widget class="QLabel" row="2" column="0" rowspan="1" colspan="2">
|
|
||||||
<property name="name">
|
|
||||||
<cstring>orWordsTL</cstring>
|
|
||||||
</property>
|
|
||||||
<property name="text">
|
|
||||||
<string>Any of these words</string>
|
|
||||||
</property>
|
|
||||||
</widget>
|
|
||||||
<widget class="QLineEdit" row="2" column="2" rowspan="1" colspan="2">
|
|
||||||
<property name="name">
|
|
||||||
<cstring>orWordsLE</cstring>
|
|
||||||
</property>
|
|
||||||
</widget>
|
|
||||||
<widget class="QLabel" row="3" column="0" rowspan="1" colspan="3">
|
|
||||||
<property name="name">
|
|
||||||
<cstring>noWordsTL</cstring>
|
|
||||||
</property>
|
|
||||||
<property name="text">
|
|
||||||
<string>None of these words</string>
|
|
||||||
</property>
|
|
||||||
</widget>
|
|
||||||
<widget class="QLineEdit" row="3" column="3">
|
|
||||||
<property name="name">
|
|
||||||
<cstring>noWordsLE</cstring>
|
|
||||||
</property>
|
|
||||||
<property name="text">
|
|
||||||
<string></string>
|
|
||||||
</property>
|
|
||||||
</widget>
|
|
||||||
</grid>
|
</grid>
|
||||||
</widget>
|
</widget>
|
||||||
</hbox>
|
</hbox>
|
||||||
@ -353,20 +360,6 @@
|
|||||||
</widget>
|
</widget>
|
||||||
</grid>
|
</grid>
|
||||||
</widget>
|
</widget>
|
||||||
<widget class="Line">
|
|
||||||
<property name="name">
|
|
||||||
<cstring>line1</cstring>
|
|
||||||
</property>
|
|
||||||
<property name="frameShape">
|
|
||||||
<enum>HLine</enum>
|
|
||||||
</property>
|
|
||||||
<property name="frameShadow">
|
|
||||||
<enum>Sunken</enum>
|
|
||||||
</property>
|
|
||||||
<property name="orientation">
|
|
||||||
<enum>Horizontal</enum>
|
|
||||||
</property>
|
|
||||||
</widget>
|
|
||||||
<widget class="QLayoutWidget">
|
<widget class="QLayoutWidget">
|
||||||
<property name="name">
|
<property name="name">
|
||||||
<cstring>layout25</cstring>
|
<cstring>layout25</cstring>
|
||||||
@ -398,6 +391,20 @@
|
|||||||
</widget>
|
</widget>
|
||||||
</vbox>
|
</vbox>
|
||||||
</widget>
|
</widget>
|
||||||
|
<widget class="Line">
|
||||||
|
<property name="name">
|
||||||
|
<cstring>line1</cstring>
|
||||||
|
</property>
|
||||||
|
<property name="frameShape">
|
||||||
|
<enum>HLine</enum>
|
||||||
|
</property>
|
||||||
|
<property name="frameShadow">
|
||||||
|
<enum>Sunken</enum>
|
||||||
|
</property>
|
||||||
|
<property name="orientation">
|
||||||
|
<enum>Horizontal</enum>
|
||||||
|
</property>
|
||||||
|
</widget>
|
||||||
</vbox>
|
</vbox>
|
||||||
</widget>
|
</widget>
|
||||||
<connections>
|
<connections>
|
||||||
|
|||||||
@ -131,6 +131,7 @@ void advsearch::searchPB_clicked()
|
|||||||
mydata.phrase = string((const char*)(phraseLE->text().utf8()));
|
mydata.phrase = string((const char*)(phraseLE->text().utf8()));
|
||||||
mydata.orwords = string((const char*)(orWordsLE->text().utf8()));
|
mydata.orwords = string((const char*)(orWordsLE->text().utf8()));
|
||||||
mydata.nowords = string((const char*)(noWordsLE->text().utf8()));
|
mydata.nowords = string((const char*)(noWordsLE->text().utf8()));
|
||||||
|
mydata.filename = string((const char*)(fileNameLE->text().utf8()));
|
||||||
if (restrictFtCB->isOn() && noFiltypsLB->count() > 0) {
|
if (restrictFtCB->isOn() && noFiltypsLB->count() > 0) {
|
||||||
for (unsigned int i = 0; i < yesFiltypsLB->count(); i++) {
|
for (unsigned int i = 0; i < yesFiltypsLB->count(); i++) {
|
||||||
QCString ctext = yesFiltypsLB->item(i)->text().utf8();
|
QCString ctext = yesFiltypsLB->item(i)->text().utf8();
|
||||||
|
|||||||
@ -75,6 +75,23 @@
|
|||||||
<string>If this is set, each returned document will contain all the terms in the query. Else documents will be ordered by relevance, but may not contain all the terms.</string>
|
<string>If this is set, each returned document will contain all the terms in the query. Else documents will be ordered by relevance, but may not contain all the terms.</string>
|
||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
|
<widget class="QCheckBox">
|
||||||
|
<property name="name">
|
||||||
|
<cstring>isFNameCB</cstring>
|
||||||
|
</property>
|
||||||
|
<property name="text">
|
||||||
|
<string>&File name</string>
|
||||||
|
</property>
|
||||||
|
<property name="accel">
|
||||||
|
<string>Alt+F</string>
|
||||||
|
</property>
|
||||||
|
<property name="toolTip" stdset="0">
|
||||||
|
<string>Search is on file names only, and may use wildcards.</string>
|
||||||
|
</property>
|
||||||
|
<property name="whatsThis" stdset="0">
|
||||||
|
<string>If this is set, the search will only be performed on file names. Wildcards ? and * can be used and will be matched as in a shell command line.</string>
|
||||||
|
</property>
|
||||||
|
</widget>
|
||||||
<widget class="QLineEdit">
|
<widget class="QLineEdit">
|
||||||
<property name="name">
|
<property name="name">
|
||||||
<cstring>queryText</cstring>
|
<cstring>queryText</cstring>
|
||||||
|
|||||||
@ -44,9 +44,11 @@ void SSearchBase::startSimpleSearch()
|
|||||||
LOGDEB(("SSearchBase::startSimpleSearch\n"));
|
LOGDEB(("SSearchBase::startSimpleSearch\n"));
|
||||||
|
|
||||||
Rcl::AdvSearchData sdata;
|
Rcl::AdvSearchData sdata;
|
||||||
|
|
||||||
QCString u8 = queryText->text().utf8();
|
QCString u8 = queryText->text().utf8();
|
||||||
if (allTermsCB->isChecked())
|
|
||||||
|
if (isFNameCB->isChecked())
|
||||||
|
sdata.filename = u8;
|
||||||
|
else if (allTermsCB->isChecked())
|
||||||
sdata.allwords = u8;
|
sdata.allwords = u8;
|
||||||
else
|
else
|
||||||
sdata.orwords = u8;
|
sdata.orwords = u8;
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
#ifndef lint
|
#ifndef lint
|
||||||
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.57 2006-02-07 10:26:49 dockes Exp $ (C) 2004 J.F.Dockes";
|
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.58 2006-03-20 16:05:41 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||||
#endif
|
#endif
|
||||||
/*
|
/*
|
||||||
* This program is free software; you can redistribute it and/or modify
|
* This program is free software; you can redistribute it and/or modify
|
||||||
@ -20,6 +20,7 @@ static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.57 2006-02-07 10:26:49 dockes Exp $
|
|||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
#include <sys/stat.h>
|
#include <sys/stat.h>
|
||||||
|
#include <fnmatch.h>
|
||||||
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <string>
|
#include <string>
|
||||||
@ -287,6 +288,7 @@ bool Rcl::dumb_string(const string &in, string &out)
|
|||||||
if (!unacmaybefold(s1, out, "UTF-8", true)) {
|
if (!unacmaybefold(s1, out, "UTF-8", true)) {
|
||||||
LOGERR(("dumb_string: unac failed for %s\n", in.c_str()));
|
LOGERR(("dumb_string: unac failed for %s\n", in.c_str()));
|
||||||
out.erase();
|
out.erase();
|
||||||
|
// See comment at start of func
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
@ -387,11 +389,9 @@ bool Rcl::Db::add(const string &fn, const Rcl::Doc &idoc,
|
|||||||
// /////// Split and index terms in document body and auxiliary fields
|
// /////// Split and index terms in document body and auxiliary fields
|
||||||
string noacc;
|
string noacc;
|
||||||
|
|
||||||
// Split and index file name. This supposes that it's either ascii
|
// Split and index file path. Do we really want to do this? Or do
|
||||||
// or utf-8. If this fails, we just go on. We need a config
|
// it with the simple file name only ?
|
||||||
// parameter for file name charset.
|
if (dumb_string(doc.utf8fn, noacc)) {
|
||||||
// Do we really want to fold case here ?
|
|
||||||
if (dumb_string(fn, noacc)) {
|
|
||||||
splitter.text_to_words(noacc);
|
splitter.text_to_words(noacc);
|
||||||
splitData.basepos += splitData.curpos + 100;
|
splitData.basepos += splitData.curpos + 100;
|
||||||
}
|
}
|
||||||
@ -439,6 +439,14 @@ bool Rcl::Db::add(const string &fn, const Rcl::Doc &idoc,
|
|||||||
string pathterm = "P" + hash;
|
string pathterm = "P" + hash;
|
||||||
newdocument.add_term(pathterm);
|
newdocument.add_term(pathterm);
|
||||||
|
|
||||||
|
// Simple file name. This is used for file name searches only. We index
|
||||||
|
// it with a term prefix
|
||||||
|
string sfn = path_getsimple(doc.utf8fn);
|
||||||
|
if (dumb_string(sfn, noacc) && !noacc.empty()) {
|
||||||
|
sfn = string("XSFN") + noacc;
|
||||||
|
newdocument.add_term(sfn);
|
||||||
|
}
|
||||||
|
|
||||||
// Internal path: with path, makes unique identifier for documents
|
// Internal path: with path, makes unique identifier for documents
|
||||||
// inside multidocument files.
|
// inside multidocument files.
|
||||||
string uniterm;
|
string uniterm;
|
||||||
@ -992,7 +1000,7 @@ bool Rcl::Db::setQuery(const std::string &iqstring, QueryOpts opts,
|
|||||||
Native *ndb = (Native *)pdata;
|
Native *ndb = (Native *)pdata;
|
||||||
if (!ndb)
|
if (!ndb)
|
||||||
return false;
|
return false;
|
||||||
asdata.erase();
|
m_asdata.erase();
|
||||||
dbindices.clear();
|
dbindices.clear();
|
||||||
list<Xapian::Query> pqueries;
|
list<Xapian::Query> pqueries;
|
||||||
stringToXapianQueries(iqstring, stemlang, ndb, pqueries, opts);
|
stringToXapianQueries(iqstring, stemlang, ndb, pqueries, opts);
|
||||||
@ -1023,7 +1031,7 @@ bool Rcl::Db::setQuery(AdvSearchData &sdata, QueryOpts opts,
|
|||||||
if (!sdata.topdir.empty())
|
if (!sdata.topdir.empty())
|
||||||
LOGDEB((" restricted to: %s\n", sdata.topdir.c_str()));
|
LOGDEB((" restricted to: %s\n", sdata.topdir.c_str()));
|
||||||
|
|
||||||
asdata = sdata;
|
m_asdata = sdata;
|
||||||
dbindices.clear();
|
dbindices.clear();
|
||||||
|
|
||||||
Native *ndb = (Native *)pdata;
|
Native *ndb = (Native *)pdata;
|
||||||
@ -1031,12 +1039,62 @@ bool Rcl::Db::setQuery(AdvSearchData &sdata, QueryOpts opts,
|
|||||||
return false;
|
return false;
|
||||||
list<Xapian::Query> pqueries;
|
list<Xapian::Query> pqueries;
|
||||||
Xapian::Query xq;
|
Xapian::Query xq;
|
||||||
|
|
||||||
|
if (!sdata.filename.empty()) {
|
||||||
|
LOGDEB((" filename search\n"));
|
||||||
|
// File name search, with possible wildcards.
|
||||||
|
// We expand wildcards by scanning the filename terms (prefixed
|
||||||
|
// with XSFN) from the database.
|
||||||
|
// We build an OR query with the expanded values if any.
|
||||||
|
string pattern;
|
||||||
|
// The pattern comes from the dedicated filename field, so there is no
|
||||||
|
// interaction with the allwords checkbox
|
||||||
|
dumb_string(sdata.filename, pattern);
|
||||||
|
|
||||||
|
// If pattern is not quoted, we add * at each end: match any
|
||||||
|
// substring
|
||||||
|
if (pattern[0] == '"' && pattern[pattern.size()-1] == '"')
|
||||||
|
pattern = pattern.substr(1, pattern.size() -2);
|
||||||
|
else
|
||||||
|
pattern = "*" + pattern + "*";
|
||||||
|
|
||||||
|
LOGDEB((" pattern: [%s]\n", pattern.c_str()));
|
||||||
|
|
||||||
|
// Match pattern against all file names in the db
|
||||||
|
Xapian::TermIterator it = ndb->db.allterms_begin();
|
||||||
|
it.skip_to("XSFN");
|
||||||
|
list<string> names;
|
||||||
|
for (;it != ndb->db.allterms_end(); it++) {
|
||||||
|
if ((*it).find("XSFN") != 0)
|
||||||
|
break;
|
||||||
|
string fn = (*it).substr(4);
|
||||||
|
LOGDEB2(("Matching [%s] and [%s]\n", pattern.c_str(), fn.c_str()));
|
||||||
|
if (fnmatch(pattern.c_str(), fn.c_str(), 0) != FNM_NOMATCH) {
|
||||||
|
names.push_back((*it).c_str());
|
||||||
|
}
|
||||||
|
// Limit the match count
|
||||||
|
if (names.size() > 1000) {
|
||||||
|
LOGERR(("Rcl::Db::SetQuery: too many matched file names\n"));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (names.empty()) {
|
||||||
|
// Build an impossible query: we know it's impossible because we
|
||||||
|
// control the prefixes!
|
||||||
|
names.push_back("XIMPOSSIBLE");
|
||||||
|
}
|
||||||
|
// Build a query out of the matching file name terms.
|
||||||
|
xq = Xapian::Query(Xapian::Query::OP_OR, names.begin(), names.end());
|
||||||
|
}
|
||||||
|
|
||||||
if (!sdata.allwords.empty()) {
|
if (!sdata.allwords.empty()) {
|
||||||
stringToXapianQueries(sdata.allwords, stemlang, ndb, pqueries, opts);
|
stringToXapianQueries(sdata.allwords, stemlang, ndb, pqueries, opts);
|
||||||
if (!pqueries.empty()) {
|
if (!pqueries.empty()) {
|
||||||
xq = Xapian::Query(Xapian::Query::OP_AND, pqueries.begin(),
|
Xapian::Query nq =
|
||||||
pqueries.end());
|
Xapian::Query(Xapian::Query::OP_AND, pqueries.begin(),
|
||||||
|
pqueries.end());
|
||||||
|
xq = xq.empty() ? nq :
|
||||||
|
Xapian::Query(Xapian::Query::OP_AND, xq, nq);
|
||||||
pqueries.clear();
|
pqueries.clear();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1044,8 +1102,8 @@ bool Rcl::Db::setQuery(AdvSearchData &sdata, QueryOpts opts,
|
|||||||
if (!sdata.orwords.empty()) {
|
if (!sdata.orwords.empty()) {
|
||||||
stringToXapianQueries(sdata.orwords, stemlang, ndb, pqueries, opts);
|
stringToXapianQueries(sdata.orwords, stemlang, ndb, pqueries, opts);
|
||||||
if (!pqueries.empty()) {
|
if (!pqueries.empty()) {
|
||||||
Xapian::Query nq;
|
Xapian::Query nq =
|
||||||
nq = Xapian::Query(Xapian::Query::OP_OR, pqueries.begin(),
|
Xapian::Query(Xapian::Query::OP_OR, pqueries.begin(),
|
||||||
pqueries.end());
|
pqueries.end());
|
||||||
xq = xq.empty() ? nq :
|
xq = xq.empty() ? nq :
|
||||||
Xapian::Query(Xapian::Query::OP_AND_MAYBE, xq, nq);
|
Xapian::Query(Xapian::Query::OP_AND_MAYBE, xq, nq);
|
||||||
@ -1157,7 +1215,7 @@ class Rcl::DbPops {
|
|||||||
string url;
|
string url;
|
||||||
parms.get(string("url"), url);
|
parms.get(string("url"), url);
|
||||||
url = url.substr(7);
|
url = url.substr(7);
|
||||||
if (url.find(rdb->asdata.topdir) == 0)
|
if (url.find(rdb->m_asdata.topdir) == 0)
|
||||||
return true;
|
return true;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -1215,8 +1273,8 @@ bool Rcl::Db::getDoc(int exti, Doc &doc, int *percent)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// For now the only post-query filter is on dir subtree
|
// For now the only post-query filter is on dir subtree
|
||||||
bool postqfilter = !asdata.topdir.empty();
|
bool postqfilter = !m_asdata.topdir.empty();
|
||||||
LOGDEB1(("Topdir %s postqflt %d\n", asdata.topdir.c_str(), postqfilter));
|
LOGDEB1(("Topdir %s postqflt %d\n", m_asdata.topdir.c_str(), postqfilter));
|
||||||
|
|
||||||
int xapi;
|
int xapi;
|
||||||
if (postqfilter) {
|
if (postqfilter) {
|
||||||
|
|||||||
@ -16,7 +16,7 @@
|
|||||||
*/
|
*/
|
||||||
#ifndef _DB_H_INCLUDED_
|
#ifndef _DB_H_INCLUDED_
|
||||||
#define _DB_H_INCLUDED_
|
#define _DB_H_INCLUDED_
|
||||||
/* @(#$Id: rcldb.h,v 1.25 2006-02-07 10:26:49 dockes Exp $ (C) 2004 J.F.Dockes */
|
/* @(#$Id: rcldb.h,v 1.26 2006-03-20 16:05:41 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <list>
|
#include <list>
|
||||||
@ -52,26 +52,33 @@ namespace Rcl {
|
|||||||
class Doc {
|
class Doc {
|
||||||
public:
|
public:
|
||||||
// These fields potentially go into the document data record
|
// These fields potentially go into the document data record
|
||||||
string url;
|
// We indicate the routine that sets them up during indexing
|
||||||
string ipath;
|
string url; // Computed from fn by Db::add
|
||||||
string mimetype;
|
string utf8fn; // Transcoded version of the file path.
|
||||||
|
// Set by DbIndexer::processone
|
||||||
|
string ipath; // Set by DbIndexer::processone
|
||||||
|
string mimetype; // Set by FileInterner::internfile
|
||||||
string fmtime; // File modification time as decimal ascii unix time
|
string fmtime; // File modification time as decimal ascii unix time
|
||||||
|
// Set by DbIndexer::processone
|
||||||
string dmtime; // Data reference date (same format). Ie: mail date
|
string dmtime; // Data reference date (same format). Ie: mail date
|
||||||
string origcharset;
|
// Possibly set by handler
|
||||||
string title;
|
string origcharset; // Charset we transcoded from (in case we want back)
|
||||||
string keywords;
|
// Possibly set by handler
|
||||||
string abstract;
|
string title; // Possibly set by handler
|
||||||
string fbytes; // File size
|
string keywords; // Possibly set by handler
|
||||||
string dbytes; // Doc size
|
string abstract; // Possibly set by handler
|
||||||
|
string fbytes; // File size. Set by Db::Add
|
||||||
|
string dbytes; // Doc size. Set by Db::Add from text length
|
||||||
|
|
||||||
// The following fields don't go to the db. text is only used when
|
// The following fields don't go to the db record
|
||||||
// indexing
|
|
||||||
string text;
|
string text; // text is split and indexed
|
||||||
|
|
||||||
int pc; // used by sortseq, convenience
|
int pc; // used by sortseq, convenience
|
||||||
|
|
||||||
void erase() {
|
void erase() {
|
||||||
url.erase();
|
url.erase();
|
||||||
|
utf8fn.erase();
|
||||||
ipath.erase();
|
ipath.erase();
|
||||||
mimetype.erase();
|
mimetype.erase();
|
||||||
fmtime.erase();
|
fmtime.erase();
|
||||||
@ -96,6 +103,7 @@ class AdvSearchData {
|
|||||||
string phrase;
|
string phrase;
|
||||||
string orwords;
|
string orwords;
|
||||||
string nowords;
|
string nowords;
|
||||||
|
string filename;
|
||||||
list<string> filetypes; // restrict to types. Empty if inactive
|
list<string> filetypes; // restrict to types. Empty if inactive
|
||||||
string topdir; // restrict to subtree. Empty if inactive
|
string topdir; // restrict to subtree. Empty if inactive
|
||||||
string description; // Printable expanded version of the complete query
|
string description; // Printable expanded version of the complete query
|
||||||
@ -107,6 +115,7 @@ class AdvSearchData {
|
|||||||
nowords.erase();
|
nowords.erase();
|
||||||
filetypes.clear();
|
filetypes.clear();
|
||||||
topdir.erase();
|
topdir.erase();
|
||||||
|
filename.erase();
|
||||||
description.erase();
|
description.erase();
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@ -167,7 +176,7 @@ class Db {
|
|||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
||||||
AdvSearchData asdata;
|
AdvSearchData m_asdata;
|
||||||
vector<int> dbindices; // In case there is a postq filter: sequence of
|
vector<int> dbindices; // In case there is a postq filter: sequence of
|
||||||
// db indices that match
|
// db indices that match
|
||||||
void *pdata; // Pointer to private data. We don't want db(ie
|
void *pdata; // Pointer to private data. We don't want db(ie
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user