changed stopsuffixes processing
This commit is contained in:
parent
2bba8a159d
commit
1c80f0d67c
@ -1,5 +1,5 @@
|
|||||||
#ifndef lint
|
#ifndef lint
|
||||||
static char rcsid[] = "@(#$Id: rclconfig.cpp,v 1.38 2006-12-19 08:40:50 dockes Exp $ (C) 2004 J.F.Dockes";
|
static char rcsid[] = "@(#$Id: rclconfig.cpp,v 1.39 2006-12-20 09:54:17 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||||
#endif
|
#endif
|
||||||
/*
|
/*
|
||||||
* This program is free software; you can redistribute it and/or modify
|
* This program is free software; you can redistribute it and/or modify
|
||||||
@ -24,6 +24,8 @@ static char rcsid[] = "@(#$Id: rclconfig.cpp,v 1.38 2006-12-19 08:40:50 dockes E
|
|||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
#include <langinfo.h>
|
#include <langinfo.h>
|
||||||
|
|
||||||
|
#include <set>
|
||||||
|
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
#include <sys/stat.h>
|
#include <sys/stat.h>
|
||||||
#ifdef __FreeBSD__
|
#ifdef __FreeBSD__
|
||||||
@ -42,6 +44,13 @@ static char rcsid[] = "@(#$Id: rclconfig.cpp,v 1.38 2006-12-19 08:40:50 dockes E
|
|||||||
using namespace std;
|
using namespace std;
|
||||||
#endif /* NO_NAMESPACES */
|
#endif /* NO_NAMESPACES */
|
||||||
|
|
||||||
|
// Fallback MIN/MAX helpers, only defined when no earlier header provided
// them. These are plain macros: each argument may be evaluated twice, and
// the comparison follows the usual arithmetic conversions, so with unsigned
// operands MAX(0, a - b) does NOT clamp a wrapped-around difference to 0.
#ifndef MIN
|
||||||
|
#define MIN(A,B) (((A)<(B)) ? (A) : (B))
|
||||||
|
#endif
|
||||||
|
#ifndef MAX
|
||||||
|
#define MAX(A,B) (((A)>(B)) ? (A) : (B))
|
||||||
|
#endif
|
||||||
|
|
||||||
RclConfig::RclConfig(const string *argcnf)
|
RclConfig::RclConfig(const string *argcnf)
|
||||||
{
|
{
|
||||||
zeroMe();
|
zeroMe();
|
||||||
@ -211,16 +220,81 @@ std::list<string> RclConfig::getAllMimeTypes()
|
|||||||
return lst;
|
return lst;
|
||||||
}
|
}
|
||||||
|
|
||||||
const list<string>* RclConfig::getStopSuffixes()
|
// Things for suffix comparison. We define a string class and string
|
||||||
|
// comparison with suffix-only sensitivity
|
||||||
|
/**
 * String wrapper with suffix-only comparison semantics.
 *
 * Two SfString values compare equal when, reading both from the end, every
 * character matches until the shorter one is exhausted: in other words when
 * one value is a tail of the other (an empty value equals anything).
 */
class SfString {
public:
    // Deliberately not explicit: plain strings are passed where an SfString
    // key is expected (container lookups), relying on implicit conversion.
    SfString(const string& s) : m_str(s) {}

    /** Suffix-only equality; const so it can be used on const objects. */
    bool operator==(const SfString& s2) const {
        string::const_reverse_iterator r1 = m_str.rbegin(), re1 = m_str.rend(),
            r2 = s2.m_str.rbegin(), re2 = s2.m_str.rend();
        // Walk both strings backwards over their common length
        while (r1 != re1 && r2 != re2) {
            if (*r1 != *r2) {
                return false;
            }
            ++r1; ++r2;
        }
        // No mismatch before one of them ran out: same tail
        return true;
    }

    string m_str;
};
|
||||||
|
|
||||||
|
class SuffCmp {
|
||||||
|
public:
|
||||||
|
int operator()(const SfString& s1, const SfString& s2) {
|
||||||
|
//cout << "Comparing " << s1.m_str << " and " << s2.m_str << endl;
|
||||||
|
string::const_reverse_iterator
|
||||||
|
r1 = s1.m_str.rbegin(), re1 = s1.m_str.rend(),
|
||||||
|
r2 = s2.m_str.rbegin(), re2 = s2.m_str.rend();
|
||||||
|
while (r1 != re1 && r2 != re2) {
|
||||||
|
if (*r1 != *r2) {
|
||||||
|
return *r1 < *r2 ? 1 : 0;
|
||||||
|
}
|
||||||
|
++r1; ++r2;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
// Container for the stop suffixes: SfString keys ordered by SuffCmp, i.e.
// compared from the end of the string.
typedef multiset<SfString, SuffCmp> SuffixStore;
|
||||||
|

||||||
|
// m_stopsuffixes is declared as an untyped pointer in the class; this macro
// casts it back to the real container type for use in this file.
#define STOPSUFFIXES ((SuffixStore *)m_stopsuffixes)
|
||||||
|
|
||||||
|
bool RclConfig::inStopSuffixes(const string& fni)
|
||||||
{
|
{
|
||||||
if (stopsuffixes == 0 && (stopsuffixes = new list<string>) != 0) {
|
if (m_stopsuffixes == 0) {
|
||||||
|
// Need to initialize the suffixes
|
||||||
|
if ((m_stopsuffixes = new SuffixStore) == 0) {
|
||||||
|
LOGERR(("RclConfig::inStopSuffixes: out of memory\n"));
|
||||||
|
return false;
|
||||||
|
}
|
||||||
string stp;
|
string stp;
|
||||||
|
list<string> stoplist;
|
||||||
if (mimemap && mimemap->get("recoll_noindex", stp, m_keydir)) {
|
if (mimemap && mimemap->get("recoll_noindex", stp, m_keydir)) {
|
||||||
stringToStrings(stp, *stopsuffixes);
|
stringToStrings(stp, stoplist);
|
||||||
|
}
|
||||||
|
for (list<string>::const_iterator it = stoplist.begin();
|
||||||
|
it != stoplist.end(); it++) {
|
||||||
|
string lower(*it);
|
||||||
|
stringtolower(lower);
|
||||||
|
STOPSUFFIXES->insert(SfString(lower));
|
||||||
|
if (m_maxsufflen < lower.length())
|
||||||
|
m_maxsufflen = lower.length();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return stopsuffixes;
|
string fn(fni,
|
||||||
|
MAX(0, fni.length() - m_maxsufflen),
|
||||||
|
MIN(fni.length(), m_maxsufflen));
|
||||||
|
stringtolower(fn);
|
||||||
|
SuffixStore::const_iterator it = STOPSUFFIXES->find(fn);
|
||||||
|
if (it != STOPSUFFIXES->end()) {
|
||||||
|
LOGDEB2(("RclConfig::inStopSuffixes: Found (%s) [%s]\n",
|
||||||
|
fni.c_str(), (*it).m_str.c_str()));
|
||||||
|
return true;
|
||||||
|
} else {
|
||||||
|
LOGDEB2(("RclConfig::inStopSuffixes: not found [%s]\n", fni.c_str()));
|
||||||
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
string RclConfig::getMimeTypeFromSuffix(const string &suff)
|
string RclConfig::getMimeTypeFromSuffix(const string &suff)
|
||||||
@ -244,6 +318,17 @@ string RclConfig::getSuffixFromMimeType(const string &mt)
|
|||||||
return "";
|
return "";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Release every heap object owned by this configuration, then reset the
// members through zeroMe().
void RclConfig::freeAll()
|
||||||
|
{
|
||||||
|
delete m_conf;
|
||||||
|
delete mimemap;
|
||||||
|
delete mimeconf;
|
||||||
|
delete mimeview;
|
||||||
|
// Deleted through the STOPSUFFIXES cast so that the pointer has its real
// container type (the member itself is an untyped pointer).
delete STOPSUFFIXES;
|
||||||
|
// just in case
|
||||||
|
zeroMe();
|
||||||
|
}
|
||||||
|
|
||||||
string RclConfig::getMimeHandlerDef(const std::string &mtype)
|
string RclConfig::getMimeHandlerDef(const std::string &mtype)
|
||||||
{
|
{
|
||||||
string hs;
|
string hs;
|
||||||
@ -495,8 +580,9 @@ void RclConfig::initFrom(const RclConfig& r)
|
|||||||
mimeconf = new ConfStack<ConfTree>(*(r.mimeconf));
|
mimeconf = new ConfStack<ConfTree>(*(r.mimeconf));
|
||||||
if (r.mimeview)
|
if (r.mimeview)
|
||||||
mimeview = new ConfStack<ConfTree>(*(r.mimeview));
|
mimeview = new ConfStack<ConfTree>(*(r.mimeview));
|
||||||
if (r.stopsuffixes)
|
if (r.m_stopsuffixes)
|
||||||
stopsuffixes = new std::list<std::string>(*(r.stopsuffixes));
|
m_stopsuffixes = new SuffixStore(*((SuffixStore*)r.m_stopsuffixes));
|
||||||
|
m_maxsufflen = r.m_maxsufflen;
|
||||||
defcharset = r.defcharset;
|
defcharset = r.defcharset;
|
||||||
guesscharset = r.guesscharset;
|
guesscharset = r.guesscharset;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -16,7 +16,7 @@
|
|||||||
*/
|
*/
|
||||||
#ifndef _RCLCONFIG_H_INCLUDED_
|
#ifndef _RCLCONFIG_H_INCLUDED_
|
||||||
#define _RCLCONFIG_H_INCLUDED_
|
#define _RCLCONFIG_H_INCLUDED_
|
||||||
/* @(#$Id: rclconfig.h,v 1.28 2006-12-16 15:30:02 dockes Exp $ (C) 2004 J.F.Dockes */
|
/* @(#$Id: rclconfig.h,v 1.29 2006-12-20 09:54:17 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||||
|
|
||||||
#include <list>
|
#include <list>
|
||||||
#include <string>
|
#include <string>
|
||||||
@ -35,7 +35,7 @@ using std::pair;
|
|||||||
class RclConfig {
|
class RclConfig {
|
||||||
public:
|
public:
|
||||||
|
|
||||||
RclConfig(const string *argcnf=0);
|
RclConfig(const string *argcnf = 0);
|
||||||
bool ok() {return m_ok;}
|
bool ok() {return m_ok;}
|
||||||
const string &getReason() {return m_reason;}
|
const string &getReason() {return m_reason;}
|
||||||
/** Return the directory where this config is stored */
|
/** Return the directory where this config is stored */
|
||||||
@ -83,12 +83,12 @@ class RclConfig {
|
|||||||
list<string> getSkippedNames();
|
list<string> getSkippedNames();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get list of ignored suffixes from mimemap
|
* Check if file name should be ignored because of suffix
|
||||||
*
|
*
|
||||||
* The list is initialized on first call, and not changed for subsequent
|
* The list of ignored suffixes is initialized on first call, and
|
||||||
* setKeydirs.
|
* not changed for subsequent setKeydirs.
|
||||||
*/
|
*/
|
||||||
const list<string>* getStopSuffixes();
|
bool inStopSuffixes(const string& fn);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Check in mimeconf if input mime type is a compressed one, and
|
* Check in mimeconf if input mime type is a compressed one, and
|
||||||
@ -153,7 +153,8 @@ class RclConfig {
|
|||||||
ConfStack<ConfTree> *mimeconf; // but their content may depend on it.
|
ConfStack<ConfTree> *mimeconf; // but their content may depend on it.
|
||||||
ConfStack<ConfTree> *mimeview; //
|
ConfStack<ConfTree> *mimeview; //
|
||||||
|
|
||||||
list<string> *stopsuffixes;
|
void *m_stopsuffixes;
|
||||||
|
unsigned int m_maxsufflen;
|
||||||
|
|
||||||
// Parameters auto-fetched on setkeydir
|
// Parameters auto-fetched on setkeydir
|
||||||
string defcharset; // These are stored locally to avoid
|
string defcharset; // These are stored locally to avoid
|
||||||
@ -171,18 +172,11 @@ class RclConfig {
|
|||||||
mimemap = 0;
|
mimemap = 0;
|
||||||
mimeconf = 0;
|
mimeconf = 0;
|
||||||
mimeview = 0;
|
mimeview = 0;
|
||||||
stopsuffixes = 0;
|
m_stopsuffixes = 0;
|
||||||
|
m_maxsufflen = 0;
|
||||||
}
|
}
|
||||||
/** Free data then zero pointers */
|
/** Free data then zero pointers */
|
||||||
void freeAll() {
|
void freeAll();
|
||||||
delete m_conf;
|
|
||||||
delete mimemap;
|
|
||||||
delete mimeconf;
|
|
||||||
delete mimeview;
|
|
||||||
delete stopsuffixes;
|
|
||||||
// just in case
|
|
||||||
zeroMe();
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -24,7 +24,7 @@
|
|||||||
Dockes</holder>
|
Dockes</holder>
|
||||||
</copyright>
|
</copyright>
|
||||||
|
|
||||||
<releaseinfo>$Id: usermanual.sgml,v 1.30 2006-12-18 12:06:11 dockes Exp $</releaseinfo>
|
<releaseinfo>$Id: usermanual.sgml,v 1.31 2006-12-20 09:54:17 dockes Exp $</releaseinfo>
|
||||||
|
|
||||||
<abstract>
|
<abstract>
|
||||||
<para>This document introduces full text search notions
|
<para>This document introduces full text search notions
|
||||||
@ -1528,7 +1528,7 @@ RECOLL_CONFDIR=$recollconf $recolldata/examples/rclmon.sh stop
|
|||||||
agents like <application>thunderbird</application>
|
agents like <application>thunderbird</application>
|
||||||
usually store messages in hidden directories, and you
|
usually store messages in hidden directories, and you
|
||||||
probably want this indexed. One possible solution is to
|
probably want this indexed. One possible solution is to
|
||||||
have <userinput>.*</userinput> in
|
have <filename>.*</filename> in
|
||||||
<literal>skippedNames</literal>, and add things like
|
<literal>skippedNames</literal>, and add things like
|
||||||
<filename>~/.thunderbird</filename> or
|
<filename>~/.thunderbird</filename> or
|
||||||
<filename>~/.evolution</filename> in
|
<filename>~/.evolution</filename> in
|
||||||
@ -1656,12 +1656,19 @@ RECOLL_CONFDIR=$recollconf $recolldata/examples/rclmon.sh stop
|
|||||||
|
|
||||||
<para><filename>mimemap</filename> also has a
|
<para><filename>mimemap</filename> also has a
|
||||||
<literal>recoll_noindex</literal> variable which is a list of
|
<literal>recoll_noindex</literal> variable which is a list of
|
||||||
suffixes. Matching files will be skipped (avoids unnecessary
|
suffixes. Matching files will be skipped (which avoids
|
||||||
decompressions or <command>file</command> executions). This is
|
unnecessary decompressions or <command>file</command>
|
||||||
partially redundant with <literal>skippedNames</literal> in
|
executions). This is partially redundant with
|
||||||
the main configuration file, with two differences: it will not
|
<literal>skippedNames</literal> in the main configuration
|
||||||
affect directories, and it can be changed for any
|
file, with two differences: it will not affect directories,
|
||||||
sub-directory.</para>
|
and it cannot be made dependent on the file-system location
|
||||||
|
(it is a configuration-wide parameter). You could accomplish
|
||||||
|
with <literal>skippedNames</literal> anything that
|
||||||
|
<literal>recoll_noindex</literal> does. The latter is used
|
||||||
|
mostly for things known to be unindexable by a given &RCL;
|
||||||
|
version. Having it there avoids cluttering the more
|
||||||
|
user-oriented and locally customized
|
||||||
|
<literal>skippedNames</literal>.</para>
|
||||||
|
|
||||||
</sect2>
|
</sect2>
|
||||||
|
|
||||||
@ -1672,14 +1679,15 @@ RECOLL_CONFDIR=$recollconf $recolldata/examples/rclmon.sh stop
|
|||||||
different mime types are handled for indexing, and which icons
|
different mime types are handled for indexing, and which icons
|
||||||
are displayed in the <command>recoll</command> result lists.</para>
|
are displayed in the <command>recoll</command> result lists.</para>
|
||||||
|
|
||||||
<para>Changing the indexing parameters is probably not a
|
<para>Changing the parameters in the [index] section is
|
||||||
good idea except if you are a &RCL; developers.</para>
|
probably not a good idea except if you are a &RCL;
|
||||||
|
developer.</para>
|
||||||
|
|
||||||
<para>You can change the icons which are displayed by
|
<para>The [icons] section allows you to change the icons which
|
||||||
<command>recoll</command> in the result lists (the values are
|
are displayed by <command>recoll</command> in the result
|
||||||
the basenames of the png images inside the
|
lists (the values are the basenames of the png images inside
|
||||||
<filename>iconsdir</filename> directory (specified in
|
the <filename>iconsdir</filename> directory (specified in
|
||||||
<filename>recoll.conf</filename>).</para>
|
<filename>recoll.conf</filename>)).</para>
|
||||||
|
|
||||||
</sect2>
|
</sect2>
|
||||||
<sect2 id="rclinstall.config.mimeview">
|
<sect2 id="rclinstall.config.mimeview">
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
#ifndef lint
|
#ifndef lint
|
||||||
static char rcsid[] = "@(#$Id: mimetype.cpp,v 1.19 2006-12-19 08:40:50 dockes Exp $ (C) 2004 J.F.Dockes";
|
static char rcsid[] = "@(#$Id: mimetype.cpp,v 1.20 2006-12-20 09:54:18 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||||
#endif
|
#endif
|
||||||
/*
|
/*
|
||||||
* This program is free software; you can redistribute it and/or modify
|
* This program is free software; you can redistribute it and/or modify
|
||||||
@ -117,19 +117,12 @@ string mimetype(const string &fn, const struct stat *stp,
|
|||||||
if (cfg == 0)
|
if (cfg == 0)
|
||||||
return "";
|
return "";
|
||||||
|
|
||||||
const list<string>* stoplist = cfg->getStopSuffixes();
|
if (cfg->inStopSuffixes(fn)) {
|
||||||
if (stoplist && !stoplist->empty()) {
|
LOGDEB(("mimetype: fn [%s] in stopsuffixes\n", fn.c_str()));
|
||||||
for (list<string>::const_iterator it = stoplist->begin();
|
return "";
|
||||||
it != stoplist->end(); it++) {
|
|
||||||
if (!stringisuffcmp(fn, *it)) {
|
|
||||||
LOGDEB(("mimetype: fn %s in stoplist (%s)\n", fn.c_str(),
|
|
||||||
it->c_str()));
|
|
||||||
return "";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Look for suffix in mimetype map
|
// First look for suffix in mimetype map
|
||||||
string::size_type dot = fn.find_last_of(".");
|
string::size_type dot = fn.find_last_of(".");
|
||||||
string suff;
|
string suff;
|
||||||
if (dot != string::npos) {
|
if (dot != string::npos) {
|
||||||
@ -142,6 +135,7 @@ string mimetype(const string &fn, const struct stat *stp,
|
|||||||
return mtype;
|
return mtype;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Then examine data
|
||||||
return mimetypefromdata(fn, usfc);
|
return mimetypefromdata(fn, usfc);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user