changed stopsuffixes processing
This commit is contained in:
parent
2bba8a159d
commit
1c80f0d67c
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: rclconfig.cpp,v 1.38 2006-12-19 08:40:50 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: rclconfig.cpp,v 1.39 2006-12-20 09:54:17 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
#endif
|
||||
/*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
@ -24,6 +24,8 @@ static char rcsid[] = "@(#$Id: rclconfig.cpp,v 1.38 2006-12-19 08:40:50 dockes E
|
||||
#include <errno.h>
|
||||
#include <langinfo.h>
|
||||
|
||||
#include <set>
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#ifdef __FreeBSD__
|
||||
@ -42,6 +44,13 @@ static char rcsid[] = "@(#$Id: rclconfig.cpp,v 1.38 2006-12-19 08:40:50 dockes E
|
||||
using namespace std;
|
||||
#endif /* NO_NAMESPACES */
|
||||
|
||||
#ifndef MIN
|
||||
#define MIN(A,B) (((A)<(B)) ? (A) : (B))
|
||||
#endif
|
||||
#ifndef MAX
|
||||
#define MAX(A,B) (((A)>(B)) ? (A) : (B))
|
||||
#endif
|
||||
|
||||
RclConfig::RclConfig(const string *argcnf)
|
||||
{
|
||||
zeroMe();
|
||||
@ -211,16 +220,81 @@ std::list<string> RclConfig::getAllMimeTypes()
|
||||
return lst;
|
||||
}
|
||||
|
||||
const list<string>* RclConfig::getStopSuffixes()
|
||||
// Things for suffix comparison. We define a string class and string
|
||||
// comparison with suffix-only sensitivity
|
||||
// String wrapper used as the element type of the stop-suffix store.
//
// Equality is "suffix-only": the two strings are walked from their last
// character backwards and the comparison stops as soon as either one is
// exhausted. Two values therefore compare equal when the shorter one is a
// suffix of the longer one (an empty string compares equal to anything).
//
// The constructor is intentionally NOT explicit: lookups such as
// store.find(someStdString) rely on the implicit string -> SfString
// conversion.
class SfString {
public:
    SfString(const string& s) : m_str(s) {}

    // Suffix-only equality, see class comment. Const so that it can be
    // used on const objects and on container elements.
    bool operator==(const SfString& s2) const {
        string::const_reverse_iterator
            r1 = m_str.rbegin(), re1 = m_str.rend(),
            r2 = s2.m_str.rbegin(), re2 = s2.m_str.rend();
        while (r1 != re1 && r2 != re2) {
            if (*r1 != *r2) {
                return false;
            }
            ++r1; ++r2;
        }
        // One string ran out without any mismatch: suffix match.
        return true;
    }

    string m_str;
};

// Ordering predicate for a set/multiset of SfString.
//
// Strings are compared character by character starting from the end.
// The first differing character decides the order. If one string is
// exhausted before any difference is found, neither is "less" than the
// other, i.e. a string and any of its suffixes are equivalent keys. This is
// what lets find() on a (truncated) file name locate a stored suffix.
//
// operator() is const and returns bool, as the associative containers
// invoke the comparison object through a const reference.
class SuffCmp {
public:
    bool operator()(const SfString& s1, const SfString& s2) const {
        //cout << "Comparing " << s1.m_str << " and " << s2.m_str << endl;
        string::const_reverse_iterator
            r1 = s1.m_str.rbegin(), re1 = s1.m_str.rend(),
            r2 = s2.m_str.rbegin(), re2 = s2.m_str.rend();
        while (r1 != re1 && r2 != re2) {
            if (*r1 != *r2) {
                return *r1 < *r2;
            }
            ++r1; ++r2;
        }
        // No difference over the common tail: equivalent keys.
        return false;
    }
};
|
||||
typedef multiset<SfString, SuffCmp> SuffixStore;
|
||||
|
||||
#define STOPSUFFIXES ((SuffixStore *)m_stopsuffixes)
|
||||
|
||||
bool RclConfig::inStopSuffixes(const string& fni)
|
||||
{
|
||||
if (stopsuffixes == 0 && (stopsuffixes = new list<string>) != 0) {
|
||||
if (m_stopsuffixes == 0) {
|
||||
// Need to initialize the suffixes
|
||||
if ((m_stopsuffixes = new SuffixStore) == 0) {
|
||||
LOGERR(("RclConfig::inStopSuffixes: out of memory\n"));
|
||||
return false;
|
||||
}
|
||||
string stp;
|
||||
list<string> stoplist;
|
||||
if (mimemap && mimemap->get("recoll_noindex", stp, m_keydir)) {
|
||||
stringToStrings(stp, *stopsuffixes);
|
||||
stringToStrings(stp, stoplist);
|
||||
}
|
||||
for (list<string>::const_iterator it = stoplist.begin();
|
||||
it != stoplist.end(); it++) {
|
||||
string lower(*it);
|
||||
stringtolower(lower);
|
||||
STOPSUFFIXES->insert(SfString(lower));
|
||||
if (m_maxsufflen < lower.length())
|
||||
m_maxsufflen = lower.length();
|
||||
}
|
||||
}
|
||||
|
||||
return stopsuffixes;
|
||||
string fn(fni,
|
||||
MAX(0, fni.length() - m_maxsufflen),
|
||||
MIN(fni.length(), m_maxsufflen));
|
||||
stringtolower(fn);
|
||||
SuffixStore::const_iterator it = STOPSUFFIXES->find(fn);
|
||||
if (it != STOPSUFFIXES->end()) {
|
||||
LOGDEB2(("RclConfig::inStopSuffixes: Found (%s) [%s]\n",
|
||||
fni.c_str(), (*it).m_str.c_str()));
|
||||
return true;
|
||||
} else {
|
||||
LOGDEB2(("RclConfig::inStopSuffixes: not found [%s]\n", fni.c_str()));
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
string RclConfig::getMimeTypeFromSuffix(const string &suff)
|
||||
@ -244,6 +318,17 @@ string RclConfig::getSuffixFromMimeType(const string &mt)
|
||||
return "";
|
||||
}
|
||||
|
||||
void RclConfig::freeAll()
|
||||
{
|
||||
delete m_conf;
|
||||
delete mimemap;
|
||||
delete mimeconf;
|
||||
delete mimeview;
|
||||
delete STOPSUFFIXES;
|
||||
// just in case
|
||||
zeroMe();
|
||||
}
|
||||
|
||||
string RclConfig::getMimeHandlerDef(const std::string &mtype)
|
||||
{
|
||||
string hs;
|
||||
@ -495,8 +580,9 @@ void RclConfig::initFrom(const RclConfig& r)
|
||||
mimeconf = new ConfStack<ConfTree>(*(r.mimeconf));
|
||||
if (r.mimeview)
|
||||
mimeview = new ConfStack<ConfTree>(*(r.mimeview));
|
||||
if (r.stopsuffixes)
|
||||
stopsuffixes = new std::list<std::string>(*(r.stopsuffixes));
|
||||
if (r.m_stopsuffixes)
|
||||
m_stopsuffixes = new SuffixStore(*((SuffixStore*)r.m_stopsuffixes));
|
||||
m_maxsufflen = r.m_maxsufflen;
|
||||
defcharset = r.defcharset;
|
||||
guesscharset = r.guesscharset;
|
||||
}
|
||||
|
||||
@ -16,7 +16,7 @@
|
||||
*/
|
||||
#ifndef _RCLCONFIG_H_INCLUDED_
|
||||
#define _RCLCONFIG_H_INCLUDED_
|
||||
/* @(#$Id: rclconfig.h,v 1.28 2006-12-16 15:30:02 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
/* @(#$Id: rclconfig.h,v 1.29 2006-12-20 09:54:17 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
|
||||
#include <list>
|
||||
#include <string>
|
||||
@ -35,7 +35,7 @@ using std::pair;
|
||||
class RclConfig {
|
||||
public:
|
||||
|
||||
RclConfig(const string *argcnf=0);
|
||||
RclConfig(const string *argcnf = 0);
|
||||
bool ok() {return m_ok;}
|
||||
const string &getReason() {return m_reason;}
|
||||
/** Return the directory where this config is stored */
|
||||
@ -83,12 +83,12 @@ class RclConfig {
|
||||
list<string> getSkippedNames();
|
||||
|
||||
/**
|
||||
* Get list of ignored suffixes from mimemap
|
||||
* Check if file name should be ignored because of suffix
|
||||
*
|
||||
* The list is initialized on first call, and not changed for subsequent
|
||||
* setKeydirs.
|
||||
* The list of ignored suffixes is initialized on first call, and
|
||||
* not changed for subsequent setKeydirs.
|
||||
*/
|
||||
const list<string>* getStopSuffixes();
|
||||
bool inStopSuffixes(const string& fn);
|
||||
|
||||
/**
|
||||
* Check in mimeconf if input mime type is a compressed one, and
|
||||
@ -153,7 +153,8 @@ class RclConfig {
|
||||
ConfStack<ConfTree> *mimeconf; // but their content may depend on it.
|
||||
ConfStack<ConfTree> *mimeview; //
|
||||
|
||||
list<string> *stopsuffixes;
|
||||
void *m_stopsuffixes;
|
||||
unsigned int m_maxsufflen;
|
||||
|
||||
// Parameters auto-fetched on setkeydir
|
||||
string defcharset; // These are stored locally to avoid
|
||||
@ -171,18 +172,11 @@ class RclConfig {
|
||||
mimemap = 0;
|
||||
mimeconf = 0;
|
||||
mimeview = 0;
|
||||
stopsuffixes = 0;
|
||||
m_stopsuffixes = 0;
|
||||
m_maxsufflen = 0;
|
||||
}
|
||||
/** Free data then zero pointers */
|
||||
void freeAll() {
|
||||
delete m_conf;
|
||||
delete mimemap;
|
||||
delete mimeconf;
|
||||
delete mimeview;
|
||||
delete stopsuffixes;
|
||||
// just in case
|
||||
zeroMe();
|
||||
}
|
||||
void freeAll();
|
||||
};
|
||||
|
||||
|
||||
|
||||
@ -24,7 +24,7 @@
|
||||
Dockes</holder>
|
||||
</copyright>
|
||||
|
||||
<releaseinfo>$Id: usermanual.sgml,v 1.30 2006-12-18 12:06:11 dockes Exp $</releaseinfo>
|
||||
<releaseinfo>$Id: usermanual.sgml,v 1.31 2006-12-20 09:54:17 dockes Exp $</releaseinfo>
|
||||
|
||||
<abstract>
|
||||
<para>This document introduces full text search notions
|
||||
@ -1528,7 +1528,7 @@ RECOLL_CONFDIR=$recollconf $recolldata/examples/rclmon.sh stop
|
||||
agents like <application>thunderbird</application>
|
||||
usually store messages in hidden directories, and you
|
||||
probably want this indexed. One possible solution is to
|
||||
have <userinput>.*</userinput> in
|
||||
have <filename>.*</filename> in
|
||||
<literal>skippedNames</literal>, and add things like
|
||||
<filename>~/.thunderbird</filename> or
|
||||
<filename>~/.evolution</filename> in
|
||||
@ -1656,12 +1656,19 @@ RECOLL_CONFDIR=$recollconf $recolldata/examples/rclmon.sh stop
|
||||
|
||||
<para><filename>mimemap</filename> also has a
|
||||
<literal>recoll_noindex</literal> variable which is a list of
|
||||
suffixes. Matching files will be skipped (avoids unnecessary
|
||||
decompressions or <command>file</command> executions). This is
|
||||
partially redundant with <literal>skippedNames</literal> in
|
||||
the main configuration file, with two differences: it will not
|
||||
affect directories, and it can be changed for any
|
||||
sub-directory.</para>
|
||||
suffixes. Matching files will be skipped (which avoids
|
||||
unnecessary decompressions or <command>file</command>
|
||||
executions). This is partially redundant with
|
||||
<literal>skippedNames</literal> in the main configuration
|
||||
file, with two differences: it will not affect directories,
|
||||
and it cannot be made dependent on the file-system location
|
||||
(it is a configuration-wide parameter). You could accomplish
|
||||
with <literal>skippedNames</literal> anything that
|
||||
<literal>recoll_noindex</literal> does. The latter is used
|
||||
mostly for things known to be unindexable by a given &RCL;
|
||||
version. Having it there avoids cluttering the more
|
||||
user-oriented and locally customized
|
||||
<literal>skippedNames</literal>.</para>
|
||||
|
||||
</sect2>
|
||||
|
||||
@ -1672,14 +1679,15 @@ RECOLL_CONFDIR=$recollconf $recolldata/examples/rclmon.sh stop
|
||||
different mime types are handled for indexing, and which icons
|
||||
are displayed in the <command>recoll</command> result lists.</para>
|
||||
|
||||
<para>Changing the indexing parameters is probably not a
|
||||
good idea except if you are a &RCL; developers.</para>
|
||||
<para>Changing the parameters in the [index] section is
|
||||
probably not a good idea except if you are a &RCL;
|
||||
developer.</para>
|
||||
|
||||
<para>You can change the icons which are displayed by
|
||||
<command>recoll</command> in the result lists (the values are
|
||||
the basenames of the png images inside the
|
||||
<filename>iconsdir</filename> directory (specified in
|
||||
<filename>recoll.conf</filename>).</para>
|
||||
<para>The [icons] section allows you to change the icons which
|
||||
are displayed by <command>recoll</command> in the result
|
||||
lists (the values are the basenames of the png images inside
|
||||
the <filename>iconsdir</filename> directory (specified in
|
||||
<filename>recoll.conf</filename>)).</para>
|
||||
|
||||
</sect2>
|
||||
<sect2 id="rclinstall.config.mimeview">
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: mimetype.cpp,v 1.19 2006-12-19 08:40:50 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: mimetype.cpp,v 1.20 2006-12-20 09:54:18 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
#endif
|
||||
/*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
@ -117,19 +117,12 @@ string mimetype(const string &fn, const struct stat *stp,
|
||||
if (cfg == 0)
|
||||
return "";
|
||||
|
||||
const list<string>* stoplist = cfg->getStopSuffixes();
|
||||
if (stoplist && !stoplist->empty()) {
|
||||
for (list<string>::const_iterator it = stoplist->begin();
|
||||
it != stoplist->end(); it++) {
|
||||
if (!stringisuffcmp(fn, *it)) {
|
||||
LOGDEB(("mimetype: fn %s in stoplist (%s)\n", fn.c_str(),
|
||||
it->c_str()));
|
||||
return "";
|
||||
}
|
||||
}
|
||||
if (cfg->inStopSuffixes(fn)) {
|
||||
LOGDEB(("mimetype: fn [%s] in stopsuffixes\n", fn.c_str()));
|
||||
return "";
|
||||
}
|
||||
|
||||
// Look for suffix in mimetype map
|
||||
// First look for suffix in mimetype map
|
||||
string::size_type dot = fn.find_last_of(".");
|
||||
string suff;
|
||||
if (dot != string::npos) {
|
||||
@ -142,6 +135,7 @@ string mimetype(const string &fn, const struct stat *stp,
|
||||
return mtype;
|
||||
}
|
||||
|
||||
// Then examine data
|
||||
return mimetypefromdata(fn, usfc);
|
||||
}
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user