From 0e1cbddb8b988a463acacf7c97013b1dfa6359f3 Mon Sep 17 00:00:00 2001
From: dockes
Date: Tue, 29 Sep 2009 15:58:45 +0000
Subject: [PATCH] textfilemaxmbs

---
 src/internfile/mh_text.cpp         | 30 ++++++++++++++++++++++++++----
 src/qtgui/confgui/confguiindex.cpp | 12 ++++++++++++
 src/sampleconf/recoll.conf.in      |  3 +++
 3 files changed, 41 insertions(+), 4 deletions(-)

diff --git a/src/internfile/mh_text.cpp b/src/internfile/mh_text.cpp
index 4b4d1159..c5673b01 100644
--- a/src/internfile/mh_text.cpp
+++ b/src/internfile/mh_text.cpp
@@ -17,6 +17,9 @@ static char rcsid[] = "@(#$Id: mh_text.cpp,v 1.6 2006-12-15 12:40:02 dockes Exp
  *   Free Software Foundation, Inc.,
  *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  */
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <errno.h>
 
 #include <iostream>
 #include <string>
@@ -30,16 +33,35 @@ using namespace std;
 #include "readfile.h"
 #include "transcode.h"
 #include "md5.h"
+#include "rclconfig.h"
+
+const int MB = 1024*1024;
 
 // Process a plain text file
 bool MimeHandlerText::set_document_file(const string &fn)
 {
     RecollFilter::set_document_file(fn);
+
+    // file size
+    struct stat st;
+    if (stat(fn.c_str(), &st) < 0) {
+        LOGERR(("MimeHandlerText::set_document_file: stat(%s) errno %d\n",
+                fn.c_str(), errno));
+        return false;
+    }
+
+    // Handle max file size parameter (negative value: no limit). If the file
+    // is too big we just don't index the text (index the first maxmbs instead ?)
+    int maxmbs = -1;
+    RclConfig::getMainConfig()->getConfParam("textfilemaxmbs", &maxmbs);
+
     string otext;
-    string reason;
-    if (!file_to_string(fn, otext, &reason)) {
-        LOGERR(("MimeHandlerText: can't read file: %s\n", reason.c_str()));
-        return false;
+    if (maxmbs < 0 || st.st_size / MB <= maxmbs) {
+        string reason;
+        if (!file_to_string(fn, otext, &reason)) {
+            LOGERR(("MimeHandlerText: can't read file: %s\n", reason.c_str()));
+            return false;
+        }
     }
     return set_document_string(otext);
 }
diff --git a/src/qtgui/confgui/confguiindex.cpp b/src/qtgui/confgui/confguiindex.cpp
index 853816d4..bf55de93 100644
--- a/src/qtgui/confgui/confguiindex.cpp
+++ b/src/qtgui/confgui/confguiindex.cpp
@@ -365,6 +365,18 @@ ConfSubPanelW::ConfSubPanelW(QWidget *parent, ConfNull *config)
                       -1, 1000000);
     m_widgets.push_back(ezfmaxkbs);
 
+    ConfLink lnktxtmaxmbs(new ConfLinkRclRep(config,
+                                             "textfilemaxmbs"));
+    ConfParamIntW *etxtmaxmbs = new
+        ConfParamIntW(m_groupbox, lnktxtmaxmbs,
+                      tr("Max. text file size (MB)"),
+                      tr("This value sets a threshold beyond which text "
+                         "files will not be processed. Set to -1 for no "
+                         "limit. This is for excluding monster "
+                         "log files from the index."),
+                      -1, 1000000);
+    m_widgets.push_back(etxtmaxmbs);
+
     vboxLayout->addWidget(m_groupbox);
     subDirChanged();
 }
diff --git a/src/sampleconf/recoll.conf.in b/src/sampleconf/recoll.conf.in
index 35f65692..ff9c5092 100644
--- a/src/sampleconf/recoll.conf.in
+++ b/src/sampleconf/recoll.conf.in
@@ -104,6 +104,9 @@ indexallfilenames = 1
 # processing of any compressed file
 # compressedfilemaxkbs = -1
 
+# Size limit for text files. This is for skipping monster logs
+textfilemaxmbs = -1
+
 # Length of abstracts we store while indexing. Longer will make for a
 # bigger db
 # idxabsmlen = 250