diff --git a/src/common/rclconfig.cpp b/src/common/rclconfig.cpp index 06f67196..4622f33e 100644 --- a/src/common/rclconfig.cpp +++ b/src/common/rclconfig.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: rclconfig.cpp,v 1.48 2007-06-21 11:14:45 dockes Exp $ (C) 2004 J.F.Dockes"; +static char rcsid[] = "@(#$Id: rclconfig.cpp,v 1.49 2007-06-22 06:14:04 dockes Exp $ (C) 2004 J.F.Dockes"; #endif /* * This program is free software; you can redistribute it and/or modify @@ -351,6 +351,17 @@ bool RclConfig::getMimeCategories(list& cats) return true; } +bool RclConfig::isMimeCategory(string& cat) +{ + listcats; + getMimeCategories(cats); + for (list::iterator it = cats.begin(); it != cats.end(); it++) { + if (!stringicmp(*it,cat)) + return true; + } + return false; +} + /** Get list of mime types for category from mimeconf */ bool RclConfig::getMimeCatTypes(const string& cat, list& tps) { diff --git a/src/common/rclconfig.h b/src/common/rclconfig.h index 665f955d..9bb5e85b 100644 --- a/src/common/rclconfig.h +++ b/src/common/rclconfig.h @@ -16,7 +16,7 @@ */ #ifndef _RCLCONFIG_H_INCLUDED_ #define _RCLCONFIG_H_INCLUDED_ -/* @(#$Id: rclconfig.h,v 1.35 2007-06-19 08:36:24 dockes Exp $ (C) 2004 J.F.Dockes */ +/* @(#$Id: rclconfig.h,v 1.36 2007-06-22 06:14:04 dockes Exp $ (C) 2004 J.F.Dockes */ #include #include @@ -134,6 +134,8 @@ class RclConfig { /** mimeconf: get list of file categories */ bool getMimeCategories(list&); + /** mimeconf: is parameter one of the categories ? */ + bool isMimeCategory(string&); /** mimeconf: get list of mime types for category */ bool getMimeCatTypes(const string& cat, list&); diff --git a/src/query/wasatorcl.cpp b/src/query/wasatorcl.cpp index e32b5322..a514e953 100644 --- a/src/query/wasatorcl.cpp +++ b/src/query/wasatorcl.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: wasatorcl.cpp,v 1.8 2007-02-13 10:58:31 dockes Exp $ (C) 2006 J.F.Dockes"; +static char rcsid[] = "@(#$Id: wasatorcl.cpp,v 1.9 2007-06-22 06:14:04 dockes Exp $ (C) 2006 J.F.Dockes"; #endif /* * This program is free software; you can redistribute it and/or modify @@ -22,6 +22,7 @@ static char rcsid[] = "@(#$Id: wasatorcl.cpp,v 1.8 2007-02-13 10:58:31 dockes Ex #include "searchdata.h" #include "wasatorcl.h" #include "debuglog.h" +#include "smallut.h" Rcl::SearchData *wasaStringToRcl(const string &qs, string &reason) { @@ -55,6 +56,12 @@ Rcl::SearchData *wasaQueryToRcl(WasaQuery *wasa) // ?? continue; case WasaQuery::OP_LEAF: + // Special case for mime. Not pretty. + if (!stringicmp("mime", (*it)->m_fieldspec)) { + sdata->addFiletype((*it)->m_value); + break; + } + if ((*it)->m_value.find_first_of(" \t\n\r") != string::npos) { nclause = new Rcl::SearchDataClauseDist(Rcl::SCLT_PHRASE, (*it)->m_value, 0, diff --git a/src/rcldb/rcldb.cpp b/src/rcldb/rcldb.cpp index 855bcd74..2e11f7b3 100644 --- a/src/rcldb/rcldb.cpp +++ b/src/rcldb/rcldb.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.117 2007-06-21 11:56:28 dockes Exp $ (C) 2004 J.F.Dockes"; +static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.118 2007-06-22 06:14:04 dockes Exp $ (C) 2004 J.F.Dockes"; #endif /* * This program is free software; you can redistribute it and/or modify @@ -750,6 +750,7 @@ bool Db::fieldToPrefix(const string& fldname, string &pfx) static map fldToPrefs; if (fldToPrefs.empty()) { fldToPrefs["abstract"] = ""; + fldToPrefs["ext"] = "XE"; fldToPrefs["title"] = "S"; fldToPrefs["caption"] = "S"; @@ -990,7 +991,12 @@ bool Db::add(const string &fn, const Doc &idoc, const struct stat *stp) // Simple file name. This is used for file name searches only. We index // it with a term prefix. utf8fn used to be the full path, but it's now // the simple file name. + // We also add a term for the filename extension if any. if (dumb_string(doc.utf8fn, noacc) && !noacc.empty()) { + string::size_type pos = noacc.rfind('.'); + if (pos != string::npos && pos != noacc.length() -1) { + newdocument.add_term(string("XE") + noacc.substr(pos+1)); + } noacc = string("XSFN") + noacc; newdocument.add_term(noacc); } diff --git a/src/rcldb/searchdata.cpp b/src/rcldb/searchdata.cpp index a0650129..d5881f98 100644 --- a/src/rcldb/searchdata.cpp +++ b/src/rcldb/searchdata.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: searchdata.cpp,v 1.16 2007-06-19 08:36:24 dockes Exp $ (C) 2006 J.F.Dockes"; +static char rcsid[] = "@(#$Id: searchdata.cpp,v 1.17 2007-06-22 06:14:04 dockes Exp $ (C) 2006 J.F.Dockes"; #endif /* * This program is free software; you can redistribute it and/or modify @@ -72,10 +72,25 @@ bool SearchData::toNativeQuery(Rcl::Db &db, void *d, const string& stemlang) // Add the file type filtering clause if any if (!m_filetypes.empty()) { - list pqueries; - Xapian::Query tq; + vector exptps; + exptps.reserve(m_filetypes.size()); + // Expand categories + RclConfig *cfg = RclConfig::getMainConfig(); for (vector::iterator it = m_filetypes.begin(); it != m_filetypes.end(); it++) { + if (cfg && cfg->isMimeCategory(*it)) { + listtps; + cfg->getMimeCatTypes(*it, tps); + exptps.insert(exptps.end(), tps.begin(), tps.end()); + } else { + exptps.push_back(*it); + } + } + + list pqueries; + Xapian::Query tq; + for (vector::iterator it = exptps.begin(); + it != exptps.end(); it++) { string term = "T" + *it; LOGDEB(("Adding file type term: [%s]\n", term.c_str())); tq = tq.empty() ? Xapian::Query(term) : diff --git a/src/sampleconf/mimeconf b/src/sampleconf/mimeconf index 4d2a6011..9625d5d3 100644 --- a/src/sampleconf/mimeconf +++ b/src/sampleconf/mimeconf @@ -1,4 +1,4 @@ -# @(#$Id: mimeconf,v 1.30 2007-06-19 08:36:24 dockes Exp $ (C) 2004 J.F.Dockes +# @(#$Id: mimeconf,v 1.31 2007-06-22 06:14:04 dockes Exp $ (C) 2004 J.F.Dockes # Recoll : associations of mime types to processing filters. # There are different sections for decompression, 'interning' for indexing @@ -91,7 +91,7 @@ text/x-c = source [categories] -texts = \ +text = \ application/msword \ application/pdf \ application/postscript \ @@ -108,11 +108,11 @@ texts = \ text/plain \ text/rtf -spreadsheets = application/vnd.ms-excel \ +spreadsheet = application/vnd.ms-excel \ application/vnd.sun.xml.calc \ application/vnd.sun.xml.calc.template -presentations = application/vnd.ms-powerpoint \ +presentation = application/vnd.ms-powerpoint \ application/vnd.sun.xml.impress \ application/vnd.sun.xml.impress.template @@ -120,7 +120,7 @@ media = audio/mpeg \ image/jpeg \ image/png \ -messages = message/rfc822 \ +message = message/rfc822 \ text/x-gaim-log \ text/x-mail \ @@ -132,6 +132,12 @@ other = application/vnd.sun.xml.draw \ [prefixes] +# This allows extending the set of fields that recoll understand/searches. +# See the manual for exact usage. +# Important: +# - the field names MUST be all lowercase here. They can be anycased +# in the documents: +# - The extension field prefixes MUST begin with X and be all UPPERCASE. title = S caption = S subject = S @@ -145,3 +151,5 @@ tag = K keywords = K tags = K +# testing /example : +recollspecialfield = XRCLSF