handle mime: and ext: in qlang

This commit is contained in:
dockes 2007-06-22 06:14:04 +00:00
parent 2777fedb78
commit e892ca4fa4
6 changed files with 61 additions and 12 deletions

View File

@ -1,5 +1,5 @@
#ifndef lint
static char rcsid[] = "@(#$Id: rclconfig.cpp,v 1.48 2007-06-21 11:14:45 dockes Exp $ (C) 2004 J.F.Dockes";
static char rcsid[] = "@(#$Id: rclconfig.cpp,v 1.49 2007-06-22 06:14:04 dockes Exp $ (C) 2004 J.F.Dockes";
#endif
/*
* This program is free software; you can redistribute it and/or modify
@ -351,6 +351,17 @@ bool RclConfig::getMimeCategories(list<string>& cats)
return true;
}
/**
 * Tell whether cat matches one of the category names returned by
 * getMimeCategories(). Names are compared with stringicmp(), a zero
 * result counting as a match (presumably a case-insensitive compare --
 * confirm against smallut).
 *
 * @param cat candidate category name.
 * @return true if a matching category was found, false otherwise.
 */
bool RclConfig::isMimeCategory(string& cat)
{
    list<string> categories;
    getMimeCategories(categories);

    list<string>::iterator cur = categories.begin();
    while (cur != categories.end()) {
        if (stringicmp(*cur, cat) == 0)
            return true;
        ++cur;
    }
    return false;
}
/** Get list of mime types for category from mimeconf */
bool RclConfig::getMimeCatTypes(const string& cat, list<string>& tps)
{

View File

@ -16,7 +16,7 @@
*/
#ifndef _RCLCONFIG_H_INCLUDED_
#define _RCLCONFIG_H_INCLUDED_
/* @(#$Id: rclconfig.h,v 1.35 2007-06-19 08:36:24 dockes Exp $ (C) 2004 J.F.Dockes */
/* @(#$Id: rclconfig.h,v 1.36 2007-06-22 06:14:04 dockes Exp $ (C) 2004 J.F.Dockes */
#include <list>
#include <string>
@ -134,6 +134,8 @@ class RclConfig {
/** mimeconf: get list of file categories */
bool getMimeCategories(list<string>&);
/** mimeconf: is parameter one of the categories ? */
bool isMimeCategory(string&);
/** mimeconf: get list of mime types for category */
bool getMimeCatTypes(const string& cat, list<string>&);

View File

@ -1,5 +1,5 @@
#ifndef lint
static char rcsid[] = "@(#$Id: wasatorcl.cpp,v 1.8 2007-02-13 10:58:31 dockes Exp $ (C) 2006 J.F.Dockes";
static char rcsid[] = "@(#$Id: wasatorcl.cpp,v 1.9 2007-06-22 06:14:04 dockes Exp $ (C) 2006 J.F.Dockes";
#endif
/*
* This program is free software; you can redistribute it and/or modify
@ -22,6 +22,7 @@ static char rcsid[] = "@(#$Id: wasatorcl.cpp,v 1.8 2007-02-13 10:58:31 dockes Ex
#include "searchdata.h"
#include "wasatorcl.h"
#include "debuglog.h"
#include "smallut.h"
Rcl::SearchData *wasaStringToRcl(const string &qs, string &reason)
{
@ -55,6 +56,12 @@ Rcl::SearchData *wasaQueryToRcl(WasaQuery *wasa)
// ??
continue;
case WasaQuery::OP_LEAF:
// Special case for mime. Not pretty.
if (!stringicmp("mime", (*it)->m_fieldspec)) {
sdata->addFiletype((*it)->m_value);
break;
}
if ((*it)->m_value.find_first_of(" \t\n\r") != string::npos) {
nclause = new Rcl::SearchDataClauseDist(Rcl::SCLT_PHRASE,
(*it)->m_value, 0,

View File

@ -1,5 +1,5 @@
#ifndef lint
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.117 2007-06-21 11:56:28 dockes Exp $ (C) 2004 J.F.Dockes";
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.118 2007-06-22 06:14:04 dockes Exp $ (C) 2004 J.F.Dockes";
#endif
/*
* This program is free software; you can redistribute it and/or modify
@ -750,6 +750,7 @@ bool Db::fieldToPrefix(const string& fldname, string &pfx)
static map<string, string> fldToPrefs;
if (fldToPrefs.empty()) {
fldToPrefs["abstract"] = "";
fldToPrefs["ext"] = "XE";
fldToPrefs["title"] = "S";
fldToPrefs["caption"] = "S";
@ -990,7 +991,12 @@ bool Db::add(const string &fn, const Doc &idoc, const struct stat *stp)
// Simple file name. This is used for file name searches only. We index
// it with a term prefix. utf8fn used to be the full path, but it's now
// the simple file name.
// We also add a term for the filename extension if any.
if (dumb_string(doc.utf8fn, noacc) && !noacc.empty()) {
string::size_type pos = noacc.rfind('.');
if (pos != string::npos && pos != noacc.length() -1) {
newdocument.add_term(string("XE") + noacc.substr(pos+1));
}
noacc = string("XSFN") + noacc;
newdocument.add_term(noacc);
}

View File

@ -1,5 +1,5 @@
#ifndef lint
static char rcsid[] = "@(#$Id: searchdata.cpp,v 1.16 2007-06-19 08:36:24 dockes Exp $ (C) 2006 J.F.Dockes";
static char rcsid[] = "@(#$Id: searchdata.cpp,v 1.17 2007-06-22 06:14:04 dockes Exp $ (C) 2006 J.F.Dockes";
#endif
/*
* This program is free software; you can redistribute it and/or modify
@ -72,10 +72,25 @@ bool SearchData::toNativeQuery(Rcl::Db &db, void *d, const string& stemlang)
// Add the file type filtering clause if any
if (!m_filetypes.empty()) {
list<Xapian::Query> pqueries;
Xapian::Query tq;
vector<string> exptps;
exptps.reserve(m_filetypes.size());
// Expand categories
RclConfig *cfg = RclConfig::getMainConfig();
for (vector<string>::iterator it = m_filetypes.begin();
it != m_filetypes.end(); it++) {
if (cfg && cfg->isMimeCategory(*it)) {
list<string>tps;
cfg->getMimeCatTypes(*it, tps);
exptps.insert(exptps.end(), tps.begin(), tps.end());
} else {
exptps.push_back(*it);
}
}
list<Xapian::Query> pqueries;
Xapian::Query tq;
for (vector<string>::iterator it = exptps.begin();
it != exptps.end(); it++) {
string term = "T" + *it;
LOGDEB(("Adding file type term: [%s]\n", term.c_str()));
tq = tq.empty() ? Xapian::Query(term) :

View File

@ -1,4 +1,4 @@
# @(#$Id: mimeconf,v 1.30 2007-06-19 08:36:24 dockes Exp $ (C) 2004 J.F.Dockes
# @(#$Id: mimeconf,v 1.31 2007-06-22 06:14:04 dockes Exp $ (C) 2004 J.F.Dockes
# Recoll : associations of mime types to processing filters.
# There are different sections for decompression, 'interning' for indexing
@ -91,7 +91,7 @@ text/x-c = source
[categories]
texts = \
text = \
application/msword \
application/pdf \
application/postscript \
@ -108,11 +108,11 @@ texts = \
text/plain \
text/rtf
spreadsheets = application/vnd.ms-excel \
spreadsheet = application/vnd.ms-excel \
application/vnd.sun.xml.calc \
application/vnd.sun.xml.calc.template
presentations = application/vnd.ms-powerpoint \
presentation = application/vnd.ms-powerpoint \
application/vnd.sun.xml.impress \
application/vnd.sun.xml.impress.template
@ -120,7 +120,7 @@ media = audio/mpeg \
image/jpeg \
image/png \
messages = message/rfc822 \
message = message/rfc822 \
text/x-gaim-log \
text/x-mail \
@ -132,6 +132,12 @@ other = application/vnd.sun.xml.draw \
[prefixes]
# This allows extending the set of fields that recoll understands/searches.
# See the manual for exact usage.
# Important:
# - The field names MUST be all lowercase here. They can use any case
# in the documents.
# - The extension field prefixes MUST begin with X and be all UPPERCASE.
title = S
caption = S
subject = S
@ -145,3 +151,5 @@ tag = K
keywords = K
tags = K
# testing /example :
recollspecialfield = XRCLSF