added open-ended field name handling

This commit is contained in:
dockes 2007-06-19 08:36:24 +00:00
parent c4b099e8d3
commit 0c74bd6e36
15 changed files with 176 additions and 176 deletions

View File

@ -1,5 +1,5 @@
#ifndef lint
static char rcsid[] = "@(#$Id: rclconfig.cpp,v 1.46 2007-06-18 13:04:14 dockes Exp $ (C) 2004 J.F.Dockes";
static char rcsid[] = "@(#$Id: rclconfig.cpp,v 1.47 2007-06-19 08:36:23 dockes Exp $ (C) 2004 J.F.Dockes";
#endif
/*
* This program is free software; you can redistribute it and/or modify
@ -373,13 +373,13 @@ string RclConfig::getMimeHandlerDef(const std::string &mtype)
return hs;
}
string RclConfig::getFieldPrefix(const string& fld)
bool RclConfig::getFieldPrefix(const string& fld, string &pfx)
{
string hs;
if (!mimeconf->get(fld, hs, "prefixes")) {
if (!mimeconf->get(fld, pfx, "prefixes")) {
LOGDEB(("getFieldPrefix: no prefix defined for '%s'\n", fld.c_str()));
return false;
}
return hs;
return true;
}
string RclConfig::getMimeViewerDef(const string &mtype)

View File

@ -16,7 +16,7 @@
*/
#ifndef _RCLCONFIG_H_INCLUDED_
#define _RCLCONFIG_H_INCLUDED_
/* @(#$Id: rclconfig.h,v 1.34 2007-06-18 13:04:15 dockes Exp $ (C) 2004 J.F.Dockes */
/* @(#$Id: rclconfig.h,v 1.35 2007-06-19 08:36:24 dockes Exp $ (C) 2004 J.F.Dockes */
#include <list>
#include <string>
@ -138,7 +138,7 @@ class RclConfig {
bool getMimeCatTypes(const string& cat, list<string>&);
/** mimeconf: get field prefix from field name */
string getFieldPrefix(const string& fldname);
bool getFieldPrefix(const string& fldname, string &pfx);
/** mimeview: get/set external viewer exec string(s) for mimetype(s) */
string getMimeViewerDef(const string &mimetype);

View File

@ -1,5 +1,5 @@
#ifndef lint
static char rcsid[] = "@(#$Id: internfile.cpp,v 1.30 2007-05-23 08:29:04 dockes Exp $ (C) 2004 J.F.Dockes";
static char rcsid[] = "@(#$Id: internfile.cpp,v 1.31 2007-06-19 08:36:24 dockes Exp $ (C) 2004 J.F.Dockes";
#endif
/*
* This program is free software; you can redistribute it and/or modify
@ -270,12 +270,12 @@ static const string keyab("abstract");
static const string keyau("author");
static const string keycs("charset");
static const string keyct("content");
static const string keyds("description");
static const string keyfn("filename");
static const string keykw("keywords");
static const string keymd("modificationdate");
static const string keymt("mimetype");
static const string keyoc("origcharset");
static const string keysm("sample");
static const string keytt("title");
bool FileInterner::dijontorcl(Rcl::Doc& doc)
@ -283,15 +283,24 @@ bool FileInterner::dijontorcl(Rcl::Doc& doc)
Dijon::Filter *df = m_handlers.back();
const std::map<std::string, std::string>& docdata = df->get_meta_data();
getKeyValue(docdata, keyau, doc.author);
getKeyValue(docdata, keyoc, doc.origcharset);
getKeyValue(docdata, keyct, doc.text);
getKeyValue(docdata, keytt, doc.title);
getKeyValue(docdata, keykw, doc.keywords);
getKeyValue(docdata, keymd, doc.dmtime);
if (!getKeyValue(docdata, keyab, doc.abstract))
getKeyValue(docdata, keysm, doc.abstract);
LOGDEB1(("FILENAME: %s\n", doc.utf8fn.c_str()));
for (map<string,string>::const_iterator it = docdata.begin();
it != docdata.end(); it++) {
if (it->first == keyct) {
doc.text = it->second;
} else if (it->first == keymd) {
doc.dmtime = it->second;
} else if (it->first == keyoc) {
doc.origcharset = it->second;
} else if (it->first == keymt || it->first == keycs) {
// don't need these.
} else {
doc.meta[it->first] = it->second;
}
}
if (doc.meta[keyab].empty() && !doc.meta[keyds].empty()) {
doc.meta[keyab] = doc.meta[keyds];
doc.meta.erase(keyds);
}
return true;
}
@ -324,7 +333,7 @@ void FileInterner::collectIpathAndMT(Rcl::Doc& doc, string& ipath) const
} else {
ipath += isep;
}
getKeyValue(docdata, keyau, doc.author);
getKeyValue(docdata, keyau, doc.meta["author"]);
getKeyValue(docdata, keymd, doc.dmtime);
}
@ -672,7 +681,7 @@ int main(int argc, char **argv)
"]]]]\n-----------------------------------------------------\n" <<
"doc.keywords [[[[" << doc.keywords <<
"]]]]\n-----------------------------------------------------\n" <<
"doc.abstract [[[[" << doc.abstract <<
"doc.meta["abstract"] [[[[" << doc.meta["abstract"] <<
"]]]]\n-----------------------------------------------------\n" <<
"doc.text [[[[" << doc.text << "]]]]\n";
}

View File

@ -136,15 +136,16 @@ bool MimeHandlerHtml::next_document()
m_metaData["origcharset"] = m_defcharset;
m_metaData["content"] = result.dump;
m_metaData["charset"] = "utf-8";
m_metaData["title"] = result.title;
m_metaData["keywords"] = result.keywords;
// Avoid setting empty values which would crush ones possibly inherited
// from parent (if we're an attachment)
if (!result.author.empty())
m_metaData["author"] = result.author;
if (!result.dmtime.empty())
m_metaData["modificationdate"] = result.dmtime;
m_metaData["sample"] = result.sample;
m_metaData["mimetype"] = "text/plain";
for (map<string,string>::const_iterator it = result.meta.begin();
it != result.meta.end(); it++) {
if (!it->second.empty())
m_metaData[it->first] = it->second;
}
return true;
}

View File

@ -144,22 +144,7 @@ MyHtmlParser::opening_tag(const string &tag, const map<string,string> &p)
if ((j = p.find("name")) != p.end()) {
string name = j->second;
lowercase_term(name);
if (name == "description") {
if (sample.empty()) {
sample = i->second;
decode_entities(sample);
}
} else if (name == "keywords") {
if (!keywords.empty()) keywords += ' ';
string tmp = i->second;
decode_entities(tmp);
keywords += tmp;
} else if (name == "author") {
if (!author.empty()) author += ' ';
string tmp = i->second;
decode_entities(tmp);
author += tmp;
} else if (name == "date") {
if (name == "date") {
// Yes, this doesn't exist. It's output by filters
// And the format isn't even standard http/html
// FIXME
@ -172,7 +157,14 @@ MyHtmlParser::opening_tag(const string &tag, const map<string,string> &p)
sprintf(ascuxtime, "%ld", (long)mktime(&tm));
dmtime = ascuxtime;
}
}
} else if (name == "robots") {
} else {
if (!meta[name].empty())
meta[name] += ' ';
string tmp = i->second;
decode_entities(tmp);
meta[name] += tmp;
}
} else if ((j = p.find("http-equiv")) != p.end()) {
string hequiv = j->second;
lowercase_term(hequiv);
@ -309,8 +301,8 @@ MyHtmlParser::closing_tag(const string &tag)
break;
case 't':
if (tag == "title") {
if (title.empty()) {
title = dump;
if (meta["title"].empty()) {
meta["title"] = dump;
dump = "";
}
break;

View File

@ -22,6 +22,8 @@
* USA
* -----END-LICENCE-----
*/
#include <map>
using std::map;
#include "htmlparse.h"
@ -37,7 +39,8 @@ class MyHtmlParser : public HtmlParser {
bool in_body_tag;
bool in_pre_tag;
bool pending_space;
string title, sample, keywords, dump, dmtime, author;
map<string,string> meta;
string dump, dmtime;
string ocharset; // This is the charset our user thinks the doc was
string charset; // This is the charset it was supposedly converted to
string doccharset; // Set this to value of charset parameter in header

View File

@ -1,5 +1,5 @@
#ifndef lint
static char rcsid[] = "@(#$Id: preview_w.cpp,v 1.20 2007-06-12 13:31:38 dockes Exp $ (C) 2005 J.F.Dockes";
static char rcsid[] = "@(#$Id: preview_w.cpp,v 1.21 2007-06-19 08:36:24 dockes Exp $ (C) 2005 J.F.Dockes";
#endif
/*
* This program is free software; you can redistribute it and/or modify
@ -400,8 +400,12 @@ QTextEdit *Preview::addEditorTab()
void Preview::setCurTabProps(const string &fn, const Rcl::Doc &doc,
int docnum)
{
QString title = QString::fromUtf8(doc.title.c_str(),
doc.title.length());
QString title;
map<string,string>::const_iterator meta_it;
if ((meta_it = doc.meta.find("title")) != doc.meta.end()) {
title = QString::fromUtf8(meta_it->second.c_str(),
meta_it->second.length());
}
if (title.length() > 20) {
title = title.left(10) + "..." + title.right(10);
}
@ -421,8 +425,8 @@ void Preview::setCurTabProps(const string &fn, const Rcl::Doc &doc,
printableUrl(doc.url, url);
string tiptxt = url + string("\n");
tiptxt += doc.mimetype + " " + string(datebuf) + "\n";
if (!doc.title.empty())
tiptxt += doc.title + "\n";
if (meta_it != doc.meta.end() && !meta_it->second.empty())
tiptxt += meta_it->second + "\n";
pvTab->setTabToolTip(w,QString::fromUtf8(tiptxt.c_str(), tiptxt.length()));
for (list<TabData>::iterator it = tabData.begin();
@ -607,8 +611,8 @@ bool Preview::loadFileInCurrentTab(string fn, size_t sz, const Rcl::Doc &idoc,
Rcl::Doc doc = idoc;
bool cancel = false;
if (doc.title.empty())
doc.title = path_getsimple(doc.url);
if (doc.meta["title"].empty())
doc.meta["title"] = path_getsimple(doc.url);
setCurTabProps(fn, doc, docnum);

View File

@ -1,5 +1,5 @@
#ifndef lint
static char rcsid[] = "@(#$Id: reslist.cpp,v 1.26 2007-06-13 17:03:23 dockes Exp $ (C) 2005 J.F.Dockes";
static char rcsid[] = "@(#$Id: reslist.cpp,v 1.27 2007-06-19 08:36:24 dockes Exp $ (C) 2005 J.F.Dockes";
#endif
#include <time.h>
@ -399,7 +399,7 @@ void ResList::resultPageNext()
if (percent == -1) {
percent = 0;
// Document not available; later ones may be, so we go on.
doc.abstract = string(tr("Unavailable document").utf8());
doc.meta["abstract"] = string(tr("Unavailable document").utf8());
}
// Determine icon to display if any
@ -426,8 +426,8 @@ void ResList::resultPageNext()
printableUrl(doc.url, url);
// Make title out of file name if none yet
if (doc.title.empty()) {
doc.title = path_getsimple(url);
if (doc.meta["title"].empty()) {
doc.meta["title"] = path_getsimple(url);
}
// Result number
@ -469,7 +469,7 @@ void ResList::resultPageNext()
(doc.syntabs || prefs.queryReplaceAbstract)) {
abstract = m_docSource->getAbstract(doc);
} else {
abstract = doc.abstract;
abstract = doc.meta["abstract"];
}
// No need to call escapeHtml(), plaintorich handles it
string richabst;
@ -505,14 +505,14 @@ void ResList::resultPageNext()
map<char,string> subs;
subs['A'] = !richabst.empty() ? richabst + "<br>" : "";
subs['D'] = datebuf;
subs['K'] = !doc.keywords.empty() ? escapeHtml(doc.keywords) + "<br>"
: "";
subs['K'] = !doc.meta["keywords"].empty() ?
escapeHtml(doc.meta["keywords"]) + "<br>" : "";
subs['L'] = linksbuf;
subs['N'] = numbuf;
subs['M'] = doc.mimetype;
subs['R'] = perbuf;
subs['S'] = sizebuf;
subs['T'] = escapeHtml(doc.title);
subs['T'] = escapeHtml(doc.meta["title"]);
subs['U'] = url;
string formatted;

View File

@ -16,7 +16,7 @@
*/
#ifndef _DOCSEQ_H_INCLUDED_
#define _DOCSEQ_H_INCLUDED_
/* @(#$Id: docseq.h,v 1.11 2007-01-19 15:22:50 dockes Exp $ (C) 2004 J.F.Dockes */
/* @(#$Id: docseq.h,v 1.12 2007-06-19 08:36:24 dockes Exp $ (C) 2004 J.F.Dockes */
#include <string>
#include <list>
#include <vector>
@ -70,7 +70,7 @@ class DocSequence {
* The default is to return the input doc's abstract fields, but some
* sequences can compute a better value (ie: docseqdb) */
virtual string getAbstract(Rcl::Doc& doc) {
return doc.abstract;
return doc.meta["abstract"];
}
/** Get estimated total count in results */

View File

@ -1,5 +1,5 @@
#ifndef lint
static char rcsid[] = "@(#$Id: docseqdb.cpp,v 1.2 2007-01-19 15:22:50 dockes Exp $ (C) 2005 J.F.Dockes";
static char rcsid[] = "@(#$Id: docseqdb.cpp,v 1.3 2007-06-19 08:36:24 dockes Exp $ (C) 2005 J.F.Dockes";
#endif
/*
* This program is free software; you can redistribute it and/or modify
@ -42,9 +42,9 @@ int DocSequenceDb::getResCnt()
string DocSequenceDb::getAbstract(Rcl::Doc &doc)
{
if (!m_db)
return doc.abstract;
return doc.meta["abstract"];
string abstract;
m_db->makeDocAbstract(doc, abstract);
return abstract.empty() ? doc.abstract : abstract;
return abstract.empty() ? doc.meta["abstract"] : abstract;
}

View File

@ -1,5 +1,5 @@
#ifndef lint
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.114 2007-06-18 13:04:15 dockes Exp $ (C) 2004 J.F.Dockes";
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.115 2007-06-19 08:36:24 dockes Exp $ (C) 2004 J.F.Dockes";
#endif
/*
* This program is free software; you can redistribute it and/or modify
@ -200,14 +200,14 @@ bool Native::dbDataToRclDoc(Xapian::docid docid, std::string &data, Doc &doc)
parms.get(string("fmtime"), doc.fmtime);
parms.get(string("dmtime"), doc.dmtime);
parms.get(string("origcharset"), doc.origcharset);
parms.get(string("caption"), doc.title);
parms.get(string("keywords"), doc.keywords);
parms.get(string("abstract"), doc.abstract);
parms.get(string("caption"), doc.meta["title"]);
parms.get(string("keywords"), doc.meta["keywords"]);
parms.get(string("abstract"), doc.meta["abstract"]);
// Possibly remove synthetic abstract indicator (if it's there, we
// used to index the beginning of the text as abstract).
doc.syntabs = false;
if (doc.abstract.find(rclSyntAbs) == 0) {
doc.abstract = doc.abstract.substr(rclSyntAbs.length());
if (doc.meta["abstract"].find(rclSyntAbs) == 0) {
doc.meta["abstract"] = doc.meta["abstract"].substr(rclSyntAbs.length());
doc.syntabs = true;
}
parms.get(string("ipath"), doc.ipath);
@ -743,12 +743,15 @@ bool Db::isopen()
// Try to translate field specification into field prefix. We have a
// default table used if translations are not in the config for some
// reason (old config not updated ?). We use it only if the config
// translation fails
string Db::fieldToPrefix(const string& fldname)
// translation fails. Also we add in there fields which should be
// indexed with no prefix (ie: abstract)
bool Db::fieldToPrefix(const string& fldname, string &pfx)
{
// This is the default table
static map<string, string> fldToPrefs;
if (fldToPrefs.empty()) {
fldToPrefs["abstract"] = "";
fldToPrefs["title"] = "S";
fldToPrefs["caption"] = "S";
fldToPrefs["subject"] = "S";
@ -763,17 +766,19 @@ string Db::fieldToPrefix(const string& fldname)
fldToPrefs["tags"] = "K";
}
string fld(fldname), pfx;
string fld(fldname);
stringtolower(fld);
RclConfig *config = RclConfig::getMainConfig();
if (config)
pfx = config->getFieldPrefix(fld);
if (pfx.empty()) {
map<string, string>::const_iterator it = fldToPrefs.find(fld);
if (it != fldToPrefs.end())
fld = it->second;
if (config && config->getFieldPrefix(fld, pfx))
return true;
map<string, string>::const_iterator it = fldToPrefs.find(fld);
if (it != fldToPrefs.end()) {
pfx = it->second;
return true;
}
return pfx;
return false;
}
@ -880,11 +885,12 @@ bool Db::add(const string &fn, const Doc &idoc, const struct stat *stp)
LOGDEB1(("Db::add: fn %s\n", fn.c_str()));
if (m_ndb == 0)
return false;
static int first = 1;
// Check file system full every mbyte of indexed text.
if (m_maxFsOccupPc > 0 && (m_curtxtsz - m_occtxtsz) / MB >= 1) {
if (m_maxFsOccupPc > 0 && (first || (m_curtxtsz - m_occtxtsz) / MB >= 1)) {
LOGDEB(("Db::add: checking file system usage\n"));
int pc;
first = 0;
if (fsocc(m_basedir, &pc) && pc >= m_maxFsOccupPc) {
LOGERR(("Db::add: stop indexing: file system "
"%d%% full > max %d%%\n", pc, m_maxFsOccupPc));
@ -895,37 +901,38 @@ bool Db::add(const string &fn, const Doc &idoc, const struct stat *stp)
Doc doc = idoc;
// The title, author, abstract and keywords fields are special, they
// get stored in the document data record.
// Truncate abstract, title and keywords to reasonable lengths. If
// abstract is currently empty, we make up one with the beginning
// of the document. This is then not indexed, but part of the doc
// data so that we can return it to a query without having to
// decode the original file.
bool syntabs = false;
if (doc.abstract.empty()) {
// Note that the map accesses by operator[] create empty entries if they
// don't exist yet.
if (doc.meta["abstract"].empty()) {
syntabs = true;
doc.abstract = rclSyntAbs +
truncate_to_word(doc.text, m_idxAbsTruncLen);
doc.meta["abstract"] = rclSyntAbs +
neutchars(truncate_to_word(doc.text, m_idxAbsTruncLen), "\n\r");
} else {
doc.abstract = truncate_to_word(doc.abstract, m_idxAbsTruncLen);
doc.meta["abstract"] =
neutchars(truncate_to_word(doc.meta["abstract"], m_idxAbsTruncLen),
"\n\r");
}
doc.abstract = neutchars(doc.abstract, "\n\r");
doc.title = neutchars(truncate_to_word(doc.title, 150), "\n\r");
doc.author = neutchars(truncate_to_word(doc.author, 150), "\n\r");
doc.keywords = neutchars(truncate_to_word(doc.keywords, 300), "\n\r");
if (doc.meta["title"].empty())
doc.meta["title"] = doc.utf8fn, "\n\r";
doc.meta["title"] =
neutchars(truncate_to_word(doc.meta["title"], 150), "\n\r");
doc.meta["author"] =
neutchars(truncate_to_word(doc.meta["author"], 150), "\n\r");
doc.meta["keywords"] =
neutchars(truncate_to_word(doc.meta["keywords"], 300),"\n\r");
Xapian::Document newdocument;
mySplitterCB splitData(newdocument, m_stops);
TextSplit splitter(&splitData);
// Index the title, document text, keywords and other textual
// metadata. These are all indexed as text with positions, as we
// may want to do phrase searches with them (this makes no sense
// for keywords by the way, but wtf).
/
// The order has no importance, and we set a position gap of 100
// between fields to avoid false proximity matches.
string noacc;
// Split and index file name as document term(s)
@ -935,35 +942,39 @@ bool Db::add(const string &fn, const Doc &idoc, const struct stat *stp)
splitData.basepos += splitData.curpos + 100;
}
// Split and index title. If title is empty here, use file name
if (doc.title.empty())
doc.title = doc.utf8fn;
if (!doc.title.empty()) {
LOGDEB2(("Db::add: split title [%s]\n", doc.title.c_str()));
if (!dumb_string(doc.title, noacc)) {
LOGERR(("Db::add: dumb_string failed\n"));
return false;
// Index textual metadata. These are all indexed as text with
// positions, as we may want to do phrase searches with them (this
// makes no sense for keywords by the way).
//
// The order has no importance, and we set a position gap of 100
// between fields to avoid false proximity matches.
map<string,string>::iterator meta_it;
string pfx;
for (meta_it = doc.meta.begin(); meta_it != doc.meta.end(); meta_it++) {
if (!meta_it->second.empty()) {
if (meta_it->first == "abstract" && syntabs)
continue;
if (!fieldToPrefix(meta_it->first, pfx)) {
LOGDEB(("Db::add: no prefix for field [%s], no indexing\n",
meta_it->first.c_str()));
continue;
}
LOGDEB(("Db::add: field [%s] pfx [%s]: [%s]\n",
meta_it->first.c_str(), pfx.c_str(),
meta_it->second.c_str()));
if (!dumb_string(meta_it->second, noacc)) {
LOGERR(("Db::add: dumb_string failed\n"));
return false;
}
splitData.setprefix(pfx); // Prefix found for this field
splitter.text_to_words(noacc);
splitData.setprefix(emptystring);
splitData.basepos += splitData.curpos + 100;
}
splitData.setprefix("S"); // Subject
splitter.text_to_words(noacc);
splitData.setprefix(emptystring);
splitData.basepos += splitData.curpos + 100;
}
// Split and index author
if (!doc.author.empty()) {
LOGDEB2(("Db::add: split author [%s]\n", doc.author.c_str()));
if (!dumb_string(doc.author, noacc)) {
LOGERR(("Db::add: dumb_string failed\n"));
return false;
}
splitData.setprefix("A");
splitter.text_to_words(noacc);
splitData.setprefix(emptystring);
splitData.basepos += splitData.curpos + 100;
}
// Split and index body
// Split and index body text
LOGDEB2(("Db::add: split body\n"));
if (!dumb_string(doc.text, noacc)) {
LOGERR(("Db::add: dumb_string failed\n"));
@ -972,36 +983,8 @@ bool Db::add(const string &fn, const Doc &idoc, const struct stat *stp)
splitter.text_to_words(noacc);
splitData.basepos += splitData.curpos + 100;
// Split and index keywords
if (!doc.keywords.empty()) {
LOGDEB2(("Db::add: split kw [%s]\n", doc.keywords.c_str()));
if (!dumb_string(doc.keywords, noacc)) {
LOGERR(("Db::add: dumb_string failed\n"));
return false;
}
splitData.setprefix("K");
splitter.text_to_words(noacc);
splitData.setprefix(emptystring);
splitData.basepos += splitData.curpos + 100;
}
// Split and index abstract. We don't do this if it is synthetic
// any more (this used to give a relevance boost to the beginning
// of text, why ?)
LOGDEB2(("Db::add: split abstract [%s]\n", doc.abstract.c_str()));
if (!syntabs) {
// syntabs indicator test kept here in case we want to go back
// to indexing synthetic abstracts one day
if (!dumb_string(syntabs ? doc.abstract.substr(rclSyntAbs.length()) :
doc.abstract, noacc)) {
LOGERR(("Db::add: dumb_string failed\n"));
return false;
}
splitter.text_to_words(noacc);
}
splitData.basepos += splitData.curpos + 100;
////// Special terms for metadata
////// Special terms for other metadata. No positions for these.
// Mime type
newdocument.add_term("T" + doc.mimetype);
@ -1075,11 +1058,14 @@ bool Db::add(const string &fn, const Doc &idoc, const struct stat *stp)
if (!doc.ipath.empty()) {
record += "\nipath=" + doc.ipath;
}
record += "\ncaption=" + doc.title;
record += "\nkeywords=" + doc.keywords;
record += "\nabstract=" + doc.abstract;
if (!doc.author.empty()) {
record += "\nauthor=" + doc.author;
if (!doc.meta["title"].empty())
record += "\ncaption=" + doc.meta["title"];
if (!doc.meta["keywords"].empty())
record += "\nkeywords=" + doc.meta["keywords"];
if (!doc.meta["abstract"].empty())
record += "\nabstract=" + doc.meta["abstract"];
if (!doc.meta["author"].empty()) {
record += "\nauthor=" + doc.meta["author"];
}
record += "\n";
LOGDEB1(("Newdocument data: %s\n", record.c_str()));

View File

@ -16,7 +16,7 @@
*/
#ifndef _DB_H_INCLUDED_
#define _DB_H_INCLUDED_
/* @(#$Id: rcldb.h,v 1.51 2007-06-18 13:04:15 dockes Exp $ (C) 2004 J.F.Dockes */
/* @(#$Id: rcldb.h,v 1.52 2007-06-19 08:36:24 dockes Exp $ (C) 2004 J.F.Dockes */
#include <string>
#include <list>
@ -95,7 +95,7 @@ class Db {
const StopList& getStopList() const {return m_stops;}
/** Field name to prefix translation (ie: author -> 'A') */
string fieldToPrefix(const string& fldname);
bool fieldToPrefix(const string& fldname, string &pfx);
/* Update-related methods ******************************************/

View File

@ -16,12 +16,14 @@
*/
#ifndef _RCLDOC_H_INCLUDED_
#define _RCLDOC_H_INCLUDED_
/* @(#$Id: rcldoc.h,v 1.2 2007-01-17 13:53:41 dockes Exp $ (C) 2006 J.F.Dockes */
/* @(#$Id: rcldoc.h,v 1.3 2007-06-19 08:36:24 dockes Exp $ (C) 2006 J.F.Dockes */
#include <string>
#include <map>
#ifndef NO_NAMESPACES
using std::string;
using std::map;
namespace Rcl {
#endif
@ -47,12 +49,16 @@ class Doc {
// Possibly set by handler
string origcharset; // Charset we transcoded from (in case we want back)
// Possibly set by handler
string title; // Possibly set by handler
string author; // Possibly set by handler
string keywords; // Possibly set by handler
string abstract; // Possibly set by handler
bool syntabs; // true if abstract is just the top of doc, not an
// explicit document attribute
// A map for textual metadata such as author, keywords, abstract, title.
// Entries possibly set by handler. If a field-name to prefix translation
// exists, the terms will be indexed with a prefix.
map<string, string> meta;
// Attribute for the "abstract" entry. true if it is just the top
// of doc, not a native document attribute
bool syntabs;
string fbytes; // File size. Set by Db::Add
string dbytes; // Doc size. Set by Db::Add from text length
@ -72,9 +78,7 @@ class Doc {
fmtime.erase();
dmtime.erase();
origcharset.erase();
title.erase();
keywords.erase();
abstract.erase();
meta.clear();
syntabs = false;
fbytes.erase();
dbytes.erase();

View File

@ -1,5 +1,5 @@
#ifndef lint
static char rcsid[] = "@(#$Id: searchdata.cpp,v 1.15 2007-06-18 13:04:15 dockes Exp $ (C) 2006 J.F.Dockes";
static char rcsid[] = "@(#$Id: searchdata.cpp,v 1.16 2007-06-19 08:36:24 dockes Exp $ (C) 2006 J.F.Dockes";
#endif
/*
* This program is free software; you can redistribute it and/or modify
@ -487,7 +487,7 @@ bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p,
}
string prefix;
if (!m_field.empty())
prefix = db.fieldToPrefix(m_field);
db.fieldToPrefix(m_field, prefix);
list<Xapian::Query> pqueries;
// We normally boost the original term in the stem expansion list. Don't
@ -541,7 +541,7 @@ bool SearchDataClauseDist::toNativeQuery(Rcl::Db &db, void *p,
string prefix;
if (!m_field.empty())
prefix = db.fieldToPrefix(m_field);
db.fieldToPrefix(m_field, prefix);
// We normally boost the original term in the stem expansion list. Don't
// do it if there are wildcards anywhere, this would skew the results.

View File

@ -1,4 +1,4 @@
# @(#$Id: mimeconf,v 1.29 2007-06-18 13:04:15 dockes Exp $ (C) 2004 J.F.Dockes
# @(#$Id: mimeconf,v 1.30 2007-06-19 08:36:24 dockes Exp $ (C) 2004 J.F.Dockes
# Recoll : associations of mime types to processing filters.
# There are different sections for decompression, 'interning' for indexing
@ -144,3 +144,4 @@ keyword = K
tag = K
keywords = K
tags = K