foundation work for configurable stored/indexed fields
This commit is contained in:
parent
c8114446cf
commit
89c3dfdf98
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: rclconfig.cpp,v 1.56 2007-12-13 06:58:21 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: rclconfig.cpp,v 1.57 2008-09-08 16:49:10 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
#endif
|
||||
/*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
@ -109,25 +109,27 @@ RclConfig::RclConfig(const string *argcnf)
|
||||
m_cdirs.push_back(path_cat(m_datadir, "examples"));
|
||||
string cnferrloc = m_confdir + " or " + path_cat(m_datadir, "examples");
|
||||
|
||||
// Read and process "recoll.conf"
|
||||
if (!updateMainConfig())
|
||||
return;
|
||||
|
||||
// Other files
|
||||
mimemap = new ConfStack<ConfTree>("mimemap", m_cdirs, true);
|
||||
if (mimemap == 0 || !mimemap->ok()) {
|
||||
m_reason = string("No or bad mimemap file in: ") + cnferrloc;
|
||||
return;
|
||||
}
|
||||
|
||||
mimeconf = new ConfStack<ConfTree>("mimeconf", m_cdirs, true);
|
||||
mimeconf = new ConfStack<ConfSimple>("mimeconf", m_cdirs, true);
|
||||
if (mimeconf == 0 || !mimeconf->ok()) {
|
||||
m_reason = string("No/bad mimeconf in: ") + cnferrloc;
|
||||
return;
|
||||
}
|
||||
mimeview = new ConfStack<ConfTree>("mimeview", m_cdirs, true);
|
||||
if (mimeconf == 0 || !mimeconf->ok()) {
|
||||
mimeview = new ConfStack<ConfSimple>("mimeview", m_cdirs, true);
|
||||
if (mimeview == 0 || !mimeview->ok()) {
|
||||
m_reason = string("No/bad mimeview in: ") + cnferrloc;
|
||||
return;
|
||||
}
|
||||
if (!readFieldsConfig(cnferrloc))
|
||||
return;
|
||||
|
||||
m_ok = true;
|
||||
setKeyDir("");
|
||||
@ -453,15 +455,129 @@ string RclConfig::getMimeHandlerDef(const std::string &mtype, bool filtertypes)
|
||||
return hs;
|
||||
}
|
||||
|
||||
// Read definitions for field prefixes, aliases, and hierarchy and arrange
|
||||
// things for speed (theses are used a lot during indexing)
|
||||
bool RclConfig::readFieldsConfig(const string& cnferrloc)
|
||||
{
|
||||
m_fields = new ConfStack<ConfSimple>("fields", m_cdirs, true);
|
||||
if (m_fields == 0 || !m_fields->ok()) {
|
||||
m_reason = string("No/bad fields file in: ") + cnferrloc;
|
||||
return false;
|
||||
}
|
||||
|
||||
// Build a direct map avoiding all indirections for field to
|
||||
// prefix translation
|
||||
// Add direct prefixes
|
||||
list<string>tps = m_fields->getNames("prefixes");
|
||||
for (list<string>::const_iterator it = tps.begin(); it != tps.end();it++) {
|
||||
string val;
|
||||
m_fields->get(*it, val, "prefixes");
|
||||
m_fldtopref[*it] = val;
|
||||
}
|
||||
// Add prefixes for aliases:
|
||||
tps = m_fields->getNames("aliases");
|
||||
for (list<string>::const_iterator it = tps.begin(); it != tps.end();it++) {
|
||||
string canonic = *it; // canonic name
|
||||
string pfx;
|
||||
map<string,string>::const_iterator pit = m_fldtopref.find(canonic);
|
||||
if (pit != m_fldtopref.end()) {
|
||||
pfx = pit->second;
|
||||
} else {
|
||||
// Note: it's perfectly normal to have no prefix for the canonic
|
||||
// name, this could be a stored, not indexed field
|
||||
LOGDEB2(("RclConfig::readFieldsConfig: no pfx for canonic [%s]\n",
|
||||
canonic.c_str()));
|
||||
continue;
|
||||
}
|
||||
string aliases;
|
||||
m_fields->get(canonic, aliases, "aliases");
|
||||
list<string> l;
|
||||
stringToStrings(aliases, l);
|
||||
for (list<string>::const_iterator ait = l.begin();
|
||||
ait != l.end(); ait++) {
|
||||
m_fldtopref[*ait] = pfx;
|
||||
}
|
||||
}
|
||||
#if 0
|
||||
for (map<string,string>::const_iterator it = m_fldtopref.begin();
|
||||
it != m_fldtopref.end(); it++) {
|
||||
LOGDEB(("RclConfig::readFieldsConfig: [%s] => [%s]\n",
|
||||
it->first.c_str(), it->second.c_str()));
|
||||
}
|
||||
#endif
|
||||
|
||||
string ss;
|
||||
if (m_fields->get("stored", ss, "stored")) {
|
||||
list<string> sl;
|
||||
stringToStrings(ss, sl);
|
||||
for (list<string>::const_iterator it = sl.begin();
|
||||
it != sl.end(); it++) {
|
||||
LOGDEB(("Inserting [%s] in stored list\n", (*it).c_str()));
|
||||
m_storedFields.insert(*it);
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Return term indexing prefix for field name (ie: "filename" -> "XSFN")
|
||||
bool RclConfig::getFieldPrefix(const string& fld, string &pfx)
|
||||
{
|
||||
if (!mimeconf->get(fld, pfx, "prefixes")) {
|
||||
LOGDEB(("getFieldPrefix: no prefix defined for '%s'\n", fld.c_str()));
|
||||
return false;
|
||||
map<string,string>::const_iterator pit = m_fldtopref.find(fld);
|
||||
if (pit != m_fldtopref.end()) {
|
||||
pfx = pit->second;
|
||||
return true;
|
||||
} else {
|
||||
LOGDEB1(("RclConfig::readFieldsConfig: no prefix for field [%s]\n",
|
||||
fld.c_str()));
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Return specialisations of field name for search expansion
|
||||
// (ie: author->[author, from])
|
||||
bool RclConfig::getFieldSpecialisations(const string& fld,
|
||||
list<string>& children, bool top)
|
||||
{
|
||||
string sclds;
|
||||
children.push_back(fld);
|
||||
if (m_fields->get(fld, sclds, "specialisations")) {
|
||||
list<string> clds;
|
||||
stringToStrings(sclds, clds);
|
||||
for (list<string>::const_iterator it = clds.begin();
|
||||
it != clds.end(); it++) {
|
||||
getFieldSpecialisations(*it, children, false);
|
||||
}
|
||||
}
|
||||
if (top) {
|
||||
children.sort();
|
||||
children.unique();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
//
|
||||
bool RclConfig::getFieldSpecialisationPrefixes(const string& fld,
|
||||
list<string>& pfxes)
|
||||
{
|
||||
list<string> clds;
|
||||
getFieldSpecialisations(fld, clds);
|
||||
for (list<string>::const_iterator it = clds.begin();
|
||||
it != clds.end(); it++) {
|
||||
string pfx;
|
||||
if (getFieldPrefix(*it, pfx))
|
||||
pfxes.push_back(pfx);
|
||||
}
|
||||
pfxes.sort();
|
||||
pfxes.unique();
|
||||
return true;
|
||||
}
|
||||
bool RclConfig::fieldIsStored(const string& fld)
|
||||
{
|
||||
set<string>::const_iterator it = m_storedFields.find(fld);
|
||||
return it != m_storedFields.end();
|
||||
}
|
||||
|
||||
string RclConfig::getMimeViewerDef(const string &mtype)
|
||||
{
|
||||
string hs;
|
||||
@ -497,7 +613,7 @@ bool RclConfig::setMimeViewerDef(const string& mt, const string& def)
|
||||
cdirs.push_back(path_cat(m_datadir, "examples"));
|
||||
|
||||
delete mimeview;
|
||||
mimeview = new ConfStack<ConfTree>("mimeview", cdirs, true);
|
||||
mimeview = new ConfStack<ConfSimple>("mimeview", cdirs, true);
|
||||
if (mimeview == 0 || !mimeview->ok()) {
|
||||
m_reason = string("No/bad mimeview in: ") + m_confdir;
|
||||
return false;
|
||||
@ -727,6 +843,7 @@ void RclConfig::freeAll()
|
||||
delete mimemap;
|
||||
delete mimeconf;
|
||||
delete mimeview;
|
||||
delete m_fields;
|
||||
delete STOPSUFFIXES;
|
||||
// just in case
|
||||
zeroMe();
|
||||
@ -747,9 +864,9 @@ void RclConfig::initFrom(const RclConfig& r)
|
||||
if (r.mimemap)
|
||||
mimemap = new ConfStack<ConfTree>(*(r.mimemap));
|
||||
if (r.mimeconf)
|
||||
mimeconf = new ConfStack<ConfTree>(*(r.mimeconf));
|
||||
mimeconf = new ConfStack<ConfSimple>(*(r.mimeconf));
|
||||
if (r.mimeview)
|
||||
mimeview = new ConfStack<ConfTree>(*(r.mimeview));
|
||||
mimeview = new ConfStack<ConfSimple>(*(r.mimeview));
|
||||
if (r.m_stopsuffixes)
|
||||
m_stopsuffixes = new SuffixStore(*((SuffixStore*)r.m_stopsuffixes));
|
||||
m_maxsufflen = r.m_maxsufflen;
|
||||
|
||||
@ -16,19 +16,23 @@
|
||||
*/
|
||||
#ifndef _RCLCONFIG_H_INCLUDED_
|
||||
#define _RCLCONFIG_H_INCLUDED_
|
||||
/* @(#$Id: rclconfig.h,v 1.39 2007-11-16 14:28:52 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
/* @(#$Id: rclconfig.h,v 1.40 2008-09-08 16:49:10 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
|
||||
#include <list>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <set>
|
||||
#include <utility>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#ifndef NO_NAMESPACES
|
||||
using std::list;
|
||||
using std::string;
|
||||
using std::vector;
|
||||
using std::pair;
|
||||
using std::set;
|
||||
using std::map;
|
||||
using std::set;
|
||||
#endif
|
||||
|
||||
#include "conftree.h"
|
||||
@ -142,6 +146,14 @@ class RclConfig {
|
||||
|
||||
/** fields: get term indexing prefix from field name or alias */
|
||||
bool getFieldPrefix(const string& fldname, string &pfx);
|
||||
/** Get implied meanings for field name (ie: author->[author, from]) */
|
||||
bool getFieldSpecialisations(const string& fld,
|
||||
list<string>& childrens, bool top = true);
|
||||
/** Get prefixes for specialisations of field name */
|
||||
bool getFieldSpecialisationPrefixes(const string& fld,
|
||||
list<string>& pfxes);
|
||||
bool fieldIsStored(const string& fld);
|
||||
const set<string>& getStoredFields() {return m_storedFields;}
|
||||
|
||||
/** mimeview: get/set external viewer exec string(s) for mimetype(s) */
|
||||
string getMimeViewerDef(const string &mimetype);
|
||||
@ -181,8 +193,11 @@ class RclConfig {
|
||||
|
||||
ConfStack<ConfTree> *m_conf; // Parsed configuration files
|
||||
ConfStack<ConfTree> *mimemap; // The files don't change with keydir,
|
||||
ConfStack<ConfTree> *mimeconf; // but their content may depend on it.
|
||||
ConfStack<ConfTree> *mimeview; //
|
||||
ConfStack<ConfSimple> *mimeconf; // but their content may depend on it.
|
||||
ConfStack<ConfSimple> *mimeview; //
|
||||
ConfStack<ConfSimple> *m_fields;
|
||||
map<string, string> m_fldtopref;
|
||||
set<string> m_storedFields;
|
||||
|
||||
void *m_stopsuffixes;
|
||||
unsigned int m_maxsufflen;
|
||||
@ -205,11 +220,13 @@ class RclConfig {
|
||||
mimemap = 0;
|
||||
mimeconf = 0;
|
||||
mimeview = 0;
|
||||
m_fields = 0;
|
||||
m_stopsuffixes = 0;
|
||||
m_maxsufflen = 0;
|
||||
}
|
||||
/** Free data then zero pointers */
|
||||
void freeAll();
|
||||
bool readFieldsConfig(const string& errloc);
|
||||
};
|
||||
|
||||
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: internfile.cpp,v 1.40 2008-09-05 10:36:06 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: internfile.cpp,v 1.41 2008-09-08 16:49:10 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
#endif
|
||||
/*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
@ -286,13 +286,14 @@ static inline bool getKeyValue(const map<string, string>& docdata,
|
||||
return false;
|
||||
}
|
||||
|
||||
static const string keyab("abstract");
|
||||
// These defs are for the Dijon meta array. Rcl::Doc predefined field
|
||||
// names are used where appropriate. In some cases, Rcl::Doc names are
|
||||
// used inside the Dijon metadata (ex: origcharset)
|
||||
static const string keyau("author");
|
||||
static const string keycs("charset");
|
||||
static const string keyct("content");
|
||||
static const string keyds("description");
|
||||
static const string keyfn("filename");
|
||||
static const string keykw("keywords");
|
||||
static const string keymd("modificationdate");
|
||||
static const string keymt("mimetype");
|
||||
static const string keyoc("origcharset");
|
||||
@ -317,8 +318,8 @@ bool FileInterner::dijontorcl(Rcl::Doc& doc)
|
||||
doc.meta[it->first] = it->second;
|
||||
}
|
||||
}
|
||||
if (doc.meta[keyab].empty() && !doc.meta[keyds].empty()) {
|
||||
doc.meta[keyab] = doc.meta[keyds];
|
||||
if (doc.meta[Rcl::Doc::keyabs].empty() && !doc.meta[keyds].empty()) {
|
||||
doc.meta[Rcl::Doc::keyabs] = doc.meta[keyds];
|
||||
doc.meta.erase(keyds);
|
||||
}
|
||||
return true;
|
||||
@ -353,7 +354,7 @@ void FileInterner::collectIpathAndMT(Rcl::Doc& doc, string& ipath) const
|
||||
} else {
|
||||
ipath += isep;
|
||||
}
|
||||
getKeyValue(docdata, keyau, doc.meta["author"]);
|
||||
getKeyValue(docdata, keyau, doc.meta[Rcl::Doc::keyau]);
|
||||
getKeyValue(docdata, keymd, doc.dmtime);
|
||||
}
|
||||
|
||||
|
||||
@ -8,8 +8,8 @@ LIBS = librcl.a
|
||||
|
||||
all: $(LIBS)
|
||||
|
||||
OBJS = rclaspell.o rclconfig.o rclinit.o textsplit.o unacpp.o csguess.o indexer.o mimetype.o htmlparse.o myhtmlparse.o mimehandler.o internfile.o mh_exec.o mh_html.o mh_mail.o mh_mbox.o mh_text.o docseq.o docseqdb.o docseqhist.o history.o recollq.o sortseq.o wasastringtoquery.o wasatorcl.o pathhash.o rcldb.o rclquery.o searchdata.o stemdb.o stoplist.o base64.o conftree.o copyfile.o debuglog.o execmd.o fstreewalk.o idfile.o fileudi.o md5.o mimeparse.o pathut.o readfile.o smallut.o transcode.o wipedir.o x11mon.o
|
||||
DEPS = rclaspell.dep.stamp rclconfig.dep.stamp rclinit.dep.stamp textsplit.dep.stamp unacpp.dep.stamp csguess.dep.stamp indexer.dep.stamp mimetype.dep.stamp htmlparse.dep.stamp myhtmlparse.dep.stamp mimehandler.dep.stamp internfile.dep.stamp mh_exec.dep.stamp mh_html.dep.stamp mh_mail.dep.stamp mh_mbox.dep.stamp mh_text.dep.stamp docseq.dep.stamp docseqdb.dep.stamp docseqhist.dep.stamp history.dep.stamp recollq.dep.stamp sortseq.dep.stamp wasastringtoquery.dep.stamp wasatorcl.dep.stamp pathhash.dep.stamp rcldb.dep.stamp rclquery.dep.stamp searchdata.dep.stamp stemdb.dep.stamp stoplist.dep.stamp base64.dep.stamp conftree.dep.stamp copyfile.dep.stamp debuglog.dep.stamp execmd.dep.stamp fstreewalk.dep.stamp idfile.dep.stamp fileudi.dep.stamp md5.dep.stamp mimeparse.dep.stamp pathut.dep.stamp readfile.dep.stamp smallut.dep.stamp transcode.dep.stamp wipedir.dep.stamp x11mon.dep.stamp
|
||||
OBJS = rclaspell.o rclconfig.o rclinit.o textsplit.o unacpp.o csguess.o indexer.o mimetype.o htmlparse.o myhtmlparse.o mimehandler.o internfile.o mh_exec.o mh_html.o mh_mail.o mh_mbox.o mh_text.o docseq.o docseqdb.o docseqhist.o history.o recollq.o sortseq.o wasastringtoquery.o wasatorcl.o pathhash.o rcldb.o rcldoc.o rclquery.o searchdata.o stemdb.o stoplist.o base64.o conftree.o copyfile.o debuglog.o execmd.o fstreewalk.o idfile.o fileudi.o md5.o mimeparse.o pathut.o readfile.o smallut.o transcode.o wipedir.o x11mon.o
|
||||
DEPS = rclaspell.dep.stamp rclconfig.dep.stamp rclinit.dep.stamp textsplit.dep.stamp unacpp.dep.stamp csguess.dep.stamp indexer.dep.stamp mimetype.dep.stamp htmlparse.dep.stamp myhtmlparse.dep.stamp mimehandler.dep.stamp internfile.dep.stamp mh_exec.dep.stamp mh_html.dep.stamp mh_mail.dep.stamp mh_mbox.dep.stamp mh_text.dep.stamp docseq.dep.stamp docseqdb.dep.stamp docseqhist.dep.stamp history.dep.stamp recollq.dep.stamp sortseq.dep.stamp wasastringtoquery.dep.stamp wasatorcl.dep.stamp pathhash.dep.stamp rcldb.dep.stamp rcldoc.dep.stamp rclquery.dep.stamp searchdata.dep.stamp stemdb.dep.stamp stoplist.dep.stamp base64.dep.stamp conftree.dep.stamp copyfile.dep.stamp debuglog.dep.stamp execmd.dep.stamp fstreewalk.dep.stamp idfile.dep.stamp fileudi.dep.stamp md5.dep.stamp mimeparse.dep.stamp pathut.dep.stamp readfile.dep.stamp smallut.dep.stamp transcode.dep.stamp wipedir.dep.stamp x11mon.dep.stamp
|
||||
|
||||
librcl.a : $(DEPS) $(OBJS) unac.o
|
||||
ar ru librcl.a $(OBJS) unac.o
|
||||
@ -71,6 +71,8 @@ pathhash.o : ../rcldb/pathhash.cpp
|
||||
$(CXX) $(ALL_CXXFLAGS) -c ../rcldb/pathhash.cpp
|
||||
rcldb.o : ../rcldb/rcldb.cpp
|
||||
$(CXX) $(ALL_CXXFLAGS) -c ../rcldb/rcldb.cpp
|
||||
rcldoc.o : ../rcldb/rcldoc.cpp
|
||||
$(CXX) $(ALL_CXXFLAGS) -c ../rcldb/rcldoc.cpp
|
||||
rclquery.o : ../rcldb/rclquery.cpp
|
||||
$(CXX) $(ALL_CXXFLAGS) -c ../rcldb/rclquery.cpp
|
||||
searchdata.o : ../rcldb/searchdata.cpp
|
||||
@ -198,6 +200,9 @@ pathhash.dep.stamp : ../rcldb/pathhash.cpp
|
||||
rcldb.dep.stamp : ../rcldb/rcldb.cpp
|
||||
$(CXX) -M $(ALL_CXXFLAGS) ../rcldb/rcldb.cpp > rcldb.dep
|
||||
touch rcldb.dep.stamp
|
||||
rcldoc.dep.stamp : ../rcldb/rcldoc.cpp
|
||||
$(CXX) -M $(ALL_CXXFLAGS) ../rcldb/rcldoc.cpp > rcldoc.dep
|
||||
touch rcldoc.dep.stamp
|
||||
rclquery.dep.stamp : ../rcldb/rclquery.cpp
|
||||
$(CXX) -M $(ALL_CXXFLAGS) ../rcldb/rclquery.cpp > rclquery.dep
|
||||
touch rclquery.dep.stamp
|
||||
@ -285,6 +290,7 @@ include wasastringtoquery.dep
|
||||
include wasatorcl.dep
|
||||
include pathhash.dep
|
||||
include rcldb.dep
|
||||
include rcldoc.dep
|
||||
include rclquery.dep
|
||||
include searchdata.dep
|
||||
include stemdb.dep
|
||||
|
||||
@ -31,6 +31,7 @@ ${depth}/query/wasastringtoquery.cpp \
|
||||
${depth}/query/wasatorcl.cpp \
|
||||
${depth}/rcldb/pathhash.cpp \
|
||||
${depth}/rcldb/rcldb.cpp \
|
||||
${depth}/rcldb/rcldoc.cpp \
|
||||
${depth}/rcldb/rclquery.cpp \
|
||||
${depth}/rcldb/searchdata.cpp \
|
||||
${depth}/rcldb/stemdb.cpp \
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: pyrecoll.cpp,v 1.10 2008-08-28 15:44:37 dockes Exp $ (C) 2007 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: pyrecoll.cpp,v 1.11 2008-09-08 16:49:10 dockes Exp $ (C) 2007 J.F.Dockes";
|
||||
#endif
|
||||
|
||||
|
||||
@ -394,8 +394,8 @@ static PyGetSetDef Doc_getseters[] = {
|
||||
"fbytes", (void *)"fbytes"},
|
||||
{"dbytes", (getter)Doc_getmeta, (setter)Doc_setmeta,
|
||||
"dbytes", (void *)"dbytes"},
|
||||
{"relevance", (getter)Doc_getmeta, (setter)Doc_setmeta,
|
||||
"relevance", (void *)"relevance"},
|
||||
{"relevancyrating", (getter)Doc_getmeta, (setter)Doc_setmeta,
|
||||
"relevance", (void *)"relevancyrating"},
|
||||
{"title", (getter)Doc_getmeta, (setter)Doc_setmeta,
|
||||
"title", (void *)"title"},
|
||||
{"keywords", (getter)Doc_getmeta, (setter)Doc_setmeta,
|
||||
|
||||
@ -3,7 +3,7 @@
|
||||
import sys
|
||||
import recoll
|
||||
allmeta = ("title", "keywords", "abstract", "url", "mimetype", "mtime",
|
||||
"ipath", "fbytes", "dbytes", "relevance")
|
||||
"ipath", "fbytes", "dbytes", "relevancyrating")
|
||||
|
||||
|
||||
def dotest(db, q):
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: preview_w.cpp,v 1.35 2008-07-01 08:27:58 dockes Exp $ (C) 2005 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: preview_w.cpp,v 1.36 2008-09-08 16:49:10 dockes Exp $ (C) 2005 J.F.Dockes";
|
||||
#endif
|
||||
/*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
@ -753,8 +753,8 @@ bool Preview::loadFileInCurrentTab(string fn, size_t sz, const Rcl::Doc &idoc,
|
||||
|
||||
Rcl::Doc doc = idoc;
|
||||
|
||||
if (doc.meta["title"].empty())
|
||||
doc.meta["title"] = path_getsimple(doc.url);
|
||||
if (doc.meta[Rcl::Doc::keytt].empty())
|
||||
doc.meta[Rcl::Doc::keytt] = path_getsimple(doc.url);
|
||||
|
||||
setCurTabProps(fn, doc, docnum);
|
||||
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: reslist.cpp,v 1.41 2008-08-26 07:33:05 dockes Exp $ (C) 2005 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: reslist.cpp,v 1.42 2008-09-08 16:49:10 dockes Exp $ (C) 2005 J.F.Dockes";
|
||||
#endif
|
||||
|
||||
#include <time.h>
|
||||
@ -418,7 +418,7 @@ void ResList::resultPageNext()
|
||||
if (percent == -1) {
|
||||
percent = 0;
|
||||
// Document not available; later ones may be, so keep going.
|
||||
doc.meta["abstract"] = string(tr("Unavailable document").utf8());
|
||||
doc.meta[Rcl::Doc::keyabs] = string(tr("Unavailable document").utf8());
|
||||
}
|
||||
|
||||
// Determine icon to display if any
|
||||
@ -442,8 +442,8 @@ void ResList::resultPageNext()
|
||||
printableUrl(rclconfig->getDefCharset(), doc.url, url);
|
||||
|
||||
// Make title out of file name if none yet
|
||||
if (doc.meta["title"].empty()) {
|
||||
doc.meta["title"] = path_getsimple(url);
|
||||
if (doc.meta[Rcl::Doc::keytt].empty()) {
|
||||
doc.meta[Rcl::Doc::keytt] = path_getsimple(url);
|
||||
}
|
||||
|
||||
// Result number
|
||||
@ -485,7 +485,7 @@ void ResList::resultPageNext()
|
||||
(doc.syntabs || prefs.queryReplaceAbstract)) {
|
||||
abstract = m_docSource->getAbstract(doc);
|
||||
} else {
|
||||
abstract = doc.meta["abstract"];
|
||||
abstract = doc.meta[Rcl::Doc::keyabs];
|
||||
}
|
||||
// No need to call escapeHtml(), plaintorich handles it
|
||||
list<string> lr;
|
||||
@ -520,14 +520,14 @@ void ResList::resultPageNext()
|
||||
subs['A'] = !richabst.empty() ? richabst + "<br>" : "";
|
||||
subs['D'] = datebuf;
|
||||
subs['I'] = img_name;
|
||||
subs['K'] = !doc.meta["keywords"].empty() ?
|
||||
escapeHtml(doc.meta["keywords"]) + "<br>" : "";
|
||||
subs['K'] = !doc.meta[Rcl::Doc::keykw].empty() ?
|
||||
escapeHtml(doc.meta[Rcl::Doc::keykw]) + "<br>" : "";
|
||||
subs['L'] = linksbuf;
|
||||
subs['N'] = numbuf;
|
||||
subs['M'] = doc.mimetype;
|
||||
subs['R'] = perbuf;
|
||||
subs['S'] = sizebuf;
|
||||
subs['T'] = escapeHtml(doc.meta["title"]);
|
||||
subs['T'] = escapeHtml(doc.meta[Rcl::Doc::keytt]);
|
||||
subs['U'] = url;
|
||||
|
||||
string formatted;
|
||||
|
||||
@ -16,7 +16,7 @@
|
||||
*/
|
||||
#ifndef _DOCSEQ_H_INCLUDED_
|
||||
#define _DOCSEQ_H_INCLUDED_
|
||||
/* @(#$Id: docseq.h,v 1.13 2008-06-13 18:22:46 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
/* @(#$Id: docseq.h,v 1.14 2008-09-08 16:49:10 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
#include <string>
|
||||
#include <list>
|
||||
#include <vector>
|
||||
@ -70,7 +70,7 @@ class DocSequence {
|
||||
* The default is to return the input doc's abstract fields, but some
|
||||
* sequences can compute a better value (ie: docseqdb) */
|
||||
virtual string getAbstract(Rcl::Doc& doc) {
|
||||
return doc.meta["abstract"];
|
||||
return doc.meta[Rcl::Doc::keyabs];
|
||||
}
|
||||
|
||||
/** Get estimated total count in results */
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: docseqdb.cpp,v 1.4 2008-06-13 18:22:46 dockes Exp $ (C) 2005 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: docseqdb.cpp,v 1.5 2008-09-08 16:49:10 dockes Exp $ (C) 2005 J.F.Dockes";
|
||||
#endif
|
||||
/*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
@ -62,10 +62,10 @@ int DocSequenceDb::getResCnt()
|
||||
string DocSequenceDb::getAbstract(Rcl::Doc &doc)
|
||||
{
|
||||
if (!m_q->whatDb())
|
||||
return doc.meta["abstract"];
|
||||
return doc.meta[Rcl::Doc::keyabs];
|
||||
string abstract;
|
||||
m_q->whatDb()->makeDocAbstract(doc, m_q.getptr(), abstract);
|
||||
return abstract.empty() ? doc.meta["abstract"] : abstract;
|
||||
return abstract.empty() ? doc.meta[Rcl::Doc::keyabs] : abstract;
|
||||
}
|
||||
|
||||
list<string> DocSequenceDb::expand(Rcl::Doc &doc)
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: recollq.cpp,v 1.13 2008-06-13 18:22:46 dockes Exp $ (C) 2006 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: recollq.cpp,v 1.14 2008-09-08 16:49:10 dockes Exp $ (C) 2006 J.F.Dockes";
|
||||
#endif
|
||||
/*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
@ -59,6 +59,7 @@ static char usage [] =
|
||||
" -d also dump file contents\n"
|
||||
" -n <cnt> limit the maximum number of results (0->no limit, default 2000)\n"
|
||||
" -b : basic. Just output urls, no mime types or titles\n"
|
||||
" -m : dump the whole document meta[] array\n"
|
||||
;
|
||||
static void
|
||||
Usage(void)
|
||||
@ -80,6 +81,7 @@ static int op_flags;
|
||||
#define OPT_l 0x100
|
||||
#define OPT_q 0x200
|
||||
#define OPT_t 0x400
|
||||
#define OPT_m 0x800
|
||||
|
||||
int recollq(RclConfig **cfp, int argc, char **argv)
|
||||
{
|
||||
@ -103,6 +105,7 @@ int recollq(RclConfig **cfp, int argc, char **argv)
|
||||
case 'd': op_flags |= OPT_d; break;
|
||||
case 'f': op_flags |= OPT_f; break;
|
||||
case 'l': op_flags |= OPT_l; break;
|
||||
case 'm': op_flags |= OPT_m; break;
|
||||
case 'n': op_flags |= OPT_n; if (argc < 2) Usage();
|
||||
limit = atoi(*(++argv));
|
||||
if (limit <= 0) limit = INT_MAX;
|
||||
@ -192,9 +195,16 @@ int recollq(RclConfig **cfp, int argc, char **argv)
|
||||
cout
|
||||
<< doc.mimetype.c_str() << "\t"
|
||||
<< "[" << doc.url.c_str() << "]" << "\t"
|
||||
<< "[" << doc.meta["title"].c_str() << "]" << "\t"
|
||||
<< "[" << doc.meta[Rcl::Doc::keytt].c_str() << "]" << "\t"
|
||||
<< doc.fbytes.c_str() << "\tbytes" << "\t"
|
||||
<< endl;
|
||||
if (op_flags & OPT_m) {
|
||||
for (map<string,string>::const_iterator it = doc.meta.begin();
|
||||
it != doc.meta.end(); it++) {
|
||||
cout << it->first << " = " << it->second << endl;
|
||||
}
|
||||
}
|
||||
cout << endl;
|
||||
}
|
||||
if (op_flags & OPT_d) {
|
||||
string fn = doc.url.substr(7);
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.142 2008-09-05 10:34:17 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.143 2008-09-08 16:49:10 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
#endif
|
||||
/*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
@ -129,6 +129,13 @@ bool Db::Native::subDocs(const string &udi, vector<Xapian::docid>& docids)
|
||||
return false;
|
||||
}
|
||||
|
||||
static const string keycap("caption");
|
||||
static const string keymtp("mtype");
|
||||
static const string keyfmt("fmtime");
|
||||
static const string keydmt("dmtime");
|
||||
static const string keyoc("origcharset");
|
||||
static const string keyurl("url");
|
||||
|
||||
// Turn data record from db into document fields
|
||||
bool Db::Native::dbDataToRclDoc(Xapian::docid docid, std::string &data,
|
||||
Doc &doc, int percent)
|
||||
@ -137,30 +144,37 @@ bool Db::Native::dbDataToRclDoc(Xapian::docid docid, std::string &data,
|
||||
ConfSimple parms(&data);
|
||||
if (!parms.ok())
|
||||
return false;
|
||||
parms.get(string("url"), doc.url);
|
||||
parms.get(string("mtype"), doc.mimetype);
|
||||
parms.get(string("fmtime"), doc.fmtime);
|
||||
parms.get(string("dmtime"), doc.dmtime);
|
||||
parms.get(string("origcharset"), doc.origcharset);
|
||||
parms.get(string("caption"), doc.meta["title"]);
|
||||
parms.get(string("keywords"), doc.meta["keywords"]);
|
||||
parms.get(string("abstract"), doc.meta["abstract"]);
|
||||
parms.get(string("author"), doc.meta["author"]);
|
||||
parms.get(keyurl, doc.url);
|
||||
parms.get(keymtp, doc.mimetype);
|
||||
parms.get(keyfmt, doc.fmtime);
|
||||
parms.get(keydmt, doc.dmtime);
|
||||
parms.get(keyoc, doc.origcharset);
|
||||
parms.get(keycap, doc.meta[Doc::keytt]);
|
||||
parms.get(Doc::keykw, doc.meta[Doc::keykw]);
|
||||
parms.get(Doc::keyabs, doc.meta[Doc::keyabs]);
|
||||
// Possibly remove synthetic abstract indicator (if it's there, we
|
||||
// used to index the beginning of the text as abstract).
|
||||
doc.syntabs = false;
|
||||
if (doc.meta["abstract"].find(rclSyntAbs) == 0) {
|
||||
doc.meta["abstract"] = doc.meta["abstract"].substr(rclSyntAbs.length());
|
||||
if (doc.meta[Doc::keyabs].find(rclSyntAbs) == 0) {
|
||||
doc.meta[Doc::keyabs] = doc.meta[Doc::keyabs].substr(rclSyntAbs.length());
|
||||
doc.syntabs = true;
|
||||
}
|
||||
char buf[20];
|
||||
sprintf(buf,"%.2f", float(percent) / 100.0);
|
||||
doc.meta["relevancyrating"] = buf;
|
||||
doc.meta[Doc::keyrr] = buf;
|
||||
parms.get(string("ipath"), doc.ipath);
|
||||
parms.get(string("fbytes"), doc.fbytes);
|
||||
parms.get(string("dbytes"), doc.dbytes);
|
||||
parms.get(string("sig"), doc.sig);
|
||||
doc.xdocid = docid;
|
||||
|
||||
// Other, not predefined meta fields:
|
||||
list<string> keys = parms.getNames(string());
|
||||
for (list<string>::const_iterator it = keys.begin();
|
||||
it != keys.end(); it++) {
|
||||
if (doc.meta.find(*it) == doc.meta.end())
|
||||
parms.get(*it, doc.meta[*it]);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -680,21 +694,21 @@ bool Db::fieldToPrefix(const string& fldname, string &pfx)
|
||||
// This is the default table
|
||||
static map<string, string> fldToPrefs;
|
||||
if (fldToPrefs.empty()) {
|
||||
fldToPrefs["abstract"] = string();
|
||||
fldToPrefs[Doc::keyabs] = string();
|
||||
fldToPrefs["ext"] = "XE";
|
||||
fldToPrefs["filename"] = "XSFN";
|
||||
|
||||
fldToPrefs["title"] = "S";
|
||||
fldToPrefs["caption"] = "S";
|
||||
fldToPrefs[keycap] = "S";
|
||||
fldToPrefs["subject"] = "S";
|
||||
|
||||
fldToPrefs["author"] = "A";
|
||||
fldToPrefs[Doc::keyau] = "A";
|
||||
fldToPrefs["creator"] = "A";
|
||||
fldToPrefs["from"] = "A";
|
||||
|
||||
fldToPrefs["keyword"] = "K";
|
||||
fldToPrefs["tag"] = "K";
|
||||
fldToPrefs["keywords"] = "K";
|
||||
fldToPrefs[Doc::keykw] = "K";
|
||||
fldToPrefs["tags"] = "K";
|
||||
}
|
||||
|
||||
@ -803,6 +817,7 @@ void Db::setAbstractParams(int idxtrunc, int syntlen, int syntctxlen)
|
||||
}
|
||||
|
||||
static const int MB = 1024 * 1024;
|
||||
static const string nc("\n\r\x0c");
|
||||
|
||||
// Add document in internal form to the database: index the terms in
|
||||
// the title abstract and body and add special terms for file name,
|
||||
@ -831,35 +846,6 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi,
|
||||
|
||||
Doc doc = idoc;
|
||||
|
||||
// The title, author, abstract and keywords fields are special, they
|
||||
// get stored in the document data record.
|
||||
// Truncate abstract, title and keywords to reasonable lengths. If
|
||||
// abstract is currently empty, we make up one with the beginning
|
||||
// of the document. This is then not indexed, but part of the doc
|
||||
// data so that we can return it to a query without having to
|
||||
// decode the original file.
|
||||
bool syntabs = false;
|
||||
// Note that the map accesses by operator[] create empty entries if they
|
||||
// don't exist yet.
|
||||
if (doc.meta["abstract"].empty()) {
|
||||
syntabs = true;
|
||||
doc.meta["abstract"] = rclSyntAbs +
|
||||
neutchars(truncate_to_word(doc.text, m_idxAbsTruncLen), "\n\r");
|
||||
} else {
|
||||
doc.meta["abstract"] =
|
||||
neutchars(truncate_to_word(doc.meta["abstract"], m_idxAbsTruncLen),
|
||||
"\n\r");
|
||||
}
|
||||
if (doc.meta["title"].empty())
|
||||
doc.meta["title"] = doc.utf8fn;
|
||||
doc.meta["title"] =
|
||||
neutchars(truncate_to_word(doc.meta["title"], 150), "\n\r");
|
||||
doc.meta["author"] =
|
||||
neutchars(truncate_to_word(doc.meta["author"], 150), "\n\r");
|
||||
doc.meta["keywords"] =
|
||||
neutchars(truncate_to_word(doc.meta["keywords"], 300),"\n\r");
|
||||
|
||||
|
||||
Xapian::Document newdocument;
|
||||
mySplitterCB splitData(newdocument, m_stops);
|
||||
TextSplit splitter(&splitData);
|
||||
@ -882,11 +868,9 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi,
|
||||
string pfx;
|
||||
for (meta_it = doc.meta.begin(); meta_it != doc.meta.end(); meta_it++) {
|
||||
if (!meta_it->second.empty()) {
|
||||
if (meta_it->first == "abstract" && syntabs)
|
||||
continue;
|
||||
if (!fieldToPrefix(meta_it->first, pfx)) {
|
||||
LOGDEB(("Db::add: no prefix for field [%s], no indexing\n",
|
||||
meta_it->first.c_str()));
|
||||
meta_it->first.c_str()));
|
||||
continue;
|
||||
}
|
||||
LOGDEB1(("Db::add: field [%s] pfx [%s]: [%s]\n",
|
||||
@ -908,7 +892,7 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi,
|
||||
else
|
||||
splitData.basepos += splitData.curpos + 100;
|
||||
|
||||
// Finally: split and index body text
|
||||
// Split and index body text
|
||||
LOGDEB2(("Db::add: split body\n"));
|
||||
if (!dumb_string(doc.text, noacc)) {
|
||||
LOGERR(("Db::add: dumb_string failed\n"));
|
||||
@ -958,11 +942,22 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi,
|
||||
buf[4] = '\0';
|
||||
newdocument.add_term("Y" + string(buf)); // Year (YYYY)
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////
|
||||
// Document data record. omindex has the following nl separated fields:
|
||||
// - url
|
||||
// - sample
|
||||
// - caption (title limited to 100 chars)
|
||||
// - mime type
|
||||
//
|
||||
// The title, author, abstract and keywords fields are special,
|
||||
// they always get stored in the document data
|
||||
// record. Configurable other fields can be, too.
|
||||
//
|
||||
// We truncate stored fields abstract, title and keywords to
|
||||
// reasonable lengths and suppress newlines (so that the data
|
||||
// record can keep a simple syntax)
|
||||
|
||||
string record = "url=" + doc.url;
|
||||
record += "\nmtype=" + doc.mimetype;
|
||||
record += "\nfmtime=" + doc.fmtime;
|
||||
@ -982,20 +977,55 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi,
|
||||
sprintf(sizebuf, "%u", (unsigned int)doc.text.length());
|
||||
record += string("\ndbytes=") + sizebuf;
|
||||
|
||||
if (!doc.ipath.empty()) {
|
||||
if (!doc.ipath.empty())
|
||||
record += "\nipath=" + doc.ipath;
|
||||
|
||||
if (doc.meta[Doc::keytt].empty())
|
||||
doc.meta[Doc::keytt] = doc.utf8fn;
|
||||
doc.meta[Doc::keytt] =
|
||||
neutchars(truncate_to_word(doc.meta[Doc::keytt], 150), nc);
|
||||
if (!doc.meta[Doc::keytt].empty())
|
||||
record += "\n" + keycap + "=" + doc.meta[Doc::keytt];
|
||||
|
||||
doc.meta[Doc::keykw] =
|
||||
neutchars(truncate_to_word(doc.meta[Doc::keykw], 300), nc);
|
||||
if (!doc.meta[Doc::keykw].empty())
|
||||
record += "\n" + Doc::keykw + "=" + doc.meta[Doc::keykw];
|
||||
|
||||
// If abstract is empty, we make up one with the beginning of the
|
||||
// document. This is then not indexed, but part of the doc data so
|
||||
// that we can return it to a query without having to decode the
|
||||
// original file.
|
||||
bool syntabs = false;
|
||||
// Note that the map accesses by operator[] create empty entries if they
|
||||
// don't exist yet.
|
||||
if (doc.meta[Doc::keyabs].empty()) {
|
||||
syntabs = true;
|
||||
if (!doc.text.empty())
|
||||
doc.meta[Doc::keyabs] = rclSyntAbs +
|
||||
neutchars(truncate_to_word(doc.text, m_idxAbsTruncLen), nc);
|
||||
} else {
|
||||
doc.meta[Doc::keyabs] =
|
||||
neutchars(truncate_to_word(doc.meta[Doc::keyabs], m_idxAbsTruncLen),
|
||||
nc);
|
||||
}
|
||||
if (!doc.meta["title"].empty())
|
||||
record += "\ncaption=" + doc.meta["title"];
|
||||
if (!doc.meta["keywords"].empty())
|
||||
record += "\nkeywords=" + doc.meta["keywords"];
|
||||
if (!doc.meta["abstract"].empty())
|
||||
record += "\nabstract=" + doc.meta["abstract"];
|
||||
if (!doc.meta["author"].empty()) {
|
||||
record += "\nauthor=" + doc.meta["author"];
|
||||
if (!doc.meta[Doc::keyabs].empty())
|
||||
record += "\n" + Doc::keyabs + "=" + doc.meta[Doc::keyabs];
|
||||
|
||||
RclConfig *config = RclConfig::getMainConfig();
|
||||
if (config) {
|
||||
const set<string>& stored = config->getStoredFields();
|
||||
for (set<string>::const_iterator it = stored.begin();
|
||||
it != stored.end(); it++) {
|
||||
if (!doc.meta[*it].empty()) {
|
||||
string value =
|
||||
neutchars(truncate_to_word(doc.meta[*it], 150), nc);
|
||||
record += "\n" + *it + "=" + value;
|
||||
}
|
||||
}
|
||||
}
|
||||
record += "\n";
|
||||
LOGDEB1(("Newdocument data: %s\n", record.c_str()));
|
||||
LOGDEB(("Rcl::Db::add: new doc record:\n %s\n", record.c_str()));
|
||||
newdocument.set_data(record);
|
||||
|
||||
const char *fnc = udi.c_str();
|
||||
|
||||
14
src/rcldb/rcldoc.cpp
Normal file
14
src/rcldb/rcldoc.cpp
Normal file
@ -0,0 +1,14 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: rcldoc.cpp,v 1.1 2008-09-08 16:49:10 dockes Exp $ (C) 2007 J.F.Dockes";
|
||||
#endif
|
||||
|
||||
|
||||
#include "rcldoc.h"
|
||||
// Out-of-class definitions of the static field-name constants declared in
// class Doc (keyfn, keyrr, keyabs, keyau, keytt, keykw). Db::addOrUpdate uses
// keytt, keykw and keyabs as keys into the Doc::meta map; the remaining
// constants are presumably used the same way elsewhere -- TODO confirm.
namespace Rcl {
|
||||
const string Doc::keyabs("abstract");
|
||||
const string Doc::keyau("author");
|
||||
const string Doc::keyfn("filename");
|
||||
const string Doc::keykw("keywords");
|
||||
const string Doc::keyrr("relevancyrating");
|
||||
const string Doc::keytt("title");
|
||||
}
|
||||
@ -16,7 +16,7 @@
|
||||
*/
|
||||
#ifndef _RCLDOC_H_INCLUDED_
|
||||
#define _RCLDOC_H_INCLUDED_
|
||||
/* @(#$Id: rcldoc.h,v 1.8 2008-08-26 07:33:31 dockes Exp $ (C) 2006 J.F.Dockes */
|
||||
/* @(#$Id: rcldoc.h,v 1.9 2008-09-08 16:49:10 dockes Exp $ (C) 2006 J.F.Dockes */
|
||||
|
||||
#include <string>
|
||||
#include <map>
|
||||
@ -51,7 +51,7 @@ class Doc {
|
||||
|
||||
// Transcoded version of the simple file name for SFN-prefixed
|
||||
// specific file name indexation
|
||||
// Indexx: set by DbIndexer::processone
|
||||
// Index: set by DbIndexer::processone
|
||||
string utf8fn;
|
||||
|
||||
// Internal path for multi-doc files. Ascii
|
||||
@ -78,11 +78,13 @@ class Doc {
|
||||
// handler. If a fieldname-to-prefix translation exists, the
|
||||
// terms in the value will be indexed with a prefix.
|
||||
// Only some predefined fields are stored in the data record:
|
||||
// "title", "keywords", "abstract", "author"
|
||||
// "title", "keywords", "abstract", "author", but if a field name is
|
||||
// in the "stored" configuration list, it will be stored too.
|
||||
map<string, string> meta;
|
||||
|
||||
// Attribute for the "abstract" entry. true if it is just the top
|
||||
// of doc, not a native document attribute.
|
||||
// of doc, not a native document attribute. Not stored directly, but
|
||||
// as an indicative prefix at the beginning of the abstract (ugly hack)
|
||||
bool syntabs;
|
||||
|
||||
// File size. Index: Set by caller prior to Db::Add. Query: set by
|
||||
@ -110,7 +112,7 @@ class Doc {
|
||||
// and indexed
|
||||
string text;
|
||||
|
||||
int pc; // used by sortseq, convenience
|
||||
int pc; // relevancy percentage, used by sortseq, convenience
|
||||
unsigned long xdocid; // Opaque: rcldb doc identifier.
|
||||
|
||||
///////////////////////////////////////////////////////////////////
|
||||
@ -132,6 +134,12 @@ class Doc {
|
||||
pc = 0;
|
||||
xdocid = 0;
|
||||
}
|
||||
static const string keyfn;
|
||||
static const string keyrr;
|
||||
static const string keyabs;
|
||||
static const string keyau;
|
||||
static const string keytt;
|
||||
static const string keykw;
|
||||
};
|
||||
|
||||
|
||||
|
||||
@ -97,6 +97,7 @@ ${INSTALL} -m 0444 \
|
||||
sampleconf/mimeview \
|
||||
sampleconf/recoll.conf \
|
||||
sampleconf/mimemap \
|
||||
sampleconf/fields \
|
||||
${datadir}/recoll/examples/ || exit 1
|
||||
${INSTALL} -m 0755 index/rclmon.sh ${datadir}/recoll/examples/ || exit 1
|
||||
|
||||
|
||||
55
src/sampleconf/fields
Normal file
55
src/sampleconf/fields
Normal file
@ -0,0 +1,55 @@
|
||||
# @(#$Id: fields,v 1.1 2008-09-08 16:49:10 dockes Exp $ (C) 2007 J.F.Dockes
|
||||
# Field names configuration. This defines how one may search, e.g. for
|
||||
# author:Hemingway
|
||||
# Important:
|
||||
# - the field names MUST be all lowercase here. They can use any case
|
||||
# in the documents.
|
||||
|
||||
#####################################################
|
||||
# This section defines what prefix the terms inside named fields will be
|
||||
# indexed with (in addition to prefix-less indexing for general search)
|
||||
# ALL prefixes MUST be all UPPERCASE. Extension prefixes begin with X
|
||||
#
|
||||
# The choice of field names is rather arbitrary. Use of any of the aliases
|
||||
# defined in the following section will yield exactly the same results,
|
||||
# (both for indexing and search).
|
||||
[prefixes]
|
||||
|
||||
# Native fields matching omega uses, which we index without an X first
|
||||
# letter. Don't change these
|
||||
title = S
|
||||
author = A
|
||||
keyword = K
|
||||
|
||||
# Extension examples. These are actually used by default by Recoll:
|
||||
ext = XE
|
||||
filename = XSFN
|
||||
|
||||
############################
|
||||
# Some fields are stored in the document data record inside the index and
|
||||
# can be returned in result lists. There is no necessity that stored fields
|
||||
# should be indexed (have a prefix). Example: url (but this one doesn't need
|
||||
# to be listed here)
|
||||
#
|
||||
# Some fields are stored by default, don't add them here, else they will be
|
||||
# stored twice: title, keywords, abstract, filename, mimetype, url
|
||||
# "author" used to be stored by default, now set here as optional
|
||||
[stored]
|
||||
stored = author
|
||||
|
||||
##########################
|
||||
# This section defines field name aliases or synonyms. Any right hand side
|
||||
# value will be turned into the lhs canonical name before further treatment
|
||||
[aliases]
|
||||
title = caption subject
|
||||
author = creator
|
||||
keyword = keywords tag tags
|
||||
dmtime = date contentmodified datemodified
|
||||
mtype = type mimetype contenttype
|
||||
ext = fileextension
|
||||
|
||||
#########################
|
||||
# This section defines a hierarchy for field names. Searching for a lhs
|
||||
# ancestor will be expanded to a search for itself and all rhs descendants
|
||||
[specialisations]
|
||||
author = from
|
||||
Loading…
x
Reference in New Issue
Block a user