diff --git a/src/common/beaglequeuecache.cpp b/src/common/beaglequeuecache.cpp index ee706369..5f3335e0 100644 --- a/src/common/beaglequeuecache.cpp +++ b/src/common/beaglequeuecache.cpp @@ -1,5 +1,23 @@ +/* Copyright (C) 2011 J.F.Dockes + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the + * Free Software Foundation, Inc., + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + #include "autoconfig.h" +#include "cstr.h" #include "beaglequeuecache.h" #include "circache.h" #include "debuglog.h" @@ -42,18 +60,18 @@ bool BeagleQueueCache::getFromCache(const string& udi, Rcl::Doc &dotdoc, ConfSimple cf(dict, 1); if (htt) - cf.get(Rcl::Doc::keybght, *htt, ""); + cf.get(Rcl::Doc::keybght, *htt, cstr_null); // Build a doc from saved metadata - cf.get("url", dotdoc.url, ""); - cf.get("mimetype", dotdoc.mimetype, ""); - cf.get("fmtime", dotdoc.fmtime, ""); - cf.get("fbytes", dotdoc.fbytes, ""); - dotdoc.sig = ""; - list names = cf.getNames(""); + cf.get(cstr_url, dotdoc.url, cstr_null); + cf.get(cstr_mimetype, dotdoc.mimetype, cstr_null); + cf.get(cstr_fmtime, dotdoc.fmtime, cstr_null); + cf.get(cstr_fbytes, dotdoc.fbytes, cstr_null); + dotdoc.sig.clear(); + list names = cf.getNames(cstr_null); for (list::const_iterator it = names.begin(); it != names.end(); it++) { - cf.get(*it, dotdoc.meta[*it], ""); + cf.get(*it, dotdoc.meta[*it], cstr_null); } dotdoc.meta[Rcl::Doc::keyudi] = udi; return true; diff --git a/src/common/cstr.cpp b/src/common/cstr.cpp new file mode 100644 index 00000000..0119e809 --- /dev/null +++ b/src/common/cstr.cpp @@ -0,0 +1,6 @@ + +#include "cstr.h" +#define RCLIN_CSTR_CPPFILE +#undef _CSTR_H_INCLUDED_ +#include "cstr.h" + diff --git a/src/common/cstr.h b/src/common/cstr.h new file mode 100644 index 00000000..001607c7 --- /dev/null +++ b/src/common/cstr.h @@ -0,0 +1,62 @@ +/* Copyright (C) 2011 J.F.Dockes + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the + * Free Software Foundation, Inc., + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#ifndef _CSTR_H_INCLUDED_ +#define _CSTR_H_INCLUDED_ + +// recoll mostly uses STL strings. In many places we had automatic +// conversion from a C string to an STL one. This costs, and can +// become significant if used often. +// +// This file and the associated .cpp file declares/defines constant +// strings used in the program. Strings are candidates for a move here +// when they are used in a fast loop or are shared. + +#include +using std::string; + +// The following slightly hacky preprocessing directives and the +// companion code in the cpp file looks complicated, but it just +// ensures that we only have to write the strings once to get the +// extern declaration and the definition. +#ifdef RCLIN_CSTR_CPPFILE +#undef DEF_CSTR +#define DEF_CSTR(NM, STR) const string cstr_##NM(STR) +#else +#define DEF_CSTR(NM, STR) extern const string cstr_##NM +#endif + +DEF_CSTR(author, "author"); +DEF_CSTR(caption, "caption"); +DEF_CSTR(charset, "charset"); +DEF_CSTR(content, "content"); +DEF_CSTR(dmtime, "dmtime"); +DEF_CSTR(dquote, "\""); +DEF_CSTR(fbytes, "fbytes"); +DEF_CSTR(fileu, "file://"); +DEF_CSTR(fmtime, "fmtime"); +DEF_CSTR(ipath, "ipath"); +DEF_CSTR(iso_8859_1, "ISO-8859-1"); +DEF_CSTR(mimetype, "mimetype"); +DEF_CSTR(minwilds, "*?["); +DEF_CSTR(newline, "\n"); +DEF_CSTR(null, ""); +DEF_CSTR(plus, "+"); +DEF_CSTR(textplain, "text/plain"); +DEF_CSTR(url, "url"); + +#endif /* _CSTR_H_INCLUDED_ */ diff --git a/src/common/rclconfig.cpp b/src/common/rclconfig.cpp index 68e18781..8caf72a4 100644 --- a/src/common/rclconfig.cpp +++ b/src/common/rclconfig.cpp @@ -35,6 +35,7 @@ #include #include +#include "cstr.h" #include "pathut.h" #include "rclconfig.h" #include "conftree.h" @@ -176,7 +177,7 @@ RclConfig::RclConfig(const string *argcnf) return; m_ok = true; - setKeyDir(""); + setKeyDir(cstr_null); m_stpsuffstate.init(this, mimemap, "recoll_noindex"); m_skpnstate.init(this, m_conf, "skippedNames"); @@ -199,7 +200,7 @@ bool RclConfig::updateMainConfig() m_rmtstate.init(this, 0, "indexedmimetypes"); return false; } - setKeyDir(""); + setKeyDir(cstr_null); bool nocjk = false; if (getConfParam("nocjk", &nocjk) && nocjk == true) { TextSplit::cjkProcessing(false); @@ -348,7 +349,7 @@ const string& RclConfig::getDefCharset(bool filename) } else { // Note: it seems that all versions of iconv will take // iso-8859. Some won't take iso8859 - localecharset = string("ISO-8859-1"); + localecharset = string(cstr_iso_8859_1); } LOGDEB1(("RclConfig::getDefCharset: localecharset [%s]\n", localecharset.c_str())); @@ -377,7 +378,7 @@ bool RclConfig::addLocalFields(map *tgt) sfields[i] = '\n'; // Parse the result with a confsimple and add the results to the metadata ConfSimple conf(sfields, 1, true); - list nmlst = conf.getNames(""); + list nmlst = conf.getNames(cstr_null); for (list::const_iterator it = nmlst.begin(); it != nmlst.end(); it++) { conf.get(*it, (*tgt)[*it]); @@ -488,15 +489,15 @@ string RclConfig::getMimeTypeFromSuffix(const string& suff) string RclConfig::getSuffixFromMimeType(const string &mt) { string suffix; - listsfs = mimemap->getNames(""); + listsfs = mimemap->getNames(cstr_null); string mt1; for (list::const_iterator it = sfs.begin(); it != sfs.end(); it++) { - if (mimemap->get(*it, mt1, "")) + if (mimemap->get(*it, mt1, cstr_null)) if (!stringicmp(mt, mt1)) return *it; } - return ""; + return cstr_null; } /** Get list of file categories from mimeconf */ @@ -955,7 +956,7 @@ bool RclConfig::getUncompressor(const string &mtype, list& cmd) { string hs; - mimeconf->get(mtype, hs, ""); + mimeconf->get(mtype, hs, cstr_null); if (hs.empty()) return false; list tokens; @@ -1205,7 +1206,7 @@ int main(int argc, char **argv) } } } else { - config->setKeyDir(""); + config->setKeyDir(cstr_null); list names = config->getConfNames(); names.sort(); names.unique(); diff --git a/src/index/beaglequeue.cpp b/src/index/beaglequeue.cpp index 56f6d0dc..9ee9274d 100644 --- a/src/index/beaglequeue.cpp +++ b/src/index/beaglequeue.cpp @@ -20,7 +20,7 @@ #include #include -#include "autoconfig.h" +#include "cstr.h" #include "pathut.h" #include "debuglog.h" #include "fstreewalk.h" @@ -127,11 +127,11 @@ public: confstr += line + "\n"; } ConfSimple fields(confstr, 1); - list names = fields.getNames(""); + list names = fields.getNames(cstr_null); for (list::iterator it = names.begin(); it != names.end(); it++) { string value; - fields.get(*it, value, ""); + fields.get(*it, value, cstr_null); if (!value.compare("undefined") || !value.compare("null")) continue; @@ -158,10 +158,10 @@ public: // something homogenous and easy to save. for (map::const_iterator it = doc.meta.begin(); it != doc.meta.end(); it++) { - m_fields.set((*it).first, (*it).second, ""); + m_fields.set((*it).first, (*it).second, cstr_null); } - m_fields.set("url", doc.url, ""); - m_fields.set("mimetype", doc.mimetype, ""); + m_fields.set(cstr_url, doc.url, cstr_null); + m_fields.set(cstr_mimetype, doc.mimetype, cstr_null); return true; } @@ -215,10 +215,10 @@ bool BeagleQueueIndexer::indexFromCache(const string& udi) if (!stringlowercmp("bookmark", hittype)) { // Just index the dotdoc dotdoc.meta[Rcl::Doc::keybcknd] = "BGL"; - return m_db->addOrUpdate(udi, "", dotdoc); + return m_db->addOrUpdate(udi, cstr_null, dotdoc); } else if (stringlowercmp("webhistory", dotdoc.meta[Rcl::Doc::keybght]) || (dotdoc.mimetype.compare("text/html") && - dotdoc.mimetype.compare("text/plain"))) { + dotdoc.mimetype.compare(cstr_textplain))) { LOGDEB(("BeagleQueueIndexer: skipping: hittype %s mimetype %s\n", dotdoc.meta[Rcl::Doc::keybght].c_str(), dotdoc.mimetype.c_str())); return true; @@ -243,9 +243,9 @@ bool BeagleQueueIndexer::indexFromCache(const string& udi) doc.fmtime = dotdoc.fmtime; doc.url = dotdoc.url; doc.fbytes = dotdoc.fbytes; - doc.sig = ""; + doc.sig.clear(); doc.meta[Rcl::Doc::keybcknd] = "BGL"; - return m_db->addOrUpdate(udi, "", doc); + return m_db->addOrUpdate(udi, cstr_null, doc); } } @@ -289,7 +289,7 @@ bool BeagleQueueIndexer::index() } if (udi.empty()) continue; - if (m_db->needUpdate(udi, "")) { + if (m_db->needUpdate(udi, cstr_null)) { try { // indexFromCache does a CirCache::get(). We could // arrange to use a getCurrent() instead, would be more @@ -390,7 +390,7 @@ BeagleQueueIndexer::processone(const string &path, // Have to use the hit type for the udi, because the same url can exist // as a bookmark or a page. udipath = path_cat(dotdoc.meta[Rcl::Doc::keybght], url_gpath(dotdoc.url)); - make_udi(udipath, "", udi); + make_udi(udipath, cstr_null, udi); LOGDEB(("BeagleQueueIndexer: prc1: udi [%s]\n", udi.c_str())); char ascdate[30]; @@ -408,15 +408,15 @@ BeagleQueueIndexer::processone(const string &path, dotdoc.fbytes = cbuf; // Document signature for up to date checks: none. - dotdoc.sig = ""; + dotdoc.sig.clear(); dotdoc.meta[Rcl::Doc::keybcknd] = "BGL"; - if (!m_db->addOrUpdate(udi, "", dotdoc)) + if (!m_db->addOrUpdate(udi, cstr_null, dotdoc)) return FsTreeWalker::FtwError; } else if (stringlowercmp("webhistory", dotdoc.meta[Rcl::Doc::keybght]) || (dotdoc.mimetype.compare("text/html") && - dotdoc.mimetype.compare("text/plain"))) { + dotdoc.mimetype.compare(cstr_textplain))) { LOGDEB(("BeagleQueueIndexer: skipping: hittype %s mimetype %s\n", dotdoc.meta[Rcl::Doc::keybght].c_str(), dotdoc.mimetype.c_str())); // Unlink them anyway @@ -454,20 +454,20 @@ BeagleQueueIndexer::processone(const string &path, sprintf(cbuf, OFFTPC, stp->st_size); doc.fbytes = cbuf; // Document signature for up to date checks: none. - doc.sig = ""; + doc.sig.clear(); doc.url = dotdoc.url; doc.meta[Rcl::Doc::keybcknd] = "BGL"; - if (!m_db->addOrUpdate(udi, "", doc)) + if (!m_db->addOrUpdate(udi, cstr_null, doc)) return FsTreeWalker::FtwError; } // Copy to cache { // doc fields not in meta, needing saving to the cache - dotfile.m_fields.set("fmtime", dotdoc.fmtime, ""); - dotfile.m_fields.set("fbytes", dotdoc.fbytes, ""); - dotfile.m_fields.set("udi", udi, ""); + dotfile.m_fields.set("fmtime", dotdoc.fmtime, cstr_null); + dotfile.m_fields.set("fbytes", dotdoc.fbytes, cstr_null); + dotfile.m_fields.set("udi", udi, cstr_null); string fdata; file_to_string(path, fdata); if (!m_cache || !m_cache->cc()) { diff --git a/src/index/fsindexer.cpp b/src/index/fsindexer.cpp index a2f23610..eb6dd0c8 100644 --- a/src/index/fsindexer.cpp +++ b/src/index/fsindexer.cpp @@ -30,6 +30,7 @@ #include #include +#include "cstr.h" #include "pathut.h" #include "conftree.h" #include "rclconfig.h" @@ -241,7 +242,7 @@ bool FsIndexer::purgeFiles(list& files) return false; for (list::iterator it = files.begin(); it != files.end(); ) { string udi; - make_udi(*it, "", udi); + make_udi(*it, cstr_null, udi); // rcldb::purgefile returns true if the udi was either not // found or deleted, false only in case of actual error bool existed; @@ -342,7 +343,7 @@ FsIndexer::processone(const std::string &fn, const struct stat *stp, string sig; makesig(stp, sig); string udi; - make_udi(fn, "", udi); + make_udi(fn, cstr_null, udi); if (!m_db->needUpdate(udi, sig)) { LOGDEB0(("processone: up to date: %s\n", fn.c_str())); if (m_updater) { @@ -384,9 +385,8 @@ FsIndexer::processone(const std::string &fn, const struct stat *stp, "UTF-8")); string parent_udi; - make_udi(fn, "", parent_udi); + make_udi(fn, cstr_null, parent_udi); Rcl::Doc doc; - const string plus("+"); char ascdate[30]; sprintf(ascdate, "%ld", long(stp->st_mtime)); @@ -413,7 +413,7 @@ FsIndexer::processone(const std::string &fn, const struct stat *stp, if (doc.fmtime.empty()) doc.fmtime = ascdate; if (doc.url.empty()) - doc.url = string("file://") + fn; + doc.url = cstr_fileu + fn; if (doc.utf8fn.empty()) doc.utf8fn = utf8fn; @@ -432,7 +432,7 @@ FsIndexer::processone(const std::string &fn, const struct stat *stp, // myriads of such files, the ext script is executed for them // and fails every time) if (fis == FileInterner::FIError) { - doc.sig += plus; + doc.sig += cstr_plus; } // Possibly add fields from local config @@ -442,7 +442,7 @@ FsIndexer::processone(const std::string &fn, const struct stat *stp, // of the file document. string udi; make_udi(fn, doc.ipath, udi); - if (!m_db->addOrUpdate(udi, doc.ipath.empty() ? "" : parent_udi, doc)) + if (!m_db->addOrUpdate(udi, doc.ipath.empty() ? cstr_null : parent_udi, doc)) return FsTreeWalker::FtwError; // Tell what we are doing and check for interrupt request @@ -468,14 +468,14 @@ FsIndexer::processone(const std::string &fn, const struct stat *stp, fileDoc.fmtime = ascdate; fileDoc.utf8fn = utf8fn; fileDoc.mimetype = interner.getMimetype(); - fileDoc.url = string("file://") + fn; + fileDoc.url = cstr_fileu + fn; char cbuf[100]; sprintf(cbuf, OFFTPC, stp->st_size); fileDoc.fbytes = cbuf; // Document signature for up to date checks. makesig(stp, fileDoc.sig); - if (!m_db->addOrUpdate(parent_udi, "", fileDoc)) + if (!m_db->addOrUpdate(parent_udi, cstr_null, fileDoc)) return FsTreeWalker::FtwError; } diff --git a/src/index/indexer.cpp b/src/index/indexer.cpp index 7cdf3849..c7203358 100644 --- a/src/index/indexer.cpp +++ b/src/index/indexer.cpp @@ -25,6 +25,7 @@ #include +#include "cstr.h" #include "debuglog.h" #include "indexer.h" #include "fsindexer.h" @@ -58,7 +59,7 @@ bool ConfIndexer::index(bool resetbefore, ixType typestorun) return false; } - m_config->setKeyDir(""); + m_config->setKeyDir(cstr_null); if (typestorun & IxTFs) { deleteZ(m_fsindexer); m_fsindexer = new FsIndexer(m_config, &m_db, m_updater); @@ -118,7 +119,7 @@ bool ConfIndexer::indexFiles(std::list& ifiles, IxFlag flag) m_config->getDbDir().c_str())); return false; } - m_config->setKeyDir(""); + m_config->setKeyDir(cstr_null); bool ret = false; if (!m_fsindexer) m_fsindexer = new FsIndexer(m_config, &m_db, m_updater); @@ -163,7 +164,7 @@ bool ConfIndexer::purgeFiles(std::list &files) return false; } bool ret = false; - m_config->setKeyDir(""); + m_config->setKeyDir(cstr_null); if (!m_fsindexer) m_fsindexer = new FsIndexer(m_config, &m_db, m_updater); if (m_fsindexer) diff --git a/src/internfile/internfile.cpp b/src/internfile/internfile.cpp index 60784c68..b44d045f 100644 --- a/src/internfile/internfile.cpp +++ b/src/internfile/internfile.cpp @@ -32,6 +32,7 @@ using namespace std; #endif /* NO_NAMESPACES */ +#include "cstr.h" #include "internfile.h" #include "rcldoc.h" #include "mimetype.h" @@ -51,7 +52,6 @@ using namespace std; #include "pxattr.h" #endif // RCL_USE_XATTR -static const string cstr_stxtplain("text/plain"); // The internal path element separator. This can't be the same as the rcldb // file to ipath separator : "|" @@ -206,7 +206,7 @@ void FileInterner::init(const string &f, const struct stat *stp, RclConfig *cnf, // Indexing by udi makes things easier (because they sometimes get a temp // as input string udi; - make_udi(f, "", udi); + make_udi(f, cstr_null, udi); cnf->setKeyDir(path_getfather(m_fn)); @@ -366,7 +366,7 @@ void FileInterner::initcommon(RclConfig *cnf, int flags) m_handlers.reserve(MAXHANDLERS); for (unsigned int i = 0; i < MAXHANDLERS; i++) m_tmpflgs[i] = false; - m_targetMType = cstr_stxtplain; + m_targetMType = cstr_textplain; } // We used a single beagle cache object to access beagle data. We protect it @@ -398,7 +398,7 @@ FileInterner::FileInterner(const Rcl::Doc& idoc, RclConfig *cnf, if (backend.empty() || !backend.compare("FS")) { // Filesystem document. Intern from file. // The url has to be like file:// - if (idoc.url.find("file://") != 0) { + if (idoc.url.find(cstr_fileu) != 0) { LOGERR(("FileInterner: FS backend and non fs url: [%s]\n", idoc.url.c_str())); return; @@ -565,13 +565,9 @@ static inline bool getKeyValue(const map& docdata, // These defs are for the Dijon meta array. Rcl::Doc predefined field // names are used where appropriate. In some cases, Rcl::Doc names are // used inside the Dijon metadata (ex: origcharset) -static const string cstr_keyau("author"); -static const string cstr_keycs("charset"); -static const string cstr_keyct("content"); static const string cstr_keyds("description"); static const string cstr_keyfn("filename"); static const string cstr_keymd("modificationdate"); -static const string cstr_keymt("mimetype"); static const string cstr_keytt("title"); bool FileInterner::dijontorcl(Rcl::Doc& doc) @@ -586,13 +582,13 @@ bool FileInterner::dijontorcl(Rcl::Doc& doc) for (map::const_iterator it = docdata.begin(); it != docdata.end(); it++) { - if (it->first == cstr_keyct) { + if (it->first == cstr_content) { doc.text = it->second; } else if (it->first == cstr_keymd) { doc.dmtime = it->second; } else if (it->first == Rcl::Doc::keyoc) { doc.origcharset = it->second; - } else if (it->first == cstr_keymt || it->first == cstr_keycs) { + } else if (it->first == cstr_mimetype || it->first == cstr_charset) { // don't need/want these. } else { doc.meta[it->first] = it->second; @@ -632,18 +628,18 @@ void FileInterner::collectIpathAndMT(Rcl::Doc& doc) const for (vector::const_iterator hit = m_handlers.begin(); hit != m_handlers.end(); hit++) { const map& docdata = (*hit)->get_meta_data(); - if (getKeyValue(docdata, "ipath", ipathel)) { + if (getKeyValue(docdata, cstr_ipath, ipathel)) { if (!ipathel.empty()) { // We have a non-empty ipath hasipath = true; - getKeyValue(docdata, cstr_keymt, doc.mimetype); + getKeyValue(docdata, cstr_mimetype, doc.mimetype); getKeyValue(docdata, cstr_keyfn, doc.utf8fn); } doc.ipath += colon_hide(ipathel) + cstr_isep; } else { doc.ipath += cstr_isep; } - getKeyValue(docdata, cstr_keyau, doc.meta[Rcl::Doc::keyau]); + getKeyValue(docdata, cstr_author, doc.meta[Rcl::Doc::keyau]); getKeyValue(docdata, cstr_keymd, doc.dmtime); } @@ -682,8 +678,8 @@ int FileInterner::addHandler() { const map& docdata = m_handlers.back()->get_meta_data(); string charset, mimetype; - getKeyValue(docdata, cstr_keycs, charset); - getKeyValue(docdata, cstr_keymt, mimetype); + getKeyValue(docdata, cstr_charset, charset); + getKeyValue(docdata, cstr_mimetype, mimetype); LOGDEB(("FileInterner::addHandler: next_doc is %s\n", mimetype.c_str())); @@ -691,7 +687,7 @@ int FileInterner::addHandler() // general), we're done decoding. If we hit text/plain, we're done // in any case if (!stringicmp(mimetype, m_targetMType) || - !stringicmp(mimetype, cstr_stxtplain)) { + !stringicmp(mimetype, cstr_textplain)) { m_reachedMType = mimetype; LOGDEB1(("FileInterner::addHandler: target reached\n")); return ADD_BREAK; @@ -724,7 +720,7 @@ int FileInterner::addHandler() const string *txt = &ns; { map::const_iterator it; - it = docdata.find(cstr_keyct); + it = docdata.find(cstr_content); if (it != docdata.end()) txt = &it->second; } diff --git a/src/internfile/mh_exec.cpp b/src/internfile/mh_exec.cpp index 4ed07d14..81cbc8cf 100644 --- a/src/internfile/mh_exec.cpp +++ b/src/internfile/mh_exec.cpp @@ -14,7 +14,7 @@ * Free Software Foundation, Inc., * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ - +#include "cstr.h" #include "execmd.h" #include "mh_exec.h" #include "mh_html.h" @@ -95,7 +95,7 @@ bool MimeHandlerExec::next_document() myparams.push_back(m_ipath); // Execute command, store the output - string& output = m_metaData["content"]; + string& output = m_metaData[cstr_content]; output.erase(); ExecCmd mexec; MEAdv adv(filtermaxseconds); @@ -146,7 +146,7 @@ bool MimeHandlerExec::next_document() void MimeHandlerExec::finaldetails() { - string& output = m_metaData["content"]; + string& output = m_metaData[cstr_content]; // If output is text/plain (not text/html), we may have to convert // it to utf-8, because this is the last point where it can be done. @@ -158,7 +158,7 @@ void MimeHandlerExec::finaldetails() } string mt = cfgFilterOutputMtype.empty() ? "text/html" : cfgFilterOutputMtype; - if (!mt.compare("text/plain") && stringlowercmp("utf-8", charset)) { + if (!mt.compare(cstr_textplain) && stringlowercmp("utf-8", charset)) { string transcoded; int ecnt; if (!transcode(output, transcoded, charset, "UTF-8", &ecnt)) { @@ -183,8 +183,8 @@ void MimeHandlerExec::finaldetails() // Supposed contents charset encoding. This could still be // overridden by the content-type meta tag for html, but this is // wasteful so we hope it's correct - m_metaData["charset"] = charset; - m_metaData["mimetype"] = mt; + m_metaData[cstr_charset] = charset; + m_metaData[cstr_mimetype] = mt; string md5, xmd5, reason; if (MD5File(m_fn, md5, &reason)) { diff --git a/src/internfile/mh_execm.cpp b/src/internfile/mh_execm.cpp index 6dfacc2f..7bf89042 100644 --- a/src/internfile/mh_execm.cpp +++ b/src/internfile/mh_execm.cpp @@ -19,6 +19,7 @@ #include #include +#include "cstr.h" #include "mh_execm.h" #include "mh_html.h" #include "debuglog.h" @@ -85,7 +86,7 @@ bool MimeHandlerExecMultiple::readDataElement(string& name, string &data) // Empty line (end of message) ? if (!ibuf.compare("\n")) { LOGDEB(("MHExecMultiple: Got empty line\n")); - name = ""; + name.clear(); return true; } @@ -122,7 +123,7 @@ bool MimeHandlerExecMultiple::readDataElement(string& name, string &data) // piece string *datap = &data; if (!stringlowercmp("document:", name)) { - datap = &m_metaData["content"]; + datap = &m_metaData[cstr_content]; } else { datap = &data; } @@ -163,7 +164,7 @@ bool MimeHandlerExecMultiple::next_document() } else { obuf << "Filename: " << 0 << "\n"; } - if (m_ipath.length()) { + if (!m_ipath.empty()) { LOGDEB(("next_doc: sending len %d val [%s]\n", m_ipath.length(), m_ipath.c_str())); obuf << "Ipath: " << m_ipath.length() << "\n" << m_ipath; @@ -238,7 +239,7 @@ bool MimeHandlerExecMultiple::next_document() // It used to be that eof could be signalled just by an empty document, but // this was wrong. Empty documents can be found ie in zip files and should // not be interpreted as eof. - if (m_metaData["content"].length() == 0) { + if (m_metaData[cstr_content].empty()) { LOGDEB0(("MHExecMultiple: got empty document inside [%s]: [%s]\n", m_fn.c_str(), ipath.c_str())); } @@ -248,14 +249,14 @@ bool MimeHandlerExecMultiple::next_document() // mimetype, or the ipath MUST be a filename-like string which we can use // to compute a mime type if (!ipath.empty()) { - m_metaData["ipath"] = ipath; + m_metaData[cstr_ipath] = ipath; if (mtype.empty()) { LOGDEB0(("MHExecMultiple: no mime type from filter, " "using ipath for a guess\n")); mtype = mimetype(ipath, 0, m_config, false); if (mtype.empty()) { // mimetype() won't call idFile when there is no file. Do it - mtype = idFileMem(m_metaData["content"]); + mtype = idFileMem(m_metaData[cstr_content]); if (mtype.empty()) { // Note this happens for example for directory zip members // We could recognize them by the end /, but wouldn't know @@ -265,13 +266,13 @@ bool MimeHandlerExecMultiple::next_document() } } } - m_metaData["mimetype"] = mtype; + m_metaData[cstr_mimetype] = mtype; string md5, xmd5; - MD5String(m_metaData["content"], md5); + MD5String(m_metaData[cstr_content], md5); m_metaData["md5"] = MD5HexPrint(md5, xmd5); } else { - m_metaData["mimetype"] = mtype.empty() ? "text/html" : mtype; - m_metaData.erase("ipath"); + m_metaData[cstr_mimetype] = mtype.empty() ? "text/html" : mtype; + m_metaData.erase(cstr_ipath); string md5, xmd5, reason; if (MD5File(m_fn, md5, &reason)) { m_metaData["md5"] = MD5HexPrint(md5, xmd5); @@ -292,14 +293,13 @@ bool MimeHandlerExecMultiple::next_document() charset = m_dfltInputCharset; } } - m_metaData["charset"] = charset; + m_metaData[cstr_charset] = charset; if (eofnext_received) m_havedoc = false; LOGDEB0(("MHExecMultiple: returning %d bytes of content," - " mtype [%s] charset [%s]\n", - m_metaData["content"].size(), m_metaData["mimetype"].c_str(), - m_metaData["charset"].c_str())); + " mtype [%s] charset [%s]\n", m_metaData[cstr_content].size(), + m_metaData[cstr_mimetype].c_str(), m_metaData[cstr_charset].c_str())); return true; } diff --git a/src/internfile/mh_html.cpp b/src/internfile/mh_html.cpp index 4ead947f..4784beaf 100644 --- a/src/internfile/mh_html.cpp +++ b/src/internfile/mh_html.cpp @@ -15,7 +15,7 @@ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ - +#include "cstr.h" #include "mimehandler.h" #include "debuglog.h" #include "csguess.h" @@ -74,7 +74,7 @@ bool MimeHandlerHtml::next_document() LOGDEB(("MHHtml::next_doc.: default supposed input charset: [%s]\n", charset.c_str())); // Override default input charset if someone took care to set one: - map::const_iterator it = m_metaData.find("charset"); + map::const_iterator it = m_metaData.find(cstr_charset); if (it != m_metaData.end() && !it->second.empty()) { charset = it->second; LOGDEB(("MHHtml: next_doc.: input charset from metadata: [%s]\n", @@ -102,7 +102,7 @@ bool MimeHandlerHtml::next_document() transcoded = m_html; // We don't know the charset, at all p.reset_charsets(); - charset = ""; + charset.clear(); } else { if (ecnt) { if (pass == 0) { @@ -163,13 +163,13 @@ bool MimeHandlerHtml::next_document() } m_metaData["origcharset"] = result.get_charset(); - m_metaData["content"] = result.dump; - m_metaData["charset"] = "utf-8"; + m_metaData[cstr_content] = result.dump; + m_metaData[cstr_charset] = "utf-8"; // Avoid setting empty values which would crush ones possibly inherited // from parent (if we're an attachment) if (!result.dmtime.empty()) m_metaData["modificationdate"] = result.dmtime; - m_metaData["mimetype"] = "text/plain"; + m_metaData[cstr_mimetype] = cstr_textplain; for (map::const_iterator it = result.meta.begin(); it != result.meta.end(); it++) { diff --git a/src/internfile/mh_mail.cpp b/src/internfile/mh_mail.cpp index 1ac22636..e0b8228e 100644 --- a/src/internfile/mh_mail.cpp +++ b/src/internfile/mh_mail.cpp @@ -25,6 +25,7 @@ #include #include +#include "cstr.h" #include "mimehandler.h" #include "readfile.h" #include "transcode.h" @@ -43,17 +44,12 @@ using namespace std; static const int maxdepth = 20; -static const string cstr_mimetype = "mimetype"; -static const string cstr_content = "content"; -static const string cstr_author = "author"; static const string cstr_recipient = "recipient"; static const string cstr_modificationdate = "modificationdate"; static const string cstr_title = "title"; static const string cstr_msgid = "msgid"; static const string cstr_abstract = "abstract"; -static const string cstr_newline = "\n"; - MimeHandlerMail::MimeHandlerMail(RclConfig *cnf, const string &mt) : RecollFilter(cnf, mt), m_bincdoc(0), m_fd(-1), m_stream(0), m_idx(-1) { @@ -176,7 +172,7 @@ bool MimeHandlerMail::next_document() bool res = false; if (m_idx == -1) { - m_metaData[cstr_mimetype] = "text/plain"; + m_metaData[cstr_mimetype] = cstr_textplain; res = processMsg(m_bincdoc, 0); LOGDEB1(("MimeHandlerMail::next_document: mimetype %s\n", m_metaData[cstr_mimetype].c_str())); @@ -185,7 +181,7 @@ bool MimeHandlerMail::next_document() m_metaData[cstr_abstract] = truncate_to_word(txt.substr(m_startoftext), 250); } else { - m_metaData[cstr_abstract] = ""; + m_metaData[cstr_abstract].clear(); res = processAttach(); } m_idx++; @@ -240,7 +236,7 @@ bool MimeHandlerMail::processAttach() MHMailAttach *att = m_attachments[m_idx]; m_metaData[cstr_mimetype] = att->m_contentType; - m_metaData["charset"] = att->m_charset; + m_metaData[cstr_charset] = att->m_charset; m_metaData["filename"] = att->m_filename; // Change the title to something helpul m_metaData[cstr_title] = att->m_filename + " (" + m_subject + ")"; @@ -263,12 +259,12 @@ bool MimeHandlerMail::processAttach() // Special case for text/plain content. Internfile should deal // with this but it expects text/plain to be utf-8 already, so we // handle the transcoding if needed - if (m_metaData[cstr_mimetype] == "text/plain" && - stringicmp(m_metaData["charset"], "UTF-8")) { + if (m_metaData[cstr_mimetype] == cstr_textplain && + stringicmp(m_metaData[cstr_charset], "UTF-8")) { string utf8; - if (!transcode(body, utf8, m_metaData["charset"], "UTF-8")) { + if (!transcode(body, utf8, m_metaData[cstr_charset], "UTF-8")) { LOGERR((" processAttach: transcode to utf-8 failed " - "for charset [%s]\n", m_metaData["charset"].c_str())); + "for charset [%s]\n", m_metaData[cstr_charset].c_str())); // Just let it through and hope for the best... } else { body = utf8; @@ -288,7 +284,7 @@ bool MimeHandlerMail::processAttach() // Ipath char nbuf[20]; sprintf(nbuf, "%d", m_idx); - m_metaData["ipath"] = nbuf; + m_metaData[cstr_ipath] = nbuf; return true; } @@ -452,7 +448,7 @@ void MimeHandlerMail::walkmime(Binc::MimePart* doc, int depth) MimeHeaderValue content_type; parseMimeHeaderValue(hi.getValue(), content_type); LOGDEB2(("walkmime: C-type: %s\n",content_type.value.c_str())); - if (!stringlowercmp("text/plain", content_type.value)) + if (!stringlowercmp(cstr_textplain, content_type.value)) ittxt = it; else if (!stringlowercmp("text/html", content_type.value)) ithtml = it; @@ -473,7 +469,7 @@ void MimeHandlerMail::walkmime(Binc::MimePart* doc, int depth) // Get and parse content-type header. Binc::HeaderItem hi; - string ctt = "text/plain"; + string ctt = cstr_textplain; if (doc->h.getFirstHeader("Content-Type", hi)) { ctt = hi.getValue(); } @@ -531,7 +527,7 @@ void MimeHandlerMail::walkmime(Binc::MimePart* doc, int depth) // to iso-8859 only if the transfer-encoding is 8 bit, or test for // actual 8 bit chars, but what the heck, le'ts use 8859-1 as default string charset; - it = content_type.params.find(string("charset")); + it = content_type.params.find(string(cstr_charset)); if (it != content_type.params.end()) charset = it->second; if (charset.empty() || @@ -555,7 +551,7 @@ void MimeHandlerMail::walkmime(Binc::MimePart* doc, int depth) // attachment, as per rfc2183. // If it is inline but not text or html, same thing. if (stringlowercmp("inline", content_disposition.value) || - (stringlowercmp("text/plain", content_type.value) && + (stringlowercmp(cstr_textplain, content_type.value) && stringlowercmp("text/html", content_type.value)) ) { if (!filename.empty()) { out += "\n"; diff --git a/src/internfile/mh_mbox.cpp b/src/internfile/mh_mbox.cpp index 5d08b1b8..a78036b9 100644 --- a/src/internfile/mh_mbox.cpp +++ b/src/internfile/mh_mbox.cpp @@ -29,6 +29,7 @@ #include #include +#include "cstr.h" #include "mimehandler.h" #include "debuglog.h" #include "readfile.h" @@ -144,7 +145,7 @@ public: string blk1; blk1.append("udi="); blk1.append(udi); - blk1.append("\n"); + blk1.append(cstr_newline); blk1.resize(o_b1size, 0); if (fwrite(blk1.c_str(), 1, o_b1size, fp) != o_b1size) { LOGDEB(("MboxCache::put_offsets: fwrite errno %d\n", errno)); @@ -384,7 +385,7 @@ bool MimeHandlerMbox::next_document() } FILE *fp = (FILE *)m_vfp; int mtarg = 0; - if (m_ipath != "") { + if (!m_ipath.empty()) { sscanf(m_ipath.c_str(), "%d", &mtarg); } else if (m_forPreview) { // Can't preview an mbox. @@ -434,7 +435,7 @@ bool MimeHandlerMbox::next_document() off_t message_end = 0; bool iseof = false; bool hademptyline = true; - string& msgtxt = m_metaData["content"]; + string& msgtxt = m_metaData[cstr_content]; msgtxt.erase(); line_type line; for (;;) { @@ -498,8 +499,8 @@ bool MimeHandlerMbox::next_document() // m_msgnum was incremented when hitting the next From_ or eof, so the data // is for m_msgnum - 1 sprintf(buf, "%d", m_msgnum - 1); - m_metaData["ipath"] = buf; - m_metaData["mimetype"] = "message/rfc822"; + m_metaData[cstr_ipath] = buf; + m_metaData[cstr_mimetype] = "message/rfc822"; if (iseof) { LOGDEB2(("MimeHandlerMbox::next: eof hit\n")); m_havedoc = false; @@ -590,7 +591,7 @@ int main(int argc, char **argv) exit(1); } map::const_iterator it = - mh.get_meta_data().find("content"); + mh.get_meta_data().find(cstr_content); int size; if (it == mh.get_meta_data().end()) { size = -1; @@ -610,7 +611,7 @@ int main(int argc, char **argv) } docnt++; map::const_iterator it = - mh.get_meta_data().find("content"); + mh.get_meta_data().find(cstr_content); int size; if (it == mh.get_meta_data().end()) { size = -1; diff --git a/src/internfile/mh_text.cpp b/src/internfile/mh_text.cpp index 2b96ec9f..2cf0ad45 100644 --- a/src/internfile/mh_text.cpp +++ b/src/internfile/mh_text.cpp @@ -27,6 +27,7 @@ using namespace std; #endif /* NO_NAMESPACES */ +#include "cstr.h" #include "mh_text.h" #include "csguess.h" #include "debuglog.h" @@ -122,7 +123,7 @@ bool MimeHandlerText::next_document() m_dfltInputCharset.c_str())); int ecnt; bool ret; - string& itext = m_metaData["content"]; + string& itext = m_metaData[cstr_content]; if (!(ret=transcode(m_text, itext, m_dfltInputCharset, "UTF-8", &ecnt)) || ecnt > int(itext.size() / 4)) { LOGERR(("MimeHandlerText::mkDoc: transcode to utf-8 failed " @@ -132,8 +133,8 @@ bool MimeHandlerText::next_document() return false; } m_metaData["origcharset"] = m_dfltInputCharset; - m_metaData["charset"] = "utf-8"; - m_metaData["mimetype"] = "text/plain"; + m_metaData[cstr_charset] = "utf-8"; + m_metaData[cstr_mimetype] = cstr_textplain; // If text length is 0 (the file is empty or oversize), or we have // read all at once, we're done @@ -151,7 +152,7 @@ bool MimeHandlerText::next_document() char buf[30]; sprintf(buf, "%lld", (long long)(m_offs - m_text.length())); if (m_offs - m_text.length() != 0) - m_metaData["ipath"] = buf; + m_metaData[cstr_ipath] = buf; readnext(); return true; } diff --git a/src/internfile/mh_unknown.h b/src/internfile/mh_unknown.h index 3975972f..c478aab4 100644 --- a/src/internfile/mh_unknown.h +++ b/src/internfile/mh_unknown.h @@ -19,6 +19,7 @@ #include +#include "cstr.h" #include "mimehandler.h" /** @@ -30,19 +31,19 @@ class MimeHandlerUnknown : public RecollFilter { MimeHandlerUnknown(RclConfig *cnf, const string& mt) : RecollFilter(cnf, mt) {} virtual ~MimeHandlerUnknown() {} - virtual bool set_document_string(const string& fn) { + virtual bool set_document_file(const string& fn) { RecollFilter::set_document_file(fn); return m_havedoc = true; } - virtual bool set_document_file(const string&) { + virtual bool set_document_string(const string&) { return m_havedoc = true; } virtual bool next_document() { if (m_havedoc == false) return false; m_havedoc = false; - m_metaData["content"] = ""; - m_metaData["mimetype"] = "text/plain"; + m_metaData[cstr_content] = cstr_null; + m_metaData[cstr_mimetype] = cstr_textplain; return true; } virtual bool is_unknown() {return true;} diff --git a/src/internfile/mimehandler.cpp b/src/internfile/mimehandler.cpp index 5de5c08f..9b2a3387 100644 --- a/src/internfile/mimehandler.cpp +++ b/src/internfile/mimehandler.cpp @@ -24,6 +24,7 @@ #include using namespace std; +#include "cstr.h" #include "mimehandler.h" #include "debuglog.h" #include "rclconfig.h" @@ -110,7 +111,7 @@ static Dijon::Filter *mhFactory(RclConfig *config, const string &mime) LOGDEB2(("mhFactory(%s)\n", mime.c_str())); string lmime(mime); stringtolower(lmime); - if ("text/plain" == lmime) { + if (cstr_textplain == lmime) { LOGDEB2(("mhFactory(%s): returning MimeHandlerText\n", mime.c_str())); return new MimeHandlerText(config, lmime); } else if ("text/html" == lmime) { @@ -178,9 +179,9 @@ MimeHandlerExec *mhExecFactory(RclConfig *cfg, const string& mtype, string& hs, // Handle additional attributes. We substitute the semi-colons // with newlines and use a ConfSimple string value; - if (attrs.get("charset", value)) + if (attrs.get(cstr_charset, value)) h->cfgFilterOutputCharset = stringtolower((const string&)value); - if (attrs.get("mimetype", value)) + if (attrs.get(cstr_mimetype, value)) h->cfgFilterOutputMtype = stringtolower((const string&)value); #if 0 @@ -276,7 +277,7 @@ Dijon::Filter *getMimeHandler(const string &mtype, RclConfig *cfg, // If the type is an unknown text/xxx, index as text/plain and // hope for the best (this wouldn't work too well with text/rtf...) if (mtype.find("text/") == 0) { - h = mhFactory("text/plain"); + h = mhFactory(cstr_textplain); goto out; } #endif diff --git a/src/internfile/myhtmlparse.cpp b/src/internfile/myhtmlparse.cpp index 54cb40ed..47f252a1 100644 --- a/src/internfile/myhtmlparse.cpp +++ b/src/internfile/myhtmlparse.cpp @@ -27,6 +27,7 @@ #include #include +#include "cstr.h" #include "myhtmlparse.h" #include "indextext.h" // for lowercase_term() #include "mimeparse.h" @@ -352,7 +353,7 @@ MyHtmlParser::opening_tag(const string &tag) case 'm': if (tag == "meta") { string content; - if (get_parameter("content", content)) { + if (get_parameter(cstr_content, content)) { string name; if (get_parameter("name", name)) { lowercase_term(name); @@ -386,7 +387,7 @@ MyHtmlParser::opening_tag(const string &tag) MimeHeaderValue p; parseMimeHeaderValue(content, p); map::const_iterator k; - if ((k = p.params.find("charset")) != + if ((k = p.params.find(cstr_charset)) != p.params.end()) { charset = k->second; if (!samecharset(charset, fromcharset)) { @@ -517,7 +518,7 @@ MyHtmlParser::closing_tag(const string &tag) if (tag == "title") { if (meta.find("title") == meta.end()|| meta["title"].empty()) { meta["title"] = dump; - dump = ""; + dump.clear(); } break; } diff --git a/src/lib/Makefile b/src/lib/Makefile index b839a5eb..88405471 100644 --- a/src/lib/Makefile +++ b/src/lib/Makefile @@ -6,8 +6,8 @@ LIBS = librcl.a all: $(LIBS) -OBJS = rclaspell.o beaglequeuecache.o rclconfig.o rclinit.o textsplit.o unacpp.o beaglequeue.o csguess.o fsindexer.o indexer.o mimetype.o subtreelist.o htmlparse.o myhtmlparse.o mimehandler.o internfile.o mh_exec.o mh_execm.o mh_html.o mh_mail.o mh_mbox.o mh_text.o docseq.o docseqdb.o docseqhist.o filtseq.o dynconf.o plaintorich.o recollq.o reslistpager.o sortseq.o wasastringtoquery.o wasatorcl.o rcldb.o rcldoc.o rclquery.o searchdata.o stemdb.o stoplist.o base64.o circache.o closefrom.o conftree.o copyfile.o debuglog.o execmd.o fstreewalk.o idfile.o fileudi.o md5.o mimeparse.o netcon.o pathut.o pxattr.o rclionice.o readfile.o smallut.o transcode.o wipedir.o x11mon.o mime-getpart.o mime-parsefull.o mime-parseonlyheader.o mime-printbody.o mime-printdoc.o mime-printheader.o mime.o convert.o iodevice.o iofactory.o -DEPS = rclaspell.dep.stamp beaglequeuecache.dep.stamp rclconfig.dep.stamp rclinit.dep.stamp textsplit.dep.stamp unacpp.dep.stamp beaglequeue.dep.stamp csguess.dep.stamp fsindexer.dep.stamp indexer.dep.stamp mimetype.dep.stamp subtreelist.dep.stamp htmlparse.dep.stamp myhtmlparse.dep.stamp mimehandler.dep.stamp internfile.dep.stamp mh_exec.dep.stamp mh_execm.dep.stamp mh_html.dep.stamp mh_mail.dep.stamp mh_mbox.dep.stamp mh_text.dep.stamp docseq.dep.stamp docseqdb.dep.stamp docseqhist.dep.stamp filtseq.dep.stamp dynconf.dep.stamp plaintorich.dep.stamp recollq.dep.stamp reslistpager.dep.stamp sortseq.dep.stamp wasastringtoquery.dep.stamp wasatorcl.dep.stamp rcldb.dep.stamp rcldoc.dep.stamp rclquery.dep.stamp searchdata.dep.stamp stemdb.dep.stamp stoplist.dep.stamp base64.dep.stamp circache.dep.stamp closefrom.dep.stamp conftree.dep.stamp copyfile.dep.stamp debuglog.dep.stamp execmd.dep.stamp fstreewalk.dep.stamp idfile.dep.stamp fileudi.dep.stamp md5.dep.stamp mimeparse.dep.stamp netcon.dep.stamp pathut.dep.stamp pxattr.dep.stamp rclionice.dep.stamp readfile.dep.stamp smallut.dep.stamp transcode.dep.stamp wipedir.dep.stamp x11mon.dep.stamp mime-getpart.dep.stamp mime-parsefull.dep.stamp mime-parseonlyheader.dep.stamp mime-printbody.dep.stamp mime-printdoc.dep.stamp mime-printheader.dep.stamp mime.dep.stamp convert.dep.stamp iodevice.dep.stamp iofactory.dep.stamp +OBJS = rclaspell.o beaglequeuecache.o cstr.o rclconfig.o rclinit.o textsplit.o unacpp.o beaglequeue.o csguess.o fsindexer.o indexer.o mimetype.o subtreelist.o htmlparse.o myhtmlparse.o mimehandler.o internfile.o mh_exec.o mh_execm.o mh_html.o mh_mail.o mh_mbox.o mh_text.o docseq.o docseqdb.o docseqhist.o filtseq.o dynconf.o plaintorich.o recollq.o reslistpager.o sortseq.o wasastringtoquery.o wasatorcl.o rcldb.o rcldoc.o rclquery.o searchdata.o stemdb.o stoplist.o base64.o circache.o closefrom.o conftree.o copyfile.o debuglog.o execmd.o fstreewalk.o idfile.o fileudi.o md5.o mimeparse.o netcon.o pathut.o pxattr.o rclionice.o readfile.o smallut.o transcode.o wipedir.o x11mon.o mime-getpart.o mime-parsefull.o mime-parseonlyheader.o mime-printbody.o mime-printdoc.o mime-printheader.o mime.o convert.o iodevice.o iofactory.o +DEPS = rclaspell.dep.stamp beaglequeuecache.dep.stamp cstr.dep.stamp rclconfig.dep.stamp rclinit.dep.stamp textsplit.dep.stamp unacpp.dep.stamp beaglequeue.dep.stamp csguess.dep.stamp fsindexer.dep.stamp indexer.dep.stamp mimetype.dep.stamp subtreelist.dep.stamp htmlparse.dep.stamp myhtmlparse.dep.stamp mimehandler.dep.stamp internfile.dep.stamp mh_exec.dep.stamp mh_execm.dep.stamp mh_html.dep.stamp mh_mail.dep.stamp mh_mbox.dep.stamp mh_text.dep.stamp docseq.dep.stamp docseqdb.dep.stamp docseqhist.dep.stamp filtseq.dep.stamp dynconf.dep.stamp plaintorich.dep.stamp recollq.dep.stamp reslistpager.dep.stamp sortseq.dep.stamp wasastringtoquery.dep.stamp wasatorcl.dep.stamp rcldb.dep.stamp rcldoc.dep.stamp rclquery.dep.stamp searchdata.dep.stamp stemdb.dep.stamp stoplist.dep.stamp base64.dep.stamp circache.dep.stamp closefrom.dep.stamp conftree.dep.stamp copyfile.dep.stamp debuglog.dep.stamp execmd.dep.stamp fstreewalk.dep.stamp idfile.dep.stamp fileudi.dep.stamp md5.dep.stamp mimeparse.dep.stamp netcon.dep.stamp pathut.dep.stamp pxattr.dep.stamp rclionice.dep.stamp readfile.dep.stamp smallut.dep.stamp transcode.dep.stamp wipedir.dep.stamp x11mon.dep.stamp mime-getpart.dep.stamp mime-parsefull.dep.stamp mime-parseonlyheader.dep.stamp mime-printbody.dep.stamp mime-printdoc.dep.stamp mime-printheader.dep.stamp mime.dep.stamp convert.dep.stamp iodevice.dep.stamp iofactory.dep.stamp librcl.a : $(DEPS) $(OBJS) unac.o ar ru librcl.a $(OBJS) unac.o @@ -19,6 +19,8 @@ rclaspell.o : ../aspell/rclaspell.cpp $(depth)/mk/localdefs $(CXX) $(ALL_CXXFLAGS) -c ../aspell/rclaspell.cpp beaglequeuecache.o : ../common/beaglequeuecache.cpp $(depth)/mk/localdefs $(CXX) $(ALL_CXXFLAGS) -c ../common/beaglequeuecache.cpp +cstr.o : ../common/cstr.cpp $(depth)/mk/localdefs + $(CXX) $(ALL_CXXFLAGS) -c ../common/cstr.cpp rclconfig.o : ../common/rclconfig.cpp $(depth)/mk/localdefs $(CXX) $(ALL_CXXFLAGS) -c ../common/rclconfig.cpp rclinit.o : ../common/rclinit.cpp $(depth)/mk/localdefs @@ -167,6 +169,9 @@ rclaspell.dep.stamp : ../aspell/rclaspell.cpp $(depth)/mk/localdefs beaglequeuecache.dep.stamp : ../common/beaglequeuecache.cpp $(depth)/mk/localdefs $(CXX) -M $(ALL_CXXFLAGS) ../common/beaglequeuecache.cpp > beaglequeuecache.dep touch beaglequeuecache.dep.stamp +cstr.dep.stamp : ../common/cstr.cpp $(depth)/mk/localdefs + $(CXX) -M $(ALL_CXXFLAGS) ../common/cstr.cpp > cstr.dep + touch cstr.dep.stamp rclconfig.dep.stamp : ../common/rclconfig.cpp $(depth)/mk/localdefs $(CXX) -M $(ALL_CXXFLAGS) ../common/rclconfig.cpp > rclconfig.dep touch rclconfig.dep.stamp @@ -343,6 +348,7 @@ x11mon.dep.stamp : ../utils/x11mon.cpp $(depth)/mk/localdefs touch x11mon.dep.stamp include rclaspell.dep include beaglequeuecache.dep +include cstr.dep include rclconfig.dep include rclinit.dep include textsplit.dep diff --git a/src/lib/mkMake b/src/lib/mkMake index fdb93410..724048a6 100755 --- a/src/lib/mkMake +++ b/src/lib/mkMake @@ -6,6 +6,7 @@ depth=${depth:-..} SRC_CPP="\ ${depth}/aspell/rclaspell.cpp \ ${depth}/common/beaglequeuecache.cpp \ +${depth}/common/cstr.cpp \ ${depth}/common/rclconfig.cpp \ ${depth}/common/rclinit.cpp \ ${depth}/common/textsplit.cpp \ diff --git a/src/query/docseq.cpp b/src/query/docseq.cpp index 65dbd0cc..47639fec 100644 --- a/src/query/docseq.cpp +++ b/src/query/docseq.cpp @@ -81,7 +81,7 @@ bool DocSource::buildStack() string DocSource::title() { if (m_seq.isNull()) - return ""; + return string(); string qual; if (m_fspec.isNotNull() && !m_sspec.isNotNull()) qual = string(" (") + o_filt_trans + string(")"); diff --git a/src/query/reslistpager.cpp b/src/query/reslistpager.cpp index f50fa074..8903e15d 100644 --- a/src/query/reslistpager.cpp +++ b/src/query/reslistpager.cpp @@ -27,6 +27,7 @@ using std::ostringstream; using std::endl; +#include "cstr.h" #include "reslistpager.h" #include "debuglog.h" #include "rclconfig.h" @@ -384,7 +385,7 @@ string ResListPager::iconPath(RclConfig *config, const string& mtype) { string iconpath; config->getMimeIconName(mtype, &iconpath); - iconpath = string("file://") + iconpath; + iconpath = cstr_fileu + iconpath; return iconpath; } diff --git a/src/rcldb/rcldb.cpp b/src/rcldb/rcldb.cpp index 5bd0f852..b42d205d 100644 --- a/src/rcldb/rcldb.cpp +++ b/src/rcldb/rcldb.cpp @@ -94,7 +94,6 @@ static const string cstr_syntAbs("?!#@"); // Only ONE field name inside the index data record differs from the // Rcl::Doc ones: caption<->title, for a remnant of compatibility with // omega -static const string cstr_keycap("caption"); // Static/Default table for field->prefix/weight translation. // This is logically const after initialization. Can't use a @@ -125,7 +124,7 @@ static void initFldToTraits() fldToTraits["ext"] = FieldTraits("XE"); fldToTraits[Doc::keyfn] = FieldTraits("XSFN"); - fldToTraits[cstr_keycap] = FieldTraits("S"); + fldToTraits[cstr_caption] = FieldTraits("S"); fldToTraits[Doc::keytt] = FieldTraits("S"); fldToTraits["subject"] = FieldTraits("S"); @@ -198,7 +197,7 @@ bool Db::Native::dbDataToRclDoc(Xapian::docid docid, std::string &data, parms.get(Doc::keyfmt, doc.fmtime); parms.get(Doc::keydmt, doc.dmtime); parms.get(Doc::keyoc, doc.origcharset); - parms.get(cstr_keycap, doc.meta[Doc::keytt]); + parms.get(cstr_caption, doc.meta[Doc::keytt]); parms.get(Doc::keykw, doc.meta[Doc::keykw]); parms.get(Doc::keyabs, doc.meta[Doc::keyabs]); // Possibly remove synthetic abstract indicator (if it's there, we @@ -1179,7 +1178,7 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi, doc.meta[Doc::keytt] = neutchars(truncate_to_word(doc.meta[Doc::keytt], 150), cstr_nc); if (!doc.meta[Doc::keytt].empty()) - RECORD_APPEND(record, cstr_keycap, doc.meta[Doc::keytt]); + RECORD_APPEND(record, cstr_caption, doc.meta[Doc::keytt]); trimstring(doc.meta[Doc::keykw], " \t\r\n"); doc.meta[Doc::keykw] = @@ -1526,7 +1525,7 @@ bool Db::filenameWildExp(const string& fnexp, list& names) // each end: match any substring if (pattern[0] == '"' && pattern[pattern.size()-1] == '"') { pattern = pattern.substr(1, pattern.size() -2); - } else if (pattern.find_first_of("*?[") == string::npos && + } else if (pattern.find_first_of(cstr_minwilds) == string::npos && !unaciscapital(pattern)) { pattern = "*" + pattern + "*"; } // else let it be diff --git a/src/rcldb/rcldb.h b/src/rcldb/rcldb.h index e4d03a38..dd1d7e97 100644 --- a/src/rcldb/rcldb.h +++ b/src/rcldb/rcldb.h @@ -21,6 +21,7 @@ #include #include +#include "cstr.h" #include "refcntr.h" #include "rcldoc.h" #include "stoplist.h" @@ -186,7 +187,7 @@ class Db { enum MatchType {ET_WILD, ET_REGEXP, ET_STEM}; bool termMatch(MatchType typ, const string &lang, const string &s, TermMatchResult& result, int max = -1, - const string& field = "", + const string& field = cstr_null, string *prefix = 0 ); /** Return min and max years for doc mod times in db */ diff --git a/src/rcldb/rclquery.cpp b/src/rcldb/rclquery.cpp index e46d7260..c4046f73 100644 --- a/src/rcldb/rclquery.cpp +++ b/src/rcldb/rclquery.cpp @@ -24,6 +24,7 @@ #include "xapian.h" +#include "cstr.h" #include "rcldb.h" #include "rcldb_p.h" #include "rclquery.h" @@ -39,17 +40,15 @@ namespace Rcl { #endif -static const string cstr_keycap("caption"); -static const string cstr_keydmtime("dmtime"); // Field names inside the index data record may differ from the rcldoc ones // (esp.: caption / title) static const string& docfToDatf(const string& df) { if (!df.compare(Doc::keytt)) { - return cstr_keycap; + return cstr_caption; } else if (!df.compare(Doc::keymt)) { - return cstr_keydmtime; + return cstr_dmtime; } else { return df; } diff --git a/src/rcldb/searchdata.cpp b/src/rcldb/searchdata.cpp index 0b8b7a01..be834213 100644 --- a/src/rcldb/searchdata.cpp +++ b/src/rcldb/searchdata.cpp @@ -25,6 +25,7 @@ #include "xapian.h" +#include "cstr.h" #include "rcldb.h" #include "searchdata.h" #include "debuglog.h" @@ -346,8 +347,10 @@ bool SearchData::maybeAddAutoPhrase() words += clp->gettext(); } - if (words.find_first_of("\"*[]?") != string::npos && // has wildcards - TextSplit::countWords(words) <= 1) { // Just one word. + // If there are wildcards or quotes in there, or this is a single word, + // bail out + if (words.find_first_of("\"*[?") != string::npos && + TextSplit::countWords(words) <= 1) { LOGDEB2(("SearchData::maybeAddAutoPhrase: wildcards or single word\n")); return false; } @@ -561,7 +564,7 @@ void StringToXapianQ::expandTerm(bool nostemexp, return; } - bool haswild = term.find_first_of("*?[") != string::npos; + bool haswild = term.find_first_of(cstr_minwilds) != string::npos; // No stemming if there are wildcards or prevented globally. if (haswild || m_stemlang.empty()) @@ -857,8 +860,6 @@ bool StringToXapianQ::processUserString(const string &iq, return true; } -static const string cstr_null; - // Translate a simple OR, AND, or EXCL search clause. bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p, const string& stemlang) @@ -964,10 +965,10 @@ bool SearchDataClauseDist::toNativeQuery(Rcl::Db &db, void *p, // stringToXapianQueries() to lowercase and simplify the phrase // terms etc. This will result into a single (complex) // Xapian::Query. - if (m_text.find_first_of("\"") != string::npos) { + if (m_text.find('\"') != string::npos) { m_text = neutchars(m_text, "\""); } - string s = string("\"") + m_text + string("\""); + string s = cstr_dquote + m_text + cstr_dquote; bool useNear = (m_tp == SCLT_NEAR); StringToXapianQ tr(db, m_field, l_stemlang, doBoostUserTerm); if (!tr.processUserString(s, m_reason, pqueries, db.getStopList(), diff --git a/src/rcldb/searchdata.h b/src/rcldb/searchdata.h index 74c23e52..96ac04df 100644 --- a/src/rcldb/searchdata.h +++ b/src/rcldb/searchdata.h @@ -30,6 +30,7 @@ #include "rcldb.h" #include "refcntr.h" #include "smallut.h" +#include "cstr.h" class RclConfig; @@ -215,7 +216,7 @@ public: SearchDataClauseSimple(SClType tp, const string& txt, const string& fld = string()) : SearchDataClause(tp), m_text(txt), m_field(fld), m_slack(0) { - m_haveWildCards = (txt.find_first_of("*?[") != string::npos); + m_haveWildCards = (txt.find_first_of(cstr_minwilds) != string::npos); } virtual ~SearchDataClauseSimple() {} diff --git a/src/rcldb/stemdb.cpp b/src/rcldb/stemdb.cpp index 4b506a04..d20ca810 100644 --- a/src/rcldb/stemdb.cpp +++ b/src/rcldb/stemdb.cpp @@ -291,13 +291,14 @@ static bool stemExpandOne(const std::string& dbdir, // Build expansion list from database data No need for // a conftree, but we need to massage the data a // little - string::size_type pos = data.find_first_of("="); - ++pos; - string::size_type pos1 = data.find_last_of("\n"); + string::size_type pos = data.find('='); + string::size_type pos1 = data.rfind('\n'); if (pos == string::npos || pos1 == string::npos || - pos1 <= pos) { - // ?? + pos1 <= pos+1) { + LOGERR(("stemExpand: bad data in db: [%s]\n", + data.c_str())); } else { + ++pos; stringToStrings(data.substr(pos, pos1-pos), result); } } diff --git a/src/utils/circache.cpp b/src/utils/circache.cpp index 85b1d218..9b6fc749 100644 --- a/src/utils/circache.cpp +++ b/src/utils/circache.cpp @@ -34,6 +34,7 @@ #include #include +#include "cstr.h" #include "circache.h" #include "conftree.h" #include "debuglog.h" @@ -356,27 +357,27 @@ public: string s(bf, CIRCACHE_FIRSTBLOCK_SIZE); ConfSimple conf(s, 1); string value; - if (!conf.get("maxsize", value, "")) { + if (!conf.get("maxsize", value, cstr_null)) { m_reason << "readfirstblock: conf get maxsize failed"; return false; } m_maxsize = atol(value.c_str()); - if (!conf.get("oheadoffs", value, "")) { + if (!conf.get("oheadoffs", value, cstr_null)) { m_reason << "readfirstblock: conf get oheadoffs failed"; return false; } m_oheadoffs = atol(value.c_str()); - if (!conf.get("nheadoffs", value, "")) { + if (!conf.get("nheadoffs", value, cstr_null)) { m_reason << "readfirstblock: conf get nheadoffs failed"; return false; } m_nheadoffs = atol(value.c_str()); - if (!conf.get("npadsize", value, "")) { + if (!conf.get("npadsize", value, cstr_null)) { m_reason << "readfirstblock: conf get npadsize failed"; return false; } m_npadsize = atol(value.c_str()); - if (!conf.get("unient", value, "")) { + if (!conf.get("unient", value, cstr_null)) { m_uniquentries = false; } else { m_uniquentries = stringToBool(value); @@ -478,7 +479,7 @@ public: string b(bf, d.dicsize); ConfSimple conf(b, 1); - if (!conf.get("udi", udi, "")) { + if (!conf.get("udi", udi, cstr_null)) { m_reason << "scan: no udi in dic"; return CCScanHook::Error; } @@ -1100,7 +1101,7 @@ bool CirCache::getCurrent(string& udi, string& dic, string& data) return false; ConfSimple conf(dic, 1); - conf.get("udi", udi, ""); + conf.get("udi", udi, cstr_null); return true; } @@ -1300,7 +1301,7 @@ int main(int argc, char **argv) exit(1); } string udi; - make_udi(fn, "", udi); + make_udi(fn, cstr_null, udi); sprintf(dic, "#whatever...\nmimetype = text/plain\nudi=%s\n", udi.c_str()); string sdic; diff --git a/src/utils/fstreewalk.cpp b/src/utils/fstreewalk.cpp index b47fa5e0..64770ff0 100644 --- a/src/utils/fstreewalk.cpp +++ b/src/utils/fstreewalk.cpp @@ -31,6 +31,7 @@ #include #include +#include "cstr.h" #include "debuglog.h" #include "pathut.h" #include "fstreewalk.h" @@ -222,7 +223,7 @@ FsTreeWalker::Status FsTreeWalker::walk(const string& _top, // back dir = data->dirs.front(); data->dirs.pop_front(); - if (dir == "") { + if (dir.empty()) { // Father change marker. if (data->dirs.empty()) break; @@ -244,7 +245,7 @@ FsTreeWalker::Status FsTreeWalker::walk(const string& _top, // Depth first, pop and process latest dir dir = data->dirs.back(); data->dirs.pop_back(); - if (dir == "") { + if (dir.empty()) { // Father change marker. if (data->dirs.empty()) break; @@ -374,8 +375,9 @@ FsTreeWalker::Status FsTreeWalker::iwalk(const string &top, // from entries for other dir. This is to help // with generating DirReturn callbacks if (!nullpush) { - if (!data->dirs.empty() && data->dirs.back() != "") - data->dirs.push_back(""); + if (!data->dirs.empty() && + !data->dirs.back().empty()) + data->dirs.push_back(cstr_null); nullpush = true; } data->dirs.push_back(fn);