From 560041cab9c7f933c3864c15028f7b514c091bd4 Mon Sep 17 00:00:00 2001 From: Jean-Francois Dockes Date: Sat, 30 May 2020 15:54:49 +0200 Subject: [PATCH] cleared out errant tabs --- src/aspell/rclaspell.h | 2 +- src/bincimapmime/convert.h | 68 +- src/bincimapmime/mime-parsefull.cc | 152 +- src/bincimapmime/mime-parseonlyheader.cc | 50 +- src/bincimapmime/mime-printbody.cc | 4 +- src/bincimapmime/mime-utils.h | 2 +- src/bincimapmime/trbinc.cc | 60 +- src/common/rclconfig.cpp | 2 +- src/common/rclconfig.h | 34 +- src/common/rclinit.cpp | 110 +- src/common/syngroups.cpp | 98 +- src/common/textsplitko.cpp | 16 +- src/common/unacpp.cpp | 2 +- src/common/unacpp.h | 2 +- src/common/uproplist.h | 6 +- src/common/utf8fn.cpp | 4 +- src/common/webstore.cpp | 22 +- src/index/fetcher.cpp | 6 +- src/index/idxstatus.h | 12 +- src/index/indexer.cpp | 110 +- src/index/indexer.h | 2 +- src/index/mimetype.cpp | 148 +- src/index/rclmon.h | 6 +- src/index/rclmonprc.cpp | 280 +- src/index/recollindex.cpp | 64 +- src/index/subtreelist.cpp | 34 +- src/index/subtreelist.h | 2 +- src/index/webqueuefetcher.cpp | 22 +- src/internfile/extrameta.cpp | 114 +- src/internfile/extrameta.h | 4 +- src/internfile/htmlparse.cpp | 452 +- src/internfile/htmlparse.h | 18 +- src/internfile/indextext.h | 4 +- src/internfile/mh_exec.cpp | 98 +- src/internfile/mh_exec.h | 4 +- src/internfile/mh_html.h | 14 +- src/internfile/mh_mbox.cpp | 2 +- src/internfile/mh_null.h | 14 +- src/internfile/mh_symlink.h | 36 +- src/internfile/mh_text.cpp | 22 +- src/internfile/mh_unknown.h | 14 +- src/internfile/mimehandler.h | 84 +- src/internfile/myhtmlparse.h | 4 +- src/internfile/txtdcode.cpp | 16 +- .../kioslave/kio_recoll-kde4/data/help.html | 6 +- .../kio_recoll-kde4/data/welcome.html | 2 +- src/kde/kioslave/kio_recoll-kde4/htmlif.cpp | 152 +- .../kioslave/kio_recoll-kde4/kio_recoll.cpp | 312 +- src/kde/kioslave/kio_recoll-kde4/kio_recoll.h | 32 +- src/kde/kioslave/kio_recoll/data/help.html | 6 +- src/kde/kioslave/kio_recoll/data/welcome.html | 2 +- src/kde/recoll_applet/Makefile.cvs | 10 +- src/kde/recoll_applet/admin/Makefile.common | 38 +- src/kde/recoll_applet/admin/conf.change.pl | 68 +- src/php/recoll/recoll.cpp | 38 +- src/python/pychm/recollchm/swig_chm.c | 378 +- src/python/recoll/pyrclextract.cpp | 68 +- src/python/recoll/pyrecoll.cpp | 888 +- src/qtgui/advsearch_w.h | 6 +- src/qtgui/advshist.cpp | 12 +- src/qtgui/confgui/confguiindex.cpp | 6 +- src/qtgui/crontool.cpp | 28 +- src/qtgui/crontool.h | 10 +- src/qtgui/firstidx.h | 4 +- src/qtgui/fragbuts.h | 2 +- src/qtgui/guiutils.h | 6 +- src/qtgui/idxsched.h | 10 +- src/qtgui/main.cpp | 12 +- src/qtgui/multisave.cpp | 102 +- src/qtgui/ptrans_w.cpp | 40 +- src/qtgui/ptrans_w.h | 6 +- src/qtgui/rclhelp.cpp | 52 +- src/qtgui/rclmain_w.cpp | 10 +- src/qtgui/recoll.h | 2 +- src/qtgui/respopup.h | 8 +- src/qtgui/restable.h | 18 +- src/qtgui/rtitool.h | 6 +- src/qtgui/searchclause_w.cpp | 92 +- src/qtgui/specialindex.h | 4 +- src/qtgui/spell_w.h | 10 +- src/qtgui/ssearch_w.cpp | 6 +- src/qtgui/uiprefs_w.h | 12 +- src/qtgui/webcache.h | 4 +- src/qtgui/widgets/editdialog.h | 4 +- src/qtgui/widgets/listdialog.h | 4 +- src/qtgui/widgets/qxtglobal.h | 4 +- src/qtgui/winschedtool.cpp | 6 +- src/qtgui/xmltosd.cpp | 224 +- src/query/docseq.cpp | 54 +- src/query/docseqdb.h | 4 +- src/query/docseqdocs.h | 22 +- src/query/docseqhist.cpp | 28 +- src/query/docseqhist.h | 4 +- src/query/dynconf.cpp | 114 +- src/query/dynconf.h | 34 +- src/query/filtseq.cpp | 120 +- src/query/filtseq.h | 2 +- src/query/sortseq.cpp | 26 +- src/query/sortseq.h | 4 +- src/query/wasaparse.cpp | 4 +- src/query/xadump.cpp | 302 +- src/rcldb/daterange.cpp | 62 +- src/rcldb/daterange.h | 2 +- src/rcldb/expansiondbs.cpp | 2 +- src/rcldb/expansiondbs.h | 10 +- src/rcldb/rcldb.h | 2 +- src/rcldb/rcldb_p.h | 10 +- src/rcldb/rcldups.cpp | 66 +- src/rcldb/rclquery.h | 2 +- src/rcldb/searchdata.cpp | 8 +- src/rcldb/searchdataxml.cpp | 148 +- src/rcldb/stemdb.cpp | 36 +- src/rcldb/stemdb.h | 14 +- src/rcldb/stoplist.cpp | 14 +- src/rcldb/synfamily.h | 68 +- src/rcldb/xmacros.h | 24 +- src/sampleconf/recoll.conf | 2 +- src/testmains/traspell.cpp | 106 +- src/testmains/trinternfile.cpp | 80 +- src/testmains/trmimetype.cpp | 24 +- src/testmains/trsyngroups.cpp | 8 +- src/testmains/trunac.cpp | 136 +- src/testmains/trwipedir.cpp | 26 +- src/unac/unac.c | 13282 ++++++++-------- src/unac/unac.h | 44 +- src/utils/base64.cpp | 384 +- src/utils/base64.h | 2 +- src/utils/cancelcheck.h | 8 +- src/utils/closefrom.cpp | 2 +- src/utils/cmdtalk.h | 22 +- src/utils/copyfile.h | 2 +- src/utils/cpuconf.cpp | 4 +- src/utils/cpuconf.h | 2 +- src/utils/ecrontab.cpp | 126 +- src/utils/ecrontab.h | 6 +- src/utils/fileudi.cpp | 18 +- src/utils/fstreewalk.h | 6 +- src/utils/hldata.h | 6 +- src/utils/idfile.cpp | 152 +- src/utils/md5.cpp | 314 +- src/utils/md5.h | 20 +- src/utils/md5ut.cpp | 10 +- src/utils/miniz.cpp | 66 +- src/utils/miniz.h | 14 +- src/utils/pathut.cpp | 2 +- src/utils/pxattr.cpp | 940 +- src/utils/pxattr.h | 32 +- src/utils/smallut.cpp | 22 +- src/utils/strmatcher.cpp | 6 +- src/utils/strmatcher.h | 14 +- src/utils/transcode.h | 6 +- src/utils/utf8iter.h | 264 +- src/utils/wipedir.cpp | 76 +- src/utils/workqueue.cpp | 44 +- src/utils/x11mon.cpp | 32 +- src/windows/fnmatch.c | 232 +- src/windows/fnmatch.h | 16 +- src/windows/rclstartw.cpp | 22 +- src/windows/strptime.cpp | 474 +- src/xaposix/safeunistd.h | 4 +- unac/getopt.c | 1090 +- unac/getopt.h | 32 +- unac/unac.c | 13282 ++++++++-------- unac/unac.h | 44 +- unac/unaccent.c | 60 +- unac/unactest.c | 2 +- unac/unactest1.c | 52 +- 167 files changed, 18868 insertions(+), 18868 deletions(-) diff --git a/src/aspell/rclaspell.h b/src/aspell/rclaspell.h index 241c70be..609c240f 100644 --- a/src/aspell/rclaspell.h +++ b/src/aspell/rclaspell.h @@ -64,7 +64,7 @@ class Aspell { /** Return a list of possible expansions for a given word */ bool suggest(Rcl::Db &db, const std::string& term, - std::list &suggestions, std::string &reason); + std::list &suggestions, std::string &reason); private: std::string dicPath(); diff --git a/src/bincimapmime/convert.h b/src/bincimapmime/convert.h index 5fbe0a9a..5c243d5b 100644 --- a/src/bincimapmime/convert.h +++ b/src/bincimapmime/convert.h @@ -93,25 +93,25 @@ namespace Binc { const char hexchars[] = "0123456789abcdef"; std::string tmp; for (std::string::const_iterator i = s.begin(); - i != s.end() && i + 1 != s.end(); i += 2) { + i != s.end() && i + 1 != s.end(); i += 2) { ptrdiff_t n; unsigned char c = *i; unsigned char d = *(i + 1); const char *t; if ((t = strchr(hexchars, c)) == 0) - return "out of range"; + return "out of range"; n = (t - hexchars) << 4; if ((t = strchr(hexchars, d)) == 0) - return "out of range"; + return "out of range"; n += (t - hexchars); if (n >= 0 && n <= 255) - tmp += (char) n; + tmp += (char) n; else - return "out of range"; + return "out of range"; } return tmp; @@ -123,7 +123,7 @@ namespace Binc { for (std::string::const_iterator i = s_in.begin(); i != s_in.end(); ++i) { unsigned char c = (unsigned char)*i; if (c <= 31 || c >= 127 || c == '\"' || c == '\\') - return "{" + toString((unsigned long)s_in.length()) + "}\r\n" + s_in; + return "{" + toString((unsigned long)s_in.length()) + "}\r\n" + s_in; } return "\"" + s_in + "\""; @@ -161,7 +161,7 @@ namespace Binc { //---------------------------------------------------------------------- inline const std::string unfold(const std::string &a, - bool removecomment = true) + bool removecomment = true) { std::string tmp; bool incomment = false; @@ -169,16 +169,16 @@ namespace Binc { for (std::string::const_iterator i = a.begin(); i != a.end(); ++i) { unsigned char c = (unsigned char)*i; if (!inquotes && removecomment) { - if (c == '(') { - incomment = true; - tmp += " "; - } else if (c == ')') { - incomment = false; - } else if (c != 0x0a && c != 0x0d) { - tmp += *i; + if (c == '(') { + incomment = true; + tmp += " "; + } else if (c == ')') { + incomment = false; + } else if (c != 0x0a && c != 0x0d) { + tmp += *i; } } else if (c != 0x0a && c != 0x0d) { - tmp += *i; + tmp += *i; } if (!incomment) { @@ -193,16 +193,16 @@ namespace Binc { //---------------------------------------------------------------------- inline void split(const std::string &s_in, const std::string &delim, - std::vector &dest, bool skipempty = true) + std::vector &dest, bool skipempty = true) { std::string token; for (std::string::const_iterator i = s_in.begin(); i != s_in.end(); ++i) { if (delim.find(*i) != std::string::npos) { - if (!skipempty || token != "") - dest.push_back(token); - token.clear(); + if (!skipempty || token != "") + dest.push_back(token); + token.clear(); } else - token += *i; + token += *i; } if (token != "") @@ -211,7 +211,7 @@ namespace Binc { //---------------------------------------------------------------------- inline void splitAddr(const std::string &s_in, - std::vector &dest, bool skipempty = true) + std::vector &dest, bool skipempty = true) { static const std::string delim = ","; std::string token; @@ -221,11 +221,11 @@ namespace Binc { else if (!inquote && *i == '\"') inquote = true; if (!inquote && delim.find(*i) != std::string::npos) { - if (!skipempty || token != "") - dest.push_back(token); - token.clear(); + if (!skipempty || token != "") + dest.push_back(token); + token.clear(); } else - token += *i; + token += *i; } if (token != "") dest.push_back(token); @@ -240,7 +240,7 @@ namespace Binc { std::string a = s_in.substr(0, 5); uppercase(a); return a == "INBOX" ? - a + (s_in.length() > 5 ? s_in.substr(5) : std::string()) : s_in; + a + (s_in.length() > 5 ? s_in.substr(5) : std::string()) : s_in; } return s_in; @@ -252,19 +252,19 @@ namespace Binc { std::string regex = "^"; for (std::string::const_iterator i = s_in.begin(); i != s_in.end(); ++i) { if (*i == '.' || *i == '[' || *i == ']' || *i == '{' || *i == '}' || - *i == '(' || *i == ')' || *i == '^' || *i == '$' || *i == '?' || - *i == '+' || *i == '\\') { - regex += "\\"; - regex += *i; + *i == '(' || *i == ')' || *i == '^' || *i == '$' || *i == '?' || + *i == '+' || *i == '\\') { + regex += "\\"; + regex += *i; } else if (*i == '*') - regex += ".*?"; + regex += ".*?"; else if (*i == '%') { regex += "(\\"; regex += delimiter; regex += "){0,1}"; - regex += "[^\\"; - regex += delimiter; - regex += "]*?"; + regex += "[^\\"; + regex += delimiter; + regex += "]*?"; } else regex += *i; } diff --git a/src/bincimapmime/mime-parsefull.cc b/src/bincimapmime/mime-parsefull.cc index 6efd3855..ca6bd27e 100644 --- a/src/bincimapmime/mime-parsefull.cc +++ b/src/bincimapmime/mime-parsefull.cc @@ -112,7 +112,7 @@ void Binc::MimeDocument::parseFull(istream& s) //------------------------------------------------------------------------ bool Binc::MimePart::parseOneHeaderLine(Binc::Header *header, - unsigned int *nlines) + unsigned int *nlines) { using namespace ::Binc; char c; @@ -127,7 +127,7 @@ bool Binc::MimePart::parseOneHeaderLine(Binc::Header *header, // start of the body. if (c == '\r') { for (int i = 0; i < (int) name.length() + 1; ++i) - mimeSource->ungetChar(); + mimeSource->ungetChar(); return false; } @@ -167,17 +167,17 @@ bool Binc::MimePart::parseOneHeaderLine(Binc::Header *header, // key,value pair. if (cqueue[2] == '\n' && c != ' ' && c != '\t') { if (content.length() > 2) - content.resize(content.length() - 2); + content.resize(content.length() - 2); trim(content); header->add(name, content); if (c != '\r') { - mimeSource->ungetChar(); - if (c == '\n') --*nlines; - return true; + mimeSource->ungetChar(); + if (c == '\n') --*nlines; + return true; } - + mimeSource->getChar(&c); return false; } @@ -203,8 +203,8 @@ void Binc::MimePart::parseHeader(Binc::Header *header, unsigned int *nlines) //------------------------------------------------------------------------ void Binc::MimePart::analyzeHeader(Binc::Header *header, bool *multipart, - bool *messagerfc822, string *subtype, - string *boundary) + bool *messagerfc822, string *subtype, + string *boundary) { using namespace ::Binc; @@ -228,43 +228,43 @@ void Binc::MimePart::analyzeHeader(Binc::Header *header, bool *multipart, lowercase(key); if (key == "multipart") { - *multipart = true; - lowercase(value); - *subtype = value; + *multipart = true; + lowercase(value); + *subtype = value; } else if (key == "message") { - lowercase(value); - if (value == "rfc822") - *messagerfc822 = true; + lowercase(value); + if (value == "rfc822") + *messagerfc822 = true; } } for (vector::const_iterator i = types.begin(); - i != types.end(); ++i) { + i != types.end(); ++i) { string element = *i; trim(element); if (element.find("=") != string::npos) { - string::size_type pos = element.find('='); - string key = element.substr(0, pos); - string value = element.substr(pos + 1); - - lowercase(key); - trim(key); + string::size_type pos = element.find('='); + string key = element.substr(0, pos); + string value = element.substr(pos + 1); + + lowercase(key); + trim(key); - if (key == "boundary") { - trim(value, " \""); - *boundary = value; - } + if (key == "boundary") { + trim(value, " \""); + *boundary = value; + } } } } } void Binc::MimePart::parseMessageRFC822(vector *members, - bool *foundendofpart, - unsigned int *bodylength, - unsigned int *nbodylines, - const string &toboundary) + bool *foundendofpart, + unsigned int *bodylength, + unsigned int *nbodylines, + const string &toboundary) { using namespace ::Binc; @@ -301,7 +301,7 @@ void Binc::MimePart::parseMessageRFC822(vector *members, } bool Binc::MimePart::skipUntilBoundary(const string &delimiter, - unsigned int *nlines, bool *eof) + unsigned int *nlines, bool *eof) { string::size_type endpos = delimiter.length(); char *delimiterqueue = 0; @@ -337,7 +337,7 @@ bool Binc::MimePart::skipUntilBoundary(const string &delimiter, delimiterpos = 0; if (compareStringToQueue(delimiterStr, delimiterqueue, - delimiterpos, int(endpos))) { + delimiterpos, int(endpos))) { foundBoundary = true; break; } @@ -354,9 +354,9 @@ bool Binc::MimePart::skipUntilBoundary(const string &delimiter, // and need to check if it is immediately followed by another boundary // (in this case, we give up our final CRLF in its favour) inline void Binc::MimePart::postBoundaryProcessing(bool *eof, - unsigned int *nlines, - int *boundarysize, - bool *foundendofpart) + unsigned int *nlines, + int *boundarysize, + bool *foundendofpart) { // Read two more characters. This may be CRLF, it may be "--" and // it may be any other two characters. @@ -381,16 +381,16 @@ inline void Binc::MimePart::postBoundaryProcessing(bool *eof, if (a == '-' && b == '-') { *foundendofpart = true; *boundarysize += 2; - + if (!mimeSource->getChar(&a)) - *eof = true; + *eof = true; if (a == '\n') - ++*nlines; - + ++*nlines; + if (!mimeSource->getChar(&b)) - *eof = true; + *eof = true; if (b == '\n') - ++*nlines; + ++*nlines; } // If the boundary is followed by CRLF, we need to handle the @@ -400,19 +400,19 @@ inline void Binc::MimePart::postBoundaryProcessing(bool *eof, if (a == '\r' && b == '\n') { // Get 2 more if (!mimeSource->getChar(&a) || !mimeSource->getChar(&b)) { - *eof = true; + *eof = true; } else if (a == '-' && b == '-') { - MPFDEB((stderr, "BINC: consecutive delimiters, giving up CRLF\n")); - mimeSource->ungetChar(); - mimeSource->ungetChar(); - mimeSource->ungetChar(); - mimeSource->ungetChar(); + MPFDEB((stderr, "BINC: consecutive delimiters, giving up CRLF\n")); + mimeSource->ungetChar(); + mimeSource->ungetChar(); + mimeSource->ungetChar(); + mimeSource->ungetChar(); } else { - // We unget the 2 chars, and keep our crlf (increasing our own size) - MPFDEB((stderr, "BINC: keeping my CRLF\n")); - mimeSource->ungetChar(); - mimeSource->ungetChar(); - *boundarysize += 2; + // We unget the 2 chars, and keep our crlf (increasing our own size) + MPFDEB((stderr, "BINC: keeping my CRLF\n")); + mimeSource->ungetChar(); + mimeSource->ungetChar(); + *boundarysize += 2; } } else { @@ -424,17 +424,17 @@ inline void Binc::MimePart::postBoundaryProcessing(bool *eof, } void Binc::MimePart::parseMultipart(const string &boundary, - const string &toboundary, - bool *eof, - unsigned int *nlines, - int *boundarysize, - bool *foundendofpart, - unsigned int *bodylength, - vector *members) + const string &toboundary, + bool *eof, + unsigned int *nlines, + int *boundarysize, + bool *foundendofpart, + unsigned int *bodylength, + vector *members) { MPFDEB((stderr, "BINC: ParseMultipart: boundary [%s], toboundary[%s]\n", - boundary.c_str(), - toboundary.c_str())); + boundary.c_str(), + toboundary.c_str())); using namespace ::Binc; unsigned int bodystartoffsetcrlf = mimeSource->getOffset(); @@ -462,8 +462,8 @@ void Binc::MimePart::parseMultipart(const string &boundary, // final boundary. int bsize = 0; if (m.doParseFull(mimeSource, boundary, bsize)) { - quit = true; - *boundarysize = bsize; + quit = true; + *boundarysize = bsize; } members->push_back(m); @@ -502,14 +502,14 @@ void Binc::MimePart::parseMultipart(const string &boundary, } void Binc::MimePart::parseSinglePart(const string &toboundary, - int *boundarysize, - unsigned int *nbodylines, - unsigned int *nlines, - bool *eof, bool *foundendofpart, - unsigned int *bodylength) + int *boundarysize, + unsigned int *nbodylines, + unsigned int *nlines, + bool *eof, bool *foundendofpart, + unsigned int *bodylength) { MPFDEB((stderr, "BINC: parseSinglePart, boundary [%s]\n", - toboundary.c_str())); + toboundary.c_str())); using namespace ::Binc; unsigned int bodystartoffsetcrlf = mimeSource->getOffset(); @@ -550,7 +550,7 @@ void Binc::MimePart::parseSinglePart(const string &toboundary, boundarypos = 0; if (compareStringToQueue(_toboundaryStr, boundaryqueue, - boundarypos, int(endpos))) { + boundarypos, int(endpos))) { *boundarysize = static_cast(_toboundary.length()); break; } @@ -580,12 +580,12 @@ void Binc::MimePart::parseSinglePart(const string &toboundary, *bodylength = 0; } MPFDEB((stderr, "BINC: parseSimple ret: bodylength %d, boundarysize %d\n", - *bodylength, *boundarysize)); + *bodylength, *boundarysize)); } //------------------------------------------------------------------------ int Binc::MimePart::doParseFull(MimeInputSource *ms, const string &toboundary, - int &boundarysize) + int &boundarysize) { MPFDEB((stderr, "BINC: doParsefull, toboundary[%s]\n", toboundary.c_str())); mimeSource = ms; @@ -610,15 +610,15 @@ int Binc::MimePart::doParseFull(MimeInputSource *ms, const string &toboundary, if (messagerfc822) { parseMessageRFC822(&members, &foundendofpart, &bodylength, - &nbodylines, toboundary); + &nbodylines, toboundary); } else if (multipart) { parseMultipart(boundary, toboundary, &eof, &nlines, &boundarysize, - &foundendofpart, &bodylength, - &members); + &foundendofpart, &bodylength, + &members); } else { parseSinglePart(toboundary, &boundarysize, &nbodylines, &nlines, - &eof, &foundendofpart, &bodylength); + &eof, &foundendofpart, &bodylength); } MPFDEB((stderr, "BINC: doParsefull ret, toboundary[%s]\n", toboundary.c_str())); diff --git a/src/bincimapmime/mime-parseonlyheader.cc b/src/bincimapmime/mime-parseonlyheader.cc index 1751eb68..00d208b8 100644 --- a/src/bincimapmime/mime-parseonlyheader.cc +++ b/src/bincimapmime/mime-parseonlyheader.cc @@ -107,27 +107,27 @@ int Binc::MimePart::doParseOnlyHeader(MimeInputSource *ms) // read name while (1) { if (!mimeSource->getChar(&c)) { - quit = true; - break; + quit = true; + break; } if (c == '\n') ++nlines; if (c == ':') break; if (c == '\n') { for (int i = int(name.length()) - 1; i >= 0; --i) - mimeSource->ungetChar(); + mimeSource->ungetChar(); - quit = true; - name.clear(); - break; + quit = true; + name.clear(); + break; } name += c; if (name.length() == 2 && name.substr(0, 2) == "\r\n") { - name.clear(); - quit = true; - break; + name.clear(); + quit = true; + break; } } @@ -140,36 +140,36 @@ int Binc::MimePart::doParseOnlyHeader(MimeInputSource *ms) while (!quit) { if (!mimeSource->getChar(&c)) { - quit = true; - break; + quit = true; + break; } if (c == '\n') ++nlines; for (int i = 0; i < 3; ++i) - cqueue[i] = cqueue[i + 1]; + cqueue[i] = cqueue[i + 1]; cqueue[3] = c; if (strncmp(cqueue, "\r\n\r\n", 4) == 0) { - quit = true; - break; + quit = true; + break; } if (cqueue[2] == '\n') { - // guess the mime rfc says what can not appear on the beginning - // of a line. - if (!isspace(cqueue[3])) { - if (content.length() > 2) - content.resize(content.length() - 2); + // guess the mime rfc says what can not appear on the beginning + // of a line. + if (!isspace(cqueue[3])) { + if (content.length() > 2) + content.resize(content.length() - 2); - trim(content); - h.add(name, content); + trim(content); + h.add(name, content); - name = c; - content.clear(); - break; - } + name = c; + content.clear(); + break; + } } content += c; diff --git a/src/bincimapmime/mime-printbody.cc b/src/bincimapmime/mime-printbody.cc index 11606147..e196ad9a 100644 --- a/src/bincimapmime/mime-printbody.cc +++ b/src/bincimapmime/mime-printbody.cc @@ -34,8 +34,8 @@ using namespace ::std; void Binc::MimePart::getBody(string &s, - unsigned int startoffset, - unsigned int length) const + unsigned int startoffset, + unsigned int length) const { mimeSource->reset(); mimeSource->seek(bodystartoffsetcrlf + startoffset); diff --git a/src/bincimapmime/mime-utils.h b/src/bincimapmime/mime-utils.h index 7bac437a..83a5d62e 100644 --- a/src/bincimapmime/mime-utils.h +++ b/src/bincimapmime/mime-utils.h @@ -35,7 +35,7 @@ using namespace ::std; #endif /* NO_NAMESPACES */ inline bool compareStringToQueue(const char *s_in, char *bqueue, - int pos, int size) + int pos, int size) { for (int i = 0; i < size; ++i) { if (s_in[i] != bqueue[pos]) diff --git a/src/bincimapmime/trbinc.cc b/src/bincimapmime/trbinc.cc index 1e71e187..bc51af6b 100644 --- a/src/bincimapmime/trbinc.cc +++ b/src/bincimapmime/trbinc.cc @@ -44,8 +44,8 @@ Usage(void) static int op_flags; #define OPT_MOINS 0x1 -#define OPT_s 0x2 -#define OPT_b 0x4 +#define OPT_s 0x2 +#define OPT_b 0x4 #define DEFCOUNT 10 @@ -60,31 +60,31 @@ int main(int argc, char **argv) argc--; argv++; while (argc > 0 && **argv == '-') { - (*argv)++; - if (!(**argv)) - /* Cas du "adb - core" */ - Usage(); - while (**argv) - switch (*(*argv)++) { - case 's': op_flags |= OPT_s; break; - case 'b': op_flags |= OPT_b; if (argc < 2) Usage(); - if ((sscanf(*(++argv), "%d", &count)) != 1) - Usage(); - argc--; - goto b1; - default: Usage(); break; - } + (*argv)++; + if (!(**argv)) + /* Cas du "adb - core" */ + Usage(); + while (**argv) + switch (*(*argv)++) { + case 's': op_flags |= OPT_s; break; + case 'b': op_flags |= OPT_b; if (argc < 2) Usage(); + if ((sscanf(*(++argv), "%d", &count)) != 1) + Usage(); + argc--; + goto b1; + default: Usage(); break; + } b1: argc--; argv++; } if (argc != 1) - Usage(); + Usage(); char *mfile = *argv++;argc--; int fd; if ((fd = open(mfile, 0)) < 0) { - perror("Opening"); - exit(1); + perror("Opening"); + exit(1); } Binc::MimeDocument doc; @@ -97,30 +97,30 @@ int main(int argc, char **argv) fprintf(stderr, "Size: %d\n", size); cp = (char *)malloc(size); if (cp==0) { - fprintf(stderr, "Malloc %d failed\n", size); - exit(1); + fprintf(stderr, "Malloc %d failed\n", size); + exit(1); } int n; if ((n=read(fd, cp, size)) != size) { - fprintf(stderr, "Read failed: requested %d, got %d\n", size, n); - exit(1); + fprintf(stderr, "Read failed: requested %d, got %d\n", size, n); + exit(1); } std::stringstream s(string(cp, size), ios::in); doc.parseFull(s); #endif if (!doc.isHeaderParsed() && !doc.isAllParsed()) { - fprintf(stderr, "Parse error\n"); - exit(1); + fprintf(stderr, "Parse error\n"); + exit(1); } close(fd); Binc::HeaderItem hi; for (int i = 0; i < nh ; i++) { - if (!doc.h.getFirstHeader(hnames[i], hi)) { - fprintf(stderr, "No %s\n", hnames[i]); - exit(1); - } - printf("%s: %s\n", hnames[i], hi.getValue().c_str()); + if (!doc.h.getFirstHeader(hnames[i], hi)) { + fprintf(stderr, "No %s\n", hnames[i]); + exit(1); + } + printf("%s: %s\n", hnames[i], hi.getValue().c_str()); } exit(0); } diff --git a/src/common/rclconfig.cpp b/src/common/rclconfig.cpp index a08e8585..663e9d0e 100644 --- a/src/common/rclconfig.cpp +++ b/src/common/rclconfig.cpp @@ -1571,7 +1571,7 @@ vector RclConfig::getSkippedPaths() const skpl.push_back(getDbDir()); skpl.push_back(getConfDir()); #ifdef _WIN32 - skpl.push_back(TempFile::rcltmpdir()); + skpl.push_back(TempFile::rcltmpdir()); #endif if (getCacheDir().compare(getConfDir())) { skpl.push_back(getCacheDir()); diff --git a/src/common/rclconfig.h b/src/common/rclconfig.h index 173f6d8b..6ac19be6 100644 --- a/src/common/rclconfig.h +++ b/src/common/rclconfig.h @@ -100,7 +100,7 @@ class RclConfig { RclConfig(const RclConfig &r); ~RclConfig() { - freeAll(); + freeAll(); } // Return a writable clone of the main config. This belongs to the @@ -136,9 +136,9 @@ class RclConfig { bool getConfParam(const string &name, string &value, bool shallow=false) const { - if (m_conf == 0) - return false; - return m_conf->get(name, value, m_keydir, shallow); + if (m_conf == 0) + return false; + return m_conf->get(name, value, m_keydir, shallow); } /** Variant with autoconversion to int */ bool getConfParam(const string &name, int *value, bool shallow=false) const; @@ -166,7 +166,7 @@ class RclConfig { */ vector getConfNames(const char *pattern = 0) const { - return m_conf->getNames(m_keydir, pattern); + return m_conf->getNames(m_keydir, pattern); } /** Check if name exists anywhere in config */ @@ -207,7 +207,7 @@ class RclConfig { /** Do path translation according to the ptrans table */ void urlrewrite(const string& dbdir, string& url) const; ConfSimple *getPTrans() { - return m_ptrans; + return m_ptrans; } /** Get Web Queue directory name */ string getWebQueueDir() const; @@ -221,7 +221,7 @@ class RclConfig { /** Get list of skipped paths patterns. Doesn't depend on the keydir */ vector getSkippedPaths() const; /** Get list of skipped paths patterns, daemon version (may add some) - Doesn't depend on the keydir */ + Doesn't depend on the keydir */ vector getDaemSkippedPaths() const; /** Return list of no content suffixes. Used by confgui, indexing uses @@ -260,7 +260,7 @@ class RclConfig { * @param whole the raw value. No way to escape a semi-colon in there. */ static bool valueSplitAttributes(const string& whole, string& value, - ConfSimple& attrs) ; + ConfSimple& attrs) ; /** Compute difference between 'base' and 'changed', as elements to be * added and substracted from base. Input and output strings are in @@ -315,7 +315,7 @@ class RclConfig { /** mimeview: get/set external viewer exec string(s) for mimetype(s) */ string getMimeViewerDef(const string &mimetype, const string& apptag, - bool useall) const; + bool useall) const; set getMimeViewerAllEx() const; bool setMimeViewerAllEx(const set& allex); bool getMimeViewerDefs(vector >&) const; @@ -348,21 +348,21 @@ class RclConfig { string findFilter(const string& cmd) const; /** Thread config init is not done automatically because not all - programs need it and it uses the debug log so that it's better to - call it after primary init */ + programs need it and it uses the debug log so that it's better to + call it after primary init */ void initThrConf(); const string& getOrigCwd() { - return o_origcwd; + return o_origcwd; } RclConfig& operator=(const RclConfig &r) { - if (this != &r) { - freeAll(); - initFrom(r); - } - return *this; + if (this != &r) { + freeAll(); + initFrom(r); + } + return *this; } friend class ParamStale; diff --git a/src/common/rclinit.cpp b/src/common/rclinit.cpp index ad7c7dc3..cba4b1bb 100644 --- a/src/common/rclinit.cpp +++ b/src/common/rclinit.cpp @@ -77,29 +77,29 @@ void initAsyncSigs(void (*sigcleanup)(int)) // Install app signal handler if (sigcleanup) { - struct sigaction action; - action.sa_handler = sigcleanup; - action.sa_flags = 0; - sigemptyset(&action.sa_mask); - for (unsigned int i = 0; i < sizeof(catchedSigs) / sizeof(int); i++) - if (signal(catchedSigs[i], SIG_IGN) != SIG_IGN) { - if (sigaction(catchedSigs[i], &action, 0) < 0) { - perror("Sigaction failed"); - } - } + struct sigaction action; + action.sa_handler = sigcleanup; + action.sa_flags = 0; + sigemptyset(&action.sa_mask); + for (unsigned int i = 0; i < sizeof(catchedSigs) / sizeof(int); i++) + if (signal(catchedSigs[i], SIG_IGN) != SIG_IGN) { + if (sigaction(catchedSigs[i], &action, 0) < 0) { + perror("Sigaction failed"); + } + } } // Install log rotate sig handler { - struct sigaction action; - action.sa_handler = siglogreopen; - action.sa_flags = 0; - sigemptyset(&action.sa_mask); - if (signal(SIGHUP, SIG_IGN) != SIG_IGN) { - if (sigaction(SIGHUP, &action, 0) < 0) { - perror("Sigaction failed"); - } - } + struct sigaction action; + action.sa_handler = siglogreopen; + action.sa_flags = 0; + sigemptyset(&action.sa_mask); + if (signal(SIGHUP, SIG_IGN) != SIG_IGN) { + if (sigaction(SIGHUP, &action, 0) < 0) { + perror("Sigaction failed"); + } + } } } void recoll_exitready() @@ -150,10 +150,10 @@ static BOOL WINAPI CtrlHandler(DWORD fdwCtrlType) { l_sigcleanup(SIGINT); LOGDEB0("CtrlHandler: waiting for exit ready\n" ); - DWORD res = WaitForSingleObject(eWorkFinished, INFINITE); - if (res != WAIT_OBJECT_0) { + DWORD res = WaitForSingleObject(eWorkFinished, INFINITE); + if (res != WAIT_OBJECT_0) { LOGERR("CtrlHandler: exit ack wait failed\n" ); - } + } LOGDEB0("CtrlHandler: got exit ready event, exiting\n" ); return TRUE; } @@ -242,14 +242,14 @@ void initAsyncSigs(void (*sigcleanup)(int)) // Install app signal handler if (sigcleanup) { l_sigcleanup = sigcleanup; - for (unsigned int i = 0; i < sizeof(catchedSigs) / sizeof(int); i++) { - if (signal(catchedSigs[i], SIG_IGN) != SIG_IGN) { - signal(catchedSigs[i], sigcleanup); - } + for (unsigned int i = 0; i < sizeof(catchedSigs) / sizeof(int); i++) { + if (signal(catchedSigs[i], SIG_IGN) != SIG_IGN) { + signal(catchedSigs[i], sigcleanup); + } } } - CreateThread(NULL, 0, RunInvisibleWindowThread, NULL, 0, &tid); + CreateThread(NULL, 0, RunInvisibleWindowThread, NULL, 0, &tid); SetConsoleCtrlHandler((PHANDLER_ROUTINE)CtrlHandler, TRUE); eWorkFinished = CreateEvent(NULL, TRUE, FALSE, NULL); if (eWorkFinished == INVALID_HANDLE_VALUE) { @@ -267,11 +267,11 @@ void recoll_exitready() #endif RclConfig *recollinit(int flags, - void (*cleanup)(void), void (*sigcleanup)(int), - string &reason, const string *argcnf) + void (*cleanup)(void), void (*sigcleanup)(int), + string &reason, const string *argcnf) { if (cleanup) - atexit(cleanup); + atexit(cleanup); #if defined(MACPORTS) || defined(HOMEBREW) // The MACPORTS and HOMEBREW flags are set by the resp. portfile @@ -303,12 +303,12 @@ RclConfig *recollinit(int flags, RclConfig *config = new RclConfig(argcnf); if (!config || !config->ok()) { - reason = "Configuration could not be built:\n"; - if (config) - reason += config->getReason(); - else - reason += "Out of memory ?"; - return 0; + reason = "Configuration could not be built:\n"; + if (config) + reason += config->getReason(); + else + reason += "Out of memory ?"; + return 0; } TextSplit::staticConfInit(config); @@ -318,8 +318,8 @@ RclConfig *recollinit(int flags, // ones. string logfilename, loglevel; if (flags & RCLINIT_DAEMON) { - config->getConfParam(string("daemlogfilename"), logfilename); - config->getConfParam(string("daemloglevel"), loglevel); + config->getConfParam(string("daemlogfilename"), logfilename); + config->getConfParam(string("daemloglevel"), loglevel); } if (flags & RCLINIT_IDX) { if (logfilename.empty()) { @@ -339,22 +339,22 @@ RclConfig *recollinit(int flags, } if (logfilename.empty()) - config->getConfParam(string("logfilename"), logfilename); + config->getConfParam(string("logfilename"), logfilename); if (loglevel.empty()) - config->getConfParam(string("loglevel"), loglevel); + config->getConfParam(string("loglevel"), loglevel); // Initialize logging if (!logfilename.empty()) { - logfilename = path_tildexpand(logfilename); - // If not an absolute path or stderr, compute relative to config dir. - if (!path_isabsolute(logfilename) && + logfilename = path_tildexpand(logfilename); + // If not an absolute path or stderr, compute relative to config dir. + if (!path_isabsolute(logfilename) && logfilename.compare("stderr")) { - logfilename = path_cat(config->getConfDir(), logfilename); - } + logfilename = path_cat(config->getConfDir(), logfilename); + } Logger::getTheLog("")->reopen(logfilename); } if (!loglevel.empty()) { - int lev = atoi(loglevel.c_str()); + int lev = atoi(loglevel.c_str()); Logger::getTheLog("")->setLogLevel(Logger::LogLevel(lev)); } LOGINF(Rcl::version_string() << " [" << config->getConfDir() << "]\n"); @@ -378,7 +378,7 @@ RclConfig *recollinit(int flags, // Init Unac translation exceptions string unacex; if (config->getConfParam("unac_except_trans", unacex) && !unacex.empty()) - unac_set_except_translations(unacex.c_str()); + unac_set_except_translations(unacex.c_str()); #ifndef IDX_THREADS ExecCmd::useVfork(true); @@ -393,23 +393,23 @@ RclConfig *recollinit(int flags, bool novfork; config->getConfParam("novfork", &novfork); if (novfork) { - LOGDEB0("rclinit: will use fork() for starting commands\n" ); + LOGDEB0("rclinit: will use fork() for starting commands\n" ); ExecCmd::useVfork(false); } else { - LOGDEB0("rclinit: will use vfork() for starting commands\n" ); - ExecCmd::useVfork(true); + LOGDEB0("rclinit: will use vfork() for starting commands\n" ); + ExecCmd::useVfork(true); } #endif int flushmb; if (config->getConfParam("idxflushmb", &flushmb) && flushmb > 0) { - LOGDEB1("rclinit: idxflushmb=" << flushmb << + LOGDEB1("rclinit: idxflushmb=" << flushmb << ", set XAPIAN_FLUSH_THRESHOLD to 10E6\n"); - static const char *cp = "XAPIAN_FLUSH_THRESHOLD=1000000"; + static const char *cp = "XAPIAN_FLUSH_THRESHOLD=1000000"; #ifdef PUTENV_ARG_CONST - ::putenv(cp); + ::putenv(cp); #else - ::putenv(strdup(cp)); + ::putenv(strdup(cp)); #endif } @@ -425,7 +425,7 @@ void recoll_threadinit() sigemptyset(&sset); for (unsigned int i = 0; i < sizeof(catchedSigs) / sizeof(int); i++) - sigaddset(&sset, catchedSigs[i]); + sigaddset(&sset, catchedSigs[i]); sigaddset(&sset, SIGHUP); pthread_sigmask(SIG_BLOCK, &sset, 0); #else diff --git a/src/common/syngroups.cpp b/src/common/syngroups.cpp index 226ceaf1..5bd33f2b 100644 --- a/src/common/syngroups.cpp +++ b/src/common/syngroups.cpp @@ -99,7 +99,7 @@ bool SynGroups::setfile(const string& fn) if (fn.empty()) { delete m; m = 0; - return true; + return true; } if (m->samefile(fn)) { @@ -111,9 +111,9 @@ bool SynGroups::setfile(const string& fn) ifstream input; input.open(fn.c_str(), ios::in); if (!input.is_open()) { - LOGSYSERR("SynGroups:setfile", "open", fn); - return false; - } + LOGSYSERR("SynGroups:setfile", "open", fn); + return false; + } string cline; bool appending = false; @@ -123,18 +123,18 @@ bool SynGroups::setfile(const string& fn) for (;;) { cline.clear(); - getline(input, cline); - if (!input.good()) { - if (input.bad()) { + getline(input, cline); + if (!input.good()) { + if (input.bad()) { LOGERR("Syngroup::setfile(" << fn << "):Parse: input.bad()\n"); - return false; - } - // Must be eof ? But maybe we have a partial line which - // must be processed. This happens if the last line before - // eof ends with a backslash, or there is no final \n + return false; + } + // Must be eof ? But maybe we have a partial line which + // must be processed. This happens if the last line before + // eof ends with a backslash, or there is no final \n eof = true; - } - lnum++; + } + lnum++; { string::size_type pos = cline.find_last_not_of("\n\r"); @@ -145,46 +145,46 @@ bool SynGroups::setfile(const string& fn) } } - if (appending) - line += cline; - else - line = cline; + if (appending) + line += cline; + else + line = cline; - // Note that we trim whitespace before checking for backslash-eol - // This avoids invisible whitespace problems. - trimstring(line); - if (line.empty() || line.at(0) == '#') { + // Note that we trim whitespace before checking for backslash-eol + // This avoids invisible whitespace problems. + trimstring(line); + if (line.empty() || line.at(0) == '#') { if (eof) break; - continue; - } - if (line[line.length() - 1] == '\\') { - line.erase(line.length() - 1); - appending = true; - continue; - } - appending = false; + continue; + } + if (line[line.length() - 1] == '\\') { + line.erase(line.length() - 1); + appending = true; + continue; + } + appending = false; - vector words; - if (!stringToStrings(line, words)) { - LOGERR("SynGroups:setfile: " << fn << ": bad line " << lnum << + vector words; + if (!stringToStrings(line, words)) { + LOGERR("SynGroups:setfile: " << fn << ": bad line " << lnum << ": " << line << "\n"); - continue; - } + continue; + } - if (words.empty()) - continue; - if (words.size() == 1) { - LOGERR("Syngroup::setfile(" << fn << "):single term group at line " + if (words.empty()) + continue; + if (words.size() == 1) { + LOGERR("Syngroup::setfile(" << fn << "):single term group at line " << lnum << " ??\n"); - continue; - } + continue; + } - m->groups.push_back(words); - for (const auto& word : words) { - m->terms[word] = m->groups.size()-1; - } - LOGDEB1("SynGroups::setfile: group: [" << + m->groups.push_back(words); + for (const auto& word : words) { + m->terms[word] = m->groups.size()-1; + } + LOGDEB1("SynGroups::setfile: group: [" << stringsToString(m->groups.back()) << "]\n"); } LOGDEB("SynGroups::setfile: got " << m->groups.size() << @@ -198,12 +198,12 @@ vector SynGroups::getgroup(const string& term) { vector ret; if (!ok()) - return ret; + return ret; const auto it1 = m->terms.find(term); if (it1 == m->terms.end()) { - LOGDEB0("SynGroups::getgroup: [" << term << "] not found in map\n"); - return ret; + LOGDEB0("SynGroups::getgroup: [" << term << "] not found in map\n"); + return ret; } unsigned int idx = it1->second; diff --git a/src/common/textsplitko.cpp b/src/common/textsplitko.cpp index 4e72c68f..83006f4d 100644 --- a/src/common/textsplitko.cpp +++ b/src/common/textsplitko.cpp @@ -113,9 +113,9 @@ static bool initCmd() #define STRSZT std::string::size_type #define ISASCIIPUNCTORCTL(c) (c <= 0x7f && \ - !((c >= 'A' && c <= 'Z') || \ - (c >= 'a' && c <= 'z') || \ - (c >= '0' && c <= '9'))) + !((c >= 'A' && c <= 'Z') || \ + (c >= 'a' && c <= 'z') || \ + (c >= '0' && c <= '9'))) bool TextSplit::ko_to_words(Utf8Iter *itp, unsigned int *cp) { @@ -153,10 +153,10 @@ bool TextSplit::ko_to_words(Utf8Iter *itp, unsigned int *cp) for (; !it.eof() && !it.error(); it++) { c = *it; if (!isHANGUL(c) && !ISASCIIPUNCTORCTL(c)) { - // Non-Korean: we keep on if encountering space and other - // ASCII punctuation. Allows sending longer pieces of text - // to the splitter (perf). Else break, process this piece, - // and return to the main splitter + // Non-Korean: we keep on if encountering space and other + // ASCII punctuation. Allows sending longer pieces of text + // to the splitter (perf). Else break, process this piece, + // and return to the main splitter LOGKO("ko_to_words: broke on " << (std::string)it << endl); break; } else { @@ -194,7 +194,7 @@ bool TextSplit::ko_to_words(Utf8Iter *itp, unsigned int *cp) } LOGKO("TextSplit::k_to_words: sending out " << inputdata.size() << - " bytes " << inputdata << endl); + " bytes " << inputdata << endl); // Overall data counter for slave restarts restartcount += inputdata.size(); diff --git a/src/common/unacpp.cpp b/src/common/unacpp.cpp index 4b7c1714..f0103f7b 100644 --- a/src/common/unacpp.cpp +++ b/src/common/unacpp.cpp @@ -29,7 +29,7 @@ using namespace std; bool unacmaybefold(const string &in, string &out, - const char *encoding, UnacOp what) + const char *encoding, UnacOp what) { char *cout = 0; size_t out_len; diff --git a/src/common/unacpp.h b/src/common/unacpp.h index 1152c85d..069d4b0f 100644 --- a/src/common/unacpp.h +++ b/src/common/unacpp.h @@ -22,7 +22,7 @@ // A small stringified wrapper for unac.c enum UnacOp {UNACOP_UNAC = 1, UNACOP_FOLD = 2, UNACOP_UNACFOLD = 3}; extern bool unacmaybefold(const std::string& in, std::string& out, - const char *encoding, UnacOp what); + const char *encoding, UnacOp what); // Utility function to determine if string begins with capital extern bool unaciscapital(const std::string& in); diff --git a/src/common/uproplist.h b/src/common/uproplist.h index 70ee946a..2e4d9e54 100644 --- a/src/common/uproplist.h +++ b/src/common/uproplist.h @@ -64,13 +64,13 @@ static const unsigned unipuncblocks[] = { 0x2600, 0x26FF, // Dingbats 0x2700, 0x27BF, - // Miscellaneous Mathematical Symbols-A + // Miscellaneous Mathematical Symbols-A 0x27C0, 0x27EF, // Supplemental Arrows-A 0x27F0, 0x27FF, // Supplemental Arrows-B 0x2900, 0x297F, - // Miscellaneous Mathematical Symbols-B + // Miscellaneous Mathematical Symbols-B 0x2980, 0x29FF, // Supplemental Mathematical Operators 0x2A00, 0x2AFF, @@ -170,7 +170,7 @@ static const unsigned int uniskip[] = { 0x200C, /* ZERO WIDTH NON-JOINER */ 0x200D, /* ZERO WIDTH JOINER */ 0x2060, /* WORD JOINER . Actually this should not be ignored but used to - * prevent a word break... */ + * prevent a word break... */ }; /* Things that would visibly break a block of text, rendering obvious the need diff --git a/src/common/utf8fn.cpp b/src/common/utf8fn.cpp index 75b2b60f..304f07c9 100644 --- a/src/common/utf8fn.cpp +++ b/src/common/utf8fn.cpp @@ -37,10 +37,10 @@ string compute_utf8fn(const RclConfig *config, const string& ifn, bool simple) string utf8fn; int ercnt; if (!transcode(lfn, utf8fn, charset, "UTF-8", &ercnt)) { - LOGERR("compute_utf8fn: fn transcode failure from [" << charset << + LOGERR("compute_utf8fn: fn transcode failure from [" << charset << "] to UTF-8 for: [" << lfn << "]\n"); } else if (ercnt) { - LOGDEB("compute_utf8fn: " << ercnt << " transcode errors from [" << + LOGDEB("compute_utf8fn: " << ercnt << " transcode errors from [" << charset << "] to UTF-8 for: [" << lfn << "]\n"); } LOGDEB1("compute_utf8fn: transcoded from [" << lfn << "] to [" << diff --git a/src/common/webstore.cpp b/src/common/webstore.cpp index eaa67bf3..5a070f6d 100644 --- a/src/common/webstore.cpp +++ b/src/common/webstore.cpp @@ -37,15 +37,15 @@ WebStore::WebStore(RclConfig *cnf) int maxmbs = 40; cnf->getConfParam("webcachemaxmbs", &maxmbs); if ((m_cache = new CirCache(ccdir)) == 0) { - LOGERR("WebStore: cant create CirCache object\n" ); - return; + LOGERR("WebStore: cant create CirCache object\n" ); + return; } if (!m_cache->create(int64_t(maxmbs)*1000*1024, CirCache::CC_CRUNIQUE)) { - LOGERR("WebStore: cache file creation failed: " << + LOGERR("WebStore: cache file creation failed: " << m_cache->getReason() << "\n"); - delete m_cache; - m_cache = 0; - return; + delete m_cache; + m_cache = 0; + return; } } @@ -57,17 +57,17 @@ WebStore::~WebStore() // Read document from cache. Return the metadata as an Rcl::Doc // @param htt Web Hit Type bool WebStore::getFromCache(const string& udi, Rcl::Doc &dotdoc, - string& data, string *htt) + string& data, string *htt) { string dict; if (m_cache == 0) { - LOGERR("WebStore::getFromCache: cache is null\n"); - return false; + LOGERR("WebStore::getFromCache: cache is null\n"); + return false; } if (!m_cache->get(udi, dict, &data)) { - LOGDEB("WebStore::getFromCache: get failed\n"); - return false; + LOGDEB("WebStore::getFromCache: get failed\n"); + return false; } ConfSimple cf(dict, 1); diff --git a/src/index/fetcher.cpp b/src/index/fetcher.cpp index f5cc4dd0..9533fd3a 100644 --- a/src/index/fetcher.cpp +++ b/src/index/fetcher.cpp @@ -35,17 +35,17 @@ std::unique_ptr docFetcherMake(RclConfig *config, string backend; idoc.getmeta(Rcl::Doc::keybcknd, &backend); if (backend.empty() || !backend.compare("FS")) { - return std::unique_ptr(new FSDocFetcher); + return std::unique_ptr(new FSDocFetcher); #ifndef DISABLE_WEB_INDEXER } else if (!backend.compare("BGL")) { - return std::unique_ptr(new WQDocFetcher); + return std::unique_ptr(new WQDocFetcher); #endif } else { std::unique_ptr f(exeDocFetcherMake(config, backend)); if (!f) { LOGERR("DocFetcherFactory: unknown backend [" << backend << "]\n"); } - return f; + return f; } } diff --git a/src/index/idxstatus.h b/src/index/idxstatus.h index bd5b20a3..6414f9aa 100644 --- a/src/index/idxstatus.h +++ b/src/index/idxstatus.h @@ -25,9 +25,9 @@ class DbIxStatus { public: enum Phase {DBIXS_NONE, - DBIXS_FILES, DBIXS_PURGE, DBIXS_STEMDB, DBIXS_CLOSING, - DBIXS_MONITOR, - DBIXS_DONE}; + DBIXS_FILES, DBIXS_PURGE, DBIXS_STEMDB, DBIXS_CLOSING, + DBIXS_MONITOR, + DBIXS_DONE}; Phase phase; std::string fn; // Last file processed int docsdone; // Documents actually updated @@ -43,9 +43,9 @@ class DbIxStatus { bool hasmonitor{false}; void reset() { - phase = DBIXS_FILES; - fn.erase(); - docsdone = filesdone = fileerrors = dbtotdocs = totfiles = 0; + phase = DBIXS_FILES; + fn.erase(); + docsdone = filesdone = fileerrors = dbtotdocs = totfiles = 0; } DbIxStatus() {reset();} }; diff --git a/src/index/indexer.cpp b/src/index/indexer.cpp index 26f6b900..59044276 100644 --- a/src/index/indexer.cpp +++ b/src/index/indexer.cpp @@ -132,14 +132,14 @@ bool ConfIndexer::runFirstIndexing() { // Indexing status file existing and not empty ? if (path_filesize(m_config->getIdxStatusFile()) > 0) { - LOGDEB0("ConfIndexer::runFirstIndexing: no: status file not empty\n"); - return false; + LOGDEB0("ConfIndexer::runFirstIndexing: no: status file not empty\n"); + return false; } // And only do this if the user has kept the default topdirs (~). vector tdl = m_config->getTopdirs(); if (tdl.size() != 1 || tdl[0].compare(path_canon(path_tildexpand("~")))) { - LOGDEB0("ConfIndexer::runFirstIndexing: no: not home only\n"); - return false; + LOGDEB0("ConfIndexer::runFirstIndexing: no: not home only\n"); + return false; } return true; } @@ -150,7 +150,7 @@ bool ConfIndexer::firstFsIndexingSequence() deleteZ(m_fsindexer); m_fsindexer = new FsIndexer(m_config, &m_db, m_updater); if (!m_fsindexer) { - return false; + return false; } int flushmb = m_db.getFlushMb(); m_db.setFlushMb(2); @@ -164,17 +164,17 @@ bool ConfIndexer::index(bool resetbefore, ixType typestorun, int flags) { Rcl::Db::OpenMode mode = resetbefore ? Rcl::Db::DbTrunc : Rcl::Db::DbUpd; if (!m_db.open(mode)) { - LOGERR("ConfIndexer: error opening database " << m_config->getDbDir() << + LOGERR("ConfIndexer: error opening database " << m_config->getDbDir() << " : " << m_db.getReason() << "\n"); addIdxReason("indexer", m_db.getReason()); - return false; + return false; } m_config->setKeyDir(cstr_null); if (typestorun & IxTFs) { - if (runFirstIndexing()) { - firstFsIndexingSequence(); - } + if (runFirstIndexing()) { + firstFsIndexingSequence(); + } deleteZ(m_fsindexer); m_fsindexer = new FsIndexer(m_config, &m_db, m_updater); if (!m_fsindexer || !m_fsindexer->index(flags)) { @@ -183,7 +183,7 @@ bool ConfIndexer::index(bool resetbefore, ixType typestorun, int flags) } else { addIdxReason("indexer", "Index creation failed. See log."); } - m_db.close(); + m_db.close(); return false; } } @@ -193,7 +193,7 @@ bool ConfIndexer::index(bool resetbefore, ixType typestorun, int flags) deleteZ(m_webindexer); m_webindexer = new WebQueueIndexer(m_config, &m_db, m_updater); if (!m_webindexer || !m_webindexer->index()) { - m_db.close(); + m_db.close(); addIdxReason("indexer", "Web index creation failed. See log"); return false; } @@ -203,10 +203,10 @@ bool ConfIndexer::index(bool resetbefore, ixType typestorun, int flags) // Get rid of all database entries that don't exist in the // filesystem anymore. Only if all *configured* indexers ran. if (m_updater && !m_updater->update(DbIxStatus::DBIXS_PURGE, "")) { - m_db.close(); + m_db.close(); addIdxReason("indexer", "Index purge failed. See log"); - return false; - } + return false; + } m_db.purge(); } @@ -214,22 +214,22 @@ bool ConfIndexer::index(bool resetbefore, ixType typestorun, int flags) // here. Makes no sense to check for cancel, we'll have to close // anyway if (m_updater) - m_updater->update(DbIxStatus::DBIXS_CLOSING, string()); + m_updater->update(DbIxStatus::DBIXS_CLOSING, string()); if (!m_db.close()) { - LOGERR("ConfIndexer::index: error closing database in " << + LOGERR("ConfIndexer::index: error closing database in " << m_config->getDbDir() << "\n"); addIdxReason("indexer", "Index close/flush failed. See log"); - return false; + return false; } if (m_updater && !m_updater->update(DbIxStatus::DBIXS_CLOSING, string())) - return false; + return false; bool ret = true; if (!createStemmingDatabases()) { - ret = false; + ret = false; } if (m_updater && !m_updater->update(DbIxStatus::DBIXS_CLOSING, string())) - return false; + return false; // Don't fail indexing because of an aspell issue: we ignore the status. // Messages were written to the reasons output @@ -237,7 +237,7 @@ bool ConfIndexer::index(bool resetbefore, ixType typestorun, int flags) clearMimeHandlerCache(); if (m_updater) - m_updater->update(DbIxStatus::DBIXS_DONE, string()); + m_updater->update(DbIxStatus::DBIXS_DONE, string()); return ret; } @@ -246,14 +246,14 @@ bool ConfIndexer::indexFiles(list& ifiles, int flag) list myfiles; string origcwd = m_config->getOrigCwd(); for (const auto& entry : ifiles) { - myfiles.push_back(path_canon(entry, &origcwd)); + myfiles.push_back(path_canon(entry, &origcwd)); } myfiles.sort(); if (!m_db.open(Rcl::Db::DbUpd)) { - LOGERR("ConfIndexer: indexFiles error opening database " << + LOGERR("ConfIndexer: indexFiles error opening database " << m_config->getDbDir() << "\n"); - return false; + return false; } m_config->setKeyDir(cstr_null); bool ret = false; @@ -280,9 +280,9 @@ bool ConfIndexer::indexFiles(list& ifiles, int flag) } // The close would be done in our destructor, but we want status here if (!m_db.close()) { - LOGERR("ConfIndexer::index: error closing database in " << + LOGERR("ConfIndexer::index: error closing database in " << m_config->getDbDir() << "\n"); - return false; + return false; } ifiles = myfiles; clearMimeHandlerCache(); @@ -297,7 +297,7 @@ bool ConfIndexer::updateDocs(vector &docs, IxFlag flag) docsToPaths(docs, paths); list files(paths.begin(), paths.end()); if (!files.empty()) { - return indexFiles(files, flag); + return indexFiles(files, flag); } return true; } @@ -307,14 +307,14 @@ bool ConfIndexer::purgeFiles(list &files, int flag) list myfiles; string origcwd = m_config->getOrigCwd(); for (const auto& entry : files) { - myfiles.push_back(path_canon(entry, &origcwd)); + myfiles.push_back(path_canon(entry, &origcwd)); } myfiles.sort(); if (!m_db.open(Rcl::Db::DbUpd)) { - LOGERR("ConfIndexer: purgeFiles error opening database " << + LOGERR("ConfIndexer: purgeFiles error opening database " << m_config->getDbDir() << "\n"); - return false; + return false; } bool ret = false; m_config->setKeyDir(cstr_null); @@ -337,9 +337,9 @@ bool ConfIndexer::purgeFiles(list &files, int flag) // The close would be done in our destructor, but we want status here if (!m_db.close()) { - LOGERR("ConfIndexer::purgefiles: error closing database in " << + LOGERR("ConfIndexer::purgefiles: error closing database in " << m_config->getDbDir() << "\n"); - return false; + return false; } return ret; } @@ -356,18 +356,18 @@ bool ConfIndexer::createStemmingDatabases() addIdxReason("stemming", "could not open db"); return false; } - vector langs; - stringToStrings(slangs, langs); + vector langs; + stringToStrings(slangs, langs); - // Get the list of existing stem dbs from the database (some may have - // been manually created, we just keep those from the config - vector dblangs = m_db.getStemLangs(); - vector::const_iterator it; - for (it = dblangs.begin(); it != dblangs.end(); it++) { - if (find(langs.begin(), langs.end(), *it) == langs.end()) - m_db.deleteStemDb(*it); - } - ret = ret && m_db.createStemDbs(langs); + // Get the list of existing stem dbs from the database (some may have + // been manually created, we just keep those from the config + vector dblangs = m_db.getStemLangs(); + vector::const_iterator it; + for (it = dblangs.begin(); it != dblangs.end(); it++) { + if (find(langs.begin(), langs.end(), *it) == langs.end()) + m_db.deleteStemDb(*it); + } + ret = ret && m_db.createStemDbs(langs); if (!ret) { addIdxReason("stemming", "stem db creation failed"); } @@ -379,7 +379,7 @@ bool ConfIndexer::createStemmingDatabases() bool ConfIndexer::createStemDb(const string &lang) { if (!m_db.open(Rcl::Db::DbUpd)) - return false; + return false; vector langs; stringToStrings(lang, langs); return m_db.createStemDbs(langs); @@ -397,32 +397,32 @@ bool ConfIndexer::createAspellDict() // it forever. static int noaspell = -12345; if (noaspell == -12345) { - noaspell = false; - m_config->getConfParam("noaspell", &noaspell); + noaspell = false; + m_config->getConfParam("noaspell", &noaspell); } if (noaspell) - return true; + return true; if (!m_db.open(Rcl::Db::DbRO)) { LOGERR("ConfIndexer::createAspellDict: could not open db\n"); - return false; + return false; } Aspell aspell(m_config); string reason; if (!aspell.init(reason)) { - LOGERR("ConfIndexer::createAspellDict: aspell init failed: " << + LOGERR("ConfIndexer::createAspellDict: aspell init failed: " << reason << "\n"); - noaspell = true; - return false; + noaspell = true; + return false; } LOGDEB("ConfIndexer::createAspellDict: creating dictionary\n"); if (!aspell.buildDict(m_db, reason)) { - LOGERR("ConfIndexer::createAspellDict: aspell buildDict failed: " << + LOGERR("ConfIndexer::createAspellDict: aspell buildDict failed: " << reason << "\n"); addIdxReason("aspell", reason); - noaspell = true; - return false; + noaspell = true; + return false; } #endif return true; diff --git a/src/index/indexer.h b/src/index/indexer.h index 16f09be1..a660edc4 100644 --- a/src/index/indexer.h +++ b/src/index/indexer.h @@ -45,7 +45,7 @@ class DbIxStatusUpdater { virtual bool update(DbIxStatus::Phase phase, const string& fn) { #ifdef IDX_THREADS - std::unique_lock lock(m_mutex); + std::unique_lock lock(m_mutex); #endif status.phase = phase; status.fn = fn; diff --git a/src/index/mimetype.cpp b/src/index/mimetype.cpp index b20c55df..3fc8a9f3 100644 --- a/src/index/mimetype.cpp +++ b/src/index/mimetype.cpp @@ -55,11 +55,11 @@ static string mimetypefromdata(RclConfig *cfg, const string &fn, bool usfc) #ifdef USE_SYSTEM_FILE_COMMAND if (usfc && mime.empty()) { - // Last resort: use "file -i", or its configured replacement. + // Last resort: use "file -i", or its configured replacement. // 'file' fallback if the configured command (default: // xdg-mime) is not found - static const vector tradfilecmd = {{FILE_PROG}, {"-i"}}; + static const vector tradfilecmd = {{FILE_PROG}, {"-i"}}; vector cmd; string scommand; @@ -81,55 +81,55 @@ static string mimetypefromdata(RclConfig *cfg, const string &fn, bool usfc) cmd = tradfilecmd; } - string result; + string result; LOGDEB2("mimetype: executing: [" << stringsToString(cmd) << "]\n"); - if (!ExecCmd::backtick(cmd, result)) { - LOGERR("mimetypefromdata: exec " << + if (!ExecCmd::backtick(cmd, result)) { + LOGERR("mimetypefromdata: exec " << stringsToString(cmd) << " failed\n"); - return string(); - } - trimstring(result, " \t\n\r"); - LOGDEB2("mimetype: systemfilecommand output [" << result << "]\n"); - - // The normal output from "file -i" looks like the following: - // thefilename.xxx: text/plain; charset=us-ascii - // Sometimes the semi-colon is missing like in: - // mimetype.cpp: text/x-c charset=us-ascii - // And sometimes we only get the mime type. This apparently happens - // when 'file' believes that the file name is binary + return string(); + } + trimstring(result, " \t\n\r"); + LOGDEB2("mimetype: systemfilecommand output [" << result << "]\n"); + + // The normal output from "file -i" looks like the following: + // thefilename.xxx: text/plain; charset=us-ascii + // Sometimes the semi-colon is missing like in: + // mimetype.cpp: text/x-c charset=us-ascii + // And sometimes we only get the mime type. This apparently happens + // when 'file' believes that the file name is binary // xdg-mime only outputs the MIME type. - // If there is no colon and there is a slash, this is hopefuly - // the mime type - if (result.find_first_of(":") == string::npos && - result.find_first_of("/") != string::npos) { - return result; - } + // If there is no colon and there is a slash, this is hopefuly + // the mime type + if (result.find_first_of(":") == string::npos && + result.find_first_of("/") != string::npos) { + return result; + } - // Else the result should begin with the file name. Get rid of it: - if (result.find(fn) != 0) { - // Garbage "file" output. Maybe the result of a charset - // conversion attempt? - LOGERR("mimetype: can't interpret output from [" << + // Else the result should begin with the file name. Get rid of it: + if (result.find(fn) != 0) { + // Garbage "file" output. Maybe the result of a charset + // conversion attempt? + LOGERR("mimetype: can't interpret output from [" << stringsToString(cmd) << "] : [" << result << "]\n"); - return string(); - } - result = result.substr(fn.size()); + return string(); + } + result = result.substr(fn.size()); - // Now should look like ": text/plain; charset=us-ascii" - // Split it, and take second field - list res; - stringToStrings(result, res); - if (res.size() <= 1) - return string(); - list::iterator it = res.begin(); - mime = *++it; - // Remove possible semi-colon at the end - trimstring(mime, " \t;"); + // Now should look like ": text/plain; charset=us-ascii" + // Split it, and take second field + list res; + stringToStrings(result, res); + if (res.size() <= 1) + return string(); + list::iterator it = res.begin(); + mime = *++it; + // Remove possible semi-colon at the end + trimstring(mime, " \t;"); - // File -i will sometimes return strange stuff (ie: "very small file") - if(mime.find("/") == string::npos) - mime.clear(); + // File -i will sometimes return strange stuff (ie: "very small file") + if(mime.find("/") == string::npos) + mime.clear(); } #endif //USE_SYSTEM_FILE_COMMAND @@ -139,24 +139,24 @@ static string mimetypefromdata(RclConfig *cfg, const string &fn, bool usfc) /// Guess mime type, first from suffix, then from file data. We also /// have a list of suffixes that we don't touch at all. string mimetype(const string &fn, const struct PathStat *stp, - RclConfig *cfg, bool usfc) + RclConfig *cfg, bool usfc) { // Use stat data if available to check for non regular files if (stp) { - // Note: the value used for directories is different from what - // file -i would print on Linux (inode/directory). Probably - // comes from bsd. Thos may surprise a user trying to use a - // 'mime:' filter with the query language, but it's not work - // changing (would force a reindex). - if (stp->pst_type == PathStat::PST_DIR) - return "inode/directory"; - if (stp->pst_type == PathStat::PST_SYMLINK) - return "inode/symlink"; - if (stp->pst_type != PathStat::PST_REGULAR) - return "inode/x-fsspecial"; - // Empty files are just this: avoid further errors with actual filters. - if (stp->pst_size == 0) - return "inode/x-empty"; + // Note: the value used for directories is different from what + // file -i would print on Linux (inode/directory). Probably + // comes from bsd. Thos may surprise a user trying to use a + // 'mime:' filter with the query language, but it's not work + // changing (would force a reindex). + if (stp->pst_type == PathStat::PST_DIR) + return "inode/directory"; + if (stp->pst_type == PathStat::PST_SYMLINK) + return "inode/symlink"; + if (stp->pst_type != PathStat::PST_REGULAR) + return "inode/x-fsspecial"; + // Empty files are just this: avoid further errors with actual filters. + if (stp->pst_size == 0) + return "inode/x-empty"; } string mtype; @@ -165,40 +165,40 @@ string mimetype(const string &fn, const struct PathStat *stp, // Extended attribute has priority on everything, as per: // http://freedesktop.org/wiki/CommonExtendedAttributes if (pxattr::get(fn, "mime_type", &mtype)) { - LOGDEB0("Mimetype: 'mime_type' xattr : [" << mtype << "]\n"); - if (mtype.empty()) { - LOGDEB0("Mimetype: getxattr() returned empty mime type !\n"); - } else { - return mtype; - } + LOGDEB0("Mimetype: 'mime_type' xattr : [" << mtype << "]\n"); + if (mtype.empty()) { + LOGDEB0("Mimetype: getxattr() returned empty mime type !\n"); + } else { + return mtype; + } } #endif if (cfg == 0) { - LOGERR("Mimetype: null config ??\n"); - return mtype; + LOGERR("Mimetype: null config ??\n"); + return mtype; } if (cfg->inStopSuffixes(fn)) { - LOGDEB("mimetype: fn [" << fn << "] in stopsuffixes\n"); - return mtype; + LOGDEB("mimetype: fn [" << fn << "] in stopsuffixes\n"); + return mtype; } // Compute file name suffix and search the mimetype map string::size_type dot = fn.find_first_of("."); while (dot != string::npos) { - string suff = stringtolower(fn.substr(dot)); - mtype = cfg->getMimeTypeFromSuffix(suff); - if (!mtype.empty() || dot >= fn.size() - 1) - break; - dot = fn.find_first_of(".", dot + 1); + string suff = stringtolower(fn.substr(dot)); + mtype = cfg->getMimeTypeFromSuffix(suff); + if (!mtype.empty() || dot >= fn.size() - 1) + break; + dot = fn.find_first_of(".", dot + 1); } // If type was not determined from suffix, examine file data. Can // only do this if we have an actual file (as opposed to a pure // name). if (mtype.empty() && stp) - mtype = mimetypefromdata(cfg, fn, usfc); + mtype = mimetypefromdata(cfg, fn, usfc); return mtype; } diff --git a/src/index/rclmon.h b/src/index/rclmon.h index 875b4a29..96015627 100644 --- a/src/index/rclmon.h +++ b/src/index/rclmon.h @@ -47,7 +47,7 @@ using std::multimap; class RclMonEvent { public: enum EvType {RCLEVT_NONE= 0, RCLEVT_MODIFY=1, RCLEVT_DELETE=2, - RCLEVT_DIRCREATE=3, RCLEVT_ISDIR=0x10}; + RCLEVT_DIRCREATE=3, RCLEVT_ISDIR=0x10}; string m_path; // Type and flags int m_etyp; @@ -61,13 +61,13 @@ class RclMonEvent { bool m_needidx; RclMonEvent() : m_etyp(RCLEVT_NONE), - m_itvsecs(0), m_minclock(0), m_needidx(false) {} + m_itvsecs(0), m_minclock(0), m_needidx(false) {} EvType evtype() {return EvType(m_etyp & 0xf);} int evflags() {return m_etyp & 0xf0;} }; enum RclMonitorOption {RCLMON_NONE=0, RCLMON_NOFORK=1, RCLMON_NOX11=2, - RCLMON_NOCONFCHECK=4}; + RCLMON_NOCONFCHECK=4}; /** * Monitoring event queue. This is the shared object between the main thread diff --git a/src/index/rclmonprc.cpp b/src/index/rclmonprc.cpp index f684e518..0cfdc006 100644 --- a/src/index/rclmonprc.cpp +++ b/src/index/rclmonprc.cpp @@ -142,19 +142,19 @@ public: std::condition_variable m_cond; RclEQData() - : m_config(0), m_ok(true) + : m_config(0), m_ok(true) { } void readDelayPats(int dfltsecs); DelayPat searchDelayPats(const string& path) { - for (vector::iterator it = m_delaypats.begin(); - it != m_delaypats.end(); it++) { - if (fnmatch(it->pattern.c_str(), path.c_str(), 0) == 0) { - return *it; - } - } - return DelayPat(); + for (vector::iterator it = m_delaypats.begin(); + it != m_delaypats.end(); it++) { + if (fnmatch(it->pattern.c_str(), path.c_str(), 0) == 0) { + return *it; + } + } + return DelayPat(); } void delayInsert(const queue_type::iterator &qit); }; @@ -162,30 +162,30 @@ public: void RclEQData::readDelayPats(int dfltsecs) { if (m_config == 0) - return; + return; string patstring; if (!m_config->getConfParam("mondelaypatterns", patstring) || - patstring.empty()) - return; + patstring.empty()) + return; vector dplist; if (!stringToStrings(patstring, dplist)) { - LOGERR("rclEQData: bad pattern list: [" << (patstring) << "]\n" ); - return; + LOGERR("rclEQData: bad pattern list: [" << (patstring) << "]\n" ); + return; } for (vector::iterator it = dplist.begin(); - it != dplist.end(); it++) { - string::size_type pos = it->find_last_of(":"); - DelayPat dp; - dp.pattern = it->substr(0, pos); - if (pos != string::npos && pos != it->size()-1) { - dp.seconds = atoi(it->substr(pos+1).c_str()); - } else { - dp.seconds = dfltsecs; - } - m_delaypats.push_back(dp); - LOGDEB2("rclmon::readDelayPats: add [" << (dp.pattern) << "] " << (dp.seconds) << "\n" ); + it != dplist.end(); it++) { + string::size_type pos = it->find_last_of(":"); + DelayPat dp; + dp.pattern = it->substr(0, pos); + if (pos != string::npos && pos != it->size()-1) { + dp.seconds = atoi(it->substr(pos+1).c_str()); + } else { + dp.seconds = dfltsecs; + } + m_delaypats.push_back(dp); + LOGDEB2("rclmon::readDelayPats: add [" << (dp.pattern) << "] " << (dp.seconds) << "\n" ); } } @@ -197,12 +197,12 @@ void RclEQData::delayInsert(const queue_type::iterator &qit) MONDEB("RclEQData::delayInsert: minclock " << qit->second.m_minclock << std::endl); for (delays_type::iterator dit = m_delays.begin(); - dit != m_delays.end(); dit++) { - queue_type::iterator qit1 = *dit; - if ((*qit1).second.m_minclock > qit->second.m_minclock) { - m_delays.insert(dit, qit); - return; - } + dit != m_delays.end(); dit++) { + queue_type::iterator qit1 = *dit; + if ((*qit1).second.m_minclock > qit->second.m_minclock) { + m_delays.insert(dit, qit); + return; + } } m_delays.push_back(qit); } @@ -220,7 +220,7 @@ RclMonEventQueue::~RclMonEventQueue() void RclMonEventQueue::setopts(int opts) { if (m_data) - m_data->m_opts = opts; + m_data->m_opts = opts; } /** Wait until there is something to process on the queue, or timeout. @@ -232,22 +232,22 @@ std::unique_lock RclMonEventQueue::wait(int seconds, bool *top) MONDEB("RclMonEventQueue::wait, seconds: " << seconds << std::endl); if (!empty()) { - MONDEB("RclMonEventQueue:: immediate return\n"); - return lock; + MONDEB("RclMonEventQueue:: immediate return\n"); + return lock; } int err; if (seconds > 0) { - if (top) - *top = false; - if (m_data->m_cond.wait_for(lock, std::chrono::seconds(seconds)) == + if (top) + *top = false; + if (m_data->m_cond.wait_for(lock, std::chrono::seconds(seconds)) == std::cv_status::timeout) { *top = true; MONDEB("RclMonEventQueue:: timeout\n"); return lock; } } else { - m_data->m_cond.wait(lock); + m_data->m_cond.wait(lock); } MONDEB("RclMonEventQueue:: non-timeout return\n"); return lock; @@ -269,16 +269,16 @@ RclConfig *RclMonEventQueue::getConfig() bool RclMonEventQueue::ok() { if (m_data == 0) { - LOGINFO("RclMonEventQueue: not ok: bad state\n" ); - return false; + LOGINFO("RclMonEventQueue: not ok: bad state\n" ); + return false; } if (stopindexing) { - LOGINFO("RclMonEventQueue: not ok: stop request\n" ); - return false; + LOGINFO("RclMonEventQueue: not ok: stop request\n" ); + return false; } if (!m_data->m_ok) { - LOGINFO("RclMonEventQueue: not ok: queue terminated\n" ); - return false; + LOGINFO("RclMonEventQueue: not ok: queue terminated\n" ); + return false; } return true; } @@ -295,24 +295,24 @@ void RclMonEventQueue::setTerminate() bool RclMonEventQueue::empty() { if (m_data == 0) { - MONDEB("RclMonEventQueue::empty(): true (m_data==0)\n"); - return true; + MONDEB("RclMonEventQueue::empty(): true (m_data==0)\n"); + return true; } if (!m_data->m_iqueue.empty()) { - MONDEB("RclMonEventQueue::empty(): false (m_iqueue not empty)\n"); - return true; + MONDEB("RclMonEventQueue::empty(): false (m_iqueue not empty)\n"); + return true; } if (m_data->m_dqueue.empty()) { - MONDEB("RclMonEventQueue::empty(): true (m_Xqueue both empty)\n"); - return true; + MONDEB("RclMonEventQueue::empty(): true (m_Xqueue both empty)\n"); + return true; } // Only dqueue has events. Have to check the delays (only the // first, earliest one): queue_type::iterator qit = *(m_data->m_delays.begin()); if (qit->second.m_minclock > time(0)) { - MONDEB("RclMonEventQueue::empty(): true (no delay ready " << + MONDEB("RclMonEventQueue::empty(): true (no delay ready " << qit->second.m_minclock << ")\n"); - return true; + return true; } MONDEB("RclMonEventQueue::empty(): returning false (delay expired)\n"); return false; @@ -329,36 +329,36 @@ RclMonEvent RclMonEventQueue::pop() // Look at the delayed events, get rid of the expired/unactive // ones, possibly return an expired/needidx one. while (!m_data->m_delays.empty()) { - delays_type::iterator dit = m_data->m_delays.begin(); - queue_type::iterator qit = *dit; - MONDEB("RclMonEventQueue::pop(): in delays: evt minclock " << - qit->second.m_minclock << std::endl); - if (qit->second.m_minclock <= now) { - if (qit->second.m_needidx) { - RclMonEvent ev = qit->second; - qit->second.m_minclock = time(0) + qit->second.m_itvsecs; - qit->second.m_needidx = false; - m_data->m_delays.erase(dit); - m_data->delayInsert(qit); - return ev; - } else { - // Delay elapsed without new update, get rid of event. - m_data->m_dqueue.erase(qit); - m_data->m_delays.erase(dit); - } - } else { - // This and following events are for later processing, we - // are done with the delayed event list. - break; - } + delays_type::iterator dit = m_data->m_delays.begin(); + queue_type::iterator qit = *dit; + MONDEB("RclMonEventQueue::pop(): in delays: evt minclock " << + qit->second.m_minclock << std::endl); + if (qit->second.m_minclock <= now) { + if (qit->second.m_needidx) { + RclMonEvent ev = qit->second; + qit->second.m_minclock = time(0) + qit->second.m_itvsecs; + qit->second.m_needidx = false; + m_data->m_delays.erase(dit); + m_data->delayInsert(qit); + return ev; + } else { + // Delay elapsed without new update, get rid of event. + m_data->m_dqueue.erase(qit); + m_data->m_delays.erase(dit); + } + } else { + // This and following events are for later processing, we + // are done with the delayed event list. + break; + } } // Look for non-delayed event if (!m_data->m_iqueue.empty()) { - queue_type::iterator qit = m_data->m_iqueue.begin(); - RclMonEvent ev = qit->second; - m_data->m_iqueue.erase(qit); - return ev; + queue_type::iterator qit = m_data->m_iqueue.begin(); + RclMonEvent ev = qit->second; + m_data->m_iqueue.erase(qit); + return ev; } return RclMonEvent(); @@ -376,32 +376,32 @@ bool RclMonEventQueue::pushEvent(const RclMonEvent &ev) DelayPat pat = m_data->searchDelayPats(ev.m_path); if (pat.seconds != 0) { - // Using delayed reindex queue. Need to take care of minclock and also - // insert into the in-minclock-order list - queue_type::iterator qit = m_data->m_dqueue.find(ev.m_path); - if (qit == m_data->m_dqueue.end()) { - // Not there yet, insert new - qit = - m_data->m_dqueue.insert(queue_type::value_type(ev.m_path, ev)).first; - // Set the time to next index to "now" as it has not been - // indexed recently (otherwise it would still be in the - // queue), and add the iterator to the delay queue. - qit->second.m_minclock = time(0); - qit->second.m_needidx = true; - qit->second.m_itvsecs = pat.seconds; - m_data->delayInsert(qit); - } else { - // Already in queue. Possibly update type but save minclock - // (so no need to touch m_delays). Flag as needing indexing - time_t saved_clock = qit->second.m_minclock; - qit->second = ev; - qit->second.m_minclock = saved_clock; - qit->second.m_needidx = true; - } + // Using delayed reindex queue. Need to take care of minclock and also + // insert into the in-minclock-order list + queue_type::iterator qit = m_data->m_dqueue.find(ev.m_path); + if (qit == m_data->m_dqueue.end()) { + // Not there yet, insert new + qit = + m_data->m_dqueue.insert(queue_type::value_type(ev.m_path, ev)).first; + // Set the time to next index to "now" as it has not been + // indexed recently (otherwise it would still be in the + // queue), and add the iterator to the delay queue. + qit->second.m_minclock = time(0); + qit->second.m_needidx = true; + qit->second.m_itvsecs = pat.seconds; + m_data->delayInsert(qit); } else { - // Immediate event: just insert it, erasing any previously - // existing entry - m_data->m_iqueue[ev.m_path] = ev; + // Already in queue. Possibly update type but save minclock + // (so no need to touch m_delays). Flag as needing indexing + time_t saved_clock = qit->second.m_minclock; + qit->second = ev; + qit->second.m_minclock = saved_clock; + qit->second.m_needidx = true; + } + } else { + // Immediate event: just insert it, erasing any previously + // existing entry + m_data->m_iqueue[ev.m_path] = ev; } m_data->m_cond.notify_all(); @@ -429,19 +429,19 @@ static bool expeditedIndexingRequested(RclConfig *conf) { static vector rqfiles; if (rqfiles.empty()) { - rqfiles.push_back(path_cat(conf->getConfDir(), "rclmonixnow")); - const char *cp; - if ((cp = getenv("RECOLL_CONFTOP"))) { - rqfiles.push_back(path_cat(cp, "rclmonixnow")); - } - if ((cp = getenv("RECOLL_CONFMID"))) { - rqfiles.push_back(path_cat(cp, "rclmonixnow")); - } + rqfiles.push_back(path_cat(conf->getConfDir(), "rclmonixnow")); + const char *cp; + if ((cp = getenv("RECOLL_CONFTOP"))) { + rqfiles.push_back(path_cat(cp, "rclmonixnow")); + } + if ((cp = getenv("RECOLL_CONFMID"))) { + rqfiles.push_back(path_cat(cp, "rclmonixnow")); + } } bool found = false; for (vector::const_iterator it = rqfiles.begin(); - it != rqfiles.end(); it++) { - found = found || checkfileanddelete(*it); + it != rqfiles.end(); it++) { + found = found || checkfileanddelete(*it); } return found; } @@ -449,9 +449,9 @@ static bool expeditedIndexingRequested(RclConfig *conf) bool startMonitor(RclConfig *conf, int opts) { if (!conf->getConfParam("monauxinterval", &auxinterval)) - auxinterval = dfltauxinterval; + auxinterval = dfltauxinterval; if (!conf->getConfParam("monixinterval", &ixinterval)) - ixinterval = dfltixinterval; + ixinterval = dfltixinterval; rclEQ.setConfig(conf); rclEQ.setopts(opts); @@ -494,7 +494,7 @@ bool startMonitor(RclConfig *conf, int opts) if (!rclEQ.ok() || x11dead) { break; } - + // Process event queue for (;;) { // Retrieve event @@ -533,14 +533,14 @@ bool startMonitor(RclConfig *conf, int opts) } now = time(0); - // Process. We don't do this every time but let the lists accumulate + // Process. We don't do this every time but let the lists accumulate // a little, this saves processing. Start at once if list is big. if (expeditedIndexingRequested(conf) || - (now - lastixtime > ixinterval) || - (deleted.size() + modified.size() > 20)) { + (now - lastixtime > ixinterval) || + (deleted.size() + modified.size() > 20)) { lastixtime = now; - // Used to do the modified list first, but it does seem - // smarter to make room first... + // Used to do the modified list first, but it does seem + // smarter to make room first... if (!deleted.empty()) { deleted.sort(); deleted.unique(); @@ -559,28 +559,28 @@ bool startMonitor(RclConfig *conf, int opts) } } - // Recreate the auxiliary dbs every hour at most. + // Recreate the auxiliary dbs every hour at most. now = time(0); - if (didsomething && now - lastauxtime > auxinterval) { - lastauxtime = now; - didsomething = false; - if (!createAuxDbs(conf)) { - // We used to bail out on error here. Not anymore, - // because this is most of the time due to a failure - // of aspell dictionary generation, which is not - // critical. - } - } + if (didsomething && now - lastauxtime > auxinterval) { + lastauxtime = now; + didsomething = false; + if (!createAuxDbs(conf)) { + // We used to bail out on error here. Not anymore, + // because this is most of the time due to a failure + // of aspell dictionary generation, which is not + // critical. + } + } - // Check for a config change - if (!(opts & RCLMON_NOCONFCHECK) && o_reexec && conf->sourceChanged()) { - LOGDEB("Rclmonprc: config changed, reexecuting myself\n" ); - // We never want to have a -n option after a config - // change. -n was added by the reexec after the initial - // pass even if it was not given on the command line - o_reexec->removeArg("-n"); - o_reexec->reexec(); - } + // Check for a config change + if (!(opts & RCLMON_NOCONFCHECK) && o_reexec && conf->sourceChanged()) { + LOGDEB("Rclmonprc: config changed, reexecuting myself\n" ); + // We never want to have a -n option after a config + // change. -n was added by the reexec after the initial + // pass even if it was not given on the command line + o_reexec->removeArg("-n"); + o_reexec->reexec(); + } } LOGDEB("Rclmonprc: calling queue setTerminate\n" ); rclEQ.setTerminate(); diff --git a/src/index/recollindex.cpp b/src/index/recollindex.cpp index 785a592f..4a563f58 100644 --- a/src/index/recollindex.cpp +++ b/src/index/recollindex.cpp @@ -550,22 +550,22 @@ static void flushIdxReasons() static vector argstovector(int argc, char **argv) { - vector args; - for (int i = 0; i < argc; i++) { - args.push_back(argv[i]); - } - return args; + vector args; + for (int i = 0; i < argc; i++) { + args.push_back(argv[i]); + } + return args; } static vector fileToArgs(const string& fn) { - string reason, data; - if (!file_to_string(fn, data, &reason)) { - cerr << "Failed reading args file " << fn << " errno " << errno << "\n"; - exit(1); - } - vector args; - stringToStrings(data, args); - return args; + string reason, data; + if (!file_to_string(fn, data, &reason)) { + cerr << "Failed reading args file " << fn << " errno " << errno << "\n"; + exit(1); + } + vector args; + stringToStrings(data, args); + return args; } int main(int argc, char **argv) @@ -585,23 +585,23 @@ int main(int argc, char **argv) thisprog = path_absolute(argv[0]); argc--; argv++; - vector args = argstovector(argc, argv); + vector args = argstovector(argc, argv); - // Passing args through a temp file: this is used on Windows to - // avoid issues with charsets in args (avoid using wmain) - if (args.size() == 1 && args[0][0] != '-') { - args = fileToArgs(args[0]); - } + // Passing args through a temp file: this is used on Windows to + // avoid issues with charsets in args (avoid using wmain) + if (args.size() == 1 && args[0][0] != '-') { + args = fileToArgs(args[0]); + } - unsigned int aremain = args.size(); - unsigned int argidx = 0; - for (; argidx < args.size(); argidx++) { - const string& arg{args[argidx]}; - aremain = args.size() - argidx; - if (arg[0] != '-') { - break; - } - for (unsigned int cidx = 1; cidx < arg.size(); cidx++) { + unsigned int aremain = args.size(); + unsigned int argidx = 0; + for (; argidx < args.size(); argidx++) { + const string& arg{args[argidx]}; + aremain = args.size() - argidx; + if (arg[0] != '-') { + break; + } + for (unsigned int cidx = 1; cidx < arg.size(); cidx++) { switch (arg[cidx]) { case 'b': op_flags |= OPT_b; break; case 'c': op_flags |= OPT_c; if (aremain < 2) Usage(); @@ -639,10 +639,10 @@ int main(int argc, char **argv) case 'z': op_flags |= OPT_z; break; default: Usage(); break; } - } - b1: - ; - } + } + b1: + ; + } aremain = args.size() - argidx; if (op_flags & OPT_h) diff --git a/src/index/subtreelist.cpp b/src/index/subtreelist.cpp index c25f237d..fada32f8 100644 --- a/src/index/subtreelist.cpp +++ b/src/index/subtreelist.cpp @@ -28,13 +28,13 @@ #include "log.h" bool subtreelist(RclConfig *config, const string& top, - vector& paths) + vector& paths) { LOGDEB("subtreelist: top: [" << (top) << "]\n" ); Rcl::Db rcldb(config); if (!rcldb.open(Rcl::Db::DbRO)) { - LOGERR("subtreelist: can't open database in [" << (config->getDbDir()) << "]: " << (rcldb.getReason()) << "\n" ); - return false; + LOGERR("subtreelist: can't open database in [" << (config->getDbDir()) << "]: " << (rcldb.getReason()) << "\n" ); + return false; } Rcl::SearchData *sd = new Rcl::SearchData(Rcl::SCLT_OR, cstr_null); @@ -47,12 +47,12 @@ bool subtreelist(RclConfig *config, const string& top, int cnt = query.getResCnt(); for (int i = 0; i < cnt; i++) { - Rcl::Doc doc; - if (!query.getDoc(i, doc)) - break; - string path = fileurltolocalpath(doc.url); - if (!path.empty()) - paths.push_back(path); + Rcl::Doc doc; + if (!query.getDoc(i, doc)) + break; + string path = fileurltolocalpath(doc.url); + if (!path.empty()) + paths.push_back(path); } return true; } @@ -104,26 +104,26 @@ int main(int argc, char **argv) switch (*(*argv)++) { default: Usage(); break; } - argc--; argv++; + argc--; argv++; } if (argc < 1) - Usage(); + Usage(); top = *argv++;argc--; string reason; RclConfig *config = recollinit(0, 0, 0, reason, 0); if (!config || !config->ok()) { - fprintf(stderr, "Recoll init failed: %s\n", reason.c_str()); - exit(1); + fprintf(stderr, "Recoll init failed: %s\n", reason.c_str()); + exit(1); } vector paths; if (!subtreelist(config, top, paths)) { - cerr << "subtreelist failed" << endl; - exit(1); + cerr << "subtreelist failed" << endl; + exit(1); } for (vector::const_iterator it = paths.begin(); - it != paths.end(); it++) { - cout << *it << endl; + it != paths.end(); it++) { + cout << *it << endl; } exit(0); } diff --git a/src/index/subtreelist.h b/src/index/subtreelist.h index c52695db..317d22ab 100644 --- a/src/index/subtreelist.h +++ b/src/index/subtreelist.h @@ -28,6 +28,6 @@ class RclConfig; // the real time indexer to purge entries when a top directory is // renamed. This is really convoluted, I'd like a better way. extern bool subtreelist(RclConfig *config, const string& top, - std::vector& paths); + std::vector& paths); #endif /* _SUBTREELIST_H_INCLUDED_ */ diff --git a/src/index/webqueuefetcher.cpp b/src/index/webqueuefetcher.cpp index 2f162128..256a1fec 100644 --- a/src/index/webqueuefetcher.cpp +++ b/src/index/webqueuefetcher.cpp @@ -35,23 +35,23 @@ bool WQDocFetcher::fetch(RclConfig* cnf, const Rcl::Doc& idoc, RawDoc& out) { string udi; if (!idoc.getmeta(Rcl::Doc::keyudi, &udi) || udi.empty()) { - LOGERR("WQDocFetcher:: no udi in idoc\n" ); - return false; + LOGERR("WQDocFetcher:: no udi in idoc\n" ); + return false; } Rcl::Doc dotdoc; { std::unique_lock locker(o_beagler_mutex); - // Retrieve from our webcache (beagle data). The beagler - // object is created at the first call of this routine and - // deleted when the program exits. - static WebStore o_beagler(cnf); - if (!o_beagler.getFromCache(udi, dotdoc, out.data)) { - LOGINFO("WQDocFetcher::fetch: failed for [" << udi << "]\n"); - return false; - } + // Retrieve from our webcache (beagle data). The beagler + // object is created at the first call of this routine and + // deleted when the program exits. + static WebStore o_beagler(cnf); + if (!o_beagler.getFromCache(udi, dotdoc, out.data)) { + LOGINFO("WQDocFetcher::fetch: failed for [" << udi << "]\n"); + return false; + } } if (dotdoc.mimetype.compare(idoc.mimetype)) { - LOGINFO("WQDocFetcher:: udi [" << udi << "], mimetp mismatch: in: [" << + LOGINFO("WQDocFetcher:: udi [" << udi << "], mimetp mismatch: in: [" << idoc.mimetype << "], bgl [" << dotdoc.mimetype << "]\n"); } out.kind = RawDoc::RDK_DATA; diff --git a/src/internfile/extrameta.cpp b/src/internfile/extrameta.cpp index 8d658c48..2064a541 100644 --- a/src/internfile/extrameta.cpp +++ b/src/internfile/extrameta.cpp @@ -30,20 +30,20 @@ using std::string; using std::map; static void docfieldfrommeta(RclConfig* cfg, const string& name, - const string &value, Rcl::Doc& doc) + const string &value, Rcl::Doc& doc) { string fieldname = cfg->fieldCanon(name); LOGDEB0("Internfile:: setting [" << fieldname << "] from cmd/xattr value [" << value << "]\n"); if (fieldname == cstr_dj_keymd) { - doc.dmtime = value; + doc.dmtime = value; } else { - doc.meta[fieldname] = value; + doc.meta[fieldname] = value; } } void reapXAttrs(const RclConfig* cfg, const string& path, - map& xfields) + map& xfields) { LOGDEB2("reapXAttrs: [" << path << "]\n"); #ifndef _WIN32 @@ -57,7 +57,7 @@ void reapXAttrs(const RclConfig* cfg, const string& path, LOGERR("FileInterner::reapXattrs: pxattr::list: errno " << errno << "\n"); } - return; + return; } const map& xtof = cfg->getXattrToField(); @@ -65,25 +65,25 @@ void reapXAttrs(const RclConfig* cfg, const string& path, // or mapped depending if the translation is empty. Other names // are recorded as-is for (vector::const_iterator it = xnames.begin(); - it != xnames.end(); it++) { - string key = *it; - map::const_iterator mit = xtof.find(*it); - if (mit != xtof.end()) { - if (mit->second.empty()) { - continue; - } else { - key = mit->second; - } - } - string value; - if (!pxattr::get(path, *it, &value, pxattr::PXATTR_NOFOLLOW)) { - LOGERR("FileInterner::reapXattrs: pxattr::get failed for " << *it + it != xnames.end(); it++) { + string key = *it; + map::const_iterator mit = xtof.find(*it); + if (mit != xtof.end()) { + if (mit->second.empty()) { + continue; + } else { + key = mit->second; + } + } + string value; + if (!pxattr::get(path, *it, &value, pxattr::PXATTR_NOFOLLOW)) { + LOGERR("FileInterner::reapXattrs: pxattr::get failed for " << *it << ", errno " << errno << "\n"); - continue; - } - // Encode should we ? - xfields[key] = value; - LOGDEB2("reapXAttrs: [" << key << "] -> [" << value << "]\n"); + continue; + } + // Encode should we ? + xfields[key] = value; + LOGDEB2("reapXAttrs: [" << key << "] -> [" << value << "]\n"); } #else PRETEND_USE(cfg); @@ -93,34 +93,34 @@ void reapXAttrs(const RclConfig* cfg, const string& path, } void docFieldsFromXattrs(RclConfig *cfg, const map& xfields, - Rcl::Doc& doc) + Rcl::Doc& doc) { for (map::const_iterator it = xfields.begin(); - it != xfields.end(); it++) { - docfieldfrommeta(cfg, it->first, it->second, doc); + it != xfields.end(); it++) { + docfieldfrommeta(cfg, it->first, it->second, doc); } } void reapMetaCmds(RclConfig* cfg, const string& path, - map& cfields) + map& cfields) { const vector& reapers = cfg->getMDReapers(); if (reapers.empty()) - return; + return; map smap = {{'f', path}}; for (vector::const_iterator rp = reapers.begin(); - rp != reapers.end(); rp++) { - vector cmd; - for (vector::const_iterator it = rp->cmdv.begin(); - it != rp->cmdv.end(); it++) { - string s; - pcSubst(*it, s, smap); - cmd.push_back(s); - } - string output; - if (ExecCmd::backtick(cmd, output)) { - cfields[rp->fieldname] = output; - } + rp != reapers.end(); rp++) { + vector cmd; + for (vector::const_iterator it = rp->cmdv.begin(); + it != rp->cmdv.end(); it++) { + string s; + pcSubst(*it, s, smap); + cmd.push_back(s); + } + string output; + if (ExecCmd::backtick(cmd, output)) { + cfields[rp->fieldname] = output; + } } } @@ -133,25 +133,25 @@ void reapMetaCmds(RclConfig* cfg, const string& path, // and the output from anything beginning with "rclmulti" will be // interpreted as multiple fields in configuration file format... void docFieldsFromMetaCmds(RclConfig *cfg, const map& cfields, - Rcl::Doc& doc) + Rcl::Doc& doc) { for (map::const_iterator it = cfields.begin(); - it != cfields.end(); it++) { - if (!it->first.compare(0, 8, "rclmulti")) { - ConfSimple simple(it->second); - if (simple.ok()) { - vector names = simple.getNames(""); - for (vector::const_iterator nm = names.begin(); - nm != names.end(); nm++) { - string value; - if (simple.get(*nm, value)) { - docfieldfrommeta(cfg, *nm, value, doc); - } - } - } - } else { - docfieldfrommeta(cfg, it->first, it->second, doc); - } + it != cfields.end(); it++) { + if (!it->first.compare(0, 8, "rclmulti")) { + ConfSimple simple(it->second); + if (simple.ok()) { + vector names = simple.getNames(""); + for (vector::const_iterator nm = names.begin(); + nm != names.end(); nm++) { + string value; + if (simple.get(*nm, value)) { + docfieldfrommeta(cfg, *nm, value, doc); + } + } + } + } else { + docfieldfrommeta(cfg, it->first, it->second, doc); + } } } diff --git a/src/internfile/extrameta.h b/src/internfile/extrameta.h index 1b3bedaf..d70b827c 100644 --- a/src/internfile/extrameta.h +++ b/src/internfile/extrameta.h @@ -30,7 +30,7 @@ namespace Rcl {class Doc;}; /** Read external attributes, possibly ignore some or change the names according to the fields configuration */ extern void reapXAttrs(const RclConfig* config, const std::string& path, - std::map& xfields); + std::map& xfields); /** Turn the pre-processed extended file attributes into doc fields */ extern void docFieldsFromXattrs( @@ -39,7 +39,7 @@ extern void docFieldsFromXattrs( /** Get metadata by executing commands */ extern void reapMetaCmds(RclConfig* config, const std::string& path, - std::map& xfields); + std::map& xfields); /** Turn the pre-processed ext cmd metadata into doc fields */ extern void docFieldsFromMetaCmds( diff --git a/src/internfile/htmlparse.cpp b/src/internfile/htmlparse.cpp index 48a1373a..fd10b897 100644 --- a/src/internfile/htmlparse.cpp +++ b/src/internfile/htmlparse.cpp @@ -34,7 +34,7 @@ inline void lowercase_string(string &str) { for (string::iterator i = str.begin(); i != str.end(); ++i) { - *i = tolower(static_cast(*i)); + *i = tolower(static_cast(*i)); } } @@ -68,7 +68,7 @@ inline static bool p_nottag(char c) { return !isalnum(static_cast(c)) && - c != '.' && c != '-' && c != ':'; // ':' for XML namespaces. + c != '.' && c != '-' && c != ':'; // ':' for XML namespaces. } inline static bool @@ -99,14 +99,14 @@ HtmlParser::HtmlParser() #if 0 static const struct ent { const char *n; unsigned int v; } ents[] = { #include "namedentities.h" - { NULL, 0 } + { NULL, 0 } }; if (named_ents.empty()) { - const struct ent *i = ents; - while (i->n) { - named_ents[string(i->n)] = i->v; - ++i; - } + const struct ent *i = ents; + while (i->n) { + named_ents[string(i->n)] = i->v; + ++i; + } } #endif } @@ -123,45 +123,45 @@ HtmlParser::decode_entities(string &) // find() and find_if() templates don't work... string::const_iterator amp = s.begin(), s_end = s.end(); while ((amp = find(amp, s_end, '&')) != s_end) { - unsigned int val = 0; - string::const_iterator end, p = amp + 1; - if (p != s_end && *p == '#') { - p++; - if (p != s_end && (*p == 'x' || *p == 'X')) { - // hex - p++; - end = find_if(p, s_end, p_notxdigit); - sscanf(s.substr(p - s.begin(), end - p).c_str(), "%x", &val); - } else { - // number - end = find_if(p, s_end, p_notdigit); - val = atoi(s.substr(p - s.begin(), end - p).c_str()); - } - } else { - end = find_if(p, s_end, p_notalnum); - string code = s.substr(p - s.begin(), end - p); - map::const_iterator i; - i = named_ents.find(code); - if (i != named_ents.end()) val = i->second; - } - if (end < s_end && *end == ';') end++; - if (val) { - string::size_type amp_pos = amp - s.begin(); - if (val < 0x80) { - s.replace(amp_pos, end - amp, 1u, char(val)); - } else { - // Convert unicode value val to UTF-8. - char seq[4]; - unsigned len = Xapian::Unicode::nonascii_to_utf8(val, seq); - s.replace(amp_pos, end - amp, seq, len); - } - s_end = s.end(); - // We've modified the string, so the iterators are no longer - // valid... - amp = s.begin() + amp_pos + 1; - } else { - amp = end; - } + unsigned int val = 0; + string::const_iterator end, p = amp + 1; + if (p != s_end && *p == '#') { + p++; + if (p != s_end && (*p == 'x' || *p == 'X')) { + // hex + p++; + end = find_if(p, s_end, p_notxdigit); + sscanf(s.substr(p - s.begin(), end - p).c_str(), "%x", &val); + } else { + // number + end = find_if(p, s_end, p_notdigit); + val = atoi(s.substr(p - s.begin(), end - p).c_str()); + } + } else { + end = find_if(p, s_end, p_notalnum); + string code = s.substr(p - s.begin(), end - p); + map::const_iterator i; + i = named_ents.find(code); + if (i != named_ents.end()) val = i->second; + } + if (end < s_end && *end == ';') end++; + if (val) { + string::size_type amp_pos = amp - s.begin(); + if (val < 0x80) { + s.replace(amp_pos, end - amp, 1u, char(val)); + } else { + // Convert unicode value val to UTF-8. + char seq[4]; + unsigned len = Xapian::Unicode::nonascii_to_utf8(val, seq); + s.replace(amp_pos, end - amp, seq, len); + } + s_end = s.end(); + // We've modified the string, so the iterators are no longer + // valid... + amp = s.begin() + amp_pos + 1; + } else { + amp = end; + } } #endif } @@ -175,222 +175,222 @@ HtmlParser::parse_html(const string &body) string::const_iterator start = body.begin(); while (true) { - // Skip through until we find an HTML tag, a comment, or the end of - // document. Ignore isolated occurrences of `<' which don't start - // a tag or comment. - string::const_iterator p = start; - while (true) { - p = find(p, body.end(), '<'); - if (p == body.end()) break; - unsigned char ch = *(p + 1); + // Skip through until we find an HTML tag, a comment, or the end of + // document. Ignore isolated occurrences of `<' which don't start + // a tag or comment. + string::const_iterator p = start; + while (true) { + p = find(p, body.end(), '<'); + if (p == body.end()) break; + unsigned char ch = *(p + 1); - // Tag, closing tag, or comment (or SGML declaration). - if ((!in_script && isalpha(ch)) || ch == '/' || ch == '!') break; + // Tag, closing tag, or comment (or SGML declaration). + if ((!in_script && isalpha(ch)) || ch == '/' || ch == '!') break; - if (ch == '?') { - // PHP code or XML declaration. - // XML declaration is only valid at the start of the first line. - // FIXME: need to deal with BOMs... - if (p != body.begin() || body.size() < 20) break; + if (ch == '?') { + // PHP code or XML declaration. + // XML declaration is only valid at the start of the first line. + // FIXME: need to deal with BOMs... + if (p != body.begin() || body.size() < 20) break; - // XML declaration looks something like this: - // - if (p[2] != 'x' || p[3] != 'm' || p[4] != 'l') break; - if (strchr(" \t\r\n", p[5]) == NULL) break; + // XML declaration looks something like this: + // + if (p[2] != 'x' || p[3] != 'm' || p[4] != 'l') break; + if (strchr(" \t\r\n", p[5]) == NULL) break; - string::const_iterator decl_end = find(p + 6, body.end(), '?'); - if (decl_end == body.end()) break; + string::const_iterator decl_end = find(p + 6, body.end(), '?'); + if (decl_end == body.end()) break; - // Default charset for XML is UTF-8. - charset = "utf-8"; + // Default charset for XML is UTF-8. + charset = "utf-8"; - string decl(p + 6, decl_end); - size_t enc = decl.find("encoding"); - if (enc == string::npos) break; + string decl(p + 6, decl_end); + size_t enc = decl.find("encoding"); + if (enc == string::npos) break; - enc = decl.find_first_not_of(" \t\r\n", enc + 8); - if (enc == string::npos || enc == decl.size()) break; + enc = decl.find_first_not_of(" \t\r\n", enc + 8); + if (enc == string::npos || enc == decl.size()) break; - if (decl[enc] != '=') break; - - enc = decl.find_first_not_of(" \t\r\n", enc + 1); - if (enc == string::npos || enc == decl.size()) break; + if (decl[enc] != '=') break; + + enc = decl.find_first_not_of(" \t\r\n", enc + 1); + if (enc == string::npos || enc == decl.size()) break; - if (decl[enc] != '"' && decl[enc] != '\'') break; + if (decl[enc] != '"' && decl[enc] != '\'') break; - char quote = decl[enc++]; - size_t enc_end = decl.find(quote, enc); + char quote = decl[enc++]; + size_t enc_end = decl.find(quote, enc); - if (enc != string::npos) - charset = decl.substr(enc, enc_end - enc); + if (enc != string::npos) + charset = decl.substr(enc, enc_end - enc); - break; - } - p++; - } + break; + } + p++; + } - // Process text up to start of tag. - if (p > start || p == body.end()) { - string text = body.substr(start - body.begin(), p - start); - decode_entities(text); - process_text(text); - } + // Process text up to start of tag. + if (p > start || p == body.end()) { + string text = body.substr(start - body.begin(), p - start); + decode_entities(text); + process_text(text); + } - if (p == body.end()) { - do_eof(); - break; - } + if (p == body.end()) { + do_eof(); + break; + } - start = p + 1; + start = p + 1; - if (start == body.end()) break; + if (start == body.end()) break; - if (*start == '!') { - if (++start == body.end()) break; - if (++start == body.end()) break; - // comment or SGML declaration - if (*(start - 1) == '-' && *start == '-') { - ++start; - string::const_iterator close = find(start, body.end(), '>'); - // An unterminated comment swallows rest of document - // (like Netscape, but unlike MSIE IIRC) - if (close == body.end()) break; + if (*start == '!') { + if (++start == body.end()) break; + if (++start == body.end()) break; + // comment or SGML declaration + if (*(start - 1) == '-' && *start == '-') { + ++start; + string::const_iterator close = find(start, body.end(), '>'); + // An unterminated comment swallows rest of document + // (like Netscape, but unlike MSIE IIRC) + if (close == body.end()) break; - p = close; - // look for --> - while (p != body.end() && (*(p - 1) != '-' || *(p - 2) != '-')) - p = find(p + 1, body.end(), '>'); + p = close; + // look for --> + while (p != body.end() && (*(p - 1) != '-' || *(p - 2) != '-')) + p = find(p + 1, body.end(), '>'); - if (p != body.end()) { - // Check for htdig's "ignore this bit" comments. - if (p - start == 15 && string(start, p - 2) == "htdig_noindex") { - string::size_type i; - i = body.find("", p + 1 - body.begin()); - if (i == string::npos) break; - start = body.begin() + i + 21; - continue; - } - // If we found --> skip to there. - start = p; - } else { - // Otherwise skip to the first > we found (as Netscape does). - start = close; - } - } else { - // just an SGML declaration, perhaps giving the DTD - ignore it - start = find(start - 1, body.end(), '>'); - if (start == body.end()) break; - } - ++start; - } else if (*start == '?') { - if (++start == body.end()) break; - // PHP - swallow until ?> or EOF - start = find(start + 1, body.end(), '>'); + if (p != body.end()) { + // Check for htdig's "ignore this bit" comments. + if (p - start == 15 && string(start, p - 2) == "htdig_noindex") { + string::size_type i; + i = body.find("", p + 1 - body.begin()); + if (i == string::npos) break; + start = body.begin() + i + 21; + continue; + } + // If we found --> skip to there. + start = p; + } else { + // Otherwise skip to the first > we found (as Netscape does). + start = close; + } + } else { + // just an SGML declaration, perhaps giving the DTD - ignore it + start = find(start - 1, body.end(), '>'); + if (start == body.end()) break; + } + ++start; + } else if (*start == '?') { + if (++start == body.end()) break; + // PHP - swallow until ?> or EOF + start = find(start + 1, body.end(), '>'); - // look for ?> - while (start != body.end() && *(start - 1) != '?') - start = find(start + 1, body.end(), '>'); + // look for ?> + while (start != body.end() && *(start - 1) != '?') + start = find(start + 1, body.end(), '>'); - // unterminated PHP swallows rest of document (rather arbitrarily - // but it avoids polluting the database when things go wrong) - if (start != body.end()) ++start; - } else { - // opening or closing tag - int closing = 0; + // unterminated PHP swallows rest of document (rather arbitrarily + // but it avoids polluting the database when things go wrong) + if (start != body.end()) ++start; + } else { + // opening or closing tag + int closing = 0; - if (*start == '/') { - closing = 1; - start = find_if(start + 1, body.end(), p_notwhitespace); - } - - p = start; - start = find_if(start, body.end(), p_nottag); - string tag = body.substr(p - body.begin(), start - p); - // convert tagname to lowercase - lowercase_string(tag); + if (*start == '/') { + closing = 1; + start = find_if(start + 1, body.end(), p_notwhitespace); + } + + p = start; + start = find_if(start, body.end(), p_nottag); + string tag = body.substr(p - body.begin(), start - p); + // convert tagname to lowercase + lowercase_string(tag); - if (closing) { - if (!closing_tag(tag)) - return; - if (in_script && tag == "script") in_script = false; + if (closing) { + if (!closing_tag(tag)) + return; + if (in_script && tag == "script") in_script = false; - /* ignore any bogus parameters on closing tags */ - p = find(start, body.end(), '>'); - if (p == body.end()) break; - start = p + 1; - } else { - bool empty_element = false; - // FIXME: parse parameters lazily. - while (start < body.end() && *start != '>') { - string name, value; + /* ignore any bogus parameters on closing tags */ + p = find(start, body.end(), '>'); + if (p == body.end()) break; + start = p + 1; + } else { + bool empty_element = false; + // FIXME: parse parameters lazily. + while (start < body.end() && *start != '>') { + string name, value; - p = find_if(start, body.end(), p_whitespaceeqgt); + p = find_if(start, body.end(), p_whitespaceeqgt); - size_t name_len = p - start; - if (name_len == 1) { - if (*start == '/' && p < body.end() && *p == '>') { - // E.g. - start = p; - empty_element = true; - break; - } - } + size_t name_len = p - start; + if (name_len == 1) { + if (*start == '/' && p < body.end() && *p == '>') { + // E.g. + start = p; + empty_element = true; + break; + } + } - name.assign(body, start - body.begin(), name_len); + name.assign(body, start - body.begin(), name_len); - p = find_if(p, body.end(), p_notwhitespace); + p = find_if(p, body.end(), p_notwhitespace); - start = p; - if (start != body.end() && *start == '=') { - start = find_if(start + 1, body.end(), p_notwhitespace); + start = p; + if (start != body.end() && *start == '=') { + start = find_if(start + 1, body.end(), p_notwhitespace); - p = body.end(); + p = body.end(); - int quote = *start; - if (quote == '"' || quote == '\'') { - start++; - p = find(start, body.end(), quote); - } + int quote = *start; + if (quote == '"' || quote == '\'') { + start++; + p = find(start, body.end(), quote); + } - if (p == body.end()) { - // unquoted or no closing quote - p = find_if(start, body.end(), p_whitespacegt); - } - value.assign(body, start - body.begin(), p - start); - start = find_if(p, body.end(), p_notwhitespace); + if (p == body.end()) { + // unquoted or no closing quote + p = find_if(start, body.end(), p_whitespacegt); + } + value.assign(body, start - body.begin(), p - start); + start = find_if(p, body.end(), p_notwhitespace); - if (!name.empty()) { - // convert parameter name to lowercase - lowercase_string(name); - // in case of multiple entries, use the first - // (as Netscape does) - parameters.insert(make_pair(name, value)); - } - } - } + if (!name.empty()) { + // convert parameter name to lowercase + lowercase_string(name); + // in case of multiple entries, use the first + // (as Netscape does) + parameters.insert(make_pair(name, value)); + } + } + } #if 0 - cout << "<" << tag; - map::const_iterator x; - for (x = parameters.begin(); x != parameters.end(); x++) { - cout << " " << x->first << "=\"" << x->second << "\""; - } - cout << ">\n"; + cout << "<" << tag; + map::const_iterator x; + for (x = parameters.begin(); x != parameters.end(); x++) { + cout << " " << x->first << "=\"" << x->second << "\""; + } + cout << ">\n"; #endif - if (!opening_tag(tag)) - return; - parameters.clear(); + if (!opening_tag(tag)) + return; + parameters.clear(); - if (empty_element) { - if (!closing_tag(tag)) - return; - } + if (empty_element) { + if (!closing_tag(tag)) + return; + } - // In