From 3cf7fb3b65a39dc555cf93e1bcb6d14ae99e91d6 Mon Sep 17 00:00:00 2001 From: Jean-Francois Dockes Date: Mon, 13 Jul 2020 16:53:12 +0200 Subject: [PATCH] Also use picoxml for saved searches --- src/qtgui/xmltosd.cpp | 503 +++++++++++++++++++----------------------- src/utils/picoxml.h | 11 +- 2 files changed, 237 insertions(+), 277 deletions(-) diff --git a/src/qtgui/xmltosd.cpp b/src/qtgui/xmltosd.cpp index 3c948041..7b1c8f95 100644 --- a/src/qtgui/xmltosd.cpp +++ b/src/qtgui/xmltosd.cpp @@ -1,4 +1,4 @@ -/* Copyright (C) 2005 J.F.Dockes +/* Copyright (C) 2005-2020 J.F.Dockes * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -17,8 +17,6 @@ #include "autoconfig.h" -#include - #include "ssearch_w.h" #include "guiutils.h" @@ -26,51 +24,175 @@ #include "xmltosd.h" #include "smallut.h" #include "recoll.h" +#include "picoxml.h" using namespace std; using namespace Rcl; -class SDHXMLHandler : public QXmlDefaultHandler { +class SDHXMLHandler : public PicoXMLParser { public: - SDHXMLHandler() - : isvalid(false) - { - resetTemps(); + SDHXMLHandler(const std::string& in) + : PicoXMLParser(in) { + resetTemps(); + } + void startElement( + const std::string& nm, + const std::map& attrs) { + + LOGDEB2("SDHXMLHandler::startElement: name [" << nm << "]\n"); + if (nm == "SD") { + // Advanced search history entries have no type. So we're good + // either if type is absent, or if it's searchdata + auto attr = attrs.find("type"); + if (attr != attrs.end() && attr->second != "searchdata") { + LOGDEB("XMLTOSD: bad type: " << attr->second << endl); + contentsOk = false; + return; + } + resetTemps(); + // A new search descriptor. Allocate data structure + sd = std::shared_ptr(new SearchData); + if (!sd) { + LOGERR("SDHXMLHandler::startElement: out of memory\n"); + contentsOk = false; + return; + } + } + return; } - bool startElement(const QString & /* namespaceURI */, - const QString & /* localName */, - const QString &qName, - const QXmlAttributes &attributes); - bool endElement(const QString & /* namespaceURI */, - const QString & /* localName */, - const QString &qName); - bool characters(const QString &str) - { - currentText += str; - return true; + + void endElement(const string & nm) { + LOGDEB2("SDHXMLHandler::endElement: name [" << nm << "]\n"); + string curtxt{currentText}; + trimstring(curtxt, " \t\n\r"); + if (nm == "CLT") { + if (curtxt == "OR") { + sd->setTp(SCLT_OR); + } + } else if (nm == "CT") { + whatclause = curtxt; + } else if (nm == "NEG") { + exclude = true; + } else if (nm == "F") { + field = base64_decode(curtxt); + } else if (nm == "T") { + text = base64_decode(curtxt); + } else if (nm == "T2") { + text2 = base64_decode(curtxt); + } else if (nm == "S") { + slack = atoi(curtxt.c_str()); + } else if (nm == "C") { + SearchDataClause *c; + if (whatclause == "AND" || whatclause.empty()) { + c = new SearchDataClauseSimple(SCLT_AND, text, field); + c->setexclude(exclude); + } else if (whatclause == "OR") { + c = new SearchDataClauseSimple(SCLT_OR, text, field); + c->setexclude(exclude); + } else if (whatclause == "RG") { + c = new SearchDataClauseRange(text, text2, field); + c->setexclude(exclude); + } else if (whatclause == "EX") { + // Compat with old hist. We don't generete EX + // (SCLT_EXCL) anymore it's replaced with OR + exclude + // flag + c = new SearchDataClauseSimple(SCLT_OR, text, field); + c->setexclude(true); + } else if (whatclause == "FN") { + c = new SearchDataClauseFilename(text); + c->setexclude(exclude); + } else if (whatclause == "PH") { + c = new SearchDataClauseDist(SCLT_PHRASE, text, slack, field); + c->setexclude(exclude); + } else if (whatclause == "NE") { + c = new SearchDataClauseDist(SCLT_NEAR, text, slack, field); + c->setexclude(exclude); + } else { + LOGERR("Bad clause type [" << whatclause << "]\n"); + contentsOk = false; + return; + } + sd->addClause(c); + whatclause = ""; + text.clear(); + field.clear(); + slack = 0; + exclude = false; + } else if (nm == "D") { + d = atoi(curtxt.c_str()); + } else if (nm == "M") { + m = atoi(curtxt.c_str()); + } else if (nm == "Y") { + y = atoi(curtxt.c_str()); + } else if (nm == "DMI") { + di.d1 = d; + di.m1 = m; + di.y1 = y; + hasdates = true; + } else if (nm == "DMA") { + di.d2 = d; + di.m2 = m; + di.y2 = y; + hasdates = true; + } else if (nm == "MIS") { + sd->setMinSize(atoll(curtxt.c_str())); + } else if (nm == "MAS") { + sd->setMaxSize(atoll(curtxt.c_str())); + } else if (nm == "ST") { + string types = curtxt.c_str(); + vector vt; + stringToTokens(types, vt); + for (unsigned int i = 0; i < vt.size(); i++) + sd->addFiletype(vt[i]); + } else if (nm == "IT") { + vector vt; + stringToTokens(curtxt, vt); + for (unsigned int i = 0; i < vt.size(); i++) + sd->remFiletype(vt[i]); + } else if (nm == "YD") { + string d; + base64_decode(curtxt, d); + sd->addClause(new SearchDataClausePath(d)); + } else if (nm == "ND") { + string d; + base64_decode(curtxt, d); + sd->addClause(new SearchDataClausePath(d, true)); + } else if (nm == "SD") { + // Closing current search descriptor. Finishing touches... + if (hasdates) + sd->setDateSpan(&di); + resetTemps(); + isvalid = contentsOk; + } + currentText.clear(); + return; + } + + void characterData(const std::string &str) { + currentText += str; } // The object we set up std::shared_ptr sd; - bool isvalid; - + bool isvalid{false}; + bool contentsOk{true}; + private: - void resetTemps() - { - currentText = whatclause = ""; - text.clear(); + void resetTemps() { + currentText = whatclause = ""; + text.clear(); text2.clear(); - field.clear(); - slack = 0; - d = m = y = di.d1 = di.m1 = di.y1 = di.d2 = di.m2 = di.y2 = 0; - hasdates = false; - exclude = false; + field.clear(); + slack = 0; + d = m = y = di.d1 = di.m1 = di.y1 = di.d2 = di.m2 = di.y2 = 0; + hasdates = false; + exclude = false; } // Temporary data while parsing. - QString currentText; - QString whatclause; - string field, text, text2; + std::string currentText; + std::string whatclause; + std::string field, text, text2; int slack; int d, m, y; DateInterval di; @@ -78,152 +200,11 @@ private: bool exclude; }; -bool SDHXMLHandler::startElement(const QString & /* namespaceURI */, - const QString & /* localName */, - const QString &qName, - const QXmlAttributes &attrs) -{ - LOGDEB2("SDHXMLHandler::startElement: name [" << qs2utf8s(qName) << "]\n"); - if (qName == "SD") { - // Advanced search history entries have no type. So we're good - // either if type is absent, or if it's searchdata - int idx = attrs.index("type"); - if (idx >= 0 && attrs.value(idx).compare("searchdata")) { - LOGDEB("XMLTOSD: bad type: " << qs2utf8s(attrs.value(idx)) << endl); - return false; - } - resetTemps(); - // A new search descriptor. Allocate data structure - sd = std::shared_ptr(new SearchData); - if (!sd) { - LOGERR("SDHXMLHandler::startElement: out of memory\n"); - return false; - } - } - return true; -} - -bool SDHXMLHandler::endElement(const QString & /* namespaceURI */, - const QString & /* localName */, - const QString &qName) -{ - LOGDEB2("SDHXMLHandler::endElement: name [" << qs2utf8s(qName) << "]\n"); - - if (qName == "CLT") { - if (currentText == "OR") { - sd->setTp(SCLT_OR); - } - } else if (qName == "CT") { - whatclause = currentText.trimmed(); - } else if (qName == "NEG") { - exclude = true; - } else if (qName == "F") { - field = base64_decode(qs2utf8s(currentText.trimmed())); - } else if (qName == "T") { - text = base64_decode(qs2utf8s(currentText.trimmed())); - } else if (qName == "T2") { - text2 = base64_decode(qs2utf8s(currentText.trimmed())); - } else if (qName == "S") { - slack = atoi((const char *)currentText.toUtf8()); - } else if (qName == "C") { - SearchDataClause *c; - if (whatclause == "AND" || whatclause.isEmpty()) { - c = new SearchDataClauseSimple(SCLT_AND, text, field); - c->setexclude(exclude); - } else if (whatclause == "OR") { - c = new SearchDataClauseSimple(SCLT_OR, text, field); - c->setexclude(exclude); - } else if (whatclause == "RG") { - c = new SearchDataClauseRange(text, text2, field); - c->setexclude(exclude); - } else if (whatclause == "EX") { - // Compat with old hist. We don't generete EX (SCLT_EXCL) anymore - // it's replaced with OR + exclude flag - c = new SearchDataClauseSimple(SCLT_OR, text, field); - c->setexclude(true); - } else if (whatclause == "FN") { - c = new SearchDataClauseFilename(text); - c->setexclude(exclude); - } else if (whatclause == "PH") { - c = new SearchDataClauseDist(SCLT_PHRASE, text, slack, field); - c->setexclude(exclude); - } else if (whatclause == "NE") { - c = new SearchDataClauseDist(SCLT_NEAR, text, slack, field); - c->setexclude(exclude); - } else { - LOGERR("Bad clause type [" << qs2utf8s(whatclause) << "]\n"); - return false; - } - sd->addClause(c); - whatclause = ""; - text.clear(); - field.clear(); - slack = 0; - exclude = false; - } else if (qName == "D") { - d = atoi((const char *)currentText.toUtf8()); - } else if (qName == "M") { - m = atoi((const char *)currentText.toUtf8()); - } else if (qName == "Y") { - y = atoi((const char *)currentText.toUtf8()); - } else if (qName == "DMI") { - di.d1 = d; - di.m1 = m; - di.y1 = y; - hasdates = true; - } else if (qName == "DMA") { - di.d2 = d; - di.m2 = m; - di.y2 = y; - hasdates = true; - } else if (qName == "MIS") { - sd->setMinSize(atoll((const char *)currentText.toUtf8())); - } else if (qName == "MAS") { - sd->setMaxSize(atoll((const char *)currentText.toUtf8())); - } else if (qName == "ST") { - string types = (const char *)currentText.toUtf8(); - vector vt; - stringToTokens(types, vt); - for (unsigned int i = 0; i < vt.size(); i++) - sd->addFiletype(vt[i]); - } else if (qName == "IT") { - string types(qs2utf8s(currentText)); - vector vt; - stringToTokens(types, vt); - for (unsigned int i = 0; i < vt.size(); i++) - sd->remFiletype(vt[i]); - } else if (qName == "YD") { - string d; - base64_decode(qs2utf8s(currentText.trimmed()), d); - sd->addClause(new SearchDataClausePath(d)); - } else if (qName == "ND") { - string d; - base64_decode(qs2utf8s(currentText.trimmed()), d); - sd->addClause(new SearchDataClausePath(d, true)); - } else if (qName == "SD") { - // Closing current search descriptor. Finishing touches... - if (hasdates) - sd->setDateSpan(&di); - resetTemps(); - isvalid = true; - } - currentText.clear(); - return true; -} - - std::shared_ptr xmlToSearchData(const string& xml, bool verbose) { - SDHXMLHandler handler; - QXmlSimpleReader reader; - reader.setContentHandler(&handler); - reader.setErrorHandler(&handler); - - QXmlInputSource xmlInputSource; - xmlInputSource.setData(QString::fromUtf8(xml.c_str())); - - if (!reader.parse(xmlInputSource) || !handler.isvalid) { + SDHXMLHandler handler(xml); + if (!handler.Parse() || !handler.isvalid) { if (verbose) { LOGERR("xmlToSearchData: parse failed for [" << xml << "]\n"); } @@ -234,117 +215,93 @@ std::shared_ptr xmlToSearchData(const string& xml, // Handler for parsing saved simple search data -class SSHXMLHandler : public QXmlDefaultHandler { +class SSHXMLHandler : public PicoXMLParser { public: - SSHXMLHandler() - : isvalid(false) - { + SSHXMLHandler(const std::string& in) + : PicoXMLParser(in) { + resetTemps(); + } + + void startElement(const std::string &nm, + const std::map& attrs) { + LOGDEB2("SSHXMLHandler::startElement: name [" << nm << "]\n"); + if (nm == "SD") { + // Simple search saved data has a type='ssearch' attribute. + auto attr = attrs.find("type"); + if (attr == attrs.end() || attr->second != "ssearch") { + if (attr == attrs.end()) { + LOGDEB("XMLTOSSS: bad type\n"); + } else { + LOGDEB("XMLTOSSS: bad type: " << attr->second << endl); + } + contentsOk = false; + } resetTemps(); } - bool startElement(const QString & /* namespaceURI */, - const QString & /* localName */, - const QString &qName, - const QXmlAttributes &attributes); - bool endElement(const QString & /* namespaceURI */, - const QString & /* localName */, - const QString &qName); - bool characters(const QString &str) - { - currentText += str; - return true; - } + } + + void endElement(const string& nm) override { + LOGDEB2("SSHXMLHandler::endElement: name [" << nm << "]\n"); + std::string curtxt{currentText}; + trimstring(curtxt, " \t\n\r"); + if (nm == "SL") { + stringToStrings(curtxt, data.stemlangs); + } else if (nm == "T") { + base64_decode(curtxt, data.text); + } else if (nm == "EX") { + data.extindexes.push_back(base64_decode(curtxt)); + } else if (nm == "SM") { + if (curtxt == "QL") { + data.mode = SSearch::SST_LANG; + } else if (curtxt == "FN") { + data.mode = SSearch::SST_FNM; + } else if (curtxt == "OR") { + data.mode = SSearch::SST_ANY; + } else if (curtxt == "AND") { + data.mode = SSearch::SST_ALL; + } else { + LOGERR("BAD SEARCH MODE: [" << curtxt << "]\n"); + contentsOk = false; + return; + } + } else if (nm == "AS") { + stringToStrings(curtxt, data.autosuffs); + } else if (nm == "AP") { + data.autophrase = true; + } else if (nm == "SD") { + // Closing current search descriptor. Finishing touches... + resetTemps(); + isvalid = contentsOk; + } + currentText.clear(); + return ; + } + + void characterData(const std::string &str) { + currentText += str; + } // The object we set up SSearchDef data; - bool isvalid; - + bool isvalid{false}; + bool contentsOk{true}; + private: - void resetTemps() - { - currentText = whatclause = ""; - text.clear(); - } + void resetTemps() { + currentText = whatclause = ""; + text.clear(); + } // Temporary data while parsing. - QString currentText; - QString whatclause; + std::string currentText; + std::string whatclause; string text; }; -bool SSHXMLHandler::startElement(const QString & /* namespaceURI */, - const QString & /* localName */, - const QString &qName, - const QXmlAttributes &attrs) -{ - LOGDEB2("SSHXMLHandler::startElement: name [" << u8s2qs(qName) << "]\n"); - if (qName == "SD") { - // Simple search saved data has a type='ssearch' attribute. - int idx = attrs.index("type"); - if (idx < 0 || attrs.value(idx).compare("ssearch")) { - if (idx < 0) { - LOGDEB("XMLTOSSS: bad type\n"); - } else { - LOGDEB("XMLTOSSS: bad type: " << qs2utf8s(attrs.value(idx)) - << endl); - } - return false; - } - resetTemps(); - } - return true; -} - -bool SSHXMLHandler::endElement(const QString & /* namespaceURI */, - const QString & /* localName */, - const QString &qName) -{ - LOGDEB2("SSHXMLHandler::endElement: name [" << u8s2qs(qName) << "]\n"); - - currentText = currentText.trimmed(); - - if (qName == "SL") { - stringToStrings(qs2utf8s(currentText), data.stemlangs); - } else if (qName == "T") { - base64_decode(qs2utf8s(currentText), data.text); - } else if (qName == "EX") { - data.extindexes.push_back(base64_decode(qs2utf8s(currentText))); - } else if (qName == "SM") { - if (!currentText.compare("QL")) { - data.mode = SSearch::SST_LANG; - } else if (!currentText.compare("FN")) { - data.mode = SSearch::SST_FNM; - } else if (!currentText.compare("OR")) { - data.mode = SSearch::SST_ANY; - } else if (!currentText.compare("AND")) { - data.mode = SSearch::SST_ALL; - } else { - LOGERR("BAD SEARCH MODE: [" << qs2utf8s(currentText) << "]\n"); - return false; - } - } else if (qName == "AS") { - stringToStrings(qs2utf8s(currentText), data.autosuffs); - } else if (qName == "AP") { - data.autophrase = true; - } else if (qName == "SD") { - // Closing current search descriptor. Finishing touches... - resetTemps(); - isvalid = true; - } - currentText.clear(); - return true; -} - bool xmlToSSearch(const string& xml, SSearchDef& data) { - SSHXMLHandler handler; - QXmlSimpleReader reader; - reader.setContentHandler(&handler); - reader.setErrorHandler(&handler); - - QXmlInputSource xmlInputSource; - xmlInputSource.setData(QString::fromUtf8(xml.c_str())); - - if (!reader.parse(xmlInputSource) || !handler.isvalid) { + SSHXMLHandler handler(xml); + if (!handler.Parse() || !handler.isvalid) { LOGERR("xmlToSSearch: parse failed for [" << xml << "]\n"); return false; } diff --git a/src/utils/picoxml.h b/src/utils/picoxml.h index a67453d2..eef7a4bc 100644 --- a/src/utils/picoxml.h +++ b/src/utils/picoxml.h @@ -364,14 +364,17 @@ private: } epos++; skipWS(tag, epos); - if (tag[epos] != '"' || epos == tag.size() - 1) { - m_reason << "Missing dquote or value at cpos " << m_pos+epos; + char qc{0}; + if ((tag[epos] != '"' && tag[epos] != '\'') || + epos == tag.size() - 1) { + m_reason << "Missing quote or value at cpos " << m_pos+epos; return false; } + qc = tag[epos]; spos = epos + 1; - epos = tag.find_first_of(R"(")", spos); + epos = tag.find_first_of(qc, spos); if (epos == std::string::npos) { - m_reason << "Missing closing dquote at cpos " << m_pos+spos; + m_reason << "Missing closing quote at cpos " << m_pos+spos; return false; } attrs[attrnm] = tag.substr(spos, epos - spos);