Also use picoxml for saved searches

This commit is contained in:
Jean-Francois Dockes 2020-07-13 16:53:12 +02:00
parent 97e2b26da0
commit 3cf7fb3b65
2 changed files with 237 additions and 277 deletions

View File

@ -1,4 +1,4 @@
/* Copyright (C) 2005 J.F.Dockes
/* Copyright (C) 2005-2020 J.F.Dockes
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
@ -17,8 +17,6 @@
#include "autoconfig.h"
#include <QtXml/QXmlDefaultHandler>
#include "ssearch_w.h"
#include "guiutils.h"
@ -26,51 +24,175 @@
#include "xmltosd.h"
#include "smallut.h"
#include "recoll.h"
#include "picoxml.h"
using namespace std;
using namespace Rcl;
class SDHXMLHandler : public QXmlDefaultHandler {
class SDHXMLHandler : public PicoXMLParser {
public:
SDHXMLHandler()
: isvalid(false)
{
resetTemps();
SDHXMLHandler(const std::string& in)
: PicoXMLParser(in) {
resetTemps();
}
void startElement(
const std::string& nm,
const std::map<std::string, std::string>& attrs) {
LOGDEB2("SDHXMLHandler::startElement: name [" << nm << "]\n");
if (nm == "SD") {
// Advanced search history entries have no type. So we're good
// either if type is absent, or if it's searchdata
auto attr = attrs.find("type");
if (attr != attrs.end() && attr->second != "searchdata") {
LOGDEB("XMLTOSD: bad type: " << attr->second << endl);
contentsOk = false;
return;
}
resetTemps();
// A new search descriptor. Allocate data structure
sd = std::shared_ptr<SearchData>(new SearchData);
if (!sd) {
LOGERR("SDHXMLHandler::startElement: out of memory\n");
contentsOk = false;
return;
}
}
return;
}
bool startElement(const QString & /* namespaceURI */,
const QString & /* localName */,
const QString &qName,
const QXmlAttributes &attributes);
bool endElement(const QString & /* namespaceURI */,
const QString & /* localName */,
const QString &qName);
bool characters(const QString &str)
{
currentText += str;
return true;
void endElement(const string & nm) {
LOGDEB2("SDHXMLHandler::endElement: name [" << nm << "]\n");
string curtxt{currentText};
trimstring(curtxt, " \t\n\r");
if (nm == "CLT") {
if (curtxt == "OR") {
sd->setTp(SCLT_OR);
}
} else if (nm == "CT") {
whatclause = curtxt;
} else if (nm == "NEG") {
exclude = true;
} else if (nm == "F") {
field = base64_decode(curtxt);
} else if (nm == "T") {
text = base64_decode(curtxt);
} else if (nm == "T2") {
text2 = base64_decode(curtxt);
} else if (nm == "S") {
slack = atoi(curtxt.c_str());
} else if (nm == "C") {
SearchDataClause *c;
if (whatclause == "AND" || whatclause.empty()) {
c = new SearchDataClauseSimple(SCLT_AND, text, field);
c->setexclude(exclude);
} else if (whatclause == "OR") {
c = new SearchDataClauseSimple(SCLT_OR, text, field);
c->setexclude(exclude);
} else if (whatclause == "RG") {
c = new SearchDataClauseRange(text, text2, field);
c->setexclude(exclude);
} else if (whatclause == "EX") {
// Compat with old hist. We don't generete EX
// (SCLT_EXCL) anymore it's replaced with OR + exclude
// flag
c = new SearchDataClauseSimple(SCLT_OR, text, field);
c->setexclude(true);
} else if (whatclause == "FN") {
c = new SearchDataClauseFilename(text);
c->setexclude(exclude);
} else if (whatclause == "PH") {
c = new SearchDataClauseDist(SCLT_PHRASE, text, slack, field);
c->setexclude(exclude);
} else if (whatclause == "NE") {
c = new SearchDataClauseDist(SCLT_NEAR, text, slack, field);
c->setexclude(exclude);
} else {
LOGERR("Bad clause type [" << whatclause << "]\n");
contentsOk = false;
return;
}
sd->addClause(c);
whatclause = "";
text.clear();
field.clear();
slack = 0;
exclude = false;
} else if (nm == "D") {
d = atoi(curtxt.c_str());
} else if (nm == "M") {
m = atoi(curtxt.c_str());
} else if (nm == "Y") {
y = atoi(curtxt.c_str());
} else if (nm == "DMI") {
di.d1 = d;
di.m1 = m;
di.y1 = y;
hasdates = true;
} else if (nm == "DMA") {
di.d2 = d;
di.m2 = m;
di.y2 = y;
hasdates = true;
} else if (nm == "MIS") {
sd->setMinSize(atoll(curtxt.c_str()));
} else if (nm == "MAS") {
sd->setMaxSize(atoll(curtxt.c_str()));
} else if (nm == "ST") {
string types = curtxt.c_str();
vector<string> vt;
stringToTokens(types, vt);
for (unsigned int i = 0; i < vt.size(); i++)
sd->addFiletype(vt[i]);
} else if (nm == "IT") {
vector<string> vt;
stringToTokens(curtxt, vt);
for (unsigned int i = 0; i < vt.size(); i++)
sd->remFiletype(vt[i]);
} else if (nm == "YD") {
string d;
base64_decode(curtxt, d);
sd->addClause(new SearchDataClausePath(d));
} else if (nm == "ND") {
string d;
base64_decode(curtxt, d);
sd->addClause(new SearchDataClausePath(d, true));
} else if (nm == "SD") {
// Closing current search descriptor. Finishing touches...
if (hasdates)
sd->setDateSpan(&di);
resetTemps();
isvalid = contentsOk;
}
currentText.clear();
return;
}
/**
 * Accumulate element text. The text is consumed (and the accumulator
 * cleared) by endElement().
 *
 * @param str text fragment to add to the accumulator.
 */
void characterData(const std::string &str) {
    currentText.append(str);
}
// The object we set up
std::shared_ptr<SearchData> sd;
bool isvalid;
bool isvalid{false};
bool contentsOk{true};
private:
void resetTemps()
{
currentText = whatclause = "";
text.clear();
void resetTemps() {
currentText = whatclause = "";
text.clear();
text2.clear();
field.clear();
slack = 0;
d = m = y = di.d1 = di.m1 = di.y1 = di.d2 = di.m2 = di.y2 = 0;
hasdates = false;
exclude = false;
field.clear();
slack = 0;
d = m = y = di.d1 = di.m1 = di.y1 = di.d2 = di.m2 = di.y2 = 0;
hasdates = false;
exclude = false;
}
// Temporary data while parsing.
QString currentText;
QString whatclause;
string field, text, text2;
std::string currentText;
std::string whatclause;
std::string field, text, text2;
int slack;
int d, m, y;
DateInterval di;
@ -78,152 +200,11 @@ private:
bool exclude;
};
bool SDHXMLHandler::startElement(const QString & /* namespaceURI */,
const QString & /* localName */,
const QString &qName,
const QXmlAttributes &attrs)
{
LOGDEB2("SDHXMLHandler::startElement: name [" << qs2utf8s(qName) << "]\n");
if (qName == "SD") {
// Advanced search history entries have no type. So we're good
// either if type is absent, or if it's searchdata
int idx = attrs.index("type");
if (idx >= 0 && attrs.value(idx).compare("searchdata")) {
LOGDEB("XMLTOSD: bad type: " << qs2utf8s(attrs.value(idx)) << endl);
return false;
}
resetTemps();
// A new search descriptor. Allocate data structure
sd = std::shared_ptr<SearchData>(new SearchData);
if (!sd) {
LOGERR("SDHXMLHandler::startElement: out of memory\n");
return false;
}
}
return true;
}
bool SDHXMLHandler::endElement(const QString & /* namespaceURI */,
const QString & /* localName */,
const QString &qName)
{
LOGDEB2("SDHXMLHandler::endElement: name [" << qs2utf8s(qName) << "]\n");
if (qName == "CLT") {
if (currentText == "OR") {
sd->setTp(SCLT_OR);
}
} else if (qName == "CT") {
whatclause = currentText.trimmed();
} else if (qName == "NEG") {
exclude = true;
} else if (qName == "F") {
field = base64_decode(qs2utf8s(currentText.trimmed()));
} else if (qName == "T") {
text = base64_decode(qs2utf8s(currentText.trimmed()));
} else if (qName == "T2") {
text2 = base64_decode(qs2utf8s(currentText.trimmed()));
} else if (qName == "S") {
slack = atoi((const char *)currentText.toUtf8());
} else if (qName == "C") {
SearchDataClause *c;
if (whatclause == "AND" || whatclause.isEmpty()) {
c = new SearchDataClauseSimple(SCLT_AND, text, field);
c->setexclude(exclude);
} else if (whatclause == "OR") {
c = new SearchDataClauseSimple(SCLT_OR, text, field);
c->setexclude(exclude);
} else if (whatclause == "RG") {
c = new SearchDataClauseRange(text, text2, field);
c->setexclude(exclude);
} else if (whatclause == "EX") {
// Compat with old hist. We don't generate EX (SCLT_EXCL) anymore:
// it's replaced with OR + exclude flag
c = new SearchDataClauseSimple(SCLT_OR, text, field);
c->setexclude(true);
} else if (whatclause == "FN") {
c = new SearchDataClauseFilename(text);
c->setexclude(exclude);
} else if (whatclause == "PH") {
c = new SearchDataClauseDist(SCLT_PHRASE, text, slack, field);
c->setexclude(exclude);
} else if (whatclause == "NE") {
c = new SearchDataClauseDist(SCLT_NEAR, text, slack, field);
c->setexclude(exclude);
} else {
LOGERR("Bad clause type [" << qs2utf8s(whatclause) << "]\n");
return false;
}
sd->addClause(c);
whatclause = "";
text.clear();
field.clear();
slack = 0;
exclude = false;
} else if (qName == "D") {
d = atoi((const char *)currentText.toUtf8());
} else if (qName == "M") {
m = atoi((const char *)currentText.toUtf8());
} else if (qName == "Y") {
y = atoi((const char *)currentText.toUtf8());
} else if (qName == "DMI") {
di.d1 = d;
di.m1 = m;
di.y1 = y;
hasdates = true;
} else if (qName == "DMA") {
di.d2 = d;
di.m2 = m;
di.y2 = y;
hasdates = true;
} else if (qName == "MIS") {
sd->setMinSize(atoll((const char *)currentText.toUtf8()));
} else if (qName == "MAS") {
sd->setMaxSize(atoll((const char *)currentText.toUtf8()));
} else if (qName == "ST") {
string types = (const char *)currentText.toUtf8();
vector<string> vt;
stringToTokens(types, vt);
for (unsigned int i = 0; i < vt.size(); i++)
sd->addFiletype(vt[i]);
} else if (qName == "IT") {
string types(qs2utf8s(currentText));
vector<string> vt;
stringToTokens(types, vt);
for (unsigned int i = 0; i < vt.size(); i++)
sd->remFiletype(vt[i]);
} else if (qName == "YD") {
string d;
base64_decode(qs2utf8s(currentText.trimmed()), d);
sd->addClause(new SearchDataClausePath(d));
} else if (qName == "ND") {
string d;
base64_decode(qs2utf8s(currentText.trimmed()), d);
sd->addClause(new SearchDataClausePath(d, true));
} else if (qName == "SD") {
// Closing current search descriptor. Finishing touches...
if (hasdates)
sd->setDateSpan(&di);
resetTemps();
isvalid = true;
}
currentText.clear();
return true;
}
std::shared_ptr<Rcl::SearchData> xmlToSearchData(const string& xml,
bool verbose)
{
SDHXMLHandler handler;
QXmlSimpleReader reader;
reader.setContentHandler(&handler);
reader.setErrorHandler(&handler);
QXmlInputSource xmlInputSource;
xmlInputSource.setData(QString::fromUtf8(xml.c_str()));
if (!reader.parse(xmlInputSource) || !handler.isvalid) {
SDHXMLHandler handler(xml);
if (!handler.Parse() || !handler.isvalid) {
if (verbose) {
LOGERR("xmlToSearchData: parse failed for [" << xml << "]\n");
}
@ -234,117 +215,93 @@ std::shared_ptr<Rcl::SearchData> xmlToSearchData(const string& xml,
// Handler for parsing saved simple search data
class SSHXMLHandler : public QXmlDefaultHandler {
class SSHXMLHandler : public PicoXMLParser {
public:
SSHXMLHandler()
: isvalid(false)
{
SSHXMLHandler(const std::string& in)
: PicoXMLParser(in) {
resetTemps();
}
void startElement(const std::string &nm,
const std::map<std::string, std::string>& attrs) {
LOGDEB2("SSHXMLHandler::startElement: name [" << nm << "]\n");
if (nm == "SD") {
// Simple search saved data has a type='ssearch' attribute.
auto attr = attrs.find("type");
if (attr == attrs.end() || attr->second != "ssearch") {
if (attr == attrs.end()) {
LOGDEB("XMLTOSSS: bad type\n");
} else {
LOGDEB("XMLTOSSS: bad type: " << attr->second << endl);
}
contentsOk = false;
}
resetTemps();
}
bool startElement(const QString & /* namespaceURI */,
const QString & /* localName */,
const QString &qName,
const QXmlAttributes &attributes);
bool endElement(const QString & /* namespaceURI */,
const QString & /* localName */,
const QString &qName);
bool characters(const QString &str)
{
currentText += str;
return true;
}
}
/**
 * Handle a closing tag for saved simple search data.
 *
 * The whitespace-trimmed element text is stored into the matching field
 * of data: SM selects the search mode, T and EX hold base64-encoded
 * values, SL and AS hold string lists, AP sets the autophrase flag.
 * Closing SD resets the temporaries and sets isvalid from contentsOk.
 *
 * @param nm name of the element being closed.
 */
void endElement(const string& nm) override {
    LOGDEB2("SSHXMLHandler::endElement: name [" << nm << "]\n");
    std::string value{currentText};
    trimstring(value, " \t\n\r");

    if (nm == "SM") {
        if (value == "QL") {
            data.mode = SSearch::SST_LANG;
        } else if (value == "FN") {
            data.mode = SSearch::SST_FNM;
        } else if (value == "OR") {
            data.mode = SSearch::SST_ANY;
        } else if (value == "AND") {
            data.mode = SSearch::SST_ALL;
        } else {
            LOGERR("BAD SEARCH MODE: [" << value << "]\n");
            contentsOk = false;
            // This path leaves the accumulated text in place.
            return;
        }
    } else if (nm == "T") {
        base64_decode(value, data.text);
    } else if (nm == "EX") {
        data.extindexes.push_back(base64_decode(value));
    } else if (nm == "SL") {
        stringToStrings(value, data.stemlangs);
    } else if (nm == "AS") {
        stringToStrings(value, data.autosuffs);
    } else if (nm == "AP") {
        data.autophrase = true;
    } else if (nm == "SD") {
        // Closing current search descriptor. Finishing touches...
        resetTemps();
        isvalid = contentsOk;
    }

    currentText.clear();
}
/**
 * Accumulate element text. The text is consumed (and the accumulator
 * cleared) by endElement().
 *
 * @param str text fragment to add to the accumulator.
 */
void characterData(const std::string &str) {
    currentText.append(str);
}
// The object we set up
SSearchDef data;
bool isvalid;
bool isvalid{false};
bool contentsOk{true};
private:
void resetTemps()
{
currentText = whatclause = "";
text.clear();
}
void resetTemps() {
currentText = whatclause = "";
text.clear();
}
// Temporary data while parsing.
QString currentText;
QString whatclause;
std::string currentText;
std::string whatclause;
string text;
};
bool SSHXMLHandler::startElement(const QString & /* namespaceURI */,
const QString & /* localName */,
const QString &qName,
const QXmlAttributes &attrs)
{
LOGDEB2("SSHXMLHandler::startElement: name [" << u8s2qs(qName) << "]\n");
if (qName == "SD") {
// Simple search saved data has a type='ssearch' attribute.
int idx = attrs.index("type");
if (idx < 0 || attrs.value(idx).compare("ssearch")) {
if (idx < 0) {
LOGDEB("XMLTOSSS: bad type\n");
} else {
LOGDEB("XMLTOSSS: bad type: " << qs2utf8s(attrs.value(idx))
<< endl);
}
return false;
}
resetTemps();
}
return true;
}
bool SSHXMLHandler::endElement(const QString & /* namespaceURI */,
const QString & /* localName */,
const QString &qName)
{
LOGDEB2("SSHXMLHandler::endElement: name [" << u8s2qs(qName) << "]\n");
currentText = currentText.trimmed();
if (qName == "SL") {
stringToStrings(qs2utf8s(currentText), data.stemlangs);
} else if (qName == "T") {
base64_decode(qs2utf8s(currentText), data.text);
} else if (qName == "EX") {
data.extindexes.push_back(base64_decode(qs2utf8s(currentText)));
} else if (qName == "SM") {
if (!currentText.compare("QL")) {
data.mode = SSearch::SST_LANG;
} else if (!currentText.compare("FN")) {
data.mode = SSearch::SST_FNM;
} else if (!currentText.compare("OR")) {
data.mode = SSearch::SST_ANY;
} else if (!currentText.compare("AND")) {
data.mode = SSearch::SST_ALL;
} else {
LOGERR("BAD SEARCH MODE: [" << qs2utf8s(currentText) << "]\n");
return false;
}
} else if (qName == "AS") {
stringToStrings(qs2utf8s(currentText), data.autosuffs);
} else if (qName == "AP") {
data.autophrase = true;
} else if (qName == "SD") {
// Closing current search descriptor. Finishing touches...
resetTemps();
isvalid = true;
}
currentText.clear();
return true;
}
bool xmlToSSearch(const string& xml, SSearchDef& data)
{
SSHXMLHandler handler;
QXmlSimpleReader reader;
reader.setContentHandler(&handler);
reader.setErrorHandler(&handler);
QXmlInputSource xmlInputSource;
xmlInputSource.setData(QString::fromUtf8(xml.c_str()));
if (!reader.parse(xmlInputSource) || !handler.isvalid) {
SSHXMLHandler handler(xml);
if (!handler.Parse() || !handler.isvalid) {
LOGERR("xmlToSSearch: parse failed for [" << xml << "]\n");
return false;
}

View File

@ -364,14 +364,17 @@ private:
}
epos++;
skipWS(tag, epos);
if (tag[epos] != '"' || epos == tag.size() - 1) {
m_reason << "Missing dquote or value at cpos " << m_pos+epos;
char qc{0};
if ((tag[epos] != '"' && tag[epos] != '\'') ||
epos == tag.size() - 1) {
m_reason << "Missing quote or value at cpos " << m_pos+epos;
return false;
}
qc = tag[epos];
spos = epos + 1;
epos = tag.find_first_of(R"(")", spos);
epos = tag.find_first_of(qc, spos);
if (epos == std::string::npos) {
m_reason << "Missing closing dquote at cpos " << m_pos+spos;
m_reason << "Missing closing quote at cpos " << m_pos+spos;
return false;
}
attrs[attrnm] = tag.substr(spos, epos - spos);