diff --git a/src/qtgui/fragbuts.cpp b/src/qtgui/fragbuts.cpp index 8bd7b2b7..01f83670 100644 --- a/src/qtgui/fragbuts.cpp +++ b/src/qtgui/fragbuts.cpp @@ -37,42 +37,75 @@ #include "log.h" #include "readfile.h" #include "copyfile.h" +#include "picoxml.h" using namespace std; -class FragButsParser : public QXmlDefaultHandler { +class FragButsParser : public PicoXMLParser { public: - FragButsParser(FragButs *_parent, vector& _buttons) - : parent(_parent), vlw(new QVBoxLayout(parent)), - vl(new QVBoxLayout()), buttons(_buttons), - hl(0), bg(0), radio(false) { - } + FragButsParser( + const std::string& in, FragButs *_p, vector& _bts) + : PicoXMLParser(in), parent(_p), vlw(new QVBoxLayout(parent)), + vl(new QVBoxLayout()), buttons(_bts) {} - bool startElement(const QString & /* namespaceURI */, - const QString & /* localName */, - const QString &qName, - const QXmlAttributes &attributes); - bool endElement(const QString & /* namespaceURI */, - const QString & /* localName */, - const QString &qName); - bool characters(const QString &str) { - currentText += str; - return true; + void startElement(const std::string &nm, + const std::map&) override { + std::cerr << "startElement [" << nm << "]\n"; + currentText.clear(); + if (nm == "buttons") { + radio = false; + hl = new QHBoxLayout(); + } else if (nm == "radiobuttons") { + radio = true; + bg = new QButtonGroup(parent); + hl = new QHBoxLayout(); + } else if (nm == "label" || nm == "frag" || nm == "fragbuts" || + nm == "fragbut") { + } else { + QMessageBox::warning( + 0, "Recoll", QString("Bad element name: [%1]").arg(nm.c_str())); } + } + void endElement(const std::string& nm) override { + std::cerr << "endElement [" << nm << "]\n"; - bool error(const QXmlParseException& exception) { - fatalError(exception); - return false; + if (nm == "label") { + label = u8s2qs(currentText); + } else if (nm == "frag") { + frag = currentText; + } else if (nm == "fragbut") { + string slab = qs2utf8s(label); + trimstring(slab, " \t\n\t"); + label = u8s2qs(slab.c_str()); + QAbstractButton *abut; + if (radio) { + QRadioButton *but = new QRadioButton(label, parent); + bg->addButton(but); + if (bg->buttons().length() == 1) + but->setChecked(true); + abut = but; + } else { + QCheckBox *but = new QCheckBox(label, parent); + abut = but; + } + abut->setToolTip(u8s2qs(currentText)); + buttons.push_back(FragButs::ButFrag(abut, frag)); + hl->addWidget(abut); + } else if (nm == "buttons" || nm == "radiobuttons") { + vl->addLayout(hl); + hl = 0; + } else if (nm == "fragbuts") { + vlw->addLayout(vl); + } else { + QMessageBox::warning( + 0, "Recoll", QString("Bad element name: [%1]").arg(nm.c_str())); + } } - bool fatalError(const QXmlParseException& x) { - errorMessage = QString("%2 at line %3 column %4") - .arg(x.message()) - .arg(x.lineNumber()) - .arg(x.columnNumber()); - return false; + void characterData(const std::string &str) override { + std::cerr << "characterData [" << str << "]\n"; + currentText += str; } - QString errorMessage; private: QWidget *parent; QVBoxLayout *vlw; @@ -80,75 +113,14 @@ private: vector& buttons; // Temporary data while parsing. - QHBoxLayout *hl; - QButtonGroup *bg; - QString currentText; + QHBoxLayout *hl{nullptr}; + QButtonGroup *bg{nullptr}; QString label; - string frag; - bool radio; + std::string currentText; + std::string frag; + bool radio{false}; }; -bool FragButsParser::startElement(const QString & /* namespaceURI */, - const QString & /* localName */, - const QString &qName, - const QXmlAttributes &/*attributes*/) -{ - currentText = ""; - if (qName == "buttons") { - radio = false; - hl = new QHBoxLayout(); - } else if (qName == "radiobuttons") { - radio = true; - bg = new QButtonGroup(parent); - hl = new QHBoxLayout(); - } else if (qName == "label" || qName == "frag" || qName == "fragbuts" || - qName == "fragbut") { - } else { - QMessageBox::warning(0, "Recoll", - QString("Bad element name: [%1]").arg(qName)); - return false; - } - return true; -} - -bool FragButsParser::endElement(const QString & /* namespaceURI */, - const QString & /* localName */, - const QString &qName) -{ - if (qName == "label") { - label = currentText; - } else if (qName == "frag") { - frag = qs2utf8s(currentText); - } else if (qName == "fragbut") { - string slab = qs2utf8s(label); - trimstring(slab, " \t\n\t"); - label = QString::fromUtf8(slab.c_str()); - QAbstractButton *abut; - if (radio) { - QRadioButton *but = new QRadioButton(label, parent); - bg->addButton(but); - if (bg->buttons().length() == 1) - but->setChecked(true); - abut = but; - } else { - QCheckBox *but = new QCheckBox(label, parent); - abut = but; - } - abut->setToolTip(currentText); - buttons.push_back(FragButs::ButFrag(abut, frag)); - hl->addWidget(abut); - } else if (qName == "buttons" || qName == "radiobuttons") { - vl->addLayout(hl); - hl = 0; - } else if (qName == "fragbuts") { - vlw->addLayout(vl); - } else { - QMessageBox::warning(0, "Recoll", - QString("Bad element name: [%1]").arg(qName)); - return false; - } - return true; -} FragButs::FragButs(QWidget* parent) : QWidget(parent), m_reftime(0), m_ok(false) @@ -168,15 +140,11 @@ FragButs::FragButs(QWidget* parent) LOGERR("Fragbuts:: can't read [" << m_fn << "]\n"); return; } - FragButsParser parser(this, m_buttons); - QXmlSimpleReader reader; - reader.setContentHandler(&parser); - reader.setErrorHandler(&parser); - QXmlInputSource xmlInputSource; - xmlInputSource.setData(QString::fromUtf8(data.c_str())); - if (!reader.parse(xmlInputSource)) { - QMessageBox::warning(0, "Recoll", tr("%1:\n %2") - .arg(path2qs(m_fn)).arg(parser.errorMessage)); + FragButsParser parser(data, this, m_buttons); + if (!parser.Parse()) { + QMessageBox::warning( + 0, "Recoll", tr("%1:\n %2").arg(path2qs(m_fn)) + .arg(u8s2qs(parser.getReason()))); return; } for (vector::iterator it = m_buttons.begin(); diff --git a/src/utils/picoxml.h b/src/utils/picoxml.h new file mode 100644 index 00000000..a67453d2 --- /dev/null +++ b/src/utils/picoxml.h @@ -0,0 +1,439 @@ +/* Copyright (C) 2016 J.F.Dockes + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * (1) Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * (2) Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * (3)The name of the author may not be used to + * endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. +**********************************************************/ + +#ifndef _PICOXML_H_INCLUDED_ +#define _PICOXML_H_INCLUDED_ + +/** + * PicoXMLParser: a single include file parser for an XML-like, but + * restricted language, adequate for config files, not for arbitrary + * externally generated data. + * + * - The code depends on nothing but the C++ standard library + * - The input to the parser is a single c++ string. Does not deal with + * input in several pieces or files. + * - SAX mode only. You have access to the tag stack. I've always + * found DOM mode less usable. + * - Checks for proper tag nesting and not much else. + * - ! No CDATA + * - ! Attributes should really really not contain XML special chars. + * + * A typical input would be like the following (you can add XML + * declarations, whitespace and newlines to taste). + * + * top chrs1sub chrstop chrs2 + * + * Usage: subclass PicoXMLParser, overriding the methods in the + * "protected:" section (look there for more details), call the + * constructor with your input, then call parse(). + */ + +#include +#include +#include +#include +#include +#include + +// Expat compat +typedef char XML_Char; + +class PicoXMLParser { +public: + PicoXMLParser(const std::string& input) + : m_in(input), m_pos(0) {} + + virtual ~PicoXMLParser() {} + + virtual bool parse() { + return _parse(); + } + virtual bool Parse() { + return _parse(); + } + + virtual std::string getReason() { + return m_reason.str(); + } + +protected: + + /* Methods to be overriden */ + + /** + * Tag open handler. + * @param tagname the tag name + * @param attrs a map of attribute name/value pairs + */ + virtual void startElement( + const std::string& /* nm */, + const std::map& /* attrs */) {} + /** Expatmm compat. We don't support attributes with this at the moment */ + virtual void StartElement(const XML_Char *, const XML_Char **) {} + + /** + * Tag close handler. + * You should probably have been accumulating text and stuff since + * the tag opening. + * @param tagname the tag name. + */ + virtual void endElement(const std::string& /* nm */) {} + /** Expatmm compat */ + virtual void EndElement(const XML_Char */* nm */) {} + + /** + * Non-tag data handler. + * @param data the data. + */ + virtual void characterData(const std::string& /*data*/) {} + /** Expatmm compat */ + virtual void CharacterData(const XML_Char *, int) {} + + /** + * Return current tag name stack. Deprecated, use m_path. + * This does not include the current (bottom) tag. + * Attributes are not kept in there, you'll have to do this yourself. + * @return a const ref to a vector of tag names. + */ + virtual const std::vector& tagStack() { + return m_tagstack; + } + + /** + * Current element stack, including the bottom one + * Each entry includes the attributes and the starting character offset. + * The stack includes the last element (the one open is called for). + */ + class StackEl { + public: + StackEl(const std::string& nm) : name(nm) {} + std::string name; + std::string::size_type start_index; + std::map attributes; + std::string data; // Derived class usage + }; + std::vector m_path; + +private: + const std::string& m_in; + std::string::size_type m_pos{0}; + std::stringstream m_reason; + std::vector m_tagstack; + + void _startelem(const std::string& tagname, + const std::map& attrs, bool empty) + { + m_path.push_back(StackEl(tagname)); + StackEl& lastelt = m_path.back(); + lastelt.start_index = m_pos; + lastelt.attributes = attrs; + + startElement(tagname, attrs); + StartElement(tagname.c_str(), nullptr); + + m_tagstack.push_back(tagname); // Compat + if (empty) { + _endelem(tagname); + } + } + + void _endelem(const std::string& tagname) + { + m_tagstack.pop_back(); + endElement(tagname); + EndElement(tagname.c_str()); + m_path.pop_back(); + } + + bool _parse() { + // skip initial whitespace and XML decl. On success, returns with + // current pos on first tag '<' + if (!skipDecl()) { + return false; + } + if (nomore()) { + // empty file + return true; + } + + for (;;) { + // Current char is '<' and the next char is not '?' + //std::cerr<< "m_pos "<< m_pos<<" char "<< m_in[m_pos]<"); + if (m_pos == std::string::npos || m_pos <= spos + 1) { + m_reason << "Empty tag or EOF inside tag. pos " << spos; + return false; + } + + int emptyel = m_in[m_pos-2] == '/' ? 1 : 0; + if (emptyel && isendtag) { + m_reason << "Bad tag at cpos " << spos; + return false; + } + + std::string tag = + m_in.substr(spos + isendtag, + m_pos - (spos + 1 + isendtag + emptyel)); + //std::cerr << "TAG NAME [" << tag << "]\n"; + trimtag(tag); + std::map attrs; + if (!parseattrs(tag, attrs)) { + return false; + } + if (isendtag) { + if (m_tagstack.empty() || tag.compare(m_tagstack.back())) { + m_reason << "Closing not open tag " << tag << + " at cpos " << m_pos; + return false; + } + _endelem(tag); + } else { + _startelem(tag, attrs, emptyel); + } + spos = m_pos; + if (!_chardata()) { + return false; + } + } + return false; + } + + bool _chardata() { + std::string::size_type spos = m_pos; + m_pos = m_in.find("<", m_pos); + if (nomore()) { + return true; + } + if (m_pos != spos) { + std::string data{unQuote(m_in.substr(spos, m_pos - spos))}; + characterData(data); + CharacterData(data.c_str(), data.size()); + } + return true; + } + + bool nomore(int sz = 0) const { + return m_pos == std::string::npos || m_pos >= m_in.size() - sz; + } + bool skipWS(const std::string& in, std::string::size_type& pos) { + if (pos == std::string::npos) + return false; + pos = in.find_first_not_of(" \t\n\r", pos); + return pos != std::string::npos; + } + bool skipStr(const std::string& str) { + if (m_pos == std::string::npos) + return false; + m_pos = m_in.find(str, m_pos); + if (m_pos != std::string::npos) + m_pos += str.size(); + return m_pos != std::string::npos; + } + int peek(int sz = 0) const { + if (nomore(sz)) + return -1; + return m_in[m_pos + 1 + sz]; + } + void trimtag(std::string& tagname) { + std::string::size_type trimpos = tagname.find_last_not_of(" \t\n\r"); + if (trimpos != std::string::npos) { + tagname = tagname.substr(0, trimpos+1); + } + } + + bool skipDecl() { + for (;;) { + if (!skipWS(m_in, m_pos)) { + m_reason << "EOF during initial ws skip"; + return true; + } + if (m_in[m_pos] != '<') { + m_reason << "EOF file does not begin with decl/tag: m_pos " << + m_pos << " char [" << m_in[m_pos] << "]\n"; + return false; + } + if (peek() == '?') { + if (!skipStr("?>")) { + m_reason << "EOF while looking for end of xml decl"; + return false; + } + } else { + break; + } + } + return true; + } + + bool skipComment() { + if (nomore()) { + return true; + } + if (m_in[m_pos] != '<') { + m_reason << "Internal error: skipComment called with wrong " + "start: m_pos " << + m_pos << " char [" << m_in[m_pos] << "]\n"; + return false; + } + if (peek() == '!' && peek(1) == '-' && peek(2) == '-') { + if (!skipStr("-->")) { + m_reason << "EOF while looking for end of XML comment"; + return false; + } + // Process possible characters until next tag + return _chardata(); + } + return true; + } + + bool parseattrs(std::string& tag, + std::map& attrs) { + //std::cerr << "parseattrs: [" << tag << "]\n"; + attrs.clear(); + std::string::size_type spos = tag.find_first_of(" \t\n\r"); + if (spos == std::string::npos) + return true; + std::string tagname = tag.substr(0, spos); + //std::cerr << "tag name [" << tagname << "] pos " << spos << "\n"; + skipWS(tag, spos); + + for (;;) { + //std::cerr << "top of loop [" << tag.substr(spos) << "]\n"; + std::string::size_type epos = tag.find_first_of(" \t\n\r=", spos); + if (epos == std::string::npos) { + m_reason << "Bad attributes syntax at cpos " << m_pos + epos; + return false; + } + std::string attrnm = tag.substr(spos, epos - spos); + if (attrnm.empty()) { + m_reason << "Empty attribute name ?? at cpos " << m_pos + epos; + return false; + } + //std::cerr << "attr name [" << attrnm << "]\n"; + skipWS(tag, epos); + if (epos == std::string::npos || epos == tag.size() - 1 || + tag[epos] != '=') { + m_reason <<"Missing equal sign or value at cpos " << m_pos+epos; + return false; + } + epos++; + skipWS(tag, epos); + if (tag[epos] != '"' || epos == tag.size() - 1) { + m_reason << "Missing dquote or value at cpos " << m_pos+epos; + return false; + } + spos = epos + 1; + epos = tag.find_first_of(R"(")", spos); + if (epos == std::string::npos) { + m_reason << "Missing closing dquote at cpos " << m_pos+spos; + return false; + } + attrs[attrnm] = tag.substr(spos, epos - spos); + //std::cerr << "attr value [" << attrs[attrnm] << "]\n"; + if (epos == tag.size() - 1) { + break; + } + epos++; + skipWS(tag, epos); + if (epos == tag.size() - 1) { + break; + } + spos = epos; + } + tag = tagname; + return true; + } + + std::string unQuote(const std::string &s) { + static const std::string e_quot{"quot"}; + static const std::string e_amp{"amp"}; + static const std::string e_apos{"apos"}; + static const std::string e_lt{"lt"}; + static const std::string e_gt{"gt"}; + + std::string out; + out.reserve(s.size()); + std::string::const_iterator it = s.begin(); + while (it != s.end()) { + if (*it != '&') { + out += *it; + it++; + continue; + } + if (it == s.end()) { + // Unexpected + break; + } + it++; + std::string code; + while (it != s.end() && *it != ';') { + code += *it; + it++; + } + if (it == s.end()) { + // Unexpected + break; + } + it++; + if (code == e_quot) { + out += '"'; + } else if (code == e_amp) { + out += '&'; + } else if (code == e_apos) { + out += '\''; + } else if (code == e_lt) { + out += '<'; + } else if (code == e_gt) { + out += '>'; + } + } + return out; + } +}; +#endif /* _PICOXML_H_INCLUDED_ */