Merge branch 'kopostag'
This commit is contained in:
commit
9b3a5fac12
@ -82,6 +82,7 @@ common/rclinit.h \
|
|||||||
common/syngroups.cpp \
|
common/syngroups.cpp \
|
||||||
common/syngroups.h \
|
common/syngroups.h \
|
||||||
common/textsplit.cpp \
|
common/textsplit.cpp \
|
||||||
|
common/textsplitko.cpp \
|
||||||
common/textsplit.h \
|
common/textsplit.h \
|
||||||
common/unacpp.cpp \
|
common/unacpp.cpp \
|
||||||
common/unacpp.h \
|
common/unacpp.h \
|
||||||
@ -210,6 +211,8 @@ utils/circache.cpp \
|
|||||||
utils/circache.h \
|
utils/circache.h \
|
||||||
utils/closefrom.cpp \
|
utils/closefrom.cpp \
|
||||||
utils/closefrom.h \
|
utils/closefrom.h \
|
||||||
|
utils/cmdtalk.cpp \
|
||||||
|
utils/cmdtalk.h \
|
||||||
utils/conftree.cpp \
|
utils/conftree.cpp \
|
||||||
utils/conftree.h \
|
utils/conftree.h \
|
||||||
utils/copyfile.cpp \
|
utils/copyfile.cpp \
|
||||||
@ -645,8 +648,10 @@ filterdir = $(pkgdatadir)/filters
|
|||||||
dist_filter_DATA = \
|
dist_filter_DATA = \
|
||||||
desktop/hotrecoll.py \
|
desktop/hotrecoll.py \
|
||||||
filters/abiword.xsl \
|
filters/abiword.xsl \
|
||||||
|
filters/cmdtalk.py \
|
||||||
filters/fb2.xsl \
|
filters/fb2.xsl \
|
||||||
filters/gnumeric.xsl \
|
filters/gnumeric.xsl \
|
||||||
|
filters/kosplitter.py \
|
||||||
filters/msodump.zip \
|
filters/msodump.zip \
|
||||||
filters/okular-note.xsl \
|
filters/okular-note.xsl \
|
||||||
filters/opendoc-body.xsl \
|
filters/opendoc-body.xsl \
|
||||||
@ -724,7 +729,7 @@ python/recoll/recoll/rclconfig.py
|
|||||||
install-data-hook:
|
install-data-hook:
|
||||||
(cd $(DESTDIR)/$(filterdir); \
|
(cd $(DESTDIR)/$(filterdir); \
|
||||||
chmod a+x rcl* ppt-dump.py xls-dump.py xlsxmltocsv.py hotrecoll.py; \
|
chmod a+x rcl* ppt-dump.py xls-dump.py xlsxmltocsv.py hotrecoll.py; \
|
||||||
chmod a+x recoll-we-move-files.py ../examples/rclmon.sh; \
|
chmod a+x recoll-we-move-files.py ../examples/rclmon.sh kosplitter.py; \
|
||||||
chmod 0644 msodump.zip recollepub.zip rclexecm.py rcllatinstops.zip \
|
chmod 0644 msodump.zip recollepub.zip rclexecm.py rcllatinstops.zip \
|
||||||
rclconfig.py conftree.py rclmidi.py rclexec1.py rcluncomp.py rclxslt.py)
|
rclconfig.py conftree.py rclmidi.py rclexec1.py rcluncomp.py rclxslt.py)
|
||||||
|
|
||||||
|
|||||||
@ -44,8 +44,10 @@
|
|||||||
// ngrams
|
// ngrams
|
||||||
#undef KATAKANA_AS_WORDS
|
#undef KATAKANA_AS_WORDS
|
||||||
|
|
||||||
// Same for Korean syllabic, and same problem, not used.
|
// Same for Korean syllabic, and same problem. However we have a
|
||||||
#undef HANGUL_AS_WORDS
|
// runtime option to use an external text analyser for hangul, so this
|
||||||
|
// is defined at compile time.
|
||||||
|
#define HANGUL_AS_WORDS
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
@ -289,6 +291,7 @@ bool TextSplit::o_noNumbers{false};
|
|||||||
bool TextSplit::o_deHyphenate{false};
|
bool TextSplit::o_deHyphenate{false};
|
||||||
int TextSplit::o_maxWordLength{40};
|
int TextSplit::o_maxWordLength{40};
|
||||||
static const int o_CJKMaxNgramLen{5};
|
static const int o_CJKMaxNgramLen{5};
|
||||||
|
bool o_exthangultagger{false};
|
||||||
|
|
||||||
void TextSplit::staticConfInit(RclConfig *config)
|
void TextSplit::staticConfInit(RclConfig *config)
|
||||||
{
|
{
|
||||||
@ -323,7 +326,14 @@ void TextSplit::staticConfInit(RclConfig *config)
|
|||||||
charclasses[int('\\')] = SPACE;
|
charclasses[int('\\')] = SPACE;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
string kotagger;
|
||||||
|
config->getConfParam("hangultagger", kotagger);
|
||||||
|
if (!kotagger.empty()) {
|
||||||
|
o_exthangultagger = true;
|
||||||
|
koStaticConfInit(config, kotagger);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Final term checkpoint: do some checking (the kind which is simpler
|
// Final term checkpoint: do some checking (the kind which is simpler
|
||||||
// to do here than in the main loop), then send term to our client.
|
// to do here than in the main loop), then send term to our client.
|
||||||
@ -612,7 +622,7 @@ bool TextSplit::text_to_words(const string &in)
|
|||||||
#if defined(KATAKANA_AS_WORDS) || defined(HANGUL_AS_WORDS)
|
#if defined(KATAKANA_AS_WORDS) || defined(HANGUL_AS_WORDS)
|
||||||
int prev_csc = -1;
|
int prev_csc = -1;
|
||||||
#endif
|
#endif
|
||||||
for (; !it.eof(); it++) {
|
for (; !it.eof() && !it.error(); it++) {
|
||||||
unsigned int c = *it;
|
unsigned int c = *it;
|
||||||
nonalnumcnt++;
|
nonalnumcnt++;
|
||||||
|
|
||||||
@ -625,30 +635,40 @@ bool TextSplit::text_to_words(const string &in)
|
|||||||
if (UNICODE_IS_KATAKANA(c)) {
|
if (UNICODE_IS_KATAKANA(c)) {
|
||||||
csc = CSC_KATAKANA;
|
csc = CSC_KATAKANA;
|
||||||
} else if (UNICODE_IS_HANGUL(c)) {
|
} else if (UNICODE_IS_HANGUL(c)) {
|
||||||
csc = CSC_HANGUL;
|
if (o_exthangultagger) {
|
||||||
|
csc = CSC_HANGUL;
|
||||||
|
} else {
|
||||||
|
csc = CSC_CJK;
|
||||||
|
}
|
||||||
} else if (UNICODE_IS_CJK(c)) {
|
} else if (UNICODE_IS_CJK(c)) {
|
||||||
csc = CSC_CJK;
|
csc = CSC_CJK;
|
||||||
} else {
|
} else {
|
||||||
csc = CSC_OTHER;
|
csc = CSC_OTHER;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (o_processCJK && csc == CSC_CJK) {
|
if (o_processCJK && (csc == CSC_CJK || csc == CSC_HANGUL)) {
|
||||||
// CJK character hit.
|
// CJK character hit. Hangul processing may be special.
|
||||||
|
|
||||||
// Do like at EOF with the current non-cjk data.
|
// Do like at EOF with the current non-cjk data.
|
||||||
if (m_wordLen || m_span.length()) {
|
if (m_wordLen || m_span.length()) {
|
||||||
if (!doemit(true, it.getBpos()))
|
if (!doemit(true, it.getBpos()))
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
// Hand off situation to the appropriate routine.
|
||||||
// Hand off situation to the cjk routine.
|
if (csc == CSC_HANGUL) {
|
||||||
if (!cjk_to_words(&it, &c)) {
|
if (!ko_to_words(&it, &c)) {
|
||||||
LOGERR("Textsplit: scan error in cjk handler\n");
|
LOGERR("Textsplit: scan error in korean handler\n");
|
||||||
return false;
|
return false;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (!cjk_to_words(&it, &c)) {
|
||||||
|
LOGERR("Textsplit: scan error in cjk handler\n");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check for eof, else c contains the first non-cjk
|
// Check for eof, else c contains the first non-cjk
|
||||||
// character after the cjk sequence, just go on.
|
// character after the cjk sequence, just go on.
|
||||||
if (it.eof())
|
if (it.eof() || it.error())
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -976,7 +996,7 @@ bool TextSplit::cjk_to_words(Utf8Iter *itp, unsigned int *cp)
|
|||||||
// Current number of valid offsets;
|
// Current number of valid offsets;
|
||||||
unsigned int nchars = 0;
|
unsigned int nchars = 0;
|
||||||
unsigned int c = 0;
|
unsigned int c = 0;
|
||||||
for (; !it.eof(); it++) {
|
for (; !it.eof() && !it.error(); it++) {
|
||||||
c = *it;
|
c = *it;
|
||||||
if (c == ' ' || c == '\t' || c == '\n') {
|
if (c == ' ' || c == '\t' || c == '\n') {
|
||||||
continue;
|
continue;
|
||||||
@ -1077,7 +1097,7 @@ int TextSplit::countWords(const string& s, TextSplit::Flags flgs)
|
|||||||
bool TextSplit::hasVisibleWhite(const string &in)
|
bool TextSplit::hasVisibleWhite(const string &in)
|
||||||
{
|
{
|
||||||
Utf8Iter it(in);
|
Utf8Iter it(in);
|
||||||
for (; !it.eof(); it++) {
|
for (; !it.eof() && !it.error(); it++) {
|
||||||
unsigned int c = (unsigned char)*it;
|
unsigned int c = (unsigned char)*it;
|
||||||
if (c == (unsigned int)-1) {
|
if (c == (unsigned int)-1) {
|
||||||
LOGERR("hasVisibleWhite: error while scanning UTF-8 string\n");
|
LOGERR("hasVisibleWhite: error while scanning UTF-8 string\n");
|
||||||
@ -1097,7 +1117,7 @@ template <class T> bool u8stringToStrings(const string &s, T &tokens)
|
|||||||
tokens.clear();
|
tokens.clear();
|
||||||
enum states {SPACE, TOKEN, INQUOTE, ESCAPE};
|
enum states {SPACE, TOKEN, INQUOTE, ESCAPE};
|
||||||
states state = SPACE;
|
states state = SPACE;
|
||||||
for (; !it.eof(); it++) {
|
for (; !it.eof() && !it.error(); it++) {
|
||||||
unsigned int c = *it;
|
unsigned int c = *it;
|
||||||
if (visiblewhite.find(c) != visiblewhite.end())
|
if (visiblewhite.find(c) != visiblewhite.end())
|
||||||
c = ' ';
|
c = ' ';
|
||||||
|
|||||||
@ -54,6 +54,7 @@ public:
|
|||||||
/** Call at program initialization to read non default values from the
|
/** Call at program initialization to read non default values from the
|
||||||
configuration */
|
configuration */
|
||||||
static void staticConfInit(RclConfig *config);
|
static void staticConfInit(RclConfig *config);
|
||||||
|
static void koStaticConfInit(RclConfig *config, const std::string& tagger);
|
||||||
|
|
||||||
/** Split text, emit words and positions. */
|
/** Split text, emit words and positions. */
|
||||||
virtual bool text_to_words(const std::string &in);
|
virtual bool text_to_words(const std::string &in);
|
||||||
@ -199,6 +200,9 @@ private:
|
|||||||
// This processes cjk text:
|
// This processes cjk text:
|
||||||
bool cjk_to_words(Utf8Iter *it, unsigned int *cp);
|
bool cjk_to_words(Utf8Iter *it, unsigned int *cp);
|
||||||
|
|
||||||
|
// Experimental Korean splitter. This uses an external Python tokenizer
|
||||||
|
bool ko_to_words(Utf8Iter *it, unsigned int *cp);
|
||||||
|
|
||||||
bool emitterm(bool isspan, std::string &term, int pos, size_t bs,size_t be);
|
bool emitterm(bool isspan, std::string &term, int pos, size_t bs,size_t be);
|
||||||
bool doemit(bool spanerase, size_t bp);
|
bool doemit(bool spanerase, size_t bp);
|
||||||
void discardspan();
|
void discardspan();
|
||||||
|
|||||||
214
src/common/textsplitko.cpp
Normal file
214
src/common/textsplitko.cpp
Normal file
@ -0,0 +1,214 @@
|
|||||||
|
/* Copyright (C) 2020 J.F.Dockes
|
||||||
|
* This program is free software; you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation; either version 2 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program; if not, write to the
|
||||||
|
* Free Software Foundation, Inc.,
|
||||||
|
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
// Specialized Korean text splitter using konlpy running in a Python
|
||||||
|
// subprocess. konlpy can use several different backends. We support
|
||||||
|
// Okt (Twitter) and Mecab at this point. Unfortunately the different
|
||||||
|
// backends have different POS TAG names, so that things are not
|
||||||
|
// completely transparent when using another (need to translate the tag
|
||||||
|
// names in the Python program).
|
||||||
|
|
||||||
|
#include "autoconfig.h"
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include <string>
|
||||||
|
#include <cstring>
|
||||||
|
#include <unordered_set>
|
||||||
|
#include <mutex>
|
||||||
|
|
||||||
|
#include "textsplit.h"
|
||||||
|
#include "log.h"
|
||||||
|
//#define UTF8ITER_CHECK
|
||||||
|
#include "utf8iter.h"
|
||||||
|
#include "smallut.h"
|
||||||
|
#include "rclconfig.h"
|
||||||
|
#include "cmdtalk.h"
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
// Separator char used in words and tags lists.
|
||||||
|
static const string sepchars("\t");
|
||||||
|
|
||||||
|
static CmdTalk *o_talker;
|
||||||
|
static bool o_starterror{false};
|
||||||
|
static string o_cmdpath;
|
||||||
|
std::mutex o_mutex;
|
||||||
|
static string o_taggername{"Okt"};
|
||||||
|
|
||||||
|
// The Python/Java splitter is leaking memory. We restart it from time to time
|
||||||
|
static uint64_t restartcount;
|
||||||
|
static uint64_t restartthreshold = 5 * 1000 * 1000;
|
||||||
|
|
||||||
|
// Record the Korean splitter configuration: locate the kosplitter.py script
// through the configuration object and remember which tagger to request.
// An unrecognized tagger name is logged and otherwise ignored (the
// previously stored name is left as it was).
void TextSplit::koStaticConfInit(RclConfig *config, const string& tagger)
{
    o_cmdpath = config->findFilter("kosplitter.py");

    const bool known =
        tagger == "Okt" || tagger == "Mecab" || tagger == "Komoran";
    if (!known) {
        LOGERR("TextSplit::koStaticConfInit: unknown tagger [" << tagger <<
               "], using Okt\n");
        return;
    }
    o_taggername = tagger;
}
|
||||||
|
|
||||||
|
// Start the Python subprocess
|
||||||
|
static bool initCmd()
|
||||||
|
{
|
||||||
|
if (o_starterror) {
|
||||||
|
// No use retrying
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (o_talker) {
|
||||||
|
if (restartcount > restartthreshold) {
|
||||||
|
delete o_talker;
|
||||||
|
o_talker = nullptr;
|
||||||
|
restartcount = 0;
|
||||||
|
} else {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (o_cmdpath.empty()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (nullptr == (o_talker = new CmdTalk(300))) {
|
||||||
|
o_starterror = true;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (!o_talker->startCmd(o_cmdpath)) {
|
||||||
|
delete o_talker;
|
||||||
|
o_talker = nullptr;
|
||||||
|
o_starterror = true;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Split a stretch of Korean text using the external tagger subprocess.
//
// Characters are accumulated from *itp until a non-Hangul alphabetic
// character, end of input or an iterator error. The accumulated text is
// sent to the subprocess, which answers with tab-separated "text" (terms)
// and "tags" (part-of-speech names) lists. Terms tagged Noun, Verb,
// Adjective or Adverb are passed to takeword().
//
// @param itp iterator positioned on the first character of the stretch. It
//        is left on the character which ended the stretch.
// @param cp receives the value of the last character read.
// @return false if the subprocess is unavailable, if the exchange fails or
//         lacks a field, or if takeword() returns false. True otherwise.
bool TextSplit::ko_to_words(Utf8Iter *itp, unsigned int *cp)
{
    std::unique_lock<std::mutex> mylock(o_mutex);
    initCmd();
    if (nullptr == o_talker) {
        return false;
    }

    LOGDEB1("k_to_words: m_wordpos " << m_wordpos << "\n");
    Utf8Iter &it = *itp;
    unsigned int c = 0;

    unordered_map<string, string> args;

    // References to unordered_map elements stay valid across later
    // insertions, so we can fill the "data" value in place.
    string& inputdata = args["data"];

    // We send the tagger name every time but it's only used the first
    // one: can't change it after init. We could avoid sending it
    // every time, but I don't think that the performance hit is
    // significant
    args.insert(pair<string,string>{"tagger", o_taggername});

    // Walk the Korean characters section and send the text to the
    // analyser
    string::size_type orgbytepos = it.getBpos();
    for (; !it.eof() && !it.error(); it++) {
        c = *it;
        // isalpha() is only defined for values representable as unsigned
        // char (and EOF), so it must not see arbitrary code points.
        if (!isHANGUL(c) && c < 256 && isalpha(static_cast<int>(c))) {
            // Done with Korean stretch, process and go back to main routine
            //std::cerr << "Broke on char " << (std::string)it << endl;
            break;
        }
        it.appendchartostring(inputdata);
    }
    LOGDEB1("TextSplit::k_to_words: sending out " << inputdata.size() <<
            " bytes " << inputdata << endl);
    restartcount += inputdata.size();
    unordered_map<string,string> result;
    if (!o_talker->talk(args, result)) {
        LOGERR("Python splitter for Korean failed for [" << inputdata << "]\n");
        return false;
    }

    auto resit = result.find("text");
    if (resit == result.end()) {
        LOGERR("No text in Python splitter for Korean\n");
        return false;
    }
    string& outtext = resit->second;
    vector<string> words;
    stringToTokens(outtext, words, sepchars);

    resit = result.find("tags");
    if (resit == result.end()) {
        LOGERR("No tags in Python splitter for Korean\n");
        return false;
    }
    string& outtags = resit->second;
    vector<string> tags;
    stringToTokens(outtags, tags, sepchars);

    // The two lists are tokenized independently, so nothing guarantees
    // that they have the same length. Never index tags past its end.
    if (words.size() != tags.size()) {
        LOGERR("Python splitter for Korean: " << words.size() <<
               " words but " << tags.size() << " tags\n");
    }
    const size_t nterms =
        words.size() < tags.size() ? words.size() : tags.size();

    // This is the position in the whole text, not the local fragment,
    // which is bytepos-orgbytepos
    string::size_type bytepos(orgbytepos);
    for (size_t i = 0; i < nterms; i++) {
        // The POS tagger strips characters from the input (e.g. multiple
        // spaces, sometimes new lines, possibly other stuff). This
        // means that we can't easily reconstruct the byte position
        // from the concatenated terms. The output seems to be always
        // shorter than the input, so we try to look ahead for the
        // term. Can't be too sure that this works though, depending
        // on exactly what transformation may have been applied from
        // the original input to the term.
        string word = words[i];
        trimstring(word);
        string::size_type newpos = bytepos - orgbytepos;
        newpos = inputdata.find(word, newpos);
        if (newpos != string::npos) {
            bytepos = orgbytepos + newpos;
        }
        LOGDEB1("WORD OPOS " << bytepos-orgbytepos <<
                " FOUND POS " << newpos << endl);
        const string& tag = tags[i];
        if (tag == "Noun" || tag == "Verb" ||
            tag == "Adjective" || tag == "Adverb") {
            // NOTE(review): the end offset uses the untrimmed term size
            // while the trimmed term is what gets looked up and emitted.
            if (!takeword(
                    word, m_wordpos++, bytepos, bytepos + words[i].size())) {
                return false;
            }
        }
        LOGDEB1("WORD [" << words[i] << "] size " << words[i].size() <<
                " TAG " << tags[i] << endl);
        bytepos += words[i].size();
    }

#if DO_CHECK_THINGS
    int sizediff = inputdata.size() - (bytepos - orgbytepos);
    if (sizediff < 0)
        sizediff = -sizediff;
    if (sizediff > 1) {
        LOGERR("ORIGINAL TEXT SIZE: " << inputdata.size() <<
               " FINAL BYTE POS " << bytepos - orgbytepos <<
               " TEXT [" << inputdata << "]\n");
    }
#endif

    // Reset state, saving term position, and return the found non-cjk
    // Unicode character value. The current input byte offset is kept
    // in the utf8Iter
    int pos = m_wordpos;
    clearsplitstate();
    m_spanpos = m_wordpos = pos;
    *cp = c;
    return true;
}
|
||||||
236
src/filters/cmdtalk.py
Normal file
236
src/filters/cmdtalk.py
Normal file
@ -0,0 +1,236 @@
|
|||||||
|
#################################
|
||||||
|
# Copyright (C) 2016 J.F.Dockes
|
||||||
|
# This program is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License as published by
|
||||||
|
# the Free Software Foundation; either version 2 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program; if not, write to the
|
||||||
|
# Free Software Foundation, Inc.,
|
||||||
|
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
########################################################
|
||||||
|
# Command communication module and utilities. See commands in cmdtalk.h
|
||||||
|
#
|
||||||
|
# All data is binary. This is important for Python3
|
||||||
|
# All parameter names are converted to and processed as str/unicode
|
||||||
|
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
import tempfile
|
||||||
|
import shutil
|
||||||
|
import getopt
|
||||||
|
import traceback
|
||||||
|
|
||||||
|
# True when running under Python 3 or later. The numeric major version is
# compared: comparing the version string is lexicographic and would give the
# wrong answer for a two-digit major version.
PY3 = sys.version_info[0] >= 3

if PY3:
    def makebytes(data):
        """Return data as bytes: bytes are returned unchanged, anything
        else is encoded as UTF-8."""
        if isinstance(data, bytes):
            return data
        return data.encode("UTF-8")
else:
    def makebytes(data):
        """Return data as a byte string: unicode values are encoded as
        UTF-8, anything else is returned unchanged."""
        if isinstance(data, unicode):
            return data.encode("UTF-8")
        return data
|
||||||
|
|
||||||
|
|
||||||
|
############################################
|
||||||
|
# CmdTalk implements the
|
||||||
|
# communication protocol with the master process. It calls an external
|
||||||
|
# method to use the args and produce return data.
|
||||||
|
class CmdTalk:
    """Protocol engine for talking to the master process.

    Messages are sequences of parameters, each made of a "name: size" line
    followed by size bytes of data, and are terminated by an empty line.
    The engine reads a message, hands the parameters to a processor
    object's process() method and sends back the dict it returns.
    """

    def __init__(self, outfile=sys.stdout, infile=sys.stdin, exitfunc=None):
        """Set up the streams.

        outfile/infile: streams used for answers and requests.
        exitfunc: optional callable, called with the exit value before
        the process exits.
        """
        try:
            self.myname = os.path.basename(sys.argv[0])
        except Exception:
            self.myname = "???"

        self.outfile = outfile
        self.infile = infile
        self.exitfunc = exitfunc
        self.fields = {}

        if sys.platform == "win32":
            # Make sure that no newline translation happens on the
            # file descriptors.
            import msvcrt
            msvcrt.setmode(self.outfile.fileno(), os.O_BINARY)
            msvcrt.setmode(self.infile.fileno(), os.O_BINARY)
        # Set debugfile to a file name to have the log messages appended
        # to it instead of going to stderr.
        self.debugfile = None
        if self.debugfile:
            self.errfout = open(self.debugfile, "a")
        else:
            self.errfout = sys.stderr

    def log(self, s, doexit = 0, exitvalue = 1):
        """Print a message to the error stream. If doexit is true, call
        the exit function (if any) then exit with exitvalue."""
        print("CMDTALK: %s: %s" % (self.myname, s), file=self.errfout)
        if doexit:
            if self.exitfunc:
                self.exitfunc(exitvalue)
            sys.exit(exitvalue)

    def breakwrite(self, outfile, data):
        """Write data to outfile, in 4 KiB slices on Windows."""
        if sys.platform != "win32":
            outfile.write(data)
        else:
            # On windows, writing big chunks can fail with a "not enough space"
            # error. Seems a combined windows/python bug, depending on versions.
            # See https://bugs.python.org/issue11395
            # In any case, just break it up
            total = len(data)
            bs = 4*1024
            offset = 0
            while total > 0:
                if total < bs:
                    tow = total
                else:
                    tow = bs
                #self.log("Total %d Writing %d to stdout: %s" % (total,tow,data[offset:offset+tow]))
                outfile.write(data[offset:offset+tow])
                offset += tow
                total -= tow

    # Read single parameter from process input: line with param name and size
    # followed by data.
    def readparam(self):
        """Read one parameter and return it as (name, data).

        The name is returned as str/unicode. Under Python 3 the data is
        decoded from UTF-8 and returned as str; under Python 2 it is
        returned as read. An empty line (end of message) yields
        ('', b''). The process exits with status 0 on end of input, and
        with status 1 on a malformed header line or a short read.
        """
        if PY3:
            inf = self.infile.buffer
        else:
            inf = self.infile
        s = inf.readline()
        if s == b'':
            # End of input: our master is gone.
            if self.exitfunc:
                self.exitfunc(0)
            sys.exit(0)

        s = s.rstrip(b'\n')

        if s == b'':
            return ('', b'')
        l = s.split()
        if len(l) != 2:
            self.log(b'bad line: [' + s + b']', 1, 1)

        paramname = l[0].decode('ASCII').rstrip(':')
        try:
            paramsize = int(l[1])
        except ValueError:
            # A non-numeric size is a malformed header like any other.
            self.log(b'bad line: [' + s + b']', 1, 1)
        if paramsize > 0:
            paramdata = inf.read(paramsize)
            if len(paramdata) != paramsize:
                self.log("Bad read: wanted %d, got %d" %
                         (paramsize, len(paramdata)), 1, 1)
        else:
            paramdata = b''
        if PY3:
            paramdata = paramdata.decode('utf-8')

        #self.log("paramname [%s] paramsize %d value [%s]" %
        #         (paramname, paramsize, paramdata))
        return (paramname, paramdata)

    # Send one "name: size" header line followed by the data bytes. Under
    # Python 3 the bytes go to the binary buffer under the output stream.
    if PY3:
        def senditem(self, nm, data):
            data = makebytes(data)
            l = len(data)
            self.outfile.buffer.write(makebytes("%s: %d\n" % (nm, l)))
            self.breakwrite(self.outfile.buffer, data)
    else:
        def senditem(self, nm, data):
            data = makebytes(data)
            l = len(data)
            self.outfile.write(makebytes("%s: %d\n" % (nm, l)))
            self.breakwrite(self.outfile, data)

    # Send answer: all the items in the output dict, then an empty line.
    def answer(self, outfields):
        for nm,value in outfields.items():
            #self.log("Senditem: [%s] -> [%s]" % (nm, value))
            self.senditem(nm, value)

        # End of message
        print(file=self.outfile)
        self.outfile.flush()
        #self.log("done writing data")

    # Call processor with input params, send result
    def processmessage(self, processor, params):
        # In normal usage we try to recover from processor errors, but
        # we sometimes want to see the real stack trace when testing
        safeexec = True
        if safeexec:
            try:
                outfields = processor.process(params)
            except Exception as err:
                # Report the failure to the master instead of dying.
                self.log("processmessage: processor raised: [%s]" % err)
                traceback.print_exc()
                outfields = {}
                outfields["cmdtalkstatus"] = "1"
                outfields["cmdtalkerrstr"] = str(err)
        else:
            outfields = processor.process(params)

        self.answer(outfields)

    # Loop on messages from our master
    def mainloop(self, processor):
        while 1:
            #self.log("waiting for command")

            params = dict()

            # Read at most 10 parameters (normally 1 or 2), stop at empty line
            # End of message is signalled by empty paramname
            for i in range(10):
                paramname, paramdata = self.readparam()
                if paramname == "":
                    break
                params[paramname] = paramdata

            # Got message, act on it
            self.processmessage(processor, params)
|
||||||
|
|
||||||
|
|
||||||
|
# Common main routine for testing: either run the normal protocol
|
||||||
|
# engine or a local loop. This means that you can call
|
||||||
|
# cmdtalk.main(proto,processor) instead of proto.mainloop(processor)
|
||||||
|
# from your module, and get the benefits of command line testing
|
||||||
|
def main(proto, processor):
    """Run the protocol loop, or a single processor call for debugging.

    Without command line arguments, this runs proto.mainloop(processor).
    Otherwise the arguments are taken as name/value pairs, passed to
    processor.process() in one call, and the result is printed to stdout
    as "name->" lines followed by the values.
    """
    if len(sys.argv) == 1:
        proto.mainloop(processor)
        # mainloop does not return. Just in case
        sys.exit(1)

    # Not running the main loop: run one processor call for debugging
    def usage():
        print("Usage: cmdtalk.py pname pvalue [pname pvalue...]",
              file=sys.stderr)
        sys.exit(1)

    def debprint(out, s):
        proto.breakwrite(out, makebytes(s+'\n'))

    args = sys.argv[1:]
    if len(args) == 0 or len(args) % 2 != 0:
        usage()

    # Even positions are the names, odd positions the values.
    params = dict(zip(args[0::2], args[1::2]))
    res = processor.process(params)

    if PY3:
        ioout = sys.stdout.buffer
    else:
        ioout = sys.stdout

    for nm, value in res.items():
        #self.log("Senditem: [%s] -> [%s]" % (nm, value))
        bdata = makebytes(value)
        debprint(ioout, "%s->" % nm)
        proto.breakwrite(ioout, bdata)
        ioout.write(b'\n')
|
||||||
90
src/filters/kosplitter.py
Executable file
90
src/filters/kosplitter.py
Executable file
@ -0,0 +1,90 @@
|
|||||||
|
#!/usr/bin/python3
|
||||||
|
#################################
|
||||||
|
# Copyright (C) 2020 J.F.Dockes
|
||||||
|
# This program is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License as published by
|
||||||
|
# the Free Software Foundation; either version 2 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program; if not, write to the
|
||||||
|
# Free Software Foundation, Inc.,
|
||||||
|
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
########################################################
|
||||||
|
|
||||||
|
#
|
||||||
|
# Interface to the konlpy Korean text analyser: we receive text from
|
||||||
|
# our parent process and have it segmented by the analyser, then
|
||||||
|
# return the results. The analyser startup is very expensive (several
|
||||||
|
# seconds), which is why we can't just execute it from the main
|
||||||
|
# process.
|
||||||
|
#
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import cmdtalk
|
||||||
|
|
||||||
|
from konlpy.tag import Okt,Mecab,Komoran
|
||||||
|
|
||||||
|
class Processor(object):
    """Segment text with a konlpy tagger on behalf of the parent process.

    The tagger is created when the first request is processed, from the
    name found in the request's 'tagger' parameter, and is then kept for
    all later requests.
    """

    def __init__(self, proto):
        self.proto = proto
        # Exactly one of these becomes True once the tagger is created.
        self.tagsOkt = False
        self.tagsMecab = False
        self.tagsKomoran = False

    def _init_tagger(self, taggername):
        """Create the tagger named taggername ("Okt", "Mecab" or
        "Komoran"). Raises Exception for any other name."""
        if taggername == "Okt":
            self.tagger = Okt()
            self.tagsOkt = True
        elif taggername == "Mecab":
            self.tagger = Mecab()
            self.tagsMecab = True
        elif taggername == "Komoran":
            self.tagger = Komoran()
            self.tagsKomoran = True
        else:
            raise Exception("Bad tagger name " + taggername)

    @staticmethod
    def _map_tag(tag):
        """Translate a Mecab/Komoran tag to the name used for Okt output.
        Tags with no translation, including an empty one, are returned
        unchanged."""
        if tag.startswith("N"):
            return "Noun"
        if tag.startswith("VV"):
            return "Verb"
        if tag.startswith("VA"):
            return "Adjective"
        if tag == "MAG":
            return "Adverb"
        return tag

    def process(self, params):
        """Tag params['data'] and return the result.

        Returns a dict with 'text' (the terms) and 'tags' (their tags),
        each a string in which every element is followed by a tab. Tabs
        inside a term are replaced by spaces. If a required parameter is
        missing, a dict with a single 'error' entry is returned instead.
        """
        if 'data' not in params:
            return {'error':'No data field in parameters'}
        if not (self.tagsOkt or self.tagsMecab or self.tagsKomoran):
            if 'tagger' not in params:
                return {'error':'No "tagger" field in parameters'}
            self._init_tagger(params['tagger'])

        pos = self.tagger.pos(params['data'])
        #proto.log("%s" % pos)
        translate = (not self.tagsOkt) and (self.tagsMecab or self.tagsKomoran)
        words = []
        tags = []
        for e in pos:
            # The tab is our list separator: it can't appear inside a term
            words.append(e[0].replace('\t', ' '))
            tag = e[1]
            if translate:
                tag = self._map_tag(tag)
            tags.append(tag)
        # Every element, including the last one, is followed by a tab.
        text = "".join([w + "\t" for w in words])
        tagstr = "".join([t + "\t" for t in tags])
        return {'text': text, 'tags': tagstr}
|
||||||
|
|
||||||
|
|
||||||
|
# Only run the protocol engine when executed as a script, so that importing
# this module does not start reading messages from stdin.
if __name__ == '__main__':
    proto = cmdtalk.CmdTalk()
    processor = Processor(proto)
    cmdtalk.main(proto, processor)
|
||||||
@ -36,20 +36,6 @@ from hwp5.xmlmodel import Hwp5File as xml_Hwp5File
|
|||||||
from hwp5.utils import cached_property
|
from hwp5.utils import cached_property
|
||||||
|
|
||||||
|
|
||||||
# This was duplicated from hwp5 hwp5text.py and I don't really
|
|
||||||
# understand what it does...
|
|
||||||
RESOURCE_PATH_XSL_TEXT = 'xsl/plaintext.xsl'
|
|
||||||
class TextTransform(BaseTransform):
|
|
||||||
@property
|
|
||||||
def transform_hwp5_to_text(self):
|
|
||||||
transform_xhwp5 = self.transform_xhwp5_to_text
|
|
||||||
return self.make_transform_hwp5(transform_xhwp5)
|
|
||||||
@cached_property
|
|
||||||
def transform_xhwp5_to_text(self):
|
|
||||||
resource_path = RESOURCE_PATH_XSL_TEXT
|
|
||||||
return self.make_xsl_transform(resource_path)
|
|
||||||
|
|
||||||
|
|
||||||
# Associate HTML meta names and hwp summaryinfo values
|
# Associate HTML meta names and hwp summaryinfo values
|
||||||
def metafields(summaryinfo):
|
def metafields(summaryinfo):
|
||||||
yield(('Description', summaryinfo.subject + " " +
|
yield(('Description', summaryinfo.subject + " " +
|
||||||
|
|||||||
@ -158,7 +158,15 @@ void rwSettings(bool writing)
|
|||||||
"/Recoll/prefs/reslist/collapseDuplicates", Bool, false);
|
"/Recoll/prefs/reslist/collapseDuplicates", Bool, false);
|
||||||
SETTING_RW(prefs.showResultsAsTable,
|
SETTING_RW(prefs.showResultsAsTable,
|
||||||
"/Recoll/prefs/showResultsAsTable", Bool, false);
|
"/Recoll/prefs/showResultsAsTable", Bool, false);
|
||||||
SETTING_RW(prefs.maxhltextmbs, "/Recoll/prefs/preview/maxhltextmbs", Int, 3);
|
|
||||||
|
SETTING_RW(prefs.maxhltextkbs, "/Recoll/prefs/preview/maxhltextkbs", Int,
|
||||||
|
3000);
|
||||||
|
// Compat: if maxhltextkbs is not set but old maxhltextmbs is set use it
|
||||||
|
if (!writing && !settings.contains("/Recoll/prefs/preview/maxhltextkbs") &&
|
||||||
|
settings.contains("/Recoll/prefs/preview/maxhltextmbs")) {
|
||||||
|
prefs.maxhltextkbs = settings.value(
|
||||||
|
"/Recoll/prefs/preview/maxhltextmbs").toInt() * 1024;
|
||||||
|
}
|
||||||
|
|
||||||
SETTING_RW(prefs.previewPlainPre,
|
SETTING_RW(prefs.previewPlainPre,
|
||||||
"/Recoll/prefs/preview/plainPre", Int, PrefsPack::PP_PREWRAP);
|
"/Recoll/prefs/preview/plainPre", Int, PrefsPack::PP_PREWRAP);
|
||||||
|
|||||||
@ -20,6 +20,7 @@
|
|||||||
#include <string>
|
#include <string>
|
||||||
#include <list>
|
#include <list>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
#include <set>
|
||||||
|
|
||||||
#include <qstring.h>
|
#include <qstring.h>
|
||||||
#include <qstringlist.h>
|
#include <qstringlist.h>
|
||||||
@ -46,7 +47,7 @@ class PrefsPack {
|
|||||||
int filterCtlStyle;
|
int filterCtlStyle;
|
||||||
int respagesize{8};
|
int respagesize{8};
|
||||||
int historysize{0};
|
int historysize{0};
|
||||||
int maxhltextmbs;
|
int maxhltextkbs;
|
||||||
QString reslistfontfamily;
|
QString reslistfontfamily;
|
||||||
// Not saved in prefs for now. Computed from qt defaults and used to
|
// Not saved in prefs for now. Computed from qt defaults and used to
|
||||||
// set main character color for webkit/textbrowser reslist and
|
// set main character color for webkit/textbrowser reslist and
|
||||||
@ -154,6 +155,11 @@ class PrefsPack {
|
|||||||
|
|
||||||
std::string stemlang();
|
std::string stemlang();
|
||||||
|
|
||||||
|
// MIME types for which we prefer to use stored text from preview
|
||||||
|
// rather than extracting the possibly nicer HTML because the
|
||||||
|
// extractor is very slow. This is compiled in and there is no UI
|
||||||
|
// for now.
|
||||||
|
std::set<std::string> preferStoredTextMimes{"application/x-hwp"};
|
||||||
};
|
};
|
||||||
|
|
||||||
/** Global preferences record */
|
/** Global preferences record */
|
||||||
|
|||||||
BIN
src/qtgui/i18n/recoll_ko.qm
Normal file
BIN
src/qtgui/i18n/recoll_ko.qm
Normal file
Binary file not shown.
3294
src/qtgui/i18n/recoll_ko.ts
Normal file
3294
src/qtgui/i18n/recoll_ko.ts
Normal file
File diff suppressed because it is too large
Load Diff
@ -574,6 +574,90 @@ void Preview::emitWordSelect(QString word)
|
|||||||
emit(wordSelect(word));
|
emit(wordSelect(word));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Display message dialog after load failed
|
||||||
|
void Preview::displayLoadError(
|
||||||
|
FileInterner::ErrorPossibleCause explain, bool canGetRawText)
|
||||||
|
{
|
||||||
|
// Note that we can't easily check for a readable file
|
||||||
|
// because it's possible that only a region is locked
|
||||||
|
// (e.g. on Windows for an ost file the first block is
|
||||||
|
// readable even if Outlook is running).
|
||||||
|
QString msg;
|
||||||
|
switch (explain) {
|
||||||
|
case FileInterner::FetchMissing:
|
||||||
|
msg = tr("Error loading the document: file missing.");
|
||||||
|
break;
|
||||||
|
case FileInterner::FetchPerm:
|
||||||
|
msg = tr("Error loading the document: no permission.");
|
||||||
|
break;
|
||||||
|
case FileInterner::FetchNoBackend:
|
||||||
|
msg =
|
||||||
|
tr("Error loading: backend not configured.");
|
||||||
|
break;
|
||||||
|
case FileInterner::InternfileOther:
|
||||||
|
#ifdef _WIN32
|
||||||
|
msg = tr("Error loading the document: "
|
||||||
|
"other handler error<br>"
|
||||||
|
"Maybe the application is locking the file ?");
|
||||||
|
#else
|
||||||
|
msg = tr("Error loading the document: other handler error.");
|
||||||
|
#endif
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (canGetRawText) {
|
||||||
|
msg += tr("<br>Attempting to display from stored text.");
|
||||||
|
}
|
||||||
|
QMessageBox::warning(0, "Recoll", msg);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Start the load thread and wait for it to finish while running the
// event loop. Each wait slice restarts the timer with a value of 1000.
// @return true if the thread ended with status 0. Otherwise false; a
//   warning dialog is shown first, except when the cancel state is set.
bool Preview::runLoadThread(LoadThread& lthr, QTimer& tT, QEventLoop& loop,
                            QProgressDialog& progress, bool canGetRawText)
{
    lthr.start();
    int pass = 0;
    while (true) {
        tT.start(1000);
        loop.exec();
        if (lthr.isFinished()) {
            break;
        }
        if (progress.wasCanceled()) {
            CancelCheck::instance().setCancel();
        }
        // The progress dialog is only brought up at the end of the
        // second pass, so quick loads never show it.
        if (pass == 1) {
            progress.show();
        }
        pass++;
    }

    LOGDEB("loadDocInCurrentTab: after file load: cancel " <<
           CancelCheck::instance().cancelState() << " status " << lthr.status <<
           " text length " << lthr.fdoc.text.length() << "\n");

    if (lthr.status == 0) {
        return true;
    }

    // No dialog if the cancel state is set.
    if (CancelCheck::instance().cancelState()) {
        return false;
    }

    if (lthr.missing.empty()) {
        // No missing helper was reported: either the progress dialog was
        // canceled or we report the cause given by the thread.
        if (progress.wasCanceled()) {
            QMessageBox::warning(nullptr, "Recoll", tr("Canceled"));
        } else {
            progress.reset();
            displayLoadError(lthr.explain, canGetRawText);
        }
        return false;
    }

    // The load thread reported a missing helper program: name it.
    QString helperNote = QString::fromUtf8("<br>") +
        tr("Missing helper program: ") +
        QString::fromLocal8Bit(lthr.missing.c_str());
    QMessageBox::warning(nullptr, "Recoll",
                         tr("Can't turn doc into internal "
                            "representation for ") +
                         lthr.fdoc.mimetype.c_str() + helperNote);
    return false;
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Code for loading a file into an editor window. The operations that
|
Code for loading a file into an editor window. The operations that
|
||||||
we call have no provision to indicate progression, and it would be
|
we call have no provision to indicate progression, and it would be
|
||||||
@ -627,93 +711,42 @@ bool Preview::loadDocInCurrentTab(const Rcl::Doc &idoc, int docnum)
|
|||||||
connect(&tT, SIGNAL(timeout()), &loop, SLOT(quit()));
|
connect(&tT, SIGNAL(timeout()), &loop, SLOT(quit()));
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////
|
||||||
// Load and convert document
|
// Load and convert document
|
||||||
// idoc came out of the index data (main text and some fields missing).
|
// - idoc came out of the index data (main text and some fields missing).
|
||||||
// fdoc is the complete one what we are going to extract from storage.
|
// - fdoc is the complete one what we are going to extract from storage.
|
||||||
|
//
|
||||||
|
// If the preference to use the stored text is set, we still
|
||||||
|
// create the LoadThread object for convenience (using its fdoc
|
||||||
|
// field, but don't start it.
|
||||||
|
|
||||||
LoadThread lthr(theconfig, idoc, prefs.previewHtml, this);
|
LoadThread lthr(theconfig, idoc, prefs.previewHtml, this);
|
||||||
connect(<hr, SIGNAL(finished()), &loop, SLOT(quit()));
|
connect(<hr, SIGNAL(finished()), &loop, SLOT(quit()));
|
||||||
|
|
||||||
lthr.start();
|
bool canGetRawText = rcldb && rcldb->storesDocText();
|
||||||
for (int i = 0;;i++) {
|
auto it = prefs.preferStoredTextMimes.find(idoc.mimetype);
|
||||||
tT.start(1000);
|
bool preferStoredText = (it != prefs.preferStoredTextMimes.end());
|
||||||
loop.exec();
|
bool loadok{false};
|
||||||
if (lthr.isFinished())
|
|
||||||
break;
|
if (!preferStoredText || !canGetRawText) {
|
||||||
if (progress.wasCanceled()) {
|
// Try load from actual document
|
||||||
CancelCheck::instance().setCancel();
|
loadok = runLoadThread(lthr, tT, loop, progress, canGetRawText);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!loadok && canGetRawText) {
|
||||||
|
// Preferring/able to use stored text or extern load failed
|
||||||
|
lthr.fdoc = idoc;
|
||||||
|
loadok = rcldb->getDocRawText(lthr.fdoc);
|
||||||
|
if (!loadok) {
|
||||||
|
QMessageBox::warning(0,"Recoll",tr("Could not fetch stored text"));
|
||||||
}
|
}
|
||||||
if (i == 1)
|
|
||||||
progress.show();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
LOGDEB("loadDocInCurrentTab: after file load: cancel " <<
|
if (!loadok) {
|
||||||
CancelCheck::instance().cancelState() << " status " << lthr.status <<
|
// Everything failed.
|
||||||
" text length " << lthr.fdoc.text.length() << "\n");
|
progress.close();
|
||||||
|
|
||||||
if (CancelCheck::instance().cancelState())
|
|
||||||
return false;
|
return false;
|
||||||
if (lthr.status != 0) {
|
|
||||||
bool canGetRawText = rcldb && rcldb->storesDocText();
|
|
||||||
QString explain;
|
|
||||||
if (!lthr.missing.empty()) {
|
|
||||||
explain = QString::fromUtf8("<br>") +
|
|
||||||
tr("Missing helper program: ") +
|
|
||||||
QString::fromLocal8Bit(lthr.missing.c_str());
|
|
||||||
QMessageBox::warning(0, "Recoll",
|
|
||||||
tr("Can't turn doc into internal "
|
|
||||||
"representation for ") +
|
|
||||||
lthr.fdoc.mimetype.c_str() + explain);
|
|
||||||
} else {
|
|
||||||
if (progress.wasCanceled()) {
|
|
||||||
QMessageBox::warning(0, "Recoll", tr("Canceled"));
|
|
||||||
} else {
|
|
||||||
progress.reset();
|
|
||||||
// Note that we can't easily check for a readable file
|
|
||||||
// because it's possible that only a region is locked
|
|
||||||
// (e.g. on Windows for an ost file the first block is
|
|
||||||
// readable even if Outlook is running).
|
|
||||||
QString msg;
|
|
||||||
switch (lthr.explain) {
|
|
||||||
case FileInterner::FetchMissing:
|
|
||||||
msg = tr("Error loading the document: file missing.");
|
|
||||||
break;
|
|
||||||
case FileInterner::FetchPerm:
|
|
||||||
msg = tr("Error loading the document: no permission.");
|
|
||||||
break;
|
|
||||||
case FileInterner::FetchNoBackend:
|
|
||||||
msg =
|
|
||||||
tr("Error loading: backend not configured.");
|
|
||||||
break;
|
|
||||||
case FileInterner::InternfileOther:
|
|
||||||
#ifdef _WIN32
|
|
||||||
msg = tr("Error loading the document: "
|
|
||||||
"other handler error<br>"
|
|
||||||
"Maybe the application is locking the file ?");
|
|
||||||
#else
|
|
||||||
msg = tr("Error loading the document: other handler error.");
|
|
||||||
#endif
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
if (canGetRawText) {
|
|
||||||
msg += tr("<br>Attempting to display from stored text.");
|
|
||||||
}
|
|
||||||
QMessageBox::warning(0, "Recoll", msg);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
if (canGetRawText) {
|
|
||||||
lthr.fdoc = idoc;
|
|
||||||
if (!rcldb->getDocRawText(lthr.fdoc)) {
|
|
||||||
QMessageBox::warning(0, "Recoll",
|
|
||||||
tr("Could not fetch stored text"));
|
|
||||||
progress.close();
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
progress.close();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Reset config just in case.
|
// Reset config just in case.
|
||||||
theconfig->setKeyDir("");
|
theconfig->setKeyDir("");
|
||||||
|
|
||||||
@ -722,8 +755,8 @@ bool Preview::loadDocInCurrentTab(const Rcl::Doc &idoc, int docnum)
|
|||||||
// We don't do the highlighting for very big texts: too long. We
|
// We don't do the highlighting for very big texts: too long. We
|
||||||
// should at least do special char escaping, in case a '&' or '<'
|
// should at least do special char escaping, in case a '&' or '<'
|
||||||
// somehow slipped through previous processing.
|
// somehow slipped through previous processing.
|
||||||
bool highlightTerms = lthr.fdoc.text.length() <
|
bool highlightTerms = int(lthr.fdoc.text.length()) <
|
||||||
(unsigned long)prefs.maxhltextmbs * 1024 * 1024;
|
prefs.maxhltextkbs * 1024;
|
||||||
|
|
||||||
// Final text is produced in chunks so that we can display the top
|
// Final text is produced in chunks so that we can display the top
|
||||||
// while still inserting at bottom
|
// while still inserting at bottom
|
||||||
@ -752,7 +785,6 @@ bool Preview::loadDocInCurrentTab(const Rcl::Doc &idoc, int docnum)
|
|||||||
QStringList qrichlst;
|
QStringList qrichlst;
|
||||||
editor->m_plaintorich->set_activatelinks(prefs.previewActiveLinks);
|
editor->m_plaintorich->set_activatelinks(prefs.previewActiveLinks);
|
||||||
|
|
||||||
#if 1
|
|
||||||
if (highlightTerms) {
|
if (highlightTerms) {
|
||||||
progress.setLabelText(tr("Creating preview text"));
|
progress.setLabelText(tr("Creating preview text"));
|
||||||
qApp->processEvents();
|
qApp->processEvents();
|
||||||
@ -815,17 +847,6 @@ bool Preview::loadDocInCurrentTab(const Rcl::Doc &idoc, int docnum)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#else // For testing qtextedit bugs...
|
|
||||||
highlightTerms = true;
|
|
||||||
const char *textlist[] =
|
|
||||||
{
|
|
||||||
"Du plain text avec un\n <termtag>termtag</termtag> fin de ligne:",
|
|
||||||
"texte apres le tag\n",
|
|
||||||
};
|
|
||||||
const int listl = sizeof(textlist) / sizeof(char*);
|
|
||||||
for (int i = 0 ; i < listl ; i++)
|
|
||||||
qrichlst.push_back(QString::fromUtf8(textlist[i]));
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////
|
||||||
|
|||||||
@ -44,9 +44,11 @@
|
|||||||
#include "rcldb.h"
|
#include "rcldb.h"
|
||||||
#include "plaintorich.h"
|
#include "plaintorich.h"
|
||||||
#include "rclmain_w.h"
|
#include "rclmain_w.h"
|
||||||
|
#include "internfile.h"
|
||||||
|
|
||||||
#include "ui_preview.h"
|
#include "ui_preview.h"
|
||||||
|
|
||||||
|
|
||||||
class QTabWidget;
|
class QTabWidget;
|
||||||
class QLabel;
|
class QLabel;
|
||||||
class QPushButton;
|
class QPushButton;
|
||||||
@ -55,6 +57,10 @@ class Preview;
|
|||||||
class PlainToRichQtPreview;
|
class PlainToRichQtPreview;
|
||||||
class QUrl;
|
class QUrl;
|
||||||
class RclMain;
|
class RclMain;
|
||||||
|
class LoadThread;
|
||||||
|
class QTimer;
|
||||||
|
class QEventLoop;
|
||||||
|
class QProgressDialog;
|
||||||
|
|
||||||
class PreviewTextEdit : public PREVIEW_PARENTCLASS {
|
class PreviewTextEdit : public PREVIEW_PARENTCLASS {
|
||||||
Q_OBJECT;
|
Q_OBJECT;
|
||||||
@ -185,6 +191,10 @@ private:
|
|||||||
virtual PreviewTextEdit *currentEditor();
|
virtual PreviewTextEdit *currentEditor();
|
||||||
virtual PreviewTextEdit *addEditorTab();
|
virtual PreviewTextEdit *addEditorTab();
|
||||||
virtual bool loadDocInCurrentTab(const Rcl::Doc& idoc, int dnm);
|
virtual bool loadDocInCurrentTab(const Rcl::Doc& idoc, int dnm);
|
||||||
|
void displayLoadError(
|
||||||
|
FileInterner::ErrorPossibleCause explain, bool canGetRawText);
|
||||||
|
bool runLoadThread(LoadThread& lthr, QTimer& tT, QEventLoop& loop,
|
||||||
|
QProgressDialog& progress, bool canGetRawText);
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif /* _PREVIEW_W_H_INCLUDED_ */
|
#endif /* _PREVIEW_W_H_INCLUDED_ */
|
||||||
|
|||||||
@ -168,7 +168,7 @@ i18n/recoll_zh_CN.ts \
|
|||||||
i18n/recoll_fr.ts \
|
i18n/recoll_fr.ts \
|
||||||
i18n/recoll_xx.ts \
|
i18n/recoll_xx.ts \
|
||||||
i18n/recoll_cs.ts \
|
i18n/recoll_cs.ts \
|
||||||
i18n/recoll_kr.ts \
|
i18n/recoll_ko.ts \
|
||||||
i18n/recoll_el.ts \
|
i18n/recoll_el.ts \
|
||||||
i18n/recoll_tr.ts
|
i18n/recoll_tr.ts
|
||||||
|
|
||||||
|
|||||||
2296
src/qtgui/uiprefs.ui
2296
src/qtgui/uiprefs.ui
File diff suppressed because it is too large
Load Diff
@ -112,7 +112,7 @@ void UIPrefsDialog::setFromPrefs()
|
|||||||
pageLenSB->setValue(prefs.respagesize);
|
pageLenSB->setValue(prefs.respagesize);
|
||||||
maxHistSizeSB->setValue(prefs.historysize);
|
maxHistSizeSB->setValue(prefs.historysize);
|
||||||
collapseDupsCB->setChecked(prefs.collapseDuplicates);
|
collapseDupsCB->setChecked(prefs.collapseDuplicates);
|
||||||
maxHLTSB->setValue(prefs.maxhltextmbs);
|
maxHLTSB->setValue(prefs.maxhltextkbs);
|
||||||
|
|
||||||
if (prefs.ssearchTypSav) {
|
if (prefs.ssearchTypSav) {
|
||||||
ssearchTypCMB->setCurrentIndex(4);
|
ssearchTypCMB->setCurrentIndex(4);
|
||||||
@ -304,7 +304,7 @@ void UIPrefsDialog::accept()
|
|||||||
prefs.respagesize = pageLenSB->value();
|
prefs.respagesize = pageLenSB->value();
|
||||||
prefs.historysize = maxHistSizeSB->value();
|
prefs.historysize = maxHistSizeSB->value();
|
||||||
prefs.collapseDuplicates = collapseDupsCB->isChecked();
|
prefs.collapseDuplicates = collapseDupsCB->isChecked();
|
||||||
prefs.maxhltextmbs = maxHLTSB->value();
|
prefs.maxhltextkbs = maxHLTSB->value();
|
||||||
|
|
||||||
prefs.qtermstyle = qtermStyleLE->text();
|
prefs.qtermstyle = qtermStyleLE->text();
|
||||||
prefs.abssep = abssepLE->text();
|
prefs.abssep = abssepLE->text();
|
||||||
|
|||||||
243
src/utils/cmdtalk.cpp
Normal file
243
src/utils/cmdtalk.cpp
Normal file
@ -0,0 +1,243 @@
|
|||||||
|
/* Copyright (C) 2016 J.F.Dockes
|
||||||
|
* This program is free software; you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU Lesser General Public License as published by
|
||||||
|
* the Free Software Foundation; either version 2.1 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public License
|
||||||
|
* along with this program; if not, write to the
|
||||||
|
* Free Software Foundation, Inc.,
|
||||||
|
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
*/
|
||||||
|
#include "cmdtalk.h"
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include <sstream>
|
||||||
|
#include <mutex>
|
||||||
|
|
||||||
|
#include "smallut.h"
|
||||||
|
#include "execmd.h"
|
||||||
|
#ifdef MDU_INCLUDE_LOG
|
||||||
|
#include MDU_INCLUDE_LOG
|
||||||
|
#else
|
||||||
|
#include "log.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
class TimeoutExcept {};
|
||||||
|
|
||||||
|
class Canceler : public ExecCmdAdvise {
|
||||||
|
public:
|
||||||
|
Canceler(int tmsecs)
|
||||||
|
: m_timeosecs(tmsecs) {}
|
||||||
|
|
||||||
|
virtual void newData(int cnt) {
|
||||||
|
if (m_starttime && (time(0) - m_starttime) > m_timeosecs) {
|
||||||
|
throw TimeoutExcept();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void reset() {
|
||||||
|
m_starttime = time(0);
|
||||||
|
}
|
||||||
|
int m_timeosecs;
|
||||||
|
time_t m_starttime{0};
|
||||||
|
};
|
||||||
|
|
||||||
|
// Private implementation data and methods for CmdTalk.
class CmdTalk::Internal {
public:
    // Command object, allocated by CmdTalk::startCmd(). Null until then.
    // Owned: deleted by the destructor.
    ExecCmd *cmd{nullptr};
    // Timeout watchdog, set as advise object on cmd by startCmd().
    Canceler m_cancel;
    // Held by talk() for the whole duration of an exchange.
    std::mutex mmutex;

    // @param timeosecs timeout value handed to the watchdog.
    Internal(int timeosecs)
        : m_cancel(timeosecs)
    {
    }

    ~Internal()
    {
        delete cmd;
    }

    // Read one "name: len\ndata" element from the command output.
    bool readDataElement(string& name, string &data);

    // Send a message made of arg0 (if its name is not empty) and args,
    // then read the answer elements into rep.
    bool talk(const pair<string, string>& arg0,
              const unordered_map<string, string>& args,
              unordered_map<string, string>& rep);
};
|
||||||
|
|
||||||
|
// Allocate the private implementation. No command is started here: see
// startCmd().
CmdTalk::CmdTalk(int timeosecs)
    : m(new Internal(timeosecs))
{
}

// Free the private implementation, which in turn deletes its command
// object.
CmdTalk::~CmdTalk()
{
    delete m;
}
|
||||||
|
|
||||||
|
bool CmdTalk::startCmd(const string& cmdname,
|
||||||
|
const vector<string>& args,
|
||||||
|
const vector<string>& env,
|
||||||
|
const vector<string>& path)
|
||||||
|
{
|
||||||
|
LOGDEB("CmdTalk::startCmd\n");
|
||||||
|
|
||||||
|
delete m->cmd;
|
||||||
|
m->cmd = new ExecCmd;
|
||||||
|
m->cmd->setAdvise(&m->m_cancel);
|
||||||
|
|
||||||
|
for (const auto& it : env) {
|
||||||
|
m->cmd->putenv(it);
|
||||||
|
}
|
||||||
|
|
||||||
|
string acmdname(cmdname);
|
||||||
|
if (!path.empty()) {
|
||||||
|
string colonpath;
|
||||||
|
for (const auto& it: path) {
|
||||||
|
colonpath += it + ":";
|
||||||
|
}
|
||||||
|
if (!colonpath.empty()) {
|
||||||
|
colonpath.erase(colonpath.size()-1);
|
||||||
|
}
|
||||||
|
LOGDEB("CmdTalk::startCmd: PATH: [" << colonpath << "]\n");
|
||||||
|
ExecCmd::which(cmdname, acmdname, colonpath.c_str());
|
||||||
|
}
|
||||||
|
|
||||||
|
if (m->cmd->startExec(acmdname, args, 1, 1) < 0) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Messages are made of data elements. Each element is like:
|
||||||
|
// name: len\ndata
|
||||||
|
// An empty line signals the end of the message, so the whole thing
|
||||||
|
// would look like:
|
||||||
|
// Name1: Len1\nData1Name2: Len2\nData2\n
|
||||||
|
// Read one element from the command output: a "Name: len\n" header line
// followed by len bytes of data.
// @param name set to the first token of the header line (talk() later
//   passes it through trimstring(name, ":")). Left empty when the empty
//   end-of-message line was read, and on all error paths.
// @param data set to the element data. Empty if the announced length is
//   not positive.
// @return true if an element or the end-of-message line was read, false
//   on read error, malformed header line, short data or timeout.
bool CmdTalk::Internal::readDataElement(string& name, string &data)
{
    // Never hand back stale values: the caller tells end-of-message from
    // a regular element by testing for an empty name.
    name.clear();
    data.clear();

    // The timeout period restarts for each element.
    m_cancel.reset();

    // The whole exchange is inside the try block, so that a TimeoutExcept
    // thrown by the watchdog can't escape, whether it is raised while
    // reading the header line or the data.
    // NOTE(review): whether ExecCmd::receive() also calls the advise
    // object is not visible here; guarding it is harmless either way.
    try {
        // Read name and length
        string ibuf;
        if (cmd->getline(ibuf) <= 0) {
            LOGERR("CmdTalk: getline error\n");
            return false;
        }
        LOGDEB1("CmdTalk:rde: line [" << ibuf << "]\n");

        // Empty line (end of message) ?
        if (ibuf == "\n") {
            LOGDEB1("CmdTalk: Got empty line\n");
            return true;
        }

        // We're expecting something like Name: len\n
        vector<string> tokens;
        stringToTokens(ibuf, tokens);
        int len = 0;
        if (tokens.size() != 2 ||
            sscanf(tokens[1].c_str(), "%d", &len) != 1) {
            LOGERR("CmdTalk: bad line in filter output: [" << ibuf << "]\n");
            return false;
        }

        // Read element data
        if (len > 0 && cmd->receive(data, len) != len) {
            LOGERR("CmdTalk: expected " << len << " bytes of data, got " <<
                   data.length() << "\n");
            return false;
        }

        name = tokens[0];
        LOGDEB1("CmdTalk:rde: got: name [" << name << "] len " << len <<"value ["<<
                (data.size() > 100 ? (data.substr(0, 100) + " ...") : data)<< endl);
        return true;
    } catch (const TimeoutExcept&) {
        LOGINF("CmdTalk:readDataElement: fatal timeout (" <<
               m_cancel.m_timeosecs << " S)\n");
        return false;
    }
}
|
||||||
|
|
||||||
|
bool CmdTalk::Internal::talk(const pair<string, string>& arg0,
|
||||||
|
const unordered_map<string, string>& args,
|
||||||
|
unordered_map<string, string>& rep)
|
||||||
|
{
|
||||||
|
std::unique_lock<std::mutex> lock(mmutex);
|
||||||
|
if (cmd->getChildPid() <= 0) {
|
||||||
|
LOGERR("CmdTalk::talk: no process\n");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
ostringstream obuf;
|
||||||
|
if (!arg0.first.empty()) {
|
||||||
|
obuf << arg0.first << ": " << arg0.second.size() << "\n" << arg0.second;
|
||||||
|
}
|
||||||
|
for (const auto& it : args) {
|
||||||
|
obuf << it.first << ": " << it.second.size() << "\n" << it.second;
|
||||||
|
}
|
||||||
|
obuf << "\n";
|
||||||
|
|
||||||
|
if (cmd->send(obuf.str()) < 0) {
|
||||||
|
cmd->zapChild();
|
||||||
|
LOGERR("CmdTalk: send error\n");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read answer (multiple elements)
|
||||||
|
LOGDEB1("CmdTalk: reading answer\n");
|
||||||
|
for (;;) {
|
||||||
|
string name, data;
|
||||||
|
if (!readDataElement(name, data)) {
|
||||||
|
cmd->zapChild();
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (name.empty()) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
trimstring(name, ":");
|
||||||
|
LOGDEB1("CmdTalk: got [" << name << "] -> [" << data << "]\n");
|
||||||
|
rep[name] = data;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (rep.find("cmdtalkstatus") != rep.end()) {
|
||||||
|
return false;
|
||||||
|
} else {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool CmdTalk::running()
|
||||||
|
{
|
||||||
|
return m && m->cmd && m->cmd->getChildPid() > 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool CmdTalk::talk(const unordered_map<string, string>& args,
|
||||||
|
unordered_map<string, string>& rep)
|
||||||
|
{
|
||||||
|
return m->talk({"",""}, args, rep);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool CmdTalk::callproc(
|
||||||
|
const string& proc,
|
||||||
|
const unordered_map<std::string, std::string>& args,
|
||||||
|
unordered_map<std::string, std::string>& rep)
|
||||||
|
{
|
||||||
|
return m->talk({"cmdtalk:proc", proc}, args, rep);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
109
src/utils/cmdtalk.h
Normal file
109
src/utils/cmdtalk.h
Normal file
@ -0,0 +1,109 @@
|
|||||||
|
/* Copyright (C) 2016 J.F.Dockes
|
||||||
|
* This program is free software; you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU Lesser General Public License as published by
|
||||||
|
* the Free Software Foundation; either version 2.1 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public License
|
||||||
|
* along with this program; if not, write to the
|
||||||
|
* Free Software Foundation, Inc.,
|
||||||
|
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
*/
|
||||||
|
#ifndef _CMDTALK_H_INCLUDED_
|
||||||
|
#define _CMDTALK_H_INCLUDED_
|
||||||
|
|
||||||
|
/**
|
||||||
|
 * Execute a command and exchange messages with it.
|
||||||
|
*
|
||||||
|
* A simple stream protocol is used for the dialog. HTTP or some kind
|
||||||
|
* of full-blown RPC could have been used, but there was also good
|
||||||
|
* reason to keep it simple (yet powerful), given the limited context
|
||||||
|
* of dialog through a pipe.
|
||||||
|
*
|
||||||
|
* The data is exchanged in TLV fashion, in a way that should be
|
||||||
|
* usable in most script languages. The basic unit of data has one line
|
||||||
|
* with a data type and a count (both ASCII), followed by the data. A
|
||||||
|
* 'message' is made of one or several units or tags and ends with one empty
|
||||||
|
* line.
|
||||||
|
*
|
||||||
|
* Example:(the message begins before 'Filename' and has 'Filename' and
|
||||||
|
* 'Ipath' tags):
|
||||||
|
*
|
||||||
|
Filename: 24
|
||||||
|
/my/home/mail/somefolderIpath: 2
|
||||||
|
22
|
||||||
|
|
||||||
|
<Message ends here: because of the empty line after '22'
|
||||||
|
|
||||||
|
*
|
||||||
|
* Example answer, with 'Mimetype' and 'Data' tags
|
||||||
|
*
|
||||||
|
Mimetype: 10
|
||||||
|
text/plainData: 10
|
||||||
|
0123456789
|
||||||
|
|
||||||
|
<Message ends here because of empty line
|
||||||
|
|
||||||
|
*
|
||||||
|
* This format is both extensible and reasonably easy to parse.
|
||||||
|
* While it's more fitted for python or perl on the script side, it
|
||||||
|
* should even be sort of usable from the shell (e.g.: use dd to read
|
||||||
|
* the counted data). Most alternatives would need data encoding in
|
||||||
|
* some cases.
|
||||||
|
*
|
||||||
|
* Higher level dialog:
|
||||||
|
* The C++ program is the master and sends request messages to the script.
|
||||||
|
* Both sides of the communication should be prepared to receive and discard
|
||||||
|
* unknown tags.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
#include <unordered_map>
|
||||||
|
|
||||||
|
// Run a command and exchange messages with it, using the element format
// described at the top of this file.
class CmdTalk {
public:
    // @param timeosecs timeout value, in seconds, for reading from the
    //   command.
    CmdTalk(int timeosecs);
    virtual ~CmdTalk();

    // Not copyable.
    CmdTalk(const CmdTalk&) = delete;
    CmdTalk &operator=(const CmdTalk &) = delete;

    // Start the command.
    // @param cmdname name of the command to execute.
    // @param args command line arguments.
    // @param env each entry should be of the form name=value. They
    //   augment the subprocess environment.
    // @param path replaces the PATH variable when looking for the command.
    //
    // Note that the cmdtalk.py:main() method is a test routine which
    // expects data pairs on the command line. If actual parameters
    // need to be passed, it can't be used by the processor.
    virtual bool startCmd(const std::string& cmdname,
                          const std::vector<std::string>& args = {},
                          const std::vector<std::string>& env = {},
                          const std::vector<std::string>& path = {});

    // True if the command was started and has a child process.
    virtual bool running();

    // Single exchange: send the 'args' elements and receive the answer
    // elements in 'rep'.
    virtual bool talk(const std::unordered_map<std::string, std::string>& args,
                      std::unordered_map<std::string, std::string>& rep);

    // Specialized version with a special argument used by the dispatcher
    // to call the designated method.
    virtual bool callproc(
        const std::string& proc,
        const std::unordered_map<std::string, std::string>& args,
        std::unordered_map<std::string, std::string>& rep);

private:
    class Internal;
    Internal *m{nullptr};
};
|
||||||
|
|
||||||
|
#endif /* _CMDTALK_H_INCLUDED_ */
|
||||||
Loading…
x
Reference in New Issue
Block a user