This commit is contained in:
Jean-Francois Dockes 2020-10-14 11:57:18 +02:00
parent ceecf5ff43
commit 2d21d4a460
4 changed files with 111 additions and 116 deletions

View File

@ -1115,20 +1115,19 @@ set<string> RclConfig::getIndexedFields() const
string RclConfig::fieldCanon(const string& f) const string RclConfig::fieldCanon(const string& f) const
{ {
string fld = stringtolower(f); string fld = stringtolower(f);
map<string, string>::const_iterator it = m_aliastocanon.find(fld); const auto it = m_aliastocanon.find(fld);
if (it != m_aliastocanon.end()) { if (it != m_aliastocanon.end()) {
LOGDEB1("RclConfig::fieldCanon: [" << f << "] -> [" << it->second << LOGDEB1("RclConfig::fieldCanon: [" << f << "] -> [" << it->second <<
"]\n"); "]\n");
return it->second; return it->second;
} }
LOGDEB1("RclConfig::fieldCanon: [" << (f) << "] -> [" << (fld) << "]\n"); LOGDEB1("RclConfig::fieldCanon: [" << f << "] -> [" << fld << "]\n");
return fld; return fld;
} }
string RclConfig::fieldQCanon(const string& f) const string RclConfig::fieldQCanon(const string& f) const
{ {
string fld = stringtolower(f); const auto it = m_aliastoqcanon.find(stringtolower(f));
map<string, string>::const_iterator it = m_aliastoqcanon.find(fld);
if (it != m_aliastoqcanon.end()) { if (it != m_aliastoqcanon.end()) {
LOGDEB1("RclConfig::fieldQCanon: [" << f << "] -> [" << it->second << LOGDEB1("RclConfig::fieldQCanon: [" << f << "] -> [" << it->second <<
"]\n"); "]\n");

View File

@ -77,30 +77,30 @@ void initAsyncSigs(void (*sigcleanup)(int))
// Install app signal handler // Install app signal handler
if (sigcleanup) { if (sigcleanup) {
struct sigaction action; struct sigaction action;
action.sa_handler = sigcleanup; action.sa_handler = sigcleanup;
action.sa_flags = 0; action.sa_flags = 0;
sigemptyset(&action.sa_mask); sigemptyset(&action.sa_mask);
for (unsigned int i = 0; i < sizeof(catchedSigs) / sizeof(int); i++) for (unsigned int i = 0; i < sizeof(catchedSigs) / sizeof(int); i++)
if (signal(catchedSigs[i], SIG_IGN) != SIG_IGN) { if (signal(catchedSigs[i], SIG_IGN) != SIG_IGN) {
if (sigaction(catchedSigs[i], &action, 0) < 0) { if (sigaction(catchedSigs[i], &action, 0) < 0) {
perror("Sigaction failed"); perror("Sigaction failed");
} }
} }
} }
// Install log rotate sig handler // Install log rotate sig handler
{ {
struct sigaction action; struct sigaction action;
action.sa_handler = siglogreopen; action.sa_handler = siglogreopen;
action.sa_flags = 0; action.sa_flags = 0;
sigemptyset(&action.sa_mask); sigemptyset(&action.sa_mask);
if (signal(SIGHUP, SIG_IGN) != SIG_IGN) { if (signal(SIGHUP, SIG_IGN) != SIG_IGN) {
if (sigaction(SIGHUP, &action, 0) < 0) { if (sigaction(SIGHUP, &action, 0) < 0) {
perror("Sigaction failed"); perror("Sigaction failed");
}
} }
} }
}
} }
void recoll_exitready() void recoll_exitready()
{ {
@ -150,10 +150,10 @@ static BOOL WINAPI CtrlHandler(DWORD fdwCtrlType)
{ {
l_sigcleanup(SIGINT); l_sigcleanup(SIGINT);
LOGDEB0("CtrlHandler: waiting for exit ready\n" ); LOGDEB0("CtrlHandler: waiting for exit ready\n" );
DWORD res = WaitForSingleObject(eWorkFinished, INFINITE); DWORD res = WaitForSingleObject(eWorkFinished, INFINITE);
if (res != WAIT_OBJECT_0) { if (res != WAIT_OBJECT_0) {
LOGERR("CtrlHandler: exit ack wait failed\n" ); LOGERR("CtrlHandler: exit ack wait failed\n" );
} }
LOGDEB0("CtrlHandler: got exit ready event, exiting\n" ); LOGDEB0("CtrlHandler: got exit ready event, exiting\n" );
return TRUE; return TRUE;
} }
@ -242,10 +242,10 @@ void initAsyncSigs(void (*sigcleanup)(int))
// Install app signal handler // Install app signal handler
if (sigcleanup) { if (sigcleanup) {
l_sigcleanup = sigcleanup; l_sigcleanup = sigcleanup;
for (unsigned int i = 0; i < sizeof(catchedSigs) / sizeof(int); i++) { for (unsigned int i = 0; i < sizeof(catchedSigs) / sizeof(int); i++) {
if (signal(catchedSigs[i], SIG_IGN) != SIG_IGN) { if (signal(catchedSigs[i], SIG_IGN) != SIG_IGN) {
signal(catchedSigs[i], sigcleanup); signal(catchedSigs[i], sigcleanup);
} }
} }
} }
@ -267,11 +267,11 @@ void recoll_exitready()
#endif #endif
RclConfig *recollinit(int flags, RclConfig *recollinit(int flags,
void (*cleanup)(void), void (*sigcleanup)(int), void (*cleanup)(void), void (*sigcleanup)(int),
string &reason, const string *argcnf) string &reason, const string *argcnf)
{ {
if (cleanup) if (cleanup)
atexit(cleanup); atexit(cleanup);
#if defined(MACPORTS) || defined(HOMEBREW) #if defined(MACPORTS) || defined(HOMEBREW)
// The MACPORTS and HOMEBREW flags are set by the resp. portfile // The MACPORTS and HOMEBREW flags are set by the resp. portfile
@ -303,12 +303,12 @@ RclConfig *recollinit(int flags,
RclConfig *config = new RclConfig(argcnf); RclConfig *config = new RclConfig(argcnf);
if (!config || !config->ok()) { if (!config || !config->ok()) {
reason = "Configuration could not be built:\n"; reason = "Configuration could not be built:\n";
if (config) if (config)
reason += config->getReason(); reason += config->getReason();
else else
reason += "Out of memory ?"; reason += "Out of memory ?";
return 0; return 0;
} }
TextSplit::staticConfInit(config); TextSplit::staticConfInit(config);
@ -318,8 +318,8 @@ RclConfig *recollinit(int flags,
// ones. // ones.
string logfilename, loglevel; string logfilename, loglevel;
if (flags & RCLINIT_DAEMON) { if (flags & RCLINIT_DAEMON) {
config->getConfParam(string("daemlogfilename"), logfilename); config->getConfParam(string("daemlogfilename"), logfilename);
config->getConfParam(string("daemloglevel"), loglevel); config->getConfParam(string("daemloglevel"), loglevel);
} }
if (flags & RCLINIT_IDX) { if (flags & RCLINIT_IDX) {
if (logfilename.empty()) { if (logfilename.empty()) {
@ -339,22 +339,22 @@ RclConfig *recollinit(int flags,
} }
if (logfilename.empty()) if (logfilename.empty())
config->getConfParam(string("logfilename"), logfilename); config->getConfParam(string("logfilename"), logfilename);
if (loglevel.empty()) if (loglevel.empty())
config->getConfParam(string("loglevel"), loglevel); config->getConfParam(string("loglevel"), loglevel);
// Initialize logging // Initialize logging
if (!logfilename.empty()) { if (!logfilename.empty()) {
logfilename = path_tildexpand(logfilename); logfilename = path_tildexpand(logfilename);
// If not an absolute path or stderr, compute relative to config dir. // If not an absolute path or stderr, compute relative to config dir.
if (!path_isabsolute(logfilename) && if (!path_isabsolute(logfilename) &&
logfilename.compare("stderr")) { logfilename.compare("stderr")) {
logfilename = path_cat(config->getConfDir(), logfilename); logfilename = path_cat(config->getConfDir(), logfilename);
} }
Logger::getTheLog("")->reopen(logfilename); Logger::getTheLog("")->reopen(logfilename);
} }
if (!loglevel.empty()) { if (!loglevel.empty()) {
int lev = atoi(loglevel.c_str()); int lev = atoi(loglevel.c_str());
Logger::getTheLog("")->setLogLevel(Logger::LogLevel(lev)); Logger::getTheLog("")->setLogLevel(Logger::LogLevel(lev));
} }
LOGINF(Rcl::version_string() << " [" << config->getConfDir() << "]\n"); LOGINF(Rcl::version_string() << " [" << config->getConfDir() << "]\n");
@ -378,7 +378,7 @@ RclConfig *recollinit(int flags,
// Init Unac translation exceptions // Init Unac translation exceptions
string unacex; string unacex;
if (config->getConfParam("unac_except_trans", unacex) && !unacex.empty()) if (config->getConfParam("unac_except_trans", unacex) && !unacex.empty())
unac_set_except_translations(unacex.c_str()); unac_set_except_translations(unacex.c_str());
#ifndef IDX_THREADS #ifndef IDX_THREADS
ExecCmd::useVfork(true); ExecCmd::useVfork(true);
@ -393,23 +393,23 @@ RclConfig *recollinit(int flags,
bool novfork; bool novfork;
config->getConfParam("novfork", &novfork); config->getConfParam("novfork", &novfork);
if (novfork) { if (novfork) {
LOGDEB0("rclinit: will use fork() for starting commands\n" ); LOGDEB0("rclinit: will use fork() for starting commands\n" );
ExecCmd::useVfork(false); ExecCmd::useVfork(false);
} else { } else {
LOGDEB0("rclinit: will use vfork() for starting commands\n" ); LOGDEB0("rclinit: will use vfork() for starting commands\n" );
ExecCmd::useVfork(true); ExecCmd::useVfork(true);
} }
#endif #endif
int flushmb; int flushmb;
if (config->getConfParam("idxflushmb", &flushmb) && flushmb > 0) { if (config->getConfParam("idxflushmb", &flushmb) && flushmb > 0) {
LOGDEB1("rclinit: idxflushmb=" << flushmb << LOGDEB1("rclinit: idxflushmb=" << flushmb <<
", set XAPIAN_FLUSH_THRESHOLD to 10E6\n"); ", set XAPIAN_FLUSH_THRESHOLD to 10E6\n");
static const char *cp = "XAPIAN_FLUSH_THRESHOLD=1000000"; static const char *cp = "XAPIAN_FLUSH_THRESHOLD=1000000";
#ifdef PUTENV_ARG_CONST #ifdef PUTENV_ARG_CONST
::putenv(cp); ::putenv(cp);
#else #else
::putenv(strdup(cp)); ::putenv(strdup(cp));
#endif #endif
} }
@ -425,7 +425,7 @@ void recoll_threadinit()
sigemptyset(&sset); sigemptyset(&sset);
for (unsigned int i = 0; i < sizeof(catchedSigs) / sizeof(int); i++) for (unsigned int i = 0; i < sizeof(catchedSigs) / sizeof(int); i++)
sigaddset(&sset, catchedSigs[i]); sigaddset(&sset, catchedSigs[i]);
sigaddset(&sset, SIGHUP); sigaddset(&sset, SIGHUP);
pthread_sigmask(SIG_BLOCK, &sset, 0); pthread_sigmask(SIG_BLOCK, &sset, 0);
#else #else
@ -442,5 +442,3 @@ bool recoll_ismainthread()
{ {
return std::this_thread::get_id() == mainthread_id; return std::this_thread::get_id() == mainthread_id;
} }

View File

@ -63,9 +63,9 @@ Extractor_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{ {
LOGDEB("Extractor_new\n" ); LOGDEB("Extractor_new\n" );
rclx_ExtractorObject *self = rclx_ExtractorObject *self =
(rclx_ExtractorObject *)type->tp_alloc(type, 0); (rclx_ExtractorObject *)type->tp_alloc(type, 0);
if (self == 0) if (self == 0)
return 0; return 0;
self->xtr = 0; self->xtr = 0;
self->docobject = 0; self->docobject = 0;
return (PyObject *)self; return (PyObject *)self;
@ -79,55 +79,55 @@ Extractor_init(rclx_ExtractorObject *self, PyObject *args, PyObject *kwargs)
PyObject *pdobj; PyObject *pdobj;
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O!", (char**)kwlist, if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O!", (char**)kwlist,
recoll_DocType, &pdobj)) recoll_DocType, &pdobj))
return -1; return -1;
recoll_DocObject *dobj = (recoll_DocObject *)pdobj; recoll_DocObject *dobj = (recoll_DocObject *)pdobj;
if (dobj->doc == 0) { if (dobj->doc == 0) {
PyErr_SetString(PyExc_AttributeError, "Null Doc ?"); PyErr_SetString(PyExc_AttributeError, "Null Doc ?");
return -1; return -1;
} }
self->docobject = dobj; self->docobject = dobj;
Py_INCREF(dobj); Py_INCREF(dobj);
self->rclconfig = dobj->rclconfig; self->rclconfig = dobj->rclconfig;
self->xtr = new FileInterner(*dobj->doc, self->rclconfig.get(), self->xtr = new FileInterner(*dobj->doc, self->rclconfig.get(),
FileInterner::FIF_forPreview); FileInterner::FIF_forPreview);
return 0; return 0;
} }
PyDoc_STRVAR(doc_Extractor_textextract, PyDoc_STRVAR(doc_Extractor_textextract,
"textextract(ipath)\n" "textextract(ipath)\n"
"Extract document defined by ipath and return a doc object. The doc.text\n" "Extract document defined by ipath and return a doc object. The doc.text\n"
"field has the document text as either text/plain or text/html\n" "field has the document text as either text/plain or text/html\n"
"according to doc.mimetype.\n" "according to doc.mimetype.\n"
); );
static PyObject * static PyObject *
Extractor_textextract(rclx_ExtractorObject* self, PyObject *args, Extractor_textextract(rclx_ExtractorObject* self, PyObject *args,
PyObject *kwargs) PyObject *kwargs)
{ {
LOGDEB("Extractor_textextract\n" ); LOGDEB("Extractor_textextract\n" );
static const char* kwlist[] = {"ipath", NULL}; static const char* kwlist[] = {"ipath", NULL};
char *sipath = 0; char *sipath = 0;
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "es:Extractor_textextract", if (!PyArg_ParseTupleAndKeywords(args, kwargs, "es:Extractor_textextract",
(char**)kwlist, (char**)kwlist,
"utf-8", &sipath)) "utf-8", &sipath))
return 0; return 0;
string ipath(sipath); string ipath(sipath);
PyMem_Free(sipath); PyMem_Free(sipath);
if (self->xtr == 0) { if (self->xtr == 0) {
PyErr_SetString(PyExc_AttributeError, "extract: null object"); PyErr_SetString(PyExc_AttributeError, "extract: null object");
return 0; return 0;
} }
/* Call the doc class object to create a new doc. */ /* Call the doc class object to create a new doc. */
recoll_DocObject *result = recoll_DocObject *result =
(recoll_DocObject *)PyObject_CallObject((PyObject *)recoll_DocType, 0); (recoll_DocObject *)PyObject_CallObject((PyObject *)recoll_DocType, 0);
if (!result) { if (!result) {
PyErr_SetString(PyExc_AttributeError, "extract: doc create failed"); PyErr_SetString(PyExc_AttributeError, "extract: doc create failed");
return 0; return 0;
} }
FileInterner::Status status = self->xtr->internfile(*(result->doc), ipath); FileInterner::Status status = self->xtr->internfile(*(result->doc), ipath);
if (status != FileInterner::FIDone && status != FileInterner::FIAgain) { if (status != FileInterner::FIDone && status != FileInterner::FIAgain) {
@ -137,14 +137,14 @@ Extractor_textextract(rclx_ExtractorObject* self, PyObject *args,
string html = self->xtr->get_html(); string html = self->xtr->get_html();
if (!html.empty()) { if (!html.empty()) {
result->doc->text = html; result->doc->text = html;
result->doc->mimetype = "text/html"; result->doc->mimetype = "text/html";
} }
// Is this actually needed ? Useful for url which is also formatted . // Is this actually needed ? Useful for url which is also formatted .
Rcl::Doc *doc = result->doc; Rcl::Doc *doc = result->doc;
printableUrl(self->rclconfig->getDefCharset(), doc->url, printableUrl(self->rclconfig->getDefCharset(), doc->url,
doc->meta[Rcl::Doc::keyurl]); doc->meta[Rcl::Doc::keyurl]);
doc->meta[Rcl::Doc::keytp] = doc->mimetype; doc->meta[Rcl::Doc::keytp] = doc->mimetype;
doc->meta[Rcl::Doc::keyipt] = doc->ipath; doc->meta[Rcl::Doc::keyipt] = doc->ipath;
doc->meta[Rcl::Doc::keyfs] = doc->fbytes; doc->meta[Rcl::Doc::keyfs] = doc->fbytes;
@ -153,12 +153,12 @@ Extractor_textextract(rclx_ExtractorObject* self, PyObject *args,
} }
PyDoc_STRVAR(doc_Extractor_idoctofile, PyDoc_STRVAR(doc_Extractor_idoctofile,
"idoctofile(ipath='', mimetype='', ofilename='')\n" "idoctofile(ipath='', mimetype='', ofilename='')\n"
"Extract document defined by ipath into a file, in its native format.\n" "Extract document defined by ipath into a file, in its native format.\n"
); );
static PyObject * static PyObject *
Extractor_idoctofile(rclx_ExtractorObject* self, PyObject *args, Extractor_idoctofile(rclx_ExtractorObject* self, PyObject *args,
PyObject *kwargs) PyObject *kwargs)
{ {
LOGDEB("Extractor_idoctofile\n" ); LOGDEB("Extractor_idoctofile\n" );
static const char* kwlist[] = {"ipath", "mimetype", "ofilename", NULL}; static const char* kwlist[] = {"ipath", "mimetype", "ofilename", NULL};
@ -166,11 +166,11 @@ Extractor_idoctofile(rclx_ExtractorObject* self, PyObject *args,
char *smt = 0; char *smt = 0;
char *soutfile = 0; // no freeing char *soutfile = 0; // no freeing
if (!PyArg_ParseTupleAndKeywords(args,kwargs, "eses|s:Extractor_idoctofile", if (!PyArg_ParseTupleAndKeywords(args,kwargs, "eses|s:Extractor_idoctofile",
(char**)kwlist, (char**)kwlist,
"utf-8", &sipath, "utf-8", &sipath,
"utf-8", &smt, "utf-8", &smt,
&soutfile)) &soutfile))
return 0; return 0;
string ipath(sipath); string ipath(sipath);
PyMem_Free(sipath); PyMem_Free(sipath);
@ -178,11 +178,11 @@ Extractor_idoctofile(rclx_ExtractorObject* self, PyObject *args,
PyMem_Free(smt); PyMem_Free(smt);
string outfile; string outfile;
if (soutfile && *soutfile) if (soutfile && *soutfile)
outfile.assign(soutfile); outfile.assign(soutfile);
if (self->xtr == 0) { if (self->xtr == 0) {
PyErr_SetString(PyExc_AttributeError, "idoctofile: null object"); PyErr_SetString(PyExc_AttributeError, "idoctofile: null object");
return 0; return 0;
} }
// If ipath is empty and we want the original mimetype, we can't // If ipath is empty and we want the original mimetype, we can't
@ -199,7 +199,7 @@ Extractor_idoctofile(rclx_ExtractorObject* self, PyObject *args,
"\n"); "\n");
if (ipath.empty() && !mimetype.compare(self->docobject->doc->mimetype)) { if (ipath.empty() && !mimetype.compare(self->docobject->doc->mimetype)) {
status = FileInterner::idocToFile(temp, outfile, self->rclconfig.get(), status = FileInterner::idocToFile(temp, outfile, self->rclconfig.get(),
*self->docobject->doc); *self->docobject->doc);
} else { } else {
self->xtr->setTargetMType(mimetype); self->xtr->setTargetMType(mimetype);
status = self->xtr->interntofile(temp, outfile, ipath, mimetype); status = self->xtr->interntofile(temp, outfile, ipath, mimetype);
@ -209,9 +209,9 @@ Extractor_idoctofile(rclx_ExtractorObject* self, PyObject *args,
return 0; return 0;
} }
if (outfile.empty()) if (outfile.empty())
temp.setnoremove(1); temp.setnoremove(1);
PyObject *result = outfile.empty() ? PyBytes_FromString(temp.filename()) : PyObject *result = outfile.empty() ? PyBytes_FromString(temp.filename()) :
PyBytes_FromString(outfile.c_str()); PyBytes_FromString(outfile.c_str());
return (PyObject *)result; return (PyObject *)result;
} }
@ -224,11 +224,11 @@ static PyMethodDef Extractor_methods[] = {
}; };
PyDoc_STRVAR(doc_ExtractorObject, PyDoc_STRVAR(doc_ExtractorObject,
"Extractor()\n" "Extractor()\n"
"\n" "\n"
"An Extractor object can extract data from a native simple or compound\n" "An Extractor object can extract data from a native simple or compound\n"
"object.\n" "object.\n"
); );
static PyTypeObject rclx_ExtractorType = { static PyTypeObject rclx_ExtractorType = {
PyVarObject_HEAD_INIT(NULL, 0) PyVarObject_HEAD_INIT(NULL, 0)
"rclextract.Extractor", /*tp_name*/ "rclextract.Extractor", /*tp_name*/
@ -275,7 +275,7 @@ static PyMethodDef rclextract_methods[] = {
{NULL, NULL, 0, NULL} /* Sentinel */ {NULL, NULL, 0, NULL} /* Sentinel */
}; };
PyDoc_STRVAR(rclx_doc_string, PyDoc_STRVAR(rclx_doc_string,
"This is an interface to the Recoll text extraction features."); "This is an interface to the Recoll text extraction features.");
struct module_state { struct module_state {
PyObject *error; PyObject *error;
@ -300,15 +300,15 @@ static int rclextract_clear(PyObject *m) {
} }
static struct PyModuleDef moduledef = { static struct PyModuleDef moduledef = {
PyModuleDef_HEAD_INIT, PyModuleDef_HEAD_INIT,
"rclextract", "rclextract",
NULL, NULL,
sizeof(struct module_state), sizeof(struct module_state),
rclextract_methods, rclextract_methods,
NULL, NULL,
rclextract_traverse, rclextract_traverse,
rclextract_clear, rclextract_clear,
NULL NULL
}; };
#define INITERROR return NULL #define INITERROR return NULL
@ -318,8 +318,8 @@ PyInit_rclextract(void)
#else #else
#define INITERROR return #define INITERROR return
PyMODINIT_FUNC PyMODINIT_FUNC
initrclextract(void) initrclextract(void)
#endif #endif
{ {
// We run recollinit. It's responsible for initializing some static data // We run recollinit. It's responsible for initializing some static data
@ -332,8 +332,8 @@ initrclextract(void)
string reason; string reason;
RclConfig *rclconfig = recollinit(RCLINIT_PYTHON, 0, 0, reason, 0); RclConfig *rclconfig = recollinit(RCLINIT_PYTHON, 0, 0, reason, 0);
if (rclconfig == 0) { if (rclconfig == 0) {
PyErr_SetString(PyExc_EnvironmentError, reason.c_str()); PyErr_SetString(PyExc_EnvironmentError, reason.c_str());
INITERROR; INITERROR;
} else { } else {
delete rclconfig; delete rclconfig;
} }
@ -380,4 +380,3 @@ initrclextract(void)
return module; return module;
#endif #endif
} }

View File

@ -1066,7 +1066,7 @@ Query_executesd(recoll_QueryObject* self, PyObject *args, PyObject *kwargs)
// Move some data from the dedicated fields to the meta array to make // Move some data from the dedicated fields to the meta array to make
// fetching attributes easier. Needed because we only use the meta // fetching attributes easier. Needed because we only use the meta
// array when enumerating keys. Also for url which is also formatted. // array when enumerating keys. Also for url which is also formatted.
// But not that some fields are not copied, and are only reachable if // But note that some fields are not copied, and are only reachable if
// one knows their name (e.g. xdocid). // one knows their name (e.g. xdocid).
static void movedocfields(const RclConfig* rclconfig, Rcl::Doc *doc) static void movedocfields(const RclConfig* rclconfig, Rcl::Doc *doc)
{ {
@ -2205,4 +2205,3 @@ PyInit_recoll(void)
return module; return module;
#endif #endif
} }