Python module: merge the pyrecoll and pyrclextract C extensions into a single _recoll extension, and add two Python wrapper modules (recoll, rclextract) to maintain compatibility

This commit is contained in:
Jean-Francois Dockes 2020-10-14 17:36:33 +02:00
parent 6daba6884f
commit 406c6b3257
7 changed files with 87 additions and 180 deletions

View File

@ -1,4 +1,4 @@
/* Copyright (C) 2007 J.F.Dockes /* Copyright (C) 2007-2020 J.F.Dockes
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or * the Free Software Foundation; either version 2 of the License, or
@ -33,9 +33,6 @@
using namespace std; using namespace std;
// Imported from pyrecoll
static PyObject *recoll_DocType;
////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////
/// Extractor object code /// Extractor object code
typedef struct { typedef struct {
@ -76,12 +73,12 @@ Extractor_init(rclx_ExtractorObject *self, PyObject *args, PyObject *kwargs)
{ {
LOGDEB("Extractor_init\n" ); LOGDEB("Extractor_init\n" );
static const char* kwlist[] = {"doc", NULL}; static const char* kwlist[] = {"doc", NULL};
PyObject *pdobj; recoll_DocObject *dobj = 0;
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O!", (char**)kwlist, if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O!:Extractor_init",
recoll_DocType, &pdobj)) (char**)kwlist,
&recoll_DocType, &dobj))
return -1; return -1;
recoll_DocObject *dobj = (recoll_DocObject *)pdobj;
if (dobj->doc == 0) { if (dobj->doc == 0) {
PyErr_SetString(PyExc_AttributeError, "Null Doc ?"); PyErr_SetString(PyExc_AttributeError, "Null Doc ?");
return -1; return -1;
@ -124,7 +121,7 @@ Extractor_textextract(rclx_ExtractorObject* self, PyObject *args,
} }
/* Call the doc class object to create a new doc. */ /* Call the doc class object to create a new doc. */
recoll_DocObject *result = recoll_DocObject *result =
(recoll_DocObject *)PyObject_CallObject((PyObject *)recoll_DocType, 0); (recoll_DocObject *)PyObject_CallObject((PyObject *)&recoll_DocType, 0);
if (!result) { if (!result) {
PyErr_SetString(PyExc_AttributeError, "extract: doc create failed"); PyErr_SetString(PyExc_AttributeError, "extract: doc create failed");
return 0; return 0;
@ -229,9 +226,10 @@ PyDoc_STRVAR(doc_ExtractorObject,
"An Extractor object can extract data from a native simple or compound\n" "An Extractor object can extract data from a native simple or compound\n"
"object.\n" "object.\n"
); );
static PyTypeObject rclx_ExtractorType = {
PyTypeObject rclx_ExtractorType = {
PyVarObject_HEAD_INIT(NULL, 0) PyVarObject_HEAD_INIT(NULL, 0)
"rclextract.Extractor", /*tp_name*/ "_rclextract.Extractor", /*tp_name*/
sizeof(rclx_ExtractorObject), /*tp_basicsize*/ sizeof(rclx_ExtractorObject), /*tp_basicsize*/
0, /*tp_itemsize*/ 0, /*tp_itemsize*/
(destructor)Extractor_dealloc, /*tp_dealloc*/ (destructor)Extractor_dealloc, /*tp_dealloc*/
@ -269,114 +267,3 @@ static PyTypeObject rclx_ExtractorType = {
0, /* tp_alloc */ 0, /* tp_alloc */
Extractor_new, /* tp_new */ Extractor_new, /* tp_new */
}; };
///////////////////////////////////// Module-level stuff
// Module-level function table for the rclextract module. It contains only
// the terminating sentinel entry: no module-level functions are defined here.
static PyMethodDef rclextract_methods[] = {
{NULL, NULL, 0, NULL} /* Sentinel */
};
// Module documentation text. The module init function below installs it as
// the module's "__doc__" attribute.
PyDoc_STRVAR(rclx_doc_string,
"This is an interface to the Recoll text extraction features.");
// Module state, accessed through GETSTATE().
struct module_state {
// Exception class object created by the module init function with
// PyErr_NewException() ("rclextract.Error").
PyObject *error;
};
// GETSTATE(m): return a pointer to the module_state for module object m.
#if PY_MAJOR_VERSION >= 3
// Python 3: the state is fetched from the module object itself.
#define GETSTATE(m) ((struct module_state*)PyModule_GetState(m))
#else
// Python 2: a single file-static instance is used; the argument is ignored.
#define GETSTATE(m) (&_state)
static struct module_state _state;
#endif
#if PY_MAJOR_VERSION >= 3
/* Traverse hook referenced from the module definition: reports the
 * exception object held in the module state to the visitor. Py_VISIT
 * returns early with the visitor's result if it is non-zero. */
static int rclextract_traverse(PyObject *m, visitproc visit, void *arg)
{
    struct module_state *state = GETSTATE(m);
    Py_VISIT(state->error);
    return 0;
}
/* Clear hook referenced from the module definition: drops the reference
 * to the exception object held in the module state and resets the
 * pointer. Always returns 0. */
static int rclextract_clear(PyObject *m)
{
    struct module_state *state = GETSTATE(m);
    Py_CLEAR(state->error);
    return 0;
}
// Python 3 module definition passed to PyModule_Create() by the init
// function. Fields are positional; the names in the trailing comments are
// those of the PyModuleDef structure from the CPython documentation.
static struct PyModuleDef moduledef = {
PyModuleDef_HEAD_INIT,
"rclextract", /* m_name */
NULL, /* m_doc: unset here, "__doc__" is added by the init function */
sizeof(struct module_state), /* m_size: size of the module state */
rclextract_methods, /* m_methods */
NULL, /* m_slots (named m_reload before Python 3.5) */
rclextract_traverse, /* m_traverse */
rclextract_clear, /* m_clear */
NULL /* m_free */
};
/* Module initialization entry point for the rclextract extension:
 * PyInit_rclextract() under Python 3, initrclextract() under Python 2.
 *
 * Steps:
 *  - run recollinit() to set up this shared object's static data,
 *  - create the module and its "rclextract.Error" exception class,
 *  - register the Extractor type,
 *  - fetch the Doc type object pointer exported by the recoll module
 *    ("doctypeptr") and store it in recoll_DocType.
 *
 * On failure a Python exception is set and, under Python 3, NULL is
 * returned. Every failure after the module was created releases the
 * module reference through RELEASE_MODULE().
 */
#define INITERROR return NULL
// Python 3: PyModule_Create() returns a new reference, which must be
// dropped when initialization fails.
#define RELEASE_MODULE(m) Py_DECREF(m)
extern "C" PyObject *
PyInit_rclextract(void)
#else
#define INITERROR return
// Python 2: Py_InitModule() returns a borrowed reference, there is nothing
// to release.
#define RELEASE_MODULE(m) ((void)(m))
PyMODINIT_FUNC
initrclextract(void)
#endif
{
    // We run recollinit. It's responsible for initializing some static data
    // which is distinct from pyrecoll's as we're separately dlopened.
    // The rclconfig object is not used, we'll get the config
    // data from the objects out of the recoll module.
    // Unfortunately, as we're not getting the actual config directory
    // from pyrecoll (we could, through a capsule), this needs at
    // least an empty default configuration directory to work.
    string reason;
    RclConfig *rclconfig = recollinit(RCLINIT_PYTHON, 0, 0, reason, 0);
    if (rclconfig == 0) {
        PyErr_SetString(PyExc_EnvironmentError, reason.c_str());
        INITERROR;
    }
    delete rclconfig;

#if PY_MAJOR_VERSION >= 3
    PyObject *module = PyModule_Create(&moduledef);
#else
    PyObject *module = Py_InitModule("rclextract", rclextract_methods);
#endif
    if (module == NULL)
        INITERROR;

    struct module_state *st = GETSTATE(module);

    // Python 2 declares the first parameter of PyErr_NewException() as a
    // non-const char *. A static writable buffer satisfies both the Python 2
    // and Python 3 signatures without leaking a strdup()'d copy on every
    // initialization.
    static char errname[] = "rclextract.Error";
    st->error = PyErr_NewException(errname, NULL, NULL);
    if (st->error == NULL) {
        RELEASE_MODULE(module);
        INITERROR;
    }

    if (PyModule_AddStringConstant(module, "__doc__", rclx_doc_string) < 0) {
        RELEASE_MODULE(module);
        INITERROR;
    }

    if (PyType_Ready(&rclx_ExtractorType) < 0) {
        RELEASE_MODULE(module);
        INITERROR;
    }
    // PyModule_AddObject() steals the reference only when it succeeds: give
    // it back ourselves on failure.
    Py_INCREF((PyObject *)&rclx_ExtractorType);
    if (PyModule_AddObject(module, "Extractor",
                           (PyObject *)&rclx_ExtractorType) < 0) {
        Py_DECREF((PyObject *)&rclx_ExtractorType);
        RELEASE_MODULE(module);
        INITERROR;
    }

    // Import the Doc type object pointer from the recoll module. It is
    // needed to type-check Extractor() arguments and to create Doc objects,
    // so a failure here must abort the initialization instead of leaving
    // recoll_DocType null.
#if PY_MAJOR_VERSION >= 3 || (PY_MAJOR_VERSION >= 2 && PY_MINOR_VERSION >= 7)
    recoll_DocType =
        (PyObject*)PyCapsule_Import(PYRECOLL_PACKAGE "recoll.doctypeptr", 0);
    if (recoll_DocType == NULL) {
        // PyCapsule_Import() has set the exception.
        RELEASE_MODULE(module);
        INITERROR;
    }
#else
    PyObject *module1 = PyImport_ImportModule(PYRECOLL_PACKAGE "recoll");
    if (module1 == NULL) {
        RELEASE_MODULE(module);
        INITERROR;
    }
    PyObject *cobject = PyObject_GetAttrString(module1, "doctypeptr");
    Py_DECREF(module1);
    if (cobject == NULL) {
        RELEASE_MODULE(module);
        INITERROR;
    }
    if (!PyCObject_Check(cobject)) {
        Py_DECREF(cobject);
        PyErr_SetString(PyExc_TypeError,
                        "rclextract: recoll.doctypeptr is not a CObject");
        RELEASE_MODULE(module);
        INITERROR;
    }
    recoll_DocType = (PyObject*)PyCObject_AsVoidPtr(cobject);
    Py_DECREF(cobject);
#endif

#if PY_MAJOR_VERSION >= 3
    return module;
#endif
}

View File

@ -1,4 +1,4 @@
/* Copyright (C) 2007 J.F.Dockes /* Copyright (C) 2007-2020 J.F.Dockes
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or * the Free Software Foundation; either version 2 of the License, or
@ -140,7 +140,7 @@ static PyMethodDef SearchData_methods[] = {
static PyTypeObject recoll_SearchDataType = { static PyTypeObject recoll_SearchDataType = {
PyVarObject_HEAD_INIT(NULL, 0) PyVarObject_HEAD_INIT(NULL, 0)
"recoll.SearchData", /*tp_name*/ "_recoll.SearchData", /*tp_name*/
sizeof(recoll_SearchDataObject), /*tp_basicsize*/ sizeof(recoll_SearchDataObject), /*tp_basicsize*/
0, /*tp_itemsize*/ 0, /*tp_itemsize*/
(destructor)SearchData_dealloc, /*tp_dealloc*/ (destructor)SearchData_dealloc, /*tp_dealloc*/
@ -775,9 +775,10 @@ PyDoc_STRVAR(doc_DocObject,
" title (both)\n" " title (both)\n"
" keywords (both)\n" " keywords (both)\n"
); );
static PyTypeObject recoll_DocType = {
PyTypeObject recoll_DocType = {
PyVarObject_HEAD_INIT(NULL, 0) PyVarObject_HEAD_INIT(NULL, 0)
"recoll.Doc", /*tp_name*/ "_recoll.Doc", /*tp_name*/
sizeof(recoll_DocObject), /*tp_basicsize*/ sizeof(recoll_DocObject), /*tp_basicsize*/
0, /*tp_itemsize*/ 0, /*tp_itemsize*/
(destructor)Doc_dealloc, /*tp_dealloc*/ (destructor)Doc_dealloc, /*tp_dealloc*/
@ -1319,6 +1320,7 @@ PyDoc_STRVAR(doc_Query_makedocabstract,
" terms\n" " terms\n"
"If methods is set, will also perform highlighting. See the highlight method\n" "If methods is set, will also perform highlighting. See the highlight method\n"
); );
static PyObject * static PyObject *
Query_makedocabstract(recoll_QueryObject* self, PyObject *args,PyObject *kwargs) Query_makedocabstract(recoll_QueryObject* self, PyObject *args,PyObject *kwargs)
{ {
@ -1522,7 +1524,7 @@ PyDoc_STRVAR(doc_QueryObject,
); );
static PyTypeObject recoll_QueryType = { static PyTypeObject recoll_QueryType = {
PyVarObject_HEAD_INIT(NULL, 0) PyVarObject_HEAD_INIT(NULL, 0)
"recoll.Query", /*tp_name*/ "_recoll.Query", /*tp_name*/
sizeof(recoll_QueryObject), /*tp_basicsize*/ sizeof(recoll_QueryObject), /*tp_basicsize*/
0, /*tp_itemsize*/ 0, /*tp_itemsize*/
(destructor)Query_dealloc, /*tp_dealloc*/ (destructor)Query_dealloc, /*tp_dealloc*/
@ -2025,7 +2027,7 @@ PyDoc_STRVAR(doc_DbObject,
); );
static PyTypeObject recoll_DbType = { static PyTypeObject recoll_DbType = {
PyVarObject_HEAD_INIT(NULL, 0) PyVarObject_HEAD_INIT(NULL, 0)
"recoll.Db", /*tp_name*/ "_recoll.Db", /*tp_name*/
sizeof(recoll_DbObject), /*tp_basicsize*/ sizeof(recoll_DbObject), /*tp_basicsize*/
0, /*tp_itemsize*/ 0, /*tp_itemsize*/
(destructor)Db_dealloc, /*tp_dealloc*/ (destructor)Db_dealloc, /*tp_dealloc*/
@ -2122,7 +2124,7 @@ static int recoll_clear(PyObject *m) {
static struct PyModuleDef moduledef = { static struct PyModuleDef moduledef = {
PyModuleDef_HEAD_INIT, PyModuleDef_HEAD_INIT,
"recoll", "_recoll",
NULL, NULL,
sizeof(struct module_state), sizeof(struct module_state),
recoll_methods, recoll_methods,
@ -2135,13 +2137,13 @@ static struct PyModuleDef moduledef = {
#define INITERROR return NULL #define INITERROR return NULL
extern "C" PyObject * extern "C" PyObject *
PyInit_recoll(void) PyInit__recoll(void)
#else #else
#define INITERROR return #define INITERROR return
PyMODINIT_FUNC PyMODINIT_FUNC
initrecoll(void) init_recoll(void)
#endif #endif
{ {
// Note: we can't call recollinit here, because the confdir is only really // Note: we can't call recollinit here, because the confdir is only really
@ -2152,7 +2154,7 @@ PyInit_recoll(void)
#if PY_MAJOR_VERSION >= 3 #if PY_MAJOR_VERSION >= 3
PyObject *module = PyModule_Create(&moduledef); PyObject *module = PyModule_Create(&moduledef);
#else #else
PyObject *module = Py_InitModule("recoll", recoll_methods); PyObject *module = Py_InitModule("_recoll", recoll_methods);
#endif #endif
if (module == NULL) if (module == NULL)
INITERROR; INITERROR;
@ -2160,7 +2162,7 @@ PyInit_recoll(void)
struct module_state *st = GETSTATE(module); struct module_state *st = GETSTATE(module);
// The first parameter is a char *. Hopefully we don't initialize // The first parameter is a char *. Hopefully we don't initialize
// modules too often... // modules too often...
st->error = PyErr_NewException(strdup("recoll.Error"), NULL, NULL); st->error = PyErr_NewException(strdup("_recoll.Error"), NULL, NULL);
if (st->error == NULL) { if (st->error == NULL) {
Py_DECREF(module); Py_DECREF(module);
INITERROR; INITERROR;
@ -2186,20 +2188,13 @@ PyInit_recoll(void)
Py_INCREF((PyObject*)&recoll_SearchDataType); Py_INCREF((PyObject*)&recoll_SearchDataType);
PyModule_AddObject(module, "SearchData", PyModule_AddObject(module, "SearchData",
(PyObject *)&recoll_SearchDataType); (PyObject *)&recoll_SearchDataType);
PyModule_AddStringConstant(module, "__doc__",
pyrecoll_doc_string);
PyObject *doctypecobject; PyModule_AddStringConstant(module, "__doc__", pyrecoll_doc_string);
#if PY_MAJOR_VERSION >= 3 || (PY_MAJOR_VERSION >= 2 && PY_MINOR_VERSION >= 7) if (PyType_Ready(&rclx_ExtractorType) < 0)
// Export a few pointers for the benefit of other recoll python modules INITERROR;
doctypecobject= Py_INCREF(&rclx_ExtractorType);
PyCapsule_New(&recoll_DocType, PYRECOLL_PACKAGE "recoll.doctypeptr", 0); PyModule_AddObject(module, "Extractor", (PyObject *)&rclx_ExtractorType);
#else
doctypecobject = PyCObject_FromVoidPtr(&recoll_DocType, NULL);
#endif
PyModule_AddObject(module, "doctypeptr", doctypecobject);
#if PY_MAJOR_VERSION >= 3 #if PY_MAJOR_VERSION >= 3
return module; return module;

View File

@ -1,4 +1,4 @@
/* Copyright (C) 2012 J.F.Dockes /* Copyright (C) 2012-2020 J.F.Dockes
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or * the Free Software Foundation; either version 2 of the License, or
@ -17,6 +17,8 @@
#ifndef _PYRECOLL_H_INCLUDED_ #ifndef _PYRECOLL_H_INCLUDED_
#define _PYRECOLL_H_INCLUDED_ #define _PYRECOLL_H_INCLUDED_
/* Shared definitions for pyrecoll.cpp and pyrclextract.cpp */
#include <Python.h> #include <Python.h>
#include <memory> #include <memory>
@ -31,6 +33,7 @@ typedef struct {
std::shared_ptr<RclConfig> rclconfig; std::shared_ptr<RclConfig> rclconfig;
} recoll_DocObject; } recoll_DocObject;
#define PYRECOLL_PACKAGE "recoll." extern PyTypeObject rclx_ExtractorType;
extern PyTypeObject recoll_DocType;
#endif // _PYRECOLL_H_INCLUDED_ #endif // _PYRECOLL_H_INCLUDED_

View File

@ -0,0 +1,23 @@
# Copyright (C) 2020 J.F.Dockes
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the
# Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
# We used to have two C extensions: recoll and rclextract, which was a really
# bad idea. They are now merged into the _recoll C extension module. The two
# python modules recoll.py and rclextract.py only exist for compatibility (for
# now: maybe we'll do something with them in the future).
from ._recoll import Extractor

View File

@ -0,0 +1,22 @@
# Copyright (C) 2020 J.F.Dockes
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the
# Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
# We used to have two C extensions: recoll and rclextract, which was a really
# bad idea. They are now merged into the _recoll C extension module. The two
# python modules recoll.py and rclextract.py only exist for compatibility (for
# now: maybe we'll do something with them in the future).
from ._recoll import *

View File

@ -25,10 +25,9 @@ if 'libdir' in os.environ and os.environ['libdir'] != "":
else: else:
runtime_library_dirs = [os.path.join('@prefix@', 'lib', 'recoll')] runtime_library_dirs = [os.path.join('@prefix@', 'lib', 'recoll')]
module1 = Extension('recoll', module1 = Extension('_recoll',
define_macros = [('MAJOR_VERSION', '1'), define_macros = [('MAJOR_VERSION', '1'),
('MINOR_VERSION', '0'), ('MINOR_VERSION', '0'),
('UNAC_VERSION', '"1.0.7"'),
('RECOLL_DATADIR', '"@RECOLL_DATADIR@"') ('RECOLL_DATADIR', '"@RECOLL_DATADIR@"')
], ],
include_dirs = ['/usr/local/include', include_dirs = ['/usr/local/include',
@ -36,6 +35,7 @@ module1 = Extension('recoll',
os.path.join(top, 'common'), os.path.join(top, 'common'),
os.path.join(localtop, 'common'), os.path.join(localtop, 'common'),
os.path.join(top, 'common'), os.path.join(top, 'common'),
os.path.join(top, 'internfile'),
os.path.join(top, 'rcldb'), os.path.join(top, 'rcldb'),
os.path.join(top, 'query'), os.path.join(top, 'query'),
os.path.join(top, 'unac') os.path.join(top, 'unac')
@ -44,26 +44,9 @@ module1 = Extension('recoll',
libraries = libraries, libraries = libraries,
library_dirs = library_dirs, library_dirs = library_dirs,
runtime_library_dirs = runtime_library_dirs, runtime_library_dirs = runtime_library_dirs,
sources = [os.path.join(pytop, 'pyrecoll.cpp')]) sources = [os.path.join(pytop, 'pyrecoll.cpp'),
os.path.join(pytop, 'pyrclextract.cpp')
module2 = Extension('rclextract', ])
define_macros = [('MAJOR_VERSION', '1'),
('MINOR_VERSION', '0'),
('UNAC_VERSION', '"1.0.7"'),
('RECOLL_DATADIR', '"@RECOLL_DATADIR@"')
],
include_dirs = ['/usr/local/include',
os.path.join(top, 'utils'),
os.path.join(top, 'common'),
os.path.join(localtop, 'common'),
os.path.join(top, 'internfile'),
os.path.join(top, 'rcldb'),
],
extra_compile_args = extra_compile_args,
libraries = libraries,
library_dirs = library_dirs,
runtime_library_dirs = runtime_library_dirs,
sources = [os.path.join(pytop, 'pyrclextract.cpp')])
setup (name = 'Recoll', setup (name = 'Recoll',
version = '1.0', version = '1.0',
@ -77,4 +60,4 @@ setup (name = 'Recoll',
''', ''',
packages = ['recoll'], packages = ['recoll'],
ext_package = 'recoll', ext_package = 'recoll',
ext_modules = [module1, module2]) ext_modules = [module1])

View File

@ -1,4 +1,4 @@
#!/usr/bin/env python #!/usr/bin/python3
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
"""A python version of the command line query tool recollq (a bit simplified) """A python version of the command line query tool recollq (a bit simplified)
The input string is always interpreted as a query language string. The input string is always interpreted as a query language string.
@ -14,13 +14,7 @@ if sys.version_info[0] >= 3:
else: else:
ISP3 = False ISP3 = False
try: from recoll import recoll, rclextract
from recoll import recoll
from recoll import rclextract
hasextract = True
except:
import recoll
hasextract = False
allmeta = ("title", "keywords", "abstract", "url", "mimetype", "mtime", allmeta = ("title", "keywords", "abstract", "url", "mimetype", "mtime",
"ipath", "fbytes", "dbytes", "relevancyrating") "ipath", "fbytes", "dbytes", "relevancyrating")
@ -97,7 +91,7 @@ def doquery(db, q):
print(utf8string(abs)) print(utf8string(abs))
print('') print('')
# fulldoc = extract(doc) # fulldoc = extract(doc)
# print "FULLDOC MIMETYPE", fulldoc.mimetype, "TEXT:", fulldoc.text.encode("utf-8") # print("FULLDOC MIMETYPE %s TEXT: %s" % (fulldoc.mimetype,fulldoc.text))
########################################### MAIN ########################################### MAIN