Python module: merge the pyrecoll and pyrclextract C extensions into a single _recoll extension, and create two Python modules (recoll and rclextract) to maintain compatibility

This commit is contained in:
Jean-Francois Dockes 2020-10-14 17:36:33 +02:00
parent 6daba6884f
commit 406c6b3257
7 changed files with 87 additions and 180 deletions

View File

@ -1,4 +1,4 @@
/* Copyright (C) 2007 J.F.Dockes
/* Copyright (C) 2007-2020 J.F.Dockes
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
@ -33,9 +33,6 @@
using namespace std;
// Imported from pyrecoll
static PyObject *recoll_DocType;
//////////////////////////////////////////////////////////////////////
/// Extractor object code
typedef struct {
@ -76,12 +73,12 @@ Extractor_init(rclx_ExtractorObject *self, PyObject *args, PyObject *kwargs)
{
LOGDEB("Extractor_init\n" );
static const char* kwlist[] = {"doc", NULL};
PyObject *pdobj;
recoll_DocObject *dobj = 0;
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O!", (char**)kwlist,
recoll_DocType, &pdobj))
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O!:Extractor_init",
(char**)kwlist,
&recoll_DocType, &dobj))
return -1;
recoll_DocObject *dobj = (recoll_DocObject *)pdobj;
if (dobj->doc == 0) {
PyErr_SetString(PyExc_AttributeError, "Null Doc ?");
return -1;
@ -124,7 +121,7 @@ Extractor_textextract(rclx_ExtractorObject* self, PyObject *args,
}
/* Call the doc class object to create a new doc. */
recoll_DocObject *result =
(recoll_DocObject *)PyObject_CallObject((PyObject *)recoll_DocType, 0);
(recoll_DocObject *)PyObject_CallObject((PyObject *)&recoll_DocType, 0);
if (!result) {
PyErr_SetString(PyExc_AttributeError, "extract: doc create failed");
return 0;
@ -229,9 +226,10 @@ PyDoc_STRVAR(doc_ExtractorObject,
"An Extractor object can extract data from a native simple or compound\n"
"object.\n"
);
static PyTypeObject rclx_ExtractorType = {
PyTypeObject rclx_ExtractorType = {
PyVarObject_HEAD_INIT(NULL, 0)
"rclextract.Extractor", /*tp_name*/
"_rclextract.Extractor", /*tp_name*/
sizeof(rclx_ExtractorObject), /*tp_basicsize*/
0, /*tp_itemsize*/
(destructor)Extractor_dealloc, /*tp_dealloc*/
@ -269,114 +267,3 @@ static PyTypeObject rclx_ExtractorType = {
0, /* tp_alloc */
Extractor_new, /* tp_new */
};
///////////////////////////////////// Module-level stuff
// Module-level function table for the rclextract module. It contains no
// functions, only the terminating sentinel entry. It is handed to the
// interpreter through the PyModuleDef (Python 3) or Py_InitModule (Python 2).
static PyMethodDef rclextract_methods[] = {
{NULL, NULL, 0, NULL} /* Sentinel */
};
// Module docstring. The module init function installs it as the module's
// "__doc__" string constant.
PyDoc_STRVAR(rclx_doc_string,
"This is an interface to the Recoll text extraction features.");
// Module state, accessed through the GETSTATE() macro.
struct module_state {
// Exception object created by the module init function with
// PyErr_NewException() under the name "rclextract.Error".
PyObject *error;
};
// GETSTATE(m) yields a pointer to the module_state for module object m.
// - Python 3: the state is the per-module storage returned by
//   PyModule_GetState().
// - Python 2: the argument is ignored and a single file-static structure is
//   used instead.
#if PY_MAJOR_VERSION >= 3
#define GETSTATE(m) ((struct module_state*)PyModule_GetState(m))
#else
#define GETSTATE(m) (&_state)
static struct module_state _state;
#endif
#if PY_MAJOR_VERSION >= 3
// Traversal hook referenced from the module definition: reports the exception
// object kept in the module state to the visitor. Py_VISIT() returns early
// with the visitor's result if that result is non-zero.
static int rclextract_traverse(PyObject *m, visitproc visit, void *arg)
{
    struct module_state *state = GETSTATE(m);
    Py_VISIT(state->error);
    return 0;
}
// Clear hook referenced from the module definition: drops the reference to
// the exception object kept in the module state and resets the pointer.
static int rclextract_clear(PyObject *m)
{
    struct module_state *state = GETSTATE(m);
    Py_CLEAR(state->error);
    return 0;
}
// Python 3 module definition for the rclextract extension. The initializers
// are positional, the trailing comments name the PyModuleDef field being set.
static struct PyModuleDef moduledef = {
PyModuleDef_HEAD_INIT, /* m_base */
"rclextract", /* m_name */
NULL, /* m_doc: unset here, __doc__ is added by the init function */
sizeof(struct module_state), /* m_size: per-module state storage */
rclextract_methods, /* m_methods */
NULL, /* m_slots (m_reload before Python 3.5) */
rclextract_traverse, /* m_traverse */
rclextract_clear, /* m_clear */
NULL /* m_free */
};
#define INITERROR return NULL
/*
 * Module initialization entry point: PyInit_rclextract() for Python 3,
 * initrclextract() for Python 2.
 *
 * Steps:
 *  - run recollinit() to set up the static data of this shared object,
 *  - create the module and its "rclextract.Error" exception,
 *  - register the Extractor type,
 *  - fetch the Doc type object exported by the recoll module.
 *
 * On failure a Python exception is set and INITERROR is executed (returns
 * NULL for Python 3, plain return for Python 2). Every failure after the
 * module was created goes through RCLX_INITFAIL, so that the module
 * reference is released when, and only when, we own it.
 */
extern "C" PyObject *
PyInit_rclextract(void)
#else
#define INITERROR return
PyMODINIT_FUNC
initrclextract(void)
#endif
{
    // We run recollinit. It's responsible for initializing some static data
    // which is distinct from pyrecoll's as we're separately dlopened.
    // The rclconfig object is not used, we'll get the config
    // data from the objects out of the recoll module.
    // Unfortunately, as we're not getting the actual config directory
    // from pyrecoll (we could, through a capsule), this needs at
    // least an empty default configuration directory to work.
    string reason;
    RclConfig *rclconfig = recollinit(RCLINIT_PYTHON, 0, 0, reason, 0);
    if (rclconfig == 0) {
        PyErr_SetString(PyExc_EnvironmentError, reason.c_str());
        INITERROR;
    } else {
        delete rclconfig;
    }

#if PY_MAJOR_VERSION >= 3
    // PyModule_Create() returns a new reference: drop it on failure.
    PyObject *module = PyModule_Create(&moduledef);
#define RCLX_INITFAIL do { Py_DECREF(module); INITERROR; } while (0)
#else
    // Py_InitModule() returns a borrowed reference: nothing to release.
    PyObject *module = Py_InitModule("rclextract", rclextract_methods);
#define RCLX_INITFAIL INITERROR
#endif
    if (module == NULL)
        INITERROR;

    struct module_state *st = GETSTATE(module);
    // The first parameter is a char *. Hopefully we don't initialize
    // modules too often... (the strdup() copy is never freed).
    st->error = PyErr_NewException(strdup("rclextract.Error"), NULL, NULL);
    if (st->error == NULL)
        RCLX_INITFAIL;

    PyModule_AddStringConstant(module, "__doc__", rclx_doc_string);

    if (PyType_Ready(&rclx_ExtractorType) < 0)
        RCLX_INITFAIL;
    Py_INCREF(&rclx_ExtractorType);
    // PyModule_AddObject() only steals the reference when it succeeds.
    if (PyModule_AddObject(module, "Extractor",
                           (PyObject *)&rclx_ExtractorType) < 0) {
        Py_DECREF(&rclx_ExtractorType);
        RCLX_INITFAIL;
    }

    // Get the Doc type object from the recoll module. Extractor_init() and
    // Extractor_textextract() use it without checking, so fail the import
    // instead of continuing with a null pointer.
#if PY_MAJOR_VERSION >= 3 || (PY_MAJOR_VERSION >= 2 && PY_MINOR_VERSION >= 7)
    recoll_DocType =
        (PyObject*)PyCapsule_Import(PYRECOLL_PACKAGE "recoll.doctypeptr", 0);
#else
    PyObject *module1 = PyImport_ImportModule(PYRECOLL_PACKAGE "recoll");
    if (module1 == NULL)
        RCLX_INITFAIL;
    PyObject *cobject = PyObject_GetAttrString(module1, "doctypeptr");
    Py_DECREF(module1);
    if (cobject == NULL)
        RCLX_INITFAIL;
    if (PyCObject_Check(cobject))
        recoll_DocType = (PyObject*)PyCObject_AsVoidPtr(cobject);
    Py_DECREF(cobject);
#endif
    if (recoll_DocType == NULL) {
        // PyCapsule_Import() sets its own exception; cover the other cases.
        if (!PyErr_Occurred())
            PyErr_SetString(PyExc_ImportError,
                            "rclextract: can't get the Doc type from recoll");
        RCLX_INITFAIL;
    }

#if PY_MAJOR_VERSION >= 3
    return module;
#endif
}
#undef RCLX_INITFAIL

View File

@ -1,4 +1,4 @@
/* Copyright (C) 2007 J.F.Dockes
/* Copyright (C) 2007-2020 J.F.Dockes
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
@ -140,7 +140,7 @@ static PyMethodDef SearchData_methods[] = {
static PyTypeObject recoll_SearchDataType = {
PyVarObject_HEAD_INIT(NULL, 0)
"recoll.SearchData", /*tp_name*/
"_recoll.SearchData", /*tp_name*/
sizeof(recoll_SearchDataObject), /*tp_basicsize*/
0, /*tp_itemsize*/
(destructor)SearchData_dealloc, /*tp_dealloc*/
@ -775,9 +775,10 @@ PyDoc_STRVAR(doc_DocObject,
" title (both)\n"
" keywords (both)\n"
);
static PyTypeObject recoll_DocType = {
PyTypeObject recoll_DocType = {
PyVarObject_HEAD_INIT(NULL, 0)
"recoll.Doc", /*tp_name*/
"_recoll.Doc", /*tp_name*/
sizeof(recoll_DocObject), /*tp_basicsize*/
0, /*tp_itemsize*/
(destructor)Doc_dealloc, /*tp_dealloc*/
@ -1319,6 +1320,7 @@ PyDoc_STRVAR(doc_Query_makedocabstract,
" terms\n"
"If methods is set, will also perform highlighting. See the highlight method\n"
);
static PyObject *
Query_makedocabstract(recoll_QueryObject* self, PyObject *args,PyObject *kwargs)
{
@ -1522,7 +1524,7 @@ PyDoc_STRVAR(doc_QueryObject,
);
static PyTypeObject recoll_QueryType = {
PyVarObject_HEAD_INIT(NULL, 0)
"recoll.Query", /*tp_name*/
"_recoll.Query", /*tp_name*/
sizeof(recoll_QueryObject), /*tp_basicsize*/
0, /*tp_itemsize*/
(destructor)Query_dealloc, /*tp_dealloc*/
@ -2025,7 +2027,7 @@ PyDoc_STRVAR(doc_DbObject,
);
static PyTypeObject recoll_DbType = {
PyVarObject_HEAD_INIT(NULL, 0)
"recoll.Db", /*tp_name*/
"_recoll.Db", /*tp_name*/
sizeof(recoll_DbObject), /*tp_basicsize*/
0, /*tp_itemsize*/
(destructor)Db_dealloc, /*tp_dealloc*/
@ -2122,7 +2124,7 @@ static int recoll_clear(PyObject *m) {
static struct PyModuleDef moduledef = {
PyModuleDef_HEAD_INIT,
"recoll",
"_recoll",
NULL,
sizeof(struct module_state),
recoll_methods,
@ -2135,13 +2137,13 @@ static struct PyModuleDef moduledef = {
#define INITERROR return NULL
extern "C" PyObject *
PyInit_recoll(void)
PyInit__recoll(void)
#else
#define INITERROR return
PyMODINIT_FUNC
initrecoll(void)
init_recoll(void)
#endif
{
// Note: we can't call recollinit here, because the confdir is only really
@ -2152,7 +2154,7 @@ PyInit_recoll(void)
#if PY_MAJOR_VERSION >= 3
PyObject *module = PyModule_Create(&moduledef);
#else
PyObject *module = Py_InitModule("recoll", recoll_methods);
PyObject *module = Py_InitModule("_recoll", recoll_methods);
#endif
if (module == NULL)
INITERROR;
@ -2160,7 +2162,7 @@ PyInit_recoll(void)
struct module_state *st = GETSTATE(module);
// The first parameter is a char *. Hopefully we don't initialize
// modules too often...
st->error = PyErr_NewException(strdup("recoll.Error"), NULL, NULL);
st->error = PyErr_NewException(strdup("_recoll.Error"), NULL, NULL);
if (st->error == NULL) {
Py_DECREF(module);
INITERROR;
@ -2186,20 +2188,13 @@ PyInit_recoll(void)
Py_INCREF((PyObject*)&recoll_SearchDataType);
PyModule_AddObject(module, "SearchData",
(PyObject *)&recoll_SearchDataType);
PyModule_AddStringConstant(module, "__doc__",
pyrecoll_doc_string);
PyObject *doctypecobject;
PyModule_AddStringConstant(module, "__doc__", pyrecoll_doc_string);
#if PY_MAJOR_VERSION >= 3 || (PY_MAJOR_VERSION >= 2 && PY_MINOR_VERSION >= 7)
// Export a few pointers for the benefit of other recoll python modules
doctypecobject=
PyCapsule_New(&recoll_DocType, PYRECOLL_PACKAGE "recoll.doctypeptr", 0);
#else
doctypecobject = PyCObject_FromVoidPtr(&recoll_DocType, NULL);
#endif
PyModule_AddObject(module, "doctypeptr", doctypecobject);
if (PyType_Ready(&rclx_ExtractorType) < 0)
INITERROR;
Py_INCREF(&rclx_ExtractorType);
PyModule_AddObject(module, "Extractor", (PyObject *)&rclx_ExtractorType);
#if PY_MAJOR_VERSION >= 3
return module;

View File

@ -1,4 +1,4 @@
/* Copyright (C) 2012 J.F.Dockes
/* Copyright (C) 2012-2020 J.F.Dockes
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
@ -17,6 +17,8 @@
#ifndef _PYRECOLL_H_INCLUDED_
#define _PYRECOLL_H_INCLUDED_
/* Shared definitions for pyrecoll.cpp and pyrclextract.cpp */
#include <Python.h>
#include <memory>
@ -31,6 +33,7 @@ typedef struct {
std::shared_ptr<RclConfig> rclconfig;
} recoll_DocObject;
#define PYRECOLL_PACKAGE "recoll."
extern PyTypeObject rclx_ExtractorType;
extern PyTypeObject recoll_DocType;
#endif // _PYRECOLL_H_INCLUDED_

View File

@ -0,0 +1,23 @@
# Copyright (C) 2020 J.F.Dockes
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the
# Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
# We used to have two C extensions: recoll and rclextract, which was a really
# bad idea. They are now merged into the _recoll C extension module. The two
# python modules recoll.py and rclextract.py only exist for compatibility (for
# now: maybe we'll do something with them in the future).
from ._recoll import Extractor

View File

@ -0,0 +1,22 @@
# Copyright (C) 2020 J.F.Dockes
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the
# Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
# We used to have two C extensions: recoll and rclextract, which was a really
# bad idea. They are now merged into the _recoll C extension module. The two
# python modules recoll.py and rclextract.py only exist for compatibility (for
# now: maybe we'll do something with them in the future).
from ._recoll import *

View File

@ -25,10 +25,9 @@ if 'libdir' in os.environ and os.environ['libdir'] != "":
else:
runtime_library_dirs = [os.path.join('@prefix@', 'lib', 'recoll')]
module1 = Extension('recoll',
module1 = Extension('_recoll',
define_macros = [('MAJOR_VERSION', '1'),
('MINOR_VERSION', '0'),
('UNAC_VERSION', '"1.0.7"'),
('RECOLL_DATADIR', '"@RECOLL_DATADIR@"')
],
include_dirs = ['/usr/local/include',
@ -36,6 +35,7 @@ module1 = Extension('recoll',
os.path.join(top, 'common'),
os.path.join(localtop, 'common'),
os.path.join(top, 'common'),
os.path.join(top, 'internfile'),
os.path.join(top, 'rcldb'),
os.path.join(top, 'query'),
os.path.join(top, 'unac')
@ -44,26 +44,9 @@ module1 = Extension('recoll',
libraries = libraries,
library_dirs = library_dirs,
runtime_library_dirs = runtime_library_dirs,
sources = [os.path.join(pytop, 'pyrecoll.cpp')])
module2 = Extension('rclextract',
define_macros = [('MAJOR_VERSION', '1'),
('MINOR_VERSION', '0'),
('UNAC_VERSION', '"1.0.7"'),
('RECOLL_DATADIR', '"@RECOLL_DATADIR@"')
],
include_dirs = ['/usr/local/include',
os.path.join(top, 'utils'),
os.path.join(top, 'common'),
os.path.join(localtop, 'common'),
os.path.join(top, 'internfile'),
os.path.join(top, 'rcldb'),
],
extra_compile_args = extra_compile_args,
libraries = libraries,
library_dirs = library_dirs,
runtime_library_dirs = runtime_library_dirs,
sources = [os.path.join(pytop, 'pyrclextract.cpp')])
sources = [os.path.join(pytop, 'pyrecoll.cpp'),
os.path.join(pytop, 'pyrclextract.cpp')
])
setup (name = 'Recoll',
version = '1.0',
@ -77,4 +60,4 @@ setup (name = 'Recoll',
''',
packages = ['recoll'],
ext_package = 'recoll',
ext_modules = [module1, module2])
ext_modules = [module1])

View File

@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/python3
# -*- coding: utf-8 -*-
"""A python version of the command line query tool recollq (a bit simplified)
The input string is always interpreted as a query language string.
@ -14,13 +14,7 @@ if sys.version_info[0] >= 3:
else:
ISP3 = False
try:
from recoll import recoll
from recoll import rclextract
hasextract = True
except:
import recoll
hasextract = False
from recoll import recoll, rclextract
allmeta = ("title", "keywords", "abstract", "url", "mimetype", "mtime",
"ipath", "fbytes", "dbytes", "relevancyrating")
@ -97,7 +91,7 @@ def doquery(db, q):
print(utf8string(abs))
print('')
# fulldoc = extract(doc)
# print "FULLDOC MIMETYPE", fulldoc.mimetype, "TEXT:", fulldoc.text.encode("utf-8")
# print("FULLDOC MIMETYPE %s TEXT: %s" % (fulldoc.mimetype,fulldoc.text))
########################################### MAIN