Python module: merge the pyrecoll and pyrclextract C extensions into a single _recoll extension, and add two Python modules (recoll.py and rclextract.py) to maintain compatibility
This commit is contained in:
parent
6daba6884f
commit
406c6b3257
@ -1,4 +1,4 @@
|
||||
/* Copyright (C) 2007 J.F.Dockes
|
||||
/* Copyright (C) 2007-2020 J.F.Dockes
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
@ -33,9 +33,6 @@
|
||||
|
||||
using namespace std;
|
||||
|
||||
// Imported from pyrecoll
|
||||
static PyObject *recoll_DocType;
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
/// Extractor object code
|
||||
typedef struct {
|
||||
@ -76,12 +73,12 @@ Extractor_init(rclx_ExtractorObject *self, PyObject *args, PyObject *kwargs)
|
||||
{
|
||||
LOGDEB("Extractor_init\n" );
|
||||
static const char* kwlist[] = {"doc", NULL};
|
||||
PyObject *pdobj;
|
||||
recoll_DocObject *dobj = 0;
|
||||
|
||||
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O!", (char**)kwlist,
|
||||
recoll_DocType, &pdobj))
|
||||
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O!:Extractor_init",
|
||||
(char**)kwlist,
|
||||
&recoll_DocType, &dobj))
|
||||
return -1;
|
||||
recoll_DocObject *dobj = (recoll_DocObject *)pdobj;
|
||||
if (dobj->doc == 0) {
|
||||
PyErr_SetString(PyExc_AttributeError, "Null Doc ?");
|
||||
return -1;
|
||||
@ -124,7 +121,7 @@ Extractor_textextract(rclx_ExtractorObject* self, PyObject *args,
|
||||
}
|
||||
/* Call the doc class object to create a new doc. */
|
||||
recoll_DocObject *result =
|
||||
(recoll_DocObject *)PyObject_CallObject((PyObject *)recoll_DocType, 0);
|
||||
(recoll_DocObject *)PyObject_CallObject((PyObject *)&recoll_DocType, 0);
|
||||
if (!result) {
|
||||
PyErr_SetString(PyExc_AttributeError, "extract: doc create failed");
|
||||
return 0;
|
||||
@ -229,9 +226,10 @@ PyDoc_STRVAR(doc_ExtractorObject,
|
||||
"An Extractor object can extract data from a native simple or compound\n"
|
||||
"object.\n"
|
||||
);
|
||||
static PyTypeObject rclx_ExtractorType = {
|
||||
|
||||
PyTypeObject rclx_ExtractorType = {
|
||||
PyVarObject_HEAD_INIT(NULL, 0)
|
||||
"rclextract.Extractor", /*tp_name*/
|
||||
"_rclextract.Extractor", /*tp_name*/
|
||||
sizeof(rclx_ExtractorObject), /*tp_basicsize*/
|
||||
0, /*tp_itemsize*/
|
||||
(destructor)Extractor_dealloc, /*tp_dealloc*/
|
||||
@ -269,114 +267,3 @@ static PyTypeObject rclx_ExtractorType = {
|
||||
0, /* tp_alloc */
|
||||
Extractor_new, /* tp_new */
|
||||
};
|
||||
|
||||
///////////////////////////////////// Module-level stuff
|
||||
static PyMethodDef rclextract_methods[] = {
|
||||
{NULL, NULL, 0, NULL} /* Sentinel */
|
||||
};
|
||||
PyDoc_STRVAR(rclx_doc_string,
|
||||
"This is an interface to the Recoll text extraction features.");
|
||||
|
||||
struct module_state {
|
||||
PyObject *error;
|
||||
};
|
||||
|
||||
#if PY_MAJOR_VERSION >= 3
|
||||
#define GETSTATE(m) ((struct module_state*)PyModule_GetState(m))
|
||||
#else
|
||||
#define GETSTATE(m) (&_state)
|
||||
static struct module_state _state;
|
||||
#endif
|
||||
|
||||
#if PY_MAJOR_VERSION >= 3
|
||||
static int rclextract_traverse(PyObject *m, visitproc visit, void *arg) {
|
||||
Py_VISIT(GETSTATE(m)->error);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int rclextract_clear(PyObject *m) {
|
||||
Py_CLEAR(GETSTATE(m)->error);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct PyModuleDef moduledef = {
|
||||
PyModuleDef_HEAD_INIT,
|
||||
"rclextract",
|
||||
NULL,
|
||||
sizeof(struct module_state),
|
||||
rclextract_methods,
|
||||
NULL,
|
||||
rclextract_traverse,
|
||||
rclextract_clear,
|
||||
NULL
|
||||
};
|
||||
|
||||
#define INITERROR return NULL
|
||||
|
||||
extern "C" PyObject *
|
||||
PyInit_rclextract(void)
|
||||
|
||||
#else
|
||||
#define INITERROR return
|
||||
PyMODINIT_FUNC
|
||||
initrclextract(void)
|
||||
#endif
|
||||
{
|
||||
// We run recollinit. It's responsible for initializing some static data
|
||||
// which is distinct from pyrecoll's as we're separately dlopened.
|
||||
// The rclconfig object is not used, we'll get the config
|
||||
// data from the objects out of the recoll module.
|
||||
// Unfortunately, as we're not getting the actual config directory
|
||||
// from pyrecoll (we could, through a capsule), this needs at
|
||||
// least an empty default configuration directory to work.
|
||||
string reason;
|
||||
RclConfig *rclconfig = recollinit(RCLINIT_PYTHON, 0, 0, reason, 0);
|
||||
if (rclconfig == 0) {
|
||||
PyErr_SetString(PyExc_EnvironmentError, reason.c_str());
|
||||
INITERROR;
|
||||
} else {
|
||||
delete rclconfig;
|
||||
}
|
||||
|
||||
#if PY_MAJOR_VERSION >= 3
|
||||
PyObject *module = PyModule_Create(&moduledef);
|
||||
#else
|
||||
PyObject *module = Py_InitModule("rclextract", rclextract_methods);
|
||||
#endif
|
||||
if (module == NULL)
|
||||
INITERROR;
|
||||
|
||||
struct module_state *st = GETSTATE(module);
|
||||
// The first parameter is a char *. Hopefully we don't initialize
|
||||
// modules too often...
|
||||
st->error = PyErr_NewException(strdup("rclextract.Error"), NULL, NULL);
|
||||
if (st->error == NULL) {
|
||||
Py_DECREF(module);
|
||||
INITERROR;
|
||||
}
|
||||
|
||||
PyModule_AddStringConstant(module, "__doc__", rclx_doc_string);
|
||||
|
||||
if (PyType_Ready(&rclx_ExtractorType) < 0)
|
||||
INITERROR;
|
||||
Py_INCREF(&rclx_ExtractorType);
|
||||
PyModule_AddObject(module, "Extractor", (PyObject *)&rclx_ExtractorType);
|
||||
|
||||
#if PY_MAJOR_VERSION >= 3 || (PY_MAJOR_VERSION >= 2 && PY_MINOR_VERSION >= 7)
|
||||
recoll_DocType = (PyObject*)PyCapsule_Import(PYRECOLL_PACKAGE "recoll.doctypeptr", 0);
|
||||
#else
|
||||
PyObject *module1 = PyImport_ImportModule(PYRECOLL_PACKAGE "recoll");
|
||||
if (module1 != NULL) {
|
||||
PyObject *cobject = PyObject_GetAttrString(module1, "doctypeptr");
|
||||
if (cobject == NULL)
|
||||
INITERROR;
|
||||
if (PyCObject_Check(cobject))
|
||||
recoll_DocType = (PyObject*)PyCObject_AsVoidPtr(cobject);
|
||||
Py_DECREF(cobject);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if PY_MAJOR_VERSION >= 3
|
||||
return module;
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
/* Copyright (C) 2007 J.F.Dockes
|
||||
/* Copyright (C) 2007-2020 J.F.Dockes
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
@ -140,7 +140,7 @@ static PyMethodDef SearchData_methods[] = {
|
||||
|
||||
static PyTypeObject recoll_SearchDataType = {
|
||||
PyVarObject_HEAD_INIT(NULL, 0)
|
||||
"recoll.SearchData", /*tp_name*/
|
||||
"_recoll.SearchData", /*tp_name*/
|
||||
sizeof(recoll_SearchDataObject), /*tp_basicsize*/
|
||||
0, /*tp_itemsize*/
|
||||
(destructor)SearchData_dealloc, /*tp_dealloc*/
|
||||
@ -775,9 +775,10 @@ PyDoc_STRVAR(doc_DocObject,
|
||||
" title (both)\n"
|
||||
" keywords (both)\n"
|
||||
);
|
||||
static PyTypeObject recoll_DocType = {
|
||||
|
||||
PyTypeObject recoll_DocType = {
|
||||
PyVarObject_HEAD_INIT(NULL, 0)
|
||||
"recoll.Doc", /*tp_name*/
|
||||
"_recoll.Doc", /*tp_name*/
|
||||
sizeof(recoll_DocObject), /*tp_basicsize*/
|
||||
0, /*tp_itemsize*/
|
||||
(destructor)Doc_dealloc, /*tp_dealloc*/
|
||||
@ -1319,6 +1320,7 @@ PyDoc_STRVAR(doc_Query_makedocabstract,
|
||||
" terms\n"
|
||||
"If methods is set, will also perform highlighting. See the highlight method\n"
|
||||
);
|
||||
|
||||
static PyObject *
|
||||
Query_makedocabstract(recoll_QueryObject* self, PyObject *args,PyObject *kwargs)
|
||||
{
|
||||
@ -1522,7 +1524,7 @@ PyDoc_STRVAR(doc_QueryObject,
|
||||
);
|
||||
static PyTypeObject recoll_QueryType = {
|
||||
PyVarObject_HEAD_INIT(NULL, 0)
|
||||
"recoll.Query", /*tp_name*/
|
||||
"_recoll.Query", /*tp_name*/
|
||||
sizeof(recoll_QueryObject), /*tp_basicsize*/
|
||||
0, /*tp_itemsize*/
|
||||
(destructor)Query_dealloc, /*tp_dealloc*/
|
||||
@ -2025,7 +2027,7 @@ PyDoc_STRVAR(doc_DbObject,
|
||||
);
|
||||
static PyTypeObject recoll_DbType = {
|
||||
PyVarObject_HEAD_INIT(NULL, 0)
|
||||
"recoll.Db", /*tp_name*/
|
||||
"_recoll.Db", /*tp_name*/
|
||||
sizeof(recoll_DbObject), /*tp_basicsize*/
|
||||
0, /*tp_itemsize*/
|
||||
(destructor)Db_dealloc, /*tp_dealloc*/
|
||||
@ -2122,7 +2124,7 @@ static int recoll_clear(PyObject *m) {
|
||||
|
||||
static struct PyModuleDef moduledef = {
|
||||
PyModuleDef_HEAD_INIT,
|
||||
"recoll",
|
||||
"_recoll",
|
||||
NULL,
|
||||
sizeof(struct module_state),
|
||||
recoll_methods,
|
||||
@ -2135,13 +2137,13 @@ static struct PyModuleDef moduledef = {
|
||||
#define INITERROR return NULL
|
||||
|
||||
extern "C" PyObject *
|
||||
PyInit_recoll(void)
|
||||
PyInit__recoll(void)
|
||||
|
||||
#else
|
||||
#define INITERROR return
|
||||
|
||||
PyMODINIT_FUNC
|
||||
initrecoll(void)
|
||||
init_recoll(void)
|
||||
#endif
|
||||
{
|
||||
// Note: we can't call recollinit here, because the confdir is only really
|
||||
@ -2152,7 +2154,7 @@ PyInit_recoll(void)
|
||||
#if PY_MAJOR_VERSION >= 3
|
||||
PyObject *module = PyModule_Create(&moduledef);
|
||||
#else
|
||||
PyObject *module = Py_InitModule("recoll", recoll_methods);
|
||||
PyObject *module = Py_InitModule("_recoll", recoll_methods);
|
||||
#endif
|
||||
if (module == NULL)
|
||||
INITERROR;
|
||||
@ -2160,7 +2162,7 @@ PyInit_recoll(void)
|
||||
struct module_state *st = GETSTATE(module);
|
||||
// The first parameter is a char *. Hopefully we don't initialize
|
||||
// modules too often...
|
||||
st->error = PyErr_NewException(strdup("recoll.Error"), NULL, NULL);
|
||||
st->error = PyErr_NewException(strdup("_recoll.Error"), NULL, NULL);
|
||||
if (st->error == NULL) {
|
||||
Py_DECREF(module);
|
||||
INITERROR;
|
||||
@ -2186,20 +2188,13 @@ PyInit_recoll(void)
|
||||
Py_INCREF((PyObject*)&recoll_SearchDataType);
|
||||
PyModule_AddObject(module, "SearchData",
|
||||
(PyObject *)&recoll_SearchDataType);
|
||||
PyModule_AddStringConstant(module, "__doc__",
|
||||
pyrecoll_doc_string);
|
||||
|
||||
PyObject *doctypecobject;
|
||||
PyModule_AddStringConstant(module, "__doc__", pyrecoll_doc_string);
|
||||
|
||||
#if PY_MAJOR_VERSION >= 3 || (PY_MAJOR_VERSION >= 2 && PY_MINOR_VERSION >= 7)
|
||||
// Export a few pointers for the benefit of other recoll python modules
|
||||
doctypecobject=
|
||||
PyCapsule_New(&recoll_DocType, PYRECOLL_PACKAGE "recoll.doctypeptr", 0);
|
||||
#else
|
||||
doctypecobject = PyCObject_FromVoidPtr(&recoll_DocType, NULL);
|
||||
#endif
|
||||
|
||||
PyModule_AddObject(module, "doctypeptr", doctypecobject);
|
||||
if (PyType_Ready(&rclx_ExtractorType) < 0)
|
||||
INITERROR;
|
||||
Py_INCREF(&rclx_ExtractorType);
|
||||
PyModule_AddObject(module, "Extractor", (PyObject *)&rclx_ExtractorType);
|
||||
|
||||
#if PY_MAJOR_VERSION >= 3
|
||||
return module;
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
/* Copyright (C) 2012 J.F.Dockes
|
||||
/* Copyright (C) 2012-2020 J.F.Dockes
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
@ -17,6 +17,8 @@
|
||||
#ifndef _PYRECOLL_H_INCLUDED_
|
||||
#define _PYRECOLL_H_INCLUDED_
|
||||
|
||||
/* Shared definitions for pyrecoll.cpp and pyrclextract.cpp */
|
||||
|
||||
#include <Python.h>
|
||||
|
||||
#include <memory>
|
||||
@ -31,6 +33,7 @@ typedef struct {
|
||||
std::shared_ptr<RclConfig> rclconfig;
|
||||
} recoll_DocObject;
|
||||
|
||||
#define PYRECOLL_PACKAGE "recoll."
|
||||
extern PyTypeObject rclx_ExtractorType;
|
||||
extern PyTypeObject recoll_DocType;
|
||||
|
||||
#endif // _PYRECOLL_H_INCLUDED_
|
||||
|
||||
23
src/python/recoll/recoll/rclextract.py
Normal file
23
src/python/recoll/recoll/rclextract.py
Normal file
@ -0,0 +1,23 @@
|
||||
# Copyright (C) 2020 J.F.Dockes
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the
|
||||
# Free Software Foundation, Inc.,
|
||||
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
# We used to have two C extensions: recoll and rclextract, which was a really
|
||||
# bad idea. They are now merged into the _recoll C extension module. The two
|
||||
# python modules recoll.py and rclextract.py only exist for compatibility (for
|
||||
# now: maybe we'll do something with them in the future).
|
||||
|
||||
|
||||
from ._recoll import Extractor
|
||||
22
src/python/recoll/recoll/recoll.py
Normal file
22
src/python/recoll/recoll/recoll.py
Normal file
@ -0,0 +1,22 @@
|
||||
# Copyright (C) 2020 J.F.Dockes
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the
|
||||
# Free Software Foundation, Inc.,
|
||||
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
# We used to have two C extensions: recoll and rclextract, which was a really
|
||||
# bad idea. They are now merged into the _recoll C extension module. The two
|
||||
# python modules recoll.py and rclextract.py only exist for compatibility (for
|
||||
# now: maybe we'll do something with them in the future).
|
||||
|
||||
from ._recoll import *
|
||||
@ -25,10 +25,9 @@ if 'libdir' in os.environ and os.environ['libdir'] != "":
|
||||
else:
|
||||
runtime_library_dirs = [os.path.join('@prefix@', 'lib', 'recoll')]
|
||||
|
||||
module1 = Extension('recoll',
|
||||
module1 = Extension('_recoll',
|
||||
define_macros = [('MAJOR_VERSION', '1'),
|
||||
('MINOR_VERSION', '0'),
|
||||
('UNAC_VERSION', '"1.0.7"'),
|
||||
('RECOLL_DATADIR', '"@RECOLL_DATADIR@"')
|
||||
],
|
||||
include_dirs = ['/usr/local/include',
|
||||
@ -36,6 +35,7 @@ module1 = Extension('recoll',
|
||||
os.path.join(top, 'common'),
|
||||
os.path.join(localtop, 'common'),
|
||||
os.path.join(top, 'common'),
|
||||
os.path.join(top, 'internfile'),
|
||||
os.path.join(top, 'rcldb'),
|
||||
os.path.join(top, 'query'),
|
||||
os.path.join(top, 'unac')
|
||||
@ -44,26 +44,9 @@ module1 = Extension('recoll',
|
||||
libraries = libraries,
|
||||
library_dirs = library_dirs,
|
||||
runtime_library_dirs = runtime_library_dirs,
|
||||
sources = [os.path.join(pytop, 'pyrecoll.cpp')])
|
||||
|
||||
module2 = Extension('rclextract',
|
||||
define_macros = [('MAJOR_VERSION', '1'),
|
||||
('MINOR_VERSION', '0'),
|
||||
('UNAC_VERSION', '"1.0.7"'),
|
||||
('RECOLL_DATADIR', '"@RECOLL_DATADIR@"')
|
||||
],
|
||||
include_dirs = ['/usr/local/include',
|
||||
os.path.join(top, 'utils'),
|
||||
os.path.join(top, 'common'),
|
||||
os.path.join(localtop, 'common'),
|
||||
os.path.join(top, 'internfile'),
|
||||
os.path.join(top, 'rcldb'),
|
||||
],
|
||||
extra_compile_args = extra_compile_args,
|
||||
libraries = libraries,
|
||||
library_dirs = library_dirs,
|
||||
runtime_library_dirs = runtime_library_dirs,
|
||||
sources = [os.path.join(pytop, 'pyrclextract.cpp')])
|
||||
sources = [os.path.join(pytop, 'pyrecoll.cpp'),
|
||||
os.path.join(pytop, 'pyrclextract.cpp')
|
||||
])
|
||||
|
||||
setup (name = 'Recoll',
|
||||
version = '1.0',
|
||||
@ -77,4 +60,4 @@ setup (name = 'Recoll',
|
||||
''',
|
||||
packages = ['recoll'],
|
||||
ext_package = 'recoll',
|
||||
ext_modules = [module1, module2])
|
||||
ext_modules = [module1])
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
#!/usr/bin/env python
|
||||
#!/usr/bin/python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""A python version of the command line query tool recollq (a bit simplified)
|
||||
The input string is always interpreted as a query language string.
|
||||
@ -14,14 +14,8 @@ if sys.version_info[0] >= 3:
|
||||
else:
|
||||
ISP3 = False
|
||||
|
||||
try:
|
||||
from recoll import recoll
|
||||
from recoll import rclextract
|
||||
hasextract = True
|
||||
except:
|
||||
import recoll
|
||||
hasextract = False
|
||||
|
||||
from recoll import recoll, rclextract
|
||||
|
||||
allmeta = ("title", "keywords", "abstract", "url", "mimetype", "mtime",
|
||||
"ipath", "fbytes", "dbytes", "relevancyrating")
|
||||
|
||||
@ -97,7 +91,7 @@ def doquery(db, q):
|
||||
print(utf8string(abs))
|
||||
print('')
|
||||
# fulldoc = extract(doc)
|
||||
# print "FULLDOC MIMETYPE", fulldoc.mimetype, "TEXT:", fulldoc.text.encode("utf-8")
|
||||
# print("FULLDOC MIMETYPE %s TEXT: %s" % (fulldoc.mimetype,fulldoc.text))
|
||||
|
||||
|
||||
########################################### MAIN
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user