diff --git a/src/python/recoll/pyrclextract.cpp b/src/python/recoll/pyrclextract.cpp index c0277721..96325856 100644 --- a/src/python/recoll/pyrclextract.cpp +++ b/src/python/recoll/pyrclextract.cpp @@ -1,4 +1,4 @@ -/* Copyright (C) 2007 J.F.Dockes +/* Copyright (C) 2007-2020 J.F.Dockes * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -33,9 +33,6 @@ using namespace std; -// Imported from pyrecoll -static PyObject *recoll_DocType; - ////////////////////////////////////////////////////////////////////// /// Extractor object code typedef struct { @@ -76,12 +73,12 @@ Extractor_init(rclx_ExtractorObject *self, PyObject *args, PyObject *kwargs) { LOGDEB("Extractor_init\n" ); static const char* kwlist[] = {"doc", NULL}; - PyObject *pdobj; + recoll_DocObject *dobj = 0; - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O!", (char**)kwlist, - recoll_DocType, &pdobj)) + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O!:Extractor_init", + (char**)kwlist, + &recoll_DocType, &dobj)) return -1; - recoll_DocObject *dobj = (recoll_DocObject *)pdobj; if (dobj->doc == 0) { PyErr_SetString(PyExc_AttributeError, "Null Doc ?"); return -1; @@ -124,7 +121,7 @@ Extractor_textextract(rclx_ExtractorObject* self, PyObject *args, } /* Call the doc class object to create a new doc. */ recoll_DocObject *result = - (recoll_DocObject *)PyObject_CallObject((PyObject *)recoll_DocType, 0); + (recoll_DocObject *)PyObject_CallObject((PyObject *)&recoll_DocType, 0); if (!result) { PyErr_SetString(PyExc_AttributeError, "extract: doc create failed"); return 0; @@ -229,9 +226,10 @@ PyDoc_STRVAR(doc_ExtractorObject, "An Extractor object can extract data from a native simple or compound\n" "object.\n" ); -static PyTypeObject rclx_ExtractorType = { + +PyTypeObject rclx_ExtractorType = { PyVarObject_HEAD_INIT(NULL, 0) - "rclextract.Extractor", /*tp_name*/ + "_rclextract.Extractor", /*tp_name*/ sizeof(rclx_ExtractorObject), /*tp_basicsize*/ 0, /*tp_itemsize*/ (destructor)Extractor_dealloc, /*tp_dealloc*/ @@ -269,114 +267,3 @@ static PyTypeObject rclx_ExtractorType = { 0, /* tp_alloc */ Extractor_new, /* tp_new */ }; - -///////////////////////////////////// Module-level stuff -static PyMethodDef rclextract_methods[] = { - {NULL, NULL, 0, NULL} /* Sentinel */ -}; -PyDoc_STRVAR(rclx_doc_string, - "This is an interface to the Recoll text extraction features."); - -struct module_state { - PyObject *error; -}; - -#if PY_MAJOR_VERSION >= 3 -#define GETSTATE(m) ((struct module_state*)PyModule_GetState(m)) -#else -#define GETSTATE(m) (&_state) -static struct module_state _state; -#endif - -#if PY_MAJOR_VERSION >= 3 -static int rclextract_traverse(PyObject *m, visitproc visit, void *arg) { - Py_VISIT(GETSTATE(m)->error); - return 0; -} - -static int rclextract_clear(PyObject *m) { - Py_CLEAR(GETSTATE(m)->error); - return 0; -} - -static struct PyModuleDef moduledef = { - PyModuleDef_HEAD_INIT, - "rclextract", - NULL, - sizeof(struct module_state), - rclextract_methods, - NULL, - rclextract_traverse, - rclextract_clear, - NULL -}; - -#define INITERROR return NULL - -extern "C" PyObject * -PyInit_rclextract(void) - -#else -#define INITERROR return - PyMODINIT_FUNC - initrclextract(void) -#endif -{ - // We run recollinit. It's responsible for initializing some static data - // which is distinct from pyrecoll's as we're separately dlopened. - // The rclconfig object is not used, we'll get the config - // data from the objects out of the recoll module. - // Unfortunately, as we're not getting the actual config directory - // from pyrecoll (we could, through a capsule), this needs at - // least an empty default configuration directory to work. - string reason; - RclConfig *rclconfig = recollinit(RCLINIT_PYTHON, 0, 0, reason, 0); - if (rclconfig == 0) { - PyErr_SetString(PyExc_EnvironmentError, reason.c_str()); - INITERROR; - } else { - delete rclconfig; - } - -#if PY_MAJOR_VERSION >= 3 - PyObject *module = PyModule_Create(&moduledef); -#else - PyObject *module = Py_InitModule("rclextract", rclextract_methods); -#endif - if (module == NULL) - INITERROR; - - struct module_state *st = GETSTATE(module); - // The first parameter is a char *. Hopefully we don't initialize - // modules too often... - st->error = PyErr_NewException(strdup("rclextract.Error"), NULL, NULL); - if (st->error == NULL) { - Py_DECREF(module); - INITERROR; - } - - PyModule_AddStringConstant(module, "__doc__", rclx_doc_string); - - if (PyType_Ready(&rclx_ExtractorType) < 0) - INITERROR; - Py_INCREF(&rclx_ExtractorType); - PyModule_AddObject(module, "Extractor", (PyObject *)&rclx_ExtractorType); - -#if PY_MAJOR_VERSION >= 3 || (PY_MAJOR_VERSION >= 2 && PY_MINOR_VERSION >= 7) - recoll_DocType = (PyObject*)PyCapsule_Import(PYRECOLL_PACKAGE "recoll.doctypeptr", 0); -#else - PyObject *module1 = PyImport_ImportModule(PYRECOLL_PACKAGE "recoll"); - if (module1 != NULL) { - PyObject *cobject = PyObject_GetAttrString(module1, "doctypeptr"); - if (cobject == NULL) - INITERROR; - if (PyCObject_Check(cobject)) - recoll_DocType = (PyObject*)PyCObject_AsVoidPtr(cobject); - Py_DECREF(cobject); - } -#endif - -#if PY_MAJOR_VERSION >= 3 - return module; -#endif -} diff --git a/src/python/recoll/pyrecoll.cpp b/src/python/recoll/pyrecoll.cpp index 398b918b..a85b7dde 100644 --- a/src/python/recoll/pyrecoll.cpp +++ b/src/python/recoll/pyrecoll.cpp @@ -1,4 +1,4 @@ -/* Copyright (C) 2007 J.F.Dockes +/* Copyright (C) 2007-2020 J.F.Dockes * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -140,7 +140,7 @@ static PyMethodDef SearchData_methods[] = { static PyTypeObject recoll_SearchDataType = { PyVarObject_HEAD_INIT(NULL, 0) - "recoll.SearchData", /*tp_name*/ + "_recoll.SearchData", /*tp_name*/ sizeof(recoll_SearchDataObject), /*tp_basicsize*/ 0, /*tp_itemsize*/ (destructor)SearchData_dealloc, /*tp_dealloc*/ @@ -775,9 +775,10 @@ PyDoc_STRVAR(doc_DocObject, " title (both)\n" " keywords (both)\n" ); -static PyTypeObject recoll_DocType = { + +PyTypeObject recoll_DocType = { PyVarObject_HEAD_INIT(NULL, 0) - "recoll.Doc", /*tp_name*/ + "_recoll.Doc", /*tp_name*/ sizeof(recoll_DocObject), /*tp_basicsize*/ 0, /*tp_itemsize*/ (destructor)Doc_dealloc, /*tp_dealloc*/ @@ -1319,6 +1320,7 @@ PyDoc_STRVAR(doc_Query_makedocabstract, " terms\n" "If methods is set, will also perform highlighting. See the highlight method\n" ); + static PyObject * Query_makedocabstract(recoll_QueryObject* self, PyObject *args,PyObject *kwargs) { @@ -1522,7 +1524,7 @@ PyDoc_STRVAR(doc_QueryObject, ); static PyTypeObject recoll_QueryType = { PyVarObject_HEAD_INIT(NULL, 0) - "recoll.Query", /*tp_name*/ + "_recoll.Query", /*tp_name*/ sizeof(recoll_QueryObject), /*tp_basicsize*/ 0, /*tp_itemsize*/ (destructor)Query_dealloc, /*tp_dealloc*/ @@ -2025,7 +2027,7 @@ PyDoc_STRVAR(doc_DbObject, ); static PyTypeObject recoll_DbType = { PyVarObject_HEAD_INIT(NULL, 0) - "recoll.Db", /*tp_name*/ + "_recoll.Db", /*tp_name*/ sizeof(recoll_DbObject), /*tp_basicsize*/ 0, /*tp_itemsize*/ (destructor)Db_dealloc, /*tp_dealloc*/ @@ -2122,7 +2124,7 @@ static int recoll_clear(PyObject *m) { static struct PyModuleDef moduledef = { PyModuleDef_HEAD_INIT, - "recoll", + "_recoll", NULL, sizeof(struct module_state), recoll_methods, @@ -2135,13 +2137,13 @@ static struct PyModuleDef moduledef = { #define INITERROR return NULL extern "C" PyObject * -PyInit_recoll(void) +PyInit__recoll(void) #else #define INITERROR return PyMODINIT_FUNC - initrecoll(void) + init_recoll(void) #endif { // Note: we can't call recollinit here, because the confdir is only really @@ -2152,7 +2154,7 @@ PyInit_recoll(void) #if PY_MAJOR_VERSION >= 3 PyObject *module = PyModule_Create(&moduledef); #else - PyObject *module = Py_InitModule("recoll", recoll_methods); + PyObject *module = Py_InitModule("_recoll", recoll_methods); #endif if (module == NULL) INITERROR; @@ -2160,7 +2162,7 @@ PyInit_recoll(void) struct module_state *st = GETSTATE(module); // The first parameter is a char *. Hopefully we don't initialize // modules too often... - st->error = PyErr_NewException(strdup("recoll.Error"), NULL, NULL); + st->error = PyErr_NewException(strdup("_recoll.Error"), NULL, NULL); if (st->error == NULL) { Py_DECREF(module); INITERROR; @@ -2186,20 +2188,13 @@ PyInit_recoll(void) Py_INCREF((PyObject*)&recoll_SearchDataType); PyModule_AddObject(module, "SearchData", (PyObject *)&recoll_SearchDataType); - PyModule_AddStringConstant(module, "__doc__", - pyrecoll_doc_string); - PyObject *doctypecobject; + PyModule_AddStringConstant(module, "__doc__", pyrecoll_doc_string); -#if PY_MAJOR_VERSION >= 3 || (PY_MAJOR_VERSION >= 2 && PY_MINOR_VERSION >= 7) - // Export a few pointers for the benefit of other recoll python modules - doctypecobject= - PyCapsule_New(&recoll_DocType, PYRECOLL_PACKAGE "recoll.doctypeptr", 0); -#else - doctypecobject = PyCObject_FromVoidPtr(&recoll_DocType, NULL); -#endif - - PyModule_AddObject(module, "doctypeptr", doctypecobject); + if (PyType_Ready(&rclx_ExtractorType) < 0) + INITERROR; + Py_INCREF(&rclx_ExtractorType); + PyModule_AddObject(module, "Extractor", (PyObject *)&rclx_ExtractorType); #if PY_MAJOR_VERSION >= 3 return module; diff --git a/src/python/recoll/pyrecoll.h b/src/python/recoll/pyrecoll.h index 29fcae7a..be121a31 100644 --- a/src/python/recoll/pyrecoll.h +++ b/src/python/recoll/pyrecoll.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2012 J.F.Dockes +/* Copyright (C) 2012-2020 J.F.Dockes * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -17,6 +17,8 @@ #ifndef _PYRECOLL_H_INCLUDED_ #define _PYRECOLL_H_INCLUDED_ +/* Shared definitions for pyrecoll.cpp and pyrclextract.cpp */ + #include #include @@ -31,6 +33,7 @@ typedef struct { std::shared_ptr rclconfig; } recoll_DocObject; -#define PYRECOLL_PACKAGE "recoll." +extern PyTypeObject rclx_ExtractorType; +extern PyTypeObject recoll_DocType; #endif // _PYRECOLL_H_INCLUDED_ diff --git a/src/python/recoll/recoll/rclextract.py b/src/python/recoll/recoll/rclextract.py new file mode 100644 index 00000000..17b8936a --- /dev/null +++ b/src/python/recoll/recoll/rclextract.py @@ -0,0 +1,23 @@ +# Copyright (C) 2020 J.F.Dockes +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the +# Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +# We used to have two C extensions: recoll and rclextract, which was a really +# bad idea. They are now merged into the _recoll C extension module. The two +# python modules recoll.py and rclextract.py only exist for compatibility (for +# now: maybe we'll do something with them in the future). + + +from ._recoll import Extractor diff --git a/src/python/recoll/recoll/recoll.py b/src/python/recoll/recoll/recoll.py new file mode 100644 index 00000000..97fc83f6 --- /dev/null +++ b/src/python/recoll/recoll/recoll.py @@ -0,0 +1,22 @@ +# Copyright (C) 2020 J.F.Dockes +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the +# Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +# We used to have two C extensions: recoll and rclextract, which was a really +# bad idea. They are now merged into the _recoll C extension module. The two +# python modules recoll.py and rclextract.py only exist for compatibility (for +# now: maybe we'll do something with them in the future). + +from ._recoll import * diff --git a/src/python/recoll/setup.py.in b/src/python/recoll/setup.py.in index fa99e1b4..c3ac3869 100644 --- a/src/python/recoll/setup.py.in +++ b/src/python/recoll/setup.py.in @@ -25,10 +25,9 @@ if 'libdir' in os.environ and os.environ['libdir'] != "": else: runtime_library_dirs = [os.path.join('@prefix@', 'lib', 'recoll')] -module1 = Extension('recoll', +module1 = Extension('_recoll', define_macros = [('MAJOR_VERSION', '1'), ('MINOR_VERSION', '0'), - ('UNAC_VERSION', '"1.0.7"'), ('RECOLL_DATADIR', '"@RECOLL_DATADIR@"') ], include_dirs = ['/usr/local/include', @@ -36,6 +35,7 @@ module1 = Extension('recoll', os.path.join(top, 'common'), os.path.join(localtop, 'common'), os.path.join(top, 'common'), + os.path.join(top, 'internfile'), os.path.join(top, 'rcldb'), os.path.join(top, 'query'), os.path.join(top, 'unac') @@ -44,26 +44,9 @@ module1 = Extension('recoll', libraries = libraries, library_dirs = library_dirs, runtime_library_dirs = runtime_library_dirs, - sources = [os.path.join(pytop, 'pyrecoll.cpp')]) - -module2 = Extension('rclextract', - define_macros = [('MAJOR_VERSION', '1'), - ('MINOR_VERSION', '0'), - ('UNAC_VERSION', '"1.0.7"'), - ('RECOLL_DATADIR', '"@RECOLL_DATADIR@"') - ], - include_dirs = ['/usr/local/include', - os.path.join(top, 'utils'), - os.path.join(top, 'common'), - os.path.join(localtop, 'common'), - os.path.join(top, 'internfile'), - os.path.join(top, 'rcldb'), - ], - extra_compile_args = extra_compile_args, - libraries = libraries, - library_dirs = library_dirs, - runtime_library_dirs = runtime_library_dirs, - sources = [os.path.join(pytop, 'pyrclextract.cpp')]) + sources = [os.path.join(pytop, 'pyrecoll.cpp'), + os.path.join(pytop, 'pyrclextract.cpp') + ]) setup (name = 'Recoll', version = '1.0', @@ -77,4 +60,4 @@ setup (name = 'Recoll', ''', packages = ['recoll'], ext_package = 'recoll', - ext_modules = [module1, module2]) + ext_modules = [module1]) diff --git a/src/python/samples/recollq.py b/src/python/samples/recollq.py index 4b0a251c..a8374cb0 100755 --- a/src/python/samples/recollq.py +++ b/src/python/samples/recollq.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/python3 # -*- coding: utf-8 -*- """A python version of the command line query tool recollq (a bit simplified) The input string is always interpreted as a query language string. @@ -14,14 +14,8 @@ if sys.version_info[0] >= 3: else: ISP3 = False -try: - from recoll import recoll - from recoll import rclextract - hasextract = True -except: - import recoll - hasextract = False - +from recoll import recoll, rclextract + allmeta = ("title", "keywords", "abstract", "url", "mimetype", "mtime", "ipath", "fbytes", "dbytes", "relevancyrating") @@ -97,7 +91,7 @@ def doquery(db, q): print(utf8string(abs)) print('') # fulldoc = extract(doc) -# print "FULLDOC MIMETYPE", fulldoc.mimetype, "TEXT:", fulldoc.text.encode("utf-8") +# print("FULLDOC MIMETYPE %s TEXT: %s" % (fulldoc.mimetype,fulldoc.text)) ########################################### MAIN