From 73995b088bc21671034ecc7abb01aff0201a9175 Mon Sep 17 00:00:00 2001
From: dockes
Date: Mon, 25 Jan 2010 11:06:54 +0000
Subject: [PATCH] initial import from W. Song

---
Note: the file contents below were re-flowed and corrected after the
original import; the blob ids on the "index" lines still refer to the
original contents.

 src/php/recoll/config.m4    |  20 +++
 src/php/recoll/make.sh      |   9 +
 src/php/recoll/php_recoll.h |  20 +++
 src/php/recoll/recoll.cpp   | 297 ++++++++++++++++++++++++++++++++++++
 src/php/recoll/recollq.h    |  10 ++
 src/php/sample/shell.php    |  36 +++++
 6 files changed, 392 insertions(+)
 create mode 100644 src/php/recoll/config.m4
 create mode 100755 src/php/recoll/make.sh
 create mode 100644 src/php/recoll/php_recoll.h
 create mode 100644 src/php/recoll/recoll.cpp
 create mode 100644 src/php/recoll/recollq.h
 create mode 100644 src/php/sample/shell.php

diff --git a/src/php/recoll/config.m4 b/src/php/recoll/config.m4
new file mode 100644
index 00000000..32301354
--- /dev/null
+++ b/src/php/recoll/config.m4
@@ -0,0 +1,20 @@
+dnl config.m4 for the "recoll" PHP extension: builds recoll.cpp as C++ and
+dnl links it against the in-tree Recoll library (rcl), Xapian and libstdc++.
+PHP_ARG_ENABLE(recoll,
+  [Whether to enable the "recoll" extension],
+  [  --enable-recoll         Enable "recoll" extension support])
+
+if test "$PHP_RECOLL" != "no"; then
+  PHP_REQUIRE_CXX()
+  PHP_SUBST(RECOLL_SHARED_LIBADD)
+  PHP_ADD_INCLUDE(../../utils)
+  PHP_ADD_INCLUDE(../../common)
+  PHP_ADD_INCLUDE(../../rcldb)
+  PHP_ADD_INCLUDE(../../query)
+  PHP_ADD_INCLUDE(../../unac)
+  PHP_ADD_INCLUDE(../../internfile)
+  PHP_ADD_LIBRARY_WITH_PATH(rcl, ../../lib, RECOLL_SHARED_LIBADD)
+  PHP_ADD_LIBRARY(xapian, , RECOLL_SHARED_LIBADD)
+  PHP_ADD_LIBRARY(stdc++, 1, RECOLL_SHARED_LIBADD)
+  PHP_NEW_EXTENSION(recoll, recoll.cpp, $ext_shared)
+fi
diff --git a/src/php/recoll/make.sh b/src/php/recoll/make.sh
new file mode 100755
index 00000000..efbf237f
--- /dev/null
+++ b/src/php/recoll/make.sh
@@ -0,0 +1,9 @@
+#!/bin/sh
+# Regenerate the build files and compile the recoll PHP extension in place.
+phpize --clean
+phpize
+# Options go before operands: "rm aclocal.m4 -rf" only works with GNU rm.
+rm -rf aclocal.m4
+autoreconf
+# Only build when configure succeeded.
+./configure --enable-recoll && make -j3
diff --git a/src/php/recoll/php_recoll.h b/src/php/recoll/php_recoll.h
new file mode 100644
index 00000000..53bc3bbd
--- /dev/null
+++ b/src/php/recoll/php_recoll.h
@@ -0,0 +1,20 @@
+#ifndef PHP_RECOLL_H
+#define PHP_RECOLL_H
+
+#define PHP_RECOLL_EXTNAME "recoll"
+#define PHP_RECOLL_EXTVER "0.1"
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+extern "C" {
+#include "php.h"
+}
+
+/* Module descriptor, defined in recoll.cpp. */
+extern zend_module_entry recoll_module_entry;
+/* No trailing ';': the macro has to be usable inside an expression. */
+#define phpext_recoll_ptr &recoll_module_entry
+
+#endif /* PHP_RECOLL_H */
diff --git a/src/php/recoll/recoll.cpp b/src/php/recoll/recoll.cpp
new file mode 100644
index 00000000..3d8fddaa
--- /dev/null
+++ b/src/php/recoll/recoll.cpp
@@ -0,0 +1,297 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the
+ * Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+/*
+ * First Draft 2010/01/22
+ * Wenqiang Song
+ * wsong.cn@gmail.com
+ */
+#include "php_recoll.h"
+
+/*
+ * NOTE(review): the system header names were lost from this copy of the
+ * patch. The two below are what this file needs directly (memset/memcpy
+ * and std::string) - confirm against the original commit.
+ */
+#include <string.h>
+
+#include <string>
+
+#include "rcldb.h"
+#include "rclquery.h"
+#include "rclconfig.h"
+#include "pathut.h"
+#include "rclinit.h"
+#include "debuglog.h"
+#include "wasastringtoquery.h"
+#include "wasatorcl.h"
+#include "internfile.h"
+#include "wipedir.h"
+#include "transcode.h"
+
+using namespace std;
+
+/*
+ * Class Query Definition
+ */
+zend_object_handlers query_object_handlers;
+zend_class_entry *query_ce;
+
+/*
+ * Per-instance storage of the PHP "Query" class: the Zend object header
+ * followed by the Recoll query and the database it runs on. Both pointers
+ * stay NULL until Query::query() succeeds.
+ */
+struct query_object {
+    zend_object std;
+    Rcl::Query *pRclQuery;
+    Rcl::Db *pRclDb;
+};
+
+/* Delete the Recoll objects held by obj: the query first, then the database. */
+static void query_release(query_object *obj)
+{
+    delete obj->pRclQuery;
+    obj->pRclQuery = NULL;
+    delete obj->pRclDb;
+    obj->pRclDb = NULL;
+}
+
+/* Object-store destructor: frees the Recoll objects, the property table and obj. */
+void query_free_storage(void *object TSRMLS_DC)
+{
+    query_object *obj = (query_object *)object;
+
+    query_release(obj);
+
+    zend_hash_destroy(obj->std.properties);
+    FREE_HASHTABLE(obj->std.properties);
+
+    efree(obj);
+}
+
+/*
+ * create_object hook of class Query: allocates a zeroed query_object, copies
+ * the class default properties and registers the object in the object store.
+ */
+zend_object_value query_create_handler(zend_class_entry *type TSRMLS_DC)
+{
+    zval *tmp;
+    zend_object_value retval;
+
+    query_object *obj = (query_object *)emalloc(sizeof(query_object));
+    memset(obj, 0, sizeof(query_object));
+    obj->std.ce = type;
+
+    ALLOC_HASHTABLE(obj->std.properties);
+    zend_hash_init(obj->std.properties, 0, NULL, ZVAL_PTR_DTOR, 0);
+    zend_hash_copy(obj->std.properties, &type->default_properties,
+                   (copy_ctor_func_t)zval_add_ref, (void *)&tmp, sizeof(zval *));
+
+    retval.handle = zend_objects_store_put(obj, NULL,
+                                           query_free_storage, NULL TSRMLS_CC);
+    retval.handlers = &query_object_handlers;
+
+    return retval;
+}
+
+/*
+ * int|false Query::query(string $qs, int $maxchars, int $ctxwords)
+ *
+ * Opens the index read-only and searches for $qs: as a phrase when it has no
+ * visible white space, as an AND clause otherwise. $maxchars and $ctxwords
+ * are passed to Rcl::Db::setAbstractParams(). Returns the result count
+ * reported by the query, or false after raising a PHP warning.
+ */
+PHP_METHOD(Query, query)
+{
+    char *qs_c;
+    int qs_len;
+    long maxchars;
+    long ctxwords;
+
+    /* Pass the real argument count so that a call with too few arguments is
+     * rejected; zend_parse_parameters() raises the warning itself. */
+    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sll",
+                              &qs_c, &qs_len, &maxchars, &ctxwords) == FAILURE) {
+        RETURN_FALSE;
+    }
+    string qs(qs_c, qs_len);
+
+    string reason;
+    string a_config;
+    /* NOTE(review): rclconfig is never deleted. It is handed to the Db
+     * created below, so confirm its ownership before adding a delete. */
+    RclConfig *rclconfig = recollinit(0, 0, reason, &a_config);
+    if (!rclconfig || !rclconfig->ok()) {
+        php_error_docref(NULL TSRMLS_CC, E_WARNING,
+                         "Recoll init failed: %s", reason.c_str());
+        RETURN_FALSE;
+    }
+
+    Rcl::Db *pRclDb = new Rcl::Db(rclconfig);
+    if (!pRclDb->open(Rcl::Db::DbRO)) {
+        string msg("Cant open database in ");
+        msg += rclconfig->getDbDir();
+        msg += " reason: ";
+        msg += pRclDb->getReason();
+        php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s", msg.c_str());
+        delete pRclDb;
+        RETURN_FALSE;
+    }
+
+    pRclDb->setAbstractParams(-1, maxchars, ctxwords);
+
+    // If there is no white space inside the query, then the user
+    // certainly means it as a phrase.
+    Rcl::SearchDataClause *clp;
+    if (TextSplit::hasVisibleWhite(qs)) {
+        clp = new Rcl::SearchDataClauseSimple(Rcl::SCLT_AND, qs);
+    } else {
+        clp = new Rcl::SearchDataClauseDist(Rcl::SCLT_PHRASE, qs, 0);
+    }
+
+    /* 'new' never returns NULL, so the former "sd == 0" branches are gone. */
+    Rcl::SearchData *sd = new Rcl::SearchData(Rcl::SCLT_AND);
+    sd->addClause(clp);
+    sd->setStemlang("english");
+
+    RefCntr<Rcl::SearchData> rq(sd);
+    Rcl::Query *pRclQuery = new Rcl::Query(pRclDb);
+    pRclQuery->setQuery(rq);
+
+    query_object *obj =
+        (query_object *)zend_object_store_get_object(getThis() TSRMLS_CC);
+    /* Calling query() again on the same object must not leak the old one. */
+    query_release(obj);
+    obj->pRclQuery = pRclQuery;
+    obj->pRclDb = pRclDb;
+
+    RETURN_LONG(pRclQuery->getResCnt()); /* -1 means no result */
+}
+
+/*
+ * string|false Query::get_doc(int $index)
+ *
+ * Returns result number $index of the last query() as the string
+ * "mime:M<sep>url:U<sep>title:T<sep>abs:A", where <sep> is the four bytes
+ * 7,8,1,2, or false when query() has not succeeded or getDoc() fails.
+ */
+PHP_METHOD(Query, get_doc)
+{
+    query_object *obj =
+        (query_object *)zend_object_store_get_object(getThis() TSRMLS_CC);
+    Rcl::Query *pRclQuery = obj->pRclQuery;
+    if (NULL == pRclQuery) {
+        php_error_docref(NULL TSRMLS_CC, E_WARNING,
+                         "error, NULL pointer pRclQuery");
+        RETURN_FALSE;
+    }
+
+    long index;
+    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "l", &index) == FAILURE) {
+        RETURN_FALSE;
+    }
+
+    Rcl::Doc doc;
+    if (!pRclQuery->getDoc(index, doc)) {
+        RETURN_FALSE;
+    }
+
+    string abs;
+    pRclQuery->whatDb()->makeDocAbstract(doc, pRclQuery, abs);
+
+    static const char splitter[] = {7, 8, 1, 2, 0};
+
+    /* Assemble the reply in a std::string: the former fixed 1000-byte buffer
+     * silently truncated long URLs, titles and abstracts. */
+    string ret("mime:");
+    ret += doc.mimetype;
+    ret += splitter;
+    ret += "url:";
+    ret += doc.url;
+    ret += splitter;
+    ret += "title:";
+    ret += doc.meta[Rcl::Doc::keytt];
+    ret += splitter;
+    ret += "abs:";
+    ret += abs;
+    RETURN_STRINGL(const_cast<char *>(ret.c_str()), ret.length(), 1);
+}
+
+/* Method table of class Query. */
+zend_function_entry query_methods[] = {
+    PHP_ME(Query, query, NULL, ZEND_ACC_PUBLIC)
+    PHP_ME(Query, get_doc, NULL, ZEND_ACC_PUBLIC)
+    {NULL, NULL, NULL}
+};
+/* End of Class Query Definition */
+
+/*
+ * Query recoll_connect()
+ *
+ * Returns a new, unconnected Query object. It is created directly in
+ * return_value, so no temporary zval is left behind; its Recoll pointers
+ * were already zeroed by query_create_handler().
+ */
+PHP_FUNCTION(recoll_connect)
+{
+    if (object_init_ex(return_value, query_ce) != SUCCESS) {
+        RETURN_FALSE;
+    }
+}
+
+/* Functions exported by the extension. */
+zend_function_entry recoll_functions[] = {
+    PHP_FE(recoll_connect, NULL)
+    {NULL, NULL, NULL}
+};
+
+/* Module startup: registers class Query and sets up its object handlers. */
+PHP_MINIT_FUNCTION(recoll)
+{
+    zend_class_entry ce;
+    INIT_CLASS_ENTRY(ce, "Query", query_methods);
+    query_ce = zend_register_internal_class(&ce TSRMLS_CC);
+    query_ce->create_object = query_create_handler;
+    memcpy(&query_object_handlers,
+           zend_get_std_object_handlers(), sizeof(zend_object_handlers));
+    query_object_handlers.clone_obj = NULL;
+    return SUCCESS;
+}
+
+zend_module_entry recoll_module_entry = {
+#if ZEND_MODULE_API_NO >= 20010901
+    STANDARD_MODULE_HEADER,
+#endif
+    PHP_RECOLL_EXTNAME,
+    recoll_functions, /* Functions */
+    PHP_MINIT(recoll), /* MINIT */
+    NULL, /* MSHUTDOWN */
+    NULL, /* RINIT */
+    NULL, /* RSHUTDOWN */
+    NULL, /* MINFO */
+#if ZEND_MODULE_API_NO >= 20010901
+    PHP_RECOLL_EXTVER,
+#endif
+    STANDARD_MODULE_PROPERTIES
+};
+
+#ifdef COMPILE_DL_RECOLL
+extern "C" {
+ZEND_GET_MODULE(recoll)
+}
+#endif
diff --git a/src/php/recoll/recollq.h b/src/php/recoll/recollq.h
new file mode 100644
index 00000000..c74337b0
--- /dev/null
+++ b/src/php/recoll/recollq.h
@@ -0,0 +1,10 @@
+#ifndef _recollq_h_included_
+#define _recollq_h_included_
+/* @(#$Id: recollq.h,v 1.1 2007-11-08 09:35:47 dockes Exp $ (C) 2007 J.F.Dockes */
+
+/// Execute query, print results to stdout. This is just an api to the
+/// recollq command line program.
+class RclConfig;
+extern int recollq(RclConfig **cfp, int argc, char **argv);
+
+#endif /* _recollq_h_included_ */
diff --git a/src/php/sample/shell.php b/src/php/sample/shell.php
new file mode 100644
index 00000000..10a7da4b
--- /dev/null
+++ b/src/php/sample/shell.php
@@ -0,0 +1,36 @@
+<?php
+// NOTE(review): the first lines of this script were lost from this copy of
+// the patch. $splitter and $query are reconstructed from their uses below
+// and from the 7,8,1,2 separator emitted by Query::get_doc() - confirm.
+$splitter = chr(7).chr(8).chr(1).chr(2);
+$query = recoll_connect();
+
+$rescnt = $query->query("python",120,16);
+// Strict comparison: a result count of 0 is not an error.
+if( false === $rescnt )
+{
+    echo "error during query\n";
+    var_dump($rescnt);
+    return;
+}
+print "total results ".$rescnt."\n";
+
+for ($i = 0; $i < $rescnt; $i++)
+{
+    $res = $query->get_doc($i);
+
+    if( false === $res)
+    {
+        echo "error\n";
+        return;
+    }
+
+    // Each character of $splitter acts as a delimiter for strtok().
+    $token=strtok($res,$splitter);
+    while ($token !== false)
+    {
+        echo "$token\n";
+        $token = strtok($splitter);
+    }
+}
+?>