*** empty log message ***
This commit is contained in:
parent
a3a170ae68
commit
52e32853f4
346
src/query/wasastringtoquery.cpp
Normal file
346
src/query/wasastringtoquery.cpp
Normal file
@ -0,0 +1,346 @@
|
||||
#ifndef lint
|
||||
// Revision identification string for this file. Nothing in the visible code
// reads it; it looks like an RCS "Id" keyword behind an SCCS-style "@(#"
// marker, presumably so that ident(1)/what(1) can find it in the binary
// (NOTE(review): confirm against the build conventions).
static char rcsid[] = "@(#$Id: wasastringtoquery.cpp,v 1.1 2006-11-30 18:12:16 dockes Exp $ (C) 2006 J.F.Dockes";
|
||||
#endif
|
||||
/*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc.,
|
||||
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
#ifndef TEST_STRINGTOQUERY
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <regex.h>
|
||||
|
||||
#include "wasastringtoquery.h"
|
||||
|
||||
// A query node owns its subclauses: release every one of them, in order,
// then empty the list.
WasaQuery::~WasaQuery()
{
    for (vector<WasaQuery*>::size_type idx = 0; idx < m_subs.size(); idx++) {
        delete m_subs[idx];
    }
    m_subs.clear();
}
|
||||
|
||||
// Append a textual rendering of this clause to desc.
//
// The clause is always wrapped as "(" ... ") ":
//  - OP_NULL:        the word NULL
//  - OP_LEAF:        "value", or "field:value" when a field is set
//  - OP_EXCL:        "NOT (value) "
//  - OP_OR / OP_AND: the descriptions of all subclauses, separated by
//                    "OR " or "AND " respectively (no trailing separator)
void WasaQuery::describe(string &desc) const
{
    desc += "(";
    switch (m_op) {
    case OP_NULL:
        desc += "NULL";
        break;
    case OP_LEAF:
        if (m_fieldspec.empty()) {
            desc += m_value;
        } else {
            desc += m_fieldspec + ":" + m_value;
        }
        break;
    case OP_EXCL:
        desc += string("NOT (" ) + m_value + ") ";
        break;
    case OP_OR:
    case OP_AND:
        {
            const char *joiner = (m_op == OP_OR) ? "OR " : "AND ";
            const vector<WasaQuery *>::size_type count = m_subs.size();
            for (vector<WasaQuery *>::size_type idx = 0; idx < count; idx++) {
                m_subs[idx]->describe(desc);
                // Separator between subclauses only, not after the last one
                if (idx + 1 < count)
                    desc += joiner;
            }
        }
        break;
    }
    desc += ") ";
}
|
||||
|
||||
// The string query parser code:
|
||||
|
||||
/* Shamelessly lifted from Beagle:
|
||||
* This is our regular Expression Pattern:
|
||||
* we expect something like this:
|
||||
* -key:"Value String"
|
||||
* key:Value
|
||||
* or
|
||||
* Value
|
||||
([+-]?) # Required or Prohibited (optional)
|
||||
(\w+:)? # Key (optional)
|
||||
( # Query Text
|
||||
(\"([^\"]*)\"?)# quoted
|
||||
| # or
|
||||
([^\s\"]+) # unquoted
|
||||
)
|
||||
";
|
||||
*/
|
||||
|
||||
/* The master regular expression used to parse a query string.
 * Sub-expressions in parentheses are numbered from 1, and each opening
 * parenthesis increases the index. We are not interested in all of them:
 * the ones which the parser actually reads are named in SbMatchIdx.
 * The numbers in the comments below are the sub-expression indices.
 */
static const char * parserExpr =
    "([oO][rR])"                        // 1: OR is a special word
    "|"
    "("                                 // 2: one complete query element
        "([+-])?"                       // 3: force or exclude indicator
        "(([[:alpha:]][[:alnum:]]+):)?" // 4: "fieldname:", 5: fieldname
        "("                             // 6: the value, quoted or not
            "(\"([^\"]+)\")"            // 7: "A quoted term", 8: its text
            "|"
            "([^[:space:]]+)"           // 9: ANormalTerm
        ")"
    ")"
    ;
|
||||
|
||||
// Printable names for the sub-expressions of parserExpr, indexed by
// sub-expression number (0 is the whole match). Groups which only exist
// for structuring the expression have an empty name. The names are only
// printed by the (disabled) debugging code in the parser, but the size of
// this table also defines NMATCH, so it must keep one entry per group.
static const char *matchNames[] = {
    "",            // 0
    "OR",          // 1
    "",            // 2
    "+-",          // 3
    "",            // 4
    "FIELD",       // 5
    "",            // 6
    "",            // 7
    "QUOTEDTERM",  // 8
    "TERM",        // 9
};
// Number of entries in the regmatch_t array: whole match + all groups
#define NMATCH (sizeof(matchNames) / sizeof(matchNames[0]))
|
||||
|
||||
// Symbolic names for the sub-expression indices which the parser reads.
// The values are the group numbers in parserExpr.
enum SbMatchIdx {
    SMI_OR = 1,      // the special word OR
    SMI_PM = 3,      // + or - indicator
    SMI_FIELD = 5,   // field name, without the colon
    SMI_QUOTED = 8,  // text of a quoted value, without the quotes
    SMI_TERM = 9     // unquoted value
};

// Size (without the final 0) of the buffers receiving submatch copies
static const int maxmatchlen = 1024;
// Size of the buffer used for regerror() messages
static const int errbuflen = 300;
|
||||
|
||||
// Private implementation of StringToWasaQuery: holds the compiled regular
// expression and the state of the current parse.
class StringToWasaQuery::Internal {
public:
    Internal()
        : m_cp(0), m_rxneedsfree(false)
    {}
    ~Internal()
    {
        // m_rx only holds a compiled expression while the flag is set
        if (m_rxneedsfree)
            regfree(&m_rx);
    }

    // Copy the text of submatch number i of the last regexec() call into
    // match, as a 0-terminated string.
    //
    // @param i      submatch index (see SbMatchIdx)
    // @param match  output buffer. Must have room for maxmatchlen+1 bytes.
    // @param reason set to an error message when the submatch exists but
    //               can't be used (empty or too long for the buffer)
    // @return true if the submatch participated in the match and was
    //         copied, false if it is absent or unusable.
    bool checkSubMatch(int i, char *match, string& reason)
    {
        if (i < 0 || i >= int(NMATCH) || m_pmatch[i].rm_so == -1)
            return false;
        const long len = long(m_pmatch[i].rm_eo) - long(m_pmatch[i].rm_so);
        if (len <= 0) {
            // weird and fatal
            reason = "Internal regular expression handling error";
            return false;
        }
        if (len > long(maxmatchlen)) {
            // Would overflow the caller's buffer
            reason = "Query element too long";
            return false;
        }
        memcpy(match, m_cp + m_pmatch[i].rm_so, size_t(len));
        match[len] = 0;
        return true;
    }

    // Parse str into a query tree. Defined out of line.
    WasaQuery *stringToQuery(const string& str, string& reason);

    friend class StringToWasaQuery;
private:
    // The object owns a compiled regex_t: copying it would lead to
    // regfree() being called twice. Declared, never defined.
    Internal(const Internal&);
    Internal& operator=(const Internal&);

    // Current position in the string being parsed. Submatch offsets in
    // m_pmatch are relative to this position.
    const char *m_cp;
    // Compiled form of parserExpr, valid only while m_rxneedsfree is set
    regex_t     m_rx;
    bool        m_rxneedsfree;
    // Results of the last regexec() call, one entry per sub-expression
    regmatch_t  m_pmatch[NMATCH];
};
|
||||
|
||||
// Create a parser, along with its private implementation object.
StringToWasaQuery::StringToWasaQuery()
{
    internal = new Internal;
}
|
||||
|
||||
// Release the private implementation object created by the constructor.
StringToWasaQuery::~StringToWasaQuery()
{
    delete this->internal;
}
|
||||
|
||||
|
||||
// Parse a query string: forwards to the implementation object.
// Returns 0 if there is no implementation object, else whatever
// Internal::stringToQuery() returns for the same arguments.
WasaQuery *
StringToWasaQuery::stringToQuery(const string& str, string& reason)
{
    if (internal == 0)
        return 0;
    return internal->stringToQuery(str, reason);
}
|
||||
|
||||
/*
 * Parse a query string into a WasaQuery tree.
 *
 * The string is consumed by applying the master regular expression
 * (parserExpr) repeatedly. Each match is either the special word OR, or
 * one query element: optional +/- indicator, optional "field:" prefix,
 * and a quoted or unquoted value.
 *
 * The result is an OP_AND node. Its subclauses are OP_LEAF elements,
 * OP_EXCL elements (those prefixed with '-'), and OP_OR nodes grouping
 * the elements which were joined by OR in the input.
 *
 * @param str     the query text
 * @param reason  set to an explanation for most failures
 * @return the query tree, allocated with new (the caller deletes it), or
 *         0 on error. Everything allocated here is freed before an error
 *         return.
 */
WasaQuery *
StringToWasaQuery::Internal::stringToQuery(const string& str, string& reason)
{
    // A compiled expression can only be left over if a previous call did
    // not run to completion. Reset the flag along with the free, so that
    // a failing regcomp() below can't result in a second regfree().
    if (m_rxneedsfree) {
        regfree(&m_rx);
        m_rxneedsfree = false;
    }

    char errbuf[errbuflen+1];
    int errcode;
    if ((errcode = regcomp(&m_rx, parserExpr, REG_EXTENDED)) != 0) {
        regerror(errcode, &m_rx, errbuf, errbuflen);
        reason = errbuf;
        return 0;
    }
    m_rxneedsfree = true;

    m_cp = str.c_str();
    const char *cpe = m_cp + str.length();

    // The main (AND) query. Owned here until returned.
    WasaQuery *query = new WasaQuery;
    query->m_op = WasaQuery::OP_AND;
    // The OR subclause being built, if any. It is owned here, separately
    // from the main query, until the OR chain ends.
    WasaQuery *orClause = 0;
    // True if the previous element was the word OR
    bool prev_or = false;
    // Cleared on error: all exits go through the cleanup after the loop
    bool ok = true;

    // Loop on repeated regexp matches on the main string.
    for (int loop = 0;;loop++) {
        errcode = regexec(&m_rx, m_cp, NMATCH, m_pmatch, 0);
        if (errcode == REG_NOMATCH && loop > 0) {
            // Every non-space run matches the expression, so what remains
            // is only white space: normal end of input, not an error.
            break;
        }
        if (errcode != 0) {
            regerror(errcode, &m_rx, errbuf, errbuflen);
            reason = errbuf;
            ok = false;
            break;
        }
        if (m_pmatch[0].rm_eo <= 0) {
            // weird and fatal
            reason = "Internal regular expression handling error";
            ok = false;
            break;
        }
        // All submatches lie inside the whole match. Checking it here
        // guarantees that no copy into match[] below can overflow.
        if (long(m_pmatch[0].rm_eo) - long(m_pmatch[0].rm_so) >
            long(maxmatchlen)) {
            reason = "Query element too long";
            ok = false;
            break;
        }
#if 0
        if (loop) printf("Next part:\n");
        for (unsigned int i = 0; i < NMATCH; i++) {
            if (m_pmatch[i].rm_so == -1) continue;
            char match[maxmatchlen+1];
            memcpy(match, m_cp + m_pmatch[i].rm_so,
                   m_pmatch[i].rm_eo - m_pmatch[i].rm_so);
            match[m_pmatch[i].rm_eo - m_pmatch[i].rm_so] = 0;
            if (matchNames[i][0])
                printf("%10s: [%s] (%d->%d)\n", matchNames[i], match,
                       (int)m_pmatch[i].rm_so, (int)m_pmatch[i].rm_eo);
        }
#endif
        char match[maxmatchlen+1];
        if (checkSubMatch(SMI_OR, match, reason)) {
            if (prev_or) {
                // Bad syntax
                reason = "Bad syntax: consecutive OR";
                ok = false;
                break;
            }

            if (orClause == 0) {
                // First OR seen: start OR subclause.
                orClause = new WasaQuery;
                orClause->m_op = WasaQuery::OP_OR;

                // The element which preceded the OR was added to the
                // main query: transfer it to the OR subquery. This must
                // only be done when starting a chain: further along
                // ("a OR b OR c"), the preceding element already is in
                // the OR subquery, and the last element of the main
                // query is unrelated.
                if (!query->m_subs.empty()) {
                    orClause->m_subs.push_back(query->m_subs.back());
                    query->m_subs.pop_back();
                }
            }
            prev_or = true;

        } else {

            WasaQuery *nclause = new WasaQuery;

            // Check for quoted or unquoted value
            if (checkSubMatch(SMI_QUOTED, match, reason)) {
                nclause->m_value = match;
            } else if (checkSubMatch(SMI_TERM, match, reason)) {
                nclause->m_value = match;
            }
            if (nclause->m_value.empty()) {
                // Isolated +- or fieldname: without a value. This is
                // treated as a failure of the whole parse.
                delete nclause;
                ok = false;
                break;
            }

            // +- indicator ?
            if (checkSubMatch(SMI_PM, match, reason) && match[0] == '-') {
                nclause->m_op = WasaQuery::OP_EXCL;
            } else {
                nclause->m_op = WasaQuery::OP_LEAF;
            }

            // Field indicator ?
            if (checkSubMatch(SMI_FIELD, match, reason)) {
                nclause->m_fieldspec = match;
            }

            if (prev_or) {
                // We're in an OR subquery, add new subquery
                orClause->m_subs.push_back(nclause);
            } else {
                if (orClause) {
                    // Getting out of OR. Add the OR subquery to the main one
                    query->m_subs.push_back(orClause);
                    orClause = 0;
                }
                // Add new subquery to main one.
                query->m_subs.push_back(nclause);
            }
            prev_or = false;
        }

        // Advance current string position. We checked earlier that
        // the increment is strictly positive, so we won't loop
        // forever
        m_cp += m_pmatch[0].rm_eo;
        if (m_cp >= cpe)
            break;
    }

    regfree(&m_rx);
    m_rxneedsfree = false;

    if (!ok) {
        // Neither object has been handed out: free both. Deleting a
        // WasaQuery also deletes its subclauses.
        delete orClause;
        delete query;
        return 0;
    }

    // The input ended while inside an OR chain ("a OR b"): the OR
    // subquery has not been attached to the main query yet.
    if (orClause) {
        query->m_subs.push_back(orClause);
    }
    return query;
}
|
||||
|
||||
#else // TEST
|
||||
|
||||
#include <stdio.h>
|
||||
#include "wasastringtoquery.h"
|
||||
|
||||
static char *thisprog;
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
thisprog = argv[0];
|
||||
argc--; argv++;
|
||||
|
||||
if (argc != 1) {
|
||||
fprintf(stderr, "need one arg\n");
|
||||
exit(1);
|
||||
}
|
||||
const string str = *argv++;argc--;
|
||||
string reason;
|
||||
StringToWasaQuery qparser;
|
||||
WasaQuery *q = qparser.stringToQuery(str, reason);
|
||||
if (q == 0) {
|
||||
fprintf(stderr, "stringToQuery failed: %s\n", reason.c_str());
|
||||
exit(1);
|
||||
}
|
||||
string desc;
|
||||
q->describe(desc);
|
||||
printf("%s\n", desc.c_str());
|
||||
exit(0);
|
||||
}
|
||||
|
||||
#endif // TEST_STRINGTOQUERY
|
||||
57
src/query/wasastringtoquery.h
Normal file
57
src/query/wasastringtoquery.h
Normal file
@ -0,0 +1,57 @@
|
||||
#ifndef _WASASTRINGTOQUERY_H_INCLUDED_
|
||||
#define _WASASTRINGTOQUERY_H_INCLUDED_
|
||||
/* @(#$Id: wasastringtoquery.h,v 1.1 2006-11-30 18:12:16 dockes Exp $ (C) 2006 J.F.Dockes */
|
||||
/*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc.,
|
||||
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
using std::string;
|
||||
using std::vector;
|
||||
|
||||
// A simple class to represent a parsed wasabi query string.
//
// A query is a tree of clauses. Depending on m_op, a node is either an
// elementary clause (OP_LEAF, OP_EXCL) which uses m_value and possibly
// m_fieldspec, or a combination (OP_OR, OP_AND) of the clauses in m_subs.
// A node owns its subclauses: they are deleted by the destructor.
class WasaQuery {
public:
    // OP_NULL: no operation set (value after construction)
    // OP_LEAF: plain value, possibly with a field specification
    // OP_EXCL: excluded value
    // OP_OR, OP_AND: combination of the subclauses
    enum Op {OP_NULL, OP_LEAF, OP_EXCL, OP_OR, OP_AND};
    typedef vector<WasaQuery*> subqlist_t;

    WasaQuery() : m_op(OP_NULL) {}
    ~WasaQuery();

    // Get string describing this query. The text is appended to desc.
    void describe(string &desc) const;

    WasaQuery::Op      m_op;
    // Field name for elementary clauses. Empty if none was given.
    string             m_fieldspec;
    // Subclauses for OP_OR/OP_AND. The pointed objects belong to this node.
    vector<WasaQuery*> m_subs;
    // Value for elementary clauses
    string             m_value;

private:
    // Copying would leave two nodes deleting the same subclauses.
    // Declared, never defined.
    WasaQuery(const WasaQuery&);
    WasaQuery& operator=(const WasaQuery&);
};
|
||||
|
||||
|
||||
// Wasabi query string parser class.
|
||||
class StringToWasaQuery {
|
||||
public:
|
||||
StringToWasaQuery();
|
||||
~StringToWasaQuery();
|
||||
WasaQuery *stringToQuery(const string& str, string& reason);
|
||||
class Internal;
|
||||
private:
|
||||
Internal *internal;
|
||||
};
|
||||
|
||||
#endif /* _WASASTRINGTOQUERY_H_INCLUDED_ */
|
||||
155
src/query/wasatorcl.cpp
Normal file
155
src/query/wasatorcl.cpp
Normal file
@ -0,0 +1,155 @@
|
||||
#ifndef lint
|
||||
// Revision identification string for this file. Nothing in the visible code
// reads it; it looks like an RCS "Id" keyword behind an SCCS-style "@(#"
// marker, presumably so that ident(1)/what(1) can find it in the binary
// (NOTE(review): confirm against the build conventions).
static char rcsid[] = "@(#$Id: wasatorcl.cpp,v 1.1 2006-11-30 18:12:16 dockes Exp $ (C) 2006 J.F.Dockes";
|
||||
#endif
|
||||
#ifndef TEST_WASATORCL
|
||||
|
||||
#include "wasastringtoquery.h"
|
||||
#include "rcldb.h"
|
||||
#include "searchdata.h"
|
||||
#include "wasatorcl.h"
|
||||
|
||||
// Translate a parsed wasabi query into a Rcl::SearchData object.
//
// Only the direct subclauses of the top node are looked at:
//  - OP_LEAF: a phrase clause if the value contains white space, else a
//             simple AND clause
//  - OP_EXCL: an exclusion clause on the value enclosed in double quotes
//  - OP_OR:   a single OR clause whose text is the concatenation of the
//             double-quoted values of all the OR subclauses
//  - anything else (OP_NULL, OP_AND, unknown values) is skipped
//
// Returns 0 if wasa is 0, else a SearchData allocated with new.
Rcl::SearchData *wasatorcl(WasaQuery *wasa)
{
    if (!wasa)
        return 0;

    Rcl::SearchData *sdata = new Rcl::SearchData(Rcl::SCLT_AND);

    WasaQuery::subqlist_t& clauses = wasa->m_subs;
    for (WasaQuery::subqlist_t::size_type i = 0; i < clauses.size(); i++) {
        WasaQuery *sub = clauses[i];

        if (sub->m_op == WasaQuery::OP_LEAF) {
            bool multiword =
                sub->m_value.find_first_of(" \t\n\r") != string::npos;
            if (multiword) {
                sdata->addClause
                    (new Rcl::SearchDataClauseDist(Rcl::SCLT_PHRASE,
                                                   sub->m_value, 0));
            } else {
                sdata->addClause
                    (new Rcl::SearchDataClauseSimple(Rcl::SCLT_AND,
                                                     sub->m_value));
            }
        } else if (sub->m_op == WasaQuery::OP_EXCL) {
            // Note: have to add dquotes which will be translated to
            // phrase if there are several words in there. Not pretty
            // but should work
            string quoted = string("\"") + sub->m_value + "\"";
            sdata->addClause
                (new Rcl::SearchDataClauseSimple(Rcl::SCLT_EXCL, quoted));
        } else if (sub->m_op == WasaQuery::OP_OR) {
            // Concatenate all OR values as phrases. Hope there are no
            // stray dquotes in there
            string orvalue;
            WasaQuery::subqlist_t& alternatives = sub->m_subs;
            for (WasaQuery::subqlist_t::size_type j = 0;
                 j < alternatives.size(); j++) {
                orvalue += string("\"") + alternatives[j]->m_value + "\"";
            }
            sdata->addClause
                (new Rcl::SearchDataClauseSimple(Rcl::SCLT_OR, orvalue));
        }
        // Other operators: nothing to add for this subclause
    }

    return sdata;
}
|
||||
|
||||
|
||||
#else // TEST
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "autoconfig.h"
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
#include <signal.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include <iostream>
|
||||
#include <list>
|
||||
#include <string>
|
||||
|
||||
using namespace std;
|
||||
|
||||
#include "debuglog.h"
|
||||
#include "rclinit.h"
|
||||
#include "rclconfig.h"
|
||||
#include "rcldb.h"
|
||||
#include "searchdata.h"
|
||||
#include "refcntr.h"
|
||||
#include "wasastringtoquery.h"
|
||||
#include "wasatorcl.h"
|
||||
|
||||
static char *thisprog;
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
thisprog = argv[0];
|
||||
argc--; argv++;
|
||||
|
||||
if (argc != 1) {
|
||||
fprintf(stderr, "need one arg\n");
|
||||
exit(1);
|
||||
}
|
||||
const string str = *argv++;argc--;
|
||||
string reason;
|
||||
|
||||
RclConfig *config = recollinit(RCLINIT_NONE, 0, 0, reason, 0);
|
||||
if (config == 0 || !config->ok()) {
|
||||
cerr << "Configuration problem: " << reason << endl;
|
||||
exit(1);
|
||||
}
|
||||
string dbdir = config->getDbDir();
|
||||
if (dbdir.empty()) {
|
||||
// Note: this will have to be replaced by a call to a
|
||||
// configuration buildin dialog for initial configuration
|
||||
cerr << "Configuration problem: " << "No dbdir" << endl;
|
||||
exit(1);
|
||||
}
|
||||
Rcl::Db rcldb;
|
||||
if (!rcldb.open(dbdir, Rcl::Db::DbRO, 0)) {
|
||||
cerr << "Could not open database in " << dbdir << endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
StringToWasaQuery qparser;
|
||||
WasaQuery *wq = qparser.stringToQuery(str, reason);
|
||||
if (wq == 0) {
|
||||
fprintf(stderr, "wasastringtoquery failed: %s\n", reason.c_str());
|
||||
return 1;
|
||||
}
|
||||
string desc;
|
||||
wq->describe(desc);
|
||||
cout << endl << "Wasabi query description: " << desc << endl << endl;
|
||||
|
||||
Rcl::SearchData *sdata = wasatorcl(wq);
|
||||
RefCntr<Rcl::SearchData> rq(sdata);
|
||||
if (!rcldb.setQuery(rq)) {
|
||||
cerr << "setQuery failed" << endl;
|
||||
return 1;
|
||||
}
|
||||
int maxi = rcldb.getResCnt() > 10 ? 10 : rcldb.getResCnt();
|
||||
|
||||
cout << endl << "Rcl Query description: " << sdata->getDescription()
|
||||
<< endl << endl << "Results: " << endl;
|
||||
|
||||
for (int i = 0; i < maxi ; i++) {
|
||||
Rcl::Doc doc;
|
||||
if (!rcldb.getDoc(i, doc)) {
|
||||
cerr << "getDoc failed" << endl;
|
||||
return 1;
|
||||
}
|
||||
cout << i << ": " << doc.url << endl;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
#endif // TEST_WASATORCL
|
||||
7
src/query/wasatorcl.h
Normal file
7
src/query/wasatorcl.h
Normal file
@ -0,0 +1,7 @@
|
||||
#ifndef _WASATORCL_H_INCLUDED_
|
||||
#define _WASATORCL_H_INCLUDED_
|
||||
/* @(#$Id: wasatorcl.h,v 1.1 2006-11-30 18:12:16 dockes Exp $ (C) 2006 J.F.Dockes */
|
||||
|
||||
// Translate a parsed wasabi query into a Rcl::SearchData object.
// Returns 0 if wasa is 0, else an object allocated with new.
// This header includes nothing itself: the declarations of
// Rcl::SearchData and WasaQuery must be visible before it is included.
Rcl::SearchData *wasatorcl(WasaQuery *wasa);
|
||||
|
||||
#endif /* _WASATORCL_H_INCLUDED_ */
|
||||
Loading…
x
Reference in New Issue
Block a user