From 7cc20a8f78a4df557638ac73a488a08819f50216 Mon Sep 17 00:00:00 2001 From: dockes Date: Tue, 14 Nov 2006 13:55:43 +0000 Subject: [PATCH] added dynamic clauses to adv search. Still needs work --- src/doc/user/usermanual.sgml | 238 ++++++++++++++++++++++------ src/qtgui/advsearch.ui | 291 ++++++++++++++++++++++------------- src/qtgui/advsearch_w.cpp | 29 +++- src/qtgui/advsearch_w.h | 14 +- src/qtgui/rclmain_w.cpp | 10 +- src/qtgui/searchclause_w.cpp | 143 +++++++++++++++++ src/qtgui/searchclause_w.h | 54 +++++++ src/rcldb/rcldb.cpp | 10 +- src/rcldb/rcldb.h | 5 +- src/rcldb/searchdata.cpp | 200 ++++++++++++++---------- src/rcldb/searchdata.h | 16 +- 11 files changed, 751 insertions(+), 259 deletions(-) create mode 100644 src/qtgui/searchclause_w.cpp create mode 100644 src/qtgui/searchclause_w.h diff --git a/src/doc/user/usermanual.sgml b/src/doc/user/usermanual.sgml index 1067a6f9..5d012d38 100644 --- a/src/doc/user/usermanual.sgml +++ b/src/doc/user/usermanual.sgml @@ -24,7 +24,7 @@ Dockes - $Id: usermanual.sgml,v 1.23 2006-11-06 17:37:22 dockes Exp $ + $Id: usermanual.sgml,v 1.24 2006-11-14 13:55:43 dockes Exp $ This document introduces full text search notions @@ -152,8 +152,8 @@ giving &RCL; a try, but you may want to adjust it later. - Indexing is started - automatically the first time you execute the + Indexing + is started automatically the first time you execute the recoll search graphical user interface, or by executing the recollindex command. @@ -180,21 +180,41 @@ later on by specifying an option to the indexing command (recollindex -z). - &RCL; indexing takes place at discrete times. There is - currently no interface to real time file modification - monitors. The typical usage is to have a nightly indexing run - programmed into your - cron file. 
+ &RCL; indexing can be performed with two different + methods: - There is nothing in &RCL; and &XAP; - that would prevent interfacing with a real time file - modification monitor, but this would tend to consume significant - system resources for dubious gain, because you rarely need a - full text search to find documents you just - modified. recollindex -i can be used to add - individual files to the index if you want to play with this, see - the manual page. - + + + + Periodic indexing: + indexing takes place at discrete + times, by executing the recollindex + command. The typical usage is to have a nightly indexing run + programmed into your + cron file. + + + + + Real time indexing: + indexing takes place as soon as a file is created or + changed. recollindex runs as a daemon + and uses a file system alteration monitor such as + Fam, + Gamin or + inotify to detect file changes. + Monitoring a big directory tree can consume significant + system resources. + + + + + The choice between the two methods is mostly a matter of + preference, and they can be combined by setting up multiple + indexes (ie: use periodic indexing on a big documentation + directory, and real time indexing on a small home + directory). Monitoring a big file system tree can consume + significant system resources, for dubious gains. &RCL; knows about quite a few different document types. The parameters for document types recognition and @@ -345,10 +365,13 @@ recoll - - Starting indexing + + Periodic indexing - Indexing is performed either by the + + Starting indexing + + Indexing is performed either by the recollindex program, or by the indexing thread inside the recoll program (use the File menu). Both programs @@ -357,11 +380,11 @@ recoll confdir option to specify the configuration directory to be used. - If the recoll program finds no index + If the recoll program finds no index when it starts, it will automatically start indexing (except if canceled). 
- It is best to avoid interrupting the indexing process, as + It is best to avoid interrupting the indexing process, as this may sometimes leave the index in a bad state. This is not a serious problem, as you then just need to delete the index files and restart the indexing. The index files are @@ -371,25 +394,84 @@ recoll -z, which will reset the database before indexing. - + - - Using <command>cron</command> to automate + <sect2 id="rcl.indexing.periodic.automat"> + <title>Using <command>cron</command> to automate indexing - The most common way to set up indexing is to have a cron + The most common way to set up indexing is to have a cron task execute it every night. For example the following crontab entry would do it every day at 3:30AM (supposing recollindex is in your PATH): - 30 3 * * * recollindex > /tmp/recolltrace 2>&1 + 30 3 * * * recollindex > /tmp/recolltrace 2>&1 - The usual command to edit your + The usual command to edit your crontab is crontab -e (which will usually start the vi editor to edit the file). You may have more sophisticated tools available on your system. + + + + + Real time indexing + + Real time monitoring/indexing is performed by starting the + recollindex -m command. With this option, + recollindex will detach from the terminal and + become a daemon, forever monitoring file changes and updating + the index. + + The package must have been + configured + with option --with-fam or + --with-inotify for the monitoring + code and option to be enabled in + recollindex. This is not currently the + default. + + The rclmon.sh script can be used to + easily start and stop the daemon. It can be found in the + examples directory (typically + /usr/local/[share/]recoll/examples). + + Starting and stopping the daemon could be performed, for + example, as part of the user session script. 
For example, my + out of fashion xdm-based session has an .xsession script with + the following lines at the end: + + recollconf=$HOME/.recoll-home +recolldata=/usr/local/share/recoll +RECOLL_CONFDIR=$recollconf $recolldata/examples/rclmon.sh start + +fvwm + +RECOLL_CONFDIR=$recollconf $recolldata/examples/rclmon.sh stop + + + The indexing daemon gets started, then the window manager, + for which the session waits. When the window manager exits, the + indexing daemon is stopped, then the session ends (at script + exit). This should be adjusted for your flavour of session + management, and of course, there are other possibilities. + + By default, the indexing daemon will write its messages to + a file inside the configuration directory (this is controlled by + the daemlogfilename and + daemloglevel configuration parameters). You + may want to change this. Also the log file will only be truncated + when the daemon starts. If the daemon runs permanently, the log + file may grow quite big, depending on the log level. + + The real time indexing code is relatively young, and there + are still a few quirks. File deletions occurring while the + monitor is not running will not be detected. You'll have to run + a normal incremental indexing pass from time to time to purge + the database. There may still be other problems. + @@ -446,11 +528,10 @@ recoll text field). Please note, however, that only the search texts are remembered, not the mode (all/any/file name). - Hitting ^Tab (Ctrl + - Tab) while entering a word in the - simple search entry will open a window with possible completions - for the word. The completions are extracted from the - database. + Typing Esc Space while + entering a word in the simple search entry will open a window + with possible completions for the word. The completions are + extracted from the database. Double-clicking on a word in the result list or a preview window will insert it into the simple search entry field. 
@@ -762,11 +843,11 @@ recoll Search tips, shortcuts Term completion - Typing ^TAB (Control + - Tab) in the simple - search entry field while entering a word will either complete - the current word if its beginning matches a unique term in the - index, or open a window to propose a list of completions + Typing Esc Space in + the simple search entry field while entering a word will + either complete the current word if its beginning matches a + unique term in the index, or open a window to propose a list + of completions. Picking up new terms from result or preview @@ -883,6 +964,54 @@ recoll config (try the <command>qtconfig</command> command.</para> </listitem> + <listitem><para><guilabel>Result paragraph format + string</guilabel>: allows you to change the presentation of + each result list entry. This is a qt-html string where the + following printf-like <literal>%</literal> substitutions will + be performed: + <itemizedlist> + <listitem> + <formalpara><title>%AAbstract + + %DDate + + %KKeywords (if + any) + + %LPreview and + Edit links + + %MMime + type + + %Nresult Number + + + %RRelevance + percentage + + %SSize + information + + %TTitle + + + %UUrl + + + The default value for the string is: + %R %S %L &nbsp;&nbsp;<b>%T</b><br> +%M&nbsp;%D&nbsp;&nbsp;&nbsp;<i>%U</i><br> +%A %K + + You may, for example, try the following for a more web-like + experience (but the document title will not act as a link): + <u><b><font size=+1 color=#1111cf>%T</font></b></u><br> +%A<font color=#008000>%U - %S</font> - %L + + + + HTML help browser: this will let you chose your preferred browser which will be started from the Help menu to read the user @@ -1119,7 +1248,7 @@ recoll Building &RCL; has been built on - Linux (redhat7.3, mandriva 2005, Fedora Core 3), FreeBSD and + Linux (redhat7.3, mandriva 2005/6, Fedora Core 3/4/5), FreeBSD and Solaris 8. If you build on another system, I would very much welcome patches. 
@@ -1131,7 +1260,9 @@ recoll QTDIR should point to the directory above the one that holds the qt include files (ie: - qt.h). + if qt.h is + /usr/local/qt/include/qt.h, QTDIR + should be /usr/local/qt). QMAKESPECS should be set to the name of one of the @@ -1145,13 +1276,13 @@ recoll needed because there is a default link in mkspecs/. - The &RCL; configure script does a - better job of checking these variables after release - 1.1.1. Before this, unexplained errors will occur during - compilation if the environment is not set up. Also, for 1.1.0 the - qmake command should be in your PATH (later - releases can also find it in - $QTDIR/bin). + Configure + options:--without-aspell + will disable the code for phonetic matching of search + terms. --with-fam or + --with-inotify will enable the code for + real time indexing. Refer to configure + --help output for details. Normal procedure: @@ -1338,16 +1469,21 @@ recoll - loglevel + loglevel,daemloglevel Verbosity level for recoll and recollindex. A value of 4 lists quite a lot of - debug/information messages. 2 only lists errors. + debug/information messages. 2 only lists errors. The + daemversion is specific to the indexing monitor + daemon. - logfilename + logfilename, + daemlogfilename Where the messages should go. 'stderr' can - be used as a special value, and is the default. + be used as a special value, and is the default. The + daemversion is specific to the indexing monitor + daemon. diff --git a/src/qtgui/advsearch.ui b/src/qtgui/advsearch.ui index 6c6ac2e9..2e604c38 100644 --- a/src/qtgui/advsearch.ui +++ b/src/qtgui/advsearch.ui @@ -21,7 +21,7 @@ - layout12 + layout18 @@ -29,146 +29,225 @@ - layout11 + layout17 unnamed - - - textLabel2 - - - NoFrame - - - Plain - - - 7 - - - Search for files<br>having all of: - - - All non blank fields will be combined with AND conjunctions. 
<br>All fields except "exact phrase" can accept a mix of simple words, and phrases enclosed in double quotes.<br>There are two <em>Any of these</em> fields so you can search for things like: <br><em>(apple OR pear) AND (green OR sour)</em> - - - layout10 + layout16 - + unnamed - - 8 - - + - andWordsTL + spacer1 - - All of these + + Vertical - - - - andWordsLE + + Expanding - + - 300 - 0 + 20 + 40 - - Enter words, and/or quoted phrases. - - - + + - phraseTL + textLabel2 + + + NoFrame + + + Plain + + + 7 - This exact phrase - - - - - phraseLE + Search for files<br>having all of: - Enter words. + All non blank fields will be combined with AND conjunctions. <br>All fields except "exact phrase" can accept a mix of simple words, and phrases enclosed in double quotes.<br>There are two <em>Any of these</em> fields so you can search for things like: <br><em>(apple OR pear) AND (green OR sour)</em> - + - orWordsTL + spacer2 + + + Vertical + + + Expanding + + + + 20 + 16 + + + + + + addClausePB - Any of these + Add clause + + + false - + + + + + clauseVBox + + + + unnamed + + - orWordsLE - - - Enter words, and/or quoted phrases. + layout10 + + + unnamed + + + 8 + + + + andWordsTL + + + All of these + + + + + andWordsLE + + + + 300 + 0 + + + + Enter words, and/or quoted phrases. + + + + + phraseTL + + + This exact phrase + + + + + phraseLE + + + Enter words. + + + + + orWordsTL + + + Any of these + + + + + orWordsLE + + + Enter words, and/or quoted phrases. + + + + + orWords1TL + + + Any of these + + + + + orWords1LE + + + Enter words, and/or quoted phrases. + + + + + noWordsTL + + + None of these + + + + + noWordsLE + + + Enter words, and/or quoted phrases. + + + + + textLabel1_2 + + + File name matching + + + + + fileNameLE + + + Enter file name. * and ? are wildcards. + + + - + - orWords1TL + line4 - - Any of these + + HLine + + + Sunken + + + Horizontal - - - orWords1LE - - - Enter words, and/or quoted phrases. 
- - - - - noWordsTL - - - None of these - - - - - noWordsLE - - - Enter words, and/or quoted phrases. - - - - - textLabel1_2 - - - File name matching - - - - - fileNameLE - - - Enter file name. * and ? are wildcards. - - - + @@ -385,7 +464,7 @@ - layout18 + layoutFT @@ -465,7 +544,7 @@ - layout25 + layout26 diff --git a/src/qtgui/advsearch_w.cpp b/src/qtgui/advsearch_w.cpp index 714656d8..a620b31a 100644 --- a/src/qtgui/advsearch_w.cpp +++ b/src/qtgui/advsearch_w.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: advsearch_w.cpp,v 1.5 2006-11-13 08:58:47 dockes Exp $ (C) 2005 J.F.Dockes"; +static char rcsid[] = "@(#$Id: advsearch_w.cpp,v 1.6 2006-11-14 13:55:43 dockes Exp $ (C) 2005 J.F.Dockes"; #endif /* * This program is free software; you can redistribute it and/or modify @@ -74,6 +74,7 @@ void AdvSearch::init() this, SLOT(addAFiltypPB_clicked())); connect(saveFileTypesPB, SIGNAL(clicked()), this, SLOT(saveFileTypes())); + connect(addClausePB, SIGNAL(clicked()), this, SLOT(addClause())); // Initialize lists of accepted and ignored mime types from config // and settings @@ -132,6 +133,19 @@ void AdvSearch::saveFileTypes() rwSettings(true); } +void AdvSearch::addClause() +{ + SearchClauseW *w = new SearchClauseW(this); + m_clauseWins.push_back(w); + connect(w->wordsLE, SIGNAL(returnPressed()), + this, SLOT(searchPB_clicked())); + clauseVBox->insertWidget(-1, w); + w->show(); + // Have to adjust the size else we lose the bottom buttons! Why? 
+ QSize sz = AdvSearchBaseLayout->sizeHint(); + resize(QSize(sz.width()+20, sz.height()+40)); +} + // Move selected file types from the ignored to the searched box void AdvSearch::addFiltypPB_clicked() { @@ -208,7 +222,17 @@ void AdvSearch::searchPB_clicked() (const char *)phraseLE->text().utf8(), 0)); hasnotnot = true; } - + for (list::iterator it = m_clauseWins.begin(); + it != m_clauseWins.end(); it++) { + SearchDataClause *cl; + if ((cl = (*it)->getClause())) { + switch (cl->m_tp) { + case SCLT_EXCL: hasnot = true; break; + default: hasnotnot = true; break; + } + sdata->addClause(cl); + } + } if (!hasnotnot) { if (!hasnot) return; @@ -248,4 +272,3 @@ void AdvSearch::browsePB_clicked() QString dir = QFileDialog::getExistingDirectory(); subtreeCMB->setEditText(dir); } - diff --git a/src/qtgui/advsearch_w.h b/src/qtgui/advsearch_w.h index b9575588..bbf32c7d 100644 --- a/src/qtgui/advsearch_w.h +++ b/src/qtgui/advsearch_w.h @@ -1,6 +1,6 @@ #ifndef _ADVSEARCH_W_H_INCLUDED_ #define _ADVSEARCH_W_H_INCLUDED_ -/* @(#$Id: advsearch_w.h,v 1.3 2006-11-13 08:58:47 dockes Exp $ (C) 2005 J.F.Dockes */ +/* @(#$Id: advsearch_w.h,v 1.4 2006-11-14 13:55:43 dockes Exp $ (C) 2005 J.F.Dockes */ /* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -17,12 +17,13 @@ * Free Software Foundation, Inc., * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
*/ +#include #include #include #include "advsearch.h" -#include "refcntr.h" - +#include "searchclause_w.h" #include "recoll.h" +#include "refcntr.h" #include "searchdata.h" class AdvSearch : public AdvSearchBase @@ -30,7 +31,7 @@ class AdvSearch : public AdvSearchBase Q_OBJECT public: - AdvSearch( QWidget* parent = 0, const char* name = 0, bool modal = FALSE, WFlags fl = 0 ) : AdvSearchBase(parent,name,modal,fl) + AdvSearch(QWidget* parent = 0, const char* name = 0, bool modal = FALSE, WFlags fl = 0) : AdvSearchBase(parent,name,modal,fl) {init();} ~AdvSearch(){} public slots: @@ -38,17 +39,18 @@ public slots: virtual void delAFiltypPB_clicked(); virtual void addFiltypPB_clicked(); virtual void addAFiltypPB_clicked(); - virtual void restrictFtCB_toggled( bool on ); + virtual void restrictFtCB_toggled(bool on); virtual void searchPB_clicked(); virtual void browsePB_clicked(); virtual void saveFileTypes(); + virtual void addClause(); signals: void startSearch(RefCntr); private: virtual void init(); - + std::list m_clauseWins; }; diff --git a/src/qtgui/rclmain_w.cpp b/src/qtgui/rclmain_w.cpp index fab66b4b..36ae7e96 100644 --- a/src/qtgui/rclmain_w.cpp +++ b/src/qtgui/rclmain_w.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: rclmain_w.cpp,v 1.6 2006-11-13 08:58:47 dockes Exp $ (C) 2005 J.F.Dockes"; +static char rcsid[] = "@(#$Id: rclmain_w.cpp,v 1.7 2006-11-14 13:55:43 dockes Exp $ (C) 2005 J.F.Dockes"; #endif /* * This program is free software; you can redistribute it and/or modify @@ -317,8 +317,7 @@ static string urltolocalpath(string url) return url.substr(7, string::npos); } -// Execute an advanced search query. 
The parameters normally come from -// the advanced search dialog +// Start a db query and set the reslist docsource void RclMain::startAdvSearch(RefCntr sdata) { LOGDEB(("RclMain::startAdvSearch\n")); @@ -340,8 +339,11 @@ void RclMain::startAdvSearch(RefCntr sdata) if (!prefs.queryStemLang.length() == 0) qopts |= Rcl::Db::QO_STEM; - if (!rcldb->setQuery(sdata, qopts, prefs.queryStemLang.ascii())) + if (!rcldb->setQuery(sdata, qopts, prefs.queryStemLang.ascii())) { + QMessageBox::warning(0, "Recoll", tr("Cant start query: ") + + QString::fromAscii(rcldb->getReason().c_str())); return; + } curPreview = 0; DocSequence *docsource; diff --git a/src/qtgui/searchclause_w.cpp b/src/qtgui/searchclause_w.cpp new file mode 100644 index 00000000..35770044 --- /dev/null +++ b/src/qtgui/searchclause_w.cpp @@ -0,0 +1,143 @@ +#ifndef lint +static char rcsid[] = "@(#$Id: searchclause_w.cpp,v 1.1 2006-11-14 13:55:43 dockes Exp $ (C) 2005 J.F.Dockes"; +#endif +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the + * Free Software Foundation, Inc., + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ +#include "autoconfig.h" + +#include "searchclause_w.h" + +#include +#include +#include +#include +#include +#include +#include + +/* + * Constructs a SearchClauseW as a child of 'parent', with the + * name 'name' and widget flags set to 'f'. 
+ */ +SearchClauseW::SearchClauseW(QWidget* parent, const char* name, WFlags fl) + : QWidget(parent, name, fl) +{ + if (!name) + setName("SearchClauseW"); + searchClauseLayout = new QVBoxLayout(this); + + hLayout = new QHBoxLayout(0, 0, 6, "hLayout"); + + sTpCMB = new QComboBox(FALSE, this, "sTpCMB"); + hLayout->addWidget(sTpCMB); + + proxSlackSB = new QSpinBox(this, "proxSlackSB"); + hLayout->addWidget(proxSlackSB); + + wordsLE = new QLineEdit(this, "wordsLE"); + wordsLE->setMinimumSize(QSize(250, 0)); + hLayout->addWidget(wordsLE); + searchClauseLayout->addLayout(hLayout); + languageChange(); + resize(QSize(0, 0).expandedTo(minimumSizeHint())); + clearWState(WState_Polished); + + connect(sTpCMB, SIGNAL(activated(int)), + this, SLOT(tpChange(int))); +} + +/* + * Destroys the object and frees any allocated resources + */ +SearchClauseW::~SearchClauseW() +{ + // no need to delete child widgets, Qt does it all for us +} + +/* + * Sets the strings of the subwidgets using the current + * language. 
+ */ +void SearchClauseW::languageChange() +{ + setCaption(tr("SearchClauseW")); + sTpCMB->clear(); + sTpCMB->insertItem(tr("Any of these")); // 0 + sTpCMB->insertItem(tr("All of these")); //1 + sTpCMB->insertItem(tr("None of these"));//2 + sTpCMB->insertItem(tr("This phrase"));//3 + sTpCMB->insertItem(tr("Terms in proximity"));//4 + sTpCMB->insertItem(tr("File name matching"));//5 + // sTpCMB->insertItem(tr("Complex clause"));//6 + + // Ensure that the spinbox will be enabled/disabled depending on + // combobox state + tpChange(0); + + QToolTip::add(sTpCMB, tr("Select the type of query that will be performed with the words")); + QToolTip::add(proxSlackSB, tr("Number of additional words that may be interspersed with the chosen ones")); +} + +using namespace Rcl; + +// Translate my window state into an Rcl search clause +SearchDataClause * +SearchClauseW::getClause() +{ + if (wordsLE->text().isEmpty()) + return 0; + switch (sTpCMB->currentItem()) { + case 0: + return new SearchDataClauseSimple(SCLT_OR, + (const char *)wordsLE->text().utf8()); + case 1: + return new SearchDataClauseSimple(SCLT_AND, + (const char *)wordsLE->text().utf8()); + case 2: + return new SearchDataClauseSimple(SCLT_EXCL, + (const char *)wordsLE->text().utf8()); + case 3: + return new SearchDataClauseDist(SCLT_PHRASE, + (const char *)wordsLE->text().utf8(), + proxSlackSB->value()); + case 4: + fprintf(stderr, "NEAR\n"); + return new SearchDataClauseDist(SCLT_NEAR, + (const char *)wordsLE->text().utf8(), + proxSlackSB->value()); + case 5: + return new SearchDataClauseFilename((const char *)wordsLE->text().utf8()); + case 6: + default: + return 0; + } +} + +// Handle combobox change: may need to enable/disable the distance spinbox +void SearchClauseW::tpChange(int index) +{ + switch (index) { + case 3: + case 4: + proxSlackSB->setEnabled(true); + if (index == 4) + proxSlackSB->setValue(10); + break; + default: + proxSlackSB->setEnabled(false); + } +} diff --git a/src/qtgui/searchclause_w.h 
b/src/qtgui/searchclause_w.h new file mode 100644 index 00000000..5d085508 --- /dev/null +++ b/src/qtgui/searchclause_w.h @@ -0,0 +1,54 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the + * Free Software Foundation, Inc., + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ +#ifndef SEARCHCLAUSE_H +#define SEARCHCLAUSE_H +// A class for entry of a search clause: type (OR/AND/etc.), distance +// for PHRASE or NEAR, and text + +#include +#include +#include "searchdata.h" + +class QVBoxLayout; +class QHBoxLayout; +class QComboBox; +class QSpinBox; +class QLineEdit; + +class SearchClauseW : public QWidget +{ + Q_OBJECT + +public: + SearchClauseW( QWidget* parent = 0, const char* name = 0, WFlags fl = 0 ); + ~SearchClauseW(); + Rcl::SearchDataClause *getClause(); + + QComboBox* sTpCMB; + QSpinBox* proxSlackSB; + QLineEdit* wordsLE; + +protected: + QVBoxLayout* searchClauseLayout; + QHBoxLayout* hLayout; + +protected slots: + virtual void languageChange(); + virtual void tpChange(int); +}; + +#endif // SEARCHCLAUSE_H diff --git a/src/rcldb/rcldb.cpp b/src/rcldb/rcldb.cpp index eb644807..321a25cc 100644 --- a/src/rcldb/rcldb.cpp +++ b/src/rcldb/rcldb.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.93 2006-11-13 14:51:58 dockes Exp $ (C) 2004 J.F.Dockes"; +static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.94 2006-11-14 13:55:43 dockes Exp $ (C) 2004 
J.F.Dockes"; #endif /* * This program is free software; you can redistribute it and/or modify @@ -1250,7 +1250,7 @@ bool Db::setQuery(RefCntr sdata, int opts, LOGERR(("Db::setQuery: no db!\n")); return false; } - + m_reason.erase(); LOGDEB(("Db::setQuery:\n")); m_filterTopDir = sdata->m_topdir; @@ -1259,7 +1259,11 @@ bool Db::setQuery(RefCntr sdata, int opts, m_ndb->m_termfreqs.clear(); Xapian::Query xq; - sdata->toNativeQuery(*this, &xq, (opts & Db::QO_STEM) ? stemlang : ""); + if (!sdata->toNativeQuery(*this, &xq, + (opts & Db::QO_STEM) ? stemlang : "")) { + m_reason += sdata->getReason(); + return false; + } m_ndb->query = xq; delete m_ndb->enquire; diff --git a/src/rcldb/rcldb.h b/src/rcldb/rcldb.h index 46948d74..b8f32d31 100644 --- a/src/rcldb/rcldb.h +++ b/src/rcldb/rcldb.h @@ -16,7 +16,7 @@ */ #ifndef _DB_H_INCLUDED_ #define _DB_H_INCLUDED_ -/* @(#$Id: rcldb.h,v 1.41 2006-11-13 08:49:44 dockes Exp $ (C) 2004 J.F.Dockes */ +/* @(#$Id: rcldb.h,v 1.42 2006-11-14 13:55:43 dockes Exp $ (C) 2004 J.F.Dockes */ #include #include @@ -217,13 +217,14 @@ class Db { /** Filename wildcard expansion */ bool filenameWildExp(const string& exp, list& names); - + string getReason(){return m_reason;} private: string m_filterTopDir; // Current query filter on subtree top directory vector m_dbindices; // In case there is a postq filter: sequence of // db indices that match + string m_reason; // Error explanation // Things we don't want to have here. friend class Native; Native *m_ndb; // Pointer to private data. 
We don't want db(ie diff --git a/src/rcldb/searchdata.cpp b/src/rcldb/searchdata.cpp index 16bde44c..9c97dad2 100644 --- a/src/rcldb/searchdata.cpp +++ b/src/rcldb/searchdata.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: searchdata.cpp,v 1.1 2006-11-13 08:49:44 dockes Exp $ (C) 2006 J.F.Dockes"; +static char rcsid[] = "@(#$Id: searchdata.cpp,v 1.2 2006-11-14 13:55:43 dockes Exp $ (C) 2006 J.F.Dockes"; #endif /* * This program is free software; you can redistribute it and/or modify @@ -43,16 +43,21 @@ typedef list::iterator qlist_it_t; bool SearchData::toNativeQuery(Rcl::Db &db, void *d, const string& stemlang) { Xapian::Query xq; + m_reason.erase(); // Walk the clause list translating each in turn and building the // Xapian query tree for (qlist_it_t it = m_query.begin(); it != m_query.end(); it++) { Xapian::Query nq; - (*it)->toNativeQuery(db, &nq, stemlang); - Xapian::Query::op op; + if (!(*it)->toNativeQuery(db, &nq, stemlang)) { + LOGERR(("SearchData::toNativeQuery: failed\n")); + m_reason = (*it)->getReason(); + return false; + } // If this structure is an AND list, must use AND_NOT for excl clauses. // Else this is an OR list, and there can't be excl clauses + Xapian::Query::op op; if (m_tp == SCLT_AND) { op = (*it)->m_tp == SCLT_EXCL ? Xapian::Query::OP_AND_NOT: Xapian::Query::OP_AND; @@ -137,97 +142,133 @@ class wsQData : public TextSplitCB { } }; +/** Possibly expand term into its stem siblings, make them dumb strings */ +static void maybeStemExp(Db& db, const string& stemlang, const string& term, + list& exp) +{ + string term1; + dumb_string(term, term1); + if (!stemlang.empty()) { + bool nostemexp = false; + // Check if the first letter is a majuscule in which + // case we do not want to do stem expansion. 
Note that + // the test is convoluted and possibly problematic + if (term.length() > 0) { + string noacterm,noaclowterm; + if (unacmaybefold(term, noacterm, "UTF-8", false) && + unacmaybefold(noacterm, noaclowterm, "UTF-8", true)) { + Utf8Iter it1(noacterm); + Utf8Iter it2(noaclowterm); + if (*it1 != *it2) + nostemexp = true; + } + } + LOGDEB1(("Term: %s stem expansion: %s\n", + term.c_str(), nostemexp?"no":"yes")); + if (!nostemexp) { + exp = db.stemExpand(stemlang, term1); + return; + } + } -// Turn string into list of xapian queries. There is little -// interpretation done on the string (no +term -term or filename:term -// stuff). We just separate words and phrases, and interpret -// capitalized terms as wanting no stem expansion. -// The final list contains one query for each term or phrase -// - Elements corresponding to a stem-expanded part are an OP_OR -// composition of the stem-expanded terms (or a single term query). -// - Elements corresponding to a phrase are an OP_PHRASE composition of the -// phrase terms (no stem expansion in this case) -static void stringToXapianQueries(const string &iq, + exp.push_back(term1); +} + +/** Turn string into list of xapian queries. There is little + * interpretation done on the string (no +term -term or filename:term + * stuff). We just separate words and phrases, and interpret + * capitalized terms as wanting no stem expansion. + * The final list contains one query for each term or phrase + * - Elements corresponding to a stem-expanded part are an OP_OR + * composition of the stem-expanded terms (or a single term query). 
+ * - Elements corresponding to a phrase are an OP_PHRASE composition of the + * phrase terms (no stem expansion in this case) + * @return the subquery count (either or'd stem-expanded terms or phrase word + * count) + */ +static bool stringToXapianQueries(const string &iq, const string& stemlang, Db& db, - list &pqueries) + string &ermsg, + list &pqueries, + int slack = 0, bool useNear = false) { string qstring = iq; bool opt_stemexp = !stemlang.empty(); + ermsg.erase(); - // Split into (possibly single word) phrases ("this is a phrase"): + // Split into words and phrases (word1 word2 "this is a phrase"): list phrases; stringToStrings(qstring, phrases); // Then process each phrase: split into terms and transform into // appropriate Xapian Query + try { + for (list::iterator it = phrases.begin(); + it != phrases.end(); it++) { + LOGDEB(("strToXapianQ: phrase or word: [%s]\n", it->c_str())); - for (list::iterator it=phrases.begin(); it !=phrases.end(); it++) { - LOGDEB(("strToXapianQ: phrase or word: [%s]\n", it->c_str())); + // If there are both spans and single words in this element, + // we need to use a word split, else a phrase query including + // a span would fail if we didn't adjust the proximity to + // account for the additional span term which is complicated. + wsQData splitDataS, splitDataW; + TextSplit splitterS(&splitDataS, TextSplit::TXTS_ONLYSPANS); + splitterS.text_to_words(*it); + TextSplit splitterW(&splitDataW, TextSplit::TXTS_NOSPANS); + splitterW.text_to_words(*it); + wsQData& splitData = splitDataS; + if (splitDataS.terms.size() > 1 && splitDataS.terms.size() != + splitDataW.terms.size()) + splitData = splitDataW; - // If there are both spans and single words in this element, - // we need to use a word split, else a phrase query including - // a span would fail if we didn't adjust the proximity to - // account for the additional span term which is complicated. 
- wsQData splitDataS, splitDataW; - TextSplit splitterS(&splitDataS, TextSplit::TXTS_ONLYSPANS); - splitterS.text_to_words(*it); - TextSplit splitterW(&splitDataW, TextSplit::TXTS_NOSPANS); - splitterW.text_to_words(*it); - wsQData& splitData = splitDataS; - if (splitDataS.terms.size() > 1 && splitDataS.terms.size() != - splitDataW.terms.size()) - splitData = splitDataW; - - LOGDEB1(("strToXapianQ: splitter term count: %d\n", - splitData.terms.size())); - switch(splitData.terms.size()) { - case 0: continue;// ?? - case 1: // Not a real phrase: one term - { - string term = splitData.terms.front(); - bool nostemexp = false; - // Check if the first letter is a majuscule in which - // case we do not want to do stem expansion. Note that - // the test is convoluted and possibly problematic - if (term.length() > 0) { - string noacterm,noaclowterm; - if (unacmaybefold(term, noacterm, "UTF-8", false) && - unacmaybefold(noacterm, noaclowterm, "UTF-8", true)) { - Utf8Iter it1(noacterm); - Utf8Iter it2(noaclowterm); - if (*it1 != *it2) - nostemexp = true; - } + LOGDEB1(("strToXapianQ: splitter term count: %d\n", + splitData.terms.size())); + switch(splitData.terms.size()) { + case 0: continue;// ?? + case 1: // Not a real phrase: one term + { + string term = splitData.terms.front(); + list exp; + maybeStemExp(db, stemlang, term, exp); + // Push either term or OR of stem-expanded set + pqueries.push_back(Xapian::Query(Xapian::Query::OP_OR, + exp.begin(), exp.end())); } - LOGDEB1(("Term: %s stem expansion: %s\n", - term.c_str(), nostemexp?"no":"yes")); + break; - list exp; - string term1; - dumb_string(term, term1); - // Possibly perform stem compression/expansion - if (!nostemexp && opt_stemexp) { - exp = db.stemExpand(stemlang, term1); - } else { - exp.push_back(term1); + default: + // Phrase/near + Xapian::Query::op op = useNear ? 
Xapian::Query::OP_NEAR : + Xapian::Query::OP_PHRASE; + list orqueries; + for (vector::iterator it = splitData.terms.begin(); + it != splitData.terms.end(); it++) { + listexp; + maybeStemExp(db, stemlang, *it, exp); + orqueries.push_back(Xapian::Query(Xapian::Query::OP_OR, + exp.begin(), exp.end())); } - - // Push either term or OR of stem-expanded set - pqueries.push_back(Xapian::Query(Xapian::Query::OP_OR, - exp.begin(), exp.end())); + pqueries.push_back(Xapian::Query(op, + orqueries.begin(), + orqueries.end(), + splitData.terms.size() + slack)); } - break; - - default: - // Phrase: no stem expansion - splitData.dumball(); - LOGDEB(("Pushing phrase: [%s]\n", splitData.catterms().c_str())); - pqueries.push_back(Xapian::Query(Xapian::Query::OP_PHRASE, - splitData.terms.begin(), - splitData.terms.end())); } + } catch (const Xapian::Error &e) { + ermsg = e.get_msg(); + } catch (const string &s) { + ermsg = s; + } catch (const char *s) { + ermsg = s; + } catch (...) { + ermsg = "Caught unknown exception"; } + if (!ermsg.empty()) { + LOGERR(("stringToXapianQueries: %s\n", ermsg.c_str())); + return false; + } + return true; } // Translate a simple OR, AND, or EXCL search clause. @@ -247,7 +288,8 @@ bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p, return false; } list pqueries; - stringToXapianQueries(m_text, stemlang, db, pqueries); + if (!stringToXapianQueries(m_text, stemlang, db, m_reason, pqueries)) + return false; if (pqueries.empty()) { LOGERR(("SearchDataClauseSimple: resolved to null query\n")); return true; @@ -277,17 +319,17 @@ bool SearchDataClauseDist::toNativeQuery(Rcl::Db &db, void *p, { Xapian::Query *qp = (Xapian::Query *)p; *qp = Xapian::Query(); - - Xapian::Query::op op = m_tp == SCLT_PHRASE ? 
Xapian::Query::OP_PHRASE : - Xapian::Query::OP_NEAR; list pqueries; Xapian::Query nq; string s = string("\"") + m_text + string("\""); + bool useNear = m_tp == SCLT_NEAR; // Use stringToXapianQueries anyway to lowercase and simplify the // phrase terms etc. The result should be a single element list - stringToXapianQueries(s, stemlang, db, pqueries); + if (!stringToXapianQueries(s, stemlang, db, m_reason, pqueries, + m_slack, useNear)) + return false; if (pqueries.empty()) { LOGERR(("SearchDataClauseDist: resolved to null query\n")); return true; diff --git a/src/rcldb/searchdata.h b/src/rcldb/searchdata.h index 852127ee..a1304322 100644 --- a/src/rcldb/searchdata.h +++ b/src/rcldb/searchdata.h @@ -1,6 +1,6 @@ #ifndef _SEARCHDATA_H_INCLUDED_ #define _SEARCHDATA_H_INCLUDED_ -/* @(#$Id: searchdata.h,v 1.3 2006-11-13 08:49:45 dockes Exp $ (C) 2004 J.F.Dockes */ +/* @(#$Id: searchdata.h,v 1.4 2006-11-14 13:55:43 dockes Exp $ (C) 2004 J.F.Dockes */ #include #include @@ -36,6 +36,7 @@ class SearchData { // Printable expanded version of the complete query, obtained from Xapian // valid after setQuery() call string m_description; + string m_reason; SearchData(SClType tp) : m_tp(tp) {} ~SearchData() {erase();} @@ -52,6 +53,8 @@ class SearchData { /** We become the owner of cl and will delete it */ bool addClause(SearchDataClause *cl); + string getReason() {return m_reason;} + private: /* Copyconst and assignment private and forbidden */ SearchData(const SearchData &) {} @@ -66,6 +69,9 @@ class SearchDataClause { virtual ~SearchDataClause() {} virtual bool toNativeQuery(Rcl::Db &db, void *, const string&) = 0; virtual bool isFileName() {return m_tp == SCLT_FILENAME ? 
true : false;} + string getReason() {return m_reason;} + protected: + string m_reason; }; class SearchDataClauseSimple : public SearchDataClause { @@ -81,20 +87,20 @@ protected: class SearchDataClauseFilename : public SearchDataClauseSimple { public: SearchDataClauseFilename(string txt) - : SearchDataClauseSimple(SCLT_FILENAME, m_text) {} + : SearchDataClauseSimple(SCLT_FILENAME, txt) {} virtual ~SearchDataClauseFilename() {} virtual bool toNativeQuery(Rcl::Db &db, void *, const string& stemlang); }; class SearchDataClauseDist : public SearchDataClauseSimple { public: - SearchDataClauseDist(SClType tp, string txt, int dist) - : SearchDataClauseSimple(tp, txt), m_distance(dist) {} + SearchDataClauseDist(SClType tp, string txt, int slack) + : SearchDataClauseSimple(tp, txt), m_slack(slack) {} virtual ~SearchDataClauseDist() {} virtual bool toNativeQuery(Rcl::Db &db, void *, const string& stemlang); protected: - int m_distance; + int m_slack; }; class SearchDataClauseSub : public SearchDataClause {