autosuffs featurelet

This commit is contained in:
Jean-Francois Dockes 2010-09-10 09:50:11 +02:00
parent f3b0b49c77
commit 176b9b19b6
10 changed files with 201 additions and 49 deletions

View File

@ -1412,6 +1412,14 @@ fvwm
</para>
</formalpara>
<formalpara><title>Automatic suffixes</title>
<para>Words like <literal>odt</literal> or <literal>ods</literal>
can be automatically turned into query language
<literal>ext:xxx</literal> clauses. This can be enabled in the
<guilabel>Search preferences</guilabel> panel in the GUI.
</para>
</formalpara>
<formalpara><title>Disabling stem expansion</title>
<para>Entering a capitalized word in any search field will prevent
stem expansion (no search for
@ -1545,7 +1553,8 @@ fvwm
interface itself, the parameters used for searching and
returning results, and what indexes are searched.</para>
<formalpara><title>User interface parameters:</title>
<formalpara id="rcl.search.custom.ui">
<title>User interface parameters:</title>
<para>
<itemizedlist>
@ -1639,7 +1648,8 @@ fvwm
</formalpara>
<formalpara><title>Search parameters:</title>
<formalpara id="rcl.search.custom.search">
<title>Search parameters:</title>
<para>
<itemizedlist>
@ -1678,12 +1688,6 @@ fvwm
may want to turn it off.</para>
</listitem>
<listitem><para><guilabel>Replace abstracts from
documents</guilabel>: this decides if we should synthesize and
display an abstract in place of an explicit abstract found
within the document itself.</para>
</listitem>
<listitem><para><guilabel>Synthetic abstract size</guilabel>:
adjust to taste...</para>
</listitem>
@ -1693,6 +1697,13 @@ fvwm
each term occurrence.</para>
</listitem>
<listitem><para><guilabel>Query language magic file name
suffixes</guilabel>: a list of words which automatically get
turned into <literal>ext:xxx</literal> file name suffix clauses
when starting a query language query (e.g. <literal>doc xls
xlsx...</literal>). This will save some typing for people who
use file types a lot when querying.</para>
</listitem>
</itemizedlist>
</para>
</formalpara>

View File

@ -144,6 +144,9 @@ void rwSettings(bool writing)
Num, 250);
SETTING_RW(prefs.syntAbsCtx, "/Recoll/prefs/query/syntAbsCtx",
Num, 4);
SETTING_RW(prefs.autoSuffs, "/Recoll/prefs/query/autoSuffs", , "");
SETTING_RW(prefs.autoSuffsEnable,
"/Recoll/prefs/query/autoSuffsEnable", Bool, false);
SETTING_RW(prefs.sortDepth, "/Recoll/prefs/query/sortDepth",
Num, 100);
@ -255,5 +258,4 @@ void rwSettings(bool writing)
for (list<string>::iterator it = tl.begin(); it != tl.end(); it++)
prefs.asearchSubdirHist.push_front(QString::fromUtf8(it->c_str()));
}
}

View File

@ -86,8 +86,7 @@ class PrefsPack {
bool startWithSortToolOpen;
bool previewHtml;
bool collapseDuplicates;
// Extra query indexes. This are encoded to base64 before storing
// to the qt settings file to avoid any bin string/ charset conv issues
// Extra query indexes. These are stored in the history file, not qt prefs
list<string> allExtraDbs;
list<string> activeExtraDbs;
// Advanced search subdir restriction: we don't activate the last value
@ -100,6 +99,10 @@ class PrefsPack {
// Ignored file types in adv search (startup default)
QStringList asearchIgnFilTyps;
bool fileTypesByCats;
// Words that are automatically turned to ext:xx specs in the query
// language entry.
QString autoSuffs;
bool autoSuffsEnable;
// Synthesized abstract length and word context size
int syntAbsLen;

View File

@ -129,7 +129,10 @@ void SSearch::startSimpleSearch()
if (tp == SST_LANG) {
string reason;
sdata = wasaStringToRcl(u8, reason);
if (prefs.autoSuffsEnable)
sdata = wasaStringToRcl(u8, reason, (const char *)prefs.autoSuffs.utf8());
else
sdata = wasaStringToRcl(u8, reason);
if (sdata == 0) {
QMessageBox::warning(0, "Recoll", tr("Bad query string") +
QString::fromAscii(reason.c_str()));

View File

@ -561,6 +561,56 @@ May be slow for big documents.</string>
<enum>Horizontal</enum>
</property>
</widget>
<widget class="QLayoutWidget">
<property name="name">
<cstring>layoutautosuffs</cstring>
</property>
<hbox>
<property name="name">
<cstring>unnamed</cstring>
</property>
<widget class="QLabel">
<property name="name">
<cstring>textLabel14</cstring>
</property>
<property name="toolTip" stdset="0">
<string>The words in the list will be automatically turned to ext:xxx clauses in the query language entry.</string>
</property>
<property name="sizePolicy">
<sizepolicy>
<hsizetype>5</hsizetype>
<vsizetype>5</vsizetype>
<horstretch>1</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property>
<property name="text">
<string>Query language magic file name suffixes.</string>
</property>
</widget>
<widget class="QCheckBox">
<property name="name">
<cstring>autoSuffsCB</cstring>
</property>
<property name="text">
<string>Enable</string>
</property>
</widget>
<widget class="QLineEdit">
<property name="name">
<cstring>autoSuffsLE</cstring>
</property>
<property name="minimumSize">
<size>
<width>30</width>
<height>0</height>
</size>
</property>
</widget>
</hbox>
</widget>
<spacer>
<property name="name">
<cstring>spacer2</cstring>

View File

@ -158,6 +158,9 @@ void UIPrefsDialog::setFromPrefs()
replAbsCB->setEnabled(prefs.queryBuildAbstract);
replAbsCB->setChecked(prefs.queryReplaceAbstract);
autoSuffsCB->setChecked(prefs.autoSuffsEnable);
autoSuffsLE->setText(prefs.autoSuffs);
// Initialize the extra indexes listboxes
idxLV->clear();
for (list<string>::iterator it = prefs.allExtraDbs.begin();
@ -223,6 +226,10 @@ void UIPrefsDialog::accept()
prefs.syntAbsLen = syntlenSB->value();
prefs.syntAbsCtx = syntctxSB->value();
prefs.autoSuffsEnable = autoSuffsCB->isChecked();
prefs.autoSuffs = autoSuffsLE->text();
QListViewItemIterator it(idxLV);
prefs.allExtraDbs.clear();
prefs.activeExtraDbs.clear();

View File

@ -33,13 +33,14 @@ using std::list;
#include "refcntr.h"
#include "textsplit.h"
Rcl::SearchData *wasaStringToRcl(const string &qs, string &reason)
Rcl::SearchData *wasaStringToRcl(const string &qs, string &reason,
const string& autosuffs)
{
StringToWasaQuery parser;
WasaQuery *wq = parser.stringToQuery(qs, reason);
if (wq == 0)
return 0;
Rcl::SearchData *rq = wasaQueryToRcl(wq);
Rcl::SearchData *rq = wasaQueryToRcl(wq, autosuffs);
if (rq == 0) {
reason = "Failed translating wasa query structure to recoll";
return 0;
@ -47,7 +48,8 @@ Rcl::SearchData *wasaStringToRcl(const string &qs, string &reason)
return rq;
}
Rcl::SearchData *wasaQueryToRcl(WasaQuery *wasa)
Rcl::SearchData *wasaQueryToRcl(WasaQuery *wasa,
const string& autosuffs)
{
if (wasa == 0)
return 0;
@ -75,8 +77,9 @@ Rcl::SearchData *wasaQueryToRcl(WasaQuery *wasa)
case WasaQuery::OP_LEAF: {
LOGDEB2(("wasaQueryToRcl: leaf clause [%s]:[%s]\n",
(*it)->m_fieldspec.c_str(), (*it)->m_value.c_str()));
unsigned int mods = (unsigned int)(*it)->m_modifiers;
// Special cases (mime, category, dir filter ...). Not pretty.
if (!stringicmp("mime", (*it)->m_fieldspec) ||
!stringicmp("format", (*it)->m_fieldspec)
) {
@ -103,6 +106,23 @@ Rcl::SearchData *wasaQueryToRcl(WasaQuery *wasa)
break;
}
// Change terms found in the "autosuffs" list into "ext"
// field queries
if ((*it)->m_fieldspec.empty() && !autosuffs.empty()) {
vector<string> asfv;
if (stringToStrings(autosuffs, asfv)) {
if (find_if(asfv.begin(), asfv.end(),
StringIcmpPred((*it)->m_value)) != asfv.end()) {
(*it)->m_fieldspec = "ext";
(*it)->m_modifiers |= WasaQuery::WQM_NOSTEM;
}
}
}
// "Regular" processing follows:
unsigned int mods = (unsigned int)(*it)->m_modifiers;
if (TextSplit::hasVisibleWhite((*it)->m_value)) {
int slack = (mods & WasaQuery::WQM_PHRASESLACK) ? 10 : 0;
Rcl::SClType tp = Rcl::SCLT_PHRASE;
@ -122,8 +142,7 @@ Rcl::SearchData *wasaQueryToRcl(WasaQuery *wasa)
LOGERR(("wasaQueryToRcl: out of memory\n"));
return 0;
}
if ((*it)->m_modifiers & WasaQuery::WQM_NOSTEM) {
fprintf(stderr, "Setting NOSTEM\n");
if (mods & WasaQuery::WQM_NOSTEM) {
nclause->setModifiers(Rcl::SearchDataClause::SDCM_NOSTEMMING);
}
sdata->addClause(nclause);

View File

@ -24,8 +24,10 @@ using std::string;
#include "rcldb.h"
#include "searchdata.h"
extern Rcl::SearchData *wasaStringToRcl(const string& query, string &reason);
extern Rcl::SearchData *wasaStringToRcl(const string& query, string &reason,
const string& autosuffs = string());
class WasaQuery;
extern Rcl::SearchData *wasaQueryToRcl(WasaQuery *wasa);
extern Rcl::SearchData *wasaQueryToRcl(WasaQuery *wasa,
const string& autosuffs = string());
#endif /* _WASATORCL_H_INCLUDED_ */

View File

@ -174,7 +174,8 @@ bool samecharset(const string &cs1, const string &cs2)
return mcs1 == mcs2;
}
template <class T> bool stringToStrings(const string &s, T &tokens)
template <class T> bool stringToStrings(const string &s, T &tokens,
const string& addseps)
{
string current;
tokens.clear();
@ -237,17 +238,34 @@ template <class T> bool stringToStrings(const string &s, T &tokens)
break;
default:
switch(state) {
case ESCAPE:
state = INQUOTE;
break;
case SPACE:
state = TOKEN;
break;
case TOKEN:
case INQUOTE:
break;
}
if (!addseps.empty() && addseps.find(s[i]) != string::npos) {
switch(state) {
case ESCAPE:
state = INQUOTE;
break;
case INQUOTE:
break;
case SPACE:
tokens.insert(tokens.end(), string(1, s[i]));
continue;
case TOKEN:
tokens.insert(tokens.end(), current);
current.erase();
tokens.insert(tokens.end(), string(1, s[i]));
state = SPACE;
continue;
}
} else switch(state) {
case ESCAPE:
state = INQUOTE;
break;
case SPACE:
state = TOKEN;
break;
case TOKEN:
case INQUOTE:
break;
}
current += s[i];
}
}
@ -263,17 +281,20 @@ template <class T> bool stringToStrings(const string &s, T &tokens)
}
return true;
}
bool stringToStrings(const string &s, list<string> &tokens)
bool stringToStrings(const string &s, list<string> &tokens,
const string& as)
{
return stringToStrings<list<string> >(s, tokens);
return stringToStrings<list<string> >(s, tokens, as);
}
bool stringToStrings(const string &s, vector<string> &tokens)
bool stringToStrings(const string &s, vector<string> &tokens,
const string& as)
{
return stringToStrings<vector<string> >(s, tokens);
return stringToStrings<vector<string> >(s, tokens, as);
}
bool stringToStrings(const string &s, set<string> &tokens)
bool stringToStrings(const string &s, set<string> &tokens,
const string& as)
{
return stringToStrings<set<string> >(s, tokens);
return stringToStrings<set<string> >(s, tokens, as);
}
template <class T> void stringsToString(const T &tokens, string &s)
@ -729,10 +750,28 @@ struct spair suffpairs[] = {
};
int nsuffpairs = sizeof(suffpairs) / sizeof(struct spair);
const char *thisprog;
int main(int argc, char **argv)
{
#if 0
thisprog = *argv++;argc--;
#if 1
if (argc <=0 ) {
cerr << "Usage: smallut <stringtosplit>" << endl;
exit(1);
}
string s = *argv++;argc--;
vector<string> vs;
if (!stringToStrings(s, vs, ":-()")) {
cerr << "Bad entry" << endl;
exit(1);
}
for (vector<string>::const_iterator it = vs.begin(); it != vs.end(); it++)
cerr << "[" << *it << "] ";
cerr << endl;
exit(0);
#elif 0
for (int i = 0; i < npairs; i++) {
{
int c = stringicmp(pairs[i].s1, pairs[i].s2);
@ -768,7 +807,7 @@ int main(int argc, char **argv)
cout << "[" << neutchars(testit, "\r\n") << "]" << endl;
string i, o;
cout << "neutchars(null) is [" << neutchars(i, "\r\n") << "]" << endl;
#elif 1
#elif 0
map<string, string> substs;
substs["a"] = "A_SUBST";
substs["title"] = "TITLE_SUBST";

View File

@ -35,6 +35,17 @@ using std::set;
// Note these are all ascii routines
extern int stringicmp(const string& s1, const string& s2);
/**
 * Predicate for find_if etc.: reports whether a candidate string compares
 * equal to a reference string according to stringicmp().
 *
 * Only a reference to the string passed to the constructor is kept, so that
 * string must stay alive for as long as the predicate (or any copy of it)
 * is in use.
 */
struct StringIcmpPred {
    /** @param s1 reference string; not copied, see lifetime note above. */
    StringIcmpPred(const string& s1)
        : m_s1(s1)
    {}
    /**
     * @param s2 candidate string.
     * @return true if stringicmp() returns 0 for the reference string and s2.
     *
     * Const-qualified (the call does not modify the predicate) so that it
     * can also be invoked through a const object or reference.
     */
    bool operator()(const string& s2) const {
        return stringicmp(m_s1, s2) == 0;
    }
    const string& m_s1;
};
extern int stringlowercmp(const string& alreadylower, const string& s2);
extern int stringuppercmp(const string& alreadyupper, const string& s2);
extern void stringtolower(string& io);
@ -52,11 +63,15 @@ extern bool samecharset(const string &cs1, const string &cs2);
* dquotes can be escaped with \ etc...
* Input is handled a byte at a time, things will work as long as space tab etc.
* have the ascii values and can't appear as part of a multibyte char. utf-8 ok
* but so are the iso-8859-x and surely others.
* but so are the iso-8859-x and surely others. The addseps characters
* must be single-byte.
*/
extern bool stringToStrings(const string &s, list<string> &tokens);
extern bool stringToStrings(const string &s, vector<string> &tokens);
extern bool stringToStrings(const string &s, set<string> &tokens);
extern bool stringToStrings(const string& s, list<string> &tokens,
const string& addseps = "");
extern bool stringToStrings(const string& s, vector<string> &tokens,
const string& addseps = "");
extern bool stringToStrings(const string& s, set<string> &tokens,
const string& addseps = "");
/**
* Inverse operation:
@ -78,7 +93,7 @@ extern bool stringToBool(const string &s);
tab}) at beginning and end of input string */
extern void trimstring(string &s, const char *ws = " \t");
/** Escape things like < or & by turining them to entities */
/** Escape things like < or & by turning them into entities */
extern string escapeHtml(const string &in);
/** Replace some chars with spaces (ie: newline chars). This is not utf8-aware
@ -86,8 +101,8 @@ extern string escapeHtml(const string &in);
extern string neutchars(const string &str, const string &chars);
extern void neutchars(const string &str, string& out, const string &chars);
/** turn string into something that won't be expanded by a shell. In practise
* quote with single-quotes and escape internal singlequotes */
/** Turn string into something that won't be expanded by a shell. In practice
* quote with double-quotes and escape $`\ */
extern string escapeShell(const string &str);
/** Truncate a string to a given maxlength, avoiding cutting off midword
@ -108,6 +123,7 @@ bool pcSubst(const string& in, string& out, map<char, string>& subs);
/** Substitute printf-like percents and also %(key) */
bool pcSubst(const string& in, string& out, map<string, string>& subs);
/** Compute times to help with perf issues */
class Chrono {
public:
Chrono();
@ -130,8 +146,8 @@ class Chrono {
long m_nsecs;
};
class TempBuf {
public:
/** Temp buffer with automatic deallocation */
struct TempBuf {
TempBuf()
: m_buf(0)
{}