autosuffs featurelet
This commit is contained in:
parent
f3b0b49c77
commit
176b9b19b6
@ -1412,6 +1412,14 @@ fvwm
|
||||
</para>
|
||||
</formalpara>
|
||||
|
||||
<formalpara><title>Automatic suffixes</title>
|
||||
<para>Words like <literal>odt</literal> or <literal>ods</literal>
|
||||
can be automatically turned into query language
|
||||
<literal>ext:xxx</literal> clauses. This can be enabled in the
|
||||
<guilabel>Search preferences</guilabel> panel in the GUI.
|
||||
</para>
|
||||
</formalpara>
|
||||
|
||||
<formalpara><title>Disabling stem expansion</title>
|
||||
<para>Entering a capitalized word in any search field will prevent
|
||||
stem expansion (no search for
|
||||
@ -1545,7 +1553,8 @@ fvwm
|
||||
interface itself, the parameters used for searching and
|
||||
returning results, and what indexes are searched.</para>
|
||||
|
||||
<formalpara><title>User interface parameters:</title>
|
||||
<formalpara id="rcl.search.custom.ui">
|
||||
<title>User interface parameters:</title>
|
||||
<para>
|
||||
<itemizedlist>
|
||||
|
||||
@ -1639,7 +1648,8 @@ fvwm
|
||||
</formalpara>
|
||||
|
||||
|
||||
<formalpara><title>Search parameters:</title>
|
||||
<formalpara id="rcl.search.custom.search">
|
||||
<title>Search parameters:</title>
|
||||
<para>
|
||||
<itemizedlist>
|
||||
|
||||
@ -1678,12 +1688,6 @@ fvwm
|
||||
may want to turn it off.</para>
|
||||
</listitem>
|
||||
|
||||
<listitem><para><guilabel>Replace abstracts from
|
||||
documents</guilabel>: this decides if we should synthesize and
|
||||
display an abstract in place of an explicit abstract found
|
||||
within the document itself.</para>
|
||||
</listitem>
|
||||
|
||||
<listitem><para><guilabel>Synthetic abstract size</guilabel>:
|
||||
adjust to taste...</para>
|
||||
</listitem>
|
||||
@ -1693,6 +1697,13 @@ fvwm
|
||||
each term occurrence.</para>
|
||||
</listitem>
|
||||
|
||||
<listitem><para><guilabel>Query language magic file name
|
||||
suffixes</guilabel>: a list of words which automatically get
|
||||
turned into <literal>ext:xxx</literal> file name suffix clauses
|
||||
when starting a query language query (e.g. <literal>doc xls
|
||||
xlsx...</literal>). This will save some typing for people who
|
||||
use file types a lot when querying.</para>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
</para>
|
||||
</formalpara>
|
||||
|
||||
@ -144,6 +144,9 @@ void rwSettings(bool writing)
|
||||
Num, 250);
|
||||
SETTING_RW(prefs.syntAbsCtx, "/Recoll/prefs/query/syntAbsCtx",
|
||||
Num, 4);
|
||||
SETTING_RW(prefs.autoSuffs, "/Recoll/prefs/query/autoSuffs", , "");
|
||||
SETTING_RW(prefs.autoSuffsEnable,
|
||||
"/Recoll/prefs/query/autoSuffsEnable", Bool, false);
|
||||
|
||||
SETTING_RW(prefs.sortDepth, "/Recoll/prefs/query/sortDepth",
|
||||
Num, 100);
|
||||
@ -255,5 +258,4 @@ void rwSettings(bool writing)
|
||||
for (list<string>::iterator it = tl.begin(); it != tl.end(); it++)
|
||||
prefs.asearchSubdirHist.push_front(QString::fromUtf8(it->c_str()));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -86,8 +86,7 @@ class PrefsPack {
|
||||
bool startWithSortToolOpen;
|
||||
bool previewHtml;
|
||||
bool collapseDuplicates;
|
||||
// Extra query indexes. This are encoded to base64 before storing
|
||||
// to the qt settings file to avoid any bin string/ charset conv issues
|
||||
// Extra query indexes. These are stored in the history file, not qt prefs
|
||||
list<string> allExtraDbs;
|
||||
list<string> activeExtraDbs;
|
||||
// Advanced search subdir restriction: we don't activate the last value
|
||||
@ -100,6 +99,10 @@ class PrefsPack {
|
||||
// Ignored file types in adv search (startup default)
|
||||
QStringList asearchIgnFilTyps;
|
||||
bool fileTypesByCats;
|
||||
// Words that are automatically turned to ext:xx specs in the query
|
||||
// language entry.
|
||||
QString autoSuffs;
|
||||
bool autoSuffsEnable;
|
||||
|
||||
// Synthesized abstract length and word context size
|
||||
int syntAbsLen;
|
||||
|
||||
@ -129,7 +129,10 @@ void SSearch::startSimpleSearch()
|
||||
|
||||
if (tp == SST_LANG) {
|
||||
string reason;
|
||||
sdata = wasaStringToRcl(u8, reason);
|
||||
if (prefs.autoSuffsEnable)
|
||||
sdata = wasaStringToRcl(u8, reason, (const char *)prefs.autoSuffs.utf8());
|
||||
else
|
||||
sdata = wasaStringToRcl(u8, reason);
|
||||
if (sdata == 0) {
|
||||
QMessageBox::warning(0, "Recoll", tr("Bad query string") +
|
||||
QString::fromAscii(reason.c_str()));
|
||||
|
||||
@ -561,6 +561,56 @@ May be slow for big documents.</string>
|
||||
<enum>Horizontal</enum>
|
||||
</property>
|
||||
</widget>
|
||||
|
||||
|
||||
<widget class="QLayoutWidget">
|
||||
<property name="name">
|
||||
<cstring>layoutautosuffs</cstring>
|
||||
</property>
|
||||
<hbox>
|
||||
<property name="name">
|
||||
<cstring>unnamed</cstring>
|
||||
</property>
|
||||
<widget class="QLabel">
|
||||
<property name="name">
|
||||
<cstring>textLabel14</cstring>
|
||||
</property>
|
||||
<property name="toolTip" stdset="0">
|
||||
<string>The words in the list will be automatically turned to ext:xxx clauses in the query language entry.</string>
|
||||
</property>
|
||||
<property name="sizePolicy">
|
||||
<sizepolicy>
|
||||
<hsizetype>5</hsizetype>
|
||||
<vsizetype>5</vsizetype>
|
||||
<horstretch>1</horstretch>
|
||||
<verstretch>0</verstretch>
|
||||
</sizepolicy>
|
||||
</property>
|
||||
<property name="text">
|
||||
<string>Query language magic file name suffixes.</string>
|
||||
</property>
|
||||
</widget>
|
||||
<widget class="QCheckBox">
|
||||
<property name="name">
|
||||
<cstring>autoSuffsCB</cstring>
|
||||
</property>
|
||||
<property name="text">
|
||||
<string>Enable</string>
|
||||
</property>
|
||||
</widget>
|
||||
<widget class="QLineEdit">
|
||||
<property name="name">
|
||||
<cstring>autoSuffsLE</cstring>
|
||||
</property>
|
||||
<property name="minimumSize">
|
||||
<size>
|
||||
<width>30</width>
|
||||
<height>0</height>
|
||||
</size>
|
||||
</property>
|
||||
</widget>
|
||||
</hbox>
|
||||
</widget>
|
||||
<spacer>
|
||||
<property name="name">
|
||||
<cstring>spacer2</cstring>
|
||||
|
||||
@ -158,6 +158,9 @@ void UIPrefsDialog::setFromPrefs()
|
||||
replAbsCB->setEnabled(prefs.queryBuildAbstract);
|
||||
replAbsCB->setChecked(prefs.queryReplaceAbstract);
|
||||
|
||||
autoSuffsCB->setChecked(prefs.autoSuffsEnable);
|
||||
autoSuffsLE->setText(prefs.autoSuffs);
|
||||
|
||||
// Initialize the extra indexes listboxes
|
||||
idxLV->clear();
|
||||
for (list<string>::iterator it = prefs.allExtraDbs.begin();
|
||||
@ -223,6 +226,10 @@ void UIPrefsDialog::accept()
|
||||
prefs.syntAbsLen = syntlenSB->value();
|
||||
prefs.syntAbsCtx = syntctxSB->value();
|
||||
|
||||
|
||||
prefs.autoSuffsEnable = autoSuffsCB->isChecked();
|
||||
prefs.autoSuffs = autoSuffsLE->text();
|
||||
|
||||
QListViewItemIterator it(idxLV);
|
||||
prefs.allExtraDbs.clear();
|
||||
prefs.activeExtraDbs.clear();
|
||||
|
||||
@ -33,13 +33,14 @@ using std::list;
|
||||
#include "refcntr.h"
|
||||
#include "textsplit.h"
|
||||
|
||||
Rcl::SearchData *wasaStringToRcl(const string &qs, string &reason)
|
||||
Rcl::SearchData *wasaStringToRcl(const string &qs, string &reason,
|
||||
const string& autosuffs)
|
||||
{
|
||||
StringToWasaQuery parser;
|
||||
WasaQuery *wq = parser.stringToQuery(qs, reason);
|
||||
if (wq == 0)
|
||||
return 0;
|
||||
Rcl::SearchData *rq = wasaQueryToRcl(wq);
|
||||
Rcl::SearchData *rq = wasaQueryToRcl(wq, autosuffs);
|
||||
if (rq == 0) {
|
||||
reason = "Failed translating wasa query structure to recoll";
|
||||
return 0;
|
||||
@ -47,7 +48,8 @@ Rcl::SearchData *wasaStringToRcl(const string &qs, string &reason)
|
||||
return rq;
|
||||
}
|
||||
|
||||
Rcl::SearchData *wasaQueryToRcl(WasaQuery *wasa)
|
||||
Rcl::SearchData *wasaQueryToRcl(WasaQuery *wasa,
|
||||
const string& autosuffs)
|
||||
{
|
||||
if (wasa == 0)
|
||||
return 0;
|
||||
@ -75,8 +77,9 @@ Rcl::SearchData *wasaQueryToRcl(WasaQuery *wasa)
|
||||
case WasaQuery::OP_LEAF: {
|
||||
LOGDEB2(("wasaQueryToRcl: leaf clause [%s]:[%s]\n",
|
||||
(*it)->m_fieldspec.c_str(), (*it)->m_value.c_str()));
|
||||
unsigned int mods = (unsigned int)(*it)->m_modifiers;
|
||||
|
||||
// Special cases (mime, category, dir filter ...). Not pretty.
|
||||
|
||||
if (!stringicmp("mime", (*it)->m_fieldspec) ||
|
||||
!stringicmp("format", (*it)->m_fieldspec)
|
||||
) {
|
||||
@ -103,6 +106,23 @@ Rcl::SearchData *wasaQueryToRcl(WasaQuery *wasa)
|
||||
break;
|
||||
}
|
||||
|
||||
// Change terms found in the "autosuffs" list into "ext"
|
||||
// field queries
|
||||
if ((*it)->m_fieldspec.empty() && !autosuffs.empty()) {
|
||||
vector<string> asfv;
|
||||
if (stringToStrings(autosuffs, asfv)) {
|
||||
if (find_if(asfv.begin(), asfv.end(),
|
||||
StringIcmpPred((*it)->m_value)) != asfv.end()) {
|
||||
(*it)->m_fieldspec = "ext";
|
||||
(*it)->m_modifiers |= WasaQuery::WQM_NOSTEM;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// "Regular" processing follows:
|
||||
unsigned int mods = (unsigned int)(*it)->m_modifiers;
|
||||
|
||||
if (TextSplit::hasVisibleWhite((*it)->m_value)) {
|
||||
int slack = (mods & WasaQuery::WQM_PHRASESLACK) ? 10 : 0;
|
||||
Rcl::SClType tp = Rcl::SCLT_PHRASE;
|
||||
@ -122,8 +142,7 @@ Rcl::SearchData *wasaQueryToRcl(WasaQuery *wasa)
|
||||
LOGERR(("wasaQueryToRcl: out of memory\n"));
|
||||
return 0;
|
||||
}
|
||||
if ((*it)->m_modifiers & WasaQuery::WQM_NOSTEM) {
|
||||
fprintf(stderr, "Setting NOSTEM\n");
|
||||
if (mods & WasaQuery::WQM_NOSTEM) {
|
||||
nclause->setModifiers(Rcl::SearchDataClause::SDCM_NOSTEMMING);
|
||||
}
|
||||
sdata->addClause(nclause);
|
||||
|
||||
@ -24,8 +24,10 @@ using std::string;
|
||||
#include "rcldb.h"
|
||||
#include "searchdata.h"
|
||||
|
||||
extern Rcl::SearchData *wasaStringToRcl(const string& query, string &reason);
|
||||
extern Rcl::SearchData *wasaStringToRcl(const string& query, string &reason,
|
||||
const string& autosuffs = string());
|
||||
class WasaQuery;
|
||||
extern Rcl::SearchData *wasaQueryToRcl(WasaQuery *wasa);
|
||||
extern Rcl::SearchData *wasaQueryToRcl(WasaQuery *wasa,
|
||||
const string& autosuffs = string());
|
||||
|
||||
#endif /* _WASATORCL_H_INCLUDED_ */
|
||||
|
||||
@ -174,7 +174,8 @@ bool samecharset(const string &cs1, const string &cs2)
|
||||
return mcs1 == mcs2;
|
||||
}
|
||||
|
||||
template <class T> bool stringToStrings(const string &s, T &tokens)
|
||||
template <class T> bool stringToStrings(const string &s, T &tokens,
|
||||
const string& addseps)
|
||||
{
|
||||
string current;
|
||||
tokens.clear();
|
||||
@ -237,17 +238,34 @@ template <class T> bool stringToStrings(const string &s, T &tokens)
|
||||
break;
|
||||
|
||||
default:
|
||||
switch(state) {
|
||||
case ESCAPE:
|
||||
state = INQUOTE;
|
||||
break;
|
||||
case SPACE:
|
||||
state = TOKEN;
|
||||
break;
|
||||
case TOKEN:
|
||||
case INQUOTE:
|
||||
break;
|
||||
}
|
||||
if (!addseps.empty() && addseps.find(s[i]) != string::npos) {
|
||||
switch(state) {
|
||||
case ESCAPE:
|
||||
state = INQUOTE;
|
||||
break;
|
||||
case INQUOTE:
|
||||
break;
|
||||
case SPACE:
|
||||
tokens.insert(tokens.end(), string(1, s[i]));
|
||||
continue;
|
||||
case TOKEN:
|
||||
tokens.insert(tokens.end(), current);
|
||||
current.erase();
|
||||
tokens.insert(tokens.end(), string(1, s[i]));
|
||||
state = SPACE;
|
||||
continue;
|
||||
}
|
||||
} else switch(state) {
|
||||
case ESCAPE:
|
||||
state = INQUOTE;
|
||||
break;
|
||||
case SPACE:
|
||||
state = TOKEN;
|
||||
break;
|
||||
case TOKEN:
|
||||
case INQUOTE:
|
||||
break;
|
||||
}
|
||||
current += s[i];
|
||||
}
|
||||
}
|
||||
@ -263,17 +281,20 @@ template <class T> bool stringToStrings(const string &s, T &tokens)
|
||||
}
|
||||
return true;
|
||||
}
|
||||
bool stringToStrings(const string &s, list<string> &tokens)
|
||||
bool stringToStrings(const string &s, list<string> &tokens,
|
||||
const string& as)
|
||||
{
|
||||
return stringToStrings<list<string> >(s, tokens);
|
||||
return stringToStrings<list<string> >(s, tokens, as);
|
||||
}
|
||||
bool stringToStrings(const string &s, vector<string> &tokens)
|
||||
bool stringToStrings(const string &s, vector<string> &tokens,
|
||||
const string& as)
|
||||
{
|
||||
return stringToStrings<vector<string> >(s, tokens);
|
||||
return stringToStrings<vector<string> >(s, tokens, as);
|
||||
}
|
||||
bool stringToStrings(const string &s, set<string> &tokens)
|
||||
bool stringToStrings(const string &s, set<string> &tokens,
|
||||
const string& as)
|
||||
{
|
||||
return stringToStrings<set<string> >(s, tokens);
|
||||
return stringToStrings<set<string> >(s, tokens, as);
|
||||
}
|
||||
|
||||
template <class T> void stringsToString(const T &tokens, string &s)
|
||||
@ -729,10 +750,28 @@ struct spair suffpairs[] = {
|
||||
};
|
||||
int nsuffpairs = sizeof(suffpairs) / sizeof(struct spair);
|
||||
|
||||
const char *thisprog;
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
#if 0
|
||||
thisprog = *argv++;argc--;
|
||||
|
||||
#if 1
|
||||
if (argc <=0 ) {
|
||||
cerr << "Usage: smallut <stringtosplit>" << endl;
|
||||
exit(1);
|
||||
}
|
||||
string s = *argv++;argc--;
|
||||
vector<string> vs;
|
||||
if (!stringToStrings(s, vs, ":-()")) {
|
||||
cerr << "Bad entry" << endl;
|
||||
exit(1);
|
||||
}
|
||||
for (vector<string>::const_iterator it = vs.begin(); it != vs.end(); it++)
|
||||
cerr << "[" << *it << "] ";
|
||||
cerr << endl;
|
||||
exit(0);
|
||||
#elif 0
|
||||
for (int i = 0; i < npairs; i++) {
|
||||
{
|
||||
int c = stringicmp(pairs[i].s1, pairs[i].s2);
|
||||
@ -768,7 +807,7 @@ int main(int argc, char **argv)
|
||||
cout << "[" << neutchars(testit, "\r\n") << "]" << endl;
|
||||
string i, o;
|
||||
cout << "neutchars(null) is [" << neutchars(i, "\r\n") << "]" << endl;
|
||||
#elif 1
|
||||
#elif 0
|
||||
map<string, string> substs;
|
||||
substs["a"] = "A_SUBST";
|
||||
substs["title"] = "TITLE_SUBST";
|
||||
|
||||
@ -35,6 +35,17 @@ using std::set;
|
||||
|
||||
// Note these are all ascii routines
|
||||
extern int stringicmp(const string& s1, const string& s2);
|
||||
// For find_if etc.
|
||||
struct StringIcmpPred {
|
||||
StringIcmpPred(const string& s1)
|
||||
: m_s1(s1)
|
||||
{}
|
||||
bool operator()(const string& s2) {
|
||||
return stringicmp(m_s1, s2) == 0;
|
||||
}
|
||||
const string& m_s1;
|
||||
};
|
||||
|
||||
extern int stringlowercmp(const string& alreadylower, const string& s2);
|
||||
extern int stringuppercmp(const string& alreadyupper, const string& s2);
|
||||
extern void stringtolower(string& io);
|
||||
@ -52,11 +63,15 @@ extern bool samecharset(const string &cs1, const string &cs2);
|
||||
* dquotes can be escaped with \ etc...
|
||||
* Input is handled a byte at a time, things will work as long as space tab etc.
|
||||
* have the ascii values and can't appear as part of a multibyte char. utf-8 ok
|
||||
* but so are the iso-8859-x and surely others.
|
||||
* but so are the iso-8859-x and surely others. addseps do have to be
|
||||
* single-bytes
|
||||
*/
|
||||
extern bool stringToStrings(const string &s, list<string> &tokens);
|
||||
extern bool stringToStrings(const string &s, vector<string> &tokens);
|
||||
extern bool stringToStrings(const string &s, set<string> &tokens);
|
||||
extern bool stringToStrings(const string& s, list<string> &tokens,
|
||||
const string& addseps = "");
|
||||
extern bool stringToStrings(const string& s, vector<string> &tokens,
|
||||
const string& addseps = "");
|
||||
extern bool stringToStrings(const string& s, set<string> &tokens,
|
||||
const string& addseps = "");
|
||||
|
||||
/**
|
||||
* Inverse operation:
|
||||
@ -78,7 +93,7 @@ extern bool stringToBool(const string &s);
|
||||
tab}) at beginning and end of input string */
|
||||
extern void trimstring(string &s, const char *ws = " \t");
|
||||
|
||||
/** Escape things like < or & by turining them to entities */
|
||||
/** Escape things like < or & by turning them into entities */
|
||||
extern string escapeHtml(const string &in);
|
||||
|
||||
/** Replace some chars with spaces (ie: newline chars). This is not utf8-aware
|
||||
@ -86,8 +101,8 @@ extern string escapeHtml(const string &in);
|
||||
extern string neutchars(const string &str, const string &chars);
|
||||
extern void neutchars(const string &str, string& out, const string &chars);
|
||||
|
||||
/** turn string into something that won't be expanded by a shell. In practise
|
||||
* quote with single-quotes and escape internal singlequotes */
|
||||
/** Turn string into something that won't be expanded by a shell. In practice
|
||||
* quote with double-quotes and escape $`\ */
|
||||
extern string escapeShell(const string &str);
|
||||
|
||||
/** Truncate a string to a given maxlength, avoiding cutting off midword
|
||||
@ -108,6 +123,7 @@ bool pcSubst(const string& in, string& out, map<char, string>& subs);
|
||||
/** Substitute printf-like percents and also %(key) */
|
||||
bool pcSubst(const string& in, string& out, map<string, string>& subs);
|
||||
|
||||
/** Compute times to help with perf issues */
|
||||
class Chrono {
|
||||
public:
|
||||
Chrono();
|
||||
@ -130,8 +146,8 @@ class Chrono {
|
||||
long m_nsecs;
|
||||
};
|
||||
|
||||
class TempBuf {
|
||||
public:
|
||||
/** Temp buffer with automatic deallocation */
|
||||
struct TempBuf {
|
||||
TempBuf()
|
||||
: m_buf(0)
|
||||
{}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user