From 176b9b19b648bfed93e185c3827c0fee6fce0ab8 Mon Sep 17 00:00:00 2001 From: Jean-Francois Dockes Date: Fri, 10 Sep 2010 09:50:11 +0200 Subject: [PATCH] autosuffs featurelet --- src/doc/user/usermanual.sgml | 27 ++++++++---- src/qtgui/guiutils.cpp | 4 +- src/qtgui/guiutils.h | 7 +++- src/qtgui/ssearch_w.cpp | 5 ++- src/qtgui/uiprefs.ui | 50 +++++++++++++++++++++++ src/qtgui/uiprefs_w.cpp | 7 ++++ src/query/wasatorcl.cpp | 31 +++++++++++--- src/query/wasatorcl.h | 6 ++- src/utils/smallut.cpp | 79 +++++++++++++++++++++++++++--------- src/utils/smallut.h | 34 ++++++++++++---- 10 files changed, 201 insertions(+), 49 deletions(-) diff --git a/src/doc/user/usermanual.sgml b/src/doc/user/usermanual.sgml index 2eb75fd0..6aa7ac45 100644 --- a/src/doc/user/usermanual.sgml +++ b/src/doc/user/usermanual.sgml @@ -1412,6 +1412,14 @@ fvwm + Automatic suffixes + Words like odt or ods + can be automatically turned into query language + ext:xxx clauses. This can be enabled in the + Search preferences panel in the GUI. + + + Disabling stem expansion Entering a capitalized word in any search field will prevent stem expansion (no search for @@ -1545,7 +1553,8 @@ fvwm interface itself, the parameters used for searching and returning results, and what indexes are searched. - User interface parameters: + + User interface parameters: @@ -1639,7 +1648,8 @@ fvwm - Search parameters: + + Search parameters: @@ -1678,12 +1688,6 @@ fvwm may want to turn it off. - Replace abstracts from - documents: this decides if we should synthesize and - display an abstract in place of an explicit abstract found - within the document itself. - - Synthetic abstract size: adjust to taste... @@ -1693,6 +1697,13 @@ fvwm each term occurrence. + Query language magic file name + suffixes: a list of words which automatically get + turned into ext:xxx file name suffix clauses + when starting a query language query (ie: doc xls + xlsx...). This will save some typing for people who + use file types a lot when querying. 
+ diff --git a/src/qtgui/guiutils.cpp b/src/qtgui/guiutils.cpp index ee655a35..9406c191 100644 --- a/src/qtgui/guiutils.cpp +++ b/src/qtgui/guiutils.cpp @@ -144,6 +144,9 @@ void rwSettings(bool writing) Num, 250); SETTING_RW(prefs.syntAbsCtx, "/Recoll/prefs/query/syntAbsCtx", Num, 4); + SETTING_RW(prefs.autoSuffs, "/Recoll/prefs/query/autoSuffs", , ""); + SETTING_RW(prefs.autoSuffsEnable, + "/Recoll/prefs/query/autoSuffsEnable", Bool, false); SETTING_RW(prefs.sortDepth, "/Recoll/prefs/query/sortDepth", Num, 100); @@ -255,5 +258,4 @@ void rwSettings(bool writing) for (list::iterator it = tl.begin(); it != tl.end(); it++) prefs.asearchSubdirHist.push_front(QString::fromUtf8(it->c_str())); } - } diff --git a/src/qtgui/guiutils.h b/src/qtgui/guiutils.h index 0c8bdfeb..8d9f9442 100644 --- a/src/qtgui/guiutils.h +++ b/src/qtgui/guiutils.h @@ -86,8 +86,7 @@ class PrefsPack { bool startWithSortToolOpen; bool previewHtml; bool collapseDuplicates; - // Extra query indexes. This are encoded to base64 before storing - // to the qt settings file to avoid any bin string/ charset conv issues + // Extra query indexes. These are stored in the history file, not qt prefs list allExtraDbs; list activeExtraDbs; // Advanced search subdir restriction: we don't activate the last value @@ -100,6 +99,10 @@ class PrefsPack { // Ignored file types in adv search (startup default) QStringList asearchIgnFilTyps; bool fileTypesByCats; + // Words that are automatically turned to ext:xx specs in the query + // language entry. 
+ QString autoSuffs; + bool autoSuffsEnable; // Synthetized abstract length and word context size int syntAbsLen; diff --git a/src/qtgui/ssearch_w.cpp b/src/qtgui/ssearch_w.cpp index 62ebb2c2..6c09131d 100644 --- a/src/qtgui/ssearch_w.cpp +++ b/src/qtgui/ssearch_w.cpp @@ -129,7 +129,10 @@ void SSearch::startSimpleSearch() if (tp == SST_LANG) { string reason; - sdata = wasaStringToRcl(u8, reason); + if (prefs.autoSuffsEnable) + sdata = wasaStringToRcl(u8, reason, (const char *)prefs.autoSuffs.utf8()); + else + sdata = wasaStringToRcl(u8, reason); if (sdata == 0) { QMessageBox::warning(0, "Recoll", tr("Bad query string") + QString::fromAscii(reason.c_str())); diff --git a/src/qtgui/uiprefs.ui b/src/qtgui/uiprefs.ui index 1957ffdc..527d2fb6 100644 --- a/src/qtgui/uiprefs.ui +++ b/src/qtgui/uiprefs.ui @@ -561,6 +561,56 @@ May be slow for big documents. Horizontal + + + + + layoutautosuffs + + + + unnamed + + + + textLabel14 + + + The words in the list will be automatically turned to ext:xxx clauses in the query language entry. + + + + 5 + 5 + 1 + 0 + + + + Query language magic file name suffixes. 
+ + + + + autoSuffsCB + + + Enable + + + + + autoSuffsLE + + + + 30 + 0 + + + + + spacer2 diff --git a/src/qtgui/uiprefs_w.cpp b/src/qtgui/uiprefs_w.cpp index fdbda44a..d3b0ac78 100644 --- a/src/qtgui/uiprefs_w.cpp +++ b/src/qtgui/uiprefs_w.cpp @@ -158,6 +158,9 @@ void UIPrefsDialog::setFromPrefs() replAbsCB->setEnabled(prefs.queryBuildAbstract); replAbsCB->setChecked(prefs.queryReplaceAbstract); + autoSuffsCB->setChecked(prefs.autoSuffsEnable); + autoSuffsLE->setText(prefs.autoSuffs); + // Initialize the extra indexes listboxes idxLV->clear(); for (list::iterator it = prefs.allExtraDbs.begin(); @@ -223,6 +226,10 @@ void UIPrefsDialog::accept() prefs.syntAbsLen = syntlenSB->value(); prefs.syntAbsCtx = syntctxSB->value(); + + prefs.autoSuffsEnable = autoSuffsCB->isChecked(); + prefs.autoSuffs = autoSuffsLE->text(); + QListViewItemIterator it(idxLV); prefs.allExtraDbs.clear(); prefs.activeExtraDbs.clear(); diff --git a/src/query/wasatorcl.cpp b/src/query/wasatorcl.cpp index 76e55762..7214c21f 100644 --- a/src/query/wasatorcl.cpp +++ b/src/query/wasatorcl.cpp @@ -33,13 +33,14 @@ using std::list; #include "refcntr.h" #include "textsplit.h" -Rcl::SearchData *wasaStringToRcl(const string &qs, string &reason) +Rcl::SearchData *wasaStringToRcl(const string &qs, string &reason, + const string& autosuffs) { StringToWasaQuery parser; WasaQuery *wq = parser.stringToQuery(qs, reason); if (wq == 0) return 0; - Rcl::SearchData *rq = wasaQueryToRcl(wq); + Rcl::SearchData *rq = wasaQueryToRcl(wq, autosuffs); if (rq == 0) { reason = "Failed translating wasa query structure to recoll"; return 0; @@ -47,7 +48,8 @@ Rcl::SearchData *wasaStringToRcl(const string &qs, string &reason) return rq; } -Rcl::SearchData *wasaQueryToRcl(WasaQuery *wasa) +Rcl::SearchData *wasaQueryToRcl(WasaQuery *wasa, + const string& autosuffs) { if (wasa == 0) return 0; @@ -75,8 +77,9 @@ Rcl::SearchData *wasaQueryToRcl(WasaQuery *wasa) case WasaQuery::OP_LEAF: { LOGDEB2(("wasaQueryToRcl: leaf clause 
[%s]:[%s]\n", (*it)->m_fieldspec.c_str(), (*it)->m_value.c_str())); - unsigned int mods = (unsigned int)(*it)->m_modifiers; + // Special cases (mime, category, dir filter ...). Not pretty. + if (!stringicmp("mime", (*it)->m_fieldspec) || !stringicmp("format", (*it)->m_fieldspec) ) { @@ -103,6 +106,23 @@ Rcl::SearchData *wasaQueryToRcl(WasaQuery *wasa) break; } + // Change terms found in the "autosuffs" list into "ext" + // field queries + if ((*it)->m_fieldspec.empty() && !autosuffs.empty()) { + vector asfv; + if (stringToStrings(autosuffs, asfv)) { + if (find_if(asfv.begin(), asfv.end(), + StringIcmpPred((*it)->m_value)) != asfv.end()) { + (*it)->m_fieldspec = "ext"; + (*it)->m_modifiers |= WasaQuery::WQM_NOSTEM; + } + } + } + + + // "Regular" processing follows: + unsigned int mods = (unsigned int)(*it)->m_modifiers; + if (TextSplit::hasVisibleWhite((*it)->m_value)) { int slack = (mods & WasaQuery::WQM_PHRASESLACK) ? 10 : 0; Rcl::SClType tp = Rcl::SCLT_PHRASE; @@ -122,8 +142,7 @@ Rcl::SearchData *wasaQueryToRcl(WasaQuery *wasa) LOGERR(("wasaQueryToRcl: out of memory\n")); return 0; } - if ((*it)->m_modifiers & WasaQuery::WQM_NOSTEM) { - fprintf(stderr, "Setting NOSTEM\n"); + if (mods & WasaQuery::WQM_NOSTEM) { nclause->setModifiers(Rcl::SearchDataClause::SDCM_NOSTEMMING); } sdata->addClause(nclause); diff --git a/src/query/wasatorcl.h b/src/query/wasatorcl.h index 8f9d45f1..48a98afe 100644 --- a/src/query/wasatorcl.h +++ b/src/query/wasatorcl.h @@ -24,8 +24,10 @@ using std::string; #include "rcldb.h" #include "searchdata.h" -extern Rcl::SearchData *wasaStringToRcl(const string& query, string &reason); +extern Rcl::SearchData *wasaStringToRcl(const string& query, string &reason, + const string& autosuffs = string()); class WasaQuery; -extern Rcl::SearchData *wasaQueryToRcl(WasaQuery *wasa); +extern Rcl::SearchData *wasaQueryToRcl(WasaQuery *wasa, + const string& autosuffs = string()); #endif /* _WASATORCL_H_INCLUDED_ */ diff --git a/src/utils/smallut.cpp 
b/src/utils/smallut.cpp index 2ae63c1d..847f823a 100644 --- a/src/utils/smallut.cpp +++ b/src/utils/smallut.cpp @@ -174,7 +174,8 @@ bool samecharset(const string &cs1, const string &cs2) return mcs1 == mcs2; } -template bool stringToStrings(const string &s, T &tokens) +template bool stringToStrings(const string &s, T &tokens, + const string& addseps) { string current; tokens.clear(); @@ -237,17 +238,34 @@ template bool stringToStrings(const string &s, T &tokens) break; default: - switch(state) { - case ESCAPE: - state = INQUOTE; - break; - case SPACE: - state = TOKEN; - break; - case TOKEN: - case INQUOTE: - break; - } + if (!addseps.empty() && addseps.find(s[i]) != string::npos) { + switch(state) { + case ESCAPE: + state = INQUOTE; + break; + case INQUOTE: + break; + case SPACE: + tokens.insert(tokens.end(), string(1, s[i])); + continue; + case TOKEN: + tokens.insert(tokens.end(), current); + current.erase(); + tokens.insert(tokens.end(), string(1, s[i])); + state = SPACE; + continue; + } + } else switch(state) { + case ESCAPE: + state = INQUOTE; + break; + case SPACE: + state = TOKEN; + break; + case TOKEN: + case INQUOTE: + break; + } current += s[i]; } } @@ -263,17 +281,20 @@ template bool stringToStrings(const string &s, T &tokens) } return true; } -bool stringToStrings(const string &s, list &tokens) +bool stringToStrings(const string &s, list &tokens, + const string& as) { - return stringToStrings >(s, tokens); + return stringToStrings >(s, tokens, as); } -bool stringToStrings(const string &s, vector &tokens) +bool stringToStrings(const string &s, vector &tokens, + const string& as) { - return stringToStrings >(s, tokens); + return stringToStrings >(s, tokens, as); } -bool stringToStrings(const string &s, set &tokens) +bool stringToStrings(const string &s, set &tokens, + const string& as) { - return stringToStrings >(s, tokens); + return stringToStrings >(s, tokens, as); } template void stringsToString(const T &tokens, string &s) @@ -729,10 +750,28 @@ struct 
spair suffpairs[] = { }; int nsuffpairs = sizeof(suffpairs) / sizeof(struct spair); +const char *thisprog; int main(int argc, char **argv) { -#if 0 + thisprog = *argv++;argc--; + +#if 1 + if (argc <=0 ) { + cerr << "Usage: smallut " << endl; + exit(1); + } + string s = *argv++;argc--; + vector vs; + if (!stringToStrings(s, vs, ":-()")) { + cerr << "Bad entry" << endl; + exit(1); + } + for (vector::const_iterator it = vs.begin(); it != vs.end(); it++) + cerr << "[" << *it << "] "; + cerr << endl; + exit(0); +#elif 0 for (int i = 0; i < npairs; i++) { { int c = stringicmp(pairs[i].s1, pairs[i].s2); @@ -768,7 +807,7 @@ int main(int argc, char **argv) cout << "[" << neutchars(testit, "\r\n") << "]" << endl; string i, o; cout << "neutchars(null) is [" << neutchars(i, "\r\n") << "]" << endl; -#elif 1 +#elif 0 map substs; substs["a"] = "A_SUBST"; substs["title"] = "TITLE_SUBST"; diff --git a/src/utils/smallut.h b/src/utils/smallut.h index 8bb0dd32..14f13c9a 100644 --- a/src/utils/smallut.h +++ b/src/utils/smallut.h @@ -35,6 +35,17 @@ using std::set; // Note these are all ascii routines extern int stringicmp(const string& s1, const string& s2); +// For find_if etc. +struct StringIcmpPred { + StringIcmpPred(const string& s1) + : m_s1(s1) + {} + bool operator()(const string& s2) { + return stringicmp(m_s1, s2) == 0; + } + const string& m_s1; +}; + extern int stringlowercmp(const string& alreadylower, const string& s2); extern int stringuppercmp(const string& alreadyupper, const string& s2); extern void stringtolower(string& io); @@ -52,11 +63,15 @@ extern bool samecharset(const string &cs1, const string &cs2); * dquotes can be escaped with \ etc... * Input is handled a byte at a time, things will work as long as space tab etc. * have the ascii values and can't appear as part of a multibyte char. utf-8 ok - * but so are the iso-8859-x and surely others. + * but so are the iso-8859-x and surely others. 
addseps do have to be + * single-bytes */ -extern bool stringToStrings(const string &s, list &tokens); -extern bool stringToStrings(const string &s, vector &tokens); -extern bool stringToStrings(const string &s, set &tokens); +extern bool stringToStrings(const string& s, list &tokens, + const string& addseps = ""); +extern bool stringToStrings(const string& s, vector &tokens, + const string& addseps = ""); +extern bool stringToStrings(const string& s, set &tokens, + const string& addseps = ""); /** * Inverse operation: @@ -78,7 +93,7 @@ extern bool stringToBool(const string &s); tab}) at beginning and end of input string */ extern void trimstring(string &s, const char *ws = " \t"); -/** Escape things like < or & by turining them to entities */ +/** Escape things like < or & by turning them into entities */ extern string escapeHtml(const string &in); /** Replace some chars with spaces (ie: newline chars). This is not utf8-aware @@ -86,8 +101,8 @@ extern string escapeHtml(const string &in); extern string neutchars(const string &str, const string &chars); extern void neutchars(const string &str, string& out, const string &chars); -/** turn string into something that won't be expanded by a shell. In practise - * quote with single-quotes and escape internal singlequotes */ +/** Turn string into something that won't be expanded by a shell. In practise + * quote with double-quotes and escape $`\ */ extern string escapeShell(const string &str); /** Truncate a string to a given maxlength, avoiding cutting off midword @@ -108,6 +123,7 @@ bool pcSubst(const string& in, string& out, map& subs); /** Substitute printf-like percents and also %(key) */ bool pcSubst(const string& in, string& out, map& subs); +/** Compute times to help with perf issues */ class Chrono { public: Chrono(); @@ -130,8 +146,8 @@ class Chrono { long m_nsecs; }; -class TempBuf { -public: +/** Temp buffer with automatic deallocation */ +struct TempBuf { TempBuf() : m_buf(0) {}