autosuffs featurelet

2010-09-10 09:50:11 +02:00 · 2010-09-10 09:50:11 +02:00 · 176b9b19b6
commit 176b9b19b6
parent f3b0b49c77
10 changed files with 201 additions and 49 deletions
--- a/src/doc/user/usermanual.sgml
+++ b/src/doc/user/usermanual.sgml
@ -1412,6 +1412,14 @@ fvwm
          </para>
      </formalpara>

+      <formalpara><title>Automatic suffixes</title>
+          <para>Words like <literal>odt</literal> or <literal>ods</literal>
+            can be automatically turned into query language
+            <literal>ext:xxx</literal> clauses. This can be enabled in the
+            <guilabel>Search preferences</guilabel> panel in the GUI.
+          </para>
+      </formalpara>
+
      <formalpara><title>Disabling stem expansion</title>
      <para>Entering a capitalized word in any search field will prevent
        stem expansion (no search for
@ -1545,7 +1553,8 @@ fvwm
      interface itself, the parameters used for searching and
      returning results, and what indexes are searched.</para> 

-      <formalpara><title>User interface parameters:</title>
+      <formalpara id="rcl.search.custom.ui">
+       <title>User interface parameters:</title>
        <para>
      <itemizedlist>

@ -1639,7 +1648,8 @@ fvwm
      </formalpara>


-      <formalpara><title>Search parameters:</title>
+      <formalpara id="rcl.search.custom.search">
+	<title>Search parameters:</title>
        <para>
      <itemizedlist>

@ -1678,12 +1688,6 @@ fvwm
            may want to turn it off.</para>
            </listitem>

-            <listitem><para><guilabel>Replace abstracts from
-            documents</guilabel>: this decides if we should synthesize and
-            display an abstract in place of an explicit abstract found
-            within the document itself.</para>
-            </listitem>
-
            <listitem><para><guilabel>Synthetic abstract size</guilabel>:
            adjust to taste...</para>
            </listitem>
@ -1693,6 +1697,13 @@ fvwm
            each term occurrence.</para>
            </listitem>

+            <listitem><para><guilabel>Query language magic file name 
+              suffixes</guilabel>: a list of words which automatically get
+              turned into <literal>ext:xxx</literal> file name suffix clauses
+              when starting a query language query (e.g. <literal>doc xls
+              xlsx...</literal>). This will save some typing for people who
+              use file types a lot when querying.</para>
+            </listitem>
      </itemizedlist>
       </para>
      </formalpara>
--- a/src/qtgui/guiutils.cpp
+++ b/src/qtgui/guiutils.cpp
@ -144,6 +144,9 @@ void rwSettings(bool writing)
 	       Num, 250);
    SETTING_RW(prefs.syntAbsCtx, "/Recoll/prefs/query/syntAbsCtx", 
 	       Num, 4);
+    SETTING_RW(prefs.autoSuffs, "/Recoll/prefs/query/autoSuffs", , "");
+    SETTING_RW(prefs.autoSuffsEnable, 
+	       "/Recoll/prefs/query/autoSuffsEnable", Bool, false);

    SETTING_RW(prefs.sortDepth, "/Recoll/prefs/query/sortDepth",
 	       Num, 100);
@ -255,5 +258,4 @@ void rwSettings(bool writing)
 	for (list<string>::iterator it = tl.begin(); it != tl.end(); it++)
 	    prefs.asearchSubdirHist.push_front(QString::fromUtf8(it->c_str()));
    }
-
 }
--- a/src/qtgui/guiutils.h
+++ b/src/qtgui/guiutils.h
@ -86,8 +86,7 @@ class PrefsPack {
    bool startWithSortToolOpen;
    bool previewHtml;
    bool collapseDuplicates;
-    // Extra query indexes. This are encoded to base64 before storing
-    // to the qt settings file to avoid any bin string/ charset conv issues
+    // Extra query indexes. These are stored in the history file, not in the Qt prefs
    list<string> allExtraDbs;
    list<string> activeExtraDbs;
    // Advanced search subdir restriction: we don't activate the last value
@ -100,6 +99,10 @@ class PrefsPack {
    // Ignored file types in adv search (startup default)
    QStringList asearchIgnFilTyps;
    bool        fileTypesByCats;
+    // Words that are automatically turned into ext:xxx clauses in the query
+    // language entry.
+    QString autoSuffs;
+    bool    autoSuffsEnable;

    // Synthetized abstract length and word context size
    int syntAbsLen;
--- a/src/qtgui/ssearch_w.cpp
+++ b/src/qtgui/ssearch_w.cpp
@ -129,7 +129,10 @@ void SSearch::startSimpleSearch()

    if (tp == SST_LANG) {
 	string reason;
-	sdata = wasaStringToRcl(u8, reason);
+        if (prefs.autoSuffsEnable)
+            sdata = wasaStringToRcl(u8, reason, (const char *)prefs.autoSuffs.utf8());
+        else
+            sdata = wasaStringToRcl(u8, reason);
 	if (sdata == 0) {
 	    QMessageBox::warning(0, "Recoll", tr("Bad query string") +
 				 QString::fromAscii(reason.c_str()));
--- a/src/qtgui/uiprefs.ui
+++ b/src/qtgui/uiprefs.ui
@ -561,6 +561,56 @@ May be slow for big documents.</string>
                                            <enum>Horizontal</enum>
                                        </property>
                                    </widget>
+
+
+                                    <widget class="QLayoutWidget">
+                                        <property name="name">
+                                            <cstring>layoutautosuffs</cstring>
+                                        </property>
+                                        <hbox>
+                                            <property name="name">
+                                                <cstring>unnamed</cstring>
+                                            </property>
+                                            <widget class="QLabel">
+                                                <property name="name">
+                                                    <cstring>textLabel14</cstring>
+                                                </property>
+                                                <property name="toolTip" stdset="0">
+                                                <string>The words in the list will be automatically turned to ext:xxx clauses in the query language entry.</string>
+                                               </property>
+                                                <property name="sizePolicy">
+                                                    <sizepolicy>
+                                                        <hsizetype>5</hsizetype>
+                                                        <vsizetype>5</vsizetype>
+                                                        <horstretch>1</horstretch>
+                                                        <verstretch>0</verstretch>
+                                                    </sizepolicy>
+                                                </property>
+                                                <property name="text">
+                                                    <string>Query language magic file name suffixes.</string>
+                                                </property>
+                                            </widget>
+                                    	    <widget class="QCheckBox">
+                                              <property name="name">
+                                                <cstring>autoSuffsCB</cstring>
+                                              </property>
+                                              <property name="text">
+                                                <string>Enable</string>
+                                              </property>
+                                            </widget>
+                                            <widget class="QLineEdit">
+                                                <property name="name">
+                                                    <cstring>autoSuffsLE</cstring>
+                                                </property>
+                                                <property name="minimumSize">
+                                                    <size>
+                                                        <width>30</width>
+                                                        <height>0</height>
+                                                    </size>
+                                                </property>
+                                            </widget>
+                                        </hbox>
+                                    </widget>
                                    <spacer>
                                        <property name="name">
                                            <cstring>spacer2</cstring>
--- a/src/qtgui/uiprefs_w.cpp
+++ b/src/qtgui/uiprefs_w.cpp
@ -158,6 +158,9 @@ void UIPrefsDialog::setFromPrefs()
    replAbsCB->setEnabled(prefs.queryBuildAbstract);
    replAbsCB->setChecked(prefs.queryReplaceAbstract);

+    autoSuffsCB->setChecked(prefs.autoSuffsEnable);
+    autoSuffsLE->setText(prefs.autoSuffs);
+
    // Initialize the extra indexes listboxes
    idxLV->clear();
    for (list<string>::iterator it = prefs.allExtraDbs.begin(); 
@ -223,6 +226,10 @@ void UIPrefsDialog::accept()
    prefs.syntAbsLen = syntlenSB->value();
    prefs.syntAbsCtx = syntctxSB->value();

+    
+    prefs.autoSuffsEnable = autoSuffsCB->isChecked();
+    prefs.autoSuffs = autoSuffsLE->text();
+
    QListViewItemIterator it(idxLV);
    prefs.allExtraDbs.clear();
    prefs.activeExtraDbs.clear();
--- a/src/query/wasatorcl.cpp
+++ b/src/query/wasatorcl.cpp
@ -33,13 +33,14 @@ using std::list;
 #include "refcntr.h"
 #include "textsplit.h"

-Rcl::SearchData *wasaStringToRcl(const string &qs, string &reason)
+Rcl::SearchData *wasaStringToRcl(const string &qs, string &reason, 
+                                 const string& autosuffs)
 {
    StringToWasaQuery parser;
    WasaQuery *wq = parser.stringToQuery(qs, reason);
    if (wq == 0) 
 	return 0;
-    Rcl::SearchData *rq = wasaQueryToRcl(wq);
+    Rcl::SearchData *rq = wasaQueryToRcl(wq, autosuffs);
    if (rq == 0) {
 	reason = "Failed translating wasa query structure to recoll";
 	return 0;
@ -47,7 +48,8 @@ Rcl::SearchData *wasaStringToRcl(const string &qs, string &reason)
    return rq;
 }

-Rcl::SearchData *wasaQueryToRcl(WasaQuery *wasa)
+Rcl::SearchData *wasaQueryToRcl(WasaQuery *wasa, 
+                                const string& autosuffs)
 {
    if (wasa == 0)
 	return 0;
@ -75,8 +77,9 @@ Rcl::SearchData *wasaQueryToRcl(WasaQuery *wasa)
 	case WasaQuery::OP_LEAF: {
 	    LOGDEB2(("wasaQueryToRcl: leaf clause [%s]:[%s]\n", 
 		     (*it)->m_fieldspec.c_str(), (*it)->m_value.c_str()));
-	    unsigned int mods = (unsigned int)(*it)->m_modifiers;
+
 	    // Special cases (mime, category, dir filter ...). Not pretty.
+
 	    if (!stringicmp("mime", (*it)->m_fieldspec) ||
 		!stringicmp("format", (*it)->m_fieldspec)
 		) {
@ -103,6 +106,23 @@ Rcl::SearchData *wasaQueryToRcl(WasaQuery *wasa)
 		break;
 	    } 

+            // Change terms found in the "autosuffs" list into "ext"
+            // field queries
+            if ((*it)->m_fieldspec.empty() && !autosuffs.empty()) {
+                vector<string> asfv;
+                if (stringToStrings(autosuffs, asfv)) {
+                    if (find_if(asfv.begin(), asfv.end(), 
+                                StringIcmpPred((*it)->m_value)) != asfv.end()) {
+                        (*it)->m_fieldspec = "ext";
+                        (*it)->m_modifiers |= WasaQuery::WQM_NOSTEM;
+                    }
+                }
+            }
+
+
+            // "Regular" processing follows:
+	    unsigned int mods = (unsigned int)(*it)->m_modifiers;
+
 	    if (TextSplit::hasVisibleWhite((*it)->m_value)) {
 		int slack = (mods & WasaQuery::WQM_PHRASESLACK) ? 10 : 0;
 		Rcl::SClType tp = Rcl::SCLT_PHRASE;
@ -122,8 +142,7 @@ Rcl::SearchData *wasaQueryToRcl(WasaQuery *wasa)
 		LOGERR(("wasaQueryToRcl: out of memory\n"));
 		return 0;
 	    }
-	    if ((*it)->m_modifiers & WasaQuery::WQM_NOSTEM) {
-		fprintf(stderr, "Setting NOSTEM\n");
+	    if (mods & WasaQuery::WQM_NOSTEM) {
 		nclause->setModifiers(Rcl::SearchDataClause::SDCM_NOSTEMMING);
 	    }
 	    sdata->addClause(nclause);
--- a/src/query/wasatorcl.h
+++ b/src/query/wasatorcl.h
@ -24,8 +24,10 @@ using std::string;
 #include "rcldb.h"
 #include "searchdata.h"

-extern Rcl::SearchData *wasaStringToRcl(const string& query, string &reason);
+extern Rcl::SearchData *wasaStringToRcl(const string& query, string &reason,
+                                        const string& autosuffs = string());
 class WasaQuery;
-extern Rcl::SearchData *wasaQueryToRcl(WasaQuery *wasa);
+extern Rcl::SearchData *wasaQueryToRcl(WasaQuery *wasa,
+                                       const string& autosuffs = string());

 #endif /* _WASATORCL_H_INCLUDED_ */
--- a/src/utils/smallut.cpp
+++ b/src/utils/smallut.cpp
@ -174,7 +174,8 @@ bool samecharset(const string &cs1, const string &cs2)
    return mcs1 == mcs2;
 }

-template <class T> bool stringToStrings(const string &s, T &tokens)
+template <class T> bool stringToStrings(const string &s, T &tokens, 
+                                        const string& addseps)
 {
    string current;
    tokens.clear();
@ -237,17 +238,34 @@ template <class T> bool stringToStrings(const string &s, T &tokens)
 	    break;

        default:
-	    switch(state) {
-            case ESCAPE:
-                state = INQUOTE;
-                break;
-            case SPACE: 
-                state = TOKEN;
-                break;
-            case TOKEN: 
-            case INQUOTE: 
-                break;
-	    }
+            if (!addseps.empty() && addseps.find(s[i]) != string::npos) {
+                switch(state) {
+                case ESCAPE:
+                    state = INQUOTE;
+                    break;
+                case INQUOTE: 
+                    break;
+                case SPACE: 
+                    tokens.insert(tokens.end(), string(1, s[i]));
+                    continue;
+                case TOKEN: 
+                    tokens.insert(tokens.end(), current);
+                    current.erase();
+                    tokens.insert(tokens.end(), string(1, s[i]));
+                    state = SPACE;
+                    continue;
+                }
+            } else switch(state) {
+                case ESCAPE:
+                    state = INQUOTE;
+                    break;
+                case SPACE: 
+                    state = TOKEN;
+                    break;
+                case TOKEN: 
+                case INQUOTE: 
+                    break;
+                }
 	    current += s[i];
 	}
    }
@ -263,17 +281,20 @@ template <class T> bool stringToStrings(const string &s, T &tokens)
    }
    return true;
 }
-bool stringToStrings(const string &s, list<string> &tokens)
+bool stringToStrings(const string &s, list<string> &tokens, 
+                     const string& as)
 {
-    return stringToStrings<list<string> >(s, tokens);
+    return stringToStrings<list<string> >(s, tokens, as);
 }
-bool stringToStrings(const string &s, vector<string> &tokens)
+bool stringToStrings(const string &s, vector<string> &tokens, 
+                     const string& as)
 {
-    return stringToStrings<vector<string> >(s, tokens);
+    return stringToStrings<vector<string> >(s, tokens, as);
 }
-bool stringToStrings(const string &s, set<string> &tokens)
+bool stringToStrings(const string &s, set<string> &tokens, 
+                     const string& as)
 {
-    return stringToStrings<set<string> >(s, tokens);
+    return stringToStrings<set<string> >(s, tokens, as);
 }

 template <class T> void stringsToString(const T &tokens, string &s) 
@ -729,10 +750,28 @@ struct spair suffpairs[] = {
 };
 int nsuffpairs = sizeof(suffpairs) / sizeof(struct spair);

+const char *thisprog;

 int main(int argc, char **argv)
 {
-#if 0
+    thisprog = *argv++;argc--;
+
+#if 1
+    if (argc <=0 ) {
+        cerr << "Usage: smallut <stringtosplit>" << endl;
+        exit(1);
+    }
+    string s = *argv++;argc--;
+    vector<string> vs;
+    if (!stringToStrings(s, vs, ":-()")) {
+        cerr << "Bad entry" << endl;
+        exit(1);
+    }
+    for (vector<string>::const_iterator it = vs.begin(); it != vs.end(); it++)
+        cerr << "[" << *it << "] ";
+    cerr << endl;
+    exit(0);
+#elif 0
    for (int i = 0; i < npairs; i++) {
 	{
 	    int c = stringicmp(pairs[i].s1, pairs[i].s2);
@ -768,7 +807,7 @@ int main(int argc, char **argv)
    cout << "[" << neutchars(testit, "\r\n") << "]" << endl;
    string i, o;
    cout << "neutchars(null) is [" << neutchars(i, "\r\n") << "]" << endl;
-#elif 1
+#elif 0
    map<string, string> substs;
    substs["a"] = "A_SUBST";
    substs["title"] = "TITLE_SUBST";
--- a/src/utils/smallut.h
+++ b/src/utils/smallut.h
@ -35,6 +35,17 @@ using std::set;

 // Note these are all ascii routines
 extern int stringicmp(const string& s1, const string& s2);
+// Equality predicate over stringicmp() (ASCII, presumably case-insensitive), for find_if etc.
+struct StringIcmpPred {
+    StringIcmpPred(const string& s1) 
+        : m_s1(s1) 
+    {}
+    bool operator()(const string& s2) { // true when stringicmp(s1, s2) returns 0
+        return stringicmp(m_s1, s2) == 0;
+    }
+    const string& m_s1; // held by reference, not copied: s1 must outlive the predicate
+};
+
 extern int stringlowercmp(const string& alreadylower, const string& s2);
 extern int stringuppercmp(const string& alreadyupper, const string& s2); 
 extern void stringtolower(string& io);
@ -52,11 +63,15 @@ extern bool samecharset(const string &cs1, const string &cs2);
 * dquotes can be escaped with \ etc...
 * Input is handled a byte at a time, things will work as long as space tab etc.
 * have the ascii values and can't appear as part of a multibyte char. utf-8 ok
- * but so are the iso-8859-x and surely others.
+ * but so are the iso-8859-x and surely others. Each character in addseps
+ * must be a single byte; outside quotes it ends the current token and is
+ * returned as a token of its own.
 */
-extern bool stringToStrings(const string &s, list<string> &tokens);
-extern bool stringToStrings(const string &s, vector<string> &tokens);
-extern bool stringToStrings(const string &s, set<string> &tokens);
+extern bool stringToStrings(const string& s, list<string> &tokens, 
+                            const string& addseps = "");
+extern bool stringToStrings(const string& s, vector<string> &tokens, 
+                            const string& addseps = "");
+extern bool stringToStrings(const string& s, set<string> &tokens, 
+                            const string& addseps = "");

 /**
 * Inverse operation:
@ -78,7 +93,7 @@ extern bool stringToBool(const string &s);
    tab}) at beginning and end of input string */
 extern void trimstring(string &s, const char *ws = " \t");

-/** Escape things like < or & by turining them to entities */
+/** Escape things like < or & by turning them into entities */
 extern string escapeHtml(const string &in);

 /** Replace some chars with spaces (ie: newline chars). This is not utf8-aware
@ -86,8 +101,8 @@ extern string escapeHtml(const string &in);
 extern string neutchars(const string &str, const string &chars);
 extern void neutchars(const string &str, string& out, const string &chars);

-/** turn string into something that won't be expanded by a shell. In practise
- * quote with single-quotes and escape internal singlequotes */
+/** Turn string into something that won't be expanded by a shell. In practice
+ *  quote with double-quotes and escape $`\ */
 extern string escapeShell(const string &str);

 /** Truncate a string to a given maxlength, avoiding cutting off midword
@ -108,6 +123,7 @@ bool pcSubst(const string& in, string& out, map<char, string>& subs);
 /** Substitute printf-like percents and also %(key) */
 bool pcSubst(const string& in, string& out, map<string, string>& subs);

+/** Compute times to help with perf issues */
 class Chrono {
 public:
  Chrono();
@ -130,8 +146,8 @@ class Chrono {
  long 	m_nsecs; 
 };

-class TempBuf {
-public:
+/** Temp buffer with automatic deallocation */
+struct TempBuf {
    TempBuf() 
        : m_buf(0)
    {}