added code to specifically index/search file names

2006-03-20 16:05:41 +00:00 · 2006-03-20 16:05:41 +00:00 · d4852f3b0d
commit d4852f3b0d
parent f96fcd6dd3
10 changed files with 259 additions and 145 deletions
--- a/src/VERSION
+++ b/src/VERSION
@ -1 +1 @@
-1.2.3
+1.3.1
--- a/src/index/indexer.cpp
+++ b/src/index/indexer.cpp
@ -1,5 +1,5 @@
 #ifndef lint
-static char rcsid[] = "@(#$Id: indexer.cpp,v 1.24 2006-01-26 07:02:06 dockes Exp $ (C) 2004 J.F.Dockes";
+static char rcsid[] = "@(#$Id: indexer.cpp,v 1.25 2006-03-20 16:05:41 dockes Exp $ (C) 2004 J.F.Dockes";
 #endif
 /*
 *   This program is free software; you can redistribute it and/or modify
@ -236,6 +236,11 @@ DbIndexer::processone(const std::string &fn, const struct stat *stp,
 	// Internal access path for multi-document files
 	doc.ipath = ipath;
 	// File name transcoded to utf8 for indexation. 
 	// We actually might want a separate param for the filename charset
 	string charset = config->getDefCharset();
 	// If this fails, the path won't be indexed, no big deal
 	transcode(fn, doc.utf8fn, charset, "UTF-8");
 	// Do database-specific work to update document data
 	if (!db.add(fn, doc, stp)) 
 	    return FsTreeWalker::FtwError;
--- a/src/internfile/internfile.cpp
+++ b/src/internfile/internfile.cpp
@ -1,5 +1,5 @@
 #ifndef lint
-static char rcsid[] = "@(#$Id: internfile.cpp,v 1.15 2006-01-23 13:32:28 dockes Exp $ (C) 2004 J.F.Dockes";
+static char rcsid[] = "@(#$Id: internfile.cpp,v 1.16 2006-03-20 16:05:41 dockes Exp $ (C) 2004 J.F.Dockes";
 #endif
 /*
 *   This program is free software; you can redistribute it and/or modify
@ -125,41 +125,42 @@ FileInterner::FileInterner(const std::string &f, RclConfig *cnf,
    // for a compressed file.
    m_mime = mimetype(m_fn, m_cfg, usfci);
-    // If identification fails, try to use the input parameter. Note that this 
+    // If identification fails, try to use the input parameter. This
-    // is normally not a compressed type (it's the mime type from the db)
+    // is then normally not a compressed type (it's the mime type from
    // the db), and is only set when previewing, not for indexing
    if (m_mime.empty() && imime)
 	m_mime = *imime;
    if (!m_mime.empty()) {
 	// Has mime: check for a compressed file. If so, create a
 	// temporary uncompressed file, and rerun the mime type
 	// identification, then do the rest with the temp file.
 	list<string>ucmd;
 	if (m_cfg->getUncompressor(m_mime, ucmd)) {
 	    if (!uncompressfile(m_cfg, m_fn, ucmd, m_tdir, m_tfile)) {
 		return;
 	    }
 	    LOGDEB(("internfile: after ucomp: m_tdir %s, tfile %s\n", 
 		    m_tdir.c_str(), m_tfile.c_str()));
 	    m_fn = m_tfile;
 	    m_mime = mimetype(m_fn, m_cfg, usfci);
 	    if (m_mime.empty() && imime)
 		m_mime = *imime;
 	}
    }
    if (m_mime.empty()) {
-	// No mime type: not listed in our map, or present in stop list
+	// No mime type. We let it through as config may warrant that
-	LOGDEB(("FileInterner::FileInterner: (no mime) [%s]\n", m_fn.c_str()));
+	// we index all file names
-	return;
+	LOGDEB(("internfile: (no mime) [%s]\n", m_fn.c_str()));
    }
-    // First check for a compressed file. If so, create a temporary
+    // Look for appropriate handler (might still return empty)
    // uncompressed file, and rerun the mime type identification, then do the
    // rest with the temp file.
    list<string>ucmd;
    if (m_cfg->getUncompressor(m_mime, ucmd)) {
 	if (!uncompressfile(m_cfg, m_fn, ucmd, m_tdir, m_tfile)) {
 	    return;
 	}
 	LOGDEB(("internfile: after ucomp: m_tdir %s, tfile %s\n", 
 		m_tdir.c_str(), m_tfile.c_str()));
 	m_fn = m_tfile;
 	m_mime = mimetype(m_fn, m_cfg, usfci);
 	if (m_mime.empty() && imime)
 	    m_mime = *imime;
 	if (m_mime.empty()) {
 	    // No mime type ?? pass on.
 	    LOGDEB(("internfile: (no mime) [%s]\n", m_fn.c_str()));
 	    return;
 	}
    }
    // Look for appropriate handler
    m_handler = getMimeHandler(m_mime, m_cfg);
    if (!m_handler) {
-	// No handler for this type, for now :(
+	// No handler for this type, for now :( if indexallfilenames
 	// is set in the config, this normally won't happen (we get mh_unknown)
 	LOGDEB(("FileInterner::FileInterner: %s: no handler\n", 
 		m_mime.c_str()));
 	return;
--- a/src/internfile/mimehandler.cpp
+++ b/src/internfile/mimehandler.cpp
@ -1,5 +1,5 @@
 #ifndef lint
-static char rcsid[] = "@(#$Id: mimehandler.cpp,v 1.16 2006-01-23 13:32:28 dockes Exp $ (C) 2004 J.F.Dockes";
+static char rcsid[] = "@(#$Id: mimehandler.cpp,v 1.17 2006-03-20 16:05:41 dockes Exp $ (C) 2004 J.F.Dockes";
 #endif
 /*
 *   This program is free software; you can redistribute it and/or modify
@ -31,6 +31,7 @@ using namespace std;
 #include "mh_mail.h"
 #include "mh_text.h"
 #include "mh_exec.h"
 #include "mh_unknown.h"
 /** Create internal handler object appropriate for given mime type */
 static MimeHandler *mhFactory(const string &mime)
@ -52,35 +53,48 @@ static MimeHandler *mhFactory(const string &mime)
 MimeHandler *getMimeHandler(const string &mtype, RclConfig *cfg)
 {
    // Get handler definition for mime type
-    string hs = cfg->getMimeHandlerDef(mtype);
+    string hs;
-    if (hs.empty())
+    if (!mtype.empty())
-	return 0;
+	hs = cfg->getMimeHandlerDef(mtype);
-    // Break definition into type and name 
+    if (!hs.empty()) {
-    list<string> toks;
+	// Break definition into type and name 
-    stringToStrings(hs, toks);
+	list<string> toks;
-    if (toks.empty()) {
+	stringToStrings(hs, toks);
-	LOGERR(("getMimeHandler: bad mimeconf line for %s\n", mtype.c_str()));
+	if (toks.empty()) {
-	return 0;
+	    LOGERR(("getMimeHandler: bad mimeconf line for %s\n", 
-    }
+		    mtype.c_str()));
    // Retrieve handler function according to type
    if (!stringlowercmp("internal", toks.front())) {
 	return mhFactory(mtype);
    } else if (!stringlowercmp("dll", toks.front())) {
 	return 0;
    } else if (!stringlowercmp("exec", toks.front())) {
 	if (toks.size() < 2) {
 	    LOGERR(("getMimeHandler: bad line for %s: %s\n", mtype.c_str(),
 		    hs.c_str()));
 	    return 0;
 	}
-	MimeHandlerExec *h = new MimeHandlerExec;
+
-	list<string>::const_iterator it1 = toks.begin();
+	// Retrieve handler function according to type
-	it1++;
+	if (!stringlowercmp("internal", toks.front())) {
-	for (;it1 != toks.end();it1++)
+	    return mhFactory(mtype);
-	    h->params.push_back(*it1);
+	} else if (!stringlowercmp("dll", toks.front())) {
-	return h;
+	} else if (!stringlowercmp("exec", toks.front())) {
 	    if (toks.size() < 2) {
 		LOGERR(("getMimeHandler: bad line for %s: %s\n", 
 			mtype.c_str(), hs.c_str()));
 		return 0;
 	    }
 	    MimeHandlerExec *h = new MimeHandlerExec;
 	    list<string>::const_iterator it1 = toks.begin();
 	    it1++;
 	    for (;it1 != toks.end();it1++)
 		h->params.push_back(*it1);
 	    return h;
 	}
    }
    // We are supposed to get here if there was no specific error, but
    // there is no identified mime type, or no handler
    // associated. These files are either ignored or their name is
    // indexed, depending on configuration
    bool indexunknown = false;
    cfg->getConfParam("indexallfilenames", &indexunknown);
    if (indexunknown) {
 	return new MimeHandlerUnknown;
    } else {
 	return 0;
    }
    return 0;
 }
--- a/src/qtgui/advsearch.ui
+++ b/src/qtgui/advsearch.ui
@ -24,7 +24,7 @@
        </property>
        <widget class="QLayoutWidget">
            <property name="name">
-                <cstring>layout13</cstring>
+                <cstring>layout12</cstring>
            </property>
            <vbox>
                <property name="name">
@ -32,18 +32,12 @@
                </property>
                <widget class="QLayoutWidget">
                    <property name="name">
-                        <cstring>layout15</cstring>
+                        <cstring>layout11</cstring>
                    </property>
                    <hbox>
                        <property name="name">
                            <cstring>unnamed</cstring>
                        </property>
                        <property name="margin">
                            <number>10</number>
                        </property>
                        <property name="spacing">
                            <number>10</number>
                        </property>
                        <widget class="QLabel">
                            <property name="name">
                                <cstring>textLabel2</cstring>
@ -54,12 +48,54 @@
                        </widget>
                        <widget class="QLayoutWidget">
                            <property name="name">
-                                <cstring>layout14</cstring>
+                                <cstring>layout10</cstring>
                            </property>
                            <grid>
                                <property name="name">
                                    <cstring>unnamed</cstring>
                                </property>
                                <widget class="QLabel" row="2" column="0" rowspan="1" colspan="2">
                                    <property name="name">
                                        <cstring>orWordsTL</cstring>
                                    </property>
                                    <property name="text">
                                        <string>Any of these words</string>
                                    </property>
                                </widget>
                                <widget class="QLineEdit" row="2" column="2">
                                    <property name="name">
                                        <cstring>orWordsLE</cstring>
                                    </property>
                                </widget>
                                <widget class="QLabel" row="4" column="0">
                                    <property name="name">
                                        <cstring>textLabel1_2</cstring>
                                    </property>
                                    <property name="text">
                                        <string>File name</string>
                                    </property>
                                </widget>
                                <widget class="QLineEdit" row="4" column="2">
                                    <property name="name">
                                        <cstring>fileNameLE</cstring>
                                    </property>
                                </widget>
                                <widget class="QLabel" row="3" column="0" rowspan="1" colspan="2">
                                    <property name="name">
                                        <cstring>noWordsTL</cstring>
                                    </property>
                                    <property name="text">
                                        <string>None of these words</string>
                                    </property>
                                </widget>
                                <widget class="QLineEdit" row="3" column="2">
                                    <property name="name">
                                        <cstring>noWordsLE</cstring>
                                    </property>
                                    <property name="text">
                                        <string></string>
                                    </property>
                                </widget>
                                <widget class="QLabel" row="0" column="0">
                                    <property name="name">
                                        <cstring>andWordsTL</cstring>
@ -74,7 +110,7 @@
                                        <string>All these words</string>
                                    </property>
                                </widget>
-                                <widget class="QLineEdit" row="0" column="1" rowspan="1" colspan="3">
+                                <widget class="QLineEdit" row="0" column="1" rowspan="1" colspan="2">
                                    <property name="name">
                                        <cstring>andWordsLE</cstring>
                                    </property>
@ -93,40 +129,11 @@
                                        <string>This exact phrase</string>
                                    </property>
                                </widget>
-                                <widget class="QLineEdit" row="1" column="2" rowspan="1" colspan="2">
+                                <widget class="QLineEdit" row="1" column="2">
                                    <property name="name">
                                        <cstring>phraseLE</cstring>
                                    </property>
                                </widget>
                                <widget class="QLabel" row="2" column="0" rowspan="1" colspan="2">
                                    <property name="name">
                                        <cstring>orWordsTL</cstring>
                                    </property>
                                    <property name="text">
                                        <string>Any of these words</string>
                                    </property>
                                </widget>
                                <widget class="QLineEdit" row="2" column="2" rowspan="1" colspan="2">
                                    <property name="name">
                                        <cstring>orWordsLE</cstring>
                                    </property>
                                </widget>
                                <widget class="QLabel" row="3" column="0" rowspan="1" colspan="3">
                                    <property name="name">
                                        <cstring>noWordsTL</cstring>
                                    </property>
                                    <property name="text">
                                        <string>None of these words</string>
                                    </property>
                                </widget>
                                <widget class="QLineEdit" row="3" column="3">
                                    <property name="name">
                                        <cstring>noWordsLE</cstring>
                                    </property>
                                    <property name="text">
                                        <string></string>
                                    </property>
                                </widget>
                            </grid>
                        </widget>
                    </hbox>
@ -353,20 +360,6 @@
                        </widget>
                    </grid>
                </widget>
                <widget class="Line">
                    <property name="name">
                        <cstring>line1</cstring>
                    </property>
                    <property name="frameShape">
                        <enum>HLine</enum>
                    </property>
                    <property name="frameShadow">
                        <enum>Sunken</enum>
                    </property>
                    <property name="orientation">
                        <enum>Horizontal</enum>
                    </property>
                </widget>
                <widget class="QLayoutWidget">
                    <property name="name">
                        <cstring>layout25</cstring>
@ -398,6 +391,20 @@
                </widget>
            </vbox>
        </widget>
        <widget class="Line">
            <property name="name">
                <cstring>line1</cstring>
            </property>
            <property name="frameShape">
                <enum>HLine</enum>
            </property>
            <property name="frameShadow">
                <enum>Sunken</enum>
            </property>
            <property name="orientation">
                <enum>Horizontal</enum>
            </property>
        </widget>
    </vbox>
 </widget>
 <connections>
--- a/src/qtgui/advsearch.ui.h
+++ b/src/qtgui/advsearch.ui.h
@ -131,6 +131,7 @@ void advsearch::searchPB_clicked()
    mydata.phrase  = string((const char*)(phraseLE->text().utf8()));
    mydata.orwords = string((const char*)(orWordsLE->text().utf8()));
    mydata.nowords = string((const char*)(noWordsLE->text().utf8()));
    mydata.filename = string((const char*)(fileNameLE->text().utf8()));
    if (restrictFtCB->isOn() && noFiltypsLB->count() > 0) {
 	for (unsigned int i = 0; i < yesFiltypsLB->count(); i++) {
 	    QCString ctext = yesFiltypsLB->item(i)->text().utf8();
--- a/src/qtgui/ssearchb.ui
+++ b/src/qtgui/ssearchb.ui
@ -75,6 +75,23 @@
                        <string>If this is set, each returned document will contain all the terms in the query. Else documents will be ordered by relevance, but may not contain all the terms.</string>
                    </property>
                </widget>
                <widget class="QCheckBox">
                    <property name="name">
                        <cstring>isFNameCB</cstring>
                    </property>
                    <property name="text">
                        <string>&amp;File name</string>
                    </property>
                    <property name="accel">
                        <string>Alt+F</string>
                    </property>
                    <property name="toolTip" stdset="0">
                        <string>Search is on file names only, and may use wildcards.</string>
                    </property>
                    <property name="whatsThis" stdset="0">
                        <string>If this is set, the search will only be performed on file names. Wildcards ? and * can be used and will be matched as in a shell command line.</string>
                    </property>
                </widget>
                <widget class="QLineEdit">
                    <property name="name">
                        <cstring>queryText</cstring>
--- a/src/qtgui/ssearchb.ui.h
+++ b/src/qtgui/ssearchb.ui.h
@ -44,9 +44,11 @@ void SSearchBase::startSimpleSearch()
    LOGDEB(("SSearchBase::startSimpleSearch\n"));
    Rcl::AdvSearchData sdata;
    QCString u8 =  queryText->text().utf8();
-    if (allTermsCB->isChecked())
+
    if (isFNameCB->isChecked())
 	sdata.filename = u8;
    else if (allTermsCB->isChecked())
 	sdata.allwords = u8;
    else
 	sdata.orwords = u8;
--- a/src/rcldb/rcldb.cpp
+++ b/src/rcldb/rcldb.cpp
@ -1,5 +1,5 @@
 #ifndef lint
-static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.57 2006-02-07 10:26:49 dockes Exp $ (C) 2004 J.F.Dockes";
+static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.58 2006-03-20 16:05:41 dockes Exp $ (C) 2004 J.F.Dockes";
 #endif
 /*
 *   This program is free software; you can redistribute it and/or modify
@ -20,6 +20,7 @@ static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.57 2006-02-07 10:26:49 dockes Exp $
 #include <stdio.h>
 #include <unistd.h>
 #include <sys/stat.h>
 #include <fnmatch.h>
 #include <iostream>
 #include <string>
@ -287,6 +288,7 @@ bool Rcl::dumb_string(const string &in, string &out)
    if (!unacmaybefold(s1, out, "UTF-8", true)) {
 	LOGERR(("dumb_string: unac failed for %s\n", in.c_str()));
 	out.erase();
 	// See comment at start of func
 	return true;
    }
    return true;
@ -387,11 +389,9 @@ bool Rcl::Db::add(const string &fn, const Rcl::Doc &idoc,
    // /////// Split and index terms in document body and auxiliary fields
    string noacc;
-    // Split and index file name. This supposes that it's either ascii
+    // Split and index file path. Do we really want to do this? Or do
-    // or utf-8. If this fails, we just go on. We need a config
+    // it with the simple file name only ?
-    // parameter for file name charset.
+    if (dumb_string(doc.utf8fn, noacc)) {
    // Do we really want to fold case here ?
    if (dumb_string(fn, noacc)) {
 	splitter.text_to_words(noacc);
 	splitData.basepos += splitData.curpos + 100;
    }
@ -439,6 +439,14 @@ bool Rcl::Db::add(const string &fn, const Rcl::Doc &idoc,
    string pathterm  = "P" + hash;
    newdocument.add_term(pathterm);
    // Simple file name. This is used for file name searches only. We index
    // it with a term prefix
    string sfn = path_getsimple(doc.utf8fn);
    if (dumb_string(sfn, noacc) && !noacc.empty()) {
 	sfn = string("XSFN") + noacc;
 	newdocument.add_term(sfn);
    }
    // Internal path: with path, makes unique identifier for documents
    // inside multidocument files.
    string uniterm;
@ -992,7 +1000,7 @@ bool Rcl::Db::setQuery(const std::string &iqstring, QueryOpts opts,
    Native *ndb = (Native *)pdata;
    if (!ndb)
 	return false;
-    asdata.erase();
+    m_asdata.erase();
    dbindices.clear();
    list<Xapian::Query> pqueries;
    stringToXapianQueries(iqstring, stemlang, ndb, pqueries, opts);
@ -1023,7 +1031,7 @@ bool Rcl::Db::setQuery(AdvSearchData &sdata, QueryOpts opts,
    if (!sdata.topdir.empty())
 	LOGDEB((" restricted to: %s\n", sdata.topdir.c_str()));
-    asdata = sdata;
+    m_asdata = sdata;
    dbindices.clear();
    Native *ndb = (Native *)pdata;
@ -1031,12 +1039,62 @@ bool Rcl::Db::setQuery(AdvSearchData &sdata, QueryOpts opts,
 	return false;
    list<Xapian::Query> pqueries;
    Xapian::Query xq;
-    
+
    if (!sdata.filename.empty()) {
 	LOGDEB((" filename search\n"));
 	// File name search, with possible wildcards. 
 	// We expand wildcards by scanning the filename terms (prefixed 
        // with XSFN) from the database. 
 	// We build an OR query with the expanded values if any.
 	string pattern;
 	// We take the pattern from the dedicated filename field (not from
 	// allwords or orwords) to avoid interaction with the allwords checkbox
 	dumb_string(sdata.filename, pattern);
 	// If pattern is not quoted, we add * at each end: match any
 	// substring
 	if (pattern[0] == '"' && pattern[pattern.size()-1] == '"')
 	    pattern = pattern.substr(1, pattern.size() -2);
 	else 
 	    pattern = "*" + pattern + "*";
 	LOGDEB((" pattern: [%s]\n", pattern.c_str()));
 	// Match pattern against all file names in the db
 	Xapian::TermIterator it = ndb->db.allterms_begin(); 
 	it.skip_to("XSFN");
 	list<string> names;
 	for (;it != ndb->db.allterms_end(); it++) {
 	    if ((*it).find("XSFN") != 0)
 		break;
 	    string fn = (*it).substr(4);
 	    LOGDEB2(("Matching [%s] and [%s]\n", pattern.c_str(), fn.c_str()));
 	    if (fnmatch(pattern.c_str(), fn.c_str(), 0) != FNM_NOMATCH) {
 		names.push_back((*it).c_str());
 	    }
 	    // Limit the match count
 	    if (names.size() > 1000) {
 		LOGERR(("Rcl::Db::SetQuery: too many matched file names\n"));
 		break;
 	    }
 	}
 	if (names.empty()) {
 	    // Build an impossible query: we know it's impossible because we
 	    // control the prefixes!
 	    names.push_back("XIMPOSSIBLE");
 	}
 	// Build a query out of the matching file name terms.
 	xq = Xapian::Query(Xapian::Query::OP_OR, names.begin(), names.end());
    }
    if (!sdata.allwords.empty()) {
 	stringToXapianQueries(sdata.allwords, stemlang, ndb, pqueries, opts);
 	if (!pqueries.empty()) {
-	    xq = Xapian::Query(Xapian::Query::OP_AND, pqueries.begin(), 
+	    Xapian::Query nq = 
-			       pqueries.end());
+		Xapian::Query(Xapian::Query::OP_AND, pqueries.begin(),
 			      pqueries.end());
 	    xq = xq.empty() ? nq :
 		Xapian::Query(Xapian::Query::OP_AND, xq, nq);
 	    pqueries.clear();
 	}
    }
@ -1044,8 +1102,8 @@ bool Rcl::Db::setQuery(AdvSearchData &sdata, QueryOpts opts,
    if (!sdata.orwords.empty()) {
 	stringToXapianQueries(sdata.orwords, stemlang, ndb, pqueries, opts);
 	if (!pqueries.empty()) {
-	    Xapian::Query nq;
+	    Xapian::Query nq = 
-	    nq = Xapian::Query(Xapian::Query::OP_OR, pqueries.begin(),
+		Xapian::Query(Xapian::Query::OP_OR, pqueries.begin(),
 			       pqueries.end());
 	    xq = xq.empty() ? nq :
 		Xapian::Query(Xapian::Query::OP_AND_MAYBE, xq, nq);
@ -1157,7 +1215,7 @@ class Rcl::DbPops {
 	string url;
 	parms.get(string("url"), url);
 	url = url.substr(7);
-	if (url.find(rdb->asdata.topdir) == 0) 
+	if (url.find(rdb->m_asdata.topdir) == 0) 
 	    return true;
 	return false;
    }
@ -1215,8 +1273,8 @@ bool Rcl::Db::getDoc(int exti, Doc &doc, int *percent)
    }
    // For now the only post-query filter is on dir subtree
-    bool postqfilter = !asdata.topdir.empty();
+    bool postqfilter = !m_asdata.topdir.empty();
-    LOGDEB1(("Topdir %s postqflt %d\n", asdata.topdir.c_str(), postqfilter));
+    LOGDEB1(("Topdir %s postqflt %d\n", m_asdata.topdir.c_str(), postqfilter));
    int xapi;
    if (postqfilter) {
--- a/src/rcldb/rcldb.h
+++ b/src/rcldb/rcldb.h
@ -16,7 +16,7 @@
 */
 #ifndef _DB_H_INCLUDED_
 #define _DB_H_INCLUDED_
-/* @(#$Id: rcldb.h,v 1.25 2006-02-07 10:26:49 dockes Exp $  (C) 2004 J.F.Dockes */
+/* @(#$Id: rcldb.h,v 1.26 2006-03-20 16:05:41 dockes Exp $  (C) 2004 J.F.Dockes */
 #include <string>
 #include <list>
@ -52,26 +52,33 @@ namespace Rcl {
 class Doc {
 public:
    // These fields potentially go into the document data record
-    string url;
+    // We indicate the routine that sets them up during indexing
-    string ipath;
+    string url;          // Computed from fn by Db::add
-    string mimetype;
+    string utf8fn;       // Transcoded version of the file path. 
                         // Set by DbIndexer::processone
    string ipath;        // Set by DbIndexer::processone
    string mimetype;     // Set by FileInterner::internfile
    string fmtime;       // File modification time as decimal ascii unix time
                         // Set by DbIndexer::processone
    string dmtime;       // Data reference date (same format). Ie: mail date
-    string origcharset;
+                         // Possibly set by handler
-    string title;
+    string origcharset;  // Charset we transcoded from (in case we want back)
-    string keywords;
+                         // Possibly set by handler
-    string abstract;
+    string title;        // Possibly set by handler
-    string fbytes;        // File size
+    string keywords;     // Possibly set by handler
-    string dbytes;        // Doc size
+    string abstract;     // Possibly set by handler
    string fbytes;       // File size. Set by Db::Add
    string dbytes;       // Doc size. Set by Db::Add from text length
-    // The following fields don't go to the db. text is only used when
+    // The following fields don't go to the db record
-    // indexing
+    
-    string text;
+    string text; // text is split and indexed 
    int pc; // used by sortseq, convenience
    void erase() {
 	url.erase();
 	utf8fn.erase();
 	ipath.erase();
 	mimetype.erase();
 	fmtime.erase();
@ -96,6 +103,7 @@ class AdvSearchData {
    string phrase;
    string orwords;
    string nowords;
    string filename; 
    list<string> filetypes; // restrict to types. Empty if inactive
    string topdir; // restrict to subtree. Empty if inactive
    string description; // Printable expanded version of the complete query
@ -107,6 +115,7 @@ class AdvSearchData {
 	nowords.erase();
 	filetypes.clear(); 
 	topdir.erase();
 	filename.erase();
 	description.erase();
    }
 };
@ -167,7 +176,7 @@ class Db {
 private:
-    AdvSearchData asdata;
+    AdvSearchData m_asdata;
    vector<int> dbindices; // In case there is a postq filter: sequence of 
                           // db indices that match
    void *pdata; // Pointer to private data. We don't want db(ie