From 63a29c7cedfe7280d25ae43e28c474601c95d249 Mon Sep 17 00:00:00 2001 From: dockes Date: Thu, 17 Mar 2005 15:35:49 +0000 Subject: [PATCH] only comments. Before multidoc files --- src/index/indexer.cpp | 32 +++++++++++++++++++++++--------- src/index/indexer.h | 9 +++++++-- src/internfile/internfile.cpp | 8 ++++++-- src/internfile/mh_html.h | 8 +++++++- 4 files changed, 43 insertions(+), 14 deletions(-) diff --git a/src/index/indexer.cpp b/src/index/indexer.cpp index e49b90da..6a0cb037 100644 --- a/src/index/indexer.cpp +++ b/src/index/indexer.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: indexer.cpp,v 1.7 2005-03-17 14:02:05 dockes Exp $ (C) 2004 J.F.Dockes"; +static char rcsid[] = "@(#$Id: indexer.cpp,v 1.8 2005-03-17 15:35:49 dockes Exp $ (C) 2004 J.F.Dockes"; #endif #include #include @@ -32,7 +32,9 @@ using namespace std; #endif /** - * Bunch holder for data used while indexing a directory tree + * Bunch holder for data used while indexing a directory tree. This is also the + * tree walker callback object (the processone method gets called for every + * file or directory). */ class DbIndexer : public FsTreeWalkerCB { FsTreeWalker walker; @@ -47,6 +49,7 @@ class DbIndexer : public FsTreeWalkerCB { { } virtual ~DbIndexer() { + // Maybe clean up temporary directory if (tmpdir.length()) { wipedir(tmpdir); if (rmdir(tmpdir.c_str()) < 0) { @@ -59,9 +62,16 @@ class DbIndexer : public FsTreeWalkerCB { FsTreeWalker::Status processone(const std::string &, const struct stat *, FsTreeWalker::CbFlag); + // The top level entry point. bool index(); }; + +// Top level file system tree index method for updating a given database. +// +// We create the temporary directory, open the database, then call a +// file system walk for each top-level directory. +// When walking is done, we create the stem databases and close the main db. 
bool DbIndexer::index() { string tdir; @@ -111,10 +121,13 @@ bool DbIndexer::index() /** * This function gets called for every file and directory found by the * tree walker. It checks with the db if the file has changed and needs to - * be reindexed. If so, it calls an appropriate handler depending on the mime - * type, which is responsible for populating an Rcl::Doc. + * be reindexed. If so, it calls internfile() which will identify the + * file type and call an appropriate handler to create documents in + * internal form, which we then add to the database. + * * Accent and majuscule handling are performed by the db module when doing - * the actual indexing work. + * the actual indexing work. The Rcl::Doc created by internfile() + contains pretty raw utf8 data. */ FsTreeWalker::Status DbIndexer::processone(const std::string &fn, const struct stat *stp, @@ -164,16 +177,16 @@ bool ConfIndexer::index() LOGERR(("ConfIndexer::index: no top directories in configuration\n")); return false; } - - // Group the directories by database: it is important that all - // directories for a database be indexed at once so that deleted - // file cleanup works list tdl; // List of directories to be indexed if (!ConfTree::stringToStrings(topdirs, tdl)) { LOGERR(("ConfIndexer::index: parse error for directory list\n")); return false; } + // Each top level directory to be indexed can be associated with a + // different database. 
We first group the directories by database: + // it is important that all directories for a database be indexed + // at once so that deleted file cleanup works list::iterator dirit; map > dbmap; map >::iterator dbit; @@ -196,6 +209,7 @@ bool ConfIndexer::index() } } + // Index each directory group in turn for (dbit = dbmap.begin(); dbit != dbmap.end(); dbit++) { //cout << dbit->first << " -> "; //list::const_iterator dit; diff --git a/src/index/indexer.h b/src/index/indexer.h index 33eba3b5..6eddbbb7 100644 --- a/src/index/indexer.h +++ b/src/index/indexer.h @@ -1,12 +1,17 @@ #ifndef _INDEXER_H_INCLUDED_ #define _INDEXER_H_INCLUDED_ -/* @(#$Id: indexer.h,v 1.4 2005-01-31 14:31:09 dockes Exp $ (C) 2004 J.F.Dockes */ +/* @(#$Id: indexer.h,v 1.5 2005-03-17 15:35:49 dockes Exp $ (C) 2004 J.F.Dockes */ #include "rclconfig.h" class DbIndexer; + +/** + * The file system indexing object. Processes the configuration, then invokes + * file system walking to populate/update the database(s). + */ class ConfIndexer { RclConfig *config; - DbIndexer *indexer; + DbIndexer *indexer; // Internal object used to store opaque private data public: enum runStatus {IndexerOk, IndexerError}; ConfIndexer(RclConfig *cnf) : config(cnf), indexer(0) {} diff --git a/src/internfile/internfile.cpp b/src/internfile/internfile.cpp index 9e087cc9..49569ed7 100644 --- a/src/internfile/internfile.cpp +++ b/src/internfile/internfile.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: internfile.cpp,v 1.2 2005-02-09 12:07:29 dockes Exp $ (C) 2004 J.F.Dockes"; +static char rcsid[] = "@(#$Id: internfile.cpp,v 1.3 2005-03-17 15:35:49 dockes Exp $ (C) 2004 J.F.Dockes"; #endif #include #include @@ -18,6 +18,7 @@ using namespace std; #include "pathut.h" #include "wipedir.h" +// Execute the command to uncompress a file into a temporary one. 
static bool uncompressfile(RclConfig *conf, const string& ifn, const list& cmdv, const string& tdir, string& tfile) @@ -95,7 +96,9 @@ bool internfile(const std::string &ifn, RclConfig *config, Rcl::Doc& doc, return false; } - // First check for a compressed file + // First check for a compressed file. If so, create a temporary + // uncompressed file, and rerun the mime type identification, then do the + // rest with the temp file. listucmd; if (getUncompressor(mime, config->getMimeConf(), ucmd)) { if (!uncompressfile(config, fn, ucmd, tdir, tfile)) @@ -112,6 +115,7 @@ bool internfile(const std::string &ifn, RclConfig *config, Rcl::Doc& doc, } + // Look for appropriate handler handler = getMimeHandler(mime, config->getMimeConf()); if (!handler) { diff --git a/src/internfile/mh_html.h b/src/internfile/mh_html.h index 2c3ad453..25af78ce 100644 --- a/src/internfile/mh_html.h +++ b/src/internfile/mh_html.h @@ -1,8 +1,14 @@ #ifndef _HTML_H_INCLUDED_ #define _HTML_H_INCLUDED_ -/* @(#$Id: mh_html.h,v 1.1 2005-02-01 17:20:05 dockes Exp $ (C) 2004 J.F.Dockes */ +/* @(#$Id: mh_html.h,v 1.2 2005-03-17 15:35:49 dockes Exp $ (C) 2004 J.F.Dockes */ #include "mimehandler.h" +// Code to turn an html document into an internal one. There are 2 +// interfaces, depending on whether we're working on a file or on a +// string. The string form is for use with external handlers for foreign +// formats: they return a result in html, which has the advantage of +// being text (easy to use in shell-scripts), and semi-structured (can +// carry titles, abstracts, whatever) class MimeHandlerHtml : public MimeHandler { public: virtual bool worker(RclConfig *conf, const string &fn,