only comments. Before multidoc files

This commit is contained in:
dockes 2005-03-17 15:35:49 +00:00
parent 6f66d9cb37
commit 63a29c7ced
4 changed files with 43 additions and 14 deletions

View File

@ -1,5 +1,5 @@
#ifndef lint
static char rcsid[] = "@(#$Id: indexer.cpp,v 1.7 2005-03-17 14:02:05 dockes Exp $ (C) 2004 J.F.Dockes";
static char rcsid[] = "@(#$Id: indexer.cpp,v 1.8 2005-03-17 15:35:49 dockes Exp $ (C) 2004 J.F.Dockes";
#endif
#include <stdio.h>
#include <sys/stat.h>
@ -32,7 +32,9 @@ using namespace std;
#endif
/**
* Bunch holder for data used while indexing a directory tree
* Bunch holder for data used while indexing a directory tree. This is also the
* tree walker callback object (the processone method gets called for every
* file or directory).
*/
class DbIndexer : public FsTreeWalkerCB {
FsTreeWalker walker;
@ -47,6 +49,7 @@ class DbIndexer : public FsTreeWalkerCB {
{ }
virtual ~DbIndexer() {
// Maybe clean up temporary directory
if (tmpdir.length()) {
wipedir(tmpdir);
if (rmdir(tmpdir.c_str()) < 0) {
@ -59,9 +62,16 @@ class DbIndexer : public FsTreeWalkerCB {
FsTreeWalker::Status
processone(const std::string &, const struct stat *, FsTreeWalker::CbFlag);
// The top level entry point.
bool index();
};
// Top level file system tree index method for updating a given database.
//
// We create the temporary directory, open the database, then call a
// file system walk for each top-level directory.
// When walking is done, we create the stem databases and close the main db.
bool DbIndexer::index()
{
string tdir;
@ -111,10 +121,13 @@ bool DbIndexer::index()
/**
* This function gets called for every file and directory found by the
* tree walker. It checks with the db if the file has changed and needs to
* be reindexed. If so, it calls an appropriate handler depending on the mime
* type, which is responsible for populating an Rcl::Doc.
* be reindexed. If so, it calls internfile() which will identify the
* file type and call an appropriate handler to create documents in
* internal form, which we then add to the database.
*
* Accent and majuscule handling are performed by the db module when doing
* the actual indexing work.
* the actual indexing work. The Rcl::Doc created by internfile()
* contains pretty raw UTF-8 data.
*/
FsTreeWalker::Status
DbIndexer::processone(const std::string &fn, const struct stat *stp,
@ -164,16 +177,16 @@ bool ConfIndexer::index()
LOGERR(("ConfIndexer::index: no top directories in configuration\n"));
return false;
}
// Group the directories by database: it is important that all
// directories for a database be indexed at once so that deleted
// file cleanup works
list<string> tdl; // List of directories to be indexed
if (!ConfTree::stringToStrings(topdirs, tdl)) {
LOGERR(("ConfIndexer::index: parse error for directory list\n"));
return false;
}
// Each top level directory to be indexed can be associated with a
// different database. We first group the directories by database:
// it is important that all directories for a database be indexed
// at once so that deleted file cleanup works
list<string>::iterator dirit;
map<string, list<string> > dbmap;
map<string, list<string> >::iterator dbit;
@ -196,6 +209,7 @@ bool ConfIndexer::index()
}
}
// Index each directory group in turn
for (dbit = dbmap.begin(); dbit != dbmap.end(); dbit++) {
//cout << dbit->first << " -> ";
//list<string>::const_iterator dit;

View File

@ -1,12 +1,17 @@
#ifndef _INDEXER_H_INCLUDED_
#define _INDEXER_H_INCLUDED_
/* @(#$Id: indexer.h,v 1.4 2005-01-31 14:31:09 dockes Exp $ (C) 2004 J.F.Dockes */
/* @(#$Id: indexer.h,v 1.5 2005-03-17 15:35:49 dockes Exp $ (C) 2004 J.F.Dockes */
#include "rclconfig.h"
class DbIndexer;
/**
* The file system indexing object. Processes the configuration, then invokes
* file system walking to populate/update the database(s).
*/
class ConfIndexer {
RclConfig *config;
DbIndexer *indexer;
DbIndexer *indexer; // Internal object used to store opaque private data
public:
enum runStatus {IndexerOk, IndexerError};
ConfIndexer(RclConfig *cnf) : config(cnf), indexer(0) {}

View File

@ -1,5 +1,5 @@
#ifndef lint
static char rcsid[] = "@(#$Id: internfile.cpp,v 1.2 2005-02-09 12:07:29 dockes Exp $ (C) 2004 J.F.Dockes";
static char rcsid[] = "@(#$Id: internfile.cpp,v 1.3 2005-03-17 15:35:49 dockes Exp $ (C) 2004 J.F.Dockes";
#endif
#include <unistd.h>
#include <sys/types.h>
@ -18,6 +18,7 @@ using namespace std;
#include "pathut.h"
#include "wipedir.h"
// Execute the command to uncompress a file into a temporary one.
static bool uncompressfile(RclConfig *conf, const string& ifn,
const list<string>& cmdv, const string& tdir,
string& tfile)
@ -95,7 +96,9 @@ bool internfile(const std::string &ifn, RclConfig *config, Rcl::Doc& doc,
return false;
}
// First check for a compressed file
// First check for a compressed file. If so, create a temporary
// uncompressed file, and rerun the mime type identification, then do the
// rest with the temp file.
list<string>ucmd;
if (getUncompressor(mime, config->getMimeConf(), ucmd)) {
if (!uncompressfile(config, fn, ucmd, tdir, tfile))
@ -112,6 +115,7 @@ bool internfile(const std::string &ifn, RclConfig *config, Rcl::Doc& doc,
}
// Look for appropriate handler
handler = getMimeHandler(mime, config->getMimeConf());
if (!handler) {

View File

@ -1,8 +1,14 @@
#ifndef _HTML_H_INCLUDED_
#define _HTML_H_INCLUDED_
/* @(#$Id: mh_html.h,v 1.1 2005-02-01 17:20:05 dockes Exp $ (C) 2004 J.F.Dockes */
/* @(#$Id: mh_html.h,v 1.2 2005-03-17 15:35:49 dockes Exp $ (C) 2004 J.F.Dockes */
#include "mimehandler.h"
// Code to turn an html document into an internal one. There are 2
// interfaces, depending on whether we're working on a file or on a
// string. The string form is used with external handlers for foreign
// formats: they return a result in html, which has the advantage of
// being text (easy to use in shell scripts) and semi-structured (can
// carry titles, abstracts, whatever).
class MimeHandlerHtml : public MimeHandler {
public:
virtual bool worker(RclConfig *conf, const string &fn,