only comments. Before multidoc files
This commit is contained in:
parent
6f66d9cb37
commit
63a29c7ced
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: indexer.cpp,v 1.7 2005-03-17 14:02:05 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: indexer.cpp,v 1.8 2005-03-17 15:35:49 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
#endif
|
||||
#include <stdio.h>
|
||||
#include <sys/stat.h>
|
||||
@ -32,7 +32,9 @@ using namespace std;
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Bunch holder for data used while indexing a directory tree
|
||||
* Bunch holder for data used while indexing a directory tree. This is also the
|
||||
* tree walker callback object (the processone method gets called for every
|
||||
* file or directory).
|
||||
*/
|
||||
class DbIndexer : public FsTreeWalkerCB {
|
||||
FsTreeWalker walker;
|
||||
@ -47,6 +49,7 @@ class DbIndexer : public FsTreeWalkerCB {
|
||||
{ }
|
||||
|
||||
virtual ~DbIndexer() {
|
||||
// Maybe clean up temporary directory
|
||||
if (tmpdir.length()) {
|
||||
wipedir(tmpdir);
|
||||
if (rmdir(tmpdir.c_str()) < 0) {
|
||||
@ -59,9 +62,16 @@ class DbIndexer : public FsTreeWalkerCB {
|
||||
FsTreeWalker::Status
|
||||
processone(const std::string &, const struct stat *, FsTreeWalker::CbFlag);
|
||||
|
||||
// The top level entry point.
|
||||
bool index();
|
||||
};
|
||||
|
||||
|
||||
// Top level file system tree index method for updating a given database.
|
||||
//
|
||||
// We create the temporary directory, open the database, then call a
|
||||
// file system walk for each top-level directory.
|
||||
// When walking is done, we create the stem databases and close the main db.
|
||||
bool DbIndexer::index()
|
||||
{
|
||||
string tdir;
|
||||
@ -111,10 +121,13 @@ bool DbIndexer::index()
|
||||
/**
|
||||
* This function gets called for every file and directory found by the
|
||||
* tree walker. It checks with the db if the file has changed and needs to
|
||||
* be reindexed. If so, it calls an appropriate handler depending on the mime
|
||||
* type, which is responsible for populating an Rcl::Doc.
|
||||
* be reindexed. If so, it calls internfile() which will identify the
|
||||
* file type and call an appropriate handler to create documents in
|
||||
* internal form, which we then add to the database.
|
||||
*
|
||||
* Accent and majuscule handling are performed by the db module when doing
|
||||
* the actual indexing work.
|
||||
* the actual indexing work. The Rcl::Doc created by internfile()
|
||||
* contains pretty raw utf8 data.
|
||||
*/
|
||||
FsTreeWalker::Status
|
||||
DbIndexer::processone(const std::string &fn, const struct stat *stp,
|
||||
@ -164,16 +177,16 @@ bool ConfIndexer::index()
|
||||
LOGERR(("ConfIndexer::index: no top directories in configuration\n"));
|
||||
return false;
|
||||
}
|
||||
|
||||
// Group the directories by database: it is important that all
|
||||
// directories for a database be indexed at once so that deleted
|
||||
// file cleanup works
|
||||
list<string> tdl; // List of directories to be indexed
|
||||
if (!ConfTree::stringToStrings(topdirs, tdl)) {
|
||||
LOGERR(("ConfIndexer::index: parse error for directory list\n"));
|
||||
return false;
|
||||
}
|
||||
|
||||
// Each top level directory to be indexed can be associated with a
|
||||
// different database. We first group the directories by database:
|
||||
// it is important that all directories for a database be indexed
|
||||
// at once so that deleted file cleanup works
|
||||
list<string>::iterator dirit;
|
||||
map<string, list<string> > dbmap;
|
||||
map<string, list<string> >::iterator dbit;
|
||||
@ -196,6 +209,7 @@ bool ConfIndexer::index()
|
||||
}
|
||||
}
|
||||
|
||||
// Index each directory group in turn
|
||||
for (dbit = dbmap.begin(); dbit != dbmap.end(); dbit++) {
|
||||
//cout << dbit->first << " -> ";
|
||||
//list<string>::const_iterator dit;
|
||||
|
||||
@ -1,12 +1,17 @@
|
||||
#ifndef _INDEXER_H_INCLUDED_
|
||||
#define _INDEXER_H_INCLUDED_
|
||||
/* @(#$Id: indexer.h,v 1.4 2005-01-31 14:31:09 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
/* @(#$Id: indexer.h,v 1.5 2005-03-17 15:35:49 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
|
||||
#include "rclconfig.h"
|
||||
class DbIndexer;
|
||||
|
||||
/**
|
||||
* The file system indexing object. Processes the configuration, then invokes
|
||||
* file system walking to populate/update the database(s).
|
||||
*/
|
||||
class ConfIndexer {
|
||||
RclConfig *config;
|
||||
DbIndexer *indexer;
|
||||
DbIndexer *indexer; // Internal object used to store opaque private data
|
||||
public:
|
||||
enum runStatus {IndexerOk, IndexerError};
|
||||
ConfIndexer(RclConfig *cnf) : config(cnf), indexer(0) {}
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: internfile.cpp,v 1.2 2005-02-09 12:07:29 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: internfile.cpp,v 1.3 2005-03-17 15:35:49 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
#endif
|
||||
#include <unistd.h>
|
||||
#include <sys/types.h>
|
||||
@ -18,6 +18,7 @@ using namespace std;
|
||||
#include "pathut.h"
|
||||
#include "wipedir.h"
|
||||
|
||||
// Execute the command to uncompress a file into a temporary one.
|
||||
static bool uncompressfile(RclConfig *conf, const string& ifn,
|
||||
const list<string>& cmdv, const string& tdir,
|
||||
string& tfile)
|
||||
@ -95,7 +96,9 @@ bool internfile(const std::string &ifn, RclConfig *config, Rcl::Doc& doc,
|
||||
return false;
|
||||
}
|
||||
|
||||
// First check for a compressed file
|
||||
// First check for a compressed file. If so, create a temporary
|
||||
// uncompressed file, and rerun the mime type identification, then do the
|
||||
// rest with the temp file.
|
||||
list<string>ucmd;
|
||||
if (getUncompressor(mime, config->getMimeConf(), ucmd)) {
|
||||
if (!uncompressfile(config, fn, ucmd, tdir, tfile))
|
||||
@ -112,6 +115,7 @@ bool internfile(const std::string &ifn, RclConfig *config, Rcl::Doc& doc,
|
||||
|
||||
}
|
||||
|
||||
|
||||
// Look for appropriate handler
|
||||
handler = getMimeHandler(mime, config->getMimeConf());
|
||||
if (!handler) {
|
||||
|
||||
@ -1,8 +1,14 @@
|
||||
#ifndef _HTML_H_INCLUDED_
|
||||
#define _HTML_H_INCLUDED_
|
||||
/* @(#$Id: mh_html.h,v 1.1 2005-02-01 17:20:05 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
/* @(#$Id: mh_html.h,v 1.2 2005-03-17 15:35:49 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
#include "mimehandler.h"
|
||||
|
||||
// Code to turn an html document into an internal one. There are 2
|
||||
// interfaces, depending on whether we're working on a file, or on a
|
||||
// string. The string form is used with external handlers for foreign
|
||||
// formats: they return a result in html, which has the advantage of
|
||||
// being text (easy to use in shell-scripts), and semi-structured (can
|
||||
// carry titles, abstracts, whatever)
|
||||
class MimeHandlerHtml : public MimeHandler {
|
||||
public:
|
||||
virtual bool worker(RclConfig *conf, const string &fn,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user