only comments. Before multidoc files
This commit is contained in:
parent
6f66d9cb37
commit
63a29c7ced
@ -1,5 +1,5 @@
|
|||||||
#ifndef lint
|
#ifndef lint
|
||||||
static char rcsid[] = "@(#$Id: indexer.cpp,v 1.7 2005-03-17 14:02:05 dockes Exp $ (C) 2004 J.F.Dockes";
|
static char rcsid[] = "@(#$Id: indexer.cpp,v 1.8 2005-03-17 15:35:49 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||||
#endif
|
#endif
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <sys/stat.h>
|
#include <sys/stat.h>
|
||||||
@ -32,7 +32,9 @@ using namespace std;
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Bunch holder for data used while indexing a directory tree
|
* Bunch holder for data used while indexing a directory tree. This is also the
|
||||||
|
* tree walker callback object (the processone method gets called for every
|
||||||
|
* file or directory).
|
||||||
*/
|
*/
|
||||||
class DbIndexer : public FsTreeWalkerCB {
|
class DbIndexer : public FsTreeWalkerCB {
|
||||||
FsTreeWalker walker;
|
FsTreeWalker walker;
|
||||||
@ -47,6 +49,7 @@ class DbIndexer : public FsTreeWalkerCB {
|
|||||||
{ }
|
{ }
|
||||||
|
|
||||||
virtual ~DbIndexer() {
|
virtual ~DbIndexer() {
|
||||||
|
// Maybe clean up temporary directory
|
||||||
if (tmpdir.length()) {
|
if (tmpdir.length()) {
|
||||||
wipedir(tmpdir);
|
wipedir(tmpdir);
|
||||||
if (rmdir(tmpdir.c_str()) < 0) {
|
if (rmdir(tmpdir.c_str()) < 0) {
|
||||||
@ -59,9 +62,16 @@ class DbIndexer : public FsTreeWalkerCB {
|
|||||||
FsTreeWalker::Status
|
FsTreeWalker::Status
|
||||||
processone(const std::string &, const struct stat *, FsTreeWalker::CbFlag);
|
processone(const std::string &, const struct stat *, FsTreeWalker::CbFlag);
|
||||||
|
|
||||||
|
// The top level entry point.
|
||||||
bool index();
|
bool index();
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
// Top level file system tree index method for updating a given database.
|
||||||
|
//
|
||||||
|
// We create the temporary directory, open the database, then call a
|
||||||
|
// file system walk for each top-level directory.
|
||||||
|
// When walking is done, we create the stem databases and close the main db.
|
||||||
bool DbIndexer::index()
|
bool DbIndexer::index()
|
||||||
{
|
{
|
||||||
string tdir;
|
string tdir;
|
||||||
@ -111,10 +121,13 @@ bool DbIndexer::index()
|
|||||||
/**
|
/**
|
||||||
* This function gets called for every file and directory found by the
|
* This function gets called for every file and directory found by the
|
||||||
* tree walker. It checks with the db if the file has changed and needs to
|
* tree walker. It checks with the db if the file has changed and needs to
|
||||||
* be reindexed. If so, it calls an appropriate handler depending on the mime
|
* be reindexed. If so, it calls internfile() which will identify the
|
||||||
* type, which is responsible for populating an Rcl::Doc.
|
* file type and call an appropriate handler to create documents in
|
||||||
|
* internal form, which we then add to the database.
|
||||||
|
*
|
||||||
* Accent and majuscule handling are performed by the db module when doing
|
* Accent and majuscule handling are performed by the db module when doing
|
||||||
* the actual indexing work.
|
* the actual indexing work. The Rcl::Doc created by internfile()
|
||||||
|
* contains pretty raw UTF-8 data.
|
||||||
*/
|
*/
|
||||||
FsTreeWalker::Status
|
FsTreeWalker::Status
|
||||||
DbIndexer::processone(const std::string &fn, const struct stat *stp,
|
DbIndexer::processone(const std::string &fn, const struct stat *stp,
|
||||||
@ -164,16 +177,16 @@ bool ConfIndexer::index()
|
|||||||
LOGERR(("ConfIndexer::index: no top directories in configuration\n"));
|
LOGERR(("ConfIndexer::index: no top directories in configuration\n"));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Group the directories by database: it is important that all
|
|
||||||
// directories for a database be indexed at once so that deleted
|
|
||||||
// file cleanup works
|
|
||||||
list<string> tdl; // List of directories to be indexed
|
list<string> tdl; // List of directories to be indexed
|
||||||
if (!ConfTree::stringToStrings(topdirs, tdl)) {
|
if (!ConfTree::stringToStrings(topdirs, tdl)) {
|
||||||
LOGERR(("ConfIndexer::index: parse error for directory list\n"));
|
LOGERR(("ConfIndexer::index: parse error for directory list\n"));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Each top level directory to be indexed can be associated with a
|
||||||
|
// different database. We first group the directories by database:
|
||||||
|
// it is important that all directories for a database be indexed
|
||||||
|
// at once so that deleted file cleanup works
|
||||||
list<string>::iterator dirit;
|
list<string>::iterator dirit;
|
||||||
map<string, list<string> > dbmap;
|
map<string, list<string> > dbmap;
|
||||||
map<string, list<string> >::iterator dbit;
|
map<string, list<string> >::iterator dbit;
|
||||||
@ -196,6 +209,7 @@ bool ConfIndexer::index()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Index each directory group in turn
|
||||||
for (dbit = dbmap.begin(); dbit != dbmap.end(); dbit++) {
|
for (dbit = dbmap.begin(); dbit != dbmap.end(); dbit++) {
|
||||||
//cout << dbit->first << " -> ";
|
//cout << dbit->first << " -> ";
|
||||||
//list<string>::const_iterator dit;
|
//list<string>::const_iterator dit;
|
||||||
|
|||||||
@ -1,12 +1,17 @@
|
|||||||
#ifndef _INDEXER_H_INCLUDED_
|
#ifndef _INDEXER_H_INCLUDED_
|
||||||
#define _INDEXER_H_INCLUDED_
|
#define _INDEXER_H_INCLUDED_
|
||||||
/* @(#$Id: indexer.h,v 1.4 2005-01-31 14:31:09 dockes Exp $ (C) 2004 J.F.Dockes */
|
/* @(#$Id: indexer.h,v 1.5 2005-03-17 15:35:49 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||||
|
|
||||||
#include "rclconfig.h"
|
#include "rclconfig.h"
|
||||||
class DbIndexer;
|
class DbIndexer;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The file system indexing object. Processes the configuration, then invokes
|
||||||
|
* file system walking to populate/update the database(s).
|
||||||
|
*/
|
||||||
class ConfIndexer {
|
class ConfIndexer {
|
||||||
RclConfig *config;
|
RclConfig *config;
|
||||||
DbIndexer *indexer;
|
DbIndexer *indexer; // Internal object used to store opaque private data
|
||||||
public:
|
public:
|
||||||
enum runStatus {IndexerOk, IndexerError};
|
enum runStatus {IndexerOk, IndexerError};
|
||||||
ConfIndexer(RclConfig *cnf) : config(cnf), indexer(0) {}
|
ConfIndexer(RclConfig *cnf) : config(cnf), indexer(0) {}
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
#ifndef lint
|
#ifndef lint
|
||||||
static char rcsid[] = "@(#$Id: internfile.cpp,v 1.2 2005-02-09 12:07:29 dockes Exp $ (C) 2004 J.F.Dockes";
|
static char rcsid[] = "@(#$Id: internfile.cpp,v 1.3 2005-03-17 15:35:49 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||||
#endif
|
#endif
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
@ -18,6 +18,7 @@ using namespace std;
|
|||||||
#include "pathut.h"
|
#include "pathut.h"
|
||||||
#include "wipedir.h"
|
#include "wipedir.h"
|
||||||
|
|
||||||
|
// Execute the command to uncompress a file into a temporary one.
|
||||||
static bool uncompressfile(RclConfig *conf, const string& ifn,
|
static bool uncompressfile(RclConfig *conf, const string& ifn,
|
||||||
const list<string>& cmdv, const string& tdir,
|
const list<string>& cmdv, const string& tdir,
|
||||||
string& tfile)
|
string& tfile)
|
||||||
@ -95,7 +96,9 @@ bool internfile(const std::string &ifn, RclConfig *config, Rcl::Doc& doc,
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// First check for a compressed file
|
// First check for a compressed file. If it is one, create a temporary
|
||||||
|
// uncompressed file, and rerun the mime type identification, then do the
|
||||||
|
// rest with the temp file.
|
||||||
list<string>ucmd;
|
list<string>ucmd;
|
||||||
if (getUncompressor(mime, config->getMimeConf(), ucmd)) {
|
if (getUncompressor(mime, config->getMimeConf(), ucmd)) {
|
||||||
if (!uncompressfile(config, fn, ucmd, tdir, tfile))
|
if (!uncompressfile(config, fn, ucmd, tdir, tfile))
|
||||||
@ -112,6 +115,7 @@ bool internfile(const std::string &ifn, RclConfig *config, Rcl::Doc& doc,
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// Look for appropriate handler
|
// Look for appropriate handler
|
||||||
handler = getMimeHandler(mime, config->getMimeConf());
|
handler = getMimeHandler(mime, config->getMimeConf());
|
||||||
if (!handler) {
|
if (!handler) {
|
||||||
|
|||||||
@ -1,8 +1,14 @@
|
|||||||
#ifndef _HTML_H_INCLUDED_
|
#ifndef _HTML_H_INCLUDED_
|
||||||
#define _HTML_H_INCLUDED_
|
#define _HTML_H_INCLUDED_
|
||||||
/* @(#$Id: mh_html.h,v 1.1 2005-02-01 17:20:05 dockes Exp $ (C) 2004 J.F.Dockes */
|
/* @(#$Id: mh_html.h,v 1.2 2005-03-17 15:35:49 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||||
#include "mimehandler.h"
|
#include "mimehandler.h"
|
||||||
|
|
||||||
|
// Code to turn an html document into an internal one. There are 2
|
||||||
|
// interfaces, depending on whether we're working on a file or on a
|
||||||
|
// string. The string form is used with external handlers for foreign
|
||||||
|
// formats: they return a result in html, which has the advantage of
|
||||||
|
// being text (easy to use in shell scripts), and semi-structured (can
|
||||||
|
// carry titles, abstracts, whatever)
|
||||||
class MimeHandlerHtml : public MimeHandler {
|
class MimeHandlerHtml : public MimeHandler {
|
||||||
public:
|
public:
|
||||||
virtual bool worker(RclConfig *conf, const string &fn,
|
virtual bool worker(RclConfig *conf, const string &fn,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user