recollindex -l

This commit is contained in:
dockes 2007-07-10 09:23:28 +00:00
parent ebdf1929ba
commit 7368d14d2d
6 changed files with 52 additions and 9 deletions

View File

@ -1,4 +1,4 @@
.\" $Id: recollindex.1,v 1.5 2006-12-24 07:40:26 dockes Exp $ (C) 2005 J.F.Dockes\$
.\" $Id: recollindex.1,v 1.6 2007-07-10 09:23:28 dockes Exp $ (C) 2005 J.F.Dockes\$
.TH RECOLLINDEX 1 "8 January 2006"
.SH NAME
recollindex \- indexing command for the Recoll full text search system
@ -48,6 +48,13 @@ recollindex \- indexing command for the Recoll full text search system
.B -c
<configdir>
]
.B -l
.br
.B recollindex
[
.B -c
<configdir>
]
.B -s
<lang>
.br
@ -100,7 +107,9 @@ Option
-x
disables this X11 session monitoring (daemon will stay alive even if it
cannot connect to the X11 server).
.PP
.B recollindex -l
will list the names of available language stemmers.
.PP
The other modes are useful mainly for testing.
.PP
@ -116,8 +125,10 @@ databases will not be updated.
will build the stem expansion database for a given language, which may or
may not be part of the list in the configuration file. If the language is
not part of the configuration, the stem expansion database will be deleted
during the next normal run. The following languages (abbreviations) are
recognized:
during the next normal run. You can get the list of stemmer names from the
.B recollindex -l
command. At the time of this writing, the following languages
(abbreviations) are recognized (out of Xapian's stem.h):
.RS
.IP \(bu
danish (da)

View File

@ -1,5 +1,5 @@
#ifndef lint
static char rcsid[] = "@(#$Id: indexer.cpp,v 1.59 2007-06-11 05:38:00 dockes Exp $ (C) 2004 J.F.Dockes";
static char rcsid[] = "@(#$Id: indexer.cpp,v 1.60 2007-07-10 09:23:27 dockes Exp $ (C) 2004 J.F.Dockes";
#endif
/*
* This program is free software; you can redistribute it and/or modify
@ -71,6 +71,11 @@ DbIndexer::~DbIndexer() {
m_db.close();
}
// List the stemmer names the indexer can use. This only forwards to
// Rcl::Db::getStemmerNames(), so callers holding a DbIndexer need not
// reach into the database layer themselves.
list<string> DbIndexer::getStemmerNames()
{
    list<string> names = Rcl::Db::getStemmerNames();
    return names;
}
// Index each directory in the topdirs for a given db
bool DbIndexer::indexDb(bool resetbefore, list<string> *topdirs)
{

View File

@ -16,7 +16,7 @@
*/
#ifndef _INDEXER_H_INCLUDED_
#define _INDEXER_H_INCLUDED_
/* @(#$Id: indexer.h,v 1.23 2007-02-08 17:05:12 dockes Exp $ (C) 2004 J.F.Dockes */
/* @(#$Id: indexer.h,v 1.24 2007-07-10 09:23:28 dockes Exp $ (C) 2004 J.F.Dockes */
#include <string>
#include <list>
@ -136,6 +136,9 @@ class DbIndexer : public FsTreeWalkerCB {
/** Return my db dir */
string getDbDir() {return m_dbdir;}
/** List possible stemmer names */
static list<string> getStemmerNames();
private:
FsTreeWalker m_walker;
RclConfig *m_config;

View File

@ -1,5 +1,5 @@
#ifndef lint
static char rcsid[] = "@(#$Id: recollindex.cpp,v 1.33 2007-06-08 16:47:19 dockes Exp $ (C) 2004 J.F.Dockes";
static char rcsid[] = "@(#$Id: recollindex.cpp,v 1.34 2007-07-10 09:23:28 dockes Exp $ (C) 2004 J.F.Dockes";
#endif
/*
* This program is free software; you can redistribute it and/or modify
@ -216,6 +216,7 @@ static int op_flags;
#define OPT_e 0x200
#define OPT_w 0x400
#define OPT_x 0x800
#define OPT_l 0x1000
static const char usage [] =
"\n"
@ -234,6 +235,8 @@ static const char usage [] =
" Purge data for individual files. No stem database updates\n"
"recollindex -i <filename [filename ...]>\n"
" Index individual files. No database purge or stem database updates\n"
"recollindex -l\n"
" List available stemming languages\n"
"recollindex -s <lang>\n"
" Build stem database for additional language <lang>\n"
#ifdef RCL_USE_ASPELL
@ -282,6 +285,7 @@ int main(int argc, const char **argv)
case 'e': op_flags |= OPT_e; break;
case 'h': op_flags |= OPT_h; break;
case 'i': op_flags |= OPT_i; break;
case 'l': op_flags |= OPT_l; break;
#ifdef RCL_MONITOR
case 'm': op_flags |= OPT_m; break;
#endif
@ -338,6 +342,15 @@ int main(int argc, const char **argv)
else
exit(!purgefiles(config, filenames));
} else if (op_flags & OPT_l) {
if (argc != 0)
Usage();
list<string> stemmers = DbIndexer::getStemmerNames();
for (list<string>::const_iterator it = stemmers.begin();
it != stemmers.end(); it++) {
cout << *it << endl;
}
exit(0);
} else if (op_flags & OPT_s) {
if (argc != 1)
Usage();

View File

@ -1,5 +1,5 @@
#ifndef lint
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.119 2007-06-25 10:25:39 dockes Exp $ (C) 2004 J.F.Dockes";
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.120 2007-07-10 09:23:28 dockes Exp $ (C) 2004 J.F.Dockes";
#endif
/*
* This program is free software; you can redistribute it and/or modify
@ -518,6 +518,14 @@ Db::~Db()
i_close(true);
}
// Return the stemmer language names known to the Xapian library.
// Xapian hands the names back as one string; stringToStrings() breaks it
// up into individual list entries (presumably on whitespace -- the helper
// is defined elsewhere, confirm there).
list<string> Db::getStemmerNames()
{
    const string langs = Xapian::Stem::get_available_languages();
    list<string> names;
    stringToStrings(langs, names);
    return names;
}
bool Db::open(const string& dir, const string &stops, OpenMode mode, int qops)
{
bool keep_updated = (qops & QO_KEEP_UPDATED) != 0;

View File

@ -16,7 +16,7 @@
*/
#ifndef _DB_H_INCLUDED_
#define _DB_H_INCLUDED_
/* @(#$Id: rcldb.h,v 1.53 2007-06-21 11:56:28 dockes Exp $ (C) 2004 J.F.Dockes */
/* @(#$Id: rcldb.h,v 1.54 2007-07-10 09:23:28 dockes Exp $ (C) 2004 J.F.Dockes */
#include <string>
#include <list>
@ -97,6 +97,9 @@ class Db {
/** Field name to prefix translation (ie: author -> 'A') */
bool fieldToPrefix(const string& fldname, string &pfx);
/** List possible stemmer names */
static list<string> getStemmerNames();
/* Update-related methods ******************************************/
/** Add document. The Doc class should have been filled as much as