recollindex -l

This commit is contained in:
dockes 2007-07-10 09:23:28 +00:00
parent ebdf1929ba
commit 7368d14d2d
6 changed files with 52 additions and 9 deletions

View File

@ -1,4 +1,4 @@
.\" $Id: recollindex.1,v 1.5 2006-12-24 07:40:26 dockes Exp $ (C) 2005 J.F.Dockes\$ .\" $Id: recollindex.1,v 1.6 2007-07-10 09:23:28 dockes Exp $ (C) 2005 J.F.Dockes\$
.TH RECOLLINDEX 1 "8 January 2006" .TH RECOLLINDEX 1 "8 January 2006"
.SH NAME .SH NAME
recollindex \- indexing command for the Recoll full text search system recollindex \- indexing command for the Recoll full text search system
@ -48,6 +48,13 @@ recollindex \- indexing command for the Recoll full text search system
.B -c .B -c
<configdir> <configdir>
] ]
.B -l
.br
.B recollindex
[
.B -c
<configdir>
]
.B -s .B -s
<lang> <lang>
.br .br
@ -100,7 +107,9 @@ Option
-x -x
disables this X11 session monitoring (daemon will stay alive even if it disables this X11 session monitoring (daemon will stay alive even if it
cannot connect to the X11 server). cannot connect to the X11 server).
.PP
.B recollindex -l
will list the names of available language stemmers.
.PP .PP
The other modes are useful mainly for testing. The other modes are useful mainly for testing.
.PP .PP
@ -116,8 +125,10 @@ databases will not be updated.
will build the stem expansion database for a given language, which may or will build the stem expansion database for a given language, which may or
may not be part of the list in the configuration file. If the language is may not be part of the list in the configuration file. If the language is
not part of the configuration, the stem expansion database will be deleted not part of the configuration, the stem expansion database will be deleted
during the next normal run. The following languages (abbreviations) are during the next normal run. You can get the list of stemmer names from the
recognized: .B recollindex -l
command. At the time of this writing, the following languages
(abbreviations) are recognized (out of Xapian's stem.h):
.RS .RS
.IP \(bu .IP \(bu
danish (da) danish (da)

View File

@ -1,5 +1,5 @@
#ifndef lint #ifndef lint
static char rcsid[] = "@(#$Id: indexer.cpp,v 1.59 2007-06-11 05:38:00 dockes Exp $ (C) 2004 J.F.Dockes"; static char rcsid[] = "@(#$Id: indexer.cpp,v 1.60 2007-07-10 09:23:27 dockes Exp $ (C) 2004 J.F.Dockes";
#endif #endif
/* /*
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
@ -71,6 +71,11 @@ DbIndexer::~DbIndexer() {
m_db.close(); m_db.close();
} }
// List possible stemmer names.
// Pure forwarder: returns whatever Rcl::Db::getStemmerNames() returns,
// so indexer-level callers do not have to reach into Rcl::Db themselves.
list<string> DbIndexer::getStemmerNames()
{
return Rcl::Db::getStemmerNames();
}
// Index each directory in the topdirs for a given db // Index each directory in the topdirs for a given db
bool DbIndexer::indexDb(bool resetbefore, list<string> *topdirs) bool DbIndexer::indexDb(bool resetbefore, list<string> *topdirs)
{ {

View File

@ -16,7 +16,7 @@
*/ */
#ifndef _INDEXER_H_INCLUDED_ #ifndef _INDEXER_H_INCLUDED_
#define _INDEXER_H_INCLUDED_ #define _INDEXER_H_INCLUDED_
/* @(#$Id: indexer.h,v 1.23 2007-02-08 17:05:12 dockes Exp $ (C) 2004 J.F.Dockes */ /* @(#$Id: indexer.h,v 1.24 2007-07-10 09:23:28 dockes Exp $ (C) 2004 J.F.Dockes */
#include <string> #include <string>
#include <list> #include <list>
@ -136,6 +136,9 @@ class DbIndexer : public FsTreeWalkerCB {
/** Return my db dir */ /** Return my db dir */
string getDbDir() {return m_dbdir;} string getDbDir() {return m_dbdir;}
/** List possible stemmer names */
static list<string> getStemmerNames();
private: private:
FsTreeWalker m_walker; FsTreeWalker m_walker;
RclConfig *m_config; RclConfig *m_config;

View File

@ -1,5 +1,5 @@
#ifndef lint #ifndef lint
static char rcsid[] = "@(#$Id: recollindex.cpp,v 1.33 2007-06-08 16:47:19 dockes Exp $ (C) 2004 J.F.Dockes"; static char rcsid[] = "@(#$Id: recollindex.cpp,v 1.34 2007-07-10 09:23:28 dockes Exp $ (C) 2004 J.F.Dockes";
#endif #endif
/* /*
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
@ -216,6 +216,7 @@ static int op_flags;
#define OPT_e 0x200 #define OPT_e 0x200
#define OPT_w 0x400 #define OPT_w 0x400
#define OPT_x 0x800 #define OPT_x 0x800
#define OPT_l 0x1000
static const char usage [] = static const char usage [] =
"\n" "\n"
@ -234,6 +235,8 @@ static const char usage [] =
" Purge data for individual files. No stem database updates\n" " Purge data for individual files. No stem database updates\n"
"recollindex -i <filename [filename ...]>\n" "recollindex -i <filename [filename ...]>\n"
" Index individual files. No database purge or stem database updates\n" " Index individual files. No database purge or stem database updates\n"
"recollindex -l\n"
" List available stemming languages\n"
"recollindex -s <lang>\n" "recollindex -s <lang>\n"
" Build stem database for additional language <lang>\n" " Build stem database for additional language <lang>\n"
#ifdef RCL_USE_ASPELL #ifdef RCL_USE_ASPELL
@ -282,6 +285,7 @@ int main(int argc, const char **argv)
case 'e': op_flags |= OPT_e; break; case 'e': op_flags |= OPT_e; break;
case 'h': op_flags |= OPT_h; break; case 'h': op_flags |= OPT_h; break;
case 'i': op_flags |= OPT_i; break; case 'i': op_flags |= OPT_i; break;
case 'l': op_flags |= OPT_l; break;
#ifdef RCL_MONITOR #ifdef RCL_MONITOR
case 'm': op_flags |= OPT_m; break; case 'm': op_flags |= OPT_m; break;
#endif #endif
@ -338,6 +342,15 @@ int main(int argc, const char **argv)
else else
exit(!purgefiles(config, filenames)); exit(!purgefiles(config, filenames));
} else if (op_flags & OPT_l) {
if (argc != 0)
Usage();
list<string> stemmers = DbIndexer::getStemmerNames();
for (list<string>::const_iterator it = stemmers.begin();
it != stemmers.end(); it++) {
cout << *it << endl;
}
exit(0);
} else if (op_flags & OPT_s) { } else if (op_flags & OPT_s) {
if (argc != 1) if (argc != 1)
Usage(); Usage();

View File

@ -1,5 +1,5 @@
#ifndef lint #ifndef lint
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.119 2007-06-25 10:25:39 dockes Exp $ (C) 2004 J.F.Dockes"; static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.120 2007-07-10 09:23:28 dockes Exp $ (C) 2004 J.F.Dockes";
#endif #endif
/* /*
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
@ -518,6 +518,14 @@ Db::~Db()
i_close(true); i_close(true);
} }
// List possible stemmer names.
// Asks Xapian for its available stemming languages (returned as a single
// string) and breaks that string up into a list with stringToStrings().
// NOTE(review): the result of stringToStrings() is not checked here;
// presumably a failed split just yields an empty/partial list -- confirm.
list<string> Db::getStemmerNames()
{
list<string> res;
stringToStrings(Xapian::Stem::get_available_languages(), res);
return res;
}
bool Db::open(const string& dir, const string &stops, OpenMode mode, int qops) bool Db::open(const string& dir, const string &stops, OpenMode mode, int qops)
{ {
bool keep_updated = (qops & QO_KEEP_UPDATED) != 0; bool keep_updated = (qops & QO_KEEP_UPDATED) != 0;

View File

@ -16,7 +16,7 @@
*/ */
#ifndef _DB_H_INCLUDED_ #ifndef _DB_H_INCLUDED_
#define _DB_H_INCLUDED_ #define _DB_H_INCLUDED_
/* @(#$Id: rcldb.h,v 1.53 2007-06-21 11:56:28 dockes Exp $ (C) 2004 J.F.Dockes */ /* @(#$Id: rcldb.h,v 1.54 2007-07-10 09:23:28 dockes Exp $ (C) 2004 J.F.Dockes */
#include <string> #include <string>
#include <list> #include <list>
@ -97,6 +97,9 @@ class Db {
/** Field name to prefix translation (ie: author -> 'A') */ /** Field name to prefix translation (ie: author -> 'A') */
bool fieldToPrefix(const string& fldname, string &pfx); bool fieldToPrefix(const string& fldname, string &pfx);
/** List possible stemmer names */
static list<string> getStemmerNames();
/* Update-related methods ******************************************/ /* Update-related methods ******************************************/
/** Add document. The Doc class should have been filled as much as /** Add document. The Doc class should have been filled as much as