diff --git a/src/doc/man/recollindex.1 b/src/doc/man/recollindex.1 index e21a8e57..df9f3294 100644 --- a/src/doc/man/recollindex.1 +++ b/src/doc/man/recollindex.1 @@ -1,4 +1,4 @@ -.\" $Id: recollindex.1,v 1.5 2006-12-24 07:40:26 dockes Exp $ (C) 2005 J.F.Dockes\$ +.\" $Id: recollindex.1,v 1.6 2007-07-10 09:23:28 dockes Exp $ (C) 2005 J.F.Dockes\$ .TH RECOLLINDEX 1 "8 January 2006" .SH NAME recollindex \- indexing command for the Recoll full text search system @@ -48,6 +48,13 @@ recollindex \- indexing command for the Recoll full text search system .B -c ] +.B -l +.br +.B recollindex +[ +.B -c + +] .B -s .br @@ -100,7 +107,9 @@ Option -x disables this X11 session monitoring (daemon will stay alive even if it cannot connect to the X11 server). - +.PP +.B recollindex -l +will list the names of available language stemmers. .PP The other modes are useful mainly for testing. .PP @@ -116,8 +125,10 @@ databases will not be updated. will build the stem expansion database for a given language, which may or may not be part of the list in the configuration file. If the language is not part of the configuration, the stem expansion database will be deleted -during the next normal run. The following languages (abbreviations) are -recognized: +during the next normal run. You can get the list of stemmer names from the +.B recollindex -l +command. 
At the time of this writing, the following languages +(abbreviations) are recognized (out of Xapian's stem.h): .RS .IP \(bu danish (da) diff --git a/src/index/indexer.cpp b/src/index/indexer.cpp index eb724978..1fe38e9e 100644 --- a/src/index/indexer.cpp +++ b/src/index/indexer.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: indexer.cpp,v 1.59 2007-06-11 05:38:00 dockes Exp $ (C) 2004 J.F.Dockes"; +static char rcsid[] = "@(#$Id: indexer.cpp,v 1.60 2007-07-10 09:23:27 dockes Exp $ (C) 2004 J.F.Dockes"; #endif /* * This program is free software; you can redistribute it and/or modify @@ -71,6 +71,11 @@ DbIndexer::~DbIndexer() { m_db.close(); } +list DbIndexer::getStemmerNames() +{ + return Rcl::Db::getStemmerNames(); +} + // Index each directory in the topdirs for a given db bool DbIndexer::indexDb(bool resetbefore, list *topdirs) { diff --git a/src/index/indexer.h b/src/index/indexer.h index c7c2e91c..008bc7fa 100644 --- a/src/index/indexer.h +++ b/src/index/indexer.h @@ -16,7 +16,7 @@ */ #ifndef _INDEXER_H_INCLUDED_ #define _INDEXER_H_INCLUDED_ -/* @(#$Id: indexer.h,v 1.23 2007-02-08 17:05:12 dockes Exp $ (C) 2004 J.F.Dockes */ +/* @(#$Id: indexer.h,v 1.24 2007-07-10 09:23:28 dockes Exp $ (C) 2004 J.F.Dockes */ #include #include @@ -136,6 +136,9 @@ class DbIndexer : public FsTreeWalkerCB { /** Return my db dir */ string getDbDir() {return m_dbdir;} + /** List possible stemmer names */ + static list getStemmerNames(); + private: FsTreeWalker m_walker; RclConfig *m_config; diff --git a/src/index/recollindex.cpp b/src/index/recollindex.cpp index 0fa2c4d0..2025bb14 100644 --- a/src/index/recollindex.cpp +++ b/src/index/recollindex.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: recollindex.cpp,v 1.33 2007-06-08 16:47:19 dockes Exp $ (C) 2004 J.F.Dockes"; +static char rcsid[] = "@(#$Id: recollindex.cpp,v 1.34 2007-07-10 09:23:28 dockes Exp $ (C) 2004 J.F.Dockes"; #endif /* * This program is free software; you can redistribute it and/or 
modify @@ -216,6 +216,7 @@ static int op_flags; #define OPT_e 0x200 #define OPT_w 0x400 #define OPT_x 0x800 +#define OPT_l 0x1000 static const char usage [] = "\n" @@ -234,6 +235,8 @@ static const char usage [] = " Purge data for individual files. No stem database updates\n" "recollindex -i \n" " Index individual files. No database purge or stem database updates\n" +"recollindex -l\n" +" List available stemming languages\n" "recollindex -s \n" " Build stem database for additional language \n" #ifdef RCL_USE_ASPELL @@ -282,6 +285,7 @@ int main(int argc, const char **argv) case 'e': op_flags |= OPT_e; break; case 'h': op_flags |= OPT_h; break; case 'i': op_flags |= OPT_i; break; + case 'l': op_flags |= OPT_l; break; #ifdef RCL_MONITOR case 'm': op_flags |= OPT_m; break; #endif @@ -338,6 +342,15 @@ int main(int argc, const char **argv) else exit(!purgefiles(config, filenames)); + } else if (op_flags & OPT_l) { + if (argc != 0) + Usage(); + list stemmers = DbIndexer::getStemmerNames(); + for (list::const_iterator it = stemmers.begin(); + it != stemmers.end(); it++) { + cout << *it << endl; + } + exit(0); } else if (op_flags & OPT_s) { if (argc != 1) Usage(); diff --git a/src/rcldb/rcldb.cpp b/src/rcldb/rcldb.cpp index a9389538..cb4e4681 100644 --- a/src/rcldb/rcldb.cpp +++ b/src/rcldb/rcldb.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.119 2007-06-25 10:25:39 dockes Exp $ (C) 2004 J.F.Dockes"; +static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.120 2007-07-10 09:23:28 dockes Exp $ (C) 2004 J.F.Dockes"; #endif /* * This program is free software; you can redistribute it and/or modify @@ -518,6 +518,14 @@ Db::~Db() i_close(true); } + list Db::getStemmerNames() +{ + list res; + stringToStrings(Xapian::Stem::get_available_languages(), res); + return res; +} + + bool Db::open(const string& dir, const string &stops, OpenMode mode, int qops) { bool keep_updated = (qops & QO_KEEP_UPDATED) != 0; diff --git a/src/rcldb/rcldb.h b/src/rcldb/rcldb.h index 
9d3412d4..2da51564 100644 --- a/src/rcldb/rcldb.h +++ b/src/rcldb/rcldb.h @@ -16,7 +16,7 @@ */ #ifndef _DB_H_INCLUDED_ #define _DB_H_INCLUDED_ -/* @(#$Id: rcldb.h,v 1.53 2007-06-21 11:56:28 dockes Exp $ (C) 2004 J.F.Dockes */ +/* @(#$Id: rcldb.h,v 1.54 2007-07-10 09:23:28 dockes Exp $ (C) 2004 J.F.Dockes */ #include #include @@ -97,6 +97,9 @@ class Db { /** Field name to prefix translation (ie: author -> 'A') */ bool fieldToPrefix(const string& fldname, string &pfx); + /** List possible stemmer names */ + static list getStemmerNames(); + /* Update-related methods ******************************************/ /** Add document. The Doc class should have been filled as much as