diff --git a/src/common/textsplit.cpp b/src/common/textsplit.cpp
index fd26f894..04af5396 100644
--- a/src/common/textsplit.cpp
+++ b/src/common/textsplit.cpp
@@ -118,6 +118,13 @@ void TextSplit::staticConfInit(RclConfig *config)
}
}
+ bvalue = false;
+ if (config->getConfParam("underscoreasletter", &bvalue)) {
+ if (bvalue) {
+ charclasses[int('_')] = A_LLETTER;
+ }
+ }
+
string kotagger;
config->getConfParam("hangultagger", kotagger);
if (!kotagger.empty()) {
@@ -855,7 +862,7 @@ bool TextSplit::text_to_words(const string &in)
goto SPACE;
case '@':
- case '_':
+ case '_': // If underscoreasletter is set, we'll never get this
case '\'':
// If in word, potential span: o'brien, jf@dockes.org,
// else just ignore
diff --git a/src/doc/user/recoll.conf.xml b/src/doc/user/recoll.conf.xml
index 5ecce81f..0505ace9 100644
--- a/src/doc/user/recoll.conf.xml
+++ b/src/doc/user/recoll.conf.xml
@@ -253,8 +253,12 @@ in version 1.22, and on by default. Setting the variable to off allows
restoring the previous behaviour.
Process backslash as normal letter This may
+ Process backslash as normal letter. This may
make sense for people wanting to index TeX
commands as such but is not of much general
use. Process underscore as normal letter. This
+ makes sense in so many cases that one wonders if
+ it should not be the default. Path for the tesseract command. This is mostly
useful on Windows, or for specifying a
non-default tesseract command. e.g. on Windows:
- C:/Program Files (x86)/Tesseract-OCR/tesseract.exe backslashasletter
underscoreasletter maxtermlength