diff --git a/packaging/debian/buildppa.sh b/packaging/debian/buildppa.sh
index d741ca4a..d4b9f820 100644
--- a/packaging/debian/buildppa.sh
+++ b/packaging/debian/buildppa.sh
@@ -6,7 +6,7 @@
PPA_KEYID=7808CE96D38B9201
-RCLVERS=1.27.5
+RCLVERS=1.27.6
SCOPEVERS=1.20.2.4
GSSPVERS=1.0.0
PPAVERS=1
@@ -49,7 +49,7 @@ debdir=debian
# 16.04LTS xenial 2021-04
# 18.04LTS bionic 2023-04
# 20.04LTS focal 2025-04
-series="xenial bionic focal"
+series="xenial bionic focal groovy"
series=
if test "X$series" != X ; then
@@ -89,8 +89,8 @@ done
# 16.04LTS xenial 2021-04
# 18.04LTS bionic 2023-04
# 20.04LTS focal 2025-04
-series="xenial bionic focal"
-#series=
+series="xenial bionic focal groovy"
+# series=
debdir=debiankio
topdir=kio-recoll-${RCLVERS}
@@ -133,7 +133,7 @@ done
# 16.04LTS xenial 2021-04
# 18.04LTS bionic 2023-04
# 20.04LTS focal 2025-04
-series="xenial bionic focal"
+series="xenial bionic focal groovy"
series=
debdir=debiangssp
diff --git a/packaging/debian/debian/changelog b/packaging/debian/debian/changelog
index 3306e08e..6f3dfb02 100644
--- a/packaging/debian/debian/changelog
+++ b/packaging/debian/debian/changelog
@@ -1,3 +1,15 @@
+recoll (1.27.6-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
+
+ * Process PDF annotations if the poppler-glib Python bindings are available
+ * Fix build error with some compiler versions
+ * Fix inconsistent lock file name issue which could result in background
+ indexer exit.
+ * recollq: new -p option to show snippets lists instead of abstracts with
+ -A
+ * Fix nonumbers option.
+
+ -- Jean-Francois Dockes <jf@dockes.org>  Sun, 13 Sep 2020 13:29:00 +0200
+
recoll (1.27.5-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
* Misc small fixes.
diff --git a/packaging/debian/debiankio/changelog b/packaging/debian/debiankio/changelog
index f4171d0a..67d94bac 100644
--- a/packaging/debian/debiankio/changelog
+++ b/packaging/debian/debiankio/changelog
@@ -1,3 +1,15 @@
+kio-recoll (1.27.6-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
+
+ * Process PDF annotations if the poppler-glib Python bindings are available
+ * Fix build error with some compiler versions
+ * Fix inconsistent lock file name issue which could result in background
+ indexer exit.
+ * recollq: new -p option to show snippets lists instead of abstracts with
+ -A
+ * Fix nonumbers option.
+
+ -- Jean-Francois Dockes <jf@dockes.org>  Sun, 13 Sep 2020 13:29:00 +0200
+
kio-recoll (1.27.5-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
* Misc small fixes.
diff --git a/src/VERSION b/src/VERSION
index 2a5aed46..127aeda7 100644
--- a/src/VERSION
+++ b/src/VERSION
@@ -1 +1 @@
-1.27.6
+1.27.7
diff --git a/src/common/textsplit.cpp b/src/common/textsplit.cpp
index fd26f894..aa725910 100644
--- a/src/common/textsplit.cpp
+++ b/src/common/textsplit.cpp
@@ -84,6 +84,9 @@ int TextSplit::o_maxWordLength{40};
static const int o_CJKMaxNgramLen{5};
bool o_exthangultagger{false};
+// This is changed to 0 if _ is processed as a letter
+static char underscoreatend = '_';
+
void TextSplit::staticConfInit(RclConfig *config)
{
config->getConfParam("maxtermlength", &o_maxWordLength);
@@ -118,6 +121,14 @@ void TextSplit::staticConfInit(RclConfig *config)
}
}
+ bvalue = false;
+ if (config->getConfParam("underscoreasletter", &bvalue)) {
+ if (bvalue) {
+ charclasses[int('_')] = A_LLETTER;
+ underscoreatend = 0;
+ }
+ }
+
string kotagger;
config->getConfParam("hangultagger", kotagger);
if (!kotagger.empty()) {
@@ -550,26 +561,26 @@ inline bool TextSplit::doemit(bool spanerase, size_t _bp)
// Maybe trim at end. These are chars that we might keep
// inside a span, but not at the end.
- while (m_span.length() > 0) {
- switch (*(m_span.rbegin())) {
- case '.':
- case '-':
- case ',':
- case '@':
- case '_':
- case '\'':
- m_span.resize(m_span.length()-1);
+ string::size_type trimsz{0};
+ while (trimsz < m_span.length()) {
+ auto c = m_span[m_span.length() - 1 - trimsz];
+ if (c == '.' || c == '-' || c == ',' || c == '@' || c == '\'' ||
+ c == underscoreatend) {
+ trimsz++;
if (m_words_in_span.size() &&
- m_words_in_span.back().second > int(m_span.size()))
+ m_words_in_span.back().second > int(m_span.size())) {
m_words_in_span.back().second = int(m_span.size());
- if (--bp < 0)
+ }
+ if (--bp < 0) {
bp = 0;
+ }
+ } else {
break;
- default:
- goto breaktrimloop;
}
}
-breaktrimloop:
+ if (trimsz > 0) {
+ m_span.resize(m_span.length() - trimsz);
+ }
if (!words_from_span(bp)) {
return false;
@@ -855,7 +866,7 @@ bool TextSplit::text_to_words(const string &in)
goto SPACE;
case '@':
- case '_':
+ case '_': // If underscoreasletter is set, we'll never get this
case '\'':
// If in word, potential span: o'brien, jf@dockes.org,
// else just ignore
diff --git a/src/doc/user/recoll.conf.xml b/src/doc/user/recoll.conf.xml
index 5ecce81f..0505ace9 100644
--- a/src/doc/user/recoll.conf.xml
+++ b/src/doc/user/recoll.conf.xml
@@ -253,8 +253,12 @@ in version 1.22, and on by default. Setting the variable to off allows
restoring the previous behaviour.
backslashasletter
-Process backslash as normal letter This may make sense for people wanting to index TeX commands as
+Process backslash as normal letter. This may make sense for people wanting to index TeX commands as
such but is not of much general use.
+
+underscoreasletter
+Process underscore as normal letter. This makes sense in so many cases that one wonders if it should
+not be the default.
maxtermlength
Maximum term length. Words longer than this will be discarded.
@@ -758,7 +762,7 @@ script. Example values: eng, fra... See the tesseract documentation.tesseractcmd
Path for the tesseract command. This is mostly useful on Windows, or for specifying a non-default
tesseract command. e.g. on Windows:
-C:/Program Files (x86)/Tesseract-OCR/tesseract.exe
+C:/Program Files (x86)/Tesseract-OCR/tesseract.exe
abbyylang
Language to assume for abbyy OCR. Important for improving the OCR accuracy. This can also be set
diff --git a/src/doc/user/usermanual.html b/src/doc/user/usermanual.html
index 72060c70..d4dc0532 100644
--- a/src/doc/user/usermanual.html
+++ b/src/doc/user/usermanual.html
@@ -8933,12 +8933,21 @@ for i in range(nres):
id=
"RCL.INSTALL.CONFIG.RECOLLCONF.BACKSLASHASLETTER">backslashasletter
- Process backslash as normal letter This may
+
Process backslash as normal letter. This may
make sense for people wanting to index TeX
commands as such but is not of much general
use.
underscoreasletter
+
+ Process underscore as normal letter. This
+ makes sense in so many cases that one wonders if
+ it should not be the default.
+
+ maxtermlength
@@ -9865,7 +9874,7 @@ for i in range(nres):
Path for the tesseract command. This is mostly
useful on Windows, or for specifying a
non-default tesseract command. e.g. on Windows:
- C:/Program Files (x86)/Tesseract-OCR/tesseract.exe
+ C:/Program Files (x86)/Tesseract-OCR/tesseract.exe
-# Process backslash as normal letter
+# Process backslash as normal letter.
# This may make sense for people wanting to index TeX commands as
# such but is not of much general use.
#
#backslashasletter = 0
+#
+# Process underscore as normal letter.
+# This makes sense in so many cases that one wonders if it should
+# not be the default.
+#
+#underscoreasletter = 0
+
#
# Maximum term length.
# Words longer than this will be discarded.
diff --git a/src/testmains/trtextsplit.cpp b/src/testmains/trtextsplit.cpp
index 730d2011..ab8056e3 100644
--- a/src/testmains/trtextsplit.cpp
+++ b/src/testmains/trtextsplit.cpp
@@ -265,6 +265,7 @@ int main(int argc, char **argv)
if (!kotagger.empty()) {
fprintf(fp, "hangultagger = %s\n", kotagger.c_str());
}
+ fprintf(fp, "underscoreasletter = 0\n");
fclose(fp);
Logger::getTheLog("")->setLogLevel(Logger::LogLevel(loglevel));