Fixed a small glitch in abstract text splitting

This commit is contained in:
dockes 2006-04-25 08:17:36 +00:00
parent 20b7feb1a6
commit 4928503f60
2 changed files with 15 additions and 5 deletions

View File

@@ -1,5 +1,5 @@
#ifndef lint #ifndef lint
static char rcsid[] = "@(#$Id: textsplit.cpp,v 1.21 2006-04-11 06:49:45 dockes Exp $ (C) 2004 J.F.Dockes"; static char rcsid[] = "@(#$Id: textsplit.cpp,v 1.22 2006-04-25 08:17:36 dockes Exp $ (C) 2004 J.F.Dockes";
#endif #endif
/* /*
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
@@ -203,14 +203,16 @@ static inline int whatcc(unsigned int c)
*/ */
bool TextSplit::text_to_words(const string &in) bool TextSplit::text_to_words(const string &in)
{ {
LOGDEB2(("TextSplit::text_to_words: cb %p\n", cb)); LOGDEB2(("TextSplit::text_to_words: cb %p in [%s]\n", cb,
in.substr(0,50).c_str()));
setcharclasses(); setcharclasses();
span.erase(); span.erase();
word.erase(); // Current word: no punctuation at all in there word.erase(); // Current word: no punctuation at all in there
number = false; number = false;
wordpos = spanpos = charpos = 0; prevpos = wordpos = spanpos = charpos = 0;
prevterm.erase();
Utf8Iter it(in); Utf8Iter it(in);

View File

@@ -1,5 +1,5 @@
#ifndef lint #ifndef lint
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.70 2006-04-22 06:27:37 dockes Exp $ (C) 2004 J.F.Dockes"; static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.71 2006-04-25 08:17:36 dockes Exp $ (C) 2004 J.F.Dockes";
#endif #endif
/* /*
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
@@ -437,7 +437,9 @@ bool Db::add(const string &fn, const Doc &idoc,
// Truncate abstract, title and keywords to reasonable lengths. If // Truncate abstract, title and keywords to reasonable lengths. If
// abstract is currently empty, we make up one with the beginning // abstract is currently empty, we make up one with the beginning
// of the document. // of the document.
bool syntabs = false;
if (doc.abstract.empty()) { if (doc.abstract.empty()) {
syntabs = true;
doc.abstract = rclSyntAbs + doc.abstract = rclSyntAbs +
truncate_to_word(doc.text, INDEX_ABSTRACT_SIZE); truncate_to_word(doc.text, INDEX_ABSTRACT_SIZE);
} else { } else {
@@ -457,12 +459,14 @@ bool Db::add(const string &fn, const Doc &idoc,
string noacc; string noacc;
// Split and index file name as document term(s) // Split and index file name as document term(s)
LOGDEB2(("Db::add: split file name [%s]\n", fn.c_str()));
if (dumb_string(doc.utf8fn, noacc)) { if (dumb_string(doc.utf8fn, noacc)) {
splitter.text_to_words(noacc); splitter.text_to_words(noacc);
splitData.basepos += splitData.curpos + 100; splitData.basepos += splitData.curpos + 100;
} }
// Split and index title // Split and index title
LOGDEB2(("Db::add: split title [%s]\n", doc.title.c_str()));
if (!dumb_string(doc.title, noacc)) { if (!dumb_string(doc.title, noacc)) {
LOGERR(("Db::add: dumb_string failed\n")); LOGERR(("Db::add: dumb_string failed\n"));
return false; return false;
@@ -471,6 +475,7 @@ bool Db::add(const string &fn, const Doc &idoc,
splitData.basepos += splitData.curpos + 100; splitData.basepos += splitData.curpos + 100;
// Split and index body // Split and index body
LOGDEB2(("Db::add: split body\n"));
if (!dumb_string(doc.text, noacc)) { if (!dumb_string(doc.text, noacc)) {
LOGERR(("Db::add: dumb_string failed\n")); LOGERR(("Db::add: dumb_string failed\n"));
return false; return false;
@@ -479,6 +484,7 @@ bool Db::add(const string &fn, const Doc &idoc,
splitData.basepos += splitData.curpos + 100; splitData.basepos += splitData.curpos + 100;
// Split and index keywords // Split and index keywords
LOGDEB2(("Db::add: split kw [%s]\n", doc.keywords.c_str()));
if (!dumb_string(doc.keywords, noacc)) { if (!dumb_string(doc.keywords, noacc)) {
LOGERR(("Db::add: dumb_string failed\n")); LOGERR(("Db::add: dumb_string failed\n"));
return false; return false;
@@ -487,7 +493,9 @@ bool Db::add(const string &fn, const Doc &idoc,
splitData.basepos += splitData.curpos + 100; splitData.basepos += splitData.curpos + 100;
// Split and index abstract // Split and index abstract
if (!dumb_string(doc.abstract, noacc)) { LOGDEB2(("Db::add: split abstract [%s]\n", doc.abstract.c_str()));
if (!dumb_string(syntabs ? doc.abstract.substr(rclSyntAbs.length()) :
doc.abstract, noacc)) {
LOGERR(("Db::add: dumb_string failed\n")); LOGERR(("Db::add: dumb_string failed\n"));
return false; return false;
} }