From ce35b3a1b21f21386a34fe3e31384cd45887e195 Mon Sep 17 00:00:00 2001 From: dockes Date: Mon, 5 Dec 2005 10:39:20 +0000 Subject: [PATCH] also index file path as terms --- src/rcldb/rcldb.cpp | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/src/rcldb/rcldb.cpp b/src/rcldb/rcldb.cpp index a844b2b7..2e28f35b 100644 --- a/src/rcldb/rcldb.cpp +++ b/src/rcldb/rcldb.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.42 2005-11-30 09:46:25 dockes Exp $ (C) 2004 J.F.Dockes"; +static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.43 2005-12-05 10:39:20 dockes Exp $ (C) 2004 J.F.Dockes"; #endif #include #include @@ -305,39 +305,48 @@ bool Rcl::Db::add(const string &fn, const Rcl::Doc &idoc) TextSplit splitter(&splitData); - ///////// Split and index terms in document body and auxiliary fields - - // Split title and index terms + // /////// Split and index terms in document body and auxiliary fields string noacc; + + // Split and index file name. This supposes that it's either ascii + // or utf-8. If this fails, we just go on. 
TODO: we need a config + // parameter for the file name charset + if (dumb_string(fn, noacc)) { + splitter.text_to_words(noacc); + splitData.basepos += splitData.curpos + 100; + } + + // Split and index title if (!dumb_string(doc.title, noacc)) { LOGERR(("Rcl::Db::add: unac failed\n")); return false; } splitter.text_to_words(noacc); + splitData.basepos += splitData.curpos + 100; // Split body and index terms - splitData.basepos += splitData.curpos + 100; if (!dumb_string(doc.text, noacc)) { LOGERR(("Rcl::Db::add: dumb_string failed\n")); return false; } splitter.text_to_words(noacc); + splitData.basepos += splitData.curpos + 100; // Split keywords and index terms - splitData.basepos += splitData.curpos + 100; if (!dumb_string(doc.keywords, noacc)) { LOGERR(("Rcl::Db::add: dumb_string failed\n")); return false; } splitter.text_to_words(noacc); + splitData.basepos += splitData.curpos + 100; // Split abstract and index terms - splitData.basepos += splitData.curpos + 100; if (!dumb_string(doc.abstract, noacc)) { LOGERR(("Rcl::Db::add: dumb_string failed\n")); return false; } splitter.text_to_words(noacc); + splitData.basepos += splitData.curpos + 100; ////// Special terms for metadata // Mime type