From aff98f7fc9cbc6321fa899c9e1a18d105d1e97b0 Mon Sep 17 00:00:00 2001 From: dockes Date: Tue, 5 Dec 2006 15:17:59 +0000 Subject: [PATCH] expose abstract synthesis to let users decide when they want it done --- src/rcldb/rcldb.cpp | 43 +++++++++++++++++++++---------------------- src/rcldb/rcldb.h | 11 ++++++++--- 2 files changed, 29 insertions(+), 25 deletions(-) diff --git a/src/rcldb/rcldb.cpp b/src/rcldb/rcldb.cpp index 057eb71e..1aa85ce4 100644 --- a/src/rcldb/rcldb.cpp +++ b/src/rcldb/rcldb.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.97 2006-11-20 15:28:57 dockes Exp $ (C) 2004 J.F.Dockes"; +static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.98 2006-12-05 15:17:59 dockes Exp $ (C) 2004 J.F.Dockes"; #endif /* * This program is free software; you can redistribute it and/or modify @@ -107,10 +107,7 @@ class Native { string makeAbstract(Xapian::docid id, const list& terms); - bool dbDataToRclDoc(std::string &data, Doc &doc, - int qopts, - Xapian::docid docid, - const list& terms); + bool dbDataToRclDoc(Xapian::docid docid, std::string &data, Doc &doc); /** Compute list of subdocuments for a given path (given by hash) * We look for all Q terms beginning with the path/hash @@ -177,11 +174,10 @@ bool Native::subDocs(const string &hash, vector& docids) return false; } -bool Native::dbDataToRclDoc(std::string &data, Doc &doc, - int qopts, - Xapian::docid docid, const list& terms) +// Turn data record from db into document fields +bool Native::dbDataToRclDoc(Xapian::docid docid, std::string &data, Doc &doc) { - LOGDEB1(("Db::dbDataToRclDoc: opts %x data: %s\n", qopts, data.c_str())); + LOGDEB1(("Db::dbDataToRclDoc: data: %s\n", data.c_str())); ConfSimple parms(&data); if (!parms.ok()) return false; @@ -195,18 +191,11 @@ bool Native::dbDataToRclDoc(std::string &data, Doc &doc, parms.get(string("abstract"), doc.abstract); // Possibly remove synthetic abstract indicator (if it's there, we // used to index the beginning of the text as abstract). 
- bool syntabs = false; + doc.syntabs = false; if (doc.abstract.find(rclSyntAbs) == 0) { doc.abstract = doc.abstract.substr(rclSyntAbs.length()); - syntabs = true; + doc.syntabs = true; } - // If the option is set and the abstract is synthetic or empty , build - // abstract from position data. - if ((qopts & Db::QO_BUILD_ABSTRACT) && !terms.empty()) { - if (doc.abstract.empty() || syntabs || - (qopts & Db::QO_REPLACE_ABSTRACT)) - doc.abstract = makeAbstract(docid, terms); - } parms.get(string("ipath"), doc.ipath); parms.get(string("fbytes"), doc.fbytes); parms.get(string("dbytes"), doc.dbytes); @@ -1611,11 +1600,21 @@ bool Db::getDoc(int exti, Doc &doc, int *percent) // Parse xapian document's data and populate doc fields string data = xdoc.get_data(); - list terms; - getQueryTerms(terms); - return m_ndb->dbDataToRclDoc(data, doc, m_qOpts, docid, terms); + return m_ndb->dbDataToRclDoc(docid, data, doc); } +bool Db::makeDocAbstract(Doc &doc, string& abstract) // Fills abstract from doc.xdocid and the query terms; false if no query is opened +{ + LOGDEB1(("Db::makeDocAbstract: docid %lu\n", (unsigned long)doc.xdocid)); + if (!m_ndb || !m_ndb->enquire) { + LOGERR(("Db::makeDocAbstract: no query opened\n")); + return false; + } + list terms; + getQueryTerms(terms); + abstract = m_ndb->makeAbstract(doc.xdocid, terms); + return true; +} // Retrieve document defined by file name and internal path. 
bool Db::getDoc(const string &fn, const string &ipath, Doc &doc, int *pc) @@ -1651,7 +1650,7 @@ bool Db::getDoc(const string &fn, const string &ipath, Doc &doc, int *pc) Xapian::Document xdoc = m_ndb->db.get_document(*docid); string data = xdoc.get_data(); list terms; - return m_ndb->dbDataToRclDoc(data, doc, QO_NONE, *docid, terms); + return m_ndb->dbDataToRclDoc(*docid, data, doc); } catch (const Xapian::Error &e) { ermsg = e.get_msg().c_str(); } catch (const string &s) { diff --git a/src/rcldb/rcldb.h b/src/rcldb/rcldb.h index b8f32d31..26fe7451 100644 --- a/src/rcldb/rcldb.h +++ b/src/rcldb/rcldb.h @@ -16,7 +16,7 @@ */ #ifndef _DB_H_INCLUDED_ #define _DB_H_INCLUDED_ -/* @(#$Id: rcldb.h,v 1.42 2006-11-14 13:55:43 dockes Exp $ (C) 2004 J.F.Dockes */ +/* @(#$Id: rcldb.h,v 1.43 2006-12-05 15:17:59 dockes Exp $ (C) 2004 J.F.Dockes */ #include #include @@ -74,6 +74,8 @@ class Doc { string title; // Possibly set by handler string keywords; // Possibly set by handler string abstract; // Possibly set by handler + bool syntabs; // true if abstract is just the top of doc, not an + // explicit document attribute string fbytes; // File size. Set by Db::Add string dbytes; // Doc size. 
Set by Db::Add from text length @@ -96,6 +98,7 @@ class Doc { title.erase(); keywords.erase(); abstract.erase(); + syntabs = false; fbytes.erase(); dbytes.erase(); @@ -119,8 +122,7 @@ class Db { enum OpenMode {DbRO, DbUpd, DbTrunc}; // KEEP_UPDATED is internal use by reOpen() only - enum QueryOpts {QO_NONE=0, QO_STEM = 1, QO_BUILD_ABSTRACT = 2, - QO_REPLACE_ABSTRACT = 4, QO_KEEP_UPDATED = 8}; + enum QueryOpts {QO_NONE=0, QO_STEM = 1, QO_KEEP_UPDATED = 8}; bool open(const string &dbdir, OpenMode mode, int qops = QO_NONE); bool close(); @@ -184,6 +186,9 @@ class Db { */ bool getDoc(int i, Doc &doc, int *percent = 0); + /* Build synthetic abstract out of query terms and term position data */ + bool makeDocAbstract(Doc &doc, string& abstract); + /** Get document for given filename and ipath */ bool getDoc(const string &fn, const string &ipath, Doc &doc, int *percent);