expose abstract synthesis to let users decide when they want it done

This commit is contained in:
dockes 2006-12-05 15:17:59 +00:00
parent 2964dbea7f
commit aff98f7fc9
2 changed files with 29 additions and 25 deletions

View File

@ -1,5 +1,5 @@
#ifndef lint #ifndef lint
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.97 2006-11-20 15:28:57 dockes Exp $ (C) 2004 J.F.Dockes"; static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.98 2006-12-05 15:17:59 dockes Exp $ (C) 2004 J.F.Dockes";
#endif #endif
/* /*
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
@ -107,10 +107,7 @@ class Native {
string makeAbstract(Xapian::docid id, const list<string>& terms); string makeAbstract(Xapian::docid id, const list<string>& terms);
bool dbDataToRclDoc(std::string &data, Doc &doc, bool dbDataToRclDoc(Xapian::docid docid, std::string &data, Doc &doc);
int qopts,
Xapian::docid docid,
const list<string>& terms);
/** Compute list of subdocuments for a given path (given by hash) /** Compute list of subdocuments for a given path (given by hash)
* We look for all Q terms beginning with the path/hash * We look for all Q terms beginning with the path/hash
@ -177,11 +174,10 @@ bool Native::subDocs(const string &hash, vector<Xapian::docid>& docids)
return false; return false;
} }
bool Native::dbDataToRclDoc(std::string &data, Doc &doc, // Turn data record from db into document fields
int qopts, bool Native::dbDataToRclDoc(Xapian::docid docid, std::string &data, Doc &doc)
Xapian::docid docid, const list<string>& terms)
{ {
LOGDEB1(("Db::dbDataToRclDoc: opts %x data: %s\n", qopts, data.c_str())); LOGDEB1(("Db::dbDataToRclDoc: data: %s\n", data.c_str()));
ConfSimple parms(&data); ConfSimple parms(&data);
if (!parms.ok()) if (!parms.ok())
return false; return false;
@ -195,17 +191,10 @@ bool Native::dbDataToRclDoc(std::string &data, Doc &doc,
parms.get(string("abstract"), doc.abstract); parms.get(string("abstract"), doc.abstract);
// Possibly remove synthetic abstract indicator (if it's there, we // Possibly remove synthetic abstract indicator (if it's there, we
// used to index the beginning of the text as abstract). // used to index the beginning of the text as abstract).
bool syntabs = false; doc.syntabs = false;
if (doc.abstract.find(rclSyntAbs) == 0) { if (doc.abstract.find(rclSyntAbs) == 0) {
doc.abstract = doc.abstract.substr(rclSyntAbs.length()); doc.abstract = doc.abstract.substr(rclSyntAbs.length());
syntabs = true; doc.syntabs = true;
}
// If the option is set and the abstract is synthetic or empty , build
// abstract from position data.
if ((qopts & Db::QO_BUILD_ABSTRACT) && !terms.empty()) {
if (doc.abstract.empty() || syntabs ||
(qopts & Db::QO_REPLACE_ABSTRACT))
doc.abstract = makeAbstract(docid, terms);
} }
parms.get(string("ipath"), doc.ipath); parms.get(string("ipath"), doc.ipath);
parms.get(string("fbytes"), doc.fbytes); parms.get(string("fbytes"), doc.fbytes);
@ -1611,11 +1600,21 @@ bool Db::getDoc(int exti, Doc &doc, int *percent)
// Parse xapian document's data and populate doc fields // Parse xapian document's data and populate doc fields
string data = xdoc.get_data(); string data = xdoc.get_data();
list<string> terms; return m_ndb->dbDataToRclDoc(docid, data, doc);
getQueryTerms(terms);
return m_ndb->dbDataToRclDoc(data, doc, m_qOpts, docid, terms);
} }
// Build a synthetic abstract for a document out of the current query's
// terms and the index term position data.
//
// @param doc      document to build the abstract for; only doc.xdocid is
//                 read here, to identify the document in the index.
// @param abstract output: receives the text returned by
//                 Native::makeAbstract().
// @return false if there is no native db object or no enquire object
//         (no query opened), true otherwise.
bool Db::makeDocAbstract(Doc &doc, string& abstract)
{
    // Log the docid: unlike getDoc(int exti, ...), this method has no
    // 'exti' in scope, so referencing it would break the build whenever
    // LOGDEB1 is actually compiled in.
    LOGDEB1(("Db::makeDocAbstract: docid %lu\n",
             static_cast<unsigned long>(doc.xdocid)));
    if (!m_ndb || !m_ndb->enquire) {
        LOGERR(("Db::makeDocAbstract: no query opened\n"));
        return false;
    }
    // The abstract is built around the terms of the currently opened query.
    list<string> terms;
    getQueryTerms(terms);
    abstract = m_ndb->makeAbstract(doc.xdocid, terms);
    return true;
}
// Retrieve document defined by file name and internal path. // Retrieve document defined by file name and internal path.
bool Db::getDoc(const string &fn, const string &ipath, Doc &doc, int *pc) bool Db::getDoc(const string &fn, const string &ipath, Doc &doc, int *pc)
@ -1651,7 +1650,7 @@ bool Db::getDoc(const string &fn, const string &ipath, Doc &doc, int *pc)
Xapian::Document xdoc = m_ndb->db.get_document(*docid); Xapian::Document xdoc = m_ndb->db.get_document(*docid);
string data = xdoc.get_data(); string data = xdoc.get_data();
list<string> terms; list<string> terms;
return m_ndb->dbDataToRclDoc(data, doc, QO_NONE, *docid, terms); return m_ndb->dbDataToRclDoc(*docid, data, doc);
} catch (const Xapian::Error &e) { } catch (const Xapian::Error &e) {
ermsg = e.get_msg().c_str(); ermsg = e.get_msg().c_str();
} catch (const string &s) { } catch (const string &s) {

View File

@ -16,7 +16,7 @@
*/ */
#ifndef _DB_H_INCLUDED_ #ifndef _DB_H_INCLUDED_
#define _DB_H_INCLUDED_ #define _DB_H_INCLUDED_
/* @(#$Id: rcldb.h,v 1.42 2006-11-14 13:55:43 dockes Exp $ (C) 2004 J.F.Dockes */ /* @(#$Id: rcldb.h,v 1.43 2006-12-05 15:17:59 dockes Exp $ (C) 2004 J.F.Dockes */
#include <string> #include <string>
#include <list> #include <list>
@ -74,6 +74,8 @@ class Doc {
string title; // Possibly set by handler string title; // Possibly set by handler
string keywords; // Possibly set by handler string keywords; // Possibly set by handler
string abstract; // Possibly set by handler string abstract; // Possibly set by handler
bool syntabs; // true if abstract is just the top of doc, not an
// explicit document attribute
string fbytes; // File size. Set by Db::Add string fbytes; // File size. Set by Db::Add
string dbytes; // Doc size. Set by Db::Add from text length string dbytes; // Doc size. Set by Db::Add from text length
@ -96,6 +98,7 @@ class Doc {
title.erase(); title.erase();
keywords.erase(); keywords.erase();
abstract.erase(); abstract.erase();
syntabs = false;
fbytes.erase(); fbytes.erase();
dbytes.erase(); dbytes.erase();
@ -119,8 +122,7 @@ class Db {
enum OpenMode {DbRO, DbUpd, DbTrunc}; enum OpenMode {DbRO, DbUpd, DbTrunc};
// KEEP_UPDATED is internal use by reOpen() only // KEEP_UPDATED is internal use by reOpen() only
enum QueryOpts {QO_NONE=0, QO_STEM = 1, QO_BUILD_ABSTRACT = 2, enum QueryOpts {QO_NONE=0, QO_STEM = 1, QO_KEEP_UPDATED = 8};
QO_REPLACE_ABSTRACT = 4, QO_KEEP_UPDATED = 8};
bool open(const string &dbdir, OpenMode mode, int qops = QO_NONE); bool open(const string &dbdir, OpenMode mode, int qops = QO_NONE);
bool close(); bool close();
@ -184,6 +186,9 @@ class Db {
*/ */
bool getDoc(int i, Doc &doc, int *percent = 0); bool getDoc(int i, Doc &doc, int *percent = 0);
/* Build synthetic abstract out of query terms and term position data */
bool makeDocAbstract(Doc &doc, string& abstract);
/** Get document for given filename and ipath */ /** Get document for given filename and ipath */
bool getDoc(const string &fn, const string &ipath, Doc &doc, int *percent); bool getDoc(const string &fn, const string &ipath, Doc &doc, int *percent);