expose abstract synthesis to let users decide when they want it done
This commit is contained in:
parent
2964dbea7f
commit
aff98f7fc9
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.97 2006-11-20 15:28:57 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.98 2006-12-05 15:17:59 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
#endif
|
||||
/*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
@ -107,10 +107,7 @@ class Native {
|
||||
|
||||
string makeAbstract(Xapian::docid id, const list<string>& terms);
|
||||
|
||||
bool dbDataToRclDoc(std::string &data, Doc &doc,
|
||||
int qopts,
|
||||
Xapian::docid docid,
|
||||
const list<string>& terms);
|
||||
bool dbDataToRclDoc(Xapian::docid docid, std::string &data, Doc &doc);
|
||||
|
||||
/** Compute list of subdocuments for a given path (given by hash)
|
||||
* We look for all Q terms beginning with the path/hash
|
||||
@ -177,11 +174,10 @@ bool Native::subDocs(const string &hash, vector<Xapian::docid>& docids)
|
||||
return false;
|
||||
}
|
||||
|
||||
bool Native::dbDataToRclDoc(std::string &data, Doc &doc,
|
||||
int qopts,
|
||||
Xapian::docid docid, const list<string>& terms)
|
||||
// Turn data record from db into document fields
|
||||
bool Native::dbDataToRclDoc(Xapian::docid docid, std::string &data, Doc &doc)
|
||||
{
|
||||
LOGDEB1(("Db::dbDataToRclDoc: opts %x data: %s\n", qopts, data.c_str()));
|
||||
LOGDEB1(("Db::dbDataToRclDoc: data: %s\n", data.c_str()));
|
||||
ConfSimple parms(&data);
|
||||
if (!parms.ok())
|
||||
return false;
|
||||
@ -195,18 +191,11 @@ bool Native::dbDataToRclDoc(std::string &data, Doc &doc,
|
||||
parms.get(string("abstract"), doc.abstract);
|
||||
// Possibly remove synthetic abstract indicator (if it's there, we
|
||||
// used to index the beginning of the text as abstract).
|
||||
bool syntabs = false;
|
||||
doc.syntabs = false;
|
||||
if (doc.abstract.find(rclSyntAbs) == 0) {
|
||||
doc.abstract = doc.abstract.substr(rclSyntAbs.length());
|
||||
syntabs = true;
|
||||
doc.syntabs = true;
|
||||
}
|
||||
// If the option is set and the abstract is synthetic or empty , build
|
||||
// abstract from position data.
|
||||
if ((qopts & Db::QO_BUILD_ABSTRACT) && !terms.empty()) {
|
||||
if (doc.abstract.empty() || syntabs ||
|
||||
(qopts & Db::QO_REPLACE_ABSTRACT))
|
||||
doc.abstract = makeAbstract(docid, terms);
|
||||
}
|
||||
parms.get(string("ipath"), doc.ipath);
|
||||
parms.get(string("fbytes"), doc.fbytes);
|
||||
parms.get(string("dbytes"), doc.dbytes);
|
||||
@ -1611,11 +1600,21 @@ bool Db::getDoc(int exti, Doc &doc, int *percent)
|
||||
|
||||
// Parse xapian document's data and populate doc fields
|
||||
string data = xdoc.get_data();
|
||||
list<string> terms;
|
||||
getQueryTerms(terms);
|
||||
return m_ndb->dbDataToRclDoc(data, doc, m_qOpts, docid, terms);
|
||||
return m_ndb->dbDataToRclDoc(docid, data, doc);
|
||||
}
|
||||
|
||||
// Build a synthetic abstract for a document out of the current query terms
// and term position data.
//
// @param doc       document to build the abstract for; only doc.xdocid is
//                  read here and handed to the native abstract builder.
// @param abstract  output: receives the text returned by
//                  Native::makeAbstract(). Left untouched on failure.
// @return false if there is no native db object or no query is currently
//         opened (no enquire object), true otherwise.
bool Db::makeDocAbstract(Doc &doc, string& abstract)
{
    // Log the document id actually used below. The previous message
    // referenced 'exti', which does not exist in this function and would
    // fail to compile as soon as LOGDEB1 is enabled.
    LOGDEB1(("Db::makeDocAbstract: docid %lu\n",
             static_cast<unsigned long>(doc.xdocid)));
    if (!m_ndb || !m_ndb->enquire) {
        LOGERR(("Db::makeDocAbstract: no query opened\n"));
        return false;
    }
    // The abstract is built around the terms of the current query.
    list<string> terms;
    getQueryTerms(terms);
    abstract = m_ndb->makeAbstract(doc.xdocid, terms);
    return true;
}
|
||||
|
||||
// Retrieve document defined by file name and internal path.
|
||||
bool Db::getDoc(const string &fn, const string &ipath, Doc &doc, int *pc)
|
||||
@ -1651,7 +1650,7 @@ bool Db::getDoc(const string &fn, const string &ipath, Doc &doc, int *pc)
|
||||
Xapian::Document xdoc = m_ndb->db.get_document(*docid);
|
||||
string data = xdoc.get_data();
|
||||
list<string> terms;
|
||||
return m_ndb->dbDataToRclDoc(data, doc, QO_NONE, *docid, terms);
|
||||
return m_ndb->dbDataToRclDoc(*docid, data, doc);
|
||||
} catch (const Xapian::Error &e) {
|
||||
ermsg = e.get_msg().c_str();
|
||||
} catch (const string &s) {
|
||||
|
||||
@ -16,7 +16,7 @@
|
||||
*/
|
||||
#ifndef _DB_H_INCLUDED_
|
||||
#define _DB_H_INCLUDED_
|
||||
/* @(#$Id: rcldb.h,v 1.42 2006-11-14 13:55:43 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
/* @(#$Id: rcldb.h,v 1.43 2006-12-05 15:17:59 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
|
||||
#include <string>
|
||||
#include <list>
|
||||
@ -74,6 +74,8 @@ class Doc {
|
||||
string title; // Possibly set by handler
|
||||
string keywords; // Possibly set by handler
|
||||
string abstract; // Possibly set by handler
|
||||
bool syntabs; // true if abstract is just the top of doc, not an
|
||||
// explicit document attribute
|
||||
string fbytes; // File size. Set by Db::Add
|
||||
string dbytes; // Doc size. Set by Db::Add from text length
|
||||
|
||||
@ -96,6 +98,7 @@ class Doc {
|
||||
title.erase();
|
||||
keywords.erase();
|
||||
abstract.erase();
|
||||
syntabs = false;
|
||||
fbytes.erase();
|
||||
dbytes.erase();
|
||||
|
||||
@ -119,8 +122,7 @@ class Db {
|
||||
|
||||
enum OpenMode {DbRO, DbUpd, DbTrunc};
|
||||
// KEEP_UPDATED is internal use by reOpen() only
|
||||
enum QueryOpts {QO_NONE=0, QO_STEM = 1, QO_BUILD_ABSTRACT = 2,
|
||||
QO_REPLACE_ABSTRACT = 4, QO_KEEP_UPDATED = 8};
|
||||
enum QueryOpts {QO_NONE=0, QO_STEM = 1, QO_KEEP_UPDATED = 8};
|
||||
|
||||
bool open(const string &dbdir, OpenMode mode, int qops = QO_NONE);
|
||||
bool close();
|
||||
@ -184,6 +186,9 @@ class Db {
|
||||
*/
|
||||
bool getDoc(int i, Doc &doc, int *percent = 0);
|
||||
|
||||
/* Build synthetic abstract out of query terms and term position data */
|
||||
bool makeDocAbstract(Doc &doc, string& abstract);
|
||||
|
||||
/** Get document for given filename and ipath */
|
||||
bool getDoc(const string &fn, const string &ipath, Doc &doc, int *percent);
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user