expose abstract synthesis to let users decide when they want it done
This commit is contained in:
parent
2964dbea7f
commit
aff98f7fc9
@ -1,5 +1,5 @@
|
|||||||
#ifndef lint
|
#ifndef lint
|
||||||
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.97 2006-11-20 15:28:57 dockes Exp $ (C) 2004 J.F.Dockes";
|
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.98 2006-12-05 15:17:59 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||||
#endif
|
#endif
|
||||||
/*
|
/*
|
||||||
* This program is free software; you can redistribute it and/or modify
|
* This program is free software; you can redistribute it and/or modify
|
||||||
@ -107,10 +107,7 @@ class Native {
|
|||||||
|
|
||||||
string makeAbstract(Xapian::docid id, const list<string>& terms);
|
string makeAbstract(Xapian::docid id, const list<string>& terms);
|
||||||
|
|
||||||
bool dbDataToRclDoc(std::string &data, Doc &doc,
|
bool dbDataToRclDoc(Xapian::docid docid, std::string &data, Doc &doc);
|
||||||
int qopts,
|
|
||||||
Xapian::docid docid,
|
|
||||||
const list<string>& terms);
|
|
||||||
|
|
||||||
/** Compute list of subdocuments for a given path (given by hash)
|
/** Compute list of subdocuments for a given path (given by hash)
|
||||||
* We look for all Q terms beginning with the path/hash
|
* We look for all Q terms beginning with the path/hash
|
||||||
@ -177,11 +174,10 @@ bool Native::subDocs(const string &hash, vector<Xapian::docid>& docids)
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Native::dbDataToRclDoc(std::string &data, Doc &doc,
|
// Turn data record from db into document fields
|
||||||
int qopts,
|
bool Native::dbDataToRclDoc(Xapian::docid docid, std::string &data, Doc &doc)
|
||||||
Xapian::docid docid, const list<string>& terms)
|
|
||||||
{
|
{
|
||||||
LOGDEB1(("Db::dbDataToRclDoc: opts %x data: %s\n", qopts, data.c_str()));
|
LOGDEB1(("Db::dbDataToRclDoc: data: %s\n", data.c_str()));
|
||||||
ConfSimple parms(&data);
|
ConfSimple parms(&data);
|
||||||
if (!parms.ok())
|
if (!parms.ok())
|
||||||
return false;
|
return false;
|
||||||
@ -195,17 +191,10 @@ bool Native::dbDataToRclDoc(std::string &data, Doc &doc,
|
|||||||
parms.get(string("abstract"), doc.abstract);
|
parms.get(string("abstract"), doc.abstract);
|
||||||
// Possibly remove synthetic abstract indicator (if it's there, we
|
// Possibly remove synthetic abstract indicator (if it's there, we
|
||||||
// used to index the beginning of the text as abstract).
|
// used to index the beginning of the text as abstract).
|
||||||
bool syntabs = false;
|
doc.syntabs = false;
|
||||||
if (doc.abstract.find(rclSyntAbs) == 0) {
|
if (doc.abstract.find(rclSyntAbs) == 0) {
|
||||||
doc.abstract = doc.abstract.substr(rclSyntAbs.length());
|
doc.abstract = doc.abstract.substr(rclSyntAbs.length());
|
||||||
syntabs = true;
|
doc.syntabs = true;
|
||||||
}
|
|
||||||
// If the option is set and the abstract is synthetic or empty , build
|
|
||||||
// abstract from position data.
|
|
||||||
if ((qopts & Db::QO_BUILD_ABSTRACT) && !terms.empty()) {
|
|
||||||
if (doc.abstract.empty() || syntabs ||
|
|
||||||
(qopts & Db::QO_REPLACE_ABSTRACT))
|
|
||||||
doc.abstract = makeAbstract(docid, terms);
|
|
||||||
}
|
}
|
||||||
parms.get(string("ipath"), doc.ipath);
|
parms.get(string("ipath"), doc.ipath);
|
||||||
parms.get(string("fbytes"), doc.fbytes);
|
parms.get(string("fbytes"), doc.fbytes);
|
||||||
@ -1611,11 +1600,21 @@ bool Db::getDoc(int exti, Doc &doc, int *percent)
|
|||||||
|
|
||||||
// Parse xapian document's data and populate doc fields
|
// Parse xapian document's data and populate doc fields
|
||||||
string data = xdoc.get_data();
|
string data = xdoc.get_data();
|
||||||
list<string> terms;
|
return m_ndb->dbDataToRclDoc(docid, data, doc);
|
||||||
getQueryTerms(terms);
|
|
||||||
return m_ndb->dbDataToRclDoc(data, doc, m_qOpts, docid, terms);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Build a synthetic abstract for a document out of the current query terms.
 *
 * @param doc      Document to build the abstract for; only doc.xdocid is
 *                 read here.
 * @param abstract Output: receives the string returned by
 *                 Native::makeAbstract() for this document.
 * @return false if there is no native db object or no enquire object
 *         (no query opened), true otherwise.
 */
bool Db::makeDocAbstract(Doc &doc, string& abstract)
{
    // Log the document id we are working on. The previous message formatted
    // `exti`, which is not declared in this function and only compiled
    // when the LOGDEB1 macro discarded its arguments.
    LOGDEB1(("Db::makeDocAbstract: docid %lu\n",
             static_cast<unsigned long>(doc.xdocid)));
    if (!m_ndb || !m_ndb->enquire) {
        LOGERR(("Db::makeDocAbstract: no query opened\n"));
        return false;
    }
    // The abstract is built from the terms of the current query.
    list<string> terms;
    getQueryTerms(terms);
    abstract = m_ndb->makeAbstract(doc.xdocid, terms);
    return true;
}
|
||||||
|
|
||||||
// Retrieve document defined by file name and internal path.
|
// Retrieve document defined by file name and internal path.
|
||||||
bool Db::getDoc(const string &fn, const string &ipath, Doc &doc, int *pc)
|
bool Db::getDoc(const string &fn, const string &ipath, Doc &doc, int *pc)
|
||||||
@ -1651,7 +1650,7 @@ bool Db::getDoc(const string &fn, const string &ipath, Doc &doc, int *pc)
|
|||||||
Xapian::Document xdoc = m_ndb->db.get_document(*docid);
|
Xapian::Document xdoc = m_ndb->db.get_document(*docid);
|
||||||
string data = xdoc.get_data();
|
string data = xdoc.get_data();
|
||||||
list<string> terms;
|
list<string> terms;
|
||||||
return m_ndb->dbDataToRclDoc(data, doc, QO_NONE, *docid, terms);
|
return m_ndb->dbDataToRclDoc(*docid, data, doc);
|
||||||
} catch (const Xapian::Error &e) {
|
} catch (const Xapian::Error &e) {
|
||||||
ermsg = e.get_msg().c_str();
|
ermsg = e.get_msg().c_str();
|
||||||
} catch (const string &s) {
|
} catch (const string &s) {
|
||||||
|
|||||||
@ -16,7 +16,7 @@
|
|||||||
*/
|
*/
|
||||||
#ifndef _DB_H_INCLUDED_
|
#ifndef _DB_H_INCLUDED_
|
||||||
#define _DB_H_INCLUDED_
|
#define _DB_H_INCLUDED_
|
||||||
/* @(#$Id: rcldb.h,v 1.42 2006-11-14 13:55:43 dockes Exp $ (C) 2004 J.F.Dockes */
|
/* @(#$Id: rcldb.h,v 1.43 2006-12-05 15:17:59 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <list>
|
#include <list>
|
||||||
@ -74,6 +74,8 @@ class Doc {
|
|||||||
string title; // Possibly set by handler
|
string title; // Possibly set by handler
|
||||||
string keywords; // Possibly set by handler
|
string keywords; // Possibly set by handler
|
||||||
string abstract; // Possibly set by handler
|
string abstract; // Possibly set by handler
|
||||||
|
bool syntabs; // true if abstract is just the top of doc, not an
|
||||||
|
// explicit document attribute
|
||||||
string fbytes; // File size. Set by Db::Add
|
string fbytes; // File size. Set by Db::Add
|
||||||
string dbytes; // Doc size. Set by Db::Add from text length
|
string dbytes; // Doc size. Set by Db::Add from text length
|
||||||
|
|
||||||
@ -96,6 +98,7 @@ class Doc {
|
|||||||
title.erase();
|
title.erase();
|
||||||
keywords.erase();
|
keywords.erase();
|
||||||
abstract.erase();
|
abstract.erase();
|
||||||
|
syntabs = false;
|
||||||
fbytes.erase();
|
fbytes.erase();
|
||||||
dbytes.erase();
|
dbytes.erase();
|
||||||
|
|
||||||
@ -119,8 +122,7 @@ class Db {
|
|||||||
|
|
||||||
enum OpenMode {DbRO, DbUpd, DbTrunc};
|
enum OpenMode {DbRO, DbUpd, DbTrunc};
|
||||||
// KEEP_UPDATED is internal use by reOpen() only
|
// KEEP_UPDATED is internal use by reOpen() only
|
||||||
enum QueryOpts {QO_NONE=0, QO_STEM = 1, QO_BUILD_ABSTRACT = 2,
|
enum QueryOpts {QO_NONE=0, QO_STEM = 1, QO_KEEP_UPDATED = 8};
|
||||||
QO_REPLACE_ABSTRACT = 4, QO_KEEP_UPDATED = 8};
|
|
||||||
|
|
||||||
bool open(const string &dbdir, OpenMode mode, int qops = QO_NONE);
|
bool open(const string &dbdir, OpenMode mode, int qops = QO_NONE);
|
||||||
bool close();
|
bool close();
|
||||||
@ -184,6 +186,9 @@ class Db {
|
|||||||
*/
|
*/
|
||||||
bool getDoc(int i, Doc &doc, int *percent = 0);
|
bool getDoc(int i, Doc &doc, int *percent = 0);
|
||||||
|
|
||||||
|
/* Build synthetic abstract out of query terms and term position data */
|
||||||
|
bool makeDocAbstract(Doc &doc, string& abstract);
|
||||||
|
|
||||||
/** Get document for given filename and ipath */
|
/** Get document for given filename and ipath */
|
||||||
bool getDoc(const string &fn, const string &ipath, Doc &doc, int *percent);
|
bool getDoc(const string &fn, const string &ipath, Doc &doc, int *percent);
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user