rclquery: fixed log call formats and indentation

This commit is contained in:
Jean-Francois Dockes 2017-12-07 10:44:54 +01:00
parent ab5a628b9b
commit 653b1fb5a5

View File

@ -51,11 +51,11 @@ static const string cstr_ellipsis("...");
// Translate a document field name into the field name to look for when
// reading a value back: Doc::keytt becomes cstr_caption, Doc::keymt
// becomes cstr_dmtime, and any other name is returned unchanged.
//
// The result is a reference, either to one of those constants or to the
// argument itself, so it must not be used after df has gone away.
static const string& docfToDatf(const string& df)
{
    if (df == Doc::keytt) {
        return cstr_caption;
    }
    if (df == Doc::keymt) {
        return cstr_dmtime;
    }
    return df;
}
@ -70,70 +70,70 @@ class QSorter : public Xapian::KeyMaker {
#endif
public:
QSorter(const string& f)
: m_fld(docfToDatf(f) + "=")
: m_fld(docfToDatf(f) + "=")
{
m_ismtime = !m_fld.compare("dmtime=");
if (m_ismtime)
m_issize = false;
else
m_issize = !m_fld.compare("fbytes=") || !m_fld.compare("dbytes=") ||
!m_fld.compare("pcbytes=");
m_ismtime = !m_fld.compare("dmtime=");
if (m_ismtime)
m_issize = false;
else
m_issize = !m_fld.compare("fbytes=") || !m_fld.compare("dbytes=") ||
!m_fld.compare("pcbytes=");
}
// Compute the sort key for one document.
//
// The value of the sort field is cut out of the text returned by
// xdoc.get_data(): everything between the first occurrence of m_fld
// and the next '\n' or '\r'. An empty key is returned when the field
// is absent, when nothing follows the field name, or when the value is
// not terminated by an end of line.
//
// - mtime values are returned as stored.
// - size values are left-padded with zeros to 12 characters so that
//   string comparison orders them numerically.
// - anything else is unaccented/case-folded (when the conversion
//   succeeds) and stripped of leading punctuation and blanks.
virtual std::string operator()(const Xapian::Document& xdoc) const
{
    const string data = xdoc.get_data();

    // It would be simpler to do the record->Rcl::Doc thing, but
    // hand-doing this will be faster. It makes more assumptions
    // about the format than a ConfTree though:
    string::size_type i1 = data.find(m_fld);
    if (i1 == string::npos) {
        if (!m_ismtime) {
            return string();
        }
        // Ugly: specialcase mtime as it's either dmtime or fmtime
        i1 = data.find("fmtime=");
        if (i1 == string::npos) {
            return string();
        }
    }
    // When the fmtime fallback was used, m_fld is "dmtime=", which has
    // the same length as "fmtime=", so this offset is right in both cases.
    i1 += m_fld.length();
    if (i1 >= data.length()) {
        return string();
    }
    const string::size_type i2 = data.find_first_of("\n\r", i1);
    if (i2 == string::npos) {
        return string();
    }

    string term = data.substr(i1, i2 - i1);
    if (m_ismtime) {
        return term;
    }
    if (m_issize) {
        // Left zeropad values for appropriate numeric sorting
        leftzeropad(term, 12);
        return term;
    }

    // Process data for better sorting. We should actually do the
    // unicode thing
    // (http://unicode.org/reports/tr10/#Introduction), but just
    // removing accents and majuscules will remove the most
    // glaring weirdnesses (or not, depending on your national
    // approach to collating...)
    string sortterm;
    // We're not even sure the term is utf8 here (ie: url), so fall
    // back to the raw value if the conversion fails.
    if (!unacmaybefold(term, sortterm, "UTF-8", UNACOP_UNACFOLD)) {
        sortterm = term;
    }
    // Also remove some common uninteresting starting characters. A
    // value made only of such characters (npos) is left untouched.
    i1 = sortterm.find_first_not_of(" \t\\\"'([*+,.#/");
    if (i1 != 0 && i1 != string::npos) {
        sortterm.erase(0, i1);
    }

    LOGDEB2("QSorter: [" << term << "] -> [" << sortterm << "]\n");
    return sortterm;
}
private:
@ -147,26 +147,27 @@ Query::Query(Db *db)
m_collapseDuplicates(false), m_resCnt(-1), m_snipMaxPosWalk(1000000)
{
if (db)
db->getConf()->getConfParam("snippetMaxPosWalk", &m_snipMaxPosWalk);
db->getConf()->getConfParam("snippetMaxPosWalk", &m_snipMaxPosWalk);
}
// Release the native query object and the sort key maker, if any.
Query::~Query()
{
    deleteZ(m_nq);
    // m_sorter is only ever set to a QSorter in this file, so it is
    // destroyed through that type. Deleting a null pointer is a no-op,
    // so no test is needed before the delete.
    delete static_cast<QSorter*>(m_sorter);
    m_sorter = nullptr;
}
// Choose the field used to sort the results.
//
// An empty fld clears the sort field and leaves the sort direction as
// it was. Otherwise the field name is canonicalized through the
// configuration's fieldQCanon() and the direction is recorded.
//
// NOTE(review): m_db is dereferenced without a null check here, while
// the constructor accepts a null db and setQuery() tests for it --
// confirm that this cannot be called on a Query built without a db.
void Query::setSortBy(const string& fld, bool ascending) {
    if (fld.empty()) {
        m_sortField.erase();
    } else {
        m_sortField = m_db->getConf()->fieldQCanon(fld);
        m_sortAscending = ascending;
    }
    LOGDEB0("RclQuery::setSortBy: [" << m_sortField << "] " <<
            (m_sortAscending ? "ascending" : "descending") << "\n");
}
//#define ISNULL(X) (X).isNull()
@ -175,11 +176,11 @@ void Query::setSortBy(const string& fld, bool ascending) {
// Prepare query out of user search data
bool Query::setQuery(std::shared_ptr<SearchData> sdata)
{
LOGDEB("Query::setQuery:\n" );
LOGDEB("Query::setQuery:\n");
if (!m_db || ISNULL(m_nq)) {
LOGERR("Query::setQuery: not initialised!\n" );
return false;
LOGERR("Query::setQuery: not initialised!\n");
return false;
}
m_resCnt = -1;
m_reason.erase();
@ -189,33 +190,33 @@ bool Query::setQuery(std::shared_ptr<SearchData> sdata)
Xapian::Query xq;
if (!sdata->toNativeQuery(*m_db, &xq)) {
m_reason += sdata->getReason();
return false;
m_reason += sdata->getReason();
return false;
}
m_nq->xquery = xq;
string d;
for (int tries = 0; tries < 2; tries++) {
try {
try {
m_nq->xenquire = new Xapian::Enquire(m_db->m_ndb->xrdb);
if (m_collapseDuplicates) {
m_nq->xenquire->set_collapse_key(Rcl::VALUE_MD5);
} else {
m_nq->xenquire->set_collapse_key(Xapian::BAD_VALUENO);
}
m_nq->xenquire->set_docid_order(Xapian::Enquire::DONT_CARE);
m_nq->xenquire->set_docid_order(Xapian::Enquire::DONT_CARE);
if (!m_sortField.empty() &&
stringlowercmp("relevancyrating", m_sortField)) {
stringlowercmp("relevancyrating", m_sortField)) {
if (m_sorter) {
delete (QSorter*)m_sorter;
m_sorter = 0;
}
m_sorter = new QSorter(m_sortField);
// It really seems there is a xapian bug about sort order, we
// invert here.
m_nq->xenquire->set_sort_by_key((QSorter*)m_sorter,
!m_sortAscending);
m_sorter = new QSorter(m_sortField);
// It really seems there is a xapian bug about sort order, we
// invert here.
m_nq->xenquire->set_sort_by_key((QSorter*)m_sorter,
!m_sortAscending);
}
m_nq->xenquire->set_query(m_nq->xquery);
m_nq->xmset = Xapian::MSet();
@ -223,64 +224,64 @@ bool Query::setQuery(std::shared_ptr<SearchData> sdata)
d = m_nq->xquery.get_description();
m_reason.erase();
break;
} catch (const Xapian::DatabaseModifiedError &e) {
} catch (const Xapian::DatabaseModifiedError &e) {
m_reason = e.get_msg();
m_db->m_ndb->xrdb.reopen();
m_db->m_ndb->xrdb.reopen();
continue;
} XCATCHERROR(m_reason);
} XCATCHERROR(m_reason);
break;
}
if (!m_reason.empty()) {
LOGDEB("Query::SetQuery: xapian error " << (m_reason) << "\n" );
return false;
LOGDEB("Query::SetQuery: xapian error " << m_reason << "\n");
return false;
}
if (d.find("Xapian::Query") == 0)
d.erase(0, strlen("Xapian::Query"));
d.erase(0, strlen("Xapian::Query"));
sdata->setDescription(d);
m_sd = sdata;
LOGDEB("Query::SetQuery: Q: " << (sdata->getDescription()) << "\n" );
LOGDEB("Query::SetQuery: Q: " << sdata->getDescription() << "\n");
return true;
}
// Store the terms of the current native query into terms.
//
// terms is cleared first. Returns false if there is no native query
// object or if Xapian reported an error while the terms were being
// read; in the latter case terms may hold the terms read before the
// error occurred.
bool Query::getQueryTerms(vector<string>& terms)
{
    if (ISNULL(m_nq)) {
        return false;
    }

    terms.clear();
    string ermsg;
    try {
        const Xapian::TermIterator last = m_nq->xquery.get_terms_end();
        for (Xapian::TermIterator it = m_nq->xquery.get_terms_begin();
             it != last; ++it) {
            terms.push_back(*it);
        }
    } XCATCHERROR(ermsg);

    if (!ermsg.empty()) {
        LOGERR("getQueryTerms: xapian error: " << ermsg << "\n");
        return false;
    }
    return true;
}
int Query::makeDocAbstract(const Doc &doc,
vector<Snippet>& abstract,
int maxoccs, int ctxwords)
int Query::makeDocAbstract(const Doc &doc, vector<Snippet>& abstract,
int maxoccs, int ctxwords)
{
LOGDEB("makeDocAbstract: maxoccs " << (maxoccs) << " ctxwords " << (ctxwords) << "\n" );
LOGDEB("makeDocAbstract: maxoccs " << maxoccs << " ctxwords " <<
ctxwords << "\n");
if (!m_db || !m_db->m_ndb || !m_db->m_ndb->m_isopen || !m_nq) {
LOGERR("Query::makeDocAbstract: no db or no nq\n" );
return ABSRES_ERROR;
LOGERR("Query::makeDocAbstract: no db or no nq\n");
return ABSRES_ERROR;
}
int ret = ABSRES_ERROR;
XAPTRY(ret = m_nq->makeAbstract(doc.xdocid, abstract, maxoccs, ctxwords),
m_db->m_ndb->xrdb, m_reason);
if (!m_reason.empty()) {
LOGDEB("makeDocAbstract: makeAbstract error, reason: " << (m_reason) << "\n" );
return ABSRES_ERROR;
LOGDEB("makeDocAbstract: makeAbstract: reason: " << m_reason << "\n");
return ABSRES_ERROR;
}
return ret;
}
@ -289,17 +290,17 @@ bool Query::makeDocAbstract(const Doc &doc, vector<string>& abstract)
{
vector<Snippet> vpabs;
if (!makeDocAbstract(doc, vpabs))
return false;
return false;
for (vector<Snippet>::const_iterator it = vpabs.begin();
it != vpabs.end(); it++) {
string chunk;
if (it->page > 0) {
ostringstream ss;
ss << it->page;
chunk += string(" [p ") + ss.str() + "] ";
}
chunk += it->snippet;
abstract.push_back(chunk);
it != vpabs.end(); it++) {
string chunk;
if (it->page > 0) {
ostringstream ss;
ss << it->page;
chunk += string(" [p ") + ss.str() + "] ";
}
chunk += it->snippet;
abstract.push_back(chunk);
}
return true;
}
@ -308,25 +309,25 @@ bool Query::makeDocAbstract(const Doc &doc, string& abstract)
{
vector<Snippet> vpabs;
if (!makeDocAbstract(doc, vpabs))
return false;
return false;
for (vector<Snippet>::const_iterator it = vpabs.begin();
it != vpabs.end(); it++) {
abstract.append(it->snippet);
abstract.append(cstr_ellipsis);
it != vpabs.end(); it++) {
abstract.append(it->snippet);
abstract.append(cstr_ellipsis);
}
return m_reason.empty() ? true : false;
}
// Ask the native query object for the first page of doc on which a
// query term matches; term is passed through to that call.
//
// Returns the page number computed by the native query object, or -1
// on any failure: no database, no native query object, or an error
// message left in m_reason by the call. The early-exit path used to
// return false (0) which is not the failure value used elsewhere in
// this function.
int Query::getFirstMatchPage(const Doc &doc, string& term)
{
    LOGDEB1("Db::getFirstMatchPage\n");
    // m_db->m_ndb is dereferenced below, so check it along with m_nq,
    // the same way makeDocAbstract() does.
    if (!m_db || !m_db->m_ndb || !m_nq) {
        LOGERR("Query::getFirstMatchPage: no db or no nq\n");
        return -1;
    }

    int pagenum = -1;
    XAPTRY(pagenum = m_nq->getFirstMatchPage(Xapian::docid(doc.xdocid), term),
           m_db->m_ndb->xrdb, m_reason);
    return m_reason.empty() ? pagenum : -1;
}
@ -339,11 +340,11 @@ static const int qquantum = 50;
int Query::getResCnt()
{
if (ISNULL(m_nq) || !m_nq->xenquire) {
LOGERR("Query::getResCnt: no query opened\n" );
return -1;
LOGERR("Query::getResCnt: no query opened\n");
return -1;
}
if (m_resCnt >= 0)
return m_resCnt;
return m_resCnt;
m_resCnt = -1;
if (m_nq->xmset.size() <= 0) {
@ -354,9 +355,9 @@ int Query::getResCnt()
m_resCnt = m_nq->xmset.get_matches_lower_bound(),
m_db->m_ndb->xrdb, m_reason);
LOGDEB("Query::getResCnt: " << (m_resCnt) << " " << (chron.millis()) << " mS\n" );
if (!m_reason.empty())
LOGERR("xenquire->get_mset: exception: " << (m_reason) << "\n" );
LOGDEB("Query::getResCnt: "<<m_resCnt<<" "<< chron.millis() << " mS\n");
if (!m_reason.empty())
LOGERR("xenquire->get_mset: exception: " << m_reason << "\n");
} else {
m_resCnt = m_nq->xmset.get_matches_lower_bound();
}
@ -373,32 +374,32 @@ int Query::getResCnt()
// on subsequent calls is probably only due to disk caching.
bool Query::getDoc(int xapi, Doc &doc)
{
LOGDEB1("Query::getDoc: xapian enquire index " << (xapi) << "\n" );
LOGDEB1("Query::getDoc: xapian enquire index " << xapi << "\n");
if (ISNULL(m_nq) || !m_nq->xenquire) {
LOGERR("Query::getDoc: no query opened\n" );
return false;
LOGERR("Query::getDoc: no query opened\n");
return false;
}
int first = m_nq->xmset.get_firstitem();
int last = first + m_nq->xmset.size() -1;
if (!(xapi >= first && xapi <= last)) {
LOGDEB("Fetching for first " << (xapi) << ", count " << (qquantum) << "\n" );
LOGDEB("Fetching for first " << xapi << ", count " << qquantum << "\n");
XAPTRY(m_nq->xmset = m_nq->xenquire->get_mset(xapi, qquantum,
(const Xapian::RSet *)0),
XAPTRY(m_nq->xmset = m_nq->xenquire->get_mset(xapi, qquantum,
(const Xapian::RSet *)0),
m_db->m_ndb->xrdb, m_reason);
if (!m_reason.empty()) {
LOGERR("enquire->get_mset: exception: " << (m_reason) << "\n" );
LOGERR("enquire->get_mset: exception: " << m_reason << "\n");
return false;
}
if (m_nq->xmset.empty()) {
LOGDEB("enquire->get_mset: got empty result\n" );
return false;
}
first = m_nq->xmset.get_firstitem();
last = first + m_nq->xmset.size() -1;
if (m_nq->xmset.empty()) {
LOGDEB("enquire->get_mset: got empty result\n");
return false;
}
first = m_nq->xmset.get_firstitem();
last = first + m_nq->xmset.size() -1;
}
Xapian::Document xdoc;
@ -411,14 +412,15 @@ bool Query::getDoc(int xapi, Doc &doc)
for (int xaptries=0; xaptries < 2; xaptries++) {
try {
xdoc = m_nq->xmset[xapi-first].get_document();
collapsecount = m_nq->xmset[xapi-first].get_collapse_count();
collapsecount = m_nq->xmset[xapi-first].get_collapse_count();
docid = *(m_nq->xmset[xapi-first]);
pc = m_nq->xmset.convert_to_percent(m_nq->xmset[xapi-first]);
data = xdoc.get_data();
m_reason.erase();
Chrono chron;
m_db->m_ndb->xdocToUdi(xdoc, udi);
LOGDEB2("Query::getDoc: " << (chron.millis()) << " ms for udi [" << (udi) << "], collapse count " << (collapsecount) << "\n" );
m_db->m_ndb->xdocToUdi(xdoc, udi);
LOGDEB2("Query::getDoc: " << chron.millis() << " ms for udi [" <<
udi << "], collapse count " << collapsecount << "\n");
break;
} catch (Xapian::DatabaseModifiedError &error) {
// retry or end of loop
@ -429,7 +431,7 @@ bool Query::getDoc(int xapi, Doc &doc)
break;
}
if (!m_reason.empty()) {
LOGERR("Query::getDoc: " << (m_reason) << "\n" );
LOGERR("Query::getDoc: " << m_reason << "\n");
return false;
}
doc.meta[Rcl::Doc::keyudi] = udi;
@ -437,15 +439,15 @@ bool Query::getDoc(int xapi, Doc &doc)
doc.pc = pc;
char buf[200];
if (collapsecount > 0) {
sprintf(buf,"%3d%% (%d)", pc, collapsecount + 1);
sprintf(buf,"%3d%% (%d)", pc, collapsecount + 1);
} else {
sprintf(buf,"%3d%%", pc);
sprintf(buf,"%3d%%", pc);
}
doc.meta[Doc::keyrr] = buf;
if (collapsecount > 0) {
sprintf(buf, "%d", collapsecount);
doc.meta[Rcl::Doc::keycc] = buf;
sprintf(buf, "%d", collapsecount);
doc.meta[Rcl::Doc::keycc] = buf;
}
// Parse xapian document's data and populate doc fields
@ -454,42 +456,42 @@ bool Query::getDoc(int xapi, Doc &doc)
vector<string> Query::expand(const Doc &doc)
{
LOGDEB("Rcl::Query::expand()\n" );
LOGDEB("Rcl::Query::expand()\n");
vector<string> res;
if (ISNULL(m_nq) || !m_nq->xenquire) {
LOGERR("Query::expand: no query opened\n" );
return res;
LOGERR("Query::expand: no query opened\n");
return res;
}
for (int tries = 0; tries < 2; tries++) {
try {
Xapian::RSet rset;
rset.add_document(Xapian::docid(doc.xdocid));
// We don't exclude the original query terms.
Xapian::ESet eset = m_nq->xenquire->get_eset(20, rset, false);
LOGDEB("ESet terms:\n" );
// We filter out the special terms
for (Xapian::ESetIterator it = eset.begin();
it != eset.end(); it++) {
LOGDEB(" [" << ((*it)) << "]\n" );
if ((*it).empty() || has_prefix(*it))
continue;
res.push_back(*it);
if (res.size() >= 10)
break;
}
try {
Xapian::RSet rset;
rset.add_document(Xapian::docid(doc.xdocid));
// We don't exclude the original query terms.
Xapian::ESet eset = m_nq->xenquire->get_eset(20, rset, false);
LOGDEB("ESet terms:\n");
// We filter out the special terms
for (Xapian::ESetIterator it = eset.begin();
it != eset.end(); it++) {
LOGDEB(" [" << (*it) << "]\n");
if ((*it).empty() || has_prefix(*it))
continue;
res.push_back(*it);
if (res.size() >= 10)
break;
}
m_reason.erase();
break;
} catch (const Xapian::DatabaseModifiedError &e) {
} catch (const Xapian::DatabaseModifiedError &e) {
m_reason = e.get_msg();
m_db->m_ndb->xrdb.reopen();
continue;
} XCATCHERROR(m_reason);
break;
} XCATCHERROR(m_reason);
break;
}
if (!m_reason.empty()) {
LOGERR("Query::expand: xapian error " << (m_reason) << "\n" );
LOGERR("Query::expand: xapian error " << m_reason << "\n");
res.clear();
}