From 207e3d5af772c9cb3cc9895f357f0d4bb53b7b6d Mon Sep 17 00:00:00 2001 From: Jean-Francois Dockes Date: Thu, 7 Dec 2017 10:51:10 +0100 Subject: [PATCH] rclabstract: fixed log call formats and indentation --- src/rcldb/rclabstract.cpp | 602 +++++++++++++++++++------------------- 1 file changed, 306 insertions(+), 296 deletions(-) diff --git a/src/rcldb/rclabstract.cpp b/src/rcldb/rclabstract.cpp index 457281db..41836d8a 100644 --- a/src/rcldb/rclabstract.cpp +++ b/src/rcldb/rclabstract.cpp @@ -50,7 +50,7 @@ static void listList(const string& what, const vector&l) for (vector::const_iterator it = l.begin(); it != l.end(); it++) { a = a + *it + " "; } - LOGDEB("" << (what) << ": " << (a) << "\n" ); + LOGDEB("" << what << ": " << a << "\n"); } #else #define LOGABS LOGDEB2 @@ -67,12 +67,12 @@ static const bool prune_prefixed_terms = true; static void noPrefixList(const vector& in, vector& out) { for (vector::const_iterator qit = in.begin(); - qit != in.end(); qit++) { - if (prune_prefixed_terms) { - if (has_prefix(*qit)) - continue; - } - out.push_back(strip_prefix(*qit)); + qit != in.end(); qit++) { + if (prune_prefixed_terms) { + if (has_prefix(*qit)) + continue; + } + out.push_back(strip_prefix(*qit)); } sort(out.begin(), out.end()); vector::iterator it = unique(out.begin(), out.end()); @@ -82,8 +82,8 @@ static void noPrefixList(const vector& in, vector& out) bool Query::Native::getMatchTerms(unsigned long xdocid, vector& terms) { if (!xenquire) { - LOGERR("Query::getMatchTerms: no query opened\n" ); - return false; + LOGERR("Query::getMatchTerms: no query opened\n"); + return false; } terms.clear(); @@ -95,8 +95,8 @@ bool Query::Native::getMatchTerms(unsigned long xdocid, vector& terms) xenquire->get_matching_terms_end(id)), m_q->m_db->m_ndb->xrdb, m_q->m_reason); if (!m_q->m_reason.empty()) { - LOGERR("getMatchTerms: xapian error: " << (m_q->m_reason) << "\n" ); - return false; + LOGERR("getMatchTerms: xapian error: " << m_q->m_reason << "\n"); + return false; } noPrefixList(iterms, terms); return true; @@ -109,25 +109,26 @@ void Query::Native::setDbWideQTermsFreqs() { // Do it once only for a given query. if (!termfreqs.empty()) - return; + return; vector qterms; { - vector iqterms; - m_q->getQueryTerms(iqterms); - noPrefixList(iqterms, qterms); + vector iqterms; + m_q->getQueryTerms(iqterms); + noPrefixList(iqterms, qterms); } // listList("Query terms: ", qterms); Xapian::Database &xrdb = m_q->m_db->m_ndb->xrdb; double doccnt = xrdb.get_doccount(); if (doccnt == 0) - doccnt = 1; + doccnt = 1; for (vector::const_iterator qit = qterms.begin(); - qit != qterms.end(); qit++) { - termfreqs[*qit] = xrdb.get_termfreq(*qit) / doccnt; - LOGABS("setDbWideQTermFreqs: [" << (qit) << "] db freq " << (termfreqs[*qit]) << "\n" ); + qit != qterms.end(); qit++) { + termfreqs[*qit] = xrdb.get_termfreq(*qit) / doccnt; + LOGABS("setDbWideQTermFreqs: [" << *qit << "] db freq " << + termfreqs[*qit] << "\n"); } } @@ -143,10 +144,10 @@ void Query::Native::setDbWideQTermsFreqs() // occurrences, and let the frequency for each group member be the // aggregated frequency. double Query::Native::qualityTerms(Xapian::docid docid, - const vector& terms, - multimap >& byQ) + const vector& terms, + multimap >& byQ) { - LOGABS("qualityTerms\n" ); + LOGABS("qualityTerms\n"); setDbWideQTermsFreqs(); map termQcoefs; @@ -155,46 +156,46 @@ double Query::Native::qualityTerms(Xapian::docid docid, Xapian::Database &xrdb = m_q->m_db->m_ndb->xrdb; double doclen = xrdb.get_doclength(docid); if (doclen == 0) - doclen = 1; + doclen = 1; HighlightData hld; if (m_q->m_sd) { - m_q->m_sd->getTerms(hld); + m_q->m_sd->getTerms(hld); } #ifdef DEBUGABSTRACT { - string deb; - hld.toString(deb); - LOGABS("qualityTerms: hld: " << (deb) << "\n" ); + string deb; + hld.toString(deb); + LOGABS("qualityTerms: hld: " << deb << "\n"); } #endif // Group the input terms by the user term they were possibly expanded from map > byRoot; for (vector::const_iterator qit = terms.begin(); - qit != terms.end(); qit++) { - map::const_iterator eit = hld.terms.find(*qit); - if (eit != hld.terms.end()) { - byRoot[eit->second].push_back(*qit); - } else { - LOGDEB0("qualityTerms: [" << ((*qit)) << "] not found in hld\n" ); - byRoot[*qit].push_back(*qit); - } + qit != terms.end(); qit++) { + map::const_iterator eit = hld.terms.find(*qit); + if (eit != hld.terms.end()) { + byRoot[eit->second].push_back(*qit); + } else { + LOGDEB0("qualityTerms: [" << *qit << "] not found in hld\n"); + byRoot[*qit].push_back(*qit); + } } #ifdef DEBUGABSTRACT { - string byRootstr; - for (map >::const_iterator debit = - byRoot.begin(); debit != byRoot.end(); debit++) { - byRootstr.append("[").append(debit->first).append("]->"); - for (vector::const_iterator it = debit->second.begin(); - it != debit->second.end(); it++) { - byRootstr.append("[").append(*it).append("] "); - } - byRootstr.append("\n"); - } - LOGABS("\nqualityTerms: uterms to terms: " << (byRootstr) << "\n" ); + string byRootstr; + for (map >::const_iterator debit = + byRoot.begin(); debit != byRoot.end(); debit++) { + byRootstr.append("[").append(debit->first).append("]->"); + for (vector::const_iterator it = debit->second.begin(); + it != debit->second.end(); it++) { + byRootstr.append("[").append(*it).append("] "); + } + byRootstr.append("\n"); + } + LOGABS("\nqualityTerms: uterms to terms: " << byRootstr << "\n"); } #endif @@ -202,51 +203,51 @@ double Query::Native::qualityTerms(Xapian::docid docid, map grpwdfs; map grptfreqs; for (map >::const_iterator git = byRoot.begin(); - git != byRoot.end(); git++) { - for (vector::const_iterator qit = git->second.begin(); - qit != git->second.end(); qit++) { - Xapian::TermIterator term = xrdb.termlist_begin(docid); - term.skip_to(*qit); - if (term != xrdb.termlist_end(docid) && *term == *qit) { - if (grpwdfs.find(git->first) != grpwdfs.end()) { - grpwdfs[git->first] = term.get_wdf() / doclen; - grptfreqs[git->first] = termfreqs[*qit]; - } else { - grpwdfs[git->first] += term.get_wdf() / doclen; - grptfreqs[git->first] += termfreqs[*qit]; - } - } - } + git != byRoot.end(); git++) { + for (vector::const_iterator qit = git->second.begin(); + qit != git->second.end(); qit++) { + Xapian::TermIterator term = xrdb.termlist_begin(docid); + term.skip_to(*qit); + if (term != xrdb.termlist_end(docid) && *term == *qit) { + if (grpwdfs.find(git->first) != grpwdfs.end()) { + grpwdfs[git->first] = term.get_wdf() / doclen; + grptfreqs[git->first] = termfreqs[*qit]; + } else { + grpwdfs[git->first] += term.get_wdf() / doclen; + grptfreqs[git->first] += termfreqs[*qit]; + } + } + } } // Build a sorted by quality container for the groups for (map >::const_iterator git = byRoot.begin(); - git != byRoot.end(); git++) { - double q = (grpwdfs[git->first]) * grptfreqs[git->first]; - q = -log10(q); - if (q < 3) { - q = 0.05; - } else if (q < 4) { - q = 0.3; - } else if (q < 5) { - q = 0.7; - } else if (q < 6) { - q = 0.8; - } else { - q = 1; - } - totalweight += q; - byQ.insert(pair >(q, git->second)); + git != byRoot.end(); git++) { + double q = (grpwdfs[git->first]) * grptfreqs[git->first]; + q = -log10(q); + if (q < 3) { + q = 0.05; + } else if (q < 4) { + q = 0.3; + } else if (q < 5) { + q = 0.7; + } else if (q < 6) { + q = 0.8; + } else { + q = 1; + } + totalweight += q; + byQ.insert(pair >(q, git->second)); } #ifdef DEBUGABSTRACT for (multimap >::reverse_iterator mit= byQ.rbegin(); - mit != byQ.rend(); mit++) { - LOGABS("qualityTerms: group\n" ); - for (vector::const_iterator qit = mit->second.begin(); - qit != mit->second.end(); qit++) { - LOGABS("" << (mit->first) << "->[" << (qit) << "]\n" ); - } + mit != byQ.rend(); mit++) { + LOGABS("qualityTerms: group\n"); + for (vector::const_iterator qit = mit->second.begin(); + qit != mit->second.end(); qit++) { + LOGABS("" << mit->first << "->[" << *qit << "]\n"); + } } #endif return totalweight; @@ -257,8 +258,8 @@ int Query::Native::getFirstMatchPage(Xapian::docid docid, string& term) { LOGDEB("Query::Native::getFirstMatchPage\n"); if (!m_q|| !m_q->m_db || !m_q->m_db->m_ndb || !m_q->m_db->m_ndb->m_isopen) { - LOGERR("Query::getFirstMatchPage: no db\n" ); - return -1; + LOGERR("Query::getFirstMatchPage: no db\n"); + return -1; } Rcl::Db::Native *ndb(m_q->m_db->m_ndb); Xapian::Database& xrdb(ndb->xrdb); @@ -267,15 +268,15 @@ int Query::Native::getFirstMatchPage(Xapian::docid docid, string& term) getMatchTerms(docid, terms); if (terms.empty()) { - LOGDEB("getFirstMatchPage: empty match term list (field match?)\n" ); - return -1; + LOGDEB("getFirstMatchPage: empty match term list (field match?)\n"); + return -1; } vector pagepos; ndb->getPagePositions(docid, pagepos); if (pagepos.empty()) - return -1; - + return -1; + setDbWideQTermsFreqs(); // We try to use a page which matches the "best" term. Get a sorted list @@ -283,25 +284,25 @@ int Query::Native::getFirstMatchPage(Xapian::docid docid, string& term) qualityTerms(docid, terms, byQ); for (multimap >::reverse_iterator mit = byQ.rbegin(); - mit != byQ.rend(); mit++) { - for (vector::const_iterator qit = mit->second.begin(); - qit != mit->second.end(); qit++) { - string qterm = *qit; - Xapian::PositionIterator pos; - string emptys; - try { - for (pos = xrdb.positionlist_begin(docid, qterm); - pos != xrdb.positionlist_end(docid, qterm); pos++) { - int pagenum = ndb->getPageNumberForPosition(pagepos, *pos); - if (pagenum > 0) { - term = qterm; - return pagenum; - } - } - } catch (...) { - // Term does not occur. No problem. - } - } + mit != byQ.rend(); mit++) { + for (vector::const_iterator qit = mit->second.begin(); + qit != mit->second.end(); qit++) { + string qterm = *qit; + Xapian::PositionIterator pos; + string emptys; + try { + for (pos = xrdb.positionlist_begin(docid, qterm); + pos != xrdb.positionlist_end(docid, qterm); pos++) { + int pagenum = ndb->getPageNumberForPosition(pagepos, *pos); + if (pagenum > 0) { + term = qterm; + return pagenum; + } + } + } catch (...) { + // Term does not occur. No problem. + } + } } return -1; } @@ -312,18 +313,19 @@ int Query::Native::getFirstMatchPage(Xapian::docid docid, string& term) // DatabaseModified and other general exceptions are catched and // possibly retried by our caller int Query::Native::makeAbstract(Xapian::docid docid, - vector& vabs, - int imaxoccs, int ictxwords) + vector& vabs, + int imaxoccs, int ictxwords) { Chrono chron; - LOGABS("makeAbstract: docid " << (long(docid)) << " imaxoccs " << (imaxoccs) << " ictxwords " << (ictxwords) << "\n" ); + LOGABS("makeAbstract: docid " << docid << " imaxoccs " << + imaxoccs << " ictxwords " << ictxwords << "\n"); // The (unprefixed) terms matched by this document vector matchedTerms; getMatchTerms(docid, matchedTerms); if (matchedTerms.empty()) { - LOGDEB("makeAbstract::Empty term list\n" ); - return ABSRES_ERROR; + LOGDEB("makeAbstract:"< > byQ; double totalweight = qualityTerms(docid, matchedTerms, byQ); - LOGABS("makeAbstract:" << (chron.ms()) << ": computed Qcoefs.\n" ); + LOGABS("makeAbstract:" << chron.millis() << "mS: computed Qcoefs.\n"); // This can't happen, but would crash us if (totalweight == 0.0) { - LOGERR("makeAbstract: totalweight == 0.0 !\n" ); - return ABSRES_ERROR; + LOGERR("makeAbstract:"<m_db->m_ndb); @@ -374,124 +376,129 @@ int Query::Native::makeAbstract(Xapian::docid docid, // with words. We used to limit the character size at the end, but // this damaged our careful selection of terms const unsigned int maxtotaloccs = imaxoccs > 0 ? imaxoccs : - m_q->m_db->getAbsLen() /(7 * (m_q->m_db->getAbsCtxLen() + 1)); + m_q->m_db->getAbsLen() /(7 * (m_q->m_db->getAbsCtxLen() + 1)); int ctxwords = ictxwords == -1 ? m_q->m_db->getAbsCtxLen() : ictxwords; - LOGABS("makeAbstract:" << (chron.ms()) << ": mxttloccs " << (maxtotaloccs) << " ctxwords " << (ctxwords) << "\n" ); + LOGABS("makeAbstract:" << chron.millis() << "mS: mxttloccs " << + maxtotaloccs << " ctxwords " << ctxwords << "\n"); int ret = ABSRES_OK; // Let's go populate - for (multimap >::reverse_iterator mit = byQ.rbegin(); - mit != byQ.rend(); mit++) { - unsigned int maxgrpoccs; - double q; - if (byQ.size() == 1) { - maxgrpoccs = maxtotaloccs; - q = 1.0; - } else { - // We give more slots to the better term groups - q = mit->first / totalweight; - maxgrpoccs = int(ceil(maxtotaloccs * q)); - } - unsigned int grpoccs = 0; + for (multimap >::reverse_iterator mit = byQ.rbegin(); + mit != byQ.rend(); mit++) { + unsigned int maxgrpoccs; + double q; + if (byQ.size() == 1) { + maxgrpoccs = maxtotaloccs; + q = 1.0; + } else { + // We give more slots to the better term groups + q = mit->first / totalweight; + maxgrpoccs = int(ceil(maxtotaloccs * q)); + } + unsigned int grpoccs = 0; - for (vector::const_iterator qit = mit->second.begin(); - qit != mit->second.end(); qit++) { + for (vector::const_iterator qit = mit->second.begin(); + qit != mit->second.end(); qit++) { - // Group done ? - if (grpoccs >= maxgrpoccs) - break; + // Group done ? + if (grpoccs >= maxgrpoccs) + break; - string qterm = *qit; + string qterm = *qit; - LOGABS("makeAbstract: [" << (qterm) << "] " << (maxgrpoccs) << " max grp occs (coef " << (q) << ")\n" ); + LOGABS("makeAbstract: [" << qterm << "] " << maxgrpoccs << + " max grp occs (coef " << q << ")\n"); - // The match term may span several words - int qtrmwrdcnt = - TextSplit::countWords(qterm, TextSplit::TXTS_NOSPANS); + // The match term may span several words + int qtrmwrdcnt = + TextSplit::countWords(qterm, TextSplit::TXTS_NOSPANS); - Xapian::PositionIterator pos; - // There may be query terms not in this doc. This raises an - // exception when requesting the position list, we catch it ?? - // Not clear how this can happen because we are walking the - // match list returned by Xapian. Maybe something with the - // fields? - string emptys; - try { - for (pos = xrdb.positionlist_begin(docid, qterm); - pos != xrdb.positionlist_end(docid, qterm); pos++) { - int ipos = *pos; - if (ipos < int(baseTextPosition)) // Not in text body - continue; - LOGABS("makeAbstract: [" << (qterm) << "] at pos " << (ipos) << " grpoccs " << (grpoccs) << " maxgrpoccs " << (maxgrpoccs) << "\n" ); + Xapian::PositionIterator pos; + // There may be query terms not in this doc. This raises an + // exception when requesting the position list, we catch it ?? + // Not clear how this can happen because we are walking the + // match list returned by Xapian. Maybe something with the + // fields? + string emptys; + try { + for (pos = xrdb.positionlist_begin(docid, qterm); + pos != xrdb.positionlist_end(docid, qterm); pos++) { + int ipos = *pos; + if (ipos < int(baseTextPosition)) // Not in text body + continue; + LOGABS("makeAbstract: [" << qterm << "] at pos " << + ipos << " grpoccs " << grpoccs << " maxgrpoccs " << + maxgrpoccs << "\n"); - totaloccs++; - grpoccs++; + totaloccs++; + grpoccs++; - // Add adjacent slots to the set to populate at next - // step by inserting empty strings. Special provisions - // for adding ellipsis and for positions overlapped by - // the match term. - unsigned int sta = MAX(int(baseTextPosition), - ipos - ctxwords); - unsigned int sto = ipos + qtrmwrdcnt-1 + - m_q->m_db->getAbsCtxLen(); - for (unsigned int ii = sta; ii <= sto; ii++) { - if (ii == (unsigned int)ipos) { - sparseDoc[ii] = qterm; - searchTermPositions.insert(ii); - if (ii > maxpos) - maxpos = ii; - } else if (ii > (unsigned int)ipos && - ii < (unsigned int)ipos + qtrmwrdcnt) { - sparseDoc[ii] = occupiedmarker; - } else if (!sparseDoc[ii].compare(cstr_ellipsis)) { - // For an empty slot, the test has a side - // effect of inserting an empty string which - // is what we want. - sparseDoc[ii] = emptys; - } - } - // Add ellipsis at the end. This may be replaced later by - // an overlapping extract. Take care not to replace an - // empty string here, we really want an empty slot, - // use find() - if (sparseDoc.find(sto+1) == sparseDoc.end()) { - sparseDoc[sto+1] = cstr_ellipsis; - } + // Add adjacent slots to the set to populate at next + // step by inserting empty strings. Special provisions + // for adding ellipsis and for positions overlapped by + // the match term. + unsigned int sta = MAX(int(baseTextPosition), + ipos - ctxwords); + unsigned int sto = ipos + qtrmwrdcnt-1 + + m_q->m_db->getAbsCtxLen(); + for (unsigned int ii = sta; ii <= sto; ii++) { + if (ii == (unsigned int)ipos) { + sparseDoc[ii] = qterm; + searchTermPositions.insert(ii); + if (ii > maxpos) + maxpos = ii; + } else if (ii > (unsigned int)ipos && + ii < (unsigned int)ipos + qtrmwrdcnt) { + sparseDoc[ii] = occupiedmarker; + } else if (!sparseDoc[ii].compare(cstr_ellipsis)) { + // For an empty slot, the test has a side + // effect of inserting an empty string which + // is what we want. + sparseDoc[ii] = emptys; + } + } + // Add ellipsis at the end. This may be replaced later by + // an overlapping extract. Take care not to replace an + // empty string here, we really want an empty slot, + // use find() + if (sparseDoc.find(sto+1) == sparseDoc.end()) { + sparseDoc[sto+1] = cstr_ellipsis; + } - // Group done ? - if (grpoccs >= maxgrpoccs) { - ret |= ABSRES_TRUNC; - LOGABS("Db::makeAbstract: max group occs cutoff\n" ); - break; - } - // Global done ? - if (totaloccs >= maxtotaloccs) { - ret |= ABSRES_TRUNC; - LOGABS("Db::makeAbstract: max occurrences cutoff\n" ); - break; - } - } - } catch (...) { - // Term does not occur. No problem. - } + // Group done ? + if (grpoccs >= maxgrpoccs) { + ret |= ABSRES_TRUNC; + LOGABS("Db::makeAbstract: max group occs cutoff\n"); + break; + } + // Global done ? + if (totaloccs >= maxtotaloccs) { + ret |= ABSRES_TRUNC; + LOGABS("Db::makeAbstract: max occurrences cutoff\n"); + break; + } + } + } catch (...) { + // Term does not occur. No problem. + } - if (totaloccs >= maxtotaloccs) { - ret |= ABSRES_TRUNC; - LOGABS("Db::makeAbstract: max1 occurrences cutoff\n" ); - break; - } - } + if (totaloccs >= maxtotaloccs) { + ret |= ABSRES_TRUNC; + LOGABS("Db::makeAbstract: max1 occurrences cutoff\n"); + break; + } + } } maxpos += ctxwords + 1; - LOGABS("makeAbstract:" << (chron.millis()) << ":chosen number of positions " << (totaloccs) << "\n" ); + LOGABS("makeAbstract:" << chron.millis() << + "mS:chosen number of positions " << totaloccs << "\n"); // This can happen if there are term occurences in the keywords // etc. but not elsewhere ? if (totaloccs == 0) { - LOGDEB("makeAbstract: no occurrences\n" ); - return ABSRES_OK; + LOGDEB("makeAbstract: no occurrences\n"); + return ABSRES_OK; } // Walk all document's terms position lists and populate slots @@ -500,69 +507,74 @@ int Query::Native::makeAbstract(Xapian::docid docid, // inconsistant (missing words, potentially altering meaning), // which is bad. { - Xapian::TermIterator term; - int cutoff = m_q->m_snipMaxPosWalk; - for (term = xrdb.termlist_begin(docid); - term != xrdb.termlist_end(docid); term++) { - // Ignore prefixed terms - if (has_prefix(*term)) - continue; - if (m_q->m_snipMaxPosWalk > 0 && cutoff-- < 0) { - ret |= ABSRES_TERMMISS; - LOGDEB0("makeAbstract: max term count cutoff " << (m_q->m_snipMaxPosWalk) << "\n" ); - break; - } + Xapian::TermIterator term; + int cutoff = m_q->m_snipMaxPosWalk; + for (term = xrdb.termlist_begin(docid); + term != xrdb.termlist_end(docid); term++) { + // Ignore prefixed terms + if (has_prefix(*term)) + continue; + if (m_q->m_snipMaxPosWalk > 0 && cutoff-- < 0) { + ret |= ABSRES_TERMMISS; + LOGDEB0("makeAbstract: max term count cutoff " << + m_q->m_snipMaxPosWalk << "\n"); + break; + } - map::iterator vit; - Xapian::PositionIterator pos; - for (pos = xrdb.positionlist_begin(docid, *term); - pos != xrdb.positionlist_end(docid, *term); pos++) { - if (m_q->m_snipMaxPosWalk > 0 && cutoff-- < 0) { - ret |= ABSRES_TERMMISS; - LOGDEB0("makeAbstract: max term count cutoff " << (m_q->m_snipMaxPosWalk) << "\n" ); - break; - } - // If we are beyond the max possible position, stop - // for this term - if (*pos > maxpos) { - break; - } - if ((vit = sparseDoc.find(*pos)) != sparseDoc.end()) { - // Don't replace a term: the terms list is in - // alphabetic order, and we may have several terms - // at the same position, we want to keep only the - // first one (ie: dockes and dockes@wanadoo.fr) - if (vit->second.empty()) { - LOGDEB2("makeAbstract: populating: [" << ((*term)) << "] at " << (*pos) << "\n" ); - sparseDoc[*pos] = *term; - } - } - } - } + map::iterator vit; + Xapian::PositionIterator pos; + for (pos = xrdb.positionlist_begin(docid, *term); + pos != xrdb.positionlist_end(docid, *term); pos++) { + if (m_q->m_snipMaxPosWalk > 0 && cutoff-- < 0) { + ret |= ABSRES_TERMMISS; + LOGDEB0("makeAbstract: max term count cutoff " << + m_q->m_snipMaxPosWalk << "\n"); + break; + } + // If we are beyond the max possible position, stop + // for this term + if (*pos > maxpos) { + break; + } + if ((vit = sparseDoc.find(*pos)) != sparseDoc.end()) { + // Don't replace a term: the terms list is in + // alphabetic order, and we may have several terms + // at the same position, we want to keep only the + // first one (ie: dockes and dockes@wanadoo.fr) + if (vit->second.empty()) { + LOGDEB2("makeAbstract: populating: [" << *term << + "] at " << *pos << "\n"); + sparseDoc[*pos] = *term; + } + } + } + } } + LOGABS("makeAbstract:" << chron.millis() << "mS: all term poslist read\n"); #if 0 // Debug only: output the full term[position] vector bool epty = false; int ipos = 0; for (map::iterator it = sparseDoc.begin(); - it != sparseDoc.end(); - it++, ipos++) { - if (it->empty()) { - if (!epty) - LOGDEB("makeAbstract:vec[" << (ipos) << "]: [" << (it) << "]\n" ); - epty=true; - } else { - epty = false; - LOGDEB("makeAbstract:vec[" << (ipos) << "]: [" << (it) << "]\n" ); - } + it != sparseDoc.end(); + it++, ipos++) { + if (it->empty()) { + if (!epty) + LOGDEB("makeAbstract:vec[" << ipos << "]: [" << it << "]\n"); + epty=true; + } else { + epty = false; + LOGDEB("makeAbstract:vec[" << ipos << "]: [" << it << "]\n"); + } } #endif vector vpbreaks; ndb->getPagePositions(docid, vpbreaks); - LOGABS("makeAbstract:" << (chron.millis()) << ": extracting. Got " << (vpbreaks.size()) << " pages\n" ); + LOGABS("makeAbstract:" << chron.millis() << "mS: extracting. Got " << + vpbreaks.size() << " pages\n"); // Finally build the abstract by walking the map (in order of position) vabs.clear(); string chunk; @@ -570,45 +582,43 @@ int Query::Native::makeAbstract(Xapian::docid docid, int page = 0; string term; for (map::const_iterator it = sparseDoc.begin(); - it != sparseDoc.end(); it++) { - LOGDEB2("Abtract:output " << (it->first) << " -> [" << (it->second) << "]\n" ); - if (!occupiedmarker.compare(it->second)) { - LOGDEB("Abstract: qtrm position not filled ??\n" ); - continue; - } - if (chunk.empty() && !vpbreaks.empty()) { - page = ndb->getPageNumberForPosition(vpbreaks, it->first); - if (page < 0) - page = 0; - term.clear(); - } - Utf8Iter uit(it->second); - bool newcjk = false; - if (TextSplit::isCJK(*uit)) - newcjk = true; - if (!incjk || (incjk && !newcjk)) - chunk += " "; - incjk = newcjk; - if (searchTermPositions.find(it->first) != searchTermPositions.end()) - term = it->second; - if (it->second == cstr_ellipsis) { - vabs.push_back(Snippet(page, chunk).setTerm(term)); - chunk.clear(); - } else { - if (it->second.compare(end_of_field_term) && - it->second.compare(start_of_field_term)) - chunk += it->second; - } + it != sparseDoc.end(); it++) { + LOGDEB2("Abtract:output " << it->first << " -> [" << it->second << + "]\n"); + if (!occupiedmarker.compare(it->second)) { + LOGDEB("Abstract: qtrm position not filled ??\n"); + continue; + } + if (chunk.empty() && !vpbreaks.empty()) { + page = ndb->getPageNumberForPosition(vpbreaks, it->first); + if (page < 0) + page = 0; + term.clear(); + } + Utf8Iter uit(it->second); + bool newcjk = false; + if (TextSplit::isCJK(*uit)) + newcjk = true; + if (!incjk || (incjk && !newcjk)) + chunk += " "; + incjk = newcjk; + if (searchTermPositions.find(it->first) != searchTermPositions.end()) + term = it->second; + if (it->second == cstr_ellipsis) { + vabs.push_back(Snippet(page, chunk).setTerm(term)); + chunk.clear(); + } else { + if (it->second.compare(end_of_field_term) && + it->second.compare(start_of_field_term)) + chunk += it->second; + } } if (!chunk.empty()) - vabs.push_back(Snippet(page, chunk).setTerm(term)); + vabs.push_back(Snippet(page, chunk).setTerm(term)); - LOGDEB2("makeAbtract: done in " << (chron.millis()) << " mS\n" ); + LOGABS("makeAbtract: done in " << chron.millis() << " mS\n"); return ret; } } - - -