cleanup repeated punctuation in snippets
This commit is contained in:
parent
8dee9054fe
commit
e4e5ee35d6
@ -22,6 +22,7 @@
|
|||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
#include <deque>
|
#include <deque>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
#include <regex>
|
||||||
|
|
||||||
#include "log.h"
|
#include "log.h"
|
||||||
#include "rcldb.h"
|
#include "rcldb.h"
|
||||||
@ -45,8 +46,19 @@ using namespace std;
|
|||||||
|
|
||||||
namespace Rcl {
|
namespace Rcl {
|
||||||
|
|
||||||
|
//// Fragment cleanup
|
||||||
// Chars we turn to spaces in the Snippets
|
// Chars we turn to spaces in the Snippets
|
||||||
static const string cstr_nc("\n\r\x0c\\");
|
static const string cstr_nc("\n\r\x0c\\");
|
||||||
|
// Things that we don't want to repeat in a displayed snippet.
|
||||||
|
// e.g. > > > > > >
|
||||||
|
// Character class of the punctuation we collapse when it repeats.
// The '-' must be the first character in the class so that it is a
// literal: written as ".-_" it defines a range from '.' to '_', which
// covers digits, uppercase letters and more, and leaves '-' unmatched.
static const string punctcls("[-<>._+,#*=]");
|
||||||
|
// Pattern for a run of at least two punctuation characters, each one
// optionally followed by spaces. Group 1 is the first element of the
// run; group 2 is repeated, so after a match it holds the last element.
static const string punctRE = "(" + punctcls + " *)(" + punctcls + " *)+";
|
||||||
|
// Regex object built from punctRE, defined once here and reused by
// fixfrag() instead of being constructed on every call.
static std::regex fixfrag_re(punctRE);
|
||||||
|
// Clean up a text fragment before it is displayed as a snippet.
// First passes the fragment through neutchars() with cstr_nc
// (presumably replacing those characters with spaces, per the cstr_nc
// comment -- confirm against neutchars()). Then replaces every run
// matched by fixfrag_re with "$2", i.e. the last punctuation element
// of the run, so that e.g. "> > > > " becomes "> ".
// Returns the cleaned-up copy; the input is not modified.
static string fixfrag(const string& infrag)
|
||||||
|
{
|
||||||
|
return std::regex_replace(neutchars(infrag, cstr_nc), fixfrag_re, "$2");
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
// Fragment descriptor. A fragment is a text area with one or several
|
// Fragment descriptor. A fragment is a text area with one or several
|
||||||
// matched terms and some context. It is ranked according to the
|
// matched terms and some context. It is ranked according to the
|
||||||
@ -383,8 +395,8 @@ int Query::Native::abstractFromText(
|
|||||||
// main term and the page positions.
|
// main term and the page positions.
|
||||||
unsigned int count = 0;
|
unsigned int count = 0;
|
||||||
for (const auto& entry : result) {
|
for (const auto& entry : result) {
|
||||||
string frag = neutchars(
|
string frag(
|
||||||
rawtext.substr(entry.start, entry.stop - entry.start), cstr_nc);
|
fixfrag(rawtext.substr(entry.start, entry.stop - entry.start)));
|
||||||
|
|
||||||
#ifdef COMPUTE_HLZONES
|
#ifdef COMPUTE_HLZONES
|
||||||
// This would need to be modified to take tag parameters
|
// This would need to be modified to take tag parameters
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user