Don't insert spaces in CJK abstracts
This commit is contained in:
parent
d318f506bf
commit
64ef8d0b81
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: textsplit.cpp,v 1.37 2008-12-05 11:09:31 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: textsplit.cpp,v 1.38 2008-12-12 11:53:45 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
#endif
|
||||
/*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
@ -149,6 +149,11 @@ static inline int whatcc(unsigned int c)
|
||||
|| ((p) >= 0x20000 && (p) <= 0x2A6DF) \
|
||||
|| ((p) >= 0x2F800 && (p) <= 0x2FA1F))
|
||||
|
||||
/**
 * Tell whether character value c is classified as CJK.
 *
 * @param c character value to test (presumably a Unicode code point --
 *          confirm against callers).
 * @return the result of the UNICODE_IS_CJK range test applied to c.
 */
bool TextSplit::isCJK(int c)
|
||||
{
|
||||
// Public wrapper so that code outside this file can reuse the same
// UNICODE_IS_CJK test.
return UNICODE_IS_CJK(c);
|
||||
}
|
||||
|
||||
// Class-wide setting, initialised to true.
// NOTE(review): presumably enables CJK-specific splitting -- confirm where it is read.
bool TextSplit::o_processCJK = true;
|
||||
// Class-wide setting, initialised to 2.
// NOTE(review): presumably the n-gram length used for CJK text -- confirm where it is read.
unsigned int TextSplit::o_CJKNgramLen = 2;
|
||||
|
||||
|
||||
@ -16,7 +16,7 @@
|
||||
*/
|
||||
#ifndef _TEXTSPLIT_H_INCLUDED_
|
||||
#define _TEXTSPLIT_H_INCLUDED_
|
||||
/* @(#$Id: textsplit.h,v 1.21 2008-12-05 11:09:31 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
/* @(#$Id: textsplit.h,v 1.22 2008-12-12 11:53:45 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
|
||||
#include <string>
|
||||
#include <list>
|
||||
@ -96,7 +96,10 @@ public:
|
||||
* handles all Unicode whitespace, but needs correct utf-8 input
|
||||
*/
|
||||
static bool stringToStrings(const string &s, list<string> &tokens);
|
||||
|
||||
|
||||
/** Is char CJK ? */
|
||||
static bool isCJK(int c);
|
||||
|
||||
private:
|
||||
Flags m_flags;
|
||||
TextSplitCB *m_cb;
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.150 2008-12-12 11:02:20 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.151 2008-12-12 11:53:45 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
#endif
|
||||
/*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
@ -444,10 +444,19 @@ string Db::Native::makeAbstract(Xapian::docid docid, Query *query)
|
||||
|
||||
// Finally build the abstract by walking the map (in order of position)
|
||||
string abstract;
|
||||
abstract.reserve(sparseDoc.size() * 10);
|
||||
bool incjk = false;
|
||||
for (map<unsigned int, string>::const_iterator it = sparseDoc.begin();
|
||||
it != sparseDoc.end(); it++) {
|
||||
LOGDEB2(("Abtract:output %u -> [%s]\n", it->first,it->second.c_str()));
|
||||
abstract += it->second + " ";
|
||||
Utf8Iter uit(it->second);
|
||||
bool newcjk = false;
|
||||
if (TextSplit::isCJK(*uit))
|
||||
newcjk = true;
|
||||
if (!incjk || (incjk && !newcjk))
|
||||
abstract += " ";
|
||||
incjk = newcjk;
|
||||
abstract += it->second;
|
||||
}
|
||||
|
||||
// This happens for docs with no terms (only filename) indexed? I'll fix
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user