comments and indent
This commit is contained in:
parent
0755f4f4e2
commit
94b94593e3
@ -25,14 +25,19 @@
|
||||
namespace Rcl {
|
||||
|
||||
/**
|
||||
* Termproc objects take a stream of term tokens as input and do something
|
||||
* TermProc objects take term tokens as input and do something
|
||||
* with them: transform to lowercase, filter out stop words, generate n-grams,
|
||||
* finally index or generate search clauses, etc. They are chained and can
|
||||
* be arranged to form different pipelines depending on the desired processing
|
||||
* steps: for example, optional stoplist or commongram processing.
|
||||
*
|
||||
* Shared processing steps are defined in this file. The first and last steps
|
||||
* (i.e. adding index terms) are usually defined in the specific module.
|
||||
* are usually defined in the specific module.
|
||||
* - The front TermProc is typically chained from a TextSplit object
|
||||
* which generates the original terms, and calls takeword() from its
|
||||
* own takeword() method.
|
||||
* - The last TermProc does something with the finalized terms, e.g. adds
|
||||
* them to the index.
|
||||
*/
|
||||
|
||||
/**
|
||||
@ -50,6 +55,7 @@ public:
|
||||
else
|
||||
return true;
|
||||
}
|
||||
// newpage() is like takeword(), but for page breaks.
|
||||
virtual void newpage(int pos)
|
||||
{
|
||||
if (m_next)
|
||||
@ -66,37 +72,38 @@ private:
|
||||
TermProc *m_next;
|
||||
/* Copy constructor and assignment operator are private and forbidden */
|
||||
TermProc(const TermProc &) {}
|
||||
TermProc& operator=(const TermProc &) {return *this;};
|
||||
TermProc& operator=(const TermProc &) {
|
||||
return *this;
|
||||
};
|
||||
};
|
||||
|
||||
/**
|
||||
* Specialized TextSplit class: this will probably replace the base
|
||||
* TextSplit when we've converted all the code. The takeword() routine in this
|
||||
* calls a TermProc's takeword() instead of being overridden in a user-derived class.
|
||||
* The text_to_words() method also takes care of flushing.
|
||||
* Helper specialized TextSplit class, feeds the pipeline:
|
||||
* - The takeword() method calls a TermProc->takeword().
|
||||
* - The text_to_words() method also takes care of flushing.
|
||||
* Both methods can be further specialized by the user (they should then call
|
||||
* the base methods when they've done the local processing).
|
||||
*/
|
||||
class TextSplitP : public TextSplit {
|
||||
public:
|
||||
TextSplitP(TermProc *prc, Flags flags = Flags(TXTS_NONE))
|
||||
: TextSplit(flags), m_prc(prc) {}
|
||||
|
||||
virtual bool text_to_words(const string &in)
|
||||
{
|
||||
virtual bool text_to_words(const string &in) {
|
||||
bool ret = TextSplit::text_to_words(in);
|
||||
if (m_prc && !m_prc->flush())
|
||||
return false;
|
||||
return ret;
|
||||
}
|
||||
|
||||
virtual bool takeword(const string& term, int pos, int bs, int be)
|
||||
{
|
||||
virtual bool takeword(const string& term, int pos, int bs, int be) {
|
||||
if (m_prc)
|
||||
return m_prc->takeword(term, pos, bs, be);
|
||||
else
|
||||
return true;
|
||||
}
|
||||
virtual void newpage(int pos)
|
||||
{
|
||||
|
||||
virtual void newpage(int pos) {
|
||||
if (m_prc)
|
||||
return m_prc->newpage(pos);
|
||||
}
|
||||
@ -105,7 +112,9 @@ private:
|
||||
TermProc *m_prc;
|
||||
};
|
||||
|
||||
/** Unaccent and lowercase term. This is usually the first in the pipeline */
|
||||
/** Unaccent and lowercase term. If the index is
|
||||
* not case/diacritics-sensitive, this is usually the first step in the pipeline
|
||||
*/
|
||||
class TermProcPrep : public TermProc {
|
||||
public:
|
||||
TermProcPrep(TermProc *nxt)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user