comments and indent

This commit is contained in:
Jean-Francois Dockes 2015-06-09 19:34:15 +02:00
parent 0755f4f4e2
commit 94b94593e3

View File

@ -25,14 +25,19 @@
namespace Rcl { namespace Rcl {
/** /**
* Termproc objects take a stream of term tokens as input and do something * Termproc objects take term tokens as input and do something
* with them: transform to lowercase, filter out stop words, generate n-grams, * with them: transform to lowercase, filter out stop words, generate n-grams,
* finally index or generate search clauses, etc. They are chained and can * finally index or generate search clauses, etc. They are chained and can
* be arranged to form different pipelines depending on the desired processing * be arranged to form different pipelines depending on the desired processing
* steps: for example, optional stoplist or commongram processing. * steps: for example, optional stoplist or commongram processing.
* *
* Shared processing steps are defined in this file. The first and last steps * Shared processing steps are defined in this file. The first and last steps
* (ie: adding index term) are usually defined in the specific module. * are usually defined in the specific module.
* - The front TermProc is typically chained from a TextSplit object
* which generates the original terms, and calls takeword() from its
* own takeword() method.
* - The last TermProc does something with the finalized terms, e.g. adds
* them to the index.
*/ */
/** /**
@ -50,6 +55,7 @@ public:
else else
return true; return true;
} }
// newpage() is like takeword(), but for page breaks.
virtual void newpage(int pos) virtual void newpage(int pos)
{ {
if (m_next) if (m_next)
@ -66,37 +72,38 @@ private:
TermProc *m_next; TermProc *m_next;
/* Copy constructor and assignment are private and forbidden */ /* Copy constructor and assignment are private and forbidden */
TermProc(const TermProc &) {} TermProc(const TermProc &) {}
TermProc& operator=(const TermProc &) {return *this;}; TermProc& operator=(const TermProc &) {
return *this;
};
}; };
/** /**
* Specialized TextSplit class: this will probably replace the base * Helper specialized TextSplit class, feeds the pipeline:
* TextSplit when we've converted all the code. The takeword() routine in this * - The takeword() method calls a TermProc->takeword().
* calls a TermProc's instead of being overridden in a user derived class. * - The text_to_words() method also takes care of flushing.
* The text_to_words() method also takes care of flushing. * Both methods can be further specialized by the user (they should then call
* the base methods when they've done the local processing).
*/ */
class TextSplitP : public TextSplit { class TextSplitP : public TextSplit {
public: public:
TextSplitP(TermProc *prc, Flags flags = Flags(TXTS_NONE)) TextSplitP(TermProc *prc, Flags flags = Flags(TXTS_NONE))
: TextSplit(flags), m_prc(prc) {} : TextSplit(flags), m_prc(prc) {}
virtual bool text_to_words(const string &in) virtual bool text_to_words(const string &in) {
{
bool ret = TextSplit::text_to_words(in); bool ret = TextSplit::text_to_words(in);
if (m_prc && !m_prc->flush()) if (m_prc && !m_prc->flush())
return false; return false;
return ret; return ret;
} }
virtual bool takeword(const string& term, int pos, int bs, int be) virtual bool takeword(const string& term, int pos, int bs, int be) {
{
if (m_prc) if (m_prc)
return m_prc->takeword(term, pos, bs, be); return m_prc->takeword(term, pos, bs, be);
else else
return true; return true;
} }
virtual void newpage(int pos)
{ virtual void newpage(int pos) {
if (m_prc) if (m_prc)
return m_prc->newpage(pos); return m_prc->newpage(pos);
} }
@ -105,7 +112,9 @@ private:
TermProc *m_prc; TermProc *m_prc;
}; };
/** Unaccent and lowercase term. This is usually the first in the pipeline */ /** Unaccent and lowercase term. If the index is
* not case/diac-sensitive, this is usually the first step in the pipeline
*/
class TermProcPrep : public TermProc { class TermProcPrep : public TermProc {
public: public:
TermProcPrep(TermProc *nxt) TermProcPrep(TermProc *nxt)