From 4a7ff398b2738dfd631c919bc35c587d3c3ad1ee Mon Sep 17 00:00:00 2001
From: Jean-Francois Dockes <jfd@recoll.org>
Date: Fri, 7 Oct 2011 08:05:36 +0200
Subject: [PATCH] comments

---
 src/rcldb/termproc.h | 41 +++++++++++++++++++++++++++++++++++++----
 1 file changed, 37 insertions(+), 4 deletions(-)

diff --git a/src/rcldb/termproc.h b/src/rcldb/termproc.h
index d7b6e777..0d37dfe6 100644
--- a/src/rcldb/termproc.h
+++ b/src/rcldb/termproc.h
@@ -19,9 +19,27 @@
 #ifndef _TERMPROC_H_INCLUDED_
 #define _TERMPROC_H_INCLUDED_
 
+
 #include "textsplit.h"
 #include "stoplist.h"
+
 namespace Rcl {
+
+/** 
+ * Termproc objects take a stream of term tokens as input and do something
+ * with them: transform to lowercase, filter out stop words, generate n-grams,
+ * finally index or generate search clauses, etc. They are chained and can 
+ * be arranged to form different pipelines depending on the desired processing
+ * steps: for example, optional stoplist or commongram processing.
+ *
+ * Shared processing steps are defined in this file. The first and last steps
+ * (ie: adding index term) are usually defined in the specific module.
+ */
+
+/** 
+ * The base class takes care of chaining: all derived classes call its 
+ * takeword() and flush() methods to ensure that terms go through the pipe.
+ */
 class TermProc {
 public:
     TermProc(TermProc* next) : m_next(next) {}
@@ -42,13 +60,21 @@ public:
     }
 private:
     TermProc *m_next;
+    /* Copyconst and assignment private and forbidden */
+    TermProc(const TermProc &) {}
+    TermProc& operator=(const TermProc &) {return *this;};
 };
 
+/** 
+ * Intermediary specialized texsplit class: this will probably replace the base
+ * textsplit when we've converted all the code. The takeword() routine in this
+ * calls a TextProc's instead of being specialized in a derived class by the
+ * user module. The text_to_word() method also takes care of flushing.
+ */
 class TextSplitP : public TextSplit {
 public:
     TextSplitP(TermProc *prc, Flags flags = Flags(TXTS_NONE))
-	: TextSplit(flags), m_prc(prc)
-    {}
+	: TextSplit(flags), m_prc(prc)  {}
 
     virtual bool text_to_words(const string &in)
     {
@@ -70,6 +96,7 @@ private:
     TermProc *m_prc;
 };
 
+/** Unaccent and lowercase term. This is usually the first in the pipeline */
 class TermProcPrep : public TermProc {
 public:
     TermProcPrep(TermProc *nxt)	: TermProc(nxt) {}
@@ -86,10 +113,12 @@ public:
     }
 };
 
+/** Compare to stop words list and discard if match found */
 class TermProcStop : public TermProc {
 public:
     TermProcStop(TermProc *nxt, const Rcl::StopList& stops)
-	: TermProc(nxt), m_stops(stops) { }
+	: TermProc(nxt), m_stops(stops) {}
+
     virtual bool takeword(const string& term, int pos, int bts, int bte)
     {
 	if (m_stops.isStop(term)) {
@@ -101,6 +130,9 @@ private:
     const Rcl::StopList& m_stops;
 };
 
+/** Handle common-gram generation: combine frequent terms with neighbours to
+ *  shorten the positions lists for phrase searches.
+ */
 class TermProcCommongrams : public TermProc {
 public:
     TermProcCommongrams(TermProc *nxt, const Rcl::StopList& stops)
@@ -177,6 +209,7 @@ private:
     bool   m_onlygrams;
 };
 
-}
+
+} // End namespace Rcl
 
 #endif /* _TERMPROC_H_INCLUDED_ */