diff --git a/src/common/textsplitko.cpp b/src/common/textsplitko.cpp
index 4906de6e..86c7bc8e 100644
--- a/src/common/textsplitko.cpp
+++ b/src/common/textsplitko.cpp
@@ -103,6 +103,7 @@ static bool initCmd()
 
 bool TextSplit::ko_to_words(Utf8Iter *itp, unsigned int *cp)
 {
+    LOGDEB1("ko_to_words\n");
     std::unique_lock<std::mutex> mylock(o_mutex);
     initCmd();
     if (nullptr == o_talker) {
@@ -131,16 +132,13 @@ bool TextSplit::ko_to_words(Utf8Iter *itp, unsigned int *cp)
         c = *it;
         if (!isHANGUL(c) && isalpha(c)) {
             // Done with Korean stretch, process and go back to main routine
-            //std::cerr << "Broke on char " << (std::string)it << endl;
+            LOGDEB1("ko_to_words: broke on " << (std::string)it << endl);
             break;
         } else {
             if (c == '\f') {
-                inputdata += magicpage;
+                inputdata += magicpage + " ";
             } else {
-                if (isKomoran && (c == '\n' || c == '\r')) {
-                    // Komoran does not like some control chars (initially
-                    // thought only formfeed, but not), which is a prob
-                    // for pdf pages counts. will need to fix this
+                if (c < 0x20 || (c > 0x7e && c < 0xa0)) {
                     inputdata += ' ';
                 } else {
                     it.appendchartostring(inputdata);
@@ -175,9 +173,10 @@ bool TextSplit::ko_to_words(Utf8Iter *itp, unsigned int *cp)
     vector<string> tags;
     stringToTokens(outtags, tags, sepchars);
 
-    // This is the position in the whole text, not the local fragment,
-    // which is bytepos-orgbytepos
-    string::size_type bytepos(orgbytepos);
+    // bytepos is the byte position in the local fragment (inputdata). The
+    // position in the whole text is orgbytepos + bytepos - pagefix
+    string::size_type bytepos{0};
+    string::size_type pagefix{0};
     for (unsigned int i = 0; i < words.size(); i++) {
         // The POS tagger strips characters from the input (e.g. multiple
         // spaces, sometimes new lines, possibly other stuff). This
@@ -190,25 +189,32 @@ bool TextSplit::ko_to_words(Utf8Iter *itp, unsigned int *cp)
         string word = words[i];
         trimstring(word);
         if (word == magicpage) {
+            LOGDEB1("ko_to_words: NEWPAGE\n");
             newpage(m_wordpos);
+            bytepos += word.size() + 1;
+            pagefix += word.size();
+            continue;
         }
-        string::size_type newpos = bytepos - orgbytepos;
-        newpos = inputdata.find(word, newpos);
+        // Find the actual start position of the word in the section.
+        string::size_type newpos = inputdata.find(word, bytepos);
         if (newpos != string::npos) {
-            bytepos = orgbytepos + newpos;
+            bytepos = newpos;
+        } else {
+            LOGDEB("textsplitko: word [" << word << "] not found in text\n");
         }
-        LOGDEB1("WORD OPOS " << bytepos-orgbytepos <<
-                " FOUND POS " << newpos << endl);
+        LOGDEB1("WORD [" << word << "] size " << word.size() <<
+                " TAG " << tags[i] << " inputdata size " << inputdata.size() <<
+                " absbytepos " << orgbytepos + bytepos << 
+                " bytepos " << bytepos << " word from text: " <<
+                inputdata.substr(bytepos, word.size()) << endl);
         if (tags[i] == "Noun" || tags[i] == "Verb" ||
             tags[i] == "Adjective" || tags[i] == "Adverb") {
-            if (!takeword(
-                    word, m_wordpos++, bytepos, bytepos + words[i].size())) {
+            string::size_type abspos = orgbytepos + bytepos - pagefix;
+            if (!takeword(word, m_wordpos++, abspos, abspos + word.size())) {
                 return false;
             }
         }
-        LOGDEB1("WORD [" << words[i] << "] size " << words[i].size() <<
-               " TAG " << tags[i] << endl);
-        bytepos += words[i].size();
+        bytepos += word.size();
     }
 
 #if DO_CHECK_THINGS
@@ -229,5 +235,6 @@ bool TextSplit::ko_to_words(Utf8Iter *itp, unsigned int *cp)
     clearsplitstate();
     m_spanpos = m_wordpos = pos;
     *cp = c;
+    LOGDEB1("ko_to_words: returning\n");
     return true;
 }
diff --git a/src/rcldb/rclabsfromtext.cpp b/src/rcldb/rclabsfromtext.cpp
index a19ef9c5..28aab57b 100644
--- a/src/rcldb/rclabsfromtext.cpp
+++ b/src/rcldb/rclabsfromtext.cpp
@@ -106,13 +106,14 @@ struct MatchFragment {
 class TextSplitABS : public TextSplit {
 public:
 
-    TextSplitABS(const vector<string>& matchTerms,
+    TextSplitABS(const string& rawtext, const vector<string>& matchTerms,
                  const HighlightData& hdata,
                  unordered_map<string, double>& wordcoefs,
                  unsigned int ctxwords,
                  Flags flags,
                  unsigned int maxterms)
-        :  TextSplit(flags), m_terms(matchTerms.begin(), matchTerms.end()),
+        :  TextSplit(flags), m_rawtext(rawtext),
+           m_terms(matchTerms.begin(), matchTerms.end()),
            m_hdata(hdata), m_wordcoefs(wordcoefs), m_ctxwords(ctxwords),
            maxtermcount(maxterms) {
 
@@ -132,7 +133,7 @@ public:
     // Accept a word and its position. If the word is a matched term,
     // add/update fragment definition.
     virtual bool takeword(const std::string& term, int pos, int bts, int bte) {
-        LOGDEB2("takeword: " << term << endl);
+        LOGDEB1("takeword: [" << term << "] bytepos: "<<bts<<":"<<bte<<endl);
         // Limit time taken with monster documents. The resulting
         // abstract will be incorrect or inexistant, but this is
         // better than taking forever (the default cutoff value comes
@@ -169,9 +170,9 @@ public:
 
         if (m_terms.find(dumb) != m_terms.end()) {
             // This word is a search term. Extend or create fragment
-            LOGDEB2("match: [" << dumb << "] current: " << m_curfrag.first <<
-                    ", " << m_curfrag.second << " remain " <<
-                    m_remainingWords << endl);
+            LOGDEB1("match: [" << dumb << "] pos " << pos << " bpos " << bts <<
+                   ":" << bte << " remainingWords " << m_remainingWords << endl);
+            LOGDEB1("Match text " << m_rawtext.substr(bts, bte - bts) << endl);
             double coef = m_wordcoefs[dumb];
             if (!m_remainingWords) {
                 // No current fragment. Start one
@@ -219,7 +220,7 @@ public:
                 // Term group (phrase/near) handling
                 m_plists[dumb].push_back(pos);
                 m_gpostobytes[pos] = pair<int,int>(bts, bte);
-                LOGDEB2("Recorded bpos for " << pos << ": " << bts << " " <<
+                LOGDEB1("Recorded bpos for pos " << pos << ": " << bts << " " <<
                         bte << "\n");
             }
         }
@@ -236,6 +237,11 @@ public:
             m_remainingWords--;
             m_curfrag.second = bte;
             if (m_remainingWords == 0) {
+                LOGDEB1("FRAGMENT: from byte " << m_curfrag.first <<
+                        " to  byte " << m_curfrag.second << endl);
+                LOGDEB1("FRAGMENT TEXT [" << m_rawtext.substr(
+                            m_curfrag.first, m_curfrag.second-m_curfrag.first)
+                        << "]\n");
                 // We used to not push weak fragments if we had a lot
                 // already. This can cause problems if the fragments
                 // we drop are actually group fragments (which have
@@ -337,6 +343,7 @@ public:
     }
     
 private:
+    const string& m_rawtext;
     // Past terms because we need to go back for context before a hit
     deque<pair<int,int>>  m_prevterms;
     // Data about the fragment we are building
@@ -424,7 +431,7 @@ int Query::Native::abstractFromText(
     }
     LOGABS("abstractFromText: getterms: " << chron.millis() << "mS\n");
 
-    TextSplitABS splitter(matchTerms, hld, wordcoefs, ctxwords,
+    TextSplitABS splitter(rawtext, matchTerms, hld, wordcoefs, ctxwords,
                           TextSplit::TXTS_ONLYSPANS,
                           m_q->m_snipMaxPosWalk);
     splitter.text_to_words(rawtext);