diff --git a/src/common/textsplit.cpp b/src/common/textsplit.cpp
index 081e05d4..28f91c13 100644
--- a/src/common/textsplit.cpp
+++ b/src/common/textsplit.cpp
@@ -468,8 +468,8 @@ inline bool TextSplit::doemit(bool spanerase, size_t _bp)
 
 void TextSplit::discardspan()
 {
+    m_span.clear();
     m_words_in_span.clear();
-    m_span.erase();
     m_spanpos = m_wordpos;
     m_wordStart = 0;
     m_wordLen = m_wordChars = 0;
@@ -513,10 +513,9 @@ bool TextSplit::text_to_words(const string &in)
     if (in.empty())
 	return true;
 
-    m_span.erase();
-    m_inNumber = false;
-    m_wordStart = m_wordLen = m_wordChars = m_prevpos = m_prevlen = m_wordpos 
-	= m_spanpos = 0;
+    // Reset the data members related to the splitting state
+    clearsplitstate();
+    
     bool pagepending = false;
     bool softhyphenpending = false;
 
@@ -935,10 +934,12 @@ bool TextSplit::cjk_to_words(Utf8Iter *itp, unsigned int *cp)
 	}
     }
 
-    m_span.erase();
-    m_inNumber = false;
-    m_wordStart = m_wordLen = m_wordChars = m_prevpos = m_prevlen = 0;
-    m_spanpos = m_wordpos;
+    // Reset the splitting state while preserving the term position, and
+    // return the non-CJK Unicode character that was found. The current
+    // input byte offset is kept in the Utf8Iter.
+    int pos = m_wordpos;
+    clearsplitstate();
+    m_spanpos = m_wordpos = pos;
     *cp = c;
     return true;
 }
diff --git a/src/common/textsplit.h b/src/common/textsplit.h
index b68d9430..9f66d13c 100644
--- a/src/common/textsplit.h
+++ b/src/common/textsplit.h
@@ -213,6 +213,14 @@ private:
     // Word length in characters. Declared but not updated if !TEXTSPLIT_STATS
     unsigned int  m_wordChars;
 
+    void clearsplitstate() { // Reset the members holding the splitting state
+        m_span.clear(); // empty the span buffer
+        m_words_in_span.clear(); // forget the words recorded for the span
+        m_inNumber = false;
+        m_wordStart = m_wordLen = m_wordpos = m_spanpos = m_prevpos =
+            m_prevlen = m_wordChars = 0; // m_wordpos is zeroed too: save/restore it around the call to keep it
+    }
+
     // This processes cjk text:
     bool cjk_to_words(Utf8Iter *it, unsigned int *cp);