diff --git a/src/common/textsplit.cpp b/src/common/textsplit.cpp
index 4528bb79..76e84738 100644
--- a/src/common/textsplit.cpp
+++ b/src/common/textsplit.cpp
@@ -88,7 +88,7 @@ static void setcharclasses()
     for (i = 0; i  < strlen(wild); i++)
 	charclasses[int(wild[i])] = WILD;
 
-    char special[] = ".@+-,#'\n\r";
+    char special[] = ".@+-,#'_\n\r";
     for (i = 0; i  < strlen(special); i++)
 	charclasses[int(special[i])] = special[i];
 
@@ -138,16 +138,18 @@ static inline int whatcc(unsigned int c)
 // FF00..FFEF; Halfwidth and Fullwidth Forms
 // 20000..2A6DF; CJK Unified Ideographs Extension B
 // 2F800..2FA1F; CJK Compatibility Ideographs Supplement
+// Note: the (p) > 127 test is redundant (all ranges start at 0x2E80 or above); it just short-circuits the ASCII case
 #define UNICODE_IS_CJK(p)						\
-    (((p) >= 0x2E80 && (p) <= 0x2EFF)                                   \
-     || ((p) >= 0x3000 && (p) <= 0x9FFF)                                \
-     || ((p) >= 0xA700 && (p) <= 0xA71F)                                \
-     || ((p) >= 0xAC00 && (p) <= 0xD7AF)                                \
-     || ((p) >= 0xF900 && (p) <= 0xFAFF)                                \
-     || ((p) >= 0xFE30 && (p) <= 0xFE4F)                                \
-     || ((p) >= 0xFF00 && (p) <= 0xFFEF)                                \
-     || ((p) >= 0x20000 && (p) <= 0x2A6DF)                              \
-     || ((p) >= 0x2F800 && (p) <= 0x2FA1F))
+    ((p) > 127 &&							\
+     (((p) >= 0x2E80 && (p) <= 0x2EFF) ||				\
+      ((p) >= 0x3000 && (p) <= 0x9FFF) ||				\
+      ((p) >= 0xA700 && (p) <= 0xA71F) ||				\
+      ((p) >= 0xAC00 && (p) <= 0xD7AF) ||				\
+      ((p) >= 0xF900 && (p) <= 0xFAFF) ||				\
+      ((p) >= 0xFE30 && (p) <= 0xFE4F) ||				\
+      ((p) >= 0xFF00 && (p) <= 0xFFEF) ||				\
+      ((p) >= 0x20000 && (p) <= 0x2A6DF) ||				\
+      ((p) >= 0x2F800 && (p) <= 0x2FA1F)))
 
 bool TextSplit::isCJK(int c)
 {
@@ -385,6 +387,14 @@ bool TextSplit::text_to_words(const string &in)
 	    }
 	    m_wordStart += it.appendchartostring(m_span);
 	    break;
+	case '_':
+	    if (m_wordLen) {
+		if (!doemit(false, it.getBpos()))
+		    return false;
+		m_inNumber = false;
+	    }
+	    m_wordStart += it.appendchartostring(m_span);
+	    break;
 	case '\'':
 	    // If in word, potential span: o'brien, else, this is more 
 	    // whitespace