diff --git a/src/filters/rclchm b/src/filters/rclchm
index e9cf0291..e3046d39 100755
--- a/src/filters/rclchm
+++ b/src/filters/rclchm
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python2
"""Extract Html files from a Microsoft Compiled Html Help file (.chm)
Needs at least python 2.2 for HTMLParser (chmlib needs 2.2 too)"""
diff --git a/src/filters/rcllatinclass.py b/src/filters/rcllatinclass.py
index ad5d3efe..fa9504b9 100755
--- a/src/filters/rcllatinclass.py
+++ b/src/filters/rcllatinclass.py
@@ -16,12 +16,15 @@ with acute accent ?
from __future__ import print_function
import sys
-import string
+PY3 = sys.version > '3'
+if not PY3:
+ import string
import glob
import os
import os.path
from zipfile import ZipFile
+
class European8859TextClassifier:
def __init__(self, langzip=""):
"""langzip contains text files. Each text file is named like lang_code.txt
@@ -33,9 +36,12 @@ class European8859TextClassifier:
self.readlanguages(langzip)
# Table to translate from punctuation to spaces
- self.punct = '''*?[].@+-,#_$%&={};.,:!"''' + "'\n\r"
- spaces = len(self.punct) * " "
- self.spacetable = string.maketrans(self.punct, spaces)
+ self.punct = b'''0123456789<>/*?[].@+-,#_$%&={};.,:!"''' + b"'\n\r"
+ spaces = len(self.punct) * b' '
+ if PY3:
+ self.spacetable = bytes.maketrans(self.punct, spaces)
+ else:
+ self.spacetable = string.maketrans(self.punct, spaces)
def readlanguages(self, langzip):
"""Extract the stop words lists from the zip file.
@@ -53,7 +59,7 @@ class European8859TextClassifier:
text = zip.read(fn)
words = text.split()
for word in words:
- if self.allwords.has_key(word):
+ if word in self.allwords:
self.allwords[word].append((lang, code))
else:
self.allwords[word] = [(lang, code)]
@@ -64,7 +70,7 @@ class European8859TextClassifier:
# Limit to reasonable size.
if len(rawtext) > 10000:
- i = rawtext.find(" ", 9000)
+ i = rawtext.find(b' ', 9000)
if i == -1:
i = 9000
rawtext = rawtext[0:i]
@@ -79,9 +85,9 @@ class European8859TextClassifier:
dict = {}
for w in words:
dict[w] = dict.get(w, 0) + 1
- lfreq = [a[0] for a in sorted(dict.iteritems(), \
+ lfreq = [a[0] for a in sorted(dict.items(), \
key=lambda entry: entry[1], reverse=True)[0:ntest]]
- #print lfreq
+ #print(lfreq)
# Build a dict (lang,code)->matchcount
langstats = {}
@@ -91,9 +97,9 @@ class European8859TextClassifier:
langstats[lc] = langstats.get(lc, 0) + 1
# Get a list of (lang,code) sorted by match count
- lcfreq = sorted(langstats.iteritems(), \
+ lcfreq = sorted(langstats.items(), \
key=lambda entry: entry[1], reverse=True)
- #print lcfreq[0:3]
+ #print(lcfreq[0:3])
if len(lcfreq) != 0:
lc,maxcount = lcfreq[0]
maxlang = lc[0]
diff --git a/src/filters/rclmpdf.py b/src/filters/rclmpdf.py
index e78d76b9..0b5ba836 100755
--- a/src/filters/rclmpdf.py
+++ b/src/filters/rclmpdf.py
@@ -17,6 +17,8 @@
# Recoll PDF extractor, with support for attachments
+from __future__ import print_function
+
import os
import sys
import re
@@ -89,7 +91,7 @@ class PDFExtractor:
"output", tmpdir])
self.attachlist = sorted(os.listdir(tmpdir))
return True
- except Exception, e:
+ except Exception as e:
self.em.rclog("extractAttach: failed: %s" % e)
# Return true anyway, pdf attachments are no big deal
return True
diff --git a/src/filters/rclopxml.py b/src/filters/rclopxml.py
index 0073e17b..cc9948b0 100755
--- a/src/filters/rclopxml.py
+++ b/src/filters/rclopxml.py
@@ -146,7 +146,7 @@ class OXExtractor:
return stylesheet
def extractone(self, params):
- if not params.has_key("filename:"):
+ if "filename:" not in params:
self.em.rclog("extractone: no mime or file name")
return (False, "", "", rclexecm.RclExecM.eofnow)
fn = params["filename:"]
diff --git a/src/filters/rclppt.py b/src/filters/rclppt.py
index c2319e18..211d822a 100755
--- a/src/filters/rclppt.py
+++ b/src/filters/rclppt.py
@@ -1,4 +1,10 @@
-#!/usr/bin/env python
+#!/usr/bin/env python2
+
+# Recoll PPT text extractor
+# Msodump is not compatible with Python 3 AFAIK, so this filter is stuck
+# with Python 2 too
+
+from __future__ import print_function
import rclexecm
import rclexec1
diff --git a/src/filters/rclpython b/src/filters/rclpython
index 990d03b5..362d8a4e 100755
--- a/src/filters/rclpython
+++ b/src/filters/rclpython
@@ -22,6 +22,8 @@
# - parse script encoding and allow output in any encoding by using unicode
# as intermediate
+from __future__ import print_function
+
__version__ = '0.3'
__date__ = '2005-07-04'
__license__ = 'GPL'
@@ -29,9 +31,26 @@ __author__ = 'J
# Imports
-import cgi, string, sys, cStringIO
+import cgi, string, sys
+PY2 = sys.version < '3'
+if PY2:
+ import cStringIO
+else:
+ import io
import keyword, token, tokenize
+if PY2:
+ def makebytes(data):
+ if isinstance(data, unicode):
+ return data.encode("UTF-8")
+ else:
+ return data
+else:
+ def makebytes(data):
+ if isinstance(data, bytes):
+ return data
+ else:
+ return data.encode("UTF-8")
#############################################################################
### Python Source Parser (does Hilighting)
@@ -57,7 +76,7 @@ _HTML_HEADER = """\
%%(title)s
-
+
@@ -114,7 +133,7 @@ class Parser:
def __init__(self, raw, out=sys.stdout):
""" Store the source text.
"""
- self.raw = string.strip(string.expandtabs(raw))
+ self.raw = raw.expandtabs().strip()
self.out = out
def format(self):
@@ -124,35 +143,44 @@ class Parser:
self.lines = [0, 0]
pos = 0
while 1:
- pos = string.find(self.raw, '\n', pos) + 1
+ pos = self.raw.find(b'\n', pos) + 1
if not pos: break
self.lines.append(pos)
self.lines.append(len(self.raw))
# parse the source and write it
self.pos = 0
- text = cStringIO.StringIO(self.raw)
- self.out.write(self.stylesheet)
- self.out.write('\n')
+ if PY2:
+ text = cStringIO.StringIO(self.raw)
+ else:
+ text = io.BytesIO(self.raw)
+ self.out.write(makebytes(self.stylesheet))
+ self.out.write(b'\n')
try:
- tokenize.tokenize(text.readline, self)
- except tokenize.TokenError, ex:
+ if PY2:
+ tokenize.tokenize(text.readline, self)
+ else:
+ for a,b,c,d,e in tokenize.tokenize(text.readline):
+ self(a,b,c,d,e)
+ except tokenize.TokenError as ex:
msg = ex[0]
line = ex[1][0]
self.out.write("ERROR: %s
%s\n" % (
msg, self.raw[self.lines[line]:]))
- except IndentationError, ex:
+ except IndentationError as ex:
msg = ex[0]
self.out.write("ERROR: %s
\n" % (msg))
- self.out.write('\n')
+ self.out.write(b'\n
')
- def __call__(self, toktype, toktext, (srow,scol), (erow,ecol), line):
+ def __call__(self, toktype, toktext, startpos, endpos, line):
""" Token handler.
"""
if 0:
- print "type", toktype, token.tok_name[toktype], "text", toktext,
- print "start", srow,scol, "end", erow,ecol, "
"
-
+ print("type %s %s text %s start %s %s end %s %s
\n" % \
+ (toktype, token.tok_name[toktype], toktext, \
+ srow, scol,erow,ecol))
+ srow, scol = startpos
+ erow, ecol = endpos
# calculate new positions
oldpos = self.pos
newpos = self.lines[srow] + scol
@@ -160,7 +188,7 @@ class Parser:
# handle newlines
if toktype in [token.NEWLINE, tokenize.NL]:
- self.out.write('\n')
+ self.out.write(b'\n')
return
# send the original whitespace, if needed
@@ -180,9 +208,9 @@ class Parser:
css_class = _css_classes.get(toktype, 'text')
# send text
- self.out.write('' % (css_class,))
- self.out.write(cgi.escape(toktext))
- self.out.write('')
+ self.out.write(makebytes('' % (css_class,)))
+ self.out.write(makebytes(cgi.escape(toktext)))
+ self.out.write(b'')
def colorize_file(file=None, outstream=sys.stdout, standalone=True):
@@ -205,7 +233,7 @@ def colorize_file(file=None, outstream=sys.stdout, standalone=True):
filename = 'STREAM'
elif file is not None:
try:
- sourcefile = open(file)
+ sourcefile = open(file, 'rb')
filename = basename(file)
except IOError:
raise SystemExit("File %s unknown." % file)
@@ -215,22 +243,26 @@ def colorize_file(file=None, outstream=sys.stdout, standalone=True):
source = sourcefile.read()
if standalone:
- outstream.write(_HTML_HEADER % {'title': filename})
+ outstream.write(makebytes(_HTML_HEADER % {'title': filename}))
Parser(source, out=outstream).format()
if standalone:
- outstream.write(_HTML_FOOTER)
+ outstream.write(makebytes(_HTML_FOOTER))
if file:
sourcefile.close()
if __name__ == "__main__":
import os
+ if PY2:
+ out = sys.stdout
+ else:
+ out = sys.stdout.buffer
if os.environ.get('PATH_TRANSLATED'):
filepath = os.environ.get('PATH_TRANSLATED')
- print 'Content-Type: text/html; charset="iso-8859-1"\n'
- colorize_file(filepath)
+ print('Content-Type: text/html; charset="iso-8859-1"\n')
+ colorize_file(filepath, out)
elif len(sys.argv) > 1:
filepath = sys.argv[1]
- colorize_file(filepath)
+ colorize_file(filepath, out)
else:
colorize_file()
diff --git a/src/filters/rclrar b/src/filters/rclrar
index 0846263c..f11c2a39 100755
--- a/src/filters/rclrar
+++ b/src/filters/rclrar
@@ -18,12 +18,14 @@
# Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+from __future__ import print_function
+
import sys
import rclexecm
try:
from rarfile import RarFile
except:
- print "RECFILTERROR HELPERNOTFOUND python:rarfile"
+ print("RECFILTERROR HELPERNOTFOUND python:rarfile")
sys.exit(1);
# Requires RarFile python module. Try "sudo pip install rarfile"
@@ -67,9 +69,7 @@ class RarExtractor:
iseof = rclexecm.RclExecM.noteof
if self.currentindex >= len(self.rar.namelist()) -1:
iseof = rclexecm.RclExecM.eofnext
- if isinstance(ipath, unicode):
- ipath = ipath.encode("utf-8")
- return (ok, docdata, ipath, iseof)
+ return (ok, docdata, rclexecm.makebytes(ipath), iseof)
###### File type handler api, used by rclexecm ---------->
def openfile(self, params):
diff --git a/src/filters/rclsoff.py b/src/filters/rclsoff.py
index d6a5b8c7..67e08014 100755
--- a/src/filters/rclsoff.py
+++ b/src/filters/rclsoff.py
@@ -16,6 +16,8 @@
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
######################################
+from __future__ import print_function
+
import sys
import rclexecm
import rclxslt
@@ -130,19 +132,19 @@ class OOExtractor:
self.currentindex = 0
def extractone(self, params):
- if not params.has_key("filename:"):
+ if "filename:" not in params:
self.em.rclog("extractone: no mime or file name")
return (False, "", "", rclexecm.RclExecM.eofnow)
fn = params["filename:"]
try:
- zip = ZipFile(fn)
+ zip = ZipFile(fn.decode('UTF-8'))
except Exception as err:
- self.em.rclog("unzip failed: " + str(err))
+ self.em.rclog("unzip failed: %s" % err)
return (False, "", "", rclexecm.RclExecM.eofnow)
- docdata = ''
+ docdata = b''
try:
metadata = zip.read("meta.xml")
@@ -160,9 +162,9 @@ class OOExtractor:
if content:
res = rclxslt.apply_sheet_data(stylesheet_content, content)
docdata += res
- docdata += ''
+ docdata += b''
except Exception as err:
- self.em.rclog("bad data in %s" % fn)
+ self.em.rclog("bad data in %s: %s" % (fn, err))
return (False, "", "", rclexecm.RclExecM.eofnow)
return (True, docdata, "", rclexecm.RclExecM.eofnext)
diff --git a/src/filters/rclsvg.py b/src/filters/rclsvg.py
index 7fde9f2e..ef99664b 100755
--- a/src/filters/rclsvg.py
+++ b/src/filters/rclsvg.py
@@ -16,6 +16,8 @@
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
######################################
+from __future__ import print_function
+
import sys
import rclexecm
import rclxslt
@@ -104,7 +106,7 @@ class SVGExtractor:
self.currentindex = 0
def extractone(self, params):
- if not params.has_key("filename:"):
+ if "filename:" not in params:
self.em.rclog("extractone: no mime or file name")
return (False, "", "", rclexecm.RclExecM.eofnow)
fn = params["filename:"]
diff --git a/src/filters/rcltar b/src/filters/rcltar
index 7dba94d3..f597bb79 100755
--- a/src/filters/rcltar
+++ b/src/filters/rcltar
@@ -6,12 +6,14 @@
# It works not only for tar-files, but automatically for gzipped and
# bzipped tar-files at well.
+from __future__ import print_function
+
import rclexecm
try:
import tarfile
except:
- print "RECFILTERROR HELPERNOTFOUND python:tarfile"
+ print("RECFILTERROR HELPERNOTFOUND python:tarfile")
sys.exit(1);
class TarExtractor:
@@ -38,15 +40,15 @@ class TarExtractor:
iseof = rclexecm.RclExecM.noteof
if self.currentindex >= len(self.namen) -1:
iseof = rclexecm.RclExecM.eofnext
- if isinstance(ipath, unicode):
- ipath = ipath.encode("utf-8")
- return (ok, docdata, ipath, iseof)
+ return (ok, docdata, rclexecm.makebytes(ipath), iseof)
def openfile(self, params):
self.currentindex = -1
try:
self.tar = tarfile.open(name=params["filename:"],mode='r')
- self.namen = [ y.name for y in filter(lambda z:z.isfile(),self.tar.getmembers())]
+ #self.namen = [ y.name for y in filter(lambda z:z.isfile(),self.tar.getmembers())]
+ self.namen = [ y.name for y in [z for z in self.tar.getmembers() if z.isfile()]]
+
return True
except:
return False
diff --git a/src/filters/rcltext.py b/src/filters/rcltext.py
index 2605f047..847a80b2 100755
--- a/src/filters/rcltext.py
+++ b/src/filters/rcltext.py
@@ -1,10 +1,13 @@
#!/usr/bin/env python
+# Wrapping a text file. Recoll does it internally in most cases, but
+# this is for use by another filter.
+
+from __future__ import print_function
+
import rclexecm
import sys
-# Wrapping a text file. Recoll does it internally in most cases, but
-# there is a reason this exists, just can't remember it ...
class TxtDump:
def __init__(self, em):
self.em = em
@@ -12,7 +15,7 @@ class TxtDump:
def extractone(self, params):
#self.em.rclog("extractone %s %s" % (params["filename:"], \
#params["mimetype:"]))
- if not params.has_key("filename:"):
+ if not "filename:" in params:
self.em.rclog("extractone: no file name")
return (False, "", "", rclexecm.RclExecM.eofnow)
diff --git a/src/filters/rclwar b/src/filters/rclwar
index 30a95e9f..8b0dc35f 100755
--- a/src/filters/rclwar
+++ b/src/filters/rclwar
@@ -2,6 +2,8 @@
# WAR web archive filter for recoll. War file are gzipped tar files
+from __future__ import print_function
+
import rclexecm
import tarfile
diff --git a/src/filters/rclxls.py b/src/filters/rclxls.py
index cbae1692..a8e1bf97 100755
--- a/src/filters/rclxls.py
+++ b/src/filters/rclxls.py
@@ -1,5 +1,7 @@
-#!/usr/bin/env python
+#!/usr/bin/env python2
+# Extractor for Excel files.
+# Mso-dumper is not compatible with Python3
import rclexecm
import rclexec1
import xlsxmltocsv
diff --git a/src/filters/rclxml.py b/src/filters/rclxml.py
index 78e93f8a..1fd993f2 100755
--- a/src/filters/rclxml.py
+++ b/src/filters/rclxml.py
@@ -62,7 +62,7 @@ class XMLExtractor:
self.currentindex = 0
def extractone(self, params):
- if not params.has_key("filename:"):
+ if "filename:" not in params:
self.em.rclog("extractone: no mime or file name")
return (False, "", "", rclexecm.RclExecM.eofnow)
fn = params["filename:"]
diff --git a/src/filters/rclxslt.py b/src/filters/rclxslt.py
index 4b0e2e8c..2441294e 100644
--- a/src/filters/rclxslt.py
+++ b/src/filters/rclxslt.py
@@ -17,36 +17,54 @@
# Helper module for xslt-based filters
+from __future__ import print_function
+
import sys
-try:
- import libxml2
- import libxslt
-except:
- print "RECFILTERROR HELPERNOTFOUND python:libxml2/python:libxslt1"
- sys.exit(1);
+PY2 = sys.version < '3'
-libxml2.substituteEntitiesDefault(1)
-
-def apply_sheet_data(sheet, data):
- styledoc = libxml2.parseMemory(sheet, len(sheet))
- style = libxslt.parseStylesheetDoc(styledoc)
- doc = libxml2.parseMemory(data, len(data))
- result = style.applyStylesheet(doc, None)
- res = style.saveResultToString(result)
- style.freeStylesheet()
- doc.freeDoc()
- result.freeDoc()
- return res
-
-def apply_sheet_file(sheet, fn):
- styledoc = libxml2.parseMemory(sheet, len(sheet))
- style = libxslt.parseStylesheetDoc(styledoc)
- doc = libxml2.parseFile(fn)
- result = style.applyStylesheet(doc, None)
- res = style.saveResultToString(result)
- style.freeStylesheet()
- doc.freeDoc()
- result.freeDoc()
- return res
+if PY2:
+ try:
+ import libxml2
+ import libxslt
+ libxml2.substituteEntitiesDefault(1)
+ except:
+ print("RECFILTERROR HELPERNOTFOUND python:libxml2/python:libxslt1")
+ sys.exit(1);
+ def apply_sheet_data(sheet, data):
+ styledoc = libxml2.parseMemory(sheet, len(sheet))
+ style = libxslt.parseStylesheetDoc(styledoc)
+ doc = libxml2.parseMemory(data, len(data))
+ result = style.applyStylesheet(doc, None)
+ res = style.saveResultToString(result)
+ style.freeStylesheet()
+ doc.freeDoc()
+ result.freeDoc()
+ return res
+ def apply_sheet_file(sheet, fn):
+ styledoc = libxml2.parseMemory(sheet, len(sheet))
+ style = libxslt.parseStylesheetDoc(styledoc)
+ doc = libxml2.parseFile(fn)
+ result = style.applyStylesheet(doc, None)
+ res = style.saveResultToString(result)
+ style.freeStylesheet()
+ doc.freeDoc()
+ result.freeDoc()
+ return res
+else:
+ try:
+ from lxml import etree
+ except:
+ print("RECFILTERROR HELPERNOTFOUND python3:lxml")
+ sys.exit(1);
+ def apply_sheet_data(sheet, data):
+ styledoc = etree.fromstring(sheet)
+ transform = etree.XSLT(styledoc)
+ doc = etree.fromstring(data)
+ return etree.tostring(transform(doc))
+ def apply_sheet_file(sheet, fn):
+ styledoc = etree.fromstring(sheet)
+ transform = etree.XSLT(styledoc)
+ doc = etree.parse(fn)
+ return etree.tostring(transform(doc))
diff --git a/src/filters/rclzip b/src/filters/rclzip
index 9d88dc76..82974e54 100755
--- a/src/filters/rclzip
+++ b/src/filters/rclzip
@@ -1,6 +1,24 @@
#!/usr/bin/env python
+# Copyright (C) 2014 J.F.Dockes
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the
+# Free Software Foundation, Inc.,
+# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+#
-# Zip file filter for Recoll
+# Zip file extractor for Recoll
+
+from __future__ import print_function
import os
import fnmatch
@@ -78,9 +96,7 @@ class ZipExtractor:
iseof = rclexecm.RclExecM.noteof
if self.currentindex >= len(self.zip.namelist()) -1:
iseof = rclexecm.RclExecM.eofnext
- if isinstance(ipath, unicode):
- ipath = ipath.encode("utf-8")
- return (ok, docdata, ipath, iseof)
+ return (ok, docdata, rclexecm.makebytes(ipath), iseof)
###### File type handler api, used by rclexecm ---------->
def openfile(self, params):
@@ -96,7 +112,14 @@ class ZipExtractor:
self.skiplist = skipped.split(" ")
try:
- self.zip = ZipFile(filename)
+ if rclexecm.PY3:
+ # Note: Python 3's ZipFile wants a str file name, which is
+ # wrong because file names are binary. It does accept an open
+ # file object, though, and open() has no such restriction.
+ f = open(filename, 'rb')
+ self.zip = ZipFile(f)
+ else:
+ self.zip = ZipFile(filename)
return True
except Exception as err:
self.em.rclog("openfile: failed: [%s]" % err)
diff --git a/src/filters/xls-dump.py b/src/filters/xls-dump.py
index d826654f..15613f35 100755
--- a/src/filters/xls-dump.py
+++ b/src/filters/xls-dump.py
@@ -1,10 +1,14 @@
-#!/usr/bin/env python
+#!/usr/bin/env python2
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
#
+# mso-dumper is not compatible with python3
+
+from __future__ import print_function
+
import sys, os.path, optparse
sys.path.append(sys.path[0]+"/msodump.zip")
@@ -97,7 +101,7 @@ class XLDumper(object):
node.prettyPrint(sys.stdout, docroot, utf8 = self.params.utf8)
except Exception as err:
- print >> sys.stderr, "xls-dump.py: error: %s" % err
+ print("xls-dump.py: error: %s" % err, file=sys.stderr)
sys.exit(1)
def dump (self):
diff --git a/src/filters/xlsxmltocsv.py b/src/filters/xlsxmltocsv.py
index 72850d3a..cfc39304 100755
--- a/src/filters/xlsxmltocsv.py
+++ b/src/filters/xlsxmltocsv.py
@@ -1,4 +1,9 @@
-#!/usr/bin/env python
+#!/usr/bin/env python2
+
+# Transform XML output from xls-dump.py into csv format.
+# Note: msodumper is not compatible with python3.
+
+from __future__ import print_function
import sys
import xml.sax
@@ -28,7 +33,7 @@ class XlsXmlHandler(xml.sax.handler.ContentHandler):
if "value" in attrs:
value = attrs["value"].encode("UTF-8")
else:
- value = unicode()
+ value = b''
if "col" in attrs:
self.cells[int(attrs["col"])] = value
else:
@@ -42,7 +47,7 @@ class XlsXmlHandler(xml.sax.handler.ContentHandler):
def endElement(self, name, ):
if name == "row":
curidx = 0
- for idx, value in self.cells.iteritems():
+ for idx, value in self.cells.items():
self.output += sepstring * (idx - curidx)
self.output += "%s%s%s" % (dquote, value, dquote)
curidx = idx