Process text/plain subdocuments like .txt files (paging big ones, etc.)
This commit is contained in:
parent
ff722de81a
commit
15924ce037
@ -939,7 +939,10 @@ breakloop:
|
|||||||
}
|
}
|
||||||
// Keep this AFTER collectIpathAndMT
|
// Keep this AFTER collectIpathAndMT
|
||||||
dijontorcl(doc);
|
dijontorcl(doc);
|
||||||
|
// Fix the bogus mtype used to force mh_text processing of text subdocs
|
||||||
|
if (doc.mimetype == "text/plain1") {
|
||||||
|
doc.mimetype = "text/plain";
|
||||||
|
}
|
||||||
// Possibly destack so that we can test for FIDone. While doing this
|
// Possibly destack so that we can test for FIDone. While doing this
|
||||||
// possibly set aside an ancestor html text (for the GUI preview)
|
// possibly set aside an ancestor html text (for the GUI preview)
|
||||||
while (!m_handlers.empty() && !m_handlers.back()->has_documents()) {
|
while (!m_handlers.empty() && !m_handlers.back()->has_documents()) {
|
||||||
|
|||||||
@ -314,6 +314,15 @@ bool MimeHandlerExecMultiple::next_document()
|
|||||||
mtype = "application/octet-stream";
|
mtype = "application/octet-stream";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
/* If we identify text/plain from the suffix (as opposed
|
||||||
|
to the handler setting the type), we use text/plain1
|
||||||
|
instead. As directed in mimeconf, this will cause the
|
||||||
|
text handler to be applied (instead of internfile just
|
||||||
|
ending things there), allowing splitting and default
|
||||||
|
charset conversions. */
|
||||||
|
if (mtype == "text/plain") {
|
||||||
|
mtype = "text/plain1";
|
||||||
|
}
|
||||||
}
|
}
|
||||||
m_metaData[cstr_dj_keymt] = mtype;
|
m_metaData[cstr_dj_keymt] = mtype;
|
||||||
if (!m_forPreview) {
|
if (!m_forPreview) {
|
||||||
|
|||||||
@ -36,8 +36,23 @@
|
|||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
const int MB = 1024*1024;
|
void MimeHandlerText::getparams()
|
||||||
const int KB = 1024;
|
{
|
||||||
|
m_config->getConfParam("textfilemaxmbs", &m_maxmbs);
|
||||||
|
|
||||||
|
// Text file page size: if set, we split text files into
|
||||||
|
// multiple documents
|
||||||
|
int ps = 1000;
|
||||||
|
m_config->getConfParam("textfilepagekbs", &ps);
|
||||||
|
if (ps != -1) {
|
||||||
|
ps *= 1024;
|
||||||
|
m_paging = true;
|
||||||
|
} else {
|
||||||
|
m_paging = false;
|
||||||
|
}
|
||||||
|
m_pagesz = size_t(ps);
|
||||||
|
m_offs = 0;
|
||||||
|
}
|
||||||
|
|
||||||
// Process a plain text file
|
// Process a plain text file
|
||||||
bool MimeHandlerText::set_document_file_impl(const string&, const string &fn)
|
bool MimeHandlerText::set_document_file_impl(const string&, const string &fn)
|
||||||
@ -46,13 +61,9 @@ bool MimeHandlerText::set_document_file_impl(const string&, const string &fn)
|
|||||||
m_offs << "\n");
|
m_offs << "\n");
|
||||||
|
|
||||||
m_fn = fn;
|
m_fn = fn;
|
||||||
// This should not be necessary, but it happens on msw that offset is large
|
|
||||||
// negative at this point, could not find the reason (still trying).
|
|
||||||
m_offs = 0;
|
|
||||||
|
|
||||||
// file size for oversize check
|
// file size for oversize check
|
||||||
long long fsize = path_filesize(m_fn);
|
m_totlen = path_filesize(m_fn);
|
||||||
if (fsize < 0) {
|
if (m_totlen < 0) {
|
||||||
LOGERR("MimeHandlerText::set_document_file: stat " << m_fn <<
|
LOGERR("MimeHandlerText::set_document_file: stat " << m_fn <<
|
||||||
" errno " << errno << "\n");
|
" errno " << errno << "\n");
|
||||||
return false;
|
return false;
|
||||||
@ -64,31 +75,14 @@ bool MimeHandlerText::set_document_file_impl(const string&, const string &fn)
|
|||||||
pxattr::get(m_fn, "charset", &m_charsetfromxattr);
|
pxattr::get(m_fn, "charset", &m_charsetfromxattr);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Max file size parameter: texts over this size are not indexed
|
getparams();
|
||||||
int maxmbs = 20;
|
if (m_maxmbs != -1 && m_totlen / (1024*1024) > m_maxmbs) {
|
||||||
m_config->getConfParam("textfilemaxmbs", &maxmbs);
|
LOGINF("MimeHandlerText: file too big (textfilemaxmbs=" << m_maxmbs <<
|
||||||
|
|
||||||
if (maxmbs == -1 || fsize / MB <= maxmbs) {
|
|
||||||
// Text file page size: if set, we split text files into
|
|
||||||
// multiple documents
|
|
||||||
int ps = 1000;
|
|
||||||
m_config->getConfParam("textfilepagekbs", &ps);
|
|
||||||
if (ps != -1) {
|
|
||||||
ps *= KB;
|
|
||||||
m_paging = true;
|
|
||||||
}
|
|
||||||
// Note: size_t is guaranteed unsigned, so max if ps is -1
|
|
||||||
m_pagesz = size_t(ps);
|
|
||||||
if (!readnext())
|
|
||||||
return false;
|
|
||||||
} else {
|
|
||||||
LOGINF("MimeHandlerText: file too big (textfilemaxmbs=" << maxmbs <<
|
|
||||||
"), contents will not be indexed: " << fn << endl);
|
"), contents will not be indexed: " << fn << endl);
|
||||||
}
|
} else {
|
||||||
if (!m_forPreview) {
|
if (!readnext()) {
|
||||||
string md5, xmd5;
|
return false;
|
||||||
MD5String(m_text, md5);
|
}
|
||||||
m_metaData[cstr_dj_keymd5] = MD5HexPrint(md5, xmd5);
|
|
||||||
}
|
}
|
||||||
m_havedoc = true;
|
m_havedoc = true;
|
||||||
return true;
|
return true;
|
||||||
@ -97,12 +91,25 @@ bool MimeHandlerText::set_document_file_impl(const string&, const string &fn)
|
|||||||
bool MimeHandlerText::set_document_string_impl(const string&,
|
bool MimeHandlerText::set_document_string_impl(const string&,
|
||||||
const string& otext)
|
const string& otext)
|
||||||
{
|
{
|
||||||
m_text = otext;
|
m_fn.clear();
|
||||||
if (!m_forPreview) {
|
m_totlen = otext.size();
|
||||||
string md5, xmd5;
|
|
||||||
MD5String(m_text, md5);
|
getparams();
|
||||||
m_metaData[cstr_dj_keymd5] = MD5HexPrint(md5, xmd5);
|
if (m_maxmbs != -1 && m_totlen / (1024*1024) > m_maxmbs) {
|
||||||
|
LOGINF("MimeHandlerText: text too big (textfilemaxmbs=" << m_maxmbs <<
|
||||||
|
"), contents will not be indexed\n");
|
||||||
|
} else {
|
||||||
|
if (!m_paging || (m_totlen <= (int64_t)m_pagesz)) {
|
||||||
|
// Avoid copy for texts smaller than page size
|
||||||
|
m_paging = false;
|
||||||
|
m_text = otext;
|
||||||
|
m_offs = m_totlen;
|
||||||
|
} else {
|
||||||
|
m_alltext = otext;
|
||||||
|
readnext();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
m_havedoc = true;
|
m_havedoc = true;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -112,9 +119,9 @@ bool MimeHandlerText::skip_to_document(const string& ipath)
|
|||||||
char *endptr;
|
char *endptr;
|
||||||
int64_t t = strtoll(ipath.c_str(), &endptr, 10);
|
int64_t t = strtoll(ipath.c_str(), &endptr, 10);
|
||||||
if (endptr == ipath.c_str()) {
|
if (endptr == ipath.c_str()) {
|
||||||
LOGERR("MimeHandlerText::skip_to_document: bad ipath offs [" <<
|
LOGERR("MimeHandlerText::skip_to_document: bad ipath offs [" <<
|
||||||
ipath << "]\n");
|
ipath << "]\n");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
m_offs = t;
|
m_offs = t;
|
||||||
readnext();
|
readnext();
|
||||||
@ -126,16 +133,21 @@ bool MimeHandlerText::next_document()
|
|||||||
LOGDEB("MimeHandlerText::next_document: m_havedoc " << m_havedoc << "\n");
|
LOGDEB("MimeHandlerText::next_document: m_havedoc " << m_havedoc << "\n");
|
||||||
|
|
||||||
if (m_havedoc == false)
|
if (m_havedoc == false)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
if (m_charsetfromxattr.empty())
|
if (m_charsetfromxattr.empty())
|
||||||
m_metaData[cstr_dj_keyorigcharset] = m_dfltInputCharset;
|
m_metaData[cstr_dj_keyorigcharset] = m_dfltInputCharset;
|
||||||
else
|
else
|
||||||
m_metaData[cstr_dj_keyorigcharset] = m_charsetfromxattr;
|
m_metaData[cstr_dj_keyorigcharset] = m_charsetfromxattr;
|
||||||
|
|
||||||
m_metaData[cstr_dj_keymt] = cstr_textplain;
|
m_metaData[cstr_dj_keymt] = cstr_textplain;
|
||||||
|
|
||||||
size_t srclen = m_text.length();
|
size_t srclen = m_text.length();
|
||||||
|
if (!m_forPreview) {
|
||||||
|
string md5, xmd5;
|
||||||
|
MD5String(m_text, md5);
|
||||||
|
m_metaData[cstr_dj_keymd5] = MD5HexPrint(md5, xmd5);
|
||||||
|
}
|
||||||
m_metaData[cstr_dj_keycontent].swap(m_text);
|
m_metaData[cstr_dj_keycontent].swap(m_text);
|
||||||
|
|
||||||
// We transcode even if defcharset is supposedly already utf-8:
|
// We transcode even if defcharset is supposedly already utf-8:
|
||||||
@ -143,7 +155,6 @@ bool MimeHandlerText::next_document()
|
|||||||
// txtdcode() truncates the text if transcoding fails
|
// txtdcode() truncates the text if transcoding fails
|
||||||
(void)txtdcode("mh_text");
|
(void)txtdcode("mh_text");
|
||||||
|
|
||||||
|
|
||||||
// If the text length is 0 (the file is empty or oversize), or we are
|
// If the text length is 0 (the file is empty or oversize), or we are
|
||||||
// not paging, we're done
|
// not paging, we're done
|
||||||
if (srclen == 0 || !m_paging) {
|
if (srclen == 0 || !m_paging) {
|
||||||
@ -152,21 +163,26 @@ bool MimeHandlerText::next_document()
|
|||||||
} else {
|
} else {
|
||||||
// Paging: set ipath then read next chunk.
|
// Paging: set ipath then read next chunk.
|
||||||
|
|
||||||
|
int64_t start_offset = m_offs - srclen;
|
||||||
|
string buf = lltodecstr(start_offset);
|
||||||
|
|
||||||
// Don't set ipath for the first chunk to avoid having 2
|
// Don't set ipath for the first chunk to avoid having 2
|
||||||
// records for small files (one for the file, one for the
|
// records for small files (one for the file, one for the
|
||||||
// first chunk). This is a hack. The right thing to do would
|
// first chunk). This is a hack. The right thing to do would
|
||||||
// be to use a different mtype for files over the page size,
|
// be to use a different mtype for files over the page size,
|
||||||
// and keep text/plain only for smaller files.
|
// and keep text/plain only for smaller files.
|
||||||
string buf = lltodecstr(m_offs - srclen);
|
if (start_offset != 0)
|
||||||
if (m_offs - srclen != 0)
|
|
||||||
m_metaData[cstr_dj_keyipath] = buf;
|
m_metaData[cstr_dj_keyipath] = buf;
|
||||||
|
|
||||||
readnext();
|
readnext();
|
||||||
|
|
||||||
// This ensures that the first chunk (offs==srclen) of a
|
// This ensures that the first chunk (offs==srclen) of a
|
||||||
// multi-chunk file does have an ipath. Else it stands for the
|
// multi-chunk file does have an ipath. Else it stands for the
|
||||||
// whole file, which used to be the case but does not seem
|
// whole file (see just above), which used to be the case but
|
||||||
// right
|
// does not seem right
|
||||||
if (m_havedoc)
|
if (m_havedoc)
|
||||||
m_metaData[cstr_dj_keyipath] = buf;
|
m_metaData[cstr_dj_keyipath] = buf;
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -175,11 +191,16 @@ bool MimeHandlerText::readnext()
|
|||||||
{
|
{
|
||||||
string reason;
|
string reason;
|
||||||
m_text.clear();
|
m_text.clear();
|
||||||
if (!file_to_string(m_fn, m_text, m_offs, m_pagesz, &reason)) {
|
if (!m_fn.empty()) {
|
||||||
LOGERR("MimeHandlerText: can't read file: " << reason << "\n" );
|
if (!file_to_string(m_fn, m_text, m_offs, m_pagesz, &reason)) {
|
||||||
m_havedoc = false;
|
LOGERR("MimeHandlerText: can't read file: " << reason << "\n" );
|
||||||
return false;
|
m_havedoc = false;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
m_text = m_alltext.substr(m_offs, m_pagesz);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (m_text.length() == 0) {
|
if (m_text.length() == 0) {
|
||||||
// EOF
|
// EOF
|
||||||
m_havedoc = false;
|
m_havedoc = false;
|
||||||
@ -189,7 +210,8 @@ bool MimeHandlerText::readnext()
|
|||||||
// If possible try to adjust the chunk to end right after a line
|
// If possible try to adjust the chunk to end right after a line
|
||||||
// Don't do this for the last chunk. Last chunk of exactly the
|
// Don't do this for the last chunk. Last chunk of exactly the
|
||||||
// page size might be unduly split, no big deal
|
// page size might be unduly split, no big deal
|
||||||
if (m_text.length() == m_pagesz) {
|
if (m_text.length() == m_pagesz && m_text.back() != '\n' &&
|
||||||
|
m_text.back() != '\r') {
|
||||||
string::size_type pos = m_text.find_last_of("\n\r");
|
string::size_type pos = m_text.find_last_of("\n\r");
|
||||||
if (pos != string::npos && pos != 0) {
|
if (pos != string::npos && pos != 0) {
|
||||||
m_text.erase(pos);
|
m_text.erase(pos);
|
||||||
@ -198,4 +220,3 @@ bool MimeHandlerText::readnext()
|
|||||||
m_offs += m_text.length();
|
m_offs += m_text.length();
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
201
src/internfile/mh_text.cpp-old
Normal file
201
src/internfile/mh_text.cpp-old
Normal file
@ -0,0 +1,201 @@
|
|||||||
|
/* Copyright (C) 2005 J.F.Dockes
|
||||||
|
* This program is free software; you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation; either version 2 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program; if not, write to the
|
||||||
|
* Free Software Foundation, Inc.,
|
||||||
|
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
*/
|
||||||
|
#include "autoconfig.h"
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <errno.h>
|
||||||
|
#include "safefcntl.h"
|
||||||
|
#include <sys/types.h>
|
||||||
|
#include "safeunistd.h"
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
#include "cstr.h"
|
||||||
|
#include "mh_text.h"
|
||||||
|
#include "log.h"
|
||||||
|
#include "readfile.h"
|
||||||
|
#include "md5ut.h"
|
||||||
|
#include "rclconfig.h"
|
||||||
|
#include "pxattr.h"
|
||||||
|
#include "pathut.h"
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
const int MB = 1024*1024;
|
||||||
|
const int KB = 1024;
|
||||||
|
|
||||||
|
// Process a plain text file
|
||||||
|
bool MimeHandlerText::set_document_file_impl(const string&, const string &fn)
|
||||||
|
{
|
||||||
|
LOGDEB("MimeHandlerText::set_document_file: [" << fn << "] offs " <<
|
||||||
|
m_offs << "\n");
|
||||||
|
|
||||||
|
m_fn = fn;
|
||||||
|
// This should not be necessary, but it happens on msw that offset is large
|
||||||
|
// negative at this point, could not find the reason (still trying).
|
||||||
|
m_offs = 0;
|
||||||
|
|
||||||
|
// file size for oversize check
|
||||||
|
long long fsize = path_filesize(m_fn);
|
||||||
|
if (fsize < 0) {
|
||||||
|
LOGERR("MimeHandlerText::set_document_file: stat " << m_fn <<
|
||||||
|
" errno " << errno << "\n");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifndef _WIN32
|
||||||
|
// Check for charset defined in extended attribute as per:
|
||||||
|
// http://freedesktop.org/wiki/CommonExtendedAttributes
|
||||||
|
pxattr::get(m_fn, "charset", &m_charsetfromxattr);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Max file size parameter: texts over this size are not indexed
|
||||||
|
int maxmbs = 20;
|
||||||
|
m_config->getConfParam("textfilemaxmbs", &maxmbs);
|
||||||
|
|
||||||
|
if (maxmbs == -1 || fsize / MB <= maxmbs) {
|
||||||
|
// Text file page size: if set, we split text files into
|
||||||
|
// multiple documents
|
||||||
|
int ps = 1000;
|
||||||
|
m_config->getConfParam("textfilepagekbs", &ps);
|
||||||
|
if (ps != -1) {
|
||||||
|
ps *= KB;
|
||||||
|
m_paging = true;
|
||||||
|
}
|
||||||
|
// Note: size_t is guaranteed unsigned, so max if ps is -1
|
||||||
|
m_pagesz = size_t(ps);
|
||||||
|
if (!readnext())
|
||||||
|
return false;
|
||||||
|
} else {
|
||||||
|
LOGINF("MimeHandlerText: file too big (textfilemaxmbs=" << maxmbs <<
|
||||||
|
"), contents will not be indexed: " << fn << endl);
|
||||||
|
}
|
||||||
|
if (!m_forPreview) {
|
||||||
|
string md5, xmd5;
|
||||||
|
MD5String(m_text, md5);
|
||||||
|
m_metaData[cstr_dj_keymd5] = MD5HexPrint(md5, xmd5);
|
||||||
|
}
|
||||||
|
m_havedoc = true;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool MimeHandlerText::set_document_string_impl(const string&,
|
||||||
|
const string& otext)
|
||||||
|
{
|
||||||
|
m_text = otext;
|
||||||
|
if (!m_forPreview) {
|
||||||
|
string md5, xmd5;
|
||||||
|
MD5String(m_text, md5);
|
||||||
|
m_metaData[cstr_dj_keymd5] = MD5HexPrint(md5, xmd5);
|
||||||
|
}
|
||||||
|
m_havedoc = true;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool MimeHandlerText::skip_to_document(const string& ipath)
|
||||||
|
{
|
||||||
|
char *endptr;
|
||||||
|
int64_t t = strtoll(ipath.c_str(), &endptr, 10);
|
||||||
|
if (endptr == ipath.c_str()) {
|
||||||
|
LOGERR("MimeHandlerText::skip_to_document: bad ipath offs [" <<
|
||||||
|
ipath << "]\n");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
m_offs = t;
|
||||||
|
readnext();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool MimeHandlerText::next_document()
|
||||||
|
{
|
||||||
|
LOGDEB("MimeHandlerText::next_document: m_havedoc " << m_havedoc << "\n");
|
||||||
|
|
||||||
|
if (m_havedoc == false)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if (m_charsetfromxattr.empty())
|
||||||
|
m_metaData[cstr_dj_keyorigcharset] = m_dfltInputCharset;
|
||||||
|
else
|
||||||
|
m_metaData[cstr_dj_keyorigcharset] = m_charsetfromxattr;
|
||||||
|
|
||||||
|
m_metaData[cstr_dj_keymt] = cstr_textplain;
|
||||||
|
|
||||||
|
size_t srclen = m_text.length();
|
||||||
|
m_metaData[cstr_dj_keycontent].swap(m_text);
|
||||||
|
|
||||||
|
// We transcode even if defcharset is supposedly already utf-8:
|
||||||
|
// this validates the encoding.
|
||||||
|
// txtdcode() truncates the text if transcoding fails
|
||||||
|
(void)txtdcode("mh_text");
|
||||||
|
|
||||||
|
|
||||||
|
// If the text length is 0 (the file is empty or oversize), or we are
|
||||||
|
// not paging, we're done
|
||||||
|
if (srclen == 0 || !m_paging) {
|
||||||
|
m_havedoc = false;
|
||||||
|
return true;
|
||||||
|
} else {
|
||||||
|
// Paging: set ipath then read next chunk.
|
||||||
|
|
||||||
|
// Don't set ipath for the first chunk to avoid having 2
|
||||||
|
// records for small files (one for the file, one for the
|
||||||
|
// first chunk). This is a hack. The right thing to do would
|
||||||
|
// be to use a different mtype for files over the page size,
|
||||||
|
// and keep text/plain only for smaller files.
|
||||||
|
string buf = lltodecstr(m_offs - srclen);
|
||||||
|
if (m_offs - srclen != 0)
|
||||||
|
m_metaData[cstr_dj_keyipath] = buf;
|
||||||
|
readnext();
|
||||||
|
// This ensures that the first chunk (offs==srclen) of a
|
||||||
|
// multi-chunk file does have an ipath. Else it stands for the
|
||||||
|
// whole file, which used to be the case but does not seem
|
||||||
|
// right
|
||||||
|
if (m_havedoc)
|
||||||
|
m_metaData[cstr_dj_keyipath] = buf;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool MimeHandlerText::readnext()
|
||||||
|
{
|
||||||
|
string reason;
|
||||||
|
m_text.clear();
|
||||||
|
if (!file_to_string(m_fn, m_text, m_offs, m_pagesz, &reason)) {
|
||||||
|
LOGERR("MimeHandlerText: can't read file: " << reason << "\n" );
|
||||||
|
m_havedoc = false;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (m_text.length() == 0) {
|
||||||
|
// EOF
|
||||||
|
m_havedoc = false;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// If possible try to adjust the chunk to end right after a line
|
||||||
|
// Don't do this for the last chunk. Last chunk of exactly the
|
||||||
|
// page size might be unduly split, no big deal
|
||||||
|
if (m_text.length() == m_pagesz) {
|
||||||
|
string::size_type pos = m_text.find_last_of("\n\r");
|
||||||
|
if (pos != string::npos && pos != 0) {
|
||||||
|
m_text.erase(pos);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
m_offs += m_text.length();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
@ -61,12 +61,16 @@ protected:
|
|||||||
private:
|
private:
|
||||||
bool m_paging{false};
|
bool m_paging{false};
|
||||||
std::string m_text;
|
std::string m_text;
|
||||||
|
std::string m_alltext;
|
||||||
std::string m_fn;
|
std::string m_fn;
|
||||||
int64_t m_offs{0}; // Offset of next read in file if we're paging
|
int64_t m_offs{0}; // Offset of next read in file if we're paging
|
||||||
|
int64_t m_totlen{0};
|
||||||
size_t m_pagesz{0};
|
size_t m_pagesz{0};
|
||||||
|
int m_maxmbs{20};
|
||||||
std::string m_charsetfromxattr;
|
std::string m_charsetfromxattr;
|
||||||
|
|
||||||
bool readnext();
|
bool readnext();
|
||||||
|
void getparams();
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif /* _MH_TEXT_H_INCLUDED_ */
|
#endif /* _MH_TEXT_H_INCLUDED_ */
|
||||||
|
|||||||
@ -205,6 +205,7 @@ text/calendar = execm rclics;mimetype=text/plain
|
|||||||
text/css = internal text/plain
|
text/css = internal text/plain
|
||||||
text/html = internal
|
text/html = internal
|
||||||
text/plain = internal
|
text/plain = internal
|
||||||
|
text/plain1 = internal
|
||||||
text/rtf = exec unrtf --nopict --html;mimetype=text/html
|
text/rtf = exec unrtf --nopict --html;mimetype=text/html
|
||||||
text/x-bibtex = exec rclbibtex.sh ; mimetype = text/plain
|
text/x-bibtex = exec rclbibtex.sh ; mimetype = text/plain
|
||||||
text/x-c = internal
|
text/x-c = internal
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user