search/index: fixed a number of bad conversions to properly deal with text documents bigger than 2GB
This commit is contained in:
parent
5e59354535
commit
88685d2e64
@ -109,3 +109,8 @@
|
|||||||
#undef _FILE_OFFSET_BITS
|
#undef _FILE_OFFSET_BITS
|
||||||
#undef _LARGE_FILES
|
#undef _LARGE_FILES
|
||||||
|
|
||||||
|
#if _FILE_OFFSET_BITS == 64 || defined(__APPLE__)
|
||||||
|
#define OFFTPC "%lld"
|
||||||
|
#else
|
||||||
|
#define OFFTPC "%ld"
|
||||||
|
#endif
|
||||||
|
|||||||
@ -406,7 +406,7 @@ BeagleQueueIndexer::processone(const string &path,
|
|||||||
dotdoc.fmtime = ascdate;
|
dotdoc.fmtime = ascdate;
|
||||||
|
|
||||||
char cbuf[100];
|
char cbuf[100];
|
||||||
sprintf(cbuf, "%ld", (long)stp->st_size);
|
sprintf(cbuf, OFFTPC, stp->st_size);
|
||||||
dotdoc.fbytes = cbuf;
|
dotdoc.fbytes = cbuf;
|
||||||
|
|
||||||
// Document signature for up to date checks: none.
|
// Document signature for up to date checks: none.
|
||||||
@ -453,7 +453,7 @@ BeagleQueueIndexer::processone(const string &path,
|
|||||||
doc.fmtime = ascdate;
|
doc.fmtime = ascdate;
|
||||||
|
|
||||||
char cbuf[100];
|
char cbuf[100];
|
||||||
sprintf(cbuf, "%ld", (long)stp->st_size);
|
sprintf(cbuf, OFFTPC, stp->st_size);
|
||||||
doc.fbytes = cbuf;
|
doc.fbytes = cbuf;
|
||||||
// Document signature for up to date checks: none.
|
// Document signature for up to date checks: none.
|
||||||
doc.sig = "";
|
doc.sig = "";
|
||||||
|
|||||||
@ -288,6 +288,12 @@ void FsIndexer::setlocalfields(Rcl::Doc& doc)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void makesig(const struct stat *stp, string& out)
|
||||||
|
{
|
||||||
|
char cbuf[100];
|
||||||
|
sprintf(cbuf, OFFTPC "%ld", stp->st_size, (long)stp->RCL_STTIME);
|
||||||
|
out = cbuf;
|
||||||
|
}
|
||||||
|
|
||||||
/// This method gets called for every file and directory found by the
|
/// This method gets called for every file and directory found by the
|
||||||
/// tree walker.
|
/// tree walker.
|
||||||
@ -340,9 +346,8 @@ FsIndexer::processone(const std::string &fn, const struct stat *stp,
|
|||||||
// for the uptodate check (the value computed here is checked
|
// for the uptodate check (the value computed here is checked
|
||||||
// against the stored one). Changing the computation forces a full
|
// against the stored one). Changing the computation forces a full
|
||||||
// reindex of course.
|
// reindex of course.
|
||||||
char cbuf[100];
|
string sig;
|
||||||
sprintf(cbuf, "%ld%ld", (long)stp->st_size, (long)stp->RCL_STTIME);
|
makesig(stp, sig);
|
||||||
string sig = cbuf;
|
|
||||||
string udi;
|
string udi;
|
||||||
make_udi(fn, "", udi);
|
make_udi(fn, "", udi);
|
||||||
if (!m_db->needUpdate(udi, sig)) {
|
if (!m_db->needUpdate(udi, sig)) {
|
||||||
@ -420,14 +425,14 @@ FsIndexer::processone(const std::string &fn, const struct stat *stp,
|
|||||||
doc.utf8fn = utf8fn;
|
doc.utf8fn = utf8fn;
|
||||||
|
|
||||||
char cbuf[100];
|
char cbuf[100];
|
||||||
sprintf(cbuf, "%ld", (long)stp->st_size);
|
sprintf(cbuf, OFFTPC, stp->st_size);
|
||||||
doc.fbytes = cbuf;
|
doc.fbytes = cbuf;
|
||||||
// Document signature for up to date checks: concatenate
|
// Document signature for up to date checks: concatenate
|
||||||
// m/ctime and size. Looking for changes only, no need to
|
// m/ctime and size. Looking for changes only, no need to
|
||||||
// parseback so no need for reversible formatting. Also set,
|
// parseback so no need for reversible formatting. Also set,
|
||||||
// but never used, for subdocs.
|
// but never used, for subdocs.
|
||||||
sprintf(cbuf, "%ld%ld", (long)stp->st_size, (long)stp->RCL_STTIME);
|
makesig(stp, doc.sig);
|
||||||
doc.sig = cbuf;
|
|
||||||
// If there was an error, ensure indexing will be
|
// If there was an error, ensure indexing will be
|
||||||
// retried. This is for the once missing, later installed
|
// retried. This is for the once missing, later installed
|
||||||
// filter case. It can make indexing much slower (if there are
|
// filter case. It can make indexing much slower (if there are
|
||||||
@ -473,11 +478,10 @@ FsIndexer::processone(const std::string &fn, const struct stat *stp,
|
|||||||
fileDoc.url = string("file://") + fn;
|
fileDoc.url = string("file://") + fn;
|
||||||
|
|
||||||
char cbuf[100];
|
char cbuf[100];
|
||||||
sprintf(cbuf, "%ld", (long)stp->st_size);
|
sprintf(cbuf, OFFTPC, stp->st_size);
|
||||||
fileDoc.fbytes = cbuf;
|
fileDoc.fbytes = cbuf;
|
||||||
// Document signature for up to date checks.
|
// Document signature for up to date checks.
|
||||||
sprintf(cbuf, "%ld%ld", (long)stp->st_size, (long)stp->RCL_STTIME);
|
makesig(stp, fileDoc.sig);
|
||||||
fileDoc.sig = cbuf;
|
|
||||||
if (!m_db->addOrUpdate(parent_udi, "", fileDoc))
|
if (!m_db->addOrUpdate(parent_udi, "", fileDoc))
|
||||||
return FsTreeWalker::FtwError;
|
return FsTreeWalker::FtwError;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -108,8 +108,8 @@ public:
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
if (fseeko(fp, cacheoffset(msgnum), SEEK_SET) != 0) {
|
if (fseeko(fp, cacheoffset(msgnum), SEEK_SET) != 0) {
|
||||||
LOGDEB0(("MboxCache::get_offsets: seek %ld errno %d\n",
|
LOGDEB0(("MboxCache::get_offsets: seek %lld errno %d\n",
|
||||||
(long)cacheoffset(msgnum), errno));
|
cacheoffset(msgnum), errno));
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
mbhoff_type offset = -1;
|
mbhoff_type offset = -1;
|
||||||
|
|||||||
@ -69,11 +69,13 @@ bool MimeHandlerText::set_document_file(const string &fn)
|
|||||||
}
|
}
|
||||||
m_pagesz = size_t(ps);
|
m_pagesz = size_t(ps);
|
||||||
string reason;
|
string reason;
|
||||||
|
LOGDEB(("calling file_to_string\n"));
|
||||||
// file_to_string() takes pagesz == size_t(-1) to mean read all.
|
// file_to_string() takes pagesz == size_t(-1) to mean read all.
|
||||||
if (!file_to_string(fn, m_text, 0, m_pagesz, &reason)) {
|
if (!file_to_string(fn, m_text, 0, m_pagesz, &reason)) {
|
||||||
LOGERR(("MimeHandlerText: can't read file: %s\n", reason.c_str()));
|
LOGERR(("MimeHandlerText: can't read file: %s\n", reason.c_str()));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
LOGDEB(("file_to_string OK\n"));
|
||||||
m_offs = m_text.length();
|
m_offs = m_text.length();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -14,6 +14,9 @@
|
|||||||
* Free Software Foundation, Inc.,
|
* Free Software Foundation, Inc.,
|
||||||
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
*/
|
*/
|
||||||
|
#ifdef HAVE_CONFIG_H
|
||||||
|
#include "autoconfig.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
@ -163,7 +166,7 @@ void ResListPager::displayDoc(RclConfig *config,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Size information. We print both doc and file if they differ a lot
|
// Size information. We print both doc and file if they differ a lot
|
||||||
long fsize = -1, dsize = -1;
|
off_t fsize = -1, dsize = -1;
|
||||||
if (!doc.dbytes.empty())
|
if (!doc.dbytes.empty())
|
||||||
dsize = atol(doc.dbytes.c_str());
|
dsize = atol(doc.dbytes.c_str());
|
||||||
if (!doc.fbytes.empty())
|
if (!doc.fbytes.empty())
|
||||||
|
|||||||
@ -368,7 +368,7 @@ string& MD5HexScan(const string& xdigest, string& digest)
|
|||||||
class FileScanMd5 : public FileScanDo {
|
class FileScanMd5 : public FileScanDo {
|
||||||
public:
|
public:
|
||||||
FileScanMd5(string& d) : digest(d) {}
|
FileScanMd5(string& d) : digest(d) {}
|
||||||
virtual bool init(unsigned int size, string *)
|
virtual bool init(size_t size, string *)
|
||||||
{
|
{
|
||||||
MD5Init(&ctx);
|
MD5Init(&ctx);
|
||||||
return true;
|
return true;
|
||||||
|
|||||||
@ -78,7 +78,7 @@ class FileToString : public FileScanDo {
|
|||||||
public:
|
public:
|
||||||
FileToString(string& data) : m_data(data) {}
|
FileToString(string& data) : m_data(data) {}
|
||||||
string& m_data;
|
string& m_data;
|
||||||
bool init(unsigned int size, string *reason) {
|
bool init(size_t size, string *reason) {
|
||||||
if (size > 0)
|
if (size > 0)
|
||||||
m_data.reserve(size);
|
m_data.reserve(size);
|
||||||
return true;
|
return true;
|
||||||
@ -135,10 +135,10 @@ bool file_scan(const string &fn, FileScanDo* doer, off_t startoffs,
|
|||||||
noclosing = false;
|
noclosing = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (st.st_size > 0) {
|
if (cnttoread != (size_t)-1 && cnttoread) {
|
||||||
doer->init(st.st_size+1, reason);
|
|
||||||
} else if (cnttoread) {
|
|
||||||
doer->init(cnttoread+1, reason);
|
doer->init(cnttoread+1, reason);
|
||||||
|
} else if (st.st_size > 0) {
|
||||||
|
doer->init(st.st_size+1, reason);
|
||||||
} else {
|
} else {
|
||||||
doer->init(0, reason);
|
doer->init(0, reason);
|
||||||
}
|
}
|
||||||
|
|||||||
@ -29,7 +29,7 @@ using std::string;
|
|||||||
class FileScanDo {
|
class FileScanDo {
|
||||||
public:
|
public:
|
||||||
virtual ~FileScanDo() {}
|
virtual ~FileScanDo() {}
|
||||||
virtual bool init(unsigned int size, string *reason) = 0;
|
virtual bool init(size_t size, string *reason) = 0;
|
||||||
virtual bool data(const char *buf, int cnt, string* reason) = 0;
|
virtual bool data(const char *buf, int cnt, string* reason) = 0;
|
||||||
};
|
};
|
||||||
bool file_scan(const string &filename, FileScanDo* doer, string *reason = 0);
|
bool file_scan(const string &filename, FileScanDo* doer, string *reason = 0);
|
||||||
|
|||||||
@ -14,7 +14,11 @@
|
|||||||
* Free Software Foundation, Inc.,
|
* Free Software Foundation, Inc.,
|
||||||
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef TEST_SMALLUT
|
#ifndef TEST_SMALLUT
|
||||||
|
#ifdef HAVE_CONFIG_H
|
||||||
|
#include "autoconfig.h"
|
||||||
|
#endif
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <time.h>
|
#include <time.h>
|
||||||
@ -556,7 +560,7 @@ bool pcSubst(const string& in, string& out, map<string, string>& subs)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Convert byte count into unit (KB/MB...) appropriate for display
|
// Convert byte count into unit (KB/MB...) appropriate for display
|
||||||
string displayableBytes(long size)
|
string displayableBytes(off_t size)
|
||||||
{
|
{
|
||||||
char sizebuf[30];
|
char sizebuf[30];
|
||||||
const char * unit = " B ";
|
const char * unit = " B ";
|
||||||
@ -568,7 +572,7 @@ string displayableBytes(long size)
|
|||||||
unit = " MB ";
|
unit = " MB ";
|
||||||
size /= (1024*1024);
|
size /= (1024*1024);
|
||||||
}
|
}
|
||||||
sprintf(sizebuf, "%ld%s", size, unit);
|
sprintf(sizebuf, OFFTPC "%s", size, unit);
|
||||||
return string(sizebuf);
|
return string(sizebuf);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -16,6 +16,7 @@
|
|||||||
*/
|
*/
|
||||||
#ifndef _SMALLUT_H_INCLUDED_
|
#ifndef _SMALLUT_H_INCLUDED_
|
||||||
#define _SMALLUT_H_INCLUDED_
|
#define _SMALLUT_H_INCLUDED_
|
||||||
|
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
@ -125,7 +126,7 @@ extern string truncate_to_word(const string &input, string::size_type maxlen);
|
|||||||
extern void utf8truncate(string &s, int maxlen);
|
extern void utf8truncate(string &s, int maxlen);
|
||||||
|
|
||||||
/** Convert byte count into unit (KB/MB...) appropriate for display */
|
/** Convert byte count into unit (KB/MB...) appropriate for display */
|
||||||
string displayableBytes(long size);
|
string displayableBytes(off_t size);
|
||||||
|
|
||||||
/** Break big string into lines */
|
/** Break big string into lines */
|
||||||
string breakIntoLines(const string& in, unsigned int ll = 100,
|
string breakIntoLines(const string& in, unsigned int ll = 100,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user