Merged the changes from the current windows port
This commit is contained in:
commit
5330685ec1
15
.hgignore
15
.hgignore
@ -11,6 +11,16 @@ libtool
|
|||||||
*.lo
|
*.lo
|
||||||
*~
|
*~
|
||||||
\#*
|
\#*
|
||||||
|
*.obj
|
||||||
|
*.sdf
|
||||||
|
*.tlog
|
||||||
|
*.lib
|
||||||
|
*.idb
|
||||||
|
*.log
|
||||||
|
*.pdb
|
||||||
|
.vs
|
||||||
|
*.exe
|
||||||
|
*.ilk
|
||||||
ptrans
|
ptrans
|
||||||
src/aclocal.m4
|
src/aclocal.m4
|
||||||
src/compile
|
src/compile
|
||||||
@ -82,12 +92,7 @@ src/qtgui/recoll
|
|||||||
src/qtgui/recoll.app
|
src/qtgui/recoll.app
|
||||||
src/qtgui/recoll.pro
|
src/qtgui/recoll.pro
|
||||||
src/query/alldeps
|
src/query/alldeps
|
||||||
src/query/location.hh
|
|
||||||
src/query/position.hh
|
|
||||||
src/query/recollq
|
src/query/recollq
|
||||||
src/query/stack.hh
|
|
||||||
src/query/wasaparse.cpp
|
|
||||||
src/query/wasaparse.hpp
|
|
||||||
src/sampleconf/rclmon.sh
|
src/sampleconf/rclmon.sh
|
||||||
src/sampleconf/recoll.conf
|
src/sampleconf/recoll.conf
|
||||||
src/utils/alldeps
|
src/utils/alldeps
|
||||||
|
|||||||
@ -22,7 +22,8 @@ COMMONCPPFLAGS = -I. \
|
|||||||
-I$(top_srcdir)/rcldb \
|
-I$(top_srcdir)/rcldb \
|
||||||
-I$(top_srcdir)/unac \
|
-I$(top_srcdir)/unac \
|
||||||
-I$(top_srcdir)/utils \
|
-I$(top_srcdir)/utils \
|
||||||
-I$(top_srcdir)/xaposix
|
-I$(top_srcdir)/xaposix \
|
||||||
|
-DBUILDING_RECOLL
|
||||||
|
|
||||||
AM_CPPFLAGS = -Wall -Wno-unused \
|
AM_CPPFLAGS = -Wall -Wno-unused \
|
||||||
$(COMMONCPPFLAGS) \
|
$(COMMONCPPFLAGS) \
|
||||||
|
|||||||
@ -47,7 +47,7 @@ BincStream::~BincStream(void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
//------------------------------------------------------------------------
|
//------------------------------------------------------------------------
|
||||||
string BincStream::popString(unsigned int size)
|
string BincStream::popString(std::string::size_type size)
|
||||||
{
|
{
|
||||||
if (size > nstr.length())
|
if (size > nstr.length())
|
||||||
size = nstr.length();
|
size = nstr.length();
|
||||||
|
|||||||
@ -25,6 +25,7 @@
|
|||||||
*/
|
*/
|
||||||
#ifndef convert_h_included
|
#ifndef convert_h_included
|
||||||
#define convert_h_included
|
#define convert_h_included
|
||||||
|
#include <stddef.h>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <iomanip>
|
#include <iomanip>
|
||||||
@ -93,7 +94,7 @@ namespace Binc {
|
|||||||
std::string tmp;
|
std::string tmp;
|
||||||
for (std::string::const_iterator i = s.begin();
|
for (std::string::const_iterator i = s.begin();
|
||||||
i != s.end() && i + 1 != s.end(); i += 2) {
|
i != s.end() && i + 1 != s.end(); i += 2) {
|
||||||
int n;
|
ptrdiff_t n;
|
||||||
unsigned char c = *i;
|
unsigned char c = *i;
|
||||||
unsigned char d = *(i + 1);
|
unsigned char d = *(i + 1);
|
||||||
|
|
||||||
@ -122,7 +123,7 @@ namespace Binc {
|
|||||||
for (std::string::const_iterator i = s_in.begin(); i != s_in.end(); ++i) {
|
for (std::string::const_iterator i = s_in.begin(); i != s_in.end(); ++i) {
|
||||||
unsigned char c = (unsigned char)*i;
|
unsigned char c = (unsigned char)*i;
|
||||||
if (c <= 31 || c >= 127 || c == '\"' || c == '\\')
|
if (c <= 31 || c >= 127 || c == '\"' || c == '\\')
|
||||||
return "{" + toString(s_in.length()) + "}\r\n" + s_in;
|
return "{" + toString((unsigned long)s_in.length()) + "}\r\n" + s_in;
|
||||||
}
|
}
|
||||||
|
|
||||||
return "\"" + s_in + "\"";
|
return "\"" + s_in + "\"";
|
||||||
@ -145,7 +146,7 @@ namespace Binc {
|
|||||||
//----------------------------------------------------------------------
|
//----------------------------------------------------------------------
|
||||||
inline void chomp(std::string &s_in, const std::string &chars = " \t\r\n")
|
inline void chomp(std::string &s_in, const std::string &chars = " \t\r\n")
|
||||||
{
|
{
|
||||||
int n = s_in.length();
|
std::string::size_type n = s_in.length();
|
||||||
while (n > 1 && chars.find(s_in[n - 1]) != std::string::npos)
|
while (n > 1 && chars.find(s_in[n - 1]) != std::string::npos)
|
||||||
s_in.resize(n-- - 1);
|
s_in.resize(n-- - 1);
|
||||||
}
|
}
|
||||||
@ -290,7 +291,7 @@ namespace Binc {
|
|||||||
BincStream &operator << (char t);
|
BincStream &operator << (char t);
|
||||||
|
|
||||||
//--
|
//--
|
||||||
std::string popString(unsigned int size);
|
std::string popString(std::string::size_type size);
|
||||||
|
|
||||||
//--
|
//--
|
||||||
char popChar(void);
|
char popChar(void);
|
||||||
|
|||||||
@ -25,7 +25,7 @@
|
|||||||
*/
|
*/
|
||||||
#ifndef mime_inputsource_h_included
|
#ifndef mime_inputsource_h_included
|
||||||
#define mime_inputsource_h_included
|
#define mime_inputsource_h_included
|
||||||
|
#include "autoconfig.h"
|
||||||
// Data source for MIME parser
|
// Data source for MIME parser
|
||||||
|
|
||||||
// Note about large files: we might want to change the unsigned int
|
// Note about large files: we might want to change the unsigned int
|
||||||
@ -49,7 +49,7 @@ namespace Binc {
|
|||||||
inline MimeInputSource(int fd, unsigned int start = 0);
|
inline MimeInputSource(int fd, unsigned int start = 0);
|
||||||
virtual inline ~MimeInputSource(void);
|
virtual inline ~MimeInputSource(void);
|
||||||
|
|
||||||
virtual inline size_t fillRaw(char *raw, size_t nbytes);
|
virtual inline ssize_t fillRaw(char *raw, size_t nbytes);
|
||||||
virtual inline void reset(void);
|
virtual inline void reset(void);
|
||||||
|
|
||||||
virtual inline bool fillInputBuffer(void);
|
virtual inline bool fillInputBuffer(void);
|
||||||
@ -87,7 +87,7 @@ namespace Binc {
|
|||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
inline size_t MimeInputSource::fillRaw(char *raw, size_t nbytes)
|
inline ssize_t MimeInputSource::fillRaw(char *raw, size_t nbytes)
|
||||||
{
|
{
|
||||||
return read(fd, raw, nbytes);
|
return read(fd, raw, nbytes);
|
||||||
}
|
}
|
||||||
@ -179,7 +179,7 @@ namespace Binc {
|
|||||||
class MimeInputSourceStream : public MimeInputSource {
|
class MimeInputSourceStream : public MimeInputSource {
|
||||||
public:
|
public:
|
||||||
inline MimeInputSourceStream(istream& s, unsigned int start = 0);
|
inline MimeInputSourceStream(istream& s, unsigned int start = 0);
|
||||||
virtual inline size_t fillRaw(char *raw, size_t nb);
|
virtual inline ssize_t fillRaw(char *raw, size_t nb);
|
||||||
virtual inline void reset(void);
|
virtual inline void reset(void);
|
||||||
private:
|
private:
|
||||||
istream& s;
|
istream& s;
|
||||||
@ -191,7 +191,7 @@ namespace Binc {
|
|||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
inline size_t MimeInputSourceStream::fillRaw(char *raw, size_t nb)
|
inline ssize_t MimeInputSourceStream::fillRaw(char *raw, size_t nb)
|
||||||
{
|
{
|
||||||
// Why can't streams tell how many characters were actually read
|
// Why can't streams tell how many characters were actually read
|
||||||
// when hitting eof ?
|
// when hitting eof ?
|
||||||
@ -199,16 +199,16 @@ namespace Binc {
|
|||||||
s.seekg(0, ios::end);
|
s.seekg(0, ios::end);
|
||||||
std::streampos lst = s.tellg();
|
std::streampos lst = s.tellg();
|
||||||
s.seekg(st);
|
s.seekg(st);
|
||||||
size_t nbytes = lst - st;
|
size_t nbytes = size_t(lst - st);
|
||||||
if (nbytes > nb) {
|
if (nbytes > nb) {
|
||||||
nbytes = nb;
|
nbytes = nb;
|
||||||
}
|
}
|
||||||
if (nbytes <= 0) {
|
if (nbytes <= 0) {
|
||||||
return (size_t)-1;
|
return (ssize_t)-1;
|
||||||
}
|
}
|
||||||
|
|
||||||
s.read(raw, nbytes);
|
s.read(raw, nbytes);
|
||||||
return nbytes;
|
return static_cast<ssize_t>(nbytes);
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void MimeInputSourceStream::reset(void)
|
inline void MimeInputSourceStream::reset(void)
|
||||||
|
|||||||
@ -306,9 +306,9 @@ void Binc::MimePart::parseMessageRFC822(vector<Binc::MimePart> *members,
|
|||||||
bool Binc::MimePart::skipUntilBoundary(const string &delimiter,
|
bool Binc::MimePart::skipUntilBoundary(const string &delimiter,
|
||||||
unsigned int *nlines, bool *eof)
|
unsigned int *nlines, bool *eof)
|
||||||
{
|
{
|
||||||
int endpos = delimiter.length();
|
string::size_type endpos = delimiter.length();
|
||||||
char *delimiterqueue = 0;
|
char *delimiterqueue = 0;
|
||||||
int delimiterpos = 0;
|
string::size_type delimiterpos = 0;
|
||||||
const char *delimiterStr = delimiter.c_str();
|
const char *delimiterStr = delimiter.c_str();
|
||||||
if (delimiter != "") {
|
if (delimiter != "") {
|
||||||
delimiterqueue = new char[endpos];
|
delimiterqueue = new char[endpos];
|
||||||
@ -340,7 +340,7 @@ bool Binc::MimePart::skipUntilBoundary(const string &delimiter,
|
|||||||
delimiterpos = 0;
|
delimiterpos = 0;
|
||||||
|
|
||||||
if (compareStringToQueue(delimiterStr, delimiterqueue,
|
if (compareStringToQueue(delimiterStr, delimiterqueue,
|
||||||
delimiterpos, endpos)) {
|
delimiterpos, int(endpos))) {
|
||||||
foundBoundary = true;
|
foundBoundary = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -451,7 +451,7 @@ void Binc::MimePart::parseMultipart(const string &boundary,
|
|||||||
skipUntilBoundary(delimiter, nlines, eof);
|
skipUntilBoundary(delimiter, nlines, eof);
|
||||||
|
|
||||||
if (!eof)
|
if (!eof)
|
||||||
*boundarysize = delimiter.size();
|
*boundarysize = int(delimiter.size());
|
||||||
|
|
||||||
postBoundaryProcessing(eof, nlines, boundarysize, foundendofpart);
|
postBoundaryProcessing(eof, nlines, boundarysize, foundendofpart);
|
||||||
|
|
||||||
@ -484,7 +484,7 @@ void Binc::MimePart::parseMultipart(const string &boundary,
|
|||||||
skipUntilBoundary(delimiter, nlines, eof);
|
skipUntilBoundary(delimiter, nlines, eof);
|
||||||
|
|
||||||
if (!*eof)
|
if (!*eof)
|
||||||
*boundarysize = delimiter.size();
|
*boundarysize = int(delimiter.size());
|
||||||
|
|
||||||
postBoundaryProcessing(eof, nlines, boundarysize, foundendofpart);
|
postBoundaryProcessing(eof, nlines, boundarysize, foundendofpart);
|
||||||
}
|
}
|
||||||
@ -528,7 +528,7 @@ void Binc::MimePart::parseSinglePart(const string &toboundary,
|
|||||||
// *boundarysize = _toboundary.length();
|
// *boundarysize = _toboundary.length();
|
||||||
|
|
||||||
char *boundaryqueue = 0;
|
char *boundaryqueue = 0;
|
||||||
int endpos = _toboundary.length();
|
size_t endpos = _toboundary.length();
|
||||||
if (toboundary != "") {
|
if (toboundary != "") {
|
||||||
boundaryqueue = new char[endpos];
|
boundaryqueue = new char[endpos];
|
||||||
memset(boundaryqueue, 0, endpos);
|
memset(boundaryqueue, 0, endpos);
|
||||||
@ -540,7 +540,7 @@ void Binc::MimePart::parseSinglePart(const string &toboundary,
|
|||||||
string line;
|
string line;
|
||||||
bool toboundaryIsEmpty = (toboundary == "");
|
bool toboundaryIsEmpty = (toboundary == "");
|
||||||
char c;
|
char c;
|
||||||
int boundarypos = 0;
|
string::size_type boundarypos = 0;
|
||||||
while (mimeSource->getChar(&c)) {
|
while (mimeSource->getChar(&c)) {
|
||||||
if (c == '\n') { ++*nbodylines; ++*nlines; }
|
if (c == '\n') { ++*nbodylines; ++*nlines; }
|
||||||
|
|
||||||
@ -553,8 +553,8 @@ void Binc::MimePart::parseSinglePart(const string &toboundary,
|
|||||||
boundarypos = 0;
|
boundarypos = 0;
|
||||||
|
|
||||||
if (compareStringToQueue(_toboundaryStr, boundaryqueue,
|
if (compareStringToQueue(_toboundaryStr, boundaryqueue,
|
||||||
boundarypos, endpos)) {
|
boundarypos, int(endpos))) {
|
||||||
*boundarysize = _toboundary.length();
|
*boundarysize = static_cast<int>(_toboundary.length());
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -119,7 +119,7 @@ int Binc::MimePart::doParseOnlyHeader(MimeInputSource *ms,
|
|||||||
if (c == '\n') ++nlines;
|
if (c == '\n') ++nlines;
|
||||||
if (c == ':') break;
|
if (c == ':') break;
|
||||||
if (c == '\n') {
|
if (c == '\n') {
|
||||||
for (int i = name.length() - 1; i >= 0; --i)
|
// Push back one character per character of name. Count down with i > 0:
// size_type is unsigned, so an "i >= 0" test can never become false.
for (string::size_type i = name.length(); i > 0; --i)
|
||||||
mimeSource->ungetChar();
|
mimeSource->ungetChar();
|
||||||
|
|
||||||
quit = true;
|
quit = true;
|
||||||
|
|||||||
186
src/common/autoconfig-win.h
Normal file
186
src/common/autoconfig-win.h
Normal file
@ -0,0 +1,186 @@
|
|||||||
|
/* Manually edited version of autoconfig.h for windows. Many things are
|
||||||
|
overridden in the c++ code by ifdefs _WIN32 anyway */
|
||||||
|
#ifndef _AUTOCONFIG_H_INCLUDED
|
||||||
|
#define _AUTOCONFIG_H_INCLUDED
|
||||||
|
/* Define if building universal (internal helper macro) */
|
||||||
|
/* #undef AC_APPLE_UNIVERSAL_BUILD */
|
||||||
|
|
||||||
|
/* Path to the aspell api include file */
|
||||||
|
/* #undef ASPELL_INCLUDE "aspell-local.h" */
|
||||||
|
|
||||||
|
/* Path to the aspell program */
|
||||||
|
/* #define ASPELL_PROG "/usr/bin/aspell" */
|
||||||
|
|
||||||
|
/* No X11 session monitoring support */
|
||||||
|
#define DISABLE_X11MON
|
||||||
|
|
||||||
|
/* Path to the fam api include file */
|
||||||
|
/* #undef FAM_INCLUDE */
|
||||||
|
|
||||||
|
/* Path to the file program */
|
||||||
|
#define FILE_PROG "/usr/bin/file"
|
||||||
|
|
||||||
|
/* "Have C++0x" */
|
||||||
|
#undef HAVE_CXX0X_UNORDERED
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <dlfcn.h> header file. */
|
||||||
|
#define HAVE_DLFCN_H 1
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <inttypes.h> header file. */
|
||||||
|
#define HAVE_INTTYPES_H 1
|
||||||
|
|
||||||
|
/* Define to 1 if you have the `dl' library (-ldl). */
|
||||||
|
#define HAVE_LIBDL 1
|
||||||
|
|
||||||
|
/* Define to 1 if you have the `pthread' library (-lpthread). */
|
||||||
|
#define HAVE_LIBPTHREAD 1
|
||||||
|
|
||||||
|
/* Define to 1 if you have the `z' library (-lz). */
|
||||||
|
#define HAVE_LIBZ 1
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <memory.h> header file. */
|
||||||
|
#define HAVE_MEMORY_H 1
|
||||||
|
|
||||||
|
/* Define to 1 if you have the `mkdtemp' function. */
|
||||||
|
/* #undef HAVE_MKDTEMP */
|
||||||
|
|
||||||
|
/* Define to 1 if you have the `posix_spawn,' function. */
|
||||||
|
/* #undef HAVE_POSIX_SPAWN_ */
|
||||||
|
|
||||||
|
/* Define to 1 if you have the `setrlimit' function. */
|
||||||
|
#define HAVE_SETRLIMIT 1
|
||||||
|
|
||||||
|
/* Has std::shared_ptr */
|
||||||
|
#define HAVE_SHARED_PTR_STD
|
||||||
|
|
||||||
|
/* Has std::tr1::shared_ptr */
|
||||||
|
/* #undef HAVE_SHARED_PTR_TR1 */
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <spawn.h> header file. */
|
||||||
|
#define HAVE_SPAWN_H 1
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <stdint.h> header file. */
|
||||||
|
#define HAVE_STDINT_H 1
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <stdlib.h> header file. */
|
||||||
|
#define HAVE_STDLIB_H 1
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <strings.h> header file. */
|
||||||
|
#define HAVE_STRINGS_H 1
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <string.h> header file. */
|
||||||
|
#define HAVE_STRING_H 1
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <sys/mount.h> header file. */
|
||||||
|
/* #undef HAVE_SYS_MOUNT_H */
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <sys/param.h,> header file. */
|
||||||
|
/* #undef HAVE_SYS_PARAM_H_ */
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <sys/statfs.h> header file. */
|
||||||
|
/* #undef HAVE_SYS_STATFS_H */
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <sys/statvfs.h> header file. */
|
||||||
|
/* #undef HAVE_SYS_STATVFS_H */
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <sys/stat.h> header file. */
|
||||||
|
#define HAVE_SYS_STAT_H 1
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <sys/types.h> header file. */
|
||||||
|
#define HAVE_SYS_TYPES_H 1
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <sys/vfs.h> header file. */
|
||||||
|
/* #undef HAVE_SYS_VFS_H */
|
||||||
|
|
||||||
|
/* "Have tr1" */
|
||||||
|
/* #undef HAVE_TR1_UNORDERED */
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <unistd.h> header file. */
|
||||||
|
/* #undef HAVE_UNISTD_H */
|
||||||
|
|
||||||
|
/* Use multiple threads for indexing */
|
||||||
|
#define IDX_THREADS 1
|
||||||
|
|
||||||
|
/* Define to the sub-directory in which libtool stores uninstalled libraries.
|
||||||
|
*/
|
||||||
|
#define LT_OBJDIR ".libs/"
|
||||||
|
|
||||||
|
/* Define to the address where bug reports for this package should be sent. */
|
||||||
|
#define PACKAGE_BUGREPORT ""
|
||||||
|
|
||||||
|
/* Define to the full name of this package. */
|
||||||
|
#define PACKAGE_NAME "Recoll"
|
||||||
|
|
||||||
|
/* Define to the full name and version of this package. */
|
||||||
|
#define PACKAGE_STRING "Recoll 1.22.0"
|
||||||
|
|
||||||
|
/* Define to the one symbol short name of this package. */
|
||||||
|
#define PACKAGE_TARNAME "recoll"
|
||||||
|
|
||||||
|
/* Define to the home page for this package. */
|
||||||
|
#define PACKAGE_URL ""
|
||||||
|
|
||||||
|
/* Define to the version of this package. */
|
||||||
|
#define PACKAGE_VERSION "1.22.0"
|
||||||
|
|
||||||
|
/* putenv parameter is const */
|
||||||
|
/* #undef PUTENV_ARG_CONST */
|
||||||
|
|
||||||
|
/* iconv parameter 2 is const char** */
|
||||||
|
#define RCL_ICONV_INBUF_CONST 1
|
||||||
|
|
||||||
|
/* Real time monitoring option */
|
||||||
|
#undef RCL_MONITOR
|
||||||
|
|
||||||
|
/* Split camelCase words */
|
||||||
|
/* #undef RCL_SPLIT_CAMELCASE */
|
||||||
|
|
||||||
|
/* Compile the aspell interface */
|
||||||
|
/* #undef RCL_USE_ASPELL */
|
||||||
|
|
||||||
|
/* Compile the fam interface */
|
||||||
|
/* #undef RCL_USE_FAM */
|
||||||
|
|
||||||
|
/* Compile the inotify interface */
|
||||||
|
#define RCL_USE_INOTIFY 1
|
||||||
|
|
||||||
|
/* Define to 1 if you have the ANSI C header files. */
|
||||||
|
#define STDC_HEADERS 1
|
||||||
|
|
||||||
|
/* Use posix_spawn() */
|
||||||
|
/* #undef USE_POSIX_SPAWN */
|
||||||
|
|
||||||
|
/* Enable using the system's 'file' command to id mime if we fail internally
|
||||||
|
*/
|
||||||
|
/* #undef USE_SYSTEM_FILE_COMMAND */
|
||||||
|
|
||||||
|
/* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most
|
||||||
|
significant byte first (like Motorola and SPARC, unlike Intel). */
|
||||||
|
#if defined AC_APPLE_UNIVERSAL_BUILD
|
||||||
|
# if defined __BIG_ENDIAN__
|
||||||
|
# define WORDS_BIGENDIAN 1
|
||||||
|
# endif
|
||||||
|
#else
|
||||||
|
# ifndef WORDS_BIGENDIAN
|
||||||
|
/* # undef WORDS_BIGENDIAN */
|
||||||
|
# endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Define to 1 if the X Window System is missing or not being used. */
|
||||||
|
/* #undef X_DISPLAY_MISSING */
|
||||||
|
|
||||||
|
/* Enable large inode numbers on Mac OS X 10.5. */
|
||||||
|
#ifndef _DARWIN_USE_64_BIT_INODE
|
||||||
|
# define _DARWIN_USE_64_BIT_INODE 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Number of bits in a file offset, on hosts where this is settable. */
|
||||||
|
/* #undef _FILE_OFFSET_BITS */
|
||||||
|
|
||||||
|
/* Define for large files, on AIX-style hosts. */
|
||||||
|
/* #undef _LARGE_FILES */
|
||||||
|
|
||||||
|
#define DISABLE_WEB_INDEXER
|
||||||
|
|
||||||
|
#include "conf_post.h"
|
||||||
|
#endif // already included
|
||||||
@ -35,7 +35,7 @@ BeagleQueueCache::BeagleQueueCache(RclConfig *cnf)
|
|||||||
ccdir = "webcache";
|
ccdir = "webcache";
|
||||||
ccdir = path_tildexpand(ccdir);
|
ccdir = path_tildexpand(ccdir);
|
||||||
// If not an absolute path, compute relative to config dir
|
// If not an absolute path, compute relative to config dir
|
||||||
if (ccdir.at(0) != '/')
|
if (!path_isabsolute(ccdir))
|
||||||
ccdir = path_cat(cnf->getConfDir(), ccdir);
|
ccdir = path_cat(cnf->getConfDir(), ccdir);
|
||||||
|
|
||||||
int maxmbs = 40;
|
int maxmbs = 40;
|
||||||
|
|||||||
@ -26,3 +26,42 @@
|
|||||||
# define STD_SHARED_PTR RefCntr
|
# define STD_SHARED_PTR RefCntr
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef _WIN32
|
||||||
|
#include "safewindows.h"
|
||||||
|
typedef int pid_t;
|
||||||
|
// Stand-in for readlink() inside the _WIN32 section: it ignores cp, buf and
// cnt, writes nothing to buf, and always returns -1.
inline int readlink(const char *cp, void *buf, int cnt) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
#define HAVE_STRUCT_TIMESPEC
|
||||||
|
#define strdup _strdup
|
||||||
|
#define timegm _mkgmtime
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
// gmtime is supposedly thread-safe on windows
|
||||||
|
// gmtime_r() must fill the caller-supplied struct tm (B) and return it.
// MSVC's gmtime_s() does the fill (note the swapped argument order) and
// returns 0 on success. B is evaluated twice, so pass a plain pointer.
#define gmtime_r(A, B) (gmtime_s((B), (A)) == 0 ? (B) : 0)
|
||||||
|
// localtime_r() must fill the caller-supplied struct tm (B) and return it.
// MSVC's localtime_s() does the fill (note the swapped argument order) and
// returns 0 on success. B is evaluated twice, so pass a plain pointer.
#define localtime_r(A,B) (localtime_s((B), (A)) == 0 ? (B) : 0)
|
||||||
|
#define PATH_MAX MAX_PATH
|
||||||
|
#define MAXPATHLEN PATH_MAX
|
||||||
|
typedef int mode_t;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
typedef DWORD32 u_int32_t;
|
||||||
|
typedef DWORD64 u_int64_t;
|
||||||
|
typedef unsigned __int8 u_int8_t;
|
||||||
|
typedef int ssize_t;
|
||||||
|
#define strncasecmp _strnicmp
|
||||||
|
#define strcasecmp _stricmp
|
||||||
|
#define ftruncate _chsize_s
|
||||||
|
#define chdir _chdir
|
||||||
|
|
||||||
|
#define R_OK 4
|
||||||
|
#define W_OK 2
|
||||||
|
/* NOTE(review): X_OK has the same value as R_OK above, presumably because
   the Windows access() has no separate execute-permission test; confirm. */
#define X_OK 4
|
||||||
|
#define RECOLL_DATADIR "C:\\recoll\\"
|
||||||
|
#define S_ISLNK(X) false
|
||||||
|
#define lstat stat
|
||||||
|
#define fseeko _fseeki64
|
||||||
|
#define ftello (off_t)_ftelli64
|
||||||
|
#define timegm _mkgmtime
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -19,11 +19,13 @@
|
|||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
|
#ifndef _WIN32
|
||||||
#include <langinfo.h>
|
#include <langinfo.h>
|
||||||
|
#include <sys/param.h>
|
||||||
|
#endif
|
||||||
#include <limits.h>
|
#include <limits.h>
|
||||||
#include "safesysstat.h"
|
#include "safesysstat.h"
|
||||||
#include "safeunistd.h"
|
#include "safeunistd.h"
|
||||||
#include <sys/param.h>
|
|
||||||
#ifdef __FreeBSD__
|
#ifdef __FreeBSD__
|
||||||
#include <osreldate.h>
|
#include <osreldate.h>
|
||||||
#endif
|
#endif
|
||||||
@ -45,6 +47,7 @@
|
|||||||
#include "readfile.h"
|
#include "readfile.h"
|
||||||
#include "fstreewalk.h"
|
#include "fstreewalk.h"
|
||||||
#include "cpuconf.h"
|
#include "cpuconf.h"
|
||||||
|
#include "execmd.h"
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
@ -120,7 +123,8 @@ void RclConfig::zeroMe() {
|
|||||||
|
|
||||||
bool RclConfig::isDefaultConfig() const
|
bool RclConfig::isDefaultConfig() const
|
||||||
{
|
{
|
||||||
string defaultconf = path_cat(path_canon(path_home()), ".recoll/");
|
string defaultconf = path_cat(path_homedata(),
|
||||||
|
path_defaultrecollconfsubdir());
|
||||||
string specifiedconf = path_canon(m_confdir);
|
string specifiedconf = path_canon(m_confdir);
|
||||||
path_catslash(specifiedconf);
|
path_catslash(specifiedconf);
|
||||||
return !defaultconf.compare(specifiedconf);
|
return !defaultconf.compare(specifiedconf);
|
||||||
@ -146,14 +150,7 @@ RclConfig::RclConfig(const string *argcnf)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Compute our data dir name, typically /usr/local/share/recoll
|
// Compute our data dir name, typically /usr/local/share/recoll
|
||||||
const char *cdatadir = getenv("RECOLL_DATADIR");
|
m_datadir = path_sharedatadir();
|
||||||
if (cdatadir == 0) {
|
|
||||||
// If not in environment, use the compiled-in constant.
|
|
||||||
m_datadir = RECOLL_DATADIR;
|
|
||||||
} else {
|
|
||||||
m_datadir = cdatadir;
|
|
||||||
}
|
|
||||||
|
|
||||||
// We only do the automatic configuration creation thing for the default
|
// We only do the automatic configuration creation thing for the default
|
||||||
// config dir, not if it was specified through -c or RECOLL_CONFDIR
|
// config dir, not if it was specified through -c or RECOLL_CONFDIR
|
||||||
bool autoconfdir = false;
|
bool autoconfdir = false;
|
||||||
@ -172,7 +169,7 @@ RclConfig::RclConfig(const string *argcnf)
|
|||||||
m_confdir = path_canon(cp);
|
m_confdir = path_canon(cp);
|
||||||
} else {
|
} else {
|
||||||
autoconfdir = true;
|
autoconfdir = true;
|
||||||
m_confdir = path_cat(path_home(), ".recoll/");
|
m_confdir = path_cat(path_homedata(), path_defaultrecollconfsubdir());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -200,6 +197,7 @@ RclConfig::RclConfig(const string *argcnf)
|
|||||||
// is called from the main thread at once, by constructing a config
|
// is called from the main thread at once, by constructing a config
|
||||||
// from recollinit
|
// from recollinit
|
||||||
if (o_localecharset.empty()) {
|
if (o_localecharset.empty()) {
|
||||||
|
#ifndef _WIN32
|
||||||
const char *cp;
|
const char *cp;
|
||||||
cp = nl_langinfo(CODESET);
|
cp = nl_langinfo(CODESET);
|
||||||
// We don't keep US-ASCII. It's better to use a superset
|
// We don't keep US-ASCII. It's better to use a superset
|
||||||
@ -217,6 +215,9 @@ RclConfig::RclConfig(const string *argcnf)
|
|||||||
// Use cp1252 instead of iso-8859-1, it's a superset.
|
// Use cp1252 instead of iso-8859-1, it's a superset.
|
||||||
o_localecharset = string(cstr_cp1252);
|
o_localecharset = string(cstr_cp1252);
|
||||||
}
|
}
|
||||||
|
#else
|
||||||
|
o_localecharset = "UTF-8";
|
||||||
|
#endif
|
||||||
LOGDEB1(("RclConfig::getDefCharset: localecharset [%s]\n",
|
LOGDEB1(("RclConfig::getDefCharset: localecharset [%s]\n",
|
||||||
o_localecharset.c_str()));
|
o_localecharset.c_str()));
|
||||||
}
|
}
|
||||||
@ -635,7 +636,7 @@ bool RclConfig::inStopSuffixes(const string& fni)
|
|||||||
it != stoplist.end(); it++) {
|
it != stoplist.end(); it++) {
|
||||||
STOPSUFFIXES->insert(SfString(stringtolower(*it)));
|
STOPSUFFIXES->insert(SfString(stringtolower(*it)));
|
||||||
if (m_maxsufflen < it->length())
|
if (m_maxsufflen < it->length())
|
||||||
m_maxsufflen = it->length();
|
m_maxsufflen = int(it->length());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1154,7 +1155,7 @@ string RclConfig::getConfdirPath(const char *varname, const char *dflt) const
|
|||||||
} else {
|
} else {
|
||||||
result = path_tildexpand(result);
|
result = path_tildexpand(result);
|
||||||
// If not an absolute path, compute relative to config dir
|
// If not an absolute path, compute relative to config dir
|
||||||
if (result.at(0) != '/') {
|
if (!path_isabsolute(result)) {
|
||||||
result = path_cat(getConfDir(), result);
|
result = path_cat(getConfDir(), result);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1212,7 +1213,7 @@ void RclConfig::urlrewrite(const string& dbdir, string& url) const
|
|||||||
// This call always succeeds because the key comes from getNames()
|
// This call always succeeds because the key comes from getNames()
|
||||||
if (m_ptrans->get(*it, npath, dbdir)) {
|
if (m_ptrans->get(*it, npath, dbdir)) {
|
||||||
path = path.replace(0, it->size(), npath);
|
path = path.replace(0, it->size(), npath);
|
||||||
url = "file://" + path;
|
url = path_pathtofileurl(path);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -1305,45 +1306,45 @@ vector<string> RclConfig::getDaemSkippedPaths() const
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// Look up an executable filter. We look in $RECOLL_FILTERSDIR,
|
// Look up an executable filter. We add $RECOLL_FILTERSDIR,
|
||||||
// filtersdir in config file, then let the system use the PATH
|
// and filtersdir from the config file to the PATH, then use execmd::which()
|
||||||
string RclConfig::findFilter(const string &icmd) const
|
string RclConfig::findFilter(const string &icmd) const
|
||||||
{
|
{
|
||||||
// If the path is absolute, this is it
|
// If the path is absolute, this is it
|
||||||
if (icmd[0] == '/')
|
if (path_isabsolute(icmd))
|
||||||
return icmd;
|
return icmd;
|
||||||
|
|
||||||
string cmd;
|
const char *cp = getenv("PATH");
|
||||||
const char *cp;
|
if (!cp) //??
|
||||||
|
cp = "";
|
||||||
|
string PATH(cp);
|
||||||
|
|
||||||
// Filters dir from environment ?
|
// For historical reasons: check in personal config directory
|
||||||
|
PATH = getConfDir() + path_PATHsep() + PATH;
|
||||||
|
|
||||||
|
string temp;
|
||||||
|
// Prepend $datadir/filters
|
||||||
|
temp = path_cat(m_datadir, "filters");
|
||||||
|
PATH = temp + path_PATHsep() + PATH;
|
||||||
|
|
||||||
|
// Prepend possible configuration parameter?
|
||||||
|
if (getConfParam(string("filtersdir"), temp)) {
|
||||||
|
temp = path_tildexpand(temp);
|
||||||
|
PATH = temp + path_PATHsep() + PATH;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Prepend possible environment variable
|
||||||
if ((cp = getenv("RECOLL_FILTERSDIR"))) {
|
if ((cp = getenv("RECOLL_FILTERSDIR"))) {
|
||||||
cmd = path_cat(cp, icmd);
|
PATH = string(cp) + path_PATHsep() + PATH;
|
||||||
if (access(cmd.c_str(), X_OK) == 0)
|
|
||||||
return cmd;
|
|
||||||
}
|
|
||||||
// Filters dir as configuration parameter?
|
|
||||||
if (getConfParam(string("filtersdir"), cmd)) {
|
|
||||||
cmd = path_cat(cmd, icmd);
|
|
||||||
if (access(cmd.c_str(), X_OK) == 0)
|
|
||||||
return cmd;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Filters dir as datadir subdir. Actually the standard case, but
|
string cmd;
|
||||||
// this is normally the same value found in config file (previous step)
|
if (ExecCmd::which(icmd, cmd, PATH.c_str())) {
|
||||||
cmd = path_cat(m_datadir, "filters");
|
return cmd;
|
||||||
cmd = path_cat(cmd, icmd);
|
} else {
|
||||||
if (access(cmd.c_str(), X_OK) == 0)
|
// Let the shell try to find it...
|
||||||
return cmd;
|
return icmd;
|
||||||
|
}
|
||||||
// Last resort for historical reasons: check in personal config
|
|
||||||
// directory
|
|
||||||
cmd = path_cat(getConfDir(), icmd);
|
|
||||||
if (access(cmd.c_str(), X_OK) == 0)
|
|
||||||
return cmd;
|
|
||||||
|
|
||||||
// Let the shell try to find it...
|
|
||||||
return icmd;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@ -17,6 +17,9 @@
|
|||||||
#include "autoconfig.h"
|
#include "autoconfig.h"
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
#ifdef _WIN32
|
||||||
|
#include "safewindows.h"
|
||||||
|
#endif
|
||||||
#include <signal.h>
|
#include <signal.h>
|
||||||
#include <locale.h>
|
#include <locale.h>
|
||||||
#include <pthread.h>
|
#include <pthread.h>
|
||||||
@ -33,8 +36,6 @@
|
|||||||
#include "smallut.h"
|
#include "smallut.h"
|
||||||
#include "execmd.h"
|
#include "execmd.h"
|
||||||
|
|
||||||
static const int catchedSigs[] = {SIGINT, SIGQUIT, SIGTERM, SIGUSR1, SIGUSR2};
|
|
||||||
|
|
||||||
static pthread_t mainthread_id;
|
static pthread_t mainthread_id;
|
||||||
|
|
||||||
static void siglogreopen(int)
|
static void siglogreopen(int)
|
||||||
@ -43,23 +44,17 @@ static void siglogreopen(int)
|
|||||||
DebugLog::reopen();
|
DebugLog::reopen();
|
||||||
}
|
}
|
||||||
|
|
||||||
RclConfig *recollinit(RclInitFlags flags,
|
#ifndef _WIN32
|
||||||
void (*cleanup)(void), void (*sigcleanup)(int),
|
// We would like to block SIGCHLD globally, but we can't because
|
||||||
string &reason, const string *argcnf)
|
// QT uses it. Have to block it inside execmd.cpp
|
||||||
|
static const int catchedSigs[] = {SIGINT, SIGQUIT, SIGTERM, SIGUSR1, SIGUSR2};
|
||||||
|
void initAsyncSigs(void (*sigcleanup)(int))
|
||||||
{
|
{
|
||||||
if (cleanup)
|
|
||||||
atexit(cleanup);
|
|
||||||
|
|
||||||
// We ignore SIGPIPE always. All pieces of code which can write to a pipe
|
// We ignore SIGPIPE always. All pieces of code which can write to a pipe
|
||||||
// must check write() return values.
|
// must check write() return values.
|
||||||
|
#ifndef _WIN32
|
||||||
signal(SIGPIPE, SIG_IGN);
|
signal(SIGPIPE, SIG_IGN);
|
||||||
|
#endif
|
||||||
// Make sure the locale is set. This is only for converting file names
|
|
||||||
// to utf8 for indexing.
|
|
||||||
setlocale(LC_CTYPE, "");
|
|
||||||
|
|
||||||
// We would like to block SIGCHLD globally, but we can't because
|
|
||||||
// QT uses it. Have to block it inside execmd.cpp
|
|
||||||
|
|
||||||
// Install app signal handler
|
// Install app signal handler
|
||||||
if (sigcleanup) {
|
if (sigcleanup) {
|
||||||
@ -75,11 +70,91 @@ RclConfig *recollinit(RclInitFlags flags,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Install log rotate sig handler
|
||||||
|
{
|
||||||
|
struct sigaction action;
|
||||||
|
action.sa_handler = siglogreopen;
|
||||||
|
action.sa_flags = 0;
|
||||||
|
sigemptyset(&action.sa_mask);
|
||||||
|
if (signal(SIGHUP, SIG_IGN) != SIG_IGN) {
|
||||||
|
if (sigaction(SIGHUP, &action, 0) < 0) {
|
||||||
|
perror("Sigaction failed");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
|
||||||
|
// Windows signals etc.
|
||||||
|
//
|
||||||
|
// ^C can be caught by the signal() emulation, but not ^Break
|
||||||
|
// apparently, which is why we use the native approach too
|
||||||
|
//
|
||||||
|
// When a keyboard interrupt occurs, windows creates a thread inside
|
||||||
|
// the process and calls the handler. The process exits when the
|
||||||
|
// handler returns or after at most 10S
|
||||||
|
//
|
||||||
|
// In practice, only recollindex sets sigcleanup(), and the routine
|
||||||
|
// just sets a global termination flag. So we just call it and sleep,
|
||||||
|
// hoping that cleanup does not take more than what Windows will let
|
||||||
|
// us live.
|
||||||
|
|
||||||
|
static void (*l_sigcleanup)(int);
|
||||||
|
|
||||||
|
// Native console control handler. Forwards every recognized console
// event to the cleanup routine registered in l_sigcleanup (always
// reported as SIGINT), then sleeps to leave the rest of the process time
// to terminate before this handler returns.
// Returns FALSE when no cleanup routine is registered or when the event
// type is not one of those listed below, TRUE otherwise.
static BOOL WINAPI CtrlHandler(DWORD fdwCtrlType)
{
    if (!l_sigcleanup) {
        return FALSE;
    }

    const bool recognized =
        fdwCtrlType == CTRL_C_EVENT ||
        fdwCtrlType == CTRL_CLOSE_EVENT ||
        fdwCtrlType == CTRL_BREAK_EVENT ||
        fdwCtrlType == CTRL_LOGOFF_EVENT ||
        fdwCtrlType == CTRL_SHUTDOWN_EVENT;
    if (!recognized) {
        return FALSE;
    }

    // The cleanup routine is expected to be quick (see the comment
    // above); the actual cleanup happens elsewhere while we sleep.
    l_sigcleanup(SIGINT);
    Sleep(10000);
    return TRUE;
}
|
||||||
|
|
||||||
|
static const int catchedSigs[] = {SIGINT, SIGTERM};
|
||||||
|
void initAsyncSigs(void (*sigcleanup)(int))
|
||||||
|
{
|
||||||
|
// Install app signal handler
|
||||||
|
if (sigcleanup) {
|
||||||
|
l_sigcleanup = sigcleanup;
|
||||||
|
for (unsigned int i = 0; i < sizeof(catchedSigs) / sizeof(int); i++) {
|
||||||
|
if (signal(catchedSigs[i], SIG_IGN) != SIG_IGN) {
|
||||||
|
signal(catchedSigs[i], sigcleanup);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
SetConsoleCtrlHandler((PHANDLER_ROUTINE)CtrlHandler, TRUE);
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
RclConfig *recollinit(RclInitFlags flags,
|
||||||
|
void (*cleanup)(void), void (*sigcleanup)(int),
|
||||||
|
string &reason, const string *argcnf)
|
||||||
|
{
|
||||||
|
if (cleanup)
|
||||||
|
atexit(cleanup);
|
||||||
|
|
||||||
|
// Make sure the locale is set. This is only for converting file names
|
||||||
|
// to utf8 for indexing.
|
||||||
|
setlocale(LC_CTYPE, "");
|
||||||
|
|
||||||
DebugLog::getdbl()->setloglevel(DEBDEB1);
|
DebugLog::getdbl()->setloglevel(DEBDEB1);
|
||||||
DebugLog::setfilename("stderr");
|
DebugLog::setfilename("stderr");
|
||||||
if (getenv("RECOLL_LOGDATE"))
|
if (getenv("RECOLL_LOGDATE"))
|
||||||
DebugLog::getdbl()->logdate(1);
|
DebugLog::getdbl()->logdate(1);
|
||||||
|
|
||||||
|
initAsyncSigs(sigcleanup);
|
||||||
|
|
||||||
RclConfig *config = new RclConfig(argcnf);
|
RclConfig *config = new RclConfig(argcnf);
|
||||||
if (!config || !config->ok()) {
|
if (!config || !config->ok()) {
|
||||||
reason = "Configuration could not be built:\n";
|
reason = "Configuration could not be built:\n";
|
||||||
@ -105,7 +180,7 @@ RclConfig *recollinit(RclInitFlags flags,
|
|||||||
if (!logfilename.empty()) {
|
if (!logfilename.empty()) {
|
||||||
logfilename = path_tildexpand(logfilename);
|
logfilename = path_tildexpand(logfilename);
|
||||||
// If not an absolute path or , compute relative to config dir
|
// If not an absolute path or , compute relative to config dir
|
||||||
if (logfilename.at(0) != '/' &&
|
if (!path_isabsolute(logfilename) &&
|
||||||
!DebugLog::DebugLog::isspecialname(logfilename.c_str())) {
|
!DebugLog::DebugLog::isspecialname(logfilename.c_str())) {
|
||||||
logfilename = path_cat(config->getConfDir(), logfilename);
|
logfilename = path_cat(config->getConfDir(), logfilename);
|
||||||
}
|
}
|
||||||
@ -115,18 +190,6 @@ RclConfig *recollinit(RclInitFlags flags,
|
|||||||
int lev = atoi(loglevel.c_str());
|
int lev = atoi(loglevel.c_str());
|
||||||
DebugLog::getdbl()->setloglevel(lev);
|
DebugLog::getdbl()->setloglevel(lev);
|
||||||
}
|
}
|
||||||
// Install log rotate sig handler
|
|
||||||
{
|
|
||||||
struct sigaction action;
|
|
||||||
action.sa_handler = siglogreopen;
|
|
||||||
action.sa_flags = 0;
|
|
||||||
sigemptyset(&action.sa_mask);
|
|
||||||
if (signal(SIGHUP, SIG_IGN) != SIG_IGN) {
|
|
||||||
if (sigaction(SIGHUP, &action, 0) < 0) {
|
|
||||||
perror("Sigaction failed");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Make sure the locale charset is initialized (so that multiple
|
// Make sure the locale charset is initialized (so that multiple
|
||||||
// threads don't try to do it at once).
|
// threads don't try to do it at once).
|
||||||
@ -139,14 +202,20 @@ RclConfig *recollinit(RclInitFlags flags,
|
|||||||
// Init smallut and pathut static values
|
// Init smallut and pathut static values
|
||||||
pathut_init_mt();
|
pathut_init_mt();
|
||||||
smallut_init_mt();
|
smallut_init_mt();
|
||||||
|
// Init execmd.h static PATH and PATHELT splitting
|
||||||
|
{string bogus;
|
||||||
|
ExecCmd::which("nosuchcmd", bogus);
|
||||||
|
}
|
||||||
|
|
||||||
// Init Unac translation exceptions
|
// Init Unac translation exceptions
|
||||||
string unacex;
|
string unacex;
|
||||||
if (config->getConfParam("unac_except_trans", unacex) && !unacex.empty())
|
if (config->getConfParam("unac_except_trans", unacex) && !unacex.empty())
|
||||||
unac_set_except_translations(unacex.c_str());
|
unac_set_except_translations(unacex.c_str());
|
||||||
|
|
||||||
#ifndef IDX_THREADS
|
#ifndef IDX_THREADS
|
||||||
|
#ifndef _WIN32
|
||||||
ExecCmd::useVfork(true);
|
ExecCmd::useVfork(true);
|
||||||
|
#endif
|
||||||
#else
|
#else
|
||||||
// Keep threads init behind log init, but make sure it's done before
|
// Keep threads init behind log init, but make sure it's done before
|
||||||
// we do the vfork choice ! The latter is not used any more actually,
|
// we do the vfork choice ! The latter is not used any more actually,
|
||||||
@ -156,11 +225,15 @@ RclConfig *recollinit(RclInitFlags flags,
|
|||||||
bool novfork;
|
bool novfork;
|
||||||
config->getConfParam("novfork", &novfork);
|
config->getConfParam("novfork", &novfork);
|
||||||
if (novfork) {
|
if (novfork) {
|
||||||
|
#ifndef _WIN32
|
||||||
LOGDEB0(("rclinit: will use fork() for starting commands\n"));
|
LOGDEB0(("rclinit: will use fork() for starting commands\n"));
|
||||||
ExecCmd::useVfork(false);
|
ExecCmd::useVfork(false);
|
||||||
|
#endif
|
||||||
} else {
|
} else {
|
||||||
|
#ifndef _WIN32
|
||||||
LOGDEB0(("rclinit: will use vfork() for starting commands\n"));
|
LOGDEB0(("rclinit: will use vfork() for starting commands\n"));
|
||||||
ExecCmd::useVfork(true);
|
ExecCmd::useVfork(true);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -179,10 +252,11 @@ RclConfig *recollinit(RclInitFlags flags,
|
|||||||
return config;
|
return config;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Signals are handled by the main thread. All others should call this routine
|
// Signals are handled by the main thread. All others should call this
|
||||||
// to block possible signals
|
// routine to block possible signals
|
||||||
void recoll_threadinit()
|
void recoll_threadinit()
|
||||||
{
|
{
|
||||||
|
#ifndef _WIN32
|
||||||
sigset_t sset;
|
sigset_t sset;
|
||||||
sigemptyset(&sset);
|
sigemptyset(&sset);
|
||||||
|
|
||||||
@ -190,6 +264,14 @@ void recoll_threadinit()
|
|||||||
sigaddset(&sset, catchedSigs[i]);
|
sigaddset(&sset, catchedSigs[i]);
|
||||||
sigaddset(&sset, SIGHUP);
|
sigaddset(&sset, SIGHUP);
|
||||||
pthread_sigmask(SIG_BLOCK, &sset, 0);
|
pthread_sigmask(SIG_BLOCK, &sset, 0);
|
||||||
|
#else
|
||||||
|
// Not sure that this is needed at all or correct under windows.
|
||||||
|
for (unsigned int i = 0; i < sizeof(catchedSigs) / sizeof(int); i++) {
|
||||||
|
if (signal(catchedSigs[i], SIG_IGN) != SIG_IGN) {
|
||||||
|
signal(catchedSigs[i], SIG_IGN);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
bool recoll_ismainthread()
|
bool recoll_ismainthread()
|
||||||
|
|||||||
@ -110,7 +110,7 @@ bool SynGroups::setfile(const string& fn)
|
|||||||
lnum++;
|
lnum++;
|
||||||
|
|
||||||
{
|
{
|
||||||
int ll = strlen(cline);
|
size_t ll = strlen(cline);
|
||||||
while (ll > 0 && (cline[ll-1] == '\n' || cline[ll-1] == '\r')) {
|
while (ll > 0 && (cline[ll-1] == '\n' || cline[ll-1] == '\r')) {
|
||||||
cline[ll-1] = 0;
|
cline[ll-1] = 0;
|
||||||
ll--;
|
ll--;
|
||||||
|
|||||||
@ -142,6 +142,8 @@ static inline int whatcc(unsigned int c)
|
|||||||
} else {
|
} else {
|
||||||
vector<unsigned int>::iterator it =
|
vector<unsigned int>::iterator it =
|
||||||
lower_bound(vpuncblocks.begin(), vpuncblocks.end(), c);
|
lower_bound(vpuncblocks.begin(), vpuncblocks.end(), c);
|
||||||
|
if (it == vpuncblocks.end())
|
||||||
|
return LETTER;
|
||||||
if (c == *it)
|
if (c == *it)
|
||||||
return SPACE;
|
return SPACE;
|
||||||
if ((it - vpuncblocks.begin()) % 2 == 1) {
|
if ((it - vpuncblocks.begin()) % 2 == 1) {
|
||||||
@ -217,11 +219,11 @@ bool TextSplit::o_deHyphenate = false;
|
|||||||
// Final term checkpoint: do some checking (the kind which is simpler
|
// Final term checkpoint: do some checking (the kind which is simpler
|
||||||
// to do here than in the main loop), then send term to our client.
|
// to do here than in the main loop), then send term to our client.
|
||||||
inline bool TextSplit::emitterm(bool isspan, string &w, int pos,
|
inline bool TextSplit::emitterm(bool isspan, string &w, int pos,
|
||||||
int btstart, int btend)
|
size_t btstart, size_t btend)
|
||||||
{
|
{
|
||||||
LOGDEB2(("TextSplit::emitterm: [%s] pos %d\n", w.c_str(), pos));
|
LOGDEB2(("TextSplit::emitterm: [%s] pos %d\n", w.c_str(), pos));
|
||||||
|
|
||||||
unsigned int l = w.length();
|
int l = int(w.length());
|
||||||
|
|
||||||
#ifdef TEXTSPLIT_STATS
|
#ifdef TEXTSPLIT_STATS
|
||||||
// Update word length statistics. Do this before we filter out
|
// Update word length statistics. Do this before we filter out
|
||||||
@ -230,7 +232,7 @@ inline bool TextSplit::emitterm(bool isspan, string &w, int pos,
|
|||||||
m_stats.newsamp(m_wordChars);
|
m_stats.newsamp(m_wordChars);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (l > 0 && l < (unsigned)m_maxWordLength) {
|
if (l > 0 && l < m_maxWordLength) {
|
||||||
// 1 byte word: we index single ascii letters and digits, but
|
// 1 byte word: we index single ascii letters and digits, but
|
||||||
// nothing else. We might want to turn this into a test for a
|
// nothing else. We might want to turn this into a test for a
|
||||||
// single utf8 character instead ?
|
// single utf8 character instead ?
|
||||||
@ -245,9 +247,9 @@ inline bool TextSplit::emitterm(bool isspan, string &w, int pos,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (pos != m_prevpos || l != m_prevlen) {
|
if (pos != m_prevpos || l != m_prevlen) {
|
||||||
bool ret = takeword(w, pos, btstart, btend);
|
bool ret = takeword(w, pos, int(btstart), int(btend));
|
||||||
m_prevpos = pos;
|
m_prevpos = pos;
|
||||||
m_prevlen = w.length();
|
m_prevlen = int(w.length());
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
LOGDEB2(("TextSplit::emitterm:dup: [%s] pos %d\n", w.c_str(), pos));
|
LOGDEB2(("TextSplit::emitterm:dup: [%s] pos %d\n", w.c_str(), pos));
|
||||||
@ -293,7 +295,7 @@ bool TextSplit::span_is_acronym(string *acronym)
|
|||||||
|
|
||||||
// Generate terms from span. Have to take into account the
|
// Generate terms from span. Have to take into account the
|
||||||
// flags: ONLYSPANS, NOSPANS, noNumbers
|
// flags: ONLYSPANS, NOSPANS, noNumbers
|
||||||
bool TextSplit::words_from_span(int bp)
|
bool TextSplit::words_from_span(size_t bp)
|
||||||
{
|
{
|
||||||
#if 0
|
#if 0
|
||||||
cerr << "Span: [" << m_span << "] " << " w_i_s size: " <<
|
cerr << "Span: [" << m_span << "] " << " w_i_s size: " <<
|
||||||
@ -305,10 +307,10 @@ bool TextSplit::words_from_span(int bp)
|
|||||||
}
|
}
|
||||||
cerr << endl;
|
cerr << endl;
|
||||||
#endif
|
#endif
|
||||||
unsigned int spanwords = m_words_in_span.size();
|
int spanwords = int(m_words_in_span.size());
|
||||||
int pos = m_spanpos;
|
int pos = m_spanpos;
|
||||||
// Byte position of the span start
|
// Byte position of the span start
|
||||||
int spboffs = bp - m_span.size();
|
size_t spboffs = bp - m_span.size();
|
||||||
|
|
||||||
if (o_deHyphenate && spanwords == 2 &&
|
if (o_deHyphenate && spanwords == 2 &&
|
||||||
m_span[m_words_in_span[0].second] == '-') {
|
m_span[m_words_in_span[0].second] == '-') {
|
||||||
@ -322,13 +324,13 @@ bool TextSplit::words_from_span(int bp)
|
|||||||
m_spanpos, spboffs, spboffs + m_words_in_span[1].second);
|
m_spanpos, spboffs, spboffs + m_words_in_span[1].second);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (unsigned int i = 0;
|
for (int i = 0;
|
||||||
i < ((m_flags&TXTS_ONLYSPANS) ? 1 : spanwords);
|
i < ((m_flags&TXTS_ONLYSPANS) ? 1 : spanwords);
|
||||||
i++, pos++) {
|
i++, pos++) {
|
||||||
|
|
||||||
int deb = m_words_in_span[i].first;
|
int deb = m_words_in_span[i].first;
|
||||||
|
|
||||||
for (unsigned int j = ((m_flags&TXTS_ONLYSPANS) ? spanwords-1 : i);
|
for (int j = ((m_flags&TXTS_ONLYSPANS) ? spanwords-1 : i);
|
||||||
j < ((m_flags&TXTS_NOSPANS) ? i+1 : spanwords);
|
j < ((m_flags&TXTS_NOSPANS) ? i+1 : spanwords);
|
||||||
j++) {
|
j++) {
|
||||||
|
|
||||||
@ -362,11 +364,11 @@ bool TextSplit::words_from_span(int bp)
|
|||||||
* @param spanerase Set if the current span is at its end. Process it.
|
* @param spanerase Set if the current span is at its end. Process it.
|
||||||
* @param bp The current BYTE position in the stream
|
* @param bp The current BYTE position in the stream
|
||||||
*/
|
*/
|
||||||
inline bool TextSplit::doemit(bool spanerase, int bp)
|
inline bool TextSplit::doemit(bool spanerase, size_t bp)
|
||||||
{
|
{
|
||||||
LOGDEB2(("TextSplit::doemit: sper %d bp %d spp %d spanwords %u wS %d wL %d "
|
LOGDEB2(("TextSplit::doemit: sper %d bp %d spp %d spanwords %u wS %d wL %d "
|
||||||
"inn %d span [%s]\n",
|
"inn %d span [%s]\n",
|
||||||
spanerase, bp, m_spanpos, m_words_in_span.size(),
|
spanerase, int(bp), m_spanpos, m_words_in_span.size(),
|
||||||
m_wordStart, m_wordLen, m_inNumber, m_span.c_str()));
|
m_wordStart, m_wordLen, m_inNumber, m_span.c_str()));
|
||||||
|
|
||||||
if (m_wordLen) {
|
if (m_wordLen) {
|
||||||
@ -404,8 +406,8 @@ inline bool TextSplit::doemit(bool spanerase, int bp)
|
|||||||
case '\'':
|
case '\'':
|
||||||
m_span.resize(m_span.length()-1);
|
m_span.resize(m_span.length()-1);
|
||||||
if (m_words_in_span.size() &&
|
if (m_words_in_span.size() &&
|
||||||
m_words_in_span.back().second > m_span.size())
|
m_words_in_span.back().second > int(m_span.size()))
|
||||||
m_words_in_span.back().second = m_span.size();
|
m_words_in_span.back().second = int(m_span.size());
|
||||||
if (--bp < 0)
|
if (--bp < 0)
|
||||||
bp = 0;
|
bp = 0;
|
||||||
break;
|
break;
|
||||||
@ -422,7 +424,7 @@ inline bool TextSplit::doemit(bool spanerase, int bp)
|
|||||||
|
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
m_wordStart = m_span.length();
|
m_wordStart = int(m_span.length());
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -830,16 +832,16 @@ bool TextSplit::cjk_to_words(Utf8Iter *itp, unsigned int *cp)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Take note of byte offset for this character.
|
// Take note of byte offset for this character.
|
||||||
boffs[nchars-1] = it.getBpos();
|
boffs[nchars-1] = int(it.getBpos());
|
||||||
|
|
||||||
// Output all new ngrams: they begin at each existing position
|
// Output all new ngrams: they begin at each existing position
|
||||||
// and end after the new character. onlyspans->only output
|
// and end after the new character. onlyspans->only output
|
||||||
// maximum words, nospans=> single chars
|
// maximum words, nospans=> single chars
|
||||||
if (!(m_flags & TXTS_ONLYSPANS) || nchars == o_CJKNgramLen) {
|
if (!(m_flags & TXTS_ONLYSPANS) || nchars == o_CJKNgramLen) {
|
||||||
unsigned int btend = it.getBpos() + it.getBlen();
|
int btend = int(it.getBpos() + it.getBlen());
|
||||||
unsigned int loopbeg = (m_flags & TXTS_NOSPANS) ? nchars-1 : 0;
|
int loopbeg = (m_flags & TXTS_NOSPANS) ? nchars-1 : 0;
|
||||||
unsigned int loopend = (m_flags & TXTS_ONLYSPANS) ? 1 : nchars;
|
int loopend = (m_flags & TXTS_ONLYSPANS) ? 1 : nchars;
|
||||||
for (unsigned int i = loopbeg; i < loopend; i++) {
|
for (int i = loopbeg; i < loopend; i++) {
|
||||||
if (!takeword(it.buffer().substr(boffs[i],
|
if (!takeword(it.buffer().substr(boffs[i],
|
||||||
btend-boffs[i]),
|
btend-boffs[i]),
|
||||||
m_wordpos - (nchars-i-1), boffs[i], btend)) {
|
m_wordpos - (nchars-i-1), boffs[i], btend)) {
|
||||||
@ -860,7 +862,7 @@ bool TextSplit::cjk_to_words(Utf8Iter *itp, unsigned int *cp)
|
|||||||
// If onlyspans is set, there may be things to flush in the buffer
|
// If onlyspans is set, there may be things to flush in the buffer
|
||||||
// first
|
// first
|
||||||
if ((m_flags & TXTS_ONLYSPANS) && nchars > 0 && nchars != o_CJKNgramLen) {
|
if ((m_flags & TXTS_ONLYSPANS) && nchars > 0 && nchars != o_CJKNgramLen) {
|
||||||
unsigned int btend = it.getBpos(); // Current char is out
|
int btend = int(it.getBpos()); // Current char is out
|
||||||
if (!takeword(it.buffer().substr(boffs[0], btend-boffs[0]),
|
if (!takeword(it.buffer().substr(boffs[0], btend-boffs[0]),
|
||||||
m_wordpos - nchars,
|
m_wordpos - nchars,
|
||||||
boffs[0], btend)) {
|
boffs[0], btend)) {
|
||||||
|
|||||||
@ -184,7 +184,7 @@ private:
|
|||||||
// Current span. Might be jf.dockes@wanadoo.f
|
// Current span. Might be jf.dockes@wanadoo.f
|
||||||
std::string m_span;
|
std::string m_span;
|
||||||
|
|
||||||
std::vector <std::pair<unsigned int, unsigned int> > m_words_in_span;
|
std::vector <std::pair<int, int> > m_words_in_span;
|
||||||
|
|
||||||
// Current word: no punctuation at all in there. Byte offset
|
// Current word: no punctuation at all in there. Byte offset
|
||||||
// relative to the current span and byte length
|
// relative to the current span and byte length
|
||||||
@ -201,7 +201,7 @@ private:
|
|||||||
// It may happen that our cleanup would result in emitting the
|
// It may happen that our cleanup would result in emitting the
|
||||||
// same term twice. We try to avoid this
|
// same term twice. We try to avoid this
|
||||||
int m_prevpos;
|
int m_prevpos;
|
||||||
unsigned int m_prevlen;
|
int m_prevlen;
|
||||||
|
|
||||||
#ifdef TEXTSPLIT_STATS
|
#ifdef TEXTSPLIT_STATS
|
||||||
// Stats counters. These are processed in TextSplit rather than by a
|
// Stats counters. These are processed in TextSplit rather than by a
|
||||||
@ -215,11 +215,11 @@ private:
|
|||||||
// This processes cjk text:
|
// This processes cjk text:
|
||||||
bool cjk_to_words(Utf8Iter *it, unsigned int *cp);
|
bool cjk_to_words(Utf8Iter *it, unsigned int *cp);
|
||||||
|
|
||||||
bool emitterm(bool isspan, std::string &term, int pos, int bs, int be);
|
bool emitterm(bool isspan, std::string &term, int pos, size_t bs,size_t be);
|
||||||
bool doemit(bool spanerase, int bp);
|
bool doemit(bool spanerase, size_t bp);
|
||||||
void discardspan();
|
void discardspan();
|
||||||
bool span_is_acronym(std::string *acronym);
|
bool span_is_acronym(std::string *acronym);
|
||||||
bool words_from_span(int bp);
|
bool words_from_span(size_t bp);
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif /* _TEXTSPLIT_H_INCLUDED_ */
|
#endif /* _TEXTSPLIT_H_INCLUDED_ */
|
||||||
|
|||||||
@ -408,13 +408,13 @@ AC_ARG_ENABLE(recollq,
|
|||||||
need for Qt). This is done by default if --disable-qtgui is set but this
|
need for Qt). This is done by default if --disable-qtgui is set but this
|
||||||
option enables forcing it.]),
|
option enables forcing it.]),
|
||||||
enableRECOLLQ=$enableval, enableRECOLLQ="no")
|
enableRECOLLQ=$enableval, enableRECOLLQ="no")
|
||||||
|
if test X"$enableRECOLLQ" != X ; then
|
||||||
if text X"$enableRECOLLQ" != X ; then
|
|
||||||
AM_CONDITIONAL(MAKECMDLINE, [test X$enableRECOLLQ = Xyes])
|
AM_CONDITIONAL(MAKECMDLINE, [test X$enableRECOLLQ = Xyes])
|
||||||
else
|
else
|
||||||
AM_CONDITIONAL(MAKECMDLINE, [test X$enableQT = Xno])
|
AM_CONDITIONAL(MAKECMDLINE, [test X$enableQT = Xno])
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
||||||
if test X$enableQT = Xyes ; then
|
if test X$enableQT = Xyes ; then
|
||||||
|
|
||||||
if test X$QTDIR != X ; then
|
if test X$QTDIR != X ; then
|
||||||
@ -586,6 +586,5 @@ AC_SUBST(RCLLIBVERSION)
|
|||||||
AC_CONFIG_FILES(Makefile)
|
AC_CONFIG_FILES(Makefile)
|
||||||
AC_CONFIG_FILES(common/rclversion.h)
|
AC_CONFIG_FILES(common/rclversion.h)
|
||||||
AC_CONFIG_FILES(python/recoll/setup.py)
|
AC_CONFIG_FILES(python/recoll/setup.py)
|
||||||
AC_CONFIG_FILES(sampleconf/recoll.conf)
|
|
||||||
|
|
||||||
AC_OUTPUT
|
AC_OUTPUT
|
||||||
|
|||||||
165
src/filters/rcldoc.py
Executable file
165
src/filters/rcldoc.py
Executable file
@ -0,0 +1,165 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
import rclexecm
|
||||||
|
import rclexec1
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
|
||||||
|
# Processing the output from antiword: create html header and tail, process
|
||||||
|
# continuation lines escape, HTML special characters, accumulate the data.
|
||||||
|
class WordProcessData:
    """Accumulate text lines (antiword output) into an HTML document.

    Lines are fed one at a time to takeLine(); wrapData() closes the
    document and returns it. `em` must provide htmlescape(text) and
    setmimetype(mimetype).
    """

    def __init__(self, em):
        self.em = em
        # HTML accumulated so far
        self.out = ""
        # Word fragment carried over from a line ending with a hyphen
        self.cont = ""
        # Set once the first non-empty line has been seen
        self.gotdata = False
        # Line with continued word (ending in -)
        # we strip the - which is not nice for actually hyphenated word.
        # What to do ?
        self.patcont = re.compile(r'[\w][-]$')
        # Pattern for breaking continuation at last word start
        self.patws = re.compile(r'([\s])([\w]+)(-)$')

    def takeLine(self, line):
        """Process one line of input text (without its line terminator)."""
        if not self.gotdata:
            # Skip leading empty lines, then emit the HTML header once
            if line == "":
                return
            # Note the trailing space after "Content-Type": the two
            # attributes must be separated by whitespace.
            self.out = '<html><head><title></title>' + \
                       '<meta http-equiv="Content-Type" ' + \
                       'content="text/html;charset=UTF-8">' + \
                       '</head><body><p>'
            self.gotdata = True

        if self.cont:
            # Prepend the fragment saved from the previous line
            line = self.cont + line
            self.cont = ""

        if line == "\f":
            # A form feed becomes a horizontal rule between paragraphs
            self.out += "</p><hr><p>"
            return

        if self.patcont.search(line):
            # Break at last whitespace
            match = self.patws.search(line)
            if match:
                # Keep the last word (without its hyphen) for the next line
                self.cont = line[match.start(2):match.end(2)]
                line = line[0:match.start(1)]
            else:
                # No whitespace: the whole line is carried over as is
                self.cont = line
                line = ""

        if line:
            self.out += self.em.htmlescape(line) + "<br>"
        else:
            self.out += "<br>"

    def wrapData(self):
        """Close the HTML document if any data was seen, set the output
        MIME type to text/html and return the accumulated text."""
        if self.gotdata:
            self.out += "</p></body></html>"
        self.em.setmimetype("text/html")
        return self.out
|
||||||
|
|
||||||
|
# Null data accumulator. We use this when antiword has failed, and the
|
||||||
|
# data actually comes from rclrtf, rcltext or wvWare, which all
|
||||||
|
# output HTML
|
||||||
|
class WordPassData:
    """Pass-through accumulator: input lines are concatenated unchanged
    and returned as a text/html document."""

    def __init__(self, em):
        self.em = em
        self.out = ""

    def takeLine(self, line):
        """Append one line of input, as is, to the accumulated output."""
        self.out = self.out + line

    def wrapData(self):
        """Declare the output as text/html and return the accumulated data."""
        self.em.setmimetype("text/html")
        return self.out
|
||||||
|
|
||||||
|
|
||||||
|
# Filter for msword docs. Try antiword, and if this fails, check for
|
||||||
|
# an rtf or text document (.doc are sometimes like this...). Also try
|
||||||
|
# wvWare if the doc is actually a Word doc
|
||||||
|
class WordFilter:
    """Select the command and output post-processor to use for a .doc file.

    `em` must provide rclog(message); `td` is the directory from which
    the other filter scripts (rcltext.py, rclrtf.py) are executed.
    """

    def __init__(self, em, td):
        self.em = em
        # Number of commands already proposed for the current file
        self.ntry = 0
        self.execdir = td

    def reset(self):
        """Restart the command sequence (to be called for each new file)."""
        self.ntry = 0

    def hasControlChars(self, data):
        """Return True if data contains a byte value below 32 other than
        TAB, LF, FF and CR."""
        if not isinstance(data, (bytes, bytearray)):
            # Text input: look at its encoded form
            data = data.encode("utf-8", "replace")
        # Iterating a bytearray yields integers on both Python 2 and 3
        for c in bytearray(data):
            if c < 32 and c not in (0x09, 0x0a, 0x0c, 0x0d):
                return True
        return False

    def mimetype(self, fn):
        """Guess the actual type of file fn from its first 100 bytes.

        Returns "text/rtf", "application/msword",
        "application/octet-stream" or "text/plain", or "" if the file
        can't be opened."""
        # Both signatures are byte strings because the file is read in
        # binary mode.
        rtfprolog = b"{\\rtf1"
        docprolog = b"\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1"
        try:
            f = open(fn, "rb")
        except Exception:
            return ""
        try:
            data = f.read(100)
        finally:
            f.close()
        if data[0:6] == rtfprolog:
            return "text/rtf"
        elif data[0:8] == docprolog:
            return "application/msword"
        elif self.hasControlChars(data):
            return "application/octet-stream"
        else:
            return "text/plain"

    def getCmd(self, fn):
        '''Return command to execute, and postprocessor, according to
        our state: first try antiword, then others depending on mime
        identification. Do 2 tries at most.

        The return value is a (command list, postprocessor) pair, or
        ([], None) when there is nothing (more) to try.'''
        if self.ntry == 0:
            self.ntry = 1
            cmd = rclexecm.which("antiword")
            if cmd:
                return ([cmd, "-t", "-i", "1", "-m", "UTF-8"],
                        WordProcessData(self.em))
            else:
                return ([], None)
        elif self.ntry == 1:
            self.ntry = 2
            # antiword failed. Check for an rtf file, or text and
            # process accordingly. If the doc is actually msword, try
            # wvWare.
            mt = self.mimetype(fn)
            self.em.rclog("rcldoc.py: actual MIME type %s" % mt)
            if mt == "text/plain":
                # "python" is the interpreter command name, same as for
                # the rtf case below.
                return (["python", os.path.join(self.execdir, "rcltext.py")],
                        WordPassData(self.em))
            elif mt == "text/rtf":
                cmd = ["python", os.path.join(self.execdir, "rclrtf.py"),
                       "-s"]
                self.em.rclog("rcldoc.py: returning cmd %s" % cmd)
                return (cmd, WordPassData(self.em))
            elif mt == "application/msword":
                cmd = rclexecm.which("wvWare")
                if cmd:
                    return ([cmd, "--nographics", "--charset=utf-8"],
                            WordPassData(self.em))
                else:
                    return ([], None)
            else:
                return ([], None)
        else:
            return ([], None)
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Directory of this script: other filters which we may need to
    # execute are looked for in the same place.
    execdir = os.path.dirname(sys.argv[0])

    # antiword is mandatory. We could fallback to wvWare, but this is
    # not what the old filter did.
    antiword = rclexecm.which("antiword")
    if not antiword:
        print("RECFILTERROR HELPERNOTFOUND antiword")
        sys.exit(1)

    # Protocol handler, command selector, and the executor tying them
    protocol = rclexecm.RclExecM()
    wordfilter = WordFilter(protocol, execdir)
    executor = rclexec1.Executor(protocol, wordfilter)
    rclexecm.main(protocol, executor)
|
||||||
112
src/filters/rclexec1.py
Normal file
112
src/filters/rclexec1.py
Normal file
@ -0,0 +1,112 @@
|
|||||||
|
#################################
|
||||||
|
# Copyright (C) 2014 J.F.Dockes
|
||||||
|
# This program is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License as published by
|
||||||
|
# the Free Software Foundation; either version 2 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program; if not, write to the
|
||||||
|
# Free Software Foundation, Inc.,
|
||||||
|
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
|
########################################################
|
||||||
|
|
||||||
|
# Common code for replacing the old shell scripts with Python execm
|
||||||
|
# ones: this implements the basic functions for a filter which
|
||||||
|
# executes a command to translate a simple file (like rclword with
|
||||||
|
# antiword).
|
||||||
|
#
|
||||||
|
# This was motivated by the Windows port: to replace shell and Unix
|
||||||
|
# utility usage (awk, etc.). We can't just execute python scripts,
|
||||||
|
# this would be too slow. So this helps implement a permanent script
|
||||||
|
# to repeatedly execute single commands.
|
||||||
|
|
||||||
|
import subprocess
|
||||||
|
import rclexecm
|
||||||
|
|
||||||
|
# This class has the code to execute the subprocess and call a
|
||||||
|
# data-specific post-processor. Command and processor are supplied by
|
||||||
|
# the object which we receive as a parameter, which in turn is defined
|
||||||
|
# in the actual executable filter (e.g. rcldoc.py)
|
||||||
|
class Executor:
    """Run the commands proposed by a filter object on a file, and pass
    their output through the associated post-processor.

    `em` must provide rclog(message). `flt` must provide reset() and
    getCmd(filename), the latter returning a (command list,
    postprocessor[, options]) sequence; an empty command list ends the
    attempts.
    """

    # Option bit: do not treat a non-zero command exit status as an error
    opt_ignxval = 1

    def __init__(self, em, flt):
        self.em = em
        self.flt = flt
        self.currentindex = 0

    def runCmd(self, cmd, filename, postproc, opt):
        ''' Substitute parameters and execute command, process output
        with the specific postprocessor and return the complete text.
        We expect cmd as a list of command name + arguments.

        Returns an (ok, data) pair. ok is False if the command could
        not be started, or if it exited with a non-zero status and
        opt_ignxval is not set in opt.'''

        # Built before the try block so that the error path can always
        # print it.
        fullcmd = cmd + [filename]
        try:
            proc = subprocess.Popen(fullcmd,
                                    stdout = subprocess.PIPE)
        except OSError as err:
            self.em.rclog("extractone: Popen(%s) OS error: %s" % (fullcmd, err))
            return (False, "")

        try:
            for line in proc.stdout:
                postproc.takeLine(line.strip())
        finally:
            # Always release the pipe and reap the child process
            proc.stdout.close()
            proc.wait()

        if (opt & self.opt_ignxval) == 0 and proc.returncode:
            self.em.rclog("extractone: [%s] returncode %d" % \
                          (filename, proc.returncode))
            return False, postproc.wrapData()
        else:
            return True, postproc.wrapData()

    def extractone(self, params):
        """Try the commands proposed by the filter object, in sequence,
        on the file named by params["filename:"], until one succeeds or
        there is none left.

        Returns an (ok, data, ipath, eof indicator) tuple."""
        #self.em.rclog("extractone %s %s" % (params["filename:"], \
        # params["mimetype:"]))
        self.flt.reset()
        ok = False
        if "filename:" not in params:
            self.em.rclog("extractone: no mime or file name")
            return (ok, "", "", rclexecm.RclExecM.eofnow)

        fn = params["filename:"]
        while True:
            cmdseq = self.flt.getCmd(fn)
            cmd = cmdseq[0]
            postproc = cmdseq[1]
            # The options element is optional
            opt = cmdseq[2] if len(cmdseq) == 3 else 0
            if cmd:
                ok, data = self.runCmd(cmd, fn, postproc, opt)
                if ok:
                    break
            else:
                # Nothing more to try
                break
        if ok:
            return (ok, data, "", rclexecm.RclExecM.eofnext)
        else:
            return (ok, "", "", rclexecm.RclExecM.eofnow)

    ###### File type handler api, used by rclexecm ---------->
    def openfile(self, params):
        self.currentindex = 0
        return True

    def getipath(self, params):
        return self.extractone(params)

    def getnext(self, params):
        # A file holds a single document: only the first call after
        # openfile() returns data.
        if self.currentindex >= 1:
            return (False, "", "", rclexecm.RclExecM.eofnow)
        else:
            ret = self.extractone(params)
            self.currentindex += 1
            return ret
|
||||||
@ -1,10 +1,34 @@
|
|||||||
#!/usr/bin/env python
|
#################################
|
||||||
|
# Copyright (C) 2014 J.F.Dockes
|
||||||
|
# This program is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License as published by
|
||||||
|
# the Free Software Foundation; either version 2 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program; if not, write to the
|
||||||
|
# Free Software Foundation, Inc.,
|
||||||
|
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
|
########################################################
|
||||||
|
## Recoll multifilter communication module and utilities
|
||||||
|
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
###########################################
|
|
||||||
## Generic recoll multifilter communication code
|
|
||||||
import sys
|
import sys
|
||||||
import os
|
import os
|
||||||
|
import tempfile
|
||||||
|
import shutil
|
||||||
|
import getopt
|
||||||
|
|
||||||
|
############################################
|
||||||
|
# RclExecM implements the
|
||||||
|
# communication protocol with the recollindex process. It calls the
|
||||||
|
# object specific of the document type to actually get the data.
|
||||||
class RclExecM:
|
class RclExecM:
|
||||||
noteof = 0
|
noteof = 0
|
||||||
eofnext = 1
|
eofnext = 1
|
||||||
@ -27,9 +51,13 @@ class RclExecM:
|
|||||||
else:
|
else:
|
||||||
self.maxmembersize = 50 * 1024
|
self.maxmembersize = 50 * 1024
|
||||||
self.maxmembersize = self.maxmembersize * 1024
|
self.maxmembersize = self.maxmembersize * 1024
|
||||||
|
if sys.platform == "win32":
|
||||||
|
import msvcrt
|
||||||
|
msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
|
||||||
|
msvcrt.setmode(sys.stdin.fileno(), os.O_BINARY)
|
||||||
|
|
||||||
def rclog(self, s, doexit = 0, exitvalue = 1):
|
def rclog(self, s, doexit = 0, exitvalue = 1):
|
||||||
print >> sys.stderr, "RCLMFILT:", self.myname, ":", s
|
print("RCLMFILT: %s: %s" % (self.myname, s), file=sys.stderr)
|
||||||
if doexit:
|
if doexit:
|
||||||
sys.exit(exitvalue)
|
sys.exit(exitvalue)
|
||||||
|
|
||||||
@ -87,29 +115,29 @@ class RclExecM:
|
|||||||
self.rclog("GOT UNICODE for ipath [%s]" % (ipath,))
|
self.rclog("GOT UNICODE for ipath [%s]" % (ipath,))
|
||||||
docdata = docdata.encode("UTF-8")
|
docdata = docdata.encode("UTF-8")
|
||||||
|
|
||||||
print "Document:", len(docdata)
|
print("Document: %d" % len(docdata))
|
||||||
sys.stdout.write(docdata)
|
sys.stdout.write(docdata)
|
||||||
|
|
||||||
if len(ipath):
|
if len(ipath):
|
||||||
print "Ipath:", len(ipath)
|
print("Ipath: %d" % len(ipath))
|
||||||
sys.stdout.write(ipath)
|
sys.stdout.write(ipath)
|
||||||
|
|
||||||
if len(self.mimetype):
|
if len(self.mimetype):
|
||||||
print "Mimetype:", len(self.mimetype)
|
print("Mimetype: %d" % len(self.mimetype))
|
||||||
sys.stdout.write(self.mimetype)
|
sys.stdout.write(self.mimetype)
|
||||||
|
|
||||||
# If we're at the end of the contents, say so
|
# If we're at the end of the contents, say so
|
||||||
if iseof == RclExecM.eofnow:
|
if iseof == RclExecM.eofnow:
|
||||||
print "Eofnow: 0"
|
print("Eofnow: 0")
|
||||||
elif iseof == RclExecM.eofnext:
|
elif iseof == RclExecM.eofnext:
|
||||||
print "Eofnext: 0"
|
print("Eofnext: 0")
|
||||||
if iserror == RclExecM.subdocerror:
|
if iserror == RclExecM.subdocerror:
|
||||||
print "Subdocerror: 0"
|
print("Subdocerror: 0")
|
||||||
elif iserror == RclExecM.fileerror:
|
elif iserror == RclExecM.fileerror:
|
||||||
print "Fileerror: 0"
|
print("Fileerror: 0")
|
||||||
|
|
||||||
# End of message
|
# End of message
|
||||||
print
|
print()
|
||||||
sys.stdout.flush()
|
sys.stdout.flush()
|
||||||
#self.rclog("done writing data")
|
#self.rclog("done writing data")
|
||||||
|
|
||||||
@ -168,67 +196,161 @@ class RclExecM:
|
|||||||
self.processmessage(processor, params)
|
self.processmessage(processor, params)
|
||||||
|
|
||||||
|
|
||||||
|
# Helper routine to test for program accessibility
|
||||||
|
def which(program):
    """Locate an executable, in the manner of the shell 'which' command.

    If program has a directory part, it is returned as is when it names
    an executable file. Otherwise the directory of the running script
    (sys.argv[0]) and then each PATH entry are searched, trying the bare
    name and the name with each PATHEXT extension appended.

    Returns the path found, or None.
    """
    def is_exe(fpath):
        # isfile(), not exists(): directories usually carry the execute
        # bit too and must not be reported as programs.
        return os.path.isfile(fpath) and os.access(fpath, os.X_OK)

    def ext_candidates(fpath):
        yield fpath
        for ext in os.environ.get("PATHEXT", "").split(os.pathsep):
            # Skip the empty string produced when PATHEXT is unset.
            if ext:
                yield fpath + ext

    def path_candidates():
        yield os.path.dirname(sys.argv[0])
        # Fall back to the system default search path if PATH is unset.
        for path in os.environ.get("PATH", os.defpath).split(os.pathsep):
            yield path

    fpath, fname = os.path.split(program)
    if fpath:
        if is_exe(program):
            return program
    else:
        for path in path_candidates():
            exe_file = os.path.join(path, program)
            for candidate in ext_candidates(exe_file):
                if is_exe(candidate):
                    return candidate
    return None
|
||||||
|
|
||||||
|
# Temp dir helper
|
||||||
|
class SafeTmpDir:
    """Lazily created private temporary directory, removed on deletion.

    The working directory is a 'rclsofftmp' subdirectory of a fresh
    mkdtemp() directory, itself created under $RECOLL_TMPDIR when that
    variable is set, else in the default temporary location.

    em: object with an rclog() method, used to report cleanup failures.
    """
    def __init__(self, em):
        self.em = em
        self.toptmp = ""
        self.tmpdir = ""

    def __del__(self):
        try:
            if self.toptmp:
                if self.tmpdir:
                    # Second argument: ignore errors while emptying.
                    shutil.rmtree(self.tmpdir, True)
                os.rmdir(self.toptmp)
                # Forget the paths so that a second call is a no-op.
                self.toptmp = ""
                self.tmpdir = ""
        except Exception as err:
            self.em.rclog("delete dir failed for " + self.toptmp)

    def getpath(self):
        """Return the working directory path, creating it on first use."""
        if not self.tmpdir:
            if not self.toptmp:
                # dir=None makes mkdtemp() use its default location.
                envrcltmp = os.getenv('RECOLL_TMPDIR') or None
                self.toptmp = tempfile.mkdtemp(prefix='rcltmp',
                                               dir=envrcltmp)

            tmpdir = os.path.join(self.toptmp, 'rclsofftmp')
            os.makedirs(tmpdir)
            # Only record the path once the directory really exists, so
            # that a failed creation is retried instead of handing out a
            # path which does not exist.
            self.tmpdir = tmpdir

        return self.tmpdir
|
||||||
|
|
||||||
|
|
||||||
# Common main routine for all python execm filters: either run the
|
# Common main routine for all python execm filters: either run the
|
||||||
# normal protocol engine or a local loop to test without recollindex
|
# normal protocol engine or a local loop to test without recollindex
|
||||||
def main(proto, extract):
|
def main(proto, extract):
|
||||||
if len(sys.argv) == 1:
|
if len(sys.argv) == 1:
|
||||||
proto.mainloop(extract)
|
proto.mainloop(extract)
|
||||||
else:
|
# mainloop does not return. Just in case
|
||||||
# Got a file name parameter: TESTING without an execm parent
|
sys.exit(1)
|
||||||
# Loop on all entries or get specific ipath
|
|
||||||
def mimetype_with_file(f):
|
|
||||||
cmd = 'file -i "' + f + '"'
|
|
||||||
fileout = os.popen(cmd).read()
|
|
||||||
lst = fileout.split(':')
|
|
||||||
mimetype = lst[len(lst)-1].strip()
|
|
||||||
lst = mimetype.split(';')
|
|
||||||
return lst[0].strip()
|
|
||||||
def mimetype_with_xdg(f):
|
|
||||||
cmd = 'xdg-mime query filetype "' + f + '"'
|
|
||||||
return os.popen(cmd).read().strip()
|
|
||||||
params = {'filename:': sys.argv[1]}
|
|
||||||
# Some filters (e.g. rclaudio) need/get a MIME type from the indexer
|
|
||||||
mimetype = mimetype_with_xdg(sys.argv[1])
|
|
||||||
params['mimetype:'] = mimetype
|
|
||||||
if not extract.openfile(params):
|
|
||||||
print "Open error"
|
|
||||||
sys.exit(1)
|
|
||||||
ipath = ""
|
|
||||||
if len(sys.argv) == 3:
|
|
||||||
ipath = sys.argv[2]
|
|
||||||
|
|
||||||
if ipath != "":
|
|
||||||
params['ipath:'] = ipath
|
# Not running the main loop: either acting as single filter (when called
|
||||||
ok, data, ipath, eof = extract.getipath(params)
|
# from other filter for example), or debugging
|
||||||
if ok:
|
def usage():
|
||||||
print "== Found entry for ipath %s (mimetype [%s]):" % \
|
print("Usage: rclexecm.py [-d] [-s] [-i ipath] [filename]",
|
||||||
(ipath, proto.mimetype)
|
file=sys.stderr)
|
||||||
if isinstance(data, unicode):
|
sys.exit(1)
|
||||||
bdata = data.encode("UTF-8")
|
|
||||||
else:
|
actAsSingle = False
|
||||||
bdata = data
|
debugDumpData = False
|
||||||
|
ipath = ""
|
||||||
|
|
||||||
|
args = sys.argv[1:]
|
||||||
|
opts, args = getopt.getopt(args, "hdsi:")
|
||||||
|
for opt, arg in opts:
|
||||||
|
if opt in ['-h']:
|
||||||
|
usage()
|
||||||
|
elif opt in ['-s']:
|
||||||
|
actAsSingle = True
|
||||||
|
elif opt in ['-i']:
|
||||||
|
ipath = arg
|
||||||
|
elif opt in ['-d']:
|
||||||
|
debugDumpData = True
|
||||||
|
else:
|
||||||
|
print("unknown option %s\n"%opt, file=sys.stderr)
|
||||||
|
usage()
|
||||||
|
|
||||||
|
if len(args) != 1:
|
||||||
|
usage()
|
||||||
|
|
||||||
|
def mimetype_with_file(f):
|
||||||
|
cmd = 'file -i "' + f + '"'
|
||||||
|
fileout = os.popen(cmd).read()
|
||||||
|
lst = fileout.split(':')
|
||||||
|
mimetype = lst[len(lst)-1].strip()
|
||||||
|
lst = mimetype.split(';')
|
||||||
|
return lst[0].strip()
|
||||||
|
|
||||||
|
def mimetype_with_xdg(f):
|
||||||
|
cmd = 'xdg-mime query filetype "' + f + '"'
|
||||||
|
return os.popen(cmd).read().strip()
|
||||||
|
|
||||||
|
def debprint(s):
|
||||||
|
if not actAsSingle:
|
||||||
|
print(s)
|
||||||
|
|
||||||
|
params = {'filename:': args[0]}
|
||||||
|
# Some filters (e.g. rclaudio) need/get a MIME type from the indexer
|
||||||
|
mimetype = mimetype_with_xdg(args[0])
|
||||||
|
params['mimetype:'] = mimetype
|
||||||
|
|
||||||
|
if not extract.openfile(params):
|
||||||
|
print("Open error", file=sys.stderr)
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
if ipath != "" or actAsSingle:
|
||||||
|
params['ipath:'] = ipath
|
||||||
|
ok, data, ipath, eof = extract.getipath(params)
|
||||||
|
if ok:
|
||||||
|
debprint("== Found entry for ipath %s (mimetype [%s]):" % \
|
||||||
|
(ipath, proto.mimetype))
|
||||||
|
if isinstance(data, unicode):
|
||||||
|
bdata = data.encode("UTF-8")
|
||||||
|
else:
|
||||||
|
bdata = data
|
||||||
|
if debugDumpData or actAsSingle:
|
||||||
sys.stdout.write(bdata)
|
sys.stdout.write(bdata)
|
||||||
print
|
print()
|
||||||
else:
|
|
||||||
print "Got error, eof %d"%eof
|
|
||||||
sys.exit(0)
|
sys.exit(0)
|
||||||
|
else:
|
||||||
|
print("Got error, eof %d"%eof, file=sys.stderr)
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
ecnt = 0
|
ecnt = 0
|
||||||
while 1:
|
while 1:
|
||||||
ok, data, ipath, eof = extract.getnext(params)
|
ok, data, ipath, eof = extract.getnext(params)
|
||||||
if ok:
|
if ok:
|
||||||
ecnt = ecnt + 1
|
ecnt = ecnt + 1
|
||||||
print "== Entry %d ipath %s (mimetype [%s]):" % \
|
debprint("== Entry %d ipath %s (mimetype [%s]):" % \
|
||||||
(ecnt, ipath, proto.mimetype)
|
(ecnt, ipath, proto.mimetype))
|
||||||
if isinstance(data, unicode):
|
if isinstance(data, unicode):
|
||||||
bdata = data.encode("UTF-8")
|
bdata = data.encode("UTF-8")
|
||||||
else:
|
|
||||||
bdata = data
|
|
||||||
#sys.stdout.write(bdata)
|
|
||||||
print
|
|
||||||
if eof != RclExecM.noteof:
|
|
||||||
break
|
|
||||||
else:
|
else:
|
||||||
print "Not ok, eof %d" % eof
|
bdata = data
|
||||||
break
|
if debugDumpData:
|
||||||
|
sys.stdout.write(bdata)
|
||||||
|
print()
|
||||||
|
if eof != RclExecM.noteof:
|
||||||
|
sys.exit(0)
|
||||||
|
else:
|
||||||
|
print("Not ok, eof %d" % eof, file=sys.stderr)
|
||||||
|
sys.exit(1)
|
||||||
|
# Not sure this makes sense, but going on looping certainly does not
|
||||||
|
if actAsSingle:
|
||||||
|
sys.exit(0)
|
||||||
|
|||||||
@ -147,6 +147,9 @@ if ($@) {
|
|||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
binmode(STDIN) || die "cannot binmode STDIN";
|
||||||
|
binmode(STDOUT) || die "cannot binmode STDOUT";
|
||||||
|
|
||||||
#print STDERR "RCLIMG: Starting\n";
|
#print STDERR "RCLIMG: Starting\n";
|
||||||
$| = 1;
|
$| = 1;
|
||||||
while (1) {
|
while (1) {
|
||||||
|
|||||||
@ -109,7 +109,7 @@ class European8859TextClassifier:
|
|||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
f = open(sys.argv[1])
|
f = open(sys.argv[1], "rb")
|
||||||
rawtext = f.read()
|
rawtext = f.read()
|
||||||
f.close()
|
f.close()
|
||||||
|
|
||||||
|
|||||||
@ -1,9 +0,0 @@
|
|||||||
#!/bin/sh
|
|
||||||
# It may make sense in some cases to set this null filter (no output)
|
|
||||||
# instead of using recoll_noindex or leaving the default filter in
|
|
||||||
# case one doesn't want to install it: this will avoid endless retries
|
|
||||||
# to reindex the affected files, as recoll will think it has succeeded
|
|
||||||
# indexing them. Downside: the files won't be indexed when one
|
|
||||||
# actually installs the real filter, will need a -z
|
|
||||||
|
|
||||||
exit 0
|
|
||||||
224
src/filters/rclopxml.py
Executable file
224
src/filters/rclopxml.py
Executable file
@ -0,0 +1,224 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# Copyright (C) 2015 J.F.Dockes
|
||||||
|
# This program is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License as published by
|
||||||
|
# the Free Software Foundation; either version 2 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program; if not, write to the
|
||||||
|
# Free Software Foundation, Inc.,
|
||||||
|
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
|
######################################
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import rclexecm
|
||||||
|
import rclxslt
|
||||||
|
import fnmatch
|
||||||
|
from zipfile import ZipFile
|
||||||
|
|
||||||
|
meta_stylesheet = '''<?xml version="1.0"?>
|
||||||
|
<xsl:stylesheet
|
||||||
|
xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0"
|
||||||
|
xmlns:cp="http://schemas.openxmlformats.org/package/2006/metadata/core-properties"
|
||||||
|
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||||
|
xmlns:dcterms="http://purl.org/dc/terms/"
|
||||||
|
xmlns:dcmitype="http://purl.org/dc/dcmitype/"
|
||||||
|
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
|
||||||
|
|
||||||
|
<!-- <xsl:output method="text"/> -->
|
||||||
|
<xsl:output omit-xml-declaration="yes"/>
|
||||||
|
|
||||||
|
<xsl:template match="cp:coreProperties">
|
||||||
|
<xsl:text> </xsl:text>
|
||||||
|
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
|
||||||
|
<xsl:text> </xsl:text>
|
||||||
|
<xsl:apply-templates/>
|
||||||
|
</xsl:template>
|
||||||
|
|
||||||
|
<xsl:template match="dc:creator">
|
||||||
|
<meta>
|
||||||
|
<xsl:attribute name="name">
|
||||||
|
<!-- <xsl:value-of select="name()"/> pour sortir tous les meta avec
|
||||||
|
le meme nom que dans le xml (si on devenait dc-natif) -->
|
||||||
|
<xsl:text>author</xsl:text>
|
||||||
|
</xsl:attribute>
|
||||||
|
<xsl:attribute name="content">
|
||||||
|
<xsl:value-of select="."/>
|
||||||
|
</xsl:attribute>
|
||||||
|
</meta>
|
||||||
|
<xsl:text> </xsl:text>
|
||||||
|
</xsl:template>
|
||||||
|
|
||||||
|
<xsl:template match="dcterms:modified">
|
||||||
|
<meta>
|
||||||
|
<xsl:attribute name="name">
|
||||||
|
<xsl:text>date</xsl:text>
|
||||||
|
</xsl:attribute>
|
||||||
|
<xsl:attribute name="content">
|
||||||
|
<xsl:value-of select="."/>
|
||||||
|
</xsl:attribute>
|
||||||
|
</meta>
|
||||||
|
<xsl:text> </xsl:text>
|
||||||
|
</xsl:template>
|
||||||
|
|
||||||
|
<xsl:template match="*">
|
||||||
|
</xsl:template>
|
||||||
|
|
||||||
|
</xsl:stylesheet>
|
||||||
|
'''
|
||||||
|
|
||||||
|
word_tagmatch = 'w:p'
|
||||||
|
word_xmlns_decls = '''xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0"
|
||||||
|
xmlns:ve="http://schemas.openxmlformats.org/markup-compatibility/2006"
|
||||||
|
xmlns:o="urn:schemas-microsoft-com:office:office"
|
||||||
|
xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships"
|
||||||
|
xmlns:m="http://schemas.openxmlformats.org/officeDocument/2006/math"
|
||||||
|
xmlns:v="urn:schemas-microsoft-com:vml"
|
||||||
|
xmlns:wp="http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing"
|
||||||
|
xmlns:w10="urn:schemas-microsoft-com:office:word"
|
||||||
|
xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"
|
||||||
|
xmlns:wne="http://schemas.microsoft.com/office/word/2006/wordml"
|
||||||
|
'''
|
||||||
|
word_moretemplates = ''
|
||||||
|
|
||||||
|
|
||||||
|
xl_tagmatch = 'x:t'
|
||||||
|
xl_xmlns_decls='''xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0"
|
||||||
|
xmlns:x="http://schemas.openxmlformats.org/spreadsheetml/2006/main"
|
||||||
|
'''
|
||||||
|
xl_moretemplates = ''
|
||||||
|
|
||||||
|
pp_tagmatch = 'a:t'
|
||||||
|
pp_xmlns_decls = '''xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0"
|
||||||
|
xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main"
|
||||||
|
xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships"
|
||||||
|
xmlns:p="http://schemas.openxmlformats.org/presentationml/2006/main"
|
||||||
|
'''
|
||||||
|
# I want to suppress text output for all except a:t, don't know how to do it
|
||||||
|
# help ! At least get rid of these:
|
||||||
|
pp_moretemplates = '''<xsl:template match="p:attrName">
|
||||||
|
</xsl:template>
|
||||||
|
'''
|
||||||
|
|
||||||
|
content_stylesheet = '''<?xml version="1.0"?>
|
||||||
|
<xsl:stylesheet @XMLNS_DECLS@ >
|
||||||
|
|
||||||
|
<xsl:output omit-xml-declaration="yes"/>
|
||||||
|
|
||||||
|
<xsl:template match="/">
|
||||||
|
<div>
|
||||||
|
<xsl:apply-templates/>
|
||||||
|
</div>
|
||||||
|
</xsl:template>
|
||||||
|
|
||||||
|
<xsl:template match="@TAGMATCH@">
|
||||||
|
<p>
|
||||||
|
<xsl:value-of select="."/>
|
||||||
|
</p>
|
||||||
|
</xsl:template>
|
||||||
|
|
||||||
|
@MORETEMPLATES@
|
||||||
|
|
||||||
|
</xsl:stylesheet>
|
||||||
|
'''
|
||||||
|
|
||||||
|
class OXExtractor:
    """Text extractor for zipped Office Open XML documents.

    Reads docProps/core.xml for metadata, then word/document.xml,
    xl/sharedStrings.xml and ppt/slides/slide*.xml for content, and
    runs each through an XSLT sheet to build one HTML document.

    em: protocol object, used here for logging through rclog().
    """
    def __init__(self, em):
        self.em = em
        self.currentindex = 0

    # Replace values inside data style sheet, depending on type of doc
    def computestylesheet(self, nm):
        """Build the content style sheet for doc type nm.

        nm is the prefix ('word', 'xl' or 'pp') of the module-level
        <nm>_xmlns_decls, <nm>_tagmatch and <nm>_moretemplates values
        substituted into content_stylesheet.
        """
        decls = globals()[nm + '_xmlns_decls']
        stylesheet = content_stylesheet.replace('@XMLNS_DECLS@', decls)
        tagmatch = globals()[nm + '_tagmatch']
        stylesheet = stylesheet.replace('@TAGMATCH@', tagmatch)
        moretmpl = globals()[nm + '_moretemplates']
        stylesheet = stylesheet.replace('@MORETEMPLATES@', moretmpl)

        return stylesheet

    @staticmethod
    def _slidekey(name):
        """Sort key putting slide2.xml before slide11.xml.

        Names whose part between 'ppt/slides/slide' and '.xml' is not a
        plain number sort after the numbered ones, in name order.
        """
        num = name[len('ppt/slides/slide'):-len('.xml')]
        if num.isdigit():
            return (0, int(num), name)
        return (1, 0, name)

    def extractone(self, params):
        """Extract the document from the file named in params.

        params: request dictionary; must hold a "filename:" entry.
        Returns (ok, data, ipath, eof). A missing file name or an
        unreadable zip yields (False, "", "", eofnow). Otherwise the
        result is (True, html, "", eofnext); a part which is absent or
        fails to convert is skipped.
        """
        # 'in' works on Python 2 and 3; dict.has_key() is gone in Python 3.
        if "filename:" not in params:
            self.em.rclog("extractone: no mime or file name")
            return (False, "", "", rclexecm.RclExecM.eofnow)
        fn = params["filename:"]

        try:
            zipf = ZipFile(fn)
        except Exception as err:
            self.em.rclog("unzip failed: " + str(err))
            return (False, "", "", rclexecm.RclExecM.eofnow)

        # try/finally: make sure the archive's file handle gets released.
        try:
            docdata = '<html><head>'

            try:
                metadata = zipf.read("docProps/core.xml")
                if metadata:
                    res = rclxslt.apply_sheet_data(meta_stylesheet, metadata)
                    docdata += res
            except Exception:
                # To be checked. I'm under the impression that I get this
                # when nothing matches?
                # self.em.rclog("no/bad metadata in %s" % fn)
                pass

            docdata += '</head><body>'

            # Each of the following parts only exists for one document
            # type: a failure to read or convert it is not an error.
            try:
                content = zipf.read('word/document.xml')
                stl = self.computestylesheet('word')
                docdata += rclxslt.apply_sheet_data(stl, content)
            except Exception:
                pass

            try:
                content = zipf.read('xl/sharedStrings.xml')
                stl = self.computestylesheet('xl')
                docdata += rclxslt.apply_sheet_data(stl, content)
            except Exception:
                pass

            try:
                stl = self.computestylesheet('pp')
                slides = [nm for nm in zipf.namelist()
                          if fnmatch.fnmatch(nm, 'ppt/slides/slide*.xml')]
                # Numeric sort: a plain sort would yield slide1 slide11
                # slide2
                for slide in sorted(slides, key=self._slidekey):
                    content = zipf.read(slide)
                    docdata += rclxslt.apply_sheet_data(stl, content)
            except Exception:
                pass

            docdata += '</body></html>'
        finally:
            zipf.close()

        return (True, docdata, "", rclexecm.RclExecM.eofnext)

    ###### File type handler api, used by rclexecm ---------->
    def openfile(self, params):
        """Start processing a new file. Always returns True."""
        self.currentindex = 0
        return True

    def getipath(self, params):
        """Return the document for an ipath: same as extractone()."""
        return self.extractone(params)

    def getnext(self, params):
        """Return the single document on the first call, then eofnow."""
        if self.currentindex >= 1:
            return (False, "", "", rclexecm.RclExecM.eofnow)
        else:
            ret = self.extractone(params)
            self.currentindex += 1
            return ret
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Script entry point: hand the extractor over to the common execm
    # main routine.
    proto = rclexecm.RclExecM()
    rclexecm.main(proto, OXExtractor(proto))
|
||||||
58
src/filters/rclppt.py
Executable file
58
src/filters/rclppt.py
Executable file
@ -0,0 +1,58 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
import rclexecm
|
||||||
|
import rclexec1
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
|
||||||
|
# Processing the output from ppt-dump.py
|
||||||
|
class PPTProcessData:
    """Accumulate the command's text output into a <pre> HTML document.

    em: object providing htmlescape(), used to escape each line.
    """
    def __init__(self, em):
        self.em = em
        self.out = ""
        self.gotdata = 0

    def takeLine(self, line):
        """Append one output line, emitting the HTML header first if
        this is the first line received."""
        if not self.gotdata:
            self.out += '''<html><head>''' + \
                        '''<meta http-equiv="Content-Type" ''' + \
                        '''content="text/html;charset=UTF-8">''' + \
                        '''</head><body><pre>'''
            self.gotdata = True
        # Terminate every line: without the newline the last word of a
        # line runs into the first word of the next one inside <pre>.
        self.out += self.em.htmlescape(line) + "\n"

    def wrapData(self):
        """Return the accumulated data followed by the closing tags."""
        return self.out + '''</pre></body></html>'''
|
||||||
|
|
||||||
|
class PPTFilter:
    """Command provider for PowerPoint files: a single try, running
    ppt-dump.py."""
    def __init__(self, em):
        self.em = em
        self.ntry = 0

    def reset(self):
        """Allow a new attempt (called before each extraction)."""
        self.ntry = 0

    def getCmd(self, fn):
        """Return (command, output processor, options) on the first
        call after reset(), and ([], None) afterwards or when
        ppt-dump.py cannot be found."""
        nothing = ([], None)
        if self.ntry:
            return nothing
        self.ntry = 1
        helper = rclexecm.which("ppt-dump.py")
        if not helper:
            return nothing
        # ppt-dump.py often exits 1 with valid data. Ignore exit value
        argv = ["python", helper, "--no-struct-output", "--dump-text"]
        return (argv, PPTProcessData(self.em),
                rclexec1.Executor.opt_ignxval)
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Report a missing helper on stdout and give up before starting the
    # protocol engine.
    helper = rclexecm.which("ppt-dump.py")
    if not helper:
        print("RECFILTERROR HELPERNOTFOUND ppt-dump.py")
        sys.exit(1)
    proto = rclexecm.RclExecM()
    flt = PPTFilter(proto)
    rclexecm.main(proto, rclexec1.Executor(proto, flt))
|
||||||
60
src/filters/rclrtf.py
Executable file
60
src/filters/rclrtf.py
Executable file
@ -0,0 +1,60 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
import rclexecm
|
||||||
|
import rclexec1
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
|
||||||
|
# Processing the output from unrtf
|
||||||
|
class RTFProcessData:
    """Post-process the HTML coming out of unrtf, line by line.

    Inside the head, lines starting with '<meta http-equiv=' are
    dropped and a UTF-8 Content-Type meta line is inserted just before
    the line holding '</head>'. Everything else goes through unchanged.
    """
    def __init__(self, em):
        self.em = em
        self.out = ""
        self.gothead = 0
        self.patendhead = re.compile('''</head>''')
        self.patcharset = re.compile('''^<meta http-equiv=''')

    # Some versions of unrtf put out a garbled charset line.
    # Apart from this, we pass the data untouched.
    def takeLine(self, line):
        """Process one line of output."""
        if self.gothead:
            # Past the head: plain copy.
            self.out += line + "\n"
            return
        if self.patendhead.search(line):
            charset = '<meta http-equiv="Content-Type" ' + \
                      'content="text/html;charset=UTF-8">'
            self.out += charset + "\n"
            self.out += line + "\n"
            self.gothead = 1
            return
        if self.patcharset.search(line):
            # Original charset line: drop it.
            return
        self.out += line + "\n"

    def wrapData(self):
        """Return the accumulated document."""
        return self.out
|
||||||
|
|
||||||
|
class RTFFilter:
    """Command provider for RTF files: a single try, running unrtf."""
    def __init__(self, em):
        self.em = em
        self.ntry = 0

    def reset(self):
        """Allow a new attempt (called before each extraction)."""
        self.ntry = 0

    def getCmd(self, fn):
        """Return (command, output processor) on the first call after
        reset(), and ([], None) afterwards or when unrtf cannot be
        found."""
        nothing = ([], None)
        if self.ntry:
            return nothing
        self.ntry = 1
        unrtf = rclexecm.which("unrtf")
        if not unrtf:
            return nothing
        return ([unrtf, "--nopict", "--html"], RTFProcessData(self.em))
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Report a missing helper on stdout and give up before starting the
    # protocol engine.
    if not rclexecm.which("unrtf"):
        # Name the helper actually needed (the message used to say
        # antiword, presumably copied from another filter).
        print("RECFILTERROR HELPERNOTFOUND unrtf")
        sys.exit(1)
    proto = rclexecm.RclExecM()
    filter = RTFFilter(proto)
    extract = rclexec1.Executor(proto, filter)
    rclexecm.main(proto, extract)
|
||||||
189
src/filters/rclsoff.py
Executable file
189
src/filters/rclsoff.py
Executable file
@ -0,0 +1,189 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# Copyright (C) 2014 J.F.Dockes
|
||||||
|
# This program is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License as published by
|
||||||
|
# the Free Software Foundation; either version 2 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program; if not, write to the
|
||||||
|
# Free Software Foundation, Inc.,
|
||||||
|
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
|
######################################
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import rclexecm
|
||||||
|
import rclxslt
|
||||||
|
from zipfile import ZipFile
|
||||||
|
|
||||||
|
stylesheet_meta = '''<?xml version="1.0"?>
|
||||||
|
<xsl:stylesheet version="1.0"
|
||||||
|
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
|
||||||
|
xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0"
|
||||||
|
xmlns:xlink="http://www.w3.org/1999/xlink"
|
||||||
|
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||||
|
xmlns:meta="urn:oasis:names:tc:opendocument:xmlns:meta:1.0"
|
||||||
|
xmlns:ooo="http://openoffice.org/2004/office"
|
||||||
|
exclude-result-prefixes="office xlink meta ooo dc"
|
||||||
|
>
|
||||||
|
|
||||||
|
<xsl:output method="html" encoding="UTF-8"/>
|
||||||
|
|
||||||
|
<xsl:template match="/office:document-meta">
|
||||||
|
<xsl:apply-templates select="office:meta/dc:description"/>
|
||||||
|
<xsl:apply-templates select="office:meta/dc:subject"/>
|
||||||
|
<xsl:apply-templates select="office:meta/dc:title"/>
|
||||||
|
<xsl:apply-templates select="office:meta/meta:keyword"/>
|
||||||
|
<xsl:apply-templates select="office:meta/dc:creator"/>
|
||||||
|
</xsl:template>
|
||||||
|
|
||||||
|
<xsl:template match="dc:title">
|
||||||
|
<title> <xsl:value-of select="."/> </title><xsl:text>
|
||||||
|
</xsl:text>
|
||||||
|
</xsl:template>
|
||||||
|
|
||||||
|
<xsl:template match="dc:description">
|
||||||
|
<meta>
|
||||||
|
<xsl:attribute name="name">abstract</xsl:attribute>
|
||||||
|
<xsl:attribute name="content">
|
||||||
|
<xsl:value-of select="."/>
|
||||||
|
</xsl:attribute>
|
||||||
|
</meta><xsl:text>
|
||||||
|
</xsl:text>
|
||||||
|
</xsl:template>
|
||||||
|
|
||||||
|
<xsl:template match="dc:subject">
|
||||||
|
<meta>
|
||||||
|
<xsl:attribute name="name">keywords</xsl:attribute>
|
||||||
|
<xsl:attribute name="content">
|
||||||
|
<xsl:value-of select="."/>
|
||||||
|
</xsl:attribute>
|
||||||
|
</meta><xsl:text>
|
||||||
|
</xsl:text>
|
||||||
|
</xsl:template>
|
||||||
|
|
||||||
|
<xsl:template match="dc:creator">
|
||||||
|
<meta>
|
||||||
|
<xsl:attribute name="name">author</xsl:attribute>
|
||||||
|
<xsl:attribute name="content">
|
||||||
|
<xsl:value-of select="."/>
|
||||||
|
</xsl:attribute>
|
||||||
|
</meta><xsl:text>
|
||||||
|
</xsl:text>
|
||||||
|
</xsl:template>
|
||||||
|
|
||||||
|
<xsl:template match="meta:keyword">
|
||||||
|
<meta>
|
||||||
|
<xsl:attribute name="name">keywords</xsl:attribute>
|
||||||
|
<xsl:attribute name="content">
|
||||||
|
<xsl:value-of select="."/>
|
||||||
|
</xsl:attribute>
|
||||||
|
</meta><xsl:text>
|
||||||
|
</xsl:text>
|
||||||
|
</xsl:template>
|
||||||
|
|
||||||
|
</xsl:stylesheet>
|
||||||
|
'''
|
||||||
|
|
||||||
|
stylesheet_content = '''<?xml version="1.0"?>
|
||||||
|
<xsl:stylesheet version="1.0"
|
||||||
|
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
|
||||||
|
xmlns:text="urn:oasis:names:tc:opendocument:xmlns:text:1.0"
|
||||||
|
exclude-result-prefixes="text"
|
||||||
|
>
|
||||||
|
|
||||||
|
<xsl:output method="html" encoding="UTF-8"/>
|
||||||
|
|
||||||
|
<xsl:template match="text:p">
|
||||||
|
<p><xsl:apply-templates/></p><xsl:text>
|
||||||
|
</xsl:text>
|
||||||
|
</xsl:template>
|
||||||
|
|
||||||
|
<xsl:template match="text:h">
|
||||||
|
<p><xsl:apply-templates/></p><xsl:text>
|
||||||
|
</xsl:text>
|
||||||
|
</xsl:template>
|
||||||
|
|
||||||
|
<xsl:template match="text:s">
|
||||||
|
<xsl:text> </xsl:text>
|
||||||
|
</xsl:template>
|
||||||
|
|
||||||
|
<xsl:template match="text:line-break">
|
||||||
|
<br />
|
||||||
|
</xsl:template>
|
||||||
|
|
||||||
|
<xsl:template match="text:tab">
|
||||||
|
<xsl:text> </xsl:text>
|
||||||
|
</xsl:template>
|
||||||
|
|
||||||
|
</xsl:stylesheet>
|
||||||
|
'''
|
||||||
|
|
||||||
|
class OOExtractor:
    """Text extractor for zipped OpenDocument files.

    Reads meta.xml and content.xml from the archive and runs each
    through an XSLT sheet to build one HTML document.

    em: protocol object, used here for logging through rclog().
    """
    def __init__(self, em):
        self.em = em
        self.currentindex = 0

    def extractone(self, params):
        """Extract the document from the file named in params.

        params: request dictionary; must hold a "filename:" entry.
        Returns (ok, data, ipath, eof): (True, html, "", eofnext) on
        success, (False, "", "", eofnow) when the file name is missing,
        the zip cannot be opened or the content cannot be converted.
        Missing or bad metadata is not an error.
        """
        # 'in' works on Python 2 and 3; dict.has_key() is gone in Python 3.
        if "filename:" not in params:
            self.em.rclog("extractone: no mime or file name")
            return (False, "", "", rclexecm.RclExecM.eofnow)
        fn = params["filename:"]

        try:
            zipf = ZipFile(fn)
        except Exception as err:
            self.em.rclog("unzip failed: " + str(err))
            return (False, "", "", rclexecm.RclExecM.eofnow)

        # try/finally: make sure the archive's file handle gets released.
        try:
            # Note the space separating the two attributes.
            docdata = '<html><head><meta http-equiv="Content-Type" ' \
                      'content="text/html; charset=UTF-8">'

            try:
                metadata = zipf.read("meta.xml")
                if metadata:
                    res = rclxslt.apply_sheet_data(stylesheet_meta, metadata)
                    docdata += res
            except Exception:
                # To be checked. I'm under the impression that I get this
                # when nothing matches?
                #self.em.rclog("no/bad metadata in %s" % fn)
                pass

            # The metadata sheet outputs <title> and <meta> elements:
            # only close the head once they are in.
            docdata += '</head><body>'

            try:
                content = zipf.read("content.xml")
                if content:
                    res = rclxslt.apply_sheet_data(stylesheet_content, content)
                    docdata += res
                docdata += '</body></html>'
            except Exception as err:
                self.em.rclog("bad data in %s" % fn)
                return (False, "", "", rclexecm.RclExecM.eofnow)
        finally:
            zipf.close()

        return (True, docdata, "", rclexecm.RclExecM.eofnext)

    ###### File type handler api, used by rclexecm ---------->
    def openfile(self, params):
        """Start processing a new file. Always returns True."""
        self.currentindex = 0
        return True

    def getipath(self, params):
        """Return the document for an ipath: same as extractone()."""
        return self.extractone(params)

    def getnext(self, params):
        """Return the single document on the first call, then eofnow."""
        if self.currentindex >= 1:
            return (False, "", "", rclexecm.RclExecM.eofnow)
        else:
            ret = self.extractone(params)
            self.currentindex += 1
            return ret
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Script entry point: create the rclexecm protocol object, bind an
    # extractor to it and hand both over to rclexecm.main().
    proto = rclexecm.RclExecM()
    rclexecm.main(proto, OOExtractor(proto))
|
||||||
140
src/filters/rclsvg.py
Executable file
140
src/filters/rclsvg.py
Executable file
@ -0,0 +1,140 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# Copyright (C) 2014 J.F.Dockes
|
||||||
|
# This program is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License as published by
|
||||||
|
# the Free Software Foundation; either version 2 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program; if not, write to the
|
||||||
|
# Free Software Foundation, Inc.,
|
||||||
|
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
|
######################################
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import rclexecm
|
||||||
|
import rclxslt
|
||||||
|
|
||||||
|
stylesheet_all = '''<?xml version="1.0"?>
|
||||||
|
<xsl:stylesheet version="1.0"
|
||||||
|
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
|
||||||
|
xmlns:svg="http://www.w3.org/2000/svg"
|
||||||
|
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||||
|
exclude-result-prefixes="svg"
|
||||||
|
>
|
||||||
|
|
||||||
|
<xsl:output method="html" encoding="UTF-8"/>
|
||||||
|
|
||||||
|
<xsl:template match="/">
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<xsl:apply-templates select="svg:svg/svg:title"/>
|
||||||
|
<xsl:apply-templates select="svg:svg/svg:desc"/>
|
||||||
|
<xsl:apply-templates select="svg:svg/svg:metadata/descendant::dc:creator"/>
|
||||||
|
<xsl:apply-templates select="svg:svg/svg:metadata/descendant::dc:subject"/>
|
||||||
|
<xsl:apply-templates select="svg:svg/svg:metadata/descendant::dc:description"/>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<xsl:apply-templates select="//svg:text"/>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
</xsl:template>
|
||||||
|
|
||||||
|
<xsl:template match="svg:desc">
|
||||||
|
<meta>
|
||||||
|
<xsl:attribute name="name">keywords</xsl:attribute>
|
||||||
|
<xsl:attribute name="content">
|
||||||
|
<xsl:value-of select="."/>
|
||||||
|
</xsl:attribute>
|
||||||
|
</meta><xsl:text>
|
||||||
|
</xsl:text>
|
||||||
|
</xsl:template>
|
||||||
|
|
||||||
|
<xsl:template match="dc:creator">
|
||||||
|
<meta>
|
||||||
|
<xsl:attribute name="name">author</xsl:attribute>
|
||||||
|
<xsl:attribute name="content">
|
||||||
|
<xsl:value-of select="."/>
|
||||||
|
</xsl:attribute>
|
||||||
|
</meta><xsl:text>
|
||||||
|
</xsl:text>
|
||||||
|
</xsl:template>
|
||||||
|
|
||||||
|
<xsl:template match="dc:subject">
|
||||||
|
<meta>
|
||||||
|
<xsl:attribute name="name">keywords</xsl:attribute>
|
||||||
|
<xsl:attribute name="content">
|
||||||
|
<xsl:value-of select="."/>
|
||||||
|
</xsl:attribute>
|
||||||
|
</meta><xsl:text>
|
||||||
|
</xsl:text>
|
||||||
|
</xsl:template>
|
||||||
|
|
||||||
|
<xsl:template match="dc:description">
|
||||||
|
<meta>
|
||||||
|
<xsl:attribute name="name">description</xsl:attribute>
|
||||||
|
<xsl:attribute name="content">
|
||||||
|
<xsl:value-of select="."/>
|
||||||
|
</xsl:attribute>
|
||||||
|
</meta><xsl:text>
|
||||||
|
</xsl:text>
|
||||||
|
</xsl:template>
|
||||||
|
|
||||||
|
<xsl:template match="svg:title">
|
||||||
|
<title><xsl:value-of select="."/></title><xsl:text>
|
||||||
|
</xsl:text>
|
||||||
|
</xsl:template>
|
||||||
|
|
||||||
|
<xsl:template match="svg:text">
|
||||||
|
<p><xsl:value-of select="."/></p><xsl:text>
|
||||||
|
</xsl:text>
|
||||||
|
</xsl:template>
|
||||||
|
|
||||||
|
</xsl:stylesheet>
|
||||||
|
'''
|
||||||
|
|
||||||
|
class SVGExtractor:
    """Filter handler converting an SVG file to HTML through stylesheet_all."""

    def __init__(self, em):
        # em: protocol object supplied by the caller; used here for logging.
        self.em = em
        self.currentindex = 0

    def extractone(self, params):
        """Transform the file named by params["filename:"].

        Returns (True, html, "", eofnow) on success and
        (False, "", "", eofnow) if the name is missing or the file cannot
        be read or transformed.
        """
        if "filename:" not in params:
            self.em.rclog("extractone: no mime or file name")
            return (False, "", "", rclexecm.RclExecM.eofnow)
        fn = params["filename:"]

        try:
            # 'with' closes the file even if read() fails.
            with open(fn, 'rb') as f:
                data = f.read()
            docdata = rclxslt.apply_sheet_data(stylesheet_all, data)
        except Exception as err:
            # Two placeholders for two values: with a single %s the
            # formatting itself raised TypeError inside this handler.
            self.em.rclog("%s: bad data: %s" % (fn, err))
            return (False, "", "", rclexecm.RclExecM.eofnow)

        return (True, docdata, "", rclexecm.RclExecM.eofnow)

    ###### File type handler api, used by rclexecm ---------->
    def openfile(self, params):
        """Reset the per-file document counter. Always returns True."""
        self.currentindex = 0
        return True

    def getipath(self, params):
        """Return the (single) document of the file; same as extractone()."""
        return self.extractone(params)

    def getnext(self, params):
        """Return the document on the first call after openfile(), then EOF."""
        if self.currentindex >= 1:
            return (False, "", "", rclexecm.RclExecM.eofnow)
        else:
            ret = self.extractone(params)
            self.currentindex += 1
            return ret
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Script entry point: create the rclexecm protocol object, bind an
    # extractor to it and hand both over to rclexecm.main().
    proto = rclexecm.RclExecM()
    rclexecm.main(proto, SVGExtractor(proto))
|
||||||
51
src/filters/rcltext.py
Executable file
51
src/filters/rcltext.py
Executable file
@ -0,0 +1,51 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
import rclexecm
|
||||||
|
import sys
|
||||||
|
|
||||||
|
# Wrapping a text file. Recoll does it internally in most cases, but
|
||||||
|
# there is a reason this exists, just can't remember it ...
|
||||||
|
class TxtDump:
    """Filter handler wrapping the raw bytes of a text file in an HTML <pre>."""

    def __init__(self, em):
        # em: protocol object supplied by the caller; provides rclog() and
        # htmlescape().
        self.em = em
        # Initialised here so that getnext() is safe even if openfile() was
        # not called first.
        self.currentindex = 0

    def extractone(self, params):
        """Read params["filename:"] and return it escaped inside <pre>.

        Returns (True, html, "", eofnext) on success and
        (False, "", "", eofnow) if the name is missing or the file cannot
        be read.
        """
        #self.em.rclog("extractone %s %s" % (params["filename:"], \
        #params["mimetype:"]))
        if "filename:" not in params:
            self.em.rclog("extractone: no file name")
            return (False, "", "", rclexecm.RclExecM.eofnow)

        fn = params["filename:"]
        # No charset, so recoll will have to use its config to guess it
        txt = '<html><head><title></title></head><body><pre>'
        try:
            # 'with' closes the file even if read() or escaping fails.
            with open(fn, "rb") as f:
                txt += self.em.htmlescape(f.read())
        except Exception as err:
            self.em.rclog("TxtDump: %s : %s" % (fn, err))
            return (False, "", "", rclexecm.RclExecM.eofnow)

        txt += '</pre></body></html>'
        return (True, txt, "", rclexecm.RclExecM.eofnext)

    ###### File type handler api, used by rclexecm ---------->
    def openfile(self, params):
        """Reset the per-file document counter. Always returns True."""
        self.currentindex = 0
        return True

    def getipath(self, params):
        """Return the (single) document of the file; same as extractone()."""
        return self.extractone(params)

    def getnext(self, params):
        """Return the document on the first call after openfile(), then EOF."""
        if self.currentindex >= 1:
            return (False, "", "", rclexecm.RclExecM.eofnow)
        else:
            ret = self.extractone(params)
            self.currentindex += 1
            return ret
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Script entry point: create the rclexecm protocol object, bind the
    # text dumper to it and hand both over to rclexecm.main().
    proto = rclexecm.RclExecM()
    rclexecm.main(proto, TxtDump(proto))
|
||||||
65
src/filters/rclxls.py
Executable file
65
src/filters/rclxls.py
Executable file
@ -0,0 +1,65 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
import rclexecm
|
||||||
|
import rclexec1
|
||||||
|
import xlsxmltocsv
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
import xml.sax
|
||||||
|
|
||||||
|
# Processing the output from xls-dump.py
|
||||||
|
class XLSProcessData:
    """Accumulates the XML text fed through takeLine() and converts it to HTML.

    wrapData() parses the accumulated XML with xlsxmltocsv.XlsXmlHandler and
    returns the handler's text output, HTML-escaped, inside a <pre> element.
    NOTE(review): presumably the XML is the output of xls-dump.py, with
    takeLine() called once per line by rclexec1 -- confirm against the caller.
    """

    def __init__(self, em):
        # em: protocol object supplied by the caller; provides htmlescape().
        self.em = em
        self.out = ""
        self.gotdata = False
        self.xmldata = ""

    def takeLine(self, line):
        """Store one chunk of input; emit the HTML preamble on the first one."""
        if not self.gotdata:
            self.out += '''<html><head>''' + \
                        '''<meta http-equiv="Content-Type" ''' + \
                        '''content="text/html;charset=UTF-8">''' + \
                        '''</head><body><pre>'''
            self.gotdata = True
        # The data itself is kept untouched until wrapData() parses it.
        self.xmldata += line

    def wrapData(self):
        """Parse the stored XML and return the complete HTML document.

        Exceptions raised by xml.sax.parseString() propagate to the caller.
        """
        handler = xlsxmltocsv.XlsXmlHandler()
        xml.sax.parseString(self.xmldata, handler)
        self.out += self.em.htmlescape(handler.output)
        return self.out + '''</pre></body></html>'''
|
||||||
|
|
||||||
|
class XLSFilter:
    """Supplies the external command used to dump a spreadsheet.

    getCmd() hands out the xls-dump.py command line once, then an empty
    command until reset() is called.
    """

    def __init__(self, em):
        self.em = em
        self.ntry = 0

    def reset(self):
        """Allow getCmd() to hand out the command again."""
        self.ntry = 0

    def getCmd(self, fn):
        """Return the command tuple on the first call after reset().

        The result is ([], None) when the command was already handed out or
        when xls-dump.py cannot be located; otherwise it is
        (argv, post-processor, option flags).
        """
        if self.ntry:
            return ([], None)
        self.ntry = 1

        dumper = rclexecm.which("xls-dump.py")
        if not dumper:
            return ([], None)

        # xls-dump.py often exits 1 with valid data. Ignore exit value
        argv = ["python", dumper, "--dump-mode=canonical-xml",
                "--utf-8", "--catch"]
        return (argv, XLSProcessData(self.em), rclexec1.Executor.opt_ignxval)
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # XLSFilter.getCmd() runs xls-dump.py, so that is the helper whose
    # absence must be detected and reported (the check previously named
    # ppt-dump.py).
    if not rclexecm.which("xls-dump.py"):
        print("RECFILTERROR HELPERNOTFOUND xls-dump.py")
        sys.exit(1)
    proto = rclexecm.RclExecM()
    filter = XLSFilter(proto)
    extract = rclexec1.Executor(proto, filter)
    rclexecm.main(proto, extract)
|
||||||
98
src/filters/rclxml.py
Executable file
98
src/filters/rclxml.py
Executable file
@ -0,0 +1,98 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# Copyright (C) 2014 J.F.Dockes
|
||||||
|
# This program is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License as published by
|
||||||
|
# the Free Software Foundation; either version 2 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program; if not, write to the
|
||||||
|
# Free Software Foundation, Inc.,
|
||||||
|
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
|
######################################
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import rclexecm
|
||||||
|
import rclxslt
|
||||||
|
|
||||||
|
stylesheet_all = '''<?xml version="1.0"?>
|
||||||
|
<xsl:stylesheet version="1.0"
|
||||||
|
xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
|
||||||
|
|
||||||
|
<xsl:output method="html" encoding="UTF-8"/>
|
||||||
|
|
||||||
|
<xsl:template match="/">
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<xsl:if test="//*[local-name() = 'title']">
|
||||||
|
<title>
|
||||||
|
<xsl:value-of select="//*[local-name() = 'title'][1]"/>
|
||||||
|
</title>
|
||||||
|
</xsl:if>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<xsl:apply-templates/>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
</xsl:template>
|
||||||
|
|
||||||
|
<xsl:template match="text()">
|
||||||
|
<xsl:if test="string-length(normalize-space(.)) > 0">
|
||||||
|
<p><xsl:value-of select="."/></p>
|
||||||
|
<xsl:text>
|
||||||
|
</xsl:text>
|
||||||
|
</xsl:if>
|
||||||
|
</xsl:template>
|
||||||
|
|
||||||
|
<xsl:template match="*">
|
||||||
|
<xsl:apply-templates/>
|
||||||
|
</xsl:template>
|
||||||
|
|
||||||
|
</xsl:stylesheet>
|
||||||
|
'''
|
||||||
|
|
||||||
|
class XMLExtractor:
    """Filter handler converting an XML file to HTML through stylesheet_all."""

    def __init__(self, em):
        # em: protocol object supplied by the caller; used here for logging.
        self.em = em
        self.currentindex = 0

    def extractone(self, params):
        """Transform the file named by params["filename:"].

        Returns (True, html, "", eofnow) on success and
        (False, "", "", eofnow) if the name is missing or the file cannot
        be read or transformed.
        """
        if "filename:" not in params:
            self.em.rclog("extractone: no mime or file name")
            return (False, "", "", rclexecm.RclExecM.eofnow)
        fn = params["filename:"]

        try:
            # 'with' closes the file even if read() fails.
            with open(fn, 'rb') as f:
                data = f.read()
            docdata = rclxslt.apply_sheet_data(stylesheet_all, data)
        except Exception as err:
            # Two placeholders for two values: with a single %s the
            # formatting itself raised TypeError inside this handler.
            self.em.rclog("%s: bad data: %s" % (fn, err))
            return (False, "", "", rclexecm.RclExecM.eofnow)

        return (True, docdata, "", rclexecm.RclExecM.eofnow)

    ###### File type handler api, used by rclexecm ---------->
    def openfile(self, params):
        """Reset the per-file document counter. Always returns True."""
        self.currentindex = 0
        return True

    def getipath(self, params):
        """Return the (single) document of the file; same as extractone()."""
        return self.extractone(params)

    def getnext(self, params):
        """Return the document on the first call after openfile(), then EOF."""
        if self.currentindex >= 1:
            return (False, "", "", rclexecm.RclExecM.eofnow)
        else:
            ret = self.extractone(params)
            self.currentindex += 1
            return ret
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Script entry point: create the rclexecm protocol object, bind an
    # extractor to it and hand both over to rclexecm.main().
    proto = rclexecm.RclExecM()
    rclexecm.main(proto, XMLExtractor(proto))
|
||||||
52
src/filters/rclxslt.py
Normal file
52
src/filters/rclxslt.py
Normal file
@ -0,0 +1,52 @@
|
|||||||
|
# Copyright (C) 2014 J.F.Dockes
|
||||||
|
# This program is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License as published by
|
||||||
|
# the Free Software Foundation; either version 2 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program; if not, write to the
|
||||||
|
# Free Software Foundation, Inc.,
|
||||||
|
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
|
######################################
|
||||||
|
|
||||||
|
# Helper module for xslt-based filters
|
||||||
|
|
||||||
|
import sys
|
||||||
|
|
||||||
|
# The XSLT helpers below need the libxml2 and libxslt Python bindings. If
# they cannot be loaded, print the helper-not-found message and exit with
# status 1.
try:
    import libxml2
    import libxslt
except Exception:
    # print() with a single string argument behaves the same on Python 2
    # and 3; the former print statement was a syntax error on Python 3.
    print("RECFILTERROR HELPERNOTFOUND python:libxml2/python:libxslt1")
    sys.exit(1)

libxml2.substituteEntitiesDefault(1)
|
||||||
|
|
||||||
|
def apply_sheet_data(sheet, data):
    """Apply an XSLT stylesheet to an in-memory XML document.

    sheet: stylesheet text. data: XML document text.
    Returns the serialized transformation result. Exceptions raised by the
    libxml2/libxslt calls propagate to the caller; the objects created here
    are released in every case.
    """
    styledoc = libxml2.parseMemory(sheet, len(sheet))
    style = libxslt.parseStylesheetDoc(styledoc)
    doc = None
    result = None
    try:
        doc = libxml2.parseMemory(data, len(data))
        result = style.applyStylesheet(doc, None)
        return style.saveResultToString(result)
    finally:
        # Release whatever was created, in the same order as before.
        if style is not None:
            style.freeStylesheet()
        if doc is not None:
            doc.freeDoc()
        if result is not None:
            result.freeDoc()
|
||||||
|
|
||||||
|
def apply_sheet_file(sheet, fn):
    """Apply an XSLT stylesheet to an XML file.

    sheet: stylesheet text. fn: path of the XML file to transform.
    Returns the serialized transformation result. Exceptions raised by the
    libxml2/libxslt calls propagate to the caller; the objects created here
    are released in every case.
    """
    styledoc = libxml2.parseMemory(sheet, len(sheet))
    style = libxslt.parseStylesheetDoc(styledoc)
    doc = None
    result = None
    try:
        doc = libxml2.parseFile(fn)
        result = style.applyStylesheet(doc, None)
        return style.saveResultToString(result)
    finally:
        # Release whatever was created, in the same order as before.
        if style is not None:
            style.freeStylesheet()
        if doc is not None:
            doc.freeDoc()
        if result is not None:
            result.freeDoc()
|
||||||
|
|
||||||
@ -15,10 +15,13 @@ else:
|
|||||||
dquote = '"'
|
dquote = '"'
|
||||||
|
|
||||||
class XlsXmlHandler(xml.sax.handler.ContentHandler):
|
class XlsXmlHandler(xml.sax.handler.ContentHandler):
|
||||||
|
def __init__(self):
|
||||||
|
self.output = ""
|
||||||
|
|
||||||
def startElement(self, name, attrs):
|
def startElement(self, name, attrs):
|
||||||
if name == "worksheet":
|
if name == "worksheet":
|
||||||
if "name" in attrs:
|
if "name" in attrs:
|
||||||
print("%s" % attrs["name"].encode("UTF-8"))
|
self.output += "%s\n" % attrs["name"].encode("UTF-8")
|
||||||
elif name == "row":
|
elif name == "row":
|
||||||
self.cells = dict()
|
self.cells = dict()
|
||||||
elif name == "label-cell" or name == "number-cell":
|
elif name == "label-cell" or name == "number-cell":
|
||||||
@ -30,7 +33,7 @@ class XlsXmlHandler(xml.sax.handler.ContentHandler):
|
|||||||
self.cells[int(attrs["col"])] = value
|
self.cells[int(attrs["col"])] = value
|
||||||
else:
|
else:
|
||||||
#??
|
#??
|
||||||
sys.stdout.write("%s%s"%(value.encode("UTF-8"),sepstring))
|
self.output += "%s%s" % (value.encode("UTF-8"), sepstring)
|
||||||
elif name == "formula-cell":
|
elif name == "formula-cell":
|
||||||
if "formula-result" in attrs and "col" in attrs:
|
if "formula-result" in attrs and "col" in attrs:
|
||||||
self.cells[int(attrs["col"])] = \
|
self.cells[int(attrs["col"])] = \
|
||||||
@ -40,17 +43,21 @@ class XlsXmlHandler(xml.sax.handler.ContentHandler):
|
|||||||
if name == "row":
|
if name == "row":
|
||||||
curidx = 0
|
curidx = 0
|
||||||
for idx, value in self.cells.iteritems():
|
for idx, value in self.cells.iteritems():
|
||||||
sys.stdout.write(sepstring * (idx - curidx))
|
self.output += sepstring * (idx - curidx)
|
||||||
sys.stdout.write('%s%s%s' % (dquote, value, dquote))
|
self.output += "%s%s%s" % (dquote, value, dquote)
|
||||||
curidx = idx
|
curidx = idx
|
||||||
sys.stdout.write("\n")
|
self.output += "\n"
|
||||||
elif name == "worksheet":
|
elif name == "worksheet":
|
||||||
print("")
|
self.output += "\n"
|
||||||
|
|
||||||
try:
|
|
||||||
xml.sax.parse(sys.stdin, XlsXmlHandler())
|
|
||||||
except BaseException as err:
|
|
||||||
error("xml-parse: %s\n" % (str(sys.exc_info()[:2]),))
|
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
sys.exit(0)
|
if __name__ == '__main__':
|
||||||
|
try:
|
||||||
|
handler = XlsXmlHandler()
|
||||||
|
xml.sax.parse(sys.stdin, handler)
|
||||||
|
print(handler.output)
|
||||||
|
except BaseException as err:
|
||||||
|
error("xml-parse: %s\n" % (str(sys.exc_info()[:2]),))
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
sys.exit(0)
|
||||||
|
|||||||
@ -14,9 +14,8 @@
|
|||||||
* Free Software Foundation, Inc.,
|
* Free Software Foundation, Inc.,
|
||||||
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
*/
|
*/
|
||||||
#ifdef HAVE_CONFIG_H
|
|
||||||
#include "autoconfig.h"
|
#include "autoconfig.h"
|
||||||
#endif
|
|
||||||
|
|
||||||
#include "debuglog.h"
|
#include "debuglog.h"
|
||||||
|
|
||||||
@ -34,8 +33,10 @@ DocFetcher *docFetcherMake(const Rcl::Doc& idoc)
|
|||||||
idoc.getmeta(Rcl::Doc::keybcknd, &backend);
|
idoc.getmeta(Rcl::Doc::keybcknd, &backend);
|
||||||
if (backend.empty() || !backend.compare("FS")) {
|
if (backend.empty() || !backend.compare("FS")) {
|
||||||
return new FSDocFetcher;
|
return new FSDocFetcher;
|
||||||
|
#ifndef DISABLE_WEB_INDEXER
|
||||||
} else if (!backend.compare("BGL")) {
|
} else if (!backend.compare("BGL")) {
|
||||||
return new BGLDocFetcher;
|
return new BGLDocFetcher;
|
||||||
|
#endif
|
||||||
} else {
|
} else {
|
||||||
LOGERR(("DocFetcherFactory: unknown backend [%s]\n", backend.c_str()));
|
LOGERR(("DocFetcherFactory: unknown backend [%s]\n", backend.c_str()));
|
||||||
return 0;
|
return 0;
|
||||||
|
|||||||
@ -43,7 +43,6 @@
|
|||||||
#include "fileudi.h"
|
#include "fileudi.h"
|
||||||
#include "cancelcheck.h"
|
#include "cancelcheck.h"
|
||||||
#include "rclinit.h"
|
#include "rclinit.h"
|
||||||
#include "execmd.h"
|
|
||||||
#include "extrameta.h"
|
#include "extrameta.h"
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
@ -145,13 +144,11 @@ FsIndexer::~FsIndexer()
|
|||||||
void *status;
|
void *status;
|
||||||
if (m_haveInternQ) {
|
if (m_haveInternQ) {
|
||||||
status = m_iwqueue.setTerminateAndWait();
|
status = m_iwqueue.setTerminateAndWait();
|
||||||
LOGDEB0(("FsIndexer: internfile wrkr status: %ld (1->ok)\n",
|
LOGDEB0(("FsIndexer: internfile wrkr status: %p (1->ok)\n", status));
|
||||||
long(status)));
|
|
||||||
}
|
}
|
||||||
if (m_haveSplitQ) {
|
if (m_haveSplitQ) {
|
||||||
status = m_dwqueue.setTerminateAndWait();
|
status = m_dwqueue.setTerminateAndWait();
|
||||||
LOGDEB0(("FsIndexer: dbupd worker status: %ld (1->ok)\n",
|
LOGDEB0(("FsIndexer: dbupd worker status: %p (1->ok)\n", status));
|
||||||
long(status)));
|
|
||||||
}
|
}
|
||||||
delete m_stableconfig;
|
delete m_stableconfig;
|
||||||
#endif // IDX_THREADS
|
#endif // IDX_THREADS
|
||||||
@ -259,7 +256,7 @@ static bool matchesSkipped(const vector<string>& tdl,
|
|||||||
string canonpath = path_canon(path);
|
string canonpath = path_canon(path);
|
||||||
string mpath = canonpath;
|
string mpath = canonpath;
|
||||||
string topdir;
|
string topdir;
|
||||||
while (mpath.length() > 1) {
|
while (!path_isroot(mpath)) { // we assume root not in skipped paths.
|
||||||
for (vector<string>::const_iterator it = tdl.begin();
|
for (vector<string>::const_iterator it = tdl.begin();
|
||||||
it != tdl.end(); it++) {
|
it != tdl.end(); it++) {
|
||||||
// the topdirs members are already canonized.
|
// the topdirs members are already canonized.
|
||||||
@ -281,7 +278,7 @@ static bool matchesSkipped(const vector<string>& tdl,
|
|||||||
mpath = path_getfather(mpath);
|
mpath = path_getfather(mpath);
|
||||||
// getfather normally returns a path ending with /, canonic
|
// getfather normally returns a path ending with /, canonic
|
||||||
// paths don't (except for '/' itself).
|
// paths don't (except for '/' itself).
|
||||||
if (!mpath.empty() && mpath[mpath.size()-1] == '/')
|
if (!path_isroot(mpath) && mpath[mpath.size()-1] == '/')
|
||||||
mpath.erase(mpath.size()-1);
|
mpath.erase(mpath.size()-1);
|
||||||
// should not be necessary, but lets be prudent. If the
|
// should not be necessary, but lets be prudent. If the
|
||||||
// path did not shorten, something is seriously amiss
|
// path did not shorten, something is seriously amiss
|
||||||
@ -330,7 +327,7 @@ bool FsIndexer::indexFiles(list<string>& files, int flags)
|
|||||||
{
|
{
|
||||||
LOGDEB(("FsIndexer::indexFiles\n"));
|
LOGDEB(("FsIndexer::indexFiles\n"));
|
||||||
m_noretryfailed = (flags & ConfIndexer::IxFNoRetryFailed) != 0;
|
m_noretryfailed = (flags & ConfIndexer::IxFNoRetryFailed) != 0;
|
||||||
int ret = false;
|
bool ret = false;
|
||||||
|
|
||||||
if (!init())
|
if (!init())
|
||||||
return false;
|
return false;
|
||||||
@ -703,7 +700,7 @@ FsIndexer::processonefile(RclConfig *config,
|
|||||||
}
|
}
|
||||||
|
|
||||||
LOGDEB0(("processone: processing: [%s] %s\n",
|
LOGDEB0(("processone: processing: [%s] %s\n",
|
||||||
displayableBytes(stp->st_size).c_str(), fn.c_str()));
|
displayableBytes(off_t(stp->st_size)).c_str(), fn.c_str()));
|
||||||
|
|
||||||
string utf8fn = compute_utf8fn(config, fn);
|
string utf8fn = compute_utf8fn(config, fn);
|
||||||
|
|
||||||
@ -772,7 +769,7 @@ FsIndexer::processonefile(RclConfig *config,
|
|||||||
if (doc.fmtime.empty())
|
if (doc.fmtime.empty())
|
||||||
doc.fmtime = ascdate;
|
doc.fmtime = ascdate;
|
||||||
if (doc.url.empty())
|
if (doc.url.empty())
|
||||||
doc.url = cstr_fileu + fn;
|
doc.url = path_pathtofileurl(fn);
|
||||||
const string *fnp = 0;
|
const string *fnp = 0;
|
||||||
if (doc.ipath.empty()) {
|
if (doc.ipath.empty()) {
|
||||||
if (!doc.peekmeta(Rcl::Doc::keyfn, &fnp) || fnp->empty())
|
if (!doc.peekmeta(Rcl::Doc::keyfn, &fnp) || fnp->empty())
|
||||||
@ -868,7 +865,7 @@ FsIndexer::processonefile(RclConfig *config,
|
|||||||
fileDoc.meta[Rcl::Doc::keytcfn] = utf8fn;
|
fileDoc.meta[Rcl::Doc::keytcfn] = utf8fn;
|
||||||
fileDoc.haschildren = true;
|
fileDoc.haschildren = true;
|
||||||
fileDoc.mimetype = mimetype;
|
fileDoc.mimetype = mimetype;
|
||||||
fileDoc.url = cstr_fileu + fn;
|
fileDoc.url = path_pathtofileurl(fn);
|
||||||
if (m_havelocalfields)
|
if (m_havelocalfields)
|
||||||
setlocalfields(localfields, fileDoc);
|
setlocalfields(localfields, fileDoc);
|
||||||
char cbuf[100];
|
char cbuf[100];
|
||||||
|
|||||||
@ -14,9 +14,7 @@
|
|||||||
* Free Software Foundation, Inc.,
|
* Free Software Foundation, Inc.,
|
||||||
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
*/
|
*/
|
||||||
#ifdef HAVE_CONFIG_H
|
|
||||||
#include "autoconfig.h"
|
#include "autoconfig.h"
|
||||||
#endif
|
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
@ -27,7 +25,9 @@
|
|||||||
#include "debuglog.h"
|
#include "debuglog.h"
|
||||||
#include "indexer.h"
|
#include "indexer.h"
|
||||||
#include "fsindexer.h"
|
#include "fsindexer.h"
|
||||||
|
#ifndef DISABLE_WEB_INDEXER
|
||||||
#include "beaglequeue.h"
|
#include "beaglequeue.h"
|
||||||
|
#endif
|
||||||
#include "mimehandler.h"
|
#include "mimehandler.h"
|
||||||
#include "pathut.h"
|
#include "pathut.h"
|
||||||
|
|
||||||
@ -46,7 +46,9 @@ ConfIndexer::ConfIndexer(RclConfig *cnf, DbIxStatusUpdater *updfunc)
|
|||||||
ConfIndexer::~ConfIndexer()
|
ConfIndexer::~ConfIndexer()
|
||||||
{
|
{
|
||||||
deleteZ(m_fsindexer);
|
deleteZ(m_fsindexer);
|
||||||
|
#ifndef DISABLE_WEB_INDEXER
|
||||||
deleteZ(m_beagler);
|
deleteZ(m_beagler);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
// Determine if this is likely the first time that the user runs
|
// Determine if this is likely the first time that the user runs
|
||||||
@ -107,7 +109,7 @@ bool ConfIndexer::index(bool resetbefore, ixType typestorun, int flags)
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#ifndef DISABLE_WEB_INDEXER
|
||||||
if (m_dobeagle && (typestorun & IxTBeagleQueue)) {
|
if (m_dobeagle && (typestorun & IxTBeagleQueue)) {
|
||||||
deleteZ(m_beagler);
|
deleteZ(m_beagler);
|
||||||
m_beagler = new BeagleQueueIndexer(m_config, &m_db, m_updater);
|
m_beagler = new BeagleQueueIndexer(m_config, &m_db, m_updater);
|
||||||
@ -116,7 +118,7 @@ bool ConfIndexer::index(bool resetbefore, ixType typestorun, int flags)
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
if (typestorun == IxTAll) {
|
if (typestorun == IxTAll) {
|
||||||
// Get rid of all database entries that don't exist in the
|
// Get rid of all database entries that don't exist in the
|
||||||
// filesystem anymore. Only if all *configured* indexers ran.
|
// filesystem anymore. Only if all *configured* indexers ran.
|
||||||
@ -173,6 +175,7 @@ bool ConfIndexer::indexFiles(list<string>& ifiles, int flag)
|
|||||||
ret = m_fsindexer->indexFiles(myfiles, flag);
|
ret = m_fsindexer->indexFiles(myfiles, flag);
|
||||||
LOGDEB2(("ConfIndexer::indexFiles: fsindexer returned %d, "
|
LOGDEB2(("ConfIndexer::indexFiles: fsindexer returned %d, "
|
||||||
"%d files remainining\n", ret, myfiles.size()));
|
"%d files remainining\n", ret, myfiles.size()));
|
||||||
|
#ifndef DISABLE_WEB_INDEXER
|
||||||
|
|
||||||
if (m_dobeagle && !myfiles.empty() && !(flag & IxFNoWeb)) {
|
if (m_dobeagle && !myfiles.empty() && !(flag & IxFNoWeb)) {
|
||||||
if (!m_beagler)
|
if (!m_beagler)
|
||||||
@ -183,7 +186,7 @@ bool ConfIndexer::indexFiles(list<string>& ifiles, int flag)
|
|||||||
ret = false;
|
ret = false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
// The close would be done in our destructor, but we want status here
|
// The close would be done in our destructor, but we want status here
|
||||||
if (!m_db.close()) {
|
if (!m_db.close()) {
|
||||||
LOGERR(("ConfIndexer::index: error closing database in %s\n",
|
LOGERR(("ConfIndexer::index: error closing database in %s\n",
|
||||||
@ -255,6 +258,7 @@ bool ConfIndexer::purgeFiles(std::list<string> &files, int flag)
|
|||||||
if (m_fsindexer)
|
if (m_fsindexer)
|
||||||
ret = m_fsindexer->purgeFiles(myfiles);
|
ret = m_fsindexer->purgeFiles(myfiles);
|
||||||
|
|
||||||
|
#ifndef DISABLE_WEB_INDEXER
|
||||||
if (m_dobeagle && !myfiles.empty() && !(flag & IxFNoWeb)) {
|
if (m_dobeagle && !myfiles.empty() && !(flag & IxFNoWeb)) {
|
||||||
if (!m_beagler)
|
if (!m_beagler)
|
||||||
m_beagler = new BeagleQueueIndexer(m_config, &m_db, m_updater);
|
m_beagler = new BeagleQueueIndexer(m_config, &m_db, m_updater);
|
||||||
@ -264,6 +268,7 @@ bool ConfIndexer::purgeFiles(std::list<string> &files, int flag)
|
|||||||
ret = false;
|
ret = false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
// The close would be done in our destructor, but we want status here
|
// The close would be done in our destructor, but we want status here
|
||||||
if (!m_db.close()) {
|
if (!m_db.close()) {
|
||||||
|
|||||||
@ -16,20 +16,18 @@
|
|||||||
*/
|
*/
|
||||||
#ifndef _INDEXER_H_INCLUDED_
|
#ifndef _INDEXER_H_INCLUDED_
|
||||||
#define _INDEXER_H_INCLUDED_
|
#define _INDEXER_H_INCLUDED_
|
||||||
|
#include "rclconfig.h"
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <list>
|
#include <list>
|
||||||
#include <map>
|
#include <map>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#ifndef NO_NAMESPACES
|
|
||||||
using std::string;
|
using std::string;
|
||||||
using std::list;
|
using std::list;
|
||||||
using std::map;
|
using std::map;
|
||||||
using std::vector;
|
using std::vector;
|
||||||
#endif
|
|
||||||
|
|
||||||
#include "rclconfig.h"
|
|
||||||
#include "rcldb.h"
|
#include "rcldb.h"
|
||||||
#include "rcldoc.h"
|
#include "rcldoc.h"
|
||||||
#ifdef IDX_THREADS
|
#ifdef IDX_THREADS
|
||||||
|
|||||||
@ -140,6 +140,7 @@ string mimetype(const string &fn, const struct stat *stp,
|
|||||||
|
|
||||||
string mtype;
|
string mtype;
|
||||||
|
|
||||||
|
#ifndef _WIN32
|
||||||
// Extended attribute has priority on everything, as per:
|
// Extended attribute has priority on everything, as per:
|
||||||
// http://freedesktop.org/wiki/CommonExtendedAttributes
|
// http://freedesktop.org/wiki/CommonExtendedAttributes
|
||||||
if (pxattr::get(fn, "mime_type", &mtype)) {
|
if (pxattr::get(fn, "mime_type", &mtype)) {
|
||||||
@ -150,6 +151,7 @@ string mimetype(const string &fn, const struct stat *stp,
|
|||||||
return mtype;
|
return mtype;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
if (cfg == 0) {
|
if (cfg == 0) {
|
||||||
LOGERR(("Mimetype: null config ??\n"));
|
LOGERR(("Mimetype: null config ??\n"));
|
||||||
@ -177,7 +179,6 @@ string mimetype(const string &fn, const struct stat *stp,
|
|||||||
if (mtype.empty() && stp)
|
if (mtype.empty() && stp)
|
||||||
mtype = mimetypefromdata(cfg, fn, usfc);
|
mtype = mimetypefromdata(cfg, fn, usfc);
|
||||||
|
|
||||||
out:
|
|
||||||
return mtype;
|
return mtype;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -17,10 +17,10 @@
|
|||||||
#ifndef _MIMETYPE_H_INCLUDED_
|
#ifndef _MIMETYPE_H_INCLUDED_
|
||||||
#define _MIMETYPE_H_INCLUDED_
|
#define _MIMETYPE_H_INCLUDED_
|
||||||
|
|
||||||
|
#include "safesysstat.h"
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
class RclConfig;
|
class RclConfig;
|
||||||
struct stat;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Try to determine a mime type for file.
|
* Try to determine a mime type for file.
|
||||||
|
|||||||
@ -469,7 +469,7 @@ bool RclFAM::getEvent(RclMonEvent& ev, int msecs)
|
|||||||
MONDEB(("RclFAM::getEvent: FAMNextEvent returned\n"));
|
MONDEB(("RclFAM::getEvent: FAMNextEvent returned\n"));
|
||||||
|
|
||||||
map<int,string>::const_iterator it;
|
map<int,string>::const_iterator it;
|
||||||
if ((fe.filename[0] != '/') &&
|
if ((!path_isabsolute(fe.filename)) &&
|
||||||
(it = m_idtopath.find(fe.fr.reqnum)) != m_idtopath.end()) {
|
(it = m_idtopath.find(fe.fr.reqnum)) != m_idtopath.end()) {
|
||||||
ev.m_path = path_cat(it->second, fe.filename);
|
ev.m_path = path_cat(it->second, fe.filename);
|
||||||
} else {
|
} else {
|
||||||
|
|||||||
@ -14,16 +14,18 @@
|
|||||||
* Free Software Foundation, Inc.,
|
* Free Software Foundation, Inc.,
|
||||||
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
*/
|
*/
|
||||||
#ifdef HAVE_CONFIG_H
|
|
||||||
#include "autoconfig.h"
|
#include "autoconfig.h"
|
||||||
#endif
|
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <signal.h>
|
#include <signal.h>
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
|
#ifndef _WIN32
|
||||||
#include <sys/time.h>
|
#include <sys/time.h>
|
||||||
#include <sys/resource.h>
|
#include <sys/resource.h>
|
||||||
#include <fcntl.h>
|
#else
|
||||||
|
#include <direct.h>
|
||||||
|
#endif
|
||||||
|
#include "safefcntl.h"
|
||||||
#include "safeunistd.h"
|
#include "safeunistd.h"
|
||||||
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
@ -42,10 +44,14 @@ using namespace std;
|
|||||||
#include "x11mon.h"
|
#include "x11mon.h"
|
||||||
#include "cancelcheck.h"
|
#include "cancelcheck.h"
|
||||||
#include "rcldb.h"
|
#include "rcldb.h"
|
||||||
|
#ifndef DISABLE_WEB_INDEXER
|
||||||
#include "beaglequeue.h"
|
#include "beaglequeue.h"
|
||||||
|
#endif
|
||||||
#include "recollindex.h"
|
#include "recollindex.h"
|
||||||
#include "fsindexer.h"
|
#include "fsindexer.h"
|
||||||
|
#ifndef _WIN32
|
||||||
#include "rclionice.h"
|
#include "rclionice.h"
|
||||||
|
#endif
|
||||||
#include "execmd.h"
|
#include "execmd.h"
|
||||||
#include "checkretryfailed.h"
|
#include "checkretryfailed.h"
|
||||||
|
|
||||||
@ -133,6 +139,7 @@ class MyUpdater : public DbIxStatusUpdater {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifndef DISABLE_X11MON
|
||||||
// If we are in the monitor, we also need to check X11 status
|
// If we are in the monitor, we also need to check X11 status
|
||||||
// during the initial indexing pass (else the user could log
|
// during the initial indexing pass (else the user could log
|
||||||
// out and the indexing would go on, not good (ie: if the user
|
// out and the indexing would go on, not good (ie: if the user
|
||||||
@ -142,7 +149,7 @@ class MyUpdater : public DbIxStatusUpdater {
|
|||||||
stopindexing = true;
|
stopindexing = true;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -177,11 +184,13 @@ static void makeIndexerOrExit(RclConfig *config, bool inPlaceReset)
|
|||||||
|
|
||||||
void rclIxIonice(const RclConfig *config)
|
void rclIxIonice(const RclConfig *config)
|
||||||
{
|
{
|
||||||
|
#ifndef _WIN32
|
||||||
string clss, classdata;
|
string clss, classdata;
|
||||||
if (!config->getConfParam("monioniceclass", clss) || clss.empty())
|
if (!config->getConfParam("monioniceclass", clss) || clss.empty())
|
||||||
clss = "3";
|
clss = "3";
|
||||||
config->getConfParam("monioniceclassdata", classdata);
|
config->getConfParam("monioniceclassdata", classdata);
|
||||||
rclionice(clss, classdata);
|
rclionice(clss, classdata);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
class MakeListWalkerCB : public FsTreeWalkerCB {
|
class MakeListWalkerCB : public FsTreeWalkerCB {
|
||||||
@ -273,7 +282,7 @@ static bool checktopdirs(RclConfig *config, vector<string>& nonexist)
|
|||||||
|
|
||||||
for (vector<string>::iterator it = tdl.begin(); it != tdl.end(); it++) {
|
for (vector<string>::iterator it = tdl.begin(); it != tdl.end(); it++) {
|
||||||
*it = path_tildexpand(*it);
|
*it = path_tildexpand(*it);
|
||||||
if (!it->size() || (*it)[0] != '/') {
|
if (!it->size() || !path_isabsolute(*it)) {
|
||||||
if ((*it)[0] == '~') {
|
if ((*it)[0] == '~') {
|
||||||
cerr << "Tilde expansion failed: " << *it << endl;
|
cerr << "Tilde expansion failed: " << *it << endl;
|
||||||
LOGERR(("recollindex: tilde expansion failed: %s\n",
|
LOGERR(("recollindex: tilde expansion failed: %s\n",
|
||||||
@ -285,7 +294,7 @@ static bool checktopdirs(RclConfig *config, vector<string>& nonexist)
|
|||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (access(it->c_str(), 0) < 0) {
|
if (!path_exists(*it)) {
|
||||||
nonexist.push_back(*it);
|
nonexist.push_back(*it);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -295,7 +304,7 @@ static bool checktopdirs(RclConfig *config, vector<string>& nonexist)
|
|||||||
if (config->getConfParam("skippedPaths", &tdl, true)) {
|
if (config->getConfParam("skippedPaths", &tdl, true)) {
|
||||||
for (vector<string>::iterator it = tdl.begin(); it != tdl.end(); it++) {
|
for (vector<string>::iterator it = tdl.begin(); it != tdl.end(); it++) {
|
||||||
*it = path_tildexpand(*it);
|
*it = path_tildexpand(*it);
|
||||||
if (access(it->c_str(), 0) < 0) {
|
if (!path_exists(*it)) {
|
||||||
nonexist.push_back(*it);
|
nonexist.push_back(*it);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -304,7 +313,7 @@ static bool checktopdirs(RclConfig *config, vector<string>& nonexist)
|
|||||||
if (config->getConfParam("daemSkippedPaths", &tdl, true)) {
|
if (config->getConfParam("daemSkippedPaths", &tdl, true)) {
|
||||||
for (vector<string>::iterator it = tdl.begin(); it != tdl.end(); it++) {
|
for (vector<string>::iterator it = tdl.begin(); it != tdl.end(); it++) {
|
||||||
*it = path_tildexpand(*it);
|
*it = path_tildexpand(*it);
|
||||||
if (access(it->c_str(), 0) < 0) {
|
if (!path_exists(*it)) {
|
||||||
nonexist.push_back(*it);
|
nonexist.push_back(*it);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -393,8 +402,10 @@ int main(int argc, char **argv)
|
|||||||
// The reexec struct is used by the daemon to shed memory after
|
// The reexec struct is used by the daemon to shed memory after
|
||||||
// the initial indexing pass and to restart when the configuration
|
// the initial indexing pass and to restart when the configuration
|
||||||
// changes
|
// changes
|
||||||
|
#ifndef _WIN32
|
||||||
o_reexec = new ReExec;
|
o_reexec = new ReExec;
|
||||||
o_reexec->init(argc, argv);
|
o_reexec->init(argc, argv);
|
||||||
|
#endif
|
||||||
|
|
||||||
thisprog = argv[0];
|
thisprog = argv[0];
|
||||||
argc--; argv++;
|
argc--; argv++;
|
||||||
@ -463,7 +474,9 @@ int main(int argc, char **argv)
|
|||||||
cerr << "Configuration problem: " << reason << endl;
|
cerr << "Configuration problem: " << reason << endl;
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
#ifndef _WIN32
|
||||||
o_reexec->atexit(cleanup);
|
o_reexec->atexit(cleanup);
|
||||||
|
#endif
|
||||||
|
|
||||||
vector<string> nonexist;
|
vector<string> nonexist;
|
||||||
if (!checktopdirs(config, nonexist))
|
if (!checktopdirs(config, nonexist))
|
||||||
@ -511,9 +524,11 @@ int main(int argc, char **argv)
|
|||||||
if (op_flags & OPT_k) {
|
if (op_flags & OPT_k) {
|
||||||
indexerFlags &= ~ConfIndexer::IxFNoRetryFailed;
|
indexerFlags &= ~ConfIndexer::IxFNoRetryFailed;
|
||||||
} else {
|
} else {
|
||||||
|
#ifndef _WIN32
|
||||||
if (checkRetryFailed(config, false)) {
|
if (checkRetryFailed(config, false)) {
|
||||||
indexerFlags &= ~ConfIndexer::IxFNoRetryFailed;
|
indexerFlags &= ~ConfIndexer::IxFNoRetryFailed;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
Pidfile pidfile(config->getPidfile());
|
Pidfile pidfile(config->getPidfile());
|
||||||
@ -522,12 +537,13 @@ int main(int argc, char **argv)
|
|||||||
// Log something at LOGINFO to reset the trace file. Else at level
|
// Log something at LOGINFO to reset the trace file. Else at level
|
||||||
// 3 it's not even truncated if all docs are up to date.
|
// 3 it's not even truncated if all docs are up to date.
|
||||||
LOGINFO(("recollindex: starting up\n"));
|
LOGINFO(("recollindex: starting up\n"));
|
||||||
|
#ifndef _WIN32
|
||||||
if (setpriority(PRIO_PROCESS, 0, 20) != 0) {
|
if (setpriority(PRIO_PROCESS, 0, 20) != 0) {
|
||||||
LOGINFO(("recollindex: can't setpriority(), errno %d\n", errno));
|
LOGINFO(("recollindex: can't setpriority(), errno %d\n", errno));
|
||||||
}
|
}
|
||||||
// Try to ionice. This does not work on all platforms
|
// Try to ionice. This does not work on all platforms
|
||||||
rclIxIonice(config);
|
rclIxIonice(config);
|
||||||
|
#endif
|
||||||
|
|
||||||
if (op_flags & (OPT_i|OPT_e)) {
|
if (op_flags & (OPT_i|OPT_e)) {
|
||||||
lockorexit(&pidfile);
|
lockorexit(&pidfile);
|
||||||
@ -596,15 +612,17 @@ int main(int argc, char **argv)
|
|||||||
lockorexit(&pidfile);
|
lockorexit(&pidfile);
|
||||||
if (!(op_flags&OPT_D)) {
|
if (!(op_flags&OPT_D)) {
|
||||||
LOGDEB(("recollindex: daemonizing\n"));
|
LOGDEB(("recollindex: daemonizing\n"));
|
||||||
|
#ifndef _WIN32
|
||||||
if (daemon(0,0) != 0) {
|
if (daemon(0,0) != 0) {
|
||||||
fprintf(stderr, "daemon() failed, errno %d\n", errno);
|
fprintf(stderr, "daemon() failed, errno %d\n", errno);
|
||||||
LOGERR(("daemon() failed, errno %d\n", errno));
|
LOGERR(("daemon() failed, errno %d\n", errno));
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
// Need to rewrite pid, it changed
|
// Need to rewrite pid, it changed
|
||||||
pidfile.write_pid();
|
pidfile.write_pid();
|
||||||
|
#ifndef _WIN32
|
||||||
// Not too sure if I have to redo the nice thing after daemon(),
|
// Not too sure if I have to redo the nice thing after daemon(),
|
||||||
// can't hurt anyway (easier than testing on all platforms...)
|
// can't hurt anyway (easier than testing on all platforms...)
|
||||||
if (setpriority(PRIO_PROCESS, 0, 20) != 0) {
|
if (setpriority(PRIO_PROCESS, 0, 20) != 0) {
|
||||||
@ -612,6 +630,7 @@ int main(int argc, char **argv)
|
|||||||
}
|
}
|
||||||
// Try to ionice. This does not work on all platforms
|
// Try to ionice. This does not work on all platforms
|
||||||
rclIxIonice(config);
|
rclIxIonice(config);
|
||||||
|
#endif
|
||||||
|
|
||||||
if (sleepsecs > 0) {
|
if (sleepsecs > 0) {
|
||||||
LOGDEB(("recollindex: sleeping %d\n", sleepsecs));
|
LOGDEB(("recollindex: sleeping %d\n", sleepsecs));
|
||||||
@ -633,12 +652,15 @@ int main(int argc, char **argv)
|
|||||||
"not going into monitor mode\n"));
|
"not going into monitor mode\n"));
|
||||||
exit(1);
|
exit(1);
|
||||||
} else {
|
} else {
|
||||||
|
#ifndef _WIN32
|
||||||
// Record success of indexing pass with failed files retries.
|
// Record success of indexing pass with failed files retries.
|
||||||
if (!(indexerFlags & ConfIndexer::IxFNoRetryFailed)) {
|
if (!(indexerFlags & ConfIndexer::IxFNoRetryFailed)) {
|
||||||
checkRetryFailed(config, true);
|
checkRetryFailed(config, true);
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
deleteZ(confindexer);
|
deleteZ(confindexer);
|
||||||
|
#ifndef _WIN32
|
||||||
o_reexec->insertArgs(vector<string>(1, "-n"));
|
o_reexec->insertArgs(vector<string>(1, "-n"));
|
||||||
LOGINFO(("recollindex: reexecuting with -n after initial full pass\n"));
|
LOGINFO(("recollindex: reexecuting with -n after initial full pass\n"));
|
||||||
// Note that -n will be inside the reexec when we come
|
// Note that -n will be inside the reexec when we come
|
||||||
@ -646,6 +668,7 @@ int main(int argc, char **argv)
|
|||||||
// starting a config change exec to ensure that we do a
|
// starting a config change exec to ensure that we do a
|
||||||
// purging pass in this case.
|
// purging pass in this case.
|
||||||
o_reexec->reexec();
|
o_reexec->reexec();
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
if (updater) {
|
if (updater) {
|
||||||
updater->status.phase = DbIxStatus::DBIXS_MONITOR;
|
updater->status.phase = DbIxStatus::DBIXS_MONITOR;
|
||||||
@ -672,11 +695,12 @@ int main(int argc, char **argv)
|
|||||||
makeIndexerOrExit(config, inPlaceReset);
|
makeIndexerOrExit(config, inPlaceReset);
|
||||||
bool status = confindexer->index(rezero, ConfIndexer::IxTAll,
|
bool status = confindexer->index(rezero, ConfIndexer::IxTAll,
|
||||||
indexerFlags);
|
indexerFlags);
|
||||||
|
#ifndef _WIN32
|
||||||
// Record success of indexing pass with failed files retries.
|
// Record success of indexing pass with failed files retries.
|
||||||
if (status && !(indexerFlags & ConfIndexer::IxFNoRetryFailed)) {
|
if (status && !(indexerFlags & ConfIndexer::IxFNoRetryFailed)) {
|
||||||
checkRetryFailed(config, true);
|
checkRetryFailed(config, true);
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
if (!status)
|
if (!status)
|
||||||
cerr << "Indexing failed" << endl;
|
cerr << "Indexing failed" << endl;
|
||||||
if (!confindexer->getReason().empty())
|
if (!confindexer->getReason().empty())
|
||||||
|
|||||||
@ -108,7 +108,7 @@ namespace Dijon
|
|||||||
*/
|
*/
|
||||||
virtual bool set_document_data(const std::string& mtype,
|
virtual bool set_document_data(const std::string& mtype,
|
||||||
const char *data_ptr,
|
const char *data_ptr,
|
||||||
unsigned int data_length) = 0;
|
size_t data_length) = 0;
|
||||||
|
|
||||||
/** (Re)initializes the filter with the given data.
|
/** (Re)initializes the filter with the given data.
|
||||||
* Call next_document() to position the filter onto the first document.
|
* Call next_document() to position the filter onto the first document.
|
||||||
@ -140,7 +140,7 @@ namespace Dijon
|
|||||||
stat() calls The value is stored inside metaData, docsize
|
stat() calls The value is stored inside metaData, docsize
|
||||||
key
|
key
|
||||||
*/
|
*/
|
||||||
virtual void set_docsize(size_t size) = 0;
|
virtual void set_docsize(off_t size) = 0;
|
||||||
|
|
||||||
// Going from one nested document to the next.
|
// Going from one nested document to the next.
|
||||||
|
|
||||||
|
|||||||
@ -46,7 +46,7 @@ void reapXAttrs(const RclConfig* cfg, const string& path,
|
|||||||
map<string, string>& xfields)
|
map<string, string>& xfields)
|
||||||
{
|
{
|
||||||
LOGDEB2(("reapXAttrs: [%s]\n", path.c_str()));
|
LOGDEB2(("reapXAttrs: [%s]\n", path.c_str()));
|
||||||
|
#ifndef _WIN32
|
||||||
// Retrieve xattrs names from files and mapping table from config
|
// Retrieve xattrs names from files and mapping table from config
|
||||||
vector<string> xnames;
|
vector<string> xnames;
|
||||||
if (!pxattr::list(path, &xnames)) {
|
if (!pxattr::list(path, &xnames)) {
|
||||||
@ -79,6 +79,7 @@ void reapXAttrs(const RclConfig* cfg, const string& path,
|
|||||||
xfields[key] = value;
|
xfields[key] = value;
|
||||||
LOGDEB2(("reapXAttrs: [%s] -> [%s]\n", key.c_str(), value.c_str()));
|
LOGDEB2(("reapXAttrs: [%s] -> [%s]\n", key.c_str(), value.c_str()));
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
void docFieldsFromXattrs(RclConfig *cfg, const map<string, string>& xfields,
|
void docFieldsFromXattrs(RclConfig *cfg, const map<string, string>& xfields,
|
||||||
@ -93,6 +94,7 @@ void docFieldsFromXattrs(RclConfig *cfg, const map<string, string>& xfields,
|
|||||||
void reapMetaCmds(RclConfig* cfg, const string& path,
|
void reapMetaCmds(RclConfig* cfg, const string& path,
|
||||||
map<string, string>& cfields)
|
map<string, string>& cfields)
|
||||||
{
|
{
|
||||||
|
#ifndef _WIN32
|
||||||
const vector<MDReaper>& reapers = cfg->getMDReapers();
|
const vector<MDReaper>& reapers = cfg->getMDReapers();
|
||||||
if (reapers.empty())
|
if (reapers.empty())
|
||||||
return;
|
return;
|
||||||
@ -111,6 +113,7 @@ void reapMetaCmds(RclConfig* cfg, const string& path,
|
|||||||
cfields[rp->fieldname] = output;
|
cfields[rp->fieldname] = output;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
// Set fields from external commands
|
// Set fields from external commands
|
||||||
|
|||||||
@ -169,7 +169,7 @@ void FileInterner::init(const string &f, const struct stat *stp, RclConfig *cnf,
|
|||||||
l_mime = *imime;
|
l_mime = *imime;
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t docsize = stp->st_size;
|
off_t docsize = stp->st_size;
|
||||||
|
|
||||||
if (!l_mime.empty()) {
|
if (!l_mime.empty()) {
|
||||||
// Has mime: check for a compressed file. If so, create a
|
// Has mime: check for a compressed file. If so, create a
|
||||||
@ -216,7 +216,7 @@ void FileInterner::init(const string &f, const struct stat *stp, RclConfig *cnf,
|
|||||||
m_mimetype = l_mime;
|
m_mimetype = l_mime;
|
||||||
RecollFilter *df = getMimeHandler(l_mime, m_cfg, !m_forPreview);
|
RecollFilter *df = getMimeHandler(l_mime, m_cfg, !m_forPreview);
|
||||||
|
|
||||||
if (!df or df->is_unknown()) {
|
if (!df || df->is_unknown()) {
|
||||||
// No real handler for this type, for now :(
|
// No real handler for this type, for now :(
|
||||||
LOGDEB(("FileInterner:: unprocessed mime: [%s] [%s]\n",
|
LOGDEB(("FileInterner:: unprocessed mime: [%s] [%s]\n",
|
||||||
l_mime.c_str(), f.c_str()));
|
l_mime.c_str(), f.c_str()));
|
||||||
@ -629,7 +629,7 @@ void FileInterner::popHandler()
|
|||||||
{
|
{
|
||||||
if (m_handlers.empty())
|
if (m_handlers.empty())
|
||||||
return;
|
return;
|
||||||
int i = m_handlers.size() - 1;
|
size_t i = m_handlers.size() - 1;
|
||||||
if (m_tmpflgs[i]) {
|
if (m_tmpflgs[i]) {
|
||||||
m_tempfiles.pop_back();
|
m_tempfiles.pop_back();
|
||||||
m_tmpflgs[i] = false;
|
m_tmpflgs[i] = false;
|
||||||
|
|||||||
@ -17,10 +17,10 @@
|
|||||||
#include "autoconfig.h"
|
#include "autoconfig.h"
|
||||||
|
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
#include <sys/wait.h>
|
#include <time.h>
|
||||||
|
#include "safesyswait.h"
|
||||||
|
|
||||||
#include <list>
|
#include <list>
|
||||||
using namespace std;
|
|
||||||
|
|
||||||
#include "cstr.h"
|
#include "cstr.h"
|
||||||
#include "execmd.h"
|
#include "execmd.h"
|
||||||
@ -32,6 +32,8 @@ using namespace std;
|
|||||||
#include "md5ut.h"
|
#include "md5ut.h"
|
||||||
#include "rclconfig.h"
|
#include "rclconfig.h"
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
// This is called periodically by ExeCmd when it is waiting for data,
|
// This is called periodically by ExeCmd when it is waiting for data,
|
||||||
// or when it does receive some. We may choose to interrupt the
|
// or when it does receive some. We may choose to interrupt the
|
||||||
// command.
|
// command.
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
/* Copyright (C) 2005 J.F.Dockes
|
/* Copyright (C) 2005 J.F.Dockes
|
||||||
* This program is free software; you can redistribute it and/or modify
|
* This program is free software; you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
* the Free Software Foundation; either version 2 of the License, or
|
* the Free Software Foundation; either version 2 of the License, or
|
||||||
@ -32,7 +32,7 @@ using namespace std;
|
|||||||
#include "idfile.h"
|
#include "idfile.h"
|
||||||
|
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
#include <sys/wait.h>
|
#include "safesyswait.h"
|
||||||
|
|
||||||
bool MimeHandlerExecMultiple::startCmd()
|
bool MimeHandlerExecMultiple::startCmd()
|
||||||
{
|
{
|
||||||
@ -90,6 +90,9 @@ bool MimeHandlerExecMultiple::readDataElement(string& name, string &data)
|
|||||||
LOGERR(("MHExecMultiple: getline error\n"));
|
LOGERR(("MHExecMultiple: getline error\n"));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
LOGDEB1(("MHEM:rde: line [%s]\n", ibuf.c_str()));
|
||||||
|
|
||||||
// Empty line (end of message) ?
|
// Empty line (end of message) ?
|
||||||
if (!ibuf.compare("\n")) {
|
if (!ibuf.compare("\n")) {
|
||||||
LOGDEB(("MHExecMultiple: Got empty line\n"));
|
LOGDEB(("MHExecMultiple: Got empty line\n"));
|
||||||
@ -163,7 +166,7 @@ bool MimeHandlerExecMultiple::next_document()
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (m_cmd.getChildPid() < 0 && !startCmd()) {
|
if (m_cmd.getChildPid() <= 0 && !startCmd()) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -14,13 +14,15 @@
|
|||||||
* Free Software Foundation, Inc.,
|
* Free Software Foundation, Inc.,
|
||||||
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
*/
|
*/
|
||||||
|
#include "autoconfig.h"
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <fcntl.h>
|
#include <fcntl.h>
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
#include <unistd.h>
|
#include "safeunistd.h"
|
||||||
#include <time.h>
|
#include <time.h>
|
||||||
#include <cstdlib>
|
#include <cstdlib>
|
||||||
|
#include "safesysstat.h"
|
||||||
|
|
||||||
#include <map>
|
#include <map>
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
|
|||||||
@ -22,7 +22,11 @@
|
|||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
#include "safesysstat.h"
|
#include "safesysstat.h"
|
||||||
#include <time.h>
|
#include <time.h>
|
||||||
|
#ifndef _WIN32
|
||||||
#include <regex.h>
|
#include <regex.h>
|
||||||
|
#else
|
||||||
|
#include <regex>
|
||||||
|
#endif
|
||||||
|
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <map>
|
#include <map>
|
||||||
@ -70,6 +74,14 @@ static PTMutexInit o_mcache_mutex;
|
|||||||
* offsets for all message "From_" lines follow. The format is purely
|
* offsets for all message "From_" lines follow. The format is purely
|
||||||
* binary, values are not even byte-swapped to be processor-independent.
|
* binary, values are not even byte-swapped to be processor-independent.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#ifdef _WIN32
|
||||||
|
// VC++ will not let us define an array of size o_b1size (because it is not a compile-time constant?)
|
||||||
|
#define M_o_b1size 1024
|
||||||
|
#else
|
||||||
|
#define M_o_b1size o_b1size
|
||||||
|
#endif
|
||||||
|
|
||||||
class MboxCache {
|
class MboxCache {
|
||||||
public:
|
public:
|
||||||
typedef MimeHandlerMbox::mbhoff_type mbhoff_type;
|
typedef MimeHandlerMbox::mbhoff_type mbhoff_type;
|
||||||
@ -98,7 +110,7 @@ public:
|
|||||||
}
|
}
|
||||||
FpKeeper keeper(&fp);
|
FpKeeper keeper(&fp);
|
||||||
|
|
||||||
char blk1[o_b1size];
|
char blk1[M_o_b1size];
|
||||||
if (fread(blk1, 1, o_b1size, fp) != o_b1size) {
|
if (fread(blk1, 1, o_b1size, fp) != o_b1size) {
|
||||||
LOGDEB0(("MboxCache::get_offsets: read blk1 errno %d\n", errno));
|
LOGDEB0(("MboxCache::get_offsets: read blk1 errno %d\n", errno));
|
||||||
return -1;
|
return -1;
|
||||||
@ -116,7 +128,7 @@ public:
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
mbhoff_type offset = -1;
|
mbhoff_type offset = -1;
|
||||||
int ret;
|
size_t ret;
|
||||||
if ((ret = fread(&offset, 1, sizeof(mbhoff_type), fp))
|
if ((ret = fread(&offset, 1, sizeof(mbhoff_type), fp))
|
||||||
!= sizeof(mbhoff_type)) {
|
!= sizeof(mbhoff_type)) {
|
||||||
LOGDEB0(("MboxCache::get_offsets: read ret %d errno %d\n",
|
LOGDEB0(("MboxCache::get_offsets: read ret %d errno %d\n",
|
||||||
@ -184,7 +196,7 @@ public:
|
|||||||
m_dir = "mboxcache";
|
m_dir = "mboxcache";
|
||||||
m_dir = path_tildexpand(m_dir);
|
m_dir = path_tildexpand(m_dir);
|
||||||
// If not an absolute path, compute relative to config dir
|
// If not an absolute path, compute relative to config dir
|
||||||
if (m_dir.at(0) != '/')
|
if (!path_isabsolute(m_dir))
|
||||||
m_dir = path_cat(config->getConfDir(), m_dir);
|
m_dir = path_cat(config->getConfDir(), m_dir);
|
||||||
m_ok = true;
|
m_ok = true;
|
||||||
}
|
}
|
||||||
@ -226,7 +238,6 @@ private:
|
|||||||
};
|
};
|
||||||
|
|
||||||
const size_t MboxCache::o_b1size = 1024;
|
const size_t MboxCache::o_b1size = 1024;
|
||||||
|
|
||||||
static class MboxCache o_mcache;
|
static class MboxCache o_mcache;
|
||||||
|
|
||||||
static const string cstr_keyquirks("mhmboxquirks");
|
static const string cstr_keyquirks("mhmboxquirks");
|
||||||
@ -307,7 +318,7 @@ bool MimeHandlerMbox::set_document_file(const string& mt, const string &fn)
|
|||||||
typedef char line_type[LL+10];
|
typedef char line_type[LL+10];
|
||||||
static inline void stripendnl(line_type& line, int& ll)
|
static inline void stripendnl(line_type& line, int& ll)
|
||||||
{
|
{
|
||||||
ll = strlen(line);
|
ll = int(strlen(line));
|
||||||
while (ll > 0) {
|
while (ll > 0) {
|
||||||
if (line[ll-1] == '\n' || line[ll-1] == '\r') {
|
if (line[ll-1] == '\n' || line[ll-1] == '\r') {
|
||||||
line[ll-1] = 0;
|
line[ll-1] = 0;
|
||||||
@ -376,9 +387,20 @@ static const char *frompat =
|
|||||||
// exactly like: From ^M (From followed by space and eol). We only
|
// exactly like: From ^M (From followed by space and eol). We only
|
||||||
// test for this if QUIRKS_TBIRD is set
|
// test for this if QUIRKS_TBIRD is set
|
||||||
static const char *miniTbirdFrom = "^From $";
|
static const char *miniTbirdFrom = "^From $";
|
||||||
|
#ifndef _WIN32
|
||||||
static regex_t fromregex;
|
static regex_t fromregex;
|
||||||
static regex_t minifromregex;
|
static regex_t minifromregex;
|
||||||
|
#define M_regexec(A,B,C,D,E) regexec(&(A),B,C,D,E)
|
||||||
|
#else
|
||||||
|
basic_regex<char> fromregex;
|
||||||
|
basic_regex<char> minifromregex;
|
||||||
|
#define REG_ICASE std::regex_constants::icase
|
||||||
|
#define REG_NOSUB std::regex_constants::nosubs
|
||||||
|
#define REG_EXTENDED std::regex_constants::extended
|
||||||
|
#define M_regexec(A, B, C, D, E) regex_match(B,A)
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
static bool regcompiled;
|
static bool regcompiled;
|
||||||
static PTMutexInit o_regex_mutex;
|
static PTMutexInit o_regex_mutex;
|
||||||
|
|
||||||
@ -390,8 +412,13 @@ static void compileregexes()
|
|||||||
// that we are alone.
|
// that we are alone.
|
||||||
if (regcompiled)
|
if (regcompiled)
|
||||||
return;
|
return;
|
||||||
|
#ifndef _WIN32
|
||||||
regcomp(&fromregex, frompat, REG_NOSUB|REG_EXTENDED);
|
regcomp(&fromregex, frompat, REG_NOSUB|REG_EXTENDED);
|
||||||
regcomp(&minifromregex, miniTbirdFrom, REG_NOSUB|REG_EXTENDED);
|
regcomp(&minifromregex, miniTbirdFrom, REG_NOSUB|REG_EXTENDED);
|
||||||
|
#else
|
||||||
|
fromregex = basic_regex<char>(frompat, REG_NOSUB | REG_EXTENDED);
|
||||||
|
minifromregex = basic_regex<char>(miniTbirdFrom, REG_NOSUB | REG_EXTENDED);
|
||||||
|
#endif
|
||||||
regcompiled = true;
|
regcompiled = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -440,9 +467,9 @@ bool MimeHandlerMbox::next_document()
|
|||||||
(off = o_mcache.get_offset(m_config, m_udi, mtarg)) >= 0 &&
|
(off = o_mcache.get_offset(m_config, m_udi, mtarg)) >= 0 &&
|
||||||
fseeko(fp, (off_t)off, SEEK_SET) >= 0 &&
|
fseeko(fp, (off_t)off, SEEK_SET) >= 0 &&
|
||||||
fgets(line, LL, fp) &&
|
fgets(line, LL, fp) &&
|
||||||
(!regexec(&fromregex, line, 0, 0, 0) ||
|
(!M_regexec(fromregex, line, 0, 0, 0) ||
|
||||||
((m_quirks & MBOXQUIRK_TBIRD) &&
|
((m_quirks & MBOXQUIRK_TBIRD) &&
|
||||||
!regexec(&minifromregex, line, 0, 0, 0))) ) {
|
!M_regexec(minifromregex, line, 0, 0, 0))) ) {
|
||||||
LOGDEB0(("MimeHandlerMbox: Cache: From_ Ok\n"));
|
LOGDEB0(("MimeHandlerMbox: Cache: From_ Ok\n"));
|
||||||
fseeko(fp, (off_t)off, SEEK_SET);
|
fseeko(fp, (off_t)off, SEEK_SET);
|
||||||
m_msgnum = mtarg -1;
|
m_msgnum = mtarg -1;
|
||||||
@ -485,9 +512,9 @@ bool MimeHandlerMbox::next_document()
|
|||||||
/* The 'F' compare is redundant but it improves performance
|
/* The 'F' compare is redundant but it improves performance
|
||||||
A LOT */
|
A LOT */
|
||||||
if (line[0] == 'F' && (
|
if (line[0] == 'F' && (
|
||||||
!regexec(&fromregex, line, 0, 0, 0) ||
|
!M_regexec(fromregex, line, 0, 0, 0) ||
|
||||||
((m_quirks & MBOXQUIRK_TBIRD) &&
|
((m_quirks & MBOXQUIRK_TBIRD) &&
|
||||||
!regexec(&minifromregex, line, 0, 0, 0)))
|
!M_regexec(minifromregex, line, 0, 0, 0)))
|
||||||
) {
|
) {
|
||||||
LOGDEB1(("MimeHandlerMbox: msgnum %d, "
|
LOGDEB1(("MimeHandlerMbox: msgnum %d, "
|
||||||
"From_ at line %d: [%s]\n", m_msgnum, m_lineno, line));
|
"From_ at line %d: [%s]\n", m_msgnum, m_lineno, line));
|
||||||
|
|||||||
59
src/internfile/mh_null.h
Normal file
59
src/internfile/mh_null.h
Normal file
@ -0,0 +1,59 @@
|
|||||||
|
/* Copyright (C) 2004 J.F.Dockes
|
||||||
|
* This program is free software; you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation; either version 2 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program; if not, write to the
|
||||||
|
* Free Software Foundation, Inc.,
|
||||||
|
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
|
*/
|
||||||
|
#ifndef _MH_NULL_H_INCLUDED_
|
||||||
|
#define _MH_NULL_H_INCLUDED_
|
||||||
|
|
||||||
|
// It may make sense in some cases to set this null filter (no output)
|
||||||
|
// instead of using recoll_noindex or leaving the default filter in
|
||||||
|
// case one doesn't want to install it: this will avoid endless retries
|
||||||
|
// to reindex the affected files, as recoll will think it has succeeded
|
||||||
|
// indexing them. Downside: the files won't be indexed when one
|
||||||
|
// actually installs the real filter, will need a -z
|
||||||
|
// Actually used for empty files
|
||||||
|
// Associated to application/x-zerosize, so use
|
||||||
|
// <mimetype> = internal application/x-zerosize
|
||||||
|
// in mimeconf
|
||||||
|
#include <string>
|
||||||
|
#include "cstr.h"
|
||||||
|
#include "mimehandler.h"
|
||||||
|
|
||||||
|
// Null filter: a RecollFilter that yields one document per input file, with
// its content set to cstr_null and its MIME type set to cstr_textplain.
class MimeHandlerNull : public RecollFilter {
|
||||||
|
public:
|
||||||
|
// Forwards the configuration pointer and handler id to RecollFilter.
MimeHandlerNull(RclConfig *cnf, const std::string& id)
|
||||||
|
: RecollFilter(cnf, id)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
virtual ~MimeHandlerNull()
|
||||||
|
{
|
||||||
|
}
|
||||||
|
// Registers the file with the base class, then marks one document as
// pending. Always returns true; the base-class call's result is not used.
virtual bool set_document_file(const string& mt, const string& fn)
|
||||||
|
{
|
||||||
|
RecollFilter::set_document_file(mt, fn);
|
||||||
|
return m_havedoc = true;
|
||||||
|
}
|
||||||
|
// Returns true exactly once after each set_document_file(), filling in the
// content and MIME type metadata; returns false when nothing is pending.
virtual bool next_document()
|
||||||
|
{
|
||||||
|
if (m_havedoc == false)
|
||||||
|
return false;
|
||||||
|
// Consume the pending document so that the next call returns false.
m_havedoc = false;
|
||||||
|
// cstr_null is presumably the empty string (defined in cstr.h) - TODO confirm.
m_metaData[cstr_dj_keycontent] = cstr_null;
|
||||||
|
m_metaData[cstr_dj_keymt] = cstr_textplain;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif /* _MH_NULL_H_INCLUDED_ */
|
||||||
@ -18,7 +18,7 @@
|
|||||||
#define _MH_SYMLINK_H_INCLUDED_
|
#define _MH_SYMLINK_H_INCLUDED_
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <unistd.h>
|
#include "safeunistd.h"
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
|
|
||||||
#include "cstr.h"
|
#include "cstr.h"
|
||||||
|
|||||||
@ -18,10 +18,13 @@
|
|||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
|
#include "safefcntl.h"
|
||||||
|
#include <sys/types.h>
|
||||||
|
#include "safesysstat.h"
|
||||||
|
#include "safeunistd.h"
|
||||||
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <string>
|
#include <string>
|
||||||
using namespace std;
|
|
||||||
|
|
||||||
#include "cstr.h"
|
#include "cstr.h"
|
||||||
#include "mh_text.h"
|
#include "mh_text.h"
|
||||||
@ -32,16 +35,23 @@ using namespace std;
|
|||||||
#include "pxattr.h"
|
#include "pxattr.h"
|
||||||
#include "pathut.h"
|
#include "pathut.h"
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
const int MB = 1024*1024;
|
const int MB = 1024*1024;
|
||||||
const int KB = 1024;
|
const int KB = 1024;
|
||||||
|
|
||||||
// Process a plain text file
|
// Process a plain text file
|
||||||
bool MimeHandlerText::set_document_file(const string& mt, const string &fn)
|
bool MimeHandlerText::set_document_file(const string& mt, const string &fn)
|
||||||
{
|
{
|
||||||
LOGDEB(("MimeHandlerText::set_document_file: [%s]\n", fn.c_str()));
|
LOGDEB(("MimeHandlerText::set_document_file: [%s] offs %lld\n",
|
||||||
|
fn.c_str(), m_offs));
|
||||||
|
|
||||||
RecollFilter::set_document_file(mt, fn);
|
RecollFilter::set_document_file(mt, fn);
|
||||||
|
|
||||||
m_fn = fn;
|
m_fn = fn;
|
||||||
|
// This should not be necessary, but it happens on msw that offset is large
|
||||||
|
// negative at this point, could not find the reason (still trying).
|
||||||
|
m_offs = 0;
|
||||||
|
|
||||||
// file size for oversize check
|
// file size for oversize check
|
||||||
long long fsize = path_filesize(m_fn);
|
long long fsize = path_filesize(m_fn);
|
||||||
@ -51,9 +61,11 @@ bool MimeHandlerText::set_document_file(const string& mt, const string &fn)
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifndef _WIN32
|
||||||
// Check for charset defined in extended attribute as per:
|
// Check for charset defined in extended attribute as per:
|
||||||
// http://freedesktop.org/wiki/CommonExtendedAttributes
|
// http://freedesktop.org/wiki/CommonExtendedAttributes
|
||||||
pxattr::get(m_fn, "charset", &m_charsetfromxattr);
|
pxattr::get(m_fn, "charset", &m_charsetfromxattr);
|
||||||
|
#endif
|
||||||
|
|
||||||
// Max file size parameter: texts over this size are not indexed
|
// Max file size parameter: texts over this size are not indexed
|
||||||
int maxmbs = 20;
|
int maxmbs = 20;
|
||||||
|
|||||||
@ -19,7 +19,6 @@
|
|||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
using std::string;
|
|
||||||
|
|
||||||
#include "mimehandler.h"
|
#include "mimehandler.h"
|
||||||
|
|
||||||
@ -30,22 +29,22 @@ using std::string;
|
|||||||
*/
|
*/
|
||||||
class MimeHandlerText : public RecollFilter {
|
class MimeHandlerText : public RecollFilter {
|
||||||
public:
|
public:
|
||||||
MimeHandlerText(RclConfig *cnf, const string& id)
|
MimeHandlerText(RclConfig *cnf, const std::string& id)
|
||||||
: RecollFilter(cnf, id), m_paging(false), m_offs(0)
|
: RecollFilter(cnf, id), m_paging(false), m_offs(0), m_pagesz(0)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
virtual ~MimeHandlerText()
|
virtual ~MimeHandlerText()
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
virtual bool set_document_file(const string& mt, const string &file_path);
|
virtual bool set_document_file(const std::string& mt, const std::string &file_path);
|
||||||
virtual bool set_document_string(const string&, const string&);
|
virtual bool set_document_string(const std::string&, const std::string&);
|
||||||
virtual bool is_data_input_ok(DataInput input) const {
|
virtual bool is_data_input_ok(DataInput input) const {
|
||||||
if (input == DOCUMENT_FILE_NAME || input == DOCUMENT_STRING)
|
if (input == DOCUMENT_FILE_NAME || input == DOCUMENT_STRING)
|
||||||
return true;
|
return true;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
virtual bool next_document();
|
virtual bool next_document();
|
||||||
virtual bool skip_to_document(const string& s);
|
virtual bool skip_to_document(const std::string& s);
|
||||||
virtual void clear()
|
virtual void clear()
|
||||||
{
|
{
|
||||||
m_paging = false;
|
m_paging = false;
|
||||||
@ -56,11 +55,11 @@ class MimeHandlerText : public RecollFilter {
|
|||||||
}
|
}
|
||||||
private:
|
private:
|
||||||
bool m_paging;
|
bool m_paging;
|
||||||
string m_text;
|
std::string m_text;
|
||||||
string m_fn;
|
std::string m_fn;
|
||||||
off_t m_offs; // Offset of next read in file if we're paging
|
off_t m_offs; // Offset of next read in file if we're paging
|
||||||
size_t m_pagesz;
|
size_t m_pagesz;
|
||||||
string m_charsetfromxattr;
|
std::string m_charsetfromxattr;
|
||||||
|
|
||||||
bool readnext();
|
bool readnext();
|
||||||
};
|
};
|
||||||
|
|||||||
@ -31,7 +31,6 @@ using namespace std;
|
|||||||
#include "rclconfig.h"
|
#include "rclconfig.h"
|
||||||
#include "smallut.h"
|
#include "smallut.h"
|
||||||
#include "md5ut.h"
|
#include "md5ut.h"
|
||||||
|
|
||||||
#include "mh_exec.h"
|
#include "mh_exec.h"
|
||||||
#include "mh_execm.h"
|
#include "mh_execm.h"
|
||||||
#include "mh_html.h"
|
#include "mh_html.h"
|
||||||
@ -40,6 +39,7 @@ using namespace std;
|
|||||||
#include "mh_text.h"
|
#include "mh_text.h"
|
||||||
#include "mh_symlink.h"
|
#include "mh_symlink.h"
|
||||||
#include "mh_unknown.h"
|
#include "mh_unknown.h"
|
||||||
|
#include "mh_null.h"
|
||||||
#include "ptmutex.h"
|
#include "ptmutex.h"
|
||||||
|
|
||||||
// Performance help: we use a pool of already known and created
|
// Performance help: we use a pool of already known and created
|
||||||
@ -163,6 +163,10 @@ static RecollFilter *mhFactory(RclConfig *config, const string &mime,
|
|||||||
LOGDEB2(("mhFactory(%s): ret MimeHandlerSymlink\n", mime.c_str()));
|
LOGDEB2(("mhFactory(%s): ret MimeHandlerSymlink\n", mime.c_str()));
|
||||||
MD5String("MimeHandlerSymlink", id);
|
MD5String("MimeHandlerSymlink", id);
|
||||||
return nobuild ? 0 : new MimeHandlerSymlink(config, id);
|
return nobuild ? 0 : new MimeHandlerSymlink(config, id);
|
||||||
|
} else if ("application/x-zerosize" == lmime) {
|
||||||
|
LOGDEB(("mhFactory(%s): ret MimeHandlerNull\n", mime.c_str()));
|
||||||
|
MD5String("MimeHandlerNull", id);
|
||||||
|
return nobuild ? 0 : new MimeHandlerNull(config, id);
|
||||||
} else if (lmime.find("text/") == 0) {
|
} else if (lmime.find("text/") == 0) {
|
||||||
// Try to handle unknown text/xx as text/plain. This
|
// Try to handle unknown text/xx as text/plain. This
|
||||||
// only happens if the text/xx was defined as "internal" in
|
// only happens if the text/xx was defined as "internal" in
|
||||||
@ -206,7 +210,7 @@ MimeHandlerExec *mhExecFactory(RclConfig *cfg, const string& mtype, string& hs,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Split command name and args, and build exec object
|
// Split command name and args, and build exec object
|
||||||
list<string> cmdtoks;
|
vector<string> cmdtoks;
|
||||||
stringToStrings(cmdstr, cmdtoks);
|
stringToStrings(cmdstr, cmdtoks);
|
||||||
if (cmdtoks.empty()) {
|
if (cmdtoks.empty()) {
|
||||||
LOGERR(("mhExecFactory: bad config line for [%s]: [%s]\n",
|
LOGERR(("mhExecFactory: bad config line for [%s]: [%s]\n",
|
||||||
@ -216,7 +220,22 @@ MimeHandlerExec *mhExecFactory(RclConfig *cfg, const string& mtype, string& hs,
|
|||||||
MimeHandlerExec *h = multiple ?
|
MimeHandlerExec *h = multiple ?
|
||||||
new MimeHandlerExecMultiple(cfg, id) :
|
new MimeHandlerExecMultiple(cfg, id) :
|
||||||
new MimeHandlerExec(cfg, id);
|
new MimeHandlerExec(cfg, id);
|
||||||
list<string>::iterator it = cmdtoks.begin();
|
vector<string>::iterator it = cmdtoks.begin();
|
||||||
|
|
||||||
|
// Special-case python and perl on windows: we need to also locate the
|
||||||
|
// first argument which is the script name "python somescript.py".
|
||||||
|
// On Unix, thanks to #!, we usually just run "somescript.py", but need
|
||||||
|
// the same change if we ever want to use the same command line as Windows
|
||||||
|
if (!stringlowercmp("python", *it) || !stringlowercmp("perl", *it)) {
|
||||||
|
if (cmdtoks.size() < 2) {
|
||||||
|
LOGERR(("mhExecFactory: python/perl cmd: no script?. [%s]: [%s]\n",
|
||||||
|
mtype.c_str(), hs.c_str()));
|
||||||
|
}
|
||||||
|
vector<string>::iterator it1(it);
|
||||||
|
it1++;
|
||||||
|
*it1 = cfg->findFilter(*it1);
|
||||||
|
}
|
||||||
|
|
||||||
h->params.push_back(cfg->findFilter(*it++));
|
h->params.push_back(cfg->findFilter(*it++));
|
||||||
h->params.insert(h->params.end(), it, cmdtoks.end());
|
h->params.insert(h->params.end(), it, cmdtoks.end());
|
||||||
|
|
||||||
|
|||||||
@ -86,12 +86,12 @@ public:
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
virtual bool set_document_data(const std::string& mtype,
|
virtual bool set_document_data(const std::string& mtype,
|
||||||
const char *cp, unsigned int sz)
|
const char *cp, size_t sz)
|
||||||
{
|
{
|
||||||
return set_document_string(mtype, std::string(cp, sz));
|
return set_document_string(mtype, std::string(cp, sz));
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual void set_docsize(size_t size)
|
virtual void set_docsize(off_t size)
|
||||||
{
|
{
|
||||||
char csize[30];
|
char csize[30];
|
||||||
sprintf(csize, "%lld", (long long)size);
|
sprintf(csize, "%lld", (long long)size);
|
||||||
|
|||||||
@ -23,6 +23,10 @@
|
|||||||
* -----END-LICENCE-----
|
* -----END-LICENCE-----
|
||||||
*/
|
*/
|
||||||
#include <time.h>
|
#include <time.h>
|
||||||
|
#ifdef _WIN32
|
||||||
|
// Local implementation in windows directory
|
||||||
|
#include "strptime.h"
|
||||||
|
#endif
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
|
|||||||
@ -108,8 +108,12 @@ bool Uncomp::uncompressfile(const string& ifn,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Execute command and retrieve output file name, check that it exists
|
// Execute command and retrieve output file name, check that it exists
|
||||||
|
#ifndef _WIN32
|
||||||
ExecCmd ex;
|
ExecCmd ex;
|
||||||
int status = ex.doexec(cmd, args, 0, &tfile);
|
int status = ex.doexec(cmd, args, 0, &tfile);
|
||||||
|
#else
|
||||||
|
int status = -1;
|
||||||
|
#endif
|
||||||
if (status || tfile.empty()) {
|
if (status || tfile.empty()) {
|
||||||
LOGERR(("uncompressfile: doexec: failed for [%s] status 0x%x\n",
|
LOGERR(("uncompressfile: doexec: failed for [%s] status 0x%x\n",
|
||||||
ifn.c_str(), status));
|
ifn.c_str(), status));
|
||||||
|
|||||||
@ -14,7 +14,6 @@
|
|||||||
* Free Software Foundation, Inc.,
|
* Free Software Foundation, Inc.,
|
||||||
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
*/
|
*/
|
||||||
#include <unistd.h>
|
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <cstdio>
|
#include <cstdio>
|
||||||
|
|||||||
@ -14,11 +14,8 @@
|
|||||||
* Free Software Foundation, Inc.,
|
* Free Software Foundation, Inc.,
|
||||||
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
#include "autoconfig.h"
|
#include "autoconfig.h"
|
||||||
|
|
||||||
#include <unistd.h>
|
|
||||||
#include <cstdlib>
|
#include <cstdlib>
|
||||||
|
|
||||||
#include <qapplication.h>
|
#include <qapplication.h>
|
||||||
@ -164,14 +161,6 @@ static void recollCleanup()
|
|||||||
LOGDEB2(("recollCleanup: done\n"));
|
LOGDEB2(("recollCleanup: done\n"));
|
||||||
}
|
}
|
||||||
|
|
||||||
static void sigcleanup(int)
|
|
||||||
{
|
|
||||||
// We used to not call exit from here, because of the idxthread, but
|
|
||||||
// this is now gone, so...
|
|
||||||
recollNeedsExit = 1;
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
void applyStyleSheet(const QString& ssfname)
|
void applyStyleSheet(const QString& ssfname)
|
||||||
{
|
{
|
||||||
const char *cfname = (const char *)ssfname.toLocal8Bit();
|
const char *cfname = (const char *)ssfname.toLocal8Bit();
|
||||||
@ -305,7 +294,7 @@ int main(int argc, char **argv)
|
|||||||
|
|
||||||
|
|
||||||
string reason;
|
string reason;
|
||||||
theconfig = recollinit(recollCleanup, sigcleanup, reason, &a_config);
|
theconfig = recollinit(recollCleanup, 0, reason, &a_config);
|
||||||
if (!theconfig || !theconfig->ok()) {
|
if (!theconfig || !theconfig->ok()) {
|
||||||
QString msg = "Configuration problem: ";
|
QString msg = "Configuration problem: ";
|
||||||
msg += QString::fromUtf8(reason.c_str());
|
msg += QString::fromUtf8(reason.c_str());
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@ -16,6 +16,8 @@
|
|||||||
*/
|
*/
|
||||||
#include "autoconfig.h"
|
#include "autoconfig.h"
|
||||||
|
|
||||||
|
#include <signal.h>
|
||||||
|
|
||||||
#include <QMessageBox>
|
#include <QMessageBox>
|
||||||
#include <QTimer>
|
#include <QTimer>
|
||||||
|
|
||||||
|
|||||||
@ -310,7 +310,7 @@ void RclMain::startNativeViewer(Rcl::Doc doc, int pagenum, QString term)
|
|||||||
istempfile = true;
|
istempfile = true;
|
||||||
rememberTempFile(temp);
|
rememberTempFile(temp);
|
||||||
fn = temp->filename();
|
fn = temp->filename();
|
||||||
url = string("file://") + fn;
|
url = path_pathtofileurl(fn);
|
||||||
}
|
}
|
||||||
|
|
||||||
// If using an actual file, check that it exists, and if it is
|
// If using an actual file, check that it exists, and if it is
|
||||||
@ -335,7 +335,7 @@ void RclMain::startNativeViewer(Rcl::Doc doc, int pagenum, QString term)
|
|||||||
if (temp) {
|
if (temp) {
|
||||||
rememberTempFile(temp);
|
rememberTempFile(temp);
|
||||||
fn = temp->filename();
|
fn = temp->filename();
|
||||||
url = string("file://") + fn;
|
url = path_pathtofileurl(fn);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -430,16 +430,16 @@ void RclMain::startManual()
|
|||||||
void RclMain::startManual(const string& index)
|
void RclMain::startManual(const string& index)
|
||||||
{
|
{
|
||||||
Rcl::Doc doc;
|
Rcl::Doc doc;
|
||||||
doc.url = "file://";
|
string path = theconfig->getDatadir();
|
||||||
doc.url = path_cat(doc.url, theconfig->getDatadir());
|
path = path_cat(path, "doc");
|
||||||
doc.url = path_cat(doc.url, "doc");
|
path = path_cat(path, "usermanual.html");
|
||||||
doc.url = path_cat(doc.url, "usermanual.html");
|
|
||||||
LOGDEB(("RclMain::startManual: help index is %s\n",
|
LOGDEB(("RclMain::startManual: help index is %s\n",
|
||||||
index.empty()?"(null)":index.c_str()));
|
index.empty()?"(null)":index.c_str()));
|
||||||
if (!index.empty()) {
|
if (!index.empty()) {
|
||||||
doc.url += "#";
|
path += "#";
|
||||||
doc.url += index;
|
path += index;
|
||||||
}
|
}
|
||||||
|
doc.url = path_pathtofileurl(path);
|
||||||
doc.mimetype = "text/html";
|
doc.mimetype = "text/html";
|
||||||
startNativeViewer(doc);
|
startNativeViewer(doc);
|
||||||
}
|
}
|
||||||
|
|||||||
@ -16,9 +16,6 @@
|
|||||||
*/
|
*/
|
||||||
#include "autoconfig.h"
|
#include "autoconfig.h"
|
||||||
|
|
||||||
#include <fcntl.h>
|
|
||||||
#include "safeunistd.h"
|
|
||||||
|
|
||||||
#include <utility>
|
#include <utility>
|
||||||
#include MEMORY_INCLUDE
|
#include MEMORY_INCLUDE
|
||||||
|
|
||||||
@ -138,10 +135,14 @@ void RclMain::init()
|
|||||||
|
|
||||||
// idxstatus file. Make sure it exists before trying to watch it
|
// idxstatus file. Make sure it exists before trying to watch it
|
||||||
// (case where we're started on an older index, or if the status
|
// (case where we're started on an older index, or if the status
|
||||||
// file was deleted since indexing
|
// file was deleted since indexing)
|
||||||
::close(::open(theconfig->getIdxStatusFile().c_str(), O_CREAT, 0600));
|
QString idxfn =
|
||||||
m_watcher.addPath(QString::fromLocal8Bit(
|
QString::fromLocal8Bit(theconfig->getIdxStatusFile().c_str());
|
||||||
theconfig->getIdxStatusFile().c_str()));
|
QFile qf(idxfn);
|
||||||
|
qf.open(QIODevice::ReadWrite);
|
||||||
|
qf.setPermissions(QFile::ReadOwner|QFile::WriteOwner);
|
||||||
|
qf.close();
|
||||||
|
m_watcher.addPath(idxfn);
|
||||||
// At least some versions of qt4 don't display the status bar if
|
// At least some versions of qt4 don't display the status bar if
|
||||||
// it's not created here.
|
// it's not created here.
|
||||||
(void)statusBar();
|
(void)statusBar();
|
||||||
@ -728,7 +729,7 @@ void RclMain::initiateQuery()
|
|||||||
qApp->processEvents();
|
qApp->processEvents();
|
||||||
if (progress.wasCanceled()) {
|
if (progress.wasCanceled()) {
|
||||||
// Just get out of there asap.
|
// Just get out of there asap.
|
||||||
_exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
qApp->processEvents();
|
qApp->processEvents();
|
||||||
|
|||||||
@ -19,6 +19,7 @@
|
|||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include "safesysstat.h"
|
#include "safesysstat.h"
|
||||||
#include "safeunistd.h"
|
#include "safeunistd.h"
|
||||||
|
#include <signal.h>
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
|
|||||||
@ -16,7 +16,6 @@
|
|||||||
*/
|
*/
|
||||||
#include "autoconfig.h"
|
#include "autoconfig.h"
|
||||||
|
|
||||||
#include <unistd.h>
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|||||||
@ -17,7 +17,6 @@
|
|||||||
#include "autoconfig.h"
|
#include "autoconfig.h"
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <unistd.h>
|
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <list>
|
#include <list>
|
||||||
|
|||||||
@ -14,6 +14,8 @@
|
|||||||
* Free Software Foundation, Inc.,
|
* Free Software Foundation, Inc.,
|
||||||
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
*/
|
*/
|
||||||
|
#include "autoconfig.h"
|
||||||
|
|
||||||
#include "docseq.h"
|
#include "docseq.h"
|
||||||
#include "filtseq.h"
|
#include "filtseq.h"
|
||||||
#include "sortseq.h"
|
#include "sortseq.h"
|
||||||
|
|||||||
@ -14,17 +14,20 @@
|
|||||||
* Free Software Foundation, Inc.,
|
* Free Software Foundation, Inc.,
|
||||||
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
*/
|
*/
|
||||||
|
#include "autoconfig.h"
|
||||||
|
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
#include <time.h>
|
#include <time.h>
|
||||||
|
|
||||||
#include <list>
|
#include <list>
|
||||||
using std::list;
|
|
||||||
|
|
||||||
#include "docseqdb.h"
|
#include "docseqdb.h"
|
||||||
#include "rcldb.h"
|
#include "rcldb.h"
|
||||||
#include "debuglog.h"
|
#include "debuglog.h"
|
||||||
#include "wasatorcl.h"
|
#include "wasatorcl.h"
|
||||||
|
|
||||||
|
using std::list;
|
||||||
|
|
||||||
DocSequenceDb::DocSequenceDb(STD_SHARED_PTR<Rcl::Query> q, const string &t,
|
DocSequenceDb::DocSequenceDb(STD_SHARED_PTR<Rcl::Query> q, const string &t,
|
||||||
STD_SHARED_PTR<Rcl::SearchData> sdata)
|
STD_SHARED_PTR<Rcl::SearchData> sdata)
|
||||||
: DocSequence(t), m_q(q), m_sdata(sdata), m_fsdata(sdata),
|
: DocSequence(t), m_q(q), m_sdata(sdata), m_fsdata(sdata),
|
||||||
|
|||||||
@ -35,7 +35,7 @@ using std::list;
|
|||||||
bool RclDHistoryEntry::encode(string& value)
|
bool RclDHistoryEntry::encode(string& value)
|
||||||
{
|
{
|
||||||
char chartime[30];
|
char chartime[30];
|
||||||
sprintf(chartime,"%ld", unixtime);
|
sprintf(chartime,"%lld", (long long)unixtime);
|
||||||
string budi;
|
string budi;
|
||||||
base64_encode(udi, budi);
|
base64_encode(udi, budi);
|
||||||
value = string("U ") + string(chartime) + " " + budi;
|
value = string("U ") + string(chartime) + " " + budi;
|
||||||
@ -161,5 +161,5 @@ int DocSequenceHistory::getResCnt()
|
|||||||
{
|
{
|
||||||
if (m_hlist.empty())
|
if (m_hlist.empty())
|
||||||
m_hlist = getDocHistory(m_hist);
|
m_hlist = getDocHistory(m_hist);
|
||||||
return m_hlist.size();
|
return int(m_hlist.size());
|
||||||
}
|
}
|
||||||
|
|||||||
@ -16,6 +16,7 @@
|
|||||||
*/
|
*/
|
||||||
#ifndef _DOCSEQHIST_H_INCLUDED_
|
#ifndef _DOCSEQHIST_H_INCLUDED_
|
||||||
#define _DOCSEQHIST_H_INCLUDED_
|
#define _DOCSEQHIST_H_INCLUDED_
|
||||||
|
#include <time.h>
|
||||||
|
|
||||||
#include "docseq.h"
|
#include "docseq.h"
|
||||||
#include "dynconf.h"
|
#include "dynconf.h"
|
||||||
@ -28,13 +29,13 @@ namespace Rcl {
|
|||||||
class RclDHistoryEntry : public DynConfEntry {
|
class RclDHistoryEntry : public DynConfEntry {
|
||||||
public:
|
public:
|
||||||
RclDHistoryEntry() : unixtime(0) {}
|
RclDHistoryEntry() : unixtime(0) {}
|
||||||
RclDHistoryEntry(long t, const string& u)
|
RclDHistoryEntry(time_t t, const string& u)
|
||||||
: unixtime(t), udi(u) {}
|
: unixtime(t), udi(u) {}
|
||||||
virtual ~RclDHistoryEntry() {}
|
virtual ~RclDHistoryEntry() {}
|
||||||
virtual bool decode(const string &value);
|
virtual bool decode(const string &value);
|
||||||
virtual bool encode(string& value);
|
virtual bool encode(string& value);
|
||||||
virtual bool equal(const DynConfEntry& other);
|
virtual bool equal(const DynConfEntry& other);
|
||||||
long unixtime;
|
time_t unixtime;
|
||||||
string udi;
|
string udi;
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -57,7 +58,7 @@ private:
|
|||||||
Rcl::Db *m_db;
|
Rcl::Db *m_db;
|
||||||
RclDynConf *m_hist;
|
RclDynConf *m_hist;
|
||||||
int m_prevnum;
|
int m_prevnum;
|
||||||
long m_prevtime;
|
time_t m_prevtime;
|
||||||
std::string m_description; // This is just an nls translated 'doc history'
|
std::string m_description; // This is just an nls translated 'doc history'
|
||||||
std::list<RclDHistoryEntry> m_hlist;
|
std::list<RclDHistoryEntry> m_hlist;
|
||||||
std::list<RclDHistoryEntry>::const_iterator m_it;
|
std::list<RclDHistoryEntry>::const_iterator m_it;
|
||||||
|
|||||||
187
src/query/location.hh
Normal file
187
src/query/location.hh
Normal file
@ -0,0 +1,187 @@
|
|||||||
|
// A Bison parser, made by GNU Bison 3.0.2.
|
||||||
|
|
||||||
|
// Locations for Bison parsers in C++
|
||||||
|
|
||||||
|
// Copyright (C) 2002-2013 Free Software Foundation, Inc.
|
||||||
|
|
||||||
|
// This program is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU General Public License as published by
|
||||||
|
// the Free Software Foundation, either version 3 of the License, or
|
||||||
|
// (at your option) any later version.
|
||||||
|
|
||||||
|
// This program is distributed in the hope that it will be useful,
|
||||||
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
// GNU General Public License for more details.
|
||||||
|
|
||||||
|
// You should have received a copy of the GNU General Public License
|
||||||
|
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
// As a special exception, you may create a larger work that contains
|
||||||
|
// part or all of the Bison parser skeleton and distribute that work
|
||||||
|
// under terms of your choice, so long as that work isn't itself a
|
||||||
|
// parser generator using the skeleton or a modified version thereof
|
||||||
|
// as a parser skeleton. Alternatively, if you modify or redistribute
|
||||||
|
// the parser skeleton itself, you may (at your option) remove this
|
||||||
|
// special exception, which will cause the skeleton and the resulting
|
||||||
|
// Bison output files to be licensed under the GNU General Public
|
||||||
|
// License without this special exception.
|
||||||
|
|
||||||
|
// This special exception was added by the Free Software Foundation in
|
||||||
|
// version 2.2 of Bison.
|
||||||
|
|
||||||
|
/**
|
||||||
|
** \file location.hh
|
||||||
|
** Define the yy::location class.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef YY_YY_LOCATION_HH_INCLUDED
|
||||||
|
# define YY_YY_LOCATION_HH_INCLUDED
|
||||||
|
|
||||||
|
# include "position.hh"
|
||||||
|
|
||||||
|
|
||||||
|
namespace yy {
|
||||||
|
#line 46 "location.hh" // location.cc:291
|
||||||
|
/// Abstract a location.
|
||||||
|
class location
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
|
||||||
|
/// Construct a location from \a b to \a e.
|
||||||
|
location (const position& b, const position& e)
|
||||||
|
: begin (b)
|
||||||
|
, end (e)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Construct a 0-width location in \a p.
|
||||||
|
explicit location (const position& p = position ())
|
||||||
|
: begin (p)
|
||||||
|
, end (p)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Construct a 0-width location in \a f, \a l, \a c.
|
||||||
|
explicit location (std::string* f,
|
||||||
|
unsigned int l = 1u,
|
||||||
|
unsigned int c = 1u)
|
||||||
|
: begin (f, l, c)
|
||||||
|
, end (f, l, c)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/// Initialization.
|
||||||
|
void initialize (std::string* f = YY_NULLPTR,
|
||||||
|
unsigned int l = 1u,
|
||||||
|
unsigned int c = 1u)
|
||||||
|
{
|
||||||
|
begin.initialize (f, l, c);
|
||||||
|
end = begin;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** \name Line and Column related manipulators
|
||||||
|
** \{ */
|
||||||
|
public:
|
||||||
|
/// Reset initial location to final location.
|
||||||
|
void step ()
|
||||||
|
{
|
||||||
|
begin = end;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Extend the current location to the COUNT next columns.
|
||||||
|
void columns (int count = 1)
|
||||||
|
{
|
||||||
|
end += count;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Extend the current location to the COUNT next lines.
|
||||||
|
void lines (int count = 1)
|
||||||
|
{
|
||||||
|
end.lines (count);
|
||||||
|
}
|
||||||
|
/** \} */
|
||||||
|
|
||||||
|
|
||||||
|
public:
|
||||||
|
/// Beginning of the located region.
|
||||||
|
position begin;
|
||||||
|
/// End of the located region.
|
||||||
|
position end;
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Join two location objects to create a location.
|
||||||
|
inline location operator+ (location res, const location& end)
|
||||||
|
{
|
||||||
|
res.end = end.end;
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Change end position in place.
|
||||||
|
inline location& operator+= (location& res, int width)
|
||||||
|
{
|
||||||
|
res.columns (width);
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Change end position.
|
||||||
|
inline location operator+ (location res, int width)
|
||||||
|
{
|
||||||
|
return res += width;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Change end position in place.
|
||||||
|
inline location& operator-= (location& res, int width)
|
||||||
|
{
|
||||||
|
return res += -width;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Change end position.
|
||||||
|
inline location operator- (const location& begin, int width)
|
||||||
|
{
|
||||||
|
return begin + -width;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Compare two location objects.
|
||||||
|
inline bool
|
||||||
|
operator== (const location& loc1, const location& loc2)
|
||||||
|
{
|
||||||
|
return loc1.begin == loc2.begin && loc1.end == loc2.end;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Compare two location objects.
|
||||||
|
inline bool
|
||||||
|
operator!= (const location& loc1, const location& loc2)
|
||||||
|
{
|
||||||
|
return !(loc1 == loc2);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** \brief Intercept output stream redirection.
|
||||||
|
** \param ostr the destination output stream
|
||||||
|
** \param loc a reference to the location to redirect
|
||||||
|
**
|
||||||
|
** Avoid duplicate information.
|
||||||
|
*/
|
||||||
|
template <typename YYChar>
|
||||||
|
inline std::basic_ostream<YYChar>&
|
||||||
|
operator<< (std::basic_ostream<YYChar>& ostr, const location& loc)
|
||||||
|
{
|
||||||
|
unsigned int end_col = 0 < loc.end.column ? loc.end.column - 1 : 0;
|
||||||
|
ostr << loc.begin// << "(" << loc.end << ") "
|
||||||
|
;
|
||||||
|
if (loc.end.filename
|
||||||
|
&& (!loc.begin.filename
|
||||||
|
|| *loc.begin.filename != *loc.end.filename))
|
||||||
|
ostr << '-' << loc.end.filename << ':' << loc.end.line << '.' << end_col;
|
||||||
|
else if (loc.begin.line < loc.end.line)
|
||||||
|
ostr << '-' << loc.end.line << '.' << end_col;
|
||||||
|
else if (loc.begin.column < end_col)
|
||||||
|
ostr << '-' << end_col;
|
||||||
|
return ostr;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
} // yy
|
||||||
|
#line 187 "location.hh" // location.cc:291
|
||||||
|
#endif // !YY_YY_LOCATION_HH_INCLUDED
|
||||||
@ -15,7 +15,7 @@
|
|||||||
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#include <limits.h>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
#include <list>
|
#include <list>
|
||||||
@ -54,8 +54,8 @@ struct MatchEntry {
|
|||||||
pair<int, int> offs;
|
pair<int, int> offs;
|
||||||
// Index of the search group this comes from: this is to relate a
|
// Index of the search group this comes from: this is to relate a
|
||||||
// match to the original user input.
|
// match to the original user input.
|
||||||
unsigned int grpidx;
|
size_t grpidx;
|
||||||
MatchEntry(int sta, int sto, unsigned int idx)
|
MatchEntry(int sta, int sto, size_t idx)
|
||||||
: offs(sta, sto), grpidx(idx)
|
: offs(sta, sto), grpidx(idx)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
@ -105,7 +105,7 @@ class TextSplitPTR : public TextSplit {
|
|||||||
// pos, bts, bte));
|
// pos, bts, bte));
|
||||||
|
|
||||||
// If this word is a search term, remember its byte-offset span.
|
// If this word is a search term, remember its byte-offset span.
|
||||||
map<string, unsigned int>::const_iterator it = m_terms.find(dumb);
|
map<string, size_t>::const_iterator it = m_terms.find(dumb);
|
||||||
if (it != m_terms.end()) {
|
if (it != m_terms.end()) {
|
||||||
tboffs.push_back(MatchEntry(bts, bte, (*it).second));
|
tboffs.push_back(MatchEntry(bts, bte, (*it).second));
|
||||||
}
|
}
|
||||||
@ -135,7 +135,7 @@ private:
|
|||||||
int m_wcount;
|
int m_wcount;
|
||||||
|
|
||||||
// In: user query terms
|
// In: user query terms
|
||||||
map<string, unsigned int> m_terms;
|
map<string, size_t> m_terms;
|
||||||
|
|
||||||
// m_gterms holds all the terms in m_groups, as a set for quick lookup
|
// m_gterms holds all the terms in m_groups, as a set for quick lookup
|
||||||
set<string> m_gterms;
|
set<string> m_gterms;
|
||||||
@ -214,7 +214,7 @@ static bool do_proximity_test(int window, vector<vector<int>* >& plists,
|
|||||||
bool TextSplitPTR::matchGroup(unsigned int grpidx)
|
bool TextSplitPTR::matchGroup(unsigned int grpidx)
|
||||||
{
|
{
|
||||||
const vector<string>& terms = m_hdata.groups[grpidx];
|
const vector<string>& terms = m_hdata.groups[grpidx];
|
||||||
int window = m_hdata.groups[grpidx].size() + m_hdata.slacks[grpidx];
|
int window = int(m_hdata.groups[grpidx].size() + m_hdata.slacks[grpidx]);
|
||||||
|
|
||||||
LOGDEB1(("TextSplitPTR::matchGroup:d %d: %s\n", window,
|
LOGDEB1(("TextSplitPTR::matchGroup:d %d: %s\n", window,
|
||||||
vecStringToString(terms).c_str()));
|
vecStringToString(terms).c_str()));
|
||||||
@ -270,7 +270,7 @@ bool TextSplitPTR::matchGroup(unsigned int grpidx)
|
|||||||
for (vector<int>::iterator it = plists[0]->begin();
|
for (vector<int>::iterator it = plists[0]->begin();
|
||||||
it != plists[0]->end(); it++) {
|
it != plists[0]->end(); it++) {
|
||||||
int pos = *it;
|
int pos = *it;
|
||||||
int sta = int(10E9), sto = 0;
|
int sta = INT_MAX, sto = 0;
|
||||||
LOGDEB2(("MatchGroup: Testing at pos %d\n", pos));
|
LOGDEB2(("MatchGroup: Testing at pos %d\n", pos));
|
||||||
if (do_proximity_test(window,plists, 1, pos, pos, &sta, &sto, minpos)) {
|
if (do_proximity_test(window,plists, 1, pos, pos, &sta, &sto, minpos)) {
|
||||||
LOGDEB1(("TextSplitPTR::matchGroup: MATCH termpos [%d,%d]\n",
|
LOGDEB1(("TextSplitPTR::matchGroup: MATCH termpos [%d,%d]\n",
|
||||||
@ -417,10 +417,10 @@ bool PlainToRich::plaintorich(const string& in,
|
|||||||
// If we still have terms positions, check (byte) position. If
|
// If we still have terms positions, check (byte) position. If
|
||||||
// we are at or after a term match, mark.
|
// we are at or after a term match, mark.
|
||||||
if (tPosIt != tPosEnd) {
|
if (tPosIt != tPosEnd) {
|
||||||
int ibyteidx = chariter.getBpos();
|
int ibyteidx = int(chariter.getBpos());
|
||||||
if (ibyteidx == tPosIt->offs.first) {
|
if (ibyteidx == tPosIt->offs.first) {
|
||||||
if (!intag && ibyteidx >= (int)headend) {
|
if (!intag && ibyteidx >= (int)headend) {
|
||||||
*olit += startMatch(tPosIt->grpidx);
|
*olit += startMatch((unsigned int)(tPosIt->grpidx));
|
||||||
}
|
}
|
||||||
inrcltag = 1;
|
inrcltag = 1;
|
||||||
} else if (ibyteidx == tPosIt->offs.second) {
|
} else if (ibyteidx == tPosIt->offs.second) {
|
||||||
|
|||||||
180
src/query/position.hh
Normal file
180
src/query/position.hh
Normal file
@ -0,0 +1,180 @@
|
|||||||
|
// A Bison parser, made by GNU Bison 3.0.2.
|
||||||
|
|
||||||
|
// Positions for Bison parsers in C++
|
||||||
|
|
||||||
|
// Copyright (C) 2002-2013 Free Software Foundation, Inc.
|
||||||
|
|
||||||
|
// This program is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU General Public License as published by
|
||||||
|
// the Free Software Foundation, either version 3 of the License, or
|
||||||
|
// (at your option) any later version.
|
||||||
|
|
||||||
|
// This program is distributed in the hope that it will be useful,
|
||||||
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
// GNU General Public License for more details.
|
||||||
|
|
||||||
|
// You should have received a copy of the GNU General Public License
|
||||||
|
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
// As a special exception, you may create a larger work that contains
|
||||||
|
// part or all of the Bison parser skeleton and distribute that work
|
||||||
|
// under terms of your choice, so long as that work isn't itself a
|
||||||
|
// parser generator using the skeleton or a modified version thereof
|
||||||
|
// as a parser skeleton. Alternatively, if you modify or redistribute
|
||||||
|
// the parser skeleton itself, you may (at your option) remove this
|
||||||
|
// special exception, which will cause the skeleton and the resulting
|
||||||
|
// Bison output files to be licensed under the GNU General Public
|
||||||
|
// License without this special exception.
|
||||||
|
|
||||||
|
// This special exception was added by the Free Software Foundation in
|
||||||
|
// version 2.2 of Bison.
|
||||||
|
|
||||||
|
/**
|
||||||
|
** \file position.hh
|
||||||
|
** Define the yy::position class.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef YY_YY_POSITION_HH_INCLUDED
|
||||||
|
# define YY_YY_POSITION_HH_INCLUDED
|
||||||
|
|
||||||
|
# include <algorithm> // std::max
|
||||||
|
# include <iostream>
|
||||||
|
# include <string>
|
||||||
|
|
||||||
|
# ifndef YY_NULLPTR
|
||||||
|
# if defined __cplusplus && 201103L <= __cplusplus
|
||||||
|
# define YY_NULLPTR nullptr
|
||||||
|
# else
|
||||||
|
# define YY_NULLPTR 0
|
||||||
|
# endif
|
||||||
|
# endif
|
||||||
|
|
||||||
|
|
||||||
|
namespace yy {
|
||||||
|
#line 56 "position.hh" // location.cc:291
|
||||||
|
/// Abstract a position.
|
||||||
|
class position
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
/// Construct a position.
|
||||||
|
explicit position (std::string* f = YY_NULLPTR,
|
||||||
|
unsigned int l = 1u,
|
||||||
|
unsigned int c = 1u)
|
||||||
|
: filename (f)
|
||||||
|
, line (l)
|
||||||
|
, column (c)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/// Initialization.
|
||||||
|
void initialize (std::string* fn = YY_NULLPTR,
|
||||||
|
unsigned int l = 1u,
|
||||||
|
unsigned int c = 1u)
|
||||||
|
{
|
||||||
|
filename = fn;
|
||||||
|
line = l;
|
||||||
|
column = c;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** \name Line and Column related manipulators
|
||||||
|
** \{ */
|
||||||
|
/// (line related) Advance to the COUNT next lines.
|
||||||
|
void lines (int count = 1)
|
||||||
|
{
|
||||||
|
if (count)
|
||||||
|
{
|
||||||
|
column = 1u;
|
||||||
|
line = add_ (line, count, 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// (column related) Advance to the COUNT next columns.
|
||||||
|
void columns (int count = 1)
|
||||||
|
{
|
||||||
|
column = add_ (column, count, 1);
|
||||||
|
}
|
||||||
|
/** \} */
|
||||||
|
|
||||||
|
/// File name to which this position refers.
|
||||||
|
std::string* filename;
|
||||||
|
/// Current line number.
|
||||||
|
unsigned int line;
|
||||||
|
/// Current column number.
|
||||||
|
unsigned int column;
|
||||||
|
|
||||||
|
private:
|
||||||
|
/// Compute max(min, lhs+rhs) (provided min <= lhs).
|
||||||
|
static unsigned int add_ (unsigned int lhs, int rhs, unsigned int min)
|
||||||
|
{
|
||||||
|
return (0 < rhs || -static_cast<unsigned int>(rhs) < lhs
|
||||||
|
? rhs + lhs
|
||||||
|
: min);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Add and assign a position.
|
||||||
|
inline position&
|
||||||
|
operator+= (position& res, int width)
|
||||||
|
{
|
||||||
|
res.columns (width);
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Add two position objects.
|
||||||
|
inline position
|
||||||
|
operator+ (position res, int width)
|
||||||
|
{
|
||||||
|
return res += width;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Add and assign a position.
|
||||||
|
inline position&
|
||||||
|
operator-= (position& res, int width)
|
||||||
|
{
|
||||||
|
return res += -width;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Add two position objects.
|
||||||
|
inline position
|
||||||
|
operator- (position res, int width)
|
||||||
|
{
|
||||||
|
return res -= width;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Compare two position objects.
|
||||||
|
inline bool
|
||||||
|
operator== (const position& pos1, const position& pos2)
|
||||||
|
{
|
||||||
|
return (pos1.line == pos2.line
|
||||||
|
&& pos1.column == pos2.column
|
||||||
|
&& (pos1.filename == pos2.filename
|
||||||
|
|| (pos1.filename && pos2.filename
|
||||||
|
&& *pos1.filename == *pos2.filename)));
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Compare two position objects.
|
||||||
|
inline bool
|
||||||
|
operator!= (const position& pos1, const position& pos2)
|
||||||
|
{
|
||||||
|
return !(pos1 == pos2);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** \brief Intercept output stream redirection.
|
||||||
|
** \param ostr the destination output stream
|
||||||
|
** \param pos a reference to the position to redirect
|
||||||
|
*/
|
||||||
|
template <typename YYChar>
|
||||||
|
inline std::basic_ostream<YYChar>&
|
||||||
|
operator<< (std::basic_ostream<YYChar>& ostr, const position& pos)
|
||||||
|
{
|
||||||
|
if (pos.filename)
|
||||||
|
ostr << *pos.filename << ':';
|
||||||
|
return ostr << pos.line << '.' << pos.column;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
} // yy
|
||||||
|
#line 180 "position.hh" // location.cc:291
|
||||||
|
#endif // !YY_YY_POSITION_HH_INCLUDED
|
||||||
@ -77,7 +77,7 @@ void ResListPager::resultPageNext()
|
|||||||
if (m_winfirst < 0) {
|
if (m_winfirst < 0) {
|
||||||
m_winfirst = 0;
|
m_winfirst = 0;
|
||||||
} else {
|
} else {
|
||||||
m_winfirst += m_respage.size();
|
m_winfirst += int(m_respage.size());
|
||||||
}
|
}
|
||||||
// Get the next page of results. Note that we look ahead by one to
|
// Get the next page of results. Note that we look ahead by one to
|
||||||
// determine if there is actually a next page
|
// determine if there is actually a next page
|
||||||
@ -102,7 +102,7 @@ void ResListPager::resultPageNext()
|
|||||||
// Next button. We'd need to remove the Next link from the page
|
// Next button. We'd need to remove the Next link from the page
|
||||||
// too.
|
// too.
|
||||||
// Restore the m_winfirst value, let the current result vector alone
|
// Restore the m_winfirst value, let the current result vector alone
|
||||||
m_winfirst -= m_respage.size();
|
m_winfirst -= int(m_respage.size());
|
||||||
} else {
|
} else {
|
||||||
// No results at all (on first page)
|
// No results at all (on first page)
|
||||||
m_winfirst = -1;
|
m_winfirst = -1;
|
||||||
@ -213,9 +213,9 @@ void ResListPager::displayDoc(RclConfig *config, int i, Rcl::Doc& doc,
|
|||||||
// Size information. We print both doc and file if they differ a lot
|
// Size information. We print both doc and file if they differ a lot
|
||||||
off_t fsize = -1, dsize = -1;
|
off_t fsize = -1, dsize = -1;
|
||||||
if (!doc.dbytes.empty())
|
if (!doc.dbytes.empty())
|
||||||
dsize = atoll(doc.dbytes.c_str());
|
dsize = static_cast<off_t>(atoll(doc.dbytes.c_str()));
|
||||||
if (!doc.fbytes.empty())
|
if (!doc.fbytes.empty())
|
||||||
fsize = atoll(doc.fbytes.c_str());
|
fsize = static_cast<off_t>(atoll(doc.fbytes.c_str()));
|
||||||
string sizebuf;
|
string sizebuf;
|
||||||
if (dsize > 0) {
|
if (dsize > 0) {
|
||||||
sizebuf = displayableBytes(dsize);
|
sizebuf = displayableBytes(dsize);
|
||||||
|
|||||||
@ -64,7 +64,7 @@ public:
|
|||||||
int pageLastDocNum() {
|
int pageLastDocNum() {
|
||||||
if (m_winfirst < 0 || m_respage.size() == 0)
|
if (m_winfirst < 0 || m_respage.size() == 0)
|
||||||
return -1;
|
return -1;
|
||||||
return m_winfirst + m_respage.size() - 1;
|
return m_winfirst + int(m_respage.size()) - 1;
|
||||||
}
|
}
|
||||||
virtual int pageSize() const {return m_pagesize;}
|
virtual int pageSize() const {return m_pagesize;}
|
||||||
void pageNext();
|
void pageNext();
|
||||||
|
|||||||
@ -39,7 +39,7 @@ class DocSeqSorted : public DocSeqModifier {
|
|||||||
virtual bool canSort() {return true;}
|
virtual bool canSort() {return true;}
|
||||||
virtual bool setSortSpec(const DocSeqSortSpec &sortspec);
|
virtual bool setSortSpec(const DocSeqSortSpec &sortspec);
|
||||||
virtual bool getDoc(int num, Rcl::Doc &doc, string *sh = 0);
|
virtual bool getDoc(int num, Rcl::Doc &doc, string *sh = 0);
|
||||||
virtual int getResCnt() {return m_docsp.size();}
|
virtual int getResCnt() {return int(m_docsp.size());}
|
||||||
private:
|
private:
|
||||||
DocSeqSortSpec m_spec;
|
DocSeqSortSpec m_spec;
|
||||||
std::vector<Rcl::Doc> m_docs;
|
std::vector<Rcl::Doc> m_docs;
|
||||||
|
|||||||
158
src/query/stack.hh
Normal file
158
src/query/stack.hh
Normal file
@ -0,0 +1,158 @@
|
|||||||
|
// A Bison parser, made by GNU Bison 3.0.2.
|
||||||
|
|
||||||
|
// Stack handling for Bison parsers in C++
|
||||||
|
|
||||||
|
// Copyright (C) 2002-2013 Free Software Foundation, Inc.
|
||||||
|
|
||||||
|
// This program is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU General Public License as published by
|
||||||
|
// the Free Software Foundation, either version 3 of the License, or
|
||||||
|
// (at your option) any later version.
|
||||||
|
|
||||||
|
// This program is distributed in the hope that it will be useful,
|
||||||
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
// GNU General Public License for more details.
|
||||||
|
|
||||||
|
// You should have received a copy of the GNU General Public License
|
||||||
|
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
// As a special exception, you may create a larger work that contains
|
||||||
|
// part or all of the Bison parser skeleton and distribute that work
|
||||||
|
// under terms of your choice, so long as that work isn't itself a
|
||||||
|
// parser generator using the skeleton or a modified version thereof
|
||||||
|
// as a parser skeleton. Alternatively, if you modify or redistribute
|
||||||
|
// the parser skeleton itself, you may (at your option) remove this
|
||||||
|
// special exception, which will cause the skeleton and the resulting
|
||||||
|
// Bison output files to be licensed under the GNU General Public
|
||||||
|
// License without this special exception.
|
||||||
|
|
||||||
|
// This special exception was added by the Free Software Foundation in
|
||||||
|
// version 2.2 of Bison.
|
||||||
|
|
||||||
|
/**
|
||||||
|
** \file stack.hh
|
||||||
|
** Define the yy::stack class.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef YY_YY_STACK_HH_INCLUDED
|
||||||
|
# define YY_YY_STACK_HH_INCLUDED
|
||||||
|
|
||||||
|
# include <vector>
|
||||||
|
|
||||||
|
|
||||||
|
namespace yy {
|
||||||
|
#line 46 "stack.hh" // stack.hh:133
|
||||||
|
template <class T, class S = std::vector<T> >
|
||||||
|
class stack
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
// Hide our reversed order.
|
||||||
|
typedef typename S::reverse_iterator iterator;
|
||||||
|
typedef typename S::const_reverse_iterator const_iterator;
|
||||||
|
|
||||||
|
stack ()
|
||||||
|
: seq_ ()
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
stack (unsigned int n)
|
||||||
|
: seq_ (n)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
T&
|
||||||
|
operator[] (unsigned int i)
|
||||||
|
{
|
||||||
|
return seq_[seq_.size () - 1 - i];
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
const T&
|
||||||
|
operator[] (unsigned int i) const
|
||||||
|
{
|
||||||
|
return seq_[seq_.size () - 1 - i];
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Steal the contents of \a t.
|
||||||
|
///
|
||||||
|
/// Close to move-semantics.
|
||||||
|
inline
|
||||||
|
void
|
||||||
|
push (T& t)
|
||||||
|
{
|
||||||
|
seq_.push_back (T());
|
||||||
|
operator[](0).move (t);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
void
|
||||||
|
pop (unsigned int n = 1)
|
||||||
|
{
|
||||||
|
for (; n; --n)
|
||||||
|
seq_.pop_back ();
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
clear ()
|
||||||
|
{
|
||||||
|
seq_.clear ();
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
typename S::size_type
|
||||||
|
size () const
|
||||||
|
{
|
||||||
|
return seq_.size ();
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
const_iterator
|
||||||
|
begin () const
|
||||||
|
{
|
||||||
|
return seq_.rbegin ();
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
const_iterator
|
||||||
|
end () const
|
||||||
|
{
|
||||||
|
return seq_.rend ();
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
stack (const stack&);
|
||||||
|
stack& operator= (const stack&);
|
||||||
|
/// The wrapped container.
|
||||||
|
S seq_;
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Present a slice of the top of a stack.
|
||||||
|
template <class T, class S = stack<T> >
|
||||||
|
class slice
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
slice (const S& stack, unsigned int range)
|
||||||
|
: stack_ (stack)
|
||||||
|
, range_ (range)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
const T&
|
||||||
|
operator [] (unsigned int i) const
|
||||||
|
{
|
||||||
|
return stack_[range_ - i];
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
const S& stack_;
|
||||||
|
unsigned int range_;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
} // yy
|
||||||
|
#line 157 "stack.hh" // stack.hh:133
|
||||||
|
|
||||||
|
#endif // !YY_YY_STACK_HH_INCLUDED
|
||||||
1517
src/query/wasaparse.cpp
Normal file
1517
src/query/wasaparse.cpp
Normal file
File diff suppressed because it is too large
Load Diff
476
src/query/wasaparse.hpp
Normal file
476
src/query/wasaparse.hpp
Normal file
@ -0,0 +1,476 @@
|
|||||||
|
// A Bison parser, made by GNU Bison 3.0.2.
|
||||||
|
|
||||||
|
// Skeleton interface for Bison LALR(1) parsers in C++
|
||||||
|
|
||||||
|
// Copyright (C) 2002-2013 Free Software Foundation, Inc.
|
||||||
|
|
||||||
|
// This program is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU General Public License as published by
|
||||||
|
// the Free Software Foundation, either version 3 of the License, or
|
||||||
|
// (at your option) any later version.
|
||||||
|
|
||||||
|
// This program is distributed in the hope that it will be useful,
|
||||||
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
// GNU General Public License for more details.
|
||||||
|
|
||||||
|
// You should have received a copy of the GNU General Public License
|
||||||
|
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
// As a special exception, you may create a larger work that contains
|
||||||
|
// part or all of the Bison parser skeleton and distribute that work
|
||||||
|
// under terms of your choice, so long as that work isn't itself a
|
||||||
|
// parser generator using the skeleton or a modified version thereof
|
||||||
|
// as a parser skeleton. Alternatively, if you modify or redistribute
|
||||||
|
// the parser skeleton itself, you may (at your option) remove this
|
||||||
|
// special exception, which will cause the skeleton and the resulting
|
||||||
|
// Bison output files to be licensed under the GNU General Public
|
||||||
|
// License without this special exception.
|
||||||
|
|
||||||
|
// This special exception was added by the Free Software Foundation in
|
||||||
|
// version 2.2 of Bison.
|
||||||
|
|
||||||
|
/**
|
||||||
|
** \file y.tab.h
|
||||||
|
** Define the yy::parser class.
|
||||||
|
*/
|
||||||
|
|
||||||
|
// C++ LALR(1) parser skeleton written by Akim Demaille.
|
||||||
|
|
||||||
|
#ifndef YY_YY_Y_TAB_H_INCLUDED
|
||||||
|
# define YY_YY_Y_TAB_H_INCLUDED
|
||||||
|
|
||||||
|
|
||||||
|
# include <vector>
|
||||||
|
# include <iostream>
|
||||||
|
# include <stdexcept>
|
||||||
|
# include <string>
|
||||||
|
# include "stack.hh"
|
||||||
|
# include "location.hh"
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef YY_ATTRIBUTE
|
||||||
|
# if (defined __GNUC__ \
|
||||||
|
&& (2 < __GNUC__ || (__GNUC__ == 2 && 96 <= __GNUC_MINOR__))) \
|
||||||
|
|| defined __SUNPRO_C && 0x5110 <= __SUNPRO_C
|
||||||
|
# define YY_ATTRIBUTE(Spec) __attribute__(Spec)
|
||||||
|
# else
|
||||||
|
# define YY_ATTRIBUTE(Spec) /* empty */
|
||||||
|
# endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef YY_ATTRIBUTE_PURE
|
||||||
|
# define YY_ATTRIBUTE_PURE YY_ATTRIBUTE ((__pure__))
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef YY_ATTRIBUTE_UNUSED
|
||||||
|
# define YY_ATTRIBUTE_UNUSED YY_ATTRIBUTE ((__unused__))
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if !defined _Noreturn \
|
||||||
|
&& (!defined __STDC_VERSION__ || __STDC_VERSION__ < 201112)
|
||||||
|
# if defined _MSC_VER && 1200 <= _MSC_VER
|
||||||
|
# define _Noreturn __declspec (noreturn)
|
||||||
|
# else
|
||||||
|
# define _Noreturn YY_ATTRIBUTE ((__noreturn__))
|
||||||
|
# endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Suppress unused-variable warnings by "using" E. */
|
||||||
|
#if ! defined lint || defined __GNUC__
|
||||||
|
# define YYUSE(E) ((void) (E))
|
||||||
|
#else
|
||||||
|
# define YYUSE(E) /* empty */
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined __GNUC__ && 407 <= __GNUC__ * 100 + __GNUC_MINOR__
|
||||||
|
/* Suppress an incorrect diagnostic about yylval being uninitialized. */
|
||||||
|
# define YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN \
|
||||||
|
_Pragma ("GCC diagnostic push") \
|
||||||
|
_Pragma ("GCC diagnostic ignored \"-Wuninitialized\"")\
|
||||||
|
_Pragma ("GCC diagnostic ignored \"-Wmaybe-uninitialized\"")
|
||||||
|
# define YY_IGNORE_MAYBE_UNINITIALIZED_END \
|
||||||
|
_Pragma ("GCC diagnostic pop")
|
||||||
|
#else
|
||||||
|
# define YY_INITIAL_VALUE(Value) Value
|
||||||
|
#endif
|
||||||
|
#ifndef YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN
|
||||||
|
# define YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN
|
||||||
|
# define YY_IGNORE_MAYBE_UNINITIALIZED_END
|
||||||
|
#endif
|
||||||
|
#ifndef YY_INITIAL_VALUE
|
||||||
|
# define YY_INITIAL_VALUE(Value) /* Nothing. */
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Debug traces. */
|
||||||
|
#ifndef YYDEBUG
|
||||||
|
# define YYDEBUG 0
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
namespace yy {
|
||||||
|
#line 113 "y.tab.h" // lalr1.cc:372
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/// A Bison parser.
|
||||||
|
class parser
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
#ifndef YYSTYPE
|
||||||
|
/// Symbol semantic values.
|
||||||
|
union semantic_type
|
||||||
|
{
|
||||||
|
#line 44 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:372
|
||||||
|
|
||||||
|
std::string *str;
|
||||||
|
Rcl::SearchDataClauseSimple *cl;
|
||||||
|
Rcl::SearchData *sd;
|
||||||
|
|
||||||
|
#line 133 "y.tab.h" // lalr1.cc:372
|
||||||
|
};
|
||||||
|
#else
|
||||||
|
typedef YYSTYPE semantic_type;
|
||||||
|
#endif
|
||||||
|
/// Symbol locations.
|
||||||
|
typedef location location_type;
|
||||||
|
|
||||||
|
/// Syntax errors thrown from user actions.
|
||||||
|
struct syntax_error : std::runtime_error
|
||||||
|
{
|
||||||
|
syntax_error (const location_type& l, const std::string& m);
|
||||||
|
location_type location;
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Tokens.
|
||||||
|
struct token
|
||||||
|
{
|
||||||
|
enum yytokentype
|
||||||
|
{
|
||||||
|
WORD = 258,
|
||||||
|
QUOTED = 259,
|
||||||
|
QUALIFIERS = 260,
|
||||||
|
AND = 261,
|
||||||
|
UCONCAT = 262,
|
||||||
|
OR = 263,
|
||||||
|
EQUALS = 264,
|
||||||
|
CONTAINS = 265,
|
||||||
|
SMALLEREQ = 266,
|
||||||
|
SMALLER = 267,
|
||||||
|
GREATEREQ = 268,
|
||||||
|
GREATER = 269
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
/// (External) token type, as returned by yylex.
|
||||||
|
typedef token::yytokentype token_type;
|
||||||
|
|
||||||
|
/// Internal symbol number.
|
||||||
|
typedef int symbol_number_type;
|
||||||
|
|
||||||
|
/// Internal symbol number for tokens (subsumed by symbol_number_type).
|
||||||
|
typedef unsigned char token_number_type;
|
||||||
|
|
||||||
|
/// A complete symbol.
|
||||||
|
///
|
||||||
|
/// Expects its Base type to provide access to the symbol type
|
||||||
|
/// via type_get().
|
||||||
|
///
|
||||||
|
/// Provide access to semantic value and location.
|
||||||
|
template <typename Base>
|
||||||
|
struct basic_symbol : Base
|
||||||
|
{
|
||||||
|
/// Alias to Base.
|
||||||
|
typedef Base super_type;
|
||||||
|
|
||||||
|
/// Default constructor.
|
||||||
|
basic_symbol ();
|
||||||
|
|
||||||
|
/// Copy constructor.
|
||||||
|
basic_symbol (const basic_symbol& other);
|
||||||
|
|
||||||
|
/// Constructor for valueless symbols.
|
||||||
|
basic_symbol (typename Base::kind_type t,
|
||||||
|
const location_type& l);
|
||||||
|
|
||||||
|
/// Constructor for symbols with semantic value.
|
||||||
|
basic_symbol (typename Base::kind_type t,
|
||||||
|
const semantic_type& v,
|
||||||
|
const location_type& l);
|
||||||
|
|
||||||
|
~basic_symbol ();
|
||||||
|
|
||||||
|
/// Destructive move, \a s is emptied into this.
|
||||||
|
void move (basic_symbol& s);
|
||||||
|
|
||||||
|
/// The semantic value.
|
||||||
|
semantic_type value;
|
||||||
|
|
||||||
|
/// The location.
|
||||||
|
location_type location;
|
||||||
|
|
||||||
|
private:
|
||||||
|
/// Assignment operator.
|
||||||
|
basic_symbol& operator= (const basic_symbol& other);
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Type access provider for token (enum) based symbols.
|
||||||
|
struct by_type
|
||||||
|
{
|
||||||
|
/// Default constructor.
|
||||||
|
by_type ();
|
||||||
|
|
||||||
|
/// Copy constructor.
|
||||||
|
by_type (const by_type& other);
|
||||||
|
|
||||||
|
/// The symbol type as needed by the constructor.
|
||||||
|
typedef token_type kind_type;
|
||||||
|
|
||||||
|
/// Constructor from (external) token numbers.
|
||||||
|
by_type (kind_type t);
|
||||||
|
|
||||||
|
/// Steal the symbol type from \a that.
|
||||||
|
void move (by_type& that);
|
||||||
|
|
||||||
|
/// The (internal) type number (corresponding to \a type).
|
||||||
|
/// -1 when this symbol is empty.
|
||||||
|
symbol_number_type type_get () const;
|
||||||
|
|
||||||
|
/// The token.
|
||||||
|
token_type token () const;
|
||||||
|
|
||||||
|
enum { empty = 0 };
|
||||||
|
|
||||||
|
/// The symbol type.
|
||||||
|
/// -1 when this symbol is empty.
|
||||||
|
token_number_type type;
|
||||||
|
};
|
||||||
|
|
||||||
|
/// "External" symbols: returned by the scanner.
|
||||||
|
typedef basic_symbol<by_type> symbol_type;
|
||||||
|
|
||||||
|
|
||||||
|
/// Build a parser object.
|
||||||
|
parser (WasaParserDriver* d_yyarg);
|
||||||
|
virtual ~parser ();
|
||||||
|
|
||||||
|
/// Parse.
|
||||||
|
/// \returns 0 iff parsing succeeded.
|
||||||
|
virtual int parse ();
|
||||||
|
|
||||||
|
#if YYDEBUG
|
||||||
|
/// The current debugging stream.
|
||||||
|
std::ostream& debug_stream () const YY_ATTRIBUTE_PURE;
|
||||||
|
/// Set the current debugging stream.
|
||||||
|
void set_debug_stream (std::ostream &);
|
||||||
|
|
||||||
|
/// Type for debugging levels.
|
||||||
|
typedef int debug_level_type;
|
||||||
|
/// The current debugging level.
|
||||||
|
debug_level_type debug_level () const YY_ATTRIBUTE_PURE;
|
||||||
|
/// Set the current debugging level.
|
||||||
|
void set_debug_level (debug_level_type l);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/// Report a syntax error.
|
||||||
|
/// \param loc where the syntax error is found.
|
||||||
|
/// \param msg a description of the syntax error.
|
||||||
|
virtual void error (const location_type& loc, const std::string& msg);
|
||||||
|
|
||||||
|
/// Report a syntax error.
|
||||||
|
void error (const syntax_error& err);
|
||||||
|
|
||||||
|
private:
|
||||||
|
/// This class is not copyable.
|
||||||
|
parser (const parser&);
|
||||||
|
parser& operator= (const parser&);
|
||||||
|
|
||||||
|
/// State numbers.
|
||||||
|
typedef int state_type;
|
||||||
|
|
||||||
|
/// Generate an error message.
|
||||||
|
/// \param yystate the state where the error occurred.
|
||||||
|
/// \param yytoken the lookahead token type, or yyempty_.
|
||||||
|
virtual std::string yysyntax_error_ (state_type yystate,
|
||||||
|
symbol_number_type yytoken) const;
|
||||||
|
|
||||||
|
/// Compute post-reduction state.
|
||||||
|
/// \param yystate the current state
|
||||||
|
/// \param yysym the nonterminal to push on the stack
|
||||||
|
state_type yy_lr_goto_state_ (state_type yystate, int yysym);
|
||||||
|
|
||||||
|
/// Whether the given \c yypact_ value indicates a defaulted state.
|
||||||
|
/// \param yyvalue the value to check
|
||||||
|
static bool yy_pact_value_is_default_ (int yyvalue);
|
||||||
|
|
||||||
|
/// Whether the given \c yytable_ value indicates a syntax error.
|
||||||
|
/// \param yyvalue the value to check
|
||||||
|
static bool yy_table_value_is_error_ (int yyvalue);
|
||||||
|
|
||||||
|
static const signed char yypact_ninf_;
|
||||||
|
static const signed char yytable_ninf_;
|
||||||
|
|
||||||
|
/// Convert a scanner token number \a t to a symbol number.
|
||||||
|
static token_number_type yytranslate_ (int t);
|
||||||
|
|
||||||
|
// Tables.
|
||||||
|
// YYPACT[STATE-NUM] -- Index in YYTABLE of the portion describing
|
||||||
|
// STATE-NUM.
|
||||||
|
static const signed char yypact_[];
|
||||||
|
|
||||||
|
// YYDEFACT[STATE-NUM] -- Default reduction number in state STATE-NUM.
|
||||||
|
// Performed when YYTABLE does not specify something else to do. Zero
|
||||||
|
// means the default is an error.
|
||||||
|
static const unsigned char yydefact_[];
|
||||||
|
|
||||||
|
// YYPGOTO[NTERM-NUM].
|
||||||
|
static const signed char yypgoto_[];
|
||||||
|
|
||||||
|
// YYDEFGOTO[NTERM-NUM].
|
||||||
|
static const signed char yydefgoto_[];
|
||||||
|
|
||||||
|
// YYTABLE[YYPACT[STATE-NUM]] -- What to do in state STATE-NUM. If
|
||||||
|
// positive, shift that token. If negative, reduce the rule whose
|
||||||
|
// number is the opposite. If YYTABLE_NINF, syntax error.
|
||||||
|
static const signed char yytable_[];
|
||||||
|
|
||||||
|
static const signed char yycheck_[];
|
||||||
|
|
||||||
|
// YYSTOS[STATE-NUM] -- The (internal number of the) accessing
|
||||||
|
// symbol of state STATE-NUM.
|
||||||
|
static const unsigned char yystos_[];
|
||||||
|
|
||||||
|
// YYR1[YYN] -- Symbol number of symbol that rule YYN derives.
|
||||||
|
static const unsigned char yyr1_[];
|
||||||
|
|
||||||
|
// YYR2[YYN] -- Number of symbols on the right hand side of rule YYN.
|
||||||
|
static const unsigned char yyr2_[];
|
||||||
|
|
||||||
|
|
||||||
|
/// Convert the symbol name \a n to a form suitable for a diagnostic.
|
||||||
|
static std::string yytnamerr_ (const char *n);
|
||||||
|
|
||||||
|
|
||||||
|
/// For a symbol, its name in clear.
|
||||||
|
static const char* const yytname_[];
|
||||||
|
#if YYDEBUG
|
||||||
|
// YYRLINE[YYN] -- Source line where rule number YYN was defined.
|
||||||
|
static const unsigned char yyrline_[];
|
||||||
|
/// Report on the debug stream that the rule \a r is going to be reduced.
|
||||||
|
virtual void yy_reduce_print_ (int r);
|
||||||
|
/// Print the state stack on the debug stream.
|
||||||
|
virtual void yystack_print_ ();
|
||||||
|
|
||||||
|
// Debugging.
|
||||||
|
int yydebug_;
|
||||||
|
std::ostream* yycdebug_;
|
||||||
|
|
||||||
|
/// \brief Display a symbol type, value and location.
|
||||||
|
/// \param yyo The output stream.
|
||||||
|
/// \param yysym The symbol.
|
||||||
|
template <typename Base>
|
||||||
|
void yy_print_ (std::ostream& yyo, const basic_symbol<Base>& yysym) const;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/// \brief Reclaim the memory associated to a symbol.
|
||||||
|
/// \param yymsg Why this token is reclaimed.
|
||||||
|
/// If null, print nothing.
|
||||||
|
/// \param yysym The symbol.
|
||||||
|
template <typename Base>
|
||||||
|
void yy_destroy_ (const char* yymsg, basic_symbol<Base>& yysym) const;
|
||||||
|
|
||||||
|
private:
|
||||||
|
/// Type access provider for state based symbols.
|
||||||
|
struct by_state
|
||||||
|
{
|
||||||
|
/// Default constructor.
|
||||||
|
by_state ();
|
||||||
|
|
||||||
|
/// The symbol type as needed by the constructor.
|
||||||
|
typedef state_type kind_type;
|
||||||
|
|
||||||
|
/// Constructor.
|
||||||
|
by_state (kind_type s);
|
||||||
|
|
||||||
|
/// Copy constructor.
|
||||||
|
by_state (const by_state& other);
|
||||||
|
|
||||||
|
/// Steal the symbol type from \a that.
|
||||||
|
void move (by_state& that);
|
||||||
|
|
||||||
|
/// The (internal) type number (corresponding to \a state).
|
||||||
|
/// "empty" when empty.
|
||||||
|
symbol_number_type type_get () const;
|
||||||
|
|
||||||
|
enum { empty = 0 };
|
||||||
|
|
||||||
|
/// The state.
|
||||||
|
state_type state;
|
||||||
|
};
|
||||||
|
|
||||||
|
/// "Internal" symbol: element of the stack.
|
||||||
|
struct stack_symbol_type : basic_symbol<by_state>
|
||||||
|
{
|
||||||
|
/// Superclass.
|
||||||
|
typedef basic_symbol<by_state> super_type;
|
||||||
|
/// Construct an empty symbol.
|
||||||
|
stack_symbol_type ();
|
||||||
|
/// Steal the contents from \a sym to build this.
|
||||||
|
stack_symbol_type (state_type s, symbol_type& sym);
|
||||||
|
/// Assignment, needed by push_back.
|
||||||
|
stack_symbol_type& operator= (const stack_symbol_type& that);
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Stack type.
|
||||||
|
typedef stack<stack_symbol_type> stack_type;
|
||||||
|
|
||||||
|
/// The stack.
|
||||||
|
stack_type yystack_;
|
||||||
|
|
||||||
|
/// Push a new state on the stack.
|
||||||
|
/// \param m a debug message to display
|
||||||
|
/// if null, no trace is output.
|
||||||
|
/// \param s the symbol
|
||||||
|
/// \warning the contents of \a s.value is stolen.
|
||||||
|
void yypush_ (const char* m, stack_symbol_type& s);
|
||||||
|
|
||||||
|
/// Push a new look ahead token on the state on the stack.
|
||||||
|
/// \param m a debug message to display
|
||||||
|
/// if null, no trace is output.
|
||||||
|
/// \param s the state
|
||||||
|
/// \param sym the symbol (for its value and location).
|
||||||
|
/// \warning the contents of \a s.value is stolen.
|
||||||
|
void yypush_ (const char* m, state_type s, symbol_type& sym);
|
||||||
|
|
||||||
|
/// Pop \a n symbols the three stacks.
|
||||||
|
void yypop_ (unsigned int n = 1);
|
||||||
|
|
||||||
|
// Constants.
|
||||||
|
enum
|
||||||
|
{
|
||||||
|
yyeof_ = 0,
|
||||||
|
yylast_ = 59, ///< Last index in yytable_.
|
||||||
|
yynnts_ = 7, ///< Number of nonterminal symbols.
|
||||||
|
yyempty_ = -2,
|
||||||
|
yyfinal_ = 14, ///< Termination state number.
|
||||||
|
yyterror_ = 1,
|
||||||
|
yyerrcode_ = 256,
|
||||||
|
yyntokens_ = 18 ///< Number of tokens.
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
// User arguments.
|
||||||
|
WasaParserDriver* d;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
} // yy
|
||||||
|
#line 472 "y.tab.h" // lalr1.cc:372
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#endif // !YY_YY_Y_TAB_H_INCLUDED
|
||||||
@ -161,10 +161,10 @@ bool WasaParserDriver::addClause(SearchData *sd,
|
|||||||
size_t size = strtoll(cl->gettext().c_str(), &cp, 10);
|
size_t size = strtoll(cl->gettext().c_str(), &cp, 10);
|
||||||
if (*cp != 0) {
|
if (*cp != 0) {
|
||||||
switch (*cp) {
|
switch (*cp) {
|
||||||
case 'k': case 'K': size *= 1E3;break;
|
case 'k': case 'K': size *= 1000;break;
|
||||||
case 'm': case 'M': size *= 1E6;break;
|
case 'm': case 'M': size *= 1000*1000;break;
|
||||||
case 'g': case 'G': size *= 1E9;break;
|
case 'g': case 'G': size *= 1000*1000*1000;break;
|
||||||
case 't': case 'T': size *= 1E12;break;
|
case 't': case 'T': size *= size_t(1000)*1000*1000*1000;break;
|
||||||
default:
|
default:
|
||||||
m_reason = string("Bad multiplier suffix: ") + *cp;
|
m_reason = string("Bad multiplier suffix: ") + *cp;
|
||||||
delete cl;
|
delete cl;
|
||||||
|
|||||||
@ -29,7 +29,7 @@
|
|||||||
#include <vector>
|
#include <vector>
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
#include "xapian.h"
|
#include <xapian.h>
|
||||||
|
|
||||||
#include "debuglog.h"
|
#include "debuglog.h"
|
||||||
#include "rclconfig.h"
|
#include "rclconfig.h"
|
||||||
|
|||||||
@ -97,6 +97,8 @@ bool createExpansionDbs(Xapian::WritableDatabase& wdb,
|
|||||||
|
|
||||||
// Detect and skip CJK terms.
|
// Detect and skip CJK terms.
|
||||||
Utf8Iter utfit(*it);
|
Utf8Iter utfit(*it);
|
||||||
|
if (utfit.eof()) // Empty term?? Seems to happen.
|
||||||
|
continue;
|
||||||
if (TextSplit::isCJK(*utfit)) {
|
if (TextSplit::isCJK(*utfit)) {
|
||||||
// LOGDEB(("stemskipped: Skipping CJK\n"));
|
// LOGDEB(("stemskipped: Skipping CJK\n"));
|
||||||
continue;
|
continue;
|
||||||
|
|||||||
@ -82,7 +82,7 @@ bool Query::Native::getMatchTerms(unsigned long xdocid, vector<string>& terms)
|
|||||||
{
|
{
|
||||||
if (!xenquire) {
|
if (!xenquire) {
|
||||||
LOGERR(("Query::getMatchTerms: no query opened\n"));
|
LOGERR(("Query::getMatchTerms: no query opened\n"));
|
||||||
return -1;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
terms.clear();
|
terms.clear();
|
||||||
@ -386,7 +386,7 @@ int Query::Native::makeAbstract(Xapian::docid docid,
|
|||||||
for (multimap<double, vector<string> >::reverse_iterator mit = byQ.rbegin();
|
for (multimap<double, vector<string> >::reverse_iterator mit = byQ.rbegin();
|
||||||
mit != byQ.rend(); mit++) {
|
mit != byQ.rend(); mit++) {
|
||||||
unsigned int maxgrpoccs;
|
unsigned int maxgrpoccs;
|
||||||
float q;
|
double q;
|
||||||
if (byQ.size() == 1) {
|
if (byQ.size() == 1) {
|
||||||
maxgrpoccs = maxtotaloccs;
|
maxgrpoccs = maxtotaloccs;
|
||||||
q = 1.0;
|
q = 1.0;
|
||||||
|
|||||||
@ -18,7 +18,7 @@
|
|||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <unistd.h>
|
#include "safeunistd.h"
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
#include <time.h>
|
#include <time.h>
|
||||||
|
|
||||||
@ -433,7 +433,7 @@ bool Db::Native::dbDataToRclDoc(Xapian::docid docid, std::string &data,
|
|||||||
string dbdir = m_rcldb->m_basedir;
|
string dbdir = m_rcldb->m_basedir;
|
||||||
doc.idxi = 0;
|
doc.idxi = 0;
|
||||||
if (!m_rcldb->m_extraDbs.empty()) {
|
if (!m_rcldb->m_extraDbs.empty()) {
|
||||||
unsigned int idxi = whatDbIdx(docid);
|
int idxi = int(whatDbIdx(docid));
|
||||||
|
|
||||||
// idxi is in [0, extraDbs.size()]. 0 is for the main index,
|
// idxi is in [0, extraDbs.size()]. 0 is for the main index,
|
||||||
// idxi-1 indexes into the additional dbs array.
|
// idxi-1 indexes into the additional dbs array.
|
||||||
@ -549,14 +549,13 @@ bool Db::Native::getPagePositions(Xapian::docid docid, vector<int>& vpos)
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
int Db::Native::getPageNumberForPosition(const vector<int>& pbreaks,
|
int Db::Native::getPageNumberForPosition(const vector<int>& pbreaks, int pos)
|
||||||
unsigned int pos)
|
|
||||||
{
|
{
|
||||||
if (pos < baseTextPosition) // Not in text body
|
if (pos < int(baseTextPosition)) // Not in text body
|
||||||
return -1;
|
return -1;
|
||||||
vector<int>::const_iterator it =
|
vector<int>::const_iterator it =
|
||||||
upper_bound(pbreaks.begin(), pbreaks.end(), pos);
|
upper_bound(pbreaks.begin(), pbreaks.end(), pos);
|
||||||
return it - pbreaks.begin() + 1;
|
return int(it - pbreaks.begin() + 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Note: we're passed a Xapian::Document* because Xapian
|
// Note: we're passed a Xapian::Document* because Xapian
|
||||||
@ -1420,10 +1419,11 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi, Doc &doc)
|
|||||||
time_t mtime = atoll(doc.dmtime.empty() ? doc.fmtime.c_str() :
|
time_t mtime = atoll(doc.dmtime.empty() ? doc.fmtime.c_str() :
|
||||||
doc.dmtime.c_str());
|
doc.dmtime.c_str());
|
||||||
struct tm tmb;
|
struct tm tmb;
|
||||||
localtime_r(&mtime, &tmb);
|
struct tm *tmbp = &tmb;
|
||||||
|
tmbp = localtime_r(&mtime, &tmb);
|
||||||
char buf[9];
|
char buf[9];
|
||||||
snprintf(buf, 9, "%04d%02d%02d",
|
snprintf(buf, 9, "%04d%02d%02d",
|
||||||
tmb.tm_year+1900, tmb.tm_mon + 1, tmb.tm_mday);
|
tmbp->tm_year+1900, tmbp->tm_mon + 1, tmbp->tm_mday);
|
||||||
// Date (YYYYMMDD)
|
// Date (YYYYMMDD)
|
||||||
newdocument.add_boolean_term(wrap_prefix(xapday_prefix) + string(buf));
|
newdocument.add_boolean_term(wrap_prefix(xapday_prefix) + string(buf));
|
||||||
// Month (YYYYMM)
|
// Month (YYYYMM)
|
||||||
|
|||||||
@ -120,7 +120,7 @@ class Db::Native {
|
|||||||
const string& uniterm);
|
const string& uniterm);
|
||||||
|
|
||||||
bool getPagePositions(Xapian::docid docid, vector<int>& vpos);
|
bool getPagePositions(Xapian::docid docid, vector<int>& vpos);
|
||||||
int getPageNumberForPosition(const vector<int>& pbreaks, unsigned int pos);
|
int getPageNumberForPosition(const vector<int>& pbreaks, int pos);
|
||||||
|
|
||||||
bool dbDataToRclDoc(Xapian::docid docid, std::string &data, Doc &doc);
|
bool dbDataToRclDoc(Xapian::docid docid, std::string &data, Doc &doc);
|
||||||
|
|
||||||
|
|||||||
@ -21,13 +21,15 @@
|
|||||||
#include "autoconfig.h"
|
#include "autoconfig.h"
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
using namespace std;
|
|
||||||
|
|
||||||
#include "debuglog.h"
|
#include "debuglog.h"
|
||||||
#include "rcldb.h"
|
#include "rcldb.h"
|
||||||
#include "rcldb_p.h"
|
#include "rcldb_p.h"
|
||||||
#include "stemdb.h"
|
#include "stemdb.h"
|
||||||
#include "expansiondbs.h"
|
#include "expansiondbs.h"
|
||||||
|
#include "strmatcher.h"
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
namespace Rcl {
|
namespace Rcl {
|
||||||
|
|
||||||
@ -41,10 +43,10 @@ bool Db::filenameWildExp(const string& fnexp, vector<string>& names, int max)
|
|||||||
// get here currently anyway), and has no wildcards, we add * at
|
// get here currently anyway), and has no wildcards, we add * at
|
||||||
// each end: match any substring
|
// each end: match any substring
|
||||||
if (pattern[0] == '"' && pattern[pattern.size()-1] == '"') {
|
if (pattern[0] == '"' && pattern[pattern.size()-1] == '"') {
|
||||||
pattern = pattern.substr(1, pattern.size() -2);
|
pattern = pattern.substr(1, pattern.size() -2);
|
||||||
} else if (pattern.find_first_of(cstr_minwilds) == string::npos &&
|
} else if (pattern.find_first_of(cstr_minwilds) == string::npos &&
|
||||||
!unaciscapital(pattern)) {
|
!unaciscapital(pattern)) {
|
||||||
pattern = "*" + pattern + "*";
|
pattern = "*" + pattern + "*";
|
||||||
} // else let it be
|
} // else let it be
|
||||||
|
|
||||||
LOGDEB(("Rcl::Db::filenameWildExp: pattern: [%s]\n", pattern.c_str()));
|
LOGDEB(("Rcl::Db::filenameWildExp: pattern: [%s]\n", pattern.c_str()));
|
||||||
@ -55,21 +57,21 @@ bool Db::filenameWildExp(const string& fnexp, vector<string>& names, int max)
|
|||||||
// stripping conditionally on indexstripchars.
|
// stripping conditionally on indexstripchars.
|
||||||
string pat1;
|
string pat1;
|
||||||
if (unacmaybefold(pattern, pat1, "UTF-8", UNACOP_UNACFOLD)) {
|
if (unacmaybefold(pattern, pat1, "UTF-8", UNACOP_UNACFOLD)) {
|
||||||
pattern.swap(pat1);
|
pattern.swap(pat1);
|
||||||
}
|
}
|
||||||
|
|
||||||
TermMatchResult result;
|
TermMatchResult result;
|
||||||
if (!idxTermMatch(ET_WILD, string(), pattern, result, max,
|
if (!idxTermMatch(ET_WILD, string(), pattern, result, max,
|
||||||
unsplitFilenameFieldName))
|
unsplitFilenameFieldName))
|
||||||
return false;
|
return false;
|
||||||
for (vector<TermMatchEntry>::const_iterator it = result.entries.begin();
|
for (vector<TermMatchEntry>::const_iterator it = result.entries.begin();
|
||||||
it != result.entries.end(); it++)
|
it != result.entries.end(); it++)
|
||||||
names.push_back(it->term);
|
names.push_back(it->term);
|
||||||
|
|
||||||
if (names.empty()) {
|
if (names.empty()) {
|
||||||
// Build an impossible query: we know its impossible because we
|
// Build an impossible query: we know its impossible because we
|
||||||
// control the prefixes!
|
// control the prefixes!
|
||||||
names.push_back(wrap_prefix("XNONE") + "NoMatchingTerms");
|
names.push_back(wrap_prefix("XNONE") + "NoMatchingTerms");
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -82,11 +84,11 @@ bool Db::maxYearSpan(int *minyear, int *maxyear)
|
|||||||
*maxyear = -1000000;
|
*maxyear = -1000000;
|
||||||
TermMatchResult result;
|
TermMatchResult result;
|
||||||
if (!idxTermMatch(ET_WILD, string(), "*", result, -1, "xapyear")) {
|
if (!idxTermMatch(ET_WILD, string(), "*", result, -1, "xapyear")) {
|
||||||
LOGINFO(("Rcl::Db:maxYearSpan: termMatch failed\n"));
|
LOGINFO(("Rcl::Db:maxYearSpan: termMatch failed\n"));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
for (vector<TermMatchEntry>::const_iterator it = result.entries.begin();
|
for (vector<TermMatchEntry>::const_iterator it = result.entries.begin();
|
||||||
it != result.entries.end(); it++) {
|
it != result.entries.end(); it++) {
|
||||||
if (!it->term.empty()) {
|
if (!it->term.empty()) {
|
||||||
int year = atoi(strip_prefix(it->term).c_str());
|
int year = atoi(strip_prefix(it->term).c_str());
|
||||||
if (year < *minyear)
|
if (year < *minyear)
|
||||||
@ -102,11 +104,11 @@ bool Db::getAllDbMimeTypes(std::vector<std::string>& exp)
|
|||||||
{
|
{
|
||||||
Rcl::TermMatchResult res;
|
Rcl::TermMatchResult res;
|
||||||
if (!idxTermMatch(Rcl::Db::ET_WILD, "", "*", res, -1, "mtype")) {
|
if (!idxTermMatch(Rcl::Db::ET_WILD, "", "*", res, -1, "mtype")) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
for (vector<Rcl::TermMatchEntry>::const_iterator rit = res.entries.begin();
|
for (vector<Rcl::TermMatchEntry>::const_iterator rit = res.entries.begin();
|
||||||
rit != res.entries.end(); rit++) {
|
rit != res.entries.end(); rit++) {
|
||||||
exp.push_back(Rcl::strip_prefix(rit->term));
|
exp.push_back(Rcl::strip_prefix(rit->term));
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -114,19 +116,19 @@ bool Db::getAllDbMimeTypes(std::vector<std::string>& exp)
|
|||||||
class TermMatchCmpByWcf {
|
class TermMatchCmpByWcf {
|
||||||
public:
|
public:
|
||||||
int operator()(const TermMatchEntry& l, const TermMatchEntry& r) {
|
int operator()(const TermMatchEntry& l, const TermMatchEntry& r) {
|
||||||
return r.wcf - l.wcf < 0;
|
return r.wcf - l.wcf < 0;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
class TermMatchCmpByTerm {
|
class TermMatchCmpByTerm {
|
||||||
public:
|
public:
|
||||||
int operator()(const TermMatchEntry& l, const TermMatchEntry& r) {
|
int operator()(const TermMatchEntry& l, const TermMatchEntry& r) {
|
||||||
return l.term.compare(r.term) > 0;
|
return l.term.compare(r.term) > 0;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
class TermMatchTermEqual {
|
class TermMatchTermEqual {
|
||||||
public:
|
public:
|
||||||
int operator()(const TermMatchEntry& l, const TermMatchEntry& r) {
|
int operator()(const TermMatchEntry& l, const TermMatchEntry& r) {
|
||||||
return !l.term.compare(r.term);
|
return !l.term.compare(r.term);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -136,10 +138,10 @@ public:
|
|||||||
static void addPrefix(vector<TermMatchEntry>& terms, const string& prefix)
|
static void addPrefix(vector<TermMatchEntry>& terms, const string& prefix)
|
||||||
{
|
{
|
||||||
if (prefix.empty())
|
if (prefix.empty())
|
||||||
return;
|
return;
|
||||||
for (vector<TermMatchEntry>::iterator it = terms.begin();
|
for (vector<TermMatchEntry>::iterator it = terms.begin();
|
||||||
it != terms.end(); it++)
|
it != terms.end(); it++)
|
||||||
it->term.insert(0, prefix);
|
it->term.insert(0, prefix);
|
||||||
}
|
}
|
||||||
|
|
||||||
static const char *tmtptostr(int typ)
|
static const char *tmtptostr(int typ)
|
||||||
@ -164,22 +166,22 @@ static const char *tmtptostr(int typ)
|
|||||||
// using the main index terms (filtering, retrieving stats, expansion
|
// using the main index terms (filtering, retrieving stats, expansion
|
||||||
// in some cases).
|
// in some cases).
|
||||||
bool Db::termMatch(int typ_sens, const string &lang, const string &_term,
|
bool Db::termMatch(int typ_sens, const string &lang, const string &_term,
|
||||||
TermMatchResult& res, int max, const string& field,
|
TermMatchResult& res, int max, const string& field,
|
||||||
vector<string>* multiwords)
|
vector<string>* multiwords)
|
||||||
{
|
{
|
||||||
int matchtyp = matchTypeTp(typ_sens);
|
int matchtyp = matchTypeTp(typ_sens);
|
||||||
if (!m_ndb || !m_ndb->m_isopen)
|
if (!m_ndb || !m_ndb->m_isopen)
|
||||||
return false;
|
return false;
|
||||||
Xapian::Database xrdb = m_ndb->xrdb;
|
Xapian::Database xrdb = m_ndb->xrdb;
|
||||||
|
|
||||||
bool diac_sensitive = (typ_sens & ET_DIACSENS) != 0;
|
bool diac_sensitive = (typ_sens & ET_DIACSENS) != 0;
|
||||||
bool case_sensitive = (typ_sens & ET_CASESENS) != 0;
|
bool case_sensitive = (typ_sens & ET_CASESENS) != 0;
|
||||||
|
|
||||||
LOGDEB0(("Db::TermMatch: typ %s diacsens %d casesens %d lang [%s] term [%s]"
|
LOGDEB0(("Db::TermMatch: typ %s diacsens %d casesens %d lang [%s] term [%s]"
|
||||||
" max %d field [%s] stripped %d init res.size %u\n",
|
" max %d field [%s] stripped %d init res.size %u\n",
|
||||||
tmtptostr(matchtyp), diac_sensitive, case_sensitive, lang.c_str(),
|
tmtptostr(matchtyp), diac_sensitive, case_sensitive, lang.c_str(),
|
||||||
_term.c_str(), max, field.c_str(), o_index_stripchars,
|
_term.c_str(), max, field.c_str(), o_index_stripchars,
|
||||||
res.entries.size()));
|
res.entries.size()));
|
||||||
|
|
||||||
// If index is stripped, no case or diac expansion can be needed:
|
// If index is stripped, no case or diac expansion can be needed:
|
||||||
// for the processing inside this routine, everything looks like
|
// for the processing inside this routine, everything looks like
|
||||||
@ -187,11 +189,11 @@ bool Db::termMatch(int typ_sens, const string &lang, const string &_term,
|
|||||||
// Also, convert input to lowercase and strip its accents.
|
// Also, convert input to lowercase and strip its accents.
|
||||||
string term = _term;
|
string term = _term;
|
||||||
if (o_index_stripchars) {
|
if (o_index_stripchars) {
|
||||||
diac_sensitive = case_sensitive = true;
|
diac_sensitive = case_sensitive = true;
|
||||||
if (!unacmaybefold(_term, term, "UTF-8", UNACOP_UNACFOLD)) {
|
if (!unacmaybefold(_term, term, "UTF-8", UNACOP_UNACFOLD)) {
|
||||||
LOGERR(("Db::termMatch: unac failed for [%s]\n", _term.c_str()));
|
LOGERR(("Db::termMatch: unac failed for [%s]\n", _term.c_str()));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// The case/diac expansion db
|
// The case/diac expansion db
|
||||||
@ -199,125 +201,125 @@ bool Db::termMatch(int typ_sens, const string &lang, const string &_term,
|
|||||||
XapComputableSynFamMember synac(xrdb, synFamDiCa, "all", &unacfoldtrans);
|
XapComputableSynFamMember synac(xrdb, synFamDiCa, "all", &unacfoldtrans);
|
||||||
|
|
||||||
if (matchtyp == ET_WILD || matchtyp == ET_REGEXP) {
|
if (matchtyp == ET_WILD || matchtyp == ET_REGEXP) {
|
||||||
STD_SHARED_PTR<StrMatcher> matcher;
|
STD_SHARED_PTR<StrMatcher> matcher;
|
||||||
if (matchtyp == ET_WILD) {
|
if (matchtyp == ET_WILD) {
|
||||||
matcher = STD_SHARED_PTR<StrMatcher>(new StrWildMatcher(term));
|
matcher = STD_SHARED_PTR<StrMatcher>(new StrWildMatcher(term));
|
||||||
} else {
|
} else {
|
||||||
matcher = STD_SHARED_PTR<StrMatcher>(new StrRegexpMatcher(term));
|
matcher = STD_SHARED_PTR<StrMatcher>(new StrRegexpMatcher(term));
|
||||||
}
|
}
|
||||||
if (!diac_sensitive || !case_sensitive) {
|
if (!diac_sensitive || !case_sensitive) {
|
||||||
// Perform case/diac expansion on the exp as appropriate and
|
// Perform case/diac expansion on the exp as appropriate and
|
||||||
// expand the result.
|
// expand the result.
|
||||||
vector<string> exp;
|
vector<string> exp;
|
||||||
if (diac_sensitive) {
|
if (diac_sensitive) {
|
||||||
// Expand for diacritics and case, filtering for same diacritics
|
// Expand for diacritics and case, filtering for same diacritics
|
||||||
SynTermTransUnac foldtrans(UNACOP_FOLD);
|
SynTermTransUnac foldtrans(UNACOP_FOLD);
|
||||||
synac.synKeyExpand(matcher.get(), exp, &foldtrans);
|
synac.synKeyExpand(matcher.get(), exp, &foldtrans);
|
||||||
} else if (case_sensitive) {
|
} else if (case_sensitive) {
|
||||||
// Expand for diacritics and case, filtering for same case
|
// Expand for diacritics and case, filtering for same case
|
||||||
SynTermTransUnac unactrans(UNACOP_UNAC);
|
SynTermTransUnac unactrans(UNACOP_UNAC);
|
||||||
synac.synKeyExpand(matcher.get(), exp, &unactrans);
|
synac.synKeyExpand(matcher.get(), exp, &unactrans);
|
||||||
} else {
|
} else {
|
||||||
// Expand for diacritics and case, no filtering
|
// Expand for diacritics and case, no filtering
|
||||||
synac.synKeyExpand(matcher.get(), exp);
|
synac.synKeyExpand(matcher.get(), exp);
|
||||||
}
|
}
|
||||||
// Retrieve additional info and filter against the index itself
|
// Retrieve additional info and filter against the index itself
|
||||||
for (vector<string>::const_iterator it = exp.begin();
|
for (vector<string>::const_iterator it = exp.begin();
|
||||||
it != exp.end(); it++) {
|
it != exp.end(); it++) {
|
||||||
idxTermMatch(ET_NONE, "", *it, res, max, field);
|
idxTermMatch(ET_NONE, "", *it, res, max, field);
|
||||||
}
|
}
|
||||||
// And also expand the original expression against the
|
// And also expand the original expression against the
|
||||||
// main index: for the common case where the expression
|
// main index: for the common case where the expression
|
||||||
// had no case/diac expansion (no entry in the exp db if
|
// had no case/diac expansion (no entry in the exp db if
|
||||||
// the original term is lowercase and without accents).
|
// the original term is lowercase and without accents).
|
||||||
idxTermMatch(typ_sens, lang, term, res, max, field);
|
idxTermMatch(typ_sens, lang, term, res, max, field);
|
||||||
} else {
|
} else {
|
||||||
idxTermMatch(typ_sens, lang, term, res, max, field);
|
idxTermMatch(typ_sens, lang, term, res, max, field);
|
||||||
}
|
}
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
// Expansion is STEM or NONE (which may still need synonyms
|
// Expansion is STEM or NONE (which may still need synonyms
|
||||||
// and case/diac exp)
|
// and case/diac exp)
|
||||||
|
|
||||||
vector<string> lexp;
|
vector<string> lexp;
|
||||||
if (diac_sensitive && case_sensitive) {
|
if (diac_sensitive && case_sensitive) {
|
||||||
// No case/diac expansion
|
// No case/diac expansion
|
||||||
lexp.push_back(term);
|
lexp.push_back(term);
|
||||||
} else if (diac_sensitive) {
|
} else if (diac_sensitive) {
|
||||||
// Expand for accents and case, filtering for same accents,
|
// Expand for accents and case, filtering for same accents,
|
||||||
SynTermTransUnac foldtrans(UNACOP_FOLD);
|
SynTermTransUnac foldtrans(UNACOP_FOLD);
|
||||||
synac.synExpand(term, lexp, &foldtrans);
|
synac.synExpand(term, lexp, &foldtrans);
|
||||||
} else if (case_sensitive) {
|
} else if (case_sensitive) {
|
||||||
// Expand for accents and case, filtering for same case
|
// Expand for accents and case, filtering for same case
|
||||||
SynTermTransUnac unactrans(UNACOP_UNAC);
|
SynTermTransUnac unactrans(UNACOP_UNAC);
|
||||||
synac.synExpand(term, lexp, &unactrans);
|
synac.synExpand(term, lexp, &unactrans);
|
||||||
} else {
|
} else {
|
||||||
// We are neither accent- nor case- sensitive and may need stem
|
// We are neither accent- nor case- sensitive and may need stem
|
||||||
// expansion or not. Expand for accents and case
|
// expansion or not. Expand for accents and case
|
||||||
synac.synExpand(term, lexp);
|
synac.synExpand(term, lexp);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (matchtyp == ET_STEM || (typ_sens & ET_SYNEXP)) {
|
if (matchtyp == ET_STEM || (typ_sens & ET_SYNEXP)) {
|
||||||
// Note: if any of the above conds is true, we are insensitive to
|
// Note: if any of the above conds is true, we are insensitive to
|
||||||
// diacs and case (enforced in searchdatatox:termexpand
|
// diacs and case (enforced in searchdatatox:termexpand
|
||||||
// Need stem expansion. Lowercase the result of accent and case
|
// Need stem expansion. Lowercase the result of accent and case
|
||||||
// expansion for input to stemdb.
|
// expansion for input to stemdb.
|
||||||
for (unsigned int i = 0; i < lexp.size(); i++) {
|
for (unsigned int i = 0; i < lexp.size(); i++) {
|
||||||
string lower;
|
string lower;
|
||||||
unacmaybefold(lexp[i], lower, "UTF-8", UNACOP_FOLD);
|
unacmaybefold(lexp[i], lower, "UTF-8", UNACOP_FOLD);
|
||||||
lexp[i] = lower;
|
lexp[i] = lower;
|
||||||
}
|
}
|
||||||
sort(lexp.begin(), lexp.end());
|
sort(lexp.begin(), lexp.end());
|
||||||
lexp.erase(unique(lexp.begin(), lexp.end()), lexp.end());
|
lexp.erase(unique(lexp.begin(), lexp.end()), lexp.end());
|
||||||
|
|
||||||
if (matchtyp == ET_STEM) {
|
if (matchtyp == ET_STEM) {
|
||||||
StemDb sdb(xrdb);
|
StemDb sdb(xrdb);
|
||||||
vector<string> exp1;
|
vector<string> exp1;
|
||||||
for (vector<string>::const_iterator it = lexp.begin();
|
for (vector<string>::const_iterator it = lexp.begin();
|
||||||
it != lexp.end(); it++) {
|
it != lexp.end(); it++) {
|
||||||
sdb.stemExpand(lang, *it, exp1);
|
sdb.stemExpand(lang, *it, exp1);
|
||||||
}
|
}
|
||||||
exp1.swap(lexp);
|
exp1.swap(lexp);
|
||||||
sort(lexp.begin(), lexp.end());
|
sort(lexp.begin(), lexp.end());
|
||||||
lexp.erase(unique(lexp.begin(), lexp.end()), lexp.end());
|
lexp.erase(unique(lexp.begin(), lexp.end()), lexp.end());
|
||||||
LOGDEB(("ExpTerm: stemexp: %s\n",
|
LOGDEB(("ExpTerm: stemexp: %s\n",
|
||||||
stringsToString(lexp).c_str()));
|
stringsToString(lexp).c_str()));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Expand the result for synonyms. Note that doing it here
|
// Expand the result for synonyms. Note that doing it here
|
||||||
// means that multi-term synonyms will not work
|
// means that multi-term synonyms will not work
|
||||||
// (e.g. stakhanovist -> "hard at work". We would have to
|
// (e.g. stakhanovist -> "hard at work". We would have to
|
||||||
// separate the multi-word expansions for our caller to
|
// separate the multi-word expansions for our caller to
|
||||||
// add them as phrases to the query. Not impossible, but
|
// add them as phrases to the query. Not impossible, but
|
||||||
// let's keep it at single words for now.
|
// let's keep it at single words for now.
|
||||||
if (m_syngroups.ok() && (typ_sens & ET_SYNEXP)) {
|
if (m_syngroups.ok() && (typ_sens & ET_SYNEXP)) {
|
||||||
LOGDEB(("ExpTerm: got syngroups\n"));
|
LOGDEB(("ExpTerm: got syngroups\n"));
|
||||||
vector<string> exp1(lexp);
|
vector<string> exp1(lexp);
|
||||||
for (vector<string>::const_iterator it = lexp.begin();
|
for (vector<string>::const_iterator it = lexp.begin();
|
||||||
it != lexp.end(); it++) {
|
it != lexp.end(); it++) {
|
||||||
vector<string> sg = m_syngroups.getgroup(*it);
|
vector<string> sg = m_syngroups.getgroup(*it);
|
||||||
if (!sg.empty()) {
|
if (!sg.empty()) {
|
||||||
LOGDEB(("ExpTerm: syns: %s -> %s\n",
|
LOGDEB(("ExpTerm: syns: %s -> %s\n",
|
||||||
it->c_str(), stringsToString(sg).c_str()));
|
it->c_str(), stringsToString(sg).c_str()));
|
||||||
for (vector<string>::const_iterator it1 = sg.begin();
|
for (vector<string>::const_iterator it1 = sg.begin();
|
||||||
it1 != sg.end(); it1++) {
|
it1 != sg.end(); it1++) {
|
||||||
if (it1->find_first_of(" ") != string::npos) {
|
if (it1->find_first_of(" ") != string::npos) {
|
||||||
if (multiwords) {
|
if (multiwords) {
|
||||||
multiwords->push_back(*it1);
|
multiwords->push_back(*it1);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
exp1.push_back(*it1);
|
exp1.push_back(*it1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
lexp.swap(exp1);
|
lexp.swap(exp1);
|
||||||
sort(lexp.begin(), lexp.end());
|
sort(lexp.begin(), lexp.end());
|
||||||
lexp.erase(unique(lexp.begin(), lexp.end()), lexp.end());
|
lexp.erase(unique(lexp.begin(), lexp.end()), lexp.end());
|
||||||
}
|
}
|
||||||
|
|
||||||
// Expand the resulting list for case (all stemdb content
|
// Expand the resulting list for case (all stemdb content
|
||||||
// is lowercase)
|
// is lowercase)
|
||||||
vector<string> exp1;
|
vector<string> exp1;
|
||||||
for (vector<string>::const_iterator it = lexp.begin();
|
for (vector<string>::const_iterator it = lexp.begin();
|
||||||
it != lexp.end(); it++) {
|
it != lexp.end(); it++) {
|
||||||
@ -326,27 +328,27 @@ bool Db::termMatch(int typ_sens, const string &lang, const string &_term,
|
|||||||
exp1.swap(lexp);
|
exp1.swap(lexp);
|
||||||
sort(lexp.begin(), lexp.end());
|
sort(lexp.begin(), lexp.end());
|
||||||
lexp.erase(unique(lexp.begin(), lexp.end()), lexp.end());
|
lexp.erase(unique(lexp.begin(), lexp.end()), lexp.end());
|
||||||
}
|
}
|
||||||
|
|
||||||
// Filter the result and get the stats, possibly add prefixes.
|
// Filter the result and get the stats, possibly add prefixes.
|
||||||
LOGDEB(("ExpandTerm:TM: lexp: %s\n", stringsToString(lexp).c_str()));
|
LOGDEB(("ExpandTerm:TM: lexp: %s\n", stringsToString(lexp).c_str()));
|
||||||
for (vector<string>::const_iterator it = lexp.begin();
|
for (vector<string>::const_iterator it = lexp.begin();
|
||||||
it != lexp.end(); it++) {
|
it != lexp.end(); it++) {
|
||||||
idxTermMatch(Rcl::Db::ET_WILD, "", *it, res, max, field);
|
idxTermMatch(Rcl::Db::ET_WILD, "", *it, res, max, field);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
TermMatchCmpByTerm tcmp;
|
TermMatchCmpByTerm tcmp;
|
||||||
sort(res.entries.begin(), res.entries.end(), tcmp);
|
sort(res.entries.begin(), res.entries.end(), tcmp);
|
||||||
TermMatchTermEqual teq;
|
TermMatchTermEqual teq;
|
||||||
vector<TermMatchEntry>::iterator uit =
|
vector<TermMatchEntry>::iterator uit =
|
||||||
unique(res.entries.begin(), res.entries.end(), teq);
|
unique(res.entries.begin(), res.entries.end(), teq);
|
||||||
res.entries.resize(uit - res.entries.begin());
|
res.entries.resize(uit - res.entries.begin());
|
||||||
TermMatchCmpByWcf wcmp;
|
TermMatchCmpByWcf wcmp;
|
||||||
sort(res.entries.begin(), res.entries.end(), wcmp);
|
sort(res.entries.begin(), res.entries.end(), wcmp);
|
||||||
if (max > 0) {
|
if (max > 0) {
|
||||||
// Would need a small max and big stem expansion...
|
// Would need a small max and big stem expansion...
|
||||||
res.entries.resize(MIN(res.entries.size(), (unsigned int)max));
|
res.entries.resize(MIN(res.entries.size(), (unsigned int)max));
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -354,114 +356,116 @@ bool Db::termMatch(int typ_sens, const string &lang, const string &_term,
|
|||||||
// Second phase of wildcard/regexp term expansion after case/diac
|
// Second phase of wildcard/regexp term expansion after case/diac
|
||||||
// expansion: expand against main index terms
|
// expansion: expand against main index terms
|
||||||
bool Db::idxTermMatch(int typ_sens, const string &lang, const string &root,
|
bool Db::idxTermMatch(int typ_sens, const string &lang, const string &root,
|
||||||
TermMatchResult& res, int max, const string& field)
|
TermMatchResult& res, int max, const string& field)
|
||||||
{
|
{
|
||||||
int typ = matchTypeTp(typ_sens);
|
int typ = matchTypeTp(typ_sens);
|
||||||
LOGDEB1(("Db::idxTermMatch: typ %s lang [%s] term [%s] "
|
LOGDEB1(("Db::idxTermMatch: typ %s lang [%s] term [%s] "
|
||||||
"max %d field [%s] init res.size %u\n",
|
"max %d field [%s] init res.size %u\n",
|
||||||
tmtptostr(typ), lang.c_str(), root.c_str(),
|
tmtptostr(typ), lang.c_str(), root.c_str(),
|
||||||
max, field.c_str(), res.entries.size()));
|
max, field.c_str(), res.entries.size()));
|
||||||
|
|
||||||
if (typ == ET_STEM) {
|
if (typ == ET_STEM) {
|
||||||
LOGFATAL(("RCLDB: internal error: idxTermMatch called with ET_STEM\n"));
|
LOGFATAL(("RCLDB: internal error: idxTermMatch called with ET_STEM\n"));
|
||||||
abort();
|
abort();
|
||||||
}
|
}
|
||||||
|
|
||||||
Xapian::Database xdb = m_ndb->xrdb;
|
Xapian::Database xdb = m_ndb->xrdb;
|
||||||
|
|
||||||
string prefix;
|
string prefix;
|
||||||
if (!field.empty()) {
|
if (!field.empty()) {
|
||||||
const FieldTraits *ftp = 0;
|
const FieldTraits *ftp = 0;
|
||||||
if (!fieldToTraits(field, &ftp, true) || ftp->pfx.empty()) {
|
if (!fieldToTraits(field, &ftp, true) || ftp->pfx.empty()) {
|
||||||
LOGDEB(("Db::termMatch: field is not indexed (no prefix): [%s]\n",
|
LOGDEB(("Db::termMatch: field is not indexed (no prefix): [%s]\n",
|
||||||
field.c_str()));
|
field.c_str()));
|
||||||
} else {
|
} else {
|
||||||
prefix = wrap_prefix(ftp->pfx);
|
prefix = wrap_prefix(ftp->pfx);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
res.prefix = prefix;
|
res.prefix = prefix;
|
||||||
|
|
||||||
STD_SHARED_PTR<StrMatcher> matcher;
|
STD_SHARED_PTR<StrMatcher> matcher;
|
||||||
if (typ == ET_REGEXP) {
|
if (typ == ET_REGEXP) {
|
||||||
matcher = STD_SHARED_PTR<StrMatcher>(new StrRegexpMatcher(root));
|
matcher = STD_SHARED_PTR<StrMatcher>(new StrRegexpMatcher(root));
|
||||||
if (!matcher->ok()) {
|
if (!matcher->ok()) {
|
||||||
LOGERR(("termMatch: regcomp failed: %s\n",
|
LOGERR(("termMatch: regcomp failed: %s\n",
|
||||||
matcher->getreason().c_str()))
|
matcher->getreason().c_str()))
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
} else if (typ == ET_WILD) {
|
} else if (typ == ET_WILD) {
|
||||||
matcher = STD_SHARED_PTR<StrMatcher>(new StrWildMatcher(root));
|
matcher = STD_SHARED_PTR<StrMatcher>(new StrWildMatcher(root));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Find the initial section before any special char
|
// Find the initial section before any special char
|
||||||
string::size_type es = string::npos;
|
string::size_type es = string::npos;
|
||||||
if (matcher) {
|
if (matcher) {
|
||||||
es = matcher->baseprefixlen();
|
es = matcher->baseprefixlen();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Initial section: the part of the prefix+expr before the
|
// Initial section: the part of the prefix+expr before the
|
||||||
// first wildcard character. We only scan the part of the
|
// first wildcard character. We only scan the part of the
|
||||||
// index where this matches
|
// index where this matches
|
||||||
string is;
|
string is;
|
||||||
switch (es) {
|
if (es == string::npos) {
|
||||||
case string::npos: is = prefix + root; break;
|
is = prefix + root;
|
||||||
case 0: is = prefix; break;
|
} else if (es == 0) {
|
||||||
default: is = prefix + root.substr(0, es); break;
|
is = prefix;
|
||||||
|
} else {
|
||||||
|
is = prefix + root.substr(0, es);
|
||||||
}
|
}
|
||||||
LOGDEB2(("termMatch: initsec: [%s]\n", is.c_str()));
|
LOGDEB2(("termMatch: initsec: [%s]\n", is.c_str()));
|
||||||
|
|
||||||
for (int tries = 0; tries < 2; tries++) {
|
for (int tries = 0; tries < 2; tries++) {
|
||||||
try {
|
try {
|
||||||
Xapian::TermIterator it = xdb.allterms_begin();
|
Xapian::TermIterator it = xdb.allterms_begin();
|
||||||
if (!is.empty())
|
if (!is.empty())
|
||||||
it.skip_to(is.c_str());
|
it.skip_to(is.c_str());
|
||||||
for (int rcnt = 0; it != xdb.allterms_end(); it++) {
|
for (int rcnt = 0; it != xdb.allterms_end(); it++) {
|
||||||
// If we're beyond the terms matching the initial
|
// If we're beyond the terms matching the initial
|
||||||
// section, end
|
// section, end
|
||||||
if (!is.empty() && (*it).find(is) != 0)
|
if (!is.empty() && (*it).find(is) != 0)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
// Else try to match the term. The matcher content
|
// Else try to match the term. The matcher content
|
||||||
// is without prefix, so we remove this if any. We
|
// is without prefix, so we remove this if any. We
|
||||||
// just checked that the index term did begin with
|
// just checked that the index term did begin with
|
||||||
// the prefix.
|
// the prefix.
|
||||||
string term;
|
string term;
|
||||||
if (!prefix.empty()) {
|
if (!prefix.empty()) {
|
||||||
term = (*it).substr(prefix.length());
|
term = (*it).substr(prefix.length());
|
||||||
} else {
|
} else {
|
||||||
if (has_prefix(*it)) {
|
if (has_prefix(*it)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
term = *it;
|
term = *it;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (matcher && !matcher->match(term))
|
if (matcher && !matcher->match(term))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
res.entries.push_back(
|
res.entries.push_back(
|
||||||
TermMatchEntry(*it, xdb.get_collection_freq(*it),
|
TermMatchEntry(*it, xdb.get_collection_freq(*it),
|
||||||
it.get_termfreq()));
|
it.get_termfreq()));
|
||||||
|
|
||||||
// The problem with truncating here is that this is done
|
// The problem with truncating here is that this is done
|
||||||
// alphabetically and we may not keep the most frequent
|
// alphabetically and we may not keep the most frequent
|
||||||
// terms. OTOH, not doing it may stall the program if
|
// terms. OTOH, not doing it may stall the program if
|
||||||
// we are walking the whole term list. We compromise
|
// we are walking the whole term list. We compromise
|
||||||
// by cutting at 2*max
|
// by cutting at 2*max
|
||||||
if (max > 0 && ++rcnt >= 2*max)
|
if (max > 0 && ++rcnt >= 2*max)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
m_reason.erase();
|
m_reason.erase();
|
||||||
break;
|
break;
|
||||||
} catch (const Xapian::DatabaseModifiedError &e) {
|
} catch (const Xapian::DatabaseModifiedError &e) {
|
||||||
m_reason = e.get_msg();
|
m_reason = e.get_msg();
|
||||||
xdb.reopen();
|
xdb.reopen();
|
||||||
continue;
|
continue;
|
||||||
} XCATCHERROR(m_reason);
|
} XCATCHERROR(m_reason);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (!m_reason.empty()) {
|
if (!m_reason.empty()) {
|
||||||
LOGERR(("termMatch: %s\n", m_reason.c_str()));
|
LOGERR(("termMatch: %s\n", m_reason.c_str()));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
@ -476,62 +480,62 @@ public:
|
|||||||
TermIter *Db::termWalkOpen()
|
TermIter *Db::termWalkOpen()
|
||||||
{
|
{
|
||||||
if (!m_ndb || !m_ndb->m_isopen)
|
if (!m_ndb || !m_ndb->m_isopen)
|
||||||
return 0;
|
return 0;
|
||||||
TermIter *tit = new TermIter;
|
TermIter *tit = new TermIter;
|
||||||
if (tit) {
|
if (tit) {
|
||||||
tit->db = m_ndb->xrdb;
|
tit->db = m_ndb->xrdb;
|
||||||
XAPTRY(tit->it = tit->db.allterms_begin(), tit->db, m_reason);
|
XAPTRY(tit->it = tit->db.allterms_begin(), tit->db, m_reason);
|
||||||
if (!m_reason.empty()) {
|
if (!m_reason.empty()) {
|
||||||
LOGERR(("Db::termWalkOpen: xapian error: %s\n", m_reason.c_str()));
|
LOGERR(("Db::termWalkOpen: xapian error: %s\n", m_reason.c_str()));
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return tit;
|
return tit;
|
||||||
}
|
}
|
||||||
bool Db::termWalkNext(TermIter *tit, string &term)
|
bool Db::termWalkNext(TermIter *tit, string &term)
|
||||||
{
|
{
|
||||||
XAPTRY(
|
XAPTRY(
|
||||||
if (tit && tit->it != tit->db.allterms_end()) {
|
if (tit && tit->it != tit->db.allterms_end()) {
|
||||||
term = *(tit->it)++;
|
term = *(tit->it)++;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
, tit->db, m_reason);
|
, tit->db, m_reason);
|
||||||
|
|
||||||
if (!m_reason.empty()) {
|
if (!m_reason.empty()) {
|
||||||
LOGERR(("Db::termWalkOpen: xapian error: %s\n", m_reason.c_str()));
|
LOGERR(("Db::termWalkOpen: xapian error: %s\n", m_reason.c_str()));
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
void Db::termWalkClose(TermIter *tit)
|
void Db::termWalkClose(TermIter *tit)
|
||||||
{
|
{
|
||||||
try {
|
try {
|
||||||
delete tit;
|
delete tit;
|
||||||
} catch (...) {}
|
} catch (...) {}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Db::termExists(const string& word)
|
bool Db::termExists(const string& word)
|
||||||
{
|
{
|
||||||
if (!m_ndb || !m_ndb->m_isopen)
|
if (!m_ndb || !m_ndb->m_isopen)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
XAPTRY(if (!m_ndb->xrdb.term_exists(word)) return false,
|
XAPTRY(if (!m_ndb->xrdb.term_exists(word)) return false,
|
||||||
m_ndb->xrdb, m_reason);
|
m_ndb->xrdb, m_reason);
|
||||||
|
|
||||||
if (!m_reason.empty()) {
|
if (!m_reason.empty()) {
|
||||||
LOGERR(("Db::termWalkOpen: xapian error: %s\n", m_reason.c_str()));
|
LOGERR(("Db::termWalkOpen: xapian error: %s\n", m_reason.c_str()));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Db::stemDiffers(const string& lang, const string& word,
|
bool Db::stemDiffers(const string& lang, const string& word,
|
||||||
const string& base)
|
const string& base)
|
||||||
{
|
{
|
||||||
Xapian::Stem stemmer(lang);
|
Xapian::Stem stemmer(lang);
|
||||||
if (!stemmer(word).compare(stemmer(base))) {
|
if (!stemmer(word).compare(stemmer(base))) {
|
||||||
LOGDEB2(("Rcl::Db::stemDiffers: same for %s and %s\n",
|
LOGDEB2(("Rcl::Db::stemDiffers: same for %s and %s\n",
|
||||||
word.c_str(), base.c_str()));
|
word.c_str(), base.c_str()));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -242,7 +242,7 @@ void SearchData::simplify()
|
|||||||
j < i + clsubp->getSub()->m_query.size(); j++) {
|
j < i + clsubp->getSub()->m_query.size(); j++) {
|
||||||
m_query[j]->setParent(this);
|
m_query[j]->setParent(this);
|
||||||
}
|
}
|
||||||
i += clsubp->getSub()->m_query.size() - 1;
|
i += int(clsubp->getSub()->m_query.size()) - 1;
|
||||||
|
|
||||||
// We don't want the clauses to be deleted when the parent is, as we
|
// We don't want the clauses to be deleted when the parent is, as we
|
||||||
// know own them.
|
// know own them.
|
||||||
|
|||||||
@ -270,7 +270,7 @@ public:
|
|||||||
{
|
{
|
||||||
return m_parentSearch ? m_parentSearch->getMaxExp() : 10000;
|
return m_parentSearch ? m_parentSearch->getMaxExp() : 10000;
|
||||||
}
|
}
|
||||||
int getMaxCl()
|
size_t getMaxCl()
|
||||||
{
|
{
|
||||||
return m_parentSearch ? m_parentSearch->getMaxCl() : 100000;
|
return m_parentSearch ? m_parentSearch->getMaxCl() : 100000;
|
||||||
}
|
}
|
||||||
@ -376,7 +376,7 @@ protected:
|
|||||||
std::string m_field; // Field specification if any
|
std::string m_field; // Field specification if any
|
||||||
HighlightData m_hldata;
|
HighlightData m_hldata;
|
||||||
// Current count of Xapian clauses, to check against expansion limit
|
// Current count of Xapian clauses, to check against expansion limit
|
||||||
int m_curcl;
|
size_t m_curcl;
|
||||||
bool processUserString(Rcl::Db &db, const string &iq,
|
bool processUserString(Rcl::Db &db, const string &iq,
|
||||||
std::string &ermsg,
|
std::string &ermsg,
|
||||||
void* pq, int slack = 0, bool useNear = false);
|
void* pq, int slack = 0, bool useNear = false);
|
||||||
|
|||||||
@ -840,7 +840,7 @@ bool SearchDataClauseSimple::processUserString(Rcl::Db &db, const string &iq,
|
|||||||
tpq.setTSQ(&splitter);
|
tpq.setTSQ(&splitter);
|
||||||
splitter.text_to_words(*it);
|
splitter.text_to_words(*it);
|
||||||
|
|
||||||
slack += tpq.lastpos() - tpq.terms().size() + 1;
|
slack += tpq.lastpos() - int(tpq.terms().size()) + 1;
|
||||||
|
|
||||||
LOGDEB0(("strToXapianQ: termcount: %d\n", tpq.terms().size()));
|
LOGDEB0(("strToXapianQ: termcount: %d\n", tpq.terms().size()));
|
||||||
switch (tpq.terms().size() + terminc) {
|
switch (tpq.terms().size() + terminc) {
|
||||||
@ -963,7 +963,7 @@ bool SearchDataClausePath::toNativeQuery(Rcl::Db &db, void *p)
|
|||||||
|
|
||||||
vector<Xapian::Query> orqueries;
|
vector<Xapian::Query> orqueries;
|
||||||
|
|
||||||
if (m_text[0] == '/')
|
if (path_isabsolute(m_text))
|
||||||
orqueries.push_back(Xapian::Query(wrap_prefix(pathelt_prefix)));
|
orqueries.push_back(Xapian::Query(wrap_prefix(pathelt_prefix)));
|
||||||
else
|
else
|
||||||
m_text = path_tildexpand(m_text);
|
m_text = path_tildexpand(m_text);
|
||||||
|
|||||||
@ -22,11 +22,12 @@
|
|||||||
|
|
||||||
#include "autoconfig.h"
|
#include "autoconfig.h"
|
||||||
|
|
||||||
#include <unistd.h>
|
#include "safeunistd.h"
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <map>
|
#include <map>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
#include <string>
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
#include <xapian.h>
|
#include <xapian.h>
|
||||||
|
|||||||
@ -133,7 +133,8 @@ image/vnd.djvu = exec rcldjvu
|
|||||||
image/svg+xml = exec rclsvg
|
image/svg+xml = exec rclsvg
|
||||||
image/x-xcf = execm rclimg
|
image/x-xcf = execm rclimg
|
||||||
inode/symlink = internal
|
inode/symlink = internal
|
||||||
inode/x-empty = exec rclnull
|
application/x-zerosize = internal
|
||||||
|
inode/x-empty = internal application/x-zerosize
|
||||||
message/rfc822 = internal
|
message/rfc822 = internal
|
||||||
text/calendar = execm rclics;mimetype=text/plain
|
text/calendar = execm rclics;mimetype=text/plain
|
||||||
text/html = internal
|
text/html = internal
|
||||||
|
|||||||
@ -1,7 +1,7 @@
|
|||||||
# (C) 2004 J.F.Dockes. License: GPL
|
# (C) 2004 J.F.Dockes. License: GPL
|
||||||
#
|
#
|
||||||
# Recoll default configuration file. This typically lives in
|
# Recoll default configuration file. This typically lives in
|
||||||
# @prefix@/share/recoll/examples and provides default values. You can
|
# $prefix/share/recoll/examples and provides default values. You can
|
||||||
# override selected parameters by adding assigments to
|
# override selected parameters by adding assigments to
|
||||||
# ~/.recoll/recoll.conf (or $RECOLL_CONFDIR/recoll.conf)
|
# ~/.recoll/recoll.conf (or $RECOLL_CONFDIR/recoll.conf)
|
||||||
#
|
#
|
||||||
@ -199,12 +199,13 @@ maxfsoccuppc = 0
|
|||||||
idxflushmb = 10
|
idxflushmb = 10
|
||||||
|
|
||||||
# Place to search for executable filters. If RECOLL_FILTERSDIR is set in
|
# Place to search for executable filters. If RECOLL_FILTERSDIR is set in
|
||||||
# the environment, we use it instead
|
# the environment, we use it instead. Defaults to $prefix/share/recoll/filters
|
||||||
filtersdir = @prefix@/share/recoll/filters
|
# filtersdir = /path/to/my/filters
|
||||||
|
|
||||||
# Place to search for icons. The only reason to change this would be if you
|
# Place to search for icons. The only reason to change this would be if you
|
||||||
# want to change the icons displayed in the result list
|
# want to change the icons displayed in the result list.
|
||||||
iconsdir = @prefix@/share/recoll/images
|
# Defaults to $prefix/share/recoll/images
|
||||||
|
# iconsdir = /path/to/my/icons
|
||||||
|
|
||||||
# Should we use the system's 'file -i' command as a final step in file type
|
# Should we use the system's 'file -i' command as a final step in file type
|
||||||
# identification ? This may be useful, but will usually cause the
|
# identification ? This may be useful, but will usually cause the
|
||||||
@ -16,21 +16,20 @@
|
|||||||
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifdef HAVE_CONFIG_H
|
#ifdef BUILDING_RECOLL
|
||||||
#ifdef RECOLL_DATADIR
|
|
||||||
#include "autoconfig.h"
|
#include "autoconfig.h"
|
||||||
#else
|
#else
|
||||||
#include "config.h"
|
#include "config.h"
|
||||||
#endif /* RECOLL */
|
#endif /* RECOLL */
|
||||||
#endif /* HAVE_CONFIG_H */
|
|
||||||
|
|
||||||
#ifdef RECOLL_DATADIR
|
#ifdef BUILDING_RECOLL
|
||||||
/* Yes, recoll unac is actually c++, lets face modernity, I will not be
|
/* Yes, recoll unac is actually c++, lets face modernity, I will not be
|
||||||
caught writing another binary search */
|
caught writing another binary search */
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <map>
|
#include <map>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
#include <iostream>
|
||||||
#include UNORDERED_MAP_INCLUDE
|
#include UNORDERED_MAP_INCLUDE
|
||||||
|
|
||||||
using std::string;
|
using std::string;
|
||||||
@ -53,7 +52,7 @@ static inline bool is_except_char(unsigned short c, string& trans)
|
|||||||
trans = it->second;
|
trans = it->second;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
#endif /* RECOLL_DATADIR */
|
#endif /* BUILDING_RECOLL*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If configure.in has not defined this symbol, assume const. It
|
* If configure.in has not defined this symbol, assume const. It
|
||||||
@ -14171,9 +14170,9 @@ int unacmaybefold_string_utf16(const char* in, size_t in_length,
|
|||||||
char** outp, size_t* out_lengthp, int what)
|
char** outp, size_t* out_lengthp, int what)
|
||||||
{
|
{
|
||||||
char* out;
|
char* out;
|
||||||
int out_size;
|
size_t out_size;
|
||||||
int out_length;
|
size_t out_length;
|
||||||
unsigned int i;
|
size_t i;
|
||||||
|
|
||||||
out_size = in_length > 0 ? in_length : 1024;
|
out_size = in_length > 0 ? in_length : 1024;
|
||||||
|
|
||||||
@ -14191,13 +14190,13 @@ int unacmaybefold_string_utf16(const char* in, size_t in_length,
|
|||||||
for(i = 0; i < in_length; i += 2) {
|
for(i = 0; i < in_length; i += 2) {
|
||||||
unsigned short c;
|
unsigned short c;
|
||||||
unsigned short* p;
|
unsigned short* p;
|
||||||
int l;
|
size_t l;
|
||||||
int k;
|
size_t k;
|
||||||
c = (in[i] << 8) | (in[i + 1] & 0xff);
|
c = (in[i] << 8) | (in[i + 1] & 0xff);
|
||||||
/*
|
/*
|
||||||
* Lookup the tables for decomposition information
|
* Lookup the tables for decomposition information
|
||||||
*/
|
*/
|
||||||
#ifdef RECOLL_DATADIR
|
#ifdef BUILDING_RECOLL
|
||||||
// Exception unac/fold values set by user. There should be 3 arrays for
|
// Exception unac/fold values set by user. There should be 3 arrays for
|
||||||
// unac/fold/unac+fold. For now there is only one array, which used to
|
// unac/fold/unac+fold. For now there is only one array, which used to
|
||||||
// be set for unac+fold, and is mostly or only used to prevent diacritics
|
// be set for unac+fold, and is mostly or only used to prevent diacritics
|
||||||
@ -14220,11 +14219,11 @@ int unacmaybefold_string_utf16(const char* in, size_t in_length,
|
|||||||
l = trans.size() / 2;
|
l = trans.size() / 2;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
#endif /* RECOLL_DATADIR */
|
#endif /* BUILDING_RECOLL */
|
||||||
unac_uf_char_utf16_(c, p, l, what)
|
unac_uf_char_utf16_(c, p, l, what)
|
||||||
#ifdef RECOLL_DATADIR
|
#ifdef BUILDING_RECOLL
|
||||||
}
|
}
|
||||||
#endif /* RECOLL_DATADIR */
|
#endif /* BUILDING_RECOLL */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Explain what's done in great detail
|
* Explain what's done in great detail
|
||||||
@ -14237,7 +14236,7 @@ int unacmaybefold_string_utf16(const char* in, size_t in_length,
|
|||||||
if(l == 0) {
|
if(l == 0) {
|
||||||
DEBUG_APPEND("untouched\n");
|
DEBUG_APPEND("untouched\n");
|
||||||
} else {
|
} else {
|
||||||
int i;
|
size_t i;
|
||||||
for(i = 0; i < l; i++)
|
for(i = 0; i < l; i++)
|
||||||
DEBUG_APPEND("0x%04x ", p[i]);
|
DEBUG_APPEND("0x%04x ", p[i]);
|
||||||
DEBUG_APPEND("\n");
|
DEBUG_APPEND("\n");
|
||||||
@ -14437,10 +14436,11 @@ static int convert(const char* from, const char* to,
|
|||||||
const char* tmp = space;
|
const char* tmp = space;
|
||||||
size_t tmp_length = 2;
|
size_t tmp_length = 2;
|
||||||
if(iconv(cd, (ICONV_CONST char **) &tmp, &tmp_length, &out, &out_remain) == (size_t)-1) {
|
if(iconv(cd, (ICONV_CONST char **) &tmp, &tmp_length, &out, &out_remain) == (size_t)-1) {
|
||||||
if(errno == E2BIG)
|
if(errno == E2BIG) {
|
||||||
/* fall thru to the E2BIG case below */;
|
/* fall thru to the E2BIG case below */;
|
||||||
else
|
} else {
|
||||||
goto out;
|
goto out;
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
/* The offending character was replaced by a SPACE, skip it. */
|
/* The offending character was replaced by a SPACE, skip it. */
|
||||||
in += 2;
|
in += 2;
|
||||||
@ -14456,7 +14456,7 @@ static int convert(const char* from, const char* to,
|
|||||||
/*
|
/*
|
||||||
* The output does not fit in the current out buffer, enlarge it.
|
* The output does not fit in the current out buffer, enlarge it.
|
||||||
*/
|
*/
|
||||||
int length = out - out_base;
|
size_t length = out - out_base;
|
||||||
out_size *= 2;
|
out_size *= 2;
|
||||||
{
|
{
|
||||||
char *saved = out_base;
|
char *saved = out_base;
|
||||||
@ -14562,7 +14562,7 @@ const char* unac_version(void)
|
|||||||
return UNAC_VERSION;
|
return UNAC_VERSION;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef RECOLL_DATADIR
|
#ifdef BUILDING_RECOLL
|
||||||
void unac_set_except_translations(const char *spectrans)
|
void unac_set_except_translations(const char *spectrans)
|
||||||
{
|
{
|
||||||
except_trans.clear();
|
except_trans.clear();
|
||||||
@ -14615,4 +14615,4 @@ void unac_set_except_translations(const char *spectrans)
|
|||||||
free(out);
|
free(out);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif /* RECOLL_DATADIR */
|
#endif /* BUILDING_RECOLL */
|
||||||
|
|||||||
@ -1 +0,0 @@
|
|||||||
unac.c
|
|
||||||
1
src/unac/unac.cpp
Normal file
1
src/unac/unac.cpp
Normal file
@ -0,0 +1 @@
|
|||||||
|
#include "unac.c"
|
||||||
@ -114,7 +114,7 @@ int fold_string(const char* charset,
|
|||||||
/* To be called before starting threads in mt programs */
|
/* To be called before starting threads in mt programs */
|
||||||
void unac_init_mt();
|
void unac_init_mt();
|
||||||
|
|
||||||
#ifdef RECOLL_DATADIR
|
#ifdef BUILDING_RECOLL
|
||||||
#include <string>
|
#include <string>
|
||||||
/**
|
/**
|
||||||
* Set exceptions for unaccenting, for characters which should not be
|
* Set exceptions for unaccenting, for characters which should not be
|
||||||
@ -128,7 +128,7 @@ void unac_init_mt();
|
|||||||
* can't be an exception character, deal with it...
|
* can't be an exception character, deal with it...
|
||||||
*/
|
*/
|
||||||
void unac_set_except_translations(const char *spectrans);
|
void unac_set_except_translations(const char *spectrans);
|
||||||
#endif /* RECOLL_DATADIR */
|
#endif /* BUILDING_RECOLL */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Return unac version number.
|
* Return unac version number.
|
||||||
|
|||||||
@ -109,8 +109,8 @@ trfileudi.o : fileudi.cpp fileudi.h
|
|||||||
EXECMD_OBJS= trexecmd.o
|
EXECMD_OBJS= trexecmd.o
|
||||||
trexecmd : $(EXECMD_OBJS)
|
trexecmd : $(EXECMD_OBJS)
|
||||||
$(CXX) $(ALL_CXXFLAGS) -o trexecmd $(EXECMD_OBJS) $(LIBRECOLL)
|
$(CXX) $(ALL_CXXFLAGS) -o trexecmd $(EXECMD_OBJS) $(LIBRECOLL)
|
||||||
trexecmd.o : execmd.cpp execmd.h
|
trexecmd.o : trexecmd.cpp execmd.h
|
||||||
$(CXX) -o trexecmd.o -c $(ALL_CXXFLAGS) -DTEST_EXECMD execmd.cpp
|
$(CXX) -o trexecmd.o -c $(ALL_CXXFLAGS) -I../xaposix trexecmd.cpp
|
||||||
|
|
||||||
TRANSCODE_OBJS= trtranscode.o
|
TRANSCODE_OBJS= trtranscode.o
|
||||||
transcode : $(TRANSCODE_OBJS)
|
transcode : $(TRANSCODE_OBJS)
|
||||||
|
|||||||
@ -217,7 +217,7 @@ void base64_encode(const string &in, string &out)
|
|||||||
|
|
||||||
out.clear();
|
out.clear();
|
||||||
|
|
||||||
int srclength = in.length();
|
string::size_type srclength = in.length();
|
||||||
int sidx = 0;
|
int sidx = 0;
|
||||||
while (2 < srclength) {
|
while (2 < srclength) {
|
||||||
input[0] = in[sidx++];
|
input[0] = in[sidx++];
|
||||||
@ -244,7 +244,7 @@ void base64_encode(const string &in, string &out)
|
|||||||
if (0 != srclength) {
|
if (0 != srclength) {
|
||||||
/* Get what's left. */
|
/* Get what's left. */
|
||||||
input[0] = input[1] = input[2] = '\0';
|
input[0] = input[1] = input[2] = '\0';
|
||||||
for (int i = 0; i < srclength; i++)
|
for (string::size_type i = 0; i < srclength; i++)
|
||||||
input[i] = in[sidx++];
|
input[i] = in[sidx++];
|
||||||
|
|
||||||
output[0] = input[0] >> 2;
|
output[0] = input[0] >> 2;
|
||||||
|
|||||||
@ -184,7 +184,7 @@ public:
|
|||||||
// Offset of last write (newest header)
|
// Offset of last write (newest header)
|
||||||
off_t m_nheadoffs;
|
off_t m_nheadoffs;
|
||||||
// Pad size for newest entry.
|
// Pad size for newest entry.
|
||||||
int m_npadsize;
|
off_t m_npadsize;
|
||||||
// Keep history or only last entry
|
// Keep history or only last entry
|
||||||
bool m_uniquentries;
|
bool m_uniquentries;
|
||||||
///////////////////// End header entries
|
///////////////////// End header entries
|
||||||
@ -956,10 +956,10 @@ bool CirCache::erase(const string& udi)
|
|||||||
// entry.
|
// entry.
|
||||||
class CCScanHookSpacer : public CCScanHook {
|
class CCScanHookSpacer : public CCScanHook {
|
||||||
public:
|
public:
|
||||||
UINT sizewanted;
|
off_t sizewanted;
|
||||||
UINT sizeseen;
|
off_t sizeseen;
|
||||||
vector<pair<string, off_t> > squashed_udis;
|
vector<pair<string, off_t> > squashed_udis;
|
||||||
CCScanHookSpacer(int sz)
|
CCScanHookSpacer(off_t sz)
|
||||||
: sizewanted(sz), sizeseen(0) {assert(sz > 0);}
|
: sizewanted(sz), sizeseen(0) {assert(sz > 0);}
|
||||||
|
|
||||||
virtual status takeone(off_t offs, const string& udi,
|
virtual status takeone(off_t offs, const string& udi,
|
||||||
@ -1009,14 +1009,14 @@ bool CirCache::put(const string& udi, const ConfSimple *iconf,
|
|||||||
|
|
||||||
// Data compression ?
|
// Data compression ?
|
||||||
const char *datap = data.c_str();
|
const char *datap = data.c_str();
|
||||||
unsigned int datalen = data.size();
|
size_t datalen = data.size();
|
||||||
unsigned short flags = 0;
|
unsigned short flags = 0;
|
||||||
TempBuf compbuf;
|
TempBuf compbuf;
|
||||||
if (!(iflags & NoCompHint)) {
|
if (!(iflags & NoCompHint)) {
|
||||||
ULONG len = compressBound(data.size());
|
uLong len = compressBound(static_cast<uLong>(data.size()));
|
||||||
char *bf = compbuf.setsize(len);
|
char *bf = compbuf.setsize(len);
|
||||||
if (bf != 0 &&
|
if (bf != 0 &&
|
||||||
compress((Bytef*)bf, &len, (Bytef*)data.c_str(), data.size())
|
compress((Bytef*)bf, &len, (Bytef*)data.c_str(), static_cast<uLong>(data.size()))
|
||||||
== Z_OK) {
|
== Z_OK) {
|
||||||
if (float(len) < 0.9 * float(data.size())) {
|
if (float(len) < 0.9 * float(data.size())) {
|
||||||
// bf is local but it's our static buffer address
|
// bf is local but it's our static buffer address
|
||||||
@ -1034,16 +1034,16 @@ bool CirCache::put(const string& udi, const ConfSimple *iconf,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Characteristics for the new entry.
|
// Characteristics for the new entry.
|
||||||
int nsize = CIRCACHE_HEADER_SIZE + dic.size() + datalen;
|
off_t nsize = CIRCACHE_HEADER_SIZE + dic.size() + datalen;
|
||||||
int nwriteoffs = m_d->m_oheadoffs;
|
off_t nwriteoffs = m_d->m_oheadoffs;
|
||||||
int npadsize = 0;
|
off_t npadsize = 0;
|
||||||
bool extending = false;
|
bool extending = false;
|
||||||
|
|
||||||
LOGDEB(("CirCache::put: nsz %d oheadoffs %d\n", nsize, m_d->m_oheadoffs));
|
LOGDEB(("CirCache::put: nsz %d oheadoffs %d\n", nsize, m_d->m_oheadoffs));
|
||||||
|
|
||||||
// Check if we can recover some pad space from the (physically) previous
|
// Check if we can recover some pad space from the (physically) previous
|
||||||
// entry.
|
// entry.
|
||||||
int recovpadsize = m_d->m_oheadoffs == CIRCACHE_FIRSTBLOCK_SIZE ?
|
off_t recovpadsize = m_d->m_oheadoffs == CIRCACHE_FIRSTBLOCK_SIZE ?
|
||||||
0 : m_d->m_npadsize;
|
0 : m_d->m_npadsize;
|
||||||
if (recovpadsize != 0) {
|
if (recovpadsize != 0) {
|
||||||
// Need to read the latest entry's header, to rewrite it with a
|
// Need to read the latest entry's header, to rewrite it with a
|
||||||
@ -1082,7 +1082,7 @@ bool CirCache::put(const string& udi, const ConfSimple *iconf,
|
|||||||
} else {
|
} else {
|
||||||
// Scan the file until we have enough space for the new entry,
|
// Scan the file until we have enough space for the new entry,
|
||||||
// and determine the pad size up to the 1st preserved entry
|
// and determine the pad size up to the 1st preserved entry
|
||||||
int scansize = nsize - recovpadsize;
|
off_t scansize = nsize - recovpadsize;
|
||||||
LOGDEB(("CirCache::put: scanning for size %d from offs %u\n",
|
LOGDEB(("CirCache::put: scanning for size %d from offs %u\n",
|
||||||
scansize, (UINT)m_d->m_oheadoffs));
|
scansize, (UINT)m_d->m_oheadoffs));
|
||||||
CCScanHookSpacer spacer(scansize);
|
CCScanHookSpacer spacer(scansize);
|
||||||
|
|||||||
@ -14,9 +14,7 @@
|
|||||||
* Free Software Foundation, Inc.,
|
* Free Software Foundation, Inc.,
|
||||||
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
*/
|
*/
|
||||||
#ifdef HAVE_CONFIG_H
|
#include "autoconfig.h"
|
||||||
#include "config.h"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef TEST_CONFTREE
|
#ifndef TEST_CONFTREE
|
||||||
|
|
||||||
@ -70,7 +68,7 @@ void ConfSimple::parseinput(istream &input)
|
|||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
int ll = strlen(cline);
|
size_t ll = strlen(cline);
|
||||||
while (ll > 0 && (cline[ll-1] == '\n' || cline[ll-1] == '\r')) {
|
while (ll > 0 && (cline[ll-1] == '\n' || cline[ll-1] == '\r')) {
|
||||||
cline[ll-1] = 0;
|
cline[ll-1] = 0;
|
||||||
ll--;
|
ll--;
|
||||||
@ -576,8 +574,8 @@ bool ConfSimple::hasNameAnywhere(const string& nm) const
|
|||||||
int ConfTree::get(const std::string &name, string &value, const string &sk)
|
int ConfTree::get(const std::string &name, string &value, const string &sk)
|
||||||
const
|
const
|
||||||
{
|
{
|
||||||
if (sk.empty() || sk[0] != '/') {
|
if (sk.empty() || !path_isabsolute(sk) ) {
|
||||||
// LOGDEB((stderr, "ConfTree::get: looking in global space\n"));
|
// LOGDEB((stderr, "ConfTree::get: looking in global space for sk [%s]\n", sk.c_str()));
|
||||||
return ConfSimple::get(name, value, sk);
|
return ConfSimple::get(name, value, sk);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -590,15 +588,21 @@ int ConfTree::get(const std::string &name, string &value, const string &sk)
|
|||||||
|
|
||||||
// Look in subkey and up its parents until root ('')
|
// Look in subkey and up its parents until root ('')
|
||||||
for (;;) {
|
for (;;) {
|
||||||
// LOGDEB((stderr,"ConfTree::get: looking for '%s' in '%s'\n",
|
// LOGDEB((stderr,"ConfTree::get: looking for '%s' in '%s'\n",
|
||||||
// name.c_str(), msk.c_str()));
|
// name.c_str(), msk.c_str()));
|
||||||
if (ConfSimple::get(name, value, msk))
|
if (ConfSimple::get(name, value, msk))
|
||||||
return 1;
|
return 1;
|
||||||
string::size_type pos = msk.rfind("/");
|
string::size_type pos = msk.rfind("/");
|
||||||
if (pos != string::npos) {
|
if (pos != string::npos) {
|
||||||
msk.replace(pos, string::npos, string());
|
msk.replace(pos, string::npos, string());
|
||||||
} else
|
} else {
|
||||||
|
#ifdef _WIN32
|
||||||
|
if (msk.size() == 2 && isalpha(msk[0]) && msk[1] == ':')
|
||||||
|
msk.clear();
|
||||||
|
else
|
||||||
|
#endif
|
||||||
break;
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -15,14 +15,18 @@
|
|||||||
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
*/
|
*/
|
||||||
#ifndef TEST_COPYFILE
|
#ifndef TEST_COPYFILE
|
||||||
|
#include "autoconfig.h"
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
#include "safefcntl.h"
|
#include "safefcntl.h"
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
#include "safesysstat.h"
|
#include "safesysstat.h"
|
||||||
#include "safeunistd.h"
|
#include "safeunistd.h"
|
||||||
|
#ifndef _WIN32
|
||||||
#include <sys/time.h>
|
#include <sys/time.h>
|
||||||
#include <utime.h>
|
#include <utime.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
|
|
||||||
@ -43,7 +47,7 @@ bool copyfile(const char *src, const char *dst, string &reason, int flags)
|
|||||||
|
|
||||||
LOGDEB(("copyfile: %s to %s\n", src, dst));
|
LOGDEB(("copyfile: %s to %s\n", src, dst));
|
||||||
|
|
||||||
if ((sfd = ::open(src, O_RDONLY)) < 0) {
|
if ((sfd = ::open(src, O_RDONLY, 0)) < 0) {
|
||||||
reason += string("open ") + src + ": " + strerror(errno);
|
reason += string("open ") + src + ": " + strerror(errno);
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
@ -149,6 +153,7 @@ bool renameormove(const char *src, const char *dst, string &reason)
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifndef _WIN32
|
||||||
// Try to preserve modes, owner, times. This may fail for a number
|
// Try to preserve modes, owner, times. This may fail for a number
|
||||||
// of reasons
|
// of reasons
|
||||||
if ((st1.st_mode & 0777) != (st.st_mode & 0777)) {
|
if ((st1.st_mode & 0777) != (st.st_mode & 0777)) {
|
||||||
@ -167,7 +172,7 @@ bool renameormove(const char *src, const char *dst, string &reason)
|
|||||||
times[1].tv_sec = st.st_mtime;
|
times[1].tv_sec = st.st_mtime;
|
||||||
times[1].tv_usec = 0;
|
times[1].tv_usec = 0;
|
||||||
utimes(dst, times);
|
utimes(dst, times);
|
||||||
|
#endif
|
||||||
// All ok, get rid of origin
|
// All ok, get rid of origin
|
||||||
if (unlink(src) < 0) {
|
if (unlink(src) < 0) {
|
||||||
reason += string("Can't unlink ") + src + "Error : " + strerror(errno);
|
reason += string("Can't unlink ") + src + "Error : " + strerror(errno);
|
||||||
|
|||||||
@ -62,7 +62,6 @@ bool getCpuConf(CpuConf& cpus)
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#else // TEST_CPUCONF
|
#else // TEST_CPUCONF
|
||||||
|
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
|||||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user