Merged the changes from the current windows port
This commit is contained in:
commit
5330685ec1
15
.hgignore
15
.hgignore
@ -11,6 +11,16 @@ libtool
|
||||
*.lo
|
||||
*~
|
||||
\#*
|
||||
*.obj
|
||||
*.sdf
|
||||
*.tlog
|
||||
*.lib
|
||||
*.idb
|
||||
*.log
|
||||
*.pdb
|
||||
.vs
|
||||
*.exe
|
||||
*.ilk
|
||||
ptrans
|
||||
src/aclocal.m4
|
||||
src/compile
|
||||
@ -82,12 +92,7 @@ src/qtgui/recoll
|
||||
src/qtgui/recoll.app
|
||||
src/qtgui/recoll.pro
|
||||
src/query/alldeps
|
||||
src/query/location.hh
|
||||
src/query/position.hh
|
||||
src/query/recollq
|
||||
src/query/stack.hh
|
||||
src/query/wasaparse.cpp
|
||||
src/query/wasaparse.hpp
|
||||
src/sampleconf/rclmon.sh
|
||||
src/sampleconf/recoll.conf
|
||||
src/utils/alldeps
|
||||
|
||||
@ -22,7 +22,8 @@ COMMONCPPFLAGS = -I. \
|
||||
-I$(top_srcdir)/rcldb \
|
||||
-I$(top_srcdir)/unac \
|
||||
-I$(top_srcdir)/utils \
|
||||
-I$(top_srcdir)/xaposix
|
||||
-I$(top_srcdir)/xaposix \
|
||||
-DBUILDING_RECOLL
|
||||
|
||||
AM_CPPFLAGS = -Wall -Wno-unused \
|
||||
$(COMMONCPPFLAGS) \
|
||||
|
||||
@ -47,7 +47,7 @@ BincStream::~BincStream(void)
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
string BincStream::popString(unsigned int size)
|
||||
string BincStream::popString(std::string::size_type size)
|
||||
{
|
||||
if (size > nstr.length())
|
||||
size = nstr.length();
|
||||
|
||||
@ -25,6 +25,7 @@
|
||||
*/
|
||||
#ifndef convert_h_included
|
||||
#define convert_h_included
|
||||
#include <stddef.h>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <iomanip>
|
||||
@ -93,7 +94,7 @@ namespace Binc {
|
||||
std::string tmp;
|
||||
for (std::string::const_iterator i = s.begin();
|
||||
i != s.end() && i + 1 != s.end(); i += 2) {
|
||||
int n;
|
||||
ptrdiff_t n;
|
||||
unsigned char c = *i;
|
||||
unsigned char d = *(i + 1);
|
||||
|
||||
@ -122,7 +123,7 @@ namespace Binc {
|
||||
for (std::string::const_iterator i = s_in.begin(); i != s_in.end(); ++i) {
|
||||
unsigned char c = (unsigned char)*i;
|
||||
if (c <= 31 || c >= 127 || c == '\"' || c == '\\')
|
||||
return "{" + toString(s_in.length()) + "}\r\n" + s_in;
|
||||
return "{" + toString((unsigned long)s_in.length()) + "}\r\n" + s_in;
|
||||
}
|
||||
|
||||
return "\"" + s_in + "\"";
|
||||
@ -145,7 +146,7 @@ namespace Binc {
|
||||
//----------------------------------------------------------------------
|
||||
inline void chomp(std::string &s_in, const std::string &chars = " \t\r\n")
|
||||
{
|
||||
int n = s_in.length();
|
||||
std::string::size_type n = s_in.length();
|
||||
while (n > 1 && chars.find(s_in[n - 1]) != std::string::npos)
|
||||
s_in.resize(n-- - 1);
|
||||
}
|
||||
@ -290,7 +291,7 @@ namespace Binc {
|
||||
BincStream &operator << (char t);
|
||||
|
||||
//--
|
||||
std::string popString(unsigned int size);
|
||||
std::string popString(std::string::size_type size);
|
||||
|
||||
//--
|
||||
char popChar(void);
|
||||
|
||||
@ -25,7 +25,7 @@
|
||||
*/
|
||||
#ifndef mime_inputsource_h_included
|
||||
#define mime_inputsource_h_included
|
||||
|
||||
#include "autoconfig.h"
|
||||
// Data source for MIME parser
|
||||
|
||||
// Note about large files: we might want to change the unsigned int
|
||||
@ -49,7 +49,7 @@ namespace Binc {
|
||||
inline MimeInputSource(int fd, unsigned int start = 0);
|
||||
virtual inline ~MimeInputSource(void);
|
||||
|
||||
virtual inline size_t fillRaw(char *raw, size_t nbytes);
|
||||
virtual inline ssize_t fillRaw(char *raw, size_t nbytes);
|
||||
virtual inline void reset(void);
|
||||
|
||||
virtual inline bool fillInputBuffer(void);
|
||||
@ -87,7 +87,7 @@ namespace Binc {
|
||||
{
|
||||
}
|
||||
|
||||
inline size_t MimeInputSource::fillRaw(char *raw, size_t nbytes)
|
||||
inline ssize_t MimeInputSource::fillRaw(char *raw, size_t nbytes)
|
||||
{
|
||||
return read(fd, raw, nbytes);
|
||||
}
|
||||
@ -179,7 +179,7 @@ namespace Binc {
|
||||
class MimeInputSourceStream : public MimeInputSource {
|
||||
public:
|
||||
inline MimeInputSourceStream(istream& s, unsigned int start = 0);
|
||||
virtual inline size_t fillRaw(char *raw, size_t nb);
|
||||
virtual inline ssize_t fillRaw(char *raw, size_t nb);
|
||||
virtual inline void reset(void);
|
||||
private:
|
||||
istream& s;
|
||||
@ -191,7 +191,7 @@ namespace Binc {
|
||||
{
|
||||
}
|
||||
|
||||
inline size_t MimeInputSourceStream::fillRaw(char *raw, size_t nb)
|
||||
inline ssize_t MimeInputSourceStream::fillRaw(char *raw, size_t nb)
|
||||
{
|
||||
// Why can't streams tell how many characters were actually read
|
||||
// when hitting eof ?
|
||||
@ -199,16 +199,16 @@ namespace Binc {
|
||||
s.seekg(0, ios::end);
|
||||
std::streampos lst = s.tellg();
|
||||
s.seekg(st);
|
||||
size_t nbytes = lst - st;
|
||||
size_t nbytes = size_t(lst - st);
|
||||
if (nbytes > nb) {
|
||||
nbytes = nb;
|
||||
}
|
||||
if (nbytes <= 0) {
|
||||
return (size_t)-1;
|
||||
return (ssize_t)-1;
|
||||
}
|
||||
|
||||
s.read(raw, nbytes);
|
||||
return nbytes;
|
||||
return static_cast<ssize_t>(nbytes);
|
||||
}
|
||||
|
||||
inline void MimeInputSourceStream::reset(void)
|
||||
|
||||
@ -306,9 +306,9 @@ void Binc::MimePart::parseMessageRFC822(vector<Binc::MimePart> *members,
|
||||
bool Binc::MimePart::skipUntilBoundary(const string &delimiter,
|
||||
unsigned int *nlines, bool *eof)
|
||||
{
|
||||
int endpos = delimiter.length();
|
||||
string::size_type endpos = delimiter.length();
|
||||
char *delimiterqueue = 0;
|
||||
int delimiterpos = 0;
|
||||
string::size_type delimiterpos = 0;
|
||||
const char *delimiterStr = delimiter.c_str();
|
||||
if (delimiter != "") {
|
||||
delimiterqueue = new char[endpos];
|
||||
@ -340,7 +340,7 @@ bool Binc::MimePart::skipUntilBoundary(const string &delimiter,
|
||||
delimiterpos = 0;
|
||||
|
||||
if (compareStringToQueue(delimiterStr, delimiterqueue,
|
||||
delimiterpos, endpos)) {
|
||||
delimiterpos, int(endpos))) {
|
||||
foundBoundary = true;
|
||||
break;
|
||||
}
|
||||
@ -451,7 +451,7 @@ void Binc::MimePart::parseMultipart(const string &boundary,
|
||||
skipUntilBoundary(delimiter, nlines, eof);
|
||||
|
||||
if (!eof)
|
||||
*boundarysize = delimiter.size();
|
||||
*boundarysize = int(delimiter.size());
|
||||
|
||||
postBoundaryProcessing(eof, nlines, boundarysize, foundendofpart);
|
||||
|
||||
@ -484,7 +484,7 @@ void Binc::MimePart::parseMultipart(const string &boundary,
|
||||
skipUntilBoundary(delimiter, nlines, eof);
|
||||
|
||||
if (!*eof)
|
||||
*boundarysize = delimiter.size();
|
||||
*boundarysize = int(delimiter.size());
|
||||
|
||||
postBoundaryProcessing(eof, nlines, boundarysize, foundendofpart);
|
||||
}
|
||||
@ -528,7 +528,7 @@ void Binc::MimePart::parseSinglePart(const string &toboundary,
|
||||
// *boundarysize = _toboundary.length();
|
||||
|
||||
char *boundaryqueue = 0;
|
||||
int endpos = _toboundary.length();
|
||||
size_t endpos = _toboundary.length();
|
||||
if (toboundary != "") {
|
||||
boundaryqueue = new char[endpos];
|
||||
memset(boundaryqueue, 0, endpos);
|
||||
@ -540,7 +540,7 @@ void Binc::MimePart::parseSinglePart(const string &toboundary,
|
||||
string line;
|
||||
bool toboundaryIsEmpty = (toboundary == "");
|
||||
char c;
|
||||
int boundarypos = 0;
|
||||
string::size_type boundarypos = 0;
|
||||
while (mimeSource->getChar(&c)) {
|
||||
if (c == '\n') { ++*nbodylines; ++*nlines; }
|
||||
|
||||
@ -553,8 +553,8 @@ void Binc::MimePart::parseSinglePart(const string &toboundary,
|
||||
boundarypos = 0;
|
||||
|
||||
if (compareStringToQueue(_toboundaryStr, boundaryqueue,
|
||||
boundarypos, endpos)) {
|
||||
*boundarysize = _toboundary.length();
|
||||
boundarypos, int(endpos))) {
|
||||
*boundarysize = static_cast<int>(_toboundary.length());
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
@ -119,7 +119,7 @@ int Binc::MimePart::doParseOnlyHeader(MimeInputSource *ms,
|
||||
if (c == '\n') ++nlines;
|
||||
if (c == ':') break;
|
||||
if (c == '\n') {
|
||||
for (int i = name.length() - 1; i >= 0; --i)
|
||||
// Push back one character per byte of the partial header name.
// size_type is unsigned, so an "i >= 0" test can never fail: count
// from length() down to 1 instead (same number of iterations, and
// zero iterations for an empty name).
for (string::size_type i = name.length(); i > 0; --i)
|
||||
mimeSource->ungetChar();
|
||||
|
||||
quit = true;
|
||||
|
||||
186
src/common/autoconfig-win.h
Normal file
186
src/common/autoconfig-win.h
Normal file
@ -0,0 +1,186 @@
|
||||
/* Manually edited version of autoconfig.h for windows. Many things are
|
||||
overridden in the c++ code by ifdefs _WIN32 anyway */
|
||||
#ifndef _AUTOCONFIG_H_INCLUDED
|
||||
#define _AUTOCONFIG_H_INCLUDED
|
||||
/* Define if building universal (internal helper macro) */
|
||||
/* #undef AC_APPLE_UNIVERSAL_BUILD */
|
||||
|
||||
/* Path to the aspell api include file */
|
||||
/* #undef ASPELL_INCLUDE "aspell-local.h" */
|
||||
|
||||
/* Path to the aspell program */
|
||||
/* #define ASPELL_PROG "/usr/bin/aspell" */
|
||||
|
||||
/* No X11 session monitoring support */
|
||||
#define DISABLE_X11MON
|
||||
|
||||
/* Path to the fam api include file */
|
||||
/* #undef FAM_INCLUDE */
|
||||
|
||||
/* Path to the file program */
|
||||
#define FILE_PROG "/usr/bin/file"
|
||||
|
||||
/* "Have C++0x" */
|
||||
#undef HAVE_CXX0X_UNORDERED
|
||||
|
||||
/* Define to 1 if you have the <dlfcn.h> header file. */
|
||||
#define HAVE_DLFCN_H 1
|
||||
|
||||
/* Define to 1 if you have the <inttypes.h> header file. */
|
||||
#define HAVE_INTTYPES_H 1
|
||||
|
||||
/* Define to 1 if you have the `dl' library (-ldl). */
|
||||
#define HAVE_LIBDL 1
|
||||
|
||||
/* Define to 1 if you have the `pthread' library (-lpthread). */
|
||||
#define HAVE_LIBPTHREAD 1
|
||||
|
||||
/* Define to 1 if you have the `z' library (-lz). */
|
||||
#define HAVE_LIBZ 1
|
||||
|
||||
/* Define to 1 if you have the <memory.h> header file. */
|
||||
#define HAVE_MEMORY_H 1
|
||||
|
||||
/* Define to 1 if you have the `mkdtemp' function. */
|
||||
/* #undef HAVE_MKDTEMP */
|
||||
|
||||
/* Define to 1 if you have the `posix_spawn,' function. */
|
||||
/* #undef HAVE_POSIX_SPAWN_ */
|
||||
|
||||
/* Define to 1 if you have the `setrlimit' function. */
|
||||
#define HAVE_SETRLIMIT 1
|
||||
|
||||
/* Has std::shared_ptr */
|
||||
#define HAVE_SHARED_PTR_STD
|
||||
|
||||
/* Has std::tr1::shared_ptr */
|
||||
/* #undef HAVE_SHARED_PTR_TR1 */
|
||||
|
||||
/* Define to 1 if you have the <spawn.h> header file. */
|
||||
#define HAVE_SPAWN_H 1
|
||||
|
||||
/* Define to 1 if you have the <stdint.h> header file. */
|
||||
#define HAVE_STDINT_H 1
|
||||
|
||||
/* Define to 1 if you have the <stdlib.h> header file. */
|
||||
#define HAVE_STDLIB_H 1
|
||||
|
||||
/* Define to 1 if you have the <strings.h> header file. */
|
||||
#define HAVE_STRINGS_H 1
|
||||
|
||||
/* Define to 1 if you have the <string.h> header file. */
|
||||
#define HAVE_STRING_H 1
|
||||
|
||||
/* Define to 1 if you have the <sys/mount.h> header file. */
|
||||
/* #undef HAVE_SYS_MOUNT_H */
|
||||
|
||||
/* Define to 1 if you have the <sys/param.h,> header file. */
|
||||
/* #undef HAVE_SYS_PARAM_H_ */
|
||||
|
||||
/* Define to 1 if you have the <sys/statfs.h> header file. */
|
||||
/* #undef HAVE_SYS_STATFS_H */
|
||||
|
||||
/* Define to 1 if you have the <sys/statvfs.h> header file. */
|
||||
/* #undef HAVE_SYS_STATVFS_H */
|
||||
|
||||
/* Define to 1 if you have the <sys/stat.h> header file. */
|
||||
#define HAVE_SYS_STAT_H 1
|
||||
|
||||
/* Define to 1 if you have the <sys/types.h> header file. */
|
||||
#define HAVE_SYS_TYPES_H 1
|
||||
|
||||
/* Define to 1 if you have the <sys/vfs.h> header file. */
|
||||
/* #undef HAVE_SYS_VFS_H */
|
||||
|
||||
/* "Have tr1" */
|
||||
/* #undef HAVE_TR1_UNORDERED */
|
||||
|
||||
/* Define to 1 if you have the <unistd.h> header file. */
|
||||
/* #undef HAVE_UNISTD_H */
|
||||
|
||||
/* Use multiple threads for indexing */
|
||||
#define IDX_THREADS 1
|
||||
|
||||
/* Define to the sub-directory in which libtool stores uninstalled libraries.
|
||||
*/
|
||||
#define LT_OBJDIR ".libs/"
|
||||
|
||||
/* Define to the address where bug reports for this package should be sent. */
|
||||
#define PACKAGE_BUGREPORT ""
|
||||
|
||||
/* Define to the full name of this package. */
|
||||
#define PACKAGE_NAME "Recoll"
|
||||
|
||||
/* Define to the full name and version of this package. */
|
||||
#define PACKAGE_STRING "Recoll 1.22.0"
|
||||
|
||||
/* Define to the one symbol short name of this package. */
|
||||
#define PACKAGE_TARNAME "recoll"
|
||||
|
||||
/* Define to the home page for this package. */
|
||||
#define PACKAGE_URL ""
|
||||
|
||||
/* Define to the version of this package. */
|
||||
#define PACKAGE_VERSION "1.22.0"
|
||||
|
||||
/* putenv parameter is const */
|
||||
/* #undef PUTENV_ARG_CONST */
|
||||
|
||||
/* iconv parameter 2 is const char** */
|
||||
#define RCL_ICONV_INBUF_CONST 1
|
||||
|
||||
/* Real time monitoring option */
|
||||
#undef RCL_MONITOR
|
||||
|
||||
/* Split camelCase words */
|
||||
/* #undef RCL_SPLIT_CAMELCASE */
|
||||
|
||||
/* Compile the aspell interface */
|
||||
/* #undef RCL_USE_ASPELL */
|
||||
|
||||
/* Compile the fam interface */
|
||||
/* #undef RCL_USE_FAM */
|
||||
|
||||
/* Compile the inotify interface */
|
||||
#define RCL_USE_INOTIFY 1
|
||||
|
||||
/* Define to 1 if you have the ANSI C header files. */
|
||||
#define STDC_HEADERS 1
|
||||
|
||||
/* Use posix_spawn() */
|
||||
/* #undef USE_POSIX_SPAWN */
|
||||
|
||||
/* Enable using the system's 'file' command to id mime if we fail internally
|
||||
*/
|
||||
/* #undef USE_SYSTEM_FILE_COMMAND */
|
||||
|
||||
/* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most
|
||||
significant byte first (like Motorola and SPARC, unlike Intel). */
|
||||
#if defined AC_APPLE_UNIVERSAL_BUILD
|
||||
# if defined __BIG_ENDIAN__
|
||||
# define WORDS_BIGENDIAN 1
|
||||
# endif
|
||||
#else
|
||||
# ifndef WORDS_BIGENDIAN
|
||||
/* # undef WORDS_BIGENDIAN */
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* Define to 1 if the X Window System is missing or not being used. */
|
||||
/* #undef X_DISPLAY_MISSING */
|
||||
|
||||
/* Enable large inode numbers on Mac OS X 10.5. */
|
||||
#ifndef _DARWIN_USE_64_BIT_INODE
|
||||
# define _DARWIN_USE_64_BIT_INODE 1
|
||||
#endif
|
||||
|
||||
/* Number of bits in a file offset, on hosts where this is settable. */
|
||||
/* #undef _FILE_OFFSET_BITS */
|
||||
|
||||
/* Define for large files, on AIX-style hosts. */
|
||||
/* #undef _LARGE_FILES */
|
||||
|
||||
#define DISABLE_WEB_INDEXER
|
||||
|
||||
#include "conf_post.h"
|
||||
#endif // already included
|
||||
@ -35,7 +35,7 @@ BeagleQueueCache::BeagleQueueCache(RclConfig *cnf)
|
||||
ccdir = "webcache";
|
||||
ccdir = path_tildexpand(ccdir);
|
||||
// If not an absolute path, compute relative to config dir
|
||||
if (ccdir.at(0) != '/')
|
||||
if (!path_isabsolute(ccdir))
|
||||
ccdir = path_cat(cnf->getConfDir(), ccdir);
|
||||
|
||||
int maxmbs = 40;
|
||||
|
||||
@ -26,3 +26,42 @@
|
||||
# define STD_SHARED_PTR RefCntr
|
||||
#endif
|
||||
|
||||
#ifdef _WIN32
|
||||
#include "safewindows.h"
|
||||
typedef int pid_t;
|
||||
inline int readlink(const char *cp, void *buf, int cnt) {
|
||||
return -1;
|
||||
}
|
||||
#define HAVE_STRUCT_TIMESPEC
|
||||
#define strdup _strdup
|
||||
#define timegm _mkgmtime
|
||||
#ifdef _MSC_VER
|
||||
// gmtime is supposedly thread-safe on windows
|
||||
// Emulate POSIX gmtime_r(): fill the caller's buffer B (the MSVC gmtime_s
// takes the destination first) and yield B on success, 0 on failure.
#define gmtime_r(A, B) (gmtime_s((B), (A)) == 0 ? (B) : 0)
|
||||
// Emulate POSIX localtime_r(): fill the caller's buffer B (the MSVC
// localtime_s takes the destination first) and yield B on success, 0 on failure.
#define localtime_r(A,B) (localtime_s((B), (A)) == 0 ? (B) : 0)
|
||||
#define PATH_MAX MAX_PATH
|
||||
#define MAXPATHLEN PATH_MAX
|
||||
typedef int mode_t;
|
||||
#endif
|
||||
|
||||
typedef DWORD32 u_int32_t;
|
||||
typedef DWORD64 u_int64_t;
|
||||
typedef unsigned __int8 u_int8_t;
|
||||
typedef int ssize_t;
|
||||
#define strncasecmp _strnicmp
|
||||
#define strcasecmp _stricmp
|
||||
#define ftruncate _chsize_s
|
||||
#define chdir _chdir
|
||||
|
||||
#define R_OK 4
|
||||
#define W_OK 2
|
||||
#define X_OK 4
|
||||
#define RECOLL_DATADIR "C:\\recoll\\"
|
||||
#define S_ISLNK(X) false
|
||||
#define lstat stat
|
||||
#define fseeko _fseeki64
|
||||
#define ftello (off_t)_ftelli64
|
||||
#define timegm _mkgmtime
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
@ -19,11 +19,13 @@
|
||||
|
||||
#include <stdio.h>
|
||||
#include <errno.h>
|
||||
#ifndef _WIN32
|
||||
#include <langinfo.h>
|
||||
#include <sys/param.h>
|
||||
#endif
|
||||
#include <limits.h>
|
||||
#include "safesysstat.h"
|
||||
#include "safeunistd.h"
|
||||
#include <sys/param.h>
|
||||
#ifdef __FreeBSD__
|
||||
#include <osreldate.h>
|
||||
#endif
|
||||
@ -45,6 +47,7 @@
|
||||
#include "readfile.h"
|
||||
#include "fstreewalk.h"
|
||||
#include "cpuconf.h"
|
||||
#include "execmd.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
@ -120,7 +123,8 @@ void RclConfig::zeroMe() {
|
||||
|
||||
bool RclConfig::isDefaultConfig() const
|
||||
{
|
||||
string defaultconf = path_cat(path_canon(path_home()), ".recoll/");
|
||||
string defaultconf = path_cat(path_homedata(),
|
||||
path_defaultrecollconfsubdir());
|
||||
string specifiedconf = path_canon(m_confdir);
|
||||
path_catslash(specifiedconf);
|
||||
return !defaultconf.compare(specifiedconf);
|
||||
@ -146,14 +150,7 @@ RclConfig::RclConfig(const string *argcnf)
|
||||
}
|
||||
|
||||
// Compute our data dir name, typically /usr/local/share/recoll
|
||||
const char *cdatadir = getenv("RECOLL_DATADIR");
|
||||
if (cdatadir == 0) {
|
||||
// If not in environment, use the compiled-in constant.
|
||||
m_datadir = RECOLL_DATADIR;
|
||||
} else {
|
||||
m_datadir = cdatadir;
|
||||
}
|
||||
|
||||
m_datadir = path_sharedatadir();
|
||||
// We only do the automatic configuration creation thing for the default
|
||||
// config dir, not if it was specified through -c or RECOLL_CONFDIR
|
||||
bool autoconfdir = false;
|
||||
@ -172,7 +169,7 @@ RclConfig::RclConfig(const string *argcnf)
|
||||
m_confdir = path_canon(cp);
|
||||
} else {
|
||||
autoconfdir = true;
|
||||
m_confdir = path_cat(path_home(), ".recoll/");
|
||||
m_confdir = path_cat(path_homedata(), path_defaultrecollconfsubdir());
|
||||
}
|
||||
}
|
||||
|
||||
@ -200,6 +197,7 @@ RclConfig::RclConfig(const string *argcnf)
|
||||
// is called from the main thread at once, by constructing a config
|
||||
// from recollinit
|
||||
if (o_localecharset.empty()) {
|
||||
#ifndef _WIN32
|
||||
const char *cp;
|
||||
cp = nl_langinfo(CODESET);
|
||||
// We don't keep US-ASCII. It's better to use a superset
|
||||
@ -217,6 +215,9 @@ RclConfig::RclConfig(const string *argcnf)
|
||||
// Use cp1252 instead of iso-8859-1, it's a superset.
|
||||
o_localecharset = string(cstr_cp1252);
|
||||
}
|
||||
#else
|
||||
o_localecharset = "UTF-8";
|
||||
#endif
|
||||
LOGDEB1(("RclConfig::getDefCharset: localecharset [%s]\n",
|
||||
o_localecharset.c_str()));
|
||||
}
|
||||
@ -635,7 +636,7 @@ bool RclConfig::inStopSuffixes(const string& fni)
|
||||
it != stoplist.end(); it++) {
|
||||
STOPSUFFIXES->insert(SfString(stringtolower(*it)));
|
||||
if (m_maxsufflen < it->length())
|
||||
m_maxsufflen = it->length();
|
||||
m_maxsufflen = int(it->length());
|
||||
}
|
||||
}
|
||||
|
||||
@ -1154,7 +1155,7 @@ string RclConfig::getConfdirPath(const char *varname, const char *dflt) const
|
||||
} else {
|
||||
result = path_tildexpand(result);
|
||||
// If not an absolute path, compute relative to config dir
|
||||
if (result.at(0) != '/') {
|
||||
if (!path_isabsolute(result)) {
|
||||
result = path_cat(getConfDir(), result);
|
||||
}
|
||||
}
|
||||
@ -1212,7 +1213,7 @@ void RclConfig::urlrewrite(const string& dbdir, string& url) const
|
||||
// This call always succeeds because the key comes from getNames()
|
||||
if (m_ptrans->get(*it, npath, dbdir)) {
|
||||
path = path.replace(0, it->size(), npath);
|
||||
url = "file://" + path;
|
||||
url = path_pathtofileurl(path);
|
||||
}
|
||||
break;
|
||||
}
|
||||
@ -1305,45 +1306,45 @@ vector<string> RclConfig::getDaemSkippedPaths() const
|
||||
}
|
||||
|
||||
|
||||
// Look up an executable filter. We look in $RECOLL_FILTERSDIR,
|
||||
// filtersdir in config file, then let the system use the PATH
|
||||
// Look up an executable filter. We add $RECOLL_FILTERSDIR,
|
||||
// and filtersdir from the config file to the PATH, then use execmd::which()
|
||||
string RclConfig::findFilter(const string &icmd) const
|
||||
{
|
||||
// If the path is absolute, this is it
|
||||
if (icmd[0] == '/')
|
||||
if (path_isabsolute(icmd))
|
||||
return icmd;
|
||||
|
||||
string cmd;
|
||||
const char *cp;
|
||||
const char *cp = getenv("PATH");
|
||||
if (!cp) //??
|
||||
cp = "";
|
||||
string PATH(cp);
|
||||
|
||||
// Filters dir from environment ?
|
||||
// For historical reasons: check in personal config directory
|
||||
PATH = getConfDir() + path_PATHsep() + PATH;
|
||||
|
||||
string temp;
|
||||
// Prepend $datadir/filters
|
||||
temp = path_cat(m_datadir, "filters");
|
||||
PATH = temp + path_PATHsep() + PATH;
|
||||
|
||||
// Prepend possible configuration parameter?
|
||||
if (getConfParam(string("filtersdir"), temp)) {
|
||||
temp = path_tildexpand(temp);
|
||||
PATH = temp + path_PATHsep() + PATH;
|
||||
}
|
||||
|
||||
// Prepend possible environment variable
|
||||
if ((cp = getenv("RECOLL_FILTERSDIR"))) {
|
||||
cmd = path_cat(cp, icmd);
|
||||
if (access(cmd.c_str(), X_OK) == 0)
|
||||
return cmd;
|
||||
}
|
||||
// Filters dir as configuration parameter?
|
||||
if (getConfParam(string("filtersdir"), cmd)) {
|
||||
cmd = path_cat(cmd, icmd);
|
||||
if (access(cmd.c_str(), X_OK) == 0)
|
||||
return cmd;
|
||||
PATH = string(cp) + path_PATHsep() + PATH;
|
||||
}
|
||||
|
||||
// Filters dir as datadir subdir. Actually the standard case, but
|
||||
// this is normally the same value found in config file (previous step)
|
||||
cmd = path_cat(m_datadir, "filters");
|
||||
cmd = path_cat(cmd, icmd);
|
||||
if (access(cmd.c_str(), X_OK) == 0)
|
||||
return cmd;
|
||||
|
||||
// Last resort for historical reasons: check in personal config
|
||||
// directory
|
||||
cmd = path_cat(getConfDir(), icmd);
|
||||
if (access(cmd.c_str(), X_OK) == 0)
|
||||
return cmd;
|
||||
|
||||
// Let the shell try to find it...
|
||||
return icmd;
|
||||
string cmd;
|
||||
if (ExecCmd::which(icmd, cmd, PATH.c_str())) {
|
||||
return cmd;
|
||||
} else {
|
||||
// Let the shell try to find it...
|
||||
return icmd;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@ -17,6 +17,9 @@
|
||||
#include "autoconfig.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#ifdef _WIN32
|
||||
#include "safewindows.h"
|
||||
#endif
|
||||
#include <signal.h>
|
||||
#include <locale.h>
|
||||
#include <pthread.h>
|
||||
@ -33,8 +36,6 @@
|
||||
#include "smallut.h"
|
||||
#include "execmd.h"
|
||||
|
||||
static const int catchedSigs[] = {SIGINT, SIGQUIT, SIGTERM, SIGUSR1, SIGUSR2};
|
||||
|
||||
static pthread_t mainthread_id;
|
||||
|
||||
static void siglogreopen(int)
|
||||
@ -43,23 +44,17 @@ static void siglogreopen(int)
|
||||
DebugLog::reopen();
|
||||
}
|
||||
|
||||
RclConfig *recollinit(RclInitFlags flags,
|
||||
void (*cleanup)(void), void (*sigcleanup)(int),
|
||||
string &reason, const string *argcnf)
|
||||
#ifndef _WIN32
|
||||
// We would like to block SIGCHLD globally, but we can't because
|
||||
// QT uses it. Have to block it inside execmd.cpp
|
||||
static const int catchedSigs[] = {SIGINT, SIGQUIT, SIGTERM, SIGUSR1, SIGUSR2};
|
||||
void initAsyncSigs(void (*sigcleanup)(int))
|
||||
{
|
||||
if (cleanup)
|
||||
atexit(cleanup);
|
||||
|
||||
// We ignore SIGPIPE always. All pieces of code which can write to a pipe
|
||||
// must check write() return values.
|
||||
#ifndef _WIN32
|
||||
signal(SIGPIPE, SIG_IGN);
|
||||
|
||||
// Make sure the locale is set. This is only for converting file names
|
||||
// to utf8 for indexing.
|
||||
setlocale(LC_CTYPE, "");
|
||||
|
||||
// We would like to block SIGCHLD globally, but we can't because
|
||||
// QT uses it. Have to block it inside execmd.cpp
|
||||
#endif
|
||||
|
||||
// Install app signal handler
|
||||
if (sigcleanup) {
|
||||
@ -75,11 +70,91 @@ RclConfig *recollinit(RclInitFlags flags,
|
||||
}
|
||||
}
|
||||
|
||||
// Install log rotate sig handler
|
||||
{
|
||||
struct sigaction action;
|
||||
action.sa_handler = siglogreopen;
|
||||
action.sa_flags = 0;
|
||||
sigemptyset(&action.sa_mask);
|
||||
if (signal(SIGHUP, SIG_IGN) != SIG_IGN) {
|
||||
if (sigaction(SIGHUP, &action, 0) < 0) {
|
||||
perror("Sigaction failed");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#else
|
||||
|
||||
// Windows signals etc.
|
||||
//
|
||||
// ^C can be caught by the signal() emulation, but not ^Break
|
||||
// apparently, which is why we use the native approach too
|
||||
//
|
||||
// When a keyboard interrupt occurs, windows creates a thread inside
|
||||
// the process and calls the handler. The process exits when the
|
||||
// handler returns or after at most 10S
|
||||
//
|
||||
// In practice, only recollindex sets sigcleanup(), and the routine
|
||||
// just sets a global termination flag. So we just call it and sleep,
|
||||
// hoping that cleanup does not take more than what Windows will let
|
||||
// us live.
|
||||
|
||||
static void (*l_sigcleanup)(int);
|
||||
|
||||
static BOOL WINAPI CtrlHandler(DWORD fdwCtrlType)
|
||||
{
|
||||
if (l_sigcleanup == 0)
|
||||
return FALSE;
|
||||
|
||||
switch(fdwCtrlType) {
|
||||
case CTRL_C_EVENT:
|
||||
case CTRL_CLOSE_EVENT:
|
||||
case CTRL_BREAK_EVENT:
|
||||
case CTRL_LOGOFF_EVENT:
|
||||
case CTRL_SHUTDOWN_EVENT:
|
||||
l_sigcleanup(SIGINT);
|
||||
Sleep(10000);
|
||||
return TRUE;
|
||||
default:
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
static const int catchedSigs[] = {SIGINT, SIGTERM};
|
||||
void initAsyncSigs(void (*sigcleanup)(int))
|
||||
{
|
||||
// Install app signal handler
|
||||
if (sigcleanup) {
|
||||
l_sigcleanup = sigcleanup;
|
||||
for (unsigned int i = 0; i < sizeof(catchedSigs) / sizeof(int); i++) {
|
||||
if (signal(catchedSigs[i], SIG_IGN) != SIG_IGN) {
|
||||
signal(catchedSigs[i], sigcleanup);
|
||||
}
|
||||
}
|
||||
}
|
||||
SetConsoleCtrlHandler((PHANDLER_ROUTINE)CtrlHandler, TRUE);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
RclConfig *recollinit(RclInitFlags flags,
|
||||
void (*cleanup)(void), void (*sigcleanup)(int),
|
||||
string &reason, const string *argcnf)
|
||||
{
|
||||
if (cleanup)
|
||||
atexit(cleanup);
|
||||
|
||||
// Make sure the locale is set. This is only for converting file names
|
||||
// to utf8 for indexing.
|
||||
setlocale(LC_CTYPE, "");
|
||||
|
||||
DebugLog::getdbl()->setloglevel(DEBDEB1);
|
||||
DebugLog::setfilename("stderr");
|
||||
if (getenv("RECOLL_LOGDATE"))
|
||||
DebugLog::getdbl()->logdate(1);
|
||||
|
||||
initAsyncSigs(sigcleanup);
|
||||
|
||||
RclConfig *config = new RclConfig(argcnf);
|
||||
if (!config || !config->ok()) {
|
||||
reason = "Configuration could not be built:\n";
|
||||
@ -105,7 +180,7 @@ RclConfig *recollinit(RclInitFlags flags,
|
||||
if (!logfilename.empty()) {
|
||||
logfilename = path_tildexpand(logfilename);
|
||||
// If not an absolute path or a special name, compute relative to config dir
|
||||
if (logfilename.at(0) != '/' &&
|
||||
if (!path_isabsolute(logfilename) &&
|
||||
!DebugLog::DebugLog::isspecialname(logfilename.c_str())) {
|
||||
logfilename = path_cat(config->getConfDir(), logfilename);
|
||||
}
|
||||
@ -115,18 +190,6 @@ RclConfig *recollinit(RclInitFlags flags,
|
||||
int lev = atoi(loglevel.c_str());
|
||||
DebugLog::getdbl()->setloglevel(lev);
|
||||
}
|
||||
// Install log rotate sig handler
|
||||
{
|
||||
struct sigaction action;
|
||||
action.sa_handler = siglogreopen;
|
||||
action.sa_flags = 0;
|
||||
sigemptyset(&action.sa_mask);
|
||||
if (signal(SIGHUP, SIG_IGN) != SIG_IGN) {
|
||||
if (sigaction(SIGHUP, &action, 0) < 0) {
|
||||
perror("Sigaction failed");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Make sure the locale charset is initialized (so that multiple
|
||||
// threads don't try to do it at once).
|
||||
@ -139,14 +202,20 @@ RclConfig *recollinit(RclInitFlags flags,
|
||||
// Init smallut and pathut static values
|
||||
pathut_init_mt();
|
||||
smallut_init_mt();
|
||||
|
||||
// Init execmd.h static PATH and PATHELT splitting
|
||||
{string bogus;
|
||||
ExecCmd::which("nosuchcmd", bogus);
|
||||
}
|
||||
|
||||
// Init Unac translation exceptions
|
||||
string unacex;
|
||||
if (config->getConfParam("unac_except_trans", unacex) && !unacex.empty())
|
||||
unac_set_except_translations(unacex.c_str());
|
||||
|
||||
#ifndef IDX_THREADS
|
||||
#ifndef _WIN32
|
||||
ExecCmd::useVfork(true);
|
||||
#endif
|
||||
#else
|
||||
// Keep threads init behind log init, but make sure it's done before
|
||||
// we do the vfork choice ! The latter is not used any more actually,
|
||||
@ -156,11 +225,15 @@ RclConfig *recollinit(RclInitFlags flags,
|
||||
bool novfork;
|
||||
config->getConfParam("novfork", &novfork);
|
||||
if (novfork) {
|
||||
#ifndef _WIN32
|
||||
LOGDEB0(("rclinit: will use fork() for starting commands\n"));
|
||||
ExecCmd::useVfork(false);
|
||||
#endif
|
||||
} else {
|
||||
#ifndef _WIN32
|
||||
LOGDEB0(("rclinit: will use vfork() for starting commands\n"));
|
||||
ExecCmd::useVfork(true);
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -179,10 +252,11 @@ RclConfig *recollinit(RclInitFlags flags,
|
||||
return config;
|
||||
}
|
||||
|
||||
// Signals are handled by the main thread. All others should call this routine
|
||||
// to block possible signals
|
||||
// Signals are handled by the main thread. All others should call this
|
||||
// routine to block possible signals
|
||||
void recoll_threadinit()
|
||||
{
|
||||
#ifndef _WIN32
|
||||
sigset_t sset;
|
||||
sigemptyset(&sset);
|
||||
|
||||
@ -190,6 +264,14 @@ void recoll_threadinit()
|
||||
sigaddset(&sset, catchedSigs[i]);
|
||||
sigaddset(&sset, SIGHUP);
|
||||
pthread_sigmask(SIG_BLOCK, &sset, 0);
|
||||
#else
|
||||
// Not sure that this is needed at all or correct under windows.
|
||||
for (unsigned int i = 0; i < sizeof(catchedSigs) / sizeof(int); i++) {
|
||||
if (signal(catchedSigs[i], SIG_IGN) != SIG_IGN) {
|
||||
signal(catchedSigs[i], SIG_IGN);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
bool recoll_ismainthread()
|
||||
|
||||
@ -110,7 +110,7 @@ bool SynGroups::setfile(const string& fn)
|
||||
lnum++;
|
||||
|
||||
{
|
||||
int ll = strlen(cline);
|
||||
size_t ll = strlen(cline);
|
||||
while (ll > 0 && (cline[ll-1] == '\n' || cline[ll-1] == '\r')) {
|
||||
cline[ll-1] = 0;
|
||||
ll--;
|
||||
|
||||
@ -142,6 +142,8 @@ static inline int whatcc(unsigned int c)
|
||||
} else {
|
||||
vector<unsigned int>::iterator it =
|
||||
lower_bound(vpuncblocks.begin(), vpuncblocks.end(), c);
|
||||
if (it == vpuncblocks.end())
|
||||
return LETTER;
|
||||
if (c == *it)
|
||||
return SPACE;
|
||||
if ((it - vpuncblocks.begin()) % 2 == 1) {
|
||||
@ -217,11 +219,11 @@ bool TextSplit::o_deHyphenate = false;
|
||||
// Final term checkpoint: do some checking (the kind which is simpler
|
||||
// to do here than in the main loop), then send term to our client.
|
||||
inline bool TextSplit::emitterm(bool isspan, string &w, int pos,
|
||||
int btstart, int btend)
|
||||
size_t btstart, size_t btend)
|
||||
{
|
||||
LOGDEB2(("TextSplit::emitterm: [%s] pos %d\n", w.c_str(), pos));
|
||||
|
||||
unsigned int l = w.length();
|
||||
int l = int(w.length());
|
||||
|
||||
#ifdef TEXTSPLIT_STATS
|
||||
// Update word length statistics. Do this before we filter out
|
||||
@ -230,7 +232,7 @@ inline bool TextSplit::emitterm(bool isspan, string &w, int pos,
|
||||
m_stats.newsamp(m_wordChars);
|
||||
#endif
|
||||
|
||||
if (l > 0 && l < (unsigned)m_maxWordLength) {
|
||||
if (l > 0 && l < m_maxWordLength) {
|
||||
// 1 byte word: we index single ascii letters and digits, but
|
||||
// nothing else. We might want to turn this into a test for a
|
||||
// single utf8 character instead ?
|
||||
@ -245,9 +247,9 @@ inline bool TextSplit::emitterm(bool isspan, string &w, int pos,
|
||||
}
|
||||
}
|
||||
if (pos != m_prevpos || l != m_prevlen) {
|
||||
bool ret = takeword(w, pos, btstart, btend);
|
||||
bool ret = takeword(w, pos, int(btstart), int(btend));
|
||||
m_prevpos = pos;
|
||||
m_prevlen = w.length();
|
||||
m_prevlen = int(w.length());
|
||||
return ret;
|
||||
}
|
||||
LOGDEB2(("TextSplit::emitterm:dup: [%s] pos %d\n", w.c_str(), pos));
|
||||
@ -293,7 +295,7 @@ bool TextSplit::span_is_acronym(string *acronym)
|
||||
|
||||
// Generate terms from span. Have to take into account the
|
||||
// flags: ONLYSPANS, NOSPANS, noNumbers
|
||||
bool TextSplit::words_from_span(int bp)
|
||||
bool TextSplit::words_from_span(size_t bp)
|
||||
{
|
||||
#if 0
|
||||
cerr << "Span: [" << m_span << "] " << " w_i_s size: " <<
|
||||
@ -305,10 +307,10 @@ bool TextSplit::words_from_span(int bp)
|
||||
}
|
||||
cerr << endl;
|
||||
#endif
|
||||
unsigned int spanwords = m_words_in_span.size();
|
||||
int spanwords = int(m_words_in_span.size());
|
||||
int pos = m_spanpos;
|
||||
// Byte position of the span start
|
||||
int spboffs = bp - m_span.size();
|
||||
size_t spboffs = bp - m_span.size();
|
||||
|
||||
if (o_deHyphenate && spanwords == 2 &&
|
||||
m_span[m_words_in_span[0].second] == '-') {
|
||||
@ -322,13 +324,13 @@ bool TextSplit::words_from_span(int bp)
|
||||
m_spanpos, spboffs, spboffs + m_words_in_span[1].second);
|
||||
}
|
||||
|
||||
for (unsigned int i = 0;
|
||||
for (int i = 0;
|
||||
i < ((m_flags&TXTS_ONLYSPANS) ? 1 : spanwords);
|
||||
i++, pos++) {
|
||||
|
||||
int deb = m_words_in_span[i].first;
|
||||
|
||||
for (unsigned int j = ((m_flags&TXTS_ONLYSPANS) ? spanwords-1 : i);
|
||||
for (int j = ((m_flags&TXTS_ONLYSPANS) ? spanwords-1 : i);
|
||||
j < ((m_flags&TXTS_NOSPANS) ? i+1 : spanwords);
|
||||
j++) {
|
||||
|
||||
@ -362,11 +364,11 @@ bool TextSplit::words_from_span(int bp)
|
||||
* @param spanerase Set if the current span is at its end. Process it.
|
||||
* @param bp The current BYTE position in the stream
|
||||
*/
|
||||
inline bool TextSplit::doemit(bool spanerase, int bp)
|
||||
inline bool TextSplit::doemit(bool spanerase, size_t bp)
|
||||
{
|
||||
LOGDEB2(("TextSplit::doemit: sper %d bp %d spp %d spanwords %u wS %d wL %d "
|
||||
"inn %d span [%s]\n",
|
||||
spanerase, bp, m_spanpos, m_words_in_span.size(),
|
||||
spanerase, int(bp), m_spanpos, m_words_in_span.size(),
|
||||
m_wordStart, m_wordLen, m_inNumber, m_span.c_str()));
|
||||
|
||||
if (m_wordLen) {
|
||||
@ -404,8 +406,8 @@ inline bool TextSplit::doemit(bool spanerase, int bp)
|
||||
case '\'':
|
||||
m_span.resize(m_span.length()-1);
|
||||
if (m_words_in_span.size() &&
|
||||
m_words_in_span.back().second > m_span.size())
|
||||
m_words_in_span.back().second = m_span.size();
|
||||
m_words_in_span.back().second > int(m_span.size()))
|
||||
m_words_in_span.back().second = int(m_span.size());
|
||||
// bp is a size_t here, so "--bp < 0" can never be true and a zero bp
// would wrap around to SIZE_MAX. Test before decrementing instead:
// this keeps the intended "step back one byte, but not below 0".
if (bp > 0)
    --bp;
|
||||
break;
|
||||
@ -422,7 +424,7 @@ inline bool TextSplit::doemit(bool spanerase, int bp)
|
||||
|
||||
} else {
|
||||
|
||||
m_wordStart = m_span.length();
|
||||
m_wordStart = int(m_span.length());
|
||||
|
||||
}
|
||||
|
||||
@ -830,16 +832,16 @@ bool TextSplit::cjk_to_words(Utf8Iter *itp, unsigned int *cp)
|
||||
}
|
||||
|
||||
// Take note of byte offset for this character.
|
||||
boffs[nchars-1] = it.getBpos();
|
||||
boffs[nchars-1] = int(it.getBpos());
|
||||
|
||||
// Output all new ngrams: they begin at each existing position
|
||||
// and end after the new character. onlyspans->only output
|
||||
// maximum words, nospans=> single chars
|
||||
if (!(m_flags & TXTS_ONLYSPANS) || nchars == o_CJKNgramLen) {
|
||||
unsigned int btend = it.getBpos() + it.getBlen();
|
||||
unsigned int loopbeg = (m_flags & TXTS_NOSPANS) ? nchars-1 : 0;
|
||||
unsigned int loopend = (m_flags & TXTS_ONLYSPANS) ? 1 : nchars;
|
||||
for (unsigned int i = loopbeg; i < loopend; i++) {
|
||||
int btend = int(it.getBpos() + it.getBlen());
|
||||
int loopbeg = (m_flags & TXTS_NOSPANS) ? nchars-1 : 0;
|
||||
int loopend = (m_flags & TXTS_ONLYSPANS) ? 1 : nchars;
|
||||
for (int i = loopbeg; i < loopend; i++) {
|
||||
if (!takeword(it.buffer().substr(boffs[i],
|
||||
btend-boffs[i]),
|
||||
m_wordpos - (nchars-i-1), boffs[i], btend)) {
|
||||
@ -860,7 +862,7 @@ bool TextSplit::cjk_to_words(Utf8Iter *itp, unsigned int *cp)
|
||||
// If onlyspans is set, there may be things to flush in the buffer
|
||||
// first
|
||||
if ((m_flags & TXTS_ONLYSPANS) && nchars > 0 && nchars != o_CJKNgramLen) {
|
||||
unsigned int btend = it.getBpos(); // Current char is out
|
||||
int btend = int(it.getBpos()); // Current char is out
|
||||
if (!takeword(it.buffer().substr(boffs[0], btend-boffs[0]),
|
||||
m_wordpos - nchars,
|
||||
boffs[0], btend)) {
|
||||
|
||||
@ -184,7 +184,7 @@ private:
|
||||
// Current span. Might be jf.dockes@wanadoo.f
|
||||
std::string m_span;
|
||||
|
||||
std::vector <std::pair<unsigned int, unsigned int> > m_words_in_span;
|
||||
std::vector <std::pair<int, int> > m_words_in_span;
|
||||
|
||||
// Current word: no punctuation at all in there. Byte offset
|
||||
// relative to the current span and byte length
|
||||
@ -201,7 +201,7 @@ private:
|
||||
// It may happen that our cleanup would result in emitting the
|
||||
// same term twice. We try to avoid this
|
||||
int m_prevpos;
|
||||
unsigned int m_prevlen;
|
||||
int m_prevlen;
|
||||
|
||||
#ifdef TEXTSPLIT_STATS
|
||||
// Stats counters. These are processed in TextSplit rather than by a
|
||||
@ -215,11 +215,11 @@ private:
|
||||
// This processes cjk text:
|
||||
bool cjk_to_words(Utf8Iter *it, unsigned int *cp);
|
||||
|
||||
bool emitterm(bool isspan, std::string &term, int pos, int bs, int be);
|
||||
bool doemit(bool spanerase, int bp);
|
||||
bool emitterm(bool isspan, std::string &term, int pos, size_t bs,size_t be);
|
||||
bool doemit(bool spanerase, size_t bp);
|
||||
void discardspan();
|
||||
bool span_is_acronym(std::string *acronym);
|
||||
bool words_from_span(int bp);
|
||||
bool words_from_span(size_t bp);
|
||||
};
|
||||
|
||||
#endif /* _TEXTSPLIT_H_INCLUDED_ */
|
||||
|
||||
@ -408,13 +408,13 @@ AC_ARG_ENABLE(recollq,
|
||||
need for Qt). This is done by default if --disable-qtgui is set but this
|
||||
option enables forcing it.]),
|
||||
enableRECOLLQ=$enableval, enableRECOLLQ="no")
|
||||
|
||||
if text X"$enableRECOLLQ" != X ; then
|
||||
if test X"$enableRECOLLQ" != X ; then
|
||||
AM_CONDITIONAL(MAKECMDLINE, [test X$enableRECOLLQ = Xyes])
|
||||
else
|
||||
AM_CONDITIONAL(MAKECMDLINE, [test X$enableQT = Xno])
|
||||
fi
|
||||
|
||||
|
||||
if test X$enableQT = Xyes ; then
|
||||
|
||||
if test X$QTDIR != X ; then
|
||||
@ -586,6 +586,5 @@ AC_SUBST(RCLLIBVERSION)
|
||||
AC_CONFIG_FILES(Makefile)
|
||||
AC_CONFIG_FILES(common/rclversion.h)
|
||||
AC_CONFIG_FILES(python/recoll/setup.py)
|
||||
AC_CONFIG_FILES(sampleconf/recoll.conf)
|
||||
|
||||
AC_OUTPUT
|
||||
|
||||
165
src/filters/rcldoc.py
Executable file
165
src/filters/rcldoc.py
Executable file
@ -0,0 +1,165 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import rclexecm
|
||||
import rclexec1
|
||||
import re
|
||||
import sys
|
||||
import os
|
||||
|
||||
# Processing the output from antiword: create html header and tail, process
|
||||
# continuation lines escape, HTML special characters, accumulate the data.
|
||||
class WordProcessData:
    """Post-processor for antiword text output.

    Lines are fed one at a time through takeLine(). The class builds an
    HTML page in self.out: it emits the HTML header on the first
    non-empty line, joins words split by a trailing '-' with the start of
    the next line, escapes HTML special characters (through
    em.htmlescape()) and terminates each line with <br>.

    em must provide htmlescape(text) and setmimetype(mimetype).
    """

    def __init__(self, em):
        self.em = em
        # Accumulated HTML output
        self.out = ""
        # Pending word fragment carried over to the next line
        self.cont = ""
        # Set once the first non-empty line was seen (header emitted)
        self.gotdata = False
        # Line with continued word (ending in -)
        # we strip the - which is not nice for actually hyphenated word.
        # What to do ?
        # Raw strings: \w and \s are regex escapes, not string escapes.
        self.patcont = re.compile(r'''[\w][-]$''')
        # Pattern for breaking continuation at last word start
        self.patws = re.compile(r'''([\s])([\w]+)(-)$''')

    def takeLine(self, line):
        """Process one line of input and append the result to self.out."""
        if not self.gotdata:
            # Skip leading empty lines, then emit the HTML header once.
            if line == "":
                return
            self.out = '<html><head><title></title>' + \
                       '<meta http-equiv="Content-Type"' + \
                       'content="text/html;charset=UTF-8">' + \
                       '</head><body><p>'
            self.gotdata = True

        if self.cont:
            # Prepend the word fragment held back from the previous line
            line = self.cont + line
            self.cont = ""

        if line == "\f":
            # Form feed: page break
            self.out += "</p><hr><p>"
            return

        if self.patcont.search(line):
            # Break at last whitespace
            match = self.patws.search(line)
            if match:
                # Hold back the last word (without its '-'), emit the rest
                self.cont = line[match.start(2):match.end(2)]
                line = line[0:match.start(1)]
            else:
                # No whitespace: the whole line is the continued word
                self.cont = line
                line = ""

        if line:
            self.out += self.em.htmlescape(line) + "<br>"
        else:
            self.out += "<br>"

    def wrapData(self):
        """Close the HTML page, set the MIME type and return the document.

        Returns the accumulated HTML text (empty if no data was seen).
        """
        if self.gotdata:
            if self.cont:
                # The input ended on a continued word: there is no next
                # line to join it to, so emit it now instead of losing it.
                self.out += self.em.htmlescape(self.cont) + "<br>"
                self.cont = ""
            self.out += "</p></body></html>"
        self.em.setmimetype("text/html")
        return self.out
|
||||
|
||||
# Null data accumulator. We use this when antiword has failed, and the
|
||||
# data actually comes from rclrtf, rcltext or wvWare, which all
|
||||
# output HTML
|
||||
class WordPassData:
    """Pass-through accumulator: input lines are concatenated unchanged.

    em must provide setmimetype(mimetype); it is called from wrapData().
    """

    def __init__(self, em):
        self.em = em
        self.out = ""

    def takeLine(self, line):
        """Append one line of input, as is, to the accumulated output."""
        self.out = self.out + line

    def wrapData(self):
        """Declare the result as text/html and return the accumulated data."""
        result = self.out
        self.em.setmimetype("text/html")
        return result
|
||||
|
||||
|
||||
# Filter for msword docs. Try antiword, and if this fails, check for
|
||||
# an rtf or text document (.doc are sometimes like this...). Also try
|
||||
# wvWare if the doc is actually a word doc
|
||||
class WordFilter:
    """Chooses the command used to extract text from a .doc file.

    getCmd() is called repeatedly for one file: the first call proposes
    antiword, the second one looks at the actual file contents and
    proposes a fallback (rcltext.py, rclrtf.py or wvWare). At most two
    tries are made; reset() restarts the sequence.

    em must provide rclog(message); td is the directory holding the
    other filter scripts.
    """

    def __init__(self, em, td):
        self.em = em
        # Number of commands already proposed for the current file
        self.ntry = 0
        self.execdir = td

    def reset(self):
        """Restart the command sequence (to be called for each new file)."""
        self.ntry = 0

    def hasControlChars(self, data):
        """Return True if data holds a control character (code < 32)
        other than tab, newline, form feed or carriage return.

        Works on byte strings and text strings, under Python 2 and 3
        (iterating over bytes yields integers under Python 3).
        """
        allowed = (9, 10, 12, 13)   # \t \n \f \r
        for c in data:
            code = c if isinstance(c, int) else ord(c)
            if code < 32 and code not in allowed:
                return True
        return False

    def mimetype(self, fn):
        """Guess the MIME type of file fn from its first 100 bytes.

        Returns "text/rtf", "application/msword",
        "application/octet-stream" or "text/plain", or "" if the file
        can't be opened.
        """
        # Both prologs are byte strings: the file is read in binary mode
        rtfprolog = b"{\\rtf1"
        docprolog = b"\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1"
        try:
            f = open(fn, "rb")
        except Exception:
            return ""
        try:
            data = f.read(100)
        finally:
            # Don't leak the descriptor: this runs in a long-lived process
            f.close()
        if data[0:6] == rtfprolog:
            return "text/rtf"
        elif data[0:8] == docprolog:
            return "application/msword"
        elif self.hasControlChars(data):
            return "application/octet-stream"
        else:
            return "text/plain"

    def getCmd(self, fn):
        '''Return command to execute, and postprocessor, according to
        our state: first try antiword, then others depending on mime
        identification. Do 2 tries at most.

        Returns ([], None) when there is nothing (more) to try.'''
        if self.ntry == 0:
            self.ntry = 1
            cmd = rclexecm.which("antiword")
            if cmd:
                return ([cmd, "-t", "-i", "1", "-m", "UTF-8"],
                        WordProcessData(self.em))
            else:
                return ([],None)
        elif self.ntry == 1:
            self.ntry = 2
            # antiword failed. Check for an rtf file, or text and
            # process accordingly. If the doc is actually msword, try
            # wvWare.
            mt = self.mimetype(fn)
            self.em.rclog("rcldoc.py: actual MIME type %s" % mt)
            if mt == "text/plain":
                # "python" must be a string here, as in the rtf branch
                # below (a bare name would raise NameError).
                return (["python", os.path.join(self.execdir, "rcltext.py")],
                        WordPassData(self.em))
            elif mt == "text/rtf":
                cmd = ["python", os.path.join(self.execdir, "rclrtf.py"),
                       "-s"]
                self.em.rclog("rcldoc.py: returning cmd %s" % cmd)
                return (cmd, WordPassData(self.em))
            elif mt == "application/msword":
                cmd = rclexecm.which("wvWare")
                if cmd:
                    return ([cmd, "--nographics", "--charset=utf-8"],
                            WordPassData(self.em))
                else:
                    return ([],None)
            else:
                return ([],None)
        else:
            return ([],None)
|
||||
|
||||
if __name__ == '__main__':
    # antiword is mandatory. We could fall back to wvWare when it is
    # missing, but this is not what the old filter did.
    if not rclexecm.which("antiword"):
        print("RECFILTERROR HELPERNOTFOUND antiword")
        sys.exit(1)

    # Directory we were started from: the other filter scripts which we
    # may need to execute are looked up there.
    filterdir = os.path.dirname(sys.argv[0])

    protocol = rclexecm.RclExecM()
    rclexecm.main(protocol,
                  rclexec1.Executor(protocol,
                                    WordFilter(protocol, filterdir)))
|
||||
112
src/filters/rclexec1.py
Normal file
112
src/filters/rclexec1.py
Normal file
@ -0,0 +1,112 @@
|
||||
#################################
|
||||
# Copyright (C) 2014 J.F.Dockes
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the
|
||||
# Free Software Foundation, Inc.,
|
||||
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
########################################################
|
||||
|
||||
# Common code for replacing the old shell scripts with Python execm
|
||||
# ones: this implements the basic functions for a filter which
|
||||
# executes a command to translate a simple file (like rclword with
|
||||
# antiword).
|
||||
#
|
||||
# This was motivated by the Windows port: to replace shell and Unix
|
||||
# utility (awk , etc usage). We can't just execute python scripts,
|
||||
# this would be too slow. So this helps implementing a permanent script
|
||||
# to repeatedly execute single commands.
|
||||
|
||||
import subprocess
|
||||
import rclexecm
|
||||
|
||||
# This class has the code to execute the subprocess and call a
|
||||
# data-specific post-processor. Command and processor are supplied by
|
||||
# the object which we receive as a parameter, which in turn is defined
|
||||
# in the actual executable filter (e.g. rcldoc.py)
|
||||
class Executor:
    """Run an external command on a file and post-process its output.

    The command and the post-processor are supplied by the flt object
    (see getCmd() in the actual filter, e.g. rcldoc.py). The
    openfile/getipath/getnext methods implement the file type handler
    interface used by rclexecm.

    em must provide rclog(message). flt must provide reset() and
    getCmd(filename), the latter returning (cmd, postproc) or
    (cmd, postproc, options).
    """

    # Option bit for runCmd(): ignore the command exit status
    opt_ignxval = 1

    def __init__(self, em, flt):
        self.em = em
        self.flt = flt
        self.currentindex = 0

    def runCmd(self, cmd, filename, postproc, opt):
        ''' Substitute parameters and execute command, process output
        with the specific postprocessor and return the complete text.
        We expect cmd as a list of command name + arguments.

        Returns (ok, data). ok is False if the command could not be
        started, or if it exited with a non-zero status and
        opt_ignxval is not set in opt.'''

        fullcmd = cmd + [filename]
        try:
            proc = subprocess.Popen(fullcmd,
                                    stdout = subprocess.PIPE)
        except subprocess.CalledProcessError as err:
            self.em.rclog("extractone: Popen(%s) error: %s" % (fullcmd, err))
            return (False, "")
        except OSError as err:
            self.em.rclog("extractone: Popen(%s) OS error: %s" % (fullcmd, err))
            return (False, "")

        try:
            # Note: strip() removes leading and trailing whitespace, which
            # includes line indentation and form feed characters.
            for line in proc.stdout:
                postproc.takeLine(line.strip())
        finally:
            # We run in a long-lived process: always release the pipe and
            # reap the child, even if the post-processor raised.
            proc.stdout.close()
            proc.wait()

        if (opt & self.opt_ignxval) == 0 and proc.returncode:
            self.em.rclog("extractone: [%s] returncode %d" % \
                          (filename, proc.returncode))
            return False, postproc.wrapData()
        else:
            return True, postproc.wrapData()

    def extractone(self, params):
        """Extract the text for the file named by params["filename:"].

        Tries the commands proposed by the filter object in turn until
        one succeeds or there are none left. Returns a
        (ok, data, ipath, eof) tuple as expected by rclexecm.
        """
        #self.em.rclog("extractone %s %s" % (params["filename:"], \
        # params["mimetype:"]))
        self.flt.reset()
        ok = False
        # 'in' works with Python 2 and 3 (dict.has_key() is Python 2 only)
        if "filename:" not in params:
            self.em.rclog("extractone: no mime or file name")
            return (ok, "", "", rclexecm.RclExecM.eofnow)

        fn = params["filename:"]
        while True:
            cmdseq = self.flt.getCmd(fn)
            cmd = cmdseq[0]
            postproc = cmdseq[1]
            # Options are the optional third element
            opt = cmdseq[2] if len(cmdseq) == 3 else 0
            if cmd:
                ok, data = self.runCmd(cmd, fn, postproc, opt)
                if ok:
                    break
            else:
                # Empty command: nothing more to try
                break
        if ok:
            return (ok, data, "", rclexecm.RclExecM.eofnext)
        else:
            return (ok, "", "", rclexecm.RclExecM.eofnow)

    ###### File type handler api, used by rclexecm ---------->
    def openfile(self, params):
        """Prepare for a new file. Always succeeds."""
        self.currentindex = 0
        return True

    def getipath(self, params):
        """Return the document data (the ipath value is not used)."""
        return self.extractone(params)

    def getnext(self, params):
        """Return the document on the first call after openfile(), then
        an end-of-file indication."""
        if self.currentindex >= 1:
            return (False, "", "", rclexecm.RclExecM.eofnow)
        else:
            ret= self.extractone(params)
            self.currentindex += 1
            return ret
|
||||
@ -1,10 +1,34 @@
|
||||
#!/usr/bin/env python
|
||||
#################################
|
||||
# Copyright (C) 2014 J.F.Dockes
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the
|
||||
# Free Software Foundation, Inc.,
|
||||
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
########################################################
|
||||
## Recoll multifilter communication module and utilities
|
||||
|
||||
from __future__ import print_function
|
||||
|
||||
###########################################
|
||||
## Generic recoll multifilter communication code
|
||||
import sys
|
||||
import os
|
||||
import tempfile
|
||||
import shutil
|
||||
import getopt
|
||||
|
||||
############################################
|
||||
# RclExecM implements the
|
||||
# communication protocol with the recollindex process. It calls the
|
||||
# object specific of the document type to actually get the data.
|
||||
class RclExecM:
|
||||
noteof = 0
|
||||
eofnext = 1
|
||||
@ -27,9 +51,13 @@ class RclExecM:
|
||||
else:
|
||||
self.maxmembersize = 50 * 1024
|
||||
self.maxmembersize = self.maxmembersize * 1024
|
||||
if sys.platform == "win32":
|
||||
import msvcrt
|
||||
msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
|
||||
msvcrt.setmode(sys.stdin.fileno(), os.O_BINARY)
|
||||
|
||||
def rclog(self, s, doexit = 0, exitvalue = 1):
|
||||
print >> sys.stderr, "RCLMFILT:", self.myname, ":", s
|
||||
print("RCLMFILT: %s: %s" % (self.myname, s), file=sys.stderr)
|
||||
if doexit:
|
||||
sys.exit(exitvalue)
|
||||
|
||||
@ -87,29 +115,29 @@ class RclExecM:
|
||||
self.rclog("GOT UNICODE for ipath [%s]" % (ipath,))
|
||||
docdata = docdata.encode("UTF-8")
|
||||
|
||||
print "Document:", len(docdata)
|
||||
print("Document: %d" % len(docdata))
|
||||
sys.stdout.write(docdata)
|
||||
|
||||
if len(ipath):
|
||||
print "Ipath:", len(ipath)
|
||||
print("Ipath: %d" % len(ipath))
|
||||
sys.stdout.write(ipath)
|
||||
|
||||
if len(self.mimetype):
|
||||
print "Mimetype:", len(self.mimetype)
|
||||
print("Mimetype: %d" % len(self.mimetype))
|
||||
sys.stdout.write(self.mimetype)
|
||||
|
||||
# If we're at the end of the contents, say so
|
||||
if iseof == RclExecM.eofnow:
|
||||
print "Eofnow: 0"
|
||||
print("Eofnow: 0")
|
||||
elif iseof == RclExecM.eofnext:
|
||||
print "Eofnext: 0"
|
||||
print("Eofnext: 0")
|
||||
if iserror == RclExecM.subdocerror:
|
||||
print "Subdocerror: 0"
|
||||
print("Subdocerror: 0")
|
||||
elif iserror == RclExecM.fileerror:
|
||||
print "Fileerror: 0"
|
||||
print("Fileerror: 0")
|
||||
|
||||
# End of message
|
||||
print
|
||||
print()
|
||||
sys.stdout.flush()
|
||||
#self.rclog("done writing data")
|
||||
|
||||
@ -168,67 +196,161 @@ class RclExecM:
|
||||
self.processmessage(processor, params)
|
||||
|
||||
|
||||
|
||||
# Helper routine to test for program accessibility
|
||||
def which(program):
    """Return the path of an executable file for program, or None.

    If program contains a directory part, only that location is
    checked. Else we look in the directory of the running script
    (sys.argv[0]), then in the directories listed in PATH. In all cases
    the name is tried as is, then with each extension from the PATHEXT
    environment variable if it is set.
    """
    def is_exe(fpath):
        # isfile(), not exists(): a directory usually has the x bit set,
        # but it is not something which we can execute.
        return os.path.isfile(fpath) and os.access(fpath, os.X_OK)

    def ext_candidates(fpath):
        yield fpath
        for ext in os.environ.get("PATHEXT", "").split(os.pathsep):
            # Skip empty entries: they would just repeat fpath
            if ext:
                yield fpath + ext

    def path_candidates():
        yield os.path.dirname(sys.argv[0])
        # Don't fail if PATH is not set: use the system default path
        for path in os.environ.get("PATH", os.defpath).split(os.pathsep):
            yield path

    if os.path.dirname(program):
        # Explicit location: no path search
        bases = [program]
    else:
        bases = (os.path.join(path, program) for path in path_candidates())

    for base in bases:
        for candidate in ext_candidates(base):
            if is_exe(candidate):
                return candidate
    return None
|
||||
|
||||
# Temp dir helper
|
||||
class SafeTmpDir:
    """Lazily created temporary directory, removed when the object dies.

    em must provide rclog(message), used to report a cleanup failure.
    """

    def __init__(self, em):
        self.em = em
        # Top directory created by mkdtemp(), empty until getpath() is called
        self.toptmp = ""
        # Work directory inside toptmp: this is what getpath() returns
        self.tmpdir = ""

    def __del__(self):
        try:
            if not self.toptmp:
                # getpath() was never called: nothing to clean up
                return
            # Errors while emptying the work directory are ignored, they
            # will show up when removing the top directory.
            shutil.rmtree(self.tmpdir, True)
            os.rmdir(self.toptmp)
        except Exception as err:
            self.em.rclog("delete dir failed for " + self.toptmp)

    def getpath(self):
        """Return the work directory path, creating it on the first call.

        The top directory is created inside $RECOLL_TMPDIR if this is
        set, else in the default location used by tempfile.
        """
        if self.tmpdir:
            return self.tmpdir

        mkargs = {'prefix': 'rcltmp'}
        envrcltmp = os.getenv('RECOLL_TMPDIR')
        if envrcltmp:
            mkargs['dir'] = envrcltmp
        self.toptmp = tempfile.mkdtemp(**mkargs)

        self.tmpdir = os.path.join(self.toptmp, 'rclsofftmp')
        os.makedirs(self.tmpdir)
        return self.tmpdir
|
||||
|
||||
|
||||
# Common main routine for all python execm filters: either run the
|
||||
# normal protocol engine or a local loop to test without recollindex
|
||||
def main(proto, extract):
|
||||
if len(sys.argv) == 1:
|
||||
proto.mainloop(extract)
|
||||
else:
|
||||
# Got a file name parameter: TESTING without an execm parent
|
||||
# Loop on all entries or get specific ipath
|
||||
def mimetype_with_file(f):
|
||||
cmd = 'file -i "' + f + '"'
|
||||
fileout = os.popen(cmd).read()
|
||||
lst = fileout.split(':')
|
||||
mimetype = lst[len(lst)-1].strip()
|
||||
lst = mimetype.split(';')
|
||||
return lst[0].strip()
|
||||
def mimetype_with_xdg(f):
|
||||
cmd = 'xdg-mime query filetype "' + f + '"'
|
||||
return os.popen(cmd).read().strip()
|
||||
params = {'filename:': sys.argv[1]}
|
||||
# Some filters (e.g. rclaudio) need/get a MIME type from the indexer
|
||||
mimetype = mimetype_with_xdg(sys.argv[1])
|
||||
params['mimetype:'] = mimetype
|
||||
if not extract.openfile(params):
|
||||
print "Open error"
|
||||
sys.exit(1)
|
||||
ipath = ""
|
||||
if len(sys.argv) == 3:
|
||||
ipath = sys.argv[2]
|
||||
# mainloop does not return. Just in case
|
||||
sys.exit(1)
|
||||
|
||||
if ipath != "":
|
||||
params['ipath:'] = ipath
|
||||
ok, data, ipath, eof = extract.getipath(params)
|
||||
if ok:
|
||||
print "== Found entry for ipath %s (mimetype [%s]):" % \
|
||||
(ipath, proto.mimetype)
|
||||
if isinstance(data, unicode):
|
||||
bdata = data.encode("UTF-8")
|
||||
else:
|
||||
bdata = data
|
||||
|
||||
# Not running the main loop: either acting as single filter (when called
|
||||
# from other filter for example), or debugging
|
||||
def usage():
|
||||
print("Usage: rclexecm.py [-d] [-s] [-i ipath] [filename]",
|
||||
file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
actAsSingle = False
|
||||
debugDumpData = False
|
||||
ipath = ""
|
||||
|
||||
args = sys.argv[1:]
|
||||
opts, args = getopt.getopt(args, "hdsi:")
|
||||
for opt, arg in opts:
|
||||
if opt in ['-h']:
|
||||
usage()
|
||||
elif opt in ['-s']:
|
||||
actAsSingle = True
|
||||
elif opt in ['-i']:
|
||||
ipath = arg
|
||||
elif opt in ['-d']:
|
||||
debugDumpData = True
|
||||
else:
|
||||
print("unknown option %s\n"%opt, file=sys.stderr)
|
||||
usage()
|
||||
|
||||
if len(args) != 1:
|
||||
usage()
|
||||
|
||||
def mimetype_with_file(f):
|
||||
cmd = 'file -i "' + f + '"'
|
||||
fileout = os.popen(cmd).read()
|
||||
lst = fileout.split(':')
|
||||
mimetype = lst[len(lst)-1].strip()
|
||||
lst = mimetype.split(';')
|
||||
return lst[0].strip()
|
||||
|
||||
def mimetype_with_xdg(f):
|
||||
cmd = 'xdg-mime query filetype "' + f + '"'
|
||||
return os.popen(cmd).read().strip()
|
||||
|
||||
def debprint(s):
|
||||
if not actAsSingle:
|
||||
print(s)
|
||||
|
||||
params = {'filename:': args[0]}
|
||||
# Some filters (e.g. rclaudio) need/get a MIME type from the indexer
|
||||
mimetype = mimetype_with_xdg(args[0])
|
||||
params['mimetype:'] = mimetype
|
||||
|
||||
if not extract.openfile(params):
|
||||
print("Open error", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
if ipath != "" or actAsSingle:
|
||||
params['ipath:'] = ipath
|
||||
ok, data, ipath, eof = extract.getipath(params)
|
||||
if ok:
|
||||
debprint("== Found entry for ipath %s (mimetype [%s]):" % \
|
||||
(ipath, proto.mimetype))
|
||||
if isinstance(data, unicode):
|
||||
bdata = data.encode("UTF-8")
|
||||
else:
|
||||
bdata = data
|
||||
if debugDumpData or actAsSingle:
|
||||
sys.stdout.write(bdata)
|
||||
print
|
||||
else:
|
||||
print "Got error, eof %d"%eof
|
||||
print()
|
||||
sys.exit(0)
|
||||
else:
|
||||
print("Got error, eof %d"%eof, file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
ecnt = 0
|
||||
while 1:
|
||||
ok, data, ipath, eof = extract.getnext(params)
|
||||
if ok:
|
||||
ecnt = ecnt + 1
|
||||
print "== Entry %d ipath %s (mimetype [%s]):" % \
|
||||
(ecnt, ipath, proto.mimetype)
|
||||
if isinstance(data, unicode):
|
||||
bdata = data.encode("UTF-8")
|
||||
else:
|
||||
bdata = data
|
||||
#sys.stdout.write(bdata)
|
||||
print
|
||||
if eof != RclExecM.noteof:
|
||||
break
|
||||
ecnt = 0
|
||||
while 1:
|
||||
ok, data, ipath, eof = extract.getnext(params)
|
||||
if ok:
|
||||
ecnt = ecnt + 1
|
||||
debprint("== Entry %d ipath %s (mimetype [%s]):" % \
|
||||
(ecnt, ipath, proto.mimetype))
|
||||
if isinstance(data, unicode):
|
||||
bdata = data.encode("UTF-8")
|
||||
else:
|
||||
print "Not ok, eof %d" % eof
|
||||
break
|
||||
bdata = data
|
||||
if debugDumpData:
|
||||
sys.stdout.write(bdata)
|
||||
print()
|
||||
if eof != RclExecM.noteof:
|
||||
sys.exit(0)
|
||||
else:
|
||||
print("Not ok, eof %d" % eof, file=sys.stderr)
|
||||
sys.exit(1)
|
||||
# Not sure this makes sense, but going on looping certainly does not
|
||||
if actAsSingle:
|
||||
sys.exit(0)
|
||||
|
||||
@ -147,6 +147,9 @@ if ($@) {
|
||||
exit(1);
|
||||
}
|
||||
|
||||
binmode(STDIN) || die "cannot binmode STDIN";
|
||||
binmode(STDOUT) || die "cannot binmode STDOUT";
|
||||
|
||||
#print STDERR "RCLIMG: Starting\n";
|
||||
$| = 1;
|
||||
while (1) {
|
||||
|
||||
@ -109,7 +109,7 @@ class European8859TextClassifier:
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
f = open(sys.argv[1])
|
||||
f = open(sys.argv[1], "rb")
|
||||
rawtext = f.read()
|
||||
f.close()
|
||||
|
||||
|
||||
@ -1,9 +0,0 @@
|
||||
#!/bin/sh
|
||||
# It may make sense in some cases to set this null filter (no output)
|
||||
# instead of using recoll_noindex or leaving the default filter in
|
||||
# case one doesn't want to install it: this will avoid endless retries
|
||||
# to reindex the affected files, as recoll will think it has succeeded
|
||||
# indexing them. Downside: the files won't be indexed when one
|
||||
# actually installs the real filter, will need a -z
|
||||
|
||||
exit 0
|
||||
224
src/filters/rclopxml.py
Executable file
224
src/filters/rclopxml.py
Executable file
@ -0,0 +1,224 @@
|
||||
#!/usr/bin/env python
|
||||
# Copyright (C) 2015 J.F.Dockes
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the
|
||||
# Free Software Foundation, Inc.,
|
||||
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
######################################
|
||||
from __future__ import print_function
|
||||
|
||||
import sys
|
||||
import rclexecm
|
||||
import rclxslt
|
||||
import fnmatch
|
||||
from zipfile import ZipFile
|
||||
|
||||
meta_stylesheet = '''<?xml version="1.0"?>
|
||||
<xsl:stylesheet
|
||||
xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0"
|
||||
xmlns:cp="http://schemas.openxmlformats.org/package/2006/metadata/core-properties"
|
||||
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||
xmlns:dcterms="http://purl.org/dc/terms/"
|
||||
xmlns:dcmitype="http://purl.org/dc/dcmitype/"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
|
||||
|
||||
<!-- <xsl:output method="text"/> -->
|
||||
<xsl:output omit-xml-declaration="yes"/>
|
||||
|
||||
<xsl:template match="cp:coreProperties">
|
||||
<xsl:text> </xsl:text>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
|
||||
<xsl:text> </xsl:text>
|
||||
<xsl:apply-templates/>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="dc:creator">
|
||||
<meta>
|
||||
<xsl:attribute name="name">
|
||||
<!-- <xsl:value-of select="name()"/> pour sortir tous les meta avec
|
||||
le meme nom que dans le xml (si on devenait dc-natif) -->
|
||||
<xsl:text>author</xsl:text>
|
||||
</xsl:attribute>
|
||||
<xsl:attribute name="content">
|
||||
<xsl:value-of select="."/>
|
||||
</xsl:attribute>
|
||||
</meta>
|
||||
<xsl:text> </xsl:text>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="dcterms:modified">
|
||||
<meta>
|
||||
<xsl:attribute name="name">
|
||||
<xsl:text>date</xsl:text>
|
||||
</xsl:attribute>
|
||||
<xsl:attribute name="content">
|
||||
<xsl:value-of select="."/>
|
||||
</xsl:attribute>
|
||||
</meta>
|
||||
<xsl:text> </xsl:text>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="*">
|
||||
</xsl:template>
|
||||
|
||||
</xsl:stylesheet>
|
||||
'''
|
||||
|
||||
word_tagmatch = 'w:p'
|
||||
word_xmlns_decls = '''xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0"
|
||||
xmlns:ve="http://schemas.openxmlformats.org/markup-compatibility/2006"
|
||||
xmlns:o="urn:schemas-microsoft-com:office:office"
|
||||
xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships"
|
||||
xmlns:m="http://schemas.openxmlformats.org/officeDocument/2006/math"
|
||||
xmlns:v="urn:schemas-microsoft-com:vml"
|
||||
xmlns:wp="http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing"
|
||||
xmlns:w10="urn:schemas-microsoft-com:office:word"
|
||||
xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"
|
||||
xmlns:wne="http://schemas.microsoft.com/office/word/2006/wordml"
|
||||
'''
|
||||
word_moretemplates = ''
|
||||
|
||||
|
||||
xl_tagmatch = 'x:t'
|
||||
xl_xmlns_decls='''xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0"
|
||||
xmlns:x="http://schemas.openxmlformats.org/spreadsheetml/2006/main"
|
||||
'''
|
||||
xl_moretemplates = ''
|
||||
|
||||
pp_tagmatch = 'a:t'
|
||||
pp_xmlns_decls = '''xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0"
|
||||
xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main"
|
||||
xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships"
|
||||
xmlns:p="http://schemas.openxmlformats.org/presentationml/2006/main"
|
||||
'''
|
||||
# I want to suppress text output for all except a:t, don't know how to do it
|
||||
# help ! At least get rid of these:
|
||||
pp_moretemplates = '''<xsl:template match="p:attrName">
|
||||
</xsl:template>
|
||||
'''
|
||||
|
||||
content_stylesheet = '''<?xml version="1.0"?>
|
||||
<xsl:stylesheet @XMLNS_DECLS@ >
|
||||
|
||||
<xsl:output omit-xml-declaration="yes"/>
|
||||
|
||||
<xsl:template match="/">
|
||||
<div>
|
||||
<xsl:apply-templates/>
|
||||
</div>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="@TAGMATCH@">
|
||||
<p>
|
||||
<xsl:value-of select="."/>
|
||||
</p>
|
||||
</xsl:template>
|
||||
|
||||
@MORETEMPLATES@
|
||||
|
||||
</xsl:stylesheet>
|
||||
'''
|
||||
|
||||
class OXExtractor:
    """Turn an Office Open XML container (word / spreadsheet / presentation
    parts inside a zip archive) into a single HTML document.

    The object is driven by rclexecm through openfile/getipath/getnext.
    """

    def __init__(self, em):
        # em: protocol object, used here for logging through em.rclog()
        self.em = em
        self.currentindex = 0

    # Replace values inside data style sheet, depending on type of doc
    def computestylesheet(self, nm):
        """Return content_stylesheet specialised for document family nm.

        nm is the prefix of the module-level <nm>_xmlns_decls, <nm>_tagmatch
        and <nm>_moretemplates variables ('word', 'xl' or 'pp' in the calls
        made by extractone). Raises KeyError if one of them does not exist.
        """
        stylesheet = content_stylesheet
        for marker, suffix in (('@XMLNS_DECLS@', '_xmlns_decls'),
                               ('@TAGMATCH@', '_tagmatch'),
                               ('@MORETEMPLATES@', '_moretemplates')):
            stylesheet = stylesheet.replace(marker, globals()[nm + suffix])
        return stylesheet

    def extractone(self, params):
        """Extract the document named by params["filename:"].

        Returns a (ok, data, ipath, eof) tuple: (True, html, "", eofnext) on
        success, (False, "", "", eofnow) if the file name is missing or the
        archive cannot be opened. Missing or unparsable parts inside the
        archive are skipped silently: each document family only contains
        some of the parts tried here.
        """
        if "filename:" not in params:
            self.em.rclog("extractone: no mime or file name")
            return (False, "", "", rclexecm.RclExecM.eofnow)
        fn = params["filename:"]

        try:
            archive = ZipFile(fn)
        except Exception as err:
            self.em.rclog("unzip failed: " + str(err))
            return (False, "", "", rclexecm.RclExecM.eofnow)

        def slidekey(name):
            # Order slides by their number so that slide2 comes before
            # slide11; names without a plain number sort last, by name.
            stem = name[len('ppt/slides/slide'):-len('.xml')]
            try:
                return (0, int(stem), name)
            except ValueError:
                return (1, 0, name)

        try:
            docdata = '<html><head>'

            try:
                metadata = archive.read("docProps/core.xml")
                if metadata:
                    docdata += rclxslt.apply_sheet_data(meta_stylesheet,
                                                        metadata)
            except Exception:
                # To be checked. I'm under the impression that I get this
                # when nothing matches?
                # self.em.rclog("no/bad metadata in %s" % fn)
                pass

            docdata += '</head><body>'

            # Word processor document body
            try:
                content = archive.read('word/document.xml')
                stl = self.computestylesheet('word')
                docdata += rclxslt.apply_sheet_data(stl, content)
            except Exception:
                pass

            # Spreadsheet shared strings
            try:
                content = archive.read('xl/sharedStrings.xml')
                stl = self.computestylesheet('xl')
                docdata += rclxslt.apply_sheet_data(stl, content)
            except Exception:
                pass

            # Presentation slides, in numeric order
            try:
                stl = self.computestylesheet('pp')
                slides = [member for member in archive.namelist()
                          if fnmatch.fnmatch(member, 'ppt/slides/slide*.xml')]
                for member in sorted(slides, key=slidekey):
                    content = archive.read(member)
                    docdata += rclxslt.apply_sheet_data(stl, content)
            except Exception:
                pass

            docdata += '</body></html>'
        finally:
            # Do not leave the archive open between documents.
            archive.close()

        return (True, docdata, "", rclexecm.RclExecM.eofnext)

    ###### File type handler api, used by rclexecm ---------->
    def openfile(self, params):
        """Prepare for a new file: rewind the document counter."""
        self.currentindex = 0
        return True

    def getipath(self, params):
        """There is a single document per file: same as extractone()."""
        return self.extractone(params)

    def getnext(self, params):
        """Return the document on the first call after openfile(), then an
        end-of-file tuple."""
        if self.currentindex >= 1:
            return (False, "", "", rclexecm.RclExecM.eofnow)
        else:
            ret = self.extractone(params)
            self.currentindex += 1
            return ret
|
||||
|
||||
if __name__ == '__main__':
    # Create the protocol object, attach an extractor to it and hand both
    # to rclexecm.main().
    protocol = rclexecm.RclExecM()
    rclexecm.main(protocol, OXExtractor(protocol))
|
||||
58
src/filters/rclppt.py
Executable file
58
src/filters/rclppt.py
Executable file
@ -0,0 +1,58 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import rclexecm
|
||||
import rclexec1
|
||||
import re
|
||||
import sys
|
||||
import os
|
||||
|
||||
# Processing the output from ppt-dump.py
class PPTProcessData:
    """Collect the text lines produced by the dump command and wrap them in
    a minimal HTML page, with the text inside a <pre> element."""

    # Document prologue, emitted exactly once before the first text line.
    header = '''<html><head>''' + \
             '''<meta http-equiv="Content-Type" ''' + \
             '''content="text/html;charset=UTF-8">''' + \
             '''</head><body><pre>'''

    def __init__(self, em):
        # em: protocol object, used here for em.htmlescape()
        self.em = em
        self.out = ""
        self.gotdata = False

    def takeLine(self, line):
        """Append one HTML-escaped line of command output to the page."""
        if not self.gotdata:
            self.out += self.header
            self.gotdata = True
        # NOTE(review): assumes the executor hands over lines without their
        # terminator (the RTF handler re-adds "\n" explicitly) -- confirm.
        # Without a separator all lines run together inside <pre>.
        self.out += self.em.htmlescape(line) + "\n"

    def wrapData(self):
        """Return the complete HTML page.

        The prologue is supplied here if no line was ever received, so that
        the result is a well-formed (empty) page rather than bare closing
        tags.
        """
        page = self.out if self.gotdata else self.header
        return page + '''</pre></body></html>'''
|
||||
|
||||
class PPTFilter:
    """Supplies the ppt-dump.py command line and its output handler.

    getCmd() offers the command once; after that it returns an empty
    command until reset() is called.
    """

    def __init__(self, em):
        self.em = em
        self.ntry = 0

    def reset(self):
        """Allow getCmd() to offer the command again."""
        self.ntry = 0

    def getCmd(self, fn):
        """Return (command, output handler, executor options), or
        ([], None) when the command was already offered or ppt-dump.py
        cannot be found."""
        if self.ntry:
            return ([], None)
        self.ntry = 1
        dumper = rclexecm.which("ppt-dump.py")
        if not dumper:
            return ([], None)
        # ppt-dump.py often exits 1 with valid data. Ignore exit value
        argv = ["python", dumper, "--no-struct-output", "--dump-text"]
        return (argv, PPTProcessData(self.em), rclexec1.Executor.opt_ignxval)
|
||||
|
||||
if __name__ == '__main__':
    # Refuse to start when the external dump script is not installed.
    helper = rclexecm.which("ppt-dump.py")
    if not helper:
        print("RECFILTERROR HELPERNOTFOUND ppt-dump.py")
        sys.exit(1)
    protocol = rclexecm.RclExecM()
    executor = rclexec1.Executor(protocol, PPTFilter(protocol))
    rclexecm.main(protocol, executor)
|
||||
60
src/filters/rclrtf.py
Executable file
60
src/filters/rclrtf.py
Executable file
@ -0,0 +1,60 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import rclexecm
|
||||
import rclexec1
|
||||
import re
|
||||
import sys
|
||||
import os
|
||||
|
||||
# Processing the output from unrtf
|
||||
class RTFProcessData:
    """Pass the HTML produced by unrtf through, replacing whatever charset
    declaration the header contains by a UTF-8 one."""

    def __init__(self, em):
        self.em = em
        self.out = ""
        # Becomes 1 once the line holding </head> has been seen
        self.gothead = 0
        self.patendhead = re.compile('''</head>''')
        self.patcharset = re.compile('''^<meta http-equiv=''')

    # Some versions of unrtf put out a garbled charset line.
    # Apart from this, we pass the data untouched.
    def takeLine(self, line):
        """Copy one output line, newline-terminated, to the result."""
        if self.gothead:
            # Past the header: everything is copied as is
            self.out += line + "\n"
            return
        if self.patendhead.search(line):
            # End of header: insert our own charset declaration before it
            self.out += '<meta http-equiv="Content-Type" ' + \
                        'content="text/html;charset=UTF-8">' + "\n"
            self.out += line + "\n"
            self.gothead = 1
            return
        if self.patcharset.search(line):
            # Header line starting with a meta http-equiv tag: dropped
            return
        self.out += line + "\n"

    def wrapData(self):
        """Return the accumulated document."""
        return self.out
|
||||
|
||||
class RTFFilter:
    """Supplies the unrtf command line and its output handler.

    getCmd() offers the command once; after that it returns an empty
    command until reset() is called.
    """

    def __init__(self, em):
        self.em = em
        self.ntry = 0

    def reset(self):
        """Allow getCmd() to offer the command again."""
        self.ntry = 0

    def getCmd(self, fn):
        """Return (command, output handler), or ([], None) when the command
        was already offered or unrtf cannot be found."""
        if self.ntry:
            return ([], None)
        self.ntry = 1
        converter = rclexecm.which("unrtf")
        if not converter:
            return ([], None)
        return ([converter, "--nopict", "--html"], RTFProcessData(self.em))
|
||||
|
||||
if __name__ == '__main__':
    # Refuse to start when the external converter is not installed. The
    # name reported must be the program that was actually looked up.
    if not rclexecm.which("unrtf"):
        print("RECFILTERROR HELPERNOTFOUND unrtf")
        sys.exit(1)
    proto = rclexecm.RclExecM()
    filter = RTFFilter(proto)
    extract = rclexec1.Executor(proto, filter)
    rclexecm.main(proto, extract)
|
||||
189
src/filters/rclsoff.py
Executable file
189
src/filters/rclsoff.py
Executable file
@ -0,0 +1,189 @@
|
||||
#!/usr/bin/env python
|
||||
# Copyright (C) 2014 J.F.Dockes
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the
|
||||
# Free Software Foundation, Inc.,
|
||||
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
######################################
|
||||
|
||||
import sys
|
||||
import rclexecm
|
||||
import rclxslt
|
||||
from zipfile import ZipFile
|
||||
|
||||
stylesheet_meta = '''<?xml version="1.0"?>
|
||||
<xsl:stylesheet version="1.0"
|
||||
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
|
||||
xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0"
|
||||
xmlns:xlink="http://www.w3.org/1999/xlink"
|
||||
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||
xmlns:meta="urn:oasis:names:tc:opendocument:xmlns:meta:1.0"
|
||||
xmlns:ooo="http://openoffice.org/2004/office"
|
||||
exclude-result-prefixes="office xlink meta ooo dc"
|
||||
>
|
||||
|
||||
<xsl:output method="html" encoding="UTF-8"/>
|
||||
|
||||
<xsl:template match="/office:document-meta">
|
||||
<xsl:apply-templates select="office:meta/dc:description"/>
|
||||
<xsl:apply-templates select="office:meta/dc:subject"/>
|
||||
<xsl:apply-templates select="office:meta/dc:title"/>
|
||||
<xsl:apply-templates select="office:meta/meta:keyword"/>
|
||||
<xsl:apply-templates select="office:meta/dc:creator"/>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="dc:title">
|
||||
<title> <xsl:value-of select="."/> </title><xsl:text>
|
||||
</xsl:text>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="dc:description">
|
||||
<meta>
|
||||
<xsl:attribute name="name">abstract</xsl:attribute>
|
||||
<xsl:attribute name="content">
|
||||
<xsl:value-of select="."/>
|
||||
</xsl:attribute>
|
||||
</meta><xsl:text>
|
||||
</xsl:text>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="dc:subject">
|
||||
<meta>
|
||||
<xsl:attribute name="name">keywords</xsl:attribute>
|
||||
<xsl:attribute name="content">
|
||||
<xsl:value-of select="."/>
|
||||
</xsl:attribute>
|
||||
</meta><xsl:text>
|
||||
</xsl:text>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="dc:creator">
|
||||
<meta>
|
||||
<xsl:attribute name="name">author</xsl:attribute>
|
||||
<xsl:attribute name="content">
|
||||
<xsl:value-of select="."/>
|
||||
</xsl:attribute>
|
||||
</meta><xsl:text>
|
||||
</xsl:text>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="meta:keyword">
|
||||
<meta>
|
||||
<xsl:attribute name="name">keywords</xsl:attribute>
|
||||
<xsl:attribute name="content">
|
||||
<xsl:value-of select="."/>
|
||||
</xsl:attribute>
|
||||
</meta><xsl:text>
|
||||
</xsl:text>
|
||||
</xsl:template>
|
||||
|
||||
</xsl:stylesheet>
|
||||
'''
|
||||
|
||||
stylesheet_content = '''<?xml version="1.0"?>
|
||||
<xsl:stylesheet version="1.0"
|
||||
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
|
||||
xmlns:text="urn:oasis:names:tc:opendocument:xmlns:text:1.0"
|
||||
exclude-result-prefixes="text"
|
||||
>
|
||||
|
||||
<xsl:output method="html" encoding="UTF-8"/>
|
||||
|
||||
<xsl:template match="text:p">
|
||||
<p><xsl:apply-templates/></p><xsl:text>
|
||||
</xsl:text>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="text:h">
|
||||
<p><xsl:apply-templates/></p><xsl:text>
|
||||
</xsl:text>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="text:s">
|
||||
<xsl:text> </xsl:text>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="text:line-break">
|
||||
<br />
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="text:tab">
|
||||
<xsl:text> </xsl:text>
|
||||
</xsl:template>
|
||||
|
||||
</xsl:stylesheet>
|
||||
'''
|
||||
|
||||
class OOExtractor:
    """Turn an OpenDocument container (meta.xml and content.xml inside a
    zip archive) into a single HTML document.

    The object is driven by rclexecm through openfile/getipath/getnext.
    """

    def __init__(self, em):
        # em: protocol object, used here for logging through em.rclog()
        self.em = em
        self.currentindex = 0

    def extractone(self, params):
        """Extract the document named by params["filename:"].

        Returns a (ok, data, ipath, eof) tuple: (True, html, "", eofnext) on
        success, (False, "", "", eofnow) if the file name is missing, the
        archive cannot be opened or the content cannot be processed.
        Metadata problems are ignored.
        """
        if "filename:" not in params:
            self.em.rclog("extractone: no mime or file name")
            return (False, "", "", rclexecm.RclExecM.eofnow)
        fn = params["filename:"]

        try:
            archive = ZipFile(fn)
        except Exception as err:
            self.em.rclog("unzip failed: " + str(err))
            return (False, "", "", rclexecm.RclExecM.eofnow)

        try:
            # The header stays open until the metadata (title and meta
            # elements) has been added to it.
            docdata = '<html><head><meta http-equiv="Content-Type" ' \
                      'content="text/html; charset=UTF-8">'

            try:
                metadata = archive.read("meta.xml")
                if metadata:
                    docdata += rclxslt.apply_sheet_data(stylesheet_meta,
                                                        metadata)
            except Exception:
                # To be checked. I'm under the impression that I get this
                # when nothing matches?
                #self.em.rclog("no/bad metadata in %s" % fn)
                pass

            docdata += '</head><body>'

            try:
                content = archive.read("content.xml")
                if content:
                    docdata += rclxslt.apply_sheet_data(stylesheet_content,
                                                        content)
            except Exception:
                self.em.rclog("bad data in %s" % fn)
                return (False, "", "", rclexecm.RclExecM.eofnow)

            # Always close the page, even when content.xml is empty
            docdata += '</body></html>'
        finally:
            # Do not leave the archive open between documents.
            archive.close()

        return (True, docdata, "", rclexecm.RclExecM.eofnext)

    ###### File type handler api, used by rclexecm ---------->
    def openfile(self, params):
        """Prepare for a new file: rewind the document counter."""
        self.currentindex = 0
        return True

    def getipath(self, params):
        """There is a single document per file: same as extractone()."""
        return self.extractone(params)

    def getnext(self, params):
        """Return the document on the first call after openfile(), then an
        end-of-file tuple."""
        if self.currentindex >= 1:
            return (False, "", "", rclexecm.RclExecM.eofnow)
        else:
            ret = self.extractone(params)
            self.currentindex += 1
            return ret
|
||||
|
||||
if __name__ == '__main__':
    # Create the protocol object, attach an extractor to it and hand both
    # to rclexecm.main().
    protocol = rclexecm.RclExecM()
    rclexecm.main(protocol, OOExtractor(protocol))
|
||||
140
src/filters/rclsvg.py
Executable file
140
src/filters/rclsvg.py
Executable file
@ -0,0 +1,140 @@
|
||||
#!/usr/bin/env python
|
||||
# Copyright (C) 2014 J.F.Dockes
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the
|
||||
# Free Software Foundation, Inc.,
|
||||
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
######################################
|
||||
|
||||
import sys
|
||||
import rclexecm
|
||||
import rclxslt
|
||||
|
||||
stylesheet_all = '''<?xml version="1.0"?>
|
||||
<xsl:stylesheet version="1.0"
|
||||
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
|
||||
xmlns:svg="http://www.w3.org/2000/svg"
|
||||
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||
exclude-result-prefixes="svg"
|
||||
>
|
||||
|
||||
<xsl:output method="html" encoding="UTF-8"/>
|
||||
|
||||
<xsl:template match="/">
|
||||
<html>
|
||||
<head>
|
||||
<xsl:apply-templates select="svg:svg/svg:title"/>
|
||||
<xsl:apply-templates select="svg:svg/svg:desc"/>
|
||||
<xsl:apply-templates select="svg:svg/svg:metadata/descendant::dc:creator"/>
|
||||
<xsl:apply-templates select="svg:svg/svg:metadata/descendant::dc:subject"/>
|
||||
<xsl:apply-templates select="svg:svg/svg:metadata/descendant::dc:description"/>
|
||||
</head>
|
||||
<body>
|
||||
<xsl:apply-templates select="//svg:text"/>
|
||||
</body>
|
||||
</html>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="svg:desc">
|
||||
<meta>
|
||||
<xsl:attribute name="name">keywords</xsl:attribute>
|
||||
<xsl:attribute name="content">
|
||||
<xsl:value-of select="."/>
|
||||
</xsl:attribute>
|
||||
</meta><xsl:text>
|
||||
</xsl:text>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="dc:creator">
|
||||
<meta>
|
||||
<xsl:attribute name="name">author</xsl:attribute>
|
||||
<xsl:attribute name="content">
|
||||
<xsl:value-of select="."/>
|
||||
</xsl:attribute>
|
||||
</meta><xsl:text>
|
||||
</xsl:text>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="dc:subject">
|
||||
<meta>
|
||||
<xsl:attribute name="name">keywords</xsl:attribute>
|
||||
<xsl:attribute name="content">
|
||||
<xsl:value-of select="."/>
|
||||
</xsl:attribute>
|
||||
</meta><xsl:text>
|
||||
</xsl:text>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="dc:description">
|
||||
<meta>
|
||||
<xsl:attribute name="name">description</xsl:attribute>
|
||||
<xsl:attribute name="content">
|
||||
<xsl:value-of select="."/>
|
||||
</xsl:attribute>
|
||||
</meta><xsl:text>
|
||||
</xsl:text>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="svg:title">
|
||||
<title><xsl:value-of select="."/></title><xsl:text>
|
||||
</xsl:text>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="svg:text">
|
||||
<p><xsl:value-of select="."/></p><xsl:text>
|
||||
</xsl:text>
|
||||
</xsl:template>
|
||||
|
||||
</xsl:stylesheet>
|
||||
'''
|
||||
|
||||
class SVGExtractor:
    """Convert an SVG file to HTML by applying stylesheet_all to it.

    The object is driven by rclexecm through openfile/getipath/getnext.
    """

    def __init__(self, em):
        # em: protocol object, used here for logging through em.rclog()
        self.em = em
        self.currentindex = 0

    def extractone(self, params):
        """Transform the file named by params["filename:"].

        Returns a (ok, data, ipath, eof) tuple: (True, html, "", eofnow) on
        success, (False, "", "", eofnow) if the file name is missing or the
        file cannot be read or transformed (the reason is logged).
        """
        if "filename:" not in params:
            self.em.rclog("extractone: no mime or file name")
            return (False, "", "", rclexecm.RclExecM.eofnow)
        fn = params["filename:"]

        try:
            with open(fn, 'rb') as f:
                data = f.read()
            docdata = rclxslt.apply_sheet_data(stylesheet_all, data)
        except Exception as err:
            # One placeholder per value: a mismatch here would raise from
            # inside the handler and hide the original error.
            self.em.rclog("%s: bad data: %s" % (fn, err))
            return (False, "", "", rclexecm.RclExecM.eofnow)

        return (True, docdata, "", rclexecm.RclExecM.eofnow)

    ###### File type handler api, used by rclexecm ---------->
    def openfile(self, params):
        """Prepare for a new file: rewind the document counter."""
        self.currentindex = 0
        return True

    def getipath(self, params):
        """There is a single document per file: same as extractone()."""
        return self.extractone(params)

    def getnext(self, params):
        """Return the document on the first call after openfile(), then an
        end-of-file tuple."""
        if self.currentindex >= 1:
            return (False, "", "", rclexecm.RclExecM.eofnow)
        else:
            ret = self.extractone(params)
            self.currentindex += 1
            return ret
|
||||
|
||||
if __name__ == '__main__':
    # Create the protocol object, attach an extractor to it and hand both
    # to rclexecm.main().
    protocol = rclexecm.RclExecM()
    rclexecm.main(protocol, SVGExtractor(protocol))
|
||||
51
src/filters/rcltext.py
Executable file
51
src/filters/rcltext.py
Executable file
@ -0,0 +1,51 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import rclexecm
|
||||
import sys
|
||||
|
||||
# Wrapping a text file. Recoll does it internally in most cases, but
|
||||
# there is a reason this exists, just can't remember it ...
|
||||
class TxtDump:
    """Wrap the raw contents of a text file in a minimal HTML page, with
    the escaped text inside a <pre> element."""

    def __init__(self, em):
        # em: protocol object, used here for em.rclog() and em.htmlescape()
        self.em = em
        # Set here too, so that getnext() is usable before openfile()
        self.currentindex = 0

    def extractone(self, params):
        """Wrap the file named by params["filename:"].

        Returns a (ok, data, ipath, eof) tuple: (True, html, "", eofnext) on
        success, (False, "", "", eofnow) if the file name is missing or the
        file cannot be read (the reason is logged).
        """
        #self.em.rclog("extractone %s %s" % (params["filename:"], \
        #params["mimetype:"]))
        if "filename:" not in params:
            self.em.rclog("extractone: no file name")
            return (False, "", "", rclexecm.RclExecM.eofnow)

        fn = params["filename:"]
        # No charset, so recoll will have to use its config to guess it
        txt = '<html><head><title></title></head><body><pre>'
        try:
            with open(fn, "rb") as f:
                txt += self.em.htmlescape(f.read())
        except Exception as err:
            self.em.rclog("TxtDump: %s : %s" % (fn, err))
            return (False, "", "", rclexecm.RclExecM.eofnow)

        txt += '</pre></body></html>'
        return (True, txt, "", rclexecm.RclExecM.eofnext)

    ###### File type handler api, used by rclexecm ---------->
    def openfile(self, params):
        """Prepare for a new file: rewind the document counter."""
        self.currentindex = 0
        return True

    def getipath(self, params):
        """There is a single document per file: same as extractone()."""
        return self.extractone(params)

    def getnext(self, params):
        """Return the document on the first call after openfile(), then an
        end-of-file tuple."""
        if self.currentindex >= 1:
            return (False, "", "", rclexecm.RclExecM.eofnow)
        else:
            ret = self.extractone(params)
            self.currentindex += 1
            return ret
|
||||
|
||||
if __name__ == '__main__':
    # Create the protocol object, attach the text wrapper to it and hand
    # both to rclexecm.main().
    protocol = rclexecm.RclExecM()
    rclexecm.main(protocol, TxtDump(protocol))
|
||||
65
src/filters/rclxls.py
Executable file
65
src/filters/rclxls.py
Executable file
@ -0,0 +1,65 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import rclexecm
|
||||
import rclexec1
|
||||
import xlsxmltocsv
|
||||
import re
|
||||
import sys
|
||||
import os
|
||||
import xml.sax
|
||||
|
||||
# Processing the output from xls-dump.py
class XLSProcessData:
    """Collect the XML produced by the dump command, convert it to text
    with xlsxmltocsv.XlsXmlHandler and wrap the result in a minimal HTML
    page, with the text inside a <pre> element."""

    def __init__(self, em):
        # em: protocol object, used here for em.htmlescape()
        self.em = em
        self.out = ""
        self.gotdata = False
        self.xmldata = ""

    def takeLine(self, line):
        """Store one line of XML output for later parsing."""
        if not self.gotdata:
            self.out += '''<html><head>''' + \
                        '''<meta http-equiv="Content-Type" ''' + \
                        '''content="text/html;charset=UTF-8">''' + \
                        '''</head><body><pre>'''
            self.gotdata = True
        # NOTE(review): assumes the executor hands over lines without their
        # terminator (the RTF handler re-adds "\n" explicitly) -- confirm.
        # Without a separator, markup or text split across two lines would
        # be fused together.
        self.xmldata += line + "\n"

    def wrapData(self):
        """Parse the stored XML and return the complete HTML page.

        Whatever xml.sax.parseString raises on malformed or empty input is
        propagated to the caller.
        """
        handler = xlsxmltocsv.XlsXmlHandler()
        xml.sax.parseString(self.xmldata, handler)
        return self.out + self.em.htmlescape(handler.output) + \
               '''</pre></body></html>'''
|
||||
|
||||
class XLSFilter:
    """Supplies the xls-dump.py command line and its output handler.

    getCmd() offers the command once; after that it returns an empty
    command until reset() is called.
    """

    def __init__(self, em):
        self.em = em
        self.ntry = 0

    def reset(self):
        """Allow getCmd() to offer the command again."""
        self.ntry = 0

    def getCmd(self, fn):
        """Return (command, output handler, executor options), or
        ([], None) when the command was already offered or xls-dump.py
        cannot be found."""
        if self.ntry:
            return ([], None)
        self.ntry = 1
        dumper = rclexecm.which("xls-dump.py")
        if not dumper:
            return ([], None)
        # xls-dump.py often exits 1 with valid data. Ignore exit value
        argv = ["python", dumper, "--dump-mode=canonical-xml",
                "--utf-8", "--catch"]
        return (argv, XLSProcessData(self.em), rclexec1.Executor.opt_ignxval)
|
||||
|
||||
if __name__ == '__main__':
    # Refuse to start when the external dump script is not installed. The
    # script checked and reported is the one XLSFilter.getCmd() runs.
    if not rclexecm.which("xls-dump.py"):
        print("RECFILTERROR HELPERNOTFOUND xls-dump.py")
        sys.exit(1)
    proto = rclexecm.RclExecM()
    filter = XLSFilter(proto)
    extract = rclexec1.Executor(proto, filter)
    rclexecm.main(proto, extract)
|
||||
98
src/filters/rclxml.py
Executable file
98
src/filters/rclxml.py
Executable file
@ -0,0 +1,98 @@
|
||||
#!/usr/bin/env python
|
||||
# Copyright (C) 2014 J.F.Dockes
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the
|
||||
# Free Software Foundation, Inc.,
|
||||
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
######################################
|
||||
|
||||
import sys
|
||||
import rclexecm
|
||||
import rclxslt
|
||||
|
||||
stylesheet_all = '''<?xml version="1.0"?>
|
||||
<xsl:stylesheet version="1.0"
|
||||
xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
|
||||
|
||||
<xsl:output method="html" encoding="UTF-8"/>
|
||||
|
||||
<xsl:template match="/">
|
||||
<html>
|
||||
<head>
|
||||
<xsl:if test="//*[local-name() = 'title']">
|
||||
<title>
|
||||
<xsl:value-of select="//*[local-name() = 'title'][1]"/>
|
||||
</title>
|
||||
</xsl:if>
|
||||
</head>
|
||||
<body>
|
||||
<xsl:apply-templates/>
|
||||
</body>
|
||||
</html>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="text()">
|
||||
<xsl:if test="string-length(normalize-space(.)) > 0">
|
||||
<p><xsl:value-of select="."/></p>
|
||||
<xsl:text>
|
||||
</xsl:text>
|
||||
</xsl:if>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="*">
|
||||
<xsl:apply-templates/>
|
||||
</xsl:template>
|
||||
|
||||
</xsl:stylesheet>
|
||||
'''
|
||||
|
||||
class XMLExtractor:
    """Convert a generic XML file to HTML by applying stylesheet_all to it.

    The object is driven by rclexecm through openfile/getipath/getnext.
    """

    def __init__(self, em):
        # em: protocol object, used here for logging through em.rclog()
        self.em = em
        self.currentindex = 0

    def extractone(self, params):
        """Transform the file named by params["filename:"].

        Returns a (ok, data, ipath, eof) tuple: (True, html, "", eofnow) on
        success, (False, "", "", eofnow) if the file name is missing or the
        file cannot be read or transformed (the reason is logged).
        """
        if "filename:" not in params:
            self.em.rclog("extractone: no mime or file name")
            return (False, "", "", rclexecm.RclExecM.eofnow)
        fn = params["filename:"]

        try:
            with open(fn, 'rb') as f:
                data = f.read()
            docdata = rclxslt.apply_sheet_data(stylesheet_all, data)
        except Exception as err:
            # One placeholder per value: a mismatch here would raise from
            # inside the handler and hide the original error.
            self.em.rclog("%s: bad data: %s" % (fn, err))
            return (False, "", "", rclexecm.RclExecM.eofnow)

        return (True, docdata, "", rclexecm.RclExecM.eofnow)

    ###### File type handler api, used by rclexecm ---------->
    def openfile(self, params):
        """Prepare for a new file: rewind the document counter."""
        self.currentindex = 0
        return True

    def getipath(self, params):
        """There is a single document per file: same as extractone()."""
        return self.extractone(params)

    def getnext(self, params):
        """Return the document on the first call after openfile(), then an
        end-of-file tuple."""
        if self.currentindex >= 1:
            return (False, "", "", rclexecm.RclExecM.eofnow)
        else:
            ret = self.extractone(params)
            self.currentindex += 1
            return ret
|
||||
|
||||
if __name__ == '__main__':
    # Create the protocol object, attach an extractor to it and hand both
    # to rclexecm.main().
    protocol = rclexecm.RclExecM()
    rclexecm.main(protocol, XMLExtractor(protocol))
|
||||
52
src/filters/rclxslt.py
Normal file
52
src/filters/rclxslt.py
Normal file
@ -0,0 +1,52 @@
|
||||
# Copyright (C) 2014 J.F.Dockes
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the
|
||||
# Free Software Foundation, Inc.,
|
||||
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
######################################
|
||||
|
||||
# Helper module for xslt-based filters
|
||||
|
||||
import sys
|
||||
|
||||
# The XML/XSLT bindings are not part of the standard library: when they
# cannot be loaded, print the missing-helper message and exit.
try:
    import libxml2
    import libxslt
except Exception:
    # Function-call form of print, as in the other filters: it behaves
    # the same for a single string under Python 2 and also parses under
    # Python 3.
    print("RECFILTERROR HELPERNOTFOUND python:libxml2/python:libxslt1")
    sys.exit(1)

# Ask libxml2 to substitute entities by default in the documents it parses.
libxml2.substituteEntitiesDefault(1)
|
||||
|
||||
def _apply_sheet(sheet, loaddoc):
    """Compile the style sheet text, apply it to the document returned by
    loaddoc() and return the serialized result.

    Every libxml2/libxslt object created on the way is released before
    returning, including when a step raises.
    """
    styledoc = libxml2.parseMemory(sheet, len(sheet))
    style = libxslt.parseStylesheetDoc(styledoc)
    if style is None:
        # NOTE(review): the binding is understood to return None (not raise)
        # when compilation fails, leaving the parsed document to the
        # caller -- confirm against the libxslt Python API.
        styledoc.freeDoc()
        raise ValueError("rclxslt: could not compile style sheet")
    doc = None
    result = None
    try:
        doc = loaddoc()
        result = style.applyStylesheet(doc, None)
        if result is None:
            raise ValueError("rclxslt: transformation failed")
        return style.saveResultToString(result)
    finally:
        if result is not None:
            result.freeDoc()
        if doc is not None:
            doc.freeDoc()
        # Freed through the stylesheet only, as the original code did for
        # the parsed style sheet document.
        style.freeStylesheet()


def apply_sheet_data(sheet, data):
    """Apply the XSLT style sheet text to in-memory XML data and return the
    serialized result.

    Parse or transformation failures raise an exception.
    """
    return _apply_sheet(sheet, lambda: libxml2.parseMemory(data, len(data)))


def apply_sheet_file(sheet, fn):
    """Apply the XSLT style sheet text to the XML file named fn and return
    the serialized result.

    Parse or transformation failures raise an exception.
    """
    return _apply_sheet(sheet, lambda: libxml2.parseFile(fn))
|
||||
|
||||
@ -15,10 +15,13 @@ else:
|
||||
dquote = '"'
|
||||
|
||||
class XlsXmlHandler(xml.sax.handler.ContentHandler):
|
||||
def __init__(self):
|
||||
self.output = ""
|
||||
|
||||
def startElement(self, name, attrs):
|
||||
if name == "worksheet":
|
||||
if "name" in attrs:
|
||||
print("%s" % attrs["name"].encode("UTF-8"))
|
||||
self.output += "%s\n" % attrs["name"].encode("UTF-8")
|
||||
elif name == "row":
|
||||
self.cells = dict()
|
||||
elif name == "label-cell" or name == "number-cell":
|
||||
@ -30,7 +33,7 @@ class XlsXmlHandler(xml.sax.handler.ContentHandler):
|
||||
self.cells[int(attrs["col"])] = value
|
||||
else:
|
||||
#??
|
||||
sys.stdout.write("%s%s"%(value.encode("UTF-8"),sepstring))
|
||||
self.output += "%s%s" % (value.encode("UTF-8"), sepstring)
|
||||
elif name == "formula-cell":
|
||||
if "formula-result" in attrs and "col" in attrs:
|
||||
self.cells[int(attrs["col"])] = \
|
||||
@ -40,17 +43,21 @@ class XlsXmlHandler(xml.sax.handler.ContentHandler):
|
||||
if name == "row":
|
||||
curidx = 0
|
||||
for idx, value in self.cells.iteritems():
|
||||
sys.stdout.write(sepstring * (idx - curidx))
|
||||
sys.stdout.write('%s%s%s' % (dquote, value, dquote))
|
||||
self.output += sepstring * (idx - curidx)
|
||||
self.output += "%s%s%s" % (dquote, value, dquote)
|
||||
curidx = idx
|
||||
sys.stdout.write("\n")
|
||||
self.output += "\n"
|
||||
elif name == "worksheet":
|
||||
print("")
|
||||
self.output += "\n"
|
||||
|
||||
try:
|
||||
xml.sax.parse(sys.stdin, XlsXmlHandler())
|
||||
except BaseException as err:
|
||||
error("xml-parse: %s\n" % (str(sys.exc_info()[:2]),))
|
||||
sys.exit(1)
|
||||
|
||||
sys.exit(0)
|
||||
if __name__ == '__main__':
|
||||
try:
|
||||
handler = XlsXmlHandler()
|
||||
xml.sax.parse(sys.stdin, handler)
|
||||
print(handler.output)
|
||||
except BaseException as err:
|
||||
error("xml-parse: %s\n" % (str(sys.exc_info()[:2]),))
|
||||
sys.exit(1)
|
||||
|
||||
sys.exit(0)
|
||||
|
||||
@ -14,9 +14,8 @@
|
||||
* Free Software Foundation, Inc.,
|
||||
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "autoconfig.h"
|
||||
#endif
|
||||
|
||||
|
||||
#include "debuglog.h"
|
||||
|
||||
@ -34,8 +33,10 @@ DocFetcher *docFetcherMake(const Rcl::Doc& idoc)
|
||||
idoc.getmeta(Rcl::Doc::keybcknd, &backend);
|
||||
if (backend.empty() || !backend.compare("FS")) {
|
||||
return new FSDocFetcher;
|
||||
#ifndef DISABLE_WEB_INDEXER
|
||||
} else if (!backend.compare("BGL")) {
|
||||
return new BGLDocFetcher;
|
||||
#endif
|
||||
} else {
|
||||
LOGERR(("DocFetcherFactory: unknown backend [%s]\n", backend.c_str()));
|
||||
return 0;
|
||||
|
||||
@ -43,7 +43,6 @@
|
||||
#include "fileudi.h"
|
||||
#include "cancelcheck.h"
|
||||
#include "rclinit.h"
|
||||
#include "execmd.h"
|
||||
#include "extrameta.h"
|
||||
|
||||
using namespace std;
|
||||
@ -145,13 +144,11 @@ FsIndexer::~FsIndexer()
|
||||
void *status;
|
||||
if (m_haveInternQ) {
|
||||
status = m_iwqueue.setTerminateAndWait();
|
||||
LOGDEB0(("FsIndexer: internfile wrkr status: %ld (1->ok)\n",
|
||||
long(status)));
|
||||
LOGDEB0(("FsIndexer: internfile wrkr status: %p (1->ok)\n", status));
|
||||
}
|
||||
if (m_haveSplitQ) {
|
||||
status = m_dwqueue.setTerminateAndWait();
|
||||
LOGDEB0(("FsIndexer: dbupd worker status: %ld (1->ok)\n",
|
||||
long(status)));
|
||||
LOGDEB0(("FsIndexer: dbupd worker status: %p (1->ok)\n", status));
|
||||
}
|
||||
delete m_stableconfig;
|
||||
#endif // IDX_THREADS
|
||||
@ -259,7 +256,7 @@ static bool matchesSkipped(const vector<string>& tdl,
|
||||
string canonpath = path_canon(path);
|
||||
string mpath = canonpath;
|
||||
string topdir;
|
||||
while (mpath.length() > 1) {
|
||||
while (!path_isroot(mpath)) { // we assume root not in skipped paths.
|
||||
for (vector<string>::const_iterator it = tdl.begin();
|
||||
it != tdl.end(); it++) {
|
||||
// the topdirs members are already canonized.
|
||||
@ -281,7 +278,7 @@ static bool matchesSkipped(const vector<string>& tdl,
|
||||
mpath = path_getfather(mpath);
|
||||
// getfather normally returns a path ending with /, canonic
|
||||
// paths don't (except for '/' itself).
|
||||
if (!mpath.empty() && mpath[mpath.size()-1] == '/')
|
||||
if (!path_isroot(mpath) && mpath[mpath.size()-1] == '/')
|
||||
mpath.erase(mpath.size()-1);
|
||||
// should not be necessary, but lets be prudent. If the
|
||||
// path did not shorten, something is seriously amiss
|
||||
@ -330,7 +327,7 @@ bool FsIndexer::indexFiles(list<string>& files, int flags)
|
||||
{
|
||||
LOGDEB(("FsIndexer::indexFiles\n"));
|
||||
m_noretryfailed = (flags & ConfIndexer::IxFNoRetryFailed) != 0;
|
||||
int ret = false;
|
||||
bool ret = false;
|
||||
|
||||
if (!init())
|
||||
return false;
|
||||
@ -703,7 +700,7 @@ FsIndexer::processonefile(RclConfig *config,
|
||||
}
|
||||
|
||||
LOGDEB0(("processone: processing: [%s] %s\n",
|
||||
displayableBytes(stp->st_size).c_str(), fn.c_str()));
|
||||
displayableBytes(off_t(stp->st_size)).c_str(), fn.c_str()));
|
||||
|
||||
string utf8fn = compute_utf8fn(config, fn);
|
||||
|
||||
@ -772,7 +769,7 @@ FsIndexer::processonefile(RclConfig *config,
|
||||
if (doc.fmtime.empty())
|
||||
doc.fmtime = ascdate;
|
||||
if (doc.url.empty())
|
||||
doc.url = cstr_fileu + fn;
|
||||
doc.url = path_pathtofileurl(fn);
|
||||
const string *fnp = 0;
|
||||
if (doc.ipath.empty()) {
|
||||
if (!doc.peekmeta(Rcl::Doc::keyfn, &fnp) || fnp->empty())
|
||||
@ -868,7 +865,7 @@ FsIndexer::processonefile(RclConfig *config,
|
||||
fileDoc.meta[Rcl::Doc::keytcfn] = utf8fn;
|
||||
fileDoc.haschildren = true;
|
||||
fileDoc.mimetype = mimetype;
|
||||
fileDoc.url = cstr_fileu + fn;
|
||||
fileDoc.url = path_pathtofileurl(fn);
|
||||
if (m_havelocalfields)
|
||||
setlocalfields(localfields, fileDoc);
|
||||
char cbuf[100];
|
||||
|
||||
@ -14,9 +14,7 @@
|
||||
* Free Software Foundation, Inc.,
|
||||
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "autoconfig.h"
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
#include <errno.h>
|
||||
@ -27,7 +25,9 @@
|
||||
#include "debuglog.h"
|
||||
#include "indexer.h"
|
||||
#include "fsindexer.h"
|
||||
#ifndef DISABLE_WEB_INDEXER
|
||||
#include "beaglequeue.h"
|
||||
#endif
|
||||
#include "mimehandler.h"
|
||||
#include "pathut.h"
|
||||
|
||||
@ -46,7 +46,9 @@ ConfIndexer::ConfIndexer(RclConfig *cnf, DbIxStatusUpdater *updfunc)
|
||||
ConfIndexer::~ConfIndexer()
|
||||
{
|
||||
deleteZ(m_fsindexer);
|
||||
#ifndef DISABLE_WEB_INDEXER
|
||||
deleteZ(m_beagler);
|
||||
#endif
|
||||
}
|
||||
|
||||
// Determine if this is likely the first time that the user runs
|
||||
@ -107,7 +109,7 @@ bool ConfIndexer::index(bool resetbefore, ixType typestorun, int flags)
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
#ifndef DISABLE_WEB_INDEXER
|
||||
if (m_dobeagle && (typestorun & IxTBeagleQueue)) {
|
||||
deleteZ(m_beagler);
|
||||
m_beagler = new BeagleQueueIndexer(m_config, &m_db, m_updater);
|
||||
@ -116,7 +118,7 @@ bool ConfIndexer::index(bool resetbefore, ixType typestorun, int flags)
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
if (typestorun == IxTAll) {
|
||||
// Get rid of all database entries that don't exist in the
|
||||
// filesystem anymore. Only if all *configured* indexers ran.
|
||||
@ -173,6 +175,7 @@ bool ConfIndexer::indexFiles(list<string>& ifiles, int flag)
|
||||
ret = m_fsindexer->indexFiles(myfiles, flag);
|
||||
LOGDEB2(("ConfIndexer::indexFiles: fsindexer returned %d, "
|
||||
"%d files remainining\n", ret, myfiles.size()));
|
||||
#ifndef DISABLE_WEB_INDEXER
|
||||
|
||||
if (m_dobeagle && !myfiles.empty() && !(flag & IxFNoWeb)) {
|
||||
if (!m_beagler)
|
||||
@ -183,7 +186,7 @@ bool ConfIndexer::indexFiles(list<string>& ifiles, int flag)
|
||||
ret = false;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
// The close would be done in our destructor, but we want status here
|
||||
if (!m_db.close()) {
|
||||
LOGERR(("ConfIndexer::index: error closing database in %s\n",
|
||||
@ -255,6 +258,7 @@ bool ConfIndexer::purgeFiles(std::list<string> &files, int flag)
|
||||
if (m_fsindexer)
|
||||
ret = m_fsindexer->purgeFiles(myfiles);
|
||||
|
||||
#ifndef DISABLE_WEB_INDEXER
|
||||
if (m_dobeagle && !myfiles.empty() && !(flag & IxFNoWeb)) {
|
||||
if (!m_beagler)
|
||||
m_beagler = new BeagleQueueIndexer(m_config, &m_db, m_updater);
|
||||
@ -264,6 +268,7 @@ bool ConfIndexer::purgeFiles(std::list<string> &files, int flag)
|
||||
ret = false;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// The close would be done in our destructor, but we want status here
|
||||
if (!m_db.close()) {
|
||||
|
||||
@ -16,20 +16,18 @@
|
||||
*/
|
||||
#ifndef _INDEXER_H_INCLUDED_
|
||||
#define _INDEXER_H_INCLUDED_
|
||||
#include "rclconfig.h"
|
||||
|
||||
#include <string>
|
||||
#include <list>
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
#ifndef NO_NAMESPACES
|
||||
using std::string;
|
||||
using std::list;
|
||||
using std::map;
|
||||
using std::vector;
|
||||
#endif
|
||||
|
||||
#include "rclconfig.h"
|
||||
#include "rcldb.h"
|
||||
#include "rcldoc.h"
|
||||
#ifdef IDX_THREADS
|
||||
|
||||
@ -140,6 +140,7 @@ string mimetype(const string &fn, const struct stat *stp,
|
||||
|
||||
string mtype;
|
||||
|
||||
#ifndef _WIN32
|
||||
// Extended attribute has priority on everything, as per:
|
||||
// http://freedesktop.org/wiki/CommonExtendedAttributes
|
||||
if (pxattr::get(fn, "mime_type", &mtype)) {
|
||||
@ -150,6 +151,7 @@ string mimetype(const string &fn, const struct stat *stp,
|
||||
return mtype;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (cfg == 0) {
|
||||
LOGERR(("Mimetype: null config ??\n"));
|
||||
@ -177,7 +179,6 @@ string mimetype(const string &fn, const struct stat *stp,
|
||||
if (mtype.empty() && stp)
|
||||
mtype = mimetypefromdata(cfg, fn, usfc);
|
||||
|
||||
out:
|
||||
return mtype;
|
||||
}
|
||||
|
||||
|
||||
@ -17,10 +17,10 @@
|
||||
#ifndef _MIMETYPE_H_INCLUDED_
|
||||
#define _MIMETYPE_H_INCLUDED_
|
||||
|
||||
#include "safesysstat.h"
|
||||
#include <string>
|
||||
|
||||
class RclConfig;
|
||||
struct stat;
|
||||
|
||||
/**
|
||||
* Try to determine a mime type for file.
|
||||
|
||||
@ -469,7 +469,7 @@ bool RclFAM::getEvent(RclMonEvent& ev, int msecs)
|
||||
MONDEB(("RclFAM::getEvent: FAMNextEvent returned\n"));
|
||||
|
||||
map<int,string>::const_iterator it;
|
||||
if ((fe.filename[0] != '/') &&
|
||||
if ((!path_isabsolute(fe.filename)) &&
|
||||
(it = m_idtopath.find(fe.fr.reqnum)) != m_idtopath.end()) {
|
||||
ev.m_path = path_cat(it->second, fe.filename);
|
||||
} else {
|
||||
|
||||
@ -14,16 +14,18 @@
|
||||
* Free Software Foundation, Inc.,
|
||||
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "autoconfig.h"
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
#include <signal.h>
|
||||
#include <errno.h>
|
||||
#ifndef _WIN32
|
||||
#include <sys/time.h>
|
||||
#include <sys/resource.h>
|
||||
#include <fcntl.h>
|
||||
#else
|
||||
#include <direct.h>
|
||||
#endif
|
||||
#include "safefcntl.h"
|
||||
#include "safeunistd.h"
|
||||
|
||||
#include <iostream>
|
||||
@ -42,10 +44,14 @@ using namespace std;
|
||||
#include "x11mon.h"
|
||||
#include "cancelcheck.h"
|
||||
#include "rcldb.h"
|
||||
#ifndef DISABLE_WEB_INDEXER
|
||||
#include "beaglequeue.h"
|
||||
#endif
|
||||
#include "recollindex.h"
|
||||
#include "fsindexer.h"
|
||||
#ifndef _WIN32
|
||||
#include "rclionice.h"
|
||||
#endif
|
||||
#include "execmd.h"
|
||||
#include "checkretryfailed.h"
|
||||
|
||||
@ -133,6 +139,7 @@ class MyUpdater : public DbIxStatusUpdater {
|
||||
return false;
|
||||
}
|
||||
|
||||
#ifndef DISABLE_X11MON
|
||||
// If we are in the monitor, we also need to check X11 status
|
||||
// during the initial indexing pass (else the user could log
|
||||
// out and the indexing would go on, not good (ie: if the user
|
||||
@ -142,7 +149,7 @@ class MyUpdater : public DbIxStatusUpdater {
|
||||
stopindexing = true;
|
||||
return false;
|
||||
}
|
||||
|
||||
#endif
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -177,11 +184,13 @@ static void makeIndexerOrExit(RclConfig *config, bool inPlaceReset)
|
||||
|
||||
// Apply the configured ionice settings to the indexer process.
// Reads the "monioniceclass" and "monioniceclassdata" configuration
// parameters and hands them to rclionice(). When "monioniceclass" is
// unset or empty, the class value "3" is used instead.
// The whole body is compiled out when _WIN32 is defined, so the function
// is a no-op there.
void rclIxIonice(const RclConfig *config)
|
||||
{
|
||||
#ifndef _WIN32
|
||||
string clss, classdata;
|
||||
// Fall back to class "3" if the parameter is missing or empty.
if (!config->getConfParam("monioniceclass", clss) || clss.empty())
|
||||
clss = "3";
|
||||
// Return value deliberately ignored: classdata stays empty if unset.
config->getConfParam("monioniceclassdata", classdata);
|
||||
rclionice(clss, classdata);
|
||||
#endif
|
||||
}
|
||||
|
||||
class MakeListWalkerCB : public FsTreeWalkerCB {
|
||||
@ -273,7 +282,7 @@ static bool checktopdirs(RclConfig *config, vector<string>& nonexist)
|
||||
|
||||
for (vector<string>::iterator it = tdl.begin(); it != tdl.end(); it++) {
|
||||
*it = path_tildexpand(*it);
|
||||
if (!it->size() || (*it)[0] != '/') {
|
||||
if (!it->size() || !path_isabsolute(*it)) {
|
||||
if ((*it)[0] == '~') {
|
||||
cerr << "Tilde expansion failed: " << *it << endl;
|
||||
LOGERR(("recollindex: tilde expansion failed: %s\n",
|
||||
@ -285,7 +294,7 @@ static bool checktopdirs(RclConfig *config, vector<string>& nonexist)
|
||||
}
|
||||
return false;
|
||||
}
|
||||
if (access(it->c_str(), 0) < 0) {
|
||||
if (!path_exists(*it)) {
|
||||
nonexist.push_back(*it);
|
||||
}
|
||||
}
|
||||
@ -295,7 +304,7 @@ static bool checktopdirs(RclConfig *config, vector<string>& nonexist)
|
||||
if (config->getConfParam("skippedPaths", &tdl, true)) {
|
||||
for (vector<string>::iterator it = tdl.begin(); it != tdl.end(); it++) {
|
||||
*it = path_tildexpand(*it);
|
||||
if (access(it->c_str(), 0) < 0) {
|
||||
if (!path_exists(*it)) {
|
||||
nonexist.push_back(*it);
|
||||
}
|
||||
}
|
||||
@ -304,7 +313,7 @@ static bool checktopdirs(RclConfig *config, vector<string>& nonexist)
|
||||
if (config->getConfParam("daemSkippedPaths", &tdl, true)) {
|
||||
for (vector<string>::iterator it = tdl.begin(); it != tdl.end(); it++) {
|
||||
*it = path_tildexpand(*it);
|
||||
if (access(it->c_str(), 0) < 0) {
|
||||
if (!path_exists(*it)) {
|
||||
nonexist.push_back(*it);
|
||||
}
|
||||
}
|
||||
@ -393,8 +402,10 @@ int main(int argc, char **argv)
|
||||
// The reexec struct is used by the daemon to shed memory after
|
||||
// the initial indexing pass and to restart when the configuration
|
||||
// changes
|
||||
#ifndef _WIN32
|
||||
o_reexec = new ReExec;
|
||||
o_reexec->init(argc, argv);
|
||||
#endif
|
||||
|
||||
thisprog = argv[0];
|
||||
argc--; argv++;
|
||||
@ -463,7 +474,9 @@ int main(int argc, char **argv)
|
||||
cerr << "Configuration problem: " << reason << endl;
|
||||
exit(1);
|
||||
}
|
||||
#ifndef _WIN32
|
||||
o_reexec->atexit(cleanup);
|
||||
#endif
|
||||
|
||||
vector<string> nonexist;
|
||||
if (!checktopdirs(config, nonexist))
|
||||
@ -511,9 +524,11 @@ int main(int argc, char **argv)
|
||||
if (op_flags & OPT_k) {
|
||||
indexerFlags &= ~ConfIndexer::IxFNoRetryFailed;
|
||||
} else {
|
||||
#ifndef _WIN32
|
||||
if (checkRetryFailed(config, false)) {
|
||||
indexerFlags &= ~ConfIndexer::IxFNoRetryFailed;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
Pidfile pidfile(config->getPidfile());
|
||||
@ -522,12 +537,13 @@ int main(int argc, char **argv)
|
||||
// Log something at LOGINFO to reset the trace file. Else at level
|
||||
// 3 it's not even truncated if all docs are up to date.
|
||||
LOGINFO(("recollindex: starting up\n"));
|
||||
|
||||
#ifndef _WIN32
|
||||
if (setpriority(PRIO_PROCESS, 0, 20) != 0) {
|
||||
LOGINFO(("recollindex: can't setpriority(), errno %d\n", errno));
|
||||
}
|
||||
// Try to ionice. This does not work on all platforms
|
||||
rclIxIonice(config);
|
||||
#endif
|
||||
|
||||
if (op_flags & (OPT_i|OPT_e)) {
|
||||
lockorexit(&pidfile);
|
||||
@ -596,15 +612,17 @@ int main(int argc, char **argv)
|
||||
lockorexit(&pidfile);
|
||||
if (!(op_flags&OPT_D)) {
|
||||
LOGDEB(("recollindex: daemonizing\n"));
|
||||
#ifndef _WIN32
|
||||
if (daemon(0,0) != 0) {
|
||||
fprintf(stderr, "daemon() failed, errno %d\n", errno);
|
||||
LOGERR(("daemon() failed, errno %d\n", errno));
|
||||
exit(1);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
// Need to rewrite pid, it changed
|
||||
pidfile.write_pid();
|
||||
|
||||
#ifndef _WIN32
|
||||
// Not too sure if I have to redo the nice thing after daemon(),
|
||||
// can't hurt anyway (easier than testing on all platforms...)
|
||||
if (setpriority(PRIO_PROCESS, 0, 20) != 0) {
|
||||
@ -612,6 +630,7 @@ int main(int argc, char **argv)
|
||||
}
|
||||
// Try to ionice. This does not work on all platforms
|
||||
rclIxIonice(config);
|
||||
#endif
|
||||
|
||||
if (sleepsecs > 0) {
|
||||
LOGDEB(("recollindex: sleeping %d\n", sleepsecs));
|
||||
@ -633,12 +652,15 @@ int main(int argc, char **argv)
|
||||
"not going into monitor mode\n"));
|
||||
exit(1);
|
||||
} else {
|
||||
#ifndef _WIN32
|
||||
// Record success of indexing pass with failed files retries.
|
||||
if (!(indexerFlags & ConfIndexer::IxFNoRetryFailed)) {
|
||||
checkRetryFailed(config, true);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
deleteZ(confindexer);
|
||||
#ifndef _WIN32
|
||||
o_reexec->insertArgs(vector<string>(1, "-n"));
|
||||
LOGINFO(("recollindex: reexecuting with -n after initial full pass\n"));
|
||||
// Note that -n will be inside the reexec when we come
|
||||
@ -646,6 +668,7 @@ int main(int argc, char **argv)
|
||||
// starting a config change exec to ensure that we do a
|
||||
// purging pass in this case.
|
||||
o_reexec->reexec();
|
||||
#endif
|
||||
}
|
||||
if (updater) {
|
||||
updater->status.phase = DbIxStatus::DBIXS_MONITOR;
|
||||
@ -672,11 +695,12 @@ int main(int argc, char **argv)
|
||||
makeIndexerOrExit(config, inPlaceReset);
|
||||
bool status = confindexer->index(rezero, ConfIndexer::IxTAll,
|
||||
indexerFlags);
|
||||
|
||||
#ifndef _WIN32
|
||||
// Record success of indexing pass with failed files retries.
|
||||
if (status && !(indexerFlags & ConfIndexer::IxFNoRetryFailed)) {
|
||||
checkRetryFailed(config, true);
|
||||
}
|
||||
#endif
|
||||
if (!status)
|
||||
cerr << "Indexing failed" << endl;
|
||||
if (!confindexer->getReason().empty())
|
||||
|
||||
@ -108,7 +108,7 @@ namespace Dijon
|
||||
*/
|
||||
virtual bool set_document_data(const std::string& mtype,
|
||||
const char *data_ptr,
|
||||
unsigned int data_length) = 0;
|
||||
size_t data_length) = 0;
|
||||
|
||||
/** (Re)initializes the filter with the given data.
|
||||
* Call next_document() to position the filter onto the first document.
|
||||
@ -140,7 +140,7 @@ namespace Dijon
|
||||
stat() calls The value is stored inside metaData, docsize
|
||||
key
|
||||
*/
|
||||
virtual void set_docsize(size_t size) = 0;
|
||||
virtual void set_docsize(off_t size) = 0;
|
||||
|
||||
// Going from one nested document to the next.
|
||||
|
||||
|
||||
@ -46,7 +46,7 @@ void reapXAttrs(const RclConfig* cfg, const string& path,
|
||||
map<string, string>& xfields)
|
||||
{
|
||||
LOGDEB2(("reapXAttrs: [%s]\n", path.c_str()));
|
||||
|
||||
#ifndef _WIN32
|
||||
// Retrieve xattrs names from files and mapping table from config
|
||||
vector<string> xnames;
|
||||
if (!pxattr::list(path, &xnames)) {
|
||||
@ -79,6 +79,7 @@ void reapXAttrs(const RclConfig* cfg, const string& path,
|
||||
xfields[key] = value;
|
||||
LOGDEB2(("reapXAttrs: [%s] -> [%s]\n", key.c_str(), value.c_str()));
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void docFieldsFromXattrs(RclConfig *cfg, const map<string, string>& xfields,
|
||||
@ -93,6 +94,7 @@ void docFieldsFromXattrs(RclConfig *cfg, const map<string, string>& xfields,
|
||||
void reapMetaCmds(RclConfig* cfg, const string& path,
|
||||
map<string, string>& cfields)
|
||||
{
|
||||
#ifndef _WIN32
|
||||
const vector<MDReaper>& reapers = cfg->getMDReapers();
|
||||
if (reapers.empty())
|
||||
return;
|
||||
@ -111,6 +113,7 @@ void reapMetaCmds(RclConfig* cfg, const string& path,
|
||||
cfields[rp->fieldname] = output;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
// Set fields from external commands
|
||||
|
||||
@ -169,7 +169,7 @@ void FileInterner::init(const string &f, const struct stat *stp, RclConfig *cnf,
|
||||
l_mime = *imime;
|
||||
}
|
||||
|
||||
size_t docsize = stp->st_size;
|
||||
off_t docsize = stp->st_size;
|
||||
|
||||
if (!l_mime.empty()) {
|
||||
// Has mime: check for a compressed file. If so, create a
|
||||
@ -216,7 +216,7 @@ void FileInterner::init(const string &f, const struct stat *stp, RclConfig *cnf,
|
||||
m_mimetype = l_mime;
|
||||
RecollFilter *df = getMimeHandler(l_mime, m_cfg, !m_forPreview);
|
||||
|
||||
if (!df or df->is_unknown()) {
|
||||
if (!df || df->is_unknown()) {
|
||||
// No real handler for this type, for now :(
|
||||
LOGDEB(("FileInterner:: unprocessed mime: [%s] [%s]\n",
|
||||
l_mime.c_str(), f.c_str()));
|
||||
@ -629,7 +629,7 @@ void FileInterner::popHandler()
|
||||
{
|
||||
if (m_handlers.empty())
|
||||
return;
|
||||
int i = m_handlers.size() - 1;
|
||||
size_t i = m_handlers.size() - 1;
|
||||
if (m_tmpflgs[i]) {
|
||||
m_tempfiles.pop_back();
|
||||
m_tmpflgs[i] = false;
|
||||
|
||||
@ -17,10 +17,10 @@
|
||||
#include "autoconfig.h"
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/wait.h>
|
||||
#include <time.h>
|
||||
#include "safesyswait.h"
|
||||
|
||||
#include <list>
|
||||
using namespace std;
|
||||
|
||||
#include "cstr.h"
|
||||
#include "execmd.h"
|
||||
@ -32,6 +32,8 @@ using namespace std;
|
||||
#include "md5ut.h"
|
||||
#include "rclconfig.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
// This is called periodically by ExeCmd when it is waiting for data,
|
||||
// or when it does receive some. We may choose to interrupt the
|
||||
// command.
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
/* Copyright (C) 2005 J.F.Dockes
|
||||
/* Copyright (C) 2005 J.F.Dockes
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
@ -32,7 +32,7 @@ using namespace std;
|
||||
#include "idfile.h"
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/wait.h>
|
||||
#include "safesyswait.h"
|
||||
|
||||
bool MimeHandlerExecMultiple::startCmd()
|
||||
{
|
||||
@ -90,6 +90,9 @@ bool MimeHandlerExecMultiple::readDataElement(string& name, string &data)
|
||||
LOGERR(("MHExecMultiple: getline error\n"));
|
||||
return false;
|
||||
}
|
||||
|
||||
LOGDEB1(("MHEM:rde: line [%s]\n", ibuf.c_str()));
|
||||
|
||||
// Empty line (end of message) ?
|
||||
if (!ibuf.compare("\n")) {
|
||||
LOGDEB(("MHExecMultiple: Got empty line\n"));
|
||||
@ -163,7 +166,7 @@ bool MimeHandlerExecMultiple::next_document()
|
||||
return false;
|
||||
}
|
||||
|
||||
if (m_cmd.getChildPid() < 0 && !startCmd()) {
|
||||
if (m_cmd.getChildPid() <= 0 && !startCmd()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
@ -14,13 +14,15 @@
|
||||
* Free Software Foundation, Inc.,
|
||||
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
#include "autoconfig.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <fcntl.h>
|
||||
#include <errno.h>
|
||||
#include <unistd.h>
|
||||
#include "safeunistd.h"
|
||||
#include <time.h>
|
||||
#include <cstdlib>
|
||||
#include "safesysstat.h"
|
||||
|
||||
#include <map>
|
||||
#include <sstream>
|
||||
|
||||
@ -22,7 +22,11 @@
|
||||
#include <sys/types.h>
|
||||
#include "safesysstat.h"
|
||||
#include <time.h>
|
||||
#ifndef _WIN32
|
||||
#include <regex.h>
|
||||
#else
|
||||
#include <regex>
|
||||
#endif
|
||||
|
||||
#include <cstring>
|
||||
#include <map>
|
||||
@ -70,6 +74,14 @@ static PTMutexInit o_mcache_mutex;
|
||||
* offsets for all message "From_" lines follow. The format is purely
|
||||
* binary, values are not even byte-swapped to be processor-independent.
|
||||
*/
|
||||
|
||||
#ifdef _WIN32
|
||||
// VC++ does not let us define an array of size o_b1size (because it is non-const??)
|
||||
#define M_o_b1size 1024
|
||||
#else
|
||||
#define M_o_b1size o_b1size
|
||||
#endif
|
||||
|
||||
class MboxCache {
|
||||
public:
|
||||
typedef MimeHandlerMbox::mbhoff_type mbhoff_type;
|
||||
@ -98,7 +110,7 @@ public:
|
||||
}
|
||||
FpKeeper keeper(&fp);
|
||||
|
||||
char blk1[o_b1size];
|
||||
char blk1[M_o_b1size];
|
||||
if (fread(blk1, 1, o_b1size, fp) != o_b1size) {
|
||||
LOGDEB0(("MboxCache::get_offsets: read blk1 errno %d\n", errno));
|
||||
return -1;
|
||||
@ -116,7 +128,7 @@ public:
|
||||
return -1;
|
||||
}
|
||||
mbhoff_type offset = -1;
|
||||
int ret;
|
||||
size_t ret;
|
||||
if ((ret = fread(&offset, 1, sizeof(mbhoff_type), fp))
|
||||
!= sizeof(mbhoff_type)) {
|
||||
LOGDEB0(("MboxCache::get_offsets: read ret %d errno %d\n",
|
||||
@ -184,7 +196,7 @@ public:
|
||||
m_dir = "mboxcache";
|
||||
m_dir = path_tildexpand(m_dir);
|
||||
// If not an absolute path, compute relative to config dir
|
||||
if (m_dir.at(0) != '/')
|
||||
if (!path_isabsolute(m_dir))
|
||||
m_dir = path_cat(config->getConfDir(), m_dir);
|
||||
m_ok = true;
|
||||
}
|
||||
@ -226,7 +238,6 @@ private:
|
||||
};
|
||||
|
||||
const size_t MboxCache::o_b1size = 1024;
|
||||
|
||||
static class MboxCache o_mcache;
|
||||
|
||||
static const string cstr_keyquirks("mhmboxquirks");
|
||||
@ -307,7 +318,7 @@ bool MimeHandlerMbox::set_document_file(const string& mt, const string &fn)
|
||||
typedef char line_type[LL+10];
|
||||
static inline void stripendnl(line_type& line, int& ll)
|
||||
{
|
||||
ll = strlen(line);
|
||||
ll = int(strlen(line));
|
||||
while (ll > 0) {
|
||||
if (line[ll-1] == '\n' || line[ll-1] == '\r') {
|
||||
line[ll-1] = 0;
|
||||
@ -376,9 +387,20 @@ static const char *frompat =
|
||||
// exactly like: From ^M (From followed by space and eol). We only
|
||||
// test for this if QUIRKS_TBIRD is set
|
||||
static const char *miniTbirdFrom = "^From $";
|
||||
|
||||
#ifndef _WIN32
|
||||
static regex_t fromregex;
|
||||
static regex_t minifromregex;
|
||||
#define M_regexec(A,B,C,D,E) regexec(&(A),B,C,D,E)
|
||||
#else
|
||||
basic_regex<char> fromregex;
|
||||
basic_regex<char> minifromregex;
|
||||
#define REG_ICASE std::regex_constants::icase
|
||||
#define REG_NOSUB std::regex_constants::nosubs
|
||||
#define REG_EXTENDED std::regex_constants::extended
|
||||
#define M_regexec(A, B, C, D, E) regex_match(B,A)
|
||||
|
||||
#endif
|
||||
|
||||
static bool regcompiled;
|
||||
static PTMutexInit o_regex_mutex;
|
||||
|
||||
@ -390,8 +412,13 @@ static void compileregexes()
|
||||
// that we are alone.
|
||||
if (regcompiled)
|
||||
return;
|
||||
#ifndef _WIN32
|
||||
regcomp(&fromregex, frompat, REG_NOSUB|REG_EXTENDED);
|
||||
regcomp(&minifromregex, miniTbirdFrom, REG_NOSUB|REG_EXTENDED);
|
||||
#else
|
||||
fromregex = basic_regex<char>(frompat, REG_NOSUB | REG_EXTENDED);
|
||||
minifromregex = basic_regex<char>(miniTbirdFrom, REG_NOSUB | REG_EXTENDED);
|
||||
#endif
|
||||
regcompiled = true;
|
||||
}
|
||||
|
||||
@ -440,9 +467,9 @@ bool MimeHandlerMbox::next_document()
|
||||
(off = o_mcache.get_offset(m_config, m_udi, mtarg)) >= 0 &&
|
||||
fseeko(fp, (off_t)off, SEEK_SET) >= 0 &&
|
||||
fgets(line, LL, fp) &&
|
||||
(!regexec(&fromregex, line, 0, 0, 0) ||
|
||||
(!M_regexec(fromregex, line, 0, 0, 0) ||
|
||||
((m_quirks & MBOXQUIRK_TBIRD) &&
|
||||
!regexec(&minifromregex, line, 0, 0, 0))) ) {
|
||||
!M_regexec(minifromregex, line, 0, 0, 0))) ) {
|
||||
LOGDEB0(("MimeHandlerMbox: Cache: From_ Ok\n"));
|
||||
fseeko(fp, (off_t)off, SEEK_SET);
|
||||
m_msgnum = mtarg -1;
|
||||
@ -485,9 +512,9 @@ bool MimeHandlerMbox::next_document()
|
||||
/* The 'F' compare is redundant but it improves performance
|
||||
A LOT */
|
||||
if (line[0] == 'F' && (
|
||||
!regexec(&fromregex, line, 0, 0, 0) ||
|
||||
!M_regexec(fromregex, line, 0, 0, 0) ||
|
||||
((m_quirks & MBOXQUIRK_TBIRD) &&
|
||||
!regexec(&minifromregex, line, 0, 0, 0)))
|
||||
!M_regexec(minifromregex, line, 0, 0, 0)))
|
||||
) {
|
||||
LOGDEB1(("MimeHandlerMbox: msgnum %d, "
|
||||
"From_ at line %d: [%s]\n", m_msgnum, m_lineno, line));
|
||||
|
||||
59
src/internfile/mh_null.h
Normal file
59
src/internfile/mh_null.h
Normal file
@ -0,0 +1,59 @@
|
||||
/* Copyright (C) 2004 J.F.Dockes
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc.,
|
||||
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
#ifndef _MH_NULL_H_INCLUDED_
|
||||
#define _MH_NULL_H_INCLUDED_
|
||||
|
||||
// It may make sense in some cases to set this null filter (no output)
|
||||
// instead of using recoll_noindex or leaving the default filter in
|
||||
// case one doesn't want to install it: this will avoid endless retries
|
||||
// to reindex the affected files, as recoll will think it has succeeded
|
||||
// indexing them. Downside: the files won't be indexed when one
|
||||
// actually installs the real filter, will need a -z
|
||||
// Actually used for empty files
|
||||
// Associated to application/x-zerosize, so use
|
||||
// <mimetype> = internal application/x-zerosize
|
||||
// in mimeconf
|
||||
#include <string>
|
||||
#include "cstr.h"
|
||||
#include "mimehandler.h"
|
||||
|
||||
// Null filter: a RecollFilter which yields exactly one document per input
// file, with cstr_null as content and cstr_textplain as MIME type. The
// input file itself is never read by this class.
// mhFactory() selects it for the "application/x-zerosize" MIME type.
class MimeHandlerNull : public RecollFilter {
|
||||
public:
|
||||
// Only forwards the configuration and handler id to the base class.
MimeHandlerNull(RclConfig *cnf, const std::string& id)
|
||||
: RecollFilter(cnf, id)
|
||||
{
|
||||
}
|
||||
virtual ~MimeHandlerNull()
|
||||
{
|
||||
}
|
||||
// Lets the base class record the file, then marks one document as
// pending. Always returns true: the base class result is ignored.
virtual bool set_document_file(const string& mt, const string& fn)
|
||||
{
|
||||
RecollFilter::set_document_file(mt, fn);
|
||||
return m_havedoc = true;
|
||||
}
|
||||
// Produces the single pending document. Returns false when no document
// is pending (set_document_file() not called, or document already
// consumed); otherwise fills in the metadata and returns true.
virtual bool next_document()
|
||||
{
|
||||
if (m_havedoc == false)
|
||||
return false;
|
||||
// Consume the pending document so that the next call returns false.
m_havedoc = false;
|
||||
m_metaData[cstr_dj_keycontent] = cstr_null;
|
||||
m_metaData[cstr_dj_keymt] = cstr_textplain;
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
#endif /* _MH_NULL_H_INCLUDED_ */
|
||||
@ -18,7 +18,7 @@
|
||||
#define _MH_SYMLINK_H_INCLUDED_
|
||||
|
||||
#include <string>
|
||||
#include <unistd.h>
|
||||
#include "safeunistd.h"
|
||||
#include <errno.h>
|
||||
|
||||
#include "cstr.h"
|
||||
|
||||
@ -18,10 +18,13 @@
|
||||
|
||||
#include <stdio.h>
|
||||
#include <errno.h>
|
||||
#include "safefcntl.h"
|
||||
#include <sys/types.h>
|
||||
#include "safesysstat.h"
|
||||
#include "safeunistd.h"
|
||||
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
using namespace std;
|
||||
|
||||
#include "cstr.h"
|
||||
#include "mh_text.h"
|
||||
@ -32,16 +35,23 @@ using namespace std;
|
||||
#include "pxattr.h"
|
||||
#include "pathut.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
const int MB = 1024*1024;
|
||||
const int KB = 1024;
|
||||
|
||||
// Process a plain text file
|
||||
bool MimeHandlerText::set_document_file(const string& mt, const string &fn)
|
||||
{
|
||||
LOGDEB(("MimeHandlerText::set_document_file: [%s]\n", fn.c_str()));
|
||||
LOGDEB(("MimeHandlerText::set_document_file: [%s] offs %lld\n",
|
||||
fn.c_str(), m_offs));
|
||||
|
||||
RecollFilter::set_document_file(mt, fn);
|
||||
|
||||
m_fn = fn;
|
||||
// This should not be necessary, but it happens on msw that offset is large
|
||||
// negative at this point, could not find the reason (still trying).
|
||||
m_offs = 0;
|
||||
|
||||
// file size for oversize check
|
||||
long long fsize = path_filesize(m_fn);
|
||||
@ -51,9 +61,11 @@ bool MimeHandlerText::set_document_file(const string& mt, const string &fn)
|
||||
return false;
|
||||
}
|
||||
|
||||
#ifndef _WIN32
|
||||
// Check for charset defined in extended attribute as per:
|
||||
// http://freedesktop.org/wiki/CommonExtendedAttributes
|
||||
pxattr::get(m_fn, "charset", &m_charsetfromxattr);
|
||||
#endif
|
||||
|
||||
// Max file size parameter: texts over this size are not indexed
|
||||
int maxmbs = 20;
|
||||
|
||||
@ -19,7 +19,6 @@
|
||||
#include <sys/types.h>
|
||||
|
||||
#include <string>
|
||||
using std::string;
|
||||
|
||||
#include "mimehandler.h"
|
||||
|
||||
@ -30,22 +29,22 @@ using std::string;
|
||||
*/
|
||||
class MimeHandlerText : public RecollFilter {
|
||||
public:
|
||||
MimeHandlerText(RclConfig *cnf, const string& id)
|
||||
: RecollFilter(cnf, id), m_paging(false), m_offs(0)
|
||||
MimeHandlerText(RclConfig *cnf, const std::string& id)
|
||||
: RecollFilter(cnf, id), m_paging(false), m_offs(0), m_pagesz(0)
|
||||
{
|
||||
}
|
||||
virtual ~MimeHandlerText()
|
||||
{
|
||||
}
|
||||
virtual bool set_document_file(const string& mt, const string &file_path);
|
||||
virtual bool set_document_string(const string&, const string&);
|
||||
virtual bool set_document_file(const std::string& mt, const std::string &file_path);
|
||||
virtual bool set_document_string(const std::string&, const std::string&);
|
||||
virtual bool is_data_input_ok(DataInput input) const {
|
||||
if (input == DOCUMENT_FILE_NAME || input == DOCUMENT_STRING)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
virtual bool next_document();
|
||||
virtual bool skip_to_document(const string& s);
|
||||
virtual bool skip_to_document(const std::string& s);
|
||||
virtual void clear()
|
||||
{
|
||||
m_paging = false;
|
||||
@ -56,11 +55,11 @@ class MimeHandlerText : public RecollFilter {
|
||||
}
|
||||
private:
|
||||
bool m_paging;
|
||||
string m_text;
|
||||
string m_fn;
|
||||
std::string m_text;
|
||||
std::string m_fn;
|
||||
off_t m_offs; // Offset of next read in file if we're paging
|
||||
size_t m_pagesz;
|
||||
string m_charsetfromxattr;
|
||||
std::string m_charsetfromxattr;
|
||||
|
||||
bool readnext();
|
||||
};
|
||||
|
||||
@ -31,7 +31,6 @@ using namespace std;
|
||||
#include "rclconfig.h"
|
||||
#include "smallut.h"
|
||||
#include "md5ut.h"
|
||||
|
||||
#include "mh_exec.h"
|
||||
#include "mh_execm.h"
|
||||
#include "mh_html.h"
|
||||
@ -40,6 +39,7 @@ using namespace std;
|
||||
#include "mh_text.h"
|
||||
#include "mh_symlink.h"
|
||||
#include "mh_unknown.h"
|
||||
#include "mh_null.h"
|
||||
#include "ptmutex.h"
|
||||
|
||||
// Performance help: we use a pool of already known and created
|
||||
@ -163,6 +163,10 @@ static RecollFilter *mhFactory(RclConfig *config, const string &mime,
|
||||
LOGDEB2(("mhFactory(%s): ret MimeHandlerSymlink\n", mime.c_str()));
|
||||
MD5String("MimeHandlerSymlink", id);
|
||||
return nobuild ? 0 : new MimeHandlerSymlink(config, id);
|
||||
} else if ("application/x-zerosize" == lmime) {
|
||||
LOGDEB(("mhFactory(%s): ret MimeHandlerNull\n", mime.c_str()));
|
||||
MD5String("MimeHandlerNull", id);
|
||||
return nobuild ? 0 : new MimeHandlerNull(config, id);
|
||||
} else if (lmime.find("text/") == 0) {
|
||||
// Try to handle unknown text/xx as text/plain. This
|
||||
// only happen if the text/xx was defined as "internal" in
|
||||
@ -206,7 +210,7 @@ MimeHandlerExec *mhExecFactory(RclConfig *cfg, const string& mtype, string& hs,
|
||||
}
|
||||
|
||||
// Split command name and args, and build exec object
|
||||
list<string> cmdtoks;
|
||||
vector<string> cmdtoks;
|
||||
stringToStrings(cmdstr, cmdtoks);
|
||||
if (cmdtoks.empty()) {
|
||||
LOGERR(("mhExecFactory: bad config line for [%s]: [%s]\n",
|
||||
@ -216,7 +220,22 @@ MimeHandlerExec *mhExecFactory(RclConfig *cfg, const string& mtype, string& hs,
|
||||
MimeHandlerExec *h = multiple ?
|
||||
new MimeHandlerExecMultiple(cfg, id) :
|
||||
new MimeHandlerExec(cfg, id);
|
||||
list<string>::iterator it = cmdtoks.begin();
|
||||
vector<string>::iterator it = cmdtoks.begin();
|
||||
|
||||
// Special-case python and perl on windows: we need to also locate the
|
||||
// first argument which is the script name "python somescript.py".
|
||||
// On Unix, thanks to #!, we usually just run "somescript.py", but need
|
||||
// the same change if we ever want to use the same cmdling as windows
|
||||
if (!stringlowercmp("python", *it) || !stringlowercmp("perl", *it)) {
|
||||
if (cmdtoks.size() < 2) {
|
||||
LOGERR(("mhExecFactory: python/perl cmd: no script?. [%s]: [%s]\n",
|
||||
mtype.c_str(), hs.c_str()));
|
||||
}
|
||||
vector<string>::iterator it1(it);
|
||||
it1++;
|
||||
*it1 = cfg->findFilter(*it1);
|
||||
}
|
||||
|
||||
h->params.push_back(cfg->findFilter(*it++));
|
||||
h->params.insert(h->params.end(), it, cmdtoks.end());
|
||||
|
||||
|
||||
@ -86,12 +86,12 @@ public:
|
||||
return false;
|
||||
}
|
||||
virtual bool set_document_data(const std::string& mtype,
|
||||
const char *cp, unsigned int sz)
|
||||
const char *cp, size_t sz)
|
||||
{
|
||||
return set_document_string(mtype, std::string(cp, sz));
|
||||
}
|
||||
|
||||
virtual void set_docsize(size_t size)
|
||||
virtual void set_docsize(off_t size)
|
||||
{
|
||||
char csize[30];
|
||||
sprintf(csize, "%lld", (long long)size);
|
||||
|
||||
@ -23,6 +23,10 @@
|
||||
* -----END-LICENCE-----
|
||||
*/
|
||||
#include <time.h>
|
||||
#ifdef _WIN32
|
||||
// Local implementation in windows directory
|
||||
#include "strptime.h"
|
||||
#endif
|
||||
#include <stdio.h>
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
|
||||
@ -108,8 +108,12 @@ bool Uncomp::uncompressfile(const string& ifn,
|
||||
}
|
||||
|
||||
// Execute command and retrieve output file name, check that it exists
|
||||
#ifndef _WIN32
|
||||
ExecCmd ex;
|
||||
int status = ex.doexec(cmd, args, 0, &tfile);
|
||||
#else
|
||||
int status = -1;
|
||||
#endif
|
||||
if (status || tfile.empty()) {
|
||||
LOGERR(("uncompressfile: doexec: failed for [%s] status 0x%x\n",
|
||||
ifn.c_str(), status));
|
||||
|
||||
@ -14,7 +14,6 @@
|
||||
* Free Software Foundation, Inc.,
|
||||
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
#include <unistd.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstdio>
|
||||
|
||||
@ -14,11 +14,8 @@
|
||||
* Free Software Foundation, Inc.,
|
||||
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
|
||||
|
||||
#include "autoconfig.h"
|
||||
|
||||
#include <unistd.h>
|
||||
#include <cstdlib>
|
||||
|
||||
#include <qapplication.h>
|
||||
@ -164,14 +161,6 @@ static void recollCleanup()
|
||||
LOGDEB2(("recollCleanup: done\n"));
|
||||
}
|
||||
|
||||
static void sigcleanup(int)
|
||||
{
|
||||
// We used to not call exit from here, because of the idxthread, but
|
||||
// this is now gone, so...
|
||||
recollNeedsExit = 1;
|
||||
exit(1);
|
||||
}
|
||||
|
||||
void applyStyleSheet(const QString& ssfname)
|
||||
{
|
||||
const char *cfname = (const char *)ssfname.toLocal8Bit();
|
||||
@ -305,7 +294,7 @@ int main(int argc, char **argv)
|
||||
|
||||
|
||||
string reason;
|
||||
theconfig = recollinit(recollCleanup, sigcleanup, reason, &a_config);
|
||||
theconfig = recollinit(recollCleanup, 0, reason, &a_config);
|
||||
if (!theconfig || !theconfig->ok()) {
|
||||
QString msg = "Configuration problem: ";
|
||||
msg += QString::fromUtf8(reason.c_str());
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -16,6 +16,8 @@
|
||||
*/
|
||||
#include "autoconfig.h"
|
||||
|
||||
#include <signal.h>
|
||||
|
||||
#include <QMessageBox>
|
||||
#include <QTimer>
|
||||
|
||||
|
||||
@ -310,7 +310,7 @@ void RclMain::startNativeViewer(Rcl::Doc doc, int pagenum, QString term)
|
||||
istempfile = true;
|
||||
rememberTempFile(temp);
|
||||
fn = temp->filename();
|
||||
url = string("file://") + fn;
|
||||
url = path_pathtofileurl(fn);
|
||||
}
|
||||
|
||||
// If using an actual file, check that it exists, and if it is
|
||||
@ -335,7 +335,7 @@ void RclMain::startNativeViewer(Rcl::Doc doc, int pagenum, QString term)
|
||||
if (temp) {
|
||||
rememberTempFile(temp);
|
||||
fn = temp->filename();
|
||||
url = string("file://") + fn;
|
||||
url = path_pathtofileurl(fn);
|
||||
}
|
||||
}
|
||||
|
||||
@ -430,16 +430,16 @@ void RclMain::startManual()
|
||||
void RclMain::startManual(const string& index)
|
||||
{
|
||||
Rcl::Doc doc;
|
||||
doc.url = "file://";
|
||||
doc.url = path_cat(doc.url, theconfig->getDatadir());
|
||||
doc.url = path_cat(doc.url, "doc");
|
||||
doc.url = path_cat(doc.url, "usermanual.html");
|
||||
string path = theconfig->getDatadir();
|
||||
path = path_cat(path, "doc");
|
||||
path = path_cat(path, "usermanual.html");
|
||||
LOGDEB(("RclMain::startManual: help index is %s\n",
|
||||
index.empty()?"(null)":index.c_str()));
|
||||
if (!index.empty()) {
|
||||
doc.url += "#";
|
||||
doc.url += index;
|
||||
path += "#";
|
||||
path += index;
|
||||
}
|
||||
doc.url = path_pathtofileurl(path);
|
||||
doc.mimetype = "text/html";
|
||||
startNativeViewer(doc);
|
||||
}
|
||||
|
||||
@ -16,9 +16,6 @@
|
||||
*/
|
||||
#include "autoconfig.h"
|
||||
|
||||
#include <fcntl.h>
|
||||
#include "safeunistd.h"
|
||||
|
||||
#include <utility>
|
||||
#include MEMORY_INCLUDE
|
||||
|
||||
@ -138,10 +135,14 @@ void RclMain::init()
|
||||
|
||||
// idxstatus file. Make sure it exists before trying to watch it
|
||||
// (case where we're started on an older index, or if the status
|
||||
// file was deleted since indexing
|
||||
::close(::open(theconfig->getIdxStatusFile().c_str(), O_CREAT, 0600));
|
||||
m_watcher.addPath(QString::fromLocal8Bit(
|
||||
theconfig->getIdxStatusFile().c_str()));
|
||||
// file was deleted since indexing)
|
||||
QString idxfn =
|
||||
QString::fromLocal8Bit(theconfig->getIdxStatusFile().c_str());
|
||||
QFile qf(idxfn);
|
||||
qf.open(QIODevice::ReadWrite);
|
||||
qf.setPermissions(QFile::ReadOwner|QFile::WriteOwner);
|
||||
qf.close();
|
||||
m_watcher.addPath(idxfn);
|
||||
// At least some versions of qt4 don't display the status bar if
|
||||
// it's not created here.
|
||||
(void)statusBar();
|
||||
@ -728,7 +729,7 @@ void RclMain::initiateQuery()
|
||||
qApp->processEvents();
|
||||
if (progress.wasCanceled()) {
|
||||
// Just get out of there asap.
|
||||
_exit(1);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
qApp->processEvents();
|
||||
|
||||
@ -19,6 +19,7 @@
|
||||
#include <stdio.h>
|
||||
#include "safesysstat.h"
|
||||
#include "safeunistd.h"
|
||||
#include <signal.h>
|
||||
|
||||
#include <string>
|
||||
|
||||
|
||||
@ -16,7 +16,6 @@
|
||||
*/
|
||||
#include "autoconfig.h"
|
||||
|
||||
#include <unistd.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include <string>
|
||||
|
||||
@ -17,7 +17,6 @@
|
||||
#include "autoconfig.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <list>
|
||||
|
||||
@ -14,6 +14,8 @@
|
||||
* Free Software Foundation, Inc.,
|
||||
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
#include "autoconfig.h"
|
||||
|
||||
#include "docseq.h"
|
||||
#include "filtseq.h"
|
||||
#include "sortseq.h"
|
||||
|
||||
@ -14,17 +14,20 @@
|
||||
* Free Software Foundation, Inc.,
|
||||
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
#include "autoconfig.h"
|
||||
|
||||
#include <math.h>
|
||||
#include <time.h>
|
||||
|
||||
#include <list>
|
||||
using std::list;
|
||||
|
||||
#include "docseqdb.h"
|
||||
#include "rcldb.h"
|
||||
#include "debuglog.h"
|
||||
#include "wasatorcl.h"
|
||||
|
||||
using std::list;
|
||||
|
||||
DocSequenceDb::DocSequenceDb(STD_SHARED_PTR<Rcl::Query> q, const string &t,
|
||||
STD_SHARED_PTR<Rcl::SearchData> sdata)
|
||||
: DocSequence(t), m_q(q), m_sdata(sdata), m_fsdata(sdata),
|
||||
|
||||
@ -35,7 +35,7 @@ using std::list;
|
||||
bool RclDHistoryEntry::encode(string& value)
|
||||
{
|
||||
char chartime[30];
|
||||
sprintf(chartime,"%ld", unixtime);
|
||||
sprintf(chartime,"%lld", (long long)unixtime);
|
||||
string budi;
|
||||
base64_encode(udi, budi);
|
||||
value = string("U ") + string(chartime) + " " + budi;
|
||||
@ -161,5 +161,5 @@ int DocSequenceHistory::getResCnt()
|
||||
{
|
||||
if (m_hlist.empty())
|
||||
m_hlist = getDocHistory(m_hist);
|
||||
return m_hlist.size();
|
||||
return int(m_hlist.size());
|
||||
}
|
||||
|
||||
@ -16,6 +16,7 @@
|
||||
*/
|
||||
#ifndef _DOCSEQHIST_H_INCLUDED_
|
||||
#define _DOCSEQHIST_H_INCLUDED_
|
||||
#include <time.h>
|
||||
|
||||
#include "docseq.h"
|
||||
#include "dynconf.h"
|
||||
@ -28,13 +29,13 @@ namespace Rcl {
|
||||
class RclDHistoryEntry : public DynConfEntry {
|
||||
public:
|
||||
RclDHistoryEntry() : unixtime(0) {}
|
||||
RclDHistoryEntry(long t, const string& u)
|
||||
RclDHistoryEntry(time_t t, const string& u)
|
||||
: unixtime(t), udi(u) {}
|
||||
virtual ~RclDHistoryEntry() {}
|
||||
virtual bool decode(const string &value);
|
||||
virtual bool encode(string& value);
|
||||
virtual bool equal(const DynConfEntry& other);
|
||||
long unixtime;
|
||||
time_t unixtime;
|
||||
string udi;
|
||||
};
|
||||
|
||||
@ -57,7 +58,7 @@ private:
|
||||
Rcl::Db *m_db;
|
||||
RclDynConf *m_hist;
|
||||
int m_prevnum;
|
||||
long m_prevtime;
|
||||
time_t m_prevtime;
|
||||
std::string m_description; // This is just an nls translated 'doc history'
|
||||
std::list<RclDHistoryEntry> m_hlist;
|
||||
std::list<RclDHistoryEntry>::const_iterator m_it;
|
||||
|
||||
187
src/query/location.hh
Normal file
187
src/query/location.hh
Normal file
@ -0,0 +1,187 @@
|
||||
// A Bison parser, made by GNU Bison 3.0.2.
|
||||
|
||||
// Locations for Bison parsers in C++
|
||||
|
||||
// Copyright (C) 2002-2013 Free Software Foundation, Inc.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
// As a special exception, you may create a larger work that contains
|
||||
// part or all of the Bison parser skeleton and distribute that work
|
||||
// under terms of your choice, so long as that work isn't itself a
|
||||
// parser generator using the skeleton or a modified version thereof
|
||||
// as a parser skeleton. Alternatively, if you modify or redistribute
|
||||
// the parser skeleton itself, you may (at your option) remove this
|
||||
// special exception, which will cause the skeleton and the resulting
|
||||
// Bison output files to be licensed under the GNU General Public
|
||||
// License without this special exception.
|
||||
|
||||
// This special exception was added by the Free Software Foundation in
|
||||
// version 2.2 of Bison.
|
||||
|
||||
/**
|
||||
** \file location.hh
|
||||
** Define the yy::location class.
|
||||
*/
|
||||
|
||||
#ifndef YY_YY_LOCATION_HH_INCLUDED
|
||||
# define YY_YY_LOCATION_HH_INCLUDED
|
||||
|
||||
# include "position.hh"
|
||||
|
||||
|
||||
namespace yy {
|
||||
#line 46 "location.hh" // location.cc:291
|
||||
/// Abstract a location.
|
||||
class location
|
||||
{
|
||||
public:
|
||||
|
||||
/// Construct a location from \a b to \a e.
|
||||
location (const position& b, const position& e)
|
||||
: begin (b)
|
||||
, end (e)
|
||||
{
|
||||
}
|
||||
|
||||
/// Construct a 0-width location in \a p.
|
||||
explicit location (const position& p = position ())
|
||||
: begin (p)
|
||||
, end (p)
|
||||
{
|
||||
}
|
||||
|
||||
/// Construct a 0-width location in \a f, \a l, \a c.
|
||||
explicit location (std::string* f,
|
||||
unsigned int l = 1u,
|
||||
unsigned int c = 1u)
|
||||
: begin (f, l, c)
|
||||
, end (f, l, c)
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
/// Initialization.
|
||||
void initialize (std::string* f = YY_NULLPTR,
|
||||
unsigned int l = 1u,
|
||||
unsigned int c = 1u)
|
||||
{
|
||||
begin.initialize (f, l, c);
|
||||
end = begin;
|
||||
}
|
||||
|
||||
/** \name Line and Column related manipulators
|
||||
** \{ */
|
||||
public:
|
||||
/// Reset initial location to final location.
|
||||
void step ()
|
||||
{
|
||||
begin = end;
|
||||
}
|
||||
|
||||
/// Extend the current location to the COUNT next columns.
|
||||
void columns (int count = 1)
|
||||
{
|
||||
end += count;
|
||||
}
|
||||
|
||||
/// Extend the current location to the COUNT next lines.
|
||||
void lines (int count = 1)
|
||||
{
|
||||
end.lines (count);
|
||||
}
|
||||
/** \} */
|
||||
|
||||
|
||||
public:
|
||||
/// Beginning of the located region.
|
||||
position begin;
|
||||
/// End of the located region.
|
||||
position end;
|
||||
};
|
||||
|
||||
/// Join two location objects to create a location.
|
||||
inline location operator+ (location res, const location& end)
|
||||
{
|
||||
res.end = end.end;
|
||||
return res;
|
||||
}
|
||||
|
||||
/// Change end position in place.
|
||||
inline location& operator+= (location& res, int width)
|
||||
{
|
||||
res.columns (width);
|
||||
return res;
|
||||
}
|
||||
|
||||
/// Change end position.
|
||||
inline location operator+ (location res, int width)
|
||||
{
|
||||
return res += width;
|
||||
}
|
||||
|
||||
/// Change end position in place.
|
||||
inline location& operator-= (location& res, int width)
|
||||
{
|
||||
return res += -width;
|
||||
}
|
||||
|
||||
/// Change end position.
|
||||
inline location operator- (const location& begin, int width)
|
||||
{
|
||||
return begin + -width;
|
||||
}
|
||||
|
||||
/// Compare two location objects.
|
||||
inline bool
|
||||
operator== (const location& loc1, const location& loc2)
|
||||
{
|
||||
return loc1.begin == loc2.begin && loc1.end == loc2.end;
|
||||
}
|
||||
|
||||
/// Compare two location objects.
|
||||
inline bool
|
||||
operator!= (const location& loc1, const location& loc2)
|
||||
{
|
||||
return !(loc1 == loc2);
|
||||
}
|
||||
|
||||
/** \brief Intercept output stream redirection.
|
||||
** \param ostr the destination output stream
|
||||
** \param loc a reference to the location to redirect
|
||||
**
|
||||
** Avoid duplicate information.
|
||||
*/
|
||||
template <typename YYChar>
|
||||
inline std::basic_ostream<YYChar>&
|
||||
operator<< (std::basic_ostream<YYChar>& ostr, const location& loc)
|
||||
{
|
||||
unsigned int end_col = 0 < loc.end.column ? loc.end.column - 1 : 0;
|
||||
ostr << loc.begin// << "(" << loc.end << ") "
|
||||
;
|
||||
if (loc.end.filename
|
||||
&& (!loc.begin.filename
|
||||
|| *loc.begin.filename != *loc.end.filename))
|
||||
ostr << '-' << loc.end.filename << ':' << loc.end.line << '.' << end_col;
|
||||
else if (loc.begin.line < loc.end.line)
|
||||
ostr << '-' << loc.end.line << '.' << end_col;
|
||||
else if (loc.begin.column < end_col)
|
||||
ostr << '-' << end_col;
|
||||
return ostr;
|
||||
}
|
||||
|
||||
|
||||
} // yy
|
||||
#line 187 "location.hh" // location.cc:291
|
||||
#endif // !YY_YY_LOCATION_HH_INCLUDED
|
||||
@ -15,7 +15,7 @@
|
||||
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
|
||||
|
||||
#include <limits.h>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <list>
|
||||
@ -54,8 +54,8 @@ struct MatchEntry {
|
||||
pair<int, int> offs;
|
||||
// Index of the search group this comes from: this is to relate a
|
||||
// match to the original user input.
|
||||
unsigned int grpidx;
|
||||
MatchEntry(int sta, int sto, unsigned int idx)
|
||||
size_t grpidx;
|
||||
MatchEntry(int sta, int sto, size_t idx)
|
||||
: offs(sta, sto), grpidx(idx)
|
||||
{
|
||||
}
|
||||
@ -105,7 +105,7 @@ class TextSplitPTR : public TextSplit {
|
||||
// pos, bts, bte));
|
||||
|
||||
// If this word is a search term, remember its byte-offset span.
|
||||
map<string, unsigned int>::const_iterator it = m_terms.find(dumb);
|
||||
map<string, size_t>::const_iterator it = m_terms.find(dumb);
|
||||
if (it != m_terms.end()) {
|
||||
tboffs.push_back(MatchEntry(bts, bte, (*it).second));
|
||||
}
|
||||
@ -135,7 +135,7 @@ private:
|
||||
int m_wcount;
|
||||
|
||||
// In: user query terms
|
||||
map<string, unsigned int> m_terms;
|
||||
map<string, size_t> m_terms;
|
||||
|
||||
// m_gterms holds all the terms in m_groups, as a set for quick lookup
|
||||
set<string> m_gterms;
|
||||
@ -214,7 +214,7 @@ static bool do_proximity_test(int window, vector<vector<int>* >& plists,
|
||||
bool TextSplitPTR::matchGroup(unsigned int grpidx)
|
||||
{
|
||||
const vector<string>& terms = m_hdata.groups[grpidx];
|
||||
int window = m_hdata.groups[grpidx].size() + m_hdata.slacks[grpidx];
|
||||
int window = int(m_hdata.groups[grpidx].size() + m_hdata.slacks[grpidx]);
|
||||
|
||||
LOGDEB1(("TextSplitPTR::matchGroup:d %d: %s\n", window,
|
||||
vecStringToString(terms).c_str()));
|
||||
@ -270,7 +270,7 @@ bool TextSplitPTR::matchGroup(unsigned int grpidx)
|
||||
for (vector<int>::iterator it = plists[0]->begin();
|
||||
it != plists[0]->end(); it++) {
|
||||
int pos = *it;
|
||||
int sta = int(10E9), sto = 0;
|
||||
int sta = INT_MAX, sto = 0;
|
||||
LOGDEB2(("MatchGroup: Testing at pos %d\n", pos));
|
||||
if (do_proximity_test(window,plists, 1, pos, pos, &sta, &sto, minpos)) {
|
||||
LOGDEB1(("TextSplitPTR::matchGroup: MATCH termpos [%d,%d]\n",
|
||||
@ -417,10 +417,10 @@ bool PlainToRich::plaintorich(const string& in,
|
||||
// If we still have terms positions, check (byte) position. If
|
||||
// we are at or after a term match, mark.
|
||||
if (tPosIt != tPosEnd) {
|
||||
int ibyteidx = chariter.getBpos();
|
||||
int ibyteidx = int(chariter.getBpos());
|
||||
if (ibyteidx == tPosIt->offs.first) {
|
||||
if (!intag && ibyteidx >= (int)headend) {
|
||||
*olit += startMatch(tPosIt->grpidx);
|
||||
*olit += startMatch((unsigned int)(tPosIt->grpidx));
|
||||
}
|
||||
inrcltag = 1;
|
||||
} else if (ibyteidx == tPosIt->offs.second) {
|
||||
|
||||
180
src/query/position.hh
Normal file
180
src/query/position.hh
Normal file
@ -0,0 +1,180 @@
|
||||
// A Bison parser, made by GNU Bison 3.0.2.
|
||||
|
||||
// Positions for Bison parsers in C++
|
||||
|
||||
// Copyright (C) 2002-2013 Free Software Foundation, Inc.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
// As a special exception, you may create a larger work that contains
|
||||
// part or all of the Bison parser skeleton and distribute that work
|
||||
// under terms of your choice, so long as that work isn't itself a
|
||||
// parser generator using the skeleton or a modified version thereof
|
||||
// as a parser skeleton. Alternatively, if you modify or redistribute
|
||||
// the parser skeleton itself, you may (at your option) remove this
|
||||
// special exception, which will cause the skeleton and the resulting
|
||||
// Bison output files to be licensed under the GNU General Public
|
||||
// License without this special exception.
|
||||
|
||||
// This special exception was added by the Free Software Foundation in
|
||||
// version 2.2 of Bison.
|
||||
|
||||
/**
|
||||
** \file position.hh
|
||||
** Define the yy::position class.
|
||||
*/
|
||||
|
||||
#ifndef YY_YY_POSITION_HH_INCLUDED
|
||||
# define YY_YY_POSITION_HH_INCLUDED
|
||||
|
||||
# include <algorithm> // std::max
|
||||
# include <iostream>
|
||||
# include <string>
|
||||
|
||||
# ifndef YY_NULLPTR
#  if defined __cplusplus && 201103L <= __cplusplus
#   define YY_NULLPTR nullptr
#  else
#   define YY_NULLPTR 0
#  endif
# endif


namespace yy {
#line 56 "position.hh" // location.cc:291
  /// Abstract a position: a file name pointer, a line and a column.
  class position
  {
  public:
    /// Construct a position in file \a f, at line \a l and column \a c.
    explicit position (std::string* f = YY_NULLPTR,
                       unsigned int l = 1u,
                       unsigned int c = 1u)
      : filename (f)
      , line (l)
      , column (c)
    {
    }


    /// Initialization: overwrite all three members.
    void initialize (std::string* fn = YY_NULLPTR,
                     unsigned int l = 1u,
                     unsigned int c = 1u)
    {
      filename = fn;
      line = l;
      column = c;
    }

    /** \name Line and Column related manipulators
     ** \{ */
    /// (line related) Advance to the COUNT next lines.
    ///
    /// A non-zero \a count also resets the column to 1; a zero \a count
    /// leaves the position untouched. The line never goes below 1.
    void lines (int count = 1)
    {
      if (count)
        {
          column = 1u;
          line = add_ (line, count, 1);
        }
    }

    /// (column related) Advance to the COUNT next columns.
    ///
    /// The column never goes below 1.
    void columns (int count = 1)
    {
      column = add_ (column, count, 1);
    }
    /** \} */

    /// File name to which this position refers.
    std::string* filename;
    /// Current line number.
    unsigned int line;
    /// Current column number.
    unsigned int column;

  private:
    /// Compute max(min, lhs+rhs), \a rhs possibly being negative.
    ///
    /// A negative \a rhs whose magnitude is not smaller than \a lhs yields
    /// \a min rather than a wrapped-around unsigned value.
    static unsigned int add_ (unsigned int lhs, int rhs, unsigned int min)
    {
      if (0 <= rhs)
        return std::max (min, lhs + static_cast<unsigned int> (rhs));
      // Magnitude of the negative offset, computed by unsigned subtraction
      // so that INT_MIN is handled and no unary minus is applied to an
      // unsigned operand.
      const unsigned int dec = 0u - static_cast<unsigned int> (rhs);
      return dec < lhs ? std::max (min, lhs - dec) : min;
    }
  };

  /// Add and assign a position: advance \a res by \a width columns.
  inline position&
  operator+= (position& res, int width)
  {
    res.columns (width);
    return res;
  }

  /// Add a column offset to a position, returning the new position.
  inline position
  operator+ (position res, int width)
  {
    return res += width;
  }

  /// Subtract and assign a position: move \a res back by \a width columns.
  inline position&
  operator-= (position& res, int width)
  {
    return res += -width;
  }

  /// Subtract a column offset from a position, returning the new position.
  inline position
  operator- (position res, int width)
  {
    return res -= width;
  }

  /// Compare two position objects.
  ///
  /// File names match when the pointers are equal, or when both are
  /// non-null and point to equal strings.
  inline bool
  operator== (const position& pos1, const position& pos2)
  {
    return (pos1.line == pos2.line
            && pos1.column == pos2.column
            && (pos1.filename == pos2.filename
                || (pos1.filename && pos2.filename
                    && *pos1.filename == *pos2.filename)));
  }

  /// Compare two position objects: negation of operator==.
  inline bool
  operator!= (const position& pos1, const position& pos2)
  {
    return !(pos1 == pos2);
  }

  /** \brief Intercept output stream redirection.
   ** \param ostr the destination output stream
   ** \param pos a reference to the position to redirect
   ** \return \a ostr
   **
   ** Prints "FILE:LINE.COLUMN", or "LINE.COLUMN" when there is no file name.
   */
  template <typename YYChar>
  inline std::basic_ostream<YYChar>&
  operator<< (std::basic_ostream<YYChar>& ostr, const position& pos)
  {
    if (pos.filename)
      ostr << *pos.filename << ':';
    return ostr << pos.line << '.' << pos.column;
  }


} // yy
|
||||
#line 180 "position.hh" // location.cc:291
|
||||
#endif // !YY_YY_POSITION_HH_INCLUDED
|
||||
@ -77,7 +77,7 @@ void ResListPager::resultPageNext()
|
||||
if (m_winfirst < 0) {
|
||||
m_winfirst = 0;
|
||||
} else {
|
||||
m_winfirst += m_respage.size();
|
||||
m_winfirst += int(m_respage.size());
|
||||
}
|
||||
// Get the next page of results. Note that we look ahead by one to
|
||||
// determine if there is actually a next page
|
||||
@ -102,7 +102,7 @@ void ResListPager::resultPageNext()
|
||||
// Next button. We'd need to remove the Next link from the page
|
||||
// too.
|
||||
// Restore the m_winfirst value, let the current result vector alone
|
||||
m_winfirst -= m_respage.size();
|
||||
m_winfirst -= int(m_respage.size());
|
||||
} else {
|
||||
// No results at all (on first page)
|
||||
m_winfirst = -1;
|
||||
@ -213,9 +213,9 @@ void ResListPager::displayDoc(RclConfig *config, int i, Rcl::Doc& doc,
|
||||
// Size information. We print both doc and file if they differ a lot
|
||||
off_t fsize = -1, dsize = -1;
|
||||
if (!doc.dbytes.empty())
|
||||
dsize = atoll(doc.dbytes.c_str());
|
||||
dsize = static_cast<off_t>(atoll(doc.dbytes.c_str()));
|
||||
if (!doc.fbytes.empty())
|
||||
fsize = atoll(doc.fbytes.c_str());
|
||||
fsize = static_cast<off_t>(atoll(doc.fbytes.c_str()));
|
||||
string sizebuf;
|
||||
if (dsize > 0) {
|
||||
sizebuf = displayableBytes(dsize);
|
||||
|
||||
@ -64,7 +64,7 @@ public:
|
||||
int pageLastDocNum() {
|
||||
if (m_winfirst < 0 || m_respage.size() == 0)
|
||||
return -1;
|
||||
return m_winfirst + m_respage.size() - 1;
|
||||
return m_winfirst + int(m_respage.size()) - 1;
|
||||
}
|
||||
virtual int pageSize() const {return m_pagesize;}
|
||||
void pageNext();
|
||||
|
||||
@ -39,7 +39,7 @@ class DocSeqSorted : public DocSeqModifier {
|
||||
virtual bool canSort() {return true;}
|
||||
virtual bool setSortSpec(const DocSeqSortSpec &sortspec);
|
||||
virtual bool getDoc(int num, Rcl::Doc &doc, string *sh = 0);
|
||||
virtual int getResCnt() {return m_docsp.size();}
|
||||
virtual int getResCnt() {return int(m_docsp.size());}
|
||||
private:
|
||||
DocSeqSortSpec m_spec;
|
||||
std::vector<Rcl::Doc> m_docs;
|
||||
|
||||
158
src/query/stack.hh
Normal file
158
src/query/stack.hh
Normal file
@ -0,0 +1,158 @@
|
||||
// A Bison parser, made by GNU Bison 3.0.2.
|
||||
|
||||
// Stack handling for Bison parsers in C++
|
||||
|
||||
// Copyright (C) 2002-2013 Free Software Foundation, Inc.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
// As a special exception, you may create a larger work that contains
|
||||
// part or all of the Bison parser skeleton and distribute that work
|
||||
// under terms of your choice, so long as that work isn't itself a
|
||||
// parser generator using the skeleton or a modified version thereof
|
||||
// as a parser skeleton. Alternatively, if you modify or redistribute
|
||||
// the parser skeleton itself, you may (at your option) remove this
|
||||
// special exception, which will cause the skeleton and the resulting
|
||||
// Bison output files to be licensed under the GNU General Public
|
||||
// License without this special exception.
|
||||
|
||||
// This special exception was added by the Free Software Foundation in
|
||||
// version 2.2 of Bison.
|
||||
|
||||
/**
|
||||
** \file stack.hh
|
||||
** Define the yy::stack class.
|
||||
*/
|
||||
|
||||
#ifndef YY_YY_STACK_HH_INCLUDED
|
||||
# define YY_YY_STACK_HH_INCLUDED
|
||||
|
||||
# include <vector>
|
||||
|
||||
|
||||
namespace yy {
#line 46 "stack.hh" // stack.hh:133
  /// A stack stored in a sequence container \a S.
  ///
  /// Elements are appended at the back of the container; indexing and
  /// iteration are reversed so that index 0 / begin() is the most recently
  /// pushed element. The element type must provide a `move (T&)` member,
  /// which push() uses to take over the contents of its argument.
  template <class T, class S = std::vector<T> >
  class stack
  {
  public:
    // Hide our reversed order.
    typedef typename S::reverse_iterator iterator;
    typedef typename S::const_reverse_iterator const_iterator;

    /// Construct an empty stack.
    stack ()
      : seq_ ()
    {
    }

    /// Construct a stack holding \a n default-constructed elements.
    stack (unsigned int n)
      : seq_ (n)
    {
    }

    /// Access the element \a i slots below the top (0 is the top).
    /// \a i must be smaller than size ().
    inline
    T&
    operator[] (unsigned int i)
    {
      return seq_[seq_.size () - 1 - i];
    }

    /// Read-only access to the element \a i slots below the top.
    /// \a i must be smaller than size ().
    inline
    const T&
    operator[] (unsigned int i) const
    {
      return seq_[seq_.size () - 1 - i];
    }

    /// Steal the contents of \a t.
    ///
    /// Close to move-semantics: a default-constructed element is pushed,
    /// then its `move` member is called with \a t.
    inline
    void
    push (T& t)
    {
      seq_.push_back (T());
      operator[](0).move (t);
    }

    /// Remove the \a n topmost elements.
    ///
    /// Stops once the stack is empty: calling pop_back () on an empty
    /// container is undefined behaviour, so a too large \a n is clamped.
    inline
    void
    pop (unsigned int n = 1)
    {
      for (; n && !seq_.empty (); --n)
        seq_.pop_back ();
    }

    /// Remove all the elements.
    void
    clear ()
    {
      seq_.clear ();
    }

    /// Number of elements on the stack.
    inline
    typename S::size_type
    size () const
    {
      return seq_.size ();
    }

    /// Iterator on the top of the stack.
    inline
    const_iterator
    begin () const
    {
      return seq_.rbegin ();
    }

    /// Iterator past the bottom of the stack.
    inline
    const_iterator
    end () const
    {
      return seq_.rend ();
    }

  private:
    // Copying is disabled: declared private and never defined.
    stack (const stack&);
    stack& operator= (const stack&);
    /// The wrapped container.
    S seq_;
  };

  /// Present a slice of the top of a stack.
  ///
  /// Indexing is relative to \a range: slice[i] is stack[range - i], so
  /// index 0 is the deepest element of the slice and index \a range the top
  /// of the stack. Only a reference to the stack is kept.
  template <class T, class S = stack<T> >
  class slice
  {
  public:
    /// Build a view on the \a range + 1 topmost elements of \a stack.
    slice (const S& stack, unsigned int range)
      : stack_ (stack)
      , range_ (range)
    {
    }

    /// Element \a i of the slice; \a i must not exceed the range.
    inline
    const T&
    operator [] (unsigned int i) const
    {
      return stack_[range_ - i];
    }

  private:
    const S& stack_;
    unsigned int range_;
  };


} // yy
|
||||
#line 157 "stack.hh" // stack.hh:133
|
||||
|
||||
#endif // !YY_YY_STACK_HH_INCLUDED
|
||||
1517
src/query/wasaparse.cpp
Normal file
1517
src/query/wasaparse.cpp
Normal file
File diff suppressed because it is too large
Load Diff
476
src/query/wasaparse.hpp
Normal file
476
src/query/wasaparse.hpp
Normal file
@ -0,0 +1,476 @@
|
||||
// A Bison parser, made by GNU Bison 3.0.2.
|
||||
|
||||
// Skeleton interface for Bison LALR(1) parsers in C++
|
||||
|
||||
// Copyright (C) 2002-2013 Free Software Foundation, Inc.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
// As a special exception, you may create a larger work that contains
|
||||
// part or all of the Bison parser skeleton and distribute that work
|
||||
// under terms of your choice, so long as that work isn't itself a
|
||||
// parser generator using the skeleton or a modified version thereof
|
||||
// as a parser skeleton. Alternatively, if you modify or redistribute
|
||||
// the parser skeleton itself, you may (at your option) remove this
|
||||
// special exception, which will cause the skeleton and the resulting
|
||||
// Bison output files to be licensed under the GNU General Public
|
||||
// License without this special exception.
|
||||
|
||||
// This special exception was added by the Free Software Foundation in
|
||||
// version 2.2 of Bison.
|
||||
|
||||
/**
|
||||
** \file y.tab.h
|
||||
** Define the yy::parser class.
|
||||
*/
|
||||
|
||||
// C++ LALR(1) parser skeleton written by Akim Demaille.
|
||||
|
||||
#ifndef YY_YY_Y_TAB_H_INCLUDED
|
||||
# define YY_YY_Y_TAB_H_INCLUDED
|
||||
|
||||
|
||||
# include <vector>
|
||||
# include <iostream>
|
||||
# include <stdexcept>
|
||||
# include <string>
|
||||
# include "stack.hh"
|
||||
# include "location.hh"
|
||||
|
||||
|
||||
#ifndef YY_ATTRIBUTE
|
||||
# if (defined __GNUC__ \
|
||||
&& (2 < __GNUC__ || (__GNUC__ == 2 && 96 <= __GNUC_MINOR__))) \
|
||||
|| defined __SUNPRO_C && 0x5110 <= __SUNPRO_C
|
||||
# define YY_ATTRIBUTE(Spec) __attribute__(Spec)
|
||||
# else
|
||||
# define YY_ATTRIBUTE(Spec) /* empty */
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifndef YY_ATTRIBUTE_PURE
|
||||
# define YY_ATTRIBUTE_PURE YY_ATTRIBUTE ((__pure__))
|
||||
#endif
|
||||
|
||||
#ifndef YY_ATTRIBUTE_UNUSED
|
||||
# define YY_ATTRIBUTE_UNUSED YY_ATTRIBUTE ((__unused__))
|
||||
#endif
|
||||
|
||||
#if !defined _Noreturn \
|
||||
&& (!defined __STDC_VERSION__ || __STDC_VERSION__ < 201112)
|
||||
# if defined _MSC_VER && 1200 <= _MSC_VER
|
||||
# define _Noreturn __declspec (noreturn)
|
||||
# else
|
||||
# define _Noreturn YY_ATTRIBUTE ((__noreturn__))
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* Suppress unused-variable warnings by "using" E. */
|
||||
#if ! defined lint || defined __GNUC__
|
||||
# define YYUSE(E) ((void) (E))
|
||||
#else
|
||||
# define YYUSE(E) /* empty */
|
||||
#endif
|
||||
|
||||
#if defined __GNUC__ && 407 <= __GNUC__ * 100 + __GNUC_MINOR__
|
||||
/* Suppress an incorrect diagnostic about yylval being uninitialized. */
|
||||
# define YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN \
|
||||
_Pragma ("GCC diagnostic push") \
|
||||
_Pragma ("GCC diagnostic ignored \"-Wuninitialized\"")\
|
||||
_Pragma ("GCC diagnostic ignored \"-Wmaybe-uninitialized\"")
|
||||
# define YY_IGNORE_MAYBE_UNINITIALIZED_END \
|
||||
_Pragma ("GCC diagnostic pop")
|
||||
#else
|
||||
# define YY_INITIAL_VALUE(Value) Value
|
||||
#endif
|
||||
#ifndef YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN
|
||||
# define YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN
|
||||
# define YY_IGNORE_MAYBE_UNINITIALIZED_END
|
||||
#endif
|
||||
#ifndef YY_INITIAL_VALUE
|
||||
# define YY_INITIAL_VALUE(Value) /* Nothing. */
|
||||
#endif
|
||||
|
||||
/* Debug traces. */
|
||||
#ifndef YYDEBUG
|
||||
# define YYDEBUG 0
|
||||
#endif
|
||||
|
||||
|
||||
namespace yy {
|
||||
#line 113 "y.tab.h" // lalr1.cc:372
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/// A Bison parser.
|
||||
class parser
|
||||
{
|
||||
public:
|
||||
#ifndef YYSTYPE
|
||||
/// Symbol semantic values.
|
||||
union semantic_type
|
||||
{
|
||||
#line 44 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:372
|
||||
|
||||
std::string *str;
|
||||
Rcl::SearchDataClauseSimple *cl;
|
||||
Rcl::SearchData *sd;
|
||||
|
||||
#line 133 "y.tab.h" // lalr1.cc:372
|
||||
};
|
||||
#else
|
||||
typedef YYSTYPE semantic_type;
|
||||
#endif
|
||||
/// Symbol locations.
|
||||
typedef location location_type;
|
||||
|
||||
/// Syntax errors thrown from user actions.
|
||||
struct syntax_error : std::runtime_error
|
||||
{
|
||||
syntax_error (const location_type& l, const std::string& m);
|
||||
location_type location;
|
||||
};
|
||||
|
||||
/// Tokens.
|
||||
struct token
|
||||
{
|
||||
enum yytokentype
|
||||
{
|
||||
WORD = 258,
|
||||
QUOTED = 259,
|
||||
QUALIFIERS = 260,
|
||||
AND = 261,
|
||||
UCONCAT = 262,
|
||||
OR = 263,
|
||||
EQUALS = 264,
|
||||
CONTAINS = 265,
|
||||
SMALLEREQ = 266,
|
||||
SMALLER = 267,
|
||||
GREATEREQ = 268,
|
||||
GREATER = 269
|
||||
};
|
||||
};
|
||||
|
||||
/// (External) token type, as returned by yylex.
|
||||
typedef token::yytokentype token_type;
|
||||
|
||||
/// Internal symbol number.
|
||||
typedef int symbol_number_type;
|
||||
|
||||
/// Internal symbol number for tokens (subsumed by symbol_number_type).
|
||||
typedef unsigned char token_number_type;
|
||||
|
||||
/// A complete symbol.
|
||||
///
|
||||
/// Expects its Base type to provide access to the symbol type
|
||||
/// via type_get().
|
||||
///
|
||||
/// Provide access to semantic value and location.
|
||||
template <typename Base>
|
||||
struct basic_symbol : Base
|
||||
{
|
||||
/// Alias to Base.
|
||||
typedef Base super_type;
|
||||
|
||||
/// Default constructor.
|
||||
basic_symbol ();
|
||||
|
||||
/// Copy constructor.
|
||||
basic_symbol (const basic_symbol& other);
|
||||
|
||||
/// Constructor for valueless symbols.
|
||||
basic_symbol (typename Base::kind_type t,
|
||||
const location_type& l);
|
||||
|
||||
/// Constructor for symbols with semantic value.
|
||||
basic_symbol (typename Base::kind_type t,
|
||||
const semantic_type& v,
|
||||
const location_type& l);
|
||||
|
||||
~basic_symbol ();
|
||||
|
||||
/// Destructive move, \a s is emptied into this.
|
||||
void move (basic_symbol& s);
|
||||
|
||||
/// The semantic value.
|
||||
semantic_type value;
|
||||
|
||||
/// The location.
|
||||
location_type location;
|
||||
|
||||
private:
|
||||
/// Assignment operator.
|
||||
basic_symbol& operator= (const basic_symbol& other);
|
||||
};
|
||||
|
||||
/// Type access provider for token (enum) based symbols.
|
||||
struct by_type
|
||||
{
|
||||
/// Default constructor.
|
||||
by_type ();
|
||||
|
||||
/// Copy constructor.
|
||||
by_type (const by_type& other);
|
||||
|
||||
/// The symbol type as needed by the constructor.
|
||||
typedef token_type kind_type;
|
||||
|
||||
/// Constructor from (external) token numbers.
|
||||
by_type (kind_type t);
|
||||
|
||||
/// Steal the symbol type from \a that.
|
||||
void move (by_type& that);
|
||||
|
||||
/// The (internal) type number (corresponding to \a type).
|
||||
/// -1 when this symbol is empty.
|
||||
symbol_number_type type_get () const;
|
||||
|
||||
/// The token.
|
||||
token_type token () const;
|
||||
|
||||
enum { empty = 0 };
|
||||
|
||||
/// The symbol type.
|
||||
/// -1 when this symbol is empty.
|
||||
token_number_type type;
|
||||
};
|
||||
|
||||
/// "External" symbols: returned by the scanner.
|
||||
typedef basic_symbol<by_type> symbol_type;
|
||||
|
||||
|
||||
/// Build a parser object.
|
||||
parser (WasaParserDriver* d_yyarg);
|
||||
virtual ~parser ();
|
||||
|
||||
/// Parse.
|
||||
/// \returns 0 iff parsing succeeded.
|
||||
virtual int parse ();
|
||||
|
||||
#if YYDEBUG
|
||||
/// The current debugging stream.
|
||||
std::ostream& debug_stream () const YY_ATTRIBUTE_PURE;
|
||||
/// Set the current debugging stream.
|
||||
void set_debug_stream (std::ostream &);
|
||||
|
||||
/// Type for debugging levels.
|
||||
typedef int debug_level_type;
|
||||
/// The current debugging level.
|
||||
debug_level_type debug_level () const YY_ATTRIBUTE_PURE;
|
||||
/// Set the current debugging level.
|
||||
void set_debug_level (debug_level_type l);
|
||||
#endif
|
||||
|
||||
/// Report a syntax error.
|
||||
/// \param loc where the syntax error is found.
|
||||
/// \param msg a description of the syntax error.
|
||||
virtual void error (const location_type& loc, const std::string& msg);
|
||||
|
||||
/// Report a syntax error.
|
||||
void error (const syntax_error& err);
|
||||
|
||||
private:
|
||||
/// This class is not copyable.
|
||||
parser (const parser&);
|
||||
parser& operator= (const parser&);
|
||||
|
||||
/// State numbers.
|
||||
typedef int state_type;
|
||||
|
||||
/// Generate an error message.
|
||||
/// \param yystate the state where the error occurred.
|
||||
/// \param yytoken the lookahead token type, or yyempty_.
|
||||
virtual std::string yysyntax_error_ (state_type yystate,
|
||||
symbol_number_type yytoken) const;
|
||||
|
||||
/// Compute post-reduction state.
|
||||
/// \param yystate the current state
|
||||
/// \param yysym the nonterminal to push on the stack
|
||||
state_type yy_lr_goto_state_ (state_type yystate, int yysym);
|
||||
|
||||
/// Whether the given \c yypact_ value indicates a defaulted state.
|
||||
/// \param yyvalue the value to check
|
||||
static bool yy_pact_value_is_default_ (int yyvalue);
|
||||
|
||||
/// Whether the given \c yytable_ value indicates a syntax error.
|
||||
/// \param yyvalue the value to check
|
||||
static bool yy_table_value_is_error_ (int yyvalue);
|
||||
|
||||
static const signed char yypact_ninf_;
|
||||
static const signed char yytable_ninf_;
|
||||
|
||||
/// Convert a scanner token number \a t to a symbol number.
|
||||
static token_number_type yytranslate_ (int t);
|
||||
|
||||
// Tables.
|
||||
// YYPACT[STATE-NUM] -- Index in YYTABLE of the portion describing
|
||||
// STATE-NUM.
|
||||
static const signed char yypact_[];
|
||||
|
||||
// YYDEFACT[STATE-NUM] -- Default reduction number in state STATE-NUM.
|
||||
// Performed when YYTABLE does not specify something else to do. Zero
|
||||
// means the default is an error.
|
||||
static const unsigned char yydefact_[];
|
||||
|
||||
// YYPGOTO[NTERM-NUM].
|
||||
static const signed char yypgoto_[];
|
||||
|
||||
// YYDEFGOTO[NTERM-NUM].
|
||||
static const signed char yydefgoto_[];
|
||||
|
||||
// YYTABLE[YYPACT[STATE-NUM]] -- What to do in state STATE-NUM. If
|
||||
// positive, shift that token. If negative, reduce the rule whose
|
||||
// number is the opposite. If YYTABLE_NINF, syntax error.
|
||||
static const signed char yytable_[];
|
||||
|
||||
static const signed char yycheck_[];
|
||||
|
||||
// YYSTOS[STATE-NUM] -- The (internal number of the) accessing
|
||||
// symbol of state STATE-NUM.
|
||||
static const unsigned char yystos_[];
|
||||
|
||||
// YYR1[YYN] -- Symbol number of symbol that rule YYN derives.
|
||||
static const unsigned char yyr1_[];
|
||||
|
||||
// YYR2[YYN] -- Number of symbols on the right hand side of rule YYN.
|
||||
static const unsigned char yyr2_[];
|
||||
|
||||
|
||||
/// Convert the symbol name \a n to a form suitable for a diagnostic.
|
||||
static std::string yytnamerr_ (const char *n);
|
||||
|
||||
|
||||
/// For a symbol, its name in clear.
|
||||
static const char* const yytname_[];
|
||||
#if YYDEBUG
|
||||
// YYRLINE[YYN] -- Source line where rule number YYN was defined.
|
||||
static const unsigned char yyrline_[];
|
||||
/// Report on the debug stream that the rule \a r is going to be reduced.
|
||||
virtual void yy_reduce_print_ (int r);
|
||||
/// Print the state stack on the debug stream.
|
||||
virtual void yystack_print_ ();
|
||||
|
||||
// Debugging.
|
||||
int yydebug_;
|
||||
std::ostream* yycdebug_;
|
||||
|
||||
/// \brief Display a symbol type, value and location.
|
||||
/// \param yyo The output stream.
|
||||
/// \param yysym The symbol.
|
||||
template <typename Base>
|
||||
void yy_print_ (std::ostream& yyo, const basic_symbol<Base>& yysym) const;
|
||||
#endif
|
||||
|
||||
/// \brief Reclaim the memory associated to a symbol.
|
||||
/// \param yymsg Why this token is reclaimed.
|
||||
/// If null, print nothing.
|
||||
/// \param yysym The symbol.
|
||||
template <typename Base>
|
||||
void yy_destroy_ (const char* yymsg, basic_symbol<Base>& yysym) const;
|
||||
|
||||
private:
|
||||
/// Type access provider for state based symbols.
|
||||
struct by_state
|
||||
{
|
||||
/// Default constructor.
|
||||
by_state ();
|
||||
|
||||
/// The symbol type as needed by the constructor.
|
||||
typedef state_type kind_type;
|
||||
|
||||
/// Constructor.
|
||||
by_state (kind_type s);
|
||||
|
||||
/// Copy constructor.
|
||||
by_state (const by_state& other);
|
||||
|
||||
/// Steal the symbol type from \a that.
|
||||
void move (by_state& that);
|
||||
|
||||
/// The (internal) type number (corresponding to \a state).
|
||||
/// "empty" when empty.
|
||||
symbol_number_type type_get () const;
|
||||
|
||||
enum { empty = 0 };
|
||||
|
||||
/// The state.
|
||||
state_type state;
|
||||
};
|
||||
|
||||
/// "Internal" symbol: element of the stack.
|
||||
struct stack_symbol_type : basic_symbol<by_state>
|
||||
{
|
||||
/// Superclass.
|
||||
typedef basic_symbol<by_state> super_type;
|
||||
/// Construct an empty symbol.
|
||||
stack_symbol_type ();
|
||||
/// Steal the contents from \a sym to build this.
|
||||
stack_symbol_type (state_type s, symbol_type& sym);
|
||||
/// Assignment, needed by push_back.
|
||||
stack_symbol_type& operator= (const stack_symbol_type& that);
|
||||
};
|
||||
|
||||
/// Stack type.
|
||||
typedef stack<stack_symbol_type> stack_type;
|
||||
|
||||
/// The stack.
|
||||
stack_type yystack_;
|
||||
|
||||
/// Push a new state on the stack.
|
||||
/// \param m a debug message to display
|
||||
/// if null, no trace is output.
|
||||
/// \param s the symbol
|
||||
/// \warning the contents of \a s.value is stolen.
|
||||
void yypush_ (const char* m, stack_symbol_type& s);
|
||||
|
||||
/// Push a new look ahead token on the state on the stack.
|
||||
/// \param m a debug message to display
|
||||
/// if null, no trace is output.
|
||||
/// \param s the state
|
||||
/// \param sym the symbol (for its value and location).
|
||||
/// \warning the contents of \a s.value is stolen.
|
||||
void yypush_ (const char* m, state_type s, symbol_type& sym);
|
||||
|
||||
/// Pop \a n symbols from the three stacks.
|
||||
void yypop_ (unsigned int n = 1);
|
||||
|
||||
// Constants.
|
||||
enum
|
||||
{
|
||||
yyeof_ = 0,
|
||||
yylast_ = 59, ///< Last index in yytable_.
|
||||
yynnts_ = 7, ///< Number of nonterminal symbols.
|
||||
yyempty_ = -2,
|
||||
yyfinal_ = 14, ///< Termination state number.
|
||||
yyterror_ = 1,
|
||||
yyerrcode_ = 256,
|
||||
yyntokens_ = 18 ///< Number of tokens.
|
||||
};
|
||||
|
||||
|
||||
// User arguments.
|
||||
WasaParserDriver* d;
|
||||
};
|
||||
|
||||
|
||||
|
||||
} // yy
|
||||
#line 472 "y.tab.h" // lalr1.cc:372
|
||||
|
||||
|
||||
|
||||
|
||||
#endif // !YY_YY_Y_TAB_H_INCLUDED
|
||||
@ -161,10 +161,10 @@ bool WasaParserDriver::addClause(SearchData *sd,
|
||||
size_t size = strtoll(cl->gettext().c_str(), &cp, 10);
|
||||
if (*cp != 0) {
|
||||
switch (*cp) {
|
||||
case 'k': case 'K': size *= 1E3;break;
|
||||
case 'm': case 'M': size *= 1E6;break;
|
||||
case 'g': case 'G': size *= 1E9;break;
|
||||
case 't': case 'T': size *= 1E12;break;
|
||||
case 'k': case 'K': size *= 1000;break;
|
||||
case 'm': case 'M': size *= 1000*1000;break;
|
||||
case 'g': case 'G': size *= 1000*1000*1000;break;
|
||||
case 't': case 'T': size *= size_t(1000)*1000*1000*1000;break;
|
||||
default:
|
||||
m_reason = string("Bad multiplier suffix: ") + *cp;
|
||||
delete cl;
|
||||
|
||||
@ -29,7 +29,7 @@
|
||||
#include <vector>
|
||||
using namespace std;
|
||||
|
||||
#include "xapian.h"
|
||||
#include <xapian.h>
|
||||
|
||||
#include "debuglog.h"
|
||||
#include "rclconfig.h"
|
||||
|
||||
@ -97,6 +97,8 @@ bool createExpansionDbs(Xapian::WritableDatabase& wdb,
|
||||
|
||||
// Detect and skip CJK terms.
|
||||
Utf8Iter utfit(*it);
|
||||
if (utfit.eof()) // Empty term?? Seems to happen.
|
||||
continue;
|
||||
if (TextSplit::isCJK(*utfit)) {
|
||||
// LOGDEB(("stemskipped: Skipping CJK\n"));
|
||||
continue;
|
||||
|
||||
@ -82,7 +82,7 @@ bool Query::Native::getMatchTerms(unsigned long xdocid, vector<string>& terms)
|
||||
{
|
||||
if (!xenquire) {
|
||||
LOGERR(("Query::getMatchTerms: no query opened\n"));
|
||||
return -1;
|
||||
return false;
|
||||
}
|
||||
|
||||
terms.clear();
|
||||
@ -386,7 +386,7 @@ int Query::Native::makeAbstract(Xapian::docid docid,
|
||||
for (multimap<double, vector<string> >::reverse_iterator mit = byQ.rbegin();
|
||||
mit != byQ.rend(); mit++) {
|
||||
unsigned int maxgrpoccs;
|
||||
float q;
|
||||
double q;
|
||||
if (byQ.size() == 1) {
|
||||
maxgrpoccs = maxtotaloccs;
|
||||
q = 1.0;
|
||||
|
||||
@ -18,7 +18,7 @@
|
||||
|
||||
#include <stdio.h>
|
||||
#include <cstring>
|
||||
#include <unistd.h>
|
||||
#include "safeunistd.h"
|
||||
#include <math.h>
|
||||
#include <time.h>
|
||||
|
||||
@ -433,7 +433,7 @@ bool Db::Native::dbDataToRclDoc(Xapian::docid docid, std::string &data,
|
||||
string dbdir = m_rcldb->m_basedir;
|
||||
doc.idxi = 0;
|
||||
if (!m_rcldb->m_extraDbs.empty()) {
|
||||
unsigned int idxi = whatDbIdx(docid);
|
||||
int idxi = int(whatDbIdx(docid));
|
||||
|
||||
// idxi is in [0, extraDbs.size()]. 0 is for the main index,
|
||||
// idxi-1 indexes into the additional dbs array.
|
||||
@ -549,14 +549,13 @@ bool Db::Native::getPagePositions(Xapian::docid docid, vector<int>& vpos)
|
||||
return true;
|
||||
}
|
||||
|
||||
int Db::Native::getPageNumberForPosition(const vector<int>& pbreaks,
|
||||
unsigned int pos)
|
||||
int Db::Native::getPageNumberForPosition(const vector<int>& pbreaks, int pos)
|
||||
{
|
||||
if (pos < baseTextPosition) // Not in text body
|
||||
if (pos < int(baseTextPosition)) // Not in text body
|
||||
return -1;
|
||||
vector<int>::const_iterator it =
|
||||
upper_bound(pbreaks.begin(), pbreaks.end(), pos);
|
||||
return it - pbreaks.begin() + 1;
|
||||
return int(it - pbreaks.begin() + 1);
|
||||
}
|
||||
|
||||
// Note: we're passed a Xapian::Document* because Xapian
|
||||
@ -1420,10 +1419,11 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi, Doc &doc)
|
||||
time_t mtime = atoll(doc.dmtime.empty() ? doc.fmtime.c_str() :
|
||||
doc.dmtime.c_str());
|
||||
struct tm tmb;
|
||||
localtime_r(&mtime, &tmb);
|
||||
struct tm *tmbp = &tmb;
|
||||
tmbp = localtime_r(&mtime, &tmb);
|
||||
char buf[9];
|
||||
snprintf(buf, 9, "%04d%02d%02d",
|
||||
tmb.tm_year+1900, tmb.tm_mon + 1, tmb.tm_mday);
|
||||
tmbp->tm_year+1900, tmbp->tm_mon + 1, tmbp->tm_mday);
|
||||
// Date (YYYYMMDD)
|
||||
newdocument.add_boolean_term(wrap_prefix(xapday_prefix) + string(buf));
|
||||
// Month (YYYYMM)
|
||||
|
||||
@ -120,7 +120,7 @@ class Db::Native {
|
||||
const string& uniterm);
|
||||
|
||||
bool getPagePositions(Xapian::docid docid, vector<int>& vpos);
|
||||
int getPageNumberForPosition(const vector<int>& pbreaks, unsigned int pos);
|
||||
int getPageNumberForPosition(const vector<int>& pbreaks, int pos);
|
||||
|
||||
bool dbDataToRclDoc(Xapian::docid docid, std::string &data, Doc &doc);
|
||||
|
||||
|
||||
@ -21,13 +21,15 @@
|
||||
#include "autoconfig.h"
|
||||
|
||||
#include <string>
|
||||
using namespace std;
|
||||
|
||||
#include "debuglog.h"
|
||||
#include "rcldb.h"
|
||||
#include "rcldb_p.h"
|
||||
#include "stemdb.h"
|
||||
#include "expansiondbs.h"
|
||||
#include "strmatcher.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace Rcl {
|
||||
|
||||
@ -41,10 +43,10 @@ bool Db::filenameWildExp(const string& fnexp, vector<string>& names, int max)
|
||||
// get here currently anyway), and has no wildcards, we add * at
|
||||
// each end: match any substring
|
||||
if (pattern[0] == '"' && pattern[pattern.size()-1] == '"') {
|
||||
pattern = pattern.substr(1, pattern.size() -2);
|
||||
pattern = pattern.substr(1, pattern.size() -2);
|
||||
} else if (pattern.find_first_of(cstr_minwilds) == string::npos &&
|
||||
!unaciscapital(pattern)) {
|
||||
pattern = "*" + pattern + "*";
|
||||
!unaciscapital(pattern)) {
|
||||
pattern = "*" + pattern + "*";
|
||||
} // else let it be
|
||||
|
||||
LOGDEB(("Rcl::Db::filenameWildExp: pattern: [%s]\n", pattern.c_str()));
|
||||
@ -55,21 +57,21 @@ bool Db::filenameWildExp(const string& fnexp, vector<string>& names, int max)
|
||||
// stripping conditionally on indexstripchars.
|
||||
string pat1;
|
||||
if (unacmaybefold(pattern, pat1, "UTF-8", UNACOP_UNACFOLD)) {
|
||||
pattern.swap(pat1);
|
||||
pattern.swap(pat1);
|
||||
}
|
||||
|
||||
TermMatchResult result;
|
||||
if (!idxTermMatch(ET_WILD, string(), pattern, result, max,
|
||||
unsplitFilenameFieldName))
|
||||
return false;
|
||||
unsplitFilenameFieldName))
|
||||
return false;
|
||||
for (vector<TermMatchEntry>::const_iterator it = result.entries.begin();
|
||||
it != result.entries.end(); it++)
|
||||
names.push_back(it->term);
|
||||
it != result.entries.end(); it++)
|
||||
names.push_back(it->term);
|
||||
|
||||
if (names.empty()) {
|
||||
// Build an impossible query: we know it's impossible because we
|
||||
// control the prefixes!
|
||||
names.push_back(wrap_prefix("XNONE") + "NoMatchingTerms");
|
||||
// Build an impossible query: we know it's impossible because we
|
||||
// control the prefixes!
|
||||
names.push_back(wrap_prefix("XNONE") + "NoMatchingTerms");
|
||||
}
|
||||
return true;
|
||||
}
|
||||
@ -82,11 +84,11 @@ bool Db::maxYearSpan(int *minyear, int *maxyear)
|
||||
*maxyear = -1000000;
|
||||
TermMatchResult result;
|
||||
if (!idxTermMatch(ET_WILD, string(), "*", result, -1, "xapyear")) {
|
||||
LOGINFO(("Rcl::Db:maxYearSpan: termMatch failed\n"));
|
||||
return false;
|
||||
LOGINFO(("Rcl::Db:maxYearSpan: termMatch failed\n"));
|
||||
return false;
|
||||
}
|
||||
for (vector<TermMatchEntry>::const_iterator it = result.entries.begin();
|
||||
it != result.entries.end(); it++) {
|
||||
it != result.entries.end(); it++) {
|
||||
if (!it->term.empty()) {
|
||||
int year = atoi(strip_prefix(it->term).c_str());
|
||||
if (year < *minyear)
|
||||
@ -102,11 +104,11 @@ bool Db::getAllDbMimeTypes(std::vector<std::string>& exp)
|
||||
{
|
||||
Rcl::TermMatchResult res;
|
||||
if (!idxTermMatch(Rcl::Db::ET_WILD, "", "*", res, -1, "mtype")) {
|
||||
return false;
|
||||
return false;
|
||||
}
|
||||
for (vector<Rcl::TermMatchEntry>::const_iterator rit = res.entries.begin();
|
||||
rit != res.entries.end(); rit++) {
|
||||
exp.push_back(Rcl::strip_prefix(rit->term));
|
||||
rit != res.entries.end(); rit++) {
|
||||
exp.push_back(Rcl::strip_prefix(rit->term));
|
||||
}
|
||||
return true;
|
||||
}
|
||||
@ -114,19 +116,19 @@ bool Db::getAllDbMimeTypes(std::vector<std::string>& exp)
|
||||
class TermMatchCmpByWcf {
|
||||
public:
|
||||
int operator()(const TermMatchEntry& l, const TermMatchEntry& r) {
|
||||
return r.wcf - l.wcf < 0;
|
||||
return r.wcf - l.wcf < 0;
|
||||
}
|
||||
};
|
||||
class TermMatchCmpByTerm {
|
||||
public:
|
||||
int operator()(const TermMatchEntry& l, const TermMatchEntry& r) {
|
||||
return l.term.compare(r.term) > 0;
|
||||
return l.term.compare(r.term) > 0;
|
||||
}
|
||||
};
|
||||
class TermMatchTermEqual {
|
||||
public:
|
||||
int operator()(const TermMatchEntry& l, const TermMatchEntry& r) {
|
||||
return !l.term.compare(r.term);
|
||||
return !l.term.compare(r.term);
|
||||
}
|
||||
};
|
||||
|
||||
@ -136,10 +138,10 @@ public:
|
||||
static void addPrefix(vector<TermMatchEntry>& terms, const string& prefix)
|
||||
{
|
||||
if (prefix.empty())
|
||||
return;
|
||||
return;
|
||||
for (vector<TermMatchEntry>::iterator it = terms.begin();
|
||||
it != terms.end(); it++)
|
||||
it->term.insert(0, prefix);
|
||||
it->term.insert(0, prefix);
|
||||
}
|
||||
|
||||
static const char *tmtptostr(int typ)
|
||||
@ -164,22 +166,22 @@ static const char *tmtptostr(int typ)
|
||||
// using the main index terms (filtering, retrieving stats, expansion
|
||||
// in some cases).
|
||||
bool Db::termMatch(int typ_sens, const string &lang, const string &_term,
|
||||
TermMatchResult& res, int max, const string& field,
|
||||
vector<string>* multiwords)
|
||||
TermMatchResult& res, int max, const string& field,
|
||||
vector<string>* multiwords)
|
||||
{
|
||||
int matchtyp = matchTypeTp(typ_sens);
|
||||
if (!m_ndb || !m_ndb->m_isopen)
|
||||
return false;
|
||||
return false;
|
||||
Xapian::Database xrdb = m_ndb->xrdb;
|
||||
|
||||
bool diac_sensitive = (typ_sens & ET_DIACSENS) != 0;
|
||||
bool case_sensitive = (typ_sens & ET_CASESENS) != 0;
|
||||
|
||||
LOGDEB0(("Db::TermMatch: typ %s diacsens %d casesens %d lang [%s] term [%s]"
|
||||
" max %d field [%s] stripped %d init res.size %u\n",
|
||||
tmtptostr(matchtyp), diac_sensitive, case_sensitive, lang.c_str(),
|
||||
_term.c_str(), max, field.c_str(), o_index_stripchars,
|
||||
res.entries.size()));
|
||||
" max %d field [%s] stripped %d init res.size %u\n",
|
||||
tmtptostr(matchtyp), diac_sensitive, case_sensitive, lang.c_str(),
|
||||
_term.c_str(), max, field.c_str(), o_index_stripchars,
|
||||
res.entries.size()));
|
||||
|
||||
// If index is stripped, no case or diac expansion can be needed:
|
||||
// for the processing inside this routine, everything looks like
|
||||
@ -187,11 +189,11 @@ bool Db::termMatch(int typ_sens, const string &lang, const string &_term,
|
||||
// Also, convert input to lowercase and strip its accents.
|
||||
string term = _term;
|
||||
if (o_index_stripchars) {
|
||||
diac_sensitive = case_sensitive = true;
|
||||
if (!unacmaybefold(_term, term, "UTF-8", UNACOP_UNACFOLD)) {
|
||||
LOGERR(("Db::termMatch: unac failed for [%s]\n", _term.c_str()));
|
||||
return false;
|
||||
}
|
||||
diac_sensitive = case_sensitive = true;
|
||||
if (!unacmaybefold(_term, term, "UTF-8", UNACOP_UNACFOLD)) {
|
||||
LOGERR(("Db::termMatch: unac failed for [%s]\n", _term.c_str()));
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// The case/diac expansion db
|
||||
@ -199,125 +201,125 @@ bool Db::termMatch(int typ_sens, const string &lang, const string &_term,
|
||||
XapComputableSynFamMember synac(xrdb, synFamDiCa, "all", &unacfoldtrans);
|
||||
|
||||
if (matchtyp == ET_WILD || matchtyp == ET_REGEXP) {
|
||||
STD_SHARED_PTR<StrMatcher> matcher;
|
||||
if (matchtyp == ET_WILD) {
|
||||
matcher = STD_SHARED_PTR<StrMatcher>(new StrWildMatcher(term));
|
||||
} else {
|
||||
matcher = STD_SHARED_PTR<StrMatcher>(new StrRegexpMatcher(term));
|
||||
}
|
||||
if (!diac_sensitive || !case_sensitive) {
|
||||
// Perform case/diac expansion on the exp as appropriate and
|
||||
// expand the result.
|
||||
vector<string> exp;
|
||||
if (diac_sensitive) {
|
||||
// Expand for diacritics and case, filtering for same diacritics
|
||||
SynTermTransUnac foldtrans(UNACOP_FOLD);
|
||||
synac.synKeyExpand(matcher.get(), exp, &foldtrans);
|
||||
} else if (case_sensitive) {
|
||||
// Expand for diacritics and case, filtering for same case
|
||||
SynTermTransUnac unactrans(UNACOP_UNAC);
|
||||
synac.synKeyExpand(matcher.get(), exp, &unactrans);
|
||||
} else {
|
||||
// Expand for diacritics and case, no filtering
|
||||
synac.synKeyExpand(matcher.get(), exp);
|
||||
}
|
||||
// Retrieve additional info and filter against the index itself
|
||||
for (vector<string>::const_iterator it = exp.begin();
|
||||
it != exp.end(); it++) {
|
||||
idxTermMatch(ET_NONE, "", *it, res, max, field);
|
||||
}
|
||||
// And also expand the original expression against the
|
||||
// main index: for the common case where the expression
|
||||
// had no case/diac expansion (no entry in the exp db if
|
||||
// the original term is lowercase and without accents).
|
||||
idxTermMatch(typ_sens, lang, term, res, max, field);
|
||||
} else {
|
||||
idxTermMatch(typ_sens, lang, term, res, max, field);
|
||||
}
|
||||
STD_SHARED_PTR<StrMatcher> matcher;
|
||||
if (matchtyp == ET_WILD) {
|
||||
matcher = STD_SHARED_PTR<StrMatcher>(new StrWildMatcher(term));
|
||||
} else {
|
||||
matcher = STD_SHARED_PTR<StrMatcher>(new StrRegexpMatcher(term));
|
||||
}
|
||||
if (!diac_sensitive || !case_sensitive) {
|
||||
// Perform case/diac expansion on the exp as appropriate and
|
||||
// expand the result.
|
||||
vector<string> exp;
|
||||
if (diac_sensitive) {
|
||||
// Expand for diacritics and case, filtering for same diacritics
|
||||
SynTermTransUnac foldtrans(UNACOP_FOLD);
|
||||
synac.synKeyExpand(matcher.get(), exp, &foldtrans);
|
||||
} else if (case_sensitive) {
|
||||
// Expand for diacritics and case, filtering for same case
|
||||
SynTermTransUnac unactrans(UNACOP_UNAC);
|
||||
synac.synKeyExpand(matcher.get(), exp, &unactrans);
|
||||
} else {
|
||||
// Expand for diacritics and case, no filtering
|
||||
synac.synKeyExpand(matcher.get(), exp);
|
||||
}
|
||||
// Retrieve additional info and filter against the index itself
|
||||
for (vector<string>::const_iterator it = exp.begin();
|
||||
it != exp.end(); it++) {
|
||||
idxTermMatch(ET_NONE, "", *it, res, max, field);
|
||||
}
|
||||
// And also expand the original expression against the
|
||||
// main index: for the common case where the expression
|
||||
// had no case/diac expansion (no entry in the exp db if
|
||||
// the original term is lowercase and without accents).
|
||||
idxTermMatch(typ_sens, lang, term, res, max, field);
|
||||
} else {
|
||||
idxTermMatch(typ_sens, lang, term, res, max, field);
|
||||
}
|
||||
|
||||
} else {
|
||||
// Expansion is STEM or NONE (which may still need synonyms
|
||||
// and case/diac exp)
|
||||
// Expansion is STEM or NONE (which may still need synonyms
|
||||
// and case/diac exp)
|
||||
|
||||
vector<string> lexp;
|
||||
if (diac_sensitive && case_sensitive) {
|
||||
// No case/diac expansion
|
||||
lexp.push_back(term);
|
||||
} else if (diac_sensitive) {
|
||||
// Expand for accents and case, filtering for same accents,
|
||||
SynTermTransUnac foldtrans(UNACOP_FOLD);
|
||||
synac.synExpand(term, lexp, &foldtrans);
|
||||
} else if (case_sensitive) {
|
||||
// Expand for accents and case, filtering for same case
|
||||
SynTermTransUnac unactrans(UNACOP_UNAC);
|
||||
synac.synExpand(term, lexp, &unactrans);
|
||||
} else {
|
||||
// We are neither accent- nor case- sensitive and may need stem
|
||||
// expansion or not. Expand for accents and case
|
||||
synac.synExpand(term, lexp);
|
||||
}
|
||||
vector<string> lexp;
|
||||
if (diac_sensitive && case_sensitive) {
|
||||
// No case/diac expansion
|
||||
lexp.push_back(term);
|
||||
} else if (diac_sensitive) {
|
||||
// Expand for accents and case, filtering for same accents,
|
||||
SynTermTransUnac foldtrans(UNACOP_FOLD);
|
||||
synac.synExpand(term, lexp, &foldtrans);
|
||||
} else if (case_sensitive) {
|
||||
// Expand for accents and case, filtering for same case
|
||||
SynTermTransUnac unactrans(UNACOP_UNAC);
|
||||
synac.synExpand(term, lexp, &unactrans);
|
||||
} else {
|
||||
// We are neither accent- nor case- sensitive and may need stem
|
||||
// expansion or not. Expand for accents and case
|
||||
synac.synExpand(term, lexp);
|
||||
}
|
||||
|
||||
if (matchtyp == ET_STEM || (typ_sens & ET_SYNEXP)) {
|
||||
if (matchtyp == ET_STEM || (typ_sens & ET_SYNEXP)) {
|
||||
// Note: if any of the above conds is true, we are insensitive to
|
||||
// diacs and case (enforced in searchdatatox:termexpand)
|
||||
// Need stem expansion. Lowercase the result of accent and case
|
||||
// expansion for input to stemdb.
|
||||
for (unsigned int i = 0; i < lexp.size(); i++) {
|
||||
string lower;
|
||||
unacmaybefold(lexp[i], lower, "UTF-8", UNACOP_FOLD);
|
||||
lexp[i] = lower;
|
||||
}
|
||||
sort(lexp.begin(), lexp.end());
|
||||
lexp.erase(unique(lexp.begin(), lexp.end()), lexp.end());
|
||||
// Need stem expansion. Lowercase the result of accent and case
|
||||
// expansion for input to stemdb.
|
||||
for (unsigned int i = 0; i < lexp.size(); i++) {
|
||||
string lower;
|
||||
unacmaybefold(lexp[i], lower, "UTF-8", UNACOP_FOLD);
|
||||
lexp[i] = lower;
|
||||
}
|
||||
sort(lexp.begin(), lexp.end());
|
||||
lexp.erase(unique(lexp.begin(), lexp.end()), lexp.end());
|
||||
|
||||
if (matchtyp == ET_STEM) {
|
||||
StemDb sdb(xrdb);
|
||||
vector<string> exp1;
|
||||
for (vector<string>::const_iterator it = lexp.begin();
|
||||
it != lexp.end(); it++) {
|
||||
sdb.stemExpand(lang, *it, exp1);
|
||||
}
|
||||
exp1.swap(lexp);
|
||||
sort(lexp.begin(), lexp.end());
|
||||
lexp.erase(unique(lexp.begin(), lexp.end()), lexp.end());
|
||||
LOGDEB(("ExpTerm: stemexp: %s\n",
|
||||
stringsToString(lexp).c_str()));
|
||||
}
|
||||
if (matchtyp == ET_STEM) {
|
||||
StemDb sdb(xrdb);
|
||||
vector<string> exp1;
|
||||
for (vector<string>::const_iterator it = lexp.begin();
|
||||
it != lexp.end(); it++) {
|
||||
sdb.stemExpand(lang, *it, exp1);
|
||||
}
|
||||
exp1.swap(lexp);
|
||||
sort(lexp.begin(), lexp.end());
|
||||
lexp.erase(unique(lexp.begin(), lexp.end()), lexp.end());
|
||||
LOGDEB(("ExpTerm: stemexp: %s\n",
|
||||
stringsToString(lexp).c_str()));
|
||||
}
|
||||
|
||||
// Expand the result for synonyms. Note that doing it here
|
||||
// means that multi-term synonyms will not work
|
||||
// (e.g. stakhanovist -> "hard at work"). We would have to
|
||||
// separate the multi-word expansions for our caller to
|
||||
// add them as phrases to the query. Not impossible, but
|
||||
// let's keep it at single words for now.
|
||||
if (m_syngroups.ok() && (typ_sens & ET_SYNEXP)) {
|
||||
LOGDEB(("ExpTerm: got syngroups\n"));
|
||||
vector<string> exp1(lexp);
|
||||
for (vector<string>::const_iterator it = lexp.begin();
|
||||
it != lexp.end(); it++) {
|
||||
vector<string> sg = m_syngroups.getgroup(*it);
|
||||
if (!sg.empty()) {
|
||||
LOGDEB(("ExpTerm: syns: %s -> %s\n",
|
||||
it->c_str(), stringsToString(sg).c_str()));
|
||||
for (vector<string>::const_iterator it1 = sg.begin();
|
||||
it1 != sg.end(); it1++) {
|
||||
if (it1->find_first_of(" ") != string::npos) {
|
||||
if (multiwords) {
|
||||
multiwords->push_back(*it1);
|
||||
}
|
||||
} else {
|
||||
exp1.push_back(*it1);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
lexp.swap(exp1);
|
||||
sort(lexp.begin(), lexp.end());
|
||||
lexp.erase(unique(lexp.begin(), lexp.end()), lexp.end());
|
||||
}
|
||||
// Expand the result for synonyms. Note that doing it here
|
||||
// means that multi-term synonyms will not work
|
||||
// (e.g. stakhanovist -> "hard at work". We would have to
|
||||
// separate the multi-word expansions for our caller to
|
||||
// add them as phrases to the query. Not impossible, but
|
||||
// let's keep it at single words for now.
|
||||
if (m_syngroups.ok() && (typ_sens & ET_SYNEXP)) {
|
||||
LOGDEB(("ExpTerm: got syngroups\n"));
|
||||
vector<string> exp1(lexp);
|
||||
for (vector<string>::const_iterator it = lexp.begin();
|
||||
it != lexp.end(); it++) {
|
||||
vector<string> sg = m_syngroups.getgroup(*it);
|
||||
if (!sg.empty()) {
|
||||
LOGDEB(("ExpTerm: syns: %s -> %s\n",
|
||||
it->c_str(), stringsToString(sg).c_str()));
|
||||
for (vector<string>::const_iterator it1 = sg.begin();
|
||||
it1 != sg.end(); it1++) {
|
||||
if (it1->find_first_of(" ") != string::npos) {
|
||||
if (multiwords) {
|
||||
multiwords->push_back(*it1);
|
||||
}
|
||||
} else {
|
||||
exp1.push_back(*it1);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
lexp.swap(exp1);
|
||||
sort(lexp.begin(), lexp.end());
|
||||
lexp.erase(unique(lexp.begin(), lexp.end()), lexp.end());
|
||||
}
|
||||
|
||||
// Expand the resulting list for case (all stemdb content
|
||||
// is lowercase)
|
||||
// Expand the resulting list for case (all stemdb content
|
||||
// is lowercase)
|
||||
vector<string> exp1;
|
||||
for (vector<string>::const_iterator it = lexp.begin();
|
||||
it != lexp.end(); it++) {
|
||||
@ -326,27 +328,27 @@ bool Db::termMatch(int typ_sens, const string &lang, const string &_term,
|
||||
exp1.swap(lexp);
|
||||
sort(lexp.begin(), lexp.end());
|
||||
lexp.erase(unique(lexp.begin(), lexp.end()), lexp.end());
|
||||
}
|
||||
}
|
||||
|
||||
// Filter the result and get the stats, possibly add prefixes.
|
||||
LOGDEB(("ExpandTerm:TM: lexp: %s\n", stringsToString(lexp).c_str()));
|
||||
for (vector<string>::const_iterator it = lexp.begin();
|
||||
it != lexp.end(); it++) {
|
||||
idxTermMatch(Rcl::Db::ET_WILD, "", *it, res, max, field);
|
||||
}
|
||||
// Filter the result and get the stats, possibly add prefixes.
|
||||
LOGDEB(("ExpandTerm:TM: lexp: %s\n", stringsToString(lexp).c_str()));
|
||||
for (vector<string>::const_iterator it = lexp.begin();
|
||||
it != lexp.end(); it++) {
|
||||
idxTermMatch(Rcl::Db::ET_WILD, "", *it, res, max, field);
|
||||
}
|
||||
}
|
||||
|
||||
TermMatchCmpByTerm tcmp;
|
||||
sort(res.entries.begin(), res.entries.end(), tcmp);
|
||||
TermMatchTermEqual teq;
|
||||
vector<TermMatchEntry>::iterator uit =
|
||||
unique(res.entries.begin(), res.entries.end(), teq);
|
||||
unique(res.entries.begin(), res.entries.end(), teq);
|
||||
res.entries.resize(uit - res.entries.begin());
|
||||
TermMatchCmpByWcf wcmp;
|
||||
sort(res.entries.begin(), res.entries.end(), wcmp);
|
||||
if (max > 0) {
|
||||
// Would need a small max and big stem expansion...
|
||||
res.entries.resize(MIN(res.entries.size(), (unsigned int)max));
|
||||
// Would need a small max and big stem expansion...
|
||||
res.entries.resize(MIN(res.entries.size(), (unsigned int)max));
|
||||
}
|
||||
return true;
|
||||
}
|
||||
@ -354,114 +356,116 @@ bool Db::termMatch(int typ_sens, const string &lang, const string &_term,
|
||||
// Second phase of wildcard/regexp term expansion after case/diac
|
||||
// expansion: expand against main index terms
|
||||
bool Db::idxTermMatch(int typ_sens, const string &lang, const string &root,
|
||||
TermMatchResult& res, int max, const string& field)
|
||||
TermMatchResult& res, int max, const string& field)
|
||||
{
|
||||
int typ = matchTypeTp(typ_sens);
|
||||
LOGDEB1(("Db::idxTermMatch: typ %s lang [%s] term [%s] "
|
||||
"max %d field [%s] init res.size %u\n",
|
||||
tmtptostr(typ), lang.c_str(), root.c_str(),
|
||||
max, field.c_str(), res.entries.size()));
|
||||
"max %d field [%s] init res.size %u\n",
|
||||
tmtptostr(typ), lang.c_str(), root.c_str(),
|
||||
max, field.c_str(), res.entries.size()));
|
||||
|
||||
if (typ == ET_STEM) {
|
||||
LOGFATAL(("RCLDB: internal error: idxTermMatch called with ET_STEM\n"));
|
||||
abort();
|
||||
LOGFATAL(("RCLDB: internal error: idxTermMatch called with ET_STEM\n"));
|
||||
abort();
|
||||
}
|
||||
|
||||
Xapian::Database xdb = m_ndb->xrdb;
|
||||
|
||||
string prefix;
|
||||
if (!field.empty()) {
|
||||
const FieldTraits *ftp = 0;
|
||||
if (!fieldToTraits(field, &ftp, true) || ftp->pfx.empty()) {
|
||||
const FieldTraits *ftp = 0;
|
||||
if (!fieldToTraits(field, &ftp, true) || ftp->pfx.empty()) {
|
||||
LOGDEB(("Db::termMatch: field is not indexed (no prefix): [%s]\n",
|
||||
field.c_str()));
|
||||
} else {
|
||||
prefix = wrap_prefix(ftp->pfx);
|
||||
}
|
||||
prefix = wrap_prefix(ftp->pfx);
|
||||
}
|
||||
}
|
||||
res.prefix = prefix;
|
||||
|
||||
STD_SHARED_PTR<StrMatcher> matcher;
|
||||
if (typ == ET_REGEXP) {
|
||||
matcher = STD_SHARED_PTR<StrMatcher>(new StrRegexpMatcher(root));
|
||||
if (!matcher->ok()) {
|
||||
LOGERR(("termMatch: regcomp failed: %s\n",
|
||||
matcher->getreason().c_str()))
|
||||
return false;
|
||||
}
|
||||
matcher = STD_SHARED_PTR<StrMatcher>(new StrRegexpMatcher(root));
|
||||
if (!matcher->ok()) {
|
||||
LOGERR(("termMatch: regcomp failed: %s\n",
|
||||
matcher->getreason().c_str()))
|
||||
return false;
|
||||
}
|
||||
} else if (typ == ET_WILD) {
|
||||
matcher = STD_SHARED_PTR<StrMatcher>(new StrWildMatcher(root));
|
||||
matcher = STD_SHARED_PTR<StrMatcher>(new StrWildMatcher(root));
|
||||
}
|
||||
|
||||
// Find the initial section before any special char
|
||||
string::size_type es = string::npos;
|
||||
if (matcher) {
|
||||
es = matcher->baseprefixlen();
|
||||
es = matcher->baseprefixlen();
|
||||
}
|
||||
|
||||
// Initial section: the part of the prefix+expr before the
|
||||
// first wildcard character. We only scan the part of the
|
||||
// index where this matches
|
||||
string is;
|
||||
switch (es) {
|
||||
case string::npos: is = prefix + root; break;
|
||||
case 0: is = prefix; break;
|
||||
default: is = prefix + root.substr(0, es); break;
|
||||
if (es == string::npos) {
|
||||
is = prefix + root;
|
||||
} else if (es == 0) {
|
||||
is = prefix;
|
||||
} else {
|
||||
is = prefix + root.substr(0, es);
|
||||
}
|
||||
LOGDEB2(("termMatch: initsec: [%s]\n", is.c_str()));
|
||||
|
||||
for (int tries = 0; tries < 2; tries++) {
|
||||
try {
|
||||
Xapian::TermIterator it = xdb.allterms_begin();
|
||||
if (!is.empty())
|
||||
it.skip_to(is.c_str());
|
||||
for (int rcnt = 0; it != xdb.allterms_end(); it++) {
|
||||
// If we're beyond the terms matching the initial
|
||||
// section, end
|
||||
if (!is.empty() && (*it).find(is) != 0)
|
||||
break;
|
||||
try {
|
||||
Xapian::TermIterator it = xdb.allterms_begin();
|
||||
if (!is.empty())
|
||||
it.skip_to(is.c_str());
|
||||
for (int rcnt = 0; it != xdb.allterms_end(); it++) {
|
||||
// If we're beyond the terms matching the initial
|
||||
// section, end
|
||||
if (!is.empty() && (*it).find(is) != 0)
|
||||
break;
|
||||
|
||||
// Else try to match the term. The matcher content
|
||||
// is without prefix, so we remove this if any. We
|
||||
// just checked that the index term did begin with
|
||||
// the prefix.
|
||||
string term;
|
||||
if (!prefix.empty()) {
|
||||
term = (*it).substr(prefix.length());
|
||||
} else {
|
||||
if (has_prefix(*it)) {
|
||||
continue;
|
||||
}
|
||||
term = *it;
|
||||
}
|
||||
// Else try to match the term. The matcher content
|
||||
// is without prefix, so we remove this if any. We
|
||||
// just checked that the index term did begin with
|
||||
// the prefix.
|
||||
string term;
|
||||
if (!prefix.empty()) {
|
||||
term = (*it).substr(prefix.length());
|
||||
} else {
|
||||
if (has_prefix(*it)) {
|
||||
continue;
|
||||
}
|
||||
term = *it;
|
||||
}
|
||||
|
||||
if (matcher && !matcher->match(term))
|
||||
continue;
|
||||
if (matcher && !matcher->match(term))
|
||||
continue;
|
||||
|
||||
res.entries.push_back(
|
||||
TermMatchEntry(*it, xdb.get_collection_freq(*it),
|
||||
it.get_termfreq()));
|
||||
res.entries.push_back(
|
||||
TermMatchEntry(*it, xdb.get_collection_freq(*it),
|
||||
it.get_termfreq()));
|
||||
|
||||
// The problem with truncating here is that this is done
|
||||
// alphabetically and we may not keep the most frequent
|
||||
// terms. OTOH, not doing it may stall the program if
|
||||
// we are walking the whole term list. We compromise
|
||||
// by cutting at 2*max
|
||||
if (max > 0 && ++rcnt >= 2*max)
|
||||
break;
|
||||
}
|
||||
m_reason.erase();
|
||||
break;
|
||||
} catch (const Xapian::DatabaseModifiedError &e) {
|
||||
m_reason = e.get_msg();
|
||||
xdb.reopen();
|
||||
continue;
|
||||
} XCATCHERROR(m_reason);
|
||||
break;
|
||||
// The problem with truncating here is that this is done
|
||||
// alphabetically and we may not keep the most frequent
|
||||
// terms. OTOH, not doing it may stall the program if
|
||||
// we are walking the whole term list. We compromise
|
||||
// by cutting at 2*max
|
||||
if (max > 0 && ++rcnt >= 2*max)
|
||||
break;
|
||||
}
|
||||
m_reason.erase();
|
||||
break;
|
||||
} catch (const Xapian::DatabaseModifiedError &e) {
|
||||
m_reason = e.get_msg();
|
||||
xdb.reopen();
|
||||
continue;
|
||||
} XCATCHERROR(m_reason);
|
||||
break;
|
||||
}
|
||||
if (!m_reason.empty()) {
|
||||
LOGERR(("termMatch: %s\n", m_reason.c_str()));
|
||||
return false;
|
||||
LOGERR(("termMatch: %s\n", m_reason.c_str()));
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
@ -476,62 +480,62 @@ public:
|
||||
TermIter *Db::termWalkOpen()
|
||||
{
|
||||
if (!m_ndb || !m_ndb->m_isopen)
|
||||
return 0;
|
||||
return 0;
|
||||
TermIter *tit = new TermIter;
|
||||
if (tit) {
|
||||
tit->db = m_ndb->xrdb;
|
||||
tit->db = m_ndb->xrdb;
|
||||
XAPTRY(tit->it = tit->db.allterms_begin(), tit->db, m_reason);
|
||||
if (!m_reason.empty()) {
|
||||
LOGERR(("Db::termWalkOpen: xapian error: %s\n", m_reason.c_str()));
|
||||
return 0;
|
||||
}
|
||||
if (!m_reason.empty()) {
|
||||
LOGERR(("Db::termWalkOpen: xapian error: %s\n", m_reason.c_str()));
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
return tit;
|
||||
}
|
||||
bool Db::termWalkNext(TermIter *tit, string &term)
|
||||
{
|
||||
XAPTRY(
|
||||
if (tit && tit->it != tit->db.allterms_end()) {
|
||||
term = *(tit->it)++;
|
||||
return true;
|
||||
}
|
||||
if (tit && tit->it != tit->db.allterms_end()) {
|
||||
term = *(tit->it)++;
|
||||
return true;
|
||||
}
|
||||
, tit->db, m_reason);
|
||||
|
||||
if (!m_reason.empty()) {
|
||||
LOGERR(("Db::termWalkOpen: xapian error: %s\n", m_reason.c_str()));
|
||||
LOGERR(("Db::termWalkOpen: xapian error: %s\n", m_reason.c_str()));
|
||||
}
|
||||
return false;
|
||||
}
|
||||
void Db::termWalkClose(TermIter *tit)
|
||||
{
|
||||
try {
|
||||
delete tit;
|
||||
delete tit;
|
||||
} catch (...) {}
|
||||
}
|
||||
|
||||
bool Db::termExists(const string& word)
|
||||
{
|
||||
if (!m_ndb || !m_ndb->m_isopen)
|
||||
return 0;
|
||||
return 0;
|
||||
|
||||
XAPTRY(if (!m_ndb->xrdb.term_exists(word)) return false,
|
||||
m_ndb->xrdb, m_reason);
|
||||
|
||||
if (!m_reason.empty()) {
|
||||
LOGERR(("Db::termWalkOpen: xapian error: %s\n", m_reason.c_str()));
|
||||
return false;
|
||||
LOGERR(("Db::termWalkOpen: xapian error: %s\n", m_reason.c_str()));
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Db::stemDiffers(const string& lang, const string& word,
|
||||
const string& base)
|
||||
const string& base)
|
||||
{
|
||||
Xapian::Stem stemmer(lang);
|
||||
if (!stemmer(word).compare(stemmer(base))) {
|
||||
LOGDEB2(("Rcl::Db::stemDiffers: same for %s and %s\n",
|
||||
word.c_str(), base.c_str()));
|
||||
return false;
|
||||
LOGDEB2(("Rcl::Db::stemDiffers: same for %s and %s\n",
|
||||
word.c_str(), base.c_str()));
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -242,7 +242,7 @@ void SearchData::simplify()
|
||||
j < i + clsubp->getSub()->m_query.size(); j++) {
|
||||
m_query[j]->setParent(this);
|
||||
}
|
||||
i += clsubp->getSub()->m_query.size() - 1;
|
||||
i += int(clsubp->getSub()->m_query.size()) - 1;
|
||||
|
||||
// We don't want the clauses to be deleted when the parent is, as we
|
||||
// now own them.
|
||||
|
||||
@ -270,7 +270,7 @@ public:
|
||||
{
|
||||
return m_parentSearch ? m_parentSearch->getMaxExp() : 10000;
|
||||
}
|
||||
int getMaxCl()
|
||||
size_t getMaxCl()
|
||||
{
|
||||
return m_parentSearch ? m_parentSearch->getMaxCl() : 100000;
|
||||
}
|
||||
@ -376,7 +376,7 @@ protected:
|
||||
std::string m_field; // Field specification if any
|
||||
HighlightData m_hldata;
|
||||
// Current count of Xapian clauses, to check against expansion limit
|
||||
int m_curcl;
|
||||
size_t m_curcl;
|
||||
bool processUserString(Rcl::Db &db, const string &iq,
|
||||
std::string &ermsg,
|
||||
void* pq, int slack = 0, bool useNear = false);
|
||||
|
||||
@ -840,7 +840,7 @@ bool SearchDataClauseSimple::processUserString(Rcl::Db &db, const string &iq,
|
||||
tpq.setTSQ(&splitter);
|
||||
splitter.text_to_words(*it);
|
||||
|
||||
slack += tpq.lastpos() - tpq.terms().size() + 1;
|
||||
slack += tpq.lastpos() - int(tpq.terms().size()) + 1;
|
||||
|
||||
LOGDEB0(("strToXapianQ: termcount: %d\n", tpq.terms().size()));
|
||||
switch (tpq.terms().size() + terminc) {
|
||||
@ -963,7 +963,7 @@ bool SearchDataClausePath::toNativeQuery(Rcl::Db &db, void *p)
|
||||
|
||||
vector<Xapian::Query> orqueries;
|
||||
|
||||
if (m_text[0] == '/')
|
||||
if (path_isabsolute(m_text))
|
||||
orqueries.push_back(Xapian::Query(wrap_prefix(pathelt_prefix)));
|
||||
else
|
||||
m_text = path_tildexpand(m_text);
|
||||
|
||||
@ -22,11 +22,12 @@
|
||||
|
||||
#include "autoconfig.h"
|
||||
|
||||
#include <unistd.h>
|
||||
#include "safeunistd.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <map>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
using namespace std;
|
||||
|
||||
#include <xapian.h>
|
||||
|
||||
@ -133,7 +133,8 @@ image/vnd.djvu = exec rcldjvu
|
||||
image/svg+xml = exec rclsvg
|
||||
image/x-xcf = execm rclimg
|
||||
inode/symlink = internal
|
||||
inode/x-empty = exec rclnull
|
||||
application/x-zerosize = internal
|
||||
inode/x-empty = internal application/x-zerosize
|
||||
message/rfc822 = internal
|
||||
text/calendar = execm rclics;mimetype=text/plain
|
||||
text/html = internal
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
# (C) 2004 J.F.Dockes. License: GPL
|
||||
#
|
||||
# Recoll default configuration file. This typically lives in
|
||||
# @prefix@/share/recoll/examples and provides default values. You can
|
||||
# $prefix/share/recoll/examples and provides default values. You can
|
||||
# override selected parameters by adding assignments to
|
||||
# ~/.recoll/recoll.conf (or $RECOLL_CONFDIR/recoll.conf)
|
||||
#
|
||||
@ -199,12 +199,13 @@ maxfsoccuppc = 0
|
||||
idxflushmb = 10
|
||||
|
||||
# Place to search for executable filters. If RECOLL_FILTERSDIR is set in
|
||||
# the environment, we use it instead
|
||||
filtersdir = @prefix@/share/recoll/filters
|
||||
# the environment, we use it instead. Defaults to $prefix/share/recoll/filters
|
||||
# filtersdir = /path/to/my/filters
|
||||
|
||||
# Place to search for icons. The only reason to change this would be if you
|
||||
# want to change the icons displayed in the result list
|
||||
iconsdir = @prefix@/share/recoll/images
|
||||
# want to change the icons displayed in the result list.
|
||||
# Defaults to $prefix/share/recoll/images
|
||||
# iconsdir = /path/to/my/icons
|
||||
|
||||
# Should we use the system's 'file -i' command as a final step in file type
|
||||
# identification ? This may be useful, but will usually cause the
|
||||
@ -16,21 +16,20 @@
|
||||
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#ifdef RECOLL_DATADIR
|
||||
#ifdef BUILDING_RECOLL
|
||||
#include "autoconfig.h"
|
||||
#else
|
||||
#include "config.h"
|
||||
#endif /* RECOLL */
|
||||
#endif /* HAVE_CONFIG_H */
|
||||
|
||||
#ifdef RECOLL_DATADIR
|
||||
#ifdef BUILDING_RECOLL
|
||||
/* Yes, recoll unac is actually c++, let's face modernity, I will not be
|
||||
caught writing another binary search */
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <algorithm>
|
||||
#include <iostream>
|
||||
#include UNORDERED_MAP_INCLUDE
|
||||
|
||||
using std::string;
|
||||
@ -53,7 +52,7 @@ static inline bool is_except_char(unsigned short c, string& trans)
|
||||
trans = it->second;
|
||||
return true;
|
||||
}
|
||||
#endif /* RECOLL_DATADIR */
|
||||
#endif /* BUILDING_RECOLL*/
|
||||
|
||||
/*
|
||||
* If configure.in has not defined this symbol, assume const. It
|
||||
@ -14171,9 +14170,9 @@ int unacmaybefold_string_utf16(const char* in, size_t in_length,
|
||||
char** outp, size_t* out_lengthp, int what)
|
||||
{
|
||||
char* out;
|
||||
int out_size;
|
||||
int out_length;
|
||||
unsigned int i;
|
||||
size_t out_size;
|
||||
size_t out_length;
|
||||
size_t i;
|
||||
|
||||
out_size = in_length > 0 ? in_length : 1024;
|
||||
|
||||
@ -14191,13 +14190,13 @@ int unacmaybefold_string_utf16(const char* in, size_t in_length,
|
||||
for(i = 0; i < in_length; i += 2) {
|
||||
unsigned short c;
|
||||
unsigned short* p;
|
||||
int l;
|
||||
int k;
|
||||
size_t l;
|
||||
size_t k;
|
||||
c = (in[i] << 8) | (in[i + 1] & 0xff);
|
||||
/*
|
||||
* Lookup the tables for decomposition information
|
||||
*/
|
||||
#ifdef RECOLL_DATADIR
|
||||
#ifdef BUILDING_RECOLL
|
||||
// Exception unac/fold values set by user. There should be 3 arrays for
|
||||
// unac/fold/unac+fold. For now there is only one array, which used to
|
||||
// be set for unac+fold, and is mostly or only used to prevent diacritics
|
||||
@ -14220,11 +14219,11 @@ int unacmaybefold_string_utf16(const char* in, size_t in_length,
|
||||
l = trans.size() / 2;
|
||||
}
|
||||
} else {
|
||||
#endif /* RECOLL_DATADIR */
|
||||
#endif /* BUILDING_RECOLL */
|
||||
unac_uf_char_utf16_(c, p, l, what)
|
||||
#ifdef RECOLL_DATADIR
|
||||
#ifdef BUILDING_RECOLL
|
||||
}
|
||||
#endif /* RECOLL_DATADIR */
|
||||
#endif /* BUILDING_RECOLL */
|
||||
|
||||
/*
|
||||
* Explain what's done in great detail
|
||||
@ -14237,7 +14236,7 @@ int unacmaybefold_string_utf16(const char* in, size_t in_length,
|
||||
if(l == 0) {
|
||||
DEBUG_APPEND("untouched\n");
|
||||
} else {
|
||||
int i;
|
||||
size_t i;
|
||||
for(i = 0; i < l; i++)
|
||||
DEBUG_APPEND("0x%04x ", p[i]);
|
||||
DEBUG_APPEND("\n");
|
||||
@ -14437,10 +14436,11 @@ static int convert(const char* from, const char* to,
|
||||
const char* tmp = space;
|
||||
size_t tmp_length = 2;
|
||||
if(iconv(cd, (ICONV_CONST char **) &tmp, &tmp_length, &out, &out_remain) == (size_t)-1) {
|
||||
if(errno == E2BIG)
|
||||
if(errno == E2BIG) {
|
||||
/* fall thru to the E2BIG case below */;
|
||||
else
|
||||
goto out;
|
||||
} else {
|
||||
goto out;
|
||||
}
|
||||
} else {
|
||||
/* The offending character was replaced by a SPACE, skip it. */
|
||||
in += 2;
|
||||
@ -14456,7 +14456,7 @@ static int convert(const char* from, const char* to,
|
||||
/*
|
||||
* The output does not fit in the current out buffer, enlarge it.
|
||||
*/
|
||||
int length = out - out_base;
|
||||
size_t length = out - out_base;
|
||||
out_size *= 2;
|
||||
{
|
||||
char *saved = out_base;
|
||||
@ -14562,7 +14562,7 @@ const char* unac_version(void)
|
||||
return UNAC_VERSION;
|
||||
}
|
||||
|
||||
#ifdef RECOLL_DATADIR
|
||||
#ifdef BUILDING_RECOLL
|
||||
void unac_set_except_translations(const char *spectrans)
|
||||
{
|
||||
except_trans.clear();
|
||||
@ -14615,4 +14615,4 @@ void unac_set_except_translations(const char *spectrans)
|
||||
free(out);
|
||||
}
|
||||
}
|
||||
#endif /* RECOLL_DATADIR */
|
||||
#endif /* BUILDING_RECOLL */
|
||||
|
||||
@ -1 +0,0 @@
|
||||
unac.c
|
||||
1
src/unac/unac.cpp
Normal file
1
src/unac/unac.cpp
Normal file
@ -0,0 +1 @@
|
||||
#include "unac.c"
|
||||
@ -114,7 +114,7 @@ int fold_string(const char* charset,
|
||||
/* To be called before starting threads in mt programs */
|
||||
void unac_init_mt();
|
||||
|
||||
#ifdef RECOLL_DATADIR
|
||||
#ifdef BUILDING_RECOLL
|
||||
#include <string>
|
||||
/**
|
||||
* Set exceptions for unaccenting, for characters which should not be
|
||||
@ -128,7 +128,7 @@ void unac_init_mt();
|
||||
* can't be an exception character, deal with it...
|
||||
*/
|
||||
void unac_set_except_translations(const char *spectrans);
|
||||
#endif /* RECOLL_DATADIR */
|
||||
#endif /* BUILDING_RECOLL */
|
||||
|
||||
/*
|
||||
* Return unac version number.
|
||||
|
||||
@ -109,8 +109,8 @@ trfileudi.o : fileudi.cpp fileudi.h
|
||||
EXECMD_OBJS= trexecmd.o
|
||||
trexecmd : $(EXECMD_OBJS)
|
||||
$(CXX) $(ALL_CXXFLAGS) -o trexecmd $(EXECMD_OBJS) $(LIBRECOLL)
|
||||
trexecmd.o : execmd.cpp execmd.h
|
||||
$(CXX) -o trexecmd.o -c $(ALL_CXXFLAGS) -DTEST_EXECMD execmd.cpp
|
||||
trexecmd.o : trexecmd.cpp execmd.h
|
||||
$(CXX) -o trexecmd.o -c $(ALL_CXXFLAGS) -I../xaposix trexecmd.cpp
|
||||
|
||||
TRANSCODE_OBJS= trtranscode.o
|
||||
transcode : $(TRANSCODE_OBJS)
|
||||
|
||||
@ -217,7 +217,7 @@ void base64_encode(const string &in, string &out)
|
||||
|
||||
out.clear();
|
||||
|
||||
int srclength = in.length();
|
||||
string::size_type srclength = in.length();
|
||||
int sidx = 0;
|
||||
while (2 < srclength) {
|
||||
input[0] = in[sidx++];
|
||||
@ -244,7 +244,7 @@ void base64_encode(const string &in, string &out)
|
||||
if (0 != srclength) {
|
||||
/* Get what's left. */
|
||||
input[0] = input[1] = input[2] = '\0';
|
||||
for (int i = 0; i < srclength; i++)
|
||||
for (string::size_type i = 0; i < srclength; i++)
|
||||
input[i] = in[sidx++];
|
||||
|
||||
output[0] = input[0] >> 2;
|
||||
|
||||
@ -184,7 +184,7 @@ public:
|
||||
// Offset of last write (newest header)
|
||||
off_t m_nheadoffs;
|
||||
// Pad size for newest entry.
|
||||
int m_npadsize;
|
||||
off_t m_npadsize;
|
||||
// Keep history or only last entry
|
||||
bool m_uniquentries;
|
||||
///////////////////// End header entries
|
||||
@ -956,10 +956,10 @@ bool CirCache::erase(const string& udi)
|
||||
// entry.
|
||||
class CCScanHookSpacer : public CCScanHook {
|
||||
public:
|
||||
UINT sizewanted;
|
||||
UINT sizeseen;
|
||||
off_t sizewanted;
|
||||
off_t sizeseen;
|
||||
vector<pair<string, off_t> > squashed_udis;
|
||||
CCScanHookSpacer(int sz)
|
||||
CCScanHookSpacer(off_t sz)
|
||||
: sizewanted(sz), sizeseen(0) {assert(sz > 0);}
|
||||
|
||||
virtual status takeone(off_t offs, const string& udi,
|
||||
@ -1009,14 +1009,14 @@ bool CirCache::put(const string& udi, const ConfSimple *iconf,
|
||||
|
||||
// Data compression ?
|
||||
const char *datap = data.c_str();
|
||||
unsigned int datalen = data.size();
|
||||
size_t datalen = data.size();
|
||||
unsigned short flags = 0;
|
||||
TempBuf compbuf;
|
||||
if (!(iflags & NoCompHint)) {
|
||||
ULONG len = compressBound(data.size());
|
||||
uLong len = compressBound(static_cast<uLong>(data.size()));
|
||||
char *bf = compbuf.setsize(len);
|
||||
if (bf != 0 &&
|
||||
compress((Bytef*)bf, &len, (Bytef*)data.c_str(), data.size())
|
||||
compress((Bytef*)bf, &len, (Bytef*)data.c_str(), static_cast<uLong>(data.size()))
|
||||
== Z_OK) {
|
||||
if (float(len) < 0.9 * float(data.size())) {
|
||||
// bf is local but it's our static buffer address
|
||||
@ -1034,16 +1034,16 @@ bool CirCache::put(const string& udi, const ConfSimple *iconf,
|
||||
}
|
||||
|
||||
// Characteristics for the new entry.
|
||||
int nsize = CIRCACHE_HEADER_SIZE + dic.size() + datalen;
|
||||
int nwriteoffs = m_d->m_oheadoffs;
|
||||
int npadsize = 0;
|
||||
off_t nsize = CIRCACHE_HEADER_SIZE + dic.size() + datalen;
|
||||
off_t nwriteoffs = m_d->m_oheadoffs;
|
||||
off_t npadsize = 0;
|
||||
bool extending = false;
|
||||
|
||||
LOGDEB(("CirCache::put: nsz %d oheadoffs %d\n", nsize, m_d->m_oheadoffs));
|
||||
|
||||
// Check if we can recover some pad space from the (physically) previous
|
||||
// entry.
|
||||
int recovpadsize = m_d->m_oheadoffs == CIRCACHE_FIRSTBLOCK_SIZE ?
|
||||
off_t recovpadsize = m_d->m_oheadoffs == CIRCACHE_FIRSTBLOCK_SIZE ?
|
||||
0 : m_d->m_npadsize;
|
||||
if (recovpadsize != 0) {
|
||||
// Need to read the latest entry's header, to rewrite it with a
|
||||
@ -1082,7 +1082,7 @@ bool CirCache::put(const string& udi, const ConfSimple *iconf,
|
||||
} else {
|
||||
// Scan the file until we have enough space for the new entry,
|
||||
// and determine the pad size up to the 1st preserved entry
|
||||
int scansize = nsize - recovpadsize;
|
||||
off_t scansize = nsize - recovpadsize;
|
||||
LOGDEB(("CirCache::put: scanning for size %d from offs %u\n",
|
||||
scansize, (UINT)m_d->m_oheadoffs));
|
||||
CCScanHookSpacer spacer(scansize);
|
||||
|
||||
@ -14,9 +14,7 @@
|
||||
* Free Software Foundation, Inc.,
|
||||
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
#include "autoconfig.h"
|
||||
|
||||
#ifndef TEST_CONFTREE
|
||||
|
||||
@ -70,7 +68,7 @@ void ConfSimple::parseinput(istream &input)
|
||||
}
|
||||
|
||||
{
|
||||
int ll = strlen(cline);
|
||||
size_t ll = strlen(cline);
|
||||
while (ll > 0 && (cline[ll-1] == '\n' || cline[ll-1] == '\r')) {
|
||||
cline[ll-1] = 0;
|
||||
ll--;
|
||||
@ -576,8 +574,8 @@ bool ConfSimple::hasNameAnywhere(const string& nm) const
|
||||
int ConfTree::get(const std::string &name, string &value, const string &sk)
|
||||
const
|
||||
{
|
||||
if (sk.empty() || sk[0] != '/') {
|
||||
// LOGDEB((stderr, "ConfTree::get: looking in global space\n"));
|
||||
if (sk.empty() || !path_isabsolute(sk) ) {
|
||||
// LOGDEB((stderr, "ConfTree::get: looking in global space for sk [%s]\n", sk.c_str()));
|
||||
return ConfSimple::get(name, value, sk);
|
||||
}
|
||||
|
||||
@ -590,15 +588,21 @@ int ConfTree::get(const std::string &name, string &value, const string &sk)
|
||||
|
||||
// Look in subkey and up its parents until root ('')
|
||||
for (;;) {
|
||||
// LOGDEB((stderr,"ConfTree::get: looking for '%s' in '%s'\n",
|
||||
// name.c_str(), msk.c_str()));
|
||||
// LOGDEB((stderr,"ConfTree::get: looking for '%s' in '%s'\n",
|
||||
// name.c_str(), msk.c_str()));
|
||||
if (ConfSimple::get(name, value, msk))
|
||||
return 1;
|
||||
string::size_type pos = msk.rfind("/");
|
||||
if (pos != string::npos) {
|
||||
msk.replace(pos, string::npos, string());
|
||||
} else
|
||||
} else {
|
||||
#ifdef _WIN32
|
||||
if (msk.size() == 2 && isalpha(msk[0]) && msk[1] == ':')
|
||||
msk.clear();
|
||||
else
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -15,14 +15,18 @@
|
||||
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
#ifndef TEST_COPYFILE
|
||||
#include "autoconfig.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <errno.h>
|
||||
#include "safefcntl.h"
|
||||
#include <sys/types.h>
|
||||
#include "safesysstat.h"
|
||||
#include "safeunistd.h"
|
||||
#ifndef _WIN32
|
||||
#include <sys/time.h>
|
||||
#include <utime.h>
|
||||
#endif
|
||||
|
||||
#include <cstring>
|
||||
|
||||
@ -43,7 +47,7 @@ bool copyfile(const char *src, const char *dst, string &reason, int flags)
|
||||
|
||||
LOGDEB(("copyfile: %s to %s\n", src, dst));
|
||||
|
||||
if ((sfd = ::open(src, O_RDONLY)) < 0) {
|
||||
if ((sfd = ::open(src, O_RDONLY, 0)) < 0) {
|
||||
reason += string("open ") + src + ": " + strerror(errno);
|
||||
goto out;
|
||||
}
|
||||
@ -149,6 +153,7 @@ bool renameormove(const char *src, const char *dst, string &reason)
|
||||
return false;
|
||||
}
|
||||
|
||||
#ifndef _WIN32
|
||||
// Try to preserve modes, owner, times. This may fail for a number
|
||||
// of reasons
|
||||
if ((st1.st_mode & 0777) != (st.st_mode & 0777)) {
|
||||
@ -167,7 +172,7 @@ bool renameormove(const char *src, const char *dst, string &reason)
|
||||
times[1].tv_sec = st.st_mtime;
|
||||
times[1].tv_usec = 0;
|
||||
utimes(dst, times);
|
||||
|
||||
#endif
|
||||
// All ok, get rid of origin
|
||||
if (unlink(src) < 0) {
|
||||
reason += string("Can't unlink ") + src + "Error : " + strerror(errno);
|
||||
|
||||
@ -62,7 +62,6 @@ bool getCpuConf(CpuConf& cpus)
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#else // TEST_CPUCONF
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user