Merged the changes from the current windows port

This commit is contained in:
Jean-Francois Dockes 2015-09-25 16:14:27 +02:00
commit 5330685ec1
149 changed files with 10645 additions and 1575 deletions

View File

@ -11,6 +11,16 @@ libtool
*.lo
*~
\#*
*.obj
*.sdf
*.tlog
*.lib
*.idb
*.log
*.pdb
.vs
*.exe
*.ilk
ptrans
src/aclocal.m4
src/compile
@ -82,12 +92,7 @@ src/qtgui/recoll
src/qtgui/recoll.app
src/qtgui/recoll.pro
src/query/alldeps
src/query/location.hh
src/query/position.hh
src/query/recollq
src/query/stack.hh
src/query/wasaparse.cpp
src/query/wasaparse.hpp
src/sampleconf/rclmon.sh
src/sampleconf/recoll.conf
src/utils/alldeps

View File

@ -22,7 +22,8 @@ COMMONCPPFLAGS = -I. \
-I$(top_srcdir)/rcldb \
-I$(top_srcdir)/unac \
-I$(top_srcdir)/utils \
-I$(top_srcdir)/xaposix
-I$(top_srcdir)/xaposix \
-DBUILDING_RECOLL
AM_CPPFLAGS = -Wall -Wno-unused \
$(COMMONCPPFLAGS) \

View File

@ -47,7 +47,7 @@ BincStream::~BincStream(void)
}
//------------------------------------------------------------------------
string BincStream::popString(unsigned int size)
string BincStream::popString(std::string::size_type size)
{
if (size > nstr.length())
size = nstr.length();

View File

@ -25,6 +25,7 @@
*/
#ifndef convert_h_included
#define convert_h_included
#include <stddef.h>
#include <string>
#include <vector>
#include <iomanip>
@ -93,7 +94,7 @@ namespace Binc {
std::string tmp;
for (std::string::const_iterator i = s.begin();
i != s.end() && i + 1 != s.end(); i += 2) {
int n;
ptrdiff_t n;
unsigned char c = *i;
unsigned char d = *(i + 1);
@ -122,7 +123,7 @@ namespace Binc {
for (std::string::const_iterator i = s_in.begin(); i != s_in.end(); ++i) {
unsigned char c = (unsigned char)*i;
if (c <= 31 || c >= 127 || c == '\"' || c == '\\')
return "{" + toString(s_in.length()) + "}\r\n" + s_in;
return "{" + toString((unsigned long)s_in.length()) + "}\r\n" + s_in;
}
return "\"" + s_in + "\"";
@ -145,7 +146,7 @@ namespace Binc {
//----------------------------------------------------------------------
inline void chomp(std::string &s_in, const std::string &chars = " \t\r\n")
{
int n = s_in.length();
std::string::size_type n = s_in.length();
while (n > 1 && chars.find(s_in[n - 1]) != std::string::npos)
s_in.resize(n-- - 1);
}
@ -290,7 +291,7 @@ namespace Binc {
BincStream &operator << (char t);
//--
std::string popString(unsigned int size);
std::string popString(std::string::size_type size);
//--
char popChar(void);

View File

@ -25,7 +25,7 @@
*/
#ifndef mime_inputsource_h_included
#define mime_inputsource_h_included
#include "autoconfig.h"
// Data source for MIME parser
// Note about large files: we might want to change the unsigned int
@ -49,7 +49,7 @@ namespace Binc {
inline MimeInputSource(int fd, unsigned int start = 0);
virtual inline ~MimeInputSource(void);
virtual inline size_t fillRaw(char *raw, size_t nbytes);
virtual inline ssize_t fillRaw(char *raw, size_t nbytes);
virtual inline void reset(void);
virtual inline bool fillInputBuffer(void);
@ -87,7 +87,7 @@ namespace Binc {
{
}
inline size_t MimeInputSource::fillRaw(char *raw, size_t nbytes)
inline ssize_t MimeInputSource::fillRaw(char *raw, size_t nbytes)
{
return read(fd, raw, nbytes);
}
@ -179,7 +179,7 @@ namespace Binc {
class MimeInputSourceStream : public MimeInputSource {
public:
inline MimeInputSourceStream(istream& s, unsigned int start = 0);
virtual inline size_t fillRaw(char *raw, size_t nb);
virtual inline ssize_t fillRaw(char *raw, size_t nb);
virtual inline void reset(void);
private:
istream& s;
@ -191,7 +191,7 @@ namespace Binc {
{
}
inline size_t MimeInputSourceStream::fillRaw(char *raw, size_t nb)
inline ssize_t MimeInputSourceStream::fillRaw(char *raw, size_t nb)
{
// Why can't streams tell how many characters were actually read
// when hitting eof ?
@ -199,16 +199,16 @@ namespace Binc {
s.seekg(0, ios::end);
std::streampos lst = s.tellg();
s.seekg(st);
size_t nbytes = lst - st;
size_t nbytes = size_t(lst - st);
if (nbytes > nb) {
nbytes = nb;
}
if (nbytes <= 0) {
return (size_t)-1;
return (ssize_t)-1;
}
s.read(raw, nbytes);
return nbytes;
return static_cast<ssize_t>(nbytes);
}
inline void MimeInputSourceStream::reset(void)

View File

@ -306,9 +306,9 @@ void Binc::MimePart::parseMessageRFC822(vector<Binc::MimePart> *members,
bool Binc::MimePart::skipUntilBoundary(const string &delimiter,
unsigned int *nlines, bool *eof)
{
int endpos = delimiter.length();
string::size_type endpos = delimiter.length();
char *delimiterqueue = 0;
int delimiterpos = 0;
string::size_type delimiterpos = 0;
const char *delimiterStr = delimiter.c_str();
if (delimiter != "") {
delimiterqueue = new char[endpos];
@ -340,7 +340,7 @@ bool Binc::MimePart::skipUntilBoundary(const string &delimiter,
delimiterpos = 0;
if (compareStringToQueue(delimiterStr, delimiterqueue,
delimiterpos, endpos)) {
delimiterpos, int(endpos))) {
foundBoundary = true;
break;
}
@ -451,7 +451,7 @@ void Binc::MimePart::parseMultipart(const string &boundary,
skipUntilBoundary(delimiter, nlines, eof);
if (!eof)
*boundarysize = delimiter.size();
*boundarysize = int(delimiter.size());
postBoundaryProcessing(eof, nlines, boundarysize, foundendofpart);
@ -484,7 +484,7 @@ void Binc::MimePart::parseMultipart(const string &boundary,
skipUntilBoundary(delimiter, nlines, eof);
if (!*eof)
*boundarysize = delimiter.size();
*boundarysize = int(delimiter.size());
postBoundaryProcessing(eof, nlines, boundarysize, foundendofpart);
}
@ -528,7 +528,7 @@ void Binc::MimePart::parseSinglePart(const string &toboundary,
// *boundarysize = _toboundary.length();
char *boundaryqueue = 0;
int endpos = _toboundary.length();
size_t endpos = _toboundary.length();
if (toboundary != "") {
boundaryqueue = new char[endpos];
memset(boundaryqueue, 0, endpos);
@ -540,7 +540,7 @@ void Binc::MimePart::parseSinglePart(const string &toboundary,
string line;
bool toboundaryIsEmpty = (toboundary == "");
char c;
int boundarypos = 0;
string::size_type boundarypos = 0;
while (mimeSource->getChar(&c)) {
if (c == '\n') { ++*nbodylines; ++*nlines; }
@ -553,8 +553,8 @@ void Binc::MimePart::parseSinglePart(const string &toboundary,
boundarypos = 0;
if (compareStringToQueue(_toboundaryStr, boundaryqueue,
boundarypos, endpos)) {
*boundarysize = _toboundary.length();
boundarypos, int(endpos))) {
*boundarysize = static_cast<int>(_toboundary.length());
break;
}
}

View File

@ -119,7 +119,7 @@ int Binc::MimePart::doParseOnlyHeader(MimeInputSource *ms,
if (c == '\n') ++nlines;
if (c == ':') break;
if (c == '\n') {
for (int i = name.length() - 1; i >= 0; --i)
// Push back every character read so far for the header name.
// The index is unsigned, so count down from length() to 1: a test of
// the form "i >= 0" would always be true and the loop would never end
// (and "length() - 1" would wrap around for an empty name).
for (string::size_type i = name.length(); i > 0; --i)
    mimeSource->ungetChar();
quit = true;

186
src/common/autoconfig-win.h Normal file
View File

@ -0,0 +1,186 @@
/* Manually edited version of autoconfig.h for Windows. Many things are
overridden in the C++ code by ifdefs _WIN32 anyway */
#ifndef _AUTOCONFIG_H_INCLUDED
#define _AUTOCONFIG_H_INCLUDED
/* Define if building universal (internal helper macro) */
/* #undef AC_APPLE_UNIVERSAL_BUILD */
/* Path to the aspell api include file */
/* #undef ASPELL_INCLUDE "aspell-local.h" */
/* Path to the aspell program */
/* #define ASPELL_PROG "/usr/bin/aspell" */
/* No X11 session monitoring support */
#define DISABLE_X11MON
/* Path to the fam api include file */
/* #undef FAM_INCLUDE */
/* Path to the file program */
#define FILE_PROG "/usr/bin/file"
/* "Have C++0x" */
#undef HAVE_CXX0X_UNORDERED
/* Define to 1 if you have the <dlfcn.h> header file. */
#define HAVE_DLFCN_H 1
/* Define to 1 if you have the <inttypes.h> header file. */
#define HAVE_INTTYPES_H 1
/* Define to 1 if you have the `dl' library (-ldl). */
#define HAVE_LIBDL 1
/* Define to 1 if you have the `pthread' library (-lpthread). */
#define HAVE_LIBPTHREAD 1
/* Define to 1 if you have the `z' library (-lz). */
#define HAVE_LIBZ 1
/* Define to 1 if you have the <memory.h> header file. */
#define HAVE_MEMORY_H 1
/* Define to 1 if you have the `mkdtemp' function. */
/* #undef HAVE_MKDTEMP */
/* Define to 1 if you have the `posix_spawn,' function. */
/* #undef HAVE_POSIX_SPAWN_ */
/* Define to 1 if you have the `setrlimit' function. */
#define HAVE_SETRLIMIT 1
/* Has std::shared_ptr */
#define HAVE_SHARED_PTR_STD
/* Has std::tr1::shared_ptr */
/* #undef HAVE_SHARED_PTR_TR1 */
/* Define to 1 if you have the <spawn.h> header file. */
#define HAVE_SPAWN_H 1
/* Define to 1 if you have the <stdint.h> header file. */
#define HAVE_STDINT_H 1
/* Define to 1 if you have the <stdlib.h> header file. */
#define HAVE_STDLIB_H 1
/* Define to 1 if you have the <strings.h> header file. */
#define HAVE_STRINGS_H 1
/* Define to 1 if you have the <string.h> header file. */
#define HAVE_STRING_H 1
/* Define to 1 if you have the <sys/mount.h> header file. */
/* #undef HAVE_SYS_MOUNT_H */
/* Define to 1 if you have the <sys/param.h,> header file. */
/* #undef HAVE_SYS_PARAM_H_ */
/* Define to 1 if you have the <sys/statfs.h> header file. */
/* #undef HAVE_SYS_STATFS_H */
/* Define to 1 if you have the <sys/statvfs.h> header file. */
/* #undef HAVE_SYS_STATVFS_H */
/* Define to 1 if you have the <sys/stat.h> header file. */
#define HAVE_SYS_STAT_H 1
/* Define to 1 if you have the <sys/types.h> header file. */
#define HAVE_SYS_TYPES_H 1
/* Define to 1 if you have the <sys/vfs.h> header file. */
/* #undef HAVE_SYS_VFS_H */
/* "Have tr1" */
/* #undef HAVE_TR1_UNORDERED */
/* Define to 1 if you have the <unistd.h> header file. */
/* #undef HAVE_UNISTD_H */
/* Use multiple threads for indexing */
#define IDX_THREADS 1
/* Define to the sub-directory in which libtool stores uninstalled libraries.
*/
#define LT_OBJDIR ".libs/"
/* Define to the address where bug reports for this package should be sent. */
#define PACKAGE_BUGREPORT ""
/* Define to the full name of this package. */
#define PACKAGE_NAME "Recoll"
/* Define to the full name and version of this package. */
#define PACKAGE_STRING "Recoll 1.22.0"
/* Define to the one symbol short name of this package. */
#define PACKAGE_TARNAME "recoll"
/* Define to the home page for this package. */
#define PACKAGE_URL ""
/* Define to the version of this package. */
#define PACKAGE_VERSION "1.22.0"
/* putenv parameter is const */
/* #undef PUTENV_ARG_CONST */
/* iconv parameter 2 is const char** */
#define RCL_ICONV_INBUF_CONST 1
/* Real time monitoring option */
#undef RCL_MONITOR
/* Split camelCase words */
/* #undef RCL_SPLIT_CAMELCASE */
/* Compile the aspell interface */
/* #undef RCL_USE_ASPELL */
/* Compile the fam interface */
/* #undef RCL_USE_FAM */
/* Compile the inotify interface */
#define RCL_USE_INOTIFY 1
/* Define to 1 if you have the ANSI C header files. */
#define STDC_HEADERS 1
/* Use posix_spawn() */
/* #undef USE_POSIX_SPAWN */
/* Enable using the system's 'file' command to id mime if we fail internally
*/
/* #undef USE_SYSTEM_FILE_COMMAND */
/* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most
significant byte first (like Motorola and SPARC, unlike Intel). */
#if defined AC_APPLE_UNIVERSAL_BUILD
# if defined __BIG_ENDIAN__
# define WORDS_BIGENDIAN 1
# endif
#else
# ifndef WORDS_BIGENDIAN
/* # undef WORDS_BIGENDIAN */
# endif
#endif
/* Define to 1 if the X Window System is missing or not being used. */
/* #undef X_DISPLAY_MISSING */
/* Enable large inode numbers on Mac OS X 10.5. */
#ifndef _DARWIN_USE_64_BIT_INODE
# define _DARWIN_USE_64_BIT_INODE 1
#endif
/* Number of bits in a file offset, on hosts where this is settable. */
/* #undef _FILE_OFFSET_BITS */
/* Define for large files, on AIX-style hosts. */
/* #undef _LARGE_FILES */
#define DISABLE_WEB_INDEXER
#include "conf_post.h"
#endif // already included

View File

@ -35,7 +35,7 @@ BeagleQueueCache::BeagleQueueCache(RclConfig *cnf)
ccdir = "webcache";
ccdir = path_tildexpand(ccdir);
// If not an absolute path, compute relative to config dir
if (ccdir.at(0) != '/')
if (!path_isabsolute(ccdir))
ccdir = path_cat(cnf->getConfDir(), ccdir);
int maxmbs = 40;

View File

@ -26,3 +26,42 @@
# define STD_SHARED_PTR RefCntr
#endif
#ifdef _WIN32
// Windows build: map the POSIX names used by the code onto what the
// Microsoft toolchain provides, or onto stubs where nothing equivalent
// is available.
#include "safewindows.h"

typedef int pid_t;

// Stub: always returns -1 and never touches buf.
inline int readlink(const char *cp, void *buf, int cnt) {
    return -1;
}

#define HAVE_STRUCT_TIMESPEC
#define strdup _strdup
#define timegm _mkgmtime

#ifdef _MSC_VER
// The reentrant variants must fill the caller-supplied struct tm and
// evaluate to a pointer to it (null on failure). Forward to the MSVC
// *_s functions, which take their two arguments in the opposite order
// and return 0 on success. Note that B is evaluated twice.
#define gmtime_r(A, B) (gmtime_s((B), (A)) == 0 ? (B) : 0)
#define localtime_r(A, B) (localtime_s((B), (A)) == 0 ? (B) : 0)
#define PATH_MAX MAX_PATH
#define MAXPATHLEN PATH_MAX
typedef int mode_t;
#endif

typedef DWORD32 u_int32_t;
typedef DWORD64 u_int64_t;
typedef unsigned __int8 u_int8_t;
// NOTE(review): int stays 32 bits wide on 64-bit Windows, so this is
// narrower than size_t there -- confirm whether SSIZE_T should be used.
typedef int ssize_t;

#define strncasecmp _strnicmp
#define strcasecmp _stricmp
// NOTE(review): _chsize_s reports failure with a non-zero error code,
// not -1 -- confirm that callers test the result against 0.
#define ftruncate _chsize_s
#define chdir _chdir

// access() mode values. X_OK has the same value as R_OK, presumably
// because there is no separate execute-permission test here -- confirm.
#define R_OK 4
#define W_OK 2
#define X_OK 4

#define RECOLL_DATADIR "C:\\recoll\\"

// Symbolic links are not handled: S_ISLNK() is always false and lstat()
// is plain stat().
#define S_ISLNK(X) false
#define lstat stat

#define fseeko _fseeki64
// NOTE(review): the 64-bit result is cast to off_t -- confirm off_t is
// wide enough on the target toolchain.
#define ftello (off_t)_ftelli64
#endif

View File

@ -19,11 +19,13 @@
#include <stdio.h>
#include <errno.h>
#ifndef _WIN32
#include <langinfo.h>
#include <sys/param.h>
#endif
#include <limits.h>
#include "safesysstat.h"
#include "safeunistd.h"
#include <sys/param.h>
#ifdef __FreeBSD__
#include <osreldate.h>
#endif
@ -45,6 +47,7 @@
#include "readfile.h"
#include "fstreewalk.h"
#include "cpuconf.h"
#include "execmd.h"
using namespace std;
@ -120,7 +123,8 @@ void RclConfig::zeroMe() {
bool RclConfig::isDefaultConfig() const
{
string defaultconf = path_cat(path_canon(path_home()), ".recoll/");
string defaultconf = path_cat(path_homedata(),
path_defaultrecollconfsubdir());
string specifiedconf = path_canon(m_confdir);
path_catslash(specifiedconf);
return !defaultconf.compare(specifiedconf);
@ -146,14 +150,7 @@ RclConfig::RclConfig(const string *argcnf)
}
// Compute our data dir name, typically /usr/local/share/recoll
const char *cdatadir = getenv("RECOLL_DATADIR");
if (cdatadir == 0) {
// If not in environment, use the compiled-in constant.
m_datadir = RECOLL_DATADIR;
} else {
m_datadir = cdatadir;
}
m_datadir = path_sharedatadir();
// We only do the automatic configuration creation thing for the default
// config dir, not if it was specified through -c or RECOLL_CONFDIR
bool autoconfdir = false;
@ -172,7 +169,7 @@ RclConfig::RclConfig(const string *argcnf)
m_confdir = path_canon(cp);
} else {
autoconfdir = true;
m_confdir = path_cat(path_home(), ".recoll/");
m_confdir = path_cat(path_homedata(), path_defaultrecollconfsubdir());
}
}
@ -200,6 +197,7 @@ RclConfig::RclConfig(const string *argcnf)
// is called from the main thread at once, by constructing a config
// from recollinit
if (o_localecharset.empty()) {
#ifndef _WIN32
const char *cp;
cp = nl_langinfo(CODESET);
// We don't keep US-ASCII. It's better to use a superset
@ -217,6 +215,9 @@ RclConfig::RclConfig(const string *argcnf)
// Use cp1252 instead of iso-8859-1, it's a superset.
o_localecharset = string(cstr_cp1252);
}
#else
o_localecharset = "UTF-8";
#endif
LOGDEB1(("RclConfig::getDefCharset: localecharset [%s]\n",
o_localecharset.c_str()));
}
@ -635,7 +636,7 @@ bool RclConfig::inStopSuffixes(const string& fni)
it != stoplist.end(); it++) {
STOPSUFFIXES->insert(SfString(stringtolower(*it)));
if (m_maxsufflen < it->length())
m_maxsufflen = it->length();
m_maxsufflen = int(it->length());
}
}
@ -1154,7 +1155,7 @@ string RclConfig::getConfdirPath(const char *varname, const char *dflt) const
} else {
result = path_tildexpand(result);
// If not an absolute path, compute relative to config dir
if (result.at(0) != '/') {
if (!path_isabsolute(result)) {
result = path_cat(getConfDir(), result);
}
}
@ -1212,7 +1213,7 @@ void RclConfig::urlrewrite(const string& dbdir, string& url) const
// This call always succeeds because the key comes from getNames()
if (m_ptrans->get(*it, npath, dbdir)) {
path = path.replace(0, it->size(), npath);
url = "file://" + path;
url = path_pathtofileurl(path);
}
break;
}
@ -1305,45 +1306,45 @@ vector<string> RclConfig::getDaemSkippedPaths() const
}
// Look up an executable filter. We look in $RECOLL_FILTERSDIR,
// filtersdir in config file, then let the system use the PATH
// Look up an executable filter. We add $RECOLL_FILTERSDIR,
// and filtersdir from the config file to the PATH, then use execmd::which()
string RclConfig::findFilter(const string &icmd) const
{
// If the path is absolute, this is it
if (icmd[0] == '/')
if (path_isabsolute(icmd))
return icmd;
string cmd;
const char *cp;
const char *cp = getenv("PATH");
if (!cp) //??
cp = "";
string PATH(cp);
// Filters dir from environment ?
// For historical reasons: check in personal config directory
PATH = getConfDir() + path_PATHsep() + PATH;
string temp;
// Prepend $datadir/filters
temp = path_cat(m_datadir, "filters");
PATH = temp + path_PATHsep() + PATH;
// Prepend possible configuration parameter?
if (getConfParam(string("filtersdir"), temp)) {
temp = path_tildexpand(temp);
PATH = temp + path_PATHsep() + PATH;
}
// Prepend possible environment variable
if ((cp = getenv("RECOLL_FILTERSDIR"))) {
cmd = path_cat(cp, icmd);
if (access(cmd.c_str(), X_OK) == 0)
return cmd;
}
// Filters dir as configuration parameter?
if (getConfParam(string("filtersdir"), cmd)) {
cmd = path_cat(cmd, icmd);
if (access(cmd.c_str(), X_OK) == 0)
return cmd;
PATH = string(cp) + path_PATHsep() + PATH;
}
// Filters dir as datadir subdir. Actually the standard case, but
// this is normally the same value found in config file (previous step)
cmd = path_cat(m_datadir, "filters");
cmd = path_cat(cmd, icmd);
if (access(cmd.c_str(), X_OK) == 0)
return cmd;
// Last resort for historical reasons: check in personal config
// directory
cmd = path_cat(getConfDir(), icmd);
if (access(cmd.c_str(), X_OK) == 0)
return cmd;
// Let the shell try to find it...
return icmd;
string cmd;
if (ExecCmd::which(icmd, cmd, PATH.c_str())) {
return cmd;
} else {
// Let the shell try to find it...
return icmd;
}
}
/**

View File

@ -17,6 +17,9 @@
#include "autoconfig.h"
#include <stdio.h>
#ifdef _WIN32
#include "safewindows.h"
#endif
#include <signal.h>
#include <locale.h>
#include <pthread.h>
@ -33,8 +36,6 @@
#include "smallut.h"
#include "execmd.h"
static const int catchedSigs[] = {SIGINT, SIGQUIT, SIGTERM, SIGUSR1, SIGUSR2};
static pthread_t mainthread_id;
static void siglogreopen(int)
@ -43,23 +44,17 @@ static void siglogreopen(int)
DebugLog::reopen();
}
RclConfig *recollinit(RclInitFlags flags,
void (*cleanup)(void), void (*sigcleanup)(int),
string &reason, const string *argcnf)
#ifndef _WIN32
// We would like to block SIGCHLD globally, but we can't because
// QT uses it. Have to block it inside execmd.cpp
static const int catchedSigs[] = {SIGINT, SIGQUIT, SIGTERM, SIGUSR1, SIGUSR2};
void initAsyncSigs(void (*sigcleanup)(int))
{
if (cleanup)
atexit(cleanup);
// We ignore SIGPIPE always. All pieces of code which can write to a pipe
// must check write() return values.
#ifndef _WIN32
signal(SIGPIPE, SIG_IGN);
// Make sure the locale is set. This is only for converting file names
// to utf8 for indexing.
setlocale(LC_CTYPE, "");
// We would like to block SIGCHLD globally, but we can't because
// QT uses it. Have to block it inside execmd.cpp
#endif
// Install app signal handler
if (sigcleanup) {
@ -75,11 +70,91 @@ RclConfig *recollinit(RclInitFlags flags,
}
}
// Install log rotate sig handler
{
struct sigaction action;
action.sa_handler = siglogreopen;
action.sa_flags = 0;
sigemptyset(&action.sa_mask);
if (signal(SIGHUP, SIG_IGN) != SIG_IGN) {
if (sigaction(SIGHUP, &action, 0) < 0) {
perror("Sigaction failed");
}
}
}
}
#else
// Windows signals etc.
//
// ^C can be caught by the signal() emulation, but not ^Break
// apparently, which is why we use the native approach too
//
// When a keyboard interrupt occurs, Windows creates a thread inside
// the process and calls the handler. The process exits when the
// handler returns or after at most 10 seconds.
//
// In practice, only recollindex sets sigcleanup(), and the routine
// just sets a global termination flag. So we just call it and sleep,
// hoping that cleanup does not take more than what Windows will let
// us live.
static void (*l_sigcleanup)(int);
// Console control handler registered with SetConsoleCtrlHandler().
// Returns FALSE (event not handled) when no cleanup routine has been
// registered or when the event is not one of the five listed below.
// Otherwise it calls the cleanup routine with SIGINT, sleeps for 10000 ms
// and returns TRUE.
static BOOL WINAPI CtrlHandler(DWORD fdwCtrlType)
{
    const bool isTerminationEvent =
        fdwCtrlType == CTRL_C_EVENT ||
        fdwCtrlType == CTRL_CLOSE_EVENT ||
        fdwCtrlType == CTRL_BREAK_EVENT ||
        fdwCtrlType == CTRL_LOGOFF_EVENT ||
        fdwCtrlType == CTRL_SHUTDOWN_EVENT;

    if (l_sigcleanup == 0 || !isTerminationEvent) {
        return FALSE;
    }

    // Notify the application, then hold this handler thread so that the
    // rest of the process gets some time before the handler returns.
    l_sigcleanup(SIGINT);
    Sleep(10000);
    return TRUE;
}
static const int catchedSigs[] = {SIGINT, SIGTERM};
// Windows version: install the application's asynchronous cleanup hooks.
// @param sigcleanup handler to run on interruption, may be null. When set,
//   it is remembered for the console control handler and installed with
//   signal() for every entry of catchedSigs which was not already ignored.
// The console control handler itself is registered in all cases.
void initAsyncSigs(void (*sigcleanup)(int))
{
    if (sigcleanup) {
        l_sigcleanup = sigcleanup;
        const unsigned int nsigs = sizeof(catchedSigs) / sizeof(int);
        for (unsigned int idx = 0; idx < nsigs; idx++) {
            const int signum = catchedSigs[idx];
            // Leave alone the signals which were set to be ignored
            if (signal(signum, SIG_IGN) == SIG_IGN) {
                continue;
            }
            signal(signum, sigcleanup);
        }
    }
    SetConsoleCtrlHandler((PHANDLER_ROUTINE)CtrlHandler, TRUE);
}
#endif
RclConfig *recollinit(RclInitFlags flags,
void (*cleanup)(void), void (*sigcleanup)(int),
string &reason, const string *argcnf)
{
if (cleanup)
atexit(cleanup);
// Make sure the locale is set. This is only for converting file names
// to utf8 for indexing.
setlocale(LC_CTYPE, "");
DebugLog::getdbl()->setloglevel(DEBDEB1);
DebugLog::setfilename("stderr");
if (getenv("RECOLL_LOGDATE"))
DebugLog::getdbl()->logdate(1);
initAsyncSigs(sigcleanup);
RclConfig *config = new RclConfig(argcnf);
if (!config || !config->ok()) {
reason = "Configuration could not be built:\n";
@ -105,7 +180,7 @@ RclConfig *recollinit(RclInitFlags flags,
if (!logfilename.empty()) {
logfilename = path_tildexpand(logfilename);
// If not an absolute path or a special log name, compute relative to config dir
if (logfilename.at(0) != '/' &&
if (!path_isabsolute(logfilename) &&
!DebugLog::DebugLog::isspecialname(logfilename.c_str())) {
logfilename = path_cat(config->getConfDir(), logfilename);
}
@ -115,18 +190,6 @@ RclConfig *recollinit(RclInitFlags flags,
int lev = atoi(loglevel.c_str());
DebugLog::getdbl()->setloglevel(lev);
}
// Install log rotate sig handler
{
struct sigaction action;
action.sa_handler = siglogreopen;
action.sa_flags = 0;
sigemptyset(&action.sa_mask);
if (signal(SIGHUP, SIG_IGN) != SIG_IGN) {
if (sigaction(SIGHUP, &action, 0) < 0) {
perror("Sigaction failed");
}
}
}
// Make sure the locale charset is initialized (so that multiple
// threads don't try to do it at once).
@ -139,14 +202,20 @@ RclConfig *recollinit(RclInitFlags flags,
// Init smallut and pathut static values
pathut_init_mt();
smallut_init_mt();
// Init execmd.h static PATH and PATHELT splitting
{string bogus;
ExecCmd::which("nosuchcmd", bogus);
}
// Init Unac translation exceptions
string unacex;
if (config->getConfParam("unac_except_trans", unacex) && !unacex.empty())
unac_set_except_translations(unacex.c_str());
#ifndef IDX_THREADS
#ifndef _WIN32
ExecCmd::useVfork(true);
#endif
#else
// Keep threads init behind log init, but make sure it's done before
// we do the vfork choice ! The latter is not used any more actually,
@ -156,11 +225,15 @@ RclConfig *recollinit(RclInitFlags flags,
bool novfork;
config->getConfParam("novfork", &novfork);
if (novfork) {
#ifndef _WIN32
LOGDEB0(("rclinit: will use fork() for starting commands\n"));
ExecCmd::useVfork(false);
#endif
} else {
#ifndef _WIN32
LOGDEB0(("rclinit: will use vfork() for starting commands\n"));
ExecCmd::useVfork(true);
#endif
}
#endif
@ -179,10 +252,11 @@ RclConfig *recollinit(RclInitFlags flags,
return config;
}
// Signals are handled by the main thread. All others should call this routine
// to block possible signals
// Signals are handled by the main thread. All others should call this
// routine to block possible signals
void recoll_threadinit()
{
#ifndef _WIN32
sigset_t sset;
sigemptyset(&sset);
@ -190,6 +264,14 @@ void recoll_threadinit()
sigaddset(&sset, catchedSigs[i]);
sigaddset(&sset, SIGHUP);
pthread_sigmask(SIG_BLOCK, &sset, 0);
#else
// Not sure that this is needed at all or correct under windows.
for (unsigned int i = 0; i < sizeof(catchedSigs) / sizeof(int); i++) {
if (signal(catchedSigs[i], SIG_IGN) != SIG_IGN) {
signal(catchedSigs[i], SIG_IGN);
}
}
#endif
}
bool recoll_ismainthread()

View File

@ -110,7 +110,7 @@ bool SynGroups::setfile(const string& fn)
lnum++;
{
int ll = strlen(cline);
size_t ll = strlen(cline);
while (ll > 0 && (cline[ll-1] == '\n' || cline[ll-1] == '\r')) {
cline[ll-1] = 0;
ll--;

View File

@ -142,6 +142,8 @@ static inline int whatcc(unsigned int c)
} else {
vector<unsigned int>::iterator it =
lower_bound(vpuncblocks.begin(), vpuncblocks.end(), c);
if (it == vpuncblocks.end())
return LETTER;
if (c == *it)
return SPACE;
if ((it - vpuncblocks.begin()) % 2 == 1) {
@ -217,11 +219,11 @@ bool TextSplit::o_deHyphenate = false;
// Final term checkpoint: do some checking (the kind which is simpler
// to do here than in the main loop), then send term to our client.
inline bool TextSplit::emitterm(bool isspan, string &w, int pos,
int btstart, int btend)
size_t btstart, size_t btend)
{
LOGDEB2(("TextSplit::emitterm: [%s] pos %d\n", w.c_str(), pos));
unsigned int l = w.length();
int l = int(w.length());
#ifdef TEXTSPLIT_STATS
// Update word length statistics. Do this before we filter out
@ -230,7 +232,7 @@ inline bool TextSplit::emitterm(bool isspan, string &w, int pos,
m_stats.newsamp(m_wordChars);
#endif
if (l > 0 && l < (unsigned)m_maxWordLength) {
if (l > 0 && l < m_maxWordLength) {
// 1 byte word: we index single ascii letters and digits, but
// nothing else. We might want to turn this into a test for a
// single utf8 character instead ?
@ -245,9 +247,9 @@ inline bool TextSplit::emitterm(bool isspan, string &w, int pos,
}
}
if (pos != m_prevpos || l != m_prevlen) {
bool ret = takeword(w, pos, btstart, btend);
bool ret = takeword(w, pos, int(btstart), int(btend));
m_prevpos = pos;
m_prevlen = w.length();
m_prevlen = int(w.length());
return ret;
}
LOGDEB2(("TextSplit::emitterm:dup: [%s] pos %d\n", w.c_str(), pos));
@ -293,7 +295,7 @@ bool TextSplit::span_is_acronym(string *acronym)
// Generate terms from span. Have to take into account the
// flags: ONLYSPANS, NOSPANS, noNumbers
bool TextSplit::words_from_span(int bp)
bool TextSplit::words_from_span(size_t bp)
{
#if 0
cerr << "Span: [" << m_span << "] " << " w_i_s size: " <<
@ -305,10 +307,10 @@ bool TextSplit::words_from_span(int bp)
}
cerr << endl;
#endif
unsigned int spanwords = m_words_in_span.size();
int spanwords = int(m_words_in_span.size());
int pos = m_spanpos;
// Byte position of the span start
int spboffs = bp - m_span.size();
size_t spboffs = bp - m_span.size();
if (o_deHyphenate && spanwords == 2 &&
m_span[m_words_in_span[0].second] == '-') {
@ -322,13 +324,13 @@ bool TextSplit::words_from_span(int bp)
m_spanpos, spboffs, spboffs + m_words_in_span[1].second);
}
for (unsigned int i = 0;
for (int i = 0;
i < ((m_flags&TXTS_ONLYSPANS) ? 1 : spanwords);
i++, pos++) {
int deb = m_words_in_span[i].first;
for (unsigned int j = ((m_flags&TXTS_ONLYSPANS) ? spanwords-1 : i);
for (int j = ((m_flags&TXTS_ONLYSPANS) ? spanwords-1 : i);
j < ((m_flags&TXTS_NOSPANS) ? i+1 : spanwords);
j++) {
@ -362,11 +364,11 @@ bool TextSplit::words_from_span(int bp)
* @param spanerase Set if the current span is at its end. Process it.
* @param bp The current BYTE position in the stream
*/
inline bool TextSplit::doemit(bool spanerase, int bp)
inline bool TextSplit::doemit(bool spanerase, size_t bp)
{
LOGDEB2(("TextSplit::doemit: sper %d bp %d spp %d spanwords %u wS %d wL %d "
"inn %d span [%s]\n",
spanerase, bp, m_spanpos, m_words_in_span.size(),
spanerase, int(bp), m_spanpos, m_words_in_span.size(),
m_wordStart, m_wordLen, m_inNumber, m_span.c_str()));
if (m_wordLen) {
@ -404,8 +406,8 @@ inline bool TextSplit::doemit(bool spanerase, int bp)
case '\'':
m_span.resize(m_span.length()-1);
if (m_words_in_span.size() &&
m_words_in_span.back().second > m_span.size())
m_words_in_span.back().second = m_span.size();
m_words_in_span.back().second > int(m_span.size()))
m_words_in_span.back().second = int(m_span.size());
// bp is unsigned (size_t): step back one byte only when that is possible.
// Decrementing first and then testing for "< 0" can never detect the
// underflow and would wrap a zero position around to the maximum value.
if (bp > 0)
    --bp;
break;
@ -422,7 +424,7 @@ inline bool TextSplit::doemit(bool spanerase, int bp)
} else {
m_wordStart = m_span.length();
m_wordStart = int(m_span.length());
}
@ -830,16 +832,16 @@ bool TextSplit::cjk_to_words(Utf8Iter *itp, unsigned int *cp)
}
// Take note of byte offset for this character.
boffs[nchars-1] = it.getBpos();
boffs[nchars-1] = int(it.getBpos());
// Output all new ngrams: they begin at each existing position
// and end after the new character. onlyspans->only output
// maximum words, nospans=> single chars
if (!(m_flags & TXTS_ONLYSPANS) || nchars == o_CJKNgramLen) {
unsigned int btend = it.getBpos() + it.getBlen();
unsigned int loopbeg = (m_flags & TXTS_NOSPANS) ? nchars-1 : 0;
unsigned int loopend = (m_flags & TXTS_ONLYSPANS) ? 1 : nchars;
for (unsigned int i = loopbeg; i < loopend; i++) {
int btend = int(it.getBpos() + it.getBlen());
int loopbeg = (m_flags & TXTS_NOSPANS) ? nchars-1 : 0;
int loopend = (m_flags & TXTS_ONLYSPANS) ? 1 : nchars;
for (int i = loopbeg; i < loopend; i++) {
if (!takeword(it.buffer().substr(boffs[i],
btend-boffs[i]),
m_wordpos - (nchars-i-1), boffs[i], btend)) {
@ -860,7 +862,7 @@ bool TextSplit::cjk_to_words(Utf8Iter *itp, unsigned int *cp)
// If onlyspans is set, there may be things to flush in the buffer
// first
if ((m_flags & TXTS_ONLYSPANS) && nchars > 0 && nchars != o_CJKNgramLen) {
unsigned int btend = it.getBpos(); // Current char is out
int btend = int(it.getBpos()); // Current char is out
if (!takeword(it.buffer().substr(boffs[0], btend-boffs[0]),
m_wordpos - nchars,
boffs[0], btend)) {

View File

@ -184,7 +184,7 @@ private:
// Current span. Might be jf.dockes@wanadoo.f
std::string m_span;
std::vector <std::pair<unsigned int, unsigned int> > m_words_in_span;
std::vector <std::pair<int, int> > m_words_in_span;
// Current word: no punctuation at all in there. Byte offset
// relative to the current span and byte length
@ -201,7 +201,7 @@ private:
// It may happen that our cleanup would result in emitting the
// same term twice. We try to avoid this
int m_prevpos;
unsigned int m_prevlen;
int m_prevlen;
#ifdef TEXTSPLIT_STATS
// Stats counters. These are processed in TextSplit rather than by a
@ -215,11 +215,11 @@ private:
// This processes cjk text:
bool cjk_to_words(Utf8Iter *it, unsigned int *cp);
bool emitterm(bool isspan, std::string &term, int pos, int bs, int be);
bool doemit(bool spanerase, int bp);
bool emitterm(bool isspan, std::string &term, int pos, size_t bs,size_t be);
bool doemit(bool spanerase, size_t bp);
void discardspan();
bool span_is_acronym(std::string *acronym);
bool words_from_span(int bp);
bool words_from_span(size_t bp);
};
#endif /* _TEXTSPLIT_H_INCLUDED_ */

View File

@ -408,13 +408,13 @@ AC_ARG_ENABLE(recollq,
need for Qt). This is done by default if --disable-qtgui is set but this
option enables forcing it.]),
enableRECOLLQ=$enableval, enableRECOLLQ="no")
if text X"$enableRECOLLQ" != X ; then
if test X"$enableRECOLLQ" != X ; then
AM_CONDITIONAL(MAKECMDLINE, [test X$enableRECOLLQ = Xyes])
else
AM_CONDITIONAL(MAKECMDLINE, [test X$enableQT = Xno])
fi
if test X$enableQT = Xyes ; then
if test X$QTDIR != X ; then
@ -586,6 +586,5 @@ AC_SUBST(RCLLIBVERSION)
AC_CONFIG_FILES(Makefile)
AC_CONFIG_FILES(common/rclversion.h)
AC_CONFIG_FILES(python/recoll/setup.py)
AC_CONFIG_FILES(sampleconf/recoll.conf)
AC_OUTPUT

165
src/filters/rcldoc.py Executable file
View File

@ -0,0 +1,165 @@
#!/usr/bin/env python
import rclexecm
import rclexec1
import re
import sys
import os
# Processing the output from antiword: create html header and tail, process
# continuation lines, escape HTML special characters, accumulate the data.
class WordProcessData:
    """Accumulate text lines into a single HTML document.

    em must provide htmlescape(text) and setmimetype(mimetype); both are
    called by this class. Lines are fed one at a time to takeLine() and the
    finished document is retrieved with wrapData().
    """

    def __init__(self, em):
        self.em = em
        self.out = ""
        self.cont = ""
        self.gotdata = False
        # Line with continued word (ending in -)
        # we strip the - which is not nice for actually hyphenated word.
        # What to do ?
        # Raw strings: \w and \s are regex escapes, not Python string escapes.
        self.patcont = re.compile(r'''[\w][-]$''')
        # Pattern for breaking continuation at last word start
        self.patws = re.compile(r'''([\s])([\w]+)(-)$''')

    def takeLine(self, line):
        """Process one input line and append the result to self.out.

        Empty lines seen before any data are dropped. The first non-empty
        line triggers output of the HTML header. A line made of a single
        form feed becomes a horizontal rule. A line ending in <word char>-
        has its last word held back in self.cont and prepended to the next
        line.
        """
        if not self.gotdata:
            if line == "":
                return
            # The attributes of the meta element must be separated by
            # white space, hence the leading blank before 'content'.
            self.out = '<html><head><title></title>' + \
                       '<meta http-equiv="Content-Type"' + \
                       ' content="text/html;charset=UTF-8">' + \
                       '</head><body><p>'
            self.gotdata = True

        if self.cont:
            line = self.cont + line
            self.cont = ""

        if line == "\f":
            self.out += "</p><hr><p>"
            return

        if self.patcont.search(line):
            # Break at last whitespace
            match = self.patws.search(line)
            if match:
                # Keep the last word (without its hyphen) for the next line
                self.cont = line[match.start(2):match.end(2)]
                line = line[0:match.start(1)]
            else:
                # No whitespace: the whole line is carried over as is
                self.cont = line
                line = ""

        if line:
            self.out += self.em.htmlescape(line) + "<br>"
        else:
            self.out += "<br>"

    def wrapData(self):
        """Close the document if any data was seen, set the MIME type to
        text/html on em, and return the accumulated output."""
        if self.gotdata:
            self.out += "</p></body></html>"
        self.em.setmimetype("text/html")
        return self.out
# Null data accumulator. We use this when antiword has failed, and the
# data actually comes from rclrtf, rcltext or wvWare, which all
# output HTML
class WordPassData:
    '''Pass-through accumulator: the input lines are kept untouched.

    em is the protocol object, only used to set the output MIME type.'''

    def __init__(self, em):
        self.out = ""
        self.em = em

    def takeLine(self, line):
        '''Append one line to the output. The line separator is restored
        because the lines we receive have been stripped: joining them
        directly would glue the last word of a line to the first word of
        the next one.'''
        self.out += line + "\n"

    def wrapData(self):
        '''Set the output MIME type to text/html and return the data.'''
        self.em.setmimetype("text/html")
        return self.out
# Filter for msword docs. Try antiword, and if this fails, check for
# an rtf or text document (.doc are sometimes like this...). Also try
# wvWare if the doc is actually a word doc
class WordFilter:
    '''Command selector for .doc files.

    em is the protocol object (used for logging and handed to the
    post-processors), td is the directory holding the filter scripts.'''

    def __init__(self, em, td):
        self.em = em
        # Number of commands already proposed for the current file
        self.ntry = 0
        self.execdir = td

    def reset(self):
        '''Restart the command sequence (called for each new file).'''
        self.ntry = 0

    def hasControlChars(self, data):
        '''Return True if data (a byte string) holds control characters
        other than newline, tab, form feed and carriage return.'''
        allowed = (0x0a, 0x09, 0x0c, 0x0d)
        # bytearray yields integers under both Python 2 and Python 3
        for code in bytearray(data):
            if code < 32 and code not in allowed:
                return True
        return False

    def mimetype(self, fn):
        '''Guess the actual MIME type from the first 100 bytes of the
        file. Returns an empty string if the file can't be opened.'''
        rtfprolog = b"{\\rtf1"
        docprolog = b"\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1"
        try:
            f = open(fn, "rb")
        except Exception:
            return ""
        try:
            data = f.read(100)
        finally:
            f.close()
        if data[0:6] == rtfprolog:
            return "text/rtf"
        elif data[0:8] == docprolog:
            return "application/msword"
        elif self.hasControlChars(data):
            return "application/octet-stream"
        else:
            return "text/plain"

    def getCmd(self, fn):
        '''Return command to execute, and postprocessor, according to
        our state: first try antiword, then others depending on mime
        identification. Do 2 tries at most. An empty command list means
        that there is nothing (more) to try.'''
        if self.ntry == 0:
            self.ntry = 1
            cmd = rclexecm.which("antiword")
            if cmd:
                return ([cmd, "-t", "-i", "1", "-m", "UTF-8"],
                        WordProcessData(self.em))
            else:
                return ([], None)
        elif self.ntry == 1:
            self.ntry = 2
            # antiword failed. Check for an rtf file, or text and
            # process accordingly. If the doc is actually msword, try
            # wvWare.
            mt = self.mimetype(fn)
            self.em.rclog("rcldoc.py: actual MIME type %s" % mt)
            if mt == "text/plain":
                # The interpreter name must be a string: the bare name
                # python is not defined anywhere.
                return (["python", os.path.join(self.execdir, "rcltext.py")],
                        WordPassData(self.em))
            elif mt == "text/rtf":
                cmd = ["python", os.path.join(self.execdir, "rclrtf.py"),
                       "-s"]
                self.em.rclog("rcldoc.py: returning cmd %s" % cmd)
                return (cmd, WordPassData(self.em))
            elif mt == "application/msword":
                cmd = rclexecm.which("wvWare")
                if cmd:
                    return ([cmd, "--nographics", "--charset=utf-8"],
                            WordPassData(self.em))
                else:
                    return ([], None)
            else:
                return ([], None)
        else:
            return ([], None)
if __name__ == '__main__':
    # Directory the filters are run from: needed when this filter has to
    # execute another one
    filterdir = os.path.dirname(sys.argv[0])

    # antiword is mandatory. Falling back to wvWare alone would be
    # possible, but the old filter did not do it.
    if rclexecm.which("antiword"):
        protocol = rclexecm.RclExecM()
        wordfilter = WordFilter(protocol, filterdir)
        rclexecm.main(protocol, rclexec1.Executor(protocol, wordfilter))
    else:
        print("RECFILTERROR HELPERNOTFOUND antiword")
        sys.exit(1)

112
src/filters/rclexec1.py Normal file
View File

@ -0,0 +1,112 @@
#################################
# Copyright (C) 2014 J.F.Dockes
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the
# Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
########################################################
# Common code for replacing the old shell scripts with Python execm
# ones: this implements the basic functions for a filter which
# executes a command to translate a simple file (like rclword with
# antiword).
#
# This was motivated by the Windows port: to replace shell and Unix
# utility (awk, etc.) usage. We can't just execute a python script for
# each file, as this would be too slow. So this helps implement a
# permanent script which repeatedly executes single commands.
import subprocess
import rclexecm
# This class has the code to execute the subprocess and call a
# data-specific post-processor. Command and processor are supplied by
# the object which we receive as a parameter, which in turn is defined
# in the actual executable filter (e.g. rcldoc.py)
class Executor:
    '''Run an external command on a file and post-process its output.

    em is the protocol object (used for logging). flt is the filter
    object: it must supply reset() and getCmd(filename), the latter
    returning (command list, postprocessor[, options]).'''

    # Option bit for getCmd(): do not treat a non-zero exit status of
    # the command as an error
    opt_ignxval = 1

    def __init__(self, em, flt):
        self.em = em
        self.flt = flt
        self.currentindex = 0

    def runCmd(self, cmd, filename, postproc, opt):
        ''' Substitute parameters and execute command, process output
        with the specific postprocessor and return the complete text.
        We expect cmd as a list of command name + arguments

        Returns (ok, data). ok is False if the command could not be
        started, or if it exited with a non-zero status and opt_ignxval
        is not set in opt.'''
        try:
            fullcmd = cmd + [filename]
            proc = subprocess.Popen(fullcmd,
                                    stdout = subprocess.PIPE)
            stdout = proc.stdout
        except subprocess.CalledProcessError as err:
            self.em.rclog("extractone: Popen(%s) error: %s" % (fullcmd, err))
            return (False, "")
        except OSError as err:
            self.em.rclog("extractone: Popen(%s) OS error: %s" % (fullcmd, err))
            return (False, "")

        # The post-processor gets the lines without surrounding whitespace
        for line in stdout:
            postproc.takeLine(line.strip())
        # All the output was read: release the pipe before reaping the child
        stdout.close()

        proc.wait()
        if (opt & self.opt_ignxval) == 0 and proc.returncode:
            self.em.rclog("extractone: [%s] returncode %d" % \
                          (filename, proc.returncode))
            return False, postproc.wrapData()
        else:
            return True, postproc.wrapData()

    def extractone(self, params):
        '''Extract the data for params["filename:"], trying the commands
        proposed by the filter object until one succeeds or none is left.

        Returns (ok, data, ipath, eof indicator).'''
        #self.em.rclog("extractone %s %s" % (params["filename:"], \
        #                                    params["mimetype:"]))
        self.flt.reset()
        ok = False
        # dict.has_key() does not exist any more in Python 3: use 'in'
        if "filename:" not in params:
            self.em.rclog("extractone: no mime or file name")
            return (ok, "", "", rclexecm.RclExecM.eofnow)

        fn = params["filename:"]
        while True:
            cmdseq = self.flt.getCmd(fn)
            cmd = cmdseq[0]
            postproc = cmdseq[1]
            # The options element is optional
            opt = cmdseq[2] if len(cmdseq) == 3 else 0
            if cmd:
                ok, data = self.runCmd(cmd, fn, postproc, opt)
                if ok:
                    break
            else:
                # Empty command: the filter has nothing more to try
                break
        if ok:
            return (ok, data, "", rclexecm.RclExecM.eofnext)
        else:
            return (ok, "", "", rclexecm.RclExecM.eofnow)

    ###### File type handler api, used by rclexecm ---------->
    def openfile(self, params):
        self.currentindex = 0
        return True

    def getipath(self, params):
        return self.extractone(params)

    def getnext(self, params):
        # There is a single document per file: anything after the first
        # call is the end of the file
        if self.currentindex >= 1:
            return (False, "", "", rclexecm.RclExecM.eofnow)
        else:
            ret= self.extractone(params)
            self.currentindex += 1
            return ret

View File

@ -1,10 +1,34 @@
#!/usr/bin/env python
#################################
# Copyright (C) 2014 J.F.Dockes
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the
# Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
########################################################
## Recoll multifilter communication module and utilities
from __future__ import print_function
###########################################
## Generic recoll multifilter communication code
import sys
import os
import tempfile
import shutil
import getopt
############################################
# RclExecM implements the communication protocol with the recollindex
# process. It calls the handler object specific to the document type to
# actually get the data.
class RclExecM:
noteof = 0
eofnext = 1
@ -27,9 +51,13 @@ class RclExecM:
else:
self.maxmembersize = 50 * 1024
self.maxmembersize = self.maxmembersize * 1024
if sys.platform == "win32":
import msvcrt
msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
msvcrt.setmode(sys.stdin.fileno(), os.O_BINARY)
def rclog(self, s, doexit = 0, exitvalue = 1):
print >> sys.stderr, "RCLMFILT:", self.myname, ":", s
print("RCLMFILT: %s: %s" % (self.myname, s), file=sys.stderr)
if doexit:
sys.exit(exitvalue)
@ -87,29 +115,29 @@ class RclExecM:
self.rclog("GOT UNICODE for ipath [%s]" % (ipath,))
docdata = docdata.encode("UTF-8")
print "Document:", len(docdata)
print("Document: %d" % len(docdata))
sys.stdout.write(docdata)
if len(ipath):
print "Ipath:", len(ipath)
print("Ipath: %d" % len(ipath))
sys.stdout.write(ipath)
if len(self.mimetype):
print "Mimetype:", len(self.mimetype)
print("Mimetype: %d" % len(self.mimetype))
sys.stdout.write(self.mimetype)
# If we're at the end of the contents, say so
if iseof == RclExecM.eofnow:
print "Eofnow: 0"
print("Eofnow: 0")
elif iseof == RclExecM.eofnext:
print "Eofnext: 0"
print("Eofnext: 0")
if iserror == RclExecM.subdocerror:
print "Subdocerror: 0"
print("Subdocerror: 0")
elif iserror == RclExecM.fileerror:
print "Fileerror: 0"
print("Fileerror: 0")
# End of message
print
print()
sys.stdout.flush()
#self.rclog("done writing data")
@ -168,67 +196,161 @@ class RclExecM:
self.processmessage(processor, params)
# Helper routine to test for program accessibility
def which(program):
    '''Return the path of an executable file for program, or None.

    If program has a directory part, it is checked as is. Else it is
    looked for in the directory of the running script, then in the
    directories from PATH, also trying the PATHEXT extensions (if any)
    for each location.'''
    def is_exe(fpath):
        # Must be a regular file: directories usually have the execute
        # permission set too, but can't be run.
        return os.path.isfile(fpath) and os.access(fpath, os.X_OK)

    def ext_candidates(fpath):
        yield fpath
        for ext in os.environ.get("PATHEXT", "").split(os.pathsep):
            yield fpath + ext

    def path_candidates():
        yield os.path.dirname(sys.argv[0])
        # PATH may be missing from the environment: don't fail on it
        for path in os.environ.get("PATH", "").split(os.pathsep):
            yield path

    fpath, fname = os.path.split(program)
    if fpath:
        if is_exe(program):
            return program
    else:
        for path in path_candidates():
            exe_file = os.path.join(path, program)
            for candidate in ext_candidates(exe_file):
                if is_exe(candidate):
                    return candidate

    return None
# Helper for a temporary directory which is created on first use and
# removed when the object goes away
class SafeTmpDir:
    '''Lazily created temporary work directory.

    em is the protocol object, used to log cleanup failures.'''

    def __init__(self, em):
        self.em = em
        # Top level directory returned by mkdtemp
        self.toptmp = ""
        # The actual work directory, a subdirectory of toptmp
        self.tmpdir = ""

    def __del__(self):
        try:
            if not self.toptmp:
                # Nothing was ever created
                return
            # Errors are ignored while emptying the work directory
            shutil.rmtree(self.tmpdir, True)
            os.rmdir(self.toptmp)
        except Exception as err:
            self.em.rclog("delete dir failed for " + self.toptmp)

    def getpath(self):
        '''Return the work directory path, creating it on first call.
        The top directory goes under $RECOLL_TMPDIR if this is set, else
        to the default location chosen by tempfile.'''
        if self.tmpdir:
            return self.tmpdir

        mkargs = {'prefix': 'rcltmp'}
        basedir = os.getenv('RECOLL_TMPDIR')
        if basedir:
            mkargs['dir'] = basedir
        self.toptmp = tempfile.mkdtemp(**mkargs)

        self.tmpdir = os.path.join(self.toptmp, 'rclsofftmp')
        os.makedirs(self.tmpdir)
        return self.tmpdir
# Common main routine for all python execm filters: either run the
# normal protocol engine or a local loop to test without recollindex
def main(proto, extract):
if len(sys.argv) == 1:
proto.mainloop(extract)
else:
# Got a file name parameter: TESTING without an execm parent
# Loop on all entries or get specific ipath
def mimetype_with_file(f):
cmd = 'file -i "' + f + '"'
fileout = os.popen(cmd).read()
lst = fileout.split(':')
mimetype = lst[len(lst)-1].strip()
lst = mimetype.split(';')
return lst[0].strip()
def mimetype_with_xdg(f):
cmd = 'xdg-mime query filetype "' + f + '"'
return os.popen(cmd).read().strip()
params = {'filename:': sys.argv[1]}
# Some filters (e.g. rclaudio) need/get a MIME type from the indexer
mimetype = mimetype_with_xdg(sys.argv[1])
params['mimetype:'] = mimetype
if not extract.openfile(params):
print "Open error"
sys.exit(1)
ipath = ""
if len(sys.argv) == 3:
ipath = sys.argv[2]
# mainloop does not return. Just in case
sys.exit(1)
if ipath != "":
params['ipath:'] = ipath
ok, data, ipath, eof = extract.getipath(params)
if ok:
print "== Found entry for ipath %s (mimetype [%s]):" % \
(ipath, proto.mimetype)
if isinstance(data, unicode):
bdata = data.encode("UTF-8")
else:
bdata = data
# Not running the main loop: either acting as single filter (when called
# from other filter for example), or debugging
def usage():
print("Usage: rclexecm.py [-d] [-s] [-i ipath] [filename]",
file=sys.stderr)
sys.exit(1)
actAsSingle = False
debugDumpData = False
ipath = ""
args = sys.argv[1:]
opts, args = getopt.getopt(args, "hdsi:")
for opt, arg in opts:
if opt in ['-h']:
usage()
elif opt in ['-s']:
actAsSingle = True
elif opt in ['-i']:
ipath = arg
elif opt in ['-d']:
debugDumpData = True
else:
print("unknown option %s\n"%opt, file=sys.stderr)
usage()
if len(args) != 1:
usage()
def mimetype_with_file(f):
cmd = 'file -i "' + f + '"'
fileout = os.popen(cmd).read()
lst = fileout.split(':')
mimetype = lst[len(lst)-1].strip()
lst = mimetype.split(';')
return lst[0].strip()
def mimetype_with_xdg(f):
cmd = 'xdg-mime query filetype "' + f + '"'
return os.popen(cmd).read().strip()
def debprint(s):
if not actAsSingle:
print(s)
params = {'filename:': args[0]}
# Some filters (e.g. rclaudio) need/get a MIME type from the indexer
mimetype = mimetype_with_xdg(args[0])
params['mimetype:'] = mimetype
if not extract.openfile(params):
print("Open error", file=sys.stderr)
sys.exit(1)
if ipath != "" or actAsSingle:
params['ipath:'] = ipath
ok, data, ipath, eof = extract.getipath(params)
if ok:
debprint("== Found entry for ipath %s (mimetype [%s]):" % \
(ipath, proto.mimetype))
if isinstance(data, unicode):
bdata = data.encode("UTF-8")
else:
bdata = data
if debugDumpData or actAsSingle:
sys.stdout.write(bdata)
print
else:
print "Got error, eof %d"%eof
print()
sys.exit(0)
else:
print("Got error, eof %d"%eof, file=sys.stderr)
sys.exit(1)
ecnt = 0
while 1:
ok, data, ipath, eof = extract.getnext(params)
if ok:
ecnt = ecnt + 1
print "== Entry %d ipath %s (mimetype [%s]):" % \
(ecnt, ipath, proto.mimetype)
if isinstance(data, unicode):
bdata = data.encode("UTF-8")
else:
bdata = data
#sys.stdout.write(bdata)
print
if eof != RclExecM.noteof:
break
ecnt = 0
while 1:
ok, data, ipath, eof = extract.getnext(params)
if ok:
ecnt = ecnt + 1
debprint("== Entry %d ipath %s (mimetype [%s]):" % \
(ecnt, ipath, proto.mimetype))
if isinstance(data, unicode):
bdata = data.encode("UTF-8")
else:
print "Not ok, eof %d" % eof
break
bdata = data
if debugDumpData:
sys.stdout.write(bdata)
print()
if eof != RclExecM.noteof:
sys.exit(0)
else:
print("Not ok, eof %d" % eof, file=sys.stderr)
sys.exit(1)
# Not sure this makes sense, but going on looping certainly does not
if actAsSingle:
sys.exit(0)

View File

@ -147,6 +147,9 @@ if ($@) {
exit(1);
}
binmode(STDIN) || die "cannot binmode STDIN";
binmode(STDOUT) || die "cannot binmode STDOUT";
#print STDERR "RCLIMG: Starting\n";
$| = 1;
while (1) {

View File

@ -109,7 +109,7 @@ class European8859TextClassifier:
if __name__ == "__main__":
f = open(sys.argv[1])
f = open(sys.argv[1], "rb")
rawtext = f.read()
f.close()

View File

@ -1,9 +0,0 @@
#!/bin/sh
# It may make sense in some cases to set this null filter (no output)
# instead of using recoll_noindex or leaving the default filter in
# case one doesn't want to install it: this will avoid endless retries
# to reindex the affected files, as recoll will think it has succeeded
# indexing them. Downside: the files won't be indexed when one
# actually installs the real filter, will need a -z
exit 0

224
src/filters/rclopxml.py Executable file
View File

@ -0,0 +1,224 @@
#!/usr/bin/env python
# Copyright (C) 2015 J.F.Dockes
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the
# Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
######################################
from __future__ import print_function
import sys
import rclexecm
import rclxslt
import fnmatch
from zipfile import ZipFile
meta_stylesheet = '''<?xml version="1.0"?>
<xsl:stylesheet
xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0"
xmlns:cp="http://schemas.openxmlformats.org/package/2006/metadata/core-properties"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:dcterms="http://purl.org/dc/terms/"
xmlns:dcmitype="http://purl.org/dc/dcmitype/"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<!-- <xsl:output method="text"/> -->
<xsl:output omit-xml-declaration="yes"/>
<xsl:template match="cp:coreProperties">
<xsl:text>&#10;</xsl:text>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
<xsl:text>&#10;</xsl:text>
<xsl:apply-templates/>
</xsl:template>
<xsl:template match="dc:creator">
<meta>
<xsl:attribute name="name">
<!-- <xsl:value-of select="name()"/> pour sortir tous les meta avec
le meme nom que dans le xml (si on devenait dc-natif) -->
<xsl:text>author</xsl:text>
</xsl:attribute>
<xsl:attribute name="content">
<xsl:value-of select="."/>
</xsl:attribute>
</meta>
<xsl:text>&#10;</xsl:text>
</xsl:template>
<xsl:template match="dcterms:modified">
<meta>
<xsl:attribute name="name">
<xsl:text>date</xsl:text>
</xsl:attribute>
<xsl:attribute name="content">
<xsl:value-of select="."/>
</xsl:attribute>
</meta>
<xsl:text>&#10;</xsl:text>
</xsl:template>
<xsl:template match="*">
</xsl:template>
</xsl:stylesheet>
'''
word_tagmatch = 'w:p'
word_xmlns_decls = '''xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0"
xmlns:ve="http://schemas.openxmlformats.org/markup-compatibility/2006"
xmlns:o="urn:schemas-microsoft-com:office:office"
xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships"
xmlns:m="http://schemas.openxmlformats.org/officeDocument/2006/math"
xmlns:v="urn:schemas-microsoft-com:vml"
xmlns:wp="http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing"
xmlns:w10="urn:schemas-microsoft-com:office:word"
xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"
xmlns:wne="http://schemas.microsoft.com/office/word/2006/wordml"
'''
word_moretemplates = ''
xl_tagmatch = 'x:t'
xl_xmlns_decls='''xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0"
xmlns:x="http://schemas.openxmlformats.org/spreadsheetml/2006/main"
'''
xl_moretemplates = ''
pp_tagmatch = 'a:t'
pp_xmlns_decls = '''xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0"
xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main"
xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships"
xmlns:p="http://schemas.openxmlformats.org/presentationml/2006/main"
'''
# Ideally, text output should be suppressed for everything except a:t, but
# I don't know how to do it (help welcome!). At least get rid of these:
pp_moretemplates = '''<xsl:template match="p:attrName">
</xsl:template>
'''
content_stylesheet = '''<?xml version="1.0"?>
<xsl:stylesheet @XMLNS_DECLS@ >
<xsl:output omit-xml-declaration="yes"/>
<xsl:template match="/">
<div>
<xsl:apply-templates/>
</div>
</xsl:template>
<xsl:template match="@TAGMATCH@">
<p>
<xsl:value-of select="."/>
</p>
</xsl:template>
@MORETEMPLATES@
</xsl:stylesheet>
'''
class OXExtractor:
    '''Text extraction from Office Open XML (zip) files: the metadata
    and the word processor, spreadsheet or presentation contents are
    each converted to HTML by an XSLT style sheet.

    em is the protocol object (used for logging).'''

    def __init__(self, em):
        self.em = em
        self.currentindex = 0

    # Replace values inside data style sheet, depending on type of doc
    def computestylesheet(self, nm):
        '''Return the contents style sheet for document kind nm ('word',
        'xl' or 'pp'): the placeholders in the template are replaced by
        the values of the module variables named after nm.'''
        decls = globals()[nm + '_xmlns_decls']
        stylesheet = content_stylesheet.replace('@XMLNS_DECLS@', decls)
        tagmatch = globals()[nm + '_tagmatch']
        stylesheet = stylesheet.replace('@TAGMATCH@', tagmatch)
        moretmpl = globals()[nm + '_moretemplates']
        stylesheet = stylesheet.replace('@MORETEMPLATES@', moretmpl)
        return stylesheet

    @staticmethod
    def _slidesortkey(name):
        '''Sort key for ppt/slides/slideN.xml member names: order by
        slide number (slide2 before slide11). Names without a plain
        number come last, in alphabetic order.'''
        num = name[len('ppt/slides/slide'):-len('.xml')]
        if num.isdigit():
            return (0, int(num), name)
        return (1, 0, name)

    def extractone(self, params):
        '''Extract the document from params["filename:"].

        Returns (ok, data, ipath, eof indicator). A part which is
        missing from the archive, or can't be transformed, is silently
        skipped.'''
        # dict.has_key() does not exist any more in Python 3: use 'in'
        if "filename:" not in params:
            self.em.rclog("extractone: no mime or file name")
            return (False, "", "", rclexecm.RclExecM.eofnow)
        fn = params["filename:"]

        try:
            zf = ZipFile(fn)
        except Exception as err:
            self.em.rclog("unzip failed: " + str(err))
            return (False, "", "", rclexecm.RclExecM.eofnow)

        try:
            docdata = '<html><head>'

            try:
                metadata = zf.read("docProps/core.xml")
                if metadata:
                    res = rclxslt.apply_sheet_data(meta_stylesheet, metadata)
                    docdata += res
            except Exception:
                # To be checked. I'm under the impression that I get this when
                # nothing matches?
                # self.em.rclog("no/bad metadata in %s" % fn)
                pass

            docdata += '</head><body>'

            # Only the parts matching the actual document type exist in
            # the archive: the reads for the other ones fail, which is ok.
            try:
                content= zf.read('word/document.xml')
                stl = self.computestylesheet('word')
                docdata += rclxslt.apply_sheet_data(stl, content)
            except Exception:
                pass

            try:
                content = zf.read('xl/sharedStrings.xml')
                stl = self.computestylesheet('xl')
                docdata += rclxslt.apply_sheet_data(stl, content)
            except Exception:
                pass

            try:
                stl = self.computestylesheet('pp')
                slides = [nm for nm in zf.namelist()
                          if fnmatch.fnmatch(nm, 'ppt/slides/slide*.xml')]
                # Numeric sort: else we get slide1 slide11 slide2
                for slide in sorted(slides, key=self._slidesortkey):
                    content = zf.read(slide)
                    docdata += rclxslt.apply_sheet_data(stl, content)
            except Exception:
                pass

            docdata += '</body></html>'
        finally:
            # Don't leave the archive open
            zf.close()

        return (True, docdata, "", rclexecm.RclExecM.eofnext)

    ###### File type handler api, used by rclexecm ---------->
    def openfile(self, params):
        self.currentindex = 0
        return True

    def getipath(self, params):
        return self.extractone(params)

    def getnext(self, params):
        # Single document per file: a second call means end of file
        if self.currentindex >= 1:
            return (False, "", "", rclexecm.RclExecM.eofnow)
        else:
            ret= self.extractone(params)
            self.currentindex += 1
            return ret
if __name__ == '__main__':
    # Hand the extractor to the common main routine of the execm filters
    protocol = rclexecm.RclExecM()
    rclexecm.main(protocol, OXExtractor(protocol))

58
src/filters/rclppt.py Executable file
View File

@ -0,0 +1,58 @@
#!/usr/bin/env python
import rclexecm
import rclexec1
import re
import sys
import os
# Processing the output from ppt-dump.py
class PPTProcessData:
    '''Wrap the text output by ppt-dump.py into a preformatted HTML
    document.

    em is the protocol object: its htmlescape() method is used here.'''

    def __init__(self, em):
        self.em = em
        self.out = ""
        # Set when the HTML header has been output
        self.gotdata = False

    # Apart from escaping the HTML special characters, we pass the data
    # untouched.
    def takeLine(self, line):
        '''Append one line of text to the document.'''
        if not self.gotdata:
            self.out += '''<html><head>''' + \
                        '''<meta http-equiv="Content-Type" ''' + \
                        '''content="text/html;charset=UTF-8">''' + \
                        '''</head><body><pre>'''
            self.gotdata = True
        # The lines we get have been stripped: restore the separator, else
        # the last word of a line gets glued to the first word of the next.
        self.out += self.em.htmlescape(line) + "\n"

    def wrapData(self):
        '''Return the accumulated text followed by the HTML tail.'''
        return self.out + '''</pre></body></html>'''
class PPTFilter:
    '''Command supplier for ppt files: a single command (ppt-dump.py)
    is proposed for each file.

    em is the protocol object, handed over to the post-processor.'''

    def __init__(self, em):
        self.em = em
        # Non-zero once the command was proposed for the current file
        self.ntry = 0

    def reset(self):
        self.ntry = 0

    def getCmd(self, fn):
        # Second call for the same file: we have nothing else to propose
        if self.ntry:
            return ([], None)
        self.ntry = 1

        dumper = rclexecm.which("ppt-dump.py")
        if not dumper:
            return ([], None)
        # ppt-dump.py often exits 1 with valid data. Ignore exit value
        return (["python", dumper, "--no-struct-output", "--dump-text"],
                PPTProcessData(self.em), rclexec1.Executor.opt_ignxval)
if __name__ == '__main__':
    # We can't do anything without the ppt-dump.py helper: tell the
    # caller if it is missing.
    if rclexecm.which("ppt-dump.py"):
        protocol = rclexecm.RclExecM()
        pptfilter = PPTFilter(protocol)
        rclexecm.main(protocol, rclexec1.Executor(protocol, pptfilter))
    else:
        print("RECFILTERROR HELPERNOTFOUND ppt-dump.py")
        sys.exit(1)

60
src/filters/rclrtf.py Executable file
View File

@ -0,0 +1,60 @@
#!/usr/bin/env python
import rclexecm
import rclexec1
import re
import sys
import os
# Post-processor for the HTML output by unrtf
class RTFProcessData:
    '''Accumulate the unrtf HTML output, replacing the charset
    declaration in the header.'''

    def __init__(self, em):
        self.em = em
        self.out = ""
        # Non-zero once the end of the HTML header was seen
        self.gothead = 0
        self.patendhead = re.compile('''</head>''')
        self.patcharset = re.compile('''^<meta http-equiv=''')

    # Some versions of unrtf put out a garbled charset line.
    # Apart from this, we pass the data untouched.
    def takeLine(self, line):
        if self.gothead:
            # Past the header: plain copy
            self.out += line + "\n"
            return

        if self.patendhead.search(line):
            # End of header: insert our own charset declaration before it
            self.out += '<meta http-equiv="Content-Type" ' + \
                        'content="text/html;charset=UTF-8">' + "\n"
            self.out += line + "\n"
            self.gothead = 1
            return

        if self.patcharset.search(line):
            # http-equiv line inside the header: dropped
            return

        self.out += line + "\n"

    def wrapData(self):
        return self.out
class RTFFilter:
    '''Command supplier for rtf files: a single command (unrtf) is
    proposed for each file.

    em is the protocol object, handed over to the post-processor.'''

    def __init__(self, em):
        self.em = em
        # Non-zero once the command was proposed for the current file
        self.ntry = 0

    def reset(self):
        self.ntry = 0

    def getCmd(self, fn):
        # Second call for the same file: we have nothing else to propose
        if self.ntry:
            return ([], None)
        self.ntry = 1

        unrtf = rclexecm.which("unrtf")
        if not unrtf:
            return ([], None)
        return ([unrtf, "--nopict", "--html"], RTFProcessData(self.em))
if __name__ == '__main__':
    # We can't do anything without unrtf: report the missing helper
    # under its actual name (the message used to name antiword).
    if not rclexecm.which("unrtf"):
        print("RECFILTERROR HELPERNOTFOUND unrtf")
        sys.exit(1)
    proto = rclexecm.RclExecM()
    filter = RTFFilter(proto)
    extract = rclexec1.Executor(proto, filter)
    rclexecm.main(proto, extract)

189
src/filters/rclsoff.py Executable file
View File

@ -0,0 +1,189 @@
#!/usr/bin/env python
# Copyright (C) 2014 J.F.Dockes
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the
# Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
######################################
import sys
import rclexecm
import rclxslt
from zipfile import ZipFile
stylesheet_meta = '''<?xml version="1.0"?>
<xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0"
xmlns:xlink="http://www.w3.org/1999/xlink"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:meta="urn:oasis:names:tc:opendocument:xmlns:meta:1.0"
xmlns:ooo="http://openoffice.org/2004/office"
exclude-result-prefixes="office xlink meta ooo dc"
>
<xsl:output method="html" encoding="UTF-8"/>
<xsl:template match="/office:document-meta">
<xsl:apply-templates select="office:meta/dc:description"/>
<xsl:apply-templates select="office:meta/dc:subject"/>
<xsl:apply-templates select="office:meta/dc:title"/>
<xsl:apply-templates select="office:meta/meta:keyword"/>
<xsl:apply-templates select="office:meta/dc:creator"/>
</xsl:template>
<xsl:template match="dc:title">
<title> <xsl:value-of select="."/> </title><xsl:text>
</xsl:text>
</xsl:template>
<xsl:template match="dc:description">
<meta>
<xsl:attribute name="name">abstract</xsl:attribute>
<xsl:attribute name="content">
<xsl:value-of select="."/>
</xsl:attribute>
</meta><xsl:text>
</xsl:text>
</xsl:template>
<xsl:template match="dc:subject">
<meta>
<xsl:attribute name="name">keywords</xsl:attribute>
<xsl:attribute name="content">
<xsl:value-of select="."/>
</xsl:attribute>
</meta><xsl:text>
</xsl:text>
</xsl:template>
<xsl:template match="dc:creator">
<meta>
<xsl:attribute name="name">author</xsl:attribute>
<xsl:attribute name="content">
<xsl:value-of select="."/>
</xsl:attribute>
</meta><xsl:text>
</xsl:text>
</xsl:template>
<xsl:template match="meta:keyword">
<meta>
<xsl:attribute name="name">keywords</xsl:attribute>
<xsl:attribute name="content">
<xsl:value-of select="."/>
</xsl:attribute>
</meta><xsl:text>
</xsl:text>
</xsl:template>
</xsl:stylesheet>
'''
stylesheet_content = '''<?xml version="1.0"?>
<xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:text="urn:oasis:names:tc:opendocument:xmlns:text:1.0"
exclude-result-prefixes="text"
>
<xsl:output method="html" encoding="UTF-8"/>
<xsl:template match="text:p">
<p><xsl:apply-templates/></p><xsl:text>
</xsl:text>
</xsl:template>
<xsl:template match="text:h">
<p><xsl:apply-templates/></p><xsl:text>
</xsl:text>
</xsl:template>
<xsl:template match="text:s">
<xsl:text> </xsl:text>
</xsl:template>
<xsl:template match="text:line-break">
<br />
</xsl:template>
<xsl:template match="text:tab">
<xsl:text> </xsl:text>
</xsl:template>
</xsl:stylesheet>
'''
class OOExtractor:
    '''Text extraction from OpenDocument (zip) files: meta.xml and
    content.xml are converted to HTML by the XSLT style sheets.

    em is the protocol object (used for logging).'''

    def __init__(self, em):
        self.em = em
        self.currentindex = 0

    def extractone(self, params):
        '''Extract the document from params["filename:"].

        Returns (ok, data, ipath, eof indicator). Missing or bad
        metadata is ignored, bad contents are an error.'''
        # dict.has_key() does not exist any more in Python 3: use 'in'
        if "filename:" not in params:
            self.em.rclog("extractone: no mime or file name")
            return (False, "", "", rclexecm.RclExecM.eofnow)
        fn = params["filename:"]

        try:
            zf = ZipFile(fn)
        except Exception as err:
            self.em.rclog("unzip failed: " + str(err))
            return (False, "", "", rclexecm.RclExecM.eofnow)

        try:
            # Note the space after "Content-Type": HTML attributes must be
            # separated by whitespace.
            docdata = '<html><head><meta http-equiv="Content-Type" ' \
                      'content="text/html; charset=UTF-8">'

            # The metadata style sheet outputs title and meta elements:
            # these belong inside the head, so it is only closed afterwards.
            try:
                metadata = zf.read("meta.xml")
                if metadata:
                    res = rclxslt.apply_sheet_data(stylesheet_meta, metadata)
                    docdata += res
            except Exception:
                # To be checked. I'm under the impression that I get this when
                # nothing matches?
                #self.em.rclog("no/bad metadata in %s" % fn)
                pass

            docdata += '</head><body>'

            try:
                content = zf.read("content.xml")
                if content:
                    res = rclxslt.apply_sheet_data(stylesheet_content, content)
                    docdata += res
                docdata += '</body></html>'
            except Exception as err:
                self.em.rclog("bad data in %s" % fn)
                return (False, "", "", rclexecm.RclExecM.eofnow)
        finally:
            # Don't leave the archive open
            zf.close()

        return (True, docdata, "", rclexecm.RclExecM.eofnext)

    ###### File type handler api, used by rclexecm ---------->
    def openfile(self, params):
        self.currentindex = 0
        return True

    def getipath(self, params):
        return self.extractone(params)

    def getnext(self, params):
        # Single document per file: a second call means end of file
        if self.currentindex >= 1:
            return (False, "", "", rclexecm.RclExecM.eofnow)
        else:
            ret= self.extractone(params)
            self.currentindex += 1
            return ret
if __name__ == '__main__':
    # Hand the extractor to the common main routine of the execm filters
    protocol = rclexecm.RclExecM()
    rclexecm.main(protocol, OOExtractor(protocol))

140
src/filters/rclsvg.py Executable file
View File

@ -0,0 +1,140 @@
#!/usr/bin/env python
# Copyright (C) 2014 J.F.Dockes
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the
# Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
######################################
import sys
import rclexecm
import rclxslt
stylesheet_all = '''<?xml version="1.0"?>
<xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:svg="http://www.w3.org/2000/svg"
xmlns:dc="http://purl.org/dc/elements/1.1/"
exclude-result-prefixes="svg"
>
<xsl:output method="html" encoding="UTF-8"/>
<xsl:template match="/">
<html>
<head>
<xsl:apply-templates select="svg:svg/svg:title"/>
<xsl:apply-templates select="svg:svg/svg:desc"/>
<xsl:apply-templates select="svg:svg/svg:metadata/descendant::dc:creator"/>
<xsl:apply-templates select="svg:svg/svg:metadata/descendant::dc:subject"/>
<xsl:apply-templates select="svg:svg/svg:metadata/descendant::dc:description"/>
</head>
<body>
<xsl:apply-templates select="//svg:text"/>
</body>
</html>
</xsl:template>
<xsl:template match="svg:desc">
<meta>
<xsl:attribute name="name">keywords</xsl:attribute>
<xsl:attribute name="content">
<xsl:value-of select="."/>
</xsl:attribute>
</meta><xsl:text>
</xsl:text>
</xsl:template>
<xsl:template match="dc:creator">
<meta>
<xsl:attribute name="name">author</xsl:attribute>
<xsl:attribute name="content">
<xsl:value-of select="."/>
</xsl:attribute>
</meta><xsl:text>
</xsl:text>
</xsl:template>
<xsl:template match="dc:subject">
<meta>
<xsl:attribute name="name">keywords</xsl:attribute>
<xsl:attribute name="content">
<xsl:value-of select="."/>
</xsl:attribute>
</meta><xsl:text>
</xsl:text>
</xsl:template>
<xsl:template match="dc:description">
<meta>
<xsl:attribute name="name">description</xsl:attribute>
<xsl:attribute name="content">
<xsl:value-of select="."/>
</xsl:attribute>
</meta><xsl:text>
</xsl:text>
</xsl:template>
<xsl:template match="svg:title">
<title><xsl:value-of select="."/></title><xsl:text>
</xsl:text>
</xsl:template>
<xsl:template match="svg:text">
<p><xsl:value-of select="."/></p><xsl:text>
</xsl:text>
</xsl:template>
</xsl:stylesheet>
'''
class SVGExtractor:
    """File handler converting an SVG file to HTML through stylesheet_all.

    Implements the handler interface used by rclexecm (openfile, getipath,
    getnext). Each file yields a single document.
    """

    def __init__(self, em):
        # em: protocol object; only its rclog() method is used by this class.
        self.em = em
        self.currentindex = 0

    def extractone(self, params):
        """Transform the file named by params["filename:"].

        Returns a 4-tuple: success flag, converted data, an empty string
        (presumably the ipath -- confirm against rclexecm) and the
        rclexecm.RclExecM.eofnow indicator. Failures are logged through
        em.rclog() and reported with a False flag, never raised.
        """
        if "filename:" not in params:
            self.em.rclog("extractone: no mime or file name")
            return (False, "", "", rclexecm.RclExecM.eofnow)
        fn = params["filename:"]
        try:
            # Close the file deterministically instead of relying on the GC.
            with open(fn, 'rb') as f:
                data = f.read()
            docdata = rclxslt.apply_sheet_data(stylesheet_all, data)
        except Exception as err:
            # One placeholder per value: with a single %s the formatting
            # itself raised TypeError and the real error was never logged.
            self.em.rclog("%s: bad data: %s" % (fn, err))
            return (False, "", "", rclexecm.RclExecM.eofnow)
        return (True, docdata, "", rclexecm.RclExecM.eofnow)

    ###### File type handler api, used by rclexecm ---------->
    def openfile(self, params):
        # Rewind: the next getnext() call will return the document.
        self.currentindex = 0
        return True

    def getipath(self, params):
        return self.extractone(params)

    def getnext(self, params):
        # Only one document per file: anything past the first call is EOF.
        if self.currentindex >= 1:
            return (False, "", "", rclexecm.RclExecM.eofnow)
        ret = self.extractone(params)
        self.currentindex += 1
        return ret
if __name__ == '__main__':
    # Hand an SVG handler bound to the protocol object to rclexecm's main.
    protocol = rclexecm.RclExecM()
    handler = SVGExtractor(protocol)
    rclexecm.main(protocol, handler)

51
src/filters/rcltext.py Executable file
View File

@ -0,0 +1,51 @@
#!/usr/bin/env python
import rclexecm
import sys
# Wrapping a text file. Recoll does it internally in most cases, but
# there is a reason this exists, just can't remember it ...
class TxtDump:
    """File handler wrapping the raw content of a text file in minimal HTML.

    Implements the handler interface used by rclexecm (openfile, getipath,
    getnext). Each file yields a single document.
    """

    def __init__(self, em):
        # em: protocol object; rclog() and htmlescape() are used here.
        self.em = em
        # Initialised here too so that getnext() is safe before openfile().
        self.currentindex = 0

    def extractone(self, params):
        """Return the HTML-wrapped content of params["filename:"].

        Returns a 4-tuple: success flag, HTML text, an empty string
        (presumably the ipath -- confirm against rclexecm) and an
        rclexecm.RclExecM end-of-file indicator (eofnext on success, eofnow
        on failure). Failures are logged through em.rclog(), never raised.
        """
        #self.em.rclog("extractone %s %s" % (params["filename:"], \
        #params["mimetype:"]))
        if "filename:" not in params:
            self.em.rclog("extractone: no file name")
            return (False, "", "", rclexecm.RclExecM.eofnow)
        fn = params["filename:"]
        # No charset, so recoll will have to use its config to guess it
        txt = '<html><head><title></title></head><body><pre>'
        try:
            # Close the file deterministically instead of relying on the GC.
            with open(fn, "rb") as f:
                txt += self.em.htmlescape(f.read())
        except Exception as err:
            self.em.rclog("TxtDump: %s : %s" % (fn, err))
            return (False, "", "", rclexecm.RclExecM.eofnow)
        txt += '</pre></body></html>'
        return (True, txt, "", rclexecm.RclExecM.eofnext)

    ###### File type handler api, used by rclexecm ---------->
    def openfile(self, params):
        # Rewind: the next getnext() call will return the document.
        self.currentindex = 0
        return True

    def getipath(self, params):
        return self.extractone(params)

    def getnext(self, params):
        # Only one document per file: anything past the first call is EOF.
        if self.currentindex >= 1:
            return (False, "", "", rclexecm.RclExecM.eofnow)
        ret = self.extractone(params)
        self.currentindex += 1
        return ret
if __name__ == '__main__':
    # Hand a text handler bound to the protocol object to rclexecm's main.
    protocol = rclexecm.RclExecM()
    handler = TxtDump(protocol)
    rclexecm.main(protocol, handler)

65
src/filters/rclxls.py Executable file
View File

@ -0,0 +1,65 @@
#!/usr/bin/env python
import rclexecm
import rclexec1
import xlsxmltocsv
import re
import sys
import os
import xml.sax
# Post-processor for the command built by XLSFilter.getCmd (xls-dump.py in
# canonical-xml mode): collects the XML output line by line, then converts
# it through xlsxmltocsv and wraps the escaped result in an HTML page.
class XLSProcessData:
    def __init__(self, em):
        # em: protocol object; only its htmlescape() method is used here.
        self.em = em
        self.out = ""
        self.gotdata = 0
        self.xmldata = ""

    def takeLine(self, line):
        """Accumulate one line of XML; emit the HTML prologue on the first."""
        if not self.gotdata:
            self.out += ('<html><head>'
                         '<meta http-equiv="Content-Type" '
                         'content="text/html;charset=UTF-8">'
                         '</head><body><pre>')
            self.gotdata = True
        self.xmldata += line

    def wrapData(self):
        """Parse the accumulated XML and return the complete HTML document."""
        converter = xlsxmltocsv.XlsXmlHandler()
        # The SAX handler gathers the converted text in its output attribute.
        xml.sax.parseString(self.xmldata, converter)
        self.out += self.em.htmlescape(converter.output)
        return self.out + '</pre></body></html>'
class XLSFilter:
    """Supplies the xls-dump.py command line, at most once between resets."""

    def __init__(self, em):
        self.em = em
        self.ntry = 0

    def reset(self):
        # Allow getCmd() to offer the command again.
        self.ntry = 0

    def getCmd(self, fn):
        """Return the command description, or ([], None) when none is left.

        On success the result is a 3-tuple: argument list, output
        post-processor and the rclexec1.Executor.opt_ignxval option.
        Note that fn is not used when building the argument list.
        """
        if self.ntry:
            return ([], None)
        self.ntry = 1
        dumper = rclexecm.which("xls-dump.py")
        if not dumper:
            return ([], None)
        # xls-dump.py often exits 1 with valid data. Ignore exit value
        argv = ["python", dumper, "--dump-mode=canonical-xml",
                "--utf-8", "--catch"]
        return (argv, XLSProcessData(self.em), rclexec1.Executor.opt_ignxval)
if __name__ == '__main__':
    # XLSFilter.getCmd runs xls-dump.py, so that is the helper whose absence
    # must be detected and reported (the check used to name ppt-dump.py).
    if not rclexecm.which("xls-dump.py"):
        print("RECFILTERROR HELPERNOTFOUND xls-dump.py")
        sys.exit(1)
    proto = rclexecm.RclExecM()
    # Named xlsfilter so as not to shadow the builtin filter().
    xlsfilter = XLSFilter(proto)
    extract = rclexec1.Executor(proto, xlsfilter)
    rclexecm.main(proto, extract)

98
src/filters/rclxml.py Executable file
View File

@ -0,0 +1,98 @@
#!/usr/bin/env python
# Copyright (C) 2014 J.F.Dockes
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the
# Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
######################################
import sys
import rclexecm
import rclxslt
# Generic XSLT sheet applied to XML input to produce the HTML handed back to
# the indexer:
#  - <head>: if any element has the local name "title", the first such
#    match supplies the HTML <title>.
#  - <body>: every text node that is not whitespace-only becomes a <p>;
#    elements themselves are only traversed.
# The text is runtime data: whitespace inside <xsl:text> is significant.
stylesheet_all = '''<?xml version="1.0"?>
<xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:output method="html" encoding="UTF-8"/>
<xsl:template match="/">
<html>
<head>
<xsl:if test="//*[local-name() = 'title']">
<title>
<xsl:value-of select="//*[local-name() = 'title'][1]"/>
</title>
</xsl:if>
</head>
<body>
<xsl:apply-templates/>
</body>
</html>
</xsl:template>
<xsl:template match="text()">
<xsl:if test="string-length(normalize-space(.)) &gt; 0">
<p><xsl:value-of select="."/></p>
<xsl:text>
</xsl:text>
</xsl:if>
</xsl:template>
<xsl:template match="*">
<xsl:apply-templates/>
</xsl:template>
</xsl:stylesheet>
'''
class XMLExtractor:
    """File handler converting an XML file to HTML through stylesheet_all.

    Implements the handler interface used by rclexecm (openfile, getipath,
    getnext). Each file yields a single document.
    """

    def __init__(self, em):
        # em: protocol object; only its rclog() method is used by this class.
        self.em = em
        self.currentindex = 0

    def extractone(self, params):
        """Transform the file named by params["filename:"].

        Returns a 4-tuple: success flag, converted data, an empty string
        (presumably the ipath -- confirm against rclexecm) and the
        rclexecm.RclExecM.eofnow indicator. Failures are logged through
        em.rclog() and reported with a False flag, never raised.
        """
        if "filename:" not in params:
            self.em.rclog("extractone: no mime or file name")
            return (False, "", "", rclexecm.RclExecM.eofnow)
        fn = params["filename:"]
        try:
            # Close the file deterministically instead of relying on the GC.
            with open(fn, 'rb') as f:
                data = f.read()
            docdata = rclxslt.apply_sheet_data(stylesheet_all, data)
        except Exception as err:
            # One placeholder per value: with a single %s the formatting
            # itself raised TypeError and the real error was never logged.
            self.em.rclog("%s: bad data: %s" % (fn, err))
            return (False, "", "", rclexecm.RclExecM.eofnow)
        return (True, docdata, "", rclexecm.RclExecM.eofnow)

    ###### File type handler api, used by rclexecm ---------->
    def openfile(self, params):
        # Rewind: the next getnext() call will return the document.
        self.currentindex = 0
        return True

    def getipath(self, params):
        return self.extractone(params)

    def getnext(self, params):
        # Only one document per file: anything past the first call is EOF.
        if self.currentindex >= 1:
            return (False, "", "", rclexecm.RclExecM.eofnow)
        ret = self.extractone(params)
        self.currentindex += 1
        return ret
if __name__ == '__main__':
    # Hand an XML handler bound to the protocol object to rclexecm's main.
    protocol = rclexecm.RclExecM()
    handler = XMLExtractor(protocol)
    rclexecm.main(protocol, handler)

52
src/filters/rclxslt.py Normal file
View File

@ -0,0 +1,52 @@
# Copyright (C) 2014 J.F.Dockes
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the
# Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
######################################
# Helper module for xslt-based filters
import sys
# The libxml2/libxslt bindings are optional system packages: when they cannot
# be loaded, report the missing helper on stdout and exit with status 1.
try:
    import libxml2
    import libxslt
except Exception:
    # "except Exception" rather than a bare except, so that KeyboardInterrupt
    # and SystemExit are not misreported as a missing helper.
    # print() with a single string argument behaves the same under Python 2
    # and 3, and matches the call form used by the other filters.
    print("RECFILTERROR HELPERNOTFOUND python:libxml2/python:libxslt1")
    sys.exit(1)
libxml2.substituteEntitiesDefault(1)
def _apply_sheet(sheet, parse_doc):
    """Apply the XSLT text `sheet` to the document returned by parse_doc().

    Returns the serialized transformation result. Every libxml2/libxslt
    object created here is released even when a later step raises, so a
    malformed input document no longer leaks the compiled stylesheet.
    """
    styledoc = libxml2.parseMemory(sheet, len(sheet))
    style = libxslt.parseStylesheetDoc(styledoc)
    try:
        doc = parse_doc()
        try:
            result = style.applyStylesheet(doc, None)
            try:
                return style.saveResultToString(result)
            finally:
                result.freeDoc()
        finally:
            doc.freeDoc()
    finally:
        # As in the original code, styledoc is not freed separately.
        style.freeStylesheet()


def apply_sheet_data(sheet, data):
    """Transform the in-memory XML `data` with the XSLT text `sheet`."""
    return _apply_sheet(sheet, lambda: libxml2.parseMemory(data, len(data)))


def apply_sheet_file(sheet, fn):
    """Transform the XML file named `fn` with the XSLT text `sheet`."""
    return _apply_sheet(sheet, lambda: libxml2.parseFile(fn))

View File

@ -15,10 +15,13 @@ else:
dquote = '"'
class XlsXmlHandler(xml.sax.handler.ContentHandler):
def __init__(self):
self.output = ""
def startElement(self, name, attrs):
if name == "worksheet":
if "name" in attrs:
print("%s" % attrs["name"].encode("UTF-8"))
self.output += "%s\n" % attrs["name"].encode("UTF-8")
elif name == "row":
self.cells = dict()
elif name == "label-cell" or name == "number-cell":
@ -30,7 +33,7 @@ class XlsXmlHandler(xml.sax.handler.ContentHandler):
self.cells[int(attrs["col"])] = value
else:
#??
sys.stdout.write("%s%s"%(value.encode("UTF-8"),sepstring))
self.output += "%s%s" % (value.encode("UTF-8"), sepstring)
elif name == "formula-cell":
if "formula-result" in attrs and "col" in attrs:
self.cells[int(attrs["col"])] = \
@ -40,17 +43,21 @@ class XlsXmlHandler(xml.sax.handler.ContentHandler):
if name == "row":
curidx = 0
for idx, value in self.cells.iteritems():
sys.stdout.write(sepstring * (idx - curidx))
sys.stdout.write('%s%s%s' % (dquote, value, dquote))
self.output += sepstring * (idx - curidx)
self.output += "%s%s%s" % (dquote, value, dquote)
curidx = idx
sys.stdout.write("\n")
self.output += "\n"
elif name == "worksheet":
print("")
self.output += "\n"
try:
xml.sax.parse(sys.stdin, XlsXmlHandler())
except BaseException as err:
error("xml-parse: %s\n" % (str(sys.exc_info()[:2]),))
sys.exit(1)
sys.exit(0)
if __name__ == '__main__':
try:
handler = XlsXmlHandler()
xml.sax.parse(sys.stdin, handler)
print(handler.output)
except BaseException as err:
error("xml-parse: %s\n" % (str(sys.exc_info()[:2]),))
sys.exit(1)
sys.exit(0)

View File

@ -14,9 +14,8 @@
* Free Software Foundation, Inc.,
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#ifdef HAVE_CONFIG_H
#include "autoconfig.h"
#endif
#include "debuglog.h"
@ -34,8 +33,10 @@ DocFetcher *docFetcherMake(const Rcl::Doc& idoc)
idoc.getmeta(Rcl::Doc::keybcknd, &backend);
if (backend.empty() || !backend.compare("FS")) {
return new FSDocFetcher;
#ifndef DISABLE_WEB_INDEXER
} else if (!backend.compare("BGL")) {
return new BGLDocFetcher;
#endif
} else {
LOGERR(("DocFetcherFactory: unknown backend [%s]\n", backend.c_str()));
return 0;

View File

@ -43,7 +43,6 @@
#include "fileudi.h"
#include "cancelcheck.h"
#include "rclinit.h"
#include "execmd.h"
#include "extrameta.h"
using namespace std;
@ -145,13 +144,11 @@ FsIndexer::~FsIndexer()
void *status;
if (m_haveInternQ) {
status = m_iwqueue.setTerminateAndWait();
LOGDEB0(("FsIndexer: internfile wrkr status: %ld (1->ok)\n",
long(status)));
LOGDEB0(("FsIndexer: internfile wrkr status: %p (1->ok)\n", status));
}
if (m_haveSplitQ) {
status = m_dwqueue.setTerminateAndWait();
LOGDEB0(("FsIndexer: dbupd worker status: %ld (1->ok)\n",
long(status)));
LOGDEB0(("FsIndexer: dbupd worker status: %p (1->ok)\n", status));
}
delete m_stableconfig;
#endif // IDX_THREADS
@ -259,7 +256,7 @@ static bool matchesSkipped(const vector<string>& tdl,
string canonpath = path_canon(path);
string mpath = canonpath;
string topdir;
while (mpath.length() > 1) {
while (!path_isroot(mpath)) { // we assume root not in skipped paths.
for (vector<string>::const_iterator it = tdl.begin();
it != tdl.end(); it++) {
// the topdirs members are already canonized.
@ -281,7 +278,7 @@ static bool matchesSkipped(const vector<string>& tdl,
mpath = path_getfather(mpath);
// getfather normally returns a path ending with /, canonic
// paths don't (except for '/' itself).
if (!mpath.empty() && mpath[mpath.size()-1] == '/')
if (!path_isroot(mpath) && mpath[mpath.size()-1] == '/')
mpath.erase(mpath.size()-1);
// should not be necessary, but lets be prudent. If the
// path did not shorten, something is seriously amiss
@ -330,7 +327,7 @@ bool FsIndexer::indexFiles(list<string>& files, int flags)
{
LOGDEB(("FsIndexer::indexFiles\n"));
m_noretryfailed = (flags & ConfIndexer::IxFNoRetryFailed) != 0;
int ret = false;
bool ret = false;
if (!init())
return false;
@ -703,7 +700,7 @@ FsIndexer::processonefile(RclConfig *config,
}
LOGDEB0(("processone: processing: [%s] %s\n",
displayableBytes(stp->st_size).c_str(), fn.c_str()));
displayableBytes(off_t(stp->st_size)).c_str(), fn.c_str()));
string utf8fn = compute_utf8fn(config, fn);
@ -772,7 +769,7 @@ FsIndexer::processonefile(RclConfig *config,
if (doc.fmtime.empty())
doc.fmtime = ascdate;
if (doc.url.empty())
doc.url = cstr_fileu + fn;
doc.url = path_pathtofileurl(fn);
const string *fnp = 0;
if (doc.ipath.empty()) {
if (!doc.peekmeta(Rcl::Doc::keyfn, &fnp) || fnp->empty())
@ -868,7 +865,7 @@ FsIndexer::processonefile(RclConfig *config,
fileDoc.meta[Rcl::Doc::keytcfn] = utf8fn;
fileDoc.haschildren = true;
fileDoc.mimetype = mimetype;
fileDoc.url = cstr_fileu + fn;
fileDoc.url = path_pathtofileurl(fn);
if (m_havelocalfields)
setlocalfields(localfields, fileDoc);
char cbuf[100];

View File

@ -14,9 +14,7 @@
* Free Software Foundation, Inc.,
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#ifdef HAVE_CONFIG_H
#include "autoconfig.h"
#endif
#include <stdio.h>
#include <errno.h>
@ -27,7 +25,9 @@
#include "debuglog.h"
#include "indexer.h"
#include "fsindexer.h"
#ifndef DISABLE_WEB_INDEXER
#include "beaglequeue.h"
#endif
#include "mimehandler.h"
#include "pathut.h"
@ -46,7 +46,9 @@ ConfIndexer::ConfIndexer(RclConfig *cnf, DbIxStatusUpdater *updfunc)
ConfIndexer::~ConfIndexer()
{
deleteZ(m_fsindexer);
#ifndef DISABLE_WEB_INDEXER
deleteZ(m_beagler);
#endif
}
// Determine if this is likely the first time that the user runs
@ -107,7 +109,7 @@ bool ConfIndexer::index(bool resetbefore, ixType typestorun, int flags)
return false;
}
}
#ifndef DISABLE_WEB_INDEXER
if (m_dobeagle && (typestorun & IxTBeagleQueue)) {
deleteZ(m_beagler);
m_beagler = new BeagleQueueIndexer(m_config, &m_db, m_updater);
@ -116,7 +118,7 @@ bool ConfIndexer::index(bool resetbefore, ixType typestorun, int flags)
return false;
}
}
#endif
if (typestorun == IxTAll) {
// Get rid of all database entries that don't exist in the
// filesystem anymore. Only if all *configured* indexers ran.
@ -173,6 +175,7 @@ bool ConfIndexer::indexFiles(list<string>& ifiles, int flag)
ret = m_fsindexer->indexFiles(myfiles, flag);
LOGDEB2(("ConfIndexer::indexFiles: fsindexer returned %d, "
"%d files remainining\n", ret, myfiles.size()));
#ifndef DISABLE_WEB_INDEXER
if (m_dobeagle && !myfiles.empty() && !(flag & IxFNoWeb)) {
if (!m_beagler)
@ -183,7 +186,7 @@ bool ConfIndexer::indexFiles(list<string>& ifiles, int flag)
ret = false;
}
}
#endif
// The close would be done in our destructor, but we want status here
if (!m_db.close()) {
LOGERR(("ConfIndexer::index: error closing database in %s\n",
@ -255,6 +258,7 @@ bool ConfIndexer::purgeFiles(std::list<string> &files, int flag)
if (m_fsindexer)
ret = m_fsindexer->purgeFiles(myfiles);
#ifndef DISABLE_WEB_INDEXER
if (m_dobeagle && !myfiles.empty() && !(flag & IxFNoWeb)) {
if (!m_beagler)
m_beagler = new BeagleQueueIndexer(m_config, &m_db, m_updater);
@ -264,6 +268,7 @@ bool ConfIndexer::purgeFiles(std::list<string> &files, int flag)
ret = false;
}
}
#endif
// The close would be done in our destructor, but we want status here
if (!m_db.close()) {

View File

@ -16,20 +16,18 @@
*/
#ifndef _INDEXER_H_INCLUDED_
#define _INDEXER_H_INCLUDED_
#include "rclconfig.h"
#include <string>
#include <list>
#include <map>
#include <vector>
#ifndef NO_NAMESPACES
using std::string;
using std::list;
using std::map;
using std::vector;
#endif
#include "rclconfig.h"
#include "rcldb.h"
#include "rcldoc.h"
#ifdef IDX_THREADS

View File

@ -140,6 +140,7 @@ string mimetype(const string &fn, const struct stat *stp,
string mtype;
#ifndef _WIN32
// Extended attribute has priority on everything, as per:
// http://freedesktop.org/wiki/CommonExtendedAttributes
if (pxattr::get(fn, "mime_type", &mtype)) {
@ -150,6 +151,7 @@ string mimetype(const string &fn, const struct stat *stp,
return mtype;
}
}
#endif
if (cfg == 0) {
LOGERR(("Mimetype: null config ??\n"));
@ -177,7 +179,6 @@ string mimetype(const string &fn, const struct stat *stp,
if (mtype.empty() && stp)
mtype = mimetypefromdata(cfg, fn, usfc);
out:
return mtype;
}

View File

@ -17,10 +17,10 @@
#ifndef _MIMETYPE_H_INCLUDED_
#define _MIMETYPE_H_INCLUDED_
#include "safesysstat.h"
#include <string>
class RclConfig;
struct stat;
/**
* Try to determine a mime type for file.

View File

@ -469,7 +469,7 @@ bool RclFAM::getEvent(RclMonEvent& ev, int msecs)
MONDEB(("RclFAM::getEvent: FAMNextEvent returned\n"));
map<int,string>::const_iterator it;
if ((fe.filename[0] != '/') &&
if ((!path_isabsolute(fe.filename)) &&
(it = m_idtopath.find(fe.fr.reqnum)) != m_idtopath.end()) {
ev.m_path = path_cat(it->second, fe.filename);
} else {

View File

@ -14,16 +14,18 @@
* Free Software Foundation, Inc.,
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#ifdef HAVE_CONFIG_H
#include "autoconfig.h"
#endif
#include <stdio.h>
#include <signal.h>
#include <errno.h>
#ifndef _WIN32
#include <sys/time.h>
#include <sys/resource.h>
#include <fcntl.h>
#else
#include <direct.h>
#endif
#include "safefcntl.h"
#include "safeunistd.h"
#include <iostream>
@ -42,10 +44,14 @@ using namespace std;
#include "x11mon.h"
#include "cancelcheck.h"
#include "rcldb.h"
#ifndef DISABLE_WEB_INDEXER
#include "beaglequeue.h"
#endif
#include "recollindex.h"
#include "fsindexer.h"
#ifndef _WIN32
#include "rclionice.h"
#endif
#include "execmd.h"
#include "checkretryfailed.h"
@ -133,6 +139,7 @@ class MyUpdater : public DbIxStatusUpdater {
return false;
}
#ifndef DISABLE_X11MON
// If we are in the monitor, we also need to check X11 status
// during the initial indexing pass (else the user could log
// out and the indexing would go on, not good (ie: if the user
@ -142,7 +149,7 @@ class MyUpdater : public DbIxStatusUpdater {
stopindexing = true;
return false;
}
#endif
return true;
}
@ -177,11 +184,13 @@ static void makeIndexerOrExit(RclConfig *config, bool inPlaceReset)
void rclIxIonice(const RclConfig *config)
{
#ifndef _WIN32
string clss, classdata;
if (!config->getConfParam("monioniceclass", clss) || clss.empty())
clss = "3";
config->getConfParam("monioniceclassdata", classdata);
rclionice(clss, classdata);
#endif
}
class MakeListWalkerCB : public FsTreeWalkerCB {
@ -273,7 +282,7 @@ static bool checktopdirs(RclConfig *config, vector<string>& nonexist)
for (vector<string>::iterator it = tdl.begin(); it != tdl.end(); it++) {
*it = path_tildexpand(*it);
if (!it->size() || (*it)[0] != '/') {
if (!it->size() || !path_isabsolute(*it)) {
if ((*it)[0] == '~') {
cerr << "Tilde expansion failed: " << *it << endl;
LOGERR(("recollindex: tilde expansion failed: %s\n",
@ -285,7 +294,7 @@ static bool checktopdirs(RclConfig *config, vector<string>& nonexist)
}
return false;
}
if (access(it->c_str(), 0) < 0) {
if (!path_exists(*it)) {
nonexist.push_back(*it);
}
}
@ -295,7 +304,7 @@ static bool checktopdirs(RclConfig *config, vector<string>& nonexist)
if (config->getConfParam("skippedPaths", &tdl, true)) {
for (vector<string>::iterator it = tdl.begin(); it != tdl.end(); it++) {
*it = path_tildexpand(*it);
if (access(it->c_str(), 0) < 0) {
if (!path_exists(*it)) {
nonexist.push_back(*it);
}
}
@ -304,7 +313,7 @@ static bool checktopdirs(RclConfig *config, vector<string>& nonexist)
if (config->getConfParam("daemSkippedPaths", &tdl, true)) {
for (vector<string>::iterator it = tdl.begin(); it != tdl.end(); it++) {
*it = path_tildexpand(*it);
if (access(it->c_str(), 0) < 0) {
if (!path_exists(*it)) {
nonexist.push_back(*it);
}
}
@ -393,8 +402,10 @@ int main(int argc, char **argv)
// The reexec struct is used by the daemon to shed memory after
// the initial indexing pass and to restart when the configuration
// changes
#ifndef _WIN32
o_reexec = new ReExec;
o_reexec->init(argc, argv);
#endif
thisprog = argv[0];
argc--; argv++;
@ -463,7 +474,9 @@ int main(int argc, char **argv)
cerr << "Configuration problem: " << reason << endl;
exit(1);
}
#ifndef _WIN32
o_reexec->atexit(cleanup);
#endif
vector<string> nonexist;
if (!checktopdirs(config, nonexist))
@ -511,9 +524,11 @@ int main(int argc, char **argv)
if (op_flags & OPT_k) {
indexerFlags &= ~ConfIndexer::IxFNoRetryFailed;
} else {
#ifndef _WIN32
if (checkRetryFailed(config, false)) {
indexerFlags &= ~ConfIndexer::IxFNoRetryFailed;
}
#endif
}
Pidfile pidfile(config->getPidfile());
@ -522,12 +537,13 @@ int main(int argc, char **argv)
// Log something at LOGINFO to reset the trace file. Else at level
// 3 it's not even truncated if all docs are up to date.
LOGINFO(("recollindex: starting up\n"));
#ifndef _WIN32
if (setpriority(PRIO_PROCESS, 0, 20) != 0) {
LOGINFO(("recollindex: can't setpriority(), errno %d\n", errno));
}
// Try to ionice. This does not work on all platforms
rclIxIonice(config);
#endif
if (op_flags & (OPT_i|OPT_e)) {
lockorexit(&pidfile);
@ -596,15 +612,17 @@ int main(int argc, char **argv)
lockorexit(&pidfile);
if (!(op_flags&OPT_D)) {
LOGDEB(("recollindex: daemonizing\n"));
#ifndef _WIN32
if (daemon(0,0) != 0) {
fprintf(stderr, "daemon() failed, errno %d\n", errno);
LOGERR(("daemon() failed, errno %d\n", errno));
exit(1);
}
#endif
}
// Need to rewrite pid, it changed
pidfile.write_pid();
#ifndef _WIN32
// Not too sure if I have to redo the nice thing after daemon(),
// can't hurt anyway (easier than testing on all platforms...)
if (setpriority(PRIO_PROCESS, 0, 20) != 0) {
@ -612,6 +630,7 @@ int main(int argc, char **argv)
}
// Try to ionice. This does not work on all platforms
rclIxIonice(config);
#endif
if (sleepsecs > 0) {
LOGDEB(("recollindex: sleeping %d\n", sleepsecs));
@ -633,12 +652,15 @@ int main(int argc, char **argv)
"not going into monitor mode\n"));
exit(1);
} else {
#ifndef _WIN32
// Record success of indexing pass with failed files retries.
if (!(indexerFlags & ConfIndexer::IxFNoRetryFailed)) {
checkRetryFailed(config, true);
}
#endif
}
deleteZ(confindexer);
#ifndef _WIN32
o_reexec->insertArgs(vector<string>(1, "-n"));
LOGINFO(("recollindex: reexecuting with -n after initial full pass\n"));
// Note that -n will be inside the reexec when we come
@ -646,6 +668,7 @@ int main(int argc, char **argv)
// starting a config change exec to ensure that we do a
// purging pass in this case.
o_reexec->reexec();
#endif
}
if (updater) {
updater->status.phase = DbIxStatus::DBIXS_MONITOR;
@ -672,11 +695,12 @@ int main(int argc, char **argv)
makeIndexerOrExit(config, inPlaceReset);
bool status = confindexer->index(rezero, ConfIndexer::IxTAll,
indexerFlags);
#ifndef _WIN32
// Record success of indexing pass with failed files retries.
if (status && !(indexerFlags & ConfIndexer::IxFNoRetryFailed)) {
checkRetryFailed(config, true);
}
#endif
if (!status)
cerr << "Indexing failed" << endl;
if (!confindexer->getReason().empty())

View File

@ -108,7 +108,7 @@ namespace Dijon
*/
virtual bool set_document_data(const std::string& mtype,
const char *data_ptr,
unsigned int data_length) = 0;
size_t data_length) = 0;
/** (Re)initializes the filter with the given data.
* Call next_document() to position the filter onto the first document.
@ -140,7 +140,7 @@ namespace Dijon
stat() calls The value is stored inside metaData, docsize
key
*/
virtual void set_docsize(size_t size) = 0;
virtual void set_docsize(off_t size) = 0;
// Going from one nested document to the next.

View File

@ -46,7 +46,7 @@ void reapXAttrs(const RclConfig* cfg, const string& path,
map<string, string>& xfields)
{
LOGDEB2(("reapXAttrs: [%s]\n", path.c_str()));
#ifndef _WIN32
// Retrieve xattrs names from files and mapping table from config
vector<string> xnames;
if (!pxattr::list(path, &xnames)) {
@ -79,6 +79,7 @@ void reapXAttrs(const RclConfig* cfg, const string& path,
xfields[key] = value;
LOGDEB2(("reapXAttrs: [%s] -> [%s]\n", key.c_str(), value.c_str()));
}
#endif
}
void docFieldsFromXattrs(RclConfig *cfg, const map<string, string>& xfields,
@ -93,6 +94,7 @@ void docFieldsFromXattrs(RclConfig *cfg, const map<string, string>& xfields,
void reapMetaCmds(RclConfig* cfg, const string& path,
map<string, string>& cfields)
{
#ifndef _WIN32
const vector<MDReaper>& reapers = cfg->getMDReapers();
if (reapers.empty())
return;
@ -111,6 +113,7 @@ void reapMetaCmds(RclConfig* cfg, const string& path,
cfields[rp->fieldname] = output;
}
}
#endif
}
// Set fields from external commands

View File

@ -169,7 +169,7 @@ void FileInterner::init(const string &f, const struct stat *stp, RclConfig *cnf,
l_mime = *imime;
}
size_t docsize = stp->st_size;
off_t docsize = stp->st_size;
if (!l_mime.empty()) {
// Has mime: check for a compressed file. If so, create a
@ -216,7 +216,7 @@ void FileInterner::init(const string &f, const struct stat *stp, RclConfig *cnf,
m_mimetype = l_mime;
RecollFilter *df = getMimeHandler(l_mime, m_cfg, !m_forPreview);
if (!df or df->is_unknown()) {
if (!df || df->is_unknown()) {
// No real handler for this type, for now :(
LOGDEB(("FileInterner:: unprocessed mime: [%s] [%s]\n",
l_mime.c_str(), f.c_str()));
@ -629,7 +629,7 @@ void FileInterner::popHandler()
{
if (m_handlers.empty())
return;
int i = m_handlers.size() - 1;
size_t i = m_handlers.size() - 1;
if (m_tmpflgs[i]) {
m_tempfiles.pop_back();
m_tmpflgs[i] = false;

View File

@ -17,10 +17,10 @@
#include "autoconfig.h"
#include <sys/types.h>
#include <sys/wait.h>
#include <time.h>
#include "safesyswait.h"
#include <list>
using namespace std;
#include "cstr.h"
#include "execmd.h"
@ -32,6 +32,8 @@ using namespace std;
#include "md5ut.h"
#include "rclconfig.h"
using namespace std;
// This is called periodically by ExeCmd when it is waiting for data,
// or when it does receive some. We may choose to interrupt the
// command.

View File

@ -1,4 +1,4 @@
/* Copyright (C) 2005 J.F.Dockes
/* Copyright (C) 2005 J.F.Dockes
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
@ -32,7 +32,7 @@ using namespace std;
#include "idfile.h"
#include <sys/types.h>
#include <sys/wait.h>
#include "safesyswait.h"
bool MimeHandlerExecMultiple::startCmd()
{
@ -90,6 +90,9 @@ bool MimeHandlerExecMultiple::readDataElement(string& name, string &data)
LOGERR(("MHExecMultiple: getline error\n"));
return false;
}
LOGDEB1(("MHEM:rde: line [%s]\n", ibuf.c_str()));
// Empty line (end of message) ?
if (!ibuf.compare("\n")) {
LOGDEB(("MHExecMultiple: Got empty line\n"));
@ -163,7 +166,7 @@ bool MimeHandlerExecMultiple::next_document()
return false;
}
if (m_cmd.getChildPid() < 0 && !startCmd()) {
if (m_cmd.getChildPid() <= 0 && !startCmd()) {
return false;
}

View File

@ -14,13 +14,15 @@
* Free Software Foundation, Inc.,
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#include "autoconfig.h"
#include <stdio.h>
#include <fcntl.h>
#include <errno.h>
#include <unistd.h>
#include "safeunistd.h"
#include <time.h>
#include <cstdlib>
#include "safesysstat.h"
#include <map>
#include <sstream>

View File

@ -22,7 +22,11 @@
#include <sys/types.h>
#include "safesysstat.h"
#include <time.h>
#ifndef _WIN32
#include <regex.h>
#else
#include <regex>
#endif
#include <cstring>
#include <map>
@ -70,6 +74,14 @@ static PTMutexInit o_mcache_mutex;
* offsets for all message "From_" lines follow. The format is purely
* binary, values are not even byte-swapped to be proc-idependant.
*/
#ifdef _WIN32
// VC++ does not allow declaring an array of size o_b1size (not seen as a constant?)
#define M_o_b1size 1024
#else
#define M_o_b1size o_b1size
#endif
class MboxCache {
public:
typedef MimeHandlerMbox::mbhoff_type mbhoff_type;
@ -98,7 +110,7 @@ public:
}
FpKeeper keeper(&fp);
char blk1[o_b1size];
char blk1[M_o_b1size];
if (fread(blk1, 1, o_b1size, fp) != o_b1size) {
LOGDEB0(("MboxCache::get_offsets: read blk1 errno %d\n", errno));
return -1;
@ -116,7 +128,7 @@ public:
return -1;
}
mbhoff_type offset = -1;
int ret;
size_t ret;
if ((ret = fread(&offset, 1, sizeof(mbhoff_type), fp))
!= sizeof(mbhoff_type)) {
LOGDEB0(("MboxCache::get_offsets: read ret %d errno %d\n",
@ -184,7 +196,7 @@ public:
m_dir = "mboxcache";
m_dir = path_tildexpand(m_dir);
// If not an absolute path, compute relative to config dir
if (m_dir.at(0) != '/')
if (!path_isabsolute(m_dir))
m_dir = path_cat(config->getConfDir(), m_dir);
m_ok = true;
}
@ -226,7 +238,6 @@ private:
};
const size_t MboxCache::o_b1size = 1024;
static class MboxCache o_mcache;
static const string cstr_keyquirks("mhmboxquirks");
@ -307,7 +318,7 @@ bool MimeHandlerMbox::set_document_file(const string& mt, const string &fn)
typedef char line_type[LL+10];
static inline void stripendnl(line_type& line, int& ll)
{
ll = strlen(line);
ll = int(strlen(line));
while (ll > 0) {
if (line[ll-1] == '\n' || line[ll-1] == '\r') {
line[ll-1] = 0;
@ -376,9 +387,20 @@ static const char *frompat =
// exactly like: From ^M (From followed by space and eol). We only
// test for this if QUIRKS_TBIRD is set
static const char *miniTbirdFrom = "^From $";
#ifndef _WIN32
static regex_t fromregex;
static regex_t minifromregex;
#define M_regexec(A,B,C,D,E) regexec(&(A),B,C,D,E)
#else
basic_regex<char> fromregex;
basic_regex<char> minifromregex;
#define REG_ICASE std::regex_constants::icase
#define REG_NOSUB std::regex_constants::nosubs
#define REG_EXTENDED std::regex_constants::extended
#define M_regexec(A, B, C, D, E) regex_match(B,A)
#endif
static bool regcompiled;
static PTMutexInit o_regex_mutex;
@ -390,8 +412,13 @@ static void compileregexes()
// that we are alone.
if (regcompiled)
return;
#ifndef _WIN32
regcomp(&fromregex, frompat, REG_NOSUB|REG_EXTENDED);
regcomp(&minifromregex, miniTbirdFrom, REG_NOSUB|REG_EXTENDED);
#else
fromregex = basic_regex<char>(frompat, REG_NOSUB | REG_EXTENDED);
minifromregex = basic_regex<char>(miniTbirdFrom, REG_NOSUB | REG_EXTENDED);
#endif
regcompiled = true;
}
@ -440,9 +467,9 @@ bool MimeHandlerMbox::next_document()
(off = o_mcache.get_offset(m_config, m_udi, mtarg)) >= 0 &&
fseeko(fp, (off_t)off, SEEK_SET) >= 0 &&
fgets(line, LL, fp) &&
(!regexec(&fromregex, line, 0, 0, 0) ||
(!M_regexec(fromregex, line, 0, 0, 0) ||
((m_quirks & MBOXQUIRK_TBIRD) &&
!regexec(&minifromregex, line, 0, 0, 0))) ) {
!M_regexec(minifromregex, line, 0, 0, 0))) ) {
LOGDEB0(("MimeHandlerMbox: Cache: From_ Ok\n"));
fseeko(fp, (off_t)off, SEEK_SET);
m_msgnum = mtarg -1;
@ -485,9 +512,9 @@ bool MimeHandlerMbox::next_document()
/* The 'F' compare is redundant but it improves performance
A LOT */
if (line[0] == 'F' && (
!regexec(&fromregex, line, 0, 0, 0) ||
!M_regexec(fromregex, line, 0, 0, 0) ||
((m_quirks & MBOXQUIRK_TBIRD) &&
!regexec(&minifromregex, line, 0, 0, 0)))
!M_regexec(minifromregex, line, 0, 0, 0)))
) {
LOGDEB1(("MimeHandlerMbox: msgnum %d, "
"From_ at line %d: [%s]\n", m_msgnum, m_lineno, line));

59
src/internfile/mh_null.h Normal file
View File

@ -0,0 +1,59 @@
/* Copyright (C) 2004 J.F.Dockes
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the
* Free Software Foundation, Inc.,
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#ifndef _MH_NULL_H_INCLUDED_
#define _MH_NULL_H_INCLUDED_
// It may make sense in some cases to set this null filter (no output)
// instead of using recoll_noindex or leaving the default filter in
// case one doesn't want to install it: this will avoid endless retries
// to reindex the affected files, as recoll will think it has succeeded
// indexing them. Downside: the files won't be indexed when one
// actually installs the real filter, will need a -z
// Actually used for empty files
// Associated to application/x-zerosize, so use
// <mimetype> = internal application/x-zerosize
// in mimeconf
#include <string>
#include "cstr.h"
#include "mimehandler.h"
/**
 * Null document handler.
 *
 * Accepts any input file and produces exactly one document per file. The
 * document's content entry is set to cstr_null and its MIME type entry to
 * cstr_textplain, so nothing is extracted from the file itself.
 */
class MimeHandlerNull : public RecollFilter {
public:
    /**
     * @param cnf configuration object, forwarded to RecollFilter
     * @param id  handler identifier, forwarded to RecollFilter
     */
    MimeHandlerNull(RclConfig *cnf, const std::string& id)
        : RecollFilter(cnf, id)
    {
    }

    virtual ~MimeHandlerNull()
    {
    }

    /**
     * Register the file with the base class and arm the handler so that the
     * next call to next_document() yields one document.
     *
     * The parameter types are spelled std::string so that this header does
     * not depend on a using-declaration provided by another include.
     *
     * @param mt MIME type, forwarded to RecollFilter::set_document_file
     * @param fn file name, forwarded to RecollFilter::set_document_file
     * @return always true (the base class result is not consulted)
     */
    virtual bool set_document_file(const std::string& mt, const std::string& fn)
    {
        RecollFilter::set_document_file(mt, fn);
        m_havedoc = true;
        return true;
    }

    /**
     * Produce the single document for the current file.
     *
     * @return false if no document is pending; otherwise true, after
     *         clearing the pending flag and filling in the content and
     *         MIME type metadata entries.
     */
    virtual bool next_document()
    {
        if (m_havedoc == false)
            return false;
        // One document per file: disarm before reporting it.
        m_havedoc = false;
        m_metaData[cstr_dj_keycontent] = cstr_null;
        m_metaData[cstr_dj_keymt] = cstr_textplain;
        return true;
    }
};
#endif /* _MH_NULL_H_INCLUDED_ */

View File

@ -18,7 +18,7 @@
#define _MH_SYMLINK_H_INCLUDED_
#include <string>
#include <unistd.h>
#include "safeunistd.h"
#include <errno.h>
#include "cstr.h"

View File

@ -18,10 +18,13 @@
#include <stdio.h>
#include <errno.h>
#include "safefcntl.h"
#include <sys/types.h>
#include "safesysstat.h"
#include "safeunistd.h"
#include <iostream>
#include <string>
using namespace std;
#include "cstr.h"
#include "mh_text.h"
@ -32,16 +35,23 @@ using namespace std;
#include "pxattr.h"
#include "pathut.h"
using namespace std;
const int MB = 1024*1024;
const int KB = 1024;
// Process a plain text file
bool MimeHandlerText::set_document_file(const string& mt, const string &fn)
{
LOGDEB(("MimeHandlerText::set_document_file: [%s]\n", fn.c_str()));
LOGDEB(("MimeHandlerText::set_document_file: [%s] offs %lld\n",
fn.c_str(), m_offs));
RecollFilter::set_document_file(mt, fn);
m_fn = fn;
// This should not be necessary, but it happens on msw that offset is large
// negative at this point, could not find the reason (still trying).
m_offs = 0;
// file size for oversize check
long long fsize = path_filesize(m_fn);
@ -51,9 +61,11 @@ bool MimeHandlerText::set_document_file(const string& mt, const string &fn)
return false;
}
#ifndef _WIN32
// Check for charset defined in extended attribute as per:
// http://freedesktop.org/wiki/CommonExtendedAttributes
pxattr::get(m_fn, "charset", &m_charsetfromxattr);
#endif
// Max file size parameter: texts over this size are not indexed
int maxmbs = 20;

View File

@ -19,7 +19,6 @@
#include <sys/types.h>
#include <string>
using std::string;
#include "mimehandler.h"
@ -30,22 +29,22 @@ using std::string;
*/
class MimeHandlerText : public RecollFilter {
public:
MimeHandlerText(RclConfig *cnf, const string& id)
: RecollFilter(cnf, id), m_paging(false), m_offs(0)
MimeHandlerText(RclConfig *cnf, const std::string& id)
: RecollFilter(cnf, id), m_paging(false), m_offs(0), m_pagesz(0)
{
}
virtual ~MimeHandlerText()
{
}
virtual bool set_document_file(const string& mt, const string &file_path);
virtual bool set_document_string(const string&, const string&);
virtual bool set_document_file(const std::string& mt, const std::string &file_path);
virtual bool set_document_string(const std::string&, const std::string&);
virtual bool is_data_input_ok(DataInput input) const {
if (input == DOCUMENT_FILE_NAME || input == DOCUMENT_STRING)
return true;
return false;
}
virtual bool next_document();
virtual bool skip_to_document(const string& s);
virtual bool skip_to_document(const std::string& s);
virtual void clear()
{
m_paging = false;
@ -56,11 +55,11 @@ class MimeHandlerText : public RecollFilter {
}
private:
bool m_paging;
string m_text;
string m_fn;
std::string m_text;
std::string m_fn;
off_t m_offs; // Offset of next read in file if we're paging
size_t m_pagesz;
string m_charsetfromxattr;
std::string m_charsetfromxattr;
bool readnext();
};

View File

@ -31,7 +31,6 @@ using namespace std;
#include "rclconfig.h"
#include "smallut.h"
#include "md5ut.h"
#include "mh_exec.h"
#include "mh_execm.h"
#include "mh_html.h"
@ -40,6 +39,7 @@ using namespace std;
#include "mh_text.h"
#include "mh_symlink.h"
#include "mh_unknown.h"
#include "mh_null.h"
#include "ptmutex.h"
// Performance help: we use a pool of already known and created
@ -163,6 +163,10 @@ static RecollFilter *mhFactory(RclConfig *config, const string &mime,
LOGDEB2(("mhFactory(%s): ret MimeHandlerSymlink\n", mime.c_str()));
MD5String("MimeHandlerSymlink", id);
return nobuild ? 0 : new MimeHandlerSymlink(config, id);
} else if ("application/x-zerosize" == lmime) {
LOGDEB(("mhFactory(%s): ret MimeHandlerNull\n", mime.c_str()));
MD5String("MimeHandlerNull", id);
return nobuild ? 0 : new MimeHandlerNull(config, id);
} else if (lmime.find("text/") == 0) {
// Try to handle unknown text/xx as text/plain. This
// only happen if the text/xx was defined as "internal" in
@ -206,7 +210,7 @@ MimeHandlerExec *mhExecFactory(RclConfig *cfg, const string& mtype, string& hs,
}
// Split command name and args, and build exec object
list<string> cmdtoks;
vector<string> cmdtoks;
stringToStrings(cmdstr, cmdtoks);
if (cmdtoks.empty()) {
LOGERR(("mhExecFactory: bad config line for [%s]: [%s]\n",
@ -216,7 +220,22 @@ MimeHandlerExec *mhExecFactory(RclConfig *cfg, const string& mtype, string& hs,
MimeHandlerExec *h = multiple ?
new MimeHandlerExecMultiple(cfg, id) :
new MimeHandlerExec(cfg, id);
list<string>::iterator it = cmdtoks.begin();
vector<string>::iterator it = cmdtoks.begin();
// Special-case python and perl on windows: we need to also locate the
// first argument which is the script name "python somescript.py".
// On Unix, thanks to #!, we usually just run "somescript.py", but need
// the same change if we ever want to use the same cmdline as windows
if (!stringlowercmp("python", *it) || !stringlowercmp("perl", *it)) {
if (cmdtoks.size() < 2) {
LOGERR(("mhExecFactory: python/perl cmd: no script?. [%s]: [%s]\n",
mtype.c_str(), hs.c_str()));
}
vector<string>::iterator it1(it);
it1++;
*it1 = cfg->findFilter(*it1);
}
h->params.push_back(cfg->findFilter(*it++));
h->params.insert(h->params.end(), it, cmdtoks.end());

View File

@ -86,12 +86,12 @@ public:
return false;
}
virtual bool set_document_data(const std::string& mtype,
const char *cp, unsigned int sz)
const char *cp, size_t sz)
{
return set_document_string(mtype, std::string(cp, sz));
}
virtual void set_docsize(size_t size)
virtual void set_docsize(off_t size)
{
char csize[30];
sprintf(csize, "%lld", (long long)size);

View File

@ -23,6 +23,10 @@
* -----END-LICENCE-----
*/
#include <time.h>
#ifdef _WIN32
// Local implementation in windows directory
#include "strptime.h"
#endif
#include <stdio.h>
#include <algorithm>
#include <cstring>

View File

@ -108,8 +108,12 @@ bool Uncomp::uncompressfile(const string& ifn,
}
// Execute command and retrieve output file name, check that it exists
#ifndef _WIN32
ExecCmd ex;
int status = ex.doexec(cmd, args, 0, &tfile);
#else
int status = -1;
#endif
if (status || tfile.empty()) {
LOGERR(("uncompressfile: doexec: failed for [%s] status 0x%x\n",
ifn.c_str(), status));

View File

@ -14,7 +14,6 @@
* Free Software Foundation, Inc.,
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#include <unistd.h>
#include <algorithm>
#include <cstdio>

View File

@ -14,11 +14,8 @@
* Free Software Foundation, Inc.,
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#include "autoconfig.h"
#include <unistd.h>
#include <cstdlib>
#include <qapplication.h>
@ -164,14 +161,6 @@ static void recollCleanup()
LOGDEB2(("recollCleanup: done\n"));
}
static void sigcleanup(int)
{
// We used to not call exit from here, because of the idxthread, but
// this is now gone, so...
recollNeedsExit = 1;
exit(1);
}
void applyStyleSheet(const QString& ssfname)
{
const char *cfname = (const char *)ssfname.toLocal8Bit();
@ -305,7 +294,7 @@ int main(int argc, char **argv)
string reason;
theconfig = recollinit(recollCleanup, sigcleanup, reason, &a_config);
theconfig = recollinit(recollCleanup, 0, reason, &a_config);
if (!theconfig || !theconfig->ok()) {
QString msg = "Configuration problem: ";
msg += QString::fromUtf8(reason.c_str());

File diff suppressed because it is too large Load Diff

View File

@ -16,6 +16,8 @@
*/
#include "autoconfig.h"
#include <signal.h>
#include <QMessageBox>
#include <QTimer>

View File

@ -310,7 +310,7 @@ void RclMain::startNativeViewer(Rcl::Doc doc, int pagenum, QString term)
istempfile = true;
rememberTempFile(temp);
fn = temp->filename();
url = string("file://") + fn;
url = path_pathtofileurl(fn);
}
// If using an actual file, check that it exists, and if it is
@ -335,7 +335,7 @@ void RclMain::startNativeViewer(Rcl::Doc doc, int pagenum, QString term)
if (temp) {
rememberTempFile(temp);
fn = temp->filename();
url = string("file://") + fn;
url = path_pathtofileurl(fn);
}
}
@ -430,16 +430,16 @@ void RclMain::startManual()
void RclMain::startManual(const string& index)
{
Rcl::Doc doc;
doc.url = "file://";
doc.url = path_cat(doc.url, theconfig->getDatadir());
doc.url = path_cat(doc.url, "doc");
doc.url = path_cat(doc.url, "usermanual.html");
string path = theconfig->getDatadir();
path = path_cat(path, "doc");
path = path_cat(path, "usermanual.html");
LOGDEB(("RclMain::startManual: help index is %s\n",
index.empty()?"(null)":index.c_str()));
if (!index.empty()) {
doc.url += "#";
doc.url += index;
path += "#";
path += index;
}
doc.url = path_pathtofileurl(path);
doc.mimetype = "text/html";
startNativeViewer(doc);
}

View File

@ -16,9 +16,6 @@
*/
#include "autoconfig.h"
#include <fcntl.h>
#include "safeunistd.h"
#include <utility>
#include MEMORY_INCLUDE
@ -138,10 +135,14 @@ void RclMain::init()
// idxstatus file. Make sure it exists before trying to watch it
// (case where we're started on an older index, or if the status
// file was deleted since indexing
::close(::open(theconfig->getIdxStatusFile().c_str(), O_CREAT, 0600));
m_watcher.addPath(QString::fromLocal8Bit(
theconfig->getIdxStatusFile().c_str()));
// file was deleted since indexing)
QString idxfn =
QString::fromLocal8Bit(theconfig->getIdxStatusFile().c_str());
QFile qf(idxfn);
qf.open(QIODevice::ReadWrite);
qf.setPermissions(QFile::ReadOwner|QFile::WriteOwner);
qf.close();
m_watcher.addPath(idxfn);
// At least some versions of qt4 don't display the status bar if
// it's not created here.
(void)statusBar();
@ -728,7 +729,7 @@ void RclMain::initiateQuery()
qApp->processEvents();
if (progress.wasCanceled()) {
// Just get out of there asap.
_exit(1);
exit(1);
}
qApp->processEvents();

View File

@ -19,6 +19,7 @@
#include <stdio.h>
#include "safesysstat.h"
#include "safeunistd.h"
#include <signal.h>
#include <string>

View File

@ -16,7 +16,6 @@
*/
#include "autoconfig.h"
#include <unistd.h>
#include <stdio.h>
#include <string>

View File

@ -17,7 +17,6 @@
#include "autoconfig.h"
#include <stdio.h>
#include <unistd.h>
#include <algorithm>
#include <list>

View File

@ -14,6 +14,8 @@
* Free Software Foundation, Inc.,
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#include "autoconfig.h"
#include "docseq.h"
#include "filtseq.h"
#include "sortseq.h"

View File

@ -14,17 +14,20 @@
* Free Software Foundation, Inc.,
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#include "autoconfig.h"
#include <math.h>
#include <time.h>
#include <list>
using std::list;
#include "docseqdb.h"
#include "rcldb.h"
#include "debuglog.h"
#include "wasatorcl.h"
using std::list;
DocSequenceDb::DocSequenceDb(STD_SHARED_PTR<Rcl::Query> q, const string &t,
STD_SHARED_PTR<Rcl::SearchData> sdata)
: DocSequence(t), m_q(q), m_sdata(sdata), m_fsdata(sdata),

View File

@ -35,7 +35,7 @@ using std::list;
bool RclDHistoryEntry::encode(string& value)
{
char chartime[30];
sprintf(chartime,"%ld", unixtime);
sprintf(chartime,"%lld", (long long)unixtime);
string budi;
base64_encode(udi, budi);
value = string("U ") + string(chartime) + " " + budi;
@ -161,5 +161,5 @@ int DocSequenceHistory::getResCnt()
{
if (m_hlist.empty())
m_hlist = getDocHistory(m_hist);
return m_hlist.size();
return int(m_hlist.size());
}

View File

@ -16,6 +16,7 @@
*/
#ifndef _DOCSEQHIST_H_INCLUDED_
#define _DOCSEQHIST_H_INCLUDED_
#include <time.h>
#include "docseq.h"
#include "dynconf.h"
@ -28,13 +29,13 @@ namespace Rcl {
class RclDHistoryEntry : public DynConfEntry {
public:
RclDHistoryEntry() : unixtime(0) {}
RclDHistoryEntry(long t, const string& u)
RclDHistoryEntry(time_t t, const string& u)
: unixtime(t), udi(u) {}
virtual ~RclDHistoryEntry() {}
virtual bool decode(const string &value);
virtual bool encode(string& value);
virtual bool equal(const DynConfEntry& other);
long unixtime;
time_t unixtime;
string udi;
};
@ -57,7 +58,7 @@ private:
Rcl::Db *m_db;
RclDynConf *m_hist;
int m_prevnum;
long m_prevtime;
time_t m_prevtime;
std::string m_description; // This is just an nls translated 'doc history'
std::list<RclDHistoryEntry> m_hlist;
std::list<RclDHistoryEntry>::const_iterator m_it;

187
src/query/location.hh Normal file
View File

@ -0,0 +1,187 @@
// A Bison parser, made by GNU Bison 3.0.2.
// Locations for Bison parsers in C++
// Copyright (C) 2002-2013 Free Software Foundation, Inc.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
// As a special exception, you may create a larger work that contains
// part or all of the Bison parser skeleton and distribute that work
// under terms of your choice, so long as that work isn't itself a
// parser generator using the skeleton or a modified version thereof
// as a parser skeleton. Alternatively, if you modify or redistribute
// the parser skeleton itself, you may (at your option) remove this
// special exception, which will cause the skeleton and the resulting
// Bison output files to be licensed under the GNU General Public
// License without this special exception.
// This special exception was added by the Free Software Foundation in
// version 2.2 of Bison.
/**
** \file location.hh
** Define the yy::location class.
*/
#ifndef YY_YY_LOCATION_HH_INCLUDED
# define YY_YY_LOCATION_HH_INCLUDED
# include "position.hh"
namespace yy {
#line 46 "location.hh" // location.cc:291
/// Abstract a location.
class location
{
public:
/// Construct a location from \a b to \a e.
location (const position& b, const position& e)
: begin (b)
, end (e)
{
}
/// Construct a 0-width location in \a p.
explicit location (const position& p = position ())
: begin (p)
, end (p)
{
}
/// Construct a 0-width location in \a f, \a l, \a c.
explicit location (std::string* f,
unsigned int l = 1u,
unsigned int c = 1u)
: begin (f, l, c)
, end (f, l, c)
{
}
/// Initialization.
void initialize (std::string* f = YY_NULLPTR,
unsigned int l = 1u,
unsigned int c = 1u)
{
begin.initialize (f, l, c);
end = begin;
}
/** \name Line and Column related manipulators
** \{ */
public:
/// Reset initial location to final location.
void step ()
{
begin = end;
}
/// Extend the current location to the COUNT next columns.
void columns (int count = 1)
{
end += count;
}
/// Extend the current location to the COUNT next lines.
void lines (int count = 1)
{
end.lines (count);
}
/** \} */
public:
/// Beginning of the located region.
position begin;
/// End of the located region.
position end;
};
/// Join two location objects to create a location.
inline location operator+ (location res, const location& end)
{
res.end = end.end;
return res;
}
/// Change end position in place.
inline location& operator+= (location& res, int width)
{
res.columns (width);
return res;
}
/// Change end position.
inline location operator+ (location res, int width)
{
return res += width;
}
/// Change end position in place.
inline location& operator-= (location& res, int width)
{
return res += -width;
}
/// Change end position.
inline location operator- (const location& begin, int width)
{
return begin + -width;
}
/// Compare two location objects.
inline bool
operator== (const location& loc1, const location& loc2)
{
return loc1.begin == loc2.begin && loc1.end == loc2.end;
}
/// Compare two location objects.
inline bool
operator!= (const location& loc1, const location& loc2)
{
return !(loc1 == loc2);
}
/** \brief Intercept output stream redirection.
** \param ostr the destination output stream
** \param loc a reference to the location to redirect
**
** Avoid duplicate information.
*/
template <typename YYChar>
inline std::basic_ostream<YYChar>&
operator<< (std::basic_ostream<YYChar>& ostr, const location& loc)
{
unsigned int end_col = 0 < loc.end.column ? loc.end.column - 1 : 0;
ostr << loc.begin// << "(" << loc.end << ") "
;
if (loc.end.filename
&& (!loc.begin.filename
|| *loc.begin.filename != *loc.end.filename))
ostr << '-' << loc.end.filename << ':' << loc.end.line << '.' << end_col;
else if (loc.begin.line < loc.end.line)
ostr << '-' << loc.end.line << '.' << end_col;
else if (loc.begin.column < end_col)
ostr << '-' << end_col;
return ostr;
}
} // yy
#line 187 "location.hh" // location.cc:291
#endif // !YY_YY_LOCATION_HH_INCLUDED

View File

@ -15,7 +15,7 @@
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#include <limits.h>
#include <string>
#include <utility>
#include <list>
@ -54,8 +54,8 @@ struct MatchEntry {
pair<int, int> offs;
// Index of the search group this comes from: this is to relate a
// match to the original user input.
unsigned int grpidx;
MatchEntry(int sta, int sto, unsigned int idx)
size_t grpidx;
MatchEntry(int sta, int sto, size_t idx)
: offs(sta, sto), grpidx(idx)
{
}
@ -105,7 +105,7 @@ class TextSplitPTR : public TextSplit {
// pos, bts, bte));
// If this word is a search term, remember its byte-offset span.
map<string, unsigned int>::const_iterator it = m_terms.find(dumb);
map<string, size_t>::const_iterator it = m_terms.find(dumb);
if (it != m_terms.end()) {
tboffs.push_back(MatchEntry(bts, bte, (*it).second));
}
@ -135,7 +135,7 @@ private:
int m_wcount;
// In: user query terms
map<string, unsigned int> m_terms;
map<string, size_t> m_terms;
// m_gterms holds all the terms in m_groups, as a set for quick lookup
set<string> m_gterms;
@ -214,7 +214,7 @@ static bool do_proximity_test(int window, vector<vector<int>* >& plists,
bool TextSplitPTR::matchGroup(unsigned int grpidx)
{
const vector<string>& terms = m_hdata.groups[grpidx];
int window = m_hdata.groups[grpidx].size() + m_hdata.slacks[grpidx];
int window = int(m_hdata.groups[grpidx].size() + m_hdata.slacks[grpidx]);
LOGDEB1(("TextSplitPTR::matchGroup:d %d: %s\n", window,
vecStringToString(terms).c_str()));
@ -270,7 +270,7 @@ bool TextSplitPTR::matchGroup(unsigned int grpidx)
for (vector<int>::iterator it = plists[0]->begin();
it != plists[0]->end(); it++) {
int pos = *it;
int sta = int(10E9), sto = 0;
int sta = INT_MAX, sto = 0;
LOGDEB2(("MatchGroup: Testing at pos %d\n", pos));
if (do_proximity_test(window,plists, 1, pos, pos, &sta, &sto, minpos)) {
LOGDEB1(("TextSplitPTR::matchGroup: MATCH termpos [%d,%d]\n",
@ -417,10 +417,10 @@ bool PlainToRich::plaintorich(const string& in,
// If we still have terms positions, check (byte) position. If
// we are at or after a term match, mark.
if (tPosIt != tPosEnd) {
int ibyteidx = chariter.getBpos();
int ibyteidx = int(chariter.getBpos());
if (ibyteidx == tPosIt->offs.first) {
if (!intag && ibyteidx >= (int)headend) {
*olit += startMatch(tPosIt->grpidx);
*olit += startMatch((unsigned int)(tPosIt->grpidx));
}
inrcltag = 1;
} else if (ibyteidx == tPosIt->offs.second) {

180
src/query/position.hh Normal file
View File

@ -0,0 +1,180 @@
// A Bison parser, made by GNU Bison 3.0.2.
// Positions for Bison parsers in C++
// Copyright (C) 2002-2013 Free Software Foundation, Inc.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
// As a special exception, you may create a larger work that contains
// part or all of the Bison parser skeleton and distribute that work
// under terms of your choice, so long as that work isn't itself a
// parser generator using the skeleton or a modified version thereof
// as a parser skeleton. Alternatively, if you modify or redistribute
// the parser skeleton itself, you may (at your option) remove this
// special exception, which will cause the skeleton and the resulting
// Bison output files to be licensed under the GNU General Public
// License without this special exception.
// This special exception was added by the Free Software Foundation in
// version 2.2 of Bison.
/**
** \file position.hh
** Define the yy::position class.
*/
#ifndef YY_YY_POSITION_HH_INCLUDED
# define YY_YY_POSITION_HH_INCLUDED
# include <algorithm> // std::max
# include <iostream>
# include <string>
# ifndef YY_NULLPTR
#  if defined __cplusplus && 201103L <= __cplusplus
#   define YY_NULLPTR nullptr
#  else
#   define YY_NULLPTR 0
#  endif
# endif
namespace yy {
#line 56 "position.hh" // location.cc:291
  /// A point in the input: optional file name, line and column.
  class position
  {
  public:
    /// Build a position at file \a f, line \a l, column \a c.
    explicit position (std::string* f = YY_NULLPTR,
                       unsigned int l = 1u,
                       unsigned int c = 1u)
      : filename (f), line (l), column (c)
    {}

    /// Reset the position to file \a fn, line \a l, column \a c.
    void initialize (std::string* fn = YY_NULLPTR,
                     unsigned int l = 1u,
                     unsigned int c = 1u)
    {
      filename = fn;
      line = l;
      column = c;
    }

    /** \name Line and Column related manipulators
     ** \{ */
    /// (line related) Move by \a count lines; any non-zero move also
    /// resets the column to 1. A zero count changes nothing.
    void lines (int count = 1)
    {
      if (count == 0)
        return;
      column = 1u;
      line = add_ (line, count, 1);
    }

    /// (column related) Move by \a count columns, never going below 1.
    void columns (int count = 1)
    {
      column = add_ (column, count, 1);
    }
    /** \} */

    /// File name to which this position refers (may be null).
    std::string* filename;
    /// Current line number.
    unsigned int line;
    /// Current column number.
    unsigned int column;

  private:
    /// Compute max(min, lhs+rhs) (provided min <= lhs).
    static unsigned int add_ (unsigned int lhs, int rhs, unsigned int min)
    {
      const unsigned int offset = static_cast<unsigned int> (rhs);
      // A forward move always applies; a backward (or null) move applies
      // only when its magnitude is strictly smaller than lhs.
      if (0 < rhs || -offset < lhs)
        return offset + lhs;
      return min;
    }
  };

  /// Move \a res by \a width columns, in place.
  inline position&
  operator+= (position& res, int width)
  {
    res.columns (width);
    return res;
  }

  /// Copy of \a res moved by \a width columns.
  inline position
  operator+ (position res, int width)
  {
    res.columns (width);
    return res;
  }

  /// Move \a res back by \a width columns, in place.
  inline position&
  operator-= (position& res, int width)
  {
    res.columns (-width);
    return res;
  }

  /// Copy of \a res moved back by \a width columns.
  inline position
  operator- (position res, int width)
  {
    res.columns (-width);
    return res;
  }

  /// Positions are equal when line and column match and the file names
  /// are either the same pointer or both non-null with equal contents.
  inline bool
  operator== (const position& pos1, const position& pos2)
  {
    if (pos1.line != pos2.line || pos1.column != pos2.column)
      return false;
    if (pos1.filename == pos2.filename)
      return true;
    return pos1.filename && pos2.filename
           && *pos1.filename == *pos2.filename;
  }

  /// Negation of operator==.
  inline bool
  operator!= (const position& pos1, const position& pos2)
  {
    const bool same = (pos1 == pos2);
    return !same;
  }

  /** \brief Intercept output stream redirection.
   ** \param ostr the destination output stream
   ** \param pos a reference to the position to redirect
   **
   ** Prints "file:line.column", or "line.column" when there is no file.
   */
  template <typename YYChar>
  inline std::basic_ostream<YYChar>&
  operator<< (std::basic_ostream<YYChar>& ostr, const position& pos)
  {
    if (pos.filename)
      ostr << *pos.filename << ':';
    ostr << pos.line << '.' << pos.column;
    return ostr;
  }
} // yy
#line 180 "position.hh" // location.cc:291
#endif // !YY_YY_POSITION_HH_INCLUDED

View File

@ -77,7 +77,7 @@ void ResListPager::resultPageNext()
if (m_winfirst < 0) {
m_winfirst = 0;
} else {
m_winfirst += m_respage.size();
m_winfirst += int(m_respage.size());
}
// Get the next page of results. Note that we look ahead by one to
// determine if there is actually a next page
@ -102,7 +102,7 @@ void ResListPager::resultPageNext()
// Next button. We'd need to remove the Next link from the page
// too.
// Restore the m_winfirst value, let the current result vector alone
m_winfirst -= m_respage.size();
m_winfirst -= int(m_respage.size());
} else {
// No results at all (on first page)
m_winfirst = -1;
@ -213,9 +213,9 @@ void ResListPager::displayDoc(RclConfig *config, int i, Rcl::Doc& doc,
// Size information. We print both doc and file if they differ a lot
off_t fsize = -1, dsize = -1;
if (!doc.dbytes.empty())
dsize = atoll(doc.dbytes.c_str());
dsize = static_cast<off_t>(atoll(doc.dbytes.c_str()));
if (!doc.fbytes.empty())
fsize = atoll(doc.fbytes.c_str());
fsize = static_cast<off_t>(atoll(doc.fbytes.c_str()));
string sizebuf;
if (dsize > 0) {
sizebuf = displayableBytes(dsize);

View File

@ -64,7 +64,7 @@ public:
int pageLastDocNum() {
if (m_winfirst < 0 || m_respage.size() == 0)
return -1;
return m_winfirst + m_respage.size() - 1;
return m_winfirst + int(m_respage.size()) - 1;
}
virtual int pageSize() const {return m_pagesize;}
void pageNext();

View File

@ -39,7 +39,7 @@ class DocSeqSorted : public DocSeqModifier {
virtual bool canSort() {return true;}
virtual bool setSortSpec(const DocSeqSortSpec &sortspec);
virtual bool getDoc(int num, Rcl::Doc &doc, string *sh = 0);
virtual int getResCnt() {return m_docsp.size();}
virtual int getResCnt() {return int(m_docsp.size());}
private:
DocSeqSortSpec m_spec;
std::vector<Rcl::Doc> m_docs;

158
src/query/stack.hh Normal file
View File

@ -0,0 +1,158 @@
// A Bison parser, made by GNU Bison 3.0.2.
// Stack handling for Bison parsers in C++
// Copyright (C) 2002-2013 Free Software Foundation, Inc.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
// As a special exception, you may create a larger work that contains
// part or all of the Bison parser skeleton and distribute that work
// under terms of your choice, so long as that work isn't itself a
// parser generator using the skeleton or a modified version thereof
// as a parser skeleton. Alternatively, if you modify or redistribute
// the parser skeleton itself, you may (at your option) remove this
// special exception, which will cause the skeleton and the resulting
// Bison output files to be licensed under the GNU General Public
// License without this special exception.
// This special exception was added by the Free Software Foundation in
// version 2.2 of Bison.
/**
** \file stack.hh
** Define the yy::stack class.
*/
#ifndef YY_YY_STACK_HH_INCLUDED
# define YY_YY_STACK_HH_INCLUDED
# include <vector>
namespace yy {
#line 46 "stack.hh" // stack.hh:133
  /// LIFO container built on a sequence \a S (std::vector by default).
  /// Index 0 always designates the most recently pushed element.
  template <class T, class S = std::vector<T> >
  class stack
  {
  public:
    // Iteration goes from the top of the stack down, so expose the
    // underlying container's reverse iterators as our forward ones.
    typedef typename S::reverse_iterator iterator;
    typedef typename S::const_reverse_iterator const_iterator;

    /// Build an empty stack.
    stack ()
      : seq_ ()
    {}

    /// Build a stack holding \a n default-constructed elements.
    stack (unsigned int n)
      : seq_ (n)
    {}

    /// Access the element \a i levels below the top (0 is the top).
    T&
    operator[] (unsigned int i)
    {
      const typename S::size_type top = seq_.size () - 1;
      return seq_[top - i];
    }

    /// Read-only access to the element \a i levels below the top.
    const T&
    operator[] (unsigned int i) const
    {
      const typename S::size_type top = seq_.size () - 1;
      return seq_[top - i];
    }

    /// Steal the contents of \a t.
    ///
    /// Close to move-semantics: a default-constructed T is appended, then
    /// T::move() is called on it with \a t as argument.
    void
    push (T& t)
    {
      seq_.push_back (T());
      (*this)[0].move (t);
    }

    /// Drop the \a n topmost elements.
    void
    pop (unsigned int n = 1)
    {
      while (n != 0)
        {
          seq_.pop_back ();
          --n;
        }
    }

    /// Remove every element.
    void
    clear ()
    {
      seq_.clear ();
    }

    /// Number of elements currently stored.
    typename S::size_type
    size () const
    {
      return seq_.size ();
    }

    /// Iterator on the top element.
    const_iterator
    begin () const
    {
      return seq_.rbegin ();
    }

    /// Iterator past the bottom element.
    const_iterator
    end () const
    {
      return seq_.rend ();
    }

  private:
    // Not copyable: declared but intentionally left undefined.
    stack (const stack&);
    stack& operator= (const stack&);

    /// The wrapped container.
    S seq_;
  };

  /// Present a slice of the top of a stack: index \a i of the slice maps
  /// to index (range - i) of the underlying stack.
  template <class T, class S = stack<T> >
  class slice
  {
  public:
    /// View the \a range topmost levels of \a stack.
    slice (const S& stack, unsigned int range)
      : stack_ (stack), range_ (range)
    {}

    /// Element \a i of the slice.
    const T&
    operator [] (unsigned int i) const
    {
      const unsigned int depth = range_ - i;
      return stack_[depth];
    }

  private:
    const S& stack_;
    unsigned int range_;
  };
} // yy
#line 157 "stack.hh" // stack.hh:133
#endif // !YY_YY_STACK_HH_INCLUDED

1517
src/query/wasaparse.cpp Normal file

File diff suppressed because it is too large Load Diff

476
src/query/wasaparse.hpp Normal file
View File

@ -0,0 +1,476 @@
// A Bison parser, made by GNU Bison 3.0.2.
// Skeleton interface for Bison LALR(1) parsers in C++
// Copyright (C) 2002-2013 Free Software Foundation, Inc.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
// As a special exception, you may create a larger work that contains
// part or all of the Bison parser skeleton and distribute that work
// under terms of your choice, so long as that work isn't itself a
// parser generator using the skeleton or a modified version thereof
// as a parser skeleton. Alternatively, if you modify or redistribute
// the parser skeleton itself, you may (at your option) remove this
// special exception, which will cause the skeleton and the resulting
// Bison output files to be licensed under the GNU General Public
// License without this special exception.
// This special exception was added by the Free Software Foundation in
// version 2.2 of Bison.
/**
** \file y.tab.h
** Define the yy::parser class.
*/
// C++ LALR(1) parser skeleton written by Akim Demaille.
#ifndef YY_YY_Y_TAB_H_INCLUDED
# define YY_YY_Y_TAB_H_INCLUDED
# include <vector>
# include <iostream>
# include <stdexcept>
# include <string>
# include "stack.hh"
# include "location.hh"
#ifndef YY_ATTRIBUTE
# if (defined __GNUC__ \
&& (2 < __GNUC__ || (__GNUC__ == 2 && 96 <= __GNUC_MINOR__))) \
|| defined __SUNPRO_C && 0x5110 <= __SUNPRO_C
# define YY_ATTRIBUTE(Spec) __attribute__(Spec)
# else
# define YY_ATTRIBUTE(Spec) /* empty */
# endif
#endif
#ifndef YY_ATTRIBUTE_PURE
# define YY_ATTRIBUTE_PURE YY_ATTRIBUTE ((__pure__))
#endif
#ifndef YY_ATTRIBUTE_UNUSED
# define YY_ATTRIBUTE_UNUSED YY_ATTRIBUTE ((__unused__))
#endif
#if !defined _Noreturn \
&& (!defined __STDC_VERSION__ || __STDC_VERSION__ < 201112)
# if defined _MSC_VER && 1200 <= _MSC_VER
# define _Noreturn __declspec (noreturn)
# else
# define _Noreturn YY_ATTRIBUTE ((__noreturn__))
# endif
#endif
/* Suppress unused-variable warnings by "using" E. */
#if ! defined lint || defined __GNUC__
# define YYUSE(E) ((void) (E))
#else
# define YYUSE(E) /* empty */
#endif
/* Two alternative ways of silencing "may be used uninitialized" warnings:
   - GCC 4.7 and later: bracket the offending code with diagnostic pragmas
     (YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN / _END).
   - any other compiler: YY_INITIAL_VALUE(Value) expands to Value, so the
     variable is given an explicit initializer instead. */
#if defined __GNUC__ && 407 <= __GNUC__ * 100 + __GNUC_MINOR__
/* Suppress an incorrect diagnostic about yylval being uninitialized. */
# define YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN \
_Pragma ("GCC diagnostic push") \
_Pragma ("GCC diagnostic ignored \"-Wuninitialized\"")\
_Pragma ("GCC diagnostic ignored \"-Wmaybe-uninitialized\"")
# define YY_IGNORE_MAYBE_UNINITIALIZED_END \
_Pragma ("GCC diagnostic pop")
#else
# define YY_INITIAL_VALUE(Value) Value
#endif
/* Fallbacks: whichever mechanism was not selected above becomes a no-op,
   so both macro families can be used unconditionally. */
#ifndef YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN
# define YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN
# define YY_IGNORE_MAYBE_UNINITIALIZED_END
#endif
#ifndef YY_INITIAL_VALUE
# define YY_INITIAL_VALUE(Value) /* Nothing. */
#endif
/* Debug traces. */
#ifndef YYDEBUG
# define YYDEBUG 0
#endif
namespace yy {
#line 113 "y.tab.h" // lalr1.cc:372
/// A Bison parser.
class parser
{
public:
/// Semantic value type.  When YYSTYPE is not defined before this header
/// is included, the union below (taken from the grammar file, see the
/// #line directive) is used; otherwise semantic_type aliases YYSTYPE.
#ifndef YYSTYPE
/// Symbol semantic values.
/// All members are raw pointers sharing the same storage; the union
/// itself neither allocates nor releases what they point to.
/// NOTE(review): ownership of the pointees is presumably managed by the
/// grammar actions and yy_destroy_ -- confirm in wasaparse.ypp.
///  - str: a std::string value.
///  - cl:  a simple search clause (Rcl::SearchDataClauseSimple).
///  - sd:  a search data object (Rcl::SearchData).
union semantic_type
{
#line 44 "/home/dockes/projets/fulltext/recoll/src/query/wasaparse.ypp" // lalr1.cc:372
std::string *str;
Rcl::SearchDataClauseSimple *cl;
Rcl::SearchData *sd;
#line 133 "y.tab.h" // lalr1.cc:372
};
#else
typedef YYSTYPE semantic_type;
#endif
/// Symbol locations.
typedef location location_type;
/// Syntax errors thrown from user actions.
/// Derives from std::runtime_error and additionally records where the
/// error was detected.
struct syntax_error : std::runtime_error
{
/// \param l  location associated with the error.
/// \param m  error description.
///           NOTE(review): presumably forwarded to std::runtime_error so
///           that what() returns it -- confirm in the generated .cpp.
syntax_error (const location_type& l, const std::string& m);
/// Location associated with the error.
location_type location;
};
/// Tokens.
/// Wrapper struct giving the token enumeration its own scope
/// (parser::token::WORD, ...).  These are the external token numbers: the
/// values the scanner (yylex) is expected to return.  Numbering starts at
/// 258 and is contiguous up to 269.
/// NOTE(review): the values are emitted by Bison and must stay in sync
/// with the tables in the generated implementation; do not renumber by
/// hand.
struct token
{
enum yytokentype
{
WORD = 258,
QUOTED = 259,
QUALIFIERS = 260,
AND = 261,
UCONCAT = 262,
OR = 263,
EQUALS = 264,
CONTAINS = 265,
SMALLEREQ = 266,
SMALLER = 267,
GREATEREQ = 268,
GREATER = 269
};
};
/// (External) token type, as returned by yylex.
typedef token::yytokentype token_type;
/// Internal symbol number.
typedef int symbol_number_type;
/// Internal symbol number for tokens (subsumed by symbol_number_type).
typedef unsigned char token_number_type;
/// A complete symbol.
///
/// Expects its Base type to provide access to the symbol type
/// via type_get().
///
/// Provide access to semantic value and location.
///
/// Base is also expected to declare a nested \c kind_type, which the
/// constructors below take as their first argument (by_type and by_state
/// in this class are the two Base types used).
template <typename Base>
struct basic_symbol : Base
{
/// Alias to Base.
typedef Base super_type;
/// Default constructor.
basic_symbol ();
/// Copy constructor.
basic_symbol (const basic_symbol& other);
/// Constructor for valueless symbols.
/// \param t  the symbol kind, forwarded to Base.
/// \param l  the symbol location.
basic_symbol (typename Base::kind_type t,
const location_type& l);
/// Constructor for symbols with semantic value.
/// \param t  the symbol kind, forwarded to Base.
/// \param v  the semantic value.
/// \param l  the symbol location.
basic_symbol (typename Base::kind_type t,
const semantic_type& v,
const location_type& l);
/// Destructor.
~basic_symbol ();
/// Destructive move, \a s is emptied into this.
void move (basic_symbol& s);
/// The semantic value.
semantic_type value;
/// The location.
location_type location;
private:
/// Assignment operator.
/// Declared private, so symbols cannot be assigned from outside the
/// class; use move() or the copy constructor instead.
basic_symbol& operator= (const basic_symbol& other);
};
/// Type access provider for token (enum) based symbols.
/// Used as the Base of basic_symbol for symbols produced by the scanner
/// (see the symbol_type typedef that follows this struct).
struct by_type
{
/// Default constructor.
by_type ();
/// Copy constructor.
by_type (const by_type& other);
/// The symbol type as needed by the constructor.
typedef token_type kind_type;
/// Constructor from (external) token numbers.
by_type (kind_type t);
/// Steal the symbol type from \a that.
void move (by_type& that);
/// The (internal) type number (corresponding to \a type).
/// NOTE(review): the generated text here claimed "-1 when this symbol is
/// empty", but the only empty marker declared in this struct is
/// \c empty (0) -- confirm the actual return value for an empty symbol
/// in the generated implementation.
symbol_number_type type_get () const;
/// The token.
token_type token () const;
/// Marker value for an empty symbol.
enum { empty = 0 };
/// The symbol type.
/// Stored as token_number_type (unsigned char), so it cannot hold -1;
/// an empty symbol is presumably represented by \c empty (0) -- confirm
/// against the generated implementation.
token_number_type type;
};
/// "External" symbols: returned by the scanner.
typedef basic_symbol<by_type> symbol_type;
/// Build a parser object.
/// \param d_yyarg  the user-supplied driver object.
///                 NOTE(review): presumably stored in the member \a d
///                 ("User arguments") -- confirm in the generated .cpp.
parser (WasaParserDriver* d_yyarg);
/// Destructor (virtual: the class has virtual members and may be derived
/// from).
virtual ~parser ();
/// Parse.
/// \returns 0 iff parsing succeeded.
virtual int parse ();
// The debugging interface below only exists when YYDEBUG is nonzero.
#if YYDEBUG
/// The current debugging stream.
std::ostream& debug_stream () const YY_ATTRIBUTE_PURE;
/// Set the current debugging stream.
void set_debug_stream (std::ostream &);
/// Type for debugging levels.
typedef int debug_level_type;
/// The current debugging level.
debug_level_type debug_level () const YY_ATTRIBUTE_PURE;
/// Set the current debugging level.
void set_debug_level (debug_level_type l);
#endif
/// Report a syntax error.
/// \param loc where the syntax error is found.
/// \param msg a description of the syntax error.
virtual void error (const location_type& loc, const std::string& msg);
/// Report a syntax error.
/// \param err  the exception object carrying the location and message.
void error (const syntax_error& err);
private:
/// This class is not copyable.
parser (const parser&);
parser& operator= (const parser&);
/// State numbers.
typedef int state_type;
/// Generate an error message.
/// \param yystate the state where the error occurred.
/// \param yytoken the lookahead token type, or yyempty_.
virtual std::string yysyntax_error_ (state_type yystate,
symbol_number_type yytoken) const;
/// Compute post-reduction state.
/// \param yystate the current state
/// \param yysym the nonterminal to push on the stack
state_type yy_lr_goto_state_ (state_type yystate, int yysym);
/// Whether the given \c yypact_ value indicates a defaulted state.
/// \param yyvalue the value to check
static bool yy_pact_value_is_default_ (int yyvalue);
/// Whether the given \c yytable_ value indicates a syntax error.
/// \param yyvalue the value to check
static bool yy_table_value_is_error_ (int yyvalue);
static const signed char yypact_ninf_;
static const signed char yytable_ninf_;
/// Convert a scanner token number \a t to a symbol number.
static token_number_type yytranslate_ (int t);
// Tables.
// YYPACT[STATE-NUM] -- Index in YYTABLE of the portion describing
// STATE-NUM.
static const signed char yypact_[];
// YYDEFACT[STATE-NUM] -- Default reduction number in state STATE-NUM.
// Performed when YYTABLE does not specify something else to do. Zero
// means the default is an error.
static const unsigned char yydefact_[];
// YYPGOTO[NTERM-NUM].
static const signed char yypgoto_[];
// YYDEFGOTO[NTERM-NUM].
static const signed char yydefgoto_[];
// YYTABLE[YYPACT[STATE-NUM]] -- What to do in state STATE-NUM. If
// positive, shift that token. If negative, reduce the rule whose
// number is the opposite. If YYTABLE_NINF, syntax error.
static const signed char yytable_[];
static const signed char yycheck_[];
// YYSTOS[STATE-NUM] -- The (internal number of the) accessing
// symbol of state STATE-NUM.
static const unsigned char yystos_[];
// YYR1[YYN] -- Symbol number of symbol that rule YYN derives.
static const unsigned char yyr1_[];
// YYR2[YYN] -- Number of symbols on the right hand side of rule YYN.
static const unsigned char yyr2_[];
/// Convert the symbol name \a n to a form suitable for a diagnostic.
static std::string yytnamerr_ (const char *n);
/// For a symbol, its name in clear.
static const char* const yytname_[];
#if YYDEBUG
// YYRLINE[YYN] -- Source line where rule number YYN was defined.
static const unsigned char yyrline_[];
/// Report on the debug stream that the rule \a r is going to be reduced.
virtual void yy_reduce_print_ (int r);
/// Print the state stack on the debug stream.
virtual void yystack_print_ ();
// Debugging.
int yydebug_;
std::ostream* yycdebug_;
/// \brief Display a symbol type, value and location.
/// \param yyo The output stream.
/// \param yysym The symbol.
template <typename Base>
void yy_print_ (std::ostream& yyo, const basic_symbol<Base>& yysym) const;
#endif
/// \brief Reclaim the memory associated to a symbol.
/// \param yymsg Why this token is reclaimed.
/// If null, print nothing.
/// \param yysym The symbol.
template <typename Base>
void yy_destroy_ (const char* yymsg, basic_symbol<Base>& yysym) const;
private:
/// Type access provider for state based symbols.
/// Used as the Base of basic_symbol for the elements of the parser stack
/// (see stack_symbol_type).
struct by_state
{
/// Default constructor.
by_state ();
/// The symbol type as needed by the constructor.
typedef state_type kind_type;
/// Constructor.
/// \param s  the state number to record.
by_state (kind_type s);
/// Copy constructor.
by_state (const by_state& other);
/// Steal the symbol type from \a that.
void move (by_state& that);
/// The (internal) type number (corresponding to \a state).
/// "empty" when empty.
symbol_number_type type_get () const;
/// Marker value for an empty symbol.
/// NOTE(review): 0 is presumably also a legitimate state number, in
/// which case an empty symbol cannot be told apart from state 0 --
/// confirm against the generated implementation / Bison version.
enum { empty = 0 };
/// The state.
state_type state;
};
/// "Internal" symbol: element of the stack.
/// A basic_symbol keyed by parser state: in addition to the state number
/// it carries the semantic value and location inherited from
/// basic_symbol.
struct stack_symbol_type : basic_symbol<by_state>
{
/// Superclass.
typedef basic_symbol<by_state> super_type;
/// Construct an empty symbol.
stack_symbol_type ();
/// Steal the contents from \a sym to build this.
/// \param s    the state to associate with the new stack element.
/// \param sym  the scanner symbol whose contents are taken over; it is
///             passed by non-const reference because it is modified.
stack_symbol_type (state_type s, symbol_type& sym);
/// Assignment, needed by push_back.
/// Public here, unlike the private assignment operator of basic_symbol.
stack_symbol_type& operator= (const stack_symbol_type& that);
};
/// Stack type.
typedef stack<stack_symbol_type> stack_type;
/// The stack.
/// A single stack: each element bundles state, semantic value and
/// location.
stack_type yystack_;
/// Push a new state on the stack.
/// \param m a debug message to display
/// if null, no trace is output.
/// \param s the symbol
/// \warning the contents of \a s.value is stolen.
void yypush_ (const char* m, stack_symbol_type& s);
/// Push a new look ahead token on the state on the stack.
/// \param m a debug message to display
/// if null, no trace is output.
/// \param s the state
/// \param sym the symbol (for its value and location).
/// \warning the contents of \a sym.value is stolen.
void yypush_ (const char* m, state_type s, symbol_type& sym);
/// Pop \a n symbols from the stack (\a yystack_).
/// \param n number of elements to remove, 1 by default.
void yypop_ (unsigned int n = 1);
// Constants.
// Sizes and special numbers describing the generated tables.  The values
// are emitted by Bison for this particular grammar and must match the
// tables in the generated implementation; do not edit by hand.
// NOTE(review): the meanings given below for yyeof_, yyterror_ and
// yyerrcode_ follow the usual Bison lalr1.cc conventions and are not
// established by this header alone -- confirm against the Bison manual.
enum
{
/// Presumably the token number signalling end of input.
yyeof_ = 0,
yylast_ = 59, ///< Last index in yytable_.
yynnts_ = 7, ///< Number of nonterminal symbols.
/// Value standing for "no lookahead token" (see yysyntax_error_, whose
/// yytoken argument is documented as a token type or yyempty_).
yyempty_ = -2,
yyfinal_ = 14, ///< Termination state number.
/// Presumably the internal symbol number of the error token.
yyterror_ = 1,
/// Presumably the external token number of the error token.
yyerrcode_ = 256,
yyntokens_ = 18 ///< Number of tokens.
};
// User arguments.
WasaParserDriver* d;
};
} // yy
#line 472 "y.tab.h" // lalr1.cc:372
#endif // !YY_YY_Y_TAB_H_INCLUDED

View File

@ -161,10 +161,10 @@ bool WasaParserDriver::addClause(SearchData *sd,
size_t size = strtoll(cl->gettext().c_str(), &cp, 10);
if (*cp != 0) {
switch (*cp) {
case 'k': case 'K': size *= 1E3;break;
case 'm': case 'M': size *= 1E6;break;
case 'g': case 'G': size *= 1E9;break;
case 't': case 'T': size *= 1E12;break;
case 'k': case 'K': size *= 1000;break;
case 'm': case 'M': size *= 1000*1000;break;
case 'g': case 'G': size *= 1000*1000*1000;break;
case 't': case 'T': size *= size_t(1000)*1000*1000*1000;break;
default:
m_reason = string("Bad multiplier suffix: ") + *cp;
delete cl;

View File

@ -29,7 +29,7 @@
#include <vector>
using namespace std;
#include "xapian.h"
#include <xapian.h>
#include "debuglog.h"
#include "rclconfig.h"

View File

@ -97,6 +97,8 @@ bool createExpansionDbs(Xapian::WritableDatabase& wdb,
// Detect and skip CJK terms.
Utf8Iter utfit(*it);
if (utfit.eof()) // Empty term?? Seems to happen.
continue;
if (TextSplit::isCJK(*utfit)) {
// LOGDEB(("stemskipped: Skipping CJK\n"));
continue;

View File

@ -82,7 +82,7 @@ bool Query::Native::getMatchTerms(unsigned long xdocid, vector<string>& terms)
{
if (!xenquire) {
LOGERR(("Query::getMatchTerms: no query opened\n"));
return -1;
return false;
}
terms.clear();
@ -386,7 +386,7 @@ int Query::Native::makeAbstract(Xapian::docid docid,
for (multimap<double, vector<string> >::reverse_iterator mit = byQ.rbegin();
mit != byQ.rend(); mit++) {
unsigned int maxgrpoccs;
float q;
double q;
if (byQ.size() == 1) {
maxgrpoccs = maxtotaloccs;
q = 1.0;

View File

@ -18,7 +18,7 @@
#include <stdio.h>
#include <cstring>
#include <unistd.h>
#include "safeunistd.h"
#include <math.h>
#include <time.h>
@ -433,7 +433,7 @@ bool Db::Native::dbDataToRclDoc(Xapian::docid docid, std::string &data,
string dbdir = m_rcldb->m_basedir;
doc.idxi = 0;
if (!m_rcldb->m_extraDbs.empty()) {
unsigned int idxi = whatDbIdx(docid);
int idxi = int(whatDbIdx(docid));
// idxi is in [0, extraDbs.size()]. 0 is for the main index,
// idxi-1 indexes into the additional dbs array.
@ -549,14 +549,13 @@ bool Db::Native::getPagePositions(Xapian::docid docid, vector<int>& vpos)
return true;
}
int Db::Native::getPageNumberForPosition(const vector<int>& pbreaks,
unsigned int pos)
int Db::Native::getPageNumberForPosition(const vector<int>& pbreaks, int pos)
{
if (pos < baseTextPosition) // Not in text body
if (pos < int(baseTextPosition)) // Not in text body
return -1;
vector<int>::const_iterator it =
upper_bound(pbreaks.begin(), pbreaks.end(), pos);
return it - pbreaks.begin() + 1;
return int(it - pbreaks.begin() + 1);
}
// Note: we're passed a Xapian::Document* because Xapian
@ -1420,10 +1419,11 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi, Doc &doc)
time_t mtime = atoll(doc.dmtime.empty() ? doc.fmtime.c_str() :
doc.dmtime.c_str());
struct tm tmb;
localtime_r(&mtime, &tmb);
struct tm *tmbp = &tmb;
tmbp = localtime_r(&mtime, &tmb);
char buf[9];
snprintf(buf, 9, "%04d%02d%02d",
tmb.tm_year+1900, tmb.tm_mon + 1, tmb.tm_mday);
tmbp->tm_year+1900, tmbp->tm_mon + 1, tmbp->tm_mday);
// Date (YYYYMMDD)
newdocument.add_boolean_term(wrap_prefix(xapday_prefix) + string(buf));
// Month (YYYYMM)

View File

@ -120,7 +120,7 @@ class Db::Native {
const string& uniterm);
bool getPagePositions(Xapian::docid docid, vector<int>& vpos);
int getPageNumberForPosition(const vector<int>& pbreaks, unsigned int pos);
int getPageNumberForPosition(const vector<int>& pbreaks, int pos);
bool dbDataToRclDoc(Xapian::docid docid, std::string &data, Doc &doc);

View File

@ -21,13 +21,15 @@
#include "autoconfig.h"
#include <string>
using namespace std;
#include "debuglog.h"
#include "rcldb.h"
#include "rcldb_p.h"
#include "stemdb.h"
#include "expansiondbs.h"
#include "strmatcher.h"
using namespace std;
namespace Rcl {
@ -41,10 +43,10 @@ bool Db::filenameWildExp(const string& fnexp, vector<string>& names, int max)
// get here currently anyway), and has no wildcards, we add * at
// each end: match any substring
if (pattern[0] == '"' && pattern[pattern.size()-1] == '"') {
pattern = pattern.substr(1, pattern.size() -2);
pattern = pattern.substr(1, pattern.size() -2);
} else if (pattern.find_first_of(cstr_minwilds) == string::npos &&
!unaciscapital(pattern)) {
pattern = "*" + pattern + "*";
!unaciscapital(pattern)) {
pattern = "*" + pattern + "*";
} // else let it be
LOGDEB(("Rcl::Db::filenameWildExp: pattern: [%s]\n", pattern.c_str()));
@ -55,21 +57,21 @@ bool Db::filenameWildExp(const string& fnexp, vector<string>& names, int max)
// stripping conditionally on indexstripchars.
string pat1;
if (unacmaybefold(pattern, pat1, "UTF-8", UNACOP_UNACFOLD)) {
pattern.swap(pat1);
pattern.swap(pat1);
}
TermMatchResult result;
if (!idxTermMatch(ET_WILD, string(), pattern, result, max,
unsplitFilenameFieldName))
return false;
unsplitFilenameFieldName))
return false;
for (vector<TermMatchEntry>::const_iterator it = result.entries.begin();
it != result.entries.end(); it++)
names.push_back(it->term);
it != result.entries.end(); it++)
names.push_back(it->term);
if (names.empty()) {
// Build an impossible query: we know its impossible because we
// control the prefixes!
names.push_back(wrap_prefix("XNONE") + "NoMatchingTerms");
// Build an impossible query: we know its impossible because we
// control the prefixes!
names.push_back(wrap_prefix("XNONE") + "NoMatchingTerms");
}
return true;
}
@ -82,11 +84,11 @@ bool Db::maxYearSpan(int *minyear, int *maxyear)
*maxyear = -1000000;
TermMatchResult result;
if (!idxTermMatch(ET_WILD, string(), "*", result, -1, "xapyear")) {
LOGINFO(("Rcl::Db:maxYearSpan: termMatch failed\n"));
return false;
LOGINFO(("Rcl::Db:maxYearSpan: termMatch failed\n"));
return false;
}
for (vector<TermMatchEntry>::const_iterator it = result.entries.begin();
it != result.entries.end(); it++) {
it != result.entries.end(); it++) {
if (!it->term.empty()) {
int year = atoi(strip_prefix(it->term).c_str());
if (year < *minyear)
@ -102,11 +104,11 @@ bool Db::getAllDbMimeTypes(std::vector<std::string>& exp)
{
Rcl::TermMatchResult res;
if (!idxTermMatch(Rcl::Db::ET_WILD, "", "*", res, -1, "mtype")) {
return false;
return false;
}
for (vector<Rcl::TermMatchEntry>::const_iterator rit = res.entries.begin();
rit != res.entries.end(); rit++) {
exp.push_back(Rcl::strip_prefix(rit->term));
rit != res.entries.end(); rit++) {
exp.push_back(Rcl::strip_prefix(rit->term));
}
return true;
}
@ -114,19 +116,19 @@ bool Db::getAllDbMimeTypes(std::vector<std::string>& exp)
/// Ordering predicate for sort(): places entries with the larger
/// collection frequency (wcf) first, i.e. descending wcf order.
class TermMatchCmpByWcf {
public:
    /// @return true if @a l must be ordered before @a r.
    /// Compares the two counts directly instead of testing the sign of
    /// their difference: the subtraction could overflow for a signed wcf
    /// and would never be negative for an unsigned one.
    bool operator()(const TermMatchEntry& l, const TermMatchEntry& r) const {
        return l.wcf > r.wcf;
    }
};
/// Ordering predicate for sort(): places the entry whose term compares
/// greater first, i.e. descending order according to term.compare().
class TermMatchCmpByTerm {
public:
    /// @return true if @a l must be ordered before @a r.
    bool operator()(const TermMatchEntry& l, const TermMatchEntry& r) const {
        return l.term.compare(r.term) > 0;
    }
};
/// Equality predicate for unique(): two entries are considered equal when
/// their terms are identical; the other fields are ignored.
class TermMatchTermEqual {
public:
    /// @return true if both entries carry the same term.
    bool operator()(const TermMatchEntry& l, const TermMatchEntry& r) const {
        return l.term.compare(r.term) == 0;
    }
};
@ -136,10 +138,10 @@ public:
/**
 * Prepend @a prefix to the term of every entry in @a terms.
 *
 * @param terms  entries to modify in place.
 * @param prefix string inserted at the start of each term; when empty the
 *               list is left untouched.
 *
 * Each statement appears exactly once: a second copy of the insert() call
 * after the loop would sit outside the scope of the loop iterator.
 */
static void addPrefix(vector<TermMatchEntry>& terms, const string& prefix)
{
    if (prefix.empty())
        return;
    for (vector<TermMatchEntry>::iterator it = terms.begin();
         it != terms.end(); ++it) {
        it->term.insert(0, prefix);
    }
}
static const char *tmtptostr(int typ)
@ -164,22 +166,22 @@ static const char *tmtptostr(int typ)
// using the main index terms (filtering, retrieving stats, expansion
// in some cases).
bool Db::termMatch(int typ_sens, const string &lang, const string &_term,
TermMatchResult& res, int max, const string& field,
vector<string>* multiwords)
TermMatchResult& res, int max, const string& field,
vector<string>* multiwords)
{
int matchtyp = matchTypeTp(typ_sens);
if (!m_ndb || !m_ndb->m_isopen)
return false;
return false;
Xapian::Database xrdb = m_ndb->xrdb;
bool diac_sensitive = (typ_sens & ET_DIACSENS) != 0;
bool case_sensitive = (typ_sens & ET_CASESENS) != 0;
LOGDEB0(("Db::TermMatch: typ %s diacsens %d casesens %d lang [%s] term [%s]"
" max %d field [%s] stripped %d init res.size %u\n",
tmtptostr(matchtyp), diac_sensitive, case_sensitive, lang.c_str(),
_term.c_str(), max, field.c_str(), o_index_stripchars,
res.entries.size()));
" max %d field [%s] stripped %d init res.size %u\n",
tmtptostr(matchtyp), diac_sensitive, case_sensitive, lang.c_str(),
_term.c_str(), max, field.c_str(), o_index_stripchars,
res.entries.size()));
// If index is stripped, no case or diac expansion can be needed:
// for the processing inside this routine, everything looks like
@ -187,11 +189,11 @@ bool Db::termMatch(int typ_sens, const string &lang, const string &_term,
// Also, convert input to lowercase and strip its accents.
string term = _term;
if (o_index_stripchars) {
diac_sensitive = case_sensitive = true;
if (!unacmaybefold(_term, term, "UTF-8", UNACOP_UNACFOLD)) {
LOGERR(("Db::termMatch: unac failed for [%s]\n", _term.c_str()));
return false;
}
diac_sensitive = case_sensitive = true;
if (!unacmaybefold(_term, term, "UTF-8", UNACOP_UNACFOLD)) {
LOGERR(("Db::termMatch: unac failed for [%s]\n", _term.c_str()));
return false;
}
}
// The case/diac expansion db
@ -199,125 +201,125 @@ bool Db::termMatch(int typ_sens, const string &lang, const string &_term,
XapComputableSynFamMember synac(xrdb, synFamDiCa, "all", &unacfoldtrans);
if (matchtyp == ET_WILD || matchtyp == ET_REGEXP) {
STD_SHARED_PTR<StrMatcher> matcher;
if (matchtyp == ET_WILD) {
matcher = STD_SHARED_PTR<StrMatcher>(new StrWildMatcher(term));
} else {
matcher = STD_SHARED_PTR<StrMatcher>(new StrRegexpMatcher(term));
}
if (!diac_sensitive || !case_sensitive) {
// Perform case/diac expansion on the exp as appropriate and
// expand the result.
vector<string> exp;
if (diac_sensitive) {
// Expand for diacritics and case, filtering for same diacritics
SynTermTransUnac foldtrans(UNACOP_FOLD);
synac.synKeyExpand(matcher.get(), exp, &foldtrans);
} else if (case_sensitive) {
// Expand for diacritics and case, filtering for same case
SynTermTransUnac unactrans(UNACOP_UNAC);
synac.synKeyExpand(matcher.get(), exp, &unactrans);
} else {
// Expand for diacritics and case, no filtering
synac.synKeyExpand(matcher.get(), exp);
}
// Retrieve additional info and filter against the index itself
for (vector<string>::const_iterator it = exp.begin();
it != exp.end(); it++) {
idxTermMatch(ET_NONE, "", *it, res, max, field);
}
// And also expand the original expression against the
// main index: for the common case where the expression
// had no case/diac expansion (no entry in the exp db if
// the original term is lowercase and without accents).
idxTermMatch(typ_sens, lang, term, res, max, field);
} else {
idxTermMatch(typ_sens, lang, term, res, max, field);
}
STD_SHARED_PTR<StrMatcher> matcher;
if (matchtyp == ET_WILD) {
matcher = STD_SHARED_PTR<StrMatcher>(new StrWildMatcher(term));
} else {
matcher = STD_SHARED_PTR<StrMatcher>(new StrRegexpMatcher(term));
}
if (!diac_sensitive || !case_sensitive) {
// Perform case/diac expansion on the exp as appropriate and
// expand the result.
vector<string> exp;
if (diac_sensitive) {
// Expand for diacritics and case, filtering for same diacritics
SynTermTransUnac foldtrans(UNACOP_FOLD);
synac.synKeyExpand(matcher.get(), exp, &foldtrans);
} else if (case_sensitive) {
// Expand for diacritics and case, filtering for same case
SynTermTransUnac unactrans(UNACOP_UNAC);
synac.synKeyExpand(matcher.get(), exp, &unactrans);
} else {
// Expand for diacritics and case, no filtering
synac.synKeyExpand(matcher.get(), exp);
}
// Retrieve additional info and filter against the index itself
for (vector<string>::const_iterator it = exp.begin();
it != exp.end(); it++) {
idxTermMatch(ET_NONE, "", *it, res, max, field);
}
// And also expand the original expression against the
// main index: for the common case where the expression
// had no case/diac expansion (no entry in the exp db if
// the original term is lowercase and without accents).
idxTermMatch(typ_sens, lang, term, res, max, field);
} else {
idxTermMatch(typ_sens, lang, term, res, max, field);
}
} else {
// Expansion is STEM or NONE (which may still need synonyms
// and case/diac exp)
// Expansion is STEM or NONE (which may still need synonyms
// and case/diac exp)
vector<string> lexp;
if (diac_sensitive && case_sensitive) {
// No case/diac expansion
lexp.push_back(term);
} else if (diac_sensitive) {
// Expand for accents and case, filtering for same accents,
SynTermTransUnac foldtrans(UNACOP_FOLD);
synac.synExpand(term, lexp, &foldtrans);
} else if (case_sensitive) {
// Expand for accents and case, filtering for same case
SynTermTransUnac unactrans(UNACOP_UNAC);
synac.synExpand(term, lexp, &unactrans);
} else {
// We are neither accent- nor case- sensitive and may need stem
// expansion or not. Expand for accents and case
synac.synExpand(term, lexp);
}
vector<string> lexp;
if (diac_sensitive && case_sensitive) {
// No case/diac expansion
lexp.push_back(term);
} else if (diac_sensitive) {
// Expand for accents and case, filtering for same accents,
SynTermTransUnac foldtrans(UNACOP_FOLD);
synac.synExpand(term, lexp, &foldtrans);
} else if (case_sensitive) {
// Expand for accents and case, filtering for same case
SynTermTransUnac unactrans(UNACOP_UNAC);
synac.synExpand(term, lexp, &unactrans);
} else {
// We are neither accent- nor case- sensitive and may need stem
// expansion or not. Expand for accents and case
synac.synExpand(term, lexp);
}
if (matchtyp == ET_STEM || (typ_sens & ET_SYNEXP)) {
if (matchtyp == ET_STEM || (typ_sens & ET_SYNEXP)) {
// Note: if any of the above conds is true, we are insensitive to
// diacs and case (enforced in searchdatatox:termexpand
// Need stem expansion. Lowercase the result of accent and case
// expansion for input to stemdb.
for (unsigned int i = 0; i < lexp.size(); i++) {
string lower;
unacmaybefold(lexp[i], lower, "UTF-8", UNACOP_FOLD);
lexp[i] = lower;
}
sort(lexp.begin(), lexp.end());
lexp.erase(unique(lexp.begin(), lexp.end()), lexp.end());
// Need stem expansion. Lowercase the result of accent and case
// expansion for input to stemdb.
for (unsigned int i = 0; i < lexp.size(); i++) {
string lower;
unacmaybefold(lexp[i], lower, "UTF-8", UNACOP_FOLD);
lexp[i] = lower;
}
sort(lexp.begin(), lexp.end());
lexp.erase(unique(lexp.begin(), lexp.end()), lexp.end());
if (matchtyp == ET_STEM) {
StemDb sdb(xrdb);
vector<string> exp1;
for (vector<string>::const_iterator it = lexp.begin();
it != lexp.end(); it++) {
sdb.stemExpand(lang, *it, exp1);
}
exp1.swap(lexp);
sort(lexp.begin(), lexp.end());
lexp.erase(unique(lexp.begin(), lexp.end()), lexp.end());
LOGDEB(("ExpTerm: stemexp: %s\n",
stringsToString(lexp).c_str()));
}
if (matchtyp == ET_STEM) {
StemDb sdb(xrdb);
vector<string> exp1;
for (vector<string>::const_iterator it = lexp.begin();
it != lexp.end(); it++) {
sdb.stemExpand(lang, *it, exp1);
}
exp1.swap(lexp);
sort(lexp.begin(), lexp.end());
lexp.erase(unique(lexp.begin(), lexp.end()), lexp.end());
LOGDEB(("ExpTerm: stemexp: %s\n",
stringsToString(lexp).c_str()));
}
// Expand the result for synonyms. Note that doing it here
// means that multi-term synonyms will not work
// (e.g. stakhanovist -> "hard at work". We would have to
// separate the multi-word expansions for our caller to
// add them as phrases to the query. Not impossible, but
// let's keep it at single words for now.
if (m_syngroups.ok() && (typ_sens & ET_SYNEXP)) {
LOGDEB(("ExpTerm: got syngroups\n"));
vector<string> exp1(lexp);
for (vector<string>::const_iterator it = lexp.begin();
it != lexp.end(); it++) {
vector<string> sg = m_syngroups.getgroup(*it);
if (!sg.empty()) {
LOGDEB(("ExpTerm: syns: %s -> %s\n",
it->c_str(), stringsToString(sg).c_str()));
for (vector<string>::const_iterator it1 = sg.begin();
it1 != sg.end(); it1++) {
if (it1->find_first_of(" ") != string::npos) {
if (multiwords) {
multiwords->push_back(*it1);
}
} else {
exp1.push_back(*it1);
}
}
}
}
lexp.swap(exp1);
sort(lexp.begin(), lexp.end());
lexp.erase(unique(lexp.begin(), lexp.end()), lexp.end());
}
// Expand the result for synonyms. Note that doing it here
// means that multi-term synonyms will not work
// (e.g. stakhanovist -> "hard at work". We would have to
// separate the multi-word expansions for our caller to
// add them as phrases to the query. Not impossible, but
// let's keep it at single words for now.
if (m_syngroups.ok() && (typ_sens & ET_SYNEXP)) {
LOGDEB(("ExpTerm: got syngroups\n"));
vector<string> exp1(lexp);
for (vector<string>::const_iterator it = lexp.begin();
it != lexp.end(); it++) {
vector<string> sg = m_syngroups.getgroup(*it);
if (!sg.empty()) {
LOGDEB(("ExpTerm: syns: %s -> %s\n",
it->c_str(), stringsToString(sg).c_str()));
for (vector<string>::const_iterator it1 = sg.begin();
it1 != sg.end(); it1++) {
if (it1->find_first_of(" ") != string::npos) {
if (multiwords) {
multiwords->push_back(*it1);
}
} else {
exp1.push_back(*it1);
}
}
}
}
lexp.swap(exp1);
sort(lexp.begin(), lexp.end());
lexp.erase(unique(lexp.begin(), lexp.end()), lexp.end());
}
// Expand the resulting list for case (all stemdb content
// is lowercase)
// Expand the resulting list for case (all stemdb content
// is lowercase)
vector<string> exp1;
for (vector<string>::const_iterator it = lexp.begin();
it != lexp.end(); it++) {
@ -326,27 +328,27 @@ bool Db::termMatch(int typ_sens, const string &lang, const string &_term,
exp1.swap(lexp);
sort(lexp.begin(), lexp.end());
lexp.erase(unique(lexp.begin(), lexp.end()), lexp.end());
}
}
// Filter the result and get the stats, possibly add prefixes.
LOGDEB(("ExpandTerm:TM: lexp: %s\n", stringsToString(lexp).c_str()));
for (vector<string>::const_iterator it = lexp.begin();
it != lexp.end(); it++) {
idxTermMatch(Rcl::Db::ET_WILD, "", *it, res, max, field);
}
// Filter the result and get the stats, possibly add prefixes.
LOGDEB(("ExpandTerm:TM: lexp: %s\n", stringsToString(lexp).c_str()));
for (vector<string>::const_iterator it = lexp.begin();
it != lexp.end(); it++) {
idxTermMatch(Rcl::Db::ET_WILD, "", *it, res, max, field);
}
}
TermMatchCmpByTerm tcmp;
sort(res.entries.begin(), res.entries.end(), tcmp);
TermMatchTermEqual teq;
vector<TermMatchEntry>::iterator uit =
unique(res.entries.begin(), res.entries.end(), teq);
unique(res.entries.begin(), res.entries.end(), teq);
res.entries.resize(uit - res.entries.begin());
TermMatchCmpByWcf wcmp;
sort(res.entries.begin(), res.entries.end(), wcmp);
if (max > 0) {
// Would need a small max and big stem expansion...
res.entries.resize(MIN(res.entries.size(), (unsigned int)max));
// Would need a small max and big stem expansion...
res.entries.resize(MIN(res.entries.size(), (unsigned int)max));
}
return true;
}
@ -354,114 +356,116 @@ bool Db::termMatch(int typ_sens, const string &lang, const string &_term,
// Second phase of wildcard/regexp term expansion after case/diac
// expansion: expand against main index terms
bool Db::idxTermMatch(int typ_sens, const string &lang, const string &root,
TermMatchResult& res, int max, const string& field)
TermMatchResult& res, int max, const string& field)
{
int typ = matchTypeTp(typ_sens);
LOGDEB1(("Db::idxTermMatch: typ %s lang [%s] term [%s] "
"max %d field [%s] init res.size %u\n",
tmtptostr(typ), lang.c_str(), root.c_str(),
max, field.c_str(), res.entries.size()));
"max %d field [%s] init res.size %u\n",
tmtptostr(typ), lang.c_str(), root.c_str(),
max, field.c_str(), res.entries.size()));
if (typ == ET_STEM) {
LOGFATAL(("RCLDB: internal error: idxTermMatch called with ET_STEM\n"));
abort();
LOGFATAL(("RCLDB: internal error: idxTermMatch called with ET_STEM\n"));
abort();
}
Xapian::Database xdb = m_ndb->xrdb;
string prefix;
if (!field.empty()) {
const FieldTraits *ftp = 0;
if (!fieldToTraits(field, &ftp, true) || ftp->pfx.empty()) {
const FieldTraits *ftp = 0;
if (!fieldToTraits(field, &ftp, true) || ftp->pfx.empty()) {
LOGDEB(("Db::termMatch: field is not indexed (no prefix): [%s]\n",
field.c_str()));
} else {
prefix = wrap_prefix(ftp->pfx);
}
prefix = wrap_prefix(ftp->pfx);
}
}
res.prefix = prefix;
STD_SHARED_PTR<StrMatcher> matcher;
if (typ == ET_REGEXP) {
matcher = STD_SHARED_PTR<StrMatcher>(new StrRegexpMatcher(root));
if (!matcher->ok()) {
LOGERR(("termMatch: regcomp failed: %s\n",
matcher->getreason().c_str()))
return false;
}
matcher = STD_SHARED_PTR<StrMatcher>(new StrRegexpMatcher(root));
if (!matcher->ok()) {
LOGERR(("termMatch: regcomp failed: %s\n",
matcher->getreason().c_str()))
return false;
}
} else if (typ == ET_WILD) {
matcher = STD_SHARED_PTR<StrMatcher>(new StrWildMatcher(root));
matcher = STD_SHARED_PTR<StrMatcher>(new StrWildMatcher(root));
}
// Find the initial section before any special char
string::size_type es = string::npos;
if (matcher) {
es = matcher->baseprefixlen();
es = matcher->baseprefixlen();
}
// Initial section: the part of the prefix+expr before the
// first wildcard character. We only scan the part of the
// index where this matches
string is;
switch (es) {
case string::npos: is = prefix + root; break;
case 0: is = prefix; break;
default: is = prefix + root.substr(0, es); break;
if (es == string::npos) {
is = prefix + root;
} else if (es == 0) {
is = prefix;
} else {
is = prefix + root.substr(0, es);
}
LOGDEB2(("termMatch: initsec: [%s]\n", is.c_str()));
for (int tries = 0; tries < 2; tries++) {
try {
Xapian::TermIterator it = xdb.allterms_begin();
if (!is.empty())
it.skip_to(is.c_str());
for (int rcnt = 0; it != xdb.allterms_end(); it++) {
// If we're beyond the terms matching the initial
// section, end
if (!is.empty() && (*it).find(is) != 0)
break;
try {
Xapian::TermIterator it = xdb.allterms_begin();
if (!is.empty())
it.skip_to(is.c_str());
for (int rcnt = 0; it != xdb.allterms_end(); it++) {
// If we're beyond the terms matching the initial
// section, end
if (!is.empty() && (*it).find(is) != 0)
break;
// Else try to match the term. The matcher content
// is without prefix, so we remove this if any. We
// just checked that the index term did begin with
// the prefix.
string term;
if (!prefix.empty()) {
term = (*it).substr(prefix.length());
} else {
if (has_prefix(*it)) {
continue;
}
term = *it;
}
// Else try to match the term. The matcher content
// is without prefix, so we remove this if any. We
// just checked that the index term did begin with
// the prefix.
string term;
if (!prefix.empty()) {
term = (*it).substr(prefix.length());
} else {
if (has_prefix(*it)) {
continue;
}
term = *it;
}
if (matcher && !matcher->match(term))
continue;
if (matcher && !matcher->match(term))
continue;
res.entries.push_back(
TermMatchEntry(*it, xdb.get_collection_freq(*it),
it.get_termfreq()));
res.entries.push_back(
TermMatchEntry(*it, xdb.get_collection_freq(*it),
it.get_termfreq()));
// The problem with truncating here is that this is done
// alphabetically and we may not keep the most frequent
// terms. OTOH, not doing it may stall the program if
// we are walking the whole term list. We compromise
// by cutting at 2*max
if (max > 0 && ++rcnt >= 2*max)
break;
}
m_reason.erase();
break;
} catch (const Xapian::DatabaseModifiedError &e) {
m_reason = e.get_msg();
xdb.reopen();
continue;
} XCATCHERROR(m_reason);
break;
// The problem with truncating here is that this is done
// alphabetically and we may not keep the most frequent
// terms. OTOH, not doing it may stall the program if
// we are walking the whole term list. We compromise
// by cutting at 2*max
if (max > 0 && ++rcnt >= 2*max)
break;
}
m_reason.erase();
break;
} catch (const Xapian::DatabaseModifiedError &e) {
m_reason = e.get_msg();
xdb.reopen();
continue;
} XCATCHERROR(m_reason);
break;
}
if (!m_reason.empty()) {
LOGERR(("termMatch: %s\n", m_reason.c_str()));
return false;
LOGERR(("termMatch: %s\n", m_reason.c_str()));
return false;
}
return true;
@ -476,62 +480,62 @@ public:
TermIter *Db::termWalkOpen()
{
if (!m_ndb || !m_ndb->m_isopen)
return 0;
return 0;
TermIter *tit = new TermIter;
if (tit) {
tit->db = m_ndb->xrdb;
tit->db = m_ndb->xrdb;
XAPTRY(tit->it = tit->db.allterms_begin(), tit->db, m_reason);
if (!m_reason.empty()) {
LOGERR(("Db::termWalkOpen: xapian error: %s\n", m_reason.c_str()));
return 0;
}
if (!m_reason.empty()) {
LOGERR(("Db::termWalkOpen: xapian error: %s\n", m_reason.c_str()));
return 0;
}
}
return tit;
}
bool Db::termWalkNext(TermIter *tit, string &term)
{
XAPTRY(
if (tit && tit->it != tit->db.allterms_end()) {
term = *(tit->it)++;
return true;
}
if (tit && tit->it != tit->db.allterms_end()) {
term = *(tit->it)++;
return true;
}
, tit->db, m_reason);
if (!m_reason.empty()) {
LOGERR(("Db::termWalkOpen: xapian error: %s\n", m_reason.c_str()));
LOGERR(("Db::termWalkOpen: xapian error: %s\n", m_reason.c_str()));
}
return false;
}
void Db::termWalkClose(TermIter *tit)
{
try {
delete tit;
delete tit;
} catch (...) {}
}
bool Db::termExists(const string& word)
{
if (!m_ndb || !m_ndb->m_isopen)
return 0;
return 0;
XAPTRY(if (!m_ndb->xrdb.term_exists(word)) return false,
m_ndb->xrdb, m_reason);
if (!m_reason.empty()) {
LOGERR(("Db::termWalkOpen: xapian error: %s\n", m_reason.c_str()));
return false;
LOGERR(("Db::termWalkOpen: xapian error: %s\n", m_reason.c_str()));
return false;
}
return true;
}
bool Db::stemDiffers(const string& lang, const string& word,
const string& base)
const string& base)
{
Xapian::Stem stemmer(lang);
if (!stemmer(word).compare(stemmer(base))) {
LOGDEB2(("Rcl::Db::stemDiffers: same for %s and %s\n",
word.c_str(), base.c_str()));
return false;
LOGDEB2(("Rcl::Db::stemDiffers: same for %s and %s\n",
word.c_str(), base.c_str()));
return false;
}
return true;
}

View File

@ -242,7 +242,7 @@ void SearchData::simplify()
j < i + clsubp->getSub()->m_query.size(); j++) {
m_query[j]->setParent(this);
}
i += clsubp->getSub()->m_query.size() - 1;
i += int(clsubp->getSub()->m_query.size()) - 1;
// We don't want the clauses to be deleted when the parent is, as we
// now own them.

View File

@ -270,7 +270,7 @@ public:
{
return m_parentSearch ? m_parentSearch->getMaxExp() : 10000;
}
int getMaxCl()
size_t getMaxCl()
{
return m_parentSearch ? m_parentSearch->getMaxCl() : 100000;
}
@ -376,7 +376,7 @@ protected:
std::string m_field; // Field specification if any
HighlightData m_hldata;
// Current count of Xapian clauses, to check against expansion limit
int m_curcl;
size_t m_curcl;
bool processUserString(Rcl::Db &db, const string &iq,
std::string &ermsg,
void* pq, int slack = 0, bool useNear = false);

View File

@ -840,7 +840,7 @@ bool SearchDataClauseSimple::processUserString(Rcl::Db &db, const string &iq,
tpq.setTSQ(&splitter);
splitter.text_to_words(*it);
slack += tpq.lastpos() - tpq.terms().size() + 1;
slack += tpq.lastpos() - int(tpq.terms().size()) + 1;
LOGDEB0(("strToXapianQ: termcount: %d\n", tpq.terms().size()));
switch (tpq.terms().size() + terminc) {
@ -963,7 +963,7 @@ bool SearchDataClausePath::toNativeQuery(Rcl::Db &db, void *p)
vector<Xapian::Query> orqueries;
if (m_text[0] == '/')
if (path_isabsolute(m_text))
orqueries.push_back(Xapian::Query(wrap_prefix(pathelt_prefix)));
else
m_text = path_tildexpand(m_text);

View File

@ -22,11 +22,12 @@
#include "autoconfig.h"
#include <unistd.h>
#include "safeunistd.h"
#include <algorithm>
#include <map>
#include <iostream>
#include <string>
using namespace std;
#include <xapian.h>

View File

@ -133,7 +133,8 @@ image/vnd.djvu = exec rcldjvu
image/svg+xml = exec rclsvg
image/x-xcf = execm rclimg
inode/symlink = internal
inode/x-empty = exec rclnull
application/x-zerosize = internal
inode/x-empty = internal application/x-zerosize
message/rfc822 = internal
text/calendar = execm rclics;mimetype=text/plain
text/html = internal

View File

@ -1,7 +1,7 @@
# (C) 2004 J.F.Dockes. License: GPL
#
# Recoll default configuration file. This typically lives in
# @prefix@/share/recoll/examples and provides default values. You can
# $prefix/share/recoll/examples and provides default values. You can
# override selected parameters by adding assignments to
# ~/.recoll/recoll.conf (or $RECOLL_CONFDIR/recoll.conf)
#
@ -199,12 +199,13 @@ maxfsoccuppc = 0
idxflushmb = 10
# Place to search for executable filters. If RECOLL_FILTERSDIR is set in
# the environment, we use it instead
filtersdir = @prefix@/share/recoll/filters
# the environment, we use it instead. Defaults to $prefix/share/recoll/filters
# filtersdir = /path/to/my/filters
# Place to search for icons. The only reason to change this would be if you
# want to change the icons displayed in the result list
iconsdir = @prefix@/share/recoll/images
# want to change the icons displayed in the result list.
# Defaults to $prefix/share/recoll/images
# iconsdir = /path/to/my/icons
# Should we use the system's 'file -i' command as a final step in file type
# identification ? This may be useful, but will usually cause the

View File

@ -16,21 +16,20 @@
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#ifdef HAVE_CONFIG_H
#ifdef RECOLL_DATADIR
#ifdef BUILDING_RECOLL
#include "autoconfig.h"
#else
#include "config.h"
#endif /* RECOLL */
#endif /* HAVE_CONFIG_H */
#ifdef RECOLL_DATADIR
#ifdef BUILDING_RECOLL
/* Yes, recoll unac is actually c++, let's face modernity, I will not be
caught writing another binary search */
#include <vector>
#include <map>
#include <string>
#include <algorithm>
#include <iostream>
#include UNORDERED_MAP_INCLUDE
using std::string;
@ -53,7 +52,7 @@ static inline bool is_except_char(unsigned short c, string& trans)
trans = it->second;
return true;
}
#endif /* RECOLL_DATADIR */
#endif /* BUILDING_RECOLL*/
/*
* If configure.in has not defined this symbol, assume const. It
@ -14171,9 +14170,9 @@ int unacmaybefold_string_utf16(const char* in, size_t in_length,
char** outp, size_t* out_lengthp, int what)
{
char* out;
int out_size;
int out_length;
unsigned int i;
size_t out_size;
size_t out_length;
size_t i;
out_size = in_length > 0 ? in_length : 1024;
@ -14191,13 +14190,13 @@ int unacmaybefold_string_utf16(const char* in, size_t in_length,
for(i = 0; i < in_length; i += 2) {
unsigned short c;
unsigned short* p;
int l;
int k;
size_t l;
size_t k;
c = (in[i] << 8) | (in[i + 1] & 0xff);
/*
* Lookup the tables for decomposition information
*/
#ifdef RECOLL_DATADIR
#ifdef BUILDING_RECOLL
// Exception unac/fold values set by user. There should be 3 arrays for
// unac/fold/unac+fold. For now there is only one array, which used to
// be set for unac+fold, and is mostly or only used to prevent diacritics
@ -14220,11 +14219,11 @@ int unacmaybefold_string_utf16(const char* in, size_t in_length,
l = trans.size() / 2;
}
} else {
#endif /* RECOLL_DATADIR */
#endif /* BUILDING_RECOLL */
unac_uf_char_utf16_(c, p, l, what)
#ifdef RECOLL_DATADIR
#ifdef BUILDING_RECOLL
}
#endif /* RECOLL_DATADIR */
#endif /* BUILDING_RECOLL */
/*
* Explain what's done in great detail
@ -14237,7 +14236,7 @@ int unacmaybefold_string_utf16(const char* in, size_t in_length,
if(l == 0) {
DEBUG_APPEND("untouched\n");
} else {
int i;
size_t i;
for(i = 0; i < l; i++)
DEBUG_APPEND("0x%04x ", p[i]);
DEBUG_APPEND("\n");
@ -14437,10 +14436,11 @@ static int convert(const char* from, const char* to,
const char* tmp = space;
size_t tmp_length = 2;
if(iconv(cd, (ICONV_CONST char **) &tmp, &tmp_length, &out, &out_remain) == (size_t)-1) {
if(errno == E2BIG)
if(errno == E2BIG) {
/* fall thru to the E2BIG case below */;
else
goto out;
} else {
goto out;
}
} else {
/* The offending character was replaced by a SPACE, skip it. */
in += 2;
@ -14456,7 +14456,7 @@ static int convert(const char* from, const char* to,
/*
* The output does not fit in the current out buffer, enlarge it.
*/
int length = out - out_base;
size_t length = out - out_base;
out_size *= 2;
{
char *saved = out_base;
@ -14562,7 +14562,7 @@ const char* unac_version(void)
return UNAC_VERSION;
}
#ifdef RECOLL_DATADIR
#ifdef BUILDING_RECOLL
void unac_set_except_translations(const char *spectrans)
{
except_trans.clear();
@ -14615,4 +14615,4 @@ void unac_set_except_translations(const char *spectrans)
free(out);
}
}
#endif /* RECOLL_DATADIR */
#endif /* BUILDING_RECOLL */

View File

@ -1 +0,0 @@
unac.c

1
src/unac/unac.cpp Normal file
View File

@ -0,0 +1 @@
#include "unac.c"

View File

@ -114,7 +114,7 @@ int fold_string(const char* charset,
/* To be called before starting threads in mt programs */
void unac_init_mt();
#ifdef RECOLL_DATADIR
#ifdef BUILDING_RECOLL
#include <string>
/**
* Set exceptions for unaccenting, for characters which should not be
@ -128,7 +128,7 @@ void unac_init_mt();
* can't be an exception character, deal with it...
*/
void unac_set_except_translations(const char *spectrans);
#endif /* RECOLL_DATADIR */
#endif /* BUILDING_RECOLL */
/*
* Return unac version number.

View File

@ -109,8 +109,8 @@ trfileudi.o : fileudi.cpp fileudi.h
EXECMD_OBJS= trexecmd.o
trexecmd : $(EXECMD_OBJS)
$(CXX) $(ALL_CXXFLAGS) -o trexecmd $(EXECMD_OBJS) $(LIBRECOLL)
trexecmd.o : execmd.cpp execmd.h
$(CXX) -o trexecmd.o -c $(ALL_CXXFLAGS) -DTEST_EXECMD execmd.cpp
trexecmd.o : trexecmd.cpp execmd.h
$(CXX) -o trexecmd.o -c $(ALL_CXXFLAGS) -I../xaposix trexecmd.cpp
TRANSCODE_OBJS= trtranscode.o
transcode : $(TRANSCODE_OBJS)

View File

@ -217,7 +217,7 @@ void base64_encode(const string &in, string &out)
out.clear();
int srclength = in.length();
string::size_type srclength = in.length();
int sidx = 0;
while (2 < srclength) {
input[0] = in[sidx++];
@ -244,7 +244,7 @@ void base64_encode(const string &in, string &out)
if (0 != srclength) {
/* Get what's left. */
input[0] = input[1] = input[2] = '\0';
for (int i = 0; i < srclength; i++)
for (string::size_type i = 0; i < srclength; i++)
input[i] = in[sidx++];
output[0] = input[0] >> 2;

View File

@ -184,7 +184,7 @@ public:
// Offset of last write (newest header)
off_t m_nheadoffs;
// Pad size for newest entry.
int m_npadsize;
off_t m_npadsize;
// Keep history or only last entry
bool m_uniquentries;
///////////////////// End header entries
@ -956,10 +956,10 @@ bool CirCache::erase(const string& udi)
// entry.
class CCScanHookSpacer : public CCScanHook {
public:
UINT sizewanted;
UINT sizeseen;
off_t sizewanted;
off_t sizeseen;
vector<pair<string, off_t> > squashed_udis;
CCScanHookSpacer(int sz)
CCScanHookSpacer(off_t sz)
: sizewanted(sz), sizeseen(0) {assert(sz > 0);}
virtual status takeone(off_t offs, const string& udi,
@ -1009,14 +1009,14 @@ bool CirCache::put(const string& udi, const ConfSimple *iconf,
// Data compression ?
const char *datap = data.c_str();
unsigned int datalen = data.size();
size_t datalen = data.size();
unsigned short flags = 0;
TempBuf compbuf;
if (!(iflags & NoCompHint)) {
ULONG len = compressBound(data.size());
uLong len = compressBound(static_cast<uLong>(data.size()));
char *bf = compbuf.setsize(len);
if (bf != 0 &&
compress((Bytef*)bf, &len, (Bytef*)data.c_str(), data.size())
compress((Bytef*)bf, &len, (Bytef*)data.c_str(), static_cast<uLong>(data.size()))
== Z_OK) {
if (float(len) < 0.9 * float(data.size())) {
// bf is local but it's our static buffer address
@ -1034,16 +1034,16 @@ bool CirCache::put(const string& udi, const ConfSimple *iconf,
}
// Characteristics for the new entry.
int nsize = CIRCACHE_HEADER_SIZE + dic.size() + datalen;
int nwriteoffs = m_d->m_oheadoffs;
int npadsize = 0;
off_t nsize = CIRCACHE_HEADER_SIZE + dic.size() + datalen;
off_t nwriteoffs = m_d->m_oheadoffs;
off_t npadsize = 0;
bool extending = false;
LOGDEB(("CirCache::put: nsz %d oheadoffs %d\n", nsize, m_d->m_oheadoffs));
// Check if we can recover some pad space from the (physically) previous
// entry.
int recovpadsize = m_d->m_oheadoffs == CIRCACHE_FIRSTBLOCK_SIZE ?
off_t recovpadsize = m_d->m_oheadoffs == CIRCACHE_FIRSTBLOCK_SIZE ?
0 : m_d->m_npadsize;
if (recovpadsize != 0) {
// Need to read the latest entry's header, to rewrite it with a
@ -1082,7 +1082,7 @@ bool CirCache::put(const string& udi, const ConfSimple *iconf,
} else {
// Scan the file until we have enough space for the new entry,
// and determine the pad size up to the 1st preserved entry
int scansize = nsize - recovpadsize;
off_t scansize = nsize - recovpadsize;
LOGDEB(("CirCache::put: scanning for size %d from offs %u\n",
scansize, (UINT)m_d->m_oheadoffs));
CCScanHookSpacer spacer(scansize);

View File

@ -14,9 +14,7 @@
* Free Software Foundation, Inc.,
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "autoconfig.h"
#ifndef TEST_CONFTREE
@ -70,7 +68,7 @@ void ConfSimple::parseinput(istream &input)
}
{
int ll = strlen(cline);
size_t ll = strlen(cline);
while (ll > 0 && (cline[ll-1] == '\n' || cline[ll-1] == '\r')) {
cline[ll-1] = 0;
ll--;
@ -576,8 +574,8 @@ bool ConfSimple::hasNameAnywhere(const string& nm) const
int ConfTree::get(const std::string &name, string &value, const string &sk)
const
{
if (sk.empty() || sk[0] != '/') {
// LOGDEB((stderr, "ConfTree::get: looking in global space\n"));
if (sk.empty() || !path_isabsolute(sk) ) {
// LOGDEB((stderr, "ConfTree::get: looking in global space for sk [%s]\n", sk.c_str()));
return ConfSimple::get(name, value, sk);
}
@ -590,15 +588,21 @@ int ConfTree::get(const std::string &name, string &value, const string &sk)
// Look in subkey and up its parents until root ('')
for (;;) {
// LOGDEB((stderr,"ConfTree::get: looking for '%s' in '%s'\n",
// name.c_str(), msk.c_str()));
// LOGDEB((stderr,"ConfTree::get: looking for '%s' in '%s'\n",
// name.c_str(), msk.c_str()));
if (ConfSimple::get(name, value, msk))
return 1;
string::size_type pos = msk.rfind("/");
if (pos != string::npos) {
msk.replace(pos, string::npos, string());
} else
} else {
#ifdef _WIN32
if (msk.size() == 2 && isalpha(msk[0]) && msk[1] == ':')
msk.clear();
else
#endif
break;
}
}
return 0;
}

View File

@ -15,14 +15,18 @@
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#ifndef TEST_COPYFILE
#include "autoconfig.h"
#include <stdio.h>
#include <errno.h>
#include "safefcntl.h"
#include <sys/types.h>
#include "safesysstat.h"
#include "safeunistd.h"
#ifndef _WIN32
#include <sys/time.h>
#include <utime.h>
#endif
#include <cstring>
@ -43,7 +47,7 @@ bool copyfile(const char *src, const char *dst, string &reason, int flags)
LOGDEB(("copyfile: %s to %s\n", src, dst));
if ((sfd = ::open(src, O_RDONLY)) < 0) {
if ((sfd = ::open(src, O_RDONLY, 0)) < 0) {
reason += string("open ") + src + ": " + strerror(errno);
goto out;
}
@ -149,6 +153,7 @@ bool renameormove(const char *src, const char *dst, string &reason)
return false;
}
#ifndef _WIN32
// Try to preserve modes, owner, times. This may fail for a number
// of reasons
if ((st1.st_mode & 0777) != (st.st_mode & 0777)) {
@ -167,7 +172,7 @@ bool renameormove(const char *src, const char *dst, string &reason)
times[1].tv_sec = st.st_mtime;
times[1].tv_usec = 0;
utimes(dst, times);
#endif
// All ok, get rid of origin
if (unlink(src) < 0) {
reason += string("Can't unlink ") + src + "Error : " + strerror(errno);

View File

@ -62,7 +62,6 @@ bool getCpuConf(CpuConf& cpus)
}
#endif
#else // TEST_CPUCONF
#include <stdlib.h>

Some files were not shown because too many files have changed in this diff Show More