Add code to use libmagic (disabled in recoll for now). rclgrep -r option
This commit is contained in:
parent
78183a8e00
commit
5fc0066a34
@ -1,4 +1,4 @@
|
||||
/* Copyright (C) 2004 J.F.Dockes
|
||||
/* Copyright (C) 2004-2022 J.F.Dockes
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
@ -18,9 +18,14 @@
|
||||
#include "autoconfig.h"
|
||||
|
||||
#include <ctype.h>
|
||||
|
||||
#include <string>
|
||||
#include <list>
|
||||
|
||||
#ifdef ENABLE_LIBMAGIC
|
||||
#include <magic.h>
|
||||
#endif
|
||||
|
||||
#include "mimetype.h"
|
||||
#include "log.h"
|
||||
#include "execmd.h"
|
||||
@ -53,6 +58,18 @@ static string mimetypefromdata(RclConfig *cfg, const string &fn, bool usfc)
|
||||
// First try the internal identifying routine
|
||||
string mime = idFile(fn.c_str());
|
||||
|
||||
#ifdef ENABLE_LIBMAGIC
|
||||
if (usfc && mime.empty()) {
|
||||
// Caching the open mgtoken would slightly improve performance but we'd need locking because
|
||||
// libmagic is not thread-safe
|
||||
auto mgtoken = magic_open(MAGIC_MIME_TYPE);
|
||||
if (mgtoken) {
|
||||
magic_load(mgtoken, nullptr);
|
||||
mime = magic_file(mgtoken, fn.c_str());
|
||||
magic_close(mgtoken);
|
||||
}
|
||||
}
|
||||
#else
|
||||
#ifdef USE_SYSTEM_FILE_COMMAND
|
||||
if (usfc && mime.empty()) {
|
||||
// Last resort: use "file -i", or its configured replacement.
|
||||
@ -132,7 +149,7 @@ static string mimetypefromdata(RclConfig *cfg, const string &fn, bool usfc)
|
||||
mime.clear();
|
||||
}
|
||||
#endif //USE_SYSTEM_FILE_COMMAND
|
||||
|
||||
#endif // Not libmagic
|
||||
return mime;
|
||||
}
|
||||
|
||||
@ -161,6 +178,11 @@ string mimetype(const string &fn, const struct PathStat *stp,
|
||||
|
||||
string mtype;
|
||||
|
||||
if (cfg && cfg->inStopSuffixes(fn)) {
|
||||
LOGDEB("mimetype: fn [" << fn << "] in stopsuffixes\n");
|
||||
return mtype;
|
||||
}
|
||||
|
||||
// Extended attribute has priority on everything, as per:
|
||||
// http://freedesktop.org/wiki/CommonExtendedAttributes
|
||||
if (pxattr::get(fn, "mime_type", &mtype)) {
|
||||
@ -177,11 +199,6 @@ string mimetype(const string &fn, const struct PathStat *stp,
|
||||
return mtype;
|
||||
}
|
||||
|
||||
if (cfg->inStopSuffixes(fn)) {
|
||||
LOGDEB("mimetype: fn [" << fn << "] in stopsuffixes\n");
|
||||
return mtype;
|
||||
}
|
||||
|
||||
// Compute file name suffix and search the mimetype map
|
||||
string::size_type dot = fn.find_first_of(".");
|
||||
while (dot != string::npos) {
|
||||
|
||||
@ -25,6 +25,7 @@ AM_CPPFLAGS = -Wall -Wno-unused -std=c++11 \
|
||||
-DRECOLL_DATADIR=\"${pkgdatadir}\" \
|
||||
-DREADFILE_ENABLE_ZLIB -DREADFILE_ENABLE_MINIZ -DREADFILE_ENABLE_MD5 \
|
||||
-D_GNU_SOURCE \
|
||||
-DENABLE_LIBMAGIC \
|
||||
$(DEFS)
|
||||
|
||||
bin_PROGRAMS = rclgrep
|
||||
@ -92,4 +93,4 @@ rclgrep_SOURCES = \
|
||||
../utils/wipedir.cpp \
|
||||
../utils/zlibut.cpp
|
||||
|
||||
rclgrep_LDADD = $(XSLT_LIBS) $(LIBICONV) $(LIBTHREADS)
|
||||
rclgrep_LDADD = $(XSLT_LIBS) $(LIBICONV) -lmagic $(LIBTHREADS)
|
||||
|
||||
@ -68,9 +68,9 @@ static int op_flags;
|
||||
#define OPT_A 0x1000
|
||||
#define OPT_B 0x2000
|
||||
#define OPT_C 0x4000
|
||||
#define OPT_r 0x8000
|
||||
|
||||
#define OPTVAL_RECOLL_CONFIG 1000
|
||||
#define OPTVAL_HELP 1001
|
||||
enum OptVal {OPTVAL_RECOLL_CONFIG=1000, OPTVAL_HELP, OPTVAL_INCLUDE,};
|
||||
|
||||
static struct option long_options[] = {
|
||||
{"regexp", required_argument, 0, 'e'},
|
||||
@ -89,6 +89,8 @@ static struct option long_options[] = {
|
||||
{"after-context", required_argument, 0, 'A'},
|
||||
{"before-context", required_argument, 0, 'B'},
|
||||
{"context", required_argument, 0, 'C'},
|
||||
{"recurse", 0, 0, 'r'},
|
||||
{"include", required_argument, 0, OPTVAL_INCLUDE},
|
||||
{0, 0, 0, 0}
|
||||
};
|
||||
|
||||
@ -211,14 +213,15 @@ void grepit(std::istream& instream, const Rcl::Doc& doc)
|
||||
if (matchcount) {
|
||||
std::cout << ppath << "\n";
|
||||
}
|
||||
} else if (op_flags & OPT_c) {
|
||||
} else if ((op_flags & OPT_c) && matchcount) {
|
||||
std::cout << ppath << "::" << matchcount << "\n";
|
||||
}
|
||||
}
|
||||
|
||||
bool processpath(RclConfig *config, const std::string& path)
|
||||
{
|
||||
LOGINF("processpath: [" << path << "]\n");
|
||||
// LOGINF("processpath: [" << path << "]\n");
|
||||
// std::cerr << "processpath: [" << path << "]\n";
|
||||
if (path.empty()) {
|
||||
// stdin
|
||||
Rcl::Doc doc;
|
||||
@ -270,8 +273,8 @@ bool processpath(RclConfig *config, const std::string& path)
|
||||
|
||||
class WalkerCB : public FsTreeWalkerCB {
|
||||
public:
|
||||
WalkerCB(std::list<std::string>& files, const vector<string>& selpats, RclConfig *config)
|
||||
: m_files(files), m_pats(selpats), m_config(config) {}
|
||||
WalkerCB(const vector<string>& selpats, RclConfig *config)
|
||||
: m_pats(selpats), m_config(config) {}
|
||||
virtual FsTreeWalker::Status processone(
|
||||
const string& fn, const struct PathStat *, FsTreeWalker::CbFlag flg) {
|
||||
if (flg == FsTreeWalker::FtwRegular) {
|
||||
@ -288,15 +291,14 @@ public:
|
||||
}
|
||||
return FsTreeWalker::FtwOk;
|
||||
}
|
||||
std::list<std::string>& m_files;
|
||||
const vector<string>& m_pats;
|
||||
RclConfig *m_config{nullptr};
|
||||
};
|
||||
|
||||
bool recursive_grep(RclConfig *config, const string& top, const vector<string>& selpats)
|
||||
{
|
||||
std::list<std::string> files;
|
||||
WalkerCB cb(files, selpats, config);
|
||||
// std::cerr << "recursive_grep: top : [" << top << "]\n";
|
||||
WalkerCB cb(selpats, config);
|
||||
FsTreeWalker walker;
|
||||
current_topdir = top;
|
||||
if (path_isdir(top)) {
|
||||
@ -392,7 +394,7 @@ int main(int argc, char *argv[])
|
||||
std::string a_config;
|
||||
vector<string> selpatterns;
|
||||
|
||||
while ((ret = getopt_long(argc, argv, "A:B:C:ce:f:hHiLlnp:vx", long_options, NULL)) != -1) {
|
||||
while ((ret = getopt_long(argc, argv, "A:B:C:ce:f:hHiLlnp:rvx", long_options, NULL)) != -1) {
|
||||
switch (ret) {
|
||||
case 'A': op_flags |= OPT_A; aftercontext = atoi(optarg); break;
|
||||
case 'B': op_flags |= OPT_B; beforecontext = atoi(optarg); break;
|
||||
@ -407,10 +409,12 @@ int main(int argc, char *argv[])
|
||||
case 'l': op_flags |= OPT_l|OPT_c; break;
|
||||
case 'n': op_flags |= OPT_n; break;
|
||||
case 'p': op_flags |= OPT_p; selpatterns.push_back(optarg); break;
|
||||
case 'r': op_flags |= OPT_r|OPT_H; break;
|
||||
case 'v': op_flags |= OPT_v; break;
|
||||
case 'x': op_flags |= OPT_x; break;
|
||||
case OPTVAL_RECOLL_CONFIG: a_config = optarg; break;
|
||||
case OPTVAL_HELP: Usage(stdout); break;
|
||||
case OPTVAL_INCLUDE: selpatterns.push_back(optarg); break;
|
||||
default: Usage(); break;
|
||||
}
|
||||
}
|
||||
@ -455,14 +459,18 @@ int main(int argc, char *argv[])
|
||||
}
|
||||
}
|
||||
std::vector<std::string> paths;
|
||||
if (aremain == 0) {
|
||||
if (aremain == 0 && !(op_flags & OPT_r)) {
|
||||
// Read from stdin
|
||||
processpath(config, std::string());
|
||||
} else {
|
||||
if (aremain == 0) {
|
||||
paths.push_back(".");
|
||||
} else {
|
||||
while (aremain--) {
|
||||
paths.push_back(argv[optind++]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool status = processpaths(config, paths, selpatterns);
|
||||
return status ? 0 : 1;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user