Add code to use libmagic (disabled in recoll for now). rclgrep -r option

This commit is contained in:
Jean-Francois Dockes 2022-09-09 18:44:21 +02:00
parent 78183a8e00
commit 5fc0066a34
3 changed files with 47 additions and 21 deletions

View File

@ -1,4 +1,4 @@
/* Copyright (C) 2004 J.F.Dockes /* Copyright (C) 2004-2022 J.F.Dockes
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or * the Free Software Foundation; either version 2 of the License, or
@ -18,9 +18,14 @@
#include "autoconfig.h" #include "autoconfig.h"
#include <ctype.h> #include <ctype.h>
#include <string> #include <string>
#include <list> #include <list>
#ifdef ENABLE_LIBMAGIC
#include <magic.h>
#endif
#include "mimetype.h" #include "mimetype.h"
#include "log.h" #include "log.h"
#include "execmd.h" #include "execmd.h"
@ -53,6 +58,18 @@ static string mimetypefromdata(RclConfig *cfg, const string &fn, bool usfc)
// First try the internal identifying routine // First try the internal identifying routine
string mime = idFile(fn.c_str()); string mime = idFile(fn.c_str());
#ifdef ENABLE_LIBMAGIC
if (usfc && mime.empty()) {
    // The internal identification found nothing: ask libmagic for the MIME type.
    // Caching the open mgtoken would slightly improve performance but we'd need locking because
    // libmagic is not thread-safe
    auto mgtoken = magic_open(MAGIC_MIME_TYPE);
    if (mgtoken) {
        // magic_load() returns 0 on success. Without a loaded database there is nothing to query.
        if (magic_load(mgtoken, nullptr) == 0) {
            // magic_file() returns a null pointer on error. Assigning a null char pointer to a
            // std::string is undefined behaviour, so only copy an actual result. The copy must
            // happen before magic_close(), which releases the storage owned by the token.
            const char *detected = magic_file(mgtoken, fn.c_str());
            if (detected) {
                mime = detected;
            }
        }
        magic_close(mgtoken);
    }
}
#else
#ifdef USE_SYSTEM_FILE_COMMAND #ifdef USE_SYSTEM_FILE_COMMAND
if (usfc && mime.empty()) { if (usfc && mime.empty()) {
// Last resort: use "file -i", or its configured replacement. // Last resort: use "file -i", or its configured replacement.
@ -132,7 +149,7 @@ static string mimetypefromdata(RclConfig *cfg, const string &fn, bool usfc)
mime.clear(); mime.clear();
} }
#endif //USE_SYSTEM_FILE_COMMAND #endif //USE_SYSTEM_FILE_COMMAND
#endif // Not libmagic
return mime; return mime;
} }
@ -161,6 +178,11 @@ string mimetype(const string &fn, const struct PathStat *stp,
string mtype; string mtype;
// File names for which cfg->inStopSuffixes() is true are not identified at all: return the
// still-empty mtype right away, before the extended attribute lookup below. Testing cfg first
// avoids dereferencing a null configuration pointer.
if (cfg && cfg->inStopSuffixes(fn)) {
LOGDEB("mimetype: fn [" << fn << "] in stopsuffixes\n");
return mtype;
}
// Extended attribute has priority on everything, as per: // Extended attribute has priority on everything, as per:
// http://freedesktop.org/wiki/CommonExtendedAttributes // http://freedesktop.org/wiki/CommonExtendedAttributes
if (pxattr::get(fn, "mime_type", &mtype)) { if (pxattr::get(fn, "mime_type", &mtype)) {
@ -177,11 +199,6 @@ string mimetype(const string &fn, const struct PathStat *stp,
return mtype; return mtype;
} }
if (cfg->inStopSuffixes(fn)) {
LOGDEB("mimetype: fn [" << fn << "] in stopsuffixes\n");
return mtype;
}
// Compute file name suffix and search the mimetype map // Compute file name suffix and search the mimetype map
string::size_type dot = fn.find_first_of("."); string::size_type dot = fn.find_first_of(".");
while (dot != string::npos) { while (dot != string::npos) {

View File

@ -25,6 +25,7 @@ AM_CPPFLAGS = -Wall -Wno-unused -std=c++11 \
-DRECOLL_DATADIR=\"${pkgdatadir}\" \ -DRECOLL_DATADIR=\"${pkgdatadir}\" \
-DREADFILE_ENABLE_ZLIB -DREADFILE_ENABLE_MINIZ -DREADFILE_ENABLE_MD5 \ -DREADFILE_ENABLE_ZLIB -DREADFILE_ENABLE_MINIZ -DREADFILE_ENABLE_MD5 \
-D_GNU_SOURCE \ -D_GNU_SOURCE \
-DENABLE_LIBMAGIC \
$(DEFS) $(DEFS)
bin_PROGRAMS = rclgrep bin_PROGRAMS = rclgrep
@ -92,4 +93,4 @@ rclgrep_SOURCES = \
../utils/wipedir.cpp \ ../utils/wipedir.cpp \
../utils/zlibut.cpp ../utils/zlibut.cpp
rclgrep_LDADD = $(XSLT_LIBS) $(LIBICONV) $(LIBTHREADS) rclgrep_LDADD = $(XSLT_LIBS) $(LIBICONV) -lmagic $(LIBTHREADS)

View File

@ -68,9 +68,9 @@ static int op_flags;
#define OPT_A 0x1000 #define OPT_A 0x1000
#define OPT_B 0x2000 #define OPT_B 0x2000
#define OPT_C 0x4000 #define OPT_C 0x4000
#define OPT_r 0x8000
#define OPTVAL_RECOLL_CONFIG 1000 enum OptVal {OPTVAL_RECOLL_CONFIG=1000, OPTVAL_HELP, OPTVAL_INCLUDE,};
#define OPTVAL_HELP 1001
static struct option long_options[] = { static struct option long_options[] = {
{"regexp", required_argument, 0, 'e'}, {"regexp", required_argument, 0, 'e'},
@ -89,6 +89,8 @@ static struct option long_options[] = {
{"after-context", required_argument, 0, 'A'}, {"after-context", required_argument, 0, 'A'},
{"before-context", required_argument, 0, 'B'}, {"before-context", required_argument, 0, 'B'},
{"context", required_argument, 0, 'C'}, {"context", required_argument, 0, 'C'},
{"recurse", 0, 0, 'r'},
{"include", required_argument, 0, OPTVAL_INCLUDE},
{0, 0, 0, 0} {0, 0, 0, 0}
}; };
@ -211,14 +213,15 @@ void grepit(std::istream& instream, const Rcl::Doc& doc)
if (matchcount) { if (matchcount) {
std::cout << ppath << "\n"; std::cout << ppath << "\n";
} }
} else if (op_flags & OPT_c) { } else if ((op_flags & OPT_c) && matchcount) {
std::cout << ppath << "::" << matchcount << "\n"; std::cout << ppath << "::" << matchcount << "\n";
} }
} }
bool processpath(RclConfig *config, const std::string& path) bool processpath(RclConfig *config, const std::string& path)
{ {
LOGINF("processpath: [" << path << "]\n"); // LOGINF("processpath: [" << path << "]\n");
// std::cerr << "processpath: [" << path << "]\n";
if (path.empty()) { if (path.empty()) {
// stdin // stdin
Rcl::Doc doc; Rcl::Doc doc;
@ -270,8 +273,8 @@ bool processpath(RclConfig *config, const std::string& path)
class WalkerCB : public FsTreeWalkerCB { class WalkerCB : public FsTreeWalkerCB {
public: public:
WalkerCB(std::list<std::string>& files, const vector<string>& selpats, RclConfig *config) WalkerCB(const vector<string>& selpats, RclConfig *config)
: m_files(files), m_pats(selpats), m_config(config) {} : m_pats(selpats), m_config(config) {}
virtual FsTreeWalker::Status processone( virtual FsTreeWalker::Status processone(
const string& fn, const struct PathStat *, FsTreeWalker::CbFlag flg) { const string& fn, const struct PathStat *, FsTreeWalker::CbFlag flg) {
if (flg == FsTreeWalker::FtwRegular) { if (flg == FsTreeWalker::FtwRegular) {
@ -288,15 +291,14 @@ public:
} }
return FsTreeWalker::FtwOk; return FsTreeWalker::FtwOk;
} }
std::list<std::string>& m_files;
const vector<string>& m_pats; const vector<string>& m_pats;
RclConfig *m_config{nullptr}; RclConfig *m_config{nullptr};
}; };
bool recursive_grep(RclConfig *config, const string& top, const vector<string>& selpats) bool recursive_grep(RclConfig *config, const string& top, const vector<string>& selpats)
{ {
std::list<std::string> files; // std::cerr << "recursive_grep: top : [" << top << "]\n";
WalkerCB cb(files, selpats, config); WalkerCB cb(selpats, config);
FsTreeWalker walker; FsTreeWalker walker;
current_topdir = top; current_topdir = top;
if (path_isdir(top)) { if (path_isdir(top)) {
@ -392,7 +394,7 @@ int main(int argc, char *argv[])
std::string a_config; std::string a_config;
vector<string> selpatterns; vector<string> selpatterns;
while ((ret = getopt_long(argc, argv, "A:B:C:ce:f:hHiLlnp:vx", long_options, NULL)) != -1) { while ((ret = getopt_long(argc, argv, "A:B:C:ce:f:hHiLlnp:rvx", long_options, NULL)) != -1) {
switch (ret) { switch (ret) {
case 'A': op_flags |= OPT_A; aftercontext = atoi(optarg); break; case 'A': op_flags |= OPT_A; aftercontext = atoi(optarg); break;
case 'B': op_flags |= OPT_B; beforecontext = atoi(optarg); break; case 'B': op_flags |= OPT_B; beforecontext = atoi(optarg); break;
@ -407,10 +409,12 @@ int main(int argc, char *argv[])
case 'l': op_flags |= OPT_l|OPT_c; break; case 'l': op_flags |= OPT_l|OPT_c; break;
case 'n': op_flags |= OPT_n; break; case 'n': op_flags |= OPT_n; break;
case 'p': op_flags |= OPT_p; selpatterns.push_back(optarg); break; case 'p': op_flags |= OPT_p; selpatterns.push_back(optarg); break;
case 'r': op_flags |= OPT_r|OPT_H; break;
case 'v': op_flags |= OPT_v; break; case 'v': op_flags |= OPT_v; break;
case 'x': op_flags |= OPT_x; break; case 'x': op_flags |= OPT_x; break;
case OPTVAL_RECOLL_CONFIG: a_config = optarg; break; case OPTVAL_RECOLL_CONFIG: a_config = optarg; break;
case OPTVAL_HELP: Usage(stdout); break; case OPTVAL_HELP: Usage(stdout); break;
case OPTVAL_INCLUDE: selpatterns.push_back(optarg); break;
default: Usage(); break; default: Usage(); break;
} }
} }
@ -455,12 +459,16 @@ int main(int argc, char *argv[])
} }
} }
std::vector<std::string> paths; std::vector<std::string> paths;
if (aremain == 0) { if (aremain == 0 && !(op_flags & OPT_r)) {
// Read from stdin // Read from stdin
processpath(config, std::string()); processpath(config, std::string());
} else { } else {
while (aremain--) { if (aremain == 0) {
paths.push_back(argv[optind++]); paths.push_back(".");
} else {
while (aremain--) {
paths.push_back(argv[optind++]);
}
} }
} }