From 02c14a628115eaa1bf013ca27c5df434d9a07ef9 Mon Sep 17 00:00:00 2001 From: dockes Date: Mon, 11 Dec 2006 14:50:53 +0000 Subject: [PATCH] rationalize stopsuffix list usage --- src/common/rclconfig.cpp | 10 +++------- src/common/rclconfig.h | 4 ++-- src/index/mimetype.cpp | 16 ++++++---------- src/utils/smallut.cpp | 38 +++++++++++++++++++++++++++++++++++++- src/utils/smallut.h | 5 ++++- 5 files changed, 52 insertions(+), 21 deletions(-) diff --git a/src/common/rclconfig.cpp b/src/common/rclconfig.cpp index c0ee6ada..164e7f88 100644 --- a/src/common/rclconfig.cpp +++ b/src/common/rclconfig.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: rclconfig.cpp,v 1.33 2006-11-20 15:28:14 dockes Exp $ (C) 2004 J.F.Dockes"; +static char rcsid[] = "@(#$Id: rclconfig.cpp,v 1.34 2006-12-11 14:50:53 dockes Exp $ (C) 2004 J.F.Dockes"; #endif /* * This program is free software; you can redistribute it and/or modify @@ -230,7 +230,7 @@ std::list RclConfig::getAllMimeTypes() return lst; } -bool RclConfig::getStopSuffixes(list& sufflist) +const list* RclConfig::getStopSuffixes() { if (stopsuffixes == 0 && (stopsuffixes = new list) != 0) { string stp; @@ -239,11 +239,7 @@ bool RclConfig::getStopSuffixes(list& sufflist) } } - if (stopsuffixes) { - sufflist = *stopsuffixes; - return true; - } - return false; + return stopsuffixes; } string RclConfig::getMimeTypeFromSuffix(const string &suff) diff --git a/src/common/rclconfig.h b/src/common/rclconfig.h index 59244f78..e9ba6e66 100644 --- a/src/common/rclconfig.h +++ b/src/common/rclconfig.h @@ -16,7 +16,7 @@ */ #ifndef _RCLCONFIG_H_INCLUDED_ #define _RCLCONFIG_H_INCLUDED_ -/* @(#$Id: rclconfig.h,v 1.25 2006-11-20 15:28:04 dockes Exp $ (C) 2004 J.F.Dockes */ +/* @(#$Id: rclconfig.h,v 1.26 2006-12-11 14:50:53 dockes Exp $ (C) 2004 J.F.Dockes */ #include #include @@ -84,7 +84,7 @@ class RclConfig { * The list is initialized on first call, and not changed for subsequent * setKeydirs. */ - bool getStopSuffixes(list& sufflist); + const list* getStopSuffixes(); /** * Check in mimeconf if input mime type is a compressed one, and diff --git a/src/index/mimetype.cpp b/src/index/mimetype.cpp index 5e179c79..4afa5193 100644 --- a/src/index/mimetype.cpp +++ b/src/index/mimetype.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: mimetype.cpp,v 1.17 2006-03-21 11:04:39 dockes Exp $ (C) 2004 J.F.Dockes"; +static char rcsid[] = "@(#$Id: mimetype.cpp,v 1.18 2006-12-11 14:50:53 dockes Exp $ (C) 2004 J.F.Dockes"; #endif /* * This program is free software; you can redistribute it and/or modify @@ -109,15 +109,11 @@ string mimetype(const string &fn, RclConfig *cfg, bool usfc) if (cfg == 0) return ""; - list stoplist; - cfg->getStopSuffixes(stoplist); - if (!stoplist.empty()) { - for (list::const_iterator it = stoplist.begin(); - it != stoplist.end(); it++) { - if (it->length() > fn.length()) - continue; - if (!stringicmp(fn.substr(fn.length() - it->length(), - string::npos), *it)) { + const list* stoplist = cfg->getStopSuffixes(); + if (stoplist && !stoplist->empty()) { + for (list::const_iterator it = stoplist->begin(); + it != stoplist->end(); it++) { + if (!stringisuffcmp(fn, *it)) { LOGDEB(("mimetype: fn %s in stoplist (%s)\n", fn.c_str(), it->c_str())); return ""; diff --git a/src/utils/smallut.cpp b/src/utils/smallut.cpp index eecb103a..44c44258 100644 --- a/src/utils/smallut.cpp +++ b/src/utils/smallut.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: smallut.cpp,v 1.20 2006-12-07 07:07:18 dockes Exp $ (C) 2004 J.F.Dockes"; +static char rcsid[] = "@(#$Id: smallut.cpp,v 1.21 2006-12-11 14:50:53 dockes Exp $ (C) 2004 J.F.Dockes"; #endif /* * This program is free software; you can redistribute it and/or modify @@ -120,6 +120,21 @@ int stringicmp(const string & s1, const string& s2) } } +extern int stringisuffcmp(const string& s1, const string& s2) +{ + string::const_reverse_iterator r1 = s1.rbegin(), re1 = s1.rend(), + r2 = s2.rbegin(), re2 = s2.rend(); + while (r1 != re1 && r2 != re2) { + char c1 = ::toupper(*r1); + char c2 = ::toupper(*r2); + if (c1 != c2) { + return c1 > c2 ? 1 : -1; + } + ++r1; ++r2; + } + return 0; +} + // s1 is already lowercase int stringlowercmp(const string & s1, const string& s2) { @@ -570,9 +585,22 @@ struct spair pairs[] = { {"a", "Ab"}, }; int npairs = sizeof(pairs) / sizeof(struct spair); +struct spair suffpairs[] = { + {"", ""}, + {"", "a"}, + {"a", ""}, + {"a", "a"}, + {"toto.txt", ".txt"}, + {"TXT", "toto.txt"}, + {"toto.txt", ".txt1"}, + {"toto.txt1", ".txt"}, +}; +int nsuffpairs = sizeof(suffpairs) / sizeof(struct spair); + int main(int argc, char **argv) { +#if 0 for (int i = 0; i < npairs; i++) { { int c = stringicmp(pairs[i].s1, pairs[i].s2); @@ -591,6 +619,14 @@ int main(int argc, char **argv) } printf("\n"); } +#else + for (int i = 0; i < nsuffpairs; i++) { + int c = stringisuffcmp(suffpairs[i].s1, suffpairs[i].s2); + printf("[%s] %s [%s] \n", suffpairs[i].s1, + c == 0 ? "matches" : c < 0 ? "<" : ">", suffpairs[i].s2); + } +#endif + } #endif diff --git a/src/utils/smallut.h b/src/utils/smallut.h index 351b9364..132473e9 100644 --- a/src/utils/smallut.h +++ b/src/utils/smallut.h @@ -16,7 +16,7 @@ */ #ifndef _SMALLUT_H_INCLUDED_ #define _SMALLUT_H_INCLUDED_ -/* @(#$Id: smallut.h,v 1.20 2006-12-07 07:07:18 dockes Exp $ (C) 2004 J.F.Dockes */ +/* @(#$Id: smallut.h,v 1.21 2006-12-11 14:50:53 dockes Exp $ (C) 2004 J.F.Dockes */ #include #include #include @@ -31,6 +31,9 @@ extern int stringicmp(const string& s1, const string& s2); extern int stringlowercmp(const string& alreadylower, const string& s2); extern int stringuppercmp(const string& alreadyupper, const string& s2); +// Is one string the end part of the other ? +extern int stringisuffcmp(const string& s1, const string& s2); + // Compare charset names, removing the more common spelling variations extern bool samecharset(const string &cs1, const string &cs2);