Implement date: date range filter/searches. Remove restriction on pure negative queries
This commit is contained in:
parent
3a6b34d24b
commit
ceb996c8fb
@ -63,6 +63,7 @@ bool dump_contents(RclConfig *rclconfig, TempDir& tmpdir, Rcl::Doc& idoc)
|
||||
|
||||
static char *thisprog;
|
||||
static char usage [] =
|
||||
" -P: Show date span for documents in index\n"
|
||||
" [-o|-a|-f] <query string>\n"
|
||||
" Runs a recoll query and displays result lines. \n"
|
||||
" Default: will interpret the argument(s) as a xesam query string\n"
|
||||
@ -110,6 +111,7 @@ static int op_flags;
|
||||
#define OPT_s 0x4000
|
||||
#define OPT_A 0x8000
|
||||
#define OPT_i 0x10000
|
||||
#define OPT_P 0x20000
|
||||
|
||||
int recollq(RclConfig **cfp, int argc, char **argv)
|
||||
{
|
||||
@ -148,6 +150,7 @@ int recollq(RclConfig **cfp, int argc, char **argv)
|
||||
if (limit <= 0) limit = INT_MAX;
|
||||
argc--; goto b1;
|
||||
case 'o': op_flags |= OPT_o; break;
|
||||
case 'P': op_flags |= OPT_P; break;
|
||||
case 'q': op_flags |= OPT_q; break;
|
||||
case 'S': op_flags |= OPT_S; if (argc < 2) Usage();
|
||||
sortfield = *(++argv);
|
||||
@ -161,13 +164,6 @@ int recollq(RclConfig **cfp, int argc, char **argv)
|
||||
b1: argc--; argv++;
|
||||
}
|
||||
|
||||
if (argc < 1) {
|
||||
Usage();
|
||||
}
|
||||
string qs = *argv++;argc--;
|
||||
while (argc > 0) {
|
||||
qs += string(" ") + *argv++;argc--;
|
||||
}
|
||||
string reason;
|
||||
*cfp = recollinit(0, 0, reason, &a_config);
|
||||
RclConfig *rclconfig = *cfp;
|
||||
@ -176,21 +172,10 @@ int recollq(RclConfig **cfp, int argc, char **argv)
|
||||
exit(1);
|
||||
}
|
||||
|
||||
{
|
||||
string uq;
|
||||
string charset = rclconfig->getDefCharset(true);
|
||||
int ercnt;
|
||||
if (!transcode(qs, uq, charset, "UTF-8", &ercnt)) {
|
||||
fprintf(stderr, "Can't convert command line args to utf-8\n");
|
||||
exit(1);
|
||||
} else if (ercnt) {
|
||||
fprintf(stderr, "%d errors while converting arguments from %s "
|
||||
"to utf-8\n", ercnt, charset.c_str());
|
||||
}
|
||||
qs = uq;
|
||||
if (argc < 1 && !(op_flags & OPT_P)) {
|
||||
Usage();
|
||||
}
|
||||
|
||||
|
||||
Rcl::Db rcldb(rclconfig);
|
||||
if (!extra_dbs.empty()) {
|
||||
for (list<string>::iterator it = extra_dbs.begin();
|
||||
@ -208,6 +193,39 @@ int recollq(RclConfig **cfp, int argc, char **argv)
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (op_flags & OPT_P) {
|
||||
int minyear, maxyear;
|
||||
if (!rcldb.maxYearSpan(&minyear, &maxyear)) {
|
||||
cerr << "maxYearSpan failed: " << rcldb.getReason() << endl;
|
||||
exit(1);
|
||||
} else {
|
||||
cout << "Min year " << minyear << " Max year " << maxyear << endl;
|
||||
exit(0);
|
||||
}
|
||||
}
|
||||
|
||||
if (argc < 1) {
|
||||
Usage();
|
||||
}
|
||||
string qs = *argv++;argc--;
|
||||
while (argc > 0) {
|
||||
qs += string(" ") + *argv++;argc--;
|
||||
}
|
||||
|
||||
{
|
||||
string uq;
|
||||
string charset = rclconfig->getDefCharset(true);
|
||||
int ercnt;
|
||||
if (!transcode(qs, uq, charset, "UTF-8", &ercnt)) {
|
||||
fprintf(stderr, "Can't convert command line args to utf-8\n");
|
||||
exit(1);
|
||||
} else if (ercnt) {
|
||||
fprintf(stderr, "%d errors while converting arguments from %s "
|
||||
"to utf-8\n", ercnt, charset.c_str());
|
||||
}
|
||||
qs = uq;
|
||||
}
|
||||
|
||||
Rcl::SearchData *sd = 0;
|
||||
|
||||
if (op_flags & (OPT_a|OPT_o|OPT_f)) {
|
||||
|
||||
@ -42,14 +42,13 @@ Rcl::SearchData *wasaStringToRcl(const string &qs, string &reason,
|
||||
return 0;
|
||||
Rcl::SearchData *rq = wasaQueryToRcl(wq, autosuffs);
|
||||
if (rq == 0) {
|
||||
reason = "Failed translating wasa query structure to recoll";
|
||||
reason = "Failed translating xesam query structure to recoll";
|
||||
return 0;
|
||||
}
|
||||
return rq;
|
||||
}
|
||||
|
||||
Rcl::SearchData *wasaQueryToRcl(WasaQuery *wasa,
|
||||
const string& autosuffs)
|
||||
Rcl::SearchData *wasaQueryToRcl(WasaQuery *wasa, const string& autosuffs)
|
||||
{
|
||||
if (wasa == 0)
|
||||
return 0;
|
||||
@ -119,6 +118,19 @@ Rcl::SearchData *wasaQueryToRcl(WasaQuery *wasa,
|
||||
}
|
||||
}
|
||||
|
||||
// Handle "date" spec
|
||||
if (!stringicmp("date", (*it)->m_fieldspec)) {
|
||||
DateInterval di;
|
||||
if (!parsedateinterval((*it)->m_value, &di)) {
|
||||
LOGERR(("wasaQueryToRcl: bad date interval format\n"));
|
||||
// Process rest of query anyway ?
|
||||
break;
|
||||
}
|
||||
LOGDEB(("wasaQueryToRcl:: date span: %d-%d-%d/%d-%d-%d\n",
|
||||
di.y1,di.m1,di.d1, di.y2,di.m2,di.d2))
|
||||
sdata->setDateSpan(&di);
|
||||
break;
|
||||
}
|
||||
|
||||
// "Regular" processing follows:
|
||||
unsigned int mods = (unsigned int)(*it)->m_modifiers;
|
||||
@ -151,7 +163,7 @@ Rcl::SearchData *wasaQueryToRcl(WasaQuery *wasa,
|
||||
|
||||
case WasaQuery::OP_EXCL:
|
||||
LOGDEB2(("wasaQueryToRcl: excl clause [%s]:[%s]\n",
|
||||
(*it)->m_fieldspec.c_str(), (*it)->m_value.c_str()));
|
||||
(*it)->m_fieldspec.c_str(), (*it)->m_value.c_str()));
|
||||
if (wasa->m_op != WasaQuery::OP_AND) {
|
||||
LOGERR(("wasaQueryToRcl: negative clause inside OR list!\n"));
|
||||
continue;
|
||||
|
||||
@ -767,6 +767,10 @@ bool Db::fieldToPrefix(const string& fld, string &pfx)
|
||||
fldToPrefs["keyword"] = "K";
|
||||
fldToPrefs["tag"] = "K";
|
||||
fldToPrefs["tags"] = "K";
|
||||
|
||||
fldToPrefs["xapyear"] = "Y";
|
||||
fldToPrefs["xapyearmon"] = "M";
|
||||
fldToPrefs["xapdate"] = "D";
|
||||
}
|
||||
|
||||
if (m_config && m_config->getFieldPrefix(fld, pfx))
|
||||
@ -1365,6 +1369,28 @@ bool Db::filenameWildExp(const string& fnexp, list<string>& names)
|
||||
return true;
|
||||
}
|
||||
|
||||
// Walk the Y terms and return min/max
|
||||
bool Db::maxYearSpan(int *minyear, int *maxyear)
|
||||
{
|
||||
*minyear = 1000000;
|
||||
*maxyear = -1000000;
|
||||
TermMatchResult result;
|
||||
if (!termMatch(ET_WILD, string(), "*", result, 5000, "xapyear"))
|
||||
return false;
|
||||
for (list<TermMatchEntry>::const_iterator it = result.entries.begin();
|
||||
it != result.entries.end(); it++) {
|
||||
if (!it->term.empty()) {
|
||||
int year = atoi(it->term.c_str()+1);
|
||||
if (year < *minyear)
|
||||
*minyear = year;
|
||||
if (year > *maxyear)
|
||||
*maxyear = year;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
class TermMatchCmpByWcf {
|
||||
public:
|
||||
int operator()(const TermMatchEntry& l, const TermMatchEntry& r) {
|
||||
|
||||
@ -171,6 +171,8 @@ class Db {
|
||||
const string& field = "",
|
||||
string *prefix = 0
|
||||
);
|
||||
/** Return min and max years for doc mod times in db */
|
||||
bool maxYearSpan(int *minyear, int *maxyear);
|
||||
|
||||
/** Special filename wildcard to XSFN terms expansion.
|
||||
internal/searchdata use only */
|
||||
|
||||
@ -46,33 +46,107 @@ typedef vector<SearchDataClause *>::const_iterator qlist_cit_t;
|
||||
|
||||
static const int original_term_wqf_booster = 10;
|
||||
|
||||
/* The dates-to-query routine is lifted quasi-verbatim but
|
||||
* modified from xapian-omega:date.cc. Copyright info:
|
||||
*
|
||||
* Copyright 1999,2000,2001 BrightStation PLC
|
||||
* Copyright 2001 James Aylett
|
||||
* Copyright 2001,2002 Ananova Ltd
|
||||
* Copyright 2002 Intercede 1749 Ltd
|
||||
* Copyright 2002,2003,2006 Olly Betts
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License as
|
||||
* published by the Free Software Foundation; either version 2 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
|
||||
* USA
|
||||
*/
|
||||
// Build an OR query of date terms covering the inclusive interval
// y1-m1-d1 .. y2-m2-d2. Whole years are matched with a 'Y' term, whole
// months with an 'M' term and partial months with one 'D' term per day.
static Xapian::Query
date_range_filter(int y1, int m1, int d1, int y2, int m2, int d2)
{
    // Term layout inside buf: prefix letter at [0], 4-digit year at
    // [1..4], 2-digit month at [5..6], 2-digit day at [7..8]. Each
    // sprintf below rewrites one field and terminates the string right
    // after it. Only small integers are formatted, so the fixed-size
    // buffer is ample.
    char buf[200];
    sprintf(buf, "D%04d%02d", y1, m1);
    vector<Xapian::Query> v;

    const bool single_month = (y1 == y2 && m1 == m2);
    const int d_last = monthdays(m1, y1);
    const int d_end = (single_month && d2 < d_last) ? d2 : d_last;

    if (d1 <= 1 && d_end >= d_last) {
        // The first month is fully covered: one month term is enough
        buf[0] = 'M';
        v.push_back(Xapian::Query(buf));
    } else {
        // Initial partial month: enumerate the days
        for (int day = d1; day <= d_end; day++) {
            sprintf(buf + 7, "%02d", day);
            v.push_back(Xapian::Query(buf));
        }
    }

    if (single_month) {
        return Xapian::Query(Xapian::Query::OP_OR, v.begin(), v.end());
    }

    // Remaining whole months of the first year (up to, but excluding,
    // the final month when both ends are in the same year)
    const int m_last = (y1 < y2) ? 12 : m2 - 1;
    for (int month = m1 + 1; month <= m_last; month++) {
        sprintf(buf + 5, "%02d", month);
        buf[0] = 'M';
        v.push_back(Xapian::Query(buf));
    }

    if (y1 < y2) {
        // Whole years strictly between the first and the last one
        for (int year = y1 + 1; year < y2; year++) {
            sprintf(buf + 1, "%04d", year);
            buf[0] = 'Y';
            v.push_back(Xapian::Query(buf));
        }
        // Whole months of the last year preceding the final month
        sprintf(buf + 1, "%04d", y2);
        buf[0] = 'M';
        for (int month = 1; month < m2; month++) {
            sprintf(buf + 5, "%02d", month);
            v.push_back(Xapian::Query(buf));
        }
    }

    sprintf(buf + 5, "%02d", m2);

    if (d2 >= monthdays(m2, y2)) {
        // The final month is fully covered
        buf[0] = 'M';
        v.push_back(Xapian::Query(buf));
    } else {
        // Final partial month: enumerate the days
        buf[0] = 'D';
        for (int day = 1; day <= d2; day++) {
            sprintf(buf + 7, "%02d", day);
            v.push_back(Xapian::Query(buf));
        }
    }

    return Xapian::Query(Xapian::Query::OP_OR, v.begin(), v.end());
}
|
||||
|
||||
bool SearchData::toNativeQuery(Rcl::Db &db, void *d)
|
||||
{
|
||||
Xapian::Query xq;
|
||||
m_reason.erase();
|
||||
|
||||
if (m_query.size() < 1) {
|
||||
if (!m_query.size() && !m_haveDates) {
|
||||
m_reason = "empty query";
|
||||
return false;
|
||||
}
|
||||
|
||||
// It's not allowed to have a pure negative query and also it
|
||||
// seems that Xapian doesn't like the first element to be AND_NOT
|
||||
qlist_it_t itnotneg = m_query.end();
|
||||
for (qlist_it_t it = m_query.begin(); it != m_query.end(); it++) {
|
||||
if ((*it)->m_tp != SCLT_EXCL) {
|
||||
itnotneg = it;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (itnotneg == m_query.end()) {
|
||||
LOGERR(("SearchData::toNativeQuery: can't have all negative clauses"));
|
||||
m_reason = "Can't have only negative clauses";
|
||||
return false;
|
||||
}
|
||||
if ((*m_query.begin())->m_tp == SCLT_EXCL)
|
||||
iter_swap(m_query.begin(), itnotneg);
|
||||
|
||||
// Walk the clause list translating each in turn and building the
|
||||
// Xapian query tree
|
||||
for (qlist_it_t it = m_query.begin(); it != m_query.end(); it++) {
|
||||
@ -91,12 +165,59 @@ bool SearchData::toNativeQuery(Rcl::Db &db, void *d)
|
||||
// addClause())
|
||||
Xapian::Query::op op;
|
||||
if (m_tp == SCLT_AND) {
|
||||
op = (*it)->m_tp == SCLT_EXCL ?
|
||||
Xapian::Query::OP_AND_NOT: Xapian::Query::OP_AND;
|
||||
if ((*it)->m_tp == SCLT_EXCL) {
|
||||
op = Xapian::Query::OP_AND_NOT;
|
||||
} else {
|
||||
op = Xapian::Query::OP_AND;
|
||||
}
|
||||
} else {
|
||||
op = Xapian::Query::OP_OR;
|
||||
}
|
||||
xq = xq.empty() ? nq : Xapian::Query(op, xq, nq);
|
||||
if (xq.empty()) {
|
||||
if (op == Xapian::Query::OP_AND_NOT)
|
||||
xq = Xapian::Query(op, Xapian::Query::MatchAll, nq);
|
||||
else
|
||||
xq = nq;
|
||||
} else {
|
||||
xq = Xapian::Query(op, xq, nq);
|
||||
}
|
||||
}
|
||||
|
||||
if (m_haveDates) {
|
||||
// If one of the extremities is unset, compute db extremas
|
||||
if (m_dates.y1 == 0 || m_dates.y2 == 0) {
|
||||
int minyear = 1970, maxyear = 2100;
|
||||
if (!db.maxYearSpan(&minyear, &maxyear)) {
|
||||
LOGERR(("Can't retrieve index min/max dates\n"));
|
||||
//whatever, go on.
|
||||
}
|
||||
if (m_dates.y1 == 0) {
|
||||
m_dates.y1 = minyear;
|
||||
m_dates.m1 = 1;
|
||||
m_dates.d1 = 1;
|
||||
}
|
||||
if (m_dates.y2 == 0) {
|
||||
m_dates.y2 = maxyear;
|
||||
m_dates.m2 = 12;
|
||||
m_dates.d2 = 31;
|
||||
}
|
||||
}
|
||||
LOGDEB(("Db::toNativeQuery: date interval: %d-%d-%d/%d-%d-%d\n",
|
||||
m_dates.y1, m_dates.m1, m_dates.d1,
|
||||
m_dates.y2, m_dates.m2, m_dates.d2));
|
||||
Xapian::Query dq = date_range_filter(m_dates.y1, m_dates.m1, m_dates.d1,
|
||||
m_dates.y2, m_dates.m2, m_dates.d2);
|
||||
if (dq.empty()) {
|
||||
LOGINFO(("Db::toNativeQuery: date filter is empty\n"));
|
||||
}
|
||||
// If no probabilistic query is provided then promote the daterange
|
||||
// filter to be THE query instead of filtering an empty query.
|
||||
if (xq.empty()) {
|
||||
LOGINFO(("Db::toNativeQuery: proba query is empty\n"));
|
||||
xq = dq;
|
||||
} else {
|
||||
xq = Xapian::Query(Xapian::Query::OP_FILTER, xq, dq);
|
||||
}
|
||||
}
|
||||
|
||||
// Add the file type filtering clause if any
|
||||
@ -116,7 +237,6 @@ bool SearchData::toNativeQuery(Rcl::Db &db, void *d)
|
||||
}
|
||||
}
|
||||
|
||||
list<Xapian::Query> pqueries;
|
||||
Xapian::Query tq;
|
||||
for (vector<string>::iterator it = exptps.begin();
|
||||
it != exptps.end(); it++) {
|
||||
@ -157,6 +277,7 @@ void SearchData::erase() {
|
||||
m_topdir.erase();
|
||||
m_description.erase();
|
||||
m_reason.erase();
|
||||
m_haveDates = false;
|
||||
}
|
||||
|
||||
// Am I a file name only search ? This is to turn off term highlighting
|
||||
|
||||
@ -30,6 +30,7 @@
|
||||
|
||||
#include "rcldb.h"
|
||||
#include "refcntr.h"
|
||||
#include "smallut.h"
|
||||
|
||||
#ifndef NO_NAMESPACES
|
||||
using std::vector;
|
||||
@ -70,7 +71,9 @@ class SearchDataClause;
|
||||
*/
|
||||
class SearchData {
|
||||
public:
|
||||
SearchData(SClType tp) : m_tp(tp), m_haveWildCards(false) {}
|
||||
SearchData(SClType tp)
|
||||
: m_tp(tp), m_haveDates(false), m_haveWildCards(false)
|
||||
{}
|
||||
~SearchData() {erase();}
|
||||
|
||||
/** Make pristine */
|
||||
@ -88,6 +91,18 @@ public:
|
||||
/** We become the owner of cl and will delete it */
|
||||
bool addClause(SearchDataClause *cl);
|
||||
|
||||
/** Set/get top subdirectory for filtering results */
|
||||
void setTopdir(const string& t) {m_topdir = t;}
|
||||
string getTopdir() {return m_topdir;}
|
||||
|
||||
/** Set date span for filtering results */
|
||||
void setDateSpan(DateInterval *dip) {m_dates = *dip; m_haveDates = true;}
|
||||
|
||||
/** Add file type for filtering results */
|
||||
void addFiletype(const string& ft) {m_filetypes.push_back(ft);}
|
||||
|
||||
void setStemlang(const string& lang = "english") {m_stemlang = lang;}
|
||||
|
||||
/** Retrieve error description */
|
||||
string getReason() {return m_reason;}
|
||||
|
||||
@ -107,17 +122,14 @@ public:
|
||||
*/
|
||||
string getDescription() {return m_description;}
|
||||
void setDescription(const string& d) {m_description = d;}
|
||||
/** Get/set top subdirectory for filtering results */
|
||||
string getTopdir() {return m_topdir;}
|
||||
void setTopdir(const string& t) {m_topdir = t;}
|
||||
/** Add file type for filtering results */
|
||||
void addFiletype(const string& ft) {m_filetypes.push_back(ft);}
|
||||
void setStemlang(const string& lang = "english") {m_stemlang = lang;}
|
||||
|
||||
private:
|
||||
SClType m_tp; // Only SCLT_AND or SCLT_OR here
|
||||
vector<SearchDataClause*> m_query;
|
||||
vector<string> m_filetypes; // Restrict to filetypes if set.
|
||||
string m_topdir; // Restrict to subtree.
|
||||
bool m_haveDates;
|
||||
DateInterval m_dates; // Restrict to date interval
|
||||
// Printable expanded version of the complete query, retrieved/set
|
||||
// from rcldb after the Xapian::setQuery() call
|
||||
string m_description;
|
||||
|
||||
@ -29,6 +29,7 @@ static char rcsid[] = "@(#$Id: smallut.cpp,v 1.35 2008-11-19 10:06:49 dockes Exp
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
|
||||
#include "smallut.h"
|
||||
#include "utf8iter.h"
|
||||
@ -281,6 +282,7 @@ template <class T> bool stringToStrings(const string &s, T &tokens,
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool stringToStrings(const string &s, list<string> &tokens,
|
||||
const string& as)
|
||||
{
|
||||
@ -707,10 +709,253 @@ float Chrono::secs(int frozen)
|
||||
gettime(CLOCK_REALTIME, &tv);
|
||||
float secs = (float)(frozen?frozen_tv.tv_sec:tv.tv_sec - m_secs);
|
||||
float nsecs = (float)(frozen?frozen_tv.tv_nsec:tv.tv_nsec - m_nsecs);
|
||||
//fprintf(stderr, "secs %.2f nsecs %.2f\n", secs, nsecs);
|
||||
return secs + nsecs * 1e-9;
|
||||
}
|
||||
|
||||
// Date is Y[-M[-D]]
|
||||
static bool parsedate(vector<string>::const_iterator& it,
|
||||
vector<string>::const_iterator end, DateInterval *dip)
|
||||
{
|
||||
dip->y1 = dip->m1 = dip->d1 = dip->y2 = dip->m2 = dip->d2 = 0;
|
||||
if (it == end || sscanf(it++->c_str(), "%d", &dip->y1) != 1) {
|
||||
return false;
|
||||
}
|
||||
if (it == end || *it == "/")
|
||||
return true;
|
||||
if (*it++ != "-") {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (it == end || sscanf(it++->c_str(), "%d", &dip->m1) != 1) {
|
||||
return false;
|
||||
}
|
||||
if (it == end || *it == "/")
|
||||
return true;
|
||||
if (*it++ != "-") {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (it == end || sscanf(it++->c_str(), "%d", &dip->d1) != 1) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Called with the 'P' already processed. Period ends at end of string
|
||||
// or at '/'. We dont' do a lot effort at validation and will happily
|
||||
// accept 10Y1Y4Y (the last wins)
|
||||
static bool parseperiod(vector<string>::const_iterator& it,
|
||||
vector<string>::const_iterator end, DateInterval *dip)
|
||||
{
|
||||
dip->y1 = dip->m1 = dip->d1 = dip->y2 = dip->m2 = dip->d2 = 0;
|
||||
while (it != end) {
|
||||
int value;
|
||||
if (sscanf(it++->c_str(), "%d", &value) != 1) {
|
||||
return false;
|
||||
}
|
||||
if (it == end || it->empty())
|
||||
return false;
|
||||
switch (it->at(0)) {
|
||||
case 'Y': case 'y': dip->y1 = value;break;
|
||||
case 'M': case 'm': dip->m1 = value;break;
|
||||
case 'D': case 'd': dip->d1 = value;break;
|
||||
default: return false;
|
||||
}
|
||||
it++;
|
||||
if (it == end)
|
||||
return true;
|
||||
if (*it == "/") {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static void cerrdip(const string& s, DateInterval *dip)
|
||||
{
|
||||
cerr << s << dip->y1 << "-" << dip->m1 << "-" << dip->d1 << "/"
|
||||
<< dip->y2 << "-" << dip->m2 << "-" << dip->d2
|
||||
<< endl;
|
||||
}
|
||||
|
||||
// Compute date + period. Won't work out of the unix era.
|
||||
// or pre-1970 dates. Just convert everything to unixtime and
|
||||
// seconds (with average durations for months/years), add and convert
|
||||
// back
|
||||
static bool addperiod(DateInterval *dp, DateInterval *pp)
|
||||
{
|
||||
struct tm tm;
|
||||
// Create a struct tm with possibly non normalized fields and let
|
||||
// timegm sort it out
|
||||
memset(&tm, 0, sizeof(tm));
|
||||
tm.tm_year = dp->y1 - 1900 + pp->y1;
|
||||
tm.tm_mon = dp->m1 + pp->m1 -1;
|
||||
tm.tm_mday = dp->d1 + pp->d1;
|
||||
time_t tres = timegm(&tm);
|
||||
// Convert back to normalized tm, then output
|
||||
gmtime_r(&tres, &tm);
|
||||
dp->y1 = tm.tm_year + 1900;
|
||||
dp->m1 = tm.tm_mon + 1;
|
||||
dp->d1 = tm.tm_mday;
|
||||
//cerrdip("Addperiod return", dp);
|
||||
return true;
|
||||
}
|
||||
// Return the number of days in month mon (1 = January .. 12 = December)
// of the given year, following the Gregorian leap year rule: years
// divisible by 4 are leap years, except century years, which are leap
// only when divisible by 400 (so 2000 is, 1900 and 2100 are not).
// Any value of mon outside the listed months yields 30.
int monthdays(int mon, int year)
{
    switch (mon) {
    case 2: {
        const bool leap =
            (year % 4 == 0 && year % 100 != 0) || (year % 400 == 0);
        return leap ? 29 : 28;
    }
    case 1:case 3:case 5:case 7: case 8:case 10:case 12: return 31;
    default: return 30;
    }
}
|
||||
bool parsedateinterval(const string& s, DateInterval *dip)
|
||||
{
|
||||
vector<string> vs;
|
||||
dip->y1 = dip->m1 = dip->d1 = dip->y2 = dip->m2 = dip->d2 = 0;
|
||||
DateInterval p1, p2, d1, d2;
|
||||
p1 = p2 = d1 = d2 = *dip;
|
||||
bool hasp1 = false, hasp2 = false, hasd1 = false, hasd2 = false,
|
||||
hasslash = false;
|
||||
|
||||
if (!stringToStrings(s, vs, "PYMDpymd-/")) {
|
||||
return false;
|
||||
}
|
||||
if (vs.empty())
|
||||
return false;
|
||||
|
||||
vector<string>::const_iterator it = vs.begin();
|
||||
if (*it == "P" || *it == "p") {
|
||||
it++;
|
||||
if (!parseperiod(it, vs.end(), &p1)) {
|
||||
return false;
|
||||
}
|
||||
hasp1 = true;
|
||||
//cerrdip("p1", &p1);
|
||||
p1.y1 = -p1.y1;
|
||||
p1.m1 = -p1.m1;
|
||||
p1.d1 = -p1.d1;
|
||||
} else if (*it == "/") {
|
||||
hasslash = true;
|
||||
goto secondelt;
|
||||
} else {
|
||||
if (!parsedate(it, vs.end(), &d1)) {
|
||||
return false;
|
||||
}
|
||||
hasd1 = true;
|
||||
}
|
||||
|
||||
// Got one element and/or /
|
||||
secondelt:
|
||||
if (it != vs.end()) {
|
||||
if (*it != "/") {
|
||||
return false;
|
||||
}
|
||||
hasslash = true;
|
||||
it++;
|
||||
if (it == vs.end()) {
|
||||
// ok
|
||||
} else if (*it == "P" || *it == "p") {
|
||||
it++;
|
||||
if (!parseperiod(it, vs.end(), &p2)) {
|
||||
return false;
|
||||
}
|
||||
hasp2 = true;
|
||||
} else {
|
||||
if (!parsedate(it, vs.end(), &d2)) {
|
||||
return false;
|
||||
}
|
||||
hasd2 = true;
|
||||
}
|
||||
}
|
||||
|
||||
// 2 periods dont' make sense
|
||||
if (hasp1 && hasp2) {
|
||||
return false;
|
||||
}
|
||||
// Nothing at all doesn't either
|
||||
if (!hasp1 && !hasd1 && !hasp2 && !hasd2) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Empty part means today IF other part is period, else means
|
||||
// forever (stays at 0)
|
||||
time_t now = time(0);
|
||||
struct tm *tmnow = gmtime(&now);
|
||||
if ((!hasp1 && !hasd1) && hasp2) {
|
||||
d1.y1 = 1900 + tmnow->tm_year;
|
||||
d1.m1 = tmnow->tm_mon + 1;
|
||||
d1.d1 = tmnow->tm_mday;
|
||||
hasd1 = true;
|
||||
} else if ((!hasp2 && !hasd2) && hasp1) {
|
||||
d2.y1 = 1900 + tmnow->tm_year;
|
||||
d2.m1 = tmnow->tm_mon + 1;
|
||||
d2.d1 = tmnow->tm_mday;
|
||||
hasd2 = true;
|
||||
}
|
||||
|
||||
// Incomplete dates have different meanings depending if there is
|
||||
// a period or not (actual or infinite indicated by a / + empty)
|
||||
//
|
||||
// If there is no explicit period, an incomplete date indicates a
|
||||
// period of the size of the uncompleted elements. Ex: 1999
|
||||
// actually means 1999/P12M
|
||||
//
|
||||
// If there is a period, the incomplete date should be extended
|
||||
// to the beginning or end of the unspecified portion. Ex: 1999/
|
||||
// means 1999-01-01/ and /1999 means /1999-12-31
|
||||
if (hasd1) {
|
||||
if (!(hasslash || hasp2)) {
|
||||
if (d1.m1 == 0) {
|
||||
p2.m1 = 12;
|
||||
d1.m1 = 1;
|
||||
d1.d1 = 1;
|
||||
} else if (d1.d1 == 0) {
|
||||
d1.d1 = 1;
|
||||
p2.d1 = monthdays(d1.m1, d1.y1);
|
||||
}
|
||||
hasp2 = true;
|
||||
} else {
|
||||
if (d1.m1 == 0) {
|
||||
d1.m1 = 1;
|
||||
d1.d1 = 1;
|
||||
} else if (d1.d1 == 0) {
|
||||
d1.d1 = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
// if hasd2 is true we had a /
|
||||
if (hasd2) {
|
||||
if (d2.m1 == 0) {
|
||||
d2.m1 = 12;
|
||||
d2.d1 = 31;
|
||||
} else if (d2.d1 == 0) {
|
||||
d2.d1 = monthdays(d2.m1, d2.y1);
|
||||
}
|
||||
}
|
||||
if (hasp1) {
|
||||
// Compute d1
|
||||
d1 = d2;
|
||||
if (!addperiod(&d1, &p1)) {
|
||||
return false;
|
||||
}
|
||||
} else if (hasp2) {
|
||||
// Compute d2
|
||||
d2 = d1;
|
||||
if (!addperiod(&d2, &p2)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
dip->y1 = d1.y1;
|
||||
dip->m1 = d1.m1;
|
||||
dip->d1 = d1.d1;
|
||||
dip->y2 = d2.y1;
|
||||
dip->m2 = d2.m1;
|
||||
dip->d2 = d2.d1;
|
||||
return true;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#include <string>
|
||||
@ -750,13 +995,33 @@ struct spair suffpairs[] = {
|
||||
};
|
||||
int nsuffpairs = sizeof(suffpairs) / sizeof(struct spair);
|
||||
|
||||
|
||||
// Periods test strings
|
||||
const char* periods[] = {
|
||||
"2001", // Year 2001
|
||||
"2001/", // 2001 or later
|
||||
"2001/P3Y", // 2001 -> 2004 or 2005, ambiguous
|
||||
"2001-01-01/P3Y", // 01-2001 -> 01 2004
|
||||
"2001-03-03/2001-05-01", // Explicit one
|
||||
"P3M/", // 3 months ago to now
|
||||
"P1Y1M/2001-03-01", // 2000-02-01/2001-03-01
|
||||
"/2001", // From the epoch to the end of 2001
|
||||
};
|
||||
const int nperiods = sizeof(periods) / sizeof(char*);
|
||||
|
||||
const char *thisprog;
|
||||
static void cerrdip(const string& s, DateInterval *dip)
|
||||
{
|
||||
cerr << s << dip->y1 << "-" << dip->m1 << "-" << dip->d1 << "/"
|
||||
<< dip->y2 << "-" << dip->m2 << "-" << dip->d2
|
||||
<< endl;
|
||||
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
thisprog = *argv++;argc--;
|
||||
|
||||
#if 1
|
||||
#if 0
|
||||
if (argc <=0 ) {
|
||||
cerr << "Usage: smallut <stringtosplit>" << endl;
|
||||
exit(1);
|
||||
@ -771,6 +1036,29 @@ int main(int argc, char **argv)
|
||||
cerr << "[" << *it << "] ";
|
||||
cerr << endl;
|
||||
exit(0);
|
||||
#elif 0
|
||||
if (argc <=0 ) {
|
||||
cerr << "Usage: smallut <dateinterval>" << endl;
|
||||
exit(1);
|
||||
}
|
||||
string s = *argv++;argc--;
|
||||
DateInterval di;
|
||||
if (!parsedateinterval(s, &di)) {
|
||||
cerr << "Parse failed" << endl;
|
||||
exit(1);
|
||||
}
|
||||
cerrdip("", &di);
|
||||
exit(0);
|
||||
#elif 1
|
||||
DateInterval di;
|
||||
for (int i = 0; i < nperiods; i++) {
|
||||
if (!parsedateinterval(periods[i], &di)) {
|
||||
cerr << "Parsing failed for [" << periods[i] << "]" << endl;
|
||||
} else {
|
||||
cerrdip(string(periods[i]).append(" : "), &di);
|
||||
}
|
||||
}
|
||||
exit(0);
|
||||
#elif 0
|
||||
for (int i = 0; i < npairs; i++) {
|
||||
{
|
||||
|
||||
@ -56,6 +56,23 @@ extern int stringisuffcmp(const string& s1, const string& s2);
|
||||
// Compare charset names, removing the more common spelling variations
|
||||
extern bool samecharset(const string &cs1, const string &cs2);
|
||||
|
||||
// Parse date interval specifier into pair of y,m,d dates. The format
|
||||
// for the time interval is based on a subset of iso 8601 with
|
||||
// the addition of open intervals, and removal of all time indications.
|
||||
// 'P' is the Period indicator, it's followed by a length in
|
||||
// years/months/days (or any subset thereof)
|
||||
// Dates: YYYY-MM-DD YYYY-MM YYYY
|
||||
// Periods: P[nY][nM][nD] where n is an integer value.
|
||||
// At least one of YMD must be specified
|
||||
// The separator for the interval is /. Interval examples
|
||||
// YYYY/ (from YYYY) YYYY-MM-DD/P3Y (3 years after date) etc.
|
||||
// This returns a pair of y,m,d dates.
|
||||
struct DateInterval {
    // First date of the pair: year, month, day
    int y1;
    int m1;
    int d1;
    // Second date of the pair: year, month, day
    int y2;
    int m2;
    int d2;
};
|
||||
bool parsedateinterval(const string&s, DateInterval *di);
|
||||
int monthdays(int mon, int year);
|
||||
|
||||
/**
|
||||
* Parse input string into list of strings.
|
||||
*
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user