Implement date: date range filter/searches. Remove restriction on pure negative queries

2010-09-11 12:07:53 +02:00 · 2010-09-11 12:07:53 +02:00 · ceb996c8fb
commit ceb996c8fb
parent 3a6b34d24b
8 changed files with 551 additions and 55 deletions
--- a/src/query/recollq.cpp
+++ b/src/query/recollq.cpp
@ -63,6 +63,7 @@ bool dump_contents(RclConfig *rclconfig, TempDir& tmpdir, Rcl::Doc& idoc)

 static char *thisprog;
 static char usage [] =
+" -P: Show date span for documents in index\n"
 " [-o|-a|-f] <query string>\n"
 " Runs a recoll query and displays result lines. \n"
 "  Default: will interpret the argument(s) as a xesam query string\n"
@ -110,6 +111,7 @@ static int     op_flags;
 #define OPT_s     0x4000
 #define OPT_A     0x8000
 #define OPT_i     0x10000
+#define OPT_P     0x20000

 int recollq(RclConfig **cfp, int argc, char **argv)
 {
@ -148,6 +150,7 @@ int recollq(RclConfig **cfp, int argc, char **argv)
 		if (limit <= 0) limit = INT_MAX;
 		argc--; goto b1;
            case 'o':   op_flags |= OPT_o; break;
+            case 'P':   op_flags |= OPT_P; break;
            case 'q':   op_flags |= OPT_q; break;
 	    case 'S':	op_flags |= OPT_S; if (argc < 2)  Usage();
 		sortfield = *(++argv);
@ -161,13 +164,6 @@ int recollq(RclConfig **cfp, int argc, char **argv)
    b1: argc--; argv++;
    }

-    if (argc < 1) {
-	Usage();
-    }
-    string qs = *argv++;argc--;
-    while (argc > 0) {
-	qs += string(" ") + *argv++;argc--;
-    }
    string reason;
    *cfp = recollinit(0, 0, reason, &a_config);
    RclConfig *rclconfig = *cfp;
@ -176,21 +172,10 @@ int recollq(RclConfig **cfp, int argc, char **argv)
 	exit(1);
    }

-    {
-	string uq;
-	string charset = rclconfig->getDefCharset(true);
-	int ercnt;
-	if (!transcode(qs, uq, charset, "UTF-8", &ercnt)) {
-	    fprintf(stderr, "Can't convert command line args to utf-8\n");
-	    exit(1);
-	} else if (ercnt) {
-	    fprintf(stderr, "%d errors while converting arguments from %s "
-		    "to utf-8\n", ercnt, charset.c_str());
-	}
-	qs = uq;
+    if (argc < 1 && !(op_flags & OPT_P)) {
+	Usage();
    }

-
    Rcl::Db rcldb(rclconfig);
    if (!extra_dbs.empty()) {
        for (list<string>::iterator it = extra_dbs.begin();
@ -208,6 +193,39 @@ int recollq(RclConfig **cfp, int argc, char **argv)
 	exit(1);
    }

+    if (op_flags & OPT_P) {
+        int minyear, maxyear;
+        if (!rcldb.maxYearSpan(&minyear, &maxyear)) {
+            cerr << "maxYearSpan failed: " << rcldb.getReason() << endl;
+            exit(1);
+        } else {
+            cout << "Min year " << minyear << " Max year " << maxyear << endl;
+            exit(0);
+        }
+    }
+
+    if (argc < 1) {
+	Usage();
+    }
+    string qs = *argv++;argc--;
+    while (argc > 0) {
+	qs += string(" ") + *argv++;argc--;
+    }
+
+    {
+	string uq;
+	string charset = rclconfig->getDefCharset(true);
+	int ercnt;
+	if (!transcode(qs, uq, charset, "UTF-8", &ercnt)) {
+	    fprintf(stderr, "Can't convert command line args to utf-8\n");
+	    exit(1);
+	} else if (ercnt) {
+	    fprintf(stderr, "%d errors while converting arguments from %s "
+		    "to utf-8\n", ercnt, charset.c_str());
+	}
+	qs = uq;
+    }
+
    Rcl::SearchData *sd = 0;

    if (op_flags & (OPT_a|OPT_o|OPT_f)) {
--- a/src/query/wasatorcl.cpp
+++ b/src/query/wasatorcl.cpp
@ -42,14 +42,13 @@ Rcl::SearchData *wasaStringToRcl(const string &qs, string &reason,
 	return 0;
    Rcl::SearchData *rq = wasaQueryToRcl(wq, autosuffs);
    if (rq == 0) {
-	reason = "Failed translating wasa query structure to recoll";
+	reason = "Failed translating xesam query structure to recoll";
 	return 0;
    }
    return rq;
 }

-Rcl::SearchData *wasaQueryToRcl(WasaQuery *wasa, 
-                                const string& autosuffs)
+Rcl::SearchData *wasaQueryToRcl(WasaQuery *wasa, const string& autosuffs)
 {
    if (wasa == 0)
 	return 0;
@ -119,6 +118,19 @@ Rcl::SearchData *wasaQueryToRcl(WasaQuery *wasa,
                }
            }

+            // Handle "date" spec
+	    if (!stringicmp("date", (*it)->m_fieldspec)) {
+                DateInterval di;
+                if (!parsedateinterval((*it)->m_value, &di)) {
+                    LOGERR(("wasaQueryToRcl: bad date interval format\n"));
+                    // Process rest of query anyway ?
+                    break;
+                }
+                LOGDEB(("wasaQueryToRcl:: date span:  %d-%d-%d/%d-%d-%d\n",
+                        di.y1,di.m1,di.d1, di.y2,di.m2,di.d2))
+		sdata->setDateSpan(&di);
+		break;
+	    } 

            // "Regular" processing follows:
 	    unsigned int mods = (unsigned int)(*it)->m_modifiers;
@ -151,7 +163,7 @@ Rcl::SearchData *wasaQueryToRcl(WasaQuery *wasa,
 	    
 	case WasaQuery::OP_EXCL:
 	    LOGDEB2(("wasaQueryToRcl: excl clause [%s]:[%s]\n", 
-		     (*it)->m_fieldspec.c_str(), (*it)->m_value.c_str()));
+                    (*it)->m_fieldspec.c_str(), (*it)->m_value.c_str()));
 	    if (wasa->m_op != WasaQuery::OP_AND) {
 		LOGERR(("wasaQueryToRcl: negative clause inside OR list!\n"));
 		continue;
--- a/src/rcldb/rcldb.cpp
+++ b/src/rcldb/rcldb.cpp
@ -767,6 +767,10 @@ bool Db::fieldToPrefix(const string& fld, string &pfx)
 	fldToPrefs["keyword"] = "K";
 	fldToPrefs["tag"] = "K";
 	fldToPrefs["tags"] = "K";
+
+        fldToPrefs["xapyear"] = "Y";
+        fldToPrefs["xapyearmon"] = "M";
+        fldToPrefs["xapdate"] = "D";
    }

    if (m_config && m_config->getFieldPrefix(fld, pfx))
@ -1365,6 +1369,28 @@ bool Db::filenameWildExp(const string& fnexp, list<string>& names)
    return true;
 }

+// Walk the year terms of the index (term expansion on the "xapyear"
+// field) and return the smallest and largest year found.
+//
+// @param minyear receives the smallest year seen
+// @param maxyear receives the largest year seen
+// @return false if the term expansion failed. In this case the output
+//   values are left untouched, so that defaults set by the caller
+//   survive. On success, if no usable term was found, the outputs hold
+//   the 1000000 / -1000000 sentinels (min > max).
+bool Db::maxYearSpan(int *minyear, int *maxyear)
+{
+    TermMatchResult result;
+    if (!termMatch(ET_WILD, string(), "*", result, 5000, "xapyear"))
+	return false;
+
+    // Accumulate in locals: the outputs are only written once the
+    // expansion is known to have succeeded.
+    int lo = 1000000;
+    int hi = -1000000;
+    for (list<TermMatchEntry>::const_iterator it = result.entries.begin();
+	 it != result.entries.end(); it++) {
+        if (!it->term.empty()) {
+            // The first character of the term is skipped before the
+            // conversion (presumably the one-letter term prefix)
+            int year = atoi(it->term.c_str()+1);
+            if (year < lo)
+                lo = year;
+            if (year > hi)
+                hi = year;
+        }
+    }
+    *minyear = lo;
+    *maxyear = hi;
+    return true;
+}
+
+
 class TermMatchCmpByWcf {
 public:
    int operator()(const TermMatchEntry& l, const TermMatchEntry& r) {
--- a/src/rcldb/rcldb.h
+++ b/src/rcldb/rcldb.h
@ -171,6 +171,8 @@ class Db {
 		   const string& field = "",
                   string *prefix = 0
        );
+    /** Return min and max years for doc mod times in db */
+    bool maxYearSpan(int *minyear, int *maxyear);

    /** Special filename wildcard to XSFN terms expansion.
 	internal/searchdata use only */
--- a/src/rcldb/searchdata.cpp
+++ b/src/rcldb/searchdata.cpp
@ -46,33 +46,107 @@ typedef  vector<SearchDataClause *>::const_iterator qlist_cit_t;

 static const int original_term_wqf_booster = 10;

+/* The dates-to-query routine is lifted quasi-verbatim, with
+ *  modifications, from xapian-omega:date.cc. Copyright info:
+ *
+ * Copyright 1999,2000,2001 BrightStation PLC
+ * Copyright 2001 James Aylett
+ * Copyright 2001,2002 Ananova Ltd
+ * Copyright 2002 Intercede 1749 Ltd
+ * Copyright 2002,2003,2006 Olly Betts
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
+ * USA
+ */
+// Build a query matching the documents dated inside the interval
+// y1-m1-d1 .. y2-m2-d2, both ends included.
+//
+// The result is an OP_OR of terms built in 'buf': "D" + YYYYMMDD for
+// single days, "M" + YYYYMM for whole months and "Y" + YYYY for whole
+// years. Partial months at either end are enumerated day by day.
+// The term text is built incrementally by overwriting parts of 'buf'
+// (offset 1: year, offset 5: month, offset 7: day), so the statement
+// order matters.
+// If the first month yields no day at all (d1 beyond the last day to
+// enumerate) and the interval lies inside a single month, the OR is
+// built from an empty list.
+static Xapian::Query
+date_range_filter(int y1, int m1, int d1, int y2, int m2, int d2)
+{
+    // Xapian uses a smallbuf and snprintf. Can't be bothered, we're
+    // only doing %d's !
+    char buf[200];
+    sprintf(buf, "D%04d%02d", y1, m1);
+    vector<Xapian::Query> v;
+
+    // Last day to enumerate in the first month: end of month, or d2 if
+    // the whole interval is inside this month
+    int d_last = monthdays(m1, y1);
+    int d_end = d_last;
+    if (y1 == y2 && m1 == m2 && d2 < d_last) {
+	d_end = d2;
+    }
+    // Deal with any initial partial month
+    if (d1 > 1 || d_end < d_last) {
+    	for ( ; d1 <= d_end ; d1++) {
+	    sprintf(buf + 7, "%02d", d1);
+	    v.push_back(Xapian::Query(buf));
+	}
+    } else {
+	// The first month is complete: use a single month term
+	buf[0] = 'M';
+	v.push_back(Xapian::Query(buf));
+    }
+    
+    // Interval contained in a single month: done
+    if (y1 == y2 && m1 == m2) {
+	return Xapian::Query(Xapian::Query::OP_OR, v.begin(), v.end());
+    }
+
+    // Whole months following the first one, up to the end of the first
+    // year or up to the month preceding the last one
+    int m_last = (y1 < y2) ? 12 : m2 - 1;
+    while (++m1 <= m_last) {
+	sprintf(buf + 5, "%02d", m1);
+	buf[0] = 'M';
+	v.push_back(Xapian::Query(buf));
+    }
+	
+    if (y1 < y2) {
+	// Whole years strictly between the first and last ones
+	while (++y1 < y2) {
+	    sprintf(buf + 1, "%04d", y1);
+	    buf[0] = 'Y';
+	    v.push_back(Xapian::Query(buf));
+	}
+	// Whole months of the last year preceding the last month
+	sprintf(buf + 1, "%04d", y2);
+	buf[0] = 'M';
+	for (m1 = 1; m1 < m2; m1++) {
+	    sprintf(buf + 5, "%02d", m1);
+	    v.push_back(Xapian::Query(buf));
+	}
+    }
+	
+    // Set the month part to the last month for the final terms
+    sprintf(buf + 5, "%02d", m2);
+
+    // Deal with any final partial month
+    if (d2 < monthdays(m2, y2)) {
+	buf[0] = 'D';
+    	for (d1 = 1 ; d1 <= d2; d1++) {
+	    sprintf(buf + 7, "%02d", d1);
+	    v.push_back(Xapian::Query(buf));
+	}
+    } else {
+	buf[0] = 'M';
+	v.push_back(Xapian::Query(buf));
+    }
+
+    return Xapian::Query(Xapian::Query::OP_OR, v.begin(), v.end());
+}
+
 bool SearchData::toNativeQuery(Rcl::Db &db, void *d)
 {
    Xapian::Query xq;
    m_reason.erase();

-    if (m_query.size() < 1) {
+    if (!m_query.size() && !m_haveDates) {
 	m_reason = "empty query";
 	return false;
    }

-    // It's not allowed to have a pure negative query and also it
-    // seems that Xapian doesn't like the first element to be AND_NOT
-    qlist_it_t itnotneg = m_query.end();
-    for (qlist_it_t it = m_query.begin(); it != m_query.end(); it++) {
-	if ((*it)->m_tp != SCLT_EXCL) {
-	    itnotneg = it;
-	    break;
-	}
-    }
-    if (itnotneg == m_query.end()) {
-	LOGERR(("SearchData::toNativeQuery: can't have all negative clauses"));
-	m_reason = "Can't have only negative clauses";
-	return false;
-    }
-    if ((*m_query.begin())->m_tp == SCLT_EXCL) 
-	iter_swap(m_query.begin(), itnotneg);
-
    // Walk the clause list translating each in turn and building the 
    // Xapian query tree
    for (qlist_it_t it = m_query.begin(); it != m_query.end(); it++) {
@ -91,12 +165,59 @@ bool SearchData::toNativeQuery(Rcl::Db &db, void *d)
 	// addClause())
 	Xapian::Query::op op;
 	if (m_tp == SCLT_AND) {
-	    op = (*it)->m_tp == SCLT_EXCL ? 
-		Xapian::Query::OP_AND_NOT: Xapian::Query::OP_AND;
+            if ((*it)->m_tp == SCLT_EXCL) {
+                op =  Xapian::Query::OP_AND_NOT;
+            } else {
+                op =  Xapian::Query::OP_AND;
+            }
 	} else {
 	    op = Xapian::Query::OP_OR;
 	}
-	xq = xq.empty() ? nq : Xapian::Query(op, xq, nq);
+        if (xq.empty()) {
+            if (op == Xapian::Query::OP_AND_NOT)
+                xq = Xapian::Query(op, Xapian::Query::MatchAll, nq);
+            else 
+                xq = nq;
+        } else {
+            xq = Xapian::Query(op, xq, nq);
+        }
+    }
+        
+    if (m_haveDates) {
+        // If one of the extremities is unset, compute db extremas
+        if (m_dates.y1 == 0 || m_dates.y2 == 0) {
+            int minyear = 1970, maxyear = 2100;
+            if (!db.maxYearSpan(&minyear, &maxyear)) {
+                LOGERR(("Can't retrieve index min/max dates\n"));
+                //whatever, go on.
+            }
+            if (m_dates.y1 == 0) {
+                m_dates.y1 = minyear;
+                m_dates.m1 = 1;
+                m_dates.d1 = 1;
+            }
+            if (m_dates.y2 == 0) {
+                m_dates.y2 = maxyear;
+                m_dates.m2 = 12;
+                m_dates.d2 = 31;
+            }
+        }
+        LOGDEB(("Db::toNativeQuery: date interval: %d-%d-%d/%d-%d-%d\n",
+                m_dates.y1, m_dates.m1, m_dates.d1,
+                m_dates.y2, m_dates.m2, m_dates.d2));
+        Xapian::Query dq = date_range_filter(m_dates.y1, m_dates.m1, m_dates.d1,
+                m_dates.y2, m_dates.m2, m_dates.d2);
+        if (dq.empty()) {
+            LOGINFO(("Db::toNativeQuery: date filter is empty\n"));
+        }
+        // If no probabilistic query is provided then promote the daterange
+        // filter to be THE query instead of filtering an empty query.
+        if (xq.empty()) {
+            LOGINFO(("Db::toNativeQuery: proba query is empty\n"));
+            xq = dq;
+        } else {
+            xq = Xapian::Query(Xapian::Query::OP_FILTER, xq, dq);
+        }
    }

    // Add the file type filtering clause if any
@ -116,7 +237,6 @@ bool SearchData::toNativeQuery(Rcl::Db &db, void *d)
 	    }
 	}
 	    
-	list<Xapian::Query> pqueries;
 	Xapian::Query tq;
 	for (vector<string>::iterator it = exptps.begin(); 
 	     it != exptps.end(); it++) {
@ -157,6 +277,7 @@ void SearchData::erase() {
    m_topdir.erase();
    m_description.erase();
    m_reason.erase();
+    m_haveDates = false;
 }

 // Am I a file name only search ? This is to turn off term highlighting
--- a/src/rcldb/searchdata.h
+++ b/src/rcldb/searchdata.h
@ -30,6 +30,7 @@

 #include "rcldb.h"
 #include "refcntr.h"
+#include "smallut.h"

 #ifndef NO_NAMESPACES
 using std::vector;
@ -70,7 +71,9 @@ class SearchDataClause;
 */
 class SearchData {
 public:
-    SearchData(SClType tp) : m_tp(tp), m_haveWildCards(false) {}
+    SearchData(SClType tp) 
+        : m_tp(tp), m_haveDates(false), m_haveWildCards(false) 
+    {}
    ~SearchData() {erase();}

    /** Make pristine */
@ -88,6 +91,18 @@ public:
    /** We become the owner of cl and will delete it */
    bool addClause(SearchDataClause *cl);

+    /** Set/get top subdirectory for filtering results */
+    void setTopdir(const string& t) {m_topdir = t;}
+    string getTopdir() {return m_topdir;}
+
+    /** Set date span for filtering results. The interval is copied and
+     *  the date filter is marked active. dip is dereferenced
+     *  unconditionally and must not be null. */
+    void setDateSpan(DateInterval *dip) {m_dates = *dip; m_haveDates = true;}
+
+    /** Add file type for filtering results */
+    void addFiletype(const string& ft) {m_filetypes.push_back(ft);}
+
+    void setStemlang(const string& lang = "english") {m_stemlang = lang;}
+
    /** Retrieve error description */
    string getReason() {return m_reason;}

@ -107,17 +122,14 @@ public:
     */
    string getDescription() {return m_description;}
    void setDescription(const string& d) {m_description = d;}
-    /** Get/set top subdirectory for filtering results */
-    string getTopdir() {return m_topdir;}
-    void setTopdir(const string& t) {m_topdir = t;}
-    /** Add file type for filtering results */
-    void addFiletype(const string& ft) {m_filetypes.push_back(ft);}
-    void setStemlang(const string& lang = "english") {m_stemlang = lang;}
+
 private:
    SClType                   m_tp; // Only SCLT_AND or SCLT_OR here
    vector<SearchDataClause*> m_query;
    vector<string>            m_filetypes; // Restrict to filetypes if set.
    string                    m_topdir; // Restrict to subtree.
+    bool                      m_haveDates;
+    DateInterval              m_dates; // Restrict to date interval
    // Printable expanded version of the complete query, retrieved/set
    // from rcldb after the Xapian::setQuery() call
    string m_description; 
--- a/src/utils/smallut.cpp
+++ b/src/utils/smallut.cpp
@ -29,6 +29,7 @@ static char rcsid[] = "@(#$Id: smallut.cpp,v 1.35 2008-11-19 10:06:49 dockes Exp
 #include <sys/stat.h>

 #include <string>
+#include <iostream>

 #include "smallut.h"
 #include "utf8iter.h"
@ -281,6 +282,7 @@ template <class T> bool stringToStrings(const string &s, T &tokens,
    }
    return true;
 }
+
 bool stringToStrings(const string &s, list<string> &tokens, 
                     const string& as)
 {
@ -707,10 +709,253 @@ float Chrono::secs(int frozen)
  gettime(CLOCK_REALTIME, &tv);
  float secs = (float)(frozen?frozen_tv.tv_sec:tv.tv_sec - m_secs);
  float nsecs = (float)(frozen?frozen_tv.tv_nsec:tv.tv_nsec - m_nsecs); 
-  //fprintf(stderr, "secs %.2f nsecs %.2f\n", secs, nsecs);
  return secs + nsecs * 1e-9;
 }

+// Parse a date of the form Y[-M[-D]] from the token list.
+//
+// @param it  current position in the token list, advanced past the
+//            consumed tokens.
+// @param end end of the token list.
+// @param dip output: all fields are zeroed first, then y1 / m1 / d1
+//            receive the parsed year / month / day. Parts which are
+//            not present stay at 0.
+// @return true if a date was parsed (parsing stops at the end of the
+//         list, in front of a "/" token, or after the day value),
+//         false on a syntax error.
+static bool parsedate(vector<string>::const_iterator& it, 
+              vector<string>::const_iterator end, DateInterval *dip)
+{
+    dip->y1 = dip->m1 = dip->d1 = dip->y2 = dip->m2 = dip->d2 = 0;
+
+    // Year: mandatory
+    if (it == end || sscanf(it++->c_str(), "%d", &dip->y1) != 1) {
+        return false;
+    }
+    if (it == end || *it == "/")
+        return true;
+    if (*it++ != "-") {
+        return false;
+    }
+
+    // Month: mandatory after a '-'
+    if (it == end || sscanf(it++->c_str(), "%d", &dip->m1) != 1) {
+        return false;
+    }
+    if (it == end || *it == "/")
+        return true;
+    if (*it++ != "-") {
+        return false;
+    }
+
+    // Day: mandatory after the second '-'
+    if (it == end || sscanf(it++->c_str(), "%d", &dip->d1) != 1) {
+        // This used to be "return -1", which converts to true and
+        // reported a missing or non-numeric day as a success.
+        return false;
+    }
+
+    return true;
+}
+
+// Parse a period specification, the leading 'P' having already been
+// consumed by the caller. The period extends to the end of the token
+// list or to a "/" token. Validation is minimal: a repeated unit such
+// as 10Y1Y4Y is accepted and the last value wins.
+//
+// All fields of *dip are zeroed on entry; the parsed counts are stored
+// in y1 / m1 / d1. When true is returned, 'it' is either 'end' or
+// points at the "/" token.
+static bool parseperiod(vector<string>::const_iterator& it, 
+                        vector<string>::const_iterator end, DateInterval *dip)
+{
+    dip->y1 = dip->m1 = dip->d1 = dip->y2 = dip->m2 = dip->d2 = 0;
+    for (;;) {
+        if (it == end)
+            return true;
+
+        // Each element is a number token followed by a unit token
+        int count;
+        const string& number = *it;
+        ++it;
+        if (sscanf(number.c_str(), "%d", &count) != 1)
+            return false;
+        if (it == end || it->empty())
+            return false;
+
+        const char unit = it->at(0);
+        if (unit == 'Y' || unit == 'y') {
+            dip->y1 = count;
+        } else if (unit == 'M' || unit == 'm') {
+            dip->m1 = count;
+        } else if (unit == 'D' || unit == 'd') {
+            dip->d1 = count;
+        } else {
+            return false;
+        }
+        ++it;
+
+        // A '/' ends the period, leave it for the caller
+        if (it != end && *it == "/")
+            return true;
+    }
+}
+
+// Debug helper: print label s, then the interval as y1-m1-d1/y2-m2-d2,
+// on cerr.
+static void cerrdip(const string& s, DateInterval *dip)
+{
+    cerr << s;
+    cerr << dip->y1 << "-" << dip->m1 << "-" << dip->d1;
+    cerr << "/";
+    cerr << dip->y2 << "-" << dip->m2 << "-" << dip->d2;
+    cerr << endl;
+}
+
+// Compute date + period: the y1 / m1 / d1 fields of *pp (which may be
+// negative) are added to the y1 / m1 / d1 fields of *dp, and the
+// normalized result is stored back into *dp.
+// The sums are stored as-is (possibly out of range) in a struct tm,
+// which timegm() converts to a time value; gmtime_r() then converts
+// back to normalized calendar fields.
+// Per the original author's note, this is not expected to work for
+// dates outside the unix era (pre-1970).
+// The return values of timegm() and gmtime_r() are not checked: this
+// always returns true.
+static bool addperiod(DateInterval *dp, DateInterval *pp)
+{
+    struct tm tm;
+    // Create a struct tm with possibly non normalized fields and let
+    // timegm sort it out
+    memset(&tm, 0, sizeof(tm));
+    tm.tm_year = dp->y1 - 1900 + pp->y1;
+    tm.tm_mon = dp->m1 + pp->m1 -1;
+    tm.tm_mday = dp->d1 + pp->d1;
+    time_t tres = timegm(&tm);
+    // Convert back to normalized tm, then output
+    gmtime_r(&tres, &tm);
+    dp->y1 = tm.tm_year + 1900;
+    dp->m1 = tm.tm_mon + 1;
+    dp->d1 = tm.tm_mday;
+    //cerrdip("Addperiod return", dp);
+    return true;
+}
+// Return the number of days in month 'mon' (1 to 12) of year 'year'.
+// February uses the full Gregorian leap year rule: years divisible by
+// 4 are leap years, except century years which are not divisible by
+// 400 (1900 and 2100 are not leap years, 2000 is).
+// Any 'mon' value other than 2 and the 31-day months yields 30.
+int monthdays(int mon, int year)
+{
+    switch (mon) {
+    case 2: 
+        return ((year % 4 == 0 && year % 100 != 0) || year % 400 == 0) 
+            ? 29 : 28;
+    case 1:case 3:case 5:case 7: case 8:case 10:case 12: return 31;
+    default: return 30;
+    }
+}
+// Parse a date interval specification into a pair of y,m,d dates.
+//
+// The input is made of one or two elements separated by '/'. Each
+// element is either a date (Y[-M[-D]]), a period (P followed by
+// number/unit pairs, units Y, M or D), or empty.
+//
+// @param s   the interval specification
+// @param dip output: y1/m1/d1 receive the start date, y2/m2/d2 the end
+//            date. All fields are zeroed on entry. A bound which was
+//            not specified and could not be computed stays at 0.
+// @return false if the string can't be split into tokens, is empty,
+//         has a syntax error, holds two periods, or holds neither a
+//         date nor a period.
+//
+// NOTE(review): when the end date is computed from an implied period
+// (single incomplete date, no '/'), the result is the first day of the
+// following year or month, e.g. "2001" yields 2001-1-1/2002-1-1.
+// Confirm whether the users of the interval treat the end date as
+// inclusive.
+bool parsedateinterval(const string& s, DateInterval *dip)
+{
+    vector<string> vs;
+    dip->y1 = dip->m1 = dip->d1 = dip->y2 = dip->m2 = dip->d2 = 0;
+    // p1/p2: periods found as first/second element (only the y1/m1/d1
+    // fields are used). d1/d2: start/end dates (same remark).
+    DateInterval p1, p2, d1, d2;
+    p1 = p2 = d1 = d2 = *dip;
+    bool hasp1 = false, hasp2 = false, hasd1 = false, hasd2 = false, 
+        hasslash = false;
+
+    // Split the input into tokens. Presumably the characters listed in
+    // the third argument come back as separate single-character
+    // tokens - TODO confirm against stringToStrings()
+    if (!stringToStrings(s, vs, "PYMDpymd-/")) {
+        return false;
+    }
+    if (vs.empty())
+        return false;
+
+    // First element: period, nothing (leading '/'), or date
+    vector<string>::const_iterator it = vs.begin();
+    if (*it == "P" || *it == "p") {
+        it++;
+        if (!parseperiod(it, vs.end(), &p1)) {
+            return false;
+        }
+        hasp1 = true;
+        //cerrdip("p1", &p1);
+        // A period in first position is counted backwards from the end
+        // date: negate it so that it can be added later
+        p1.y1 = -p1.y1;
+        p1.m1 = -p1.m1;
+        p1.d1 = -p1.d1;
+    } else if (*it == "/") {
+        hasslash = true;
+        goto secondelt;
+    } else {
+        if (!parsedate(it, vs.end(), &d1)) {
+            return false;
+        }
+        hasd1 = true;
+    }
+
+    // Got one element and/or /
+secondelt:
+    if (it != vs.end()) {
+        // Anything following the first element must begin with '/'
+        if (*it != "/") {
+            return false;
+        }
+        hasslash = true;
+        it++;
+        if (it == vs.end()) {
+            // ok
+        } else if (*it == "P" || *it == "p") {
+            it++;
+            if (!parseperiod(it, vs.end(), &p2)) {
+                return false;
+            }
+        hasp2 = true;
+        } else {
+            if (!parsedate(it, vs.end(), &d2)) {
+                return false;
+            }
+            hasd2 = true;
+        }
+    }
+
+    // 2 periods don't make sense
+    if (hasp1 && hasp2) {
+        return false;
+    }
+    // Nothing at all doesn't either
+    if (!hasp1 && !hasd1 && !hasp2 && !hasd2) {
+        return false;
+    }
+
+    // Empty part means today IF other part is period, else means
+    // forever (stays at 0)
+    // "Today" is the current date as returned by gmtime()
+    time_t now = time(0);
+    struct tm *tmnow = gmtime(&now);
+    if ((!hasp1 && !hasd1) && hasp2) {
+        d1.y1 = 1900 + tmnow->tm_year;
+        d1.m1 = tmnow->tm_mon + 1;
+        d1.d1 = tmnow->tm_mday;
+        hasd1 = true;
+    } else if ((!hasp2 && !hasd2) && hasp1) {
+        d2.y1 = 1900 + tmnow->tm_year;
+        d2.m1 = tmnow->tm_mon + 1;
+        d2.d1 = tmnow->tm_mday;
+        hasd2 = true;
+    }
+
+    // Incomplete dates have different meanings depending if there is
+    // a period or not (actual or infinite indicated by a / + empty)
+    //
+    // If there is no explicit period, an incomplete date indicates a
+    // period of the size of the uncompleted elements. Ex: 1999
+    // actually means 1999/P12M
+    // 
+    // If there is a period, the incomplete date should be extended
+    // to the beginning or end of the unspecified portion. Ex: 1999/
+    // means 1999-01-01/ and /1999 means /1999-12-31
+    if (hasd1) {
+        if (!(hasslash || hasp2)) {
+            // Single date: set up the implied period (12 months, or
+            // the number of days in the month; it stays all zero for a
+            // complete date, which then yields a single day)
+            if (d1.m1 == 0) {
+                p2.m1 = 12;
+                d1.m1 = 1;
+                d1.d1 = 1;
+            } else if (d1.d1 == 0) {
+                d1.d1 = 1;
+                p2.d1 = monthdays(d1.m1, d1.y1);
+            }
+            hasp2 = true;
+        } else {
+            // Start date of an interval: extend to the beginning of
+            // the year or month
+            if (d1.m1 == 0) {
+                d1.m1 = 1;
+                d1.d1 = 1;
+            } else if (d1.d1 == 0) {
+                d1.d1 = 1;
+            }
+        }
+    }
+    // if hasd2 is true we had a /
+    if (hasd2) {
+        // End date: extend to the end of the year or month
+        if (d2.m1 == 0) {
+            d2.m1 = 12;
+            d2.d1 = 31;
+        } else if (d2.d1 == 0) {
+            d2.d1 = monthdays(d2.m1, d2.y1);
+        }
+    }
+    if (hasp1) {
+        // Compute d1
+        d1 = d2;
+        if (!addperiod(&d1, &p1)) {
+            return false;
+        }
+    } else if (hasp2) {
+        // Compute d2
+        d2 = d1;
+        if (!addperiod(&d2, &p2)) {
+            return false;
+        }
+    }
+
+    // Both dates are held in the y1/m1/d1 fields of their respective
+    // structures: copy them to the two halves of the output
+    dip->y1 = d1.y1;
+    dip->m1 = d1.m1;
+    dip->d1 = d1.d1;
+    dip->y2 = d2.y1;
+    dip->m2 = d2.m1;
+    dip->d2 = d2.d1;
+    return true;
+}
+
 #else

 #include <string>
@ -750,13 +995,33 @@ struct spair suffpairs[] = {
 };
 int nsuffpairs = sizeof(suffpairs) / sizeof(struct spair);

+
+// Date interval specifications used by the test driver
+const char* periods[] = {
+    "2001",                  // Bare year: year 2001
+    "2001/",                 // Open end: 2001 or later
+    "2001/P3Y",              // Year + period: 2001 -> 2004 or 2005, ambiguous
+    "2001-01-01/P3Y",        // Date + period: 01-2001 -> 01-2004
+    "2001-03-03/2001-05-01", // Two explicit dates
+    "P3M/",                  // Period + open end: 3 months ago to now
+    "P1Y1M/2001-03-01",      // Period + date: 2000-02-01/2001-03-01
+    "/2001",                 // Open start, up to the end of 2001
+};
+const int nperiods = sizeof(periods) / sizeof(periods[0]);
+
 const char *thisprog;
+// Test driver helper: print label s, then the interval as
+// y1-m1-d1/y2-m2-d2, on cerr.
+static void cerrdip(const string& s, DateInterval *dip)
+{
+    cerr << s;
+    cerr << dip->y1 << "-" << dip->m1 << "-" << dip->d1;
+    cerr << "/";
+    cerr << dip->y2 << "-" << dip->m2 << "-" << dip->d2;
+    cerr << endl;
+}

 int main(int argc, char **argv)
 {
    thisprog = *argv++;argc--;

-#if 1
+#if 0
    if (argc <=0 ) {
        cerr << "Usage: smallut <stringtosplit>" << endl;
        exit(1);
@ -771,6 +1036,29 @@ int main(int argc, char **argv)
        cerr << "[" << *it << "] ";
    cerr << endl;
    exit(0);
+#elif 0
+    if (argc <=0 ) {
+        cerr << "Usage: smallut <dateinterval>" << endl;
+        exit(1);
+    }
+    string s = *argv++;argc--;
+    DateInterval di;
+    if (!parsedateinterval(s, &di)) {
+        cerr << "Parse failed" << endl;
+        exit(1);
+    }
+    cerrdip("", &di);
+    exit(0);
+#elif 1
+    DateInterval di;
+    for (int i = 0; i < nperiods; i++) {
+        if (!parsedateinterval(periods[i], &di)) {
+            cerr << "Parsing failed for [" << periods[i] << "]" << endl;
+        } else {
+            cerrdip(string(periods[i]).append(" : "), &di);
+        }
+    }
+    exit(0);
 #elif 0
    for (int i = 0; i < npairs; i++) {
 	{
--- a/src/utils/smallut.h
+++ b/src/utils/smallut.h
@ -56,6 +56,23 @@ extern int stringisuffcmp(const string& s1, const string& s2);
 // Compare charset names, removing the more common spelling variations
 extern bool samecharset(const string &cs1, const string &cs2);

+// Date interval parsing.
+//
+// The interval format is based on a subset of ISO 8601, without any
+// time of day indication, and with the addition of open intervals.
+//   Dates:    YYYY-MM-DD, YYYY-MM or YYYY
+//   Periods:  P[nY][nM][nD], where 'P' is the period indicator and n
+//             is an integer count of years / months / days. At least
+//             one of Y, M, D must be specified.
+//   Interval: two elements separated by '/'. Examples:
+//             YYYY/ (from YYYY on), YYYY-MM-DD/P3Y (3 years after the
+//             date), etc.
+
+// A pair of y,m,d dates: start (y1, m1, d1) and end (y2, m2, d2).
+struct DateInterval {
+    int y1;
+    int m1;
+    int d1;
+    int y2;
+    int m2;
+    int d2;
+};
+
+// Parse the interval specification s and store the resulting pair of
+// dates in *di. Returns false if the specification can't be parsed.
+bool parsedateinterval(const string&s, DateInterval *di);
+
+// Return the number of days in month mon (1 to 12) of the given year.
+int monthdays(int mon, int year);
+
+
 /**
 * Parse input string into list of strings. 
 *