From 27584674d547c33645421053bbb1004a79ebd806 Mon Sep 17 00:00:00 2001
From: Jean-Francois Dockes
Date: Wed, 30 Oct 2013 16:56:18 +0100
Subject: [PATCH] added python api tests

---
Review note: the file contents below were edited after this patch was
generated, so the blob ids on the "index" lines no longer match them.

 tests/pythonapi/doc.py        | 53 +++++++++++++++++++++++++
 tests/pythonapi/pythonapi.sh  | 30 ++++++++++++++
 tests/pythonapi/pythonapi.txt | 75 +++++++++++++++++++++++++++++++++++
 tests/pythonapi/simple.py     | 38 ++++++++++++++++++
 tests/pythonapi/url.py        | 47 ++++++++++++++++++++++
 5 files changed, 243 insertions(+)
 create mode 100644 tests/pythonapi/doc.py
 create mode 100755 tests/pythonapi/pythonapi.sh
 create mode 100644 tests/pythonapi/pythonapi.txt
 create mode 100644 tests/pythonapi/simple.py
 create mode 100644 tests/pythonapi/url.py

diff --git a/tests/pythonapi/doc.py b/tests/pythonapi/doc.py
new file mode 100644
index 00000000..77bfafc9
--- /dev/null
+++ b/tests/pythonapi/doc.py
@@ -0,0 +1,53 @@
+import sys
+from recoll import recoll
+
+# True when running under Python 3 or later.
+ISP3 = sys.version_info[0] >= 3
+
+def utf8string(s):
+    """Return s as-is on Python 3; UTF-8 encode it on Python 2."""
+    if ISP3:
+        return s
+    return s.encode('utf8')
+
+if ISP3:
+    def u(x):
+        """Return x unchanged."""
+        return x
+else:
+    import codecs
+    def u(x):
+        """Decode the backslash escapes in x with unicode_escape_decode."""
+        return codecs.unicode_escape_decode(x)[0]
+
+db = recoll.connect()
+query = db.query()
+
+nres = query.execute("testfield:testfieldvalue", stemming=0)
+qs = "Xapian query: [%s]" % query.getxquery()
+print(utf8string(qs))
+
+print("Result count: %d %d" % (nres, query.rowcount))
+
+for doc in query:
+    print("doc.title: [%s]"%utf8string(doc.title))
+    print("doc.testfield: [%s]"%utf8string(doc.testfield))
+    for fld in ('title', 'testfield', 'filename'):
+        print("getattr(doc, %s) -> [%s]"%(fld,utf8string(getattr(doc, fld))))
+        print("doc.get(%s) -> [%s]"%(fld,utf8string(doc.get(fld))))
+    print("\nfor fld in doc.keys():")
+    for fld in doc.keys():
+        print(utf8string("[%s] -> [%s]" % (fld, getattr(doc, fld))))
+    print("\nfor k,v in doc.items().items():")
+    for k,v in doc.items().items():
+        print(utf8string("[%s] -> [%s]" % (k, v)))
+
+print("\nAccented query:")
+uqs = u('title:"\u00e9t\u00e9 \u00e0 no\u00ebl"')
+print("User query [%s]"%utf8string(uqs))
+nres = query.execute(uqs, stemming=0)
+qs = "Xapian query: [%s]" % query.getxquery()
+print(utf8string(qs))
+print("nres %d" %(nres,))
+doc = query.fetchone()
+print("doc.title: [%s]"%utf8string(doc.title))
diff --git a/tests/pythonapi/pythonapi.sh b/tests/pythonapi/pythonapi.sh
new file mode 100755
index 00000000..09a1c78f
--- /dev/null
+++ b/tests/pythonapi/pythonapi.sh
@@ -0,0 +1,30 @@
+#!/bin/sh
+
+# Test the Python API
+
+thisdir=`dirname "$0"`
+topdir=$thisdir/..
+. "$topdir/shared.sh"
+
+initvariables "$0"
+
+# Echo a command line, then run it.
+xrun() {
+    echo "$@"
+    "$@"
+}
+
+(
+    t2=$toptmp/python2out
+    t3=$toptmp/python3out
+    for i in *.py;do python "$i" ;done > "$t2"
+    for i in *.py;do python3 "$i" ;done > "$t3"
+    # Report any difference between the two interpreters' outputs.
+    if ! cmp "$t2" "$t3" ; then
+        echo "Python2 and Python 3 outputs differ: $t2 $t3"
+    fi
+    for i in *.py;do xrun python "$i" ;done
+) 2> "$mystderr" | grep -E -v '^Recoll query: ' > "$mystdout"
+
+diff -w "${myname}.txt" "$mystdout" > "$mydiffs" 2>&1
+checkresult
diff --git a/tests/pythonapi/pythonapi.txt b/tests/pythonapi/pythonapi.txt
new file mode 100644
index 00000000..b0beb11e
--- /dev/null
+++ b/tests/pythonapi/pythonapi.txt
@@ -0,0 +1,75 @@
+python doc.py
+Xapian query: [(TSTFLDtestfieldvalue:(wqf=11))]
+Result count: 1 1
+doc.title: [HTML fields test file: été à noël]
+doc.testfield: [testfieldvalue]
+getattr(doc, title) -> [HTML fields test file: été à noël]
+doc.get(title) -> [HTML fields test file: été à noël]
+getattr(doc, testfield) -> [testfieldvalue]
+doc.get(testfield) -> [testfieldvalue]
+getattr(doc, filename) -> [htmlfield.html]
+doc.get(filename) -> [htmlfield.html]
+
+for fld in doc.keys():
+[abstract] -> [ ThisIsTheFieldHtmlTestFile]
+[caption] -> [HTML fields test file: été à noël]
+[dbytes] -> [27]
+[fbytes] -> [266]
+[filename] -> [htmlfield.html]
+[fmtime] -> [01383142914]
+[ipath] -> []
+[mtime] -> [01383142914]
+[mtype] -> [text/html]
+[origcharset] -> [utf-8]
+[pcbytes] -> [266]
+[rcludi] -> [/home/dockes/projets/fulltext/testrecoll/html/htmlfield.html|]
+[relevancyrating] -> [100%]
+[sig] -> [2661383142914]
+[testfield] -> [testfieldvalue]
+[title] -> [HTML fields test file: été à noël]
+[url] -> [file:///home/dockes/projets/fulltext/testrecoll/html/htmlfield.html]
+
+for k,v in doc.items().items():
+[testfield] -> [testfieldvalue]
+[ipath] -> []
+[url] -> [file:///home/dockes/projets/fulltext/testrecoll/html/htmlfield.html]
+[abstract] -> [ ThisIsTheFieldHtmlTestFile]
+[pcbytes] -> [266]
+[fbytes] -> [266]
+[filename] -> [htmlfield.html]
+[mtype] -> [text/html]
+[caption] -> [HTML fields test file: été à noël]
+[fmtime] -> [01383142914]
+[dbytes] -> [27]
+[sig] -> [2661383142914]
+[mtime] -> [01383142914]
+[title] -> [HTML fields test file: été à noël]
+[rcludi] -> [/home/dockes/projets/fulltext/testrecoll/html/htmlfield.html|]
+[relevancyrating] -> [100%]
+[origcharset] -> [utf-8]
+
+Accented query:
+User query [title:"été à noël"]
+Xapian query: [(10 * (Sete PHRASE 3 Sa PHRASE 3 Snoel))]
+nres 1
+doc.title: [HTML fields test file: été à noël]
+python simple.py
+Xapian query: [(huniique:(wqf=11))]
+Result count: 2 2
+for i in range(nres):
+Home.ics
+unique.txt
+
+for doc in query:
+Home.ics
+unique.txt
+
+Catched bad mode. (ok)
+python url.py
+Xapian query: [((latin1name_uniquexxx:(wqf=11) AND XPiso8859name))]
+Result count: 1 1
+iso8859-1_????????.txt
+Contents: [LATIN1NAME_UNIQUEXXX
+ Contenu du fichier dont le nom est:
+àáâãäåæç
+]
diff --git a/tests/pythonapi/simple.py b/tests/pythonapi/simple.py
new file mode 100644
index 00000000..54691ee9
--- /dev/null
+++ b/tests/pythonapi/simple.py
@@ -0,0 +1,38 @@
+import sys
+from recoll import recoll
+
+# True when running under Python 3 or later.
+ISP3 = sys.version_info[0] >= 3
+
+def utf8string(s):
+    """Return s as-is on Python 3; UTF-8 encode it on Python 2."""
+    if ISP3:
+        return s
+    return s.encode('utf8')
+
+db = recoll.connect()
+query = db.query()
+
+nres = query.execute("huniique", stemlang="english")
+qs = "Xapian query: [%s]" % query.getxquery()
+print(utf8string(qs))
+
+print("Result count: %d %d" % (nres, query.rowcount))
+
+print("for i in range(nres):")
+for i in range(nres):
+    doc = query.fetchone()
+    print(utf8string(doc.filename))
+
+# Move back to position 0 so the results can be iterated a second time.
+query.scroll(0, 'absolute')
+print("\nfor doc in query:")
+for doc in query:
+    print(utf8string(doc.filename))
+
+# An unknown scroll mode is expected to raise; catch Exception rather than
+# using a bare except so KeyboardInterrupt/SystemExit are not swallowed.
+try:
+    query.scroll(0, 'badmode')
+except Exception:
+    print("\nCatched bad mode. (ok)")
diff --git a/tests/pythonapi/url.py b/tests/pythonapi/url.py
new file mode 100644
index 00000000..f9f85ac1
--- /dev/null
+++ b/tests/pythonapi/url.py
@@ -0,0 +1,47 @@
+import sys
+from recoll import recoll
+
+# Test the doc.getbinurl() method.
+# Select file with a binary name (actually iso8859-1), open it and
+# convert/print the contents (also iso8859-1)
+
+# True when running under Python 3 or later.
+ISP3 = sys.version_info[0] >= 3
+
+def utf8string(s):
+    """Return s as-is on Python 3; UTF-8 encode it on Python 2."""
+    if ISP3:
+        return s
+    return s.encode('utf8')
+
+if ISP3:
+    def u(x):
+        """Return x unchanged."""
+        return x
+else:
+    import codecs
+    def u(x):
+        """Decode the backslash escapes in x with unicode_escape_decode."""
+        return codecs.unicode_escape_decode(x)[0]
+
+db = recoll.connect()
+query = db.query()
+
+# This should select a file with an iso8859-1 file name
+nres = query.execute("LATIN1NAME_UNIQUEXXX dir:iso8859name", stemming=0)
+qs = "Xapian query: [%s]" % query.getxquery()
+print(utf8string(qs))
+
+print("Result count: %d %d" % (nres, query.rowcount))
+
+for doc in query:
+    print(utf8string(doc.filename))
+    burl = doc.getbinurl()
+    # Skip the first 7 characters (presumably the "file://" scheme prefix).
+    bytesname = burl[7:]
+    # The with block closes the file even if read() raises.
+    with open(bytesname, 'rb') as f:
+        s = f.read()
+    # bytes.decode() behaves the same on Python 2 and Python 3.
+    content = s.decode("iso8859-1")
+    print("Contents: [%s]"%utf8string(content))