Fix PDF duplicate detection, which was broken by setting the md5 attribute twice: once for the document and once for the HTML content. May have affected some other document types.

2020-07-27 09:08:47 +02:00 · 2020-07-27 09:08:47 +02:00 · 5cfd95226d
commit 5cfd95226d
parent b2e68740ba
1 changed files with 7 additions and 0 deletions
--- a/src/internfile/internfile.cpp
+++ b/src/internfile/internfile.cpp
@ -559,6 +559,13 @@ bool FileInterner::dijontorcl(Rcl::Doc& doc)
            const string *fnp = 0;
            if (!doc.peekmeta(Rcl::Doc::keyfn, &fnp) || fnp->empty())
                doc.meta[Rcl::Doc::keyfn] = ent.second;
+        } else if (ent.first == cstr_dj_keymd5) {
+            // Only if not set during the stack walk: we want the md5
+            // from the actual document, not from further conversions,
+            // as computed, e.g. by the html to text handler
+            const string *val = 0;
+            if (!doc.peekmeta(Rcl::Doc::keymd5, &val) || val->empty())
+                doc.meta[Rcl::Doc::keymd5] = ent.second;
        } else if (ent.first == cstr_dj_keymt || 
                   ent.first == cstr_dj_keycharset) {
            // don't need/want these.