Added document extractor

2023-03-02 10:05:26 +11:00 · 2023-03-02 10:05:26 +11:00 · b1eaf9c7bc
commit b1eaf9c7bc
parent a892b0bc41
8 changed files with 77 additions and 31 deletions
--- a/app/library/models.py
+++ b/app/library/models.py
@ -202,13 +202,14 @@ class Work(models.Model):
    def folder(self):
        """Build this work's folder string.

        The result is the slugified composer, a slash, then the slugified
        name followed by a dash and the primary key zero-padded to at least
        four digits.
        """
        composer_slug = slugify(self.composer)
        name_slug = slugify(self.name)
        return f"{composer_slug}/{name_slug}-{self.pk:04d}"
-    def extract(self, *tags):
+    def tagged_sections(self, *tags):
        qs = self.docs.filter(sections__tag__in=tags)
        qs = qs.annotate(Count('sections'), end=Min('sections__end'), start=Max('sections__start')) \
            .filter(sections__count=len(tags))
-        
+        return qs
-        return list(qs.values_list('upload', 'start', 'end'))
+
    def list_sections(self, *tags):
        """Return the result of ``tagged_sections(*tags)`` as a plain list.

        Each entry is an ``(upload, start, end)`` tuple taken from the
        query returned by ``tagged_sections``.
        """
        matches = self.tagged_sections(*tags)
        rows = matches.values_list('upload', 'start', 'end')
        return list(rows)
    @property
    def digital_parts(self):
@ -276,6 +277,14 @@ class Work(models.Model):
        assigned = set(self.assigned_instruments())
        return [ x for x in self.orchestration.as_list() if not x[0] in assigned ]
    def music_tags(self):
        """Return ``(tag, name)`` pairs for this work.

        Starts from the entries of ``self.orchestration.as_list()`` and adds
        the tag/name of every Section attached to one of this work's
        documents, without overriding a tag that is already present.
        """
        known = dict(self.orchestration.as_list())
        work_sections = Section.objects.filter(doc__work_id=self.pk)
        for sec in work_sections:
            tag, name = sec.tag, sec.name
            if tag not in known:
                # First name seen for a tag wins, orchestration entries first.
                known[tag] = name
        return known.items()
    def __str__(self):
        """Return the work's name followed by its composer in parentheses."""
        label = f"{self.name} ({self.composer})"
        return label
--- a/app/library/pdf_utils.py
+++ b/app/library/pdf_utils.py
@ -24,12 +24,12 @@ def extract_and_concat(items):
        if count == 0:
            continue
-        if start is None:
+        if not start:
            sections.append(source)
        else:
-            if end is None:
+            if not end:
                end = start
            dest = os.path.join(d.name, f'section_{i}.pdf')
--- a/app/library/templates/library/document_annotate.html
+++ b/app/library/templates/library/document_annotate.html
@ -75,7 +75,7 @@
    </div>
  </div>
  <ul id="unassigned-area">
-  {% for tag, inst in document.work.unassigned_instruments %}
+  {% for tag, inst in document.work.music_tags %}
    <li class="is-clickable" onclick="assignInstrument('{{tag}}', this)")>{{ inst }}</li>
  {% endfor %}
    <li><a onclick="document.getElementById('add-modal').classList.add('is-active')">Add instrument</a></li>
--- a/app/library/templates/library/work_detail.html
+++ b/app/library/templates/library/work_detail.html
@ -87,15 +87,18 @@
                    Digital Parts
                </h4>
                <div class="tags">
-                    {% if work.digital_parts %}
+                    {% with sections=work.digital_parts %}
-                    <a class="tag is-danger" href="{% url 'work_partset' collection.pk work.pk %}">Full Set</a>
+                        {% if sections %}
-                    {% endif %}
+                        <a class="tag is-danger" href="{% url 'work_partset' collection.pk work.pk %}">Full Set</a>
-                    {% for section in work.digital_parts %}
+                        {% endif %}
-                    <a class="tag is-info" href="{% url 'part_download' collection.pk section.pk section.filename %}"
+                        {% regroup sections by tag as unique_sections %}
-                        target="section_{{ section.pk }}" rel="">{{ section.name }}</a>
+                        {% for section in unique_sections %}
-                    {% empty %}
+                        <a class="tag is-info" href="{% url 'work_download' collection.pk work.pk %}?tag={{ section.list.0.tag }}"
-                    <p class="is-italic">No digital parts available</p>
+                            target="section_{{ section.list.0.pk }}" rel="">{{ section.list.0.name }}</a>
-                    {% endfor %}
+                        {% empty %}
                        <p class="is-italic">No digital parts available</p>
                        {% endfor %}
                    {% endwith %}
                </div>
            </div>
        </div>
--- a/app/library/tests.py
+++ b/app/library/tests.py
@ -162,15 +162,15 @@ class LibraryTestCase(AccessTestCase):
            doc.sections.create(tag=g)
        # no tags - get nothing (should it be everything?)
-        self.assertEqual(work.extract(), [])
+        self.assertEqual(work.list_sections(), [])
        # single tag - should get just that range
-        self.assertEqual(work.extract('vl-1'), [('sel/beethoven/some_quartet/some_quartet_vl-1.pdf', None, None)])
+        self.assertEqual(work.list_sections('vl-1'), [('sel/beethoven/some_quartet/some_quartet_vl-1.pdf', None, None)])
        # single tag - returns all documents with that range
-        result = work.extract('mvmt-2')
+        result = work.list_sections('mvmt-2')
        self.assertEqual(len(result), 4)
        # multiple tags - returns the overlapping portion of all documents that have all tags
-        self.assertEqual(work.extract('vl-1', 'mvmt-2'), [('sel/beethoven/some_quartet/some_quartet_vl-1.pdf', 4, 8)])
+        self.assertEqual(work.list_sections('vl-1', 'mvmt-2'), [('sel/beethoven/some_quartet/some_quartet_vl-1.pdf', 4, 8)])
-        self.assertEqual(work.extract('vl-1', 'vl-2'), [])
+        self.assertEqual(work.list_sections('vl-1', 'vl-2'), [])
--- a/app/library/urls.py
+++ b/app/library/urls.py
@ -27,6 +27,7 @@ urlpatterns = [
    path('collections/<int:collection>/works/<int:pk>/partset', views.WorkPartSetView.as_view(), name="work_partset"),
    path('collections/<int:collection>/works/<int:pk>/add_to_project', views.WorkAddToProject.as_view(), name="work_add_to_project"),
    path('collections/<int:collection>/works/<int:pk>/upload', views.WorkAddDocumentView.as_view(), name="document_add"),
    path('collections/<int:collection>/works/<int:pk>/download', views.WorkDownloadView.as_view(), name="work_download"),
    path('collections/<int:collection>/docs/<int:pk>/delete', views.DocumentDeleteView.as_view(), name="document_delete"),
    path('collections/<int:collection>/docs/<int:pk>/download', views.DocumentDownloadView.as_view(), name="document_download"),
--- a/app/library/views/init.py
+++ b/app/library/views/init.py
@ -19,7 +19,7 @@ import re
 from interface.views import EnsembleMixin, ProjectMixin, AuthorizedResourceMixin
 from interface.models import Project
 from library.models import Collection, Work, Document, Section
-from library.music_tags import MUSIC_TAGS, MUSIC_TAG_BY_NAME
+from library.music_tags import MUSIC_TAGS, MUSIC_TAG_BY_NAME, MusicTag
 from library import forms, models
 from library.pdf_utils import extract_pages, extract_and_concat
@ -308,6 +308,38 @@ class WorkPartSetView(CollectionMixin, DetailView):
            works = works.filter(collection__allowed_ensembles__ensemble=self.request.ensemble_id)
        return works
 class WorkDownloadView(CollectionMixin, SingleObjectMixin, View):
    """Serve the parts of a work selected by ``tag`` query parameters as a PDF.

    The matching documents come from ``Work.tagged_sections``; a single
    whole-document match is served by redirecting to the stored upload,
    anything else is built with ``extract_and_concat``.
    """
    model = models.Work

    def get(self, request, *args, **kwargs):
        """Handle ``GET ...?tag=<tag>[&tag=<tag>...]``.

        Returns:
            A redirect to the upload URL when exactly one document matches
            and it has no start page, otherwise an inline PDF response.

        Raises:
            Http404: if no ``tag`` parameter is given or nothing matches.
        """
        self.object = self.get_object()
        tags = request.GET.getlist('tag')
        if not tags:
            raise Http404("No tags given")
        sections = list(self.object.tagged_sections(*tags))
        if not sections:
            raise Http404("No matching sections")
        # An un-ranged section has no start page (None, or 0 in older data),
        # and extract_and_concat treats any falsy start as "whole document",
        # so test truthiness here; comparing with 0 never matched None.
        if len(sections) == 1 and not sections[0].start:
            # bypass extraction and redirect to the url
            logger.debug("Redirecting to url")
            return redirect(sections[0].upload.url)
        result = extract_and_concat([(s.upload.path, s.upload.name, s.start, s.end, 1) for s in sections])
        tag_names = " - ".join(str(MusicTag.from_tag(tag)) for tag in tags)
        download_name = f'{self.object.name} - {tag_names}.pdf'
        # The name sits inside a quoted-string; escape backslashes and double
        # quotes so a work or tag name cannot terminate the parameter early.
        quoted_name = download_name.replace('\\', '\\\\').replace('"', '\\"')
        response = FileResponse(result, content_type="application/pdf")
        response['Content-Disposition'] = f'inline; filename="{quoted_name}"'
        return response
 class WorkAddDocumentView(CollectionMixin, CreateView):
    template_name = "interface/default_form.html"
    model = Document
--- a/app/library/views/api.py
+++ b/app/library/views/api.py
@ -35,6 +35,7 @@ class WorkExportView(EnsembleMixin, WorkMixin, View):
 from interface.views import AuthorizedResourceMixin
 from rest_framework import routers, serializers, viewsets
 from rest_framework.exceptions import APIException
 from library.models import Collection, Work, Document, Section, WorkMeta
@ -72,14 +73,12 @@ class SectionSerializer(serializers.ModelSerializer):
    def to_internal_value(self, data):
        tag, start, end = data.split(":")
-        try:
+        start = int(start)
-            start = int(start)
+        end = int(end)
-        except:
+        if start < 1:
-            start = 0
+            start = None
-        try:
+        if end < 1:
-            end = int(end)
+            end = None
        except:
            end = 0
        return super().to_internal_value({'tag': tag, 'start': start, 'end': end})
 class DocumentSerializer(serializers.ModelSerializer):
@ -139,7 +138,9 @@ class WorkSerializer(serializers.ModelSerializer):
                filename = os.path.basename(url.path)
                r = requests.get(d['upload'], stream=True)
-                f = TemporaryUploadedFile(filename, r.headers['content-type'], r.headers['content-length'], r.encoding)
+                if r.status_code != 200:
                    raise APIException("Failed to download file")
                f = TemporaryUploadedFile(filename, r.headers['content-type'], r.headers.get('content-length'), r.encoding)
                shutil.copyfileobj(r.raw, f.file)
                r.close()
                d['upload'] = f