Added document extractor

This commit is contained in:
Tris Forster 2023-03-02 10:05:26 +11:00
parent a892b0bc41
commit b1eaf9c7bc
8 changed files with 77 additions and 31 deletions

View File

@ -202,13 +202,14 @@ class Work(models.Model):
def folder(self): def folder(self):
return f"{slugify(self.composer)}/{slugify(self.name)}-{self.pk:04d}" return f"{slugify(self.composer)}/{slugify(self.name)}-{self.pk:04d}"
def extract(self, *tags): def tagged_sections(self, *tags):
qs = self.docs.filter(sections__tag__in=tags) qs = self.docs.filter(sections__tag__in=tags)
qs = qs.annotate(Count('sections'), end=Min('sections__end'), start=Max('sections__start')) \ qs = qs.annotate(Count('sections'), end=Min('sections__end'), start=Max('sections__start')) \
.filter(sections__count=len(tags)) .filter(sections__count=len(tags))
return qs
return list(qs.values_list('upload', 'start', 'end'))
def list_sections(self, *tags):
    """Return the documents matching every tag as plain tuples.

    Delegates the lookup to ``tagged_sections`` and materialises the
    result as a list of ``(upload, start, end)`` tuples.
    """
    matching = self.tagged_sections(*tags)
    rows = matching.values_list('upload', 'start', 'end')
    return list(rows)
@property @property
def digital_parts(self): def digital_parts(self):
@ -276,6 +277,14 @@ class Work(models.Model):
assigned = set(self.assigned_instruments()) assigned = set(self.assigned_instruments())
return [ x for x in self.orchestration.as_list() if not x[0] in assigned ] return [ x for x in self.orchestration.as_list() if not x[0] in assigned ]
def music_tags(self):
    """Return ``(tag, name)`` pairs for this work.

    Starts from the pairs listed by ``self.orchestration.as_list()`` and
    then adds the tag of every Section belonging to one of this work's
    documents, unless that tag is already present. Names taken from the
    orchestration therefore win over section names.
    """
    known = dict(self.orchestration.as_list())
    work_sections = Section.objects.filter(doc__work_id=self.pk)
    for sec in work_sections:
        tag, label = sec.tag, sec.name
        if tag not in known:
            known[tag] = label
    return known.items()
def __str__(self): def __str__(self):
return f"{self.name} ({self.composer})" return f"{self.name} ({self.composer})"

View File

@ -24,12 +24,12 @@ def extract_and_concat(items):
if count == 0: if count == 0:
continue continue
if start is None: if not start:
sections.append(source) sections.append(source)
else: else:
if end is None: if not end:
end = start end = start
dest = os.path.join(d.name, f'section_{i}.pdf') dest = os.path.join(d.name, f'section_{i}.pdf')

View File

@ -75,7 +75,7 @@
</div> </div>
</div> </div>
<ul id="unassigned-area"> <ul id="unassigned-area">
{% for tag, inst in document.work.unassigned_instruments %} {% for tag, inst in document.work.music_tags %}
<li class="is-clickable" onclick="assignInstrument('{{tag}}', this)")>{{ inst }}</li> <li class="is-clickable" onclick="assignInstrument('{{tag}}', this)")>{{ inst }}</li>
{% endfor %} {% endfor %}
<li><a onclick="document.getElementById('add-modal').classList.add('is-active')">Add instrument</a></li> <li><a onclick="document.getElementById('add-modal').classList.add('is-active')">Add instrument</a></li>

View File

@ -87,15 +87,18 @@
Digital Parts Digital Parts
</h4> </h4>
<div class="tags"> <div class="tags">
{% if work.digital_parts %} {% with sections=work.digital_parts %}
<a class="tag is-danger" href="{% url 'work_partset' collection.pk work.pk %}">Full Set</a> {% if sections %}
{% endif %} <a class="tag is-danger" href="{% url 'work_partset' collection.pk work.pk %}">Full Set</a>
{% for section in work.digital_parts %} {% endif %}
<a class="tag is-info" href="{% url 'part_download' collection.pk section.pk section.filename %}" {% regroup sections by tag as unique_sections %}
target="section_{{ section.pk }}" rel="">{{ section.name }}</a> {% for section in unique_sections %}
{% empty %} <a class="tag is-info" href="{% url 'work_download' collection.pk work.pk %}?tag={{ section.list.0.tag }}"
<p class="is-italic">No digital parts available</p> target="section_{{ section.list.0.pk }}" rel="">{{ section.list.0.name }}</a>
{% endfor %} {% empty %}
<p class="is-italic">No digital parts available</p>
{% endfor %}
{% endwith %}
</div> </div>
</div> </div>
</div> </div>

View File

@ -162,15 +162,15 @@ class LibraryTestCase(AccessTestCase):
doc.sections.create(tag=g) doc.sections.create(tag=g)
# no tags - get nothing (should it be everything?) # no tags - get nothing (should it be everything?)
self.assertEqual(work.extract(), []) self.assertEqual(work.list_sections(), [])
# single tag - should get just that range # single tag - should get just that range
self.assertEqual(work.extract('vl-1'), [('sel/beethoven/some_quartet/some_quartet_vl-1.pdf', None, None)]) self.assertEqual(work.list_sections('vl-1'), [('sel/beethoven/some_quartet/some_quartet_vl-1.pdf', None, None)])
# single tag - returns all documents with that range # single tag - returns all documents with that range
result = work.extract('mvmt-2') result = work.list_sections('mvmt-2')
self.assertEqual(len(result), 4) self.assertEqual(len(result), 4)
# multiple tags - returns the overlapping portion of all documents that have all tags # multiple tags - returns the overlapping portion of all documents that have all tags
self.assertEqual(work.extract('vl-1', 'mvmt-2'), [('sel/beethoven/some_quartet/some_quartet_vl-1.pdf', 4, 8)]) self.assertEqual(work.list_sections('vl-1', 'mvmt-2'), [('sel/beethoven/some_quartet/some_quartet_vl-1.pdf', 4, 8)])
self.assertEqual(work.extract('vl-1', 'vl-2'), []) self.assertEqual(work.list_sections('vl-1', 'vl-2'), [])

View File

@ -27,6 +27,7 @@ urlpatterns = [
path('collections/<int:collection>/works/<int:pk>/partset', views.WorkPartSetView.as_view(), name="work_partset"), path('collections/<int:collection>/works/<int:pk>/partset', views.WorkPartSetView.as_view(), name="work_partset"),
path('collections/<int:collection>/works/<int:pk>/add_to_project', views.WorkAddToProject.as_view(), name="work_add_to_project"), path('collections/<int:collection>/works/<int:pk>/add_to_project', views.WorkAddToProject.as_view(), name="work_add_to_project"),
path('collections/<int:collection>/works/<int:pk>/upload', views.WorkAddDocumentView.as_view(), name="document_add"), path('collections/<int:collection>/works/<int:pk>/upload', views.WorkAddDocumentView.as_view(), name="document_add"),
path('collections/<int:collection>/works/<int:pk>/download', views.WorkDownloadView.as_view(), name="work_download"),
path('collections/<int:collection>/docs/<int:pk>/delete', views.DocumentDeleteView.as_view(), name="document_delete"), path('collections/<int:collection>/docs/<int:pk>/delete', views.DocumentDeleteView.as_view(), name="document_delete"),
path('collections/<int:collection>/docs/<int:pk>/download', views.DocumentDownloadView.as_view(), name="document_download"), path('collections/<int:collection>/docs/<int:pk>/download', views.DocumentDownloadView.as_view(), name="document_download"),

View File

@ -19,7 +19,7 @@ import re
from interface.views import EnsembleMixin, ProjectMixin, AuthorizedResourceMixin from interface.views import EnsembleMixin, ProjectMixin, AuthorizedResourceMixin
from interface.models import Project from interface.models import Project
from library.models import Collection, Work, Document, Section from library.models import Collection, Work, Document, Section
from library.music_tags import MUSIC_TAGS, MUSIC_TAG_BY_NAME from library.music_tags import MUSIC_TAGS, MUSIC_TAG_BY_NAME, MusicTag
from library import forms, models from library import forms, models
from library.pdf_utils import extract_pages, extract_and_concat from library.pdf_utils import extract_pages, extract_and_concat
@ -308,6 +308,38 @@ class WorkPartSetView(CollectionMixin, DetailView):
works = works.filter(collection__allowed_ensembles__ensemble=self.request.ensemble_id) works = works.filter(collection__allowed_ensembles__ensemble=self.request.ensemble_id)
return works return works
class WorkDownloadView(CollectionMixin, SingleObjectMixin, View):
    """Serve the parts of a work that match every ``tag`` query parameter.

    The request must carry one or more ``tag`` parameters; the documents
    returned by ``Work.tagged_sections`` for those tags are combined into
    a single PDF response.
    """
    model = models.Work

    def get(self, request, *args, **kwargs):
        """Handle ``GET ...?tag=<tag>[&tag=<tag>...]``.

        Returns:
            A redirect to the stored upload when exactly one document
            matches and it has no start page, otherwise a ``FileResponse``
            wrapping the output of ``extract_and_concat``.

        Raises:
            Http404: if no ``tag`` parameter was given or nothing matches.
        """
        self.object = self.get_object()
        tags = request.GET.getlist('tag')
        if not tags:
            raise Http404("No tags given")

        sections = list(self.object.tagged_sections(*tags))
        # Log instead of print so request handling does not write to stdout.
        logger.debug("Sections matching %s: %s", tags, sections)
        if not sections:
            raise Http404("No matching sections")

        # A section without a page range has start=None rather than 0, so
        # test for "no start" instead of comparing against 0; otherwise the
        # shortcut below is never taken for whole-document sections.
        if len(sections) == 1 and not sections[0].start:
            # bypass extraction and redirect to the url
            logger.debug("Redirecting to url")
            return redirect(sections[0].upload.url)

        result = extract_and_concat(
            [(s.upload.path, s.upload.name, s.start, s.end, 1) for s in sections]
        )

        tag_names = " - ".join(str(MusicTag.from_tag(tag)) for tag in tags)
        download_name = f'{self.object.name} - {tag_names}.pdf'
        # The name sits inside a quoted-string header value: escape
        # backslashes and double quotes so a work name cannot break it.
        quoted_name = download_name.replace('\\', '\\\\').replace('"', '\\"')

        response = FileResponse(result, content_type="application/pdf")
        response['Content-Disposition'] = f'inline; filename="{quoted_name}"'
        return response
class WorkAddDocumentView(CollectionMixin, CreateView): class WorkAddDocumentView(CollectionMixin, CreateView):
template_name = "interface/default_form.html" template_name = "interface/default_form.html"
model = Document model = Document

View File

@ -35,6 +35,7 @@ class WorkExportView(EnsembleMixin, WorkMixin, View):
from interface.views import AuthorizedResourceMixin from interface.views import AuthorizedResourceMixin
from rest_framework import routers, serializers, viewsets from rest_framework import routers, serializers, viewsets
from rest_framework.exceptions import APIException
from library.models import Collection, Work, Document, Section, WorkMeta from library.models import Collection, Work, Document, Section, WorkMeta
@ -72,14 +73,12 @@ class SectionSerializer(serializers.ModelSerializer):
def to_internal_value(self, data): def to_internal_value(self, data):
tag, start, end = data.split(":") tag, start, end = data.split(":")
try: start = int(start)
start = int(start) end = int(end)
except: if start < 1:
start = 0 start = None
try: if end < 1:
end = int(end) end = None
except:
end = 0
return super().to_internal_value({'tag': tag, 'start': start, 'end': end}) return super().to_internal_value({'tag': tag, 'start': start, 'end': end})
class DocumentSerializer(serializers.ModelSerializer): class DocumentSerializer(serializers.ModelSerializer):
@ -139,7 +138,9 @@ class WorkSerializer(serializers.ModelSerializer):
filename = os.path.basename(url.path) filename = os.path.basename(url.path)
r = requests.get(d['upload'], stream=True) r = requests.get(d['upload'], stream=True)
f = TemporaryUploadedFile(filename, r.headers['content-type'], r.headers['content-length'], r.encoding) if r.status_code != 200:
raise APIException("Failed to download file")
f = TemporaryUploadedFile(filename, r.headers['content-type'], r.headers.get('content-length'), r.encoding)
shutil.copyfileobj(r.raw, f.file) shutil.copyfileobj(r.raw, f.file)
r.close() r.close()
d['upload'] = f d['upload'] = f