Added document extractor

This commit is contained in:
Tris Forster 2023-03-02 10:05:26 +11:00
parent a892b0bc41
commit b1eaf9c7bc
8 changed files with 77 additions and 31 deletions

View File

@ -202,13 +202,14 @@ class Work(models.Model):
def folder(self):
return f"{slugify(self.composer)}/{slugify(self.name)}-{self.pk:04d}"
def extract(self, *tags):
def tagged_sections(self, *tags):
qs = self.docs.filter(sections__tag__in=tags)
qs = qs.annotate(Count('sections'), end=Min('sections__end'), start=Max('sections__start')) \
.filter(sections__count=len(tags))
return list(qs.values_list('upload', 'start', 'end'))
return qs
def list_sections(self, *tags):
return list(self.tagged_sections(*tags).values_list('upload', 'start', 'end'))
@property
def digital_parts(self):
@ -276,6 +277,14 @@ class Work(models.Model):
assigned = set(self.assigned_instruments())
return [ x for x in self.orchestration.as_list() if not x[0] in assigned ]
def music_tags(self):
    """Return ``(tag, name)`` pairs for every tag usable on this work.

    The pairs from ``self.orchestration.as_list()`` come first; tags that
    only occur on sections of this work's documents are added afterwards
    under the section's own name. An orchestration entry always wins over
    a section carrying the same tag.
    """
    labels = dict(self.orchestration.as_list())
    work_sections = Section.objects.filter(doc__work_id=self.pk)
    for sec in work_sections:
        # setdefault keeps the label already registered for this tag
        labels.setdefault(sec.tag, sec.name)
    return labels.items()
def __str__(self):
return f"{self.name} ({self.composer})"

View File

@ -24,12 +24,12 @@ def extract_and_concat(items):
if count == 0:
continue
if start is None:
if not start:
sections.append(source)
else:
if end is None:
if not end:
end = start
dest = os.path.join(d.name, f'section_{i}.pdf')

View File

@ -75,7 +75,7 @@
</div>
</div>
<ul id="unassigned-area">
{% for tag, inst in document.work.unassigned_instruments %}
{% for tag, inst in document.work.music_tags %}
{# escapejs: the tag is placed inside a JavaScript string literal, so it must be JS-escaped as well as HTML-escaped #}
<li class="is-clickable" onclick="assignInstrument('{{ tag|escapejs }}', this)">{{ inst }}</li>
{% endfor %}
<li><a onclick="document.getElementById('add-modal').classList.add('is-active')">Add instrument</a></li>

View File

@ -87,15 +87,18 @@
Digital Parts
</h4>
<div class="tags">
{% if work.digital_parts %}
<a class="tag is-danger" href="{% url 'work_partset' collection.pk work.pk %}">Full Set</a>
{% endif %}
{% for section in work.digital_parts %}
<a class="tag is-info" href="{% url 'part_download' collection.pk section.pk section.filename %}"
target="section_{{ section.pk }}" rel="">{{ section.name }}</a>
{% empty %}
<p class="is-italic">No digital parts available</p>
{% endfor %}
{% with sections=work.digital_parts %}
{% if sections %}
<a class="tag is-danger" href="{% url 'work_partset' collection.pk work.pk %}">Full Set</a>
{% endif %}
{% regroup sections by tag as unique_sections %}
{% for section in unique_sections %}
<a class="tag is-info" href="{% url 'work_download' collection.pk work.pk %}?tag={{ section.list.0.tag }}"
target="section_{{ section.list.0.pk }}" rel="">{{ section.list.0.name }}</a>
{% empty %}
<p class="is-italic">No digital parts available</p>
{% endfor %}
{% endwith %}
</div>
</div>
</div>

View File

@ -162,15 +162,15 @@ class LibraryTestCase(AccessTestCase):
doc.sections.create(tag=g)
# no tags - get nothing (should it be everything?)
self.assertEqual(work.extract(), [])
self.assertEqual(work.list_sections(), [])
# single tag - should get just that range
self.assertEqual(work.extract('vl-1'), [('sel/beethoven/some_quartet/some_quartet_vl-1.pdf', None, None)])
self.assertEqual(work.list_sections('vl-1'), [('sel/beethoven/some_quartet/some_quartet_vl-1.pdf', None, None)])
# single tag - returns all documents with that range
result = work.extract('mvmt-2')
result = work.list_sections('mvmt-2')
self.assertEqual(len(result), 4)
# multiple tags - returns the overlapping portion of all documents that have all tags
self.assertEqual(work.extract('vl-1', 'mvmt-2'), [('sel/beethoven/some_quartet/some_quartet_vl-1.pdf', 4, 8)])
self.assertEqual(work.extract('vl-1', 'vl-2'), [])
self.assertEqual(work.list_sections('vl-1', 'mvmt-2'), [('sel/beethoven/some_quartet/some_quartet_vl-1.pdf', 4, 8)])
self.assertEqual(work.list_sections('vl-1', 'vl-2'), [])

View File

@ -27,6 +27,7 @@ urlpatterns = [
path('collections/<int:collection>/works/<int:pk>/partset', views.WorkPartSetView.as_view(), name="work_partset"),
path('collections/<int:collection>/works/<int:pk>/add_to_project', views.WorkAddToProject.as_view(), name="work_add_to_project"),
path('collections/<int:collection>/works/<int:pk>/upload', views.WorkAddDocumentView.as_view(), name="document_add"),
path('collections/<int:collection>/works/<int:pk>/download', views.WorkDownloadView.as_view(), name="work_download"),
path('collections/<int:collection>/docs/<int:pk>/delete', views.DocumentDeleteView.as_view(), name="document_delete"),
path('collections/<int:collection>/docs/<int:pk>/download', views.DocumentDownloadView.as_view(), name="document_download"),

View File

@ -19,7 +19,7 @@ import re
from interface.views import EnsembleMixin, ProjectMixin, AuthorizedResourceMixin
from interface.models import Project
from library.models import Collection, Work, Document, Section
from library.music_tags import MUSIC_TAGS, MUSIC_TAG_BY_NAME
from library.music_tags import MUSIC_TAGS, MUSIC_TAG_BY_NAME, MusicTag
from library import forms, models
from library.pdf_utils import extract_pages, extract_and_concat
@ -308,6 +308,38 @@ class WorkPartSetView(CollectionMixin, DetailView):
works = works.filter(collection__allowed_ensembles__ensemble=self.request.ensemble_id)
return works
class WorkDownloadView(CollectionMixin, SingleObjectMixin, View):
    """Serve the parts of a work selected by ``tag`` query parameters as a PDF.

    The tags are passed to ``Work.tagged_sections``; each returned row
    provides an ``upload`` file together with ``start`` and ``end`` values.
    """

    model = models.Work

    def get(self, request, *args, **kwargs):
        """Return the matching pages inline, or redirect to the stored file.

        Raises:
            Http404: if no ``tag`` parameter was given, or nothing matches.
        """
        self.object = self.get_object()

        tags = request.GET.getlist('tag')
        if not tags:
            raise Http404("No tags given")

        sections = list(self.object.tagged_sections(*tags))
        logger.debug("Sections matching %s: %s", tags, sections)
        if not sections:
            raise Http404("No matching sections")

        # A falsy start (None, or 0 on older records) marks a section that
        # covers the whole upload - the same rule extract_and_concat applies -
        # so a single such match can be served straight from storage.
        if len(sections) == 1 and not sections[0].start:
            logger.debug("Redirecting to url")
            return redirect(sections[0].upload.url)

        result = extract_and_concat(
            [(s.upload.path, s.upload.name, s.start, s.end, 1) for s in sections]
        )

        tag_names = " - ".join(str(MusicTag.from_tag(tag)) for tag in tags)
        download_name = f'{self.object.name} - {tag_names}.pdf'
        # Backslashes and double quotes must be escaped inside the quoted
        # filename, otherwise a work name containing them corrupts the header.
        quoted_name = download_name.replace('\\', '\\\\').replace('"', '\\"')

        response = FileResponse(result, content_type="application/pdf")
        response['Content-Disposition'] = f'inline; filename="{quoted_name}"'
        return response
class WorkAddDocumentView(CollectionMixin, CreateView):
template_name = "interface/default_form.html"
model = Document

View File

@ -35,6 +35,7 @@ class WorkExportView(EnsembleMixin, WorkMixin, View):
from interface.views import AuthorizedResourceMixin
from rest_framework import routers, serializers, viewsets
from rest_framework.exceptions import APIException
from library.models import Collection, Work, Document, Section, WorkMeta
@ -72,14 +73,12 @@ class SectionSerializer(serializers.ModelSerializer):
def to_internal_value(self, data):
tag, start, end = data.split(":")
try:
start = int(start)
except:
start = 0
try:
end = int(end)
except:
end = 0
start = int(start)
end = int(end)
if start < 1:
start = None
if end < 1:
end = None
return super().to_internal_value({'tag': tag, 'start': start, 'end': end})
class DocumentSerializer(serializers.ModelSerializer):
@ -139,7 +138,9 @@ class WorkSerializer(serializers.ModelSerializer):
filename = os.path.basename(url.path)
r = requests.get(d['upload'], stream=True)
f = TemporaryUploadedFile(filename, r.headers['content-type'], r.headers['content-length'], r.encoding)
if r.status_code != 200:
raise APIException("Failed to download file")
f = TemporaryUploadedFile(filename, r.headers['content-type'], r.headers.get('content-length'), r.encoding)
shutil.copyfileobj(r.raw, f.file)
r.close()
d['upload'] = f