Improved auto tagging

This commit is contained in:
Tris Forster 2023-03-27 10:34:20 +11:00
parent fb700b15d6
commit 04bf1ab1f3
3 changed files with 41 additions and 13 deletions

View File

@ -10,13 +10,15 @@ no Number
""" """
# taken from https://imslp.org/wiki/IMSLP:Abbreviations_for_MusicTags # taken from https://imslp.org/wiki/IMSLP:Abbreviations_for_MusicTags
# Abbreviations at the top will take precedence in reverse lookups # Include any aliases at the top
INSTRUMENTS = """ INSTRUMENTS = """
score Score score Score
cb Double bass cb Double Bass
mall Mallet percussion mall Mallet Percussion
vln Violin
vla Viola
acc Accordion acc Accordion
afl Alto flute afl Alto flute
@ -229,6 +231,35 @@ class MusicTag(namedtuple('MusicTag', ('name', 'variant'), defaults=[None])):
return f"{self.name} {self.variant}" return f"{self.name} {self.variant}"
return self.name return self.name
import re
# Filename patterns, tried in order. The first captures a word followed by a
# part number (optionally separated by '_', '-' or spaces); the second captures
# a bare word. The empty second group keeps findall() yielding 2-tuples for
# both patterns. Raw strings avoid the invalid '\-' and '\d' string escapes.
PATTERNS = [
    re.compile(r'([A-Za-z]+)[_\- ]*(\d+)'),
    re.compile(r'([A-Za-z]+)()'),
]


def auto_tag(filename: str):
    '''
    Guess the instrument tag for an uploaded file from its name.

    Every pattern in PATTERNS is scanned left to right over `filename`, and
    numbered matches ("Violin 1", "fl-2") are considered before bare words.
    Each candidate word is lowercased and looked up first as an instrument
    name (MUSIC_TAG_BY_NAME) and then as an abbreviation (MUSIC_NAME_BY_TAG);
    the first hit wins.

    Returns a MusicTag whose variant is the part number as an int (None when
    the match carried no number), or None when no word in the filename is a
    known instrument name or abbreviation.

    >>> auto_tag('Ode to Joy - Violin 1.pdf')
    MusicTag(name='Violin', variant=1)
    >>> auto_tag('Ode to Joy_Cello.pdf')
    MusicTag(name='Cello', variant=None)
    >>> auto_tag('Ode to Joy violin - 1.pdf')
    MusicTag(name='Violin', variant=1)
    >>> auto_tag('Ode to Joy - vla.pdf')
    MusicTag(name='Viola', variant=None)
    >>> auto_tag('Ode to Joy - fl-2 (piccolo).pdf')
    MusicTag(name='Flute', variant=2)
    '''
    for pattern in PATTERNS:
        for inst, ordinal in pattern.findall(filename):
            inst = inst.lower()
            # The bare-word pattern yields '' for the number group.
            ordinal = int(ordinal) if ordinal else None
            if inst in MUSIC_TAG_BY_NAME:
                # Matched a full instrument name; normalise its casing.
                return MusicTag(inst.title(), ordinal)
            if inst in MUSIC_NAME_BY_TAG:
                # Matched an abbreviation; expand it to the full name.
                return MusicTag(MUSIC_NAME_BY_TAG[inst], ordinal)
    # Nothing recognisable in the filename; callers test the result for truth.
    return None
if __name__ == "__main__": if __name__ == "__main__":
import doctest import doctest
print(doctest.testmod()) print(doctest.testmod())

View File

@ -2,6 +2,9 @@
<ul class="menu-list"> <ul class="menu-list">
{% if project %} {% if project %}
<li><a href="{% url 'item_list' project=project.pk %}">My Music</a></li> <li><a href="{% url 'item_list' project=project.pk %}">My Music</a></li>
{% endif %}
{% if collection %}
{% endif %} {% endif %}
{% if request.user.is_authenticated %} {% if request.user.is_authenticated %}
<li><a href="{% url 'work_list' %}">Library</a></li> <li><a href="{% url 'work_list' %}">Library</a></li>

View File

@ -19,7 +19,7 @@ import re
from interface.views import EnsembleMixin, ProjectMixin, AuthorizedResourceMixin from interface.views import EnsembleMixin, ProjectMixin, AuthorizedResourceMixin
from interface.models import Project from interface.models import Project
from library.models import Collection, Work, Document, Section from library.models import Collection, Work, Document, Section
from library.music_tags import MUSIC_TAGS, MUSIC_TAG_BY_NAME, MusicTag from library.music_tags import MUSIC_TAGS, MusicTag, auto_tag
from library import forms, models from library import forms, models
from library.pdf_utils import extract_pages, extract_and_concat from library.pdf_utils import extract_pages, extract_and_concat
@ -381,15 +381,9 @@ class WorkAddDocumentView(CollectionMixin, CreateView):
#name, ext = os.path.splitext(os.path.basename(doc.upload.name)) #name, ext = os.path.splitext(os.path.basename(doc.upload.name))
if doc.doctype == models.Document.DOCTYPE_PDF: if doc.doctype == models.Document.DOCTYPE_PDF:
parts = re.split(r'[^A-Za-z]+', orig_name) inst = auto_tag(orig_name)
parts.reverse() if inst:
for word in parts: doc.sections.create(tag=inst.abbreviate())
try:
tag = MUSIC_TAG_BY_NAME[word.lower()]
doc.sections.create(tag=tag)
break
except KeyError:
pass
if self.request.headers['Accept'] == 'application/json': if self.request.headers['Accept'] == 'application/json':
filename = os.path.basename(doc.upload.name) filename = os.path.basename(doc.upload.name)