76 lines
2.3 KiB
Python
76 lines
2.3 KiB
Python
from library.models import Collection, Work, WorkMeta, Document
|
|
|
|
import logging
|
|
|
|
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def sync_work(work: Work):
    """Bring the documents of *work* in line with the files in its folder.

    The folder id is read from the work's ``folderid`` meta entry and the
    folder is listed through the storage of the work's collection. Files
    whose id is already recorded on a document are skipped, files whose name
    does not end in ``.pdf`` are ignored, and every remaining file becomes a
    new PDF document that is auto-tagged right away. Recorded ids that were
    not matched by any listed file are only reported with a warning.
    """
    logger.info("Syncing '%s'", work.name)

    remote_folder = work.meta_info.get(name="folderid").value

    storage_config = work.collection.storage
    backend = storage_config.instance()
    upload_prefix = storage_config.name
    _, remote_files = backend.listdir(remote_folder)

    # Uploads are written below as "<storage name>:<file>"; take what follows
    # the first colon and let the backend turn it into an id.
    known_ids = {
        backend.parse_id(upload.partition(":")[2])
        for upload in work.docs.values_list("upload", flat=True)
    }
    logger.debug("%d existing documents", len(known_ids))

    for entry in remote_files:
        if entry.id in known_ids:
            logger.debug("%30s: Skipping existing (%s)", entry.name, entry.id)
            # Matched ids are removed so only unmatched ones remain at the end.
            known_ids.discard(entry.id)
        elif not entry.name.lower().endswith(".pdf"):
            logger.debug("%40s: Not a PDF", entry.name)
        else:
            logger.info("%40s: Adding", entry.name)
            new_doc = work.docs.create(
                upload=f"{upload_prefix}:{entry}", doctype=Document.DOCTYPE_PDF
            )
            new_doc.auto_tag()

    # Whatever is left was recorded locally but not listed in the folder.
    for leftover in known_ids:
        logger.warning("Local entry not in folder: %s", leftover)
|
|
|
|
|
|
def sync_collection(collection: Collection, sync_existing: bool = False):
    """Mirror the sub-folders of a collection's folder as ``Work`` entries.

    The folder named by ``collection.prefix`` is listed through the
    collection's storage. A folder whose id is already recorded in a
    ``folderid`` meta entry of one of the collection's works is left alone,
    unless *sync_existing* is true, in which case that work is re-synced with
    ``sync_work``. Every other folder gets a new ``Work`` with a ``folderid``
    meta entry and is synced immediately. Works whose recorded folder id was
    not listed are only reported with a warning.

    Args:
        collection: Collection to sync; its ``prefix`` must hold the folder id.
        sync_existing: Also re-sync works that are already linked to a folder.

    Raises:
        RuntimeError: If the collection's storage setting does not end in
            ``GDriveLinkStorage``.
        KeyError: If the collection has no prefix.
    """
    logger.info("Syncing '%s'", collection)

    if not collection.storage.storage.endswith("GDriveLinkStorage"):
        raise RuntimeError("Not a gdrive storage")

    if not collection.prefix:
        raise KeyError("Prefix must store folder id")

    # Map of recorded folder id -> primary key of the work linked to it.
    existing = dict(
        WorkMeta.objects.filter(
            work__collection=collection, name="folderid"
        ).values_list("value", "work_id")
    )

    storage = collection.storage.instance()
    folders, _ = storage.listdir(collection.prefix)

    for folder in folders:
        if folder.id in existing:
            if sync_existing:
                logger.info("%40s: Syncing (%s)", folder.name, folder.id[:12])
                sync_work(Work.objects.get(pk=existing[folder.id]))
            # Drop matched folders so only the missing ones remain afterwards.
            del existing[folder.id]
            continue

        logger.info("%40s: Adding", folder.name)
        work = Work(name=folder.name, collection=collection)
        work.save()
        work.meta_info.create(name="folderid", value=folder.id)
        sync_work(work)

    # Iterate key/value pairs: looping over the dict itself yields only the
    # folder-id strings, which cannot be unpacked into (folderid, work_id).
    for folderid, work_id in existing.items():
        logger.warning(
            "Folder for work %d no longer in drive (%s)", work_id, folderid
        )
|