2026-05-13 09:32:22 +10:00

76 lines
2.3 KiB
Python

from library.models import Collection, Work, WorkMeta, Document
import logging
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
def sync_work(work: Work):
    """Add documents to *work* for PDFs found in its linked storage folder.

    The folder is identified by the work's ``folderid`` meta entry and is
    listed through the storage instance of the work's collection.  Files
    whose id is already referenced by one of the work's documents are
    skipped, files whose name does not end in ``.pdf`` (case-insensitive)
    are ignored, and every remaining file becomes a new PDF document that
    is then auto-tagged.

    Ids referenced locally but absent from the folder listing are only
    reported with a warning; nothing is removed here.
    """
    logger.info("Syncing '%s'", work.name)
    folder_id = work.meta_info.get(name="folderid").value
    storage = work.collection.storage.instance()
    prefix = work.collection.storage.name
    _, files = storage.listdir(folder_id)

    # Uploads are stored as "<storage name>:<file>"; the id is recovered from
    # whatever follows the first colon.
    unmatched = {
        storage.parse_id(upload.partition(":")[2])
        for upload in work.docs.values_list("upload", flat=True)
    }
    logger.debug("%d existing documents", len(unmatched))

    for entry in files:
        if entry.id in unmatched:
            logger.debug("%30s: Skipping existing (%s)", entry.name, entry.id)
            # Tick the id off so only ids missing from the folder remain.
            unmatched.discard(entry.id)
        elif entry.name.lower().endswith(".pdf"):
            logger.info("%40s: Adding", entry.name)
            document = work.docs.create(
                upload=f"{prefix}:{entry}", doctype=Document.DOCTYPE_PDF
            )
            document.auto_tag()
        else:
            logger.debug("%40s: Not a PDF", entry.name)

    # Whatever was never ticked off has no counterpart in the folder.
    for orphan in unmatched:
        logger.warning("Local entry not in folder: %s", orphan)
def sync_collection(collection: Collection, sync_existing: bool = False):
    """Create a work for every new sub-folder of the collection's folder.

    The collection's ``prefix`` is used as the id of the folder to list.
    Each listed sub-folder that is not yet linked to a work (through a
    ``folderid`` meta entry) gets a new ``Work`` named after the folder,
    which is then synced with ``sync_work``.

    Args:
        collection: Collection whose storage is a ``GDriveLinkStorage``.
        sync_existing: When true, works that already exist for a listed
            folder are re-synced as well; otherwise they are skipped.

    Raises:
        RuntimeError: If the collection's storage is not a
            ``GDriveLinkStorage``.
        KeyError: If the collection has no prefix.
    """
    logger.info("Syncing '%s'", collection)
    if not collection.storage.storage.endswith("GDriveLinkStorage"):
        raise RuntimeError("Not a gdrive storage")
    if not collection.prefix:
        raise KeyError("Prefix must store folder id")

    # Maps folder id -> primary key of the work already linked to it.
    existing = dict(
        WorkMeta.objects.filter(
            work__collection=collection, name="folderid"
        ).values_list("value", "work_id")
    )
    storage = collection.storage.instance()
    folders, _ = storage.listdir(collection.prefix)

    for folder in folders:
        if folder.id in existing:
            # Remove the entry so that only works whose folder was not
            # listed are left over for the report below.
            work_id = existing.pop(folder.id)
            if sync_existing:
                logger.info("%40s: Syncing (%s)", folder.name, folder.id[:12])
                sync_work(Work.objects.get(pk=work_id))
            continue

        logger.info("%40s: Adding", folder.name)
        work = Work(name=folder.name, collection=collection)
        work.save()
        work.meta_info.create(name="folderid", value=folder.id)
        sync_work(work)

    # Iterate over items(): iterating the dict itself yields only the keys,
    # which cannot be unpacked into (folder id, work id) pairs.
    for folder_id, work_id in existing.items():
        logger.warning(
            "Folder for work %d no longer in drive (%s)", work_id, folder_id
        )