From 5468f6d3e7c722bed5349936297b07494d1b747e Mon Sep 17 00:00:00 2001
From: Tris Forster
Date: Wed, 27 May 2026 22:46:08 +1000
Subject: [PATCH] Added handling of gdrive resource_keys - fixes #16

---
 Review notes (text between "---" and the diffstat is ignored by git am):
  * url(): removed the stray "$" from the resourcekey query parameter.
  * import_link(): get_meta() parses a string, so it is now given
    str(file_resource); the result is built with _replace() so the
    stored name no longer contains a doubled "/".
  * Line breaks and indentation were reconstructed from the hunk counts.
    The storage.py post-image blob id (b3d9f08) predates these fixes.

 polyphonic/library/gdrive/__init__.py | 25 +++++---
 polyphonic/library/gdrive/storage.py  | 90 +++++++++++++++++----------
 2 files changed, 72 insertions(+), 43 deletions(-)

diff --git a/polyphonic/library/gdrive/__init__.py b/polyphonic/library/gdrive/__init__.py
index 0f1df82..f1205e2 100644
--- a/polyphonic/library/gdrive/__init__.py
+++ b/polyphonic/library/gdrive/__init__.py
@@ -7,7 +7,10 @@ logger = logging.getLogger(__name__)
 
 
 def sync_work(work: Work):
-    folder_id = work.meta_info.get(name="folderid").value
+    try:
+        folder_id = work.meta_info.get(name="folderid").value
+    except WorkMeta.DoesNotExist as err:
+        raise IndexError("Work not currently linked to a gdrive folder") from err
 
     logger.info("Syncing '%s' from %r", work.name, folder_id)
 
@@ -15,7 +18,7 @@ def sync_work(work: Work):
 
     existing = set(
         [
-            storage.parse_id(x.partition(":")[2])
+            storage.parse_resource(x.partition(":")[2]).id
             for x in work.docs.values_list("upload", flat=True)
         ]
     )
@@ -42,16 +45,19 @@ def sync_work(work: Work):
         logger.warning("Local entry not in folder: %s", uri)
 
 
+def sync_partial_collection(collection: Collection, sync_existing: bool = True):
+
+    works = Work.objects.filter(collection=collection, meta_info__name="folderid")
+
+    for work in works:
+        sync_work(work)
+
+
 def sync_collection(collection: Collection, sync_existing: bool = False):
     logger.info("Syncing '%s'", collection)
 
     if not collection.storage.storage.endswith("GDriveLinkStorage"):
-        raise RuntimeError("Not a gdrive storage")
-
-    try:
-        folder_id = collection.settings["folder_id"]
-    except KeyError:
-        raise KeyError("Missing 'folder_id' in settings")
+        return sync_partial_collection(collection, sync_existing)
 
     existing = dict(
         WorkMeta.objects.filter(
@@ -59,8 +65,9 @@ def sync_collection(collection: Collection, sync_existing: bool = False):
         ).values_list("value", "work_id")
     )
 
+    folder = collection.prefix
     storage = collection.storage.instance()
-    folders, _ = storage.listdir(folder_id)
+    folders, _ = storage.listdir(folder)
 
     for folder in folders:
         if folder[0] == "_":
diff --git a/polyphonic/library/gdrive/storage.py b/polyphonic/library/gdrive/storage.py
index 548bbbf..b3d9f08 100644
--- a/polyphonic/library/gdrive/storage.py
+++ b/polyphonic/library/gdrive/storage.py
@@ -8,19 +8,26 @@ import logging
 
 logger = logging.getLogger(__name__)
 
-SHARED_FOLDER = re.compile(r"https://drive.google.com/drive[u0-9\/]+folders/([\w\-]+)")
-SHARED_FILE = re.compile(r"https://drive.google.com/file/d/([\w\-]+)")
+SHARED_FOLDER = re.compile(
+    r"https://drive.google.com/drive[u0-9\/]+folders/([\w\-]+)(\?resourcekey=([\w\-]+))?"
+)
+SHARED_FILE = re.compile(
+    r"https://drive.google.com/file/d/([\w\-]+)(\?resourcekey=([\w\-]+))?"
+)
 
 FILES_API = "https://www.googleapis.com/drive/v3/files"
 
 
-class DriveObject(namedtuple("DriveObject", ("id", "name"))):
+class DriveObject(namedtuple("DriveObject", ("id", "key", "name"))):
     @classmethod
-    def from_string(cls, s):
-        return cls(*s.split("/", 1))
+    def from_string(cls, s: str):
+        resource, _, name = s.partition("/")
+        id, _, key = resource.partition("#")
+        return cls(id, key, name)
 
     def __str__(self):
-        return f"{self.id}/{self.name}"
+        resource = f"{self.id}#{self.key}" if self.key else self.id
+        return f"{resource}/{self.name}"
 
 
 class GDriveLinkStorage(Storage):
@@ -30,21 +37,28 @@ class GDriveLinkStorage(Storage):
         self.api_key = api_key
         super().__init__()
 
-    def parse_id(self, name):
-        parts = name.split("/")
-        return parts[0]
+    def parse_resource(self, name) -> DriveObject:
+        return DriveObject.from_string(name)
 
-    def extract_id(self, url, *patterns):
-        logger.debug("EXTRACT_ID: %r", url)
+    def extract_resource(self, url, *patterns) -> DriveObject:
+        logger.debug("EXTRACT_RESOURCE: %r", url)
         for pattern in patterns:
             match = pattern.match(url)
             if match:
-                return match.groups()[0]
+                groups = match.groups()
+                logger.debug(groups)
+                if len(groups) == 3:
+                    return DriveObject(groups[0], groups[2], "")
+                return DriveObject(groups[0], None, "")
         raise FileNotFoundError(f"Not a valid url: {url}")
 
-    def get_json(self, url):
-        logger.debug("GET_JSON: %s", url)
-        response = requests.get(url)
+    def get_json(self, url, resource: DriveObject):
+        headers = {}
+        if resource.key:
+            headers["X-Goog-Drive-Resource-Keys"] = f"{resource.id}/{resource.key}"
+
+        logger.debug("GET_JSON: %s %r", url, headers)
+        response = requests.get(url, headers=headers)
         data = response.json()
         logger.debug("Data: %r", data)
         return data
@@ -55,31 +69,36 @@ class GDriveLinkStorage(Storage):
         if path == "":
             return [], []
 
-        folder_id = self.parse_id(path)
-        url = f"{FILES_API}?q='{folder_id}'+in+parents&key={self.api_key}"
-        data = self.get_json(url)
+        folder = self.parse_resource(path)
+        url = f"{FILES_API}?q='{folder.id}'+in+parents&key={self.api_key}"
+        data = self.get_json(url, folder)
 
         files = []
         folders = []
         for x in data["files"]:
             if x["mimeType"] == "application/vnd.google-apps.folder":
                 # folders.append(f"{x['id']}/{x['name']}")
-                folders.append(DriveObject(x["id"], x["name"]))
+                folders.append(DriveObject(x["id"], x.get("resourceKey"), x["name"]))
             else:
                 # files.append(f"{x['id']}/{x['name']}")
-                files.append(DriveObject(x["id"], x["name"]))
+                files.append(DriveObject(x["id"], x.get("resourceKey"), x["name"]))
         return folders, files
 
     def get_meta(self, name):
-        file_id = self.parse_id(name)
-        url = f"{FILES_API}/{file_id}?key={self.api_key}"
-        return self.get_json(url)
+        file_resource = self.parse_resource(name)
+        url = f"{FILES_API}/{file_resource.id}?key={self.api_key}"
+        return self.get_json(url, file_resource)
 
     def open(self, name, mode="rb"):
-        file_id = self.parse_id(name)
-        url = f"{FILES_API}/{file_id}?alt=media&key={self.api_key}"
-        logger.info("URL: %s", url)
-        response = requests.get(url, stream=True)
+        resource = self.parse_resource(name)
+        url = f"{FILES_API}/{resource.id}?alt=media&key={self.api_key}"
+
+        headers = {}
+        if resource.key:
+            headers["X-Goog-Drive-Resource-Keys"] = f"{resource.id}/{resource.key}"
+        logger.info("URL: %s [%r]", url, headers)
+
+        response = requests.get(url, headers=headers, stream=True)
         return GzipFile(name, "rb", 9, response.raw)
 
     def size(self, name):
@@ -90,25 +109,28 @@ class GDriveLinkStorage(Storage):
 
     def url(self, name):
         logger.debug("URL: %r", name)
-        file_id = self.parse_id(name)
-        return f"https://drive.usercontent.google.com/download?export=download&id={file_id}&confirm=yes"
+        resource = self.parse_resource(name)
+        uri = f"https://drive.usercontent.google.com/download?export=download&id={resource.id}&confirm=yes"
+        if resource.key:
+            uri += f"&resourcekey={resource.key}"
+        return uri
 
     def get_folder_id(self, url):
         try:
-            return self.extract_id(url, SHARED_FOLDER)
+            return self.extract_resource(url, SHARED_FOLDER)
         except FileNotFoundError:
             return None
 
     def get_file_id(self, url):
         try:
-            return self.extract_id(url, SHARED_FILE)
+            return self.extract_resource(url, SHARED_FILE)
         except FileNotFoundError:
             return None
 
     def import_link(self, url) -> str:
-        file_id = self.extract_id(url, SHARED_FILE)
-        meta = self.get_meta(file_id)
-        return f"{file_id}/{meta['name']}"
+        file_resource = self.extract_resource(url, SHARED_FILE)
+        meta = self.get_meta(str(file_resource))
+        return str(file_resource._replace(name=meta["name"]))
 
 
 """