Added handling of gdrive resource_keys - fixes #16

This commit is contained in:
Tris Forster 2026-05-27 22:46:08 +10:00
parent e46d8145a7
commit 5468f6d3e7
2 changed files with 72 additions and 43 deletions

View File

@ -7,7 +7,10 @@ logger = logging.getLogger(__name__)
def sync_work(work: Work):
try:
folder_id = work.meta_info.get(name="folderid").value
except WorkMeta.DoesNotExist as err:
raise IndexError("Work not currently linked to a gdrive folder") from err
logger.info("Syncing '%s' from %r", work.name, folder_id)
@ -15,7 +18,7 @@ def sync_work(work: Work):
existing = set(
[
storage.parse_id(x.partition(":")[2])
storage.parse_resource(x.partition(":")[2]).id
for x in work.docs.values_list("upload", flat=True)
]
)
@ -42,16 +45,19 @@ def sync_work(work: Work):
logger.warning("Local entry not in folder: %s", uri)
def sync_partial_collection(collection: Collection, sync_existing: bool = True):
    """Sync each work in *collection* that carries a "folderid" meta entry.

    Every matching work is handed to ``sync_work`` in turn.

    ``sync_existing`` is accepted but is not read by this function.
    """
    for linked_work in Work.objects.filter(
        collection=collection, meta_info__name="folderid"
    ):
        sync_work(linked_work)
def sync_collection(collection: Collection, sync_existing: bool = False):
logger.info("Syncing '%s'", collection)
if not collection.storage.storage.endswith("GDriveLinkStorage"):
raise RuntimeError("Not a gdrive storage")
try:
folder_id = collection.settings["folder_id"]
except KeyError:
raise KeyError("Missing 'folder_id' in settings")
return sync_partial_collection(collection, sync_existing)
existing = dict(
WorkMeta.objects.filter(
@ -59,8 +65,9 @@ def sync_collection(collection: Collection, sync_existing: bool = False):
).values_list("value", "work_id")
)
folder = collection.prefix
storage = collection.storage.instance()
folders, _ = storage.listdir(folder_id)
folders, _ = storage.listdir(folder)
for folder in folders:
if folder[0] == "_":

View File

@ -8,19 +8,26 @@ import logging
logger = logging.getLogger(__name__)
# Share-link patterns for Google Drive folders and files.
#
# Group layout (relied on by callers that index ``match.groups()``):
#   groups()[0] -> the Drive id
#   groups()[1] -> the query-string span up to and including the resource key
#   groups()[2] -> the resource key, or None when the link carries none
#
# The host dots are escaped so that only "drive.google.com" matches.  Any
# trailing path segment (e.g. "/view") and any query parameters that precede
# "resourcekey" (e.g. "usp=sharing&") are skipped, so the key is still found
# when it is not the first thing after the id.
SHARED_FOLDER = re.compile(
    r"https://drive\.google\.com/drive[u0-9/]+folders/([\w\-]+)"
    r"[^?#]*(\?(?:[^#]*&)?resourcekey=([\w\-]+))?"
)
SHARED_FILE = re.compile(
    r"https://drive\.google\.com/file/d/([\w\-]+)"
    r"[^?#]*(\?(?:[^#]*&)?resourcekey=([\w\-]+))?"
)

# Base endpoint of the Drive v3 "files" collection.
FILES_API = "https://www.googleapis.com/drive/v3/files"
class DriveObject(namedtuple("DriveObject", ("id", "key", "name"))):
    """A Drive file or folder reference: id, optional resource key, name.

    The string form is ``"<id>/<name>"``, or ``"<id>#<key>/<name>"`` when a
    resource key is present.  ``from_string`` is the inverse of ``str()``.
    """

    @classmethod
    def from_string(cls, s: str):
        """Parse ``"<id>[#<key>]/<name>"`` into a ``DriveObject``.

        Only the first "/" separates the resource from the name, so the name
        itself may contain further slashes.  A missing key is reported as
        ``None`` (not ``""``) so the result compares equal to objects built
        elsewhere with ``key=None``.
        """
        resource, _, name = s.partition("/")
        # "file_id" rather than "id" to avoid shadowing the builtin.
        file_id, _, key = resource.partition("#")
        return cls(file_id, key or None, name)

    def __str__(self):
        """Return the storable string form accepted by ``from_string``."""
        resource = f"{self.id}#{self.key}" if self.key else self.id
        return f"{resource}/{self.name}"
class GDriveLinkStorage(Storage):
@ -30,21 +37,28 @@ class GDriveLinkStorage(Storage):
self.api_key = api_key
super().__init__()
def parse_resource(self, name) -> DriveObject:
    """Return the ``DriveObject`` described by *name*.

    *name* is normally the stored string form (``"<id>[#<key>]/<name>"``).
    An already-parsed ``DriveObject`` is returned unchanged, so callers
    holding a resource can pass it straight through instead of failing on
    the string parsing.
    """
    if isinstance(name, DriveObject):
        return name
    return DriveObject.from_string(name)
def extract_resource(self, url, *patterns) -> DriveObject:
    """Build a ``DriveObject`` from the first of *patterns* matching *url*.

    The first capture group is used as the id.  When the pattern has exactly
    three groups, the third is used as the resource key (it may be ``None``
    if that optional group did not take part in the match); otherwise the
    key is ``None``.  The name is always left empty.

    Raises:
        FileNotFoundError: if none of the patterns match *url*.
    """
    logger.debug("EXTRACT_RESOURCE: %r", url)
    for candidate in patterns:
        found = candidate.match(url)
        if found is None:
            continue
        groups = found.groups()
        logger.debug(groups)
        resource_key = groups[2] if len(groups) == 3 else None
        return DriveObject(groups[0], resource_key, "")
    raise FileNotFoundError(f"Not a valid url: {url}")
def get_json(self, url, resource: "DriveObject | None" = None):
    """GET *url* and return the decoded JSON body.

    Args:
        url: Fully-built request URL.
        resource: Optional resource the request is about.  When it carries a
            resource key, the key is sent in the
            ``X-Goog-Drive-Resource-Keys`` header as ``"<id>/<key>"``.
            Defaults to ``None`` so that callers using the one-argument form
            ``get_json(url)`` keep working.

    Returns:
        Whatever ``response.json()`` yields; the HTTP status is not checked.
    """
    headers = {}
    if resource is not None and resource.key:
        headers["X-Goog-Drive-Resource-Keys"] = f"{resource.id}/{resource.key}"
    # NOTE(review): the request is issued without a timeout - confirm whether
    # one should be set for this deployment.
    logger.debug("GET_JSON: %s %r", url, headers)
    response = requests.get(url, headers=headers)
    data = response.json()
    logger.debug("Data: %r", data)
    return data
@ -55,31 +69,36 @@ class GDriveLinkStorage(Storage):
if path == "":
return [], []
folder_id = self.parse_id(path)
url = f"{FILES_API}?q='{folder_id}'+in+parents&key={self.api_key}"
data = self.get_json(url)
folder = self.parse_resource(path)
url = f"{FILES_API}?q='{folder.id}'+in+parents&key={self.api_key}"
data = self.get_json(url, folder)
files = []
folders = []
for x in data["files"]:
if x["mimeType"] == "application/vnd.google-apps.folder":
# folders.append(f"{x['id']}/{x['name']}")
folders.append(DriveObject(x["id"], x["name"]))
folders.append(DriveObject(x["id"], x.get("resourceKey"), x["name"]))
else:
# files.append(f"{x['id']}/{x['name']}")
files.append(DriveObject(x["id"], x["name"]))
files.append(DriveObject(x["id"], x.get("resourceKey"), x["name"]))
return folders, files
def get_meta(self, name):
    """Fetch the Drive metadata JSON for the resource identified by *name*.

    *name* is parsed with ``parse_resource``; the resulting resource is also
    passed to ``get_json`` so its resource key (if any) accompanies the
    request.
    """
    target = self.parse_resource(name)
    endpoint = f"{FILES_API}/{target.id}?key={self.api_key}"
    return self.get_json(endpoint, target)
def open(self, name, mode="rb"):
    """Stream the content of *name* from Drive wrapped in a ``GzipFile``.

    *mode* is accepted but not used: the ``GzipFile`` is always created in
    ``"rb"`` mode over the raw response stream.  If the resource has a
    resource key it is sent in the ``X-Goog-Drive-Resource-Keys`` header.
    """
    target = self.parse_resource(name)
    download_url = f"{FILES_API}/{target.id}?alt=media&key={self.api_key}"
    extra_headers = (
        {"X-Goog-Drive-Resource-Keys": f"{target.id}/{target.key}"}
        if target.key
        else {}
    )
    # NOTE(review): the logged URL includes the API key.
    logger.info("URL: %s [%r]", download_url, extra_headers)
    reply = requests.get(download_url, headers=extra_headers, stream=True)
    return GzipFile(name, "rb", 9, reply.raw)
def size(self, name):
@ -90,25 +109,28 @@ class GDriveLinkStorage(Storage):
def url(self, name):
    """Return the direct-download URL for the resource identified by *name*.

    When the resource has a resource key it is appended as the
    ``resourcekey`` query parameter.
    """
    logger.debug("URL: %r", name)
    resource = self.parse_resource(name)
    uri = (
        "https://drive.usercontent.google.com/download"
        f"?export=download&id={resource.id}&confirm=yes"
    )
    if resource.key:
        # Plain "{...}" interpolation: a "${...}" placeholder would put a
        # literal "$" in front of the key and corrupt the parameter value.
        uri += f"&resourcekey={resource.key}"
    return uri
def get_folder_id(self, url):
    """Return the folder resource parsed from a shared-folder *url*.

    The result is whatever ``extract_resource`` returns for the
    ``SHARED_FOLDER`` pattern, or ``None`` when it raises
    ``FileNotFoundError`` (i.e. the URL does not match).
    """
    try:
        folder = self.extract_resource(url, SHARED_FOLDER)
    except FileNotFoundError:
        folder = None
    return folder
def get_file_id(self, url):
    """Return the file resource parsed from a shared-file *url*.

    The result is whatever ``extract_resource`` returns for the
    ``SHARED_FILE`` pattern, or ``None`` when it raises
    ``FileNotFoundError`` (i.e. the URL does not match).
    """
    try:
        shared_file = self.extract_resource(url, SHARED_FILE)
    except FileNotFoundError:
        shared_file = None
    return shared_file
def import_link(self, url) -> str:
    """Turn a shared-file *url* into the stored ``"<id>[#<key>]/<name>"`` form.

    The id and optional resource key come from the URL; the name is taken
    from the ``"name"`` field of the metadata returned by ``get_meta``.

    Raises:
        FileNotFoundError: if *url* is not a recognised shared-file link.
    """
    resource = self.extract_resource(url, SHARED_FILE)
    # get_meta expects the string form, so serialise the resource first
    # rather than passing the tuple itself.
    meta = self.get_meta(str(resource))
    # str(resource) already ends in "/", so appending "/<name>" to it would
    # produce a double slash; fill in the name field and serialise instead.
    return str(resource._replace(name=meta["name"]))
"""