Added handling of gdrive resource_keys - fixes #16
This commit is contained in:
parent
e46d8145a7
commit
5468f6d3e7
@ -7,7 +7,10 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def sync_work(work: Work):
|
||||
folder_id = work.meta_info.get(name="folderid").value
|
||||
try:
|
||||
folder_id = work.meta_info.get(name="folderid").value
|
||||
except WorkMeta.DoesNotExist as err:
|
||||
raise IndexError("Work not currently linked to a gdrive folder") from err
|
||||
|
||||
logger.info("Syncing '%s' from %r", work.name, folder_id)
|
||||
|
||||
@ -15,7 +18,7 @@ def sync_work(work: Work):
|
||||
|
||||
existing = set(
|
||||
[
|
||||
storage.parse_id(x.partition(":")[2])
|
||||
storage.parse_resource(x.partition(":")[2]).id
|
||||
for x in work.docs.values_list("upload", flat=True)
|
||||
]
|
||||
)
|
||||
@ -42,16 +45,19 @@ def sync_work(work: Work):
|
||||
logger.warning("Local entry not in folder: %s", uri)
|
||||
|
||||
|
||||
def sync_partial_collection(collection: Collection, sync_existing: bool = True):
    """Sync every work in ``collection`` that has a ``folderid`` meta entry.

    Each matching work is handed to ``sync_work`` in turn.

    NOTE(review): ``sync_existing`` is accepted but not read here — confirm
    whether it is meant to be forwarded.
    """
    linked_works = Work.objects.filter(
        collection=collection,
        meta_info__name="folderid",
    )
    for linked in linked_works:
        sync_work(linked)
|
||||
|
||||
|
||||
def sync_collection(collection: Collection, sync_existing: bool = False):
|
||||
logger.info("Syncing '%s'", collection)
|
||||
|
||||
if not collection.storage.storage.endswith("GDriveLinkStorage"):
|
||||
raise RuntimeError("Not a gdrive storage")
|
||||
|
||||
try:
|
||||
folder_id = collection.settings["folder_id"]
|
||||
except KeyError:
|
||||
raise KeyError("Missing 'folder_id' in settings")
|
||||
return sync_partial_collection(collection, sync_existing)
|
||||
|
||||
existing = dict(
|
||||
WorkMeta.objects.filter(
|
||||
@ -59,8 +65,9 @@ def sync_collection(collection: Collection, sync_existing: bool = False):
|
||||
).values_list("value", "work_id")
|
||||
)
|
||||
|
||||
folder = collection.prefix
|
||||
storage = collection.storage.instance()
|
||||
folders, _ = storage.listdir(folder_id)
|
||||
folders, _ = storage.listdir(folder)
|
||||
|
||||
for folder in folders:
|
||||
if folder[0] == "_":
|
||||
|
||||
@ -8,19 +8,26 @@ import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Shared-link patterns. Group 1 is the Drive object id, group 2 the whole
# optional "?resourcekey=..." suffix and group 3 the resource key itself
# (None when the URL carries no key). The dots in the host name are escaped
# so that they only match a literal ".".
SHARED_FOLDER = re.compile(
    r"https://drive\.google\.com/drive[u0-9\/]+folders/([\w\-]+)(\?resourcekey=([\w\-]+))?"
)
SHARED_FILE = re.compile(
    r"https://drive\.google\.com/file/d/([\w\-]+)(\?resourcekey=([\w\-]+))?"
)
|
||||
|
||||
FILES_API = "https://www.googleapis.com/drive/v3/files"
|
||||
|
||||
|
||||
class DriveObject(namedtuple("DriveObject", ("id", "key", "name"))):
    """Reference to a Google Drive file or folder.

    The string form is ``<id>/<name>``, or ``<id>#<key>/<name>`` when a
    resource key is present.
    """

    @classmethod
    def from_string(cls, s: str):
        """Parse the string form produced by ``__str__``.

        Only the first ``/`` separates the resource from the name, so the
        name may itself contain ``/``. A missing or empty key is stored as
        ``None`` so that a parsed object compares equal to one built
        directly with ``key=None``.
        """
        resource, _, name = s.partition("/")
        resource_id, _, key = resource.partition("#")
        return cls(resource_id, key or None, name)

    def __str__(self):
        """Return ``<id>[#<key>]/<name>``; the key part is omitted when falsy."""
        resource = f"{self.id}#{self.key}" if self.key else self.id
        return f"{resource}/{self.name}"
|
||||
|
||||
|
||||
class GDriveLinkStorage(Storage):
|
||||
@ -30,21 +37,28 @@ class GDriveLinkStorage(Storage):
|
||||
self.api_key = api_key
|
||||
super().__init__()
|
||||
|
||||
def parse_resource(self, name) -> DriveObject:
    """Parse a stored ``<id>[#<key>]/<name>`` string into a ``DriveObject``."""
    return DriveObject.from_string(name)

def parse_id(self, name):
    """Return only the Drive id of a stored name.

    Delegates to ``parse_resource`` so that a ``#<key>`` suffix on the id
    part is not returned as part of the id.
    """
    return self.parse_resource(name).id
|
||||
|
||||
def extract_resource(self, url, *patterns) -> DriveObject:
    """Build a ``DriveObject`` from the first pattern that matches ``url``.

    Group 1 of the matching pattern is used as the id. When the pattern
    defines three groups, group 3 is used as the resource key (``None`` if
    the URL carries none). The returned object has an empty name.

    Raises:
        FileNotFoundError: if none of ``patterns`` matches ``url``.
    """
    logger.debug("EXTRACT_RESOURCE: %r", url)
    for pattern in patterns:
        match = pattern.match(url)
        if not match:
            continue
        groups = match.groups()
        logger.debug("%r", groups)
        key = groups[2] if len(groups) == 3 else None
        return DriveObject(groups[0], key, "")
    raise FileNotFoundError(f"Not a valid url: {url}")

def extract_id(self, url, *patterns):
    """Return only the id found by ``extract_resource`` (older entry point)."""
    return self.extract_resource(url, *patterns).id
|
||||
|
||||
def get_json(self, url, resource: "DriveObject | None" = None):
    """GET ``url`` and return the decoded JSON body.

    When ``resource`` has a resource key, it is sent in the
    ``X-Goog-Drive-Resource-Keys`` header as ``<id>/<key>``. ``resource``
    is optional so that one-argument calls keep working.
    """
    headers = {}
    if resource is not None and resource.key:
        headers["X-Goog-Drive-Resource-Keys"] = f"{resource.id}/{resource.key}"

    logger.debug("GET_JSON: %s %r", url, headers)
    response = requests.get(url, headers=headers)
    data = response.json()
    logger.debug("Data: %r", data)
    return data
|
||||
@ -55,31 +69,36 @@ class GDriveLinkStorage(Storage):
|
||||
if path == "":
|
||||
return [], []
|
||||
|
||||
folder_id = self.parse_id(path)
|
||||
url = f"{FILES_API}?q='{folder_id}'+in+parents&key={self.api_key}"
|
||||
data = self.get_json(url)
|
||||
folder = self.parse_resource(path)
|
||||
url = f"{FILES_API}?q='{folder.id}'+in+parents&key={self.api_key}"
|
||||
data = self.get_json(url, folder)
|
||||
files = []
|
||||
folders = []
|
||||
for x in data["files"]:
|
||||
if x["mimeType"] == "application/vnd.google-apps.folder":
|
||||
# folders.append(f"{x['id']}/{x['name']}")
|
||||
folders.append(DriveObject(x["id"], x["name"]))
|
||||
folders.append(DriveObject(x["id"], x.get("resourceKey"), x["name"]))
|
||||
else:
|
||||
# files.append(f"{x['id']}/{x['name']}")
|
||||
files.append(DriveObject(x["id"], x["name"]))
|
||||
files.append(DriveObject(x["id"], x.get("resourceKey"), x["name"]))
|
||||
|
||||
return folders, files
|
||||
|
||||
def get_meta(self, name):
    """Fetch the Drive API metadata for a file.

    ``name`` may be a stored ``<id>[#<key>]/<name>`` string or an already
    parsed ``DriveObject``; ``import_link`` passes the latter, which cannot
    be parsed as a string.
    """
    if isinstance(name, DriveObject):
        file_resource = name
    else:
        file_resource = self.parse_resource(name)
    url = f"{FILES_API}/{file_resource.id}?key={self.api_key}"
    return self.get_json(url, file_resource)
|
||||
|
||||
def open(self, name, mode="rb"):
    """Stream a file's content from the Drive API, wrapped in a ``GzipFile``.

    The resource key, when present, is sent in the
    ``X-Goog-Drive-Resource-Keys`` header. ``mode`` is not read; the
    returned file is always opened ``"rb"``.
    """
    resource = self.parse_resource(name)
    url = f"{FILES_API}/{resource.id}?alt=media&key={self.api_key}"

    headers = {}
    if resource.key:
        headers["X-Goog-Drive-Resource-Keys"] = f"{resource.id}/{resource.key}"
    logger.info("URL: %s [%r]", url, headers)

    response = requests.get(url, headers=headers, stream=True)
    return GzipFile(name, "rb", 9, response.raw)
|
||||
|
||||
def size(self, name):
|
||||
@ -90,25 +109,28 @@ class GDriveLinkStorage(Storage):
|
||||
|
||||
def url(self, name):
    """Return the direct-download URL for a stored name.

    A ``resourcekey`` query parameter is appended when the parsed resource
    has a key.
    """
    logger.debug("URL: %r", name)
    resource = self.parse_resource(name)
    uri = f"https://drive.usercontent.google.com/download?export=download&id={resource.id}&confirm=yes"
    if resource.key:
        # Plain f-string interpolation: a "$" before the brace would be
        # emitted literally and corrupt the key in the URL.
        uri += f"&resourcekey={resource.key}"
    return uri
|
||||
|
||||
def get_folder_id(self, url):
    """Return the ``DriveObject`` for a shared-folder URL, or ``None`` if ``url`` is not one."""
    try:
        return self.extract_resource(url, SHARED_FOLDER)
    except FileNotFoundError:
        return None
|
||||
|
||||
def get_file_id(self, url):
    """Return the ``DriveObject`` for a shared-file URL, or ``None`` if ``url`` is not one."""
    try:
        return self.extract_resource(url, SHARED_FILE)
    except FileNotFoundError:
        return None
|
||||
|
||||
def import_link(self, url) -> str:
    """Turn a shared-file URL into a stored ``<id>[#<key>]/<name>`` string.

    The file name is taken from the ``name`` field of the metadata returned
    by ``get_meta``.

    Raises:
        FileNotFoundError: if ``url`` does not match ``SHARED_FILE``.
    """
    file_resource = self.extract_resource(url, SHARED_FILE)
    # get_meta parses a stored-name string, so pass the string form rather
    # than the DriveObject itself.
    meta = self.get_meta(str(file_resource))
    # Set the name on the resource before formatting: the nameless resource
    # already ends in "/", so appending "/<name>" would give "<id>//<name>".
    return str(file_resource._replace(name=meta["name"]))
|
||||
|
||||
|
||||
"""
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user