Files
Cloud-Index/storages/onedrive.py

687 lines
24 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
from datetime import datetime, timedelta
from io import BytesIO
from typing import Any, Dict, Optional
import requests
from PIL import Image
from config import Config
from .base import BaseStorage
class _InvalidGrant(Exception):
"""内部异常:用于标记 invalid_grant 以便进行下一次尝试"""
pass
class OnedriveStorage(BaseStorage):
"""基于 OneDrive 的存储实现,支持自动令牌刷新"""
def __init__(self):
"""初始化 OneDrive 存储客户端"""
self.client_id = Config.ONEDRIVE_CLIENT_ID
self.client_secret = Config.ONEDRIVE_CLIENT_SECRET
self.refresh_token = Config.ONEDRIVE_REFRESH_TOKEN
self.folder_id = Config.ONEDRIVE_FOLDER_ID
self.graph_api_url = "https://graph.microsoft.com/v1.0"
if not (self.client_id and self.client_secret and self.refresh_token):
raise RuntimeError("ONEDRIVE_CLIENT_ID, ONEDRIVE_CLIENT_SECRET, and ONEDRIVE_REFRESH_TOKEN must be set")
# 如果没有指定 folder_id使用 /me/drive/root
self.folder_item_id = self.folder_id or "root"
# 初始化 access_token 并刷新
self.access_token = None
self._refresh_token()
def _refresh_token(self) -> None:
"""刷新 OneDrive 访问令牌"""
configured_scopes = getattr(Config, "ONEDRIVE_SCOPES", None)
attempts = [None] + ([configured_scopes] if configured_scopes else []) + ["Files.ReadWrite.All offline_access"]
token_json = self._perform_refresh_attempts(attempts)
self.access_token = token_json["access_token"]
new_refresh = token_json.get("refresh_token")
if new_refresh:
self.refresh_token = new_refresh
self._access_token_expires_in = token_json.get("expires_in")
def _perform_refresh_attempts(self, attempts: list) -> dict:
errors: list[str] = []
for idx, scope in enumerate(attempts, start=1):
try:
token_json = self._do_refresh_attempt(scope)
return token_json
except _InvalidGrant as e:
errors.append(f"Attempt {idx} invalid_grant: {str(e)}")
continue
raise RuntimeError("Failed to refresh OneDrive token: " + " | ".join(errors))
def _do_refresh_attempt(self, scope: str | None) -> dict:
url = "https://login.microsoftonline.com/common/oauth2/v2.0/token"
headers = {"Content-Type": "application/x-www-form-urlencoded"}
payload = {
"client_id": self.client_id,
"grant_type": "refresh_token",
"refresh_token": self.refresh_token,
}
if scope:
payload["scope"] = scope
if self.client_secret:
payload["client_secret"] = self.client_secret
redirect_uri = getattr(Config, "ONEDRIVE_REDIRECT_URI", None)
if redirect_uri:
payload["redirect_uri"] = redirect_uri
resp = requests.post(url, data=payload, headers=headers, timeout=20)
try:
detail = resp.json()
except Exception:
detail = resp.text
if resp.status_code != 200:
# 如果是 invalid_grant抛出内部异常以触发下一次尝试
if isinstance(detail, dict) and detail.get("error") == "invalid_grant":
raise _InvalidGrant(detail)
raise RuntimeError(f"Token endpoint error {resp.status_code}: {detail}")
if isinstance(detail, dict) and detail.get("error"):
if detail.get("error") == "invalid_grant":
raise _InvalidGrant(detail)
raise RuntimeError(f"Token response error: {detail}")
token_json = detail if isinstance(detail, dict) else {}
access_token = token_json.get("access_token")
if not access_token:
# 视为无效,交给下一次尝试
raise _InvalidGrant({"error": "missing_access_token", "detail": detail})
return token_json
def _try_refresh(self, func):
"""尝试执行函数,如果失败则刷新令牌后重试(处理令牌过期)"""
try:
return func()
except Exception as e:
if "401" in str(e) or "Unauthorized" in str(e):
# 令牌过期,刷新并重试
self._refresh_token()
return func()
raise
def _api_request(self, method: str, url: str, **kwargs):
"""
执行 API 请求,自动处理令牌过期
Args:
method: HTTP 方法 (GET, POST, PUT, DELETE, PATCH)
url: API URL
**kwargs: 其他请求参数
Returns:
响应对象
"""
def _do_request():
response = requests.request(method, url, headers=self._headers(), **kwargs)
if response.status_code == 401:
raise RuntimeError("Unauthorized - OneDrive access token expired")
return response
return self._try_refresh(_do_request)
def _headers(self) -> Dict[str, str]:
"""返回 API 请求的公共头部信息"""
return {
"Authorization": f"Bearer {self.access_token}",
"Content-Type": "application/json",
}
def _verify_connection(self) -> None:
"""验证 OneDrive 连接是否有效"""
try:
url = f"{self.graph_api_url}/me/drive"
response = requests.get(url, headers=self._headers(), timeout=10)
if response.status_code != 200:
raise RuntimeError(f"OneDrive connection failed: {response.text}")
except Exception as e:
raise RuntimeError(f"Failed to connect to OneDrive: {str(e)}") from None
def _get_folder_id(self, path: str) -> Optional[str]:
"""获取指定路径的文件夹 ID"""
if not path or path == "/":
return self.folder_item_id
try:
# 直接获取该路径对应项的元数据(而不是其子项),以拿到该文件夹自身的 id
path = path.strip("/")
if self.folder_item_id == "root":
url = f"{self.graph_api_url}/me/drive/root:/{path}:"
else:
url = f"{self.graph_api_url}/me/drive/items/{self.folder_item_id}:/{path}:"
response = requests.get(url, headers=self._headers(), timeout=10)
if response.status_code == 200:
item = response.json()
if item.get("folder"):
return item.get("id")
except Exception:
pass
return None
def list_objects(self, prefix: str = "") -> Dict[str, Any]:
"""
列出存储桶中的对象
Args:
prefix: 对象前缀(用于目录浏览)
Returns:
包含对象列表的字典
"""
try:
prefix = prefix.rstrip("/") if prefix else ""
# 直接基于路径列出,避免先查 ID 再列出造成的额外往返
select = "$select=name,size,lastModifiedDateTime,id,folder,file"
if prefix:
if self.folder_item_id == "root":
url = f"{self.graph_api_url}/me/drive/root:/{prefix}:/children?{select}"
else:
url = f"{self.graph_api_url}/me/drive/items/{self.folder_item_id}:/{prefix}:/children?{select}"
else:
if self.folder_item_id == "root":
url = f"{self.graph_api_url}/me/drive/root/children?{select}"
else:
url = f"{self.graph_api_url}/me/drive/items/{self.folder_item_id}/children?{select}"
response = self._api_request("GET", url)
response.raise_for_status()
items = response.json().get("value", [])
files = []
folders = []
for item in items:
# 跳过特殊文件
if item.get("name", "").startswith("."):
continue
item_path = f"{prefix}/{item.get('name')}" if prefix else item.get("name")
if "folder" in item:
# 这是一个文件夹
folders.append({"Prefix": f"{item_path}/"})
else:
# 这是一个文件
size = item.get("size", 0)
modified_time = item.get("lastModifiedDateTime", datetime.now().isoformat())
# 解析 ISO 格式时间
try:
if isinstance(modified_time, str):
modified_time = datetime.fromisoformat(modified_time.replace("Z", "+00:00"))
except Exception:
modified_time = datetime.now()
files.append(
{
"Key": item_path,
"Size": size,
"LastModified": modified_time,
"ETag": item.get("id", ""),
}
)
return {
"Contents": sorted(files, key=lambda x: x["Key"]),
"CommonPrefixes": sorted(folders, key=lambda x: x["Prefix"]),
}
except Exception as e:
import traceback
traceback.print_exc()
raise RuntimeError(f"Failed to list OneDrive objects: {str(e)}") from None
def get_object_info(self, key: str) -> Dict[str, Any]:
"""
获取对象基本信息
Args:
key: 对象键名
Returns:
对象元数据
"""
try:
key = key.strip("/")
url = f"{self.graph_api_url}/me/drive/items/{self.folder_item_id}:/{key}:"
response = self._api_request("GET", url)
response.raise_for_status()
item = response.json()
return {
"Key": key,
"Size": item.get("size", 0),
"LastModified": item.get("lastModifiedDateTime", datetime.now().isoformat()),
"ETag": item.get("id", ""),
}
except Exception as e:
raise RuntimeError(f"Failed to get OneDrive object info: {str(e)}") from None
def get_object(self, key: str) -> Dict[str, Any]:
"""
获取对象内容
Args:
key: 对象键名
Returns:
包含对象内容的字典
"""
try:
key = key.strip("/")
# 获取下载 URL
url = f"{self.graph_api_url}/me/drive/items/{self.folder_item_id}:/{key}:/content"
response = self._api_request("GET", url)
response.raise_for_status()
return {
"Body": response.content,
"ContentType": response.headers.get("Content-Type", "application/octet-stream"),
}
except Exception as e:
raise RuntimeError(f"Failed to get OneDrive object: {str(e)}") from None
def generate_presigned_url(self, key: str, expires: int = None) -> str:
"""
为指定对象生成预签名 URL
Args:
key: 对象键名
expires: 过期时间(秒)
Returns:
预签名 URL失败返回 None
"""
try:
expires = expires or Config.PRESIGNED_URL_EXPIRES
key = key.strip("/")
# OneDrive 会自动处理预签名 URL
url = f"{self.graph_api_url}/me/drive/items/{self.folder_item_id}:/{key}:/createLink"
request_body = {
"type": "view",
"expirationDateTime": (datetime.utcnow() + timedelta(seconds=expires)).isoformat() + "Z",
}
response = requests.post(url, headers=self._headers(), json=request_body)
if response.status_code == 201:
return response.json().get("link", {}).get("webUrl")
return None
except Exception:
return None
def get_public_url(self, key: str) -> str:
"""
生成对象的公共访问 URL
Args:
key: 对象键名
Returns:
公共 URL未配置返回 None
"""
try:
key = key.strip("/")
# OneDrive 的共享 URL
url = f"{self.graph_api_url}/me/drive/items/{self.folder_item_id}:/{key}:/webUrl"
response = requests.get(url, headers=self._headers())
if response.status_code == 200:
return response.json().get("webUrl")
return None
except Exception:
return None
def upload_file(self, key: str, file_data: bytes, content_type: str = None) -> bool:
"""
上传文件
Args:
key: 对象键名
file_data: 文件内容
content_type: 文件类型MIME type可选
Returns:
上传成功返回 True失败返回 False
"""
try:
key = key.strip("/")
url = f"{self.graph_api_url}/me/drive/items/{self.folder_item_id}:/{key}:/content"
# 自定义头部(因为我们需要指定 Content-Type
headers = {
"Authorization": f"Bearer {self.access_token}",
"Content-Type": content_type or "application/octet-stream",
}
response = requests.put(url, headers=headers, data=file_data)
response.raise_for_status()
return response.status_code == 200 or response.status_code == 201
except Exception as e:
raise RuntimeError(
f"Failed to upload file to OneDrive: {str(e)}" if isinstance(e, Exception) else str(e)
) from None
def delete_file(self, key: str) -> bool:
"""
删除文件
Args:
key: 对象键名
Returns:
删除成功返回 True失败返回 False
"""
try:
key = key.strip("/")
url = f"{self.graph_api_url}/me/drive/items/{self.folder_item_id}:/{key}:"
response = self._api_request("DELETE", url)
return response.status_code == 204
except Exception as e:
raise RuntimeError(f"Failed to delete file from OneDrive: {str(e)}") from None
def copy_file(self, source_key: str, dest_key: str) -> bool:
"""
复制文件
Args:
source_key: 源文件键名
dest_key: 目标文件键名
Returns:
复制成功返回 True失败返回 False
"""
try:
source = source_key.strip("/")
destination = dest_key.strip("/")
# 获取源文件 ID
source_url = f"{self.graph_api_url}/me/drive/items/{self.folder_item_id}:/{source}:"
source_response = self._api_request("GET", source_url)
source_response.raise_for_status()
source_item = source_response.json()
# 复制文件
copy_url = f"{self.graph_api_url}/me/drive/items/{source_item.get('id')}/copy"
copy_body = {
"parentReference": {"id": self.folder_item_id},
"name": destination.split("/")[-1],
}
copy_response = self._api_request("POST", copy_url, json=copy_body)
copy_response.raise_for_status()
return copy_response.status_code == 202 or copy_response.status_code == 200
except Exception as e:
raise RuntimeError(f"Failed to copy file in OneDrive: {str(e)}") from None
def generate_thumbnail(self, file_path: str) -> bytes:
"""
生成图片缩略图
Args:
file_path: 文件路径
Returns:
缩略图字节数据
"""
try:
file_path = file_path.strip("/")
# 获取缩略图 URL
url = f"{self.graph_api_url}/me/drive/items/{self.folder_item_id}:/{file_path}:/thumbnails"
response = requests.get(url, headers=self._headers())
response.raise_for_status()
thumbnails = response.json().get("value", [])
if thumbnails:
thumb_set = thumbnails[0]
# 获取中等大小的缩略图
thumb_url = thumb_set.get("c", {}).get("url") or thumb_set.get("m", {}).get("url")
if thumb_url:
thumb_response = requests.get(thumb_url)
if thumb_response.status_code == 200:
return thumb_response.content
# 如果没有缩略图,尝试生成一个
return self._generate_fallback_thumbnail(file_path)
except Exception:
return None
def _generate_fallback_thumbnail(self, file_path: str) -> bytes:
"""
生成备用缩略图(当 OneDrive 没有缩略图时)
Args:
file_path: 文件路径
Returns:
缩略图字节数据
"""
try:
file_obj = self.get_object(file_path)
img = Image.open(BytesIO(file_obj["Body"]))
# 调整大小
img.thumbnail(Config.THUMB_SIZE, Image.Resampling.LANCZOS)
# 保存为 PNG
thumb_buffer = BytesIO()
img.save(thumb_buffer, format="PNG")
return thumb_buffer.getvalue()
except Exception:
return None
def rename_file(self, old_key: str, new_key: str) -> bool:
"""
重命名文件
Args:
old_key: 旧的文件键名
new_key: 新的文件键名
Returns:
重命名成功返回 True失败返回 False
"""
try:
old_key = old_key.strip("/")
new_key = new_key.strip("/")
# 获取旧文件的 ID
url = f"{self.graph_api_url}/me/drive/items/{self.folder_item_id}:/{old_key}:"
response = self._api_request("GET", url)
response.raise_for_status()
item_id = response.json().get("id")
# 更新文件名
update_url = f"{self.graph_api_url}/me/drive/items/{item_id}"
update_body = {"name": new_key.split("/")[-1]}
response = self._api_request("PATCH", update_url, json=update_body)
response.raise_for_status()
return response.status_code == 200
except Exception as e:
raise RuntimeError(
f"Failed to rename file in OneDrive: {str(e)}" if isinstance(e, Exception) else str(e)
) from None
def delete_folder(self, prefix: str) -> bool:
"""
删除文件夹及其中的所有文件
Args:
prefix: 文件夹前缀
Returns:
删除成功返回 True失败返回 False
"""
try:
prefix = prefix.rstrip("/")
# 获取文件夹中的所有项目
items = self.list_objects(prefix)
# 删除所有文件
for file in items.get("Contents", []):
self.delete_file(file["Key"])
# 删除所有子文件夹
for folder in items.get("CommonPrefixes", []):
self.delete_folder(folder["Prefix"])
# 删除文件夹本身
url = f"{self.graph_api_url}/me/drive/items/{self.folder_item_id}:/{prefix}:"
response = self._api_request("DELETE", url)
return response.status_code == 204
except Exception as e:
raise RuntimeError(
f"Failed to delete folder from OneDrive: {str(e)}" if isinstance(e, Exception) else str(e)
) from None
def rename_folder(self, old_prefix: str, new_prefix: str) -> bool:
"""
重命名文件夹
Args:
old_prefix: 旧的文件夹前缀
new_prefix: 新的文件夹前缀
Returns:
重命名成功返回 True失败返回 False
"""
try:
old_prefix = old_prefix.rstrip("/")
new_prefix = new_prefix.rstrip("/")
# 获取旧文件夹的 ID
url = f"{self.graph_api_url}/me/drive/items/{self.folder_item_id}:/{old_prefix}:"
response = self._api_request("GET", url)
response.raise_for_status()
item_id = response.json().get("id")
# 更新文件夹名
update_url = f"{self.graph_api_url}/me/drive/items/{item_id}"
update_body = {"name": new_prefix.split("/")[-1]}
response = self._api_request("PATCH", update_url, json=update_body)
response.raise_for_status()
return response.status_code == 200
except Exception as e:
raise RuntimeError(
f"Failed to rename folder in OneDrive: {str(e)}" if isinstance(e, Exception) else str(e)
) from None
def copy_folder(self, source_prefix: str, dest_prefix: str) -> bool:
"""
复制文件夹及其中的所有文件
Args:
source_prefix: 源文件夹前缀
dest_prefix: 目标文件夹前缀
Returns:
复制成功返回 True失败返回 False
"""
try:
source_prefix = source_prefix.rstrip("/")
dest_prefix = dest_prefix.rstrip("/")
# 创建目标文件夹
self.create_folder(dest_prefix + "/")
# 获取源文件夹中的所有项目
items = self.list_objects(source_prefix)
# 复制所有文件
for file in items.get("Contents", []):
source_key = file["Key"]
# 保持相对路径
relative_path = source_key[len(source_prefix) + 1 :]
dest_key = f"{dest_prefix}/{relative_path}"
self.copy_file(source_key, dest_key)
# 递归复制子文件夹
for folder in items.get("CommonPrefixes", []):
source_folder = folder["Prefix"].rstrip("/")
relative_folder = source_folder[len(source_prefix) + 1 :]
dest_folder = f"{dest_prefix}/{relative_folder}"
self.copy_folder(source_folder, dest_folder)
return True
except Exception as e:
raise RuntimeError(
f"Failed to copy folder in OneDrive: {str(e)}" if isinstance(e, Exception) else str(e)
) from None
def create_folder(self, key: str) -> bool:
"""
创建文件夹
Args:
key: 文件夹路径(以 / 结尾)
Returns:
创建成功返回 True失败返回 False
"""
try:
key = key.rstrip("/")
# 获取父文件夹 ID
parts = key.split("/")
parent_id = self.folder_item_id
# 创建每一层文件夹
for _, folder_name in enumerate(parts):
# 检查文件夹是否已存在
existing_url = f"{self.graph_api_url}/me/drive/items/{parent_id}:/{folder_name}:"
existing_response = self._api_request("GET", existing_url)
if existing_response.status_code == 200:
parent_id = existing_response.json().get("id")
continue
# 创建新文件夹
create_url = f"{self.graph_api_url}/me/drive/items/{parent_id}/children"
create_body = {"name": folder_name, "folder": {}}
create_response = self._api_request("POST", create_url, json=create_body)
create_response.raise_for_status()
parent_id = create_response.json().get("id")
return True
except Exception as e:
raise RuntimeError(
f"Failed to create folder in OneDrive: {str(e)}" if isinstance(e, Exception) else str(e)
) from None