v0.2.7: 修复Redis连接 + 启动管理后台
- 修复Redis认证 (配置密码) - 启动Python管理后台 (端口9531, 15个功能开关) - 统一版本号 0.2.7 - 更新docker-compose.yml (镜像版本/Redis URL/Admin服务)
This commit is contained in:
1
cloudsearch_transfer/adapter/__init__.py
Normal file
1
cloudsearch_transfer/adapter/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
"""CloudSearch Transfer — adapter package."""
|
||||
297
cloudsearch_transfer/adapter/aliyun/__init__.py
Normal file
297
cloudsearch_transfer/adapter/aliyun/__init__.py
Normal file
@@ -0,0 +1,297 @@
|
||||
"""
Aliyun Drive adapter v1.0.0

AliyunAdapter subclasses BaseCloudDriveAdapter and implements the Aliyun
Drive share-transfer hooks.

Components:
- AliyunCredentialManager: refresh_token exchange + caching
- AliyunTransfer: 4-step batch transfer
- AliyunCleanup: recycle-bin cleanup

URL matching: aliyundrive.com/s/<share_id> and alipan.com/s/<share_id>
"""
|
||||
|
||||
import re
|
||||
import logging
|
||||
from typing import List, Dict, Tuple, Optional
|
||||
|
||||
from ..base import BaseCloudDriveAdapter, FileInfo, match_url
|
||||
from ..config import PlatformConfig, TransferConfig
|
||||
from ..errors import TransferError, TransferErrorCode
|
||||
|
||||
from .credential import AliyunCredentialManager
|
||||
from .transfer import AliyunTransfer
|
||||
from .cleanup import AliyunCleanup
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class AliyunAdapter(BaseCloudDriveAdapter):
    """Aliyun Drive adapter.

    Connects the generic ``BaseCloudDriveAdapter`` hooks to the
    Aliyun-specific helpers: ``AliyunCredentialManager`` (token handling),
    ``AliyunTransfer`` (share lookup / copy / re-share) and ``AliyunCleanup``
    (move to recycle bin).  The transfer and cleanup helpers are created
    lazily on first use.
    """

    PLATFORM_NAME = "阿里云盘"
    PLATFORM_KEY = "aliyun"

    # Share-link patterns; group 1 captures the share id.
    URL_PATTERNS = [
        r'aliyundrive\.com/s/([a-zA-Z0-9]+)',
        r'alipan\.com/s/([a-zA-Z0-9]+)',
    ]

    DEFAULT_HEADERS = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/135.0.0.0 Safari/537.36"
        ),
        "Accept": "application/json, text/plain, */*",
        "Content-Type": "application/json",
        "Referer": "https://aliyundrive.com",
    }

    def __init__(self, config: PlatformConfig, transfer_config: TransferConfig):
        """Create the adapter and its credential manager.

        Args:
            config: Platform configuration; ``refresh_token`` is preferred
                and ``cookie`` is used as a fallback credential.
            transfer_config: Transfer configuration (``request_timeout`` is
                read when the helpers are created).
        """
        # NOTE(review): if the base __init__ invokes _setup_session(), the
        # attributes below do not exist yet and _drive_id would be reset
        # afterwards — confirm against BaseCloudDriveAdapter.
        super().__init__(config, transfer_config)

        refresh_token = config.refresh_token or config.cookie or ""
        self._credential = AliyunCredentialManager(refresh_token=refresh_token)

        # Filled in by _setup_session() once the credential is validated.
        self._drive_id = ""

        # Helpers are built lazily by _get_transfer() / _get_cleanup().
        self._transfer: Optional[AliyunTransfer] = None
        self._cleanup: Optional[AliyunCleanup] = None

    def _setup_session(self):
        """Validate the configured refresh token and cache the drive id."""
        if not self._credential.refresh_token:
            logger.warning("[AliyunAdapter] 未配置 refresh_token")
            return

        if self._credential.validate():
            self._drive_id = self._credential.get_drive_id()
            logger.info(
                f"[AliyunAdapter] 凭证验证成功, drive_id={self._drive_id[:8]}..."
            )
        else:
            logger.warning("[AliyunAdapter] 凭证验证失败,转存功能可能不可用")

    # ─── Core abstract-method implementations ──────────────

    def _get_share_detail(self, pwd_id: str, passcode: str = "") -> dict:
        """Fetch share details (steps 1 and 2 of the transfer flow).

        First loads the anonymous share info, then obtains a share token.

        Args:
            pwd_id: Share id.
            passcode: Extraction code of the share, if any.

        Returns:
            A dict with the keys ``title``, ``share_id``, ``share_token``,
            ``files`` (the raw ``file_infos`` list) and ``creator_name``.

        Raises:
            TransferError: ``SHARE_NOT_EXIST`` when the share info or (with
                no passcode) the token cannot be obtained, ``PASSCODE_WRONG``
                when a passcode was given but no token was returned, and
                ``NETWORK_ERROR`` for any other unexpected exception.
        """
        try:
            transfer = self._get_transfer()

            # Step 1: anonymous share info
            share_info = transfer._get_share_info(pwd_id)
            if not share_info:
                raise TransferError(
                    TransferErrorCode.SHARE_NOT_EXIST,
                    platform=self.PLATFORM_KEY,
                )

            # Step 2: share token (authenticated)
            share_token = transfer._get_share_token(pwd_id, passcode)
            if not share_token:
                raise TransferError(
                    TransferErrorCode.PASSCODE_WRONG if passcode else TransferErrorCode.SHARE_NOT_EXIST,
                    platform=self.PLATFORM_KEY,
                    message="获取分享令牌失败(可能需要提取码)",
                )

            return {
                "title": share_info.get("share_name", share_info.get("share_title", "")),
                "share_id": pwd_id,
                "share_token": share_token,
                "files": share_info.get("file_infos", []),
                "creator_name": share_info.get("creator_name", ""),
            }

        except TransferError:
            raise
        except Exception as e:
            logger.exception(f"[AliyunAdapter] 获取分享详情失败: {e}")
            # Chain the cause so the original traceback is preserved.
            raise TransferError(
                TransferErrorCode.NETWORK_ERROR,
                message=str(e),
                platform=self.PLATFORM_KEY,
            ) from e

    def _save_files(self, pwd_id: str, detail: dict, save_dir: str) -> List[str]:
        """Copy the shared files into the user's own drive (step 3).

        Args:
            pwd_id: Share id.
            detail: Return value of ``_get_share_detail``.
            save_dir: Target directory file id; empty or ``"/"`` means the
                drive root (``"root"``).

        Returns:
            List of the new file ids.

        Raises:
            TransferError: ``SHARE_NOT_EXIST`` when ``detail`` has no share
                token, ``RESOURCE_EMPTY`` when it has no files or no usable
                file ids, ``NETWORK_ERROR`` when the batch copy yields no ids.
        """
        share_token = detail.get("share_token", "")
        files = detail.get("files", [])

        if not share_token:
            raise TransferError(
                TransferErrorCode.SHARE_NOT_EXIST,
                message="缺少 share_token",
                platform=self.PLATFORM_KEY,
            )

        if not files:
            raise TransferError(
                TransferErrorCode.RESOURCE_EMPTY,
                platform=self.PLATFORM_KEY,
            )

        file_ids = [f["file_id"] for f in files if f.get("file_id")]
        if not file_ids:
            raise TransferError(
                TransferErrorCode.RESOURCE_EMPTY,
                message="无法提取文件 ID",
                platform=self.PLATFORM_KEY,
            )

        to_parent = self._resolve_parent(save_dir)

        transfer = self._get_transfer()
        new_ids = transfer._batch_copy(pwd_id, share_token, file_ids, to_parent)

        if not new_ids:
            raise TransferError(
                TransferErrorCode.NETWORK_ERROR,
                message="批量转存失败,所有文件复制均失败",
                platform=self.PLATFORM_KEY,
            )

        return new_ids

    def _create_share(
        self, file_ids: List[str], title: str, password: str = ""
    ) -> Tuple[str, str]:
        """Create a new share link for the copied files (step 4).

        Args:
            file_ids: File ids to share.
            title: Share title; not used by this implementation.
            password: Password for the new share (empty = none).

        Returns:
            ``(share_url, share_password)``.

        Raises:
            TransferError: ``RESOURCE_EMPTY`` when ``file_ids`` is empty,
                ``SHARE_LINK_FAIL`` when no share URL is returned.
        """
        if not file_ids:
            raise TransferError(
                TransferErrorCode.RESOURCE_EMPTY,
                platform=self.PLATFORM_KEY,
            )

        transfer = self._get_transfer()
        result = transfer._create_share(file_ids, password)

        share_url = result.get("share_url", "")
        share_pwd = result.get("share_pwd", password)

        if not share_url:
            raise TransferError(
                TransferErrorCode.SHARE_LINK_FAIL,
                message="创建分享链接失败",
                platform=self.PLATFORM_KEY,
            )

        return share_url, share_pwd

    def get_files(self, parent_fid: str = "0") -> List[FileInfo]:
        """List the files in a drive directory.

        NOTE: placeholder — always logs a warning and returns an empty list.
        A full implementation would call the /adrive/v3/file/list API.
        """
        logger.warning("[AliyunAdapter] get_files() 未完整实现,返回空列表")
        return []

    def delete(self, file_ids: List[str]) -> bool:
        """Delete files by moving them to the recycle bin.

        Args:
            file_ids: Ids of the files to delete.

        Returns:
            True when ``file_ids`` is empty or the cleanup helper reports
            ``success``; False otherwise.
        """
        if not file_ids:
            return True

        cleanup = self._get_cleanup()
        result = cleanup.delete_files(file_ids)
        return result.get("success", False)

    # ─── Extras ────────────────────────────────────────────

    def cleanup_files(self, file_ids: List[str]) -> Dict:
        """Move files to the recycle bin and return the detailed result.

        Returns:
            The dict returned by ``AliyunCleanup.delete_files()``.
        """
        cleanup = self._get_cleanup()
        return cleanup.delete_files(file_ids)

    def force_refresh_token(self) -> bool:
        """Force a refresh of the access token; returns the refresh result."""
        return self._credential.refresh()

    def get_credential_status(self) -> Dict:
        """Return the credential manager's current state as a dict."""
        return self._credential.to_dict()

    # ─── File-list extraction ──────────────────────────────

    def _extract_file_list(self, detail: dict) -> List[FileInfo]:
        """Convert the ``files`` entries of a share detail into FileInfo objects."""
        return [
            FileInfo(
                fid=f.get("file_id", ""),
                name=f.get("name", ""),
                # ``or 0`` also covers an explicit ``"size": None``, which
                # would otherwise make int() raise TypeError.
                size=int(f.get("size") or 0),
                is_dir=f.get("type", "") == "folder",
                ext=f.get("file_extension", ""),
            )
            for f in detail.get("files", [])
        ]

    # ─── Internal helpers ──────────────────────────────────

    @staticmethod
    def _resolve_parent(save_dir: Optional[str]) -> str:
        """Map an empty or ``"/"`` directory to the drive root id ``"root"``."""
        return save_dir if save_dir and save_dir != "/" else "root"

    def _get_transfer(self) -> AliyunTransfer:
        """Return the lazily created AliyunTransfer instance."""
        if self._transfer is None:
            drive_id = self._drive_id or self._credential.get_drive_id()
            self._transfer = AliyunTransfer(
                credential=self._credential,
                drive_id=drive_id,
                # Same normalisation as _save_files, so "/" never reaches the API.
                to_parent_file_id=self._resolve_parent(self.config.save_dir),
                request_timeout=self.transfer_config.request_timeout,
            )
        return self._transfer

    def _get_cleanup(self) -> AliyunCleanup:
        """Return the lazily created AliyunCleanup instance."""
        if self._cleanup is None:
            drive_id = self._drive_id or self._credential.get_drive_id()
            self._cleanup = AliyunCleanup(
                credential=self._credential,
                drive_id=drive_id,
                request_timeout=self.transfer_config.request_timeout,
            )
        return self._cleanup
|
||||
203
cloudsearch_transfer/adapter/aliyun/cleanup.py
Normal file
203
cloudsearch_transfer/adapter/aliyun/cleanup.py
Normal file
@@ -0,0 +1,203 @@
|
||||
"""
|
||||
阿里云盘回收站清理模块 v1.0.0
|
||||
将文件移入回收站(非直接删除),支持批量操作。
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import List, Dict
|
||||
|
||||
import requests
|
||||
|
||||
from .credential import AliyunCredentialManager, API_HOST
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ─── API endpoints ─────────────────────────────────────────

# Batch operations (v4)
BATCH_URL = f"{API_HOST}/adrive/v4/batch"

# Default request headers applied to the cleanup session
DEFAULT_HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/135.0.0.0 Safari/537.36"
    ),
    "Accept": "application/json, text/plain, */*",
    "Content-Type": "application/json",
    "Referer": "https://aliyundrive.com",
}
|
||||
|
||||
|
||||
class AliyunCleanup:
    """Aliyun Drive recycle-bin cleanup.

    Moves files to the recycle bin (not a permanent delete) through the v4
    batch endpoint, so several files can be handled in one request.

    Usage:
        credential = AliyunCredentialManager(refresh_token="xxx")
        cleanup = AliyunCleanup(credential, drive_id="12345")
        result = cleanup.delete_files(["file_id_1", "file_id_2"])
    """

    def __init__(
        self,
        credential: AliyunCredentialManager,
        drive_id: str = "",
        request_timeout: int = 30,
    ):
        """Store the credential and settings and open an HTTP session.

        Args:
            credential: Provides the Authorization header and the drive id.
            drive_id: Target drive; when empty it is read from ``credential``.
            request_timeout: Timeout in seconds passed to the batch request.
        """
        self.credential = credential
        self.drive_id = drive_id or credential.get_drive_id()
        self.request_timeout = request_timeout
        self._session = requests.Session()
        self._session.headers.update(DEFAULT_HEADERS)

    # ─── Public API ────────────────────────────────────────

    def delete_files(self, file_ids: List[str]) -> Dict:
        """Move the given files to the recycle bin in one batch request.

        Sends ``POST /adrive/v4/batch`` with one ``/recyclebin/trash``
        sub-request per file id (the sub-request ``id`` is the file id).

        Args:
            file_ids: Ids of the files to delete.

        Returns:
            {
                "success": True only when every requested id succeeded,
                "deleted_count": number of files moved to the recycle bin,
                "total_count": number of ids requested,
                "success_ids": ids that succeeded,
                "failed_ids": ids that failed or were missing from the response,
                "error": None, or an error message (see ``_error``),
            }
        """
        if not file_ids:
            return self._error("文件 ID 列表为空")

        drive_id = self.drive_id or self.credential.get_drive_id()
        if not drive_id:
            return self._error("缺少 drive_id,无法执行删除操作")

        # One sub-request per file.
        requests_list = [
            {
                "url": "/recyclebin/trash",
                "body": {
                    "drive_id": drive_id,
                    "file_id": fid,
                },
                "headers": {"Content-Type": "application/json"},
                "id": fid,
                "method": "POST",
            }
            for fid in file_ids
        ]

        try:
            headers = self.credential.get_headers()

            resp = self._session.post(
                BATCH_URL,
                json={"requests": requests_list, "resource": "file"},
                headers=headers,
                timeout=self.request_timeout,
            )

            # Check the HTTP status before parsing, so a non-JSON error body
            # (e.g. a gateway error page) is still reported by its status code.
            if resp.status_code != 200:
                logger.error(
                    f"[AliyunCleanup] 批量删除失败: "
                    f"HTTP {resp.status_code}, {resp.text}"
                )
                return self._error(f"HTTP {resp.status_code}")

            data = resp.json()

            code = data.get("code", "")
            if code:
                logger.error(
                    f"[AliyunCleanup] 批量删除 API 错误: "
                    f"code={code}, message={data.get('message', '')}"
                )
                return self._error(data.get("message", f"API code={code}"))

            # Tally the per-file results.
            success_ids = []
            failed_ids = []
            reported = set()

            for item in data.get("responses", []):
                status = item.get("status", 0)
                fid = item.get("id", "")
                reported.add(fid)
                # Any 2xx sub-status is a success (e.g. 204 No Content).
                if isinstance(status, int) and 200 <= status < 300:
                    success_ids.append(fid)
                else:
                    logger.warning(
                        f"[AliyunCleanup] 删除文件失败: "
                        f"id={fid}, status={status}, body={item.get('body', {})}"
                    )
                    failed_ids.append(fid)

            # Ids the server did not report on must not count as deleted.
            missing = [fid for fid in file_ids if fid not in reported]
            if missing:
                logger.warning(f"[AliyunCleanup] 响应中缺少以下文件的结果: {missing}")
                failed_ids.extend(missing)

            logger.info(
                f"[AliyunCleanup] 删除完成: "
                f"成功={len(success_ids)}, 失败={len(failed_ids)}, 总计={len(file_ids)}"
            )

            return {
                "success": not failed_ids,
                "deleted_count": len(success_ids),
                "total_count": len(file_ids),
                "success_ids": success_ids,
                "failed_ids": failed_ids,
                "error": None,
            }

        except requests.RequestException as e:
            logger.error(f"[AliyunCleanup] 批量删除网络异常: {e}")
            return self._error(str(e))
        except Exception as e:
            logger.exception(f"[AliyunCleanup] 批量删除异常: {e}")
            return self._error(str(e))

    def empty_recycle_bin(self) -> Dict:
        """Placeholder for emptying the recycle bin.

        Not implemented: this method only logs a warning and returns an
        error result.  Per the original author's note, emptying requires
        listing the recycle-bin contents and then deleting each entry.

        Returns:
            The ``_error`` dict with ``success`` False and an explanatory
            ``error`` message.
        """
        logger.warning("[AliyunCleanup] 清空回收站 API 暂未实现,需要 list+delete 两步")
        return self._error("清空回收站需要通过列出回收站内容 + 逐个删除两步完成,尚未实现")

    # ─── Utilities ─────────────────────────────────────────

    def _error(self, message: str) -> Dict:
        """Build a failure result with zero counts, empty id lists and ``message``."""
        return {
            "success": False,
            "deleted_count": 0,
            "total_count": 0,
            "success_ids": [],
            "failed_ids": [],
            "error": message,
        }
|
||||
216
cloudsearch_transfer/adapter/aliyun/credential.py
Normal file
216
cloudsearch_transfer/adapter/aliyun/credential.py
Normal file
@@ -0,0 +1,216 @@
|
||||
"""
|
||||
阿里云盘凭证管理器 v1.0.0
|
||||
refresh_token → access_token 刷新 + 自动缓存 + 过期前自动刷新
|
||||
"""
|
||||
|
||||
import time
|
||||
import logging
|
||||
import threading
|
||||
from typing import Dict, Optional
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
import requests
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ─── Constants ─────────────────────────────────────────────

API_HOST = "https://api.aliyundrive.com"
# Endpoint that exchanges a refresh_token for an access_token
TOKEN_REFRESH_URL = f"{API_HOST}/token/refresh"

# Default headers applied to the credential manager's session
DEFAULT_HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/135.0.0.0 Safari/537.36"
    ),
    "Accept": "application/json, text/plain, */*",
    "Content-Type": "application/json",
}
|
||||
|
||||
|
||||
@dataclass
class TokenInfo:
    """Cached token data returned by a token refresh."""

    access_token: str = ""
    refresh_token: str = ""
    expires_at: float = 0.0  # Unix timestamp at which access_token expires
    drive_id: str = ""
    user_id: str = ""
    nick_name: str = ""
    default_sbox_drive_id: str = ""

    @property
    def is_expired(self) -> bool:
        """True once the current time is within 60 seconds of ``expires_at``."""
        now = time.time()
        return now >= self.expires_at - 60

    @property
    def is_valid(self) -> bool:
        """True when an access token is present and it has not expired."""
        if not self.access_token:
            return False
        return not self.is_expired
|
||||
|
||||
|
||||
class AliyunCredentialManager:
    """Aliyun Drive credential manager.

    Responsibilities:
    - exchange the refresh_token for an access_token
    - cache access_token / expires_at / drive_id
    - refresh automatically shortly before expiry (60 s margin)
    - serialise refreshes and token updates behind a lock

    Usage:
        mgr = AliyunCredentialManager(refresh_token="xxx")
        mgr.refresh()                  # force a refresh
        headers = mgr.get_headers()    # headers carrying the auth token
        is_ok = mgr.validate()         # check the refresh_token works
    """

    def __init__(self, refresh_token: str = ""):
        """Store the (stripped) refresh token and open an HTTP session.

        Args:
            refresh_token: Aliyun refresh token; ``None`` is treated as empty.
        """
        self._refresh_token = (refresh_token or "").strip()
        self._token: Optional[TokenInfo] = None
        self._lock = threading.Lock()
        self._session = requests.Session()
        self._session.headers.update(DEFAULT_HEADERS)

    # ─── Public API ────────────────────────────────────────

    def refresh(self) -> bool:
        """Exchange the refresh_token for a new access_token.

        Returns:
            True on success, False on failure.
        """
        with self._lock:
            return self._do_refresh()

    def get_headers(self) -> Dict[str, str]:
        """Return request headers carrying the Authorization token.

        Refreshes the token first when none is cached or it has expired.

        Returns:
            ``{"Authorization": "Bearer <access_token>"}``, or an empty dict
            when no access token is available.
        """
        self._ensure_token_valid()
        token = self._token  # snapshot: another thread may reset self._token
        headers = {}
        if token and token.access_token:
            headers["Authorization"] = f"Bearer {token.access_token}"
        return headers

    def get_access_token(self) -> str:
        """Return the cached access_token, refreshing first when needed."""
        self._ensure_token_valid()
        token = self._token
        return token.access_token if token else ""

    def get_drive_id(self) -> str:
        """Return the default drive_id, refreshing the token first when needed."""
        self._ensure_token_valid()
        token = self._token
        return token.drive_id if token else ""

    def get_sbox_drive_id(self) -> str:
        """Return the safe-box drive_id, refreshing the token first when needed."""
        self._ensure_token_valid()
        token = self._token
        return token.default_sbox_drive_id if token else ""

    def validate(self) -> bool:
        """Check whether the refresh_token is usable.

        Requires a refresh_token of at least 20 characters that can be
        exchanged for an access_token (this performs a refresh).
        """
        if not self._refresh_token or len(self._refresh_token) < 20:
            logger.warning("[AliyunCredential] refresh_token 长度不足 20,验证失败")
            return False
        return self.refresh()

    @property
    def refresh_token(self) -> str:
        """The refresh token currently in use."""
        return self._refresh_token

    @refresh_token.setter
    def refresh_token(self, value: str):
        """Replace the refresh_token and drop the cached token."""
        # Both writes happen under the lock so a concurrent refresh never
        # pairs the new refresh token with a token cached from the old one.
        with self._lock:
            self._refresh_token = (value or "").strip()
            self._token = None

    # ─── Internals ─────────────────────────────────────────

    def _needs_refresh(self) -> bool:
        """True when no token is cached or the cached one has expired."""
        token = self._token
        return token is None or token.is_expired

    def _ensure_token_valid(self):
        """Refresh the token when it is missing or expired.

        The condition is re-checked after acquiring the lock, so callers
        that were waiting while another thread refreshed do not trigger a
        second, redundant refresh.
        """
        if not self._needs_refresh():
            return
        with self._lock:
            if self._needs_refresh():
                self._do_refresh()

    def _do_refresh(self) -> bool:
        """Perform the token refresh request.  Callers must hold ``_lock``.

        Returns:
            True when a new token was stored, False on any failure (missing
            refresh token, HTTP/API error, network or parsing error).
        """
        if not self._refresh_token:
            logger.error("[AliyunCredential] 没有 refresh_token,无法刷新")
            return False

        try:
            resp = self._session.post(
                TOKEN_REFRESH_URL,
                json={"refresh_token": self._refresh_token},
                timeout=30,
            )
            data = resp.json()

            if resp.status_code != 200 or "access_token" not in data:
                code = data.get("code", "Unknown")
                message = data.get("message", "")
                logger.error(
                    f"[AliyunCredential] 刷新 token 失败: "
                    f"HTTP {resp.status_code} code={code} msg={message}"
                )
                return False

            access_token = data.get("access_token", "")
            expires_in = int(data.get("expires_in", 7200))
            # Keep the current refresh token when the response omits it or
            # carries an empty value; otherwise later refreshes would fail.
            new_refresh = data.get("refresh_token") or self._refresh_token

            self._token = TokenInfo(
                access_token=access_token,
                refresh_token=new_refresh,
                expires_at=time.time() + expires_in,
                drive_id=str(data.get("default_drive_id", "")),
                user_id=str(data.get("user_id", "")),
                nick_name=str(data.get("nick_name", "")),
                default_sbox_drive_id=str(data.get("default_sbox_drive_id", "")),
            )

            # The server may hand out a rotated refresh token.
            if new_refresh != self._refresh_token:
                logger.info(
                    "[AliyunCredential] refresh_token 已轮换,新旧前缀: "
                    f"{self._refresh_token[:8]}... → {new_refresh[:8]}..."
                )
                self._refresh_token = new_refresh

            logger.info(
                f"[AliyunCredential] Token 刷新成功 "
                f"(user={self._token.nick_name}, "
                f"expires_in={expires_in}s, "
                f"drive_id={self._token.drive_id[:8]}...)"
            )
            return True

        except requests.RequestException as e:
            logger.error(f"[AliyunCredential] 刷新 token 网络异常: {e}")
            return False
        except Exception as e:
            logger.exception(f"[AliyunCredential] 刷新 token 未知异常: {e}")
            return False

    def to_dict(self) -> dict:
        """Export the current state (intended for persistence).

        Refreshes the token first when needed.  The result contains the
        refresh and access tokens, so treat it as sensitive.
        """
        self._ensure_token_valid()
        token = self._token
        return {
            "refresh_token": self._refresh_token,
            "access_token": token.access_token if token else "",
            "expires_at": token.expires_at if token else 0,
            "drive_id": token.drive_id if token else "",
            "user_id": token.user_id if token else "",
            "nick_name": token.nick_name if token else "",
        }
|
||||
493
cloudsearch_transfer/adapter/aliyun/transfer.py
Normal file
493
cloudsearch_transfer/adapter/aliyun/transfer.py
Normal file
@@ -0,0 +1,493 @@
|
||||
"""
|
||||
阿里云盘转存模块 v1.0.0
|
||||
实现 4 步批量转存流程:获取分享详情 → 获取分享令牌 → 批量复制文件 → 创建新分享
|
||||
"""
|
||||
|
||||
import re
|
||||
import time
|
||||
import logging
|
||||
from typing import List, Dict, Tuple, Optional
|
||||
|
||||
import requests
|
||||
|
||||
from .credential import AliyunCredentialManager, API_HOST
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ─── API endpoints ─────────────────────────────────────────

# Step 1: fetch share details (anonymous)
SHARE_INFO_URL = f"{API_HOST}/adrive/v3/share_link/get_share_by_anonymous"

# Step 2: fetch the share token (requires auth)
SHARE_TOKEN_URL = f"{API_HOST}/v2/share_link/get_share_token"

# Step 3: batch operations (file copy)
BATCH_URL = f"{API_HOST}/adrive/v4/batch"

# Step 4: create a share
CREATE_SHARE_URL = f"{API_HOST}/adrive/v2/share_link/create"
|
||||
|
||||
# ─── URL patterns ──────────────────────────────────────────

# Matches aliyundrive.com/s/<share_id> and alipan.com/s/<share_id>;
# group 1 is the share id.  Both hosts are accepted so this agrees with
# the patterns the adapter uses to recognise share links.
URL_PATTERN = re.compile(r'(?:aliyundrive|alipan)\.com/s/([a-zA-Z0-9]+)')
|
||||
|
||||
# ─── Default request headers ───────────────────────────────

DEFAULT_HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/135.0.0.0 Safari/537.36"
    ),
    "Accept": "application/json, text/plain, */*",
    "Content-Type": "application/json",
    "Referer": "https://aliyundrive.com",
}
|
||||
|
||||
|
||||
class AliyunTransfer:
|
||||
"""
|
||||
阿里云盘批量转存
|
||||
|
||||
四步流程:
|
||||
① 获取分享详情(匿名):POST /adrive/v3/share_link/get_share_by_anonymous
|
||||
② 获取分享令牌(Auth):POST /v2/share_link/get_share_token
|
||||
③ 批量复制文件:POST /adrive/v4/batch (X-Share-Token 头)
|
||||
④ 创建新分享:POST /adrive/v2/share_link/create
|
||||
|
||||
用法:
|
||||
credential = AliyunCredentialManager(refresh_token="xxx")
|
||||
transfer = AliyunTransfer(credential, drive_id="12345")
|
||||
result = transfer.transfer(
|
||||
share_url="https://www.aliyundrive.com/s/abc123",
|
||||
share_password="",
|
||||
to_parent_file_id="root",
|
||||
)
|
||||
"""
|
||||
|
||||
def __init__(
    self,
    credential: AliyunCredentialManager,
    drive_id: str = "",
    to_parent_file_id: str = "root",
    request_timeout: int = 30,
):
    """Store the transfer settings and open an HTTP session.

    Args:
        credential: Provides auth headers and the default drive id.
        drive_id: Target drive; when empty it is read from ``credential``.
        to_parent_file_id: Default destination directory file id.
        request_timeout: Request timeout in seconds.
    """
    http = requests.Session()
    http.headers.update(DEFAULT_HEADERS)

    self.credential = credential
    self.drive_id = drive_id if drive_id else credential.get_drive_id()
    self.to_parent_file_id = to_parent_file_id
    self.request_timeout = request_timeout
    self._session = http
|
||||
|
||||
# ─── 公开 API ──────────────────────────────────────────
|
||||
|
||||
def transfer(
    self,
    share_url: str,
    share_password: str = "",
    to_parent_file_id: Optional[str] = None,
    new_share_password: str = "",
    expiration: str = "",
) -> Dict:
    """Run the complete transfer flow for one share link.

    Steps: extract the share id and load the share info, obtain a share
    token, batch-copy the files, then create a new share for the copies.

    Args:
        share_url: Aliyun Drive share link
            (e.g. https://www.aliyundrive.com/s/abc123).
        share_password: Extraction code of the source share, if any.
        to_parent_file_id: Destination directory file id; falls back to
            the value given at construction time.
        new_share_password: Password for the new share (empty = none).
        expiration: Validity of the new share (empty = permanent).

    Returns:
        On success:
        {
            "success": True,
            "share_name": "...",
            "share_id": "...",
            "new_file_ids": ["id1", "id2"],
            "new_share_url": "https://...",
            "new_share_password": "...",
            "error": None,
        }
        On failure, the dict built by ``self._error(message)``.  When the
        files were copied but no share link could be created, that dict
        additionally carries ``new_file_ids`` so the caller can clean up.
    """
    parent_id = to_parent_file_id or self.to_parent_file_id

    try:
        # Step 1: share details
        share_id = self._extract_share_id(share_url)
        if not share_id:
            return self._error("无法从 URL 提取分享 ID")

        share_info = self._get_share_info(share_id)
        if not share_info:
            return self._error("分享不存在或已失效")

        share_name = share_info.get("share_name", "")
        file_infos = share_info.get("file_infos", [])
        if not file_infos:
            return self._error("分享内容为空")

        logger.info(
            f"[AliyunTransfer] 分享详情获取成功: "
            f"name={share_name}, files={len(file_infos)}"
        )

        # Step 2: share token
        share_token = self._get_share_token(share_id, share_password)
        if not share_token:
            return self._error("获取分享令牌失败(可能需要提取码)")

        logger.info("[AliyunTransfer] 分享令牌获取成功")

        # Step 3: batch copy
        file_ids = [fi["file_id"] for fi in file_infos if fi.get("file_id")]
        if not file_ids:
            return self._error("无法提取文件 ID")

        new_file_ids = self._batch_copy(share_id, share_token, file_ids, parent_id)
        if not new_file_ids:
            return self._error("批量转存失败,请检查权限或容量")

        logger.info(f"[AliyunTransfer] 批量转存成功: {len(new_file_ids)} 个文件")

        # Step 4: new share.  ``or {}`` guards against a None result.
        share_result = self._create_share(
            new_file_ids,
            share_password=new_share_password,
            expiration=expiration,
        ) or {}

        new_share_url = share_result.get("share_url", "")
        new_share_pwd = share_result.get("share_pwd", new_share_password)

        # Without a share URL the transfer did not succeed; report the
        # failure instead of returning success with an empty link.
        if not new_share_url:
            logger.error("[AliyunTransfer] 创建分享链接失败")
            failure = dict(self._error("创建分享链接失败"))
            failure["new_file_ids"] = new_file_ids
            return failure

        logger.info(f"[AliyunTransfer] 新分享创建成功: {new_share_url}")

        return {
            "success": True,
            "share_name": share_name,
            "share_id": share_id,
            "new_file_ids": new_file_ids,
            "new_share_url": new_share_url,
            "new_share_password": new_share_pwd,
            "error": None,
        }

    except Exception as e:
        logger.exception(f"[AliyunTransfer] 转存异常: {e}")
        return self._error(str(e))
|
||||
|
||||
def get_share_info(self, share_url: str) -> Optional[Dict]:
    """Look up a share's details without transferring anything.

    Args:
        share_url: Share link to inspect.

    Returns:
        The dict produced by ``_get_share_info`` (share name, file infos,
        ...), or None when no share id can be extracted from ``share_url``
        or the lookup fails.
    """
    share_id = self._extract_share_id(share_url)
    if share_id:
        return self._get_share_info(share_id)

    logger.error(f"[AliyunTransfer] 无法从 URL 提取 share_id: {share_url}")
    return None
|
||||
|
||||
# ─── 步骤 ①:获取分享详情 ───────────────────────────────
|
||||
|
||||
def _get_share_info(self, share_id: str) -> Optional[Dict]:
    """Fetch the anonymous share info for ``share_id``.

    Sends ``POST /adrive/v3/share_link/get_share_by_anonymous`` with the
    body ``{"share_id": "..."}``.

    Returns:
        A dict with ``share_name``, ``share_title`` (falls back to the
        share name), ``file_infos``, ``expiration``, ``creator_name`` and
        ``creator_id``; or None on an HTTP error, an API error code, or
        any exception (all of which are logged).
    """
    try:
        response = self._session.post(
            SHARE_INFO_URL,
            json={"share_id": share_id},
            timeout=self.request_timeout,
        )
        payload = response.json()

        if response.status_code != 200:
            logger.error(
                f"[AliyunTransfer] 获取分享详情失败: HTTP {response.status_code}, {payload}"
            )
            return None

        # A non-empty "code" field marks a business-level error.
        api_code = payload.get("code", "")
        if api_code:
            logger.error(
                f"[AliyunTransfer] 获取分享详情 API 错误: code={api_code}, message={payload.get('message', '')}"
            )
            return None

        info = {"share_name": payload.get("share_name", "")}
        info["share_title"] = payload.get("share_title", payload.get("share_name", ""))
        info["file_infos"] = payload.get("file_infos", [])
        for key in ("expiration", "creator_name", "creator_id"):
            info[key] = payload.get(key, "")
        return info

    except requests.RequestException as exc:
        logger.error(f"[AliyunTransfer] 获取分享详情网络异常: {exc}")
        return None
    except Exception as exc:
        logger.exception(f"[AliyunTransfer] 获取分享详情异常: {exc}")
        return None
|
||||
|
||||
# ─── 步骤 ②:获取分享令牌 ────────────────────────────────
|
||||
|
||||
def _get_share_token(self, share_id: str, share_password: str = "") -> Optional[str]:
    """Obtain the share token for ``share_id``.

    Sends ``POST /v2/share_link/get_share_token`` with the body
    ``{"share_id": "...", "share_pwd": "..."}`` and the auth headers from
    the credential manager.

    Returns:
        The ``share_token`` string, or None on an HTTP error, an API error
        code, a response without a token, or any exception (all logged).
    """
    try:
        auth_headers = self.credential.get_headers()
        body = {
            "share_id": share_id,
            "share_pwd": share_password,
        }
        response = self._session.post(
            SHARE_TOKEN_URL,
            json=body,
            headers=auth_headers,
            timeout=self.request_timeout,
        )
        payload = response.json()

        if response.status_code != 200:
            logger.error(
                f"[AliyunTransfer] 获取分享令牌失败: HTTP {response.status_code}, {payload}"
            )
            return None

        # A non-empty "code" field marks a business-level error.
        api_code = payload.get("code", "")
        if api_code:
            logger.error(
                f"[AliyunTransfer] 获取分享令牌 API 错误: code={api_code}, message={payload.get('message', '')}"
            )
            return None

        token = payload.get("share_token", "")
        if token:
            return token

        logger.error("[AliyunTransfer] 响应中缺少 share_token")
        return None

    except requests.RequestException as exc:
        logger.error(f"[AliyunTransfer] 获取分享令牌网络异常: {exc}")
        return None
    except Exception as exc:
        logger.exception(f"[AliyunTransfer] 获取分享令牌异常: {exc}")
        return None
|
||||
|
||||
# ─── 步骤 ③:批量复制文件 ────────────────────────────────
|
||||
|
||||
def _batch_copy(
|
||||
self,
|
||||
share_id: str,
|
||||
share_token: str,
|
||||
file_ids: List[str],
|
||||
to_parent_file_id: str = "root",
|
||||
) -> List[str]:
|
||||
"""
|
||||
POST /adrive/v4/batch
|
||||
头: X-Share-Token: <share_token>
|
||||
请求体:
|
||||
{
|
||||
"requests": [
|
||||
{
|
||||
"url": "/file/copy",
|
||||
"body": {
|
||||
"file_id": "...",
|
||||
"share_id": "...",
|
||||
"to_drive_id": "...",
|
||||
"to_parent_file_id": "..."
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
响应: {"responses": [{"status": 200, "body": {"file_id": "new_id"}}, ...]}
|
||||
返回新的 file_id 列表
|
||||
"""
|
||||
drive_id = self.drive_id
|
||||
if not drive_id:
|
||||
drive_id = self.credential.get_drive_id()
|
||||
if not drive_id:
|
||||
logger.error("[AliyunTransfer] 缺少 drive_id,无法转存")
|
||||
return []
|
||||
|
||||
# 构建批量请求体
|
||||
requests_list = []
|
||||
for fid in file_ids:
|
||||
requests_list.append({
|
||||
"url": "/file/copy",
|
||||
"body": {
|
||||
"file_id": fid,
|
||||
"share_id": share_id,
|
||||
"to_drive_id": drive_id,
|
||||
"to_parent_file_id": to_parent_file_id,
|
||||
},
|
||||
"headers": {"Content-Type": "application/json"},
|
||||
"id": fid,
|
||||
"method": "POST",
|
||||
})
|
||||
|
||||
try:
|
||||
headers = self.credential.get_headers()
|
||||
headers["X-Share-Token"] = share_token
|
||||
|
||||
resp = self._session.post(
|
||||
BATCH_URL,
|
||||
json={"requests": requests_list, "resource": "file"},
|
||||
headers=headers,
|
||||
timeout=self.request_timeout * 2, # 批量操作可能较慢
|
||||
)
|
||||
data = resp.json()
|
||||
|
||||
if resp.status_code != 200:
|
||||
logger.error(
|
||||
f"[AliyunTransfer] 批量复制失败: "
|
||||
f"HTTP {resp.status_code}, {data}"
|
||||
)
|
||||
return []
|
||||
|
||||
code = data.get("code", "")
|
||||
if code:
|
||||
logger.error(
|
||||
f"[AliyunTransfer] 批量复制 API 错误: "
|
||||
f"code={code}, message={data.get('message', '')}"
|
||||
)
|
||||
return []
|
||||
|
||||
# 提取新 file_id
|
||||
new_ids = []
|
||||
responses = data.get("responses", [])
|
||||
for item in responses:
|
||||
status = item.get("status", 0)
|
||||
body = item.get("body", {})
|
||||
if status in (200, 201, 202):
|
||||
new_fid = body.get("file_id", "")
|
||||
if new_fid:
|
||||
new_ids.append(new_fid)
|
||||
else:
|
||||
logger.warning(
|
||||
f"[AliyunTransfer] 单个文件复制失败: "
|
||||
f"id={item.get('id')}, status={status}, body={body}"
|
||||
)
|
||||
|
||||
if not new_ids:
|
||||
logger.error("[AliyunTransfer] 所有文件复制均失败")
|
||||
elif len(new_ids) < len(file_ids):
|
||||
logger.warning(
|
||||
f"[AliyunTransfer] 部分文件复制成功: "
|
||||
f"{len(new_ids)}/{len(file_ids)}"
|
||||
)
|
||||
|
||||
return new_ids
|
||||
|
||||
except requests.RequestException as e:
|
||||
logger.error(f"[AliyunTransfer] 批量复制网络异常: {e}")
|
||||
return []
|
||||
except Exception as e:
|
||||
logger.exception(f"[AliyunTransfer] 批量复制异常: {e}")
|
||||
return []
|
||||
|
||||
# ─── 步骤 ④:创建新分享 ──────────────────────────────────
|
||||
|
||||
def _create_share(
|
||||
self,
|
||||
file_ids: List[str],
|
||||
share_password: str = "",
|
||||
expiration: str = "",
|
||||
) -> Dict:
|
||||
"""
|
||||
POST /adrive/v2/share_link/create
|
||||
请求体: {"drive_id": "...", "file_id_list": [...], "share_pwd": "...", "expiration": "..."}
|
||||
响应: {"share_url": "...", "share_id": "..."}
|
||||
"""
|
||||
drive_id = self.drive_id or self.credential.get_drive_id()
|
||||
if not drive_id:
|
||||
logger.error("[AliyunTransfer] 缺少 drive_id,无法创建分享")
|
||||
return {"share_url": "", "share_pwd": ""}
|
||||
|
||||
body = {
|
||||
"drive_id": drive_id,
|
||||
"file_id_list": file_ids,
|
||||
"share_pwd": share_password or "",
|
||||
"expiration": expiration or "",
|
||||
}
|
||||
|
||||
try:
|
||||
headers = self.credential.get_headers()
|
||||
resp = self._session.post(
|
||||
CREATE_SHARE_URL,
|
||||
json=body,
|
||||
headers=headers,
|
||||
timeout=self.request_timeout,
|
||||
)
|
||||
data = resp.json()
|
||||
|
||||
if resp.status_code != 200:
|
||||
logger.error(
|
||||
f"[AliyunTransfer] 创建分享失败: "
|
||||
f"HTTP {resp.status_code}, {data}"
|
||||
)
|
||||
return {"share_url": "", "share_pwd": share_password}
|
||||
|
||||
code = data.get("code", "")
|
||||
if code:
|
||||
logger.error(
|
||||
f"[AliyunTransfer] 创建分享 API 错误: "
|
||||
f"code={code}, message={data.get('message', '')}"
|
||||
)
|
||||
return {"share_url": "", "share_pwd": share_password}
|
||||
|
||||
share_url = data.get("share_url", "")
|
||||
share_pwd = data.get("share_pwd", share_password)
|
||||
|
||||
return {"share_url": share_url, "share_pwd": share_pwd}
|
||||
|
||||
except requests.RequestException as e:
|
||||
logger.error(f"[AliyunTransfer] 创建分享网络异常: {e}")
|
||||
return {"share_url": "", "share_pwd": share_password}
|
||||
except Exception as e:
|
||||
logger.exception(f"[AliyunTransfer] 创建分享异常: {e}")
|
||||
return {"share_url": "", "share_pwd": share_password}
|
||||
|
||||
# ─── URL 解析 ──────────────────────────────────────────
|
||||
|
||||
@staticmethod
def _extract_share_id(url: str) -> Optional[str]:
    """Return the share ID captured by ``URL_PATTERN`` in *url*, or ``None``."""
    found = URL_PATTERN.search(url)
    return found.group(1) if found else None
|
||||
|
||||
@staticmethod
def extract_share_id_static(url: str) -> Optional[str]:
    """Public static entry point that delegates to ``_extract_share_id``."""
    share_id = AliyunTransfer._extract_share_id(url)
    return share_id
|
||||
|
||||
# ─── 工具方法 ──────────────────────────────────────────
|
||||
|
||||
def _error(self, message: str) -> Dict:
|
||||
"""构造错误返回"""
|
||||
return {
|
||||
"success": False,
|
||||
"share_name": "",
|
||||
"share_id": "",
|
||||
"new_file_ids": [],
|
||||
"new_share_url": "",
|
||||
"new_share_password": "",
|
||||
"error": message,
|
||||
}
|
||||
253
cloudsearch_transfer/adapter/baidu/__init__.py
Normal file
253
cloudsearch_transfer/adapter/baidu/__init__.py
Normal file
@@ -0,0 +1,253 @@
|
||||
"""
|
||||
百度网盘适配器 — CloudSearch Transfer v1.0.0
|
||||
参考 cloud-auto-save 的 BaiduNetDisk + netdisk 的 PanbaiduSave
|
||||
|
||||
完整的 5 步转存流程 + bdstoken 管理 + 路径删除 + 广告过滤
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import List, Tuple
|
||||
|
||||
from ..base import BaseCloudDriveAdapter, FileInfo
|
||||
from ...config import PlatformConfig, TransferConfig
|
||||
from ...errors import TransferError, TransferErrorCode
|
||||
|
||||
from .credential import BaiduCredentialManager
|
||||
from .transfer import BaiduTransfer
|
||||
from .cleanup import BaiduCleanup
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class BaiduAdapter(BaseCloudDriveAdapter):
    """Baidu Netdisk adapter.

    Works with a Cookie plus a ``bdstoken`` and supports:
    - verifying a share link and its extraction code
    - saving the shared files into the own drive
    - creating a new share
    - deleting files by path
    - filtering ad files by name
    """

    PLATFORM_NAME = "百度网盘"
    PLATFORM_KEY = "baidu"
    URL_PATTERNS = [
        r'pan\.baidu\.com/s/1([A-Za-z0-9_-]+)',
    ]

    def __init__(self, config: PlatformConfig, transfer_config: TransferConfig):
        """Wire up credential manager, transfer executor and cleaner.

        Raises:
            TransferError: ``NOT_LOGIN`` when the configured cookie fails
                ``BaiduCredentialManager.validate``.
        """
        super().__init__(config, transfer_config)

        # Credential manager
        self.credential = BaiduCredentialManager(
            cookie=config.cookie,
            session=self.session,
        )

        if not self.credential.validate():
            raise TransferError(
                TransferErrorCode.NOT_LOGIN,
                message="百度网盘 Cookie 无效或太短 (需 >= 50 字符)",
                platform=self.PLATFORM_KEY,
            )

        # Warm up the bdstoken; a failure here is not fatal because the
        # token is requested again on first use.
        try:
            self.credential.get_bdstoken()
        except TransferError as e:
            logger.warning(f"预取 bdstoken 失败: {e},将在首次使用时重试")

        # Transfer executor and cleaner
        self._transfer = BaiduTransfer(self.session, self.credential)
        self._cleanup = BaiduCleanup(
            self.session, self.credential,
            ad_keywords=config.banned_keywords or None,
        )

        # Files of the most recent transfer, kept for _filter_ads.
        self._last_transfer_files: List[dict] = []

    # ─── session setup ──────────────────────────────────────

    def _setup_session(self):
        """Install the configured cookie and the Referer on the session."""
        if self.config.cookie:
            self.session.headers["Cookie"] = self.config.cookie
        self.session.headers["Referer"] = "https://pan.baidu.com/"

    # ─── core abstract methods ──────────────────────────────

    def _get_share_detail(self, pwd_id: str, passcode: str = "") -> dict:
        """Fetch the share details (steps 1 and 2).

        Args:
            pwd_id: The ``surl`` from the URL (the part after ``s/1``).
            passcode: Extraction code; verification is skipped when empty.

        Returns:
            A dict with ``title``, ``shareid``, ``uk``, ``fs_ids`` and
            ``filenames`` taken from the parsed share page.
        """
        bdstoken = self.credential.get_bdstoken()

        # Step 1: verify the extraction code (only when one was given)
        if passcode:
            self._transfer._verify_password(pwd_id, passcode, bdstoken)

        # Step 2: parse the share page
        share_info = self._transfer._parse_share_page(pwd_id)

        return {
            "title": share_info.get("title", ""),
            "shareid": share_info["shareid"],
            "uk": share_info["uk"],
            "fs_ids": share_info["fs_ids"],
            "filenames": share_info["filenames"],
        }

    def _save_files(self, pwd_id: str, detail: dict,
                    save_dir: str) -> List[str]:
        """Save the shared files into the own drive (steps 3 and 4).

        Args:
            pwd_id: The ``surl`` of the share.
            detail: Dict returned by ``_get_share_detail``.
            save_dir: Target directory.

        Returns:
            The new ``fs_id`` values found in ``save_dir`` after the
            transfer.
        """
        bdstoken = self.credential.get_bdstoken()
        shareid = detail["shareid"]
        uk = detail["uk"]
        fs_ids = detail["fs_ids"]
        filenames = detail.get("filenames", [])

        # Step 3: transfer
        self._transfer._transfer_files(shareid, uk, fs_ids, save_dir, bdstoken)

        # Step 4: list the directory and match the new fs_ids
        new_fs_ids = self._transfer._list_and_match(save_dir, filenames, bdstoken)

        if len(new_fs_ids) == len(filenames):
            names = list(filenames)
        else:
            # _list_and_match leaves out names it could not find, so the two
            # lists no longer line up by position. Look the names up by ID
            # instead of pairing an ID with somebody else's name.
            listed = {info.fid: info.name for info in self.get_files(save_dir)}
            names = [listed.get(fid, "") for fid in new_fs_ids]

        # Remember the files for _filter_ads.
        self._last_transfer_files = [
            {"fs_id": fid, "name": name}
            for fid, name in zip(new_fs_ids, names)
            if fid
        ]

        return new_fs_ids

    def _create_share(self, file_ids: List[str], title: str,
                      password: str = "") -> Tuple[str, str]:
        """Create a Baidu share (step 5).

        Args:
            file_ids: New ``fs_id`` values obtained from the transfer.
            title: Original title (not used by this implementation).
            password: Password of the new share.

        Returns:
            ``(new_share_url, share_password)``.

        Raises:
            TransferError: ``RESOURCE_EMPTY`` when no entry of ``file_ids``
                is numeric; otherwise whatever ``create_share`` raises.
        """
        numeric_ids: List[int] = []
        for fid in file_ids:
            try:
                numeric_ids.append(int(fid))
            except (TypeError, ValueError):
                logger.warning(f"忽略非数字 fs_id: {fid}")

        # Without a single usable ID there is nothing to share; fail with a
        # TransferError rather than letting int() blow up on the bad IDs.
        if not numeric_ids:
            raise TransferError(
                TransferErrorCode.RESOURCE_EMPTY,
                message="没有可用于创建分享的有效 fs_id",
                platform=self.PLATFORM_KEY,
            )

        return self._transfer.create_share(
            fids=numeric_ids,
            password=password,
            period=0,  # permanent
        )

    # ─── listing & deletion ─────────────────────────────────

    def get_files(self, parent_fid: str = "0") -> List[FileInfo]:
        """List the files of a Baidu Netdisk directory.

        Calls ``GET /api/list?dir={parent_fid}``.

        Args:
            parent_fid: Directory path; ``"0"`` is mapped to the root
                ``"/"``. For Baidu this is a path, not a numeric ID.

        Returns:
            One ``FileInfo`` per listed entry; an empty list when the
            request fails or the API reports a non-zero ``errno``.
        """
        bdstoken = self.credential.get_bdstoken()
        dir_path = parent_fid if parent_fid != "0" else "/"

        url = "https://pan.baidu.com/api/list"
        params = {"dir": dir_path, "bdstoken": bdstoken}
        headers = self.credential.get_headers()

        try:
            resp = self._get(url, params=params, headers=headers)
            data = resp.json()
        except Exception as e:
            # Listing is best-effort: report and return nothing.
            logger.error(f"百度列出目录失败: {e}")
            return []

        errno = data.get("errno", -1)
        if errno != 0:
            logger.error(f"百度列出目录 errno={errno}: {data}")
            return []

        files = []
        for item in data.get("list", []):
            name = item.get("server_filename", "")
            is_dir = item.get("isdir", 0) == 1
            # Extension is only derived for regular files.
            ext = ""
            if not is_dir and "." in name:
                ext = name.rsplit(".", 1)[-1]

            files.append(FileInfo(
                fid=str(item.get("fs_id", "")),
                name=name,
                size=item.get("size", 0),
                is_dir=is_dir,
                ext=ext,
            ))

        return files

    def delete(self, file_ids: List[str]) -> bool:
        """Delete Baidu Netdisk files by path.

        Args:
            file_ids: Full drive paths, e.g.
                ``["/dir/file.txt", "/dir/file2.zip"]``.

        Returns:
            The result of ``BaiduCleanup.delete_files``.
        """
        return self._cleanup.delete_files(file_ids)

    # ─── ad filtering ───────────────────────────────────────

    def _filter_ads(self, file_ids: List[str]) -> List[str]:
        """Filter ad files using the names stored by the last transfer.

        IDs whose name is unknown are passed with an empty name, which the
        cleaner never classifies as an ad.
        """
        if not self._last_transfer_files:
            return file_ids

        # Build the name list in file_ids order (one name per ID) so the
        # cleaner pairs every ID with its own name.
        name_by_id = {f["fs_id"]: f["name"] for f in self._last_transfer_files}
        names = [name_by_id.get(fid, "") for fid in file_ids]

        return self._cleanup.filter_ad_ids(file_ids, names)

    # ─── extras ─────────────────────────────────────────────

    def delete_paths(self, paths: List[str]) -> bool:
        """Convenience deletion that calls the cleaner directly."""
        return self._cleanup.delete_files(paths)
|
||||
154
cloudsearch_transfer/adapter/baidu/cleanup.py
Normal file
154
cloudsearch_transfer/adapter/baidu/cleanup.py
Normal file
@@ -0,0 +1,154 @@
|
||||
"""
|
||||
百度网盘文件清理 — 删除文件 & 广告过滤
|
||||
参考 cloud-auto-save 的 filter_ads + netdisk 的 delete
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
from typing import List
|
||||
|
||||
import requests
|
||||
|
||||
from ...errors import TransferError, TransferErrorCode
|
||||
from .credential import BaiduCredentialManager, BAIDU_PAN_API
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Default ad keywords
DEFAULT_AD_KEYWORDS = [
    "公众号", "微信", "扫码", "加群", "QQ群", "广告",
    "关注", "免费领取", "点击领取", "全网", "最全",
    "防走丢", "防迷路", "备用", "务必下载", "必看",
    "解压密码", "压缩密码",
]


class BaiduCleanup:
    """Baidu Netdisk file deletion and ad filtering."""

    def __init__(self, session: "requests.Session",
                 credential: "BaiduCredentialManager",
                 ad_keywords: List[str] = None):
        """
        Args:
            session: Session used for the delete request.
            credential: Provider of the bdstoken and the request headers.
            ad_keywords: Keywords marking a filename as an ad; falls back
                to ``DEFAULT_AD_KEYWORDS`` when empty or ``None``.
        """
        self.session = session
        self.credential = credential
        # Keep a private copy (so the module default is never shared) and
        # drop blank entries: an empty keyword is a substring of every name
        # and would classify every file as an ad.
        self.ad_keywords = [
            kw for kw in (ad_keywords or DEFAULT_AD_KEYWORDS)
            if kw and kw.strip()
        ]

    # ─── deletion ───────────────────────────────────────────

    def delete_files(self, paths: List[str]) -> bool:
        """Delete several files by their drive paths.

        ``POST /api/filemanager?opera=delete&bdstoken={bdstoken}`` with the
        form field ``filelist`` holding the JSON-encoded path list.

        Args:
            paths: Full drive paths, e.g. ``["/dir/file.txt"]``.

        Returns:
            ``True`` when ``paths`` is empty or the API answers with
            ``errno`` 0 or 2 (2 = file does not exist).

        Raises:
            TransferError: ``NETWORK_ERROR`` when the request fails or the
                API answers with any other ``errno``.
        """
        if not paths:
            logger.info("删除列表为空,跳过")
            return True

        bdstoken = self.credential.get_bdstoken()
        url = f"{BAIDU_PAN_API}/api/filemanager"
        params = {
            "opera": "delete",
            "bdstoken": bdstoken,
        }
        data = {
            "filelist": json.dumps(paths, ensure_ascii=False),
        }
        headers = self.credential.get_headers()
        headers["Content-Type"] = "application/x-www-form-urlencoded"

        try:
            resp = self.session.post(
                url, params=params, data=data, headers=headers, timeout=30
            )
            resp.raise_for_status()
            result = resp.json()
        except Exception as e:
            # Chain the cause so the original traceback is preserved.
            raise TransferError(
                TransferErrorCode.NETWORK_ERROR,
                message=f"百度删除请求失败: {e}",
                platform="baidu",
            ) from e

        errno = result.get("errno", -1)

        # errno=0: success; errno=2: file does not exist (treated as success)
        if errno in (0, 2):
            logger.info(f"百度删除完成: {len(paths)} 个路径 (errno={errno})")
            return True

        raise TransferError(
            TransferErrorCode.NETWORK_ERROR,
            message=f"百度删除失败 (errno={errno})",
            platform="baidu",
            details=result,
        )

    # ─── ad filtering ───────────────────────────────────────

    def filter_ads(self, files: List[dict]) -> List[dict]:
        """Drop the files whose name contains an ad keyword.

        Args:
            files: ``[{"fs_id": "xxx", "name": "xxx"}, ...]``.

        Returns:
            The entries of ``files`` that are not ads, in original order.
        """
        if not self.ad_keywords:
            return files

        retained = []
        removed = []
        for f in files:
            name = f.get("name", "")
            if self._is_ad(name):
                removed.append(name)
            else:
                retained.append(f)

        if removed:
            logger.info(f"广告过滤: 移除 {len(removed)} 个文件: {removed}")
        return retained

    def filter_ad_ids(self, file_ids: List[str],
                      file_names: List[str]) -> List[str]:
        """Return the IDs whose corresponding name is not an ad.

        Args:
            file_ids: File IDs.
            file_names: Names paired with ``file_ids`` by position. IDs
                beyond the end of this list have no known name and are
                kept.

        Returns:
            The retained IDs, in original order.
        """
        if not self.ad_keywords:
            return file_ids

        retained = []
        known = len(file_names)
        for index, fid in enumerate(file_ids):
            # An ID without a name must not be dropped silently.
            name = file_names[index] if index < known else ""
            if self._is_ad(name):
                logger.info(f"广告过滤: 移除 {name}")
            else:
                retained.append(fid)

        return retained

    def _is_ad(self, filename: str) -> bool:
        """Tell whether *filename* contains any ad keyword (case-insensitive)."""
        if not filename:
            return False
        name_lower = filename.lower()
        # Blank keywords are skipped even if someone assigned them to
        # ad_keywords after construction.
        return any(kw and kw.lower() in name_lower for kw in self.ad_keywords)
|
||||
101
cloudsearch_transfer/adapter/baidu/credential.py
Normal file
101
cloudsearch_transfer/adapter/baidu/credential.py
Normal file
@@ -0,0 +1,101 @@
|
||||
"""
|
||||
百度网盘凭证管理器 — bdstoken 获取与校验
|
||||
参考 cloud-auto-save 的 BaiduNetDisk.cookie 机制
|
||||
"""
|
||||
|
||||
import logging
|
||||
import requests
|
||||
|
||||
from ...errors import TransferError, TransferErrorCode
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# 百度网盘 API 基础 URL
|
||||
BAIDU_PAN_API = "https://pan.baidu.com"
|
||||
|
||||
|
||||
class BaiduCredentialManager:
    """Baidu Netdisk cookie credential plus bdstoken management.

    The bdstoken is fetched through the API on first use and cached on the
    instance.
    """

    def __init__(self, cookie: str, session: "requests.Session"):
        """
        Args:
            cookie: Complete Baidu cookie string.
            session: Shared ``requests.Session`` used for the token request.
        """
        self.cookie = cookie
        self.session = session
        self._bdstoken: str = ""

    # ─── public methods ─────────────────────────────────────

    def validate(self) -> bool:
        """Return ``True`` when the stripped cookie has at least 50 characters."""
        return bool(self.cookie and len(self.cookie.strip()) >= 50)

    def get_bdstoken(self, force_refresh: bool = False) -> str:
        """Return the bdstoken, requesting and caching it when needed.

        API: ``GET /api/gettemplatevariable?fields=["bdstoken"]``

        Args:
            force_refresh: Ignore the cached value and request a new one.

        Raises:
            TransferError: ``BAIDU_BDSTOKEN_FAIL`` when the request fails,
                the API reports a non-zero ``errno``, or the token is empty.
        """
        if self._bdstoken and not force_refresh:
            return self._bdstoken

        url = f"{BAIDU_PAN_API}/api/gettemplatevariable"
        params = {"fields": '["bdstoken"]'}
        headers = self.get_headers()

        try:
            resp = self.session.get(url, params=params, headers=headers, timeout=15)
            resp.raise_for_status()
            data = resp.json()
        except Exception as e:
            logger.error(f"获取 bdstoken 网络异常: {e}")
            # Chain the cause so the original traceback is preserved.
            raise TransferError(
                TransferErrorCode.BAIDU_BDSTOKEN_FAIL,
                message=f"百度 bdstoken 请求失败: {e}",
                platform="baidu",
            ) from e

        # A payload that is not a JSON object is handled like a missing errno.
        errno = data.get("errno", -1) if isinstance(data, dict) else -1
        if errno != 0:
            logger.error(f"获取 bdstoken API 返回 errno={errno}: {data}")
            raise TransferError(
                TransferErrorCode.BAIDU_BDSTOKEN_FAIL,
                message=f"百度 bdstoken 获取失败 (errno={errno})",
                platform="baidu",
                details={"response": data},
            )

        # "result" may be missing or null; both must end in the
        # "empty token" error below, not in an AttributeError.
        result = data.get("result")
        token = result.get("bdstoken", "") if isinstance(result, dict) else ""
        self._bdstoken = token or ""
        if not self._bdstoken:
            raise TransferError(
                TransferErrorCode.BAIDU_BDSTOKEN_FAIL,
                message="百度 bdstoken 为空",
                platform="baidu",
            )

        logger.info("bdstoken 获取成功")
        return self._bdstoken

    def get_headers(self) -> dict:
        """Return a fresh header dict with Cookie, Referer and Origin."""
        return {
            "Cookie": self.cookie,
            "Referer": "https://pan.baidu.com/",
            "Origin": "https://pan.baidu.com",
        }

    def invalidate_bdstoken(self):
        """Clear the cached token so the next ``get_bdstoken`` refetches it."""
        self._bdstoken = ""
        logger.info("bdstoken 缓存已失效")
|
||||
448
cloudsearch_transfer/adapter/baidu/transfer.py
Normal file
448
cloudsearch_transfer/adapter/baidu/transfer.py
Normal file
@@ -0,0 +1,448 @@
|
||||
"""
|
||||
百度网盘转存核心 — 5 步转存流程
|
||||
参考 netdisk 的 PanbaiduSave + cloud-auto-save 的 BaiduNetDisk.transfer
|
||||
|
||||
流程:
|
||||
① 验证提取码 → POST /share/verify
|
||||
② 解析分享页 → GET /s/1{surl}
|
||||
③ 转存文件 → POST /share/transfer
|
||||
④ 列出目录 → GET /api/list
|
||||
⑤ 创建分享 → POST /share/set
|
||||
"""
|
||||
|
||||
import re
|
||||
import json
|
||||
import logging
|
||||
from typing import List, Tuple
|
||||
|
||||
import requests
|
||||
|
||||
from ...errors import TransferError, TransferErrorCode
|
||||
from .credential import BaiduCredentialManager, BAIDU_PAN_API
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ─── 正则 ──────────────────────────────────────────────────
|
||||
|
||||
# 从 HTML 中提取 shareid
|
||||
RE_SHAREID = re.compile(r"""shareid["\s:=]+(\d+)""")
|
||||
# 从 HTML 中提取 uk
|
||||
RE_UK = re.compile(r"""uk["\s:=]+(\d+)""")
|
||||
# 从 HTML 中提取 fs_id
|
||||
RE_FS_ID = re.compile(r'"fs_id"\s*:\s*(\d+)')
|
||||
# 从 HTML 中提取 server_filename
|
||||
RE_FILENAME = re.compile(r'"server_filename"\s*:\s*"([^"]*)"')
|
||||
# 从 HTML/JSON 中提取标题
|
||||
RE_TITLE = re.compile(r'"title"\s*:\s*"([^"]*)"')
|
||||
# 从 HTML 中提取文件列表 JSON 块 (file_list 对象) — 标记位置
|
||||
RE_FILE_LIST_MARK = re.compile(r'"file_list"\s*:\s*(\{)', re.DOTALL)
|
||||
# 提取单个文件条目 (fallback)
|
||||
RE_FILE_ENTRY = re.compile(r'\{"fs_id":(\d+),"server_filename":"([^"]+)"')
|
||||
|
||||
|
||||
class BaiduTransfer:
|
||||
"""百度网盘 5 步转存执行器
|
||||
|
||||
每个实例绑定一个 Session + Cookie + bdstoken,
|
||||
执行完整的「验证→解析→转存→查目录→创建分享」流程。
|
||||
"""
|
||||
|
||||
def __init__(self, session: requests.Session,
|
||||
credential: BaiduCredentialManager):
|
||||
self.session = session
|
||||
self.credential = credential
|
||||
self.cookie = credential.cookie
|
||||
|
||||
# ─── 5 步主流程 ────────────────────────────────────────
|
||||
|
||||
def execute(self, surl: str, password: str,
|
||||
save_dir: str = "/") -> Tuple[List[str], dict]:
|
||||
"""执行完整的 5 步转存流程
|
||||
|
||||
Args:
|
||||
surl: 分享短码 (s/1 后面的部分)
|
||||
password: 提取码
|
||||
save_dir: 转存目标目录
|
||||
|
||||
Returns:
|
||||
(new_fs_ids, file_info_dict)
|
||||
new_fs_ids: 转存后的文件 fs_id 列表
|
||||
file_info_dict: {fs_id: name} 映射
|
||||
|
||||
Raises:
|
||||
TransferError: 任何一步失败
|
||||
"""
|
||||
bdstoken = self.credential.get_bdstoken()
|
||||
|
||||
# ① 验证提取码
|
||||
logger.info(f"[百度转存] ① 验证提取码 surl={surl}")
|
||||
self._verify_password(surl, password, bdstoken)
|
||||
|
||||
# ② 解析分享页
|
||||
logger.info(f"[百度转存] ② 解析分享页 surl={surl}")
|
||||
share_info = self._parse_share_page(surl)
|
||||
shareid = share_info["shareid"]
|
||||
uk = share_info["uk"]
|
||||
fs_ids = share_info["fs_ids"]
|
||||
filenames = share_info["filenames"]
|
||||
title = share_info.get("title", "")
|
||||
|
||||
if not fs_ids:
|
||||
raise TransferError(
|
||||
TransferErrorCode.RESOURCE_EMPTY,
|
||||
message="分享中没有找到可转存的文件",
|
||||
platform="baidu",
|
||||
)
|
||||
|
||||
# ③ 转存到自己的网盘
|
||||
logger.info(f"[百度转存] ③ 转存 {len(fs_ids)} 个文件到 {save_dir}")
|
||||
self._transfer_files(shareid, uk, fs_ids, save_dir, bdstoken)
|
||||
|
||||
# ④ 列出目标目录,按文件名匹配新的 fs_id
|
||||
logger.info(f"[百度转存] ④ 列出目录 {save_dir} 匹配新 fs_id")
|
||||
new_fs_ids = self._list_and_match(save_dir, filenames, bdstoken)
|
||||
|
||||
if not new_fs_ids:
|
||||
raise TransferError(
|
||||
TransferErrorCode.NETWORK_ERROR,
|
||||
message="转存后无法匹配到新文件 ID",
|
||||
platform="baidu",
|
||||
)
|
||||
|
||||
# 构建返回的 info dict
|
||||
file_info = {}
|
||||
for name, fid in zip(filenames, new_fs_ids) if len(filenames) == len(new_fs_ids) else []:
|
||||
file_info[fid] = name
|
||||
if not file_info:
|
||||
for fid in new_fs_ids:
|
||||
file_info[fid] = title or fid
|
||||
|
||||
return new_fs_ids, file_info
|
||||
|
||||
def create_share(self, fids: List[int], password: str = "",
|
||||
period: int = 0) -> Tuple[str, str]:
|
||||
"""⑤ 创建新分享
|
||||
|
||||
Args:
|
||||
fids: 转存后的文件 fs_id 列表
|
||||
password: 分享密码(空 = 无密码)
|
||||
period: 分享有效期 (0=永久)
|
||||
|
||||
Returns:
|
||||
(share_url, share_password)
|
||||
"""
|
||||
bdstoken = self.credential.get_bdstoken()
|
||||
url = f"{BAIDU_PAN_API}/share/set"
|
||||
params = {
|
||||
"channel": "chunlei",
|
||||
"clienttype": "0",
|
||||
"web": "1",
|
||||
"bdstoken": bdstoken,
|
||||
}
|
||||
data = {
|
||||
"fid_list": json.dumps(fids),
|
||||
"period": period,
|
||||
"pwd": password,
|
||||
}
|
||||
headers = self.credential.get_headers()
|
||||
|
||||
try:
|
||||
resp = self.session.post(
|
||||
url, params=params, data=data, headers=headers, timeout=30
|
||||
)
|
||||
resp.raise_for_status()
|
||||
except Exception as e:
|
||||
raise TransferError(
|
||||
TransferErrorCode.NETWORK_ERROR,
|
||||
message=f"创建分享请求失败: {e}",
|
||||
platform="baidu",
|
||||
)
|
||||
|
||||
result = resp.json()
|
||||
errno = result.get("errno", -1)
|
||||
|
||||
if errno == 9219:
|
||||
raise TransferError(
|
||||
TransferErrorCode.SHARE_LIMIT,
|
||||
message="百度今日分享次数过多",
|
||||
platform="baidu",
|
||||
)
|
||||
if errno != 0:
|
||||
raise TransferError(
|
||||
TransferErrorCode.SHARE_LINK_FAIL,
|
||||
message=f"创建分享失败 (errno={errno})",
|
||||
platform="baidu",
|
||||
details=result,
|
||||
)
|
||||
|
||||
share_url = result.get("link", "")
|
||||
share_password = result.get("pwd", password) or password
|
||||
|
||||
logger.info(f"[百度转存] ⑤ 分享创建成功: {share_url}")
|
||||
return share_url, share_password
|
||||
|
||||
# ─── 5 步内部方法 ──────────────────────────────────────
|
||||
|
||||
def _verify_password(self, surl: str, password: str, bdstoken: str):
|
||||
"""① 验证提取码
|
||||
|
||||
POST /share/verify?surl={surl}&bdstoken={bdstoken}
|
||||
Body: {"pwd": "xxxx"}
|
||||
|
||||
errno=0 表示通过;errno=-9 表示提取码错误;errno=2 表示分享不存在
|
||||
"""
|
||||
url = f"{BAIDU_PAN_API}/share/verify"
|
||||
params = {
|
||||
"surl": surl,
|
||||
"bdstoken": bdstoken,
|
||||
}
|
||||
data = {"pwd": password}
|
||||
headers = self.credential.get_headers()
|
||||
headers["Content-Type"] = "application/x-www-form-urlencoded"
|
||||
|
||||
try:
|
||||
resp = self.session.post(
|
||||
url, params=params, data=data, headers=headers, timeout=15
|
||||
)
|
||||
resp.raise_for_status()
|
||||
except Exception as e:
|
||||
raise TransferError(
|
||||
TransferErrorCode.NETWORK_ERROR,
|
||||
message=f"验证提取码请求失败: {e}",
|
||||
platform="baidu",
|
||||
)
|
||||
|
||||
result = resp.json()
|
||||
errno = result.get("errno", -1)
|
||||
|
||||
if errno == 0:
|
||||
logger.info("提取码验证通过")
|
||||
return
|
||||
|
||||
if errno == -9 or errno == -62:
|
||||
raise TransferError(
|
||||
TransferErrorCode.PASSCODE_WRONG,
|
||||
message="百度提取码错误",
|
||||
platform="baidu",
|
||||
)
|
||||
if errno == 2 or errno == 118:
|
||||
raise TransferError(
|
||||
TransferErrorCode.SHARE_NOT_EXIST,
|
||||
message="百度分享不存在或已失效",
|
||||
platform="baidu",
|
||||
)
|
||||
raise TransferError(
|
||||
TransferErrorCode.NETWORK_ERROR,
|
||||
message=f"验证提取码失败 (errno={errno})",
|
||||
platform="baidu",
|
||||
details=result,
|
||||
)
|
||||
|
||||
def _parse_share_page(self, surl: str) -> dict:
|
||||
"""② 解析分享页面 HTML
|
||||
|
||||
GET /s/1{surl}
|
||||
从 HTML 中正则提取 shareid, uk, fs_id[], server_filename[]
|
||||
"""
|
||||
url = f"{BAIDU_PAN_API}/s/1{surl}"
|
||||
headers = self.credential.get_headers()
|
||||
|
||||
try:
|
||||
resp = self.session.get(url, headers=headers, timeout=20)
|
||||
resp.raise_for_status()
|
||||
html = resp.text
|
||||
except Exception as e:
|
||||
raise TransferError(
|
||||
TransferErrorCode.NETWORK_ERROR,
|
||||
message=f"打开分享页面失败: {e}",
|
||||
platform="baidu",
|
||||
)
|
||||
|
||||
# 提取 shareid
|
||||
m_shareid = RE_SHAREID.search(html)
|
||||
if not m_shareid:
|
||||
raise TransferError(
|
||||
TransferErrorCode.SHARE_NOT_EXIST,
|
||||
message="无法从页面中提取 shareid,分享可能已失效",
|
||||
platform="baidu",
|
||||
)
|
||||
shareid = m_shareid.group(1)
|
||||
|
||||
# 提取 uk
|
||||
m_uk = RE_UK.search(html)
|
||||
uk = m_uk.group(1) if m_uk else ""
|
||||
|
||||
# 提取标题
|
||||
m_title = RE_TITLE.search(html)
|
||||
title = m_title.group(1) if m_title else ""
|
||||
|
||||
# 提取文件列表 — 优先从 file_list JSON 块中提取
|
||||
fs_ids = []
|
||||
filenames = []
|
||||
|
||||
# 方法1:查找 file_list JSON 块(使用括号计数提取平衡 JSON)
|
||||
m_fl = RE_FILE_LIST_MARK.search(html)
|
||||
if m_fl:
|
||||
start = m_fl.start(1) # { 的位置
|
||||
depth = 1
|
||||
end = start + 1
|
||||
while end < len(html) and depth > 0:
|
||||
if html[end] == '{':
|
||||
depth += 1
|
||||
elif html[end] == '}':
|
||||
depth -= 1
|
||||
end += 1
|
||||
file_list_json = html[start:end]
|
||||
try:
|
||||
file_list = json.loads(file_list_json)
|
||||
for entry in file_list.get("list", []):
|
||||
fs_ids.append(str(entry.get("fs_id", "")))
|
||||
filenames.append(entry.get("server_filename", ""))
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
|
||||
# 方法2:退化为正则提取所有 fs_id + server_filename
|
||||
if not fs_ids:
|
||||
for m in RE_FILE_ENTRY.finditer(html):
|
||||
fs_ids.append(m.group(1))
|
||||
filenames.append(m.group(2))
|
||||
|
||||
if not fs_ids:
|
||||
# 可能只有一个文件,尝试单个提取
|
||||
m_fsid = RE_FS_ID.search(html)
|
||||
m_name = RE_FILENAME.search(html)
|
||||
if m_fsid:
|
||||
fs_ids.append(m_fsid.group(1))
|
||||
filenames.append(m_name.group(1) if m_name else "")
|
||||
|
||||
logger.info(
|
||||
f"解析分享页: shareid={shareid}, uk={uk}, "
|
||||
f"文件数={len(fs_ids)}, title={title[:30]}"
|
||||
)
|
||||
return {
|
||||
"shareid": shareid,
|
||||
"uk": uk,
|
||||
"fs_ids": fs_ids,
|
||||
"filenames": filenames,
|
||||
"title": title,
|
||||
}
|
||||
|
||||
def _transfer_files(self, shareid: str, uk: str,
|
||||
fs_ids: List[str], save_dir: str, bdstoken: str):
|
||||
"""③ 转存文件到自己的网盘
|
||||
|
||||
POST /share/transfer?shareid={shareid}&from={uk}&bdstoken={bdstoken}
|
||||
Body: fsidlist=[1,2,3]&path=/dir
|
||||
"""
|
||||
url = f"{BAIDU_PAN_API}/share/transfer"
|
||||
params = {
|
||||
"shareid": shareid,
|
||||
"from": uk,
|
||||
"bdstoken": bdstoken,
|
||||
}
|
||||
data = {
|
||||
"fsidlist": json.dumps([int(x) for x in fs_ids]),
|
||||
"path": save_dir,
|
||||
}
|
||||
headers = self.credential.get_headers()
|
||||
headers["Content-Type"] = "application/x-www-form-urlencoded"
|
||||
|
||||
try:
|
||||
resp = self.session.post(
|
||||
url, params=params, data=data, headers=headers, timeout=30
|
||||
)
|
||||
resp.raise_for_status()
|
||||
except Exception as e:
|
||||
raise TransferError(
|
||||
TransferErrorCode.NETWORK_ERROR,
|
||||
message=f"转存请求失败: {e}",
|
||||
platform="baidu",
|
||||
)
|
||||
|
||||
result = resp.json()
|
||||
errno = result.get("errno", -1)
|
||||
|
||||
if errno == 0:
|
||||
logger.info(f"转存成功: {len(fs_ids)} 个文件 → {save_dir}")
|
||||
return
|
||||
|
||||
if errno == 12:
|
||||
raise TransferError(
|
||||
TransferErrorCode.CAPACITY_FULL,
|
||||
message="百度网盘空间不足",
|
||||
platform="baidu",
|
||||
)
|
||||
if errno == 9013:
|
||||
raise TransferError(
|
||||
TransferErrorCode.SENSITIVE_RESOURCE,
|
||||
message="文件包含违规内容,无法转存",
|
||||
platform="baidu",
|
||||
)
|
||||
raise TransferError(
|
||||
TransferErrorCode.NETWORK_ERROR,
|
||||
message=f"转存失败 (errno={errno})",
|
||||
platform="baidu",
|
||||
details=result,
|
||||
)
|
||||
|
||||
def _list_and_match(self, save_dir: str, filenames: List[str],
                    bdstoken: str) -> List[str]:
    """Step 4: list the target directory and look up the new fs_id of each file.

    Sends ``GET /api/list?dir={dir}&bdstoken={bdstoken}`` and matches the
    returned ``list`` entries by ``server_filename``.

    Args:
        save_dir: Directory to list.
        filenames: Expected file names, in the order the ids should come back.
        bdstoken: Token sent as the ``bdstoken`` query parameter.

    Returns:
        The fs_ids (as strings) of the names that were found, in the order
        of ``filenames``. Names that are missing are logged and left out.

    Raises:
        TransferError: ``DIR_NOT_EXIST`` for ``errno == -12``; ``NETWORK_ERROR``
            when the request fails or any other non-zero ``errno`` is returned.
    """
    endpoint = f"{BAIDU_PAN_API}/api/list"
    query = {"dir": save_dir, "bdstoken": bdstoken}
    request_headers = self.credential.get_headers()

    try:
        response = self.session.get(
            endpoint, params=query, headers=request_headers, timeout=15
        )
        response.raise_for_status()
        payload = response.json()
    except Exception as e:
        raise TransferError(
            TransferErrorCode.NETWORK_ERROR,
            message=f"列出目录失败: {e}",
            platform="baidu",
        )

    errno = payload.get("errno", -1)
    if errno == -12:
        raise TransferError(
            TransferErrorCode.DIR_NOT_EXIST,
            message=f"百度目录不存在: {save_dir}",
            platform="baidu",
        )
    if errno != 0:
        raise TransferError(
            TransferErrorCode.NETWORK_ERROR,
            message=f"列出目录失败 (errno={errno})",
            platform="baidu",
            details=payload,
        )

    # Map file name -> fs_id; when a name occurs twice the later entry wins.
    fid_by_name = {}
    for entry in payload.get("list", []):
        entry_name = entry.get("server_filename", "")
        entry_fid = str(entry.get("fs_id", ""))
        if not (entry_name and entry_fid):
            continue
        fid_by_name[entry_name] = entry_fid

    # Walk the expected names so the result keeps their order.
    matched = []
    for wanted in filenames:
        if wanted not in fid_by_name:
            logger.warning(f"目录中未找到文件: {wanted}")
            continue
        matched.append(fid_by_name[wanted])

    logger.info(
        f"目录匹配: 期望 {len(filenames)} 个, 匹配到 {len(matched)} 个"
    )
    return matched
|
||||
330
cloudsearch_transfer/adapter/base.py
Normal file
330
cloudsearch_transfer/adapter/base.py
Normal file
@@ -0,0 +1,330 @@
|
||||
"""
|
||||
CloudSearch Transfer — 适配器抽象基类 v1.0.0
|
||||
参考 cloud-auto-save 的 BaseCloudDriveAdapter + netdisk 的 Pan 接口
|
||||
"""
|
||||
|
||||
import time
|
||||
import re
|
||||
import logging
|
||||
from abc import ABC, abstractmethod
|
||||
from dataclasses import dataclass
|
||||
from typing import Optional, List, Tuple, Dict, Any
|
||||
from urllib.parse import urlparse, parse_qs
|
||||
|
||||
import requests
|
||||
|
||||
from ..config import PlatformConfig, TransferConfig
|
||||
from ..errors import TransferError, TransferErrorCode
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
class FileInfo:
    """Description of one file or directory entry."""

    fid: str  # file id
    name: str  # file name
    size: int = 0  # file size
    is_dir: bool = False
    ext: str = ""  # file extension
|
||||
|
||||
|
||||
@dataclass
class TransferResult:
    """Outcome of a transfer (save + re-share) operation."""

    success: bool
    platform: str
    new_file_id: str = ""  # id(s) of the saved file(s)
    file_name: str = ""  # file name
    share_url: str = ""  # newly created share link
    share_password: str = ""  # password of the new share
    original_url: str = ""  # share link that was transferred
    elapsed_ms: int = 0  # elapsed time in milliseconds
    error: Optional[TransferError] = None
|
||||
|
||||
|
||||
@dataclass
class VerifyResult:
    """Outcome of validating a share link."""

    valid: bool
    platform: str
    title: str = ""
    file_count: int = 0
    # None is only a placeholder: __post_init__ swaps it for a fresh list so
    # instances never share one mutable default.
    files: Optional[List[FileInfo]] = None
    error: Optional[TransferError] = None

    def __post_init__(self):
        """Replace a missing ``files`` value with a new empty list."""
        if self.files is None:
            self.files = []
|
||||
|
||||
|
||||
class BaseCloudDriveAdapter(ABC):
    """
    Abstract base class for cloud-drive adapters.

    Every platform implements this class and exposes the same interface:
    - transfer(): save a share into the own drive, then create a new share
    - verify(): check whether a share link is valid
    - get_files(): list the files of a directory
    - delete(): delete files
    """

    # Subclasses must override these.
    PLATFORM_NAME: str = ""
    PLATFORM_KEY: str = ""  # quark/baidu/aliyun/uc/xunlei/pan123/cloud189

    # Share-URL regexes (overridden by subclasses); group 1 is the share id.
    URL_PATTERNS: List[str] = []

    # Default request headers applied to the session.
    DEFAULT_HEADERS: Dict[str, str] = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                      "AppleWebKit/537.36 (KHTML, like Gecko) "
                      "Chrome/135.0.0.0 Safari/537.36",
        "Accept": "application/json, text/plain, */*",
        "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
    }

    def __init__(self, config: PlatformConfig, transfer_config: TransferConfig):
        """Store the configs, create the HTTP session and run ``_setup_session``.

        Note that ``_setup_session`` runs before any subclass ``__init__`` code
        placed after ``super().__init__()``.
        """
        self.config = config
        self.transfer_config = transfer_config
        self.session = requests.Session()
        self.session.headers.update(self.DEFAULT_HEADERS)
        self._setup_session()

    def _setup_session(self):
        """Hook for subclasses to add session-specific headers/cookies."""
        pass

    # ─── Public interface ──────────────────────────────────

    def transfer(self, share_url: str, save_dir: str = "",
                 share_password: str = "") -> TransferResult:
        """
        Save a share into the own drive, then create a new share of it.

        Args:
            share_url: Original share link.
            save_dir: Destination directory (empty = configured default, else "/").
            share_password: Password for the new share (empty = configured one).

        Returns:
            A successful ``TransferResult``.

        Raises:
            TransferError: For every failure; unexpected exceptions are logged
                and wrapped as ``NETWORK_ERROR``.
        """
        start = time.time()
        try:
            # 1. Parse the URL into share id and passcode.
            pwd_id, passcode = self._parse_share_url(share_url)

            # 2. Fetch the share detail.
            detail = self._get_share_detail(pwd_id, passcode)
            if not detail:
                raise TransferError(TransferErrorCode.SHARE_NOT_EXIST,
                                    platform=self.PLATFORM_KEY)

            # 3. Save the files.
            save_dir = save_dir or self.config.save_dir or "/"
            new_fids = self._save_files(pwd_id, detail, save_dir)
            if not new_fids:
                raise TransferError(TransferErrorCode.RESOURCE_EMPTY,
                                    platform=self.PLATFORM_KEY)

            # 4. Ad filtering.
            if self.transfer_config.ad_filter_enabled:
                new_fids = self._filter_ads(new_fids)
                if not new_fids:
                    raise TransferError(TransferErrorCode.RESOURCE_EMPTY,
                                        platform=self.PLATFORM_KEY)

            # 5. Create the new share.
            pwd = share_password or self.config.share_password
            share_url_new, share_pwd = self._create_share(
                new_fids, detail.get("title", ""), pwd)

            elapsed = int((time.time() - start) * 1000)
            return TransferResult(
                success=True,
                platform=self.PLATFORM_KEY,
                new_file_id=",".join(new_fids),
                file_name=detail.get("title", ""),
                share_url=share_url_new,
                share_password=share_pwd,
                original_url=share_url,
                elapsed_ms=elapsed,
            )

        except TransferError:
            raise
        except Exception as e:
            logger.exception(f"[{self.PLATFORM_KEY}] transfer failed: {share_url}")
            raise TransferError(TransferErrorCode.NETWORK_ERROR,
                                message=str(e), platform=self.PLATFORM_KEY) from e

    def verify(self, share_url: str) -> VerifyResult:
        """Check whether a share link is valid.

        Never raises: every failure is returned as ``VerifyResult(valid=False)``
        carrying the ``TransferError`` in ``error``.
        """
        try:
            pwd_id, passcode = self._parse_share_url(share_url)
            detail = self._get_share_detail(pwd_id, passcode)
            # Same rule as transfer(): an empty detail means the share is gone.
            if not detail:
                raise TransferError(TransferErrorCode.SHARE_NOT_EXIST,
                                    platform=self.PLATFORM_KEY)
            files = self._extract_file_list(detail)
            return VerifyResult(
                valid=True,
                platform=self.PLATFORM_KEY,
                title=detail.get("title", ""),
                file_count=len(files),
                files=files,
            )
        except TransferError as e:
            return VerifyResult(valid=False, platform=self.PLATFORM_KEY, error=e)
        except Exception as e:
            return VerifyResult(
                valid=False,
                platform=self.PLATFORM_KEY,
                error=TransferError(TransferErrorCode.NETWORK_ERROR,
                                    message=str(e), platform=self.PLATFORM_KEY),
            )

    @abstractmethod
    def get_files(self, parent_fid: str = "0") -> List[FileInfo]:
        """List the files of a directory."""
        ...

    @abstractmethod
    def delete(self, file_ids: List[str]) -> bool:
        """Delete files."""
        ...

    # ─── URL parsing ───────────────────────────────────────

    def _parse_share_url(self, url: str) -> Tuple[str, str]:
        """
        Parse a share URL into ``(pwd_id, passcode)``. Subclasses may override.

        The id is group 1 of the first matching ``URL_PATTERNS`` entry; the
        passcode is the ``pwd`` query parameter, else ``code``, else "".

        Raises:
            TransferError: ``URL_INVALID`` when no pattern matches.
        """
        for pattern in self.URL_PATTERNS:
            m = re.search(pattern, url)
            if m:
                query = parse_qs(urlparse(url).query)
                passcode = query.get("pwd", query.get("code", [""]))[0]
                return m.group(1), passcode

        raise TransferError(TransferErrorCode.URL_INVALID,
                            message=f"无法解析{self.PLATFORM_NAME}链接: {url}",
                            platform=self.PLATFORM_KEY)

    # ─── Core abstract methods (subclasses must implement) ──

    @abstractmethod
    def _get_share_detail(self, pwd_id: str, passcode: str = "") -> dict:
        """Fetch the share detail -> {title, fid/fs_id, ...}."""
        ...

    @abstractmethod
    def _save_files(self, pwd_id: str, detail: dict, save_dir: str) -> List[str]:
        """Save the files and return the list of new file ids."""
        ...

    @abstractmethod
    def _create_share(self, file_ids: List[str], title: str,
                      password: str = "") -> Tuple[str, str]:
        """Create a share -> (share_url, share_password)."""
        ...

    def _extract_file_list(self, detail: dict) -> List[FileInfo]:
        """Extract the file list from a share detail (default: empty list)."""
        return []

    def _filter_ads(self, file_ids: List[str]) -> List[str]:
        """Ad filtering (default: return the ids unchanged)."""
        return file_ids

    # ─── HTTP helpers ──────────────────────────────────────

    def _get(self, url: str, params: dict = None, headers: dict = None,
             retry: int = None) -> requests.Response:
        """Issue a GET through ``_request``."""
        return self._request("GET", url, params=params, headers=headers, retry=retry)

    def _post(self, url: str, json_data: dict = None, data: dict = None,
              params: dict = None, headers: dict = None, retry: int = None) -> requests.Response:
        """Issue a POST through ``_request``."""
        return self._request("POST", url, json=json_data, data=data,
                             params=params, headers=headers, retry=retry)

    def _request(self, method: str, url: str, **kwargs) -> requests.Response:
        """Send an HTTP request, retrying with exponential back-off.

        ``retry`` (popped from kwargs) overrides ``transfer_config.max_retries``.
        The delay before retry *k* is ``retry_delay * 2**k``.

        Raises:
            TransferError: ``NETWORK_ERROR`` once all attempts have failed.
        """
        retry = kwargs.pop("retry", None)
        max_retries = retry if retry is not None else self.transfer_config.max_retries

        last_exc = None
        for attempt in range(max_retries + 1):
            try:
                return self.session.request(
                    method, url,
                    timeout=self.transfer_config.request_timeout,
                    **kwargs
                )
            except requests.RequestException as e:
                last_exc = e
                if attempt < max_retries:
                    delay = self.transfer_config.retry_delay * (2 ** attempt)
                    logger.warning(f"[{self.PLATFORM_KEY}] HTTP retry {attempt+1}/{max_retries} "
                                   f"after {delay:.1f}s: {url}")
                    time.sleep(delay)

        raise TransferError(TransferErrorCode.NETWORK_ERROR,
                            message=str(last_exc),
                            platform=self.PLATFORM_KEY) from last_exc

    def _poll_task(self, task_url: str, task_id: str,
                   status_field: str = "status",
                   success_value: Any = 2,
                   result_path: str = None,
                   query_params: dict = None) -> dict:
        """
        Poll an asynchronous task until it completes.

        Args:
            task_url: URL queried on every poll.
            task_id: Sent as the ``task_id`` query parameter.
            status_field: Key of the status value in the response data.
            success_value: Status value that means "done".
            result_path: Optional dotted path (e.g. "save_as.save_as_top_fids")
                resolved inside the data before returning.
            query_params: Extra query parameters; never modified.

        Returns:
            The response data (``data`` member when present, else the whole
            JSON body), narrowed by ``result_path`` when given.

        Raises:
            TransferError: ``TIMEOUT`` when the wait or attempt budget runs out;
                ``NETWORK_ERROR`` when the status is ``False`` or ``-1``.
        """
        interval = self.transfer_config.task_poll_interval
        max_attempts = self.transfer_config.task_poll_max_attempts
        max_wait = self.transfer_config.task_poll_max_wait
        started = time.time()

        for attempt in range(max_attempts):
            if time.time() - started > max_wait:
                raise TransferError(TransferErrorCode.TIMEOUT,
                                    platform=self.PLATFORM_KEY,
                                    details={"task_id": task_id})

            try:
                # Build a fresh dict so the caller's query_params stays untouched.
                params = {**(query_params or {}), "task_id": task_id}
                resp = self._get(task_url, params=params, retry=1)
                payload = resp.json()
                data = payload.get("data", payload)

                current_status = data.get(status_field)
                if current_status == success_value:
                    if result_path:
                        # Dotted paths such as "save_as.save_as_top_fids".
                        for key in result_path.split("."):
                            data = data.get(key, {}) if isinstance(data, dict) else data
                    return data

                if current_status is False or current_status == -1:
                    raise TransferError(TransferErrorCode.NETWORK_ERROR,
                                        message=f"任务失败: {data}",
                                        platform=self.PLATFORM_KEY)

            except (requests.RequestException, ValueError):
                # Best effort: a transport error or unparsable body on one poll
                # is ignored and the task is polled again.
                pass

            time.sleep(interval)

        raise TransferError(TransferErrorCode.TIMEOUT,
                            platform=self.PLATFORM_KEY,
                            details={"task_id": task_id, "attempts": max_attempts})
|
||||
|
||||
|
||||
# ─── 工厂函数(adapter/factory.py 使用)───────────────────
|
||||
|
||||
def match_url(url: str, adapter_cls: type) -> bool:
    """Return True when any of the adapter's URL_PATTERNS is found in ``url``."""
    return any(re.search(candidate, url) for candidate in adapter_cls.URL_PATTERNS)
|
||||
45
cloudsearch_transfer/adapter/cloud189/__init__.py
Normal file
45
cloudsearch_transfer/adapter/cloud189/__init__.py
Normal file
@@ -0,0 +1,45 @@
|
||||
"""天翼云盘适配器 v1.0.0"""
|
||||
|
||||
from ..base import BaseCloudDriveAdapter, FileInfo, TransferResult, VerifyResult
|
||||
from ...errors import TransferError, TransferErrorCode
|
||||
from .credential import Cloud189CredentialManager
|
||||
from .transfer import Cloud189Transfer
|
||||
from .cleanup import Cloud189Cleanup
|
||||
|
||||
|
||||
class Cloud189Adapter(BaseCloudDriveAdapter):
    """Adapter for 189 Cloud (cloud.189.cn) built on the shared adapter base.

    Share handling is delegated to ``Cloud189Transfer``, deletion to
    ``Cloud189Cleanup`` and login to ``Cloud189CredentialManager``.
    """

    PLATFORM_NAME = "天翼云盘"
    PLATFORM_KEY = "cloud189"
    URL_PATTERNS = [r"cloud\.189\.cn/t/([A-Za-z0-9]+)"]

    # Class-level default: the base __init__ calls _setup_session() before this
    # class has assigned its credential manager, so the attribute must already
    # exist (and be falsy) at that point instead of raising AttributeError.
    _cred = None

    def __init__(self, *args, **kwargs):
        """Build the base adapter, then the 189-specific helpers, then log in."""
        super().__init__(*args, **kwargs)
        self._cred = Cloud189CredentialManager(self.config)
        self._transfer_engine = None
        self._cln = Cloud189Cleanup()
        # The call made by the base constructor was a no-op (no credential
        # manager yet); run the session setup now that one exists.
        self._setup_session()

    def _setup_session(self):
        """Log the session in through the credential manager, once it exists."""
        if self._cred:
            self._cred.login_if_needed(self.session)

    @property
    def _transfer(self):
        """Transfer engine, created on first access."""
        if self._transfer_engine is None:
            self._transfer_engine = Cloud189Transfer(
                self.session, self._cred, self.config, self.transfer_config)
        return self._transfer_engine

    def _get_share_detail(self, pwd_id, passcode=""):
        """Fetch the share detail through the transfer engine."""
        return self._transfer.get_share_info(pwd_id, passcode)

    def _save_files(self, pwd_id, detail, save_dir):
        """Save the shared files through the transfer engine."""
        return self._transfer.save_files(pwd_id, detail, save_dir)

    def _create_share(self, file_ids, title, password=""):
        """Create a new share through the transfer engine."""
        return self._transfer.create_share(file_ids, title, password)

    def get_files(self, parent_fid="-11"):
        """List the files below ``parent_fid`` (default "-11")."""
        return self._transfer.list_files(parent_fid)

    def delete(self, file_ids):
        """Delete the given files through the cleanup helper."""
        return self._cln.delete_files(self.session, self._cred, file_ids)
|
||||
26
cloudsearch_transfer/adapter/cloud189/cleanup.py
Normal file
26
cloudsearch_transfer/adapter/cloud189/cleanup.py
Normal file
@@ -0,0 +1,26 @@
|
||||
"""天翼云盘数据清理 v1.0.0"""
|
||||
|
||||
import logging
|
||||
from typing import List
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Cloud189Cleanup:
    """Cleanup helper for 189 Cloud: file deletion and an ad-filter hook."""

    API_BASE = "https://cloud.189.cn/api/open/file"

    def delete_files(self, session, credential_mgr, file_ids: List[str]) -> bool:
        """Delete ``file_ids`` with one POST to ``deleteFiles.action``.

        Returns True when the reply's ``res_code`` equals 0. Any exception is
        logged and reported as False.
        """
        try:
            endpoint = f"{self.API_BASE}/deleteFiles.action"
            form = {"fileIdList": ",".join(file_ids)}
            reply = session.post(endpoint, data=form, timeout=30)
            outcome = reply.json()
            return outcome.get("res_code") == 0
        except Exception as e:
            logger.error(f"189 delete failed: {e}")
            return False

    def filter_ad_ids(self, file_ids: List[str], file_names: List[str],
                      banned_keywords: List[str]) -> List[str]:
        """Ad-filter hook; currently returns ``file_ids`` unchanged."""
        return file_ids
|
||||
64
cloudsearch_transfer/adapter/cloud189/credential.py
Normal file
64
cloudsearch_transfer/adapter/cloud189/credential.py
Normal file
@@ -0,0 +1,64 @@
|
||||
"""天翼云盘凭证管理 v1.0.0 — Cookie + 账号密码双模式"""
|
||||
|
||||
import re
|
||||
import base64
|
||||
import logging
|
||||
from typing import Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Cloud189CredentialManager:
    """Credential handling for 189 Cloud: configured cookie or username/password."""

    LOGIN_URL = "https://cloud.189.cn/api/portal/loginUrl.action"
    SSO_URL = "https://open.e.189.cn/api/logbox/oauth2/ssoLogin.action"

    def __init__(self, config):
        """Keep the platform config; no cookie has been obtained yet."""
        self.config = config
        self._cookie: Optional[str] = None

    def validate(self) -> bool:
        """Tell whether usable credentials are configured.

        A configured cookie must be at least 30 characters long; without a
        cookie, both ``username`` and ``password`` must be set in ``extra``.
        """
        cookie = self.config.cookie
        if cookie:
            return len(cookie) >= 30
        account = self.config.extra or {}
        return bool(account.get("username") and account.get("password"))

    def get_headers(self) -> dict:
        """Headers carrying the logged-in cookie, else the configured one."""
        cookie_value = self._cookie or self.config.cookie
        return {"Cookie": cookie_value, "Referer": "https://cloud.189.cn/"}

    def login_if_needed(self, session) -> bool:
        """Make credentials available, logging in with username/password if needed.

        With a configured cookie that cookie is adopted and True is returned.
        Otherwise the login URL is fetched, the account is posted to the SSO
        endpoint, an optional redirect is followed and the session cookies are
        joined into a cookie string. Returns False when credentials are missing
        or any step raises.
        """
        if self.config.cookie:
            self._cookie = self.config.cookie
            return True

        account = self.config.extra or {}
        username = account.get("username", "")
        password = account.get("password", "")
        if not (username and password):
            return False

        try:
            logger.info("Attempting 189 cloud login...")
            login_info = session.get(self.LOGIN_URL, timeout=30).json()
            return_url = login_info.get("toUrl", "")
            session.cookies.clear()

            form = {"account": username, "password": password,
                    "appKey": "cloud", "returnUrl": return_url}
            sso_info = session.post(self.SSO_URL, data=form, timeout=30).json()

            target = sso_info.get("toUrl", "")
            if target:
                session.get(target, timeout=30)

            pairs = (f"{c.name}={c.value}" for c in session.cookies)
            self._cookie = "; ".join(pairs)
            logger.info("189 cloud login successful")
            return bool(self._cookie)
        except Exception as e:
            logger.error(f"189 cloud login failed: {e}")
            return False
|
||||
68
cloudsearch_transfer/adapter/cloud189/transfer.py
Normal file
68
cloudsearch_transfer/adapter/cloud189/transfer.py
Normal file
@@ -0,0 +1,68 @@
|
||||
"""天翼云盘转存逻辑 v1.0.0"""
|
||||
|
||||
import re
|
||||
import logging
|
||||
from typing import List, Tuple
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Cloud189Transfer:
    """Share lookup and save calls against the 189 Cloud share API."""

    API_BASE = "https://cloud.189.cn/api/open/share"

    def __init__(self, session, credential_mgr, config, transfer_config):
        """Keep the collaborators handed in by the adapter."""
        self.session = session
        self.credential = credential_mgr
        self.config = config
        self.transfer_config = transfer_config
        self._last_file_names = []

    @staticmethod
    def parse_share_url(url: str) -> Tuple[str, str]:
        """Return ``(share_code, "")`` for a cloud.189.cn share URL.

        Raises:
            ValueError: When the URL is not a 189 Cloud share link.
        """
        found = re.search(r"cloud\.189\.cn/t/([A-Za-z0-9]+)", url)
        if found is None:
            raise ValueError("Invalid 189 cloud share URL")
        return found.group(1), ""

    def get_share_info(self, share_code: str, password: str = "") -> dict:
        """Fetch a share and normalise it to ``{title, files, share_id}``.

        Raises:
            Exception: When the reply's ``res_code`` is not 0.
        """
        query = {"shareCode": share_code}
        if password:
            query["accessCode"] = password

        reply = self.session.get(
            f"{self.API_BASE}/getShareInfoByShareId.action",
            params=query,
            timeout=self.transfer_config.request_timeout,
        )
        data = reply.json()
        if data.get("res_code") != 0:
            raise Exception(f"189 share info failed: {data}")

        info = data.get("data", {})
        entries = []
        for item in info.get("fileList", []):
            entries.append({
                "id": item.get("fileId", ""),
                "name": item.get("fileName", ""),
                "size": int(item.get("fileSize", 0)),
            })
        return {
            "title": info.get("shareName", ""),
            "files": entries,
            "share_id": info.get("shareId", ""),
        }

    def save_files(self, share_code: str, detail: dict, save_dir: str) -> List[str]:
        """Save the share into ``save_dir`` (``"-11"`` when empty).

        Returns the fixed placeholder ``["0"]`` on success.

        Raises:
            Exception: When the reply's ``res_code`` is not 0.
        """
        form = {
            "shareId": detail.get("share_id", ""),
            "parentId": save_dir or "-11",
        }
        reply = self.session.post(
            f"{self.API_BASE}/shareToMe.action",
            data=form,
            timeout=self.transfer_config.request_timeout,
        )
        data = reply.json()
        if data.get("res_code") != 0:
            raise Exception(f"189 save failed: {data}")
        return ["0"]

    def create_share(self, file_ids: List[str], title: str,
                     password: str = "") -> Tuple[str, str]:
        """Not implemented: always returns an empty URL and password."""
        return "", ""

    def list_files(self, parent_id: str = "-11") -> list:
        """Not implemented: always returns an empty list."""
        return []
|
||||
112
cloudsearch_transfer/adapter/factory.py
Normal file
112
cloudsearch_transfer/adapter/factory.py
Normal file
@@ -0,0 +1,112 @@
|
||||
"""
|
||||
CloudSearch Transfer — 适配器工厂 v1.0.0
|
||||
参考 cloud-auto-save 的 AdapterFactory + AccountManager
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
import logging
|
||||
from typing import Optional, Dict, Type
|
||||
|
||||
from .base import BaseCloudDriveAdapter, match_url
|
||||
from ..config import ConfigManager
|
||||
from ..errors import TransferError, TransferErrorCode
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class AdapterFactory:
    """
    Adapter factory.

    - Detects the cloud-drive platform of a URL by regex
    - Caches instances: one adapter per platform + account + credential
    - Routes between multiple accounts
    """

    # Platform registry (filled lazily to avoid circular imports).
    _registry: Dict[str, Type[BaseCloudDriveAdapter]] = {}

    # Instance cache, keyed "<platform>:<account_name>:<credential hash[:16]>".
    # Declared on the class, so it is shared by every factory instance.
    _cache: Dict[str, BaseCloudDriveAdapter] = {}

    def __init__(self, config_manager: ConfigManager):
        """Keep the config manager and register all platform adapters."""
        self.config_manager = config_manager
        self._register_all()

    def _register_all(self):
        """Register every platform adapter."""
        from .quark import QuarkAdapter
        from .baidu import BaiduAdapter
        from .aliyun import AliyunAdapter
        from .uc import UcAdapter
        from .xunlei import XunleiAdapter
        from .pan115 import Pan115Adapter
        from .pan123 import Pan123Adapter
        from .cloud189 import Cloud189Adapter

        self._registry = {
            "quark": QuarkAdapter,
            "baidu": BaiduAdapter,
            "aliyun": AliyunAdapter,
            "uc": UcAdapter,
            "xunlei": XunleiAdapter,
            "pan115": Pan115Adapter,
            "pan123": Pan123Adapter,
            "cloud189": Cloud189Adapter,
        }

    def detect_platform(self, url: str) -> Optional[str]:
        """Return the key of the first registered platform matching ``url``, or None."""
        for platform_key, adapter_cls in self._registry.items():
            if match_url(url, adapter_cls):
                return platform_key
        return None

    def get_adapter(self, platform_key: str) -> Optional[BaseCloudDriveAdapter]:
        """Return the (cached) adapter of a platform.

        Returns None when the platform has no config or is not registered.
        """
        config = self.config_manager.get_platform(platform_key)
        if not config:
            return None

        adapter_cls = self._registry.get(platform_key)
        if not adapter_cls:
            return None

        # Reuse an existing instance for the same platform/account/credential.
        cache_key = self._cache_key(platform_key, config)
        if cache_key in self._cache:
            return self._cache[cache_key]

        adapter = adapter_cls(config, self.config_manager.transfer)
        self._cache[cache_key] = adapter
        logger.info(f"[Factory] Created adapter: {platform_key} "
                    f"(cache_key={cache_key})")
        return adapter

    def get_adapter_for_url(self, url: str) -> Optional[BaseCloudDriveAdapter]:
        """Return the adapter responsible for ``url``.

        Raises:
            TransferError: ``URL_INVALID`` when no platform matches,
                ``NO_CONFIG`` when the platform has no usable adapter.
        """
        platform = self.detect_platform(url)
        if not platform:
            raise TransferError(TransferErrorCode.URL_INVALID,
                                message=f"无法识别链接平台: {url}")
        adapter = self.get_adapter(platform)
        if not adapter:
            raise TransferError(TransferErrorCode.NO_CONFIG,
                                message=f"平台 {platform} 未配置凭证",
                                platform=platform)
        return adapter

    def invalidate_cache(self, platform_key: str = None):
        """Drop cached adapters of one platform, or all when no key is given."""
        if platform_key:
            # Match the complete first key segment: a bare startswith() would
            # also evict platforms whose key merely begins with platform_key.
            prefix = f"{platform_key}:"
            for key in [k for k in self._cache if k.startswith(prefix)]:
                del self._cache[key]
        else:
            self._cache.clear()

    def _cache_key(self, platform: str, config) -> str:
        """Build the cache key from platform, account name and credential hash.

        The MD5 digest only fingerprints the credential for the key.
        """
        credential = config.cookie or config.refresh_token or ""
        token_hash = hashlib.md5(credential.encode()).hexdigest()[:16]
        return f"{platform}:{config.account_name}:{token_hash}"
|
||||
41
cloudsearch_transfer/adapter/pan115/__init__.py
Normal file
41
cloudsearch_transfer/adapter/pan115/__init__.py
Normal file
@@ -0,0 +1,41 @@
|
||||
"""115网盘适配器 v1.0.0"""
|
||||
|
||||
from ..base import BaseCloudDriveAdapter, FileInfo, TransferResult, VerifyResult
|
||||
from ...errors import TransferError, TransferErrorCode
|
||||
from .credential import Pan115CredentialManager
|
||||
from .transfer import Pan115Transfer, parse_share_url
|
||||
from .cleanup import Pan115Cleanup
|
||||
|
||||
|
||||
class Pan115Adapter(BaseCloudDriveAdapter):
    """Adapter for 115 (115.com): delegates to Pan115Transfer / Pan115Cleanup."""

    PLATFORM_NAME = "115网盘"
    PLATFORM_KEY = "pan115"
    URL_PATTERNS = [r"115\.com/s/([a-z0-9]+)"]

    def __init__(self, *args, **kwargs):
        """Build the base adapter, then the 115-specific helpers."""
        super().__init__(*args, **kwargs)
        self._cred = Pan115CredentialManager(self.config)
        self._transfer_engine = None
        self._cln = Pan115Cleanup()

    @property
    def _transfer(self):
        """Transfer engine, created on first access."""
        engine = self._transfer_engine
        if engine is None:
            engine = Pan115Transfer(
                self.session, self._cred, self.config, self.transfer_config)
            self._transfer_engine = engine
        return engine

    def _get_share_detail(self, pwd_id, passcode=""):
        """Fetch the share detail through the transfer engine."""
        return self._transfer.get_share_info(pwd_id, passcode)

    def _save_files(self, pwd_id, detail, save_dir):
        """Save the shared files through the transfer engine."""
        return self._transfer.save_files(pwd_id, detail, save_dir)

    def _create_share(self, file_ids, title, password=""):
        """Create a new share through the transfer engine."""
        return self._transfer.create_share(file_ids, title, password)

    def get_files(self, parent_fid="0"):
        """List the files below ``parent_fid``."""
        return self._transfer.list_files(parent_fid)

    def delete(self, file_ids):
        """Delete the given files through the cleanup helper."""
        return self._cln.delete_files(self.session, self._cred, file_ids)
|
||||
24
cloudsearch_transfer/adapter/pan115/cleanup.py
Normal file
24
cloudsearch_transfer/adapter/pan115/cleanup.py
Normal file
@@ -0,0 +1,24 @@
|
||||
"""115网盘数据清理 v1.0.0"""
|
||||
|
||||
import logging
|
||||
from typing import List
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Pan115Cleanup:
    """Cleanup helper for 115: file deletion and an ad-filter hook."""

    def delete_files(self, session, credential_mgr, file_ids: List[str]) -> bool:
        """Delete ``file_ids`` with one POST to the 115 delete endpoint.

        Returns the reply's ``state`` value (False when absent). Any exception
        is logged and reported as False.
        """
        try:
            reply = session.post(
                "https://webapi.115.com/rb/delete",
                json={"fid": file_ids},
                timeout=30,
            )
            outcome = reply.json()
            return outcome.get("state", False)
        except Exception as e:
            logger.error(f"115 delete failed: {e}")
            return False

    def filter_ad_ids(self, file_ids: List[str], file_names: List[str],
                      banned_keywords: List[str]) -> List[str]:
        """Ad-filter hook; currently returns ``file_ids`` unchanged."""
        return file_ids
|
||||
11
cloudsearch_transfer/adapter/pan115/credential.py
Normal file
11
cloudsearch_transfer/adapter/pan115/credential.py
Normal file
@@ -0,0 +1,11 @@
|
||||
"""115网盘凭证管理 v1.0.0 — Cookie直传"""
|
||||
|
||||
class Pan115CredentialManager:
    """Credential handling for 115: the configured cookie is passed through."""

    def __init__(self, config):
        """Keep the platform config that carries the cookie."""
        self.config = config

    def validate(self) -> bool:
        """True when a cookie of at least 30 characters is configured."""
        cookie = self.config.cookie
        if not cookie:
            return False
        return len(cookie) >= 30

    def get_headers(self) -> dict:
        """Request headers carrying the cookie and the 115 referer."""
        headers = {"Cookie": self.config.cookie}
        headers["Referer"] = "https://115.com/"
        return headers
|
||||
69
cloudsearch_transfer/adapter/pan115/transfer.py
Normal file
69
cloudsearch_transfer/adapter/pan115/transfer.py
Normal file
@@ -0,0 +1,69 @@
|
||||
"""115网盘转存逻辑 v1.0.0"""
|
||||
|
||||
import re
|
||||
import logging
|
||||
from typing import List, Tuple
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Pan115Transfer:
    """Share lookup and save calls against the 115 web API."""

    def __init__(self, session, credential_mgr, config, transfer_config):
        """Keep the collaborators handed in by the adapter."""
        self.session = session
        self.credential = credential_mgr
        self.config = config
        self.transfer_config = transfer_config
        self._last_file_names = []

    # Declared static (like the other platforms' transfer classes) so it can be
    # called on the class and on instances alike.
    @staticmethod
    def parse_share_url(url: str) -> Tuple[str, str]:
        """Return ``(share_code, password)`` for a 115 share URL.

        The password is taken from a ``password=`` / ``password:`` fragment of
        the URL and is "" when absent.

        Raises:
            ValueError: When the URL is not a 115 share link.
        """
        m = re.search(r"115\.com/s/([a-z0-9]+)", url)
        if not m:
            raise ValueError("Invalid 115 share URL")
        code = m.group(1)
        m2 = re.search(r"password[=:](\w+)", url)
        return code, m2.group(1) if m2 else ""

    def get_share_info(self, code: str, password: str = "") -> dict:
        """Fetch a share snapshot and normalise it to ``{title, files, cid}``.

        ``cid`` is taken from the first listed file ("" when the list is empty).

        Raises:
            Exception: When the reply's ``state`` is falsy.
        """
        params = {"share_code": code}
        if password:
            params["receive_code"] = password
        resp = self.session.get(
            "https://webapi.115.com/share/snap",
            params=params,
            timeout=self.transfer_config.request_timeout,
        )
        data = resp.json()
        if not data.get("state"):
            raise Exception(f"115 share info failed: {data}")
        snap = data.get("data", {})
        files = snap.get("list", [])
        return {
            "title": snap.get("shareinfo", {}).get("share_title", ""),
            "files": [{"id": f.get("fid", ""), "name": f.get("n", ""),
                       "size": int(f.get("s", 0))} for f in files],
            "cid": files[0].get("cid", "") if files else "",
        }

    def save_files(self, share_code: str, detail: dict, save_dir: str) -> List[str]:
        """Receive the share and return ``[cid]`` from the reply as a string.

        ``save_dir`` is not used by this implementation, and the receive code
        is sent empty.

        Raises:
            Exception: When the reply's ``state`` is falsy.
        """
        cid = detail.get("cid", "0")
        payload = {"share_code": share_code, "receive_code": "",
                   "cid": cid, "pick_code": ""}
        resp = self.session.post(
            "https://webapi.115.com/share/receive",
            json=payload,
            timeout=self.transfer_config.request_timeout,
        )
        data = resp.json()
        if not data.get("state"):
            raise Exception(f"115 save failed: {data}")
        return [str(data.get("data", {}).get("cid", ""))]

    def create_share(self, file_ids: List[str], title: str,
                     password: str = "") -> Tuple[str, str]:
        """Not implemented: always returns an empty URL and password."""
        return "", ""

    def list_files(self, cid: str = "0") -> list:
        """Not implemented: always returns an empty list."""
        return []


# Module-level alias imported by the adapter package. Class attribute access
# yields the plain function, which is callable on every Python version; a bare
# staticmethod object is only callable from Python 3.10 on.
parse_share_url = Pan115Transfer.parse_share_url
|
||||
41
cloudsearch_transfer/adapter/pan123/__init__.py
Normal file
41
cloudsearch_transfer/adapter/pan123/__init__.py
Normal file
@@ -0,0 +1,41 @@
|
||||
"""123云盘适配器 v1.0.0"""
|
||||
|
||||
from ..base import BaseCloudDriveAdapter, FileInfo, TransferResult, VerifyResult
|
||||
from ...errors import TransferError, TransferErrorCode
|
||||
from .credential import Pan123CredentialManager
|
||||
from .transfer import Pan123Transfer
|
||||
from .cleanup import Pan123Cleanup
|
||||
|
||||
|
||||
class Pan123Adapter(BaseCloudDriveAdapter):
    """Adapter for 123pan: delegates to Pan123Transfer / Pan123Cleanup."""

    PLATFORM_NAME = "123云盘"
    PLATFORM_KEY = "pan123"
    URL_PATTERNS = [r"123pan\.com/s/([A-Za-z0-9]+)"]

    def __init__(self, *args, **kwargs):
        """Build the base adapter, then the 123pan-specific helpers."""
        super().__init__(*args, **kwargs)
        self._cred = Pan123CredentialManager(self.config)
        self._transfer_engine = None
        self._cln = Pan123Cleanup()

    @property
    def _transfer(self):
        """Transfer engine, created on first access."""
        engine = self._transfer_engine
        if engine is None:
            engine = Pan123Transfer(
                self.session, self._cred, self.config, self.transfer_config)
            self._transfer_engine = engine
        return engine

    def _get_share_detail(self, pwd_id, passcode=""):
        """Fetch the share detail through the transfer engine."""
        return self._transfer.get_share_info(pwd_id, passcode)

    def _save_files(self, pwd_id, detail, save_dir):
        """Save the shared files through the transfer engine."""
        return self._transfer.save_files(pwd_id, detail, save_dir)

    def _create_share(self, file_ids, title, password=""):
        """Create a new share through the transfer engine."""
        return self._transfer.create_share(file_ids, title, password)

    def get_files(self, parent_fid="0"):
        """List the files below ``parent_fid``."""
        return self._transfer.list_files(parent_fid)

    def delete(self, file_ids):
        """Delete the given files through the cleanup helper."""
        return self._cln.delete_files(self.session, self._cred, file_ids)
|
||||
26
cloudsearch_transfer/adapter/pan123/cleanup.py
Normal file
26
cloudsearch_transfer/adapter/pan123/cleanup.py
Normal file
@@ -0,0 +1,26 @@
|
||||
"""123云盘数据清理 v1.0.0"""
|
||||
|
||||
import logging
|
||||
from typing import List
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Pan123Cleanup:
    """Cleanup helper for 123pan: file deletion and an ad-filter hook."""

    API_BASE = "https://www.123pan.com/api"

    def delete_files(self, session, credential_mgr, file_ids: List[str]) -> bool:
        """Delete ``file_ids`` with one POST to ``/file/delete``.

        Returns True when the reply's ``code`` equals 0. Any exception is
        logged and reported as False.
        """
        try:
            endpoint = f"{self.API_BASE}/file/delete"
            reply = session.post(endpoint, json={"fileIds": file_ids}, timeout=30)
            outcome = reply.json()
            return outcome.get("code") == 0
        except Exception as e:
            logger.error(f"123 delete failed: {e}")
            return False

    def filter_ad_ids(self, file_ids: List[str], file_names: List[str],
                      banned_keywords: List[str]) -> List[str]:
        """Ad-filter hook; currently returns ``file_ids`` unchanged."""
        return file_ids
|
||||
16
cloudsearch_transfer/adapter/pan123/credential.py
Normal file
16
cloudsearch_transfer/adapter/pan123/credential.py
Normal file
@@ -0,0 +1,16 @@
|
||||
"""123云盘凭证管理 v1.0.0 — Cookie直传"""
|
||||
|
||||
|
||||
class Pan123CredentialManager:
    """Cookie pass-through credentials for 123pan.

    The cookie is read from ``config.cookie`` each time it is needed; no
    refresh or caching takes place here.
    """

    def __init__(self, config):
        """Keep a reference to the platform config that carries ``cookie``."""
        self.config = config

    def validate(self) -> bool:
        """Return True when a cookie is set and is at least 30 characters long."""
        cookie = self.config.cookie
        if not cookie:
            return False
        return len(cookie) >= 30

    def get_headers(self) -> dict:
        """Return the Cookie / Referer / Origin headers for 123pan requests.

        The cookie is included as-is, without validating it first.
        """
        return dict(
            Cookie=self.config.cookie,
            Referer="https://www.123pan.com/",
            Origin="https://www.123pan.com",
        )
|
||||
71
cloudsearch_transfer/adapter/pan123/transfer.py
Normal file
71
cloudsearch_transfer/adapter/pan123/transfer.py
Normal file
@@ -0,0 +1,71 @@
|
||||
"""123云盘转存逻辑 v1.0.0"""
|
||||
|
||||
import re
|
||||
import logging
|
||||
from typing import List, Tuple
|
||||
|
||||
logger = logging.getLogger(__name__)


class Pan123Transfer:
    """Share inspection and save calls against the 123pan web API.

    ``create_share`` and ``list_files`` are placeholders that return empty
    results.
    """

    API_BASE = "https://www.123pan.com/api"

    def __init__(self, session, credential_mgr, config, transfer_config):
        """Store the collaborators.

        Args:
            session: Object exposing a requests-style ``post`` method.
            credential_mgr: Object exposing ``get_headers()``; may be None.
            config: Platform configuration (stored, not read by this class).
            transfer_config: Object exposing ``request_timeout``.
        """
        self.session = session
        self.credential = credential_mgr
        self.config = config
        self.transfer_config = transfer_config
        self._last_file_names = []

    @staticmethod
    def parse_share_url(url: str) -> Tuple[str, str]:
        """Extract ``(share_key, password)`` from a 123pan share URL.

        The share key may contain hyphens; the password is taken from a
        ``pwd=`` query parameter and defaults to ``""``.

        Raises:
            ValueError: ``url`` contains no ``123pan.com/s/<key>`` part.
        """
        key_match = re.search(r"123pan\.com/s/([A-Za-z0-9-]+)", url)
        if not key_match:
            raise ValueError("Invalid 123pan share URL")
        pwd_match = re.search(r"[?&]pwd=(\w+)", url)
        return key_match.group(1), pwd_match.group(1) if pwd_match else ""

    def _post_json(self, path: str, payload: dict) -> dict:
        """POST ``payload`` to ``API_BASE + path`` with the credential headers
        and return the decoded JSON body."""
        headers = self.credential.get_headers() if self.credential is not None else None
        resp = self.session.post(
            f"{self.API_BASE}{path}",
            json=payload,
            headers=headers,
            timeout=self.transfer_config.request_timeout,
        )
        return resp.json()

    def get_share_info(self, share_key: str, password: str = "") -> dict:
        """Fetch title, file list and share id for a share.

        Args:
            share_key: Key taken from the share URL.
            password: Extraction code; omitted from the request when empty.

        Returns:
            ``{"title": str, "files": [{"id", "name", "size"}, ...],
            "share_id": ...}``; missing fields fall back to ``""`` / ``0`` /
            an empty list.

        Raises:
            RuntimeError: The API answered with a non-zero ``code``.
        """
        payload = {"shareKey": share_key}
        if password:
            payload["sharePwd"] = password

        data = self._post_json("/share/info", payload)
        if data.get("code") != 0:
            raise RuntimeError(f"123 share info failed: {data}")

        # "data" / "fileList" may be present but null; treat that as empty.
        info = data.get("data") or {}
        files = info.get("fileList") or []
        return {
            "title": info.get("shareName", ""),
            "files": [
                {
                    "id": f.get("fileId", ""),
                    "name": f.get("fileName", ""),
                    "size": f.get("fileSize", 0),
                }
                for f in files
            ],
            "share_id": info.get("shareId", ""),
        }

    def save_files(self, share_key: str, detail: dict, save_dir: str) -> List[str]:
        """Save a share into ``save_dir`` (root ``"0"`` when empty).

        Args:
            share_key: Key taken from the share URL.
            detail: Result of ``get_share_info``; ``share_id`` is read from it.
            save_dir: Target parent folder id.

        Returns:
            A one-element list holding the ``fileId`` reported by the API as
            a string (``""`` when the response carries none).

        Raises:
            RuntimeError: The API answered with a non-zero ``code``.
        """
        payload = {
            "shareKey": share_key,
            "shareId": detail.get("share_id", ""),
            "parentFileId": save_dir or "0",
        }
        data = self._post_json("/share/save", payload)
        if data.get("code") != 0:
            raise RuntimeError(f"123 save failed: {data}")
        saved = data.get("data") or {}
        return [str(saved.get("fileId", ""))]

    def create_share(self, file_ids: List[str], title: str,
                     password: str = "") -> Tuple[str, str]:
        """Placeholder: always returns ``("", "")``."""
        return "", ""

    def list_files(self, parent_id: str = "0") -> list:
        """Placeholder: always returns an empty list."""
        return []
|
||||
509
cloudsearch_transfer/adapter/quark/__init__.py
Normal file
509
cloudsearch_transfer/adapter/quark/__init__.py
Normal file
@@ -0,0 +1,509 @@
|
||||
"""
|
||||
CloudSearch Transfer — 夸克网盘适配器 v1.0.0
|
||||
|
||||
将 QuarkCredentialManager、QuarkTransfer、QuarkCleanup 组合为
|
||||
BaseCloudDriveAdapter 的完整实现。
|
||||
|
||||
夸克网盘 7 步 API 转存流程:
|
||||
① POST .../share/sharepage/token → stoken
|
||||
② GET .../share/sharepage/detail → fid, share_fid_token, title
|
||||
③ POST .../share/sharepage/save → task_id (转存)
|
||||
④ 轮询 GET .../task → save_as_top_fids
|
||||
⑤ POST .../share → task_id (创建分享)
|
||||
⑥ 轮询 GET .../task → share_id
|
||||
⑦ POST .../share/password → share_url, passcode
|
||||
|
||||
参考 cloud-auto-save 的 quark 实现 + netdisk 的 Pan 接口约定。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import time
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
from ..base import BaseCloudDriveAdapter, FileInfo, TransferResult, VerifyResult
|
||||
from ...config import PlatformConfig, TransferConfig
|
||||
from ...errors import TransferError, TransferErrorCode
|
||||
|
||||
from .credential import QuarkCredentialManager
|
||||
from .transfer import QuarkTransfer, SHARE_URL_PATTERN
|
||||
from .cleanup import QuarkCleanup
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class QuarkAdapter(BaseCloudDriveAdapter):
    """Quark netdisk adapter.

    Combines the credential, transfer and cleanup modules behind the
    ``BaseCloudDriveAdapter`` interface.

    Attributes:
        PLATFORM_NAME: Human-readable platform name.
        PLATFORM_KEY: Internal platform identifier.
        URL_PATTERNS: Regexes that match Quark share links.
    """

    # ─── Platform identity ────────────────────────────────────
    PLATFORM_NAME: str = "夸克网盘"
    PLATFORM_KEY: str = "quark"

    # ─── URL matching ─────────────────────────────────────────
    # Matches pan.quark.cn/s/<share_id>
    URL_PATTERNS: List[str] = [
        r"pan\.quark\.cn/s/(\w+)",
    ]

    def __init__(self, config: PlatformConfig, transfer_config: TransferConfig) -> None:
        """Build the adapter and its three sub-modules.

        Args:
            config: Platform configuration (cookie and related settings).
            transfer_config: Global transfer settings (timeout, polling, ...).
        """
        super().__init__(config, transfer_config)

        # NOTE(review): if the base initializer invokes `_setup_session()`,
        # `_credential` does not exist yet at that point — confirm.
        self._credential: QuarkCredentialManager = QuarkCredentialManager(
            cookie=config.cookie
        )
        self._transfer_engine: QuarkTransfer = QuarkTransfer(
            credential=self._credential,
            timeout=transfer_config.request_timeout,
            poll_interval=transfer_config.task_poll_interval,
            poll_max_attempts=transfer_config.task_poll_max_attempts,
        )
        self._cleanup: QuarkCleanup = QuarkCleanup(
            credential=self._credential,
            timeout=transfer_config.request_timeout,
        )
        # stoken obtained by `_get_share_detail`, keyed by share id, so that
        # `_save_files` can reuse it instead of requesting a new one without
        # the extraction code.
        self._stoken_by_share: Dict[str, str] = {}

    # ═══════════════════════════════════════════════════════════════
    # Public interface implementation
    # ═══════════════════════════════════════════════════════════════

    def _setup_session(self) -> None:
        """Copy the Quark Cookie header into the session's default headers."""
        headers = self._credential.get_headers()
        if headers:
            self.session.headers.update(headers)
            logger.debug("[QuarkAdapter] Session headers updated with Cookie")

    # ─── transfer(): core logic lives in _transfer ─────────────

    def _transfer(self, share_url: str, save_dir: str = "",
                  share_password: str = "") -> TransferResult:
        """Run the whole transfer through the ``QuarkTransfer`` engine.

        Args:
            share_url: Quark share link.
            save_dir: Target directory; falls back to ``config.save_dir`` and
                then to the root ``"0"``.
            share_password: Password for the new share; falls back to
                ``config.share_password``.

        Returns:
            A successful ``TransferResult``.

        Raises:
            TransferError: NOT_LOGIN for an invalid cookie, URL_INVALID when
                the engine raises ValueError, SHARE_NOT_EXIST / NETWORK_ERROR
                when it raises RuntimeError, RESOURCE_EMPTY when no file id
                is left after ad filtering.
        """
        start: float = time.time()

        # Credential check
        if not self._credential.validate():
            raise TransferError(
                TransferErrorCode.NOT_LOGIN,
                message="夸克 Cookie 无效或长度不足",
                platform=self.PLATFORM_KEY,
            )

        # Target directory: root "0" by default
        target_dir: str = save_dir or self.config.save_dir or "0"

        # Share password
        pwd: str = share_password or self.config.share_password or ""

        try:
            result: Dict[str, Any] = self._transfer_engine.transfer(
                share_url=share_url,
                save_dir=target_dir,
                share_password=pwd,
            )
        except ValueError as exc:
            raise TransferError(
                TransferErrorCode.URL_INVALID,
                message=str(exc),
                platform=self.PLATFORM_KEY,
            ) from exc
        except RuntimeError as exc:
            msg: str = str(exc)
            # Engine errors mentioning the stoken or an API status are
            # reported as a missing share; everything else as network.
            if "stoken" in msg or "status" in msg:
                raise TransferError(
                    TransferErrorCode.SHARE_NOT_EXIST,
                    message=msg,
                    platform=self.PLATFORM_KEY,
                ) from exc
            raise TransferError(
                TransferErrorCode.NETWORK_ERROR,
                message=msg,
                platform=self.PLATFORM_KEY,
            ) from exc

        elapsed: int = int((time.time() - start) * 1000)

        # Ad filtering on the saved ids; names are looked up in the directory
        # the files were saved into.
        new_fids: List[str] = result.get("new_file_ids", [])
        if self.transfer_config.ad_filter_enabled and new_fids:
            new_fids = self._filter_ads(new_fids, target_dir)
            if not new_fids:
                raise TransferError(
                    TransferErrorCode.RESOURCE_EMPTY,
                    platform=self.PLATFORM_KEY,
                )

        return TransferResult(
            success=True,
            platform=self.PLATFORM_KEY,
            new_file_id=",".join(new_fids),
            file_name=result.get("file_name", ""),
            share_url=result.get("share_url", ""),
            share_password=result.get("passcode", pwd),
            original_url=share_url,
            elapsed_ms=elapsed,
        )

    # ─── verify(): core logic lives in _verify ─────────────────

    def _verify(self, share_url: str) -> VerifyResult:
        """Check a share link by fetching its stoken and detail.

        Args:
            share_url: Quark share link.

        Returns:
            A ``VerifyResult``; failures other than ``TransferError`` are
            reported inside the result instead of being raised.

        Raises:
            TransferError: Re-raised unchanged (e.g. an unparsable URL).
        """
        try:
            pwd_id, passcode = self._parse_share_url(share_url)

            if not self._credential.validate():
                return VerifyResult(
                    valid=False,
                    platform=self.PLATFORM_KEY,
                    error=TransferError(
                        TransferErrorCode.NOT_LOGIN,
                        platform=self.PLATFORM_KEY,
                    ),
                )

            stoken: str = self._transfer_engine._get_stoken(pwd_id, passcode)
            detail: Dict[str, Any] = self._transfer_engine._get_detail(pwd_id, stoken)
            files: List[FileInfo] = self._extract_file_list(detail)

            return VerifyResult(
                valid=True,
                platform=self.PLATFORM_KEY,
                title=detail.get("title", ""),
                file_count=len(files),
                files=files,
            )

        except TransferError:
            raise
        except (ValueError, RuntimeError) as exc:
            return VerifyResult(
                valid=False,
                platform=self.PLATFORM_KEY,
                error=TransferError(
                    TransferErrorCode.SHARE_NOT_EXIST,
                    message=str(exc),
                    platform=self.PLATFORM_KEY,
                ),
            )
        except Exception as exc:
            return VerifyResult(
                valid=False,
                platform=self.PLATFORM_KEY,
                error=TransferError(
                    TransferErrorCode.NETWORK_ERROR,
                    message=str(exc),
                    platform=self.PLATFORM_KEY,
                ),
            )

    # ─── Core hooks ────────────────────────────────────────────

    def _get_share_detail(self, pwd_id: str, passcode: str = "") -> dict:
        """Fetch the share detail (step ② of the flow).

        The stoken obtained on the way is remembered per share id so that a
        following ``_save_files`` call can reuse it.

        Args:
            pwd_id: Share id.
            passcode: Extraction code.

        Returns:
            The detail dict returned by the engine.
        """
        stoken: str = self._transfer_engine._get_stoken(pwd_id, passcode)
        self._stoken_by_share[pwd_id] = stoken
        return self._transfer_engine._get_detail(pwd_id, stoken)

    def _save_files(self, pwd_id: str, detail: dict, save_dir: str) -> List[str]:
        """Save the shared files into the own drive (steps ③④).

        Args:
            pwd_id: Share id.
            detail: Share detail from ``_get_share_detail``.
            save_dir: Target directory id.

        Returns:
            The ids of the newly saved files.
        """
        # Reuse (and consume) the stoken fetched together with the detail;
        # it was issued with the extraction code. Only when none is cached
        # fall back to requesting one, which works for password-less shares.
        stoken: Optional[str] = self._stoken_by_share.pop(pwd_id, None)
        if not stoken:
            stoken = self._transfer_engine._get_stoken(pwd_id)
        task_id: str = self._transfer_engine._init_save(
            pwd_id, stoken, detail, to_pdir_fid=save_dir
        )
        return self._transfer_engine._poll_save_task(task_id)

    def _create_share(self, file_ids: List[str], title: str,
                      password: str = "") -> Tuple[str, str]:
        """Create a share link for the given files (steps ⑤⑥⑦).

        Args:
            file_ids: Ids of the files to share.
            title: Share title.
            password: Share password.

        Returns:
            The ``(share_url, share_password)`` tuple produced by the engine.
        """
        task_id: str = self._transfer_engine._init_share(file_ids, title)
        share_id: str = self._transfer_engine._poll_share_task(task_id)
        return self._transfer_engine._set_password(share_id, password)

    def _extract_file_list(self, detail: dict) -> List[FileInfo]:
        """Convert a share detail dict into ``FileInfo`` objects.

        Entries are read from ``detail["files"]``; each entry's ``fid`` /
        ``file_name`` / ``size`` / ``dir`` / ``ext`` keys are used, with
        ``file_id`` / ``name`` / ``is_dir`` / ``file_extension`` as fallbacks.

        Args:
            detail: Share detail dict.

        Returns:
            List of ``FileInfo``; when ``files`` is empty but the detail has a
            top-level ``fid``, a single entry built from the top-level fields.
        """
        files_data: List[Dict[str, Any]] = detail.get("files", [])
        result: List[FileInfo] = []

        for f in files_data:
            file_info = FileInfo(
                fid=str(f.get("fid", f.get("file_id", ""))),
                name=str(f.get("file_name", f.get("name", ""))),
                size=int(f.get("size", 0)),
                is_dir=bool(f.get("dir", f.get("is_dir", False))),
                ext=str(f.get("ext", f.get("file_extension", ""))),
            )
            result.append(file_info)

        # No file entries: fall back to the top-level fields.
        if not result and detail.get("fid"):
            result.append(FileInfo(
                fid=str(detail.get("fid", "")),
                name=str(detail.get("title", detail.get("file_name", ""))),
                size=0,
                is_dir=False,
            ))

        return result

    def _filter_ads(self, file_ids: List[str], parent_fid: str = "0") -> List[str]:
        """Drop the ids of files whose name contains a banned keyword.

        Keywords are the union of ``config.banned_keywords`` and
        ``transfer_config.default_banned_keywords``. Names are resolved by id
        from the listing of ``parent_fid``; ids missing from that listing are
        kept. Best-effort: any failure leaves ``file_ids`` unfiltered.

        Args:
            file_ids: Ids of the saved files.
            parent_fid: Directory the files were saved into.

        Returns:
            The filtered id list.
        """
        keywords: List[str] = [
            kw
            for kw in (
                set(self.config.banned_keywords or ())
                | set(self.transfer_config.default_banned_keywords or ())
            )
            if kw  # an empty keyword would match every name
        ]
        if not keywords:
            return file_ids

        try:
            files: List[FileInfo] = self.get_files(parent_fid)
            names_by_fid: Dict[str, str] = {f.fid: f.name for f in files}
            # Align names with the ids by id, not by listing position.
            file_names: List[str] = [names_by_fid.get(fid, "") for fid in file_ids]
            return QuarkCleanup.filter_ad_ids(file_ids, file_names, keywords)
        except Exception:
            # Without names there is nothing to match against.
            logger.warning("[QuarkAdapter] Cannot fetch file list for ad filtering, skipping")
            return file_ids

    # ─── get_files / delete ────────────────────────────────────

    def get_files(self, parent_fid: str = "0") -> List[FileInfo]:
        """List the files of a directory.

        GET /1/clouddrive/file/sort?pdir_fid=<parent_fid>&_page=1&_size=100&_sort=updated_at:desc

        Args:
            parent_fid: Parent directory id; ``"0"`` is the root.

        Returns:
            List of ``FileInfo`` (first page only, at most 100 requested).

        Raises:
            TransferError: NETWORK_ERROR when the request fails or the API
                reports an error.
        """
        url: str = "https://drive-pc.quark.cn/1/clouddrive/file/sort"
        params: Dict[str, str] = {
            "pdir_fid": parent_fid,
            "_page": "1",
            "_size": "100",
            "_sort": "updated_at:desc",
        }
        headers: Dict[str, str] = self._credential.get_headers()

        try:
            resp = self._get(url, params=params, headers=headers)
        except Exception as exc:
            raise TransferError(
                TransferErrorCode.NETWORK_ERROR,
                message=f"获取文件列表失败: {exc}",
                platform=self.PLATFORM_KEY,
            ) from exc

        data: Dict[str, Any] = resp.json()
        status: int = data.get("status", -1)
        if status != 0 and data.get("code") not in (0, None):
            raise TransferError(
                TransferErrorCode.NETWORK_ERROR,
                message=f"获取文件列表失败: {data.get('message')}",
                platform=self.PLATFORM_KEY,
            )

        # "data" / "list" may be present but null; treat that as empty.
        files_data: List[Dict[str, Any]] = (data.get("data") or {}).get("list") or []
        result: List[FileInfo] = []
        for f in files_data:
            result.append(FileInfo(
                fid=str(f.get("fid", "")),
                name=str(f.get("file_name", f.get("name", ""))),
                size=int(f.get("size", 0)),
                is_dir=bool(f.get("dir", f.get("is_dir", False))),
                ext=str(f.get("file_extension", f.get("ext", ""))),
            ))

        logger.debug("[QuarkAdapter] Listed %d files in dir=%s", len(result), parent_fid)
        return result

    def delete(self, file_ids: List[str]) -> bool:
        """Delete files (move them to the recycle bin).

        Args:
            file_ids: Ids of the files to delete.

        Returns:
            The result of ``QuarkCleanup.delete_files``.

        Raises:
            TransferError: NOT_LOGIN for an invalid cookie, NETWORK_ERROR when
                the cleanup module raises RuntimeError.
        """
        if not self._credential.validate():
            raise TransferError(
                TransferErrorCode.NOT_LOGIN,
                platform=self.PLATFORM_KEY,
            )

        try:
            return self._cleanup.delete_files(file_ids)
        except RuntimeError as exc:
            raise TransferError(
                TransferErrorCode.NETWORK_ERROR,
                message=str(exc),
                platform=self.PLATFORM_KEY,
            ) from exc

    def delete_permanent(self, file_ids: List[str]) -> bool:
        """Delete files permanently (not recoverable).

        Args:
            file_ids: Ids of the files to delete permanently.

        Returns:
            The result of ``QuarkCleanup.delete_files_permanent``.

        Raises:
            TransferError: NOT_LOGIN for an invalid cookie, NETWORK_ERROR when
                the cleanup module raises RuntimeError.
        """
        if not self._credential.validate():
            raise TransferError(
                TransferErrorCode.NOT_LOGIN,
                platform=self.PLATFORM_KEY,
            )

        try:
            return self._cleanup.delete_files_permanent(file_ids)
        except RuntimeError as exc:
            raise TransferError(
                TransferErrorCode.NETWORK_ERROR,
                message=str(exc),
                platform=self.PLATFORM_KEY,
            ) from exc

    # ─── Helpers ───────────────────────────────────────────────

    def _parse_share_url(self, url: str) -> Tuple[str, str]:
        """Extract ``(pwd_id, passcode)`` from a Quark share URL.

        Link format: https://pan.quark.cn/s/<pwd_id>, optionally with a
        ``?pwd=xxxx`` (or ``code=``) query parameter.

        Args:
            url: Quark share link.

        Returns:
            The ``(pwd_id, passcode)`` tuple; passcode is ``""`` when absent.

        Raises:
            TransferError: URL_INVALID when no share id can be extracted.
        """
        pwd_id: Optional[str] = QuarkTransfer.parse_share_url(url)
        if not pwd_id:
            raise TransferError(
                TransferErrorCode.URL_INVALID,
                message=f"无法解析夸克链接: {url}",
                platform=self.PLATFORM_KEY,
            )

        # Extraction code from the query string
        from urllib.parse import urlparse, parse_qs
        parsed = urlparse(url)
        params = parse_qs(parsed.query)
        passcode: str = params.get("pwd", params.get("code", [""]))[0]

        return pwd_id, passcode

    def update_cookie(self, cookie: str) -> None:
        """Replace the cookie and push it into the session headers.

        Args:
            cookie: New cookie string.
        """
        self._credential.update_cookie(cookie)
        self._setup_session()
        logger.info("[QuarkAdapter] Cookie updated, new length=%d", len(cookie))

    def close(self) -> None:
        """Close the HTTP sessions of the sub-modules and of the adapter."""
        self._transfer_engine.close()
        self._cleanup.close()
        self.session.close()

    def __repr__(self) -> str:
        return (
            f"QuarkAdapter(name={self.PLATFORM_NAME}, "
            f"account={self.config.account_name}, "
            f"credential_valid={self._credential.validate()})"
        )
|
||||
209
cloudsearch_transfer/adapter/quark/cleanup.py
Normal file
209
cloudsearch_transfer/adapter/quark/cleanup.py
Normal file
@@ -0,0 +1,209 @@
|
||||
"""
|
||||
CloudSearch Transfer — 夸克网盘清理模块 v1.0.0
|
||||
|
||||
提供文件删除和广告过滤功能。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import Any, Dict, List
|
||||
|
||||
import requests
|
||||
|
||||
from .credential import QuarkCredentialManager
|
||||
|
||||
logger = logging.getLogger(__name__)

# ─── Quark API ────────────────────────────────────────────────────
QUARK_API_BASE = "https://drive-pc.quark.cn"
QUARK_FILE_API = f"{QUARK_API_BASE}/1/clouddrive/file"


class QuarkCleanup:
    """Quark netdisk file cleaner: batch deletion and ad-file filtering.

    Attributes:
        credential: Quark credential manager.
        session: Reused ``requests.Session``.
        timeout: HTTP request timeout in seconds.
    """

    def __init__(
        self,
        credential: QuarkCredentialManager,
        timeout: int = 30,
    ) -> None:
        """Create the cleaner.

        Args:
            credential: Quark credential manager.
            timeout: HTTP request timeout in seconds.
        """
        self.credential: QuarkCredentialManager = credential
        self.timeout: int = timeout
        self.session: requests.Session = requests.Session()

    def _submit_delete(self, file_ids: List[str], action_type: int,
                       failure_label: str) -> Dict[str, Any]:
        """POST a delete request and return the decoded JSON body.

        Args:
            file_ids: Ids to delete.
            action_type: 2 = recycle bin, 1 = permanent.
            failure_label: Prefix of the RuntimeError message.

        Raises:
            RuntimeError: The request failed or the body is not valid JSON.
        """
        headers = self.credential.get_headers()
        headers.setdefault("Content-Type", "application/json")
        body: Dict[str, Any] = {
            "action_type": action_type,
            "filelist": file_ids,
        }

        try:
            resp = self.session.post(
                f"{QUARK_FILE_API}/delete", json=body, headers=headers, timeout=self.timeout
            )
            resp.raise_for_status()
        except requests.RequestException as exc:
            raise RuntimeError(f"{failure_label}: {exc}") from exc

        # A non-JSON body must surface as RuntimeError as well; callers only
        # handle that type.
        try:
            return resp.json()
        except ValueError as exc:
            raise RuntimeError(f"{failure_label}: {exc}") from exc

    def delete_files(self, file_ids: List[str]) -> bool:
        """Delete files by moving them to the recycle bin.

        POST /1/clouddrive/file/delete
        Body: {"action_type": 2, "filelist": ["<fid1>", "<fid2>", ...]}

        action_type=1 deletes permanently, action_type=2 moves to the
        recycle bin.

        Args:
            file_ids: Ids of the files to delete.

        Returns:
            True when the request was accepted (or the list is empty), False
            when the API reported an error.

        Raises:
            RuntimeError: HTTP request error or undecodable response.
        """
        if not file_ids:
            logger.warning("[QuarkCleanup] delete_files called with empty list")
            return True

        logger.info("[QuarkCleanup] Deleting %d files: %s", len(file_ids), file_ids)

        data = self._submit_delete(file_ids, 2, "删除文件失败")
        status: int = data.get("status", -1)
        if status != 0 and data.get("code") not in (0, None):
            logger.error("[QuarkCleanup] Delete returned error: status=%s, message=%s",
                         status, data.get("message"))
            return False

        logger.info("[QuarkCleanup] Delete succeeded for %d files", len(file_ids))
        return True

    def delete_files_permanent(self, file_ids: List[str]) -> bool:
        """Delete files permanently (they cannot be restored).

        Same request as ``delete_files`` but with ``action_type=1``.

        Args:
            file_ids: Ids of the files to delete permanently.

        Returns:
            True when the request was accepted (or the list is empty).

        Raises:
            RuntimeError: HTTP request error or undecodable response.
        """
        if not file_ids:
            return True

        logger.info("[QuarkCleanup] Permanently deleting %d files", len(file_ids))

        data = self._submit_delete(file_ids, 1, "彻底删除失败")
        return data.get("status") == 0 or data.get("code") in (0, None)

    @staticmethod
    def filter_ads(
        files: List[Dict[str, Any]],
        banned_keywords: List[str],
    ) -> List[Dict[str, Any]]:
        """Remove the files whose name contains a banned keyword.

        Matching is a case-insensitive substring test. Empty keywords are
        ignored because they would match every name.

        Args:
            files: File dicts; the name is read from ``"name"``, falling back
                to ``"file_name"``.
            banned_keywords: Keywords marking a file as an advertisement.

        Returns:
            The filtered list; ``files`` itself when there are no usable
            keywords.
        """
        keywords = [kw.lower() for kw in (banned_keywords or []) if kw]
        if not keywords:
            return files

        filtered: List[Dict[str, Any]] = []
        for f in files:
            name = f.get("name", f.get("file_name", ""))
            lowered = str(name).lower()
            if any(kw in lowered for kw in keywords):
                logger.info("[QuarkCleanup] Filtered ad file: '%s'", name)
                continue
            filtered.append(f)

        removed_count = len(files) - len(filtered)
        if removed_count > 0:
            logger.info("[QuarkCleanup] Ad filter removed %d/%d files", removed_count, len(files))
        return filtered

    @staticmethod
    def filter_ad_ids(
        file_ids: List[str],
        file_names: List[str],
        banned_keywords: List[str],
    ) -> List[str]:
        """Remove the ids whose file name contains a banned keyword.

        Args:
            file_ids: File ids.
            file_names: Names aligned one-to-one with ``file_ids``.
            banned_keywords: Keywords marking a file as an advertisement.

        Returns:
            The filtered ids; ``file_ids`` unchanged when there are no usable
            keywords or the two lists differ in length.
        """
        keywords = [kw.lower() for kw in (banned_keywords or []) if kw]
        if not keywords or len(file_ids) != len(file_names):
            return file_ids

        filtered_ids: List[str] = []
        for fid, name in zip(file_ids, file_names):
            lowered = str(name).lower()
            if any(kw in lowered for kw in keywords):
                logger.info("[QuarkCleanup] Filtered ad file: '%s' (id=%s)", name, fid)
                continue
            filtered_ids.append(fid)

        return filtered_ids

    def close(self) -> None:
        """Close the HTTP session."""
        self.session.close()

    def __enter__(self) -> "QuarkCleanup":
        return self

    def __exit__(self, *args: Any) -> None:
        self.close()
|
||||
89
cloudsearch_transfer/adapter/quark/credential.py
Normal file
89
cloudsearch_transfer/adapter/quark/credential.py
Normal file
@@ -0,0 +1,89 @@
|
||||
"""
|
||||
CloudSearch Transfer — 夸克网盘凭证管理 v1.0.0
|
||||
|
||||
夸克网盘使用 Cookie 直传,无需 token 刷新机制。
|
||||
验证方式:检查 Cookie 字符串长度是否 >= 50。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import Dict
|
||||
|
||||
logger = logging.getLogger(__name__)


class QuarkCredentialManager:
    """Credential holder for the Quark netdisk.

    Authentication is a raw Cookie string passed along with each request;
    there is no token refresh here.

    Attributes:
        cookie: The stored Quark Cookie string.
    """

    # Minimum accepted cookie length (empirical threshold).
    MIN_COOKIE_LENGTH: int = 50

    def __init__(self, cookie: str = "") -> None:
        """Store the cookie.

        Args:
            cookie: Quark Cookie string.
        """
        self.cookie: str = cookie

    def validate(self) -> bool:
        """Check that the cookie is present and long enough.

        Logs a warning when the cookie is empty or too short.

        Returns:
            True when ``len(cookie) >= MIN_COOKIE_LENGTH``, else False.
        """
        current = self.cookie
        if not current:
            logger.warning("[QuarkCredential] Cookie is empty")
            return False

        size = len(current)
        if size >= self.MIN_COOKIE_LENGTH:
            return True

        logger.warning(
            "[QuarkCredential] Cookie too short: len=%d, min=%d",
            size,
            self.MIN_COOKIE_LENGTH,
        )
        return False

    def is_valid(self) -> bool:
        """Alias of ``validate()``."""
        return self.validate()

    def get_headers(self) -> Dict[str, str]:
        """Build the request headers carrying the cookie.

        Returns:
            ``{"Cookie": <cookie>}`` for a valid cookie, otherwise an empty
            dict (a warning is logged).
        """
        if self.validate():
            return {"Cookie": self.cookie}

        logger.warning("[QuarkCredential] Cannot build headers: cookie invalid")
        return {}

    def update_cookie(self, cookie: str) -> None:
        """Replace the stored cookie (manual refresh).

        Args:
            cookie: New Cookie string.
        """
        self.cookie = cookie
        logger.info("[QuarkCredential] Cookie updated, new length=%d", len(cookie))

    def __repr__(self) -> str:
        size = len(self.cookie) if self.cookie else 0
        ok = self.validate()
        return f"QuarkCredentialManager(cookie_len={size}, valid={ok})"
|
||||
554
cloudsearch_transfer/adapter/quark/transfer.py
Normal file
554
cloudsearch_transfer/adapter/quark/transfer.py
Normal file
@@ -0,0 +1,554 @@
|
||||
"""
|
||||
CloudSearch Transfer — 夸克网盘转存核心 v1.0.0
|
||||
|
||||
夸克网盘 7 步转存流程:
|
||||
|
||||
① POST .../share/sharepage/token → stoken
|
||||
② GET .../share/sharepage/detail → fid, share_fid_token, title
|
||||
③ POST .../share/sharepage/save → task_id (转存任务)
|
||||
④ 轮询 GET .../task → save_as_top_fids (status==2 完成)
|
||||
⑤ POST .../share → task_id (创建分享任务)
|
||||
⑥ 轮询 GET .../task → share_id
|
||||
⑦ POST .../share/password → share_url, passcode
|
||||
|
||||
参考 cloud-auto-save 的 quark.py 实现。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
import time
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
import requests
|
||||
|
||||
from .credential import QuarkCredentialManager
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ─── Quark API base URLs ───────────────────────────────────────────
QUARK_API_BASE = "https://drive-pc.quark.cn"
QUARK_SHARE_API = QUARK_API_BASE + "/1/clouddrive/share"

# ─── Share URL parsing ─────────────────────────────────────────────
# Captures <share_id> from pan.quark.cn/s/<share_id>
SHARE_URL_PATTERN = re.compile(r"pan\.quark\.cn/s/(\w+)")
|
||||
|
||||
|
||||
class QuarkTransfer:
|
||||
"""夸克网盘转存引擎。
|
||||
|
||||
封装完整的 7 步 API 流程:获取 stoken → 获取详情 → 保存文件 →
|
||||
创建分享 → 设置密码。
|
||||
|
||||
Attributes:
|
||||
credential: 夸克凭证管理器实例。
|
||||
session: 复用的 requests.Session。
|
||||
timeout: 请求超时(秒)。
|
||||
poll_interval: 轮询间隔(秒)。
|
||||
poll_max_attempts: 最大轮询次数。
|
||||
"""
|
||||
|
||||
def __init__(
    self,
    credential: QuarkCredentialManager,
    timeout: int = 30,
    poll_interval: float = 0.5,
    poll_max_attempts: int = 50,
) -> None:
    """Set up the transfer engine.

    Args:
        credential: Quark credential manager.
        timeout: HTTP request timeout in seconds.
        poll_interval: Seconds between two polls of an async task.
        poll_max_attempts: Maximum number of polls per async task
            (default 50).
    """
    # The session used for every request this engine sends.
    self.session: requests.Session = requests.Session()

    self.credential: QuarkCredentialManager = credential

    # Request / polling parameters.
    self.timeout: int = timeout
    self.poll_interval: float = poll_interval
    self.poll_max_attempts: int = poll_max_attempts
|
||||
|
||||
# ─── 步骤 ①:获取 stoken ───────────────────────────────────────
|
||||
|
||||
def _get_stoken(self, pwd_id: str, passcode: str = "") -> str:
    """Step ①: exchange the share id (and extraction code) for an stoken.

    POST /1/clouddrive/share/sharepage/token
    Body: {"passcode": "", "pwd_id": "<share_id>"}

    Args:
        pwd_id: Share id parsed from the URL.
        passcode: Extraction code; empty string when the share has none.

    Returns:
        The stoken string.

    Raises:
        RuntimeError: The request failed, the body is not valid JSON, or the
            response carries no stoken.
    """
    url = f"{QUARK_SHARE_API}/sharepage/token"
    body: Dict[str, str] = {
        "passcode": passcode,
        "pwd_id": pwd_id,
    }
    headers = self.credential.get_headers()
    headers.setdefault("Content-Type", "application/json")

    logger.info("[QuarkTransfer] ① Getting stoken for pwd_id=%s", pwd_id)

    try:
        resp = self.session.post(url, json=body, headers=headers, timeout=self.timeout)
        resp.raise_for_status()
    except requests.RequestException as exc:
        raise RuntimeError(f"获取 stoken 失败: {exc}") from exc

    # A non-JSON body must not escape as ValueError: the adapter maps
    # ValueError to "invalid URL".
    try:
        data: Dict[str, Any] = resp.json()
    except ValueError as exc:
        raise RuntimeError(f"获取 stoken 失败: {exc}") from exc

    # "data" may be present but null on error responses.
    stoken: Optional[str] = (data.get("data") or {}).get("stoken")
    if not stoken:
        raise RuntimeError(f"stoken 缺失, response: {data}")

    logger.info("[QuarkTransfer] ① stoken obtained")
    return stoken
|
||||
|
||||
# ─── 步骤 ②:获取分享详情 ─────────────────────────────────────
|
||||
|
||||
def _get_detail(self, pwd_id: str, stoken: str) -> Dict[str, Any]:
    """Step ②: fetch the share detail.

    GET /1/clouddrive/share/sharepage/detail?pwd_id=xx&stoken=xx&_fetch_share=1

    Args:
        pwd_id: Share id.
        stoken: The stoken obtained in step ①.

    Returns:
        The ``data`` object of the response (title, fid, ... as delivered by
        the API).

    Raises:
        RuntimeError: The request failed, the body is not valid JSON, the API
            reported an error, or the detail data is empty.
    """
    url = f"{QUARK_SHARE_API}/sharepage/detail"
    params: Dict[str, str] = {
        "pwd_id": pwd_id,
        "stoken": stoken,
        "_fetch_share": "1",
    }
    headers = self.credential.get_headers()

    logger.info("[QuarkTransfer] ② Fetching share detail for pwd_id=%s", pwd_id)

    try:
        resp = self.session.get(url, params=params, headers=headers, timeout=self.timeout)
        resp.raise_for_status()
    except requests.RequestException as exc:
        raise RuntimeError(f"获取分享详情失败: {exc}") from exc

    # A non-JSON body must not escape as ValueError: the adapter maps
    # ValueError to "invalid URL".
    try:
        data: Dict[str, Any] = resp.json()
    except ValueError as exc:
        raise RuntimeError(f"获取分享详情失败: {exc}") from exc

    status: int = data.get("status", -1)
    if status != 0 and data.get("code") not in (0, None):
        raise RuntimeError(f"分享详情API返回错误: status={status}, message={data.get('message')}")

    detail: Optional[Dict[str, Any]] = data.get("data")
    if not detail:
        raise RuntimeError(f"分享详情数据为空, response: {data}")

    logger.info(
        "[QuarkTransfer] ② Detail: title=%s, fid=%s",
        detail.get("title"),
        detail.get("fid"),
    )
    return detail
|
||||
|
||||
# ─── 步骤 ③:发起转存 ─────────────────────────────────────────
|
||||
|
||||
def _init_save(self, pwd_id: str, stoken: str, detail: Dict[str, Any],
               to_pdir_fid: str = "0") -> str:
    """Step 3: ask the server to save the shared files into our own drive.

    Sends ``POST {QUARK_SHARE_API}/sharepage/save`` with the JSON body::

        {
            "fid_list": [<fid>, ...],
            "fid_token_list": [<share_fid_token>, ...],
            "to_pdir_fid": "<target dir>",
            "pwd_id": "<pwd_id>",
            "stoken": "<stoken>",
            "pdir_fid": "0",
            "scene": "link"
        }

    The ID lists are taken from ``detail["fid_list"]`` /
    ``detail["fid_token_list"]`` when present, otherwise from
    ``detail["fid"]`` / ``detail["share_fid_token"]``. Either form may be a
    single value or a list; both are normalised to a flat list.

    Args:
        pwd_id: Share ID.
        stoken: Token obtained in step 1.
        detail: Share detail returned by step 2.
        to_pdir_fid: Target directory ID; ``"0"`` is the root directory.

    Returns:
        The ``task_id`` to poll in step 4.

    Raises:
        RuntimeError: ``detail`` holds no file ID, the HTTP request failed,
            the body was not valid JSON, the API reported an error, or the
            reply carried no ``task_id``.
    """

    def _as_id_list(value: Any) -> List[str]:
        """Normalise a missing / scalar / list value to a flat list of IDs."""
        if value is None or value == "":
            return []
        if isinstance(value, (list, tuple)):
            return [item for item in value if item]
        return [value]

    url = f"{QUARK_SHARE_API}/sharepage/save"

    # The previous default ``[detail.get("fid", [])]`` produced ``[[]]`` when
    # ``fid`` was absent and a nested list when ``fid`` was already a list.
    fid_list: List[str] = (
        _as_id_list(detail.get("fid_list")) or _as_id_list(detail.get("fid"))
    )
    fid_token_list: List[str] = (
        _as_id_list(detail.get("fid_token_list"))
        or _as_id_list(detail.get("share_fid_token"))
    )
    if not fid_list:
        # Fail before the round trip instead of posting an empty save request.
        raise RuntimeError(f"转存失败: 分享详情缺少 fid, detail: {detail}")

    body: Dict[str, Any] = {
        "fid_list": fid_list,
        "fid_token_list": fid_token_list,
        "to_pdir_fid": to_pdir_fid,
        "pwd_id": pwd_id,
        "stoken": stoken,
        "pdir_fid": "0",
        "scene": "link",
    }
    headers = self.credential.get_headers()
    headers.setdefault("Content-Type", "application/json")

    logger.info("[QuarkTransfer] ③ Initiating save: %d files to dir=%s", len(fid_list), to_pdir_fid)

    try:
        resp = self.session.post(url, json=body, headers=headers, timeout=self.timeout)
        resp.raise_for_status()
        # Decoding is part of the guarded section so that a non-JSON body is
        # reported as RuntimeError like every other failure of this step.
        data: Any = resp.json()
    except (requests.RequestException, ValueError) as exc:
        raise RuntimeError(f"发起转存失败: {exc}") from exc

    if not isinstance(data, dict):
        raise RuntimeError(f"转存 task_id 缺失, response: {data}")

    status: int = data.get("status", -1)
    # Same acceptance rule as steps 2, 5 and 7: reject only when ``status``
    # is non-zero AND ``code`` is neither 0 nor absent. This step used to
    # reject every non-zero ``status`` on its own.
    # NOTE(review): confirm against the live API which field is authoritative.
    if status != 0 and data.get("code") not in (0, None):
        raise RuntimeError(f"转存请求失败: status={status}, message={data.get('message')}")

    payload = data.get("data") or {}
    task_id: Optional[str] = payload.get("task_id") if isinstance(payload, dict) else None
    if not task_id:
        raise RuntimeError(f"转存 task_id 缺失, response: {data}")

    logger.info("[QuarkTransfer] ③ Save task created: task_id=%s", task_id)
    return task_id
|
||||
|
||||
# ─── 步骤 ④:轮询转存任务 ─────────────────────────────────────
|
||||
|
||||
def _poll_save_task(self, task_id: str) -> List[str]:
    """Step 4: poll the save task until it finishes.

    Sends ``GET {QUARK_API_BASE}/1/clouddrive/task`` with ``task_id`` and
    ``retry_index=0`` up to ``self.poll_max_attempts`` times, waiting
    ``self.poll_interval`` seconds between attempts. A task ``status`` of
    ``2`` means success; ``-1`` (also used when the reply carries no status)
    means failure.

    Args:
        task_id: Task ID returned by step 3.

    Returns:
        The ``save_as_top_fids`` list, i.e. the IDs of the saved files.

    Raises:
        RuntimeError: The task failed or did not finish within the allowed
            number of attempts.
    """
    url = f"{QUARK_API_BASE}/1/clouddrive/task"
    headers = self.credential.get_headers()
    # The query never changes between attempts, so build it once.
    params: Dict[str, str] = {
        "task_id": task_id,
        "retry_index": "0",
    }

    for attempt in range(1, self.poll_max_attempts + 1):
        # Wait *between* attempts only; sleeping after the last attempt would
        # merely delay the timeout error.
        if attempt > 1:
            time.sleep(self.poll_interval)

        try:
            resp = self.session.get(url, params=params, headers=headers, timeout=self.timeout)
            resp.raise_for_status()
            # A body that is not valid JSON is treated like any other
            # transient transport failure: log it and try again.
            data: Any = resp.json()
        except (requests.RequestException, ValueError):
            logger.warning("[QuarkTransfer] ④ Poll attempt %d/%d failed, retrying...",
                           attempt, self.poll_max_attempts)
            continue

        # Tolerate ``"data": null`` and non-object replies.
        payload = data.get("data") if isinstance(data, dict) else None
        if not isinstance(payload, dict):
            payload = {}
        task_status = payload.get("status", -1)

        logger.debug("[QuarkTransfer] ④ Poll %d/%d: status=%s", attempt, self.poll_max_attempts, task_status)

        if task_status == 2:  # finished successfully
            save_as = payload.get("save_as") or {}
            save_as_top_fids: List[str] = save_as.get("save_as_top_fids") or []
            logger.info("[QuarkTransfer] ④ Save completed: %d files saved", len(save_as_top_fids))
            return save_as_top_fids

        if task_status == -1:
            raise RuntimeError(f"转存任务失败: task_id={task_id}, response={data}")

    raise RuntimeError(
        f"转存任务超时: task_id={task_id}, 已轮询 {self.poll_max_attempts} 次"
    )
|
||||
|
||||
# ─── 步骤 ⑤:发起创建分享 ─────────────────────────────────────
|
||||
|
||||
def _init_share(self, fid_list: List[str], title: str,
                expired_type: int = 1) -> str:
    """Step 5: request creation of a new share for the saved files.

    Sends ``POST {QUARK_SHARE_API}`` with the JSON body::

        {"fid_list": [<fid>, ...], "title": "<title>", "expired_type": 1}

    An empty ``title`` is replaced by ``"分享"``.

    Args:
        fid_list: IDs of the files to share.
        title: Title of the share.
        expired_type: Expiry type; the original author documents ``1`` as
            "never expires".

    Returns:
        The ``task_id`` to poll in step 6.

    Raises:
        RuntimeError: The HTTP request failed, the body was not valid JSON,
            the API reported an error, or the reply carried no ``task_id``.
    """
    url = QUARK_SHARE_API
    body: Dict[str, Any] = {
        "fid_list": fid_list,
        "title": title or "分享",
        "expired_type": expired_type,
    }
    headers = self.credential.get_headers()
    headers.setdefault("Content-Type", "application/json")

    logger.info("[QuarkTransfer] ⑤ Creating share: %d files, title='%s'", len(fid_list), title)

    try:
        resp = self.session.post(url, json=body, headers=headers, timeout=self.timeout)
        resp.raise_for_status()
        # Decode inside the guarded section so a non-JSON body becomes the
        # documented RuntimeError instead of an uncaught ValueError.
        data: Any = resp.json()
    except (requests.RequestException, ValueError) as exc:
        raise RuntimeError(f"创建分享失败: {exc}") from exc

    if not isinstance(data, dict):
        raise RuntimeError(f"分享 task_id 缺失, response: {data}")

    status: int = data.get("status", -1)
    # Reject only when both indicators signal an error.
    if status != 0 and data.get("code") not in (0, None):
        raise RuntimeError(f"创建分享请求失败: status={status}, message={data.get('message')}")

    # ``data`` may be null in the reply; treat that like a missing task_id.
    payload = data.get("data") or {}
    task_id: Optional[str] = payload.get("task_id") if isinstance(payload, dict) else None
    if not task_id:
        raise RuntimeError(f"分享 task_id 缺失, response: {data}")

    logger.info("[QuarkTransfer] ⑤ Share task created: task_id=%s", task_id)
    return task_id
|
||||
|
||||
# ─── 步骤 ⑥:轮询分享任务 ─────────────────────────────────────
|
||||
|
||||
def _poll_share_task(self, task_id: str) -> str:
    """Step 6: poll the share-creation task until it finishes.

    Sends ``GET {QUARK_API_BASE}/1/clouddrive/task`` with ``task_id`` and
    ``retry_index=0`` up to ``self.poll_max_attempts`` times, waiting
    ``self.poll_interval`` seconds between attempts. A task ``status`` of
    ``2`` means success; ``-1`` (also used when the reply carries no status)
    means failure.

    Args:
        task_id: Task ID returned by step 5.

    Returns:
        The ``share_id`` of the newly created share.

    Raises:
        RuntimeError: The task failed, finished without a ``share_id``, or
            did not finish within the allowed number of attempts.
    """
    url = f"{QUARK_API_BASE}/1/clouddrive/task"
    headers = self.credential.get_headers()
    # The query never changes between attempts, so build it once.
    params: Dict[str, str] = {
        "task_id": task_id,
        "retry_index": "0",
    }

    for attempt in range(1, self.poll_max_attempts + 1):
        # Wait *between* attempts only; sleeping after the last attempt would
        # merely delay the timeout error.
        if attempt > 1:
            time.sleep(self.poll_interval)

        try:
            resp = self.session.get(url, params=params, headers=headers, timeout=self.timeout)
            resp.raise_for_status()
            # A body that is not valid JSON is treated like any other
            # transient transport failure: log it and try again.
            data: Any = resp.json()
        except (requests.RequestException, ValueError):
            logger.warning("[QuarkTransfer] ⑥ Poll attempt %d/%d failed, retrying...",
                           attempt, self.poll_max_attempts)
            continue

        # Tolerate ``"data": null`` and non-object replies.
        payload = data.get("data") if isinstance(data, dict) else None
        if not isinstance(payload, dict):
            payload = {}
        task_status = payload.get("status", -1)

        logger.debug("[QuarkTransfer] ⑥ Poll %d/%d: status=%s", attempt, self.poll_max_attempts, task_status)

        if task_status == 2:  # finished successfully
            share_id: Optional[str] = payload.get("share_id")
            if not share_id:
                # The ID is sometimes nested one level deeper, under "result".
                nested = payload.get("result") or {}
                share_id = nested.get("share_id", "") if isinstance(nested, dict) else ""
            if not share_id:
                raise RuntimeError(f"分享完成但 share_id 缺失: {data}")
            logger.info("[QuarkTransfer] ⑥ Share completed: share_id=%s", share_id)
            return share_id

        if task_status == -1:
            raise RuntimeError(f"分享任务失败: task_id={task_id}, response={data}")

    raise RuntimeError(
        f"分享任务超时: task_id={task_id}, 已轮询 {self.poll_max_attempts} 次"
    )
|
||||
|
||||
# ─── 步骤 ⑦:设置分享密码 ─────────────────────────────────────
|
||||
|
||||
def _set_password(self, share_id: str, password: str = "") -> Tuple[str, str]:
    """Step 7: obtain the final share link and passcode for a share.

    Sends ``POST {QUARK_SHARE_API}/password`` with the JSON body
    ``{"share_id": "<share_id>"}``. Per the original author's note, this call
    is required even for shares without a password because it is what yields
    the official ``share_url``.

    NOTE(review): ``password`` is not part of the request body; it is only
    used as the fallback return value when the reply has no ``passcode``
    field. Confirm whether the endpoint is expected to receive it.

    Args:
        share_id: Share ID returned by step 6.
        password: Desired share password; empty string means none.

    Returns:
        A ``(share_url, passcode)`` tuple. When the reply has no
        ``share_url``, ``https://pan.quark.cn/s/<share_id>`` is returned.

    Raises:
        RuntimeError: The HTTP request failed, the body was not valid JSON,
            or the API reported an error.
    """
    url = f"{QUARK_SHARE_API}/password"
    body: Dict[str, str] = {
        "share_id": share_id,
    }
    headers = self.credential.get_headers()
    headers.setdefault("Content-Type", "application/json")

    logger.info("[QuarkTransfer] ⑦ Setting password for share_id=%s", share_id)

    try:
        resp = self.session.post(url, json=body, headers=headers, timeout=self.timeout)
        resp.raise_for_status()
        # Decode inside the guarded section so a non-JSON body becomes the
        # documented RuntimeError instead of an uncaught ValueError.
        data: Any = resp.json()
    except (requests.RequestException, ValueError) as exc:
        raise RuntimeError(f"设置分享密码失败: {exc}") from exc

    if not isinstance(data, dict):
        data = {}

    status: int = data.get("status", -1)
    # Reject only when both indicators signal an error.
    if status != 0 and data.get("code") not in (0, None):
        raise RuntimeError(f"设置密码失败: status={status}, message={data.get('message')}")

    # ``data`` may be null in the reply; fall back to the defaults below.
    payload = data.get("data")
    if not isinstance(payload, dict):
        payload = {}

    share_url: str = payload.get("share_url") or ""
    passcode = payload.get("passcode", password)
    if passcode is None:
        passcode = password

    if not share_url:
        # Build the default share link from the share ID.
        share_url = f"https://pan.quark.cn/s/{share_id}"

    # Do not write the passcode itself to the log; record only whether one exists.
    logger.info(
        "[QuarkTransfer] ⑦ Share link ready: share_url=%s, passcode_set=%s",
        share_url,
        bool(passcode),
    )
    return share_url, passcode
|
||||
|
||||
# ─── 公开入口 ─────────────────────────────────────────────────
|
||||
|
||||
def transfer(
    self,
    share_url: str,
    save_dir: str = "0",
    share_password: str = "",
) -> Dict[str, Any]:
    """Run the complete 7-step transfer flow.

    Starting from an existing Quark share link, the shared files are saved
    into our own drive and a new share is created for the saved copies.

    Args:
        share_url: Source share link, e.g. ``https://pan.quark.cn/s/xxxxx``.
        save_dir: Target directory ID; ``"0"`` is the root directory.
        share_password: Password for the new share; empty means none.

    Returns:
        A dictionary with the keys:

        - ``success`` (bool)
        - ``new_file_ids`` (List[str]): IDs of the saved files
        - ``file_name`` (str): title of the share
        - ``share_url`` (str): link of the new share
        - ``passcode`` (str): passcode of the new share

    Raises:
        ValueError: The share ID could not be extracted from ``share_url``.
        RuntimeError: Any of the seven steps failed.
    """
    # 0. Extract pwd_id from the URL.
    match = SHARE_URL_PATTERN.search(share_url)
    if not match:
        raise ValueError(f"无法从URL中提取夸克分享ID: {share_url}")
    pwd_id: str = match.group(1)

    logger.info("[QuarkTransfer] Starting 7-step transfer for pwd_id=%s", pwd_id)

    # ① stoken
    stoken: str = self._get_stoken(pwd_id)

    # ② share detail
    detail: Dict[str, Any] = self._get_detail(pwd_id, stoken)

    # ③ start the save task
    save_task_id: str = self._init_save(pwd_id, stoken, detail, to_pdir_fid=save_dir)

    # ④ wait for the save task
    new_fids: List[str] = self._poll_save_task(save_task_id)
    if not new_fids:
        raise RuntimeError("转存完成但未获取到文件ID")

    # ⑤ start the share-creation task.
    # ``or`` also covers a title that is present but null/empty, so the
    # returned ``file_name`` is always a string.
    title: str = detail.get("title") or "分享"
    share_task_id: str = self._init_share(new_fids, title)

    # ⑥ wait for the share task
    share_id: str = self._poll_share_task(share_task_id)

    # ⑦ final link and passcode
    new_share_url, passcode = self._set_password(share_id, share_password)

    result: Dict[str, Any] = {
        "success": True,
        "new_file_ids": new_fids,
        "file_name": title,
        "share_url": new_share_url,
        "passcode": passcode,
    }
    # Log a copy with the passcode masked so the secret never reaches the log.
    loggable = dict(result, passcode="***" if passcode else "")
    logger.info("[QuarkTransfer] 7-step transfer complete: %s", loggable)
    return result
|
||||
|
||||
@staticmethod
def parse_share_url(url: str) -> Optional[str]:
    """Extract the share ID (``pwd_id``) from a Quark share link.

    Args:
        url: Quark share link.

    Returns:
        The first capture group of ``SHARE_URL_PATTERN``, or ``None`` when
        the pattern does not occur in ``url``.
    """
    found = SHARE_URL_PATTERN.search(url)
    if found is None:
        return None
    return found.group(1)
|
||||
|
||||
def close(self) -> None:
    """Close the HTTP session held by this instance."""
    http_session = self.session
    http_session.close()
|
||||
|
||||
def __enter__(self) -> "QuarkTransfer":
    """Enter a ``with`` block; the instance itself is the managed object."""
    return self
|
||||
|
||||
def __exit__(self, *args: Any) -> None:
    """Leave a ``with`` block: close the instance, ignoring exception details."""
    self.close()
|
||||
493
cloudsearch_transfer/adapter/uc/__init__.py
Normal file
493
cloudsearch_transfer/adapter/uc/__init__.py
Normal file
@@ -0,0 +1,493 @@
|
||||
"""
|
||||
CloudSearch Transfer — UC网盘适配器 v1.0.0
|
||||
|
||||
将 UcCredentialManager、UcTransfer、UcCleanup 组合为
|
||||
BaseCloudDriveAdapter 的完整实现。
|
||||
|
||||
UC网盘 7 步 API 转存流程(与夸克高度相似,API 域名不同):
|
||||
① POST .../share/sharepage/v2/detail?pr=UCBrowser&fr=pc → stoken
|
||||
② GET .../share/sharepage/detail → fid, share_fid_token, title
|
||||
③ POST .../share/sharepage/save → task_id (转存)
|
||||
④ 轮询 GET .../task → save_as_top_fids
|
||||
⑤ POST .../share → task_id (创建分享)
|
||||
⑥ 轮询 GET .../task → share_id
|
||||
⑦ POST .../share/password → share_url, passcode
|
||||
|
||||
参考 cloud-auto-save 的 quark 实现,域名从 drive-pc.quark.cn 改为 pc-api.uc.cn。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import time
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
from urllib.parse import urlparse, parse_qs
|
||||
|
||||
from ..base import BaseCloudDriveAdapter, FileInfo, TransferResult, VerifyResult
|
||||
from ...config import PlatformConfig, TransferConfig
|
||||
from ...errors import TransferError, TransferErrorCode
|
||||
|
||||
from .credential import UcCredentialManager
|
||||
from .transfer import UcTransfer, SHARE_URL_PATTERN
|
||||
from .cleanup import UcCleanup
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class UcAdapter(BaseCloudDriveAdapter):
    """Adapter for UC drive (UC网盘).

    Combines three helper objects — ``UcCredentialManager``, ``UcTransfer``
    and ``UcCleanup`` — to provide the operations expected of a
    ``BaseCloudDriveAdapter`` subclass.

    Attributes:
        PLATFORM_NAME: Display name of the platform.
        PLATFORM_KEY: Internal platform identifier.
        URL_PATTERNS: Regular expressions matching UC share links.
    """

    # ─── Platform identity ─────────────────────────────────────
    PLATFORM_NAME: str = "UC网盘"
    PLATFORM_KEY: str = "uc"

    # ─── URL matching: drive.uc.cn/s/<share_id> ────────────────
    URL_PATTERNS: List[str] = [
        r"drive\.uc\.cn/s/(\w+)",
    ]

    def __init__(self, config: PlatformConfig, transfer_config: TransferConfig) -> None:
        """Create the adapter and its three helper objects.

        Args:
            config: Platform configuration (cookie etc.).
            transfer_config: Global transfer settings (timeout, polling
                interval, maximum polling attempts, ...).
        """
        # NOTE(review): if the base constructor calls _setup_session(), the
        # credential manager does not exist yet at that point — confirm.
        super().__init__(config, transfer_config)

        self._credential: UcCredentialManager = UcCredentialManager(
            cookie=config.cookie
        )
        self._transfer_engine: UcTransfer = UcTransfer(
            credential=self._credential,
            timeout=transfer_config.request_timeout,
            poll_interval=transfer_config.task_poll_interval,
            poll_max_attempts=transfer_config.task_poll_max_attempts,
        )
        self._cleanup: UcCleanup = UcCleanup(
            credential=self._credential,
            timeout=transfer_config.request_timeout,
        )
        # Extraction code last used per share ID, so _save_files() can request
        # a stoken for the same password-protected share.
        self._share_passcodes: Dict[str, str] = {}

    # ═══════════════════════════════════════════════════════════════
    # Public interface
    # ═══════════════════════════════════════════════════════════════

    def _setup_session(self) -> None:
        """Copy the credential headers (cookie etc.) into the session defaults."""
        headers = self._credential.get_headers()
        if headers:
            self.session.headers.update(headers)
            logger.debug("[UcAdapter] Session headers updated with Cookie")

    def transfer(self, share_url: str, save_dir: str = "",
                 share_password: str = "") -> TransferResult:
        """Run the UC transfer flow through the ``UcTransfer`` engine.

        Args:
            share_url: UC share link.
            save_dir: Target directory; when empty, the configured default
                and finally the root directory ``"0"`` are used.
            share_password: Password for the new share; when empty, the
                configured default is used.

        Returns:
            A ``TransferResult`` describing the saved files and new share.

        Raises:
            TransferError: ``NOT_LOGIN`` when the cookie fails validation,
                ``URL_INVALID`` when the engine raises ``ValueError``,
                ``SHARE_NOT_EXIST`` or ``NETWORK_ERROR`` when it raises
                ``RuntimeError``, ``RESOURCE_EMPTY`` when no file ID remains.
        """
        start: float = time.time()

        if not self._credential.validate():
            raise TransferError(
                TransferErrorCode.NOT_LOGIN,
                message="UC Cookie 无效或长度不足",
                platform=self.PLATFORM_KEY,
            )

        target_dir: str = save_dir or self.config.save_dir or "0"
        pwd: str = share_password or self.config.share_password or ""

        try:
            result: Dict[str, Any] = self._transfer_engine.transfer(
                share_url=share_url,
                save_dir=target_dir,
                share_password=pwd,
            )
        except ValueError as exc:
            raise TransferError(
                TransferErrorCode.URL_INVALID,
                message=str(exc),
                platform=self.PLATFORM_KEY,
            ) from exc
        except RuntimeError as exc:
            msg: str = str(exc)
            # Messages mentioning the stoken or an API status are classified
            # as a missing share; everything else as a network problem.
            if "stoken" in msg or "status" in msg:
                raise TransferError(
                    TransferErrorCode.SHARE_NOT_EXIST,
                    message=msg,
                    platform=self.PLATFORM_KEY,
                ) from exc
            raise TransferError(
                TransferErrorCode.NETWORK_ERROR,
                message=msg,
                platform=self.PLATFORM_KEY,
            ) from exc

        new_fids: List[str] = result.get("new_file_ids", [])
        if self.transfer_config.ad_filter_enabled and new_fids:
            # Look the names up in the directory the files were saved into.
            # NOTE(review): the engine has already created the share by now,
            # so this only trims the returned IDs — confirm that is intended.
            new_fids = self._filter_ads(new_fids, parent_fid=target_dir)
        if not new_fids:
            raise TransferError(
                TransferErrorCode.RESOURCE_EMPTY,
                platform=self.PLATFORM_KEY,
            )

        # Measured after ad filtering so the listing request is included.
        elapsed: int = int((time.time() - start) * 1000)

        return TransferResult(
            success=True,
            platform=self.PLATFORM_KEY,
            new_file_id=",".join(str(fid) for fid in new_fids),
            file_name=result.get("file_name", ""),
            share_url=result.get("share_url", ""),
            share_password=result.get("passcode", pwd),
            original_url=share_url,
            elapsed_ms=elapsed,
        )

    def verify(self, share_url: str) -> VerifyResult:
        """Check whether a UC share link is usable.

        Args:
            share_url: UC share link.

        Returns:
            A ``VerifyResult``; ``valid`` is ``False`` with an attached
            ``TransferError`` when the cookie is invalid or the share cannot
            be read.

        Raises:
            TransferError: Re-raised unchanged, e.g. ``URL_INVALID`` from
                ``_parse_share_url``.
        """
        try:
            pwd_id, passcode = self._parse_share_url(share_url)

            if not self._credential.validate():
                return VerifyResult(
                    valid=False,
                    platform=self.PLATFORM_KEY,
                    error=TransferError(
                        TransferErrorCode.NOT_LOGIN,
                        platform=self.PLATFORM_KEY,
                    ),
                )

            stoken: str = self._transfer_engine._get_stoken(pwd_id, passcode)
            detail: Dict[str, Any] = self._transfer_engine._get_detail(pwd_id, stoken)
            files: List[FileInfo] = self._extract_file_list(detail)

            return VerifyResult(
                valid=True,
                platform=self.PLATFORM_KEY,
                title=detail.get("title", ""),
                file_count=len(files),
                files=files,
            )

        except TransferError:
            raise
        except (ValueError, RuntimeError) as exc:
            return VerifyResult(
                valid=False,
                platform=self.PLATFORM_KEY,
                error=TransferError(
                    TransferErrorCode.SHARE_NOT_EXIST,
                    message=str(exc),
                    platform=self.PLATFORM_KEY,
                ),
            )
        except Exception as exc:
            # Boundary handler: anything unexpected is reported as a network error.
            return VerifyResult(
                valid=False,
                platform=self.PLATFORM_KEY,
                error=TransferError(
                    TransferErrorCode.NETWORK_ERROR,
                    message=str(exc),
                    platform=self.PLATFORM_KEY,
                ),
            )

    # ─── Core hook methods ─────────────────────────────────────

    def _get_share_detail(self, pwd_id: str, passcode: str = "") -> dict:
        """Fetch the detail of a UC share.

        Args:
            pwd_id: Share ID.
            passcode: Extraction code of the share.

        Returns:
            The share-detail dictionary returned by the transfer engine.
        """
        # Remember the code: _save_files() gets no passcode argument but must
        # request a stoken for the same share.
        self._share_passcodes[pwd_id] = passcode
        stoken: str = self._transfer_engine._get_stoken(pwd_id, passcode)
        return self._transfer_engine._get_detail(pwd_id, stoken)

    def _save_files(self, pwd_id: str, detail: dict, save_dir: str) -> List[str]:
        """Save the shared files into our own UC drive.

        Args:
            pwd_id: Share ID.
            detail: Share detail (from ``_get_share_detail``).
            save_dir: Target directory ID.

        Returns:
            IDs of the newly saved files.
        """
        # Reuse the extraction code recorded by _get_share_detail(); without
        # it the stoken request for a protected share had no passcode.
        passcode: str = self._share_passcodes.get(pwd_id, "")
        stoken: str = self._transfer_engine._get_stoken(pwd_id, passcode)
        task_id: str = self._transfer_engine._init_save(
            pwd_id, stoken, detail, to_pdir_fid=save_dir
        )
        return self._transfer_engine._poll_save_task(task_id)

    def _create_share(
        self, file_ids: List[str], title: str, password: str = ""
    ) -> Tuple[str, str]:
        """Create a UC share for the given files.

        Args:
            file_ids: IDs of the files to share.
            title: Title of the share.
            password: Password of the share.

        Returns:
            A ``(share_url, share_password)`` tuple.
        """
        task_id: str = self._transfer_engine._init_share(file_ids, title)
        share_id: str = self._transfer_engine._poll_share_task(task_id)
        return self._transfer_engine._set_password(share_id, password)

    def _extract_file_list(self, detail: dict) -> List[FileInfo]:
        """Build ``FileInfo`` objects from a share-detail dictionary.

        Reads ``detail["files"]``, each entry being a dictionary such as
        ``{"fid": ..., "file_name": ..., "size": ..., "dir": ...}``. When
        that list is empty but ``detail["fid"]`` exists, a single entry is
        built from the top-level fields instead.

        Args:
            detail: Share-detail dictionary.

        Returns:
            List of ``FileInfo`` objects.
        """
        files_data: List[Dict[str, Any]] = detail.get("files") or []
        result: List[FileInfo] = [
            FileInfo(
                fid=str(f.get("fid", f.get("file_id", ""))),
                name=str(f.get("file_name", f.get("name", ""))),
                # ``or 0`` guards against a null size, which int() rejects.
                size=int(f.get("size") or 0),
                is_dir=bool(f.get("dir", f.get("is_dir", False))),
                ext=str(f.get("ext", f.get("file_extension", ""))),
            )
            for f in files_data
        ]

        # No per-file entries: fall back to the top-level fields.
        if not result and detail.get("fid"):
            result.append(
                FileInfo(
                    fid=str(detail.get("fid", "")),
                    name=str(detail.get("title", detail.get("file_name", ""))),
                    size=0,
                    is_dir=False,
                )
            )

        return result

    def _filter_ads(self, file_ids: List[str], parent_fid: str = "0") -> List[str]:
        """Drop the IDs of files whose name contains a banned keyword.

        The names are looked up by file ID in the listing of ``parent_fid``.
        IDs that do not appear in that listing are kept, because their names
        are unknown.

        Args:
            file_ids: File IDs to check.
            parent_fid: Directory whose listing provides the file names;
                defaults to the root directory ``"0"``.

        Returns:
            The filtered ID list; the input unchanged when no keyword is
            configured or the listing cannot be fetched.
        """
        keywords: List[str] = [
            kw
            for kw in set(self.config.banned_keywords)
            | set(self.transfer_config.default_banned_keywords)
            if kw  # an empty keyword would match every file name
        ]
        if not keywords:
            return file_ids

        try:
            files: List[FileInfo] = self.get_files(parent_fid)
        except Exception:
            # Deliberately best-effort: ad filtering must not fail a transfer.
            logger.warning(
                "[UcAdapter] Cannot fetch file list for ad filtering, skipping",
                exc_info=True,
            )
            return file_ids

        # Pair every ID with ITS OWN name. The listing is an unrelated,
        # differently ordered set of files, so zipping it against file_ids
        # would test the wrong names.
        name_by_fid: Dict[str, str] = {str(f.fid): f.name for f in files}
        file_names: List[str] = [name_by_fid.get(str(fid), "") for fid in file_ids]
        return UcCleanup.filter_ad_ids(file_ids, file_names, keywords)

    # ─── get_files / delete ────────────────────────────────────

    def get_files(self, parent_fid: str = "0") -> List[FileInfo]:
        """List the files in a UC drive directory.

        Requests ``/1/clouddrive/file/sort`` for the first page of up to
        100 entries, sorted by ``updated_at`` descending.

        Args:
            parent_fid: Directory ID; ``"0"`` is the root directory.

        Returns:
            List of ``FileInfo`` objects.

        Raises:
            TransferError: ``NETWORK_ERROR`` when the request fails, the
                body is not valid JSON, or the API reports an error.
        """
        url: str = "https://pc-api.uc.cn/1/clouddrive/file/sort"
        params: Dict[str, str] = {
            "pdir_fid": parent_fid,
            "_page": "1",
            "_size": "100",
            "_sort": "updated_at:desc",
        }
        headers: Dict[str, str] = self._credential.get_headers()

        try:
            resp = self._get(url, params=params, headers=headers)
            # Decoding is guarded too: a non-JSON body is a request failure.
            data: Any = resp.json()
        except Exception as exc:
            raise TransferError(
                TransferErrorCode.NETWORK_ERROR,
                message=f"获取文件列表失败: {exc}",
                platform=self.PLATFORM_KEY,
            ) from exc

        if not isinstance(data, dict):
            data = {}

        status: int = data.get("status", -1)
        if status != 0 and data.get("code") not in (0, None):
            raise TransferError(
                TransferErrorCode.NETWORK_ERROR,
                message=f"获取文件列表失败: {data.get('message')}",
                platform=self.PLATFORM_KEY,
            )

        # ``data`` may be null in the reply.
        payload = data.get("data")
        files_data: List[Dict[str, Any]] = (
            payload.get("list") or [] if isinstance(payload, dict) else []
        )
        result: List[FileInfo] = [
            FileInfo(
                fid=str(f.get("fid", "")),
                name=str(f.get("file_name", f.get("name", ""))),
                size=int(f.get("size") or 0),
                is_dir=bool(f.get("dir", f.get("is_dir", False))),
                ext=str(f.get("file_extension", f.get("ext", ""))),
            )
            for f in files_data
        ]

        logger.debug("[UcAdapter] Listed %d files in dir=%s", len(result), parent_fid)
        return result

    def delete(self, file_ids: List[str]) -> bool:
        """Delete files by moving them to the recycle bin.

        Args:
            file_ids: IDs of the files to delete.

        Returns:
            ``True`` when the deletion succeeded.

        Raises:
            TransferError: ``NOT_LOGIN`` when the cookie fails validation,
                ``NETWORK_ERROR`` when the cleanup helper raises.
        """
        if not self._credential.validate():
            raise TransferError(
                TransferErrorCode.NOT_LOGIN,
                platform=self.PLATFORM_KEY,
            )

        try:
            return self._cleanup.delete_files(file_ids)
        except RuntimeError as exc:
            raise TransferError(
                TransferErrorCode.NETWORK_ERROR,
                message=str(exc),
                platform=self.PLATFORM_KEY,
            ) from exc

    def delete_permanent(self, file_ids: List[str]) -> bool:
        """Delete files permanently (not recoverable).

        Args:
            file_ids: IDs of the files to delete permanently.

        Returns:
            ``True`` when the deletion succeeded.

        Raises:
            TransferError: ``NOT_LOGIN`` when the cookie fails validation,
                ``NETWORK_ERROR`` when the cleanup helper raises.
        """
        if not self._credential.validate():
            raise TransferError(
                TransferErrorCode.NOT_LOGIN,
                platform=self.PLATFORM_KEY,
            )

        try:
            return self._cleanup.delete_files_permanent(file_ids)
        except RuntimeError as exc:
            raise TransferError(
                TransferErrorCode.NETWORK_ERROR,
                message=str(exc),
                platform=self.PLATFORM_KEY,
            ) from exc

    # ─── Utilities ─────────────────────────────────────────────

    def _parse_share_url(self, url: str) -> Tuple[str, str]:
        """Split a UC share URL into ``(pwd_id, passcode)``.

        Accepted form: ``https://drive.uc.cn/s/<pwd_id>``, optionally with a
        ``?pwd=xxxx`` (or ``?code=xxxx``) query parameter.

        Args:
            url: UC share link.

        Returns:
            A ``(pwd_id, passcode)`` tuple; ``passcode`` is ``""`` when the
            URL carries none.

        Raises:
            TransferError: ``URL_INVALID`` when no share ID is found.
        """
        pwd_id: Optional[str] = UcTransfer.parse_share_url(url)
        if not pwd_id:
            raise TransferError(
                TransferErrorCode.URL_INVALID,
                message=f"无法解析UC链接: {url}",
                platform=self.PLATFORM_KEY,
            )

        parsed = urlparse(url)
        params = parse_qs(parsed.query)
        # ``pwd`` wins over ``code``; parse_qs values are non-empty lists.
        passcode: str = params.get("pwd", params.get("code", [""]))[0]

        return pwd_id, passcode

    def update_cookie(self, cookie: str) -> None:
        """Replace the cookie and push it into the session headers.

        Args:
            cookie: New cookie string.
        """
        self._credential.update_cookie(cookie)
        self._setup_session()
        logger.info("[UcAdapter] Cookie updated, new length=%d", len(cookie))

    def close(self) -> None:
        """Close the HTTP sessions of the transfer engine and the cleanup helper."""
        try:
            self._transfer_engine.close()
        finally:
            # UcCleanup owns a requests.Session of its own; it used to be leaked.
            self._cleanup.close()
|
||||
218
cloudsearch_transfer/adapter/uc/cleanup.py
Normal file
218
cloudsearch_transfer/adapter/uc/cleanup.py
Normal file
@@ -0,0 +1,218 @@
|
||||
"""
|
||||
CloudSearch Transfer — UC网盘清理模块 v1.0.0
|
||||
|
||||
提供文件删除和广告过滤功能。API 与夸克相同,仅域名不同。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import Any, Dict, List
|
||||
|
||||
import requests
|
||||
|
||||
from .credential import UcCredentialManager
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ─── UC API ─────────────────────────────────────────────────────────
|
||||
UC_API_BASE = "https://pc-api.uc.cn"
|
||||
UC_FILE_API = f"{UC_API_BASE}/1/clouddrive/file"
|
||||
|
||||
|
||||
class UcCleanup:
|
||||
"""UC 网盘文件清理器。
|
||||
|
||||
提供批量删除文件和广告文件过滤功能。
|
||||
|
||||
Attributes:
|
||||
credential: UC 凭证管理器。
|
||||
session: 复用的 requests.Session。
|
||||
timeout: HTTP 请求超时秒数。
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
credential: UcCredentialManager,
|
||||
timeout: int = 30,
|
||||
) -> None:
|
||||
"""初始化清理器。
|
||||
|
||||
Args:
|
||||
credential: 有效的 UC 凭证管理器。
|
||||
timeout: HTTP 请求超时秒数。
|
||||
"""
|
||||
self.credential: UcCredentialManager = credential
|
||||
self.timeout: int = timeout
|
||||
self.session: requests.Session = requests.Session()
|
||||
|
||||
def delete_files(self, file_ids: List[str]) -> bool:
|
||||
"""批量删除文件(回收站方式)。
|
||||
|
||||
POST /1/clouddrive/file/delete
|
||||
Body: {
|
||||
"action_type": 2,
|
||||
"filelist": ["<fid1>", "<fid2>", ...]
|
||||
}
|
||||
|
||||
action_type=1 表示彻底删除,action_type=2 表示移入回收站。
|
||||
|
||||
Args:
|
||||
file_ids: 要删除的文件 ID 列表。
|
||||
|
||||
Returns:
|
||||
True 表示删除请求已提交成功,False 表示失败。
|
||||
|
||||
Raises:
|
||||
RuntimeError: HTTP 请求错误。
|
||||
"""
|
||||
if not file_ids:
|
||||
logger.warning("[UcCleanup] delete_files called with empty list")
|
||||
return True
|
||||
|
||||
url: str = f"{UC_FILE_API}/delete"
|
||||
body: Dict[str, Any] = {
|
||||
"action_type": 2, # 2=回收站, 1=彻底删除
|
||||
"filelist": file_ids,
|
||||
}
|
||||
headers = self.credential.get_headers()
|
||||
headers.setdefault("Content-Type", "application/json")
|
||||
|
||||
logger.info("[UcCleanup] Deleting %d files: %s", len(file_ids), file_ids)
|
||||
|
||||
try:
|
||||
resp = self.session.post(
|
||||
url, json=body, headers=headers, timeout=self.timeout
|
||||
)
|
||||
resp.raise_for_status()
|
||||
except requests.RequestException as exc:
|
||||
raise RuntimeError(f"删除文件失败: {exc}") from exc
|
||||
|
||||
data: Dict[str, Any] = resp.json()
|
||||
status: int = data.get("status", -1)
|
||||
if status != 0 and data.get("code") not in (0, None):
|
||||
logger.error(
|
||||
"[UcCleanup] Delete returned error: status=%s, message=%s",
|
||||
status,
|
||||
data.get("message"),
|
||||
)
|
||||
return False
|
||||
|
||||
logger.info("[UcCleanup] Delete succeeded for %d files", len(file_ids))
|
||||
return True
|
||||
|
||||
def delete_files_permanent(self, file_ids: List[str]) -> bool:
|
||||
"""彻底删除文件(不从回收站恢复)。
|
||||
|
||||
与 delete_files 类似,但 action_type=1。
|
||||
|
||||
Args:
|
||||
file_ids: 要彻底删除的文件 ID 列表。
|
||||
|
||||
Returns:
|
||||
True 表示删除请求已提交成功。
|
||||
"""
|
||||
if not file_ids:
|
||||
return True
|
||||
|
||||
url: str = f"{UC_FILE_API}/delete"
|
||||
body: Dict[str, Any] = {
|
||||
"action_type": 1, # 1=彻底删除
|
||||
"filelist": file_ids,
|
||||
}
|
||||
headers = self.credential.get_headers()
|
||||
headers.setdefault("Content-Type", "application/json")
|
||||
|
||||
logger.info("[UcCleanup] Permanently deleting %d files", len(file_ids))
|
||||
|
||||
try:
|
||||
resp = self.session.post(
|
||||
url, json=body, headers=headers, timeout=self.timeout
|
||||
)
|
||||
resp.raise_for_status()
|
||||
except requests.RequestException as exc:
|
||||
raise RuntimeError(f"彻底删除失败: {exc}") from exc
|
||||
|
||||
data: Dict[str, Any] = resp.json()
|
||||
return data.get("status") == 0 or data.get("code") in (0, None)
|
||||
|
||||
@staticmethod
|
||||
def filter_ads(
|
||||
files: List[Dict[str, Any]],
|
||||
banned_keywords: List[str],
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""按关键词过滤文件列表中的广告文件。
|
||||
|
||||
遍历文件列表,剔除文件名中包含任一 banned_keywords 的文件。
|
||||
匹配方式:不区分大小写的子串匹配。
|
||||
|
||||
Args:
|
||||
files: 文件信息字典列表,每个字典需包含 "name" 字段。
|
||||
banned_keywords: 被禁关键词列表(匹配不区分大小写)。
|
||||
|
||||
Returns:
|
||||
过滤后的文件信息列表。
|
||||
"""
|
||||
if not banned_keywords:
|
||||
return files
|
||||
|
||||
filtered: List[Dict[str, Any]] = []
|
||||
removed_count: int = 0
|
||||
|
||||
for f in files:
|
||||
name: str = f.get("name", "")
|
||||
name_lower: str = str(name).lower()
|
||||
|
||||
if any(keyword.lower() in name_lower for keyword in banned_keywords):
|
||||
logger.info("[UcCleanup] Filtered ad file: '%s'", name)
|
||||
removed_count += 1
|
||||
continue
|
||||
|
||||
filtered.append(f)
|
||||
|
||||
if removed_count > 0:
|
||||
logger.info(
|
||||
"[UcCleanup] Ad filter removed %d/%d files", removed_count, len(files)
|
||||
)
|
||||
return filtered
|
||||
|
||||
@staticmethod
|
||||
def filter_ad_ids(
|
||||
file_ids: List[str],
|
||||
file_names: List[str],
|
||||
banned_keywords: List[str],
|
||||
) -> List[str]:
|
||||
"""按关键词过滤文件 ID 列表。
|
||||
|
||||
根据 file_names 判断是否为广告,返回对应的 file_ids。
|
||||
|
||||
Args:
|
||||
file_ids: 文件 ID 列表。
|
||||
file_names: 与 file_ids 一一对应的文件名列表。
|
||||
banned_keywords: 被禁关键词列表。
|
||||
|
||||
Returns:
|
||||
过滤后的 file_ids 列表。
|
||||
"""
|
||||
if not banned_keywords or len(file_ids) != len(file_names):
|
||||
return file_ids
|
||||
|
||||
filtered_ids: List[str] = []
|
||||
for fid, name in zip(file_ids, file_names):
|
||||
name_lower: str = str(name).lower()
|
||||
if any(kw.lower() in name_lower for kw in banned_keywords):
|
||||
logger.info("[UcCleanup] Filtered ad file: '%s' (id=%s)", name, fid)
|
||||
continue
|
||||
filtered_ids.append(fid)
|
||||
|
||||
return filtered_ids
|
||||
|
||||
def close(self) -> None:
    """Close the HTTP session stored on ``self.session``."""
    http_session = self.session
    http_session.close()
|
||||
|
||||
def __enter__(self) -> "UcCleanup":
    """Enter the ``with`` block; the object itself is the managed resource."""
    managed = self
    return managed
|
||||
|
||||
def __exit__(self, *args: Any) -> None:
    """Leave the ``with`` block by delegating to ``close()``.

    The exception details passed in ``args`` are ignored and nothing is
    returned, so an exception raised inside the block keeps propagating.
    """
    del args  # exception info is intentionally unused
    self.close()
|
||||
95
cloudsearch_transfer/adapter/uc/credential.py
Normal file
95
cloudsearch_transfer/adapter/uc/credential.py
Normal file
@@ -0,0 +1,95 @@
|
||||
"""
|
||||
CloudSearch Transfer — UC网盘凭证管理 v1.0.0
|
||||
|
||||
UC网盘使用 Cookie 直传(与夸克高度相似),无需 token 刷新机制。
|
||||
验证方式:检查 Cookie 字符串长度是否 >= 50。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import Dict
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class UcCredentialManager:
    """Cookie-based credential holder for the UC cloud-drive API.

    UC authenticates API calls from the raw ``Cookie`` request header, so
    there is no token to refresh: this class stores the cookie string,
    applies a minimum-length sanity check and builds the request headers.

    Attributes:
        cookie: The stored UC cookie string (``""`` when none was given).
    """

    # Shortest cookie string accepted as plausible.
    MIN_COOKIE_LENGTH: int = 50

    # Referer header sent with UC requests.
    REFERER: str = "https://drive.uc.cn/"

    def __init__(self, cookie: str = "") -> None:
        """Store the cookie.

        Args:
            cookie: UC cookie string; ``None`` is treated as empty.
        """
        self.cookie: str = cookie or ""

    def _meets_min_length(self) -> bool:
        """Return whether the cookie is long enough, without logging."""
        return len(self.cookie or "") >= self.MIN_COOKIE_LENGTH

    def validate(self) -> bool:
        """Check the cookie against ``MIN_COOKIE_LENGTH``.

        A warning is logged when the cookie is empty or too short.

        Returns:
            True when the cookie length is at least ``MIN_COOKIE_LENGTH``.
        """
        if not self.cookie:
            logger.warning("[UcCredential] Cookie is empty")
            return False

        if self._meets_min_length():
            return True

        logger.warning(
            "[UcCredential] Cookie too short: len=%d, min=%d",
            len(self.cookie),
            self.MIN_COOKIE_LENGTH,
        )
        return False

    def is_valid(self) -> bool:
        """Alias of ``validate()`` for the adapter layer."""
        return self.validate()

    def get_headers(self) -> Dict[str, str]:
        """Build the authentication headers for a UC request.

        Returns:
            A new dict with ``Cookie`` and ``Referer``; an empty dict when
            the cookie fails validation.
        """
        if not self.validate():
            logger.warning("[UcCredential] Cannot build headers: cookie invalid")
            return {}

        return {
            "Cookie": self.cookie,
            "Referer": self.REFERER,
        }

    def update_cookie(self, cookie: str) -> None:
        """Replace the stored cookie (manual refresh).

        Args:
            cookie: New cookie string; ``None`` is treated as empty instead
                of failing on ``len(None)``.
        """
        self.cookie = cookie or ""
        logger.info("[UcCredential] Cookie updated, new length=%d", len(self.cookie))

    def __repr__(self) -> str:
        # Use the silent check: producing a repr must not emit warning logs.
        return (
            f"UcCredentialManager(cookie_len={len(self.cookie) if self.cookie else 0}, "
            f"valid={self._meets_min_length()})"
        )
|
||||
619
cloudsearch_transfer/adapter/uc/transfer.py
Normal file
619
cloudsearch_transfer/adapter/uc/transfer.py
Normal file
@@ -0,0 +1,619 @@
|
||||
"""
|
||||
CloudSearch Transfer — UC网盘转存核心 v1.0.0
|
||||
|
||||
UC网盘 7 步转存流程(与夸克高度相似,API 域名不同):
|
||||
|
||||
① POST .../share/sharepage/v2/detail?pr=UCBrowser&fr=pc → stoken
|
||||
② GET .../share/sharepage/detail → fid, share_fid_token, title
|
||||
③ POST .../share/sharepage/save → task_id (转存)
|
||||
④ 轮询 GET .../task → save_as_top_fids (status==2 完成)
|
||||
⑤ POST .../share → task_id (创建分享)
|
||||
⑥ 轮询 GET .../task → share_id
|
||||
⑦ POST .../share/password → share_url, passcode
|
||||
|
||||
参考 cloud-auto-save 的 quark 实现,域名从 drive-pc.quark.cn 改为 pc-api.uc.cn。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
import time
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
import requests
|
||||
|
||||
from .credential import UcCredentialManager
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ─── UC API endpoints ───────────────────────────────────────────────
UC_API_BASE = "https://pc-api.uc.cn"
UC_SHARE_API = UC_API_BASE + "/1/clouddrive/share"

# ─── Share URL parsing ──────────────────────────────────────────────
# Captures <share_id> from drive.uc.cn/s/<share_id>
SHARE_URL_PATTERN = re.compile(r"drive\.uc\.cn/s/(\w+)")
|
||||
|
||||
|
||||
class UcTransfer:
    """Transfer engine for UC cloud-drive share links.

    Drives the seven API calls that copy a shared resource into the
    caller's own drive and publish it again:

    1. exchange the share id for an ``stoken``
    2. fetch the share detail
    3. start the save task
    4. poll the save task until it finishes
    5. start the share-creation task
    6. poll the share task until it yields a ``share_id``
    7. fetch the URL / passcode of the new share

    Attributes:
        credential: Credential manager that supplies the request headers.
        session: ``requests.Session`` reused for every call.
        timeout: Per-request timeout in seconds.
        poll_interval: Delay between two polls of an async task, in seconds.
        poll_max_attempts: Maximum number of polls before giving up.
    """

    def __init__(
        self,
        credential: "UcCredentialManager",
        timeout: int = 30,
        poll_interval: float = 0.5,
        poll_max_attempts: int = 50,
    ) -> None:
        """Create the engine and open its HTTP session.

        Args:
            credential: UC credential manager used to build headers.
            timeout: HTTP request timeout in seconds.
            poll_interval: Seconds to wait between task polls.
            poll_max_attempts: Maximum number of task polls.
        """
        self.credential: UcCredentialManager = credential
        self.timeout: int = timeout
        self.poll_interval: float = poll_interval
        self.poll_max_attempts: int = poll_max_attempts
        self.session: requests.Session = requests.Session()

    # ─── Internal helpers ──────────────────────────────────────────

    @staticmethod
    def _payload(response: Dict[str, Any]) -> Dict[str, Any]:
        """Return the ``data`` object of an API response, or ``{}``.

        A JSON ``null`` (or any non-object) under ``data`` is treated like a
        missing key so that chained lookups cannot fail on ``None``.
        """
        inner = response.get("data")
        return inner if isinstance(inner, dict) else {}

    @staticmethod
    def _as_id_list(
        detail: Dict[str, Any], plural_key: str, single_key: str
    ) -> List[str]:
        """Read ``plural_key`` (falling back to ``single_key``) as a flat list.

        A list/tuple value is copied as-is, a truthy scalar is wrapped, and
        anything else yields ``[]`` — never a nested or ``[[]]`` list.
        """
        value = detail.get(plural_key)
        if value is None:
            value = detail.get(single_key)
        if isinstance(value, (list, tuple)):
            return list(value)
        return [value] if value else []

    def _call(self, verb: str, url: str, failure: str, **kwargs: Any) -> Dict[str, Any]:
        """Send one request and return the decoded JSON object.

        Args:
            verb: Session method name, ``"get"`` or ``"post"``.
            url: Target URL.
            failure: Message prefix used when the call fails.
            **kwargs: Extra arguments forwarded to the session method.

        Raises:
            RuntimeError: Transport/HTTP error, undecodable body, or a body
                that is not a JSON object.
        """
        try:
            resp = getattr(self.session, verb)(url, timeout=self.timeout, **kwargs)
            resp.raise_for_status()
            data = resp.json()
        except (requests.RequestException, ValueError) as exc:
            raise RuntimeError(f"{failure}: {exc}") from exc
        if not isinstance(data, dict):
            raise RuntimeError(f"{failure}: 响应不是 JSON 对象: {data!r}")
        return data

    def _poll_task(
        self, task_id: str, step: str, failed: str, timed_out: str
    ) -> Dict[str, Any]:
        """Poll ``/1/clouddrive/task`` until the task reports status 2.

        A request that fails (transport error, HTTP error, bad JSON) counts
        as one attempt and is retried after ``poll_interval``.

        Args:
            task_id: Task to poll.
            step: Step marker used in log lines (e.g. ``"④"``).
            failed: Message prefix for a task that reports status -1.
            timed_out: Message prefix used when the attempts run out.

        Returns:
            The full response dict of the successful poll.

        Raises:
            RuntimeError: The task failed or never completed.
        """
        url = f"{UC_API_BASE}/1/clouddrive/task"
        headers = self.credential.get_headers()
        params: Dict[str, str] = {"task_id": task_id, "retry_index": "0"}

        for attempt in range(1, self.poll_max_attempts + 1):
            try:
                resp = self.session.get(
                    url, params=params, headers=headers, timeout=self.timeout
                )
                resp.raise_for_status()
                data = resp.json()
            except (requests.RequestException, ValueError):
                logger.warning(
                    "[UcTransfer] %s Poll attempt %d/%d failed, retrying...",
                    step,
                    attempt,
                    self.poll_max_attempts,
                )
                time.sleep(self.poll_interval)
                continue

            if not isinstance(data, dict):
                data = {}
            # A response without data.status is treated as failure (-1).
            task_status = self._payload(data).get("status", -1)
            logger.debug(
                "[UcTransfer] %s Poll %d/%d: status=%s",
                step,
                attempt,
                self.poll_max_attempts,
                task_status,
            )

            if task_status == 2:  # finished
                return data
            if task_status == -1:
                raise RuntimeError(f"{failed}: task_id={task_id}, response={data}")

            time.sleep(self.poll_interval)

        raise RuntimeError(
            f"{timed_out}: task_id={task_id}, 已轮询 {self.poll_max_attempts} 次"
        )

    # ─── Step ①: obtain stoken ─────────────────────────────────────

    def _get_stoken(self, pwd_id: str, passcode: str = "") -> str:
        """Step ①: exchange a share id for an ``stoken``.

        POST ``/1/clouddrive/share/sharepage/v2/detail?pr=UCBrowser&fr=pc``
        with ``{"passcode", "pwd_id"}``; the token is read from
        ``data.token_info.stoken``.

        Args:
            pwd_id: Share id parsed from the share URL.
            passcode: Extraction code of the share, empty when it has none.

        Returns:
            The stoken string.

        Raises:
            RuntimeError: The request failed or no stoken was returned.
        """
        headers = self.credential.get_headers()
        headers.setdefault("Content-Type", "application/json")

        logger.info("[UcTransfer] ① Getting stoken for pwd_id=%s", pwd_id)
        data = self._call(
            "post",
            f"{UC_SHARE_API}/sharepage/v2/detail",
            "获取 stoken 失败",
            json={"passcode": passcode, "pwd_id": pwd_id},
            params={"pr": "UCBrowser", "fr": "pc"},
            headers=headers,
        )

        token_info = self._payload(data).get("token_info")
        stoken: Optional[str] = (
            token_info.get("stoken") if isinstance(token_info, dict) else None
        )
        if not stoken:
            raise RuntimeError(f"stoken 缺失, response: {data}")

        logger.info("[UcTransfer] ① stoken obtained")
        return stoken

    # ─── Step ②: fetch share detail ────────────────────────────────

    def _get_detail(self, pwd_id: str, stoken: str) -> Dict[str, Any]:
        """Step ②: fetch the share detail.

        GET ``/1/clouddrive/share/sharepage/detail`` with ``pwd_id``,
        ``stoken`` and ``_fetch_share=1``.

        Args:
            pwd_id: Share id.
            stoken: Token obtained in step ①.

        Returns:
            The ``data`` object of the response.

        Raises:
            RuntimeError: The request failed, the API reported an error, or
                the detail payload is empty.
        """
        logger.info("[UcTransfer] ② Fetching share detail for pwd_id=%s", pwd_id)
        data = self._call(
            "get",
            f"{UC_SHARE_API}/sharepage/detail",
            "获取分享详情失败",
            params={"pwd_id": pwd_id, "stoken": stoken, "_fetch_share": "1"},
            headers=self.credential.get_headers(),
        )

        status = data.get("status", -1)
        if status != 0 and data.get("code") not in (0, None):
            raise RuntimeError(
                f"分享详情API返回错误: status={status}, message={data.get('message')}"
            )

        detail: Optional[Dict[str, Any]] = data.get("data")
        if not detail:
            raise RuntimeError(f"分享详情数据为空, response: {data}")

        logger.info(
            "[UcTransfer] ② Detail: title=%s, fid=%s",
            detail.get("title"),
            detail.get("fid"),
        )
        return detail

    # ─── Step ③: start the save task ───────────────────────────────

    def _init_save(
        self,
        pwd_id: str,
        stoken: str,
        detail: Dict[str, Any],
        to_pdir_fid: str = "0",
    ) -> str:
        """Step ③: start saving the shared files into the own drive.

        POST ``/1/clouddrive/share/sharepage/save`` with ``fid_list``,
        ``fid_token_list``, ``to_pdir_fid``, ``pwd_id``, ``stoken``,
        ``pdir_fid="0"`` and ``scene="link"``.

        The id lists come from ``detail["fid_list"]`` /
        ``detail["fid_token_list"]`` or, when absent, from the single-value
        keys ``fid`` / ``share_fid_token``.

        Args:
            pwd_id: Share id.
            stoken: Token obtained in step ①.
            detail: Share detail returned by step ②.
            to_pdir_fid: Target directory id; ``"0"`` is the default.

        Returns:
            The task id to poll in step ④.

        Raises:
            RuntimeError: The request failed, the API reported a non-zero
                status, or no task id was returned.
        """
        fid_list = self._as_id_list(detail, "fid_list", "fid")
        fid_token_list = self._as_id_list(detail, "fid_token_list", "share_fid_token")

        body: Dict[str, Any] = {
            "fid_list": fid_list,
            "fid_token_list": fid_token_list,
            "to_pdir_fid": to_pdir_fid,
            "pwd_id": pwd_id,
            "stoken": stoken,
            "pdir_fid": "0",
            "scene": "link",
        }
        headers = self.credential.get_headers()
        headers.setdefault("Content-Type", "application/json")

        logger.info(
            "[UcTransfer] ③ Initiating save: %d files to dir=%s",
            len(fid_list),
            to_pdir_fid,
        )
        data = self._call(
            "post",
            f"{UC_SHARE_API}/sharepage/save",
            "发起转存失败",
            json=body,
            headers=headers,
        )

        status = data.get("status", -1)
        if status != 0:
            raise RuntimeError(
                f"转存请求失败: status={status}, message={data.get('message')}"
            )

        task_id: Optional[str] = self._payload(data).get("task_id")
        if not task_id:
            raise RuntimeError(f"转存 task_id 缺失, response: {data}")

        logger.info("[UcTransfer] ③ Save task created: task_id=%s", task_id)
        return task_id

    # ─── Step ④: poll the save task ────────────────────────────────

    def _poll_save_task(self, task_id: str) -> List[str]:
        """Step ④: wait for the save task and return the new file ids.

        Args:
            task_id: Task id returned by step ③.

        Returns:
            ``data.save_as.save_as_top_fids`` of the finished task, or an
            empty list when that field is absent.

        Raises:
            RuntimeError: The task failed or timed out.
        """
        data = self._poll_task(task_id, "④", "转存任务失败", "转存任务超时")
        save_as = self._payload(data).get("save_as")
        fids = save_as.get("save_as_top_fids") if isinstance(save_as, dict) else None
        save_as_top_fids: List[str] = fids or []
        logger.info(
            "[UcTransfer] ④ Save completed: %d files saved",
            len(save_as_top_fids),
        )
        return save_as_top_fids

    # ─── Step ⑤: start the share task ──────────────────────────────

    def _init_share(
        self, fid_list: List[str], title: str, expired_type: int = 1
    ) -> str:
        """Step ⑤: request a new share for the saved files.

        POST ``/1/clouddrive/share`` with ``fid_list``, ``title`` and
        ``expired_type``.

        Args:
            fid_list: Ids of the files to share.
            title: Share title; ``"分享"`` is sent when it is empty.
            expired_type: Expiry type forwarded to the API (default 1).

        Returns:
            The task id to poll in step ⑥.

        Raises:
            RuntimeError: The request failed, the API reported an error, or
                no task id was returned.
        """
        body: Dict[str, Any] = {
            "fid_list": fid_list,
            "title": title or "分享",
            "expired_type": expired_type,
        }
        headers = self.credential.get_headers()
        headers.setdefault("Content-Type", "application/json")

        logger.info(
            "[UcTransfer] ⑤ Creating share: %d files, title='%s'", len(fid_list), title
        )
        data = self._call(
            "post", UC_SHARE_API, "创建分享失败", json=body, headers=headers
        )

        status = data.get("status", -1)
        if status != 0 and data.get("code") not in (0, None):
            raise RuntimeError(
                f"创建分享请求失败: status={status}, message={data.get('message')}"
            )

        task_id: Optional[str] = self._payload(data).get("task_id")
        if not task_id:
            raise RuntimeError(f"分享 task_id 缺失, response: {data}")

        logger.info("[UcTransfer] ⑤ Share task created: task_id=%s", task_id)
        return task_id

    # ─── Step ⑥: poll the share task ───────────────────────────────

    def _poll_share_task(self, task_id: str) -> str:
        """Step ⑥: wait for the share task and return its ``share_id``.

        The id is read from ``data.share_id`` and, failing that, from
        ``data.result.share_id``.

        Args:
            task_id: Task id returned by step ⑤.

        Returns:
            The share id.

        Raises:
            RuntimeError: The task failed, timed out, or finished without a
                share id.
        """
        data = self._poll_task(task_id, "⑥", "分享任务失败", "分享任务超时")
        task = self._payload(data)
        share_id: Optional[str] = task.get("share_id")
        if not share_id:
            result = task.get("result")
            share_id = result.get("share_id", "") if isinstance(result, dict) else ""
        if not share_id:
            raise RuntimeError(f"分享完成但 share_id 缺失: {data}")
        logger.info("[UcTransfer] ⑥ Share completed: share_id=%s", share_id)
        return share_id

    # ─── Step ⑦: fetch share URL / passcode ────────────────────────

    def _set_password(self, share_id: str, password: str = "") -> Tuple[str, str]:
        """Step ⑦: obtain the URL and passcode of the new share.

        POST ``/1/clouddrive/share/password`` with ``{"share_id"}``.

        Args:
            share_id: Share id returned by step ⑥.
            password: Value returned as passcode when the response carries
                none.

        Returns:
            ``(share_url, passcode)``.  When the response has no
            ``share_url`` the link is built from ``share_id``.

        Raises:
            RuntimeError: The request failed or the API reported an error.
        """
        # NOTE(review): `password` is never sent to the API; it only serves
        # as the fallback for the returned passcode — confirm this is intended.
        headers = self.credential.get_headers()
        headers.setdefault("Content-Type", "application/json")

        logger.info("[UcTransfer] ⑦ Setting password for share_id=%s", share_id)
        data = self._call(
            "post",
            f"{UC_SHARE_API}/password",
            "设置分享密码失败",
            json={"share_id": share_id},
            headers=headers,
        )

        status = data.get("status", -1)
        if status != 0 and data.get("code") not in (0, None):
            raise RuntimeError(
                f"设置密码失败: status={status}, message={data.get('message')}"
            )

        payload = self._payload(data)
        share_url: str = payload.get("share_url", "")
        passcode: str = payload.get("passcode", password)

        if not share_url:
            # Fall back to the canonical link built from the share id.
            share_url = f"https://drive.uc.cn/s/{share_id}"

        logger.info(
            "[UcTransfer] ⑦ Password set: share_url=%s, passcode=%s",
            share_url,
            passcode,
        )
        return share_url, passcode

    # ─── Public entry point ────────────────────────────────────────

    def transfer(
        self,
        share_url: str,
        save_dir: str = "0",
        share_password: str = "",
        source_passcode: str = "",
    ) -> Dict[str, Any]:
        """Run the complete seven-step transfer for one UC share link.

        Args:
            share_url: Original UC share link, e.g.
                ``https://drive.uc.cn/s/xxxxx``.
            save_dir: Target directory id for the saved files.
            share_password: Passcode fallback for the new share (step ⑦).
            source_passcode: Extraction code of the original share,
                forwarded to step ①; empty for shares without one.

        Returns:
            A dict with the keys ``success`` (bool), ``new_file_ids``
            (list of saved file ids), ``file_name`` (share title),
            ``share_url`` (new link) and ``passcode`` (new passcode).

        Raises:
            ValueError: No share id could be parsed from ``share_url``.
            RuntimeError: Any step failed.
        """
        # Step 0: extract pwd_id from the URL.
        match = SHARE_URL_PATTERN.search(share_url)
        if not match:
            raise ValueError(f"无法从URL中提取UC分享ID: {share_url}")
        pwd_id: str = match.group(1)

        logger.info("[UcTransfer] Starting 7-step transfer for pwd_id=%s", pwd_id)

        # ① stoken, ② detail
        stoken: str = self._get_stoken(pwd_id, source_passcode)
        detail: Dict[str, Any] = self._get_detail(pwd_id, stoken)

        # ③ start save → ④ poll
        task_id: str = self._init_save(pwd_id, stoken, detail, to_pdir_fid=save_dir)
        new_file_ids: List[str] = self._poll_save_task(task_id)
        if not new_file_ids:
            raise RuntimeError("转存完成但未获取到文件ID")

        # ⑤ start share → ⑥ poll
        title: str = detail.get("title", "分享")
        share_task_id: str = self._init_share(new_file_ids, title)
        share_id: str = self._poll_share_task(share_task_id)

        # ⑦ URL / passcode
        share_url_new, passcode = self._set_password(share_id, share_password)

        logger.info(
            "[UcTransfer] Transfer complete: %d files, new_share=%s",
            len(new_file_ids),
            share_url_new,
        )

        return {
            "success": True,
            "new_file_ids": new_file_ids,
            "file_name": title,
            "share_url": share_url_new,
            "passcode": passcode,
        }

    @staticmethod
    def parse_share_url(url: str) -> Optional[str]:
        """Extract the share id (``pwd_id``) from a UC share URL.

        Args:
            url: UC share link.

        Returns:
            The share id, or ``None`` when the URL does not match.
        """
        match = SHARE_URL_PATTERN.search(url)
        return match.group(1) if match else None

    def close(self) -> None:
        """Close the HTTP session."""
        self.session.close()

    def __enter__(self) -> "UcTransfer":
        return self

    def __exit__(self, *args: Any) -> None:
        self.close()
|
||||
112
cloudsearch_transfer/adapter/xunlei/__init__.py
Normal file
112
cloudsearch_transfer/adapter/xunlei/__init__.py
Normal file
@@ -0,0 +1,112 @@
|
||||
"""
|
||||
CloudSearch Transfer — 迅雷网盘适配器 v1.0.0
|
||||
|
||||
PLATFORM_KEY = 'xunlei'
|
||||
迅雷网盘使用 refresh_token + captcha_token 双重认证。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import List, Optional, Tuple
|
||||
|
||||
from ..base import (
|
||||
BaseCloudDriveAdapter,
|
||||
FileInfo,
|
||||
TransferResult,
|
||||
VerifyResult,
|
||||
)
|
||||
from ...config import PlatformConfig, TransferConfig
|
||||
from ...errors import TransferError, TransferErrorCode
|
||||
from .credential import XunleiCredentialManager
|
||||
from .transfer import XunleiTransfer
|
||||
from .cleanup import XunleiCleanup
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class XunleiAdapter(BaseCloudDriveAdapter):
    """Adapter that plugs the Xunlei cloud drive into the transfer framework.

    It wires together the credential manager, the lazily created transfer
    engine and the cleanup helper, and implements the hooks required by
    ``BaseCloudDriveAdapter``.
    """

    PLATFORM_NAME = "迅雷网盘"
    PLATFORM_KEY = "xunlei"
    URL_PATTERNS = [r"pan\.xunlei\.com/s/([A-Za-z0-9]+)"]

    def __init__(self, config: PlatformConfig, transfer_config: TransferConfig):
        """Build the adapter and its collaborators.

        Args:
            config: Platform configuration for the Xunlei account.
            transfer_config: Global transfer configuration.
        """
        super().__init__(config, transfer_config)
        # NOTE(review): XunleiCredentialManager.__init__ in credential.py is
        # declared as (refresh_token: str = "") and calls .strip() on it;
        # passing the whole config object looks wrong — confirm which config
        # field carries the refresh token.
        self._credential = XunleiCredentialManager(config)
        self._transfer_engine: Optional[XunleiTransfer] = None
        # XunleiCleanup requires the credential manager as first argument.
        self._cleanup = XunleiCleanup(self._credential)

    def _setup_session(self):
        """Install the authentication headers on the shared session."""
        self._ensure_auth()

    def _ensure_auth(self):
        """Refresh the session's authentication headers before a call."""
        # NOTE(review): the credential manager's documentation only mentions
        # get_headers(); verify that get_auth_headers() exists.
        headers = self._credential.get_auth_headers()
        # Skip the update when no headers are available (update(None) raises).
        if headers:
            self.session.headers.update(headers)

    @property
    def _transfer(self) -> XunleiTransfer:
        """Transfer engine, created on first access."""
        if self._transfer_engine is None:
            self._transfer_engine = XunleiTransfer(
                self.session,
                self._credential,
                self.config,
                self.transfer_config,
            )
        return self._transfer_engine

    # ─── Abstract-method implementations ───────────────────────────

    def _get_share_detail(self, pwd_id: str, passcode: str = "") -> dict:
        """Fetch the share information for ``pwd_id`` via the transfer engine."""
        self._ensure_auth()
        return self._transfer.get_share_info(pwd_id, passcode)

    def _save_files(self, pwd_id: str, detail: dict, save_dir: str) -> List[str]:
        """Save the shared files into ``save_dir`` and return their new ids."""
        self._ensure_auth()
        return self._transfer.save_files(pwd_id, detail, save_dir)

    def _create_share(self, file_ids: List[str], title: str,
                      password: str = "") -> Tuple[str, str]:
        """Create a share for ``file_ids`` via the transfer engine."""
        self._ensure_auth()
        return self._transfer.create_share(file_ids, title, password)

    def _extract_file_list(self, detail: dict) -> List[FileInfo]:
        """Convert ``detail["files"]`` into ``FileInfo`` objects."""
        return [
            FileInfo(
                fid=entry.get("id", ""),
                name=entry.get("name", ""),
                size=entry.get("size", 0),
                is_dir=entry.get("is_dir", False),
            )
            for entry in detail.get("files", [])
        ]

    def _filter_ads(self, file_ids: List[str]) -> List[str]:
        """Remove ids whose file name matches a banned keyword.

        The names are taken from the transfer engine's ``_last_file_names``
        attribute when it exists; otherwise an empty list is passed.
        """
        banned = self._get_banned_keywords()
        return self._cleanup.filter_ad_ids(
            file_ids,
            getattr(self._transfer, "_last_file_names", []),
            banned,
        )

    def get_files(self, parent_fid: str = "0") -> List[FileInfo]:
        """List the files under ``parent_fid``."""
        self._ensure_auth()
        return self._transfer.list_files(parent_fid)

    def delete(self, file_ids: List[str]) -> bool:
        """Delete ``file_ids`` through the cleanup helper.

        Returns:
            The result of ``XunleiCleanup.delete_files``.
        """
        self._ensure_auth()
        # delete_files takes only the id list; the cleanup helper owns its
        # own session and credential reference.
        return self._cleanup.delete_files(file_ids)

    def _get_banned_keywords(self) -> List[str]:
        """Return the platform's banned keywords, else the global defaults."""
        return self.config.banned_keywords or self.transfer_config.default_banned_keywords

    def close(self):
        """Close the cleanup helper's session and the adapter session."""
        try:
            self._cleanup.close()
        finally:
            self.session.close()

    def __repr__(self):
        return f"<XunleiAdapter account={self.config.account_name}>"
|
||||
198
cloudsearch_transfer/adapter/xunlei/cleanup.py
Normal file
198
cloudsearch_transfer/adapter/xunlei/cleanup.py
Normal file
@@ -0,0 +1,198 @@
|
||||
"""
|
||||
CloudSearch Transfer — 迅雷网盘清理模块 v1.0.0
|
||||
|
||||
提供文件删除和广告过滤功能。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import Any, Dict, List
|
||||
|
||||
import requests
|
||||
|
||||
from .credential import XunleiCredentialManager
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ─── 迅雷 API ─────────────────────────────────────────────────────────
|
||||
XUNLEI_PAN_API = "https://api-pan.xunlei.com"
|
||||
|
||||
|
||||
class XunleiCleanup:
    """File cleanup helper for the Xunlei cloud drive.

    Offers batch deletion through the drive API and keyword-based
    advertisement filtering.

    Attributes:
        credential: Credential manager that supplies the request headers.
        session: ``requests.Session`` reused for every call.
        timeout: Per-request timeout in seconds.
    """

    def __init__(
        self,
        credential: "XunleiCredentialManager",
        timeout: int = 30,
    ) -> None:
        """Create the helper and open its HTTP session.

        Args:
            credential: Xunlei credential manager used to build headers.
            timeout: HTTP request timeout in seconds.
        """
        self.credential: XunleiCredentialManager = credential
        self.timeout: int = timeout
        self.session: requests.Session = requests.Session()

    def delete_files(self, file_ids: List[str]) -> bool:
        """Delete files in one batch request.

        POST ``/drive/v1/files:batchDelete`` with
        ``{"ids": [...], "space": ""}``.

        Args:
            file_ids: Ids of the files to delete.

        Returns:
            True when the list is empty or the API reports no error code,
            False when the response carries a non-zero ``errcode`` /
            ``error_code``.

        Raises:
            RuntimeError: Transport/HTTP error or an undecodable response.
        """
        if not file_ids:
            logger.warning("[XunleiCleanup] delete_files called with empty list")
            return True

        url: str = f"{XUNLEI_PAN_API}/drive/v1/files:batchDelete"
        body: Dict[str, Any] = {
            "ids": file_ids,
            "space": "",
        }
        headers = self.credential.get_headers()
        headers.setdefault("Content-Type", "application/json")

        logger.info("[XunleiCleanup] Deleting %d files: %s", len(file_ids), file_ids)

        try:
            resp = self.session.post(
                url, json=body, headers=headers, timeout=self.timeout
            )
            resp.raise_for_status()
            # Decode inside the try so bad JSON also becomes RuntimeError.
            data = resp.json()
        except (requests.RequestException, ValueError) as exc:
            raise RuntimeError(f"删除文件失败: {exc}") from exc
        if not isinstance(data, dict):
            raise RuntimeError(f"删除文件失败: 响应不是 JSON 对象: {data!r}")

        errcode = data.get("errcode", data.get("error_code", 0))
        if errcode != 0:
            logger.error(
                "[XunleiCleanup] Delete returned error: errcode=%s, message=%s",
                errcode,
                data.get("message", data.get("error", "")),
            )
            return False

        logger.info("[XunleiCleanup] Delete succeeded for %d files", len(file_ids))
        return True

    def delete_files_permanent(self, file_ids: List[str]) -> bool:
        """Permanently delete files; delegates to ``delete_files``.

        Args:
            file_ids: Ids of the files to delete.

        Returns:
            The result of ``delete_files``.
        """
        return self.delete_files(file_ids)

    @staticmethod
    def _usable_keywords(banned_keywords: List[str]) -> List[str]:
        """Lower-case the keywords and drop blank ones.

        An empty keyword is a substring of every name and would otherwise
        flag every file as an advertisement.
        """
        return [kw.lower() for kw in banned_keywords or () if kw and kw.strip()]

    @staticmethod
    def filter_ads(
        files: List[Dict[str, Any]],
        banned_keywords: List[str],
    ) -> List[Dict[str, Any]]:
        """Drop entries whose file name contains a banned keyword.

        The name is read from ``"name"`` and, when that key is absent, from
        ``"file_name"``.  Matching is a case-insensitive substring test.

        Args:
            files: File-info dicts.
            banned_keywords: Keywords that mark a file as an advertisement.

        Returns:
            ``files`` itself when there is no usable keyword, otherwise a
            new list with the surviving entries in their original order.
        """
        needles = XunleiCleanup._usable_keywords(banned_keywords)
        if not needles:
            return files

        kept: List[Dict[str, Any]] = []
        for entry in files:
            name = entry.get("name", entry.get("file_name", ""))
            # A None name must not be matched as the literal text "none".
            haystack = "" if name is None else str(name).lower()
            if any(needle in haystack for needle in needles):
                logger.info("[XunleiCleanup] Filtered ad file: '%s'", name)
                continue
            kept.append(entry)

        removed = len(files) - len(kept)
        if removed > 0:
            logger.info(
                "[XunleiCleanup] Ad filter removed %d/%d files",
                removed,
                len(files),
            )
        return kept

    @staticmethod
    def filter_ad_ids(
        file_ids: List[str],
        file_names: List[str],
        banned_keywords: List[str],
    ) -> List[str]:
        """Return the ids whose matching file name contains no banned keyword.

        ``file_ids`` and ``file_names`` are paired by position.

        Args:
            file_ids: File ids to filter.
            file_names: File names, one per id, in the same order.
            banned_keywords: Keywords that mark a file as an advertisement.

        Returns:
            ``file_ids`` unchanged when there is no usable keyword or when
            the two lists differ in length, otherwise a new list with the
            surviving ids.
        """
        needles = XunleiCleanup._usable_keywords(banned_keywords)
        if not needles:
            return file_ids

        if len(file_ids) != len(file_names):
            # Make the skipped filtering visible instead of silently passing ads.
            logger.warning(
                "[XunleiCleanup] Ad filter skipped: %d ids but %d names",
                len(file_ids),
                len(file_names),
            )
            return file_ids

        kept_ids: List[str] = []
        for fid, name in zip(file_ids, file_names):
            haystack = "" if name is None else str(name).lower()
            if any(needle in haystack for needle in needles):
                logger.info(
                    "[XunleiCleanup] Filtered ad file: '%s' (id=%s)", name, fid
                )
                continue
            kept_ids.append(fid)
        return kept_ids

    def close(self) -> None:
        """Close the HTTP session."""
        self.session.close()

    def __enter__(self) -> "XunleiCleanup":
        return self

    def __exit__(self, *args: Any) -> None:
        self.close()
|
||||
339
cloudsearch_transfer/adapter/xunlei/credential.py
Normal file
339
cloudsearch_transfer/adapter/xunlei/credential.py
Normal file
@@ -0,0 +1,339 @@
|
||||
"""
|
||||
CloudSearch Transfer — 迅雷网盘凭证管理器 v1.0.0
|
||||
|
||||
迅雷网盘使用 refresh_token + captcha_token 双重认证机制:
|
||||
|
||||
1. refresh_token → access_token (OAuth)
|
||||
POST https://xluser-ssl.xunlei.com/v1/auth/token
|
||||
Body: {"grant_type": "refresh_token", "refresh_token": "...", "client_id": "..."}
|
||||
|
||||
2. captcha_token 获取(某些操作需要)
|
||||
POST /v1/shield/captcha/init
|
||||
Body: {"client_id": "...", "action": "...", "device_id": "...", "meta": {"captcha_sign": "..."}}
|
||||
|
||||
3. get_headers() 返回所有需要的认证头:
|
||||
Authorization: Bearer <access_token>
|
||||
x-captcha-token: <captcha_token>
|
||||
x-client-id: <client_id>
|
||||
x-device-id: <device_id>
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import time
|
||||
import threading
|
||||
from typing import Dict, Optional
|
||||
|
||||
import requests
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ─── Constants ──────────────────────────────────────────────────────
# Base URL of the Xunlei account service (token and captcha endpoints)
XUNLEI_AUTH_API = "https://xluser-ssl.xunlei.com"

# Fixed client identifiers sent with every request
CLIENT_ID = "Xqp0kJBXWhwaTpB6"
DEVICE_ID = "925b7631473a13716b791d7f28289cad"

# ─── Default request headers ────────────────────────────────────────
DEFAULT_HEADERS: Dict[str, str] = {
    "User-Agent": " ".join(
        (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
            "AppleWebKit/537.36 (KHTML, like Gecko)",
            "Chrome/135.0.0.0 Safari/537.36",
        )
    ),
    "Accept": "application/json, text/plain, */*",
    "Content-Type": "application/json",
}
|
||||
|
||||
|
||||
class XunleiCredentialManager:
|
||||
"""迅雷网盘凭证管理器。
|
||||
|
||||
职责:
|
||||
- 使用 refresh_token 换取 access_token
|
||||
- 获取 captcha_token(特定 action 需要)
|
||||
- 构建包含所有认证头的请求头字典
|
||||
- 访问令牌过期前自动刷新(提前 60s)
|
||||
|
||||
用法:
|
||||
mgr = XunleiCredentialManager(refresh_token="xxx")
|
||||
mgr.refresh_access_token() # 刷新 access_token
|
||||
captcha = mgr.get_captcha_token("restore") # 获取验证码令牌
|
||||
headers = mgr.get_headers() # 获取完整的认证请求头
|
||||
is_ok = mgr.validate() # 验证凭证有效性
|
||||
|
||||
Attributes:
|
||||
CLIENT_ID: 迅雷客户端 ID。
|
||||
DEVICE_ID: 设备标识。
|
||||
"""
|
||||
|
||||
# ─── 类常量 ────────────────────────────────────────────────
|
||||
CLIENT_ID: str = CLIENT_ID
|
||||
DEVICE_ID: str = DEVICE_ID
|
||||
|
||||
def __init__(self, refresh_token: str = "") -> None:
    """Set up an empty credential state around the given refresh token.

    No network request is made here; the access token starts out empty.

    Args:
        refresh_token: Xunlei refresh token; surrounding whitespace is
            stripped.
    """
    # HTTP session pre-loaded with the module's default headers
    http = requests.Session()
    http.headers.update(DEFAULT_HEADERS)
    self._session: requests.Session = http

    # Guards the token fields below
    self._lock: threading.Lock = threading.Lock()

    self._refresh_token: str = refresh_token.strip()
    self._access_token: str = ""
    self._expires_at: float = 0.0
    # Maps action -> captcha_token
    self._captcha_tokens: Dict[str, str] = {}
|
||||
|
||||
# ─── 公开 API ──────────────────────────────────────────────
|
||||
|
||||
def validate(self) -> bool:
    """Check that the refresh token is usable.

    The token must be at least 20 characters long and must successfully
    exchange for an access token (this performs a refresh).

    Returns:
        True when the credential is valid.
    """
    token = self._refresh_token
    if token and len(token) >= 20:
        return self.refresh_access_token()

    logger.warning(
        "[XunleiCredential] refresh_token 长度不足 20,验证失败"
    )
    return False
|
||||
|
||||
def is_valid(self) -> bool:
    """Alias of ``validate()``; returns whatever it returns."""
    outcome = self.validate()
    return outcome
|
||||
|
||||
def refresh_access_token(self) -> bool:
    """Exchange the refresh token for an access token.

    Runs ``_do_refresh()`` while holding ``self._lock``. The request is
    ``POST /v1/auth/token`` with body
    ``{"grant_type": "refresh_token", "refresh_token": "...", "client_id": "..."}``.

    Returns:
        True on success, False on failure.
    """
    self._lock.acquire()
    try:
        return self._do_refresh()
    finally:
        self._lock.release()
|
||||
|
||||
def get_captcha_token(self, action: str) -> str:
    """Return the captcha token for ``action``, fetching it on a cache miss.

    Tokens are cached per action in ``self._captcha_tokens``. On a miss
    ``_do_get_captcha(action)`` is called, which issues
    ``POST /v1/shield/captcha/init``. The lookup and the fetch both run
    under ``self._lock``.

    Args:
        action: Operation type, e.g. "restore" or "share".

    Returns:
        The captcha token, or an empty string when it could not be obtained.
    """
    with self._lock:
        try:
            # Cache hit
            return self._captcha_tokens[action]
        except KeyError:
            return self._do_get_captcha(action)
|
||||
|
||||
def get_headers(self) -> Dict[str, str]:
    """Build the authentication headers for an API request.

    The access token is refreshed first if it is missing or about to
    expire. The result always contains ``x-client-id`` and
    ``x-device-id``; ``Authorization: Bearer <access_token>`` is added
    only when an access token is available. No captcha header is set
    here; use ``get_headers_with_captcha()`` for that.

    Returns:
        A new dict of authentication headers.
    """
    self._ensure_token_valid()

    token = self._access_token
    auth_part = {"Authorization": f"Bearer {token}"} if token else {}
    return {
        "x-client-id": self.CLIENT_ID,
        "x-device-id": self.DEVICE_ID,
        **auth_part,
    }
|
||||
|
||||
def get_headers_with_captcha(self, action: str = "") -> Dict[str, str]:
    """Return the auth headers, plus ``x-captcha-token`` when available.

    Args:
        action: Captcha action type; an empty string means no captcha is
            requested.

    Returns:
        The headers from ``get_headers()``, with ``x-captcha-token`` added
        when ``action`` is given and a non-empty token was obtained.
    """
    headers = self.get_headers()
    if not action:
        return headers

    token = self.get_captcha_token(action)
    if token:
        headers["x-captcha-token"] = token
    return headers
|
||||
|
||||
def get_access_token(self) -> str:
    """Return the current access token, refreshing it first if needed."""
    self._ensure_token_valid()
    token = self._access_token
    return token
|
||||
|
||||
@property
def refresh_token(self) -> str:
    """Return the current refresh token."""
    return self._refresh_token

@refresh_token.setter
def refresh_token(self, value: str) -> None:
    """Replace the refresh token and invalidate everything derived from it.

    The access token, its expiry and the cached captcha tokens are reset.
    All writes, including the new refresh token, happen under
    ``self._lock``, because ``_do_refresh()`` reads and rotates
    ``_refresh_token`` while holding that same lock.

    Args:
        value: New refresh token; surrounding whitespace is stripped.
    """
    cleaned = value.strip()
    with self._lock:
        self._refresh_token = cleaned
        self._access_token = ""
        self._expires_at = 0.0
        self._captcha_tokens.clear()
|
||||
|
||||
# ─── 内部方法 ──────────────────────────────────────────────
|
||||
|
||||
def _ensure_token_valid(self) -> None:
|
||||
"""确保 access_token 有效(过期则自动刷新)。"""
|
||||
if not self._access_token or time.time() >= (self._expires_at - 60):
|
||||
self.refresh_access_token()
|
||||
|
||||
def _do_refresh(self) -> bool:
    """Perform the token refresh request and store the result.

    Sends ``POST {XUNLEI_AUTH_API}/v1/auth/token`` with the refresh token
    and client ID. On success it stores the access token and its expiry,
    adopts a rotated refresh token if the response carries one, and clears
    the captcha cache. ``refresh_access_token()`` and
    ``_do_get_captcha()`` call this while ``self._lock`` is held.

    Returns:
        True when a new access token was stored, False otherwise. Failures
        are logged, not raised.
    """
    if not self._refresh_token:
        logger.error("[XunleiCredential] 没有 refresh_token,无法刷新")
        return False

    url = f"{XUNLEI_AUTH_API}/v1/auth/token"
    body: Dict[str, str] = {
        "grant_type": "refresh_token",
        "refresh_token": self._refresh_token,
        "client_id": self.CLIENT_ID,
    }

    try:
        resp = self._session.post(url, json=body, timeout=30)
    except requests.RequestException as e:
        logger.error("[XunleiCredential] 刷新 token 网络异常: %s", e)
        return False

    # Error responses are not always JSON; parse leniently so the HTTP
    # status can still be reported below.
    try:
        data = resp.json()
    except ValueError:
        data = None

    if resp.status_code != 200:
        logger.error(
            "[XunleiCredential] 刷新 token 失败: HTTP %d, %s",
            resp.status_code,
            data if data is not None else resp.text[:200],
        )
        return False

    access_token = data.get("access_token", "") if isinstance(data, dict) else ""
    if not access_token:
        logger.error(
            "[XunleiCredential] 响应中缺少 access_token: %s", data
        )
        return False

    try:
        expires_in = int(data.get("expires_in", 7200))
    except (TypeError, ValueError):
        expires_in = 7200

    # An empty or null refresh_token in the response must not overwrite
    # the stored one, or every later refresh would fail.
    new_refresh = data.get("refresh_token") or self._refresh_token

    self._access_token = access_token
    self._expires_at = time.time() + expires_in

    # The server may hand out a new refresh token
    if new_refresh != self._refresh_token:
        # Token contents are deliberately kept out of the log.
        logger.info("[XunleiCredential] refresh_token 已轮换")
        self._refresh_token = new_refresh

    # The access token changed, so cached captcha tokens may be stale
    self._captcha_tokens.clear()

    logger.info(
        "[XunleiCredential] Token 刷新成功 (expires_in=%ds)", expires_in
    )
    return True
|
||||
|
||||
def _do_get_captcha(self, action: str) -> str:
    """Request a captcha token for ``action`` and cache it.

    Sends ``POST {XUNLEI_AUTH_API}/v1/shield/captcha/init``. The request
    needs an ``Authorization`` header, so an access token is obtained via
    ``_do_refresh()`` first when none is stored. ``get_captcha_token()``
    calls this while ``self._lock`` is held.

    Args:
        action: Operation type the captcha token is for.

    Returns:
        The captcha token, or an empty string on any failure.
    """
    url = f"{XUNLEI_AUTH_API}/v1/shield/captcha/init"
    body: Dict[str, object] = {
        "client_id": self.CLIENT_ID,
        "action": action,
        "device_id": self.DEVICE_ID,
        "meta": {
            "captcha_sign": "",
        },
    }

    if not self._access_token and not self._do_refresh():
        logger.error("[XunleiCredential] 无法获取 access_token,跳过 captcha")
        return ""

    headers: Dict[str, str] = {
        "Authorization": f"Bearer {self._access_token}",
        "Content-Type": "application/json",
    }

    try:
        resp = self._session.post(url, json=body, headers=headers, timeout=15)
        data = resp.json()
    except requests.RequestException as e:
        logger.error("[XunleiCredential] 获取 captcha_token 网络异常: %s", e)
        return ""
    except ValueError as e:
        # Body was not valid JSON
        logger.error("[XunleiCredential] 获取 captcha_token 异常: %s", e)
        return ""

    captcha_token = data.get("captcha_token", "") if isinstance(data, dict) else ""
    if captcha_token:
        self._captcha_tokens[action] = captcha_token
        logger.info(
            "[XunleiCredential] captcha_token 获取成功 for action=%s",
            action,
        )
    else:
        logger.warning(
            "[XunleiCredential] captcha_token 为空 for action=%s: %s",
            action,
            data,
        )

    return captcha_token
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return (
|
||||
f"XunleiCredentialManager("
|
||||
f"refresh_token={'***' if self._refresh_token else 'None'}, "
|
||||
f"has_access_token={bool(self._access_token)}, "
|
||||
f"captcha_actions={list(self._captcha_tokens.keys())})"
|
||||
)
|
||||
518
cloudsearch_transfer/adapter/xunlei/transfer.py
Normal file
518
cloudsearch_transfer/adapter/xunlei/transfer.py
Normal file
@@ -0,0 +1,518 @@
|
||||
"""
|
||||
CloudSearch Transfer — 迅雷网盘转存核心 v1.0.0
|
||||
|
||||
迅雷网盘 4 步转存流程:
|
||||
|
||||
① GET .../drive/v1/share?share_id=xx → pass_code_token, files[], title
|
||||
② POST .../share/restore → restore_task_id (转存)
|
||||
③ 轮询 GET .../tasks/{task_id} → progress==100, trace_file_ids → oldId→newId映射
|
||||
④ POST .../share → share_url + pass_code
|
||||
|
||||
迅雷网盘需要 refresh_token + captcha_token 双重认证。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
import time
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
import requests
|
||||
|
||||
from .credential import XunleiCredentialManager
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ─── Xunlei API base URL ────────────────────────────────────────────
XUNLEI_PAN_API = "https://api-pan.xunlei.com"

# ─── Share URL pattern ──────────────────────────────────────────────
# Matches pan.xunlei.com/s/<share_id>. "_" and "-" are accepted inside the
# ID so that an ID containing them is captured whole instead of being cut
# off at the first such character; "?pwd=..." and "#" still end the match.
SHARE_URL_PATTERN = re.compile(r"pan\.xunlei\.com/s/([A-Za-z0-9_-]+)")
|
||||
|
||||
|
||||
class XunleiTransfer:
|
||||
"""迅雷网盘转存引擎。
|
||||
|
||||
封装完整的 4 步 API 流程:获取分享详情 → 转存文件 →
|
||||
轮询转存任务 → 创建新分享。
|
||||
|
||||
Attributes:
|
||||
credential: 迅雷凭证管理器实例。
|
||||
session: 复用的 requests.Session。
|
||||
timeout: 请求超时(秒)。
|
||||
poll_interval: 轮询间隔(秒)。
|
||||
poll_max_attempts: 最大轮询次数。
|
||||
"""
|
||||
|
||||
def __init__(
    self,
    credential: XunleiCredentialManager,
    timeout: int = 30,
    poll_interval: float = 1.0,
    poll_max_attempts: int = 60,
) -> None:
    """Create a transfer engine bound to a credential manager.

    Args:
        credential: Credential manager used to build request headers.
        timeout: HTTP request timeout in seconds.
        poll_interval: Seconds to wait between task polls.
        poll_max_attempts: Maximum number of task polls.
    """
    # One HTTP session reused by every step
    self.session: requests.Session = requests.Session()
    self.credential: XunleiCredentialManager = credential

    # Request and polling settings
    self.timeout: int = timeout
    self.poll_interval: float = poll_interval
    self.poll_max_attempts: int = poll_max_attempts
|
||||
|
||||
# ─── 步骤 ①:获取分享详情 ─────────────────────────────────────
|
||||
|
||||
def _get_share_info(self, share_id: str) -> Dict[str, Any]:
    """Step 1: fetch the details of a share.

    Sends ``GET /drive/v1/share?share_id=<share_id>`` with the credential
    manager's auth headers.

    Args:
        share_id: Share ID parsed from the share URL.

    Returns:
        Dict with the keys ``pass_code_token``, ``files``, ``title`` and
        ``share_id``.

    Raises:
        RuntimeError: On a transport or HTTP error, an unparseable
            response body, a non-zero business error code, or an empty
            file list.
    """
    url = f"{XUNLEI_PAN_API}/drive/v1/share"
    params: Dict[str, str] = {"share_id": share_id}
    headers = self.credential.get_headers()

    logger.info("[XunleiTransfer] ① Fetching share info for share_id=%s", share_id)

    try:
        resp = self.session.get(
            url, params=params, headers=headers, timeout=self.timeout
        )
        resp.raise_for_status()
        # Parsed inside the try so a non-JSON body (ValueError) is reported
        # as RuntimeError like every other failure of this step.
        data = resp.json()
    except (requests.RequestException, ValueError) as exc:
        raise RuntimeError(f"获取分享详情失败: {exc}") from exc

    if not isinstance(data, dict):
        raise RuntimeError(f"获取分享详情失败: unexpected response {data!r}")

    # Business-level error check
    errcode = data.get("errcode", data.get("error_code", 0))
    if errcode != 0:
        raise RuntimeError(
            f"分享详情API返回错误: errcode={errcode}, message={data.get('message', data.get('error', ''))}"
        )

    pass_code_token: str = data.get("pass_code_token", "")
    # "files": null is treated the same as a missing key
    files: List[Dict[str, Any]] = data.get("files") or []
    title: str = data.get("title", data.get("share_name", ""))

    if not files:
        raise RuntimeError("分享内容为空")

    logger.info(
        "[XunleiTransfer] ① Share info: title=%s, files=%d, has_pass_code_token=%s",
        title,
        len(files),
        bool(pass_code_token),
    )

    return {
        "pass_code_token": pass_code_token,
        "files": files,
        "title": title,
        "share_id": share_id,
    }
|
||||
|
||||
# ─── 步骤 ②:转存文件 ─────────────────────────────────────────
|
||||
|
||||
def _restore_files(
    self,
    share_id: str,
    pass_code_token: str,
    file_ids: List[str],
    parent_id: str = "",
) -> str:
    """Step 2: start restoring shared files into the caller's own drive.

    Sends ``POST /drive/v1/share/restore`` with the body::

        {
            "file_ids": ["<fid1>", ...],
            "pass_code_token": "<token>",
            "share_id": "<share_id>",
            "parent_id": "",
            "specify_parent_id": true
        }

    Args:
        share_id: Share ID.
        pass_code_token: Token returned by step 1.
        file_ids: IDs of the files to restore.
        parent_id: Target parent directory ID; sent as an empty string
            when falsy.

    Returns:
        The restore task ID to poll in step 3.

    Raises:
        RuntimeError: On a transport or HTTP error, an unparseable
            response body, a non-zero business error code, or a missing
            task ID.
    """
    url = f"{XUNLEI_PAN_API}/drive/v1/share/restore"

    body: Dict[str, Any] = {
        "file_ids": file_ids,
        "pass_code_token": pass_code_token,
        "share_id": share_id,
        "parent_id": parent_id or "",
        "specify_parent_id": True,
    }
    # The restore action may require a captcha token
    headers = self.credential.get_headers_with_captcha(action="restore")
    headers.setdefault("Content-Type", "application/json")

    logger.info(
        "[XunleiTransfer] ② Restoring %d files from share_id=%s",
        len(file_ids),
        share_id,
    )

    try:
        resp = self.session.post(
            url, json=body, headers=headers, timeout=self.timeout
        )
        resp.raise_for_status()
        # Parsed inside the try so a non-JSON body surfaces as RuntimeError
        data = resp.json()
    except (requests.RequestException, ValueError) as exc:
        raise RuntimeError(f"转存请求失败: {exc}") from exc

    if not isinstance(data, dict):
        raise RuntimeError(f"转存 task_id 缺失, response: {data}")

    errcode = data.get("errcode", data.get("error_code", 0))
    if errcode != 0:
        raise RuntimeError(
            f"转存请求失败: errcode={errcode}, message={data.get('message', data.get('error', ''))}"
        )

    task_id: Optional[str] = data.get("restore_task_id", data.get("task_id"))
    if not task_id:
        raise RuntimeError(f"转存 task_id 缺失, response: {data}")

    logger.info("[XunleiTransfer] ② Restore task created: task_id=%s", task_id)
    return task_id
|
||||
|
||||
# ─── 步骤 ③:轮询转存任务 ─────────────────────────────────────
|
||||
|
||||
def _poll_restore_task(self, task_id: str) -> Dict[str, str]:
    """Step 3: poll the restore task until it completes.

    Sends ``GET /drive/v1/tasks/{task_id}`` up to ``poll_max_attempts``
    times, sleeping ``poll_interval`` seconds between attempts. The task
    counts as complete when ``progress == 100``. The old-ID to new-ID
    mapping is read from ``params.trace_file_ids`` (a JSON string), with
    the ``result`` field as fallback.

    Args:
        task_id: Restore task ID returned by step 2.

    Returns:
        ``{"oldId": "newId", ...}``; an empty dict when the task completed
        but no mapping could be found.

    Raises:
        RuntimeError: When the task reports failure, reports a negative
            progress, or does not complete within the polling budget.
    """
    url = f"{XUNLEI_PAN_API}/drive/v1/tasks/{task_id}"
    headers = self.credential.get_headers()

    for attempt in range(1, self.poll_max_attempts + 1):
        try:
            resp = self.session.get(url, headers=headers, timeout=self.timeout)
            resp.raise_for_status()
            # A non-JSON body counts as a failed attempt and is retried
            data = resp.json()
        except (requests.RequestException, ValueError):
            logger.warning(
                "[XunleiTransfer] ③ Poll attempt %d/%d failed, retrying...",
                attempt,
                self.poll_max_attempts,
            )
            time.sleep(self.poll_interval)
            continue

        if not isinstance(data, dict):
            data = {}

        try:
            progress = int(data.get("progress") or 0)
        except (TypeError, ValueError):
            progress = 0
        status = data.get("status", "")

        logger.debug(
            "[XunleiTransfer] ③ Poll %d/%d: progress=%d, status=%s",
            attempt,
            self.poll_max_attempts,
            progress,
            status,
        )

        if status in ("failed", "error"):
            raise RuntimeError(
                f"转存任务失败: task_id={task_id}, status={status}"
            )

        if progress == 100:
            # "params": null is treated the same as a missing key
            params = data.get("params")
            trace_file_ids = params.get("trace_file_ids", "") if isinstance(params, dict) else ""

            if trace_file_ids:
                mapping: Any = None
                if isinstance(trace_file_ids, dict):
                    mapping = trace_file_ids
                else:
                    try:
                        mapping = json.loads(trace_file_ids)
                    except (json.JSONDecodeError, TypeError):
                        mapping = None

                # Only a JSON object is a usable mapping
                if isinstance(mapping, dict):
                    logger.info(
                        "[XunleiTransfer] ③ Restore completed: %d files mapped",
                        len(mapping),
                    )
                    return mapping

                logger.warning(
                    "[XunleiTransfer] ③ Failed to parse trace_file_ids: %s",
                    trace_file_ids,
                )

            # Fallback: the result field
            result = data.get("result")
            if isinstance(result, dict) and result:
                logger.info("[XunleiTransfer] ③ Restore completed via result field")
                return result

            # Last resort: an empty mapping
            logger.warning(
                "[XunleiTransfer] ③ Restore completed but no file mapping found"
            )
            return {}

        if progress < 0:
            raise RuntimeError(
                f"转存任务异常: task_id={task_id}, progress={progress}"
            )

        time.sleep(self.poll_interval)

    raise RuntimeError(
        f"转存任务超时: task_id={task_id}, 已轮询 {self.poll_max_attempts} 次"
    )
|
||||
|
||||
# ─── 步骤 ④:创建新分享 ─────────────────────────────────────
|
||||
|
||||
def _create_share(
    self,
    file_ids: List[str],
    expiration_days: str = "-1",
) -> Tuple[str, str]:
    """Step 4: create a new share link for the given files.

    Sends ``POST /drive/v1/share`` with the body
    ``{"file_ids": [...], "expiration_days": "<days>"}``.

    Args:
        file_ids: IDs of the files to share.
        expiration_days: Expiry in days; "-1" means the share never expires.

    Returns:
        ``(share_url, pass_code)``. When the response carries no URL, one
        is built from its ``share_id``.

    Raises:
        RuntimeError: On a transport or HTTP error, an unparseable
            response body, a non-zero business error code, or a response
            from which no share URL can be derived.
    """
    url = f"{XUNLEI_PAN_API}/drive/v1/share"

    body: Dict[str, Any] = {
        "file_ids": file_ids,
        "expiration_days": expiration_days,
    }
    # The share action may require a captcha token
    headers = self.credential.get_headers_with_captcha(action="share")
    headers.setdefault("Content-Type", "application/json")

    logger.info(
        "[XunleiTransfer] ④ Creating share: %d files", len(file_ids)
    )

    try:
        resp = self.session.post(
            url, json=body, headers=headers, timeout=self.timeout
        )
        resp.raise_for_status()
        # Parsed inside the try so a non-JSON body surfaces as RuntimeError
        data = resp.json()
    except (requests.RequestException, ValueError) as exc:
        raise RuntimeError(f"创建分享失败: {exc}") from exc

    if not isinstance(data, dict):
        raise RuntimeError(f"创建分享失败: unexpected response {data!r}")

    errcode = data.get("errcode", data.get("error_code", 0))
    if errcode != 0:
        raise RuntimeError(
            f"创建分享失败: errcode={errcode}, message={data.get('message', data.get('error', ''))}"
        )

    share_url: str = data.get("share_url", data.get("link", ""))
    pass_code: str = data.get("pass_code", data.get("code", ""))

    if not share_url:
        share_id = data.get("share_id", "")
        if share_id:
            share_url = f"https://pan.xunlei.com/s/{share_id}"

    if not share_url:
        # An empty URL must not be reported to the caller as a success.
        raise RuntimeError(f"创建分享失败: share_url missing, response: {data}")

    # Only the presence of the pass code is logged, not its value.
    logger.info(
        "[XunleiTransfer] ④ Share created: url=%s, has_pass_code=%s",
        share_url,
        bool(pass_code),
    )
    return share_url, pass_code
|
||||
|
||||
# ─── 公开入口 ─────────────────────────────────────────────────
|
||||
|
||||
def transfer(
    self,
    share_url: str,
    save_dir: str = "",
    share_password: str = "",
) -> Dict[str, Any]:
    """Run the full 4-step transfer for one share link.

    Steps: fetch the share details, restore the files into the caller's
    drive, poll the restore task for the old-ID to new-ID mapping, then
    create a new share from the new IDs.

    Args:
        share_url: Original share link, e.g. https://pan.xunlei.com/s/xxxxx.
        save_dir: Target directory ID for the restore; empty means root.
        share_password: Used only as the fallback for the returned
            ``passcode`` when the API returns none; it is not sent to the
            API.

    Returns:
        Dict with the keys:
        - success: bool
        - new_file_ids: List[str], the IDs of the restored files
        - file_name: str, the share title
        - share_url: str, the new share link
        - passcode: str, the new share's pass code

    Raises:
        ValueError: When no share ID can be parsed from ``share_url``.
        RuntimeError: When any step fails.
    """
    # 0. Parse the share ID out of the URL
    match = SHARE_URL_PATTERN.search(share_url)
    if not match:
        raise ValueError(f"无法从URL中提取迅雷分享ID: {share_url}")
    share_id: str = match.group(1)

    logger.info(
        "[XunleiTransfer] Starting 4-step transfer for share_id=%s", share_id
    )

    # ① Fetch the share details
    share_info: Dict[str, Any] = self._get_share_info(share_id)
    files: List[Dict[str, Any]] = share_info.get("files", [])
    title: str = share_info.get("title", "分享")
    pass_code_token: str = share_info.get("pass_code_token", "")

    # Take the first non-empty ID key per file. Chaining with "or" matters:
    # a present-but-empty "file_id" must not hide a real "fid" or "id".
    file_ids: List[str] = []
    for entry in files:
        fid = entry.get("file_id") or entry.get("fid") or entry.get("id")
        if fid:
            file_ids.append(fid)

    if not file_ids:
        raise RuntimeError("无法从分享中提取文件ID")

    # ② Start the restore
    task_id: str = self._restore_files(
        share_id, pass_code_token, file_ids, parent_id=save_dir
    )

    # ③ Poll the task for the old-ID to new-ID mapping
    id_mapping = self._poll_restore_task(task_id)
    if not isinstance(id_mapping, dict):
        # Anything but a mapping cannot be looked up; treat it as empty
        id_mapping = {}

    new_file_ids: List[str] = []
    for old_fid in file_ids:
        new_fid = id_mapping.get(old_fid, "")
        if new_fid:
            new_file_ids.append(new_fid)
        else:
            logger.warning(
                "[XunleiTransfer] No newId mapped for old_fid=%s", old_fid
            )

    if not new_file_ids:
        raise RuntimeError("转存完成但未获取到新文件ID")

    # ④ Create the new share
    share_url_new, pass_code = self._create_share(new_file_ids)

    logger.info(
        "[XunleiTransfer] Transfer complete: %d files, new_share=%s",
        len(new_file_ids),
        share_url_new,
    )

    return {
        "success": True,
        "new_file_ids": new_file_ids,
        "file_name": title,
        "share_url": share_url_new,
        "passcode": pass_code or share_password,
    }
|
||||
|
||||
@staticmethod
def parse_share_url(url: str) -> Optional[str]:
    """Extract the share ID from a Xunlei share URL.

    Args:
        url: Share link to search with ``SHARE_URL_PATTERN``.

    Returns:
        The share ID, or None when the URL does not match.
    """
    found = SHARE_URL_PATTERN.search(url)
    if not found:
        return None
    return found.group(1)
|
||||
|
||||
@staticmethod
|
||||
def extract_file_ids(files: List[Dict[str, Any]]) -> List[str]:
|
||||
"""从文件列表中提取 file_id。
|
||||
|
||||
Args:
|
||||
files: 文件信息字典列表。
|
||||
|
||||
Returns:
|
||||
file_id 字符串列表。
|
||||
"""
|
||||
return [
|
||||
f.get("file_id", f.get("fid", f.get("id", "")))
|
||||
for f in files
|
||||
if f.get("file_id") or f.get("fid") or f.get("id")
|
||||
]
|
||||
|
||||
@staticmethod
|
||||
def parse_trace_file_ids(trace: str) -> Dict[str, str]:
|
||||
"""解析 trace_file_ids JSON 字符串为 oldId→newId 映射。
|
||||
|
||||
Args:
|
||||
trace: trace_file_ids JSON 字符串,如 '{"oldId":"newId"}'.
|
||||
|
||||
Returns:
|
||||
{"oldId": "newId", ...} 映射字典。
|
||||
"""
|
||||
try:
|
||||
return json.loads(trace)
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
return {}
|
||||
|
||||
def close(self) -> None:
    """Close the HTTP session held in ``self.session``."""
    http_session = self.session
    http_session.close()
|
||||
|
||||
def __enter__(self) -> "XunleiTransfer":
|
||||
return self
|
||||
|
||||
def __exit__(self, *args: Any) -> None:
|
||||
self.close()
|
||||
Reference in New Issue
Block a user