v0.2.7: 修复Redis连接 + 启动管理后台
- 修复Redis认证 (配置密码) - 启动Python管理后台 (端口9531, 15个功能开关) - 统一版本号 0.2.7 - 更新docker-compose.yml (镜像版本/Redis URL/Admin服务)
This commit is contained in:
253
cloudsearch_transfer/adapter/baidu/__init__.py
Normal file
253
cloudsearch_transfer/adapter/baidu/__init__.py
Normal file
@@ -0,0 +1,253 @@
|
||||
"""
|
||||
百度网盘适配器 — CloudSearch Transfer v1.0.0
|
||||
参考 cloud-auto-save 的 BaiduNetDisk + netdisk 的 PanbaiduSave
|
||||
|
||||
完整的 5 步转存流程 + bdstoken 管理 + 路径删除 + 广告过滤
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import List, Tuple
|
||||
|
||||
from ..base import BaseCloudDriveAdapter, FileInfo
|
||||
from ...config import PlatformConfig, TransferConfig
|
||||
from ...errors import TransferError, TransferErrorCode
|
||||
|
||||
from .credential import BaiduCredentialManager
|
||||
from .transfer import BaiduTransfer
|
||||
from .cleanup import BaiduCleanup
|
||||
|
||||
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class BaiduAdapter(BaseCloudDriveAdapter):
    """Baidu Netdisk adapter built on a Cookie + bdstoken credential.

    Supported operations:
    - verify a share link and its extraction code
    - transfer the shared files into the account's own drive
    - create a new share from the transferred files
    - delete files by path
    - filter advertisement files by name
    """

    PLATFORM_NAME = "百度网盘"
    PLATFORM_KEY = "baidu"
    URL_PATTERNS = [
        r'pan\.baidu\.com/s/1([A-Za-z0-9_-]+)',
    ]

    def __init__(self, config: PlatformConfig, transfer_config: TransferConfig):
        """Build the credential manager, transfer executor and cleanup helper.

        Args:
            config: Platform configuration; ``cookie`` and ``banned_keywords``
                are read here.
            transfer_config: Forwarded unchanged to the base class.

        Raises:
            TransferError: ``NOT_LOGIN`` when the cookie fails validation.
        """
        super().__init__(config, transfer_config)

        self.credential = BaiduCredentialManager(
            cookie=config.cookie,
            session=self.session,
        )

        if not self.credential.validate():
            raise TransferError(
                TransferErrorCode.NOT_LOGIN,
                message="百度网盘 Cookie 无效或太短 (需 >= 50 字符)",
                platform=self.PLATFORM_KEY,
            )

        # Pre-warm the bdstoken; a failure is tolerated because every later
        # call fetches it again on demand.
        try:
            self.credential.get_bdstoken()
        except TransferError as e:
            logger.warning(f"预取 bdstoken 失败: {e},将在首次使用时重试")

        self._transfer = BaiduTransfer(self.session, self.credential)
        self._cleanup = BaiduCleanup(
            self.session, self.credential,
            ad_keywords=config.banned_keywords or None,
        )

        # Files of the most recent transfer, consumed by _filter_ads.
        self._last_transfer_files: List[dict] = []

    # ─── session setup ──────────────────────────────────────

    def _setup_session(self):
        """Install the Cookie (when configured) and Referer headers on the session."""
        if self.config.cookie:
            self.session.headers["Cookie"] = self.config.cookie
        self.session.headers["Referer"] = "https://pan.baidu.com/"

    # ─── core abstract-method implementations ───────────────

    def _get_share_detail(self, pwd_id: str, passcode: str = "") -> dict:
        """Fetch the share details (steps 1 and 2).

        Args:
            pwd_id: The ``surl`` part of the share URL (what follows ``s/1``).
            passcode: Extraction code; verification is skipped when empty.

        Returns:
            A dict with the keys ``title``, ``shareid``, ``uk``, ``fs_ids``
            and ``filenames``.
        """
        bdstoken = self.credential.get_bdstoken()

        # Step 1: verify the extraction code, only when one was supplied.
        if passcode:
            self._transfer._verify_password(pwd_id, passcode, bdstoken)

        # Step 2: parse the share page.
        share_info = self._transfer._parse_share_page(pwd_id)

        return {
            "title": share_info.get("title", ""),
            "shareid": share_info["shareid"],
            "uk": share_info["uk"],
            "fs_ids": share_info["fs_ids"],
            "filenames": share_info["filenames"],
        }

    def _save_files(self, pwd_id: str, detail: dict,
                    save_dir: str) -> List[str]:
        """Transfer the shared files into the own drive (steps 3 and 4).

        Args:
            pwd_id: The share ``surl`` (not used by this implementation).
            detail: The dict returned by ``_get_share_detail``.
            save_dir: Destination directory.

        Returns:
            The fs_ids of the transferred files found in ``save_dir``.
        """
        bdstoken = self.credential.get_bdstoken()
        filenames = detail.get("filenames", [])

        # Step 3: transfer.
        self._transfer._transfer_files(
            detail["shareid"], detail["uk"], detail["fs_ids"], save_dir, bdstoken
        )

        # Step 4: list the directory and match the new fs_ids by name.
        new_fs_ids = self._transfer._list_and_match(save_dir, filenames, bdstoken)

        # _list_and_match skips names it cannot find, so the two lists only
        # line up position by position when every name matched. On a partial
        # match the pairing would attach wrong names to IDs, so nothing is
        # remembered and _filter_ads leaves the IDs untouched.
        if len(new_fs_ids) == len(filenames):
            self._last_transfer_files = [
                {"fs_id": fid, "name": name}
                for fid, name in zip(new_fs_ids, filenames)
                if fid
            ]
        else:
            self._last_transfer_files = []

        return new_fs_ids

    def _create_share(self, file_ids: List[str], title: str,
                      password: str = "") -> Tuple[str, str]:
        """Create a permanent share for the transferred files (step 5).

        Args:
            file_ids: fs_ids of the transferred files; non-numeric entries
                are logged and ignored.
            title: Original title (not used by this implementation).
            password: Share password.

        Returns:
            ``(new_share_url, share_password)``.

        Raises:
            TransferError: ``SHARE_LINK_FAIL`` when no usable fs_id remains.
        """
        numeric_ids: List[int] = []
        for fid in file_ids:
            try:
                numeric_ids.append(int(fid))
            except (TypeError, ValueError):
                logger.warning(f"忽略非数字 fs_id: {fid}")

        # Fail with the adapter's own error type instead of letting int()
        # raise ValueError or sending an empty list to the API.
        if not numeric_ids:
            raise TransferError(
                TransferErrorCode.SHARE_LINK_FAIL,
                message="创建分享失败: 没有有效的 fs_id",
                platform=self.PLATFORM_KEY,
            )

        return self._transfer.create_share(
            fids=numeric_ids,
            password=password,
            period=0,  # permanent
        )

    # ─── listing & deletion ─────────────────────────────────

    def get_files(self, parent_fid: str = "0") -> List[FileInfo]:
        """List the files of a drive directory via ``GET /api/list``.

        Args:
            parent_fid: Directory path rather than a numeric ID; ``"0"`` is
                mapped to the root ``"/"``.

        Returns:
            One ``FileInfo`` per entry, or an empty list when the request
            fails or the API reports a non-zero errno (both are only logged).
        """
        bdstoken = self.credential.get_bdstoken()
        dir_path = parent_fid if parent_fid != "0" else "/"

        url = "https://pan.baidu.com/api/list"
        params = {"dir": dir_path, "bdstoken": bdstoken}
        headers = self.credential.get_headers()

        # Best effort by design: listing problems are logged, not raised.
        try:
            resp = self._get(url, params=params, headers=headers)
            data = resp.json()
        except Exception as e:
            logger.error(f"百度列出目录失败: {e}")
            return []

        errno = data.get("errno", -1)
        if errno != 0:
            logger.error(f"百度列出目录 errno={errno}: {data}")
            return []

        files = []
        for item in data.get("list", []):
            name = item.get("server_filename", "")
            is_dir = item.get("isdir", 0) == 1
            # Directories never get an extension, even if the name has a dot.
            ext = name.rsplit(".", 1)[-1] if not is_dir and "." in name else ""
            files.append(FileInfo(
                fid=str(item.get("fs_id", "")),
                name=name,
                size=item.get("size", 0),
                is_dir=is_dir,
                ext=ext,
            ))

        return files

    def delete(self, file_ids: List[str]) -> bool:
        """Delete drive files by path.

        Args:
            file_ids: Full drive paths, e.g. ``["/dir/file.txt"]``.

        Returns:
            ``True`` when the deletion succeeded or the files did not exist.
        """
        return self._cleanup.delete_files(file_ids)

    # ─── advertisement filtering ────────────────────────────

    def _filter_ads(self, file_ids: List[str]) -> List[str]:
        """Drop advertisement files, judged by the names of the last transfer.

        Args:
            file_ids: fs_ids to filter.

        Returns:
            The IDs that are kept. IDs whose name is unknown are always kept,
            and all IDs are returned unchanged when no transfer is remembered.
        """
        if not self._last_transfer_files:
            return file_ids

        # Look names up per ID so they line up with file_ids regardless of
        # the order or size of the remembered transfer.
        name_by_id = {f["fs_id"]: f["name"] for f in self._last_transfer_files}
        names = [name_by_id.get(fid, "") for fid in file_ids]

        return self._cleanup.filter_ad_ids(file_ids, names)

    # ─── extensions ─────────────────────────────────────────

    def delete_paths(self, paths: List[str]) -> bool:
        """Convenience wrapper that deletes the given drive paths."""
        return self._cleanup.delete_files(paths)
|
||||
154
cloudsearch_transfer/adapter/baidu/cleanup.py
Normal file
154
cloudsearch_transfer/adapter/baidu/cleanup.py
Normal file
@@ -0,0 +1,154 @@
|
||||
"""
|
||||
百度网盘文件清理 — 删除文件 & 广告过滤
|
||||
参考 cloud-auto-save 的 filter_ads + netdisk 的 delete
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
from typing import List
|
||||
|
||||
import requests
|
||||
|
||||
from ...errors import TransferError, TransferErrorCode
|
||||
from .credential import BaiduCredentialManager, BAIDU_PAN_API
|
||||
|
||||
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)


# Default advertisement keywords; BaiduCleanup falls back to this list when
# it is constructed without keywords of its own.
DEFAULT_AD_KEYWORDS = [
    "公众号", "微信", "扫码", "加群", "QQ群", "广告",
    "关注", "免费领取", "点击领取", "全网", "最全",
    "防走丢", "防迷路", "备用", "务必下载", "必看",
    "解压密码", "压缩密码",
]
|
||||
|
||||
|
||||
class BaiduCleanup:
    """File deletion and advertisement filtering for Baidu Netdisk."""

    def __init__(self, session: "requests.Session",
                 credential: "BaiduCredentialManager",
                 ad_keywords: List[str] = None):
        """Store the collaborators and choose the keyword list.

        Args:
            session: HTTP session used for the delete request.
            credential: Supplies the bdstoken and the request headers.
            ad_keywords: Advertisement keywords; ``DEFAULT_AD_KEYWORDS`` is
                used when this is ``None`` or empty.
        """
        self.session = session
        self.credential = credential
        self.ad_keywords = ad_keywords or DEFAULT_AD_KEYWORDS

    # ─── deletion ───────────────────────────────────────────

    def delete_files(self, paths: List[str]) -> bool:
        """Delete several files by their drive paths.

        Sends ``POST /api/filemanager?opera=delete&bdstoken=...`` with the
        form field ``filelist`` set to the JSON-encoded path list.

        Args:
            paths: Full drive paths, e.g. ``["/dir/file.txt"]``.

        Returns:
            ``True`` when the list is empty or the API answers errno 0 or 2.

        Raises:
            TransferError: ``NETWORK_ERROR`` when the request fails, the
                response is not JSON, or the API reports another errno.
        """
        if not paths:
            logger.info("删除列表为空,跳过")
            return True

        bdstoken = self.credential.get_bdstoken()
        url = f"{BAIDU_PAN_API}/api/filemanager"
        params = {
            "opera": "delete",
            "bdstoken": bdstoken,
        }
        data = {
            "filelist": json.dumps(paths, ensure_ascii=False),
        }
        headers = self.credential.get_headers()
        headers["Content-Type"] = "application/x-www-form-urlencoded"

        try:
            resp = self.session.post(
                url, params=params, data=data, headers=headers, timeout=30
            )
            resp.raise_for_status()
            result = resp.json()
        except Exception as e:
            raise TransferError(
                TransferErrorCode.NETWORK_ERROR,
                message=f"百度删除请求失败: {e}",
                platform="baidu",
            ) from e

        errno = result.get("errno", -1)

        # errno 0 is success; errno 2 is treated here as "file does not
        # exist" and therefore also counts as success.
        if errno in (0, 2):
            logger.info(f"百度删除完成: {len(paths)} 个路径 (errno={errno})")
            return True

        raise TransferError(
            TransferErrorCode.NETWORK_ERROR,
            message=f"百度删除失败 (errno={errno})",
            platform="baidu",
            details=result,
        )

    # ─── advertisement filtering ────────────────────────────

    def filter_ads(self, files: List[dict]) -> List[dict]:
        """Filter advertisement files out of a list of file dicts.

        Args:
            files: Dicts of the form ``{"fs_id": ..., "name": ...}``.

        Returns:
            The dicts whose ``name`` contains no advertisement keyword.
        """
        if not self.ad_keywords:
            return files

        retained = []
        removed = []
        for f in files:
            name = f.get("name", "")
            if self._is_ad(name):
                removed.append(name)
            else:
                retained.append(f)

        if removed:
            logger.info(f"广告过滤: 移除 {len(removed)} 个文件: {removed}")
        return retained

    def filter_ad_ids(self, file_ids: List[str],
                      file_names: List[str]) -> List[str]:
        """Filter advertisement files and return the IDs that are kept.

        Args:
            file_ids: File IDs.
            file_names: Names matching ``file_ids`` position by position.

        Returns:
            The IDs whose name is not an advertisement. An ID without a
            matching name (``file_names`` shorter than ``file_ids``) is kept
            rather than silently dropped.
        """
        if not self.ad_keywords:
            return file_ids

        retained = []
        for index, fid in enumerate(file_ids):
            # A missing name counts as "", which _is_ad never flags.
            name = file_names[index] if index < len(file_names) else ""
            if self._is_ad(name):
                logger.info(f"广告过滤: 移除 {name}")
            else:
                retained.append(fid)

        return retained

    def _is_ad(self, filename: str) -> bool:
        """Return whether ``filename`` contains an advertisement keyword.

        The comparison is case-insensitive. Empty or whitespace-only
        keywords are ignored, because an empty keyword is a substring of
        every name and would flag every file.
        """
        if not filename:
            return False
        name_lower = filename.lower()
        return any(
            kw.lower() in name_lower
            for kw in self.ad_keywords
            if kw and kw.strip()
        )
|
||||
101
cloudsearch_transfer/adapter/baidu/credential.py
Normal file
101
cloudsearch_transfer/adapter/baidu/credential.py
Normal file
@@ -0,0 +1,101 @@
|
||||
"""
|
||||
百度网盘凭证管理器 — bdstoken 获取与校验
|
||||
参考 cloud-auto-save 的 BaiduNetDisk.cookie 机制
|
||||
"""
|
||||
|
||||
import logging
|
||||
import requests
|
||||
|
||||
from ...errors import TransferError, TransferErrorCode
|
||||
|
||||
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)


# Base URL of the Baidu Netdisk web API; endpoint paths are appended to it.
BAIDU_PAN_API = "https://pan.baidu.com"
|
||||
|
||||
|
||||
class BaiduCredentialManager:
    """Cookie credential and bdstoken cache for Baidu Netdisk.

    The bdstoken is fetched from the API on first use and cached on the
    instance until it is invalidated or a refresh is forced.
    """

    def __init__(self, cookie: str, session: "requests.Session"):
        """
        Args:
            cookie: The complete Baidu Cookie string.
            session: HTTP session used to fetch the bdstoken.
        """
        self.cookie = cookie
        self.session = session
        self._bdstoken: str = ""

    # ─── public API ─────────────────────────────────────────

    def validate(self) -> bool:
        """Return whether the cookie has at least 50 characters once stripped.

        This is a length check only; the cookie is not sent anywhere.
        """
        return bool(self.cookie and len(self.cookie.strip()) >= 50)

    def get_bdstoken(self, force_refresh: bool = False) -> str:
        """Return the bdstoken, fetching and caching it when needed.

        Calls ``GET /api/gettemplatevariable?fields=["bdstoken"]``.

        Args:
            force_refresh: Ignore the cached token and fetch a new one.

        Returns:
            The non-empty bdstoken.

        Raises:
            TransferError: ``BAIDU_BDSTOKEN_FAIL`` when the request fails,
                the response is malformed, the API reports a non-zero errno,
                or the returned token is empty.
        """
        if self._bdstoken and not force_refresh:
            return self._bdstoken

        url = f"{BAIDU_PAN_API}/api/gettemplatevariable"
        params = {"fields": '["bdstoken"]'}
        headers = self.get_headers()

        try:
            resp = self.session.get(url, params=params, headers=headers, timeout=15)
            resp.raise_for_status()
            data = resp.json()
        except Exception as e:
            logger.error(f"获取 bdstoken 网络异常: {e}")
            raise TransferError(
                TransferErrorCode.BAIDU_BDSTOKEN_FAIL,
                message=f"百度 bdstoken 请求失败: {e}",
                platform="baidu",
            ) from e

        # A JSON body that is not an object would otherwise fail with
        # AttributeError, which callers catching TransferError would miss.
        if not isinstance(data, dict):
            logger.error(f"获取 bdstoken API 返回格式异常: {data}")
            raise TransferError(
                TransferErrorCode.BAIDU_BDSTOKEN_FAIL,
                message="百度 bdstoken 响应格式异常",
                platform="baidu",
                details={"response": data},
            )

        errno = data.get("errno", -1)
        if errno != 0:
            logger.error(f"获取 bdstoken API 返回 errno={errno}: {data}")
            raise TransferError(
                TransferErrorCode.BAIDU_BDSTOKEN_FAIL,
                message=f"百度 bdstoken 获取失败 (errno={errno})",
                platform="baidu",
                details={"response": data},
            )

        # "result" may be missing or null; both are handled as an empty token.
        result = data.get("result")
        self._bdstoken = result.get("bdstoken", "") if isinstance(result, dict) else ""
        if not self._bdstoken:
            raise TransferError(
                TransferErrorCode.BAIDU_BDSTOKEN_FAIL,
                message="百度 bdstoken 为空",
                platform="baidu",
            )

        logger.info("bdstoken 获取成功")
        return self._bdstoken

    def get_headers(self) -> dict:
        """Return a fresh dict with the Cookie, Referer and Origin headers."""
        return {
            "Cookie": self.cookie,
            "Referer": "https://pan.baidu.com/",
            "Origin": "https://pan.baidu.com",
        }

    def invalidate_bdstoken(self):
        """Clear the cached token so the next get_bdstoken call refetches it."""
        self._bdstoken = ""
        logger.info("bdstoken 缓存已失效")
|
||||
448
cloudsearch_transfer/adapter/baidu/transfer.py
Normal file
448
cloudsearch_transfer/adapter/baidu/transfer.py
Normal file
@@ -0,0 +1,448 @@
|
||||
"""
|
||||
百度网盘转存核心 — 5 步转存流程
|
||||
参考 netdisk 的 PanbaiduSave + cloud-auto-save 的 BaiduNetDisk.transfer
|
||||
|
||||
流程:
|
||||
① 验证提取码 → POST /share/verify
|
||||
② 解析分享页 → GET /s/1{surl}
|
||||
③ 转存文件 → POST /share/transfer
|
||||
④ 列出目录 → GET /api/list
|
||||
⑤ 创建分享 → POST /share/set
|
||||
"""
|
||||
|
||||
import re
|
||||
import json
|
||||
import logging
|
||||
from typing import List, Tuple
|
||||
|
||||
import requests
|
||||
|
||||
from ...errors import TransferError, TransferErrorCode
|
||||
from .credential import BaiduCredentialManager, BAIDU_PAN_API
|
||||
|
||||
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)

# ─── Regular expressions ───────────────────────────────────

# shareid in the share-page HTML
RE_SHAREID = re.compile(r"""shareid["\s:=]+(\d+)""")
# uk in the share-page HTML
RE_UK = re.compile(r"""uk["\s:=]+(\d+)""")
# a single fs_id in the share-page HTML
RE_FS_ID = re.compile(r'"fs_id"\s*:\s*(\d+)')
# a single server_filename in the share-page HTML
RE_FILENAME = re.compile(r'"server_filename"\s*:\s*"([^"]*)"')
# the title in the HTML/JSON
RE_TITLE = re.compile(r'"title"\s*:\s*"([^"]*)"')
# marks where the file_list JSON object starts; group 1 is its opening brace
RE_FILE_LIST_MARK = re.compile(r'"file_list"\s*:\s*(\{)', re.DOTALL)
# fallback: one file entry whose fs_id is directly followed by server_filename
RE_FILE_ENTRY = re.compile(r'\{"fs_id":(\d+),"server_filename":"([^"]+)"')
|
||||
|
||||
|
||||
class BaiduTransfer:
    """Executor for the five-step Baidu Netdisk transfer flow.

    Each instance is bound to one HTTP session and one credential manager
    and drives: verify code -> parse share page -> transfer -> list
    directory -> create share.
    """

    def __init__(self, session: "requests.Session",
                 credential: "BaiduCredentialManager"):
        """
        Args:
            session: HTTP session used for every request.
            credential: Supplies the cookie, the bdstoken and the headers.
        """
        self.session = session
        self.credential = credential
        self.cookie = credential.cookie

    # ─── main flow ──────────────────────────────────────────

    def execute(self, surl: str, password: str,
                save_dir: str = "/") -> Tuple[List[str], dict]:
        """Run steps 1-4 of the transfer flow.

        Args:
            surl: Share short code (what follows ``s/1`` in the URL).
            password: Extraction code; verification is skipped when empty.
            save_dir: Destination directory.

        Returns:
            ``(new_fs_ids, file_info)`` where ``new_fs_ids`` are the fs_ids
            found in ``save_dir`` and ``file_info`` maps each to its name.

        Raises:
            TransferError: When any step fails, the share contains no file,
                or no transferred file is found afterwards.
        """
        bdstoken = self.credential.get_bdstoken()

        # Step 1: verify the extraction code, only when one was supplied.
        if password:
            logger.info(f"[百度转存] ① 验证提取码 surl={surl}")
            self._verify_password(surl, password, bdstoken)

        # Step 2: parse the share page.
        logger.info(f"[百度转存] ② 解析分享页 surl={surl}")
        share_info = self._parse_share_page(surl)
        fs_ids = share_info["fs_ids"]
        filenames = share_info["filenames"]

        if not fs_ids:
            raise TransferError(
                TransferErrorCode.RESOURCE_EMPTY,
                message="分享中没有找到可转存的文件",
                platform="baidu",
            )

        # Step 3: transfer into the own drive.
        logger.info(f"[百度转存] ③ 转存 {len(fs_ids)} 个文件到 {save_dir}")
        self._transfer_files(
            share_info["shareid"], share_info["uk"], fs_ids, save_dir, bdstoken
        )

        # Step 4: list the destination and match the new fs_ids by name.
        logger.info(f"[百度转存] ④ 列出目录 {save_dir} 匹配新 fs_id")
        matched = self._match_listing(save_dir, filenames, bdstoken)

        if not matched:
            raise TransferError(
                TransferErrorCode.NETWORK_ERROR,
                message="转存后无法匹配到新文件 ID",
                platform="baidu",
            )

        # Built from the (fs_id, name) pairs, so names stay correct even
        # when only some of the files were found.
        new_fs_ids = [fid for fid, _ in matched]
        file_info = dict(matched)

        return new_fs_ids, file_info

    def create_share(self, fids: List[int], password: str = "",
                     period: int = 0) -> Tuple[str, str]:
        """Step 5: create a new share.

        Args:
            fids: fs_ids of the files to share.
            password: Share password (empty means none is sent).
            period: Validity period; 0 means permanent.

        Returns:
            ``(share_url, share_password)``.

        Raises:
            TransferError: ``SHARE_LIMIT`` for errno 9219, ``SHARE_LINK_FAIL``
                for any other non-zero errno, ``NETWORK_ERROR`` when the
                request fails or the response is not a JSON object.
        """
        bdstoken = self.credential.get_bdstoken()
        result = self._post_form(
            f"{BAIDU_PAN_API}/share/set",
            params={
                "channel": "chunlei",
                "clienttype": "0",
                "web": "1",
                "bdstoken": bdstoken,
            },
            data={
                "fid_list": json.dumps(fids),
                "period": period,
                "pwd": password,
            },
            timeout=30,
            error_prefix="创建分享请求失败",
        )
        errno = result.get("errno", -1)

        if errno == 9219:
            raise TransferError(
                TransferErrorCode.SHARE_LIMIT,
                message="百度今日分享次数过多",
                platform="baidu",
            )
        if errno != 0:
            raise TransferError(
                TransferErrorCode.SHARE_LINK_FAIL,
                message=f"创建分享失败 (errno={errno})",
                platform="baidu",
                details=result,
            )

        share_url = result.get("link", "")
        share_password = result.get("pwd", password) or password

        logger.info(f"[百度转存] ⑤ 分享创建成功: {share_url}")
        return share_url, share_password

    # ─── step helpers ───────────────────────────────────────

    def _post_form(self, url: str, params: dict, data: dict,
                   timeout: int, error_prefix: str) -> dict:
        """POST a form and return the decoded JSON object.

        Raises:
            TransferError: ``NETWORK_ERROR`` when the request fails, the
                status is an error, or the body is not a JSON object.
        """
        headers = self.credential.get_headers()
        headers["Content-Type"] = "application/x-www-form-urlencoded"

        # Decoding sits inside the try so a non-JSON body is reported as a
        # TransferError too.
        try:
            resp = self.session.post(
                url, params=params, data=data, headers=headers, timeout=timeout
            )
            resp.raise_for_status()
            result = resp.json()
        except Exception as e:
            raise TransferError(
                TransferErrorCode.NETWORK_ERROR,
                message=f"{error_prefix}: {e}",
                platform="baidu",
            ) from e

        if not isinstance(result, dict):
            raise TransferError(
                TransferErrorCode.NETWORK_ERROR,
                message=f"{error_prefix}: 响应不是 JSON 对象",
                platform="baidu",
                details={"response": result},
            )
        return result

    def _verify_password(self, surl: str, password: str, bdstoken: str):
        """Step 1: verify the extraction code.

        Sends ``POST /share/verify?surl=...&bdstoken=...`` with the form
        field ``pwd``.

        Raises:
            TransferError: ``PASSCODE_WRONG`` for errno -9 / -62,
                ``SHARE_NOT_EXIST`` for errno 2 / 118, ``NETWORK_ERROR``
                otherwise.
        """
        result = self._post_form(
            f"{BAIDU_PAN_API}/share/verify",
            params={
                "surl": surl,
                "bdstoken": bdstoken,
            },
            data={"pwd": password},
            timeout=15,
            error_prefix="验证提取码请求失败",
        )
        errno = result.get("errno", -1)

        if errno == 0:
            logger.info("提取码验证通过")
            return

        # NOTE(review): -62 is reported as a wrong code here; confirm it does
        # not instead signal too many verification attempts.
        if errno in (-9, -62):
            raise TransferError(
                TransferErrorCode.PASSCODE_WRONG,
                message="百度提取码错误",
                platform="baidu",
            )
        if errno in (2, 118):
            raise TransferError(
                TransferErrorCode.SHARE_NOT_EXIST,
                message="百度分享不存在或已失效",
                platform="baidu",
            )
        raise TransferError(
            TransferErrorCode.NETWORK_ERROR,
            message=f"验证提取码失败 (errno={errno})",
            platform="baidu",
            details=result,
        )

    @staticmethod
    def _decode_json_string(raw: str) -> str:
        """Decode JSON string escapes (such as ``\\uXXXX``) in a regex capture.

        Returns ``raw`` unchanged when it is not a valid JSON string body.
        """
        try:
            decoded = json.loads(f'"{raw}"')
        except ValueError:
            return raw
        return decoded if isinstance(decoded, str) else raw

    @staticmethod
    def _load_file_list(html: str, start: int) -> Tuple[List[str], List[str]]:
        """Parse the JSON object at ``html[start]`` and read its ``list``.

        Returns:
            ``(fs_ids, filenames)`` for every entry that has an fs_id; two
            empty lists when the object cannot be parsed.
        """
        # raw_decode stops at the end of the object and handles braces
        # inside string values, unlike manual brace counting.
        try:
            file_list, _ = json.JSONDecoder().raw_decode(html, start)
        except ValueError:
            return [], []

        entries = file_list.get("list", []) if isinstance(file_list, dict) else []
        if not isinstance(entries, list):
            return [], []

        fs_ids: List[str] = []
        filenames: List[str] = []
        for entry in entries:
            if not isinstance(entry, dict):
                continue
            fs_id = entry.get("fs_id")
            # An entry without an fs_id cannot be transferred.
            if fs_id in (None, ""):
                continue
            fs_ids.append(str(fs_id))
            filenames.append(entry.get("server_filename", ""))
        return fs_ids, filenames

    def _parse_share_page(self, surl: str) -> dict:
        """Step 2: fetch ``GET /s/1{surl}`` and extract the share metadata.

        Returns:
            A dict with ``shareid``, ``uk``, ``fs_ids``, ``filenames`` and
            ``title``. ``uk`` and ``title`` are ``""`` when not found.

        Raises:
            TransferError: ``NETWORK_ERROR`` when the page cannot be fetched,
                ``SHARE_NOT_EXIST`` when no shareid is found.
        """
        url = f"{BAIDU_PAN_API}/s/1{surl}"
        headers = self.credential.get_headers()

        try:
            resp = self.session.get(url, headers=headers, timeout=20)
            resp.raise_for_status()
            html = resp.text
        except Exception as e:
            raise TransferError(
                TransferErrorCode.NETWORK_ERROR,
                message=f"打开分享页面失败: {e}",
                platform="baidu",
            ) from e

        m_shareid = RE_SHAREID.search(html)
        if not m_shareid:
            raise TransferError(
                TransferErrorCode.SHARE_NOT_EXIST,
                message="无法从页面中提取 shareid,分享可能已失效",
                platform="baidu",
            )
        shareid = m_shareid.group(1)

        m_uk = RE_UK.search(html)
        uk = m_uk.group(1) if m_uk else ""

        m_title = RE_TITLE.search(html)
        title = self._decode_json_string(m_title.group(1)) if m_title else ""

        fs_ids: List[str] = []
        filenames: List[str] = []

        # Method 1: parse the embedded file_list JSON object.
        m_fl = RE_FILE_LIST_MARK.search(html)
        if m_fl:
            fs_ids, filenames = self._load_file_list(html, m_fl.start(1))

        # Method 2: fall back to matching individual file entries. The
        # captured names are still JSON-escaped, so decode them before they
        # are compared with the directory listing.
        if not fs_ids:
            for m in RE_FILE_ENTRY.finditer(html):
                fs_ids.append(m.group(1))
                filenames.append(self._decode_json_string(m.group(2)))

        # Method 3: possibly a single file; take the first fs_id and name.
        if not fs_ids:
            m_fsid = RE_FS_ID.search(html)
            m_name = RE_FILENAME.search(html)
            if m_fsid:
                fs_ids.append(m_fsid.group(1))
                filenames.append(
                    self._decode_json_string(m_name.group(1)) if m_name else ""
                )

        logger.info(
            f"解析分享页: shareid={shareid}, uk={uk}, "
            f"文件数={len(fs_ids)}, title={title[:30]}"
        )
        return {
            "shareid": shareid,
            "uk": uk,
            "fs_ids": fs_ids,
            "filenames": filenames,
            "title": title,
        }

    def _transfer_files(self, shareid: str, uk: str,
                        fs_ids: List[str], save_dir: str, bdstoken: str):
        """Step 3: transfer the shared files into the own drive.

        Sends ``POST /share/transfer?shareid=...&from=...&bdstoken=...`` with
        the form fields ``fsidlist`` (JSON list of ints) and ``path``.

        Raises:
            TransferError: ``CAPACITY_FULL`` for errno 12,
                ``SENSITIVE_RESOURCE`` for errno 9013, ``NETWORK_ERROR``
                otherwise.
        """
        result = self._post_form(
            f"{BAIDU_PAN_API}/share/transfer",
            params={
                "shareid": shareid,
                "from": uk,
                "bdstoken": bdstoken,
            },
            data={
                "fsidlist": json.dumps([int(x) for x in fs_ids]),
                "path": save_dir,
            },
            timeout=30,
            error_prefix="转存请求失败",
        )
        errno = result.get("errno", -1)

        if errno == 0:
            logger.info(f"转存成功: {len(fs_ids)} 个文件 → {save_dir}")
            return

        # NOTE(review): confirm that errno 12 really means the quota is
        # exhausted and not, for example, a file-count limit.
        if errno == 12:
            raise TransferError(
                TransferErrorCode.CAPACITY_FULL,
                message="百度网盘空间不足",
                platform="baidu",
            )
        if errno == 9013:
            raise TransferError(
                TransferErrorCode.SENSITIVE_RESOURCE,
                message="文件包含违规内容,无法转存",
                platform="baidu",
            )
        raise TransferError(
            TransferErrorCode.NETWORK_ERROR,
            message=f"转存失败 (errno={errno})",
            platform="baidu",
            details=result,
        )

    def _match_listing(self, save_dir: str, filenames: List[str],
                       bdstoken: str) -> List[Tuple[str, str]]:
        """List ``save_dir`` and pair each requested name with its fs_id.

        Calls ``GET /api/list?dir=...&bdstoken=...``.

        Returns:
            ``(fs_id, filename)`` pairs in the order of ``filenames``; names
            missing from the listing are logged and left out.

        Raises:
            TransferError: ``DIR_NOT_EXIST`` for errno -12, ``NETWORK_ERROR``
                when the request fails or another errno is reported.
        """
        url = f"{BAIDU_PAN_API}/api/list"
        params = {
            "dir": save_dir,
            "bdstoken": bdstoken,
        }
        headers = self.credential.get_headers()

        try:
            resp = self.session.get(url, params=params, headers=headers, timeout=15)
            resp.raise_for_status()
            data = resp.json()
        except Exception as e:
            raise TransferError(
                TransferErrorCode.NETWORK_ERROR,
                message=f"列出目录失败: {e}",
                platform="baidu",
            ) from e

        errno = data.get("errno", -1)
        if errno == -12:
            raise TransferError(
                TransferErrorCode.DIR_NOT_EXIST,
                message=f"百度目录不存在: {save_dir}",
                platform="baidu",
            )
        if errno != 0:
            raise TransferError(
                TransferErrorCode.NETWORK_ERROR,
                message=f"列出目录失败 (errno={errno})",
                platform="baidu",
                details=data,
            )

        # Map name -> fs_id; for duplicate names the last listed entry wins.
        name_to_fid = {}
        for item in data.get("list", []):
            name = item.get("server_filename", "")
            fid = str(item.get("fs_id", ""))
            if name and fid:
                name_to_fid[name] = fid

        matched: List[Tuple[str, str]] = []
        for fname in filenames:
            if fname in name_to_fid:
                matched.append((name_to_fid[fname], fname))
            else:
                logger.warning(f"目录中未找到文件: {fname}")

        logger.info(
            f"目录匹配: 期望 {len(filenames)} 个, 匹配到 {len(matched)} 个"
        )
        return matched

    def _list_and_match(self, save_dir: str, filenames: List[str],
                        bdstoken: str) -> List[str]:
        """Step 4: list ``save_dir`` and return the fs_ids matching ``filenames``.

        Returns:
            The matched fs_ids in the order of ``filenames``. Unmatched names
            are skipped, so the result can be shorter than ``filenames``.

        Raises:
            TransferError: See ``_match_listing``.
        """
        return [fid for fid, _ in self._match_listing(save_dir, filenames, bdstoken)]
|
||||
Reference in New Issue
Block a user