- 修复Redis认证 (配置密码) - 启动Python管理后台 (端口9531, 15个功能开关) - 统一版本号 0.2.7 - 更新docker-compose.yml (镜像版本/Redis URL/Admin服务)
155 lines
4.7 KiB
Python
155 lines
4.7 KiB
Python
"""
Baidu Netdisk file cleanup — file deletion & ad filtering.

Modeled on filter_ads from cloud-auto-save and delete from netdisk.
"""
|
||
|
||
import json
import logging
from typing import List, Optional

import requests

from ...errors import TransferError, TransferErrorCode
from .credential import BaiduCredentialManager, BAIDU_PAN_API
|
||
|
||
logger = logging.getLogger(__name__)

# Default ad keywords: a file whose name contains any of these substrings
# (compared case-insensitively) is treated as an advertisement.
DEFAULT_AD_KEYWORDS = [
    "公众号", "微信", "扫码", "加群", "QQ群", "广告",
    "关注", "免费领取", "点击领取", "全网", "最全",
    "防走丢", "防迷路", "备用", "务必下载", "必看",
    "解压密码", "压缩密码",
]
|
||
|
||
|
||
class BaiduCleanup:
    """Delete files on Baidu Netdisk and filter advertisement files by name.

    Attributes:
        session: HTTP session used to send API requests.
        credential: Provider of the ``bdstoken`` and the request headers.
        ad_keywords: Substrings that mark a file name as an advertisement;
            matching is case-insensitive. An empty list disables filtering.
    """

    def __init__(self, session: requests.Session,
                 credential: BaiduCredentialManager,
                 ad_keywords: Optional[List[str]] = None):
        """Store the collaborators and choose the keyword list.

        Args:
            session: HTTP session used for the delete request.
            credential: Object providing ``get_bdstoken()`` and
                ``get_headers()``.
            ad_keywords: Custom ad keywords. ``None`` or an empty list falls
                back to ``DEFAULT_AD_KEYWORDS``.
        """
        self.session = session
        self.credential = credential
        # Copy the defaults so that mutating this instance's keyword list can
        # never modify the shared module-level constant.
        self.ad_keywords = (
            ad_keywords if ad_keywords else list(DEFAULT_AD_KEYWORDS)
        )

    # ─── File deletion ───────────────────────────────────────

    def delete_files(self, paths: List[str]) -> bool:
        """Delete files in batch, addressed by their netdisk paths.

        Sends ``POST /api/filemanager?opera=delete&bdstoken={bdstoken}`` with
        the form field ``filelist`` set to a JSON array of paths, e.g.
        ``["/path/to/file1","/path/to/file2"]``.

        Args:
            paths: Full netdisk paths of the files, e.g. ``["/dir/file.txt"]``.

        Returns:
            True when ``paths`` is empty (no request is sent) or when the API
            answers with ``errno`` 0 or 2.

        Raises:
            TransferError: The request failed, the response was not a JSON
                object, or the API returned any other ``errno``.
        """
        if not paths:
            logger.info("删除列表为空,跳过")
            return True

        url = f"{BAIDU_PAN_API}/api/filemanager"
        params = {
            "opera": "delete",
            "bdstoken": self.credential.get_bdstoken(),
        }
        # The API expects the path list as a JSON string inside a form field;
        # ensure_ascii=False keeps non-ASCII path characters unescaped.
        data = {
            "filelist": json.dumps(paths, ensure_ascii=False),
        }
        headers = self.credential.get_headers()
        headers["Content-Type"] = "application/x-www-form-urlencoded"

        try:
            resp = self.session.post(
                url, params=params, data=data, headers=headers, timeout=30
            )
            resp.raise_for_status()
            result = resp.json()
        except Exception as e:
            # Deliberately broad: every transport, HTTP-status or JSON
            # decoding failure is reported to callers as a TransferError.
            # Chaining keeps the original traceback for debugging.
            raise TransferError(
                TransferErrorCode.NETWORK_ERROR,
                message=f"百度删除请求失败: {e}",
                platform="baidu",
            ) from e

        # Valid JSON that is not an object (e.g. a list or null) has no
        # ``errno``; report it instead of crashing on ``.get``.
        if not isinstance(result, dict):
            raise TransferError(
                TransferErrorCode.NETWORK_ERROR,
                message=f"百度删除响应格式异常: {type(result).__name__}",
                platform="baidu",
            )

        errno = result.get("errno", -1)

        # errno=0 means success; errno=2 is treated as "file does not exist"
        # and therefore also counts as success.
        # NOTE(review): Baidu's public error tables commonly list -9 for a
        # missing file and 2 for a parameter error — confirm this mapping.
        if errno in (0, 2):
            logger.info("百度删除完成: %s 个路径 (errno=%s)", len(paths), errno)
            return True

        raise TransferError(
            TransferErrorCode.NETWORK_ERROR,
            message=f"百度删除失败 (errno={errno})",
            platform="baidu",
            details=result,
        )

    # ─── Ad filtering ────────────────────────────────────────

    def filter_ads(self, files: List[dict]) -> List[dict]:
        """Drop files whose name looks like an advertisement.

        Args:
            files: File records such as ``[{"fs_id": "xxx", "name": "xxx"}]``.
                Only the ``"name"`` key is read; a record without it is kept.

        Returns:
            The records that are not ads, in their original order. When no
            keywords are configured the input list itself is returned.
        """
        if not self.ad_keywords:
            return files

        retained = []
        removed = []
        for f in files:
            name = f.get("name", "")
            if self._is_ad(name):
                removed.append(name)
            else:
                retained.append(f)

        if removed:
            logger.info("广告过滤: 移除 %s 个文件: %s", len(removed), removed)
        return retained

    def filter_ad_ids(self, file_ids: List[str],
                      file_names: List[str]) -> List[str]:
        """Filter ads by file name and return the ids that are kept.

        Args:
            file_ids: File ids.
            file_names: File names, positionally matching ``file_ids``.

        Returns:
            The ids whose name is not an ad, in their original order. When no
            keywords are configured ``file_ids`` itself is returned.

        Raises:
            ValueError: Filtering is active and the two lists differ in
                length.
        """
        if not self.ad_keywords:
            return file_ids

        # zip() would silently truncate to the shorter list, which drops file
        # ids without any trace; fail loudly on a broken pairing instead.
        if len(file_ids) != len(file_names):
            raise ValueError(
                "file_ids and file_names must have the same length "
                f"(got {len(file_ids)} and {len(file_names)})"
            )

        retained = []
        for fid, name in zip(file_ids, file_names):
            if self._is_ad(name):
                logger.info("广告过滤: 移除 %s", name)
            else:
                retained.append(fid)
        return retained

    def _is_ad(self, filename: str) -> bool:
        """Return True if ``filename`` contains any configured ad keyword.

        Matching is a case-insensitive substring test. An empty file name is
        never an ad. Empty keywords are skipped, because the empty string is
        a substring of every name and would flag every file.
        """
        if not filename:
            return False
        name_lower = filename.lower()
        return any(
            kw and kw.lower() in name_lower for kw in self.ad_keywords
        )
|