Files
CloudSearch/cloudsearch_transfer/adapter/baidu/cleanup.py
admin 83cbfaf03f v0.2.7: 修复Redis连接 + 启动管理后台
- 修复Redis认证 (配置密码)
- 启动Python管理后台 (端口9531, 15个功能开关)
- 统一版本号 0.2.7
- 更新docker-compose.yml (镜像版本/Redis URL/Admin服务)
2026-05-17 02:22:18 +08:00

155 lines
4.7 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
百度网盘文件清理 — 删除文件 & 广告过滤
参考 cloud-auto-save 的 filter_ads + netdisk 的 delete
"""
import json
import logging
from typing import List, Optional

import requests

from ...errors import TransferError, TransferErrorCode
from .credential import BaiduCredentialManager, BAIDU_PAN_API
logger = logging.getLogger(__name__)

# Default advertisement keywords. A file whose name contains any of these
# substrings is treated as an ad.
DEFAULT_AD_KEYWORDS = [
    # Social-media / contact promotion
    "公众号",
    "微信",
    "扫码",
    "加群",
    "QQ群",
    "广告",
    "关注",
    # Giveaway / hype wording
    "免费领取",
    "点击领取",
    "全网",
    "最全",
    # "Don't lose us" / backup-link notices
    "防走丢",
    "防迷路",
    "备用",
    "务必下载",
    "必看",
    # Archive password notes
    "解压密码",
    "压缩密码",
]
class BaiduCleanup:
    """Delete files from Baidu Netdisk and filter advertisement files by name.

    The instance keeps three attributes taken from the constructor:
    ``session`` (used to send the delete request), ``credential`` (supplies
    the bdstoken and the request headers) and ``ad_keywords`` (substrings
    that mark a file name as an advertisement).
    """

    # The two non-stdlib types are quoted (forward references) so that the
    # class definition itself does not need to resolve them eagerly.
    def __init__(self, session: "requests.Session",
                 credential: "BaiduCredentialManager",
                 ad_keywords: Optional[List[str]] = None):
        """Store the collaborators and the advertisement keyword list.

        Args:
            session: HTTP session used to send the delete request.
            credential: Object providing ``get_bdstoken()`` and
                ``get_headers()``.
            ad_keywords: Substrings that mark a file name as an ad. ``None``
                or an empty list falls back to ``DEFAULT_AD_KEYWORDS``.
        """
        self.session = session
        self.credential = credential
        # Copy the list so that editing one instance's keywords can never
        # mutate the caller's list or the shared module-level default.
        self.ad_keywords = list(ad_keywords or DEFAULT_AD_KEYWORDS)

    # --- Delete files -----------------------------------------------------

    def delete_files(self, paths: List[str]) -> bool:
        """Delete files in bulk, addressed by their netdisk paths.

        Sends ``POST /api/filemanager?opera=delete&bdstoken={bdstoken}`` with
        the form field ``filelist=["/path/to/file1","/path/to/file2"]``.

        Args:
            paths: Full netdisk paths of the files, e.g. ``["/dir/file.txt"]``.

        Returns:
            True when the API reports errno 0, or errno 2 (noted by the
            original author as "file does not exist", which is treated as
            success). An empty ``paths`` returns True without any request.

        Raises:
            TransferError: The request or JSON decoding failed, the response
                was not a JSON object, or the API returned another errno.
        """
        if not paths:
            logger.info("删除列表为空,跳过")
            return True

        bdstoken = self.credential.get_bdstoken()
        url = f"{BAIDU_PAN_API}/api/filemanager"
        params = {
            "opera": "delete",
            "bdstoken": bdstoken,
        }
        data = {
            # ensure_ascii=False keeps non-ASCII path characters unescaped
            # inside the JSON list.
            "filelist": json.dumps(paths, ensure_ascii=False),
        }
        # Work on a copy: the Content-Type override below must not leak into
        # whatever mapping the credential manager hands out.
        headers = dict(self.credential.get_headers())
        headers["Content-Type"] = "application/x-www-form-urlencoded"

        try:
            resp = self.session.post(
                url, params=params, data=data, headers=headers, timeout=30
            )
            resp.raise_for_status()
            result = resp.json()
        except Exception as e:
            # Boundary conversion: any transport / HTTP-status / JSON error
            # surfaces as TransferError, with the original kept as the cause.
            raise TransferError(
                TransferErrorCode.NETWORK_ERROR,
                message=f"百度删除请求失败: {e}",
                platform="baidu",
            ) from e

        # A JSON body that is not an object cannot carry an errno; route it
        # through the regular failure path instead of raising AttributeError.
        if isinstance(result, dict):
            errno = result.get("errno", -1)
            details = result
        else:
            errno = -1
            details = {"response": result}

        # errno=0: success; errno=2: file does not exist (treated as success).
        if errno in (0, 2):
            logger.info("百度删除完成: %d 个路径 (errno=%s)", len(paths), errno)
            return True

        raise TransferError(
            TransferErrorCode.NETWORK_ERROR,
            message=f"百度删除失败 (errno={errno})",
            platform="baidu",
            details=details,
        )

    # --- Advertisement filtering ------------------------------------------

    def filter_ads(self, files: List[dict]) -> List[dict]:
        """Drop advertisement files based on their file names.

        Args:
            files: File entries such as
                ``[{"fs_id": "xxx", "name": "xxx"}, ...]``. An entry without
                a ``name`` key is never classified as an ad.

        Returns:
            A new list holding only the non-ad entries, in the original
            order. When no keywords are configured, ``files`` itself is
            returned unchanged.
        """
        if not self.ad_keywords:
            return files

        retained = []
        removed = []
        for entry in files:
            name = entry.get("name", "")
            if self._is_ad(name):
                removed.append(name)
            else:
                retained.append(entry)

        if removed:
            logger.info("广告过滤: 移除 %d 个文件: %s", len(removed), removed)
        return retained

    def filter_ad_ids(self, file_ids: List[str],
                      file_names: List[str]) -> List[str]:
        """Filter ads by file name and return the file ids that are kept.

        Args:
            file_ids: File ids.
            file_names: File names, positionally matching ``file_ids``.

        Returns:
            The ids whose name is not an ad, in the original order. An id
            with no corresponding name (``file_names`` shorter than
            ``file_ids``) cannot be judged and is kept, mirroring how
            ``filter_ads`` keeps entries that have no name. When no keywords
            are configured, ``file_ids`` itself is returned unchanged.
        """
        if not self.ad_keywords:
            return file_ids

        names = list(file_names or [])
        retained = []
        for index, fid in enumerate(file_ids):
            # zip() would silently drop ids beyond len(names); keep them.
            name = names[index] if index < len(names) else ""
            if self._is_ad(name):
                logger.info("广告过滤: 移除 %s", name)
            else:
                retained.append(fid)
        return retained

    def _is_ad(self, filename: str) -> bool:
        """Return True when ``filename`` contains any configured ad keyword.

        Matching is a case-insensitive substring test. An empty file name is
        never an ad. Empty or whitespace-only keywords are ignored, because
        an empty string is a substring of every name and would otherwise
        flag every file.
        """
        if not filename:
            return False
        name_lower = filename.lower()
        return any(
            kw.lower() in name_lower
            for kw in self.ad_keywords
            if kw and kw.strip()
        )