v0.2.7: 修复Redis连接 + 启动管理后台
- 修复Redis认证 (配置密码) - 启动Python管理后台 (端口9531, 15个功能开关) - 统一版本号 0.2.7 - 更新docker-compose.yml (镜像版本/Redis URL/Admin服务)
This commit is contained in:
154
cloudsearch_transfer/adapter/baidu/cleanup.py
Normal file
154
cloudsearch_transfer/adapter/baidu/cleanup.py
Normal file
@@ -0,0 +1,154 @@
|
||||
"""
|
||||
百度网盘文件清理 — 删除文件 & 广告过滤
|
||||
参考 cloud-auto-save 的 filter_ads + netdisk 的 delete
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
from typing import List
|
||||
|
||||
import requests
|
||||
|
||||
from ...errors import TransferError, TransferErrorCode
|
||||
from .credential import BaiduCredentialManager, BAIDU_PAN_API
|
||||
|
||||
logger = logging.getLogger(__name__)

# Default advertisement keywords. A file whose name contains any of these
# (case-insensitive substring match, see BaiduCleanup._is_ad) is treated as
# an ad.
DEFAULT_AD_KEYWORDS = [
    "公众号", "微信", "扫码", "加群", "QQ群", "广告",
    "关注", "免费领取", "点击领取", "全网", "最全",
    "防走丢", "防迷路", "备用", "务必下载", "必看",
    "解压密码", "压缩密码",
]


class BaiduCleanup:
    """Baidu Netdisk file cleanup: bulk deletion and ad filtering by name."""

    def __init__(self, session: "requests.Session",
                 credential: "BaiduCredentialManager",
                 ad_keywords: List[str] = None):
        """Store the HTTP session, credential manager and ad keyword list.

        Args:
            session: Session used to send the delete request.
            credential: Provides the bdstoken and the request headers.
            ad_keywords: Keywords marking a file name as an ad. ``None`` or
                an empty list falls back to ``DEFAULT_AD_KEYWORDS``.
        """
        self.session = session
        self.credential = credential
        # Store a copy so that later edits to ``self.ad_keywords`` cannot
        # leak into the module-level default or the caller's own list.
        self.ad_keywords = list(ad_keywords or DEFAULT_AD_KEYWORDS)

    # ─── Deleting files ──────────────────────────────────────

    def delete_files(self, paths: List[str]) -> bool:
        """Delete files in bulk, addressed by their netdisk paths.

        Sends ``POST /api/filemanager?opera=delete&bdstoken={bdstoken}`` with
        the form field ``filelist`` set to a JSON array of paths, e.g.
        ``filelist=["/path/to/file1","/path/to/file2"]``.

        Args:
            paths: Full paths of the files inside the netdisk, e.g.
                ``["/dir/file.txt"]``.

        Returns:
            True when ``paths`` is empty (nothing is sent) or when the API
            answers with errno 0 (success) or errno 2 (file does not exist,
            treated as success).

        Raises:
            TransferError: The request failed, the response could not be
                parsed, or the API reported any other errno.
        """
        if not paths:
            logger.info("删除列表为空,跳过")
            return True

        bdstoken = self.credential.get_bdstoken()
        url = f"{BAIDU_PAN_API}/api/filemanager"
        params = {
            "opera": "delete",
            "bdstoken": bdstoken,
        }
        data = {
            "filelist": json.dumps(paths, ensure_ascii=False),
        }
        # Work on a copy: the dict returned by the credential manager may be
        # shared, and the Content-Type override is specific to this request.
        headers = dict(self.credential.get_headers())
        headers["Content-Type"] = "application/x-www-form-urlencoded"

        try:
            resp = self.session.post(
                url, params=params, data=data, headers=headers, timeout=30
            )
            resp.raise_for_status()
            result = resp.json()
        except Exception as e:
            # Transport errors, HTTP error statuses and unparsable bodies are
            # all reported as one TransferError; keep the cause chained.
            raise TransferError(
                TransferErrorCode.NETWORK_ERROR,
                message=f"百度删除请求失败: {e}",
                platform="baidu",
            ) from e

        # A payload that is not a JSON object carries no errno; report it as
        # the generic failure instead of crashing on ``.get``.
        is_object = isinstance(result, dict)
        errno = result.get("errno", -1) if is_object else -1

        # errno=0: success; errno=2: file does not exist (treated as success)
        if errno in (0, 2):
            logger.info("百度删除完成: %s 个路径 (errno=%s)", len(paths), errno)
            return True

        raise TransferError(
            TransferErrorCode.NETWORK_ERROR,
            message=f"百度删除失败 (errno={errno})",
            platform="baidu",
            details=result if is_object else {"response": result},
        )

    # ─── Ad filtering ────────────────────────────────────────

    def filter_ads(self, files: List[dict]) -> List[dict]:
        """Drop files whose name looks like an advertisement.

        Args:
            files: File records such as
                ``[{"fs_id": "xxx", "name": "xxx"}, ...]``. A record without
                a ``name`` key is kept.

        Returns:
            The records that are not ads, in their original order. When no
            keywords are configured, ``files`` is returned unchanged.
        """
        if not self.ad_keywords:
            return files

        retained = []
        removed = []
        for f in files:
            name = f.get("name", "")
            if self._is_ad(name):
                removed.append(name)
            else:
                retained.append(f)

        if removed:
            logger.info("广告过滤: 移除 %s 个文件: %s", len(removed), removed)
        return retained

    def filter_ad_ids(self, file_ids: List[str],
                      file_names: List[str]) -> List[str]:
        """Filter ads by file name and return the file IDs that are kept.

        Args:
            file_ids: File IDs.
            file_names: File names, positionally matching ``file_ids``.

        Returns:
            The IDs whose name is not an ad, in their original order. An ID
            with no corresponding name (``file_names`` shorter than
            ``file_ids``) is kept rather than silently dropped. When no
            keywords are configured, ``file_ids`` is returned unchanged.
        """
        if not self.ad_keywords:
            return file_ids

        ids = list(file_ids)
        names = list(file_names or [])
        if len(names) < len(ids):
            logger.warning(
                "广告过滤: file_ids 与 file_names 数量不一致 (%s vs %s), "
                "缺少文件名的 ID 将被保留",
                len(ids), len(names),
            )

        retained = []
        for idx, fid in enumerate(ids):
            # A missing name is treated like an empty one: never an ad.
            name = names[idx] if idx < len(names) else ""
            if self._is_ad(name):
                logger.info("广告过滤: 移除 %s", name)
            else:
                retained.append(fid)

        return retained

    def _is_ad(self, filename: str) -> bool:
        """Return True if ``filename`` contains any configured ad keyword.

        Matching is a case-insensitive substring test. An empty file name is
        never an ad, and empty keywords are ignored because an empty string
        is a substring of every name.
        """
        if not filename:
            return False
        name_lower = filename.lower()
        return any(
            kw and kw.lower() in name_lower for kw in self.ad_keywords
        )
|
||||
Reference in New Issue
Block a user