""" 百度网盘文件清理 — 删除文件 & 广告过滤 参考 cloud-auto-save 的 filter_ads + netdisk 的 delete """ import json import logging from typing import List import requests from ...errors import TransferError, TransferErrorCode from .credential import BaiduCredentialManager, BAIDU_PAN_API logger = logging.getLogger(__name__) # 默认广告关键词 DEFAULT_AD_KEYWORDS = [ "公众号", "微信", "扫码", "加群", "QQ群", "广告", "关注", "免费领取", "点击领取", "全网", "最全", "防走丢", "防迷路", "备用", "务必下载", "必看", "解压密码", "压缩密码", ] class BaiduCleanup: """百度网盘文件清理 & 广告过滤""" def __init__(self, session: requests.Session, credential: BaiduCredentialManager, ad_keywords: List[str] = None): self.session = session self.credential = credential self.ad_keywords = ad_keywords or DEFAULT_AD_KEYWORDS # ─── 删除文件 ──────────────────────────────────────────── def delete_files(self, paths: List[str]) -> bool: """批量删除文件(按网盘路径) POST /api/filemanager?opera=delete&bdstoken={bdstoken} Body: filelist=["/path/to/file1","/path/to/file2"] Args: paths: 文件在网盘中的完整路径列表,如 ["/dir/file.txt"] Returns: True 全部成功(包括文件不存在的 errno=2) Raises: TransferError: 删除失败 """ if not paths: logger.info("删除列表为空,跳过") return True bdstoken = self.credential.get_bdstoken() url = f"{BAIDU_PAN_API}/api/filemanager" params = { "opera": "delete", "bdstoken": bdstoken, } data = { "filelist": json.dumps(paths, ensure_ascii=False), } headers = self.credential.get_headers() headers["Content-Type"] = "application/x-www-form-urlencoded" try: resp = self.session.post( url, params=params, data=data, headers=headers, timeout=30 ) resp.raise_for_status() result = resp.json() except Exception as e: raise TransferError( TransferErrorCode.NETWORK_ERROR, message=f"百度删除请求失败: {e}", platform="baidu", ) errno = result.get("errno", -1) # errno=0 成功; errno=2 文件不存在(视为成功) if errno in (0, 2): logger.info(f"百度删除完成: {len(paths)} 个路径 (errno={errno})") return True raise TransferError( TransferErrorCode.NETWORK_ERROR, message=f"百度删除失败 (errno={errno})", platform="baidu", details=result, ) # ─── 广告过滤 ──────────────────────────────────────────── def filter_ads(self, files: List[dict]) -> List[dict]: """根据文件名过滤广告文件 Args: files: [{"fs_id": "xxx", "name": "xxx"}, ...] Returns: 过滤后的文件列表,仅保留非广告文件 """ if not self.ad_keywords: return files retained = [] removed = [] for f in files: name = f.get("name", "") if self._is_ad(name): removed.append(name) else: retained.append(f) if removed: logger.info(f"广告过滤: 移除 {len(removed)} 个文件: {removed}") return retained def filter_ad_ids(self, file_ids: List[str], file_names: List[str]) -> List[str]: """根据文件名过滤广告,返回保留的 file_ids Args: file_ids: 文件 ID 列表 file_names: 对应的文件名列表(与 file_ids 一一对应) Returns: 过滤后的 file_ids """ if not self.ad_keywords: return file_ids retained = [] for fid, name in zip(file_ids, file_names): if not self._is_ad(name): retained.append(fid) else: logger.info(f"广告过滤: 移除 {name}") return retained def _is_ad(self, filename: str) -> bool: """判断文件名是否为广告""" if not filename: return False name_lower = filename.lower() for kw in self.ad_keywords: if kw.lower() in name_lower: return True return False