v0.2.7: 修复Redis连接 + 启动管理后台

- 修复Redis认证 (配置密码) - 启动Python管理后台 (端口9531, 15个功能开关) - 统一版本号 0.2.7 - 更新docker-compose.yml (镜像版本/Redis URL/Admin服务)
2026-05-17 02:22:18 +08:00
commit 83cbfaf03f
164 changed files with 25195 additions and 0 deletions
--- a/cloudsearch_enrich/tmdb_enricher.py
+++ b/cloudsearch_enrich/tmdb_enricher.py
@@ -0,0 +1,179 @@
+"""
+CloudSearch TMDB Enricher v1.0.0
+自动匹配影视元数据：海报、评分、简介、年份、类型
+"""
+
+import time
+import logging
+from typing import Optional, Dict, Any, List
+from dataclasses import dataclass, field
+import requests
+
+logger = logging.getLogger(__name__)  # module-level logger, named after this module
+
+TMDB_API_BASE = "https://api.themoviedb.org/3"  # root URL; "/search/<type>" is appended to it
+TMDB_IMAGE_BASE = "https://image.tmdb.org/t/p/w500"  # prefix joined with poster/backdrop paths
+
+
+@dataclass
+class MediaInfo:
+    """Metadata record describing one movie or TV show."""
+    title: str = ""  # display title
+    original_title: str = ""  # original title as reported by the source
+    year: str = ""  # four-character year string, "" when unknown
+    poster_url: str = ""  # full poster image URL, "" when there is none
+    backdrop_url: str = ""  # full backdrop image URL, "" when there is none
+    rating: str = ""  # average rating kept as text
+    rating_count: int = 0  # number of votes behind the rating
+    description: str = ""  # synopsis / overview text
+    genres: List[str] = field(default_factory=list)  # genre names
+    media_type: str = ""  # "movie" or "tv"
+    tmdb_id: int = 0  # numeric id of the entry on TMDB
+    directors: List[str] = field(default_factory=list)  # not filled in by the code in this file
+    actors: List[str] = field(default_factory=list)  # not filled in by the code in this file
+    region: str = ""  # not filled in by the code in this file
+    duration: str = ""  # not filled in by the code in this file
+    seasons: int = 0  # not filled in by the code in this file
+    episodes: int = 0  # not filled in by the code in this file
+    source: str = "tmdb"  # where the record came from; defaults to "tmdb"
+    tmdb_url: str = ""  # link to the entry's page on themoviedb.org
+
+
+class TMDBEnricher:
+    """Look up movie / TV metadata on TMDB from noisy file-share titles.
+
+    A raw title is first reduced to a clean name plus an optional year
+    (see ``TITLE_PATTERNS``) and then sent to the TMDB search API.
+    Successful lookups are kept in a per-instance in-memory cache for
+    ``cache_ttl`` seconds; misses are not cached.
+    """
+
+    # Filename patterns, tried in order. Each entry is (regex, year_group):
+    # group 1 is always the title, ``year_group`` is the index of the group
+    # holding the four-digit year, or 0 when the pattern captures no year.
+    TITLE_PATTERNS = [
+        # "[4K] Title (2023)" - ASCII or full-width parentheses
+        (r'\[.*?\]\s*(.+?)\s*[\(（](\d{4})[\)）]', 2),
+        # "Title.2023.4K"
+        (r'(.+?)\.(\d{4})\.(?:4K|1080[Pp]|2160[Pp]|HD)', 2),
+        # "Title 2023" - the year may be followed by whitespace or end the string
+        (r'(.+?)\s+(\d{4})(?:\s|$)', 2),
+        # "Title.S01E01" - groups 2 and 3 are season / episode, not a year
+        (r'(.+?)[\.\s][Ss](\d{2})[Ee](\d{2})', 0),
+    ]
+
+    def __init__(self, api_key: str, language: str = "zh-CN",
+                 cache_ttl: int = 86400):
+        """Store the API settings and create an empty cache.
+
+        Args:
+            api_key: TMDB API key sent with every request.
+            language: value of the ``language`` query parameter.
+            cache_ttl: lifetime of a cached result, in seconds.
+        """
+        self.api_key = api_key
+        self.language = language
+        self.cache_ttl = cache_ttl
+        self._cache: Dict[str, tuple] = {}  # key -> (MediaInfo, timestamp)
+
+    def enrich(self, title: str, media_type: Optional[str] = None) -> Optional[MediaInfo]:
+        """Return TMDB metadata for ``title``, or ``None`` when nothing matches.
+
+        Args:
+            title: raw title or file name.
+            media_type: ``"movie"`` or ``"tv"``; guessed from the title when
+                omitted or empty.
+        """
+        clean_title, year = self._extract_title_year(title)
+
+        if not media_type:
+            # Guess from the raw title: markers such as "S01E01" are cut off
+            # by _extract_title_year and are no longer present in clean_title.
+            media_type = self._guess_type(title)
+
+        # The resolved type is part of the key so that an episode file and a
+        # same-named movie never share a cache entry.
+        cache_key = f"{clean_title}:{year}:{media_type}"
+        cached = self._cache.get(cache_key)
+        if cached is not None:
+            data, ts = cached
+            if time.time() - ts < self.cache_ttl:
+                return data
+
+        info = self._search(clean_title, year, media_type)
+        if info:
+            self._cache[cache_key] = (info, time.time())
+        return info
+
+    def enrich_batch(self, titles: List[str],
+                     max_concurrent: int = 5) -> Dict[str, Optional[MediaInfo]]:
+        """Look up several titles concurrently.
+
+        Args:
+            titles: raw titles; duplicates are looked up only once.
+            max_concurrent: number of worker threads.
+
+        Returns:
+            Mapping of each title to its ``MediaInfo``, or to ``None`` when no
+            match was found. Titles whose lookup raised are logged and left
+            out of the mapping.
+        """
+        from concurrent.futures import ThreadPoolExecutor, as_completed
+
+        results: Dict[str, Optional[MediaInfo]] = {}
+        unique_titles = list(dict.fromkeys(titles))  # de-duplicate, keep order
+        if not unique_titles:
+            return results
+
+        with ThreadPoolExecutor(max_workers=max_concurrent) as ex:
+            futures = {ex.submit(self.enrich, t): t for t in unique_titles}
+            for fut in as_completed(futures):
+                name = futures[fut]
+                try:
+                    results[name] = fut.result()
+                except Exception as e:
+                    logger.warning(f"TMDB enrich failed: {name} - {e}")
+        return results
+
+    def _extract_title_year(self, title: str) -> tuple:
+        """Split a raw file name into ``(clean_title, year)``.
+
+        The first entry of ``TITLE_PATTERNS`` that matches wins. ``year`` is
+        ``""`` when the pattern has no year group. When nothing matches, the
+        stripped input is returned with an empty year.
+        """
+        import re
+        for pattern, year_group in self.TITLE_PATTERNS:
+            m = re.search(pattern, title, re.IGNORECASE)
+            if not m:
+                continue
+            name = m.group(1).strip()
+            # year_group 0 means "no year"; group(0) would be the whole match.
+            has_year = 0 < year_group <= m.re.groups
+            year = (m.group(year_group) or "") if has_year else ""
+            # Drop bracketed status suffixes (finished / episode count / updating).
+            name = re.sub(r'\s*[\[（(].*?(?:完结|全\d+集|更新).*?[\]）)]', '', name)
+            return name.strip(), year
+        return title.strip(), ""
+
+    def _guess_type(self, title: str) -> str:
+        """Return ``"tv"`` when the title carries a series marker, else ``"movie"``."""
+        import re
+        tv_patterns = [
+            r'[Ss]\d{2}[Ee]\d{2}', r'第[一二三四五六七八九十\d]+季',
+            r'[Ss]eason\s*\d+', r'全\d+集', r'更新至\d+',
+        ]
+        if any(re.search(p, title) for p in tv_patterns):
+            return "tv"
+        return "movie"
+
+    def _search(self, title: str, year: str = "", media_type: str = "movie") -> Optional[MediaInfo]:
+        """Search TMDB and return the first hit, or ``None``.
+
+        A movie search that finds nothing is retried as a TV search. Any
+        error (network, HTTP status, malformed payload) is logged and turned
+        into ``None`` so callers can treat the lookup as best-effort.
+        """
+        if not title:
+            return None
+
+        search_type = "tv" if media_type == "tv" else "movie"
+        try:
+            results = self._query(search_type, title, year)
+            if not results and search_type == "movie":
+                # Fall back to TV. Track the type that actually produced the
+                # hit so the result is parsed and linked as a TV show.
+                search_type = "tv"
+                results = self._query(search_type, title, year)
+
+            if not results:
+                return None
+            return self._parse_result(results[0], search_type)
+
+        except Exception as e:  # deliberate boundary: lookups are best-effort
+            logger.error(f"TMDB search error: {title} - {e}")
+            return None
+
+    def _query(self, search_type: str, title: str, year: str = "") -> list:
+        """Call ``/search/<search_type>`` and return its ``results`` list."""
+        params = {
+            "api_key": self.api_key,
+            "query": title,
+            "language": self.language,
+            "page": 1,
+        }
+        if year:
+            # The year filter has a different name on the movie and TV endpoints.
+            params["year" if search_type == "movie" else "first_air_date_year"] = year
+
+        resp = requests.get(
+            f"{TMDB_API_BASE}/search/{search_type}",
+            params=params, timeout=10
+        )
+        # Surface bad keys / rate limits instead of reading them as "no results".
+        resp.raise_for_status()
+        return resp.json().get("results") or []
+
+    def _parse_result(self, item: dict, media_type: str) -> MediaInfo:
+        """Convert one TMDB result dict into a ``MediaInfo``.
+
+        Missing or ``None`` fields fall back to empty values. Genre names are
+        taken only from dict entries that carry a ``name``; bare integer ids
+        have no name available here and are skipped.
+        """
+        mid = item.get("id", 0)
+        is_tv = media_type == "tv" or item.get("media_type") == "tv"
+        kind = "tv" if is_tv else "movie"
+
+        date = item.get("release_date") or item.get("first_air_date") or ""
+        poster_path = item.get("poster_path")
+        backdrop_path = item.get("backdrop_path")
+        genre_entries = [*(item.get("genres") or []), *(item.get("genre_ids") or [])]
+        genres = [
+            g["name"] for g in genre_entries
+            if isinstance(g, dict) and g.get("name")
+        ]
+
+        return MediaInfo(
+            title=item.get("title") or item.get("name") or "",
+            original_title=item.get("original_title") or item.get("original_name") or "",
+            year=str(date)[:4],
+            poster_url=f"{TMDB_IMAGE_BASE}{poster_path}" if poster_path else "",
+            backdrop_url=f"{TMDB_IMAGE_BASE}{backdrop_path}" if backdrop_path else "",
+            rating=str(round(item.get("vote_average") or 0, 1)),
+            rating_count=item.get("vote_count") or 0,
+            description=(item.get("overview") or "")[:500],
+            genres=genres,
+            media_type=kind,
+            tmdb_id=mid,
+            tmdb_url=f"https://www.themoviedb.org/{kind}/{mid}",
+        )