v0.2.7: 修复Redis连接 + 启动管理后台

- 修复Redis认证 (配置密码)
- 启动Python管理后台 (端口9531, 15个功能开关)
- 统一版本号 0.2.7
- 更新docker-compose.yml (镜像版本/Redis URL/Admin服务)
This commit is contained in:
2026-05-17 02:22:18 +08:00
commit 83cbfaf03f
164 changed files with 25195 additions and 0 deletions

View File

@@ -0,0 +1,179 @@
"""
CloudSearch TMDB Enricher v1.0.0
自动匹配影视元数据:海报、评分、简介、年份、类型
"""
import time
import logging
from typing import Optional, Dict, Any, List
from dataclasses import dataclass, field
import requests
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Root URL of the TMDB v3 REST API; endpoint paths such as "/search/movie"
# are appended to it.
TMDB_API_BASE = "https://api.themoviedb.org/3"
# Prefix for image URLs; a result's poster/backdrop path is appended directly
# after it (the trailing "w500" segment is part of every generated image URL).
TMDB_IMAGE_BASE = "https://image.tmdb.org/t/p/w500"
@dataclass
class MediaInfo:
    """Metadata record describing a single movie or TV show.

    Every field has a default, so ``MediaInfo()`` yields an empty record
    whose only non-empty value is ``source == "tmdb"``.
    """

    # --- Identification ---
    title: str = ""
    original_title: str = ""
    year: str = ""

    # --- Artwork (absolute image URLs, empty when unavailable) ---
    poster_url: str = ""
    backdrop_url: str = ""

    # --- Rating and synopsis ---
    rating: str = ""
    rating_count: int = 0
    description: str = ""

    # --- Classification ---
    genres: List[str] = field(default_factory=list)
    media_type: str = ""  # movie / tv
    tmdb_id: int = 0

    # --- Credits and release details ---
    directors: List[str] = field(default_factory=list)
    actors: List[str] = field(default_factory=list)
    region: str = ""
    duration: str = ""
    seasons: int = 0
    episodes: int = 0

    # --- Provenance ---
    source: str = "tmdb"
    tmdb_url: str = ""
class TMDBEnricher:
    """Look up movie / TV metadata on TMDB for titles taken from file names.

    A raw title (typically a cloud-drive file name) is reduced to a clean
    title plus an optional release year, the TMDB search endpoint is queried,
    and the first hit is converted into a ``MediaInfo``. Successful lookups
    are kept in an in-memory cache for ``cache_ttl`` seconds.
    """

    # Common cloud-drive file-name layouts -> (regex, year group).
    # Group 1 of every regex is the title; the second tuple element is the
    # index of the group holding the four-digit year, or None when the layout
    # carries no year.
    TITLE_PATTERNS = [
        # [4K] 流浪地球2 (2023)
        (r'\[.*?\]\s*(.+?)\s*[\(](\d{4})[\)]', 2),
        # 流浪地球2.2023.4K
        (r'(.+?)\.(\d{4})\.(?:4K|1080[Pp]|2160[Pp]|HD)', 2),
        # 流浪地球2 2023   (the year may also be the very end of the name)
        (r'(.+?)\s+(\d{4})(?:\s|$)', 2),
        # Show.Name.S01E01 -- season/episode marker, no year available
        (r'(.+?)[\.\s][Ss](\d{2})[Ee](\d{2})', None),
    ]

    # Years before this are treated as part of the title rather than a
    # release year.
    _EARLIEST_YEAR = 1880

    def __init__(self, api_key: str, language: str = "zh-CN",
                 cache_ttl: int = 86400,
                 genre_map: Optional[Dict[int, str]] = None):
        """Create an enricher.

        Args:
            api_key: TMDB v3 API key, sent as the ``api_key`` query parameter.
            language: Value of the ``language`` query parameter.
            cache_ttl: Lifetime of a cached lookup, in seconds.
            genre_map: Optional mapping of TMDB genre id -> display name.
                Search results only carry numeric genre ids; ids missing
                from this mapping are left out of ``MediaInfo.genres``.
        """
        self.api_key = api_key
        self.language = language
        self.cache_ttl = cache_ttl
        self.genre_map = dict(genre_map) if genre_map else {}
        self._cache: Dict[str, tuple] = {}  # key -> (data, timestamp)

    def enrich(self, title: str,
               media_type: Optional[str] = None) -> Optional["MediaInfo"]:
        """Return TMDB metadata for ``title``, or None when nothing matches.

        Args:
            title: Raw title or file name.
            media_type: "movie" or "tv"; guessed from ``title`` when omitted.
        """
        if not title or not title.strip():
            return None
        clean_title, year = self._extract_title_year(title)
        # Guess from the RAW title: markers such as "S01E01" are removed
        # while cleaning, so the cleaned title no longer reveals a TV show.
        if not media_type:
            media_type = self._guess_type(title)
        cache_key = f"{clean_title}:{year}:{media_type}"
        cached = self._cache.get(cache_key)
        if cached is not None:
            data, stored_at = cached
            if time.time() - stored_at < self.cache_ttl:
                return data
            # Expired: drop it so a failed refresh does not keep it around.
            self._cache.pop(cache_key, None)
        info = self._search(clean_title, year, media_type)
        if info:
            self._cache[cache_key] = (info, time.time())
        return info

    def enrich_batch(self, titles: List[str],
                     max_concurrent: int = 5) -> Dict[str, Optional["MediaInfo"]]:
        """Look up several titles concurrently.

        Returns:
            Mapping of each distinct input title to its ``enrich`` result
            (None when nothing matched). Titles whose lookup raised are
            logged and left out.
        """
        results = {}
        # Duplicates would only trigger duplicate requests for the same key.
        unique_titles = list(dict.fromkeys(titles))
        if not unique_titles:
            return results
        from concurrent.futures import ThreadPoolExecutor, as_completed
        # ThreadPoolExecutor rejects max_workers <= 0.
        with ThreadPoolExecutor(max_workers=max(1, max_concurrent)) as ex:
            futures = {ex.submit(self.enrich, t): t for t in unique_titles}
            for fut in as_completed(futures):
                try:
                    results[futures[fut]] = fut.result()
                except Exception as e:
                    logger.warning("TMDB enrich failed: %s - %s",
                                   futures[fut], self._redact(e))
        return results

    def _extract_title_year(self, title: str) -> tuple:
        """Split a file name into ``(clean_title, year)``.

        ``year`` is a four-digit string, or "" when the name carries no
        plausible release year. When no pattern applies, the stripped input
        is returned unchanged with an empty year.
        """
        import re
        latest_year = time.localtime().tm_year + 2
        for pattern, year_group in self.TITLE_PATTERNS:
            m = re.search(pattern, title, re.IGNORECASE)
            if not m:
                continue
            raw_year = ""
            if year_group and year_group <= len(m.groups()):
                raw_year = m.group(year_group) or ""
            if re.fullmatch(r'\d{4}', raw_year):
                if not self._EARLIEST_YEAR <= int(raw_year) <= latest_year:
                    # e.g. "Blade Runner 2049": the number belongs to the
                    # title, so this layout does not really apply.
                    continue
                year = raw_year
            else:
                # The pattern has no year group (or the group is not a year).
                year = ""
            name = m.group(1).strip()
            # Strip common status suffixes such as "[全40集]" / "(更新至12)".
            name = re.sub(r'\s*[\[(].*?(?:完结|全\d+集|更新).*?[\])]', '', name)
            name = name.strip()
            if not name:
                continue
            return name, year
        return title.strip(), ""

    def _guess_type(self, title: str) -> str:
        """Return "tv" when the title shows season/episode markers, else "movie"."""
        import re
        tv_patterns = [
            r'[Ss]\d{2}[Ee]\d{2}', r'第[一二三四五六七八九十\d]+季',
            r'[Ss]eason\s*\d+', r'\d+集', r'更新至\d+',
        ]
        if any(re.search(p, title) for p in tv_patterns):
            return "tv"
        return "movie"

    def _search(self, title: str, year: str = "",
                media_type: str = "movie") -> Optional["MediaInfo"]:
        """Query TMDB and return the first hit, or None.

        A movie search without hits is retried against the TV endpoint.
        Every error is logged (with the API key redacted) and reported as
        None, so callers never see an exception from here.
        """
        search_type = "tv" if media_type == "tv" else "movie"
        try:
            results = self._query(search_type, title, year)
            if not results and search_type == "movie":
                # Also try TV shows, and remember which endpoint answered so
                # the result is labelled and linked as what it really is.
                search_type = "tv"
                results = self._query(search_type, title, year)
            if not results:
                return None
            return self._parse_result(results[0], search_type)
        except Exception as e:
            logger.error("TMDB search error: %s - %s", title, self._redact(e))
            return None

    def _query(self, search_type: str, title: str, year: str) -> list:
        """Call ``/search/<search_type>`` and return its ``results`` list."""
        params = {
            "api_key": self.api_key,
            "query": title,
            "language": self.language,
            "page": 1,
        }
        if year:
            year_param = "year" if search_type == "movie" else "first_air_date_year"
            params[year_param] = year
        resp = requests.get(
            f"{TMDB_API_BASE}/search/{search_type}",
            params=params, timeout=10
        )
        # Surface HTTP errors (bad key, rate limit, ...) instead of treating
        # the error payload as "no results".
        resp.raise_for_status()
        return resp.json().get("results") or []

    def _redact(self, err: Exception) -> str:
        """Return ``str(err)`` with the API key masked.

        The key travels in the query string, and the request URL can end up
        in the exception text, so it must not reach the logs verbatim.
        """
        text = str(err)
        if self.api_key:
            text = text.replace(self.api_key, "***")
        return text

    @staticmethod
    def _release_year(item: dict) -> str:
        """Return the first four characters of the release / first-air date."""
        date = item.get("release_date") or item.get("first_air_date") or ""
        return str(date)[:4]

    def _genre_names(self, item: dict) -> List[str]:
        """Return genre display names for a TMDB item.

        Accepts ``{"name": ...}`` objects as well as bare numeric ids; ids
        are translated through ``self.genre_map`` and skipped when unknown.
        """
        names = []
        for entry in item.get("genres") or item.get("genre_ids") or []:
            if isinstance(entry, dict):
                name = entry.get("name")
            else:
                name = self.genre_map.get(entry)
            if name:
                names.append(name)
        return names

    def _parse_result(self, item: dict, media_type: str) -> "MediaInfo":
        """Convert one TMDB search result into a ``MediaInfo``.

        Args:
            item: A single entry of the ``results`` list.
            media_type: "tv" or "movie" -- the endpoint that produced ``item``.
        """
        mid = item.get("id", 0)
        is_tv = media_type == "tv" or item.get("media_type") == "tv"
        kind = "tv" if is_tv else "movie"
        poster_path = item.get("poster_path")
        backdrop_path = item.get("backdrop_path")
        return MediaInfo(
            title=item.get("title") or item.get("name") or "",
            original_title=(item.get("original_title")
                            or item.get("original_name") or ""),
            year=self._release_year(item),
            poster_url=f"{TMDB_IMAGE_BASE}{poster_path}" if poster_path else "",
            backdrop_url=f"{TMDB_IMAGE_BASE}{backdrop_path}" if backdrop_path else "",
            # "or 0" also covers an explicit null in the payload.
            rating=str(round(item.get("vote_average") or 0, 1)),
            rating_count=item.get("vote_count") or 0,
            description=(item.get("overview") or "")[:500],
            genres=self._genre_names(item),
            media_type=kind,
            tmdb_id=mid,
            tmdb_url=f"https://www.themoviedb.org/{kind}/{mid}",
        )