- 修复Redis认证 (配置密码) - 启动Python管理后台 (端口9531, 15个功能开关) - 统一版本号 0.2.7 - 更新docker-compose.yml (镜像版本/Redis URL/Admin服务)
180 lines
6.3 KiB
Python
180 lines
6.3 KiB
Python
"""
|
||
CloudSearch TMDB Enricher v1.0.0
|
||
自动匹配影视元数据:海报、评分、简介、年份、类型
|
||
"""
|
||
|
||
import time
|
||
import logging
|
||
from typing import Optional, Dict, Any, List
|
||
from dataclasses import dataclass, field
|
||
import requests
|
||
|
||
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Root of the TMDB v3 REST API; endpoint paths are appended to it.
TMDB_API_BASE = "https://api.themoviedb.org/3"
# Image host prefix (w500 size variant); poster/backdrop paths are appended.
TMDB_IMAGE_BASE = "https://image.tmdb.org/t/p/w500"
|
||
|
||
|
||
@dataclass
class MediaInfo:
    """Film/TV metadata record.

    Every field has an empty default, so a record can be built from whatever
    subset of data is available. Field order is part of the positional
    constructor signature and must not be changed.
    """

    # --- titles and release year ---
    title: str = ""            # display title
    original_title: str = ""   # title in the original language
    year: str = ""             # four-digit year kept as text, "" if unknown

    # --- artwork ---
    poster_url: str = ""       # absolute poster image URL, "" if none
    backdrop_url: str = ""     # absolute backdrop image URL, "" if none

    # --- rating and synopsis ---
    rating: str = ""           # average vote, stored as text
    rating_count: int = 0      # number of votes behind the rating
    description: str = ""      # overview / synopsis text
    genres: List[str] = field(default_factory=list)

    # --- identity ---
    media_type: str = ""       # "movie" or "tv"
    tmdb_id: int = 0

    # --- credits and extended details ---
    directors: List[str] = field(default_factory=list)
    actors: List[str] = field(default_factory=list)
    region: str = ""
    duration: str = ""
    seasons: int = 0
    episodes: int = 0

    # --- provenance ---
    source: str = "tmdb"       # which provider produced this record
    tmdb_url: str = ""         # link to the item's page on themoviedb.org
|
||
|
||
|
||
class TMDBEnricher:
|
||
"""TMDB 影视信息增强器"""
|
||
|
||
# Common cloud-drive file-name layouts -> (regex, year_group).
# Group 1 of every regex is the title; year_group is the index of the group
# holding the four-digit year. A year_group larger than the regex's group
# count means "this layout carries no year": the extractor's range check then
# yields an empty year instead of reusing some other group.
# Entries are tried in order; the first match wins.
TITLE_PATTERNS = [
    # [4K] 流浪地球2 (2023)   /   【4K】流浪地球2(2023)
    # ASCII and full-width brackets/parentheses are both accepted.
    (r'(?:\[.*?\]|【.*?】)\s*(.+?)\s*[((](\d{4})[))]', 2),
    # 流浪地球2.2023.4K
    (r'(.+?)\.(\d{4})\.(?:4K|1080[Pp]|2160[Pp]|HD)', 2),
    # 流浪地球2 2023   (the year may also be the last token of the name)
    (r'(.+?)\s+(\d{4})(?:\s|$)', 2),
    # Title.S01E01 -- season/episode digits are deliberately not captured,
    # so group 2 does not exist and no year is reported for this layout.
    (r'(.+?)[\.\s][Ss]\d{2}[Ee]\d{2}', 2),
]
|
||
|
||
def __init__(self, api_key: str, language: str = "zh-CN",
|
||
cache_ttl: int = 86400):
|
||
self.api_key = api_key
|
||
self.language = language
|
||
self.cache_ttl = cache_ttl
|
||
self._cache: Dict[str, tuple] = {} # key → (data, timestamp)
|
||
|
||
def enrich(self, title: str, media_type: str = None) -> Optional[MediaInfo]:
|
||
"""根据标题查询 TMDB 元数据"""
|
||
clean_title, year = self._extract_title_year(title)
|
||
|
||
cache_key = f"{clean_title}:{year}:{media_type}"
|
||
if cache_key in self._cache:
|
||
data, ts = self._cache[cache_key]
|
||
if time.time() - ts < self.cache_ttl:
|
||
return data
|
||
|
||
# 智能判断类型
|
||
if not media_type:
|
||
media_type = self._guess_type(clean_title)
|
||
|
||
info = self._search(clean_title, year, media_type)
|
||
if info:
|
||
self._cache[cache_key] = (info, time.time())
|
||
return info
|
||
|
||
def enrich_batch(self, titles: List[str], max_concurrent: int = 5) -> Dict[str, MediaInfo]:
|
||
"""批量查询"""
|
||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||
results = {}
|
||
with ThreadPoolExecutor(max_workers=max_concurrent) as ex:
|
||
futures = {ex.submit(self.enrich, t): t for t in titles}
|
||
for f in as_completed(futures):
|
||
try:
|
||
results[futures[f]] = f.result()
|
||
except Exception as e:
|
||
logger.warning(f"TMDB enrich failed: {futures[f]} - {e}")
|
||
return results
|
||
|
||
def _extract_title_year(self, title: str) -> tuple:
|
||
"""从文件名提取标题和年份"""
|
||
import re
|
||
for pattern, year_group in self.TITLE_PATTERNS:
|
||
m = re.search(pattern, title, re.IGNORECASE)
|
||
if m:
|
||
name = m.group(1).strip()
|
||
year = m.group(year_group) if year_group <= len(m.groups()) else ""
|
||
# 去掉常见的后缀
|
||
name = re.sub(r'\s*[\[((].*?(?:完结|全\d+集|更新).*?[\]))]', '', name)
|
||
return name.strip(), year
|
||
return title.strip(), ""
|
||
|
||
def _guess_type(self, title: str) -> str:
|
||
"""根据标题特征判断电影/电视剧"""
|
||
import re
|
||
tv_patterns = [
|
||
r'[Ss]\d{2}[Ee]\d{2}', r'第[一二三四五六七八九十\d]+季',
|
||
r'[Ss]eason\s*\d+', r'全\d+集', r'更新至\d+',
|
||
]
|
||
for p in tv_patterns:
|
||
if re.search(p, title):
|
||
return "tv"
|
||
return "movie"
|
||
|
||
def _search(self, title: str, year: str = "", media_type: str = "movie") -> Optional[MediaInfo]:
    """Query the TMDB search API and return the first hit.

    A ``"tv"`` request searches TV only. Any other ``media_type`` searches
    movies first and falls back to TV when no movie matches.

    Args:
        title: Clean title used as the search query.
        year: Optional release year used to narrow the search.
        media_type: ``"tv"`` or ``"movie"``.

    Returns:
        ``MediaInfo`` built from the first result, or ``None`` when nothing
        matched or the request failed (failures are logged, never raised).
    """
    if not title or not title.strip():
        # Nothing to search for; skip the pointless request.
        return None

    if media_type == "tv":
        attempts = ["tv"]
    else:
        attempts = ["movie", "tv"]

    try:
        for search_type in attempts:
            params = {
                "api_key": self.api_key,
                "query": title,
                "language": self.language,
                "page": 1,
            }
            if year:
                # The year filter has a different name per endpoint, so the
                # params are rebuilt for each attempt rather than reused.
                year_key = "year" if search_type == "movie" else "first_air_date_year"
                params[year_key] = year

            resp = requests.get(
                f"{TMDB_API_BASE}/search/{search_type}",
                params=params,
                timeout=10,
            )
            # Surface HTTP failures (bad key, rate limit, ...) in the log
            # instead of treating them as "no results".
            resp.raise_for_status()

            results = resp.json().get("results") or []
            if results:
                # Parse with the endpoint that actually produced the hit so
                # a TV fallback is not labelled as a movie.
                return self._parse_result(results[0], search_type)

        return None

    except Exception as e:
        logger.error(f"TMDB search error: {title} - {e}")
        return None
|
||
|
||
def _parse_result(self, item: dict, media_type: str) -> MediaInfo:
    """Convert one TMDB result object into a ``MediaInfo``.

    Args:
        item: A single result dict from a TMDB response.
        media_type: ``"tv"`` or ``"movie"``, as requested by the caller.

    Returns:
        A ``MediaInfo`` with the fields present in ``item`` filled in;
        fields not set here keep their dataclass defaults.
    """
    mid = item.get("id", 0)

    # TV-shaped items carry "name" instead of "title"; honour that even when
    # the caller asked for a movie.
    looks_like_tv = "title" not in item and "name" in item
    is_tv = media_type == "tv" or item.get("media_type") == "tv" or looks_like_tv
    kind = "tv" if is_tv else "movie"

    # Any of these may be missing or null; normalise before slicing/rounding.
    date = item.get("release_date") or item.get("first_air_date") or ""
    vote_average = item.get("vote_average") or 0
    poster_path = item.get("poster_path")
    backdrop_path = item.get("backdrop_path")

    # Search results expose genres as integer ids ("genre_ids"), which carry
    # no names; only dict-shaped entries with a "name" can be used. Calling
    # .get() on the integers would raise AttributeError.
    raw_genres = item.get("genres") or item.get("genre_ids") or []
    genres = [g.get("name", "") for g in raw_genres if isinstance(g, dict)]

    return MediaInfo(
        title=item.get("title") or item.get("name") or "",
        original_title=item.get("original_title") or item.get("original_name") or "",
        year=str(date)[:4],
        poster_url=f"{TMDB_IMAGE_BASE}{poster_path}" if poster_path else "",
        backdrop_url=f"{TMDB_IMAGE_BASE}{backdrop_path}" if backdrop_path else "",
        rating=str(round(vote_average, 1)),
        rating_count=item.get("vote_count") or 0,
        description=(item.get("overview") or "")[:500],
        genres=genres,
        media_type=kind,
        tmdb_id=mid,
        tmdb_url=f"https://www.themoviedb.org/{kind}/{mid}",
    )
|