v0.2.7: 修复Redis连接 + 启动管理后台
- 修复Redis认证 (配置密码) - 启动Python管理后台 (端口9531, 15个功能开关) - 统一版本号 0.2.7 - 更新docker-compose.yml (镜像版本/Redis URL/Admin服务)
This commit is contained in:
179
cloudsearch_enrich/tmdb_enricher.py
Normal file
179
cloudsearch_enrich/tmdb_enricher.py
Normal file
@@ -0,0 +1,179 @@
|
||||
"""
|
||||
CloudSearch TMDB Enricher v1.0.0
|
||||
自动匹配影视元数据:海报、评分、简介、年份、类型
|
||||
"""
|
||||
|
||||
import logging
import re
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional

import requests
|
||||
|
||||
# Module-scoped logger, named after this module.
logger = logging.getLogger(__name__)

# Prefix for TMDB REST endpoints (search paths are appended to it).
TMDB_API_BASE = "https://api.themoviedb.org/3"
# Prefix joined with a result's poster / backdrop path to form an image URL.
TMDB_IMAGE_BASE = "https://image.tmdb.org/t/p/w500"
|
||||
|
||||
|
||||
@dataclass
class MediaInfo:
    """Film / TV metadata record for one matched title.

    Every field has a default, so an instance can be created empty and
    filled in piece by piece. List-valued fields use ``default_factory`` so
    each instance owns its own list. Field order is part of the positional
    constructor signature and must not be rearranged.
    """

    # Titles and release year (all stored as text).
    title: str = ""
    original_title: str = ""
    year: str = ""

    # Artwork URLs.
    poster_url: str = ""
    backdrop_url: str = ""

    # Rating (kept as text) and the number of votes behind it.
    rating: str = ""
    rating_count: int = 0

    # Synopsis and genre labels.
    description: str = ""
    genres: List[str] = field(default_factory=list)

    # Either "movie" or "tv".
    media_type: str = ""
    tmdb_id: int = 0

    # People and production details.
    directors: List[str] = field(default_factory=list)
    actors: List[str] = field(default_factory=list)
    region: str = ""
    duration: str = ""

    # Season / episode counters.
    seasons: int = 0
    episodes: int = 0

    # Provenance: which provider produced the record, and its page there.
    source: str = "tmdb"
    tmdb_url: str = ""
|
||||
|
||||
|
||||
class TMDBEnricher:
    """Look up film / TV metadata on TMDB from noisy cloud-drive file names.

    A raw name is first reduced to a ``(title, year)`` pair, then searched
    on TMDB's movie or TV index. Successful lookups are kept in an
    in-memory, per-instance cache for ``cache_ttl`` seconds.
    """

    # Common cloud-drive file-name shapes as ``(regex, year_group)`` pairs.
    # Group 1 is always the title; ``year_group`` is the index of the group
    # holding the 4-digit year, or ``None`` when the shape carries no year.
    # ``\uff08`` / ``\uff09`` are the full-width parentheses that are common
    # in Chinese file names.
    TITLE_PATTERNS = [
        # "[4K] Title (2023)": bracketed tag, title, year in parentheses
        (r'\[.*?\]\s*(.+?)\s*[(\uff08](\d{4})[)\uff09]', 2),
        # "Title.2023.4K": dot-separated title, year, quality tag
        (r'(.+?)\.(\d{4})\.(?:4K|1080[Pp]|2160[Pp]|HD)', 2),
        # "Title 2023": year followed by whitespace or the end of the name
        (r'(.+?)\s+(\d{4})(?:\s|$)', 2),
        # "Title.S01E01": episode marker, no year available
        (r'(.+?)[\.\s][Ss](\d{2})[Ee](\d{2})', None),
    ]

    # Bracketed status suffixes (finished / "all N episodes" / updating)
    # that are removed from an extracted title.
    _STATUS_SUFFIX = r'\s*[\[(\uff08].*?(?:完结|全\d+集|更新).*?[\])\uff09]'

    # Markers in a raw name that indicate a TV series rather than a film.
    _TV_HINTS = (
        r'[Ss]\d{2}[Ee]\d{2}',
        r'第[一二三四五六七八九十\d]+季',
        r'[Ss]eason\s*\d+',
        r'全\d+集',
        r'更新至\d+',
    )

    def __init__(self, api_key: str, language: str = "zh-CN",
                 cache_ttl: int = 86400):
        """Store the API credentials and create an empty result cache.

        Args:
            api_key: TMDB API key sent with every search request.
            language: Value of the ``language`` query parameter.
            cache_ttl: Seconds a cached lookup stays valid.
        """
        self.api_key = api_key
        self.language = language
        self.cache_ttl = cache_ttl
        self._cache: Dict[str, tuple] = {}  # key -> (data, timestamp)

    def enrich(self, title: str, media_type: Optional[str] = None) -> Optional["MediaInfo"]:
        """Return TMDB metadata for a raw title, or ``None`` if nothing matched.

        Args:
            title: Raw file / resource name.
            media_type: ``"movie"`` or ``"tv"``; guessed from ``title`` when
                omitted.
        """
        clean_title, year = self._extract_title_year(title)
        if not clean_title:
            # Nothing to search for; avoid sending an empty query.
            return None

        cache_key = f"{clean_title}:{year}:{media_type}"
        cached = self._cache.get(cache_key)
        if cached is not None:
            info, stored_at = cached
            if time.time() - stored_at < self.cache_ttl:
                return info

        if not media_type:
            # Guess from the RAW name: the cleaned title has already lost
            # the episode / season markers the heuristics look for.
            media_type = self._guess_type(title)

        info = self._search(clean_title, year, media_type)
        if info:
            self._cache[cache_key] = (info, time.time())
        return info

    def enrich_batch(self, titles: List[str], max_concurrent: int = 5) -> Dict[str, Optional["MediaInfo"]]:
        """Look up several titles concurrently.

        Args:
            titles: Raw names to look up; duplicates are queried once.
            max_concurrent: Upper bound on worker threads (values below 1
                are treated as 1).

        Returns:
            Mapping of raw title to the ``enrich`` result (``None`` when
            nothing matched). Titles whose lookup raised are logged and
            left out of the mapping.
        """
        results: Dict[str, Optional["MediaInfo"]] = {}
        # De-duplicate while keeping order so one title is one request.
        unique_titles = list(dict.fromkeys(titles))
        if not unique_titles:
            return results

        workers = max(1, min(max_concurrent, len(unique_titles)))
        with ThreadPoolExecutor(max_workers=workers) as ex:
            futures = {ex.submit(self.enrich, t): t for t in unique_titles}
            for fut in as_completed(futures):
                try:
                    results[futures[fut]] = fut.result()
                except Exception as e:  # best effort: one bad title must not sink the batch
                    logger.warning("TMDB enrich failed: %s - %s", futures[fut], e)
        return results

    def _extract_title_year(self, title: str) -> tuple:
        """Split a raw name into ``(clean_title, year)``.

        The first entry of ``TITLE_PATTERNS`` that matches wins. ``year`` is
        ``""`` when the matching shape has no year group; if nothing
        matches, the stripped input is returned with an empty year.
        """
        if not title:
            return "", ""
        for pattern, year_group in self.TITLE_PATTERNS:
            m = re.search(pattern, title, re.IGNORECASE)
            if not m:
                continue
            year = ""
            # Group 1 is the title itself, so it can never be the year.
            if year_group and 1 < year_group <= len(m.groups()):
                year = m.group(year_group) or ""
            name = re.sub(self._STATUS_SUFFIX, '', m.group(1).strip())
            return name.strip(), year
        return title.strip(), ""

    def _guess_type(self, title: str) -> str:
        """Return ``"tv"`` if the name carries a series marker, else ``"movie"``."""
        if any(re.search(p, title, re.IGNORECASE) for p in self._TV_HINTS):
            return "tv"
        return "movie"

    def _search(self, title: str, year: str = "", media_type: str = "movie") -> Optional["MediaInfo"]:
        """Search TMDB and parse the first hit.

        A movie search that finds nothing is retried on the TV index. Any
        failure (network, HTTP status, malformed payload) is logged and
        reported as ``None`` so callers never see an exception.
        """
        primary = "tv" if media_type == "tv" else "movie"
        attempts = ("tv",) if primary == "tv" else ("movie", "tv")
        try:
            for search_type in attempts:
                results = self._query(search_type, title, year)
                if results:
                    # Parse with the index that actually produced the hit so
                    # media_type and tmdb_url match the returned id.
                    return self._parse_result(results[0], search_type)
            return None
        except Exception as e:  # deliberate best-effort boundary
            logger.error("TMDB search error: %s - %s", title, e)
            return None

    def _query(self, search_type: str, title: str, year: str) -> list:
        """Run one ``/search/{search_type}`` request and return its result list."""
        params = {
            "api_key": self.api_key,
            "query": title,
            "language": self.language,
            "page": 1,
        }
        if year:
            # The year filter is named differently on the two endpoints.
            year_param = "year" if search_type == "movie" else "first_air_date_year"
            params[year_param] = year

        resp = requests.get(
            f"{TMDB_API_BASE}/search/{search_type}",
            params=params, timeout=10
        )
        # Surface auth / quota errors instead of treating them as "no hits".
        resp.raise_for_status()
        return resp.json().get("results") or []

    @staticmethod
    def _genre_names(item: dict) -> List[str]:
        """Return the genre names carried by a result item.

        Entries may be dicts with a ``name`` key or bare integer IDs; bare
        IDs carry no name and are skipped, so the list may be empty.
        """
        entries = item.get("genres") or item.get("genre_ids") or []
        return [g["name"] for g in entries if isinstance(g, dict) and g.get("name")]

    def _parse_result(self, item: dict, media_type: str) -> "MediaInfo":
        """Convert one TMDB result dict into a ``MediaInfo``.

        Missing or ``null`` fields fall back to empty / zero values rather
        than raising.
        """
        mid = item.get("id") or 0
        is_tv = media_type == "tv" or item.get("media_type") == "tv"
        kind = "tv" if is_tv else "movie"

        released = item.get("release_date") or item.get("first_air_date") or ""
        poster = item.get("poster_path")
        backdrop = item.get("backdrop_path")

        return MediaInfo(
            title=item.get("title") or item.get("name") or "",
            original_title=item.get("original_title") or item.get("original_name") or "",
            year=str(released)[:4],
            poster_url=f"{TMDB_IMAGE_BASE}{poster}" if poster else "",
            backdrop_url=f"{TMDB_IMAGE_BASE}{backdrop}" if backdrop else "",
            rating=str(round(item.get("vote_average") or 0, 1)),
            rating_count=item.get("vote_count") or 0,
            description=(item.get("overview") or "")[:500],
            genres=self._genre_names(item),
            media_type=kind,
            tmdb_id=mid,
            tmdb_url=f"https://www.themoviedb.org/{kind}/{mid}",
        )
|
||||
Reference in New Issue
Block a user