""" 运营偏好数据管理 """ from typing import List, Dict, Any, Optional from datetime import datetime import json from pathlib import Path from collections import Counter class OperatorPreferenceManager: """运营偏好数据管理器""" def __init__(self, storage_path: str): self.storage_path = Path(storage_path) self.storage_path.mkdir(parents=True, exist_ok=True) self.feedback_file = self.storage_path / "operator_feedback.jsonl" async def save_feedback( self, content_id: str, rating: str, notes: str, operator_id: str, content_features: Optional[Dict[str, Any]] = None, ) -> None: """保存运营反馈""" record = { "content_id": content_id, "timestamp": datetime.now().isoformat(), "rating": rating, "notes": notes, "operator_id": operator_id, "content_features": content_features or {}, } with open(self.feedback_file, "a", encoding="utf-8") as f: f.write(json.dumps(record, ensure_ascii=False) + "\n") async def get_feedbacks( self, content_id: Optional[str] = None, rating_filter: Optional[str] = None, operator_id: Optional[str] = None, limit: int = 100, ) -> List[Dict[str, Any]]: """获取反馈记录""" if not self.feedback_file.exists(): return [] records = [] with open(self.feedback_file, "r", encoding="utf-8") as f: for line in f: if line.strip(): record = json.loads(line) # 应用筛选条件 if content_id and record.get("content_id") != content_id: continue if rating_filter and record.get("rating") != rating_filter: continue if operator_id and record.get("operator_id") != operator_id: continue records.append(record) return records[-limit:] async def analyze_preferences( self, operator_id: Optional[str] = None, ) -> Dict[str, Any]: """分析运营偏好""" feedbacks = await self.get_feedbacks( operator_id=operator_id, limit=1000, ) if not feedbacks: return { "rating_distribution": {}, "preferred_keywords": [], "preferred_tags": [], "preferred_platforms": [], } # 评级分布 ratings = [f.get("rating") for f in feedbacks] rating_dist = dict(Counter(ratings)) # 提取优质内容的特征 excellent_feedbacks = [f for f in feedbacks if f.get("rating") == "excellent"] keywords = [] tags = [] platforms = [] for feedback in excellent_feedbacks: features = feedback.get("content_features", {}) keywords.extend(features.get("keywords", [])) tags.extend(features.get("tags", [])) if "platform" in features: platforms.append(features["platform"]) # 统计频率 keyword_freq = Counter(keywords) tag_freq = Counter(tags) platform_freq = Counter(platforms) return { "rating_distribution": rating_dist, "preferred_keywords": [k for k, _ in keyword_freq.most_common(10)], "preferred_tags": [t for t, _ in tag_freq.most_common(10)], "preferred_platforms": [p for p, _ in platform_freq.most_common()], "total_feedbacks": len(feedbacks), "excellent_count": len(excellent_feedbacks), } async def get_learning_insights(self) -> Dict[str, Any]: """获取学习洞察""" all_feedbacks = await self.get_feedbacks(limit=1000) if not all_feedbacks: return {"insights": []} # 分析优质内容的共同特征 excellent = [f for f in all_feedbacks if f.get("rating") == "excellent"] good = [f for f in all_feedbacks if f.get("rating") == "good"] poor = [f for f in all_feedbacks if f.get("rating") == "poor"] insights = [] # 洞察1:评级分布 insights.append({ "type": "rating_distribution", "message": f"优质内容占比:{len(excellent) / len(all_feedbacks) * 100:.1f}%", "data": { "excellent": len(excellent), "good": len(good), "poor": len(poor), }, }) # 洞察2:优质内容特征 if excellent: excellent_features = self._extract_common_features(excellent) insights.append({ "type": "excellent_features", "message": "优质内容的共同特征", "data": excellent_features, }) # 洞察3:需要避免的特征 if poor: poor_features = self._extract_common_features(poor) insights.append({ "type": "poor_features", "message": "低质内容的共同特征(需避免)", "data": poor_features, }) return {"insights": insights} def _extract_common_features( self, feedbacks: List[Dict[str, Any]], ) -> Dict[str, Any]: """提取共同特征""" all_keywords = [] all_tags = [] all_platforms = [] for feedback in feedbacks: features = feedback.get("content_features", {}) all_keywords.extend(features.get("keywords", [])) all_tags.extend(features.get("tags", [])) if "platform" in features: all_platforms.append(features["platform"]) return { "top_keywords": [k for k, _ in Counter(all_keywords).most_common(5)], "top_tags": [t for t, _ in Counter(all_tags).most_common(5)], "top_platforms": [p for p, _ in Counter(all_platforms).most_common(3)], }