# howard / Agent
"""
Operator preference data management.
"""

from typing import List, Dict, Any, Optional
from datetime import datetime
import json
from pathlib import Path
from collections import Counter


class OperatorPreferenceManager:
    """Store operator feedback as JSON Lines and derive preference statistics.

    Each feedback record is appended as one JSON object per line to
    ``operator_feedback.jsonl`` inside the storage directory. The read-side
    methods scan that file and aggregate the ``content_features`` of the
    records (``keywords``, ``tags`` and ``platform``).

    The methods are coroutines, but the file access inside them is ordinary
    blocking I/O.
    """

    def __init__(self, storage_path: str):
        """Create the storage directory if needed and locate the feedback file.

        Args:
            storage_path: Directory that holds ``operator_feedback.jsonl``.
                Missing parent directories are created.
        """
        self.storage_path = Path(storage_path)
        self.storage_path.mkdir(parents=True, exist_ok=True)
        self.feedback_file = self.storage_path / "operator_feedback.jsonl"

    async def save_feedback(
        self,
        content_id: str,
        rating: str,
        notes: str,
        operator_id: str,
        content_features: Optional[Dict[str, Any]] = None,
    ) -> None:
        """Append one feedback record to the feedback file.

        Args:
            content_id: Identifier of the content being rated.
            rating: Rating label; the analysis methods of this class look for
                ``"excellent"``, ``"good"`` and ``"poor"``.
            notes: Free-form notes from the operator.
            operator_id: Identifier of the operator giving the feedback.
            content_features: Optional feature mapping; the analysis methods
                read its ``keywords``, ``tags`` and ``platform`` entries.
                Stored as ``{}`` when omitted or empty.
        """
        record = {
            "content_id": content_id,
            # Local, timezone-naive timestamp in ISO 8601 format.
            "timestamp": datetime.now().isoformat(),
            "rating": rating,
            "notes": notes,
            "operator_id": operator_id,
            "content_features": content_features or {},
        }

        with open(self.feedback_file, "a", encoding="utf-8") as f:
            f.write(json.dumps(record, ensure_ascii=False) + "\n")

    async def get_feedbacks(
        self,
        content_id: Optional[str] = None,
        rating_filter: Optional[str] = None,
        operator_id: Optional[str] = None,
        limit: int = 100,
    ) -> List[Dict[str, Any]]:
        """Return the most recent feedback records that match the filters.

        Args:
            content_id: Keep only records with this content id. A falsy value
                (``None`` or ``""``) disables the filter.
            rating_filter: Keep only records with this rating. A falsy value
                disables the filter.
            operator_id: Keep only records from this operator. A falsy value
                disables the filter.
            limit: Maximum number of records to return. Values ``<= 0`` yield
                an empty list.

        Returns:
            Up to ``limit`` matching records in file (oldest-first) order,
            taken from the end of the file. Blank lines, lines that are not
            valid JSON and lines that are not JSON objects are skipped.
        """
        # ``records[-0:]`` would return *every* record and a negative limit
        # would slice from the front, so handle non-positive limits up front.
        if limit <= 0 or not self.feedback_file.exists():
            return []

        records: List[Dict[str, Any]] = []
        with open(self.feedback_file, "r", encoding="utf-8") as f:
            for line in f:
                if not line.strip():
                    continue

                # A torn append (e.g. a crash mid-write) must not make the
                # whole history unreadable, so unusable lines are skipped.
                try:
                    record = json.loads(line)
                except json.JSONDecodeError:
                    continue
                if not isinstance(record, dict):
                    continue

                # Apply the optional filters.
                if content_id and record.get("content_id") != content_id:
                    continue
                if rating_filter and record.get("rating") != rating_filter:
                    continue
                if operator_id and record.get("operator_id") != operator_id:
                    continue

                records.append(record)

        return records[-limit:]

    async def analyze_preferences(
        self,
        operator_id: Optional[str] = None,
    ) -> Dict[str, Any]:
        """Summarise ratings and the features of ``"excellent"`` content.

        Looks at the latest 1000 feedback records (optionally restricted to
        one operator).

        Args:
            operator_id: Restrict the analysis to this operator when given.

        Returns:
            A dict with the keys ``rating_distribution`` (rating -> count),
            ``preferred_keywords`` and ``preferred_tags`` (up to 10 each, most
            frequent first), ``preferred_platforms`` (all, most frequent
            first), ``total_feedbacks`` and ``excellent_count``. The same keys
            are present when there is no feedback, with empty collections and
            zero counts.
        """
        feedbacks = await self.get_feedbacks(
            operator_id=operator_id,
            limit=1000,
        )

        rating_dist = dict(Counter(f.get("rating") for f in feedbacks))

        # Preferences are derived only from content rated "excellent".
        excellent_feedbacks = [f for f in feedbacks if f.get("rating") == "excellent"]
        freq = self._count_features(excellent_feedbacks)

        return {
            "rating_distribution": rating_dist,
            "preferred_keywords": [k for k, _ in freq["keywords"].most_common(10)],
            "preferred_tags": [t for t, _ in freq["tags"].most_common(10)],
            "preferred_platforms": [p for p, _ in freq["platforms"].most_common()],
            "total_feedbacks": len(feedbacks),
            "excellent_count": len(excellent_feedbacks),
        }

    async def get_learning_insights(self) -> Dict[str, Any]:
        """Build a list of insights from the latest 1000 feedback records.

        Returns:
            ``{"insights": [...]}``. The list is empty when there is no
            feedback. Otherwise it contains a ``rating_distribution`` entry,
            followed by ``excellent_features`` and ``poor_features`` entries
            when records with the respective rating exist.
        """
        all_feedbacks = await self.get_feedbacks(limit=1000)

        if not all_feedbacks:
            return {"insights": []}

        excellent = [f for f in all_feedbacks if f.get("rating") == "excellent"]
        good = [f for f in all_feedbacks if f.get("rating") == "good"]
        poor = [f for f in all_feedbacks if f.get("rating") == "poor"]

        insights = []

        # Insight 1: rating distribution. ``all_feedbacks`` is non-empty here,
        # so the division is safe.
        insights.append({
            "type": "rating_distribution",
            "message": f"优质内容占比：{len(excellent) / len(all_feedbacks) * 100:.1f}%",
            "data": {
                "excellent": len(excellent),
                "good": len(good),
                "poor": len(poor),
            },
        })

        # Insight 2: features shared by excellent content.
        if excellent:
            insights.append({
                "type": "excellent_features",
                "message": "优质内容的共同特征",
                "data": self._extract_common_features(excellent),
            })

        # Insight 3: features shared by poor content (to be avoided).
        if poor:
            insights.append({
                "type": "poor_features",
                "message": "低质内容的共同特征（需避免）",
                "data": self._extract_common_features(poor),
            })

        return {"insights": insights}

    def _extract_common_features(
        self,
        feedbacks: List[Dict[str, Any]],
    ) -> Dict[str, Any]:
        """Return the most frequent features across ``feedbacks``.

        Args:
            feedbacks: Feedback records as returned by ``get_feedbacks``.

        Returns:
            A dict with ``top_keywords`` (up to 5), ``top_tags`` (up to 5) and
            ``top_platforms`` (up to 3), each ordered most frequent first.
        """
        freq = self._count_features(feedbacks)
        return {
            "top_keywords": [k for k, _ in freq["keywords"].most_common(5)],
            "top_tags": [t for t, _ in freq["tags"].most_common(5)],
            "top_platforms": [p for p, _ in freq["platforms"].most_common(3)],
        }

    @staticmethod
    def _count_features(feedbacks: List[Dict[str, Any]]) -> Dict[str, Counter]:
        """Count keyword, tag and platform occurrences across ``feedbacks``.

        Records whose ``content_features`` is missing, ``None`` or not a dict
        contribute nothing; a ``None`` ``keywords``/``tags`` entry is treated
        as empty.

        Returns:
            A dict with the ``Counter`` objects ``keywords``, ``tags`` and
            ``platforms``.
        """
        keywords: List[Any] = []
        tags: List[Any] = []
        platforms: List[Any] = []

        for feedback in feedbacks:
            features = feedback.get("content_features") or {}
            if not isinstance(features, dict):
                continue
            keywords.extend(features.get("keywords") or [])
            tags.extend(features.get("tags") or [])
            if "platform" in features:
                platforms.append(features["platform"])

        return {
            "keywords": Counter(keywords),
            "tags": Counter(tags),
            "platforms": Counter(platforms),
        }