
Merge branch 'master' into 2025-03-17-account-crawler-pipeline

merge master
luojunhui 3 months ago
commit 8c314e5160

+ 34 - 1
applications/aiditApi.py

@@ -404,4 +404,37 @@ def get_only_auto_reply_accounts():
     denet = DeNetMysql()
     result = denet.select(sql)
     account_id_list = [i[0] for i in result]
-    return set(account_id_list)
+    return set(account_id_list)
+
+
+def auto_create_single_video_crawler_task(plan_name, plan_tag, video_id_list):
+    url = "http://aigc-api.cybertogether.net/aigc/crawler/plan/save"
+    payload = json.dumps({
+        "params": {
+            "contentFilters": [],
+            "accountFilters": [],
+            "filterAccountMatchMode": 1,
+            "filterContentMatchMode": 1,
+            "selectModeValues": [],
+            "searchModeValues": [],
+            "contentModal": 4,
+            "analyze": {},
+            "crawlerComment": 0,
+            "inputGroup": [],
+            "inputSourceGroups": [],
+            "modePublishTime": [],
+            "name": plan_name,
+            "frequencyType": 2,
+            "channel": 10,
+            "crawlerMode": 5,
+            "planTag": plan_tag,
+            "voiceExtractFlag": 1,
+            "srtExtractFlag": 1,
+            "videoKeyFrameType": 1,
+            "inputModeValues": video_id_list,
+            "planType": 2
+        },
+        "baseInfo": PERSON_COOKIE
+    })
+    response = requests.request("POST", url, headers=HEADERS, data=payload)
+    return response.json()
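
A minimal usage sketch of the new helper for reviewers. The plan name, tag and video ids below are illustrative placeholders; the real caller in coldStartTasks/publish/publish_single_video_pool_videos.py builds them from video_pool_config:

    from applications import aiditApi

    # illustrative values only; the production caller derives these from video_pool_config
    crawler_plan_response = aiditApi.auto_create_single_video_crawler_task(
        plan_name="单视频供给冷启动-2025-03-17-视频数量: 2",
        plan_tag="单视频供给冷启动",
        video_id_list=["12345", "67890"],
    )
    crawler_plan_id = crawler_plan_response["data"]["id"]  # id of the newly created crawler plan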

+ 1 - 0
applications/api/__init__.py

@@ -1,6 +1,7 @@
 """
 @author: luojunhui
 """
+from .aigc_system_api import AigcSystemApi
 from .deep_seek_api_by_volcanoengine import fetch_deepseek_response
 from .moon_shot_api import fetch_moon_shot_response
 from .nlp_api import similarity_between_title_list

+ 45 - 0
applications/api/aigc_system_api.py

@@ -0,0 +1,45 @@
+"""
+@author: luojunhui
+"""
+
+from tenacity import retry
+from requests.exceptions import RequestException
+import requests
+import json
+from typing import Optional, Dict, List, TypedDict
+
+from applications.utils import request_retry
+
+retry_desc = request_retry(retry_times=3, min_retry_delay=2, max_retry_delay=30)
+headers = {
+    "Accept": "application/json",
+    "Accept-Language": "zh-CN,zh;q=0.9",
+    "Content-Type": "application/json",
+    "Proxy-Connection": "keep-alive",
+    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
+}
+
+
+class RelationDict(TypedDict):
+    videoPoolTraceId: str
+    channelContentId: str
+    platform: str
+
+
+class AigcSystemApi:
+
+    @retry(**retry_desc)
+    def insert_crawler_relation_to_aigc_system(
+        self, relation_list: List[RelationDict]
+    ) -> Optional[Dict]:
+        url = "http://aigc-api.cybertogether.net/aigc/crawler/content/videoPoolCrawlerRelation"
+        payload = json.dumps({"params": {"relations": relation_list}})
+        try:
+            response = requests.post(url, headers=headers, data=payload, timeout=60)
+            response.raise_for_status()
+            return response.json()
+        except RequestException as e:
+            print(f"API请求失败: {e}")
+        except json.JSONDecodeError as e:
+            print(f"响应解析失败: {e}")
+        return None
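
A hedged usage sketch of the new client. The relation values are placeholders; the real call site is check_video_status in coldStartTasks/publish/publish_video_to_pq_for_audit.py:

    from applications.api import AigcSystemApi

    aigc = AigcSystemApi()
    # placeholder relation values for illustration only
    result = aigc.insert_crawler_relation_to_aigc_system(
        relation_list=[
            {
                "videoPoolTraceId": "trace_id_placeholder",
                "channelContentId": "123456",
                "platform": "sph",
            }
        ]
    )
    if result is None:
        # request or JSON-decode errors are swallowed inside the client and surface as None
        print("relation insert failed")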

+ 34 - 2
applications/const/__init__.py

@@ -88,7 +88,7 @@ class updatePublishedMsgTaskConst:
     SUBSCRIBE_FAIL_RATE_THRESHOLD = 0.3
 
 
-class updateAccountReadRateTaskConst:
+class UpdateAccountReadRateTaskConst:
     """
     更新账号阅读率常量配置
     """
@@ -104,8 +104,14 @@ class updateAccountReadRateTaskConst:
     # 文章位置
     ARTICLE_INDEX_LIST = [1, 2, 3, 4, 5, 6, 7, 8]
 
+    # 默认粉丝
+    DEFAULT_FANS = 0
 
-class updateAccountReadAvgTaskConst:
+    # 最低粉丝量
+    MIN_FANS = 1000
+
+
+class UpdateAccountReadAvgTaskConst:
     """
     更新账号阅读均值常量配置
     """
@@ -124,6 +130,19 @@ class updateAccountReadAvgTaskConst:
     ARTICLES_DAILY = 1
     TOULIU = 2
 
+    # 默认粉丝
+    DEFAULT_FANS = 0
+
+    # index list
+    ARTICLE_INDEX_LIST = [1, 2, 3, 4, 5, 6, 7, 8]
+
+    # 默认点赞
+    DEFAULT_LIKE = 0
+
+    # 状态
+    USING_STATUS = 1
+    NOT_USING_STATUS = 0
+
 
 class WeixinVideoCrawlerConst:
     """
@@ -179,6 +198,9 @@ class WeixinVideoCrawlerConst:
     # 每天发送的审核视频数量
     MAX_VIDEO_NUM = 1000
 
+    # 单次发布视频审核量
+    MAX_VIDEO_NUM_PER_PUBLISH = 350
+
     # 标题状态
     TITLE_DEFAULT_STATUS = 0
     TITLE_EXIT_STATUS = 1
@@ -346,6 +368,16 @@ class ToutiaoVideoCrawlerConst:
     SLEEP_SECOND = 3
 
 
+class SingleVideoPoolPublishTaskConst:
+    """
+    const for single video pool publish task
+    """
+    TRANSFORM_INIT_STATUS = 0
+    TRANSFORM_SUCCESS_STATUS = 1
+    TRANSFORM_FAIL_STATUS = 99
+
+
+
 
 
 

+ 2 - 0
applications/utils/__init__.py

@@ -12,3 +12,5 @@ from .save_to_db import insert_into_single_video_source_table
 from .save_to_db import insert_into_video_meta_accounts_table
 from .save_to_db import insert_into_associated_recommendation_table
 from .upload import upload_to_oss
+from .fetch_info_from_aigc import fetch_account_fans
+from .fetch_info_from_aigc import fetch_publishing_account_list

+ 58 - 0
applications/utils/fetch_info_from_aigc.py

@@ -0,0 +1,58 @@
+"""
+fetch info from aigc database system
+"""
+from collections import defaultdict
+from typing import List, Dict
+
+from pymysql.cursors import DictCursor
+
+
+def fetch_publishing_account_list(db_client) -> List[Dict]:
+    """
+    fetch account_list from aigc database
+    """
+    fetch_sql = f"""
+        SELECT DISTINCT
+            t3.`name` as account_name,
+            t3.gh_id as gh_id,
+            t3.follower_count as fans,
+            t6.account_source_name as account_source,
+            t6.mode_type as mode_type,
+            t6.account_type as account_type,
+            t6.`status` as status
+        FROM
+            publish_plan t1
+            JOIN publish_plan_account t2 ON t1.id = t2.plan_id
+            JOIN publish_account t3 ON t2.account_id = t3.id
+            LEFT JOIN publish_account_wx_type t4 on t3.id = t4.account_id
+            LEFT JOIN wx_statistics_group_source_account t5 on t3.id = t5.account_id
+            LEFT JOIN wx_statistics_group_source t6 on t5.group_source_name = t6.account_source_name
+        WHERE
+            t1.plan_status = 1
+            AND t3.channel = 5
+            GROUP BY t3.id;
+    """
+    account_list = db_client.fetch(
+        query=fetch_sql,
+        cursor_type=DictCursor
+    )
+    return account_list
+
+def fetch_account_fans(db_client, start_date: str) -> Dict:
+    """
+    fetch account fans from aigc database
+    """
+    sql = f"""
+        SELECT t1.date_str, t1.fans_count, t2.gh_id
+        FROM datastat_wx t1 JOIN publish_account t2 ON t1.account_id = t2.id
+        WHERE t2.channel = 5
+            AND t2.status = 1 
+            AND t1.date_str >= '{start_date}' 
+        ORDER BY t1.date_str;
+        """
+    result = db_client.fetch(sql)
+    fans_dict = defaultdict(dict)
+    for dt, fans, gh_id in result:
+        fans_dict.setdefault(gh_id, {})[dt] = fans
+    return fans_dict
+
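
The helper returns a nested mapping keyed by gh_id and then by date string, which is how the callers consume it (values below are illustrative):

    # shape of the fetch_account_fans(...) result: gh_id -> {date_str: fans_count}
    fans_dict = {
        "gh_example_id": {
            "2024-07-01": 12000,
            "2024-07-02": 12035,
        },
    }
    fans = fans_dict.get("gh_example_id", {}).get("2024-07-01", 0)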

+ 81 - 117
cal_account_read_rate_avg_daily.py

@@ -7,14 +7,21 @@ from tqdm import tqdm
 from pandas import DataFrame
 from argparse import ArgumentParser
 from datetime import datetime
+from pymysql.cursors import DictCursor
 
-from applications import DeNetMysql, PQMySQL, longArticlesMySQL, bot, Functions, create_feishu_columns_sheet
-from applications.const import updateAccountReadRateTaskConst
-from config import apolloConfig
+from applications import bot, Functions, log
+from applications import create_feishu_columns_sheet
+from applications.db import DatabaseConnector
+from applications.const import UpdateAccountReadRateTaskConst
+from applications.utils import fetch_publishing_account_list
+from applications.utils import fetch_account_fans
+from config import apolloConfig, long_articles_config, piaoquan_crawler_config, denet_config
 
-const = updateAccountReadRateTaskConst()
+
+const = UpdateAccountReadRateTaskConst()
 config = apolloConfig()
 unauthorized_account = json.loads(config.getConfigValue("unauthorized_gh_id_fans"))
+backup_account_fans = json.loads(config.getConfigValue("backup_account_fans"))
 functions = Functions()
 read_rate_table = "long_articles_read_rate"
 
@@ -37,75 +44,7 @@ def filter_outlier_data(group, key='show_view_count'):
     return filtered_group
 
 
-def get_account_fans_by_dt(db_client) -> dict:
-    """
-    获取每个账号发粉丝,通过日期来区分
-    :return:
-    """
-    sql = f"""
-        SELECT 
-            t1.date_str, 
-            t1.fans_count, 
-            t2.gh_id
-        FROM datastat_wx t1
-        JOIN publish_account t2 ON t1.account_id = t2.id
-        WHERE 
-            t2.channel = 5 
-        AND t2.status = 1 
-        AND t1.date_str >= '2024-07-01' 
-        ORDER BY t1.date_str;
-    """
-    result = db_client.select(sql)
-    D = {}
-    for line in result:
-        dt = line[0]
-        fans = line[1]
-        gh_id = line[2]
-        if D.get(gh_id):
-            D[gh_id][dt] = fans
-        else:
-            D[gh_id] = {dt: fans}
-    return D
-
-
-def get_publishing_accounts(db_client) -> list[dict]:
-    """
-    获取每日正在发布的账号
-    :return:
-    """
-    sql = f"""
-    SELECT DISTINCT
-        t3.`name`,
-        t3.gh_id,
-        t3.follower_count,
-        t6.account_source_name,
-        t6.mode_type,
-        t6.account_type,
-        t6.`status`
-    FROM
-        publish_plan t1
-        JOIN publish_plan_account t2 ON t1.id = t2.plan_id
-        JOIN publish_account t3 ON t2.account_id = t3.id
-        LEFT JOIN publish_account_wx_type t4 on t3.id = t4.account_id
-        LEFT JOIN wx_statistics_group_source_account t5 on t3.id = t5.account_id
-        LEFT JOIN wx_statistics_group_source t6 on t5.group_source_name = t6.account_source_name
-    WHERE
-        t1.plan_status = 1
-        AND t3.channel = 5
-        -- AND t3.follower_count > 0
-        GROUP BY t3.id;
-    """
-    account_list = db_client.select(sql)
-    result_list = [
-        {
-            "account_name": i[0],
-            "gh_id": i[1]
-        } for i in account_list
-    ]
-    return result_list
-
-
-def get_account_articles_detail(db_client, gh_id_tuple) -> list[dict]:
+def get_account_articles_detail(db_client, gh_id_tuple, min_publish_timestamp) -> list[dict]:
     """
     get articles details
     :return:
@@ -116,47 +55,37 @@ def get_account_articles_detail(db_client, gh_id_tuple) -> list[dict]:
             FROM 
                 official_articles_v2
             WHERE 
-                ghId IN {gh_id_tuple} and Type = '{const.BULK_PUBLISH_TYPE}';
+                ghId IN {gh_id_tuple} and Type = '{const.BULK_PUBLISH_TYPE}' and publish_timestamp >= {min_publish_timestamp};
             """
-    result = db_client.select(sql)
-    response_list = [
-        {
-            "ghId": i[0],
-            "accountName": i[1],
-            "ItemIndex": i[2],
-            "show_view_count": i[3],
-            "publish_timestamp": i[4]
-        }
-        for i in result
-    ]
+    response_list = db_client.fetch(query=sql, cursor_type=DictCursor)
     return response_list
 
 
-def cal_account_read_rate(gh_id_tuple) -> DataFrame:
+def cal_account_read_rate(article_list, fans_dict) -> DataFrame:
     """
     计算账号位置的阅读率
     :return:
     """
-    pq_db = PQMySQL()
-    de_db = DeNetMysql()
     response = []
-    fans_dict_each_day = get_account_fans_by_dt(db_client=de_db)
-    account_article_detail = get_account_articles_detail(
-        db_client=pq_db,
-        gh_id_tuple=gh_id_tuple
-    )
-    for line in account_article_detail:
+    for line in article_list:
         gh_id = line['ghId']
         dt = functions.timestamp_to_str(timestamp=line['publish_timestamp'], string_format='%Y-%m-%d')
-        fans = fans_dict_each_day.get(gh_id, {}).get(dt, 0)
+        fans = fans_dict.get(gh_id, {}).get(dt, const.DEFAULT_FANS)
+        if not fans:
+            fans = int(unauthorized_account.get(gh_id, const.DEFAULT_FANS))
         if not fans:
-            fans = int(unauthorized_account.get(gh_id, 0))
+            fans = int(backup_account_fans.get(gh_id, const.DEFAULT_FANS))
+            log(
+                task='cal_read_rate_avg_task',
+                function='cal_account_read_rate',
+                message='未获取到粉丝,使用备份粉丝表',
+                data=line
+            )
         line['fans'] = fans
-        if fans > 1000:
+        if fans > const.MIN_FANS:
             line['readRate'] = line['show_view_count'] / fans if fans else 0
             response.append(line)
-    return DataFrame(response,
-                     columns=['ghId', 'accountName', 'ItemIndex', 'show_view_count', 'publish_timestamp', 'readRate'])
+    return DataFrame(response, columns=['ghId', 'accountName', 'ItemIndex', 'show_view_count', 'publish_timestamp', 'readRate'])
 
 
 def cal_avg_account_read_rate(df, gh_id, index, dt) -> dict:
@@ -168,7 +97,7 @@ def cal_avg_account_read_rate(df, gh_id, index, dt) -> dict:
     min_time = max_time - const.STATISTICS_PERIOD
 
     # 通过
-    filterDataFrame = df[
+    filter_dataframe = df[
         (df["ghId"] == gh_id)
         & (min_time <= df["publish_timestamp"])
         & (df["publish_timestamp"] <= max_time)
@@ -176,13 +105,13 @@ def cal_avg_account_read_rate(df, gh_id, index, dt) -> dict:
         ]
 
     # 用二倍标准差过滤
-    finalDF = filter_outlier_data(filterDataFrame)
+    final_dataframe = filter_outlier_data(filter_dataframe)
 
     return {
-        "read_rate_avg": finalDF['readRate'].mean(),
-        "max_publish_time": finalDF['publish_timestamp'].max(),
-        "min_publish_time": finalDF['publish_timestamp'].min(),
-        "records": len(finalDF)
+        "read_rate_avg": final_dataframe['readRate'].mean(),
+        "max_publish_time": final_dataframe['publish_timestamp'].max(),
+        "min_publish_time": final_dataframe['publish_timestamp'].min(),
+        "records": len(final_dataframe)
     }
 
 
@@ -204,7 +133,7 @@ def check_each_position(db_client, gh_id, index, dt, avg_rate) -> dict:
         WHERE gh_id = '{gh_id}' and position = {index} and dt_version < {dt}
         ORDER BY dt_version DESC limit 1;
     """
-    result = db_client.select(select_sql)
+    result = db_client.fetch(select_sql)
     if result:
         account_name = result[0][0]
         previous_read_rate_avg = result[0][1]
@@ -246,6 +175,9 @@ def update_single_day(dt, account_list, article_df, lam):
         string_format='%Y-%m-%d'
     )
 
+    # processed_account_set
+    processed_account_set = set()
+
     for account in tqdm(account_list, desc=dt):
         for index in const.ARTICLE_INDEX_LIST:
             read_rate_detail = cal_avg_account_read_rate(
@@ -259,7 +191,9 @@ def update_single_day(dt, account_list, article_df, lam):
             min_publish_time = read_rate_detail['min_publish_time']
             articles_count = read_rate_detail['records']
             if articles_count:
-                if index in {1, 2}:
+                processed_account_set.add(account['gh_id'])
+                # check read rate in position 1 and 2
+                if index in [1, 2]:
                     error_obj = check_each_position(
                         db_client=lam,
                         gh_id=account['gh_id'],
@@ -269,6 +203,7 @@ def update_single_day(dt, account_list, article_df, lam):
                     )
                     if error_obj:
                         error_list.append(error_obj)
+                # insert into database
                 try:
                     if not read_rate_avg:
                         continue
@@ -278,8 +213,8 @@ def update_single_day(dt, account_list, article_df, lam):
                         values
                         (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s);
                     """
-                    lam.update(
-                        sql=insert_sql,
+                    lam.save(
+                        query=insert_sql,
                         params=(
                             account['account_name'],
                             account['gh_id'],
@@ -294,14 +229,17 @@ def update_single_day(dt, account_list, article_df, lam):
                         )
                     )
                 except Exception as e:
+                    print(e)
                     insert_error_list.append(str(e))
 
+    # bot sql error
     if insert_error_list:
         bot(
             title="更新阅读率均值,存在sql 插入失败",
             detail=insert_error_list
         )
 
+    # bot outliers
     if error_list:
         columns = [
             create_feishu_columns_sheet(sheet_type="plain_text", sheet_name="account_name", display_name="账号名称"),
@@ -314,7 +252,7 @@ def update_single_day(dt, account_list, article_df, lam):
                                         display_name="相对变化率")
         ]
         bot(
-            title="更新阅读率均值,头次出现异常值通知",
+            title="阅读率均值表异常信息, 总共处理{}个账号".format(len(processed_account_set)),
             detail={
                 "columns": columns,
                 "rows": error_list
@@ -323,12 +261,14 @@ def update_single_day(dt, account_list, article_df, lam):
             mention=False
         )
 
+    # if no error, send success info
     if not error_list and not insert_error_list:
         bot(
-            title="阅读率均值表更新成功",
+            title="阅读率均值表更新成功, 总共处理{}个账号".format(len(processed_account_set)),
             detail={
                 "日期": dt
-            }
+            },
+            mention=False
         )
 
 
@@ -347,12 +287,36 @@ def main() -> None:
     else:
         dt = datetime.today().strftime('%Y-%m-%d')
 
-    lam = longArticlesMySQL()
-    de = DeNetMysql()
-    account_list = get_publishing_accounts(db_client=de)
-    df = cal_account_read_rate(tuple([i['gh_id'] for i in account_list]))
+    # init stat period
+    max_time = functions.str_to_timestamp(date_string=dt)
+    min_time = max_time - const.STATISTICS_PERIOD
+    min_stat_date = functions.timestamp_to_str(timestamp=min_time, string_format='%Y-%m-%d')
+
+    # init database connector
+    long_articles_db_client = DatabaseConnector(db_config=long_articles_config)
+    long_articles_db_client.connect()
+
+    piaoquan_crawler_db_client = DatabaseConnector(db_config=piaoquan_crawler_config)
+    piaoquan_crawler_db_client.connect()
+
+    denet_db_client = DatabaseConnector(db_config=denet_config)
+    denet_db_client.connect()
+
+    # get account list
+    account_list = fetch_publishing_account_list(db_client=denet_db_client)
+
+    # get fans dict
+    fans_dict = fetch_account_fans(db_client=denet_db_client, start_date=min_stat_date)
+
+    # get data frame from official_articles_v2
+    gh_id_tuple = tuple([i['gh_id'] for i in account_list])
+    article_list = get_account_articles_detail(db_client=piaoquan_crawler_db_client, gh_id_tuple=gh_id_tuple, min_publish_timestamp=min_time)
+
+    # cal account read rate and make a dataframe
+    read_rate_dataframe = cal_account_read_rate(article_list, fans_dict)
 
-    update_single_day(dt, account_list, df, lam)
+    # update each day's data
+    update_single_day(dt, account_list, read_rate_dataframe, long_articles_db_client)
 
 
 if __name__ == '__main__':

+ 117 - 0
coldStartTasks/publish/publish_single_video_pool_videos.py

@@ -0,0 +1,117 @@
+import json
+import datetime
+import traceback
+
+from pymysql.cursors import DictCursor
+from tqdm import tqdm
+
+from applications import bot, aiditApi
+from applications.const import SingleVideoPoolPublishTaskConst
+from applications.db import DatabaseConnector
+from config import long_articles_config, apolloConfig
+
+config = apolloConfig()
+const = SingleVideoPoolPublishTaskConst()
+
+video_pool_config = json.loads(config.getConfigValue(key="video_pool_config"))
+
+
+class PublishSingleVideoPoolVideos:
+    def __init__(self):
+        self.db_client = DatabaseConnector(db_config=long_articles_config)
+        self.db_client.connect()
+
+    def get_task_list(self, platform:str) -> list[dict]:
+        daily_limit = video_pool_config[platform]['process_num_each_day']
+        fetch_query = f"""
+            select id, content_trace_id, pq_vid
+            from single_video_transform_queue
+            where status = {const.TRANSFORM_INIT_STATUS} and platform = '{platform}'
+            order by score desc
+            limit {daily_limit};
+        """
+        fetch_response = self.db_client.fetch(query=fetch_query, cursor_type=DictCursor)
+        return fetch_response
+
+    def update_tasks_status(self,
+                           task_id_tuple: tuple,
+                           ori_status: int,
+                           new_status: int)-> int:
+        update_query = f"""
+            update single_video_transform_queue
+            set status = %s
+            where id in %s and status = %s;
+        """
+        affected_rows = self.db_client.save(
+            query=update_query,
+            params=(new_status, task_id_tuple, ori_status)
+        )
+        return affected_rows
+
+    def deal(self):
+        """
+        entrance of this class
+        """
+        platform_list = ["sph", "gzh", "toutiao", "hksp"]
+        for platform in tqdm(platform_list, desc='process each platform'):
+            task_list = self.get_task_list(platform)
+            task_id_tuple = tuple([task['id'] for task in task_list])
+            vid_list = [task['pq_vid'] for task in task_list]
+            if vid_list:
+                try:
+                    # create video crawler plan
+                    plan_name = f"{video_pool_config[platform]['nick_name']}-{datetime.datetime.today().strftime('%Y-%m-%d')}-视频数量: {len(vid_list)}"
+                    crawler_plan_response = aiditApi.auto_create_single_video_crawler_task(
+                        plan_name=plan_name,
+                        plan_tag="单视频供给冷启动",
+                        video_id_list=vid_list,
+                    )
+                    crawler_plan_id = crawler_plan_response["data"]["id"]
+                    crawler_plan_name = crawler_plan_response["data"]["name"]
+
+                    # bind crawler plan to generate plan
+                    crawler_task_list = [
+                        {
+                            "contentType": 1,
+                            "inputSourceModal": 4,
+                            "inputSourceChannel": 10,
+                            "inputSourceType": 2,
+                            "inputSourceValue": crawler_plan_id,
+                            "inputSourceSubType": None,
+                            "fieldName": None,
+                            "inputSourceLabel": "原始帖子-视频-票圈小程序-内容添加计划-{}".format(crawler_plan_name),
+                        }
+                    ]
+                    generate_plan_id = video_pool_config[platform]['generate_plan_id']
+                    aiditApi.bind_crawler_task_to_generate_task(
+                        crawler_task_list=crawler_task_list,
+                        generate_task_id=generate_plan_id,
+                    )
+
+                    # update status
+                    self.update_tasks_status(
+                        task_id_tuple=task_id_tuple,
+                        ori_status=const.TRANSFORM_INIT_STATUS,
+                        new_status=const.TRANSFORM_SUCCESS_STATUS
+                    )
+                except Exception as e:
+                    bot(
+                        title='视频内容池发布任务',
+                        detail={
+                            'platform': platform,
+                            'date': datetime.datetime.today().strftime('%Y-%m-%d'),
+                            'msg': '发布视频内容池失败,原因:{}'.format(str(e)),
+                            'detail': traceback.format_exc(),
+                        },
+                        mention=False
+                    )
+            else:
+                bot(
+                    title='视频内容池发布任务',
+                    detail={
+                        'platform': platform,
+                        'date': datetime.datetime.today().strftime('%Y-%m-%d'),
+                        'msg': '该平台无待发布视频,请关注供给的抓取'
+                    },
+                    mention=False
+                )
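
The task reads its per-platform settings from the Apollo key video_pool_config. Based on the lookups above (process_num_each_day, nick_name, generate_plan_id), the expected JSON shape is roughly the following; the numbers and ids are placeholders, not the real configuration:

    # assumed structure of the "video_pool_config" Apollo value;
    # "gzh", "toutiao" and "hksp" follow the same per-platform keys
    video_pool_config = {
        "sph": {
            "process_num_each_day": 50,   # daily limit used by get_task_list
            "nick_name": "视频号",         # prefix of the generated crawler plan name
            "generate_plan_id": 100001,   # generate plan the crawler plan is bound to
        },
    }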

+ 45 - 6
coldStartTasks/publish/publish_video_to_pq_for_audit.py

@@ -11,13 +11,15 @@ from pymysql.cursors import DictCursor
 
 from applications import log
 from applications import PQAPI
-from applications.const import WeixinVideoCrawlerConst
+from applications.api import AigcSystemApi
 from applications.api import fetch_moon_shot_response
+from applications.const import WeixinVideoCrawlerConst
 from applications.db import DatabaseConnector
 from config import long_articles_config
 
 const = WeixinVideoCrawlerConst()
 pq_functions = PQAPI()
+aigc = AigcSystemApi()
 
 
 class PublishVideosForAudit(object):
@@ -36,12 +38,14 @@ class PublishVideosForAudit(object):
         """
         already_published_count = self.get_published_articles_today()
         rest_count = const.MAX_VIDEO_NUM - already_published_count
+
+        limit_count = min(rest_count, const.MAX_VIDEO_NUM_PER_PUBLISH)
         sql = f"""
             SELECT id, article_title, video_oss_path 
             FROM publish_single_video_source 
             WHERE audit_status = {const.VIDEO_AUDIT_INIT_STATUS} and bad_status = {const.TITLE_DEFAULT_STATUS}
             ORDER BY score DESC
-            LIMIT {rest_count};
+            LIMIT {limit_count};
             """
         response = self.db_client.fetch(sql, cursor_type=DictCursor)
         return response
@@ -140,7 +144,11 @@ class PublishVideosForAudit(object):
         获取需要检查的视频列表
         :return:
         """
-        sql = f"""SELECT audit_video_id FROM publish_single_video_source WHERE audit_status = {const.VIDEO_AUDIT_PROCESSING_STATUS};"""
+        sql = f"""
+            select content_trace_id, audit_video_id, score, platform 
+            from publish_single_video_source
+            where audit_status = {const.VIDEO_AUDIT_PROCESSING_STATUS};
+        """
         response = self.db_client.fetch(sql, cursor_type=DictCursor)
         return response
 
@@ -195,12 +203,30 @@ class PublishVideosForAudit(object):
             )
             return False
 
-    def check_video_status(self, video_id: int) -> Dict:
+    def insert_into_task_queue(self, video) -> int:
+        """
+        enqueue
+        """
+        insert_query = f"""
+            insert into single_video_transform_queue
+            (content_trace_id, pq_vid, score, platform)
+            values (%s, %s, %s, %s);
+        """
+        affected_rows = self.db_client.save(
+            query=insert_query,
+            params=(
+                video['content_trace_id'], video['audit_video_id'], video['score'], video['platform']
+            )
+        )
+        return affected_rows
+
+    def check_video_status(self, video_obj: dict) -> Dict:
         """
         检查视频的状态,若视频审核通过or不通过,修改记录状态
-        :param video_id:
+        :param video_obj:
         :return:
         """
+        video_id = video_obj['audit_video_id']
         response = pq_functions.getPQVideoListDetail([video_id])
         audit_status = response.get("data")[0].get("auditStatus")
         # 请求成功
@@ -214,6 +240,19 @@ class PublishVideosForAudit(object):
                     ori_audit_status=const.VIDEO_AUDIT_PROCESSING_STATUS,
                     new_audit_status=const.VIDEO_AUDIT_SUCCESS_STATUS
                 )
+                # 将视频存储到任务队列
+                self.insert_into_task_queue(video_obj)
+
+                # 将视频存储到 aigc 表
+                aigc.insert_crawler_relation_to_aigc_system(
+                    relation_list=[
+                        {
+                            "videoPoolTraceId": video_obj['content_trace_id'],
+                            "channelContentId": str(video_id),
+                            "platform": video_obj['platform'],
+                        }
+                    ]
+                )
             else:
                 # 修改小程序标题失败,修改审核状态为4
                 affected_rows = self.update_audit_status(
@@ -293,7 +332,7 @@ class PublishVideosForAudit(object):
         for video_obj in tqdm(video_list, desc="视频检查"):
             video_id = video_obj.get("audit_video_id")
             try:
-                response = self.check_video_status(video_id)
+                response = self.check_video_status(video_obj)
                 if response.get("affected_rows"):
                     continue
                 else:

+ 26 - 0
sh/run_gzh_video_crawler.sh

@@ -0,0 +1,26 @@
+#!/bin/bash
+
+# 获取当前日期,格式为 YYYY-MM-DD
+CURRENT_DATE=$(date +%F)
+
+# 日志文件路径,包含日期
+LOG_FILE="/root/luojunhui/logs/gzh_video_crawler_log_$CURRENT_DATE.txt"
+
+# 重定向整个脚本的输出到带日期的日志文件
+exec >> "$LOG_FILE" 2>&1
+if pgrep -f "python3 run_video_account_crawler.py" > /dev/null
+then
+    echo "$(date '+%Y-%m-%d %H:%M:%S') - run_video_account_crawler.py is running"
+else
+    echo "$(date '+%Y-%m-%d %H:%M:%S') - trying to restart run_video_account_crawler.py"
+    # 切换到指定目录
+    cd /root/luojunhui/LongArticlesJob
+
+    # 激活 Conda 环境
+    source /root/miniconda3/etc/profile.d/conda.sh
+    conda activate tasks
+
+    # 在后台运行 Python 脚本并重定向日志输出
+    nohup python3 run_video_account_crawler.py >> "${LOG_FILE}" 2>&1 &
+    echo "$(date '+%Y-%m-%d %H:%M:%S') - successfully restarted run_video_account_crawler.py"
+fi

+ 0 - 2
sh/run_video_publish_and_audit.sh

@@ -21,8 +21,6 @@ else
     conda activate tasks
 
     # 在后台运行 Python 脚本并重定向日志输出
-    nohup python3 run_video_account_crawler.py >> "${LOG_FILE}" 2>&1 &
-    sleep 180
     nohup python3 run_video_publish_and_audit.py >> "${LOG_FILE}" 2>&1 &
     echo "$(date '+%Y-%m-%d %H:%M:%S') - successfully restarted run_video_publish_and_audit.py"
 fi

+ 58 - 26
tasks/update_published_articles_minigram_detail.py

@@ -1,6 +1,7 @@
 """
 @author: luojunhui
 """
+import json
 import traceback
 
 from datetime import datetime, timedelta
@@ -49,24 +50,6 @@ def extract_path(path: str) -> Dict:
         return EMPTY_DICT
 
 
-def get_article_mini_program_info(content_url: str) -> List[Dict]:
-    """
-    获取文章的小程序信息
-    :return:
-    """
-    try:
-        article_detail = spider.get_article_text(content_url)
-    except Exception as e:
-        raise SpiderError(error=e, spider="detail", url=content_url)
-
-    response_code = article_detail['code']
-    if response_code == const.ARTICLE_SUCCESS_CODE:
-        mini_info = article_detail['data']['data']['mini_program']
-        return mini_info
-    else:
-        return EMPTY_LIST
-
-
 class UpdatePublishedArticlesMinigramDetail(object):
     """
     更新已发布文章数据
@@ -136,7 +119,7 @@ class UpdatePublishedArticlesMinigramDetail(object):
         :return:
         """
         sql = f"""
-             SELECT ContentUrl, wx_sn, publish_timestamp, accountName, title
+             SELECT ContentUrl, wx_sn, publish_timestamp, accountName, title, root_source_id_list
              FROM official_articles_v2
              WHERE FROM_UNIXTIME(publish_timestamp)
              BETWEEN DATE_SUB('{biz_date}', INTERVAL 1 DAY) AND DATE_SUB('{biz_date}', INTERVAL 1 SECOND);
@@ -181,8 +164,9 @@ class UpdatePublishedArticlesMinigramDetail(object):
         url = article_info['ContentUrl']
         publish_timestamp = article_info['publish_timestamp']
         wx_sn = article_info['wx_sn'].decode()
+        root_source_id_list = json.loads(article_info['root_source_id_list']) if article_info['root_source_id_list'] else EMPTY_LIST
 
-        article_mini_program_detail = get_article_mini_program_info(url)
+        article_mini_program_detail = self.get_article_mini_program_info(url, root_source_id_list)
         if article_mini_program_detail:
             log(
                 task=TASK_NAME,
@@ -208,9 +192,13 @@ class UpdatePublishedArticlesMinigramDetail(object):
                         image_url = mini_item['image_url']
                         nick_name = mini_item['nike_name']
                         # extract video id and root_source_id
-                        id_info = extract_path(mini_item['path'])
-                        root_source_id = id_info['root_source_id']
-                        video_id = id_info['video_id']
+                        if mini_item.get("root_source_id") and mini_item.get("video_id"):
+                            root_source_id = mini_item['root_source_id']
+                            video_id = mini_item['video_id']
+                        else:
+                            id_info = extract_path(mini_item['path'])
+                            root_source_id = id_info['root_source_id']
+                            video_id = id_info['video_id']
                         kimi_title = mini_item['title']
                         self.insert_each_root_source_id(
                             wx_sn=wx_sn,
@@ -235,7 +223,53 @@ class UpdatePublishedArticlesMinigramDetail(object):
                 return article_info
 
         else:
-            return article_info
+            return EMPTY_DICT
+
+    def get_article_mini_program_info(self, content_url: str, root_source_id_list: list) -> List[Dict]:
+        """
+        获取文章的小程序信息
+        :return:
+        """
+        if root_source_id_list:
+            # 说明已经获取到 root_source_id了
+            fetch_sql = f"""
+                select video_id, root_source_id from long_articles_root_source_id where root_source_id in %s;
+            """
+            fetch_response = self.long_articles_db_client.fetch(
+                query=fetch_sql,
+                params=(tuple(root_source_id_list),),
+                cursor_type=DictCursor
+            )
+            mini_info = []
+            if fetch_response:
+                # 构造 mini_info 的格式
+                for item in fetch_response:
+                    mini_info.append(
+                        {
+                            "app_id": "wx89e7eb06478361d7",
+                            "avatar": "https://rescdn.yishihui.com/0temp/logo.png",
+                            "image_url": "",
+                            "nike_name": "票圈 l 3亿人喜欢的视频平台",
+                            "root_source_id": item['root_source_id'],
+                            "video_id": item['video_id'],
+                            "service_type": "0",
+                            "title": "",
+                            "type": "card"
+                        }
+                    )
+                return mini_info
+
+        try:
+            article_detail = spider.get_article_text(content_url)
+        except Exception as e:
+            raise SpiderError(error=e, spider="detail", url=content_url)
+
+        response_code = article_detail['code']
+        if response_code == const.ARTICLE_SUCCESS_CODE:
+            mini_info = article_detail['data']['data']['mini_program']
+            return mini_info
+        else:
+            return EMPTY_LIST
 
     def get_root_source_id_for_three_days(self, biz_date: str) -> List[Dict]:
         """
@@ -375,5 +409,3 @@ class UpdatePublishedArticlesMinigramDetail(object):
             )
 
 
-
-

+ 83 - 131
updateAccountV3.py

@@ -7,151 +7,104 @@ import time
 from tqdm import tqdm
 from datetime import datetime, timedelta
 from argparse import ArgumentParser
+from pymysql.cursors import DictCursor
 
-from applications import PQMySQL, DeNetMysql, longArticlesMySQL
-from applications.const import updateAccountReadAvgTaskConst
+from applications.const import UpdateAccountReadAvgTaskConst
+from applications.db import DatabaseConnector
+from applications.utils import fetch_account_fans
+from applications.utils import fetch_publishing_account_list
 from config import apolloConfig
+from config import long_articles_config, denet_config, piaoquan_crawler_config
 
+read_rate_table = "long_articles_read_rate"
+read_avg_table = "account_avg_info_v3"
 config = apolloConfig()
+const = UpdateAccountReadAvgTaskConst()
 unauthorized_account = json.loads(config.getConfigValue("unauthorized_gh_id_fans"))
 touliu_accounts = set(json.loads(config.getConfigValue("touliu_gh_id_list")))
-
-
-def get_account_fans_by_dt(db_client) -> dict:
-    """
-    获取每个账号发粉丝,通过日期来区分
-    :return:
-    """
-    sql = f"""
-        SELECT 
-            t1.date_str, 
-            t1.fans_count, 
-            t2.gh_id
-        FROM datastat_wx t1
-        JOIN publish_account t2 ON t1.account_id = t2.id
-        WHERE 
-            t2.channel = 5 
-        AND t2.status = 1 
-        AND t1.date_str >= '2024-09-01' 
-        ORDER BY t1.date_str;
-    """
-    result = db_client.select(sql)
-    D = {}
-    for line in result:
-        dt = line[0]
-        fans = line[1]
-        gh_id = line[2]
-        if D.get(gh_id):
-            D[gh_id][dt] = fans
-        else:
-            D[gh_id] = {dt: fans}
-    return D
-
+backup_account_fans = json.loads(config.getConfigValue("backup_account_fans"))
 
 class UpdateAccountInfoVersion3(object):
     """
-    更新账号信息 v3
+    更新账号的平均阅读率
     """
 
     def __init__(self):
-        self.const = updateAccountReadAvgTaskConst()
-        self.pq = PQMySQL()
-        self.de = DeNetMysql()
-        self.lam = longArticlesMySQL()
+        # init piaoquan crawler db client
+        self.piaoquan_crawler_db_client = DatabaseConnector(piaoquan_crawler_config)
+        self.piaoquan_crawler_db_client.connect()
+
+        # init long articles db client
+        self.long_articles_db_client = DatabaseConnector(long_articles_config)
+        self.long_articles_db_client.connect()
+
+        #  init aigc db client
+        self.denet_db_client = DatabaseConnector(denet_config)
+        self.denet_db_client.connect()
 
-    def get_account_position_read_rate(self, dt):
+    def fetch_read_rate_avg_for_each_account(self, dt):
         """
         从长文数据库获取账号阅读均值
         :return:
         """
         dt = int(dt.replace("-", ""))
         sql = f"""
-            SELECT 
-                gh_id, position, read_rate_avg
-            FROM
-                long_articles_read_rate
-            WHERE dt_version = {dt};
+            select gh_id, position, read_rate_avg
+            from {read_rate_table}
+            where dt_version = {dt};
         """
-
-        result = self.lam.select(sql)
+        fetch_response_list = self.long_articles_db_client.fetch(query=sql, cursor_type=DictCursor)
         account_read_rate_dict = {}
-        for item in result:
-            gh_id = item[0]
-            position = item[1]
-            rate = item[2]
-            key = "{}_{}".format(gh_id, position)
-            account_read_rate_dict[key] = rate
+        for item in fetch_response_list:
+            key = "{}_{}".format(item['gh_id'], item['position'])
+            account_read_rate_dict[key] = item['read_rate_avg']
         return account_read_rate_dict
 
-    def get_publishing_accounts(self):
-        """
-        获取每日正在发布的账号
-        :return:
-        """
-        sql = f"""
-        SELECT DISTINCT
-            t3.`name`,
-            t3.gh_id,
-            t3.follower_count,
-            t6.account_source_name,
-            t6.mode_type,
-            t6.account_type,
-            t6.`status`
-        FROM
-            publish_plan t1
-            JOIN publish_plan_account t2 ON t1.id = t2.plan_id
-            JOIN publish_account t3 ON t2.account_id = t3.id
-            LEFT JOIN publish_account_wx_type t4 on t3.id = t4.account_id
-            LEFT JOIN wx_statistics_group_source_account t5 on t3.id = t5.account_id
-            LEFT JOIN wx_statistics_group_source t6 on t5.group_source_name = t6.account_source_name
-        WHERE
-            t1.plan_status = 1
-            AND t3.channel = 5
-            GROUP BY t3.id;
-        """
-        account_list = self.de.select(sql)
-        result_list = [
-            {
-                "account_name": i[0],
-                "gh_id": i[1],
-                "fans": i[2],
-                "account_source_name": i[3],
-                "mode_type": i[4],
-                "account_type": i[5],
-                "status": i[6]
-            } for i in account_list
-        ]
-        return result_list
-
     def do_task_list(self, dt):
         """
         do it
         """
-        fans_dict = get_account_fans_by_dt(db_client=self.de)
-        account_list = self.get_publishing_accounts()
-        rate_dict = self.get_account_position_read_rate(dt)
+        # get fans dict from aigc
+        fans_dict = fetch_account_fans(self.denet_db_client, dt)
+
+        # get publishing account list from aigc
+        account_list = fetch_publishing_account_list(self.denet_db_client)
+
+        # fetch each account's read avg for each position
+        read_rate_avg_dict = self.fetch_read_rate_avg_for_each_account(dt)
+
         for account in tqdm(account_list, desc=dt):
             gh_id = account["gh_id"]
-            business_type = self.const.TOULIU if gh_id in touliu_accounts else self.const.ARTICLES_DAILY
-            fans = fans_dict.get(gh_id, {}).get(dt, 0)
+            business_type = const.TOULIU if gh_id in touliu_accounts else const.ARTICLES_DAILY
+            fans = fans_dict.get(gh_id, {}).get(dt, const.DEFAULT_FANS)
+
+            # use unauthorized account's fans if not found in aigc
+            if not fans:
+                fans = int(unauthorized_account.get(gh_id, const.DEFAULT_FANS))
+
+            # use backup account's fans if not found in aigc
             if not fans:
-                fans = int(unauthorized_account.get(gh_id, 0))
+                fans = int(backup_account_fans.get(gh_id, const.DEFAULT_FANS))
+
             if fans:
-                for index in range(1, 9):
+                for index in const.ARTICLE_INDEX_LIST:
                     gh_id_position = "{}_{}".format(gh_id, index)
-                    if rate_dict.get(gh_id_position):
-                        rate = rate_dict[gh_id_position]
-                        read_avg = fans * rate
-                        print(rate, read_avg)
+                    if read_rate_avg_dict.get(gh_id_position):
+                        # fetch read rate avg
+                        read_rate_avg = read_rate_avg_dict[gh_id_position]
+                        # cal read avg
+                        read_avg = fans * read_rate_avg
+
+                        # insert into database
                         insert_sql = f"""
-                        INSERT INTO account_avg_info_v3
-                        (gh_id, position, update_time, account_name, fans, read_avg, like_avg, status, account_type, account_mode, account_source, account_status, business_type, read_rate_avg)
-                        values
-                        (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);
+                            insert into {read_avg_table}
+                            (gh_id, position, update_time, account_name, fans, read_avg, like_avg, status, account_type, account_mode, account_source, account_status, business_type, read_rate_avg)
+                            values
+                            (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);
                         """
                         try:
-                            self.pq.update(
-                                sql=insert_sql,
+                            self.piaoquan_crawler_db_client.save(
+                                query=insert_sql,
                                 params=(
                                     gh_id,
                                     index,
@@ -159,29 +112,29 @@ class UpdateAccountInfoVersion3(object):
                                     account['account_name'],
                                     fans,
                                     read_avg,
-                                    0,
-                                    1,
+                                    const.DEFAULT_LIKE,
+                                    const.USING_STATUS,
                                     account['account_type'],
                                     account['mode_type'],
-                                    account['account_source_name'],
+                                    account['account_source'],
                                     account['status'],
                                     business_type,
-                                    rate
+                                    read_rate_avg
                                 )
                             )
                         except Exception as e:
-                            updateSQL = f"""
-                            UPDATE account_avg_info_v3
-                            set fans = %s, read_avg = %s, read_rate_avg = %s
-                            where gh_id = %s and position = %s and update_time = %s
+                            update_sql = f"""
+                                update {read_avg_table}
+                                set fans = %s, read_avg = %s, read_rate_avg = %s
+                                where gh_id = %s and position = %s and update_time = %s
                             """
                             try:
-                                affected_rows = self.pq.update(
-                                    sql=updateSQL,
+                                self.piaoquan_crawler_db_client.save(
+                                    query=update_sql,
                                     params=(
                                         fans,
                                         read_avg,
-                                        rate,
+                                        read_rate_avg,
                                         account['gh_id'],
                                         index,
                                         dt
@@ -192,17 +145,16 @@ class UpdateAccountInfoVersion3(object):
 
                         # 修改前一天的状态为 0
                         update_status_sql = f"""
-                        UPDATE account_avg_info_v3
-                        SET status = %s
-                        where update_time != %s and gh_id = %s and position = %s;
+                            update {read_avg_table}
+                            set status = %s
+                            where update_time != %s and gh_id = %s and position = %s;
                         """
-                        rows_affected = self.pq.update(
-                            sql=update_status_sql,
+                        self.piaoquan_crawler_db_client.save(
+                            query=update_status_sql,
                             params=(
-                                0, dt, account['gh_id'], index
+                                const.NOT_USING_STATUS, dt, account['gh_id'], index
                             )
                         )
-                        print("修改成功")
 
 
 def main():
@@ -215,15 +167,15 @@ def main():
                         help="Run only once for date in format of %Y-%m-%d. \
                                 If no specified, run as daily jobs.")
     args = parser.parse_args()
-    Up = UpdateAccountInfoVersion3()
+    update_account_read_avg_task = UpdateAccountInfoVersion3()
     if args.run_date:
-        Up.do_task_list(dt=args.run_date)
+        update_account_read_avg_task.do_task_list(dt=args.run_date)
     else:
         dt_object = datetime.fromtimestamp(int(time.time()))
         one_day = timedelta(days=1)
         yesterday = dt_object - one_day
         yesterday_str = yesterday.strftime('%Y-%m-%d')
-        Up.do_task_list(dt=yesterday_str)
+        update_account_read_avg_task.do_task_list(dt=yesterday_str)
 
 
 if __name__ == '__main__':