|
@@ -7,7 +7,7 @@ from tqdm import tqdm
|
|
|
from datetime import datetime, timedelta
|
|
from datetime import datetime, timedelta
|
|
|
from urllib.parse import unquote, parse_qs, urlparse
|
|
from urllib.parse import unquote, parse_qs, urlparse
|
|
|
|
|
|
|
|
-from applications.utils import fetch_from_odps
|
|
|
|
|
|
|
+from applications.utils import fetch_from_odps, show_desc_to_sta
|
|
|
from applications.crawler.wechat import get_article_list_from_account
|
|
from applications.crawler.wechat import get_article_list_from_account
|
|
|
from applications.crawler.wechat import get_article_detail
|
|
from applications.crawler.wechat import get_article_detail
|
|
|
|
|
|
|
@@ -24,12 +24,12 @@ class CooperateAccountsMonitorTaskConst:
|
|
|
HAS_MINI_PROGRAM = 1
|
|
HAS_MINI_PROGRAM = 1
|
|
|
DONT_HAS_MINI_PROGRAM = 0
|
|
DONT_HAS_MINI_PROGRAM = 0
|
|
|
|
|
|
|
|
- ARTICLE_NUM = 200
|
|
|
|
|
|
|
+ ARTICLE_NUM = 100
|
|
|
|
|
|
|
|
|
|
|
|
|
class CooperateAccountsMonitorTaskUtils(CooperateAccountsMonitorTaskConst):
|
|
class CooperateAccountsMonitorTaskUtils(CooperateAccountsMonitorTaskConst):
|
|
|
@staticmethod
|
|
@staticmethod
|
|
|
- def get_monitor_account_list():
|
|
|
|
|
|
|
+ def get_uv_account_list():
|
|
|
# dt = (datetime.today() - timedelta(days=1)).strftime("%Y%m%d")
|
|
# dt = (datetime.today() - timedelta(days=1)).strftime("%Y%m%d")
|
|
|
week_ago = (datetime.today() - timedelta(days=7)).strftime("%Y-%m-%d %H:%M:%S")
|
|
week_ago = (datetime.today() - timedelta(days=7)).strftime("%Y-%m-%d %H:%M:%S")
|
|
|
query = f"""
|
|
query = f"""
|
|
@@ -272,6 +272,7 @@ class CooperateAccountsMonitorTask(CooperateAccountsMonitorMapper):
|
|
|
base_info = group_article["AppMsg"]["BaseInfo"]
|
|
base_info = group_article["AppMsg"]["BaseInfo"]
|
|
|
detail_info = group_article["AppMsg"]["DetailInfo"]
|
|
detail_info = group_article["AppMsg"]["DetailInfo"]
|
|
|
for single_article in detail_info:
|
|
for single_article in detail_info:
|
|
|
|
|
+ show_stat = show_desc_to_sta(single_article.get("ShowDesc", None))
|
|
|
single_param = (
|
|
single_param = (
|
|
|
gh_id,
|
|
gh_id,
|
|
|
account_name,
|
|
account_name,
|
|
@@ -284,13 +285,15 @@ class CooperateAccountsMonitorTask(CooperateAccountsMonitorMapper):
|
|
|
single_article["Digest"],
|
|
single_article["Digest"],
|
|
|
single_article["send_time"],
|
|
single_article["send_time"],
|
|
|
self.extract_wx_sn(single_article["ContentUrl"]),
|
|
self.extract_wx_sn(single_article["ContentUrl"]),
|
|
|
|
|
+ show_stat.get("show_view_count", 0),
|
|
|
|
|
+ show_stat.get("show_like_count", 0)
|
|
|
)
|
|
)
|
|
|
params.append(single_param)
|
|
params.append(single_param)
|
|
|
|
|
|
|
|
query = """
|
|
query = """
|
|
|
INSERT IGNORE INTO cooperate_accounts_daily_detail
|
|
INSERT IGNORE INTO cooperate_accounts_daily_detail
|
|
|
- (gh_id, account_name, app_msg_id, publish_type, position, article_title, article_link, article_cover, article_desc, publish_timestamp, wx_sn)
|
|
|
|
|
- VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);
|
|
|
|
|
|
|
+ (gh_id, account_name, app_msg_id, publish_type, position, article_title, article_link, article_cover, article_desc, publish_timestamp, wx_sn, read_cnt, like_cnt)
|
|
|
|
|
+ VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);
|
|
|
"""
|
|
"""
|
|
|
await self.pool.async_save(query=query, params=params, batch=True)
|
|
await self.pool.async_save(query=query, params=params, batch=True)
|
|
|
|
|
|
|
@@ -338,13 +341,13 @@ class CooperateAccountsMonitorTask(CooperateAccountsMonitorMapper):
|
|
|
pass
|
|
pass
|
|
|
|
|
|
|
|
# 获取待处理的文章
|
|
# 获取待处理的文章
|
|
|
async def get_article_list(self, account_name_tuple):
    """Fetch the articles that still need their details retrieved.

    Selects ``wx_sn`` and ``article_link`` from
    ``cooperate_accounts_daily_detail`` for rows whose ``fetch_status``
    equals ``self.INIT_STATUS`` and whose ``account_name`` is one of
    ``account_name_tuple``, ordered by ``position`` and limited to
    ``self.ARTICLE_NUM`` rows.

    Args:
        account_name_tuple: Account names used to restrict the query.

    Returns:
        The result of ``self.pool.async_fetch`` for the query, or an empty
        list when no account names are supplied.
    """
    # An empty sequence cannot be rendered into a valid ``IN (...)`` list
    # (presumably MySQL, given the INSERT IGNORE used elsewhere in this file),
    # so answer locally instead of sending a statement that would fail.
    if not account_name_tuple:
        return []

    query = """
        SELECT wx_sn, article_link FROM cooperate_accounts_daily_detail
        WHERE fetch_status = %s AND account_name IN %s ORDER BY position LIMIT %s;
    """
    return await self.pool.async_fetch(
        query=query,
        params=(self.INIT_STATUS, account_name_tuple, self.ARTICLE_NUM),
    )
|
|
|
|
|
|
|
|
# 入口函数
|
|
# 入口函数
|
|
@@ -364,10 +367,24 @@ class CooperateAccountsMonitorTask(CooperateAccountsMonitorMapper):
|
|
|
|
|
|
|
|
|
|
|
|
|
case "get_detail":
|
|
case "get_detail":
|
|
|
- article_list = await self.get_article_list()
|
|
|
|
|
- for article in tqdm(article_list, desc="处理文章详情"):
|
|
|
|
|
- try:
|
|
|
|
|
- await self.set_article_detail(article)
|
|
|
|
|
|
|
+ has_uv_accounts = self.get_uv_account_list()
|
|
|
|
|
+ has_uv_name_list = []
|
|
|
|
|
+ for i in has_uv_accounts:
|
|
|
|
|
+ account_name = i.公众号名
|
|
|
|
|
+ if account_name:
|
|
|
|
|
+ has_uv_name_list.append(account_name)
|
|
|
|
|
+
|
|
|
|
|
+ if has_uv_name_list:
|
|
|
|
|
+ account_name_tuple = tuple(has_uv_name_list)
|
|
|
|
|
+ article_list = await self.get_article_list(account_name_tuple)
|
|
|
|
|
+ for article in tqdm(article_list, desc="处理文章详情"):
|
|
|
|
|
+ try:
|
|
|
|
|
+ await self.set_article_detail(article)
|
|
|
|
|
+
|
|
|
|
|
+ except Exception as e:
|
|
|
|
|
+ print(f"获取文章详情失败-{article['article_link']}-{e}")
|
|
|
|
|
+
|
|
|
|
|
+ else:
|
|
|
|
|
+ print("没有需要处理详情的账号")
|
|
|
|
|
+ return
|
|
|
|
|
|
|
|
- except Exception as e:
|
|
|
|
|
- print(f"获取文章详情失败-{article['article_link']}-{e}")
|
|
|