|
@@ -13,14 +13,20 @@ def upsert_good_authors(
|
|
|
) -> int:
|
|
) -> int:
|
|
|
"""
|
|
"""
|
|
|
将 good_account_expansion 中的 accounts 写入 demand_find_author 表。
|
|
将 good_account_expansion 中的 accounts 写入 demand_find_author 表。
|
|
|
|
|
+ 兼容两种格式:
|
|
|
|
|
+ - 标准格式:{"enabled": true, "accounts": [...]}
|
|
|
|
|
+ - 降级格式:直接传 list(agent 未严格遵守 schema 时的兜底)
|
|
|
"""
|
|
"""
|
|
|
if not good_account_block:
|
|
if not good_account_block:
|
|
|
return 0
|
|
return 0
|
|
|
|
|
|
|
|
- if not good_account_block.get("enabled"):
|
|
|
|
|
- return 0
|
|
|
|
|
|
|
+ if isinstance(good_account_block, list):
|
|
|
|
|
+ accounts: List[Dict[str, Any]] = good_account_block
|
|
|
|
|
+ else:
|
|
|
|
|
+ if not good_account_block.get("enabled"):
|
|
|
|
|
+ return 0
|
|
|
|
|
+ accounts = good_account_block.get("accounts") or []
|
|
|
|
|
|
|
|
- accounts: List[Dict[str, Any]] = good_account_block.get("accounts") or []
|
|
|
|
|
if not accounts:
|
|
if not accounts:
|
|
|
return 0
|
|
return 0
|
|
|
|
|
|
|
@@ -35,12 +41,17 @@ def upsert_good_authors(
|
|
|
with conn.cursor() as cur:
|
|
with conn.cursor() as cur:
|
|
|
rows = 0
|
|
rows = 0
|
|
|
for acc in accounts:
|
|
for acc in accounts:
|
|
|
- author_name = acc.get("author_nickname") or ""
|
|
|
|
|
|
|
+ # 与 output_schema 一致:author_nickname / author_sec_uid / author_url
|
|
|
|
|
+ # 兼容 Agent 常用别名:account_name、sec_uid(见 good_account_expansion 数组简写)
|
|
|
|
|
+ author_name = (
|
|
|
|
|
+ acc.get("author_nickname")
|
|
|
|
|
+ or acc.get("account_name")
|
|
|
|
|
+ or ""
|
|
|
|
|
+ )
|
|
|
author_link = acc.get("author_url") or ""
|
|
author_link = acc.get("author_url") or ""
|
|
|
- if not author_name or not author_link:
|
|
|
|
|
- sec_uid = acc.get("author_sec_uid")
|
|
|
|
|
- if sec_uid and not author_link:
|
|
|
|
|
- author_link = f"https://www.douyin.com/user/{sec_uid}"
|
|
|
|
|
|
|
+ sec_uid = acc.get("author_sec_uid") or acc.get("sec_uid")
|
|
|
|
|
+ if not author_link and sec_uid:
|
|
|
|
|
+ author_link = f"https://www.douyin.com/user/{sec_uid}"
|
|
|
if not author_name or not author_link:
|
|
if not author_name or not author_link:
|
|
|
continue
|
|
continue
|
|
|
|
|
|
|
@@ -77,12 +88,12 @@ def insert_contents(
|
|
|
|
|
|
|
|
sql = """
|
|
sql = """
|
|
|
INSERT INTO demand_find_content_result (
|
|
INSERT INTO demand_find_content_result (
|
|
|
- trace_id, query, rank_no, aweme_id, video_url, title, author_name, author_link,
|
|
|
|
|
|
|
+ trace_id, query, rank_no, aweme_id, video_url, title, author_name, author_id, author_link,
|
|
|
digg_count, comment_count, share_count,
|
|
digg_count, comment_count, share_count,
|
|
|
portrait_source, elderly_ratio, elderly_tgi, recommendation_reason,
|
|
portrait_source, elderly_ratio, elderly_tgi, recommendation_reason,
|
|
|
demand_content_id
|
|
demand_content_id
|
|
|
) VALUES (
|
|
) VALUES (
|
|
|
- %s, %s, %s, %s, %s, %s, %s, %s,
|
|
|
|
|
|
|
+ %s, %s, %s, %s, %s, %s, %s, %s, %s,
|
|
|
%s, %s, %s,
|
|
%s, %s, %s,
|
|
|
%s, %s, %s, %s,
|
|
%s, %s, %s, %s,
|
|
|
%s
|
|
%s
|
|
@@ -92,6 +103,12 @@ def insert_contents(
|
|
|
rows = 0
|
|
rows = 0
|
|
|
for item in contents:
|
|
for item in contents:
|
|
|
video_url = item.get("video_url") or ""
|
|
video_url = item.get("video_url") or ""
|
|
|
|
|
+ stats = item.get("statistics") or {}
|
|
|
|
|
+ portrait = item.get("portrait_data") or {}
|
|
|
|
|
+ # age_distribution 是 agent 有时输出的非标准结构,兜底提取 50+ 占比
|
|
|
|
|
+ age_dist = portrait.get("age_distribution") or {}
|
|
|
|
|
+ age_50_plus_ratio = portrait.get("age_50_plus_ratio") or age_dist.get("50+") or ""
|
|
|
|
|
+ age_50_plus_tgi = portrait.get("age_50_plus_tgi") or ""
|
|
|
cur.execute(
|
|
cur.execute(
|
|
|
sql,
|
|
sql,
|
|
|
(
|
|
(
|
|
@@ -102,13 +119,15 @@ def insert_contents(
|
|
|
video_url,
|
|
video_url,
|
|
|
item.get("title") or "",
|
|
item.get("title") or "",
|
|
|
item.get("author_nickname") or "",
|
|
item.get("author_nickname") or "",
|
|
|
|
|
+ item.get("author_sec_uid") or "",
|
|
|
item.get("author_url") or "",
|
|
item.get("author_url") or "",
|
|
|
- int(item.get("statistics", {}).get("digg_count") or 0),
|
|
|
|
|
- int(item.get("statistics", {}).get("comment_count") or 0),
|
|
|
|
|
- int(item.get("statistics", {}).get("share_count") or 0),
|
|
|
|
|
- item.get("portrait_data").get("source") or "",
|
|
|
|
|
- str(item.get("portrait_data").get("age_50_plus_ratio") or ""),
|
|
|
|
|
- str(item.get("portrait_data").get("age_50_plus_tgi") or ""),
|
|
|
|
|
|
|
+ # like_count 是 agent 有时输出的非标准字段名,兜底处理
|
|
|
|
|
+ int(stats.get("digg_count") or stats.get("like_count") or 0),
|
|
|
|
|
+ int(stats.get("comment_count") or 0),
|
|
|
|
|
+ int(stats.get("share_count") or 0),
|
|
|
|
|
+ portrait.get("source") or "",
|
|
|
|
|
+ str(age_50_plus_ratio) if age_50_plus_ratio != "" else "",
|
|
|
|
|
+ str(age_50_plus_tgi) if age_50_plus_tgi != "" else "",
|
|
|
item.get("reason") or "",
|
|
item.get("reason") or "",
|
|
|
demand_content_id,
|
|
demand_content_id,
|
|
|
),
|
|
),
|