|
@@ -89,15 +89,29 @@ def upsert_good_authors(
|
|
|
return rows
|
|
return rows
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def fetch_demand_content_dt(conn, demand_content_id: int) -> Optional[Any]:
    """Look up the ``dt`` column of a ``demand_content`` row by its ``id``.

    Per the original note, ``dt`` follows the same convention as the
    schedule and is mostly a YYYYMMDD integer.

    Args:
        conn: An open DB connection whose ``cursor()`` result can be used as
            a context manager.
        demand_content_id: Value matched against ``demand_content.id``.

    Returns:
        The ``dt`` value of the matching row, or ``None`` when no row is
        found (or when a mapping row carries no ``dt`` key).
    """
    sql = "SELECT dt FROM demand_content WHERE id = %s LIMIT 1"
    with conn.cursor() as cur:
        cur.execute(sql, (demand_content_id,))
        row = cur.fetchone()

    if not row:
        return None
    # Dict-style cursors return a mapping keyed by column name.
    if hasattr(row, "get"):
        return row.get("dt")
    # Tuple-style cursors return positional rows; ``dt`` is the only
    # selected column, so it sits at index 0.
    return row[0]
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
def insert_contents(
|
|
def insert_contents(
|
|
|
conn,
|
|
conn,
|
|
|
trace_id: str,
|
|
trace_id: str,
|
|
|
query: str,
|
|
query: str,
|
|
|
demand_content_id: int,
|
|
demand_content_id: int,
|
|
|
contents: List[Dict[str, Any]],
|
|
contents: List[Dict[str, Any]],
|
|
|
|
|
+ dt: Optional[Any] = None,
|
|
|
) -> int:
|
|
) -> int:
|
|
|
"""
|
|
"""
|
|
|
将 contents 列表写入 demand_find_content_result 表。
|
|
将 contents 列表写入 demand_find_content_result 表。
|
|
|
|
|
+
|
|
|
|
|
+ dt 来自 demand_content.dt,与 demand_content_id 对应;未查到时可传 None。
|
|
|
"""
|
|
"""
|
|
|
if not contents:
|
|
if not contents:
|
|
|
return 0
|
|
return 0
|
|
@@ -107,12 +121,12 @@ def insert_contents(
|
|
|
trace_id, query, rank_no, aweme_id, video_url, title, author_name, author_id, author_link,
|
|
trace_id, query, rank_no, aweme_id, video_url, title, author_name, author_id, author_link,
|
|
|
digg_count, comment_count, share_count,
|
|
digg_count, comment_count, share_count,
|
|
|
portrait_source, elderly_ratio, elderly_tgi, recommendation_reason,
|
|
portrait_source, elderly_ratio, elderly_tgi, recommendation_reason,
|
|
|
- demand_content_id
|
|
|
|
|
|
|
+ demand_content_id, dt
|
|
|
) VALUES (
|
|
) VALUES (
|
|
|
%s, %s, %s, %s, %s, %s, %s, %s, %s,
|
|
%s, %s, %s, %s, %s, %s, %s, %s, %s,
|
|
|
%s, %s, %s,
|
|
%s, %s, %s,
|
|
|
%s, %s, %s, %s,
|
|
%s, %s, %s, %s,
|
|
|
- %s
|
|
|
|
|
|
|
+ %s, %s
|
|
|
)
|
|
)
|
|
|
"""
|
|
"""
|
|
|
with conn.cursor() as cur:
|
|
with conn.cursor() as cur:
|
|
@@ -146,6 +160,7 @@ def insert_contents(
|
|
|
str(age_50_plus_tgi) if age_50_plus_tgi != "" else "",
|
|
str(age_50_plus_tgi) if age_50_plus_tgi != "" else "",
|
|
|
item.get("reason") or "",
|
|
item.get("reason") or "",
|
|
|
demand_content_id,
|
|
demand_content_id,
|
|
|
|
|
+ dt,
|
|
|
),
|
|
),
|
|
|
)
|
|
)
|
|
|
rows += cur.rowcount
|
|
rows += cur.rowcount
|
|
@@ -228,30 +243,41 @@ def update_web_html_url(trace_id: str, web_html_url: str) -> int:
|
|
|
conn.close()
|
|
conn.close()
|
|
|
|
|
|
|
|
|
|
|
|
|
def update_process_trace_by_aweme_id(
    *,
    trace_id: str,
    aweme_id: str,
    process_trace_text: str,
    channel: str = "抖音",
) -> int:
    """Write ``process_trace`` (TEXT) and ``channel`` back to
    ``demand_find_content_result`` for the rows matching
    ``(trace_id, aweme_id)``.

    Conventions:
    - trace_id is the name of the output sub-directory.
    - aweme_id is the unique content id
      (``demand_find_content_result.aweme_id``).
    - process_trace_text is a JSON-serialized string (or raw text).
    - channel defaults to "抖音" (Douyin); the business currently only
      covers the Douyin search scenario, so pass it explicitly only once
      rows need to be distinguished by channel.

    Returns:
        The cursor's ``rowcount`` after the UPDATE, or 0 (without opening a
        connection) when trace_id, aweme_id or process_trace_text is empty
        after stripping whitespace.
    """
    t = (trace_id or "").strip()
    a = (aweme_id or "").strip()
    text = (process_trace_text or "").strip()
    ch = (channel or "").strip()
    if not t or not a or not text:
        return 0
    if not ch:
        # A blank channel falls back to the default instead of being stored.
        ch = "抖音"

    sql = """
        UPDATE demand_find_content_result
        SET process_trace = %s,
            channel = %s
        WHERE trace_id = %s AND aweme_id = %s
    """
    conn = get_connection()
    try:
        with conn.cursor() as cur:
            cur.execute(sql, (text, ch, t, a))
            # NOTE(review): no explicit commit is issued here; presumably
            # get_connection() returns an autocommit connection -- confirm.
            return cur.rowcount
    finally:
        conn.close()
|