|
@@ -14,7 +14,8 @@ def insert_into_single_video_source_table(db_client, video_item):
|
|
|
INSERT INTO publish_single_video_source
|
|
|
(content_trace_id, article_title, out_account_id, out_account_name, read_cnt, like_cnt, article_url, cover_url,
|
|
|
video_oss_path, publish_timestamp, crawler_timestamp, url_unique_md5, category, tags, platform, source_account,
|
|
|
- category_status, audit_status, audit_video_id, mini_program_title)
|
|
|
+ category_status, audit_status, audit_video_id, mini_program_title
|
|
|
+ )
|
|
|
values
|
|
|
(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);
|
|
|
"""
|
|
@@ -56,3 +57,52 @@ def insert_into_single_video_source_table(db_client, video_item):
|
|
|
"oss_path": video_item["video_oss_path"],
|
|
|
},
|
|
|
)
|
|
|
+
|
|
|
+
|
|
|
def insert_into_article_meta_table(db_client, article_item):
    """
    Insert one article record into the ``crawler_meta_article`` table.

    The 16 column values are read from attributes of ``article_item`` in the
    same order as the column list of the INSERT statement and handed to
    ``db_client.save`` as a separate ``params`` tuple.

    This is a best-effort write: any exception raised while reading the
    attributes or while saving is caught and reported through ``log``;
    nothing is re-raised to the caller.

    :param db_client: object exposing ``save(query=..., params=...)``.
    :param article_item: object exposing the attributes ``platform``,
        ``mode``, ``category``, ``out_account_id``, ``article_index``,
        ``title``, ``link``, ``read_cnt``, ``like_cnt``, ``description``,
        ``publish_time``, ``crawler_time``, ``status``, ``unique_index``,
        ``llm_sensitivity`` and ``title_sensitivity``.
    :return: None
    """
    # Deliberately a plain string, not an f-string: the template has no
    # replacement fields, and values must only ever travel through `params`.
    insert_query = """
        insert into crawler_meta_article
        (
            platform, mode, category, out_account_id, article_index, title, link, read_cnt, like_cnt,
            description, publish_time, crawler_time, status, unique_index, llm_sensitivity, title_sensitivity
        )
        VALUES
        (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);
    """
    try:
        # The tuple order must match the column list in the query above.
        db_client.save(
            query=insert_query,
            params=(
                article_item.platform,
                article_item.mode,
                article_item.category,
                article_item.out_account_id,
                article_item.article_index,
                article_item.title,
                article_item.link,
                article_item.read_cnt,
                article_item.like_cnt,
                article_item.description,
                article_item.publish_time,
                article_item.crawler_time,
                article_item.status,
                article_item.unique_index,
                article_item.llm_sensitivity,
                article_item.title_sensitivity,
            ),
        )
    except Exception as e:
        # Log and continue so that one failed article does not propagate
        # an exception to the caller.
        log(
            task="{}_article_crawler".format(article_item.platform),
            function="save_each_article",
            message="save article failed",
            data={
                "error": str(e),
                "traceback": traceback.format_exc(),
                "link": article_item.link,
            },
        )
|
|
|
+
|