123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108 |
- """
- @author: luojunhui
- """
- import traceback
- from applications.aliyunLogApi import log
def insert_into_single_video_source_table(db_client, video_item):
    """
    Insert one video record into the publish_single_video_source table.

    The row is written through ``db_client.save(query=..., params=...)``.
    Any exception raised while collecting the parameters or saving is
    caught and reported through ``log``; nothing is re-raised.

    :param db_client: object exposing ``save(query, params)``.
    :param video_item: mapping holding one value per inserted column
        (presumably a plain dict -- the error handler relies on ``.get``).
    :return: None
    """
    # Order must match the column list of the INSERT statement below.
    columns = (
        "content_trace_id",
        "article_title",
        "out_account_id",
        "out_account_name",
        "read_cnt",
        "like_cnt",
        "article_url",
        "cover_url",
        "video_oss_path",
        "publish_timestamp",
        "crawler_timestamp",
        "url_unique_md5",
        "category",
        "tags",
        "platform",
        "source_account",
        "category_status",
        "audit_status",
        "audit_video_id",
        "mini_program_title",
    )
    # Plain string: the statement has no interpolated values, only driver
    # placeholders, so an f-string prefix is unnecessary.
    insert_sql = """
        INSERT INTO publish_single_video_source
        (content_trace_id, article_title, out_account_id, out_account_name, read_cnt, like_cnt, article_url, cover_url,
        video_oss_path, publish_timestamp, crawler_timestamp, url_unique_md5, category, tags, platform, source_account,
        category_status, audit_status, audit_video_id, mini_program_title
        )
        values
        (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);
    """
    try:
        db_client.save(
            query=insert_sql,
            params=tuple(video_item[column] for column in columns),
        )
    except Exception as e:
        # The failure may itself be a missing key, so read the fields used
        # for logging with .get(); subscripting here would raise again and
        # the original error would never be logged.
        log(
            task="{}_video_crawler".format(video_item.get("platform", "unknown")),
            function="save_each_video",
            message="save video failed",
            data={
                "error": str(e),
                "traceback": traceback.format_exc(),
                "video_id": video_item.get("url_unique_md5"),
                "oss_path": video_item.get("video_oss_path"),
            },
        )
def insert_into_article_meta_table(db_client, article_item):
    """
    Insert one article record into the crawler_meta_article table.

    The row is written through ``db_client.save(query=..., params=...)``.
    Any exception raised while collecting the parameters or saving is
    caught and reported through ``log``; nothing is re-raised.

    :param db_client: object exposing ``save(query, params)``.
    :param article_item: object exposing one attribute per inserted column.
    :return: None
    """
    # Order must match the column list of the INSERT statement below.
    attributes = (
        "platform",
        "mode",
        "category",
        "out_account_id",
        "article_index",
        "title",
        "link",
        "read_cnt",
        "like_cnt",
        "description",
        "publish_time",
        "crawler_time",
        "status",
        "unique_index",
        "llm_sensitivity",
        "title_sensitivity",
    )
    # Plain string: the statement has no interpolated values, only driver
    # placeholders, so an f-string prefix is unnecessary.
    insert_query = """
        insert into crawler_meta_article
        (
        platform, mode, category, out_account_id, article_index, title, link, read_cnt, like_cnt,
        description, publish_time, crawler_time, status, unique_index, llm_sensitivity, title_sensitivity
        )
        VALUES
        (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);
    """
    try:
        db_client.save(
            query=insert_query,
            params=tuple(getattr(article_item, name) for name in attributes),
        )
    except Exception as e:
        # The failure may itself be a missing attribute, so read the fields
        # used for logging with a default; plain attribute access here would
        # raise again and the original error would never be logged.
        log(
            task="{}_article_crawler".format(
                getattr(article_item, "platform", "unknown")
            ),
            function="save_each_article",
            message="save article failed",
            data={
                "error": str(e),
                "traceback": traceback.format_exc(),
                "link": getattr(article_item, "link", None),
            },
        )
|