123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135 |
- """
- @author: luojunhui
- """
- import traceback
- from applications.aliyunLogApi import log
def insert_into_single_video_source_table(db_client, video_item):
    """
    Insert one video record into ``publish_single_video_source``.

    The write is best-effort: any exception raised while reading the
    fields from ``video_item`` or while calling ``db_client.save`` is
    reported through ``log`` and then swallowed, so it is not propagated
    to the caller.

    :param db_client: object exposing ``save(query=..., params=...)``.
    :param video_item: mapping with one entry per name in ``field_names``;
        it must also support ``.get()`` for the failure report.
    :return: None
    """
    # Plain string on purpose: the %s markers are driver placeholders that
    # db_client.save fills from ``params``; nothing is interpolated here.
    insert_sql = """
        INSERT INTO publish_single_video_source
        (content_trace_id, article_title, out_account_id, out_account_name, read_cnt, like_cnt, article_url, cover_url, video_oss_path, publish_timestamp, crawler_timestamp, url_unique_md5, category, tags, platform, source_account)
        values
        (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);
    """
    # Must stay in the same order as the column list in insert_sql.
    field_names = (
        "content_trace_id",
        "article_title",
        "out_account_id",
        "out_account_name",
        "read_cnt",
        "like_cnt",
        "article_url",
        "cover_url",
        "video_oss_path",
        "publish_timestamp",
        "crawler_timestamp",
        "url_unique_md5",
        "category",
        "tags",
        "platform",
        "source_account",
    )
    try:
        db_client.save(
            query=insert_sql,
            params=tuple(video_item[name] for name in field_names),
        )
    except Exception as e:
        # Use .get() here: a missing key may be the very reason we landed in
        # this handler, and indexing it again would raise out of the handler
        # and lose the log entry.
        log(
            task="{}_video_crawler".format(video_item.get("platform", "unknown")),
            function="save_each_video",
            message="save video failed",
            data={
                "error": str(e),
                "traceback": traceback.format_exc(),
                "video_id": video_item.get("url_unique_md5"),
                "oss_path": video_item.get("video_oss_path"),
            },
        )
def insert_into_video_meta_accounts_table(db_client, account_item):
    """
    Insert one account record into ``video_meta_accounts``.

    The write is best-effort: any exception raised while reading the
    fields from ``account_item`` or while calling ``db_client.save`` is
    reported through ``log`` and then swallowed, so it is not propagated
    to the caller.

    :param db_client: object exposing ``save(query=..., params=...)``.
    :param account_item: mapping with one entry per name in
        ``field_names``; it must also support ``.get()`` for the failure
        report.
    :return: None
    """
    # Plain string on purpose: the %s markers are driver placeholders that
    # db_client.save fills from ``params``; nothing is interpolated here.
    insert_sql = """
        insert into video_meta_accounts
        (platform, account_id, account_name, max_cursor, account_init_date, status, priority)
        values
        (%s, %s, %s, %s, %s, %s, %s);
    """
    # Must stay in the same order as the column list in insert_sql.
    field_names = (
        "platform",
        "account_id",
        "account_name",
        "max_cursor",
        "account_init_date",
        "status",
        "priority",
    )
    try:
        db_client.save(
            query=insert_sql,
            params=tuple(account_item[name] for name in field_names),
        )
    except Exception as e:
        # Use .get() here: a missing key may be the very reason we landed in
        # this handler, and indexing it again would raise out of the handler
        # and lose the log entry.
        log(
            task="{}_account_crawler".format(account_item.get("platform", "unknown")),
            function="save_each_account",
            message="save account failed",
            data={
                "error": str(e),
                "traceback": traceback.format_exc(),
                "account_id": account_item.get("account_id"),
            },
        )
def insert_into_candidate_account_pool_table(db_client, account_item):
    """
    Insert an account into ``crawler_candidate_account_pool`` unless a row
    with the same ``account_id`` and ``platform`` is already present.

    The duplicate lookup runs outside the ``try`` block, so an exception
    raised by ``db_client.fetch`` (or by a missing ``account_id`` /
    ``platform`` key) propagates to the caller. The insert itself is
    best-effort: a failure there is reported through ``log`` and swallowed.

    :param db_client: object exposing ``fetch(query=..., params=...)`` and
        ``save(query=..., params=...)``.
    :param account_item: mapping providing ``account_name``,
        ``account_id``, ``title_list``, ``platform`` and ``crawler_date``;
        it must also support ``.get()`` for the failure report.
    :return: None
    """
    # Skip the insert when this (account_id, platform) pair is already stored.
    # NOTE(review): lookup and insert are two separate calls, so this only
    # de-duplicates reliably if nothing else inserts the same pair in between.
    fetch_query = """
        select id from crawler_candidate_account_pool
        where account_id = %s and platform = %s;
    """
    duplicate_id = db_client.fetch(
        query=fetch_query,
        params=(account_item["account_id"], account_item["platform"]),
    )
    if duplicate_id:
        return

    insert_query = """
        insert into crawler_candidate_account_pool
        (account_name, account_id, title_list, platform, crawler_date)
        values
        (%s, %s, %s, %s, %s)
    """
    # Must stay in the same order as the column list in insert_query.
    field_names = (
        "account_name",
        "account_id",
        "title_list",
        "platform",
        "crawler_date",
    )
    try:
        db_client.save(
            query=insert_query,
            params=tuple(account_item[name] for name in field_names),
        )
    except Exception as e:
        # Use .get() here: a missing key may be the very reason we landed in
        # this handler, and indexing it again would raise out of the handler
        # and lose the log entry.
        log(
            task="{}_account_crawler".format(account_item.get("platform", "unknown")),
            function="save_each_account",
            message="save account failed",
            data={
                "error": str(e),
                "traceback": traceback.format_exc(),
                "item": account_item,
            },
        )
|