# -*- coding: utf-8 -*- # @Author: wangkun # @Time: 2023/4/17 import json import os import sys import time from datetime import date, timedelta from hashlib import md5 sys.path.append(os.getcwd()) from common.common import Common from common.feishu import Feishu from common.scheduling_db import MysqlHelper class Insert: @classmethod def get_config(cls, log_type, crawler, text, env): select_sql = f"""select * from crawler_config where source="benshanzhufu" """ contents = MysqlHelper.get_values(log_type, crawler, select_sql, env, action='') title_list = [] filter_list = [] for content in contents: config = content['config'] config_dict = eval(config) for k, v in config_dict.items(): if k == "title": title_list_config = v.split(",") for title in title_list_config: title_list.append(title) if k == "filter": filter_list_config = v.split(",") for filter_word in filter_list_config: filter_list.append(filter_word) if text == "title": return title_list elif text == "filter": return filter_list @classmethod def before_day(cls): publish_time_str_rule = (date.today() + timedelta(days=-30)).strftime("%Y-%m-%d %H:%M:%S") publish_time_stamp_rule = int(time.mktime(time.strptime(publish_time_str_rule, "%Y-%m-%d %H:%M:%S"))) print(publish_time_str_rule) print(publish_time_stamp_rule) @classmethod def insert_config(cls, log_type, crawler, env): filter_sheet = Feishu.get_values_batch(log_type, crawler, "DjXfqG") # title_sheet = Feishu.get_values_batch(log_type, crawler, "bHSW1p") filter_list = [] # title_list = [] for x in filter_sheet: for y in x: if y is None: pass else: filter_list.append(y) # for x in title_sheet: # for y in x: # if y is None: # pass # else: # title_list.append(y) # str_title = ','.join(title_list) str_filter = ','.join(filter_list) config_dict = { # "title": str_title, "filter": str_filter } str_config_dict = str(config_dict) # print(f"config_dict:{config_dict}") # print(f"str_config_dict:{str_config_dict}") insert_sql = f""" insert into crawler_config(title, source, config) values("本山祝福小程序", "benshanzhufu", "{str_config_dict}") """ MysqlHelper.update_values(log_type, crawler, insert_sql, env) @classmethod def insert_video_from_feishu_to_mysql(cls, log_type, crawler, env): jixiangxingfu_sheetid = ['d9e9b1'] for sheetid in jixiangxingfu_sheetid: xiaoniangao_sheet = Feishu.get_values_batch(log_type, crawler, sheetid) for i in range(1, len(xiaoniangao_sheet)): # for i in range(1, 5): if xiaoniangao_sheet[i][5] is None or xiaoniangao_sheet[i][7] is None: continue video_id = xiaoniangao_sheet[i][12].replace("https://admin.piaoquantv.com/cms/post-detail/", "").replace( "/info", "") if video_id == "None": continue video_id = int(video_id) out_user_id = "jixiangxingfu" platform = "吉祥幸福" strategy = "推荐榜爬虫策略" video_title = str(xiaoniangao_sheet[i][7]) play_cnt = int(xiaoniangao_sheet[i][9].split("万")[0])*10000 duration = str(xiaoniangao_sheet[i][10]) width = int(xiaoniangao_sheet[i][11].split("*")[0]) height = int(xiaoniangao_sheet[i][11].split("*")[1]) cover_url = str(xiaoniangao_sheet[i][13]) video_url = str(xiaoniangao_sheet[i][14]) crawler_rule = json.dumps({}) out_video_id = md5(video_title.encode('utf8')).hexdigest() # print(f"video_id:{video_id}, type:{type(video_id)}") # print(f"out_user_id:{out_user_id}, type:{type(out_user_id)}") # print(f"platform:{platform}, type:{type(platform)}") # print(f"strategy:{strategy}, type:{type(strategy)}") # print(f"video_title:{video_title}, type:{type(video_title)}") # print(f"cover_url:{cover_url}, type:{type(cover_url)}") # print(f"video_url:{video_url}, type:{type(video_url)}") # print(f"crawler_rule:{crawler_rule}, type:{type(crawler_rule)}") select_sql = f""" select * from crawler_video where platform="{platform}" and video_url="{video_url}" """ Common.logger(log_type, crawler).info(f"select_sql:{select_sql}") repeat_video = MysqlHelper.get_values(log_type, crawler, select_sql, env) Common.logger(log_type, crawler).info(f"repeat_video:{repeat_video}") if repeat_video is not None and len(repeat_video) != 0: Common.logger(log_type, crawler).info(f"{video_title} 已存在数据库中\n") else: # 视频信息保存数据库 insert_sql = f""" insert into crawler_video(video_id, out_user_id, platform, strategy, out_video_id, video_title, cover_url, video_url, duration, play_cnt, crawler_rule, width, height) values({video_id}, "{out_user_id}", "{platform}", "{strategy}", "{out_video_id}", "{video_title}", "{cover_url}", "{video_url}", {duration}, {play_cnt}, '{crawler_rule}', {width}, {height}) """ Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}") MysqlHelper.update_values(log_type, crawler, insert_sql, env, action='') Common.logger(log_type, crawler).info('视频信息插入数据库成功!\n') if __name__ == "__main__": # Insert.insert_config("insert", "benshanzhufu", "dev") # print(Insert.get_config("insert", "ganggangdouchuan", "filter", "dev")) # Insert.insert_video_from_feishu_to_mysql("insert-dev", "ganggangdouchuan", "dev") # Insert.insert_video_from_feishu_to_mysql("insert-dev", "jixiangxingfu", "dev") Insert.insert_video_from_feishu_to_mysql("insert-prod", "jixiangxingfu", "prod") pass