123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131 |
- """
- @author: luojunhui
- """
- import json
- import time
- import pymysql
- from applications.functions.log import logging
def select_download_videos(trace_id):
    """
    Look up the downloaded video that belongs to a search request.

    Queries ``crawler_video`` for rows whose ``out_user_id`` and
    ``video_title`` both equal ``trace_id`` and uses the first row returned.
    When that row's ``video_id`` is still ``0`` the lookup is delegated to
    ``search_id_to_video``.

    :param trace_id: identifier of the search request
    :return: dict with the keys ``search_videos`` ("success" or "failed"),
        ``trace_id`` and ``video_list`` (one-element list on success, empty
        list when no row matches)
    """
    # Placeholders make the driver escape trace_id; the value is never spliced
    # into the statement text, which closes the SQL-injection hole of the
    # previous str.format() version.
    sql = "select video_id from crawler_video where out_user_id = %s and video_title = %s;"
    # NOTE(review): credentials are hard-coded here; consider moving them to
    # configuration / a secret store.
    connection = pymysql.connect(
        host="rm-bp1159bu17li9hi94.mysql.rds.aliyuncs.com",  # database host (intranet address)
        port=3306,  # port
        user="crawler",  # MySQL user name
        passwd="crawler123456@",  # MySQL password
        db="piaoquan-crawler",  # database name
        charset="utf8mb4"  # connection character set
    )
    try:
        with connection.cursor() as cursor:
            cursor.execute(sql, (trace_id, trace_id))
            out_video_list = cursor.fetchall()
    finally:
        # Always release the connection, even when the query raises.
        connection.close()

    if not out_video_list:
        return {
            "search_videos": "failed",
            "trace_id": trace_id,
            "video_list": []
        }

    video_id = out_video_list[0][0]
    if video_id == 0:
        # The row exists but has no real video id yet; resolve it separately.
        video_id = search_id_to_video(trace_id)
    vid_list = [video_id]
    logging(
        code="2003",
        trace_id=trace_id,
        info="recall_search_list",
        function="find_videos_in_mysql",
        data=vid_list
    )
    return {
        "search_videos": "success",
        "trace_id": trace_id,
        "video_list": vid_list
    }
def select_pq_videos():
    """
    Load every row of ``video_content`` together with its keyword columns.

    The ``key_words``, ``search_keys`` and ``extra_keys`` columns are decoded
    with ``json.loads``.

    :return: list of dicts with the keys ``video_id``, ``key_words``,
        ``search_keys`` and ``extra_keys``
    """
    # NOTE(review): credentials are hard-coded here; consider moving them to
    # configuration / a secret store.
    connection = pymysql.connect(
        host="rm-bp1k5853td1r25g3n690.mysql.rds.aliyuncs.com",  # database host (intranet address)
        port=3306,  # port
        user="wx2016_longvideo",  # MySQL user name
        passwd="wx2016_longvideoP@assword1234",  # MySQL password
        db="incentive",  # database name
        charset="utf8mb4"  # connection character set
    )
    sql = "select video_id, key_words, search_keys, extra_keys from video_content"
    try:
        with connection.cursor() as cursor:
            cursor.execute(sql)
            data = cursor.fetchall()
    finally:
        # The original never closed the connection; release it on every path.
        connection.close()

    return [
        {
            "video_id": video_id,
            "key_words": json.loads(key_words),
            "search_keys": json.loads(search_keys),
            "extra_keys": json.loads(extra_keys),
        }
        for video_id, key_words, search_keys, extra_keys in data
    ]
# Sensitive words
def select_sensitive_words():
    """
    Fetch the sensitive keywords stored in ``wx_sensitive_word``.

    Only rows with ``data_status = 0`` are selected.

    :return: list containing the ``keyword`` value of each selected row
    """
    # NOTE(review): credentials are hard-coded here; consider moving them to
    # configuration / a secret store.
    connection = pymysql.connect(
        host="rm-bp1k5853td1r25g3n690.mysql.rds.aliyuncs.com",  # database host (intranet address)
        port=3306,  # port
        user="wx2016_longvideo",  # MySQL user name
        passwd="wx2016_longvideoP@assword1234",  # MySQL password
        db="longvideo",  # database name
        charset="utf8mb4"  # connection character set
    )
    sql = "select `keyword` from wx_sensitive_word where `data_status` = 0"
    try:
        with connection.cursor() as cursor:
            cursor.execute(sql)
            data = cursor.fetchall()
    finally:
        # The original never closed the connection; release it on every path.
        connection.close()

    return [row[0] for row in data]
def search_id_to_video(trace_id):
    """
    Resolve a search id to its video_id, waiting until a non-zero id exists.

    Queries ``crawler_video`` for rows whose ``out_user_id`` and
    ``video_title`` both equal ``trace_id``. While the first row's
    ``video_id`` is ``0`` the query is repeated once per second.

    :param trace_id: identifier of the search request
    :return: the first non-zero ``video_id`` read for ``trace_id``
    :raises IndexError: if the query returns no rows
    """
    # Placeholders make the driver escape trace_id instead of formatting it
    # into the statement text (the previous str.format() version was
    # injectable).
    sql = "select video_id from crawler_video where out_user_id = %s and video_title = %s;"
    # Poll iteratively: the previous recursive retry added a stack frame and
    # leaked an open connection on every attempt.
    # NOTE(review): this waits indefinitely if video_id never becomes
    # non-zero; consider adding a deadline.
    while True:
        # Reconnect for each attempt, as the original did, so every read is
        # a fresh one; the connection is closed before sleeping.
        # NOTE(review): credentials are hard-coded here; consider moving them
        # to configuration / a secret store.
        connection = pymysql.connect(
            host="rm-bp1159bu17li9hi94.mysql.rds.aliyuncs.com",  # database host (intranet address)
            port=3306,  # port
            user="crawler",  # MySQL user name
            passwd="crawler123456@",  # MySQL password
            db="piaoquan-crawler",  # database name
            charset="utf8mb4"  # connection character set
        )
        try:
            with connection.cursor() as cursor:
                cursor.execute(sql, (trace_id, trace_id))
                out_video_list = cursor.fetchall()
        finally:
            connection.close()

        video_id = out_video_list[0][0]
        if int(video_id) != 0:
            return video_id
        time.sleep(1)
|