|
@@ -1,50 +1,257 @@
|
|
|
|
+# encoding: utf-8
|
|
"""
|
|
"""
|
|
@author: luojunhui
|
|
@author: luojunhui
|
|
"""
|
|
"""
|
|
import json
|
|
import json
|
|
|
|
+import time
|
|
import uuid
|
|
import uuid
|
|
import requests
|
|
import requests
|
|
|
|
+import pymysql
|
|
import urllib.parse
|
|
import urllib.parse
|
|
|
|
|
|
-from applications.functions.auto_white import auto_white
|
|
|
|
|
|
+from applications.functions.log import logging
|
|
|
|
|
|
|
|
|
|
-def create_gzh_path(video_id, shared_uid):
|
|
|
|
|
|
class Functions(object):
    """
    General-purpose helpers: ad-whitelist registration, share-path
    construction, video-info lookup and title clean-up.

    Every method is a classmethod; the class holds no instance state.
    """

    # Seconds to wait for each HTTP call before giving up. Without a timeout
    # `requests` blocks indefinitely when the remote end stops responding.
    HTTP_TIMEOUT = 30

    # Ordered (old, new) substitutions applied by `clean_title`. The order is
    # significant: e.g. "/" and "\\" are stripped before "&NBSP" is matched.
    _TITLE_REPLACEMENTS = (
        ("\n", ""),
        ("/", ""),
        ("\r", ""),
        ("#", ""),
        (".", "。"),
        ("\\", ""),
        ("&NBSP", ""),
        (":", ""),
        ("*", ""),
        ("?", ""),
        ('"', ""),
        ("<", ""),
        (">", ""),
        ("|", ""),
        (" ", ""),
        ("'", ""),
    )

    @classmethod
    def _get_admin_session_cookie(cls):
        """
        Log in to the admin backend and return the first cookie value of the
        login response.

        :return: value of the first cookie set by the login response
        :raises IndexError: if the login response carries no cookie
        """
        # NOTE(review): account and password hash are hard-coded in this URL;
        # they should be moved to configuration / a secret store.
        url = "https://admin.piaoquantv.com/manager/login?account=luojunhui&passWd=e10adc3949ba59abbe56e057f20f883e&muid=7"
        headers = {
            'accept': 'application/json, text/plain, */*',
            'accept-language': 'en',
            'priority': 'u=1, i',
            'sec-ch-ua': '"Chromium";v="124", "Google Chrome";v="124", "Not-A.Brand";v="99"',
            'sec-ch-ua-mobile': '?0',
            'sec-ch-ua-platform': '"macOS"',
            'sec-fetch-dest': 'empty',
            'sec-fetch-mode': 'cors',
            'sec-fetch-site': 'same-origin',
            'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36'
        }
        response = requests.get(url, headers=headers, timeout=cls.HTTP_TIMEOUT)
        return response.cookies.values()[0]

    # Automatic whitelist registration
    @classmethod
    def auto_white(cls, root_share_id):
        """
        Register a root share id in the admin backend's root-share whitelist.

        The original author's note says this is done so that official-account
        traffic always gets ads served.

        :param root_share_id: root share id to whitelist
        :return: the ``content`` field of the backend's JSON response
        """
        url = "https://admin.piaoquantv.com/manager/ad/own/white/rootShare/save"
        payload = json.dumps(
            {
                "rootShareId": root_share_id,
                "commit": "算法自动加入白名单--"
            }
        )
        # A fresh login is performed on every call; the session is not cached.
        cookie = cls._get_admin_session_cookie()
        headers = {
            'accept': 'application/json',
            'accept-language': 'en',
            'content-type': 'application/json;',
            'cookie': "SESSION=" + cookie,
            'origin': 'https://admin.piaoquantv.com',
            'priority': 'u=1, i',
            'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36'
        }
        response = requests.post(
            url, headers=headers, data=payload, timeout=cls.HTTP_TIMEOUT
        )
        return response.json()['content']

    # Build the official-account share card path
    @classmethod
    def create_gzh_path(cls, video_id, shared_uid):
        """
        Build a share path for a video and whitelist its root share id.

        A new UUID4 is generated and used as both ``rootShareId`` and
        ``shareId``; it is passed to `auto_white` before returning.

        :param video_id: video id
        :param shared_uid: id of the sharing user
        :return: tuple ``(root_share_id, path)`` where ``path`` is a
            ``pages/category`` path whose ``jumpPage`` is the fully
            percent-encoded ``pages/user-videos`` URL
        """
        root_share_id = str(uuid.uuid4())
        url = f"pages/user-videos?id={video_id}&su={shared_uid}&fromGzh=1&rootShareId={root_share_id}&shareId={root_share_id}"
        # Add the root_share_id to the whitelist automatically
        cls.auto_white(root_share_id)
        # safe='' so that "/", "?", "&" and "=" are percent-encoded as well
        return root_share_id, f"pages/category?jumpPage={urllib.parse.quote(url, safe='')}"

    # Request video info from the long-video API
    @classmethod
    def request_for_info(cls, video_id):
        """
        Fetch information for a single video from ``batchSelectVideoInfo``.

        :param video_id: id of the video to look up
        :return: the decoded JSON response
        """
        url = "https://longvideoapi.piaoquantv.com/longvideoapi/openapi/video/batchSelectVideoInfo"
        data = {
            "videoIdList": [video_id]
        }
        header = {
            "Content-Type": "application/json",
        }
        response = requests.post(
            url, headers=header, data=json.dumps(data), timeout=cls.HTTP_TIMEOUT
        )
        return response.json()

    # Clean a title
    @classmethod
    def clean_title(cls, strings):
        """
        Strip a title and remove or replace a fixed set of characters.

        Leading/trailing whitespace is stripped first, then every pair in
        ``_TITLE_REPLACEMENTS`` is applied in order: line breaks, spaces,
        quotes, slashes and ``# : * ? < > |`` and the literal ``&NBSP`` are
        removed, and ``.`` becomes the full-width ``。``.

        :param strings: title text
        :return: cleaned title
        """
        cleaned = strings.strip()
        for old, new in cls._TITLE_REPLACEMENTS:
            cleaned = cleaned.replace(old, new)
        return cleaned
|
|
|
|
+
|
|
|
|
class MySQLServer(object):
    """
    MySQL query helpers.

    Every method is a classmethod that opens its own connection, runs one
    SELECT and closes the connection again.
    """

    # NOTE(review): database credentials are hard-coded below; they should be
    # moved to configuration / a secret store.

    # Connection settings for the crawler database (intranet address).
    _CRAWLER_DB = dict(
        host="rm-bp1159bu17li9hi94.mysql.rds.aliyuncs.com",
        port=3306,
        user="crawler",
        passwd="crawler123456@",
        db="piaoquan-crawler",
        charset="utf8mb4",
    )

    # Connection settings for the `incentive` database (intranet address).
    _INCENTIVE_DB = dict(
        host="rm-bp1k5853td1r25g3n690.mysql.rds.aliyuncs.com",
        port=3306,
        user="wx2016_longvideo",
        passwd="wx2016_longvideoP@assword1234",
        db="incentive",
        charset="utf8mb4",
    )

    # Connection settings for the `longvideo` database (same server).
    _LONGVIDEO_DB = dict(
        host="rm-bp1k5853td1r25g3n690.mysql.rds.aliyuncs.com",
        port=3306,
        user="wx2016_longvideo",
        passwd="wx2016_longvideoP@assword1234",
        db="longvideo",
        charset="utf8mb4",
    )

    # Looks up the crawled row whose out_user_id and video_title both equal
    # the trace id. Placeholders are filled in by the driver, so the trace id
    # is escaped rather than spliced into the statement.
    _VIDEO_ID_BY_TRACE_SQL = (
        "select video_id from crawler_video "
        "where out_user_id = %s and video_title = %s;"
    )

    @classmethod
    def _fetch_all(cls, connect_kwargs, sql, args=None):
        """
        Run one query on a fresh connection and return all rows.

        :param connect_kwargs: keyword arguments for ``pymysql.connect``
        :param sql: statement, using ``%s`` placeholders when ``args`` is given
        :param args: optional sequence of values bound to the placeholders
        :return: the rows returned by ``cursor.fetchall()``
        """
        connection = pymysql.connect(**connect_kwargs)
        try:
            with connection.cursor() as cursor:
                cursor.execute(sql, args)
                return cursor.fetchall()
        finally:
            # Always release the connection, even if the query raises.
            connection.close()

    @classmethod
    def select_download_videos(cls, trace_id):
        """
        Look up the video id stored in ``crawler_video`` for a trace id.

        If the stored ``video_id`` is 0, `search_id_to_video` is used to wait
        until it becomes non-zero. On success the resulting id list is logged
        with code "2003".

        :param trace_id: trace id, matched against both ``out_user_id`` and
            ``video_title``
        :return: dict with keys ``search_videos`` ("success" or "failed"),
            ``trace_id`` and ``video_list`` (one id on success, else empty)
        """
        out_video_list = cls._fetch_all(
            cls._CRAWLER_DB, cls._VIDEO_ID_BY_TRACE_SQL, (trace_id, trace_id)
        )
        if len(out_video_list) == 0:
            return {
                "search_videos": "failed",
                "trace_id": trace_id,
                "video_list": []
            }

        video_id = out_video_list[0][0]
        if video_id == 0:
            # Row exists but the id is still the 0 placeholder: poll for it.
            video_id = cls.search_id_to_video(trace_id)

        vid_list = [video_id]
        logging(
            code="2003",
            trace_id=trace_id,
            info="recall_search_list",
            function="find_videos_in_mysql",
            data=vid_list
        )
        return {
            "search_videos": "success",
            "trace_id": trace_id,
            "video_list": vid_list
        }

    @classmethod
    def select_pq_videos(cls):
        """
        Load every row of ``video_content`` from the ``incentive`` database.

        :return: list of dicts with keys ``video_id``, ``key_words``,
            ``search_keys`` and ``extra_keys``; the last three are the
            JSON-decoded column values
        """
        sql = "select video_id, key_words, search_keys, extra_keys from video_content"
        data = cls._fetch_all(cls._INCENTIVE_DB, sql)
        result = [
            {
                "video_id": line[0],
                "key_words": json.loads(line[1]),
                "search_keys": json.loads(line[2]),
                "extra_keys": json.loads(line[3]),
            }
            for line in data
        ]
        return result

    # Sensitive words
    @classmethod
    def select_sensitive_words(cls):
        """
        Load the ``keyword`` column of ``wx_sensitive_word`` rows whose
        ``data_status`` is 0, from the ``longvideo`` database.

        :return: list of keywords
        """
        sql = "select `keyword` from wx_sensitive_word where `data_status` = 0"
        data = cls._fetch_all(cls._LONGVIDEO_DB, sql)
        return [line[0] for line in data]

    @classmethod
    def search_id_to_video(cls, trace_id):
        """
        Poll ``crawler_video`` once per second until the row for the trace id
        has a non-zero ``video_id``, then return that id.

        NOTE(review): there is no upper bound on the wait; a row that never
        gets a non-zero id blocks the caller forever.

        :param trace_id: trace id, matched against both ``out_user_id`` and
            ``video_title``
        :return: the first non-zero ``video_id`` read
        :raises IndexError: if no matching row exists
        """
        while True:
            # A new connection per poll, so each read starts a new transaction
            # and is not served from an earlier snapshot.
            out_video_list = cls._fetch_all(
                cls._CRAWLER_DB, cls._VIDEO_ID_BY_TRACE_SQL, (trace_id, trace_id)
            )
            video_id = out_video_list[0][0]
            if int(video_id) != 0:
                return video_id
            time.sleep(1)
|