|
@@ -3,267 +3,171 @@
|
|
|
@author: luojunhui
|
|
|
"""
|
|
|
import json
|
|
|
-import time
|
|
|
import uuid
|
|
|
import requests
|
|
|
-import pymysql
|
|
|
import urllib.parse
|
|
|
|
|
|
-from applications.functions.log import logging
|
|
|
|
|
|
-
|
|
|
-class Functions(object):
|
|
|
def auto_white(root_share_id, timeout=30):
    """
    Add a root share id to the advertising whitelist through the admin backend.

    The function first logs in to the admin console to obtain a session
    cookie, then POSTs the share id to the whitelist "save" endpoint using
    that cookie.

    :param root_share_id: root share id to put on the whitelist
    :param timeout: seconds to wait for each HTTP call before giving up
    :return: the ``content`` field of the JSON body returned by the save call
    :raises IndexError: if the login response carries no cookie
    :raises KeyError: if the save response JSON has no ``content`` key
    :raises requests.RequestException: on network failure or timeout
    """

    def get_cookie():
        """
        Log in to the admin console and return the first cookie value.

        :return: value of the first cookie set by the login response
        """
        # NOTE(review): the account name and passWd value are hard-coded in
        # this URL; they should be moved to configuration / a secret store.
        url = "https://admin.piaoquantv.com/manager/login?account=luojunhui&passWd=e10adc3949ba59abbe56e057f20f883e&muid=7"
        headers = {
            'accept': 'application/json, text/plain, */*',
            'accept-language': 'en',
            'priority': 'u=1, i',
            'sec-ch-ua': '"Chromium";v="124", "Google Chrome";v="124", "Not-A.Brand";v="99"',
            'sec-ch-ua-mobile': '?0',
            'sec-ch-ua-platform': '"macOS"',
            'sec-fetch-dest': 'empty',
            'sec-fetch-mode': 'cors',
            'sec-fetch-site': 'same-origin',
            'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36'
        }
        # A timeout keeps a stalled login from blocking the caller forever.
        response = requests.request("GET", url, headers=headers, timeout=timeout)
        return response.cookies.values()[0]

    url = "https://admin.piaoquantv.com/manager/ad/own/white/rootShare/save"
    dd = {
        "rootShareId": root_share_id,
        "commit": "算法自动加入白名单--"
    }
    # Serialize before logging in so an unserializable id fails without
    # touching the network.
    payload = json.dumps(dd)
    cookie = get_cookie()
    headers = {
        'accept': 'application/json',
        'accept-language': 'en',
        'content-type': 'application/json;',
        'cookie': "SESSION=" + cookie,
        'origin': 'https://admin.piaoquantv.com',
        'priority': 'u=1, i',
        'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36'
    }
    response = requests.request("POST", url, headers=headers, data=payload, timeout=timeout)
    return response.json()['content']
|
|
|
+
|
|
|
+
|
|
|
def create_gzh_path(video_id, shared_uid):
    """
    Build the jump path used by an official-account share card.

    A fresh UUID4 string is generated and used as both ``rootShareId`` and
    ``shareId`` of the inner page URL; the same id is passed to
    ``auto_white`` before the path is returned.

    :param video_id: video id, placed in the ``id`` query parameter
    :param shared_uid: share uid, placed in the ``su`` query parameter
    :return: tuple ``(root_share_id, path)`` where ``path`` is the category
        page with the fully percent-encoded inner URL as ``jumpPage``
    """
    share_id = str(uuid.uuid4())
    inner_page = (
        f"pages/user-videos?id={video_id}&su={shared_uid}"
        f"&fromGzh=1&rootShareId={share_id}&shareId={share_id}"
    )
    # Whitelist the new share id before handing the path back to the caller.
    auto_white(share_id)
    # safe='' so that "/" is percent-encoded as well.
    encoded_page = urllib.parse.quote(inner_page, safe='')
    return share_id, f"pages/category?jumpPage={encoded_page}"
|
|
|
|
|
|
- @classmethod
|
|
|
- def select_download_videos(cls, trace_id):
|
|
|
- """
|
|
|
- 查询
|
|
|
- :param trace_id:
|
|
|
- :return:
|
|
|
- """
|
|
|
- sql = "select video_id from crawler_video where out_user_id = '{}' and video_title = '{}';".format(trace_id,
|
|
|
- trace_id)
|
|
|
- connection = pymysql.connect(
|
|
|
- host="rm-bp1159bu17li9hi94.mysql.rds.aliyuncs.com", # 数据库IP地址,内网地址
|
|
|
- port=3306, # 端口号
|
|
|
- user="crawler", # mysql用户名
|
|
|
- passwd="crawler123456@", # mysql用户登录密码
|
|
|
- db="piaoquan-crawler", # 数据库名
|
|
|
- charset="utf8mb4" # 如果数据库里面的文本是utf8编码的,charset指定是utf8
|
|
|
- )
|
|
|
- cursor = connection.cursor()
|
|
|
- cursor.execute(sql)
|
|
|
- out_video_list = cursor.fetchall()
|
|
|
- if len(out_video_list) > 0:
|
|
|
- if out_video_list[0][0] == 0:
|
|
|
- video_id = cls.search_id_to_video(trace_id)
|
|
|
- else:
|
|
|
- video_id = out_video_list[0][0]
|
|
|
-
|
|
|
- vid_list = [video_id]
|
|
|
- logging(
|
|
|
- code="2003",
|
|
|
- trace_id=trace_id,
|
|
|
- info="recall_search_list",
|
|
|
- function="find_videos_in_mysql",
|
|
|
- data=vid_list
|
|
|
- )
|
|
|
- return {
|
|
|
- "search_videos": "success",
|
|
|
- "trace_id": trace_id,
|
|
|
- "video_list": vid_list
|
|
|
- }
|
|
|
- else:
|
|
|
- return {
|
|
|
- "search_videos": "failed",
|
|
|
- "trace_id": trace_id,
|
|
|
- "video_list": []
|
|
|
- }
|
|
|
|
|
|
- @classmethod
|
|
|
- def select_pq_videos(cls):
|
|
|
- """
|
|
|
- 查询
|
|
|
- :return: info_list
|
|
|
- """
|
|
|
- connection = pymysql.connect(
|
|
|
- host="rm-bp1k5853td1r25g3n690.mysql.rds.aliyuncs.com", # 数据库IP地址,内网地址
|
|
|
- port=3306, # 端口号
|
|
|
- user="wx2016_longvideo", # mysql用户名
|
|
|
- passwd="wx2016_longvideoP@assword1234", # mysql用户登录密码
|
|
|
- db="incentive", # 数据库名
|
|
|
- charset="utf8mb4" # 如果数据库里面的文本是utf8编码的,charset指定是utf8
|
|
|
- )
|
|
|
- sql = "select video_id, key_words, search_keys, extra_keys from video_content"
|
|
|
- cursor = connection.cursor()
|
|
|
- cursor.execute(sql)
|
|
|
- data = cursor.fetchall()
|
|
|
- result = [
|
|
|
- {
|
|
|
- "video_id": line[0],
|
|
|
- "key_words": json.loads(line[1]),
|
|
|
- "search_keys": json.loads(line[2]),
|
|
|
- "extra_keys": json.loads(line[3]),
|
|
|
- }
|
|
|
- for line in data
|
|
|
- ]
|
|
|
- return result
|
|
|
def request_for_info(video_id, timeout=30):
    """
    Request the info of a single video from the longvideo open API.

    :param video_id: video id; sent as the only element of ``videoIdList``
    :param timeout: seconds to wait for the HTTP call before giving up
    :return: the decoded JSON body of the response
    :raises requests.RequestException: on network failure or timeout
    """
    url = "https://longvideoapi.piaoquantv.com/longvideoapi/openapi/video/batchSelectVideoInfo"
    data = {
        "videoIdList": [video_id]
    }
    header = {
        "Content-Type": "application/json",
    }
    # Without a timeout requests would wait indefinitely on a stalled server.
    response = requests.post(url, headers=header, data=json.dumps(data), timeout=timeout)
    return response.json()
|
|
|
+
|
|
|
+
|
|
|
def clean_title(strings):
    """
    Strip a title and remove or replace a fixed set of characters.

    Leading and trailing whitespace is stripped first; the substitutions
    below are then applied one after another, in order.

    :param strings: the raw title
    :return: the cleaned title
    """
    # Order matters: "&NBSP" is removed after "/", "#", "\\" etc. but before
    # ":" and the later characters, so the sequence must be preserved.
    # NOTE(review): only ASCII '?' and '"' are removed; full-width variants
    # are left untouched — confirm that is intended.
    substitutions = (
        ("\n", ""),
        ("/", ""),
        ("\r", ""),
        ("#", ""),
        (".", "。"),
        ("\\", ""),
        ("&NBSP", ""),
        (":", ""),
        ("*", ""),
        ("?", ""),
        ('"', ""),
        ("<", ""),
        (">", ""),
        ("|", ""),
        (" ", ""),
        ("'", ""),
    )
    cleaned = strings.strip()
    for old, new in substitutions:
        cleaned = cleaned.replace(old, new)
    return cleaned
|
|
|
+
|
|
|
+
|
|
|
def sensitive_flag(s_words, ori_title):
    """
    Tell whether a title is free of sensitive words.

    :param s_words: iterable of sensitive words
    :param ori_title: the title to check
    :return: ``False`` as soon as one word is a substring of ``ori_title``,
        ``True`` otherwise (including when ``s_words`` is empty)
    """
    # Note that an empty-string word is a substring of every title, so it
    # makes this return False for any input.
    return not any(word in ori_title for word in s_words)
|
|
|
|
|
|
- # 敏感词
|
|
|
- @classmethod
|
|
|
- def select_sensitive_words(cls):
|
|
|
- """
|
|
|
- sensitive words
|
|
|
- :return:
|
|
|
- """
|
|
|
- connection = pymysql.connect(
|
|
|
- host="rm-bp1k5853td1r25g3n690.mysql.rds.aliyuncs.com", # 数据库IP地址,内网地址
|
|
|
- port=3306, # 端口号
|
|
|
- user="wx2016_longvideo", # mysql用户名
|
|
|
- passwd="wx2016_longvideoP@assword1234", # mysql用户登录密码
|
|
|
- db="longvideo", # 数据库名
|
|
|
- charset="utf8mb4" # 如果数据库里面的文本是utf8编码的,charset指定是utf8
|
|
|
- )
|
|
|
- sql = "select `keyword` from wx_sensitive_word where `data_status` = 0"
|
|
|
- cursor = connection.cursor()
|
|
|
- cursor.execute(sql)
|
|
|
- data = cursor.fetchall()
|
|
|
- result = [line[0] for line in data]
|
|
|
- return result
|
|
|
|
|
|
- @classmethod
|
|
|
- def search_id_to_video(cls, trace_id):
|
|
|
- """
|
|
|
- 通过 search_id 返回 video_id
|
|
|
- :param trace_id:
|
|
|
- :return:
|
|
|
- """
|
|
|
- sql = "select video_id from crawler_video where out_user_id = '{}' and video_title = '{}';".format(trace_id,
|
|
|
- trace_id)
|
|
|
- connection = pymysql.connect(
|
|
|
- host="rm-bp1159bu17li9hi94.mysql.rds.aliyuncs.com", # 数据库IP地址,内网地址
|
|
|
- port=3306, # 端口号
|
|
|
- user="crawler", # mysql用户名
|
|
|
- passwd="crawler123456@", # mysql用户登录密码
|
|
|
- db="piaoquan-crawler", # 数据库名
|
|
|
- charset="utf8mb4" # 如果数据库里面的文本是utf8编码的,charset指定是utf8
|
|
|
- )
|
|
|
- cursor = connection.cursor()
|
|
|
- cursor.execute(sql)
|
|
|
- out_video_list = cursor.fetchall()
|
|
|
- if int(out_video_list[0][0]) == 0:
|
|
|
- time.sleep(1)
|
|
|
- return cls.search_id_to_video(trace_id)
|
|
|
- else:
|
|
|
- return out_video_list[0][0]
|
|
|
def account_info_map(gh_id):
    """
    Decide which mini-program account to return for a given account id.

    NOTE(review): as written, every branch returns an empty string and the
    three account dicts below are never referenced, so the function returns
    "" for any input. This looks like an unfinished stub — confirm which
    account each branch is meant to return.

    :param gh_id: account id to look up
    :return: currently always ""
    """
    # Candidate account records, each with "name", "id" and "avatar" keys.
    # None of them is used by the branches below yet.
    pqlh = {
        "name": "票圈乐活",
        "id": "wxe8f8f0e23cecad0f",
        "avatar": "https://rescdn.yishihui.com/0temp/lehuo.png"
    }
    xyfxhyjl = {
        "name": "幸运福星好运锦鲤",
        "id": "wx95dcbfc0753c06a8",
        "avatar": ""
    }
    pqzf = {
        "name": "票圈祝福",
        "id": "wxf7261ed54f2e450e",
        "avatar": ""
    }
    # First group of account ids (presumably purchased accounts, judging by
    # the name — TODO confirm).
    buy_accounts = [
        "gh_084a485e859a",
        "gh_e24da99dc899",
        "gh_e0eb490115f5",
        "gh_183d80deffb8",
        "gh_5ff48e9fb9ef",
        "gh_9f8dc5b0c74e",
        "gh_6d9f36e3a7be"
    ]
    # Second group of account ids; the meaning of "dyy" is not visible here.
    dyy = [
        "gh_9877c8541764",
        "gh_6d205db62f04",
        "gh_c69776baf2cd",
        "gh_7e5818b2dd83",
        "gh_89ef4798d3ea",
        "gh_a2901d34f75b",
        "gh_b15de7c99912"
    ]
    # All three branches currently yield the same empty string.
    if gh_id in buy_accounts:
        return ""
    elif gh_id in dyy:
        return ""
    else:
        return ""
|