|
@@ -4,10 +4,16 @@ import sys
|
|
import time
|
|
import time
|
|
import uuid
|
|
import uuid
|
|
import json
|
|
import json
|
|
|
|
+from datetime import datetime
|
|
|
|
|
|
-
|
|
|
|
|
|
+import cv2
|
|
import requests
|
|
import requests
|
|
|
|
|
|
|
|
+from application.common.feishu import FsData
|
|
|
|
+from application.common.feishu.feishu_utils import FeishuUtils
|
|
|
|
+from application.common.gpt import GPT4oMini
|
|
|
|
+from application.common.mysql.sql import Sql
|
|
|
|
+from application.common.redis.xng_redis import xng_in_video_data
|
|
|
|
|
|
sys.path.append(os.getcwd())
|
|
sys.path.append(os.getcwd())
|
|
|
|
|
|
@@ -16,11 +22,11 @@ from application.pipeline import PiaoQuanPipeline
|
|
from application.common.messageQueue import MQ
|
|
from application.common.messageQueue import MQ
|
|
from application.common.log import AliyunLogger
|
|
from application.common.log import AliyunLogger
|
|
from application.common.mysql import MysqlHelper
|
|
from application.common.mysql import MysqlHelper
|
|
-from application.common import Feishu, haiwai_tunnel_proxies
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class DKYHYRecommend(object):
|
|
class DKYHYRecommend(object):
|
|
|
|
+
|
|
"""
|
|
"""
|
|
打开迎好运
|
|
打开迎好运
|
|
"""
|
|
"""
|
|
@@ -38,110 +44,52 @@ class DKYHYRecommend(object):
|
|
self.aliyun_log = AliyunLogger(mode=self.mode, platform=self.platform)
|
|
self.aliyun_log = AliyunLogger(mode=self.mode, platform=self.platform)
|
|
self.mysql = MysqlHelper(mode=self.mode, platform=self)
|
|
self.mysql = MysqlHelper(mode=self.mode, platform=self)
|
|
|
|
|
|
- def get_cookie(self):
|
|
|
|
- sql = f""" select * from crawler_config where source="{self.platform}" """
|
|
|
|
- configs = self.mysql.select(sql=sql)
|
|
|
|
- for config in configs:
|
|
|
|
- if "token" in config:
|
|
|
|
- token_element = config[3]
|
|
|
|
- data_json = json.loads(token_element)
|
|
|
|
- token = data_json.get("token")
|
|
|
|
- return token
|
|
|
|
-
|
|
|
|
- def logic(self):
|
|
|
|
- for i in range(10):
|
|
|
|
- app_id = 'wx2f9f796a36e11d71'
|
|
|
|
- js_code = self.get_js_code(app_id)
|
|
|
|
- token = self.get_search_params(app_id, js_code)
|
|
|
|
- if token:
|
|
|
|
- return token
|
|
|
|
-
|
|
|
|
-
|
|
|
|
- def get_js_code(self, app_id: str) -> str:
|
|
|
|
- js_code = ''
|
|
|
|
- try:
|
|
|
|
- url = 'http://61.48.133.26:30001/GetMiniAppCode'
|
|
|
|
- data = {
|
|
|
|
- "appid": app_id
|
|
|
|
- }
|
|
|
|
- response =requests.request(method='POST', url=url, json=data)
|
|
|
|
- body = response.content.decode()
|
|
|
|
- res_data = json.loads(body)
|
|
|
|
- js_code = res_data['GetMiniAppCode']
|
|
|
|
-
|
|
|
|
- except Exception as e:
|
|
|
|
- pass
|
|
|
|
- return js_code
|
|
|
|
-
|
|
|
|
- def get_search_params(self, app_id: str, js_code: str) -> dict:
|
|
|
|
- try:
|
|
|
|
- url = "https://api.riyingkj.com/api/user/login/v1"
|
|
|
|
- payload = json.dumps({
|
|
|
|
- "appid": app_id,
|
|
|
|
- "code": js_code,
|
|
|
|
- "exp": {}
|
|
|
|
- })
|
|
|
|
- headers = {
|
|
|
|
- 'Host': 'api.riyingkj.com',
|
|
|
|
- 'xweb_xhr': '1',
|
|
|
|
- 'X-Token': '',
|
|
|
|
- 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 MicroMessenger/6.8.0(0x16080000) NetType/WIFI MiniProgramEnv/Mac MacWechat/WMPF MacWechat/3.8.6(0x13080610) XWEB/1156',
|
|
|
|
- 'Content-Type': 'application/json',
|
|
|
|
- 'Accept': '*/*',
|
|
|
|
- 'Referer': 'https://servicewechat.com/wx2f9f796a36e11d71/28/page-frame.html',
|
|
|
|
- 'Accept-Language': 'zh-CN,zh;q=0.9'
|
|
|
|
- }
|
|
|
|
- response = requests.request("POST", url, headers=headers, data=payload)
|
|
|
|
- response = response.json()
|
|
|
|
- token = response["token"]
|
|
|
|
-
|
|
|
|
- except Exception as e:
|
|
|
|
- return ''
|
|
|
|
-
|
|
|
|
- return token
|
|
|
|
-
|
|
|
|
|
|
|
|
def get_recommend_list(self):
    """
    Fetch the recommend feed and pass every returned video to process_video_obj.

    The method keeps POSTing to the recommend endpoint until one of these
    happens: the request or JSON decoding fails, the response ``code`` is
    not 0, the response carries no videos, or ``self.limit_flag`` is set.

    Failures are reported through ``self.aliyun_log`` with code "3000";
    nothing is raised to the caller and nothing is returned.
    """
    print("打开迎好运")
    headers = {
        'Content-Type': 'application/json'
    }
    url = "http://8.217.192.46:8889/crawler/da_kai_ying_hao_yun/recommend"
    # Without a timeout a stalled connection would block the crawler forever.
    request_timeout = 30
    title_rule = FsData().get_title_rule()
    page = 0
    while True:
        page += 1
        # NOTE(review): the cursor is always empty, so every iteration asks
        # for the same page; if the API returns a next-page cursor it should
        # be threaded through here - confirm against the service contract.
        payload = json.dumps({
            "cursor": ""
        })
        try:
            response = requests.request(
                "POST", url, headers=headers, data=payload, timeout=request_timeout
            )
            body = response.json()
        except (requests.RequestException, ValueError) as e:
            # Network failure, timeout or a non-JSON body: log and stop
            # instead of letting the exception escape unlogged.
            self.aliyun_log.logging(
                code="3000",
                message="抓取单条视频失败,请求失败: {}".format(e)
            )
            return
        if not isinstance(body, dict) or body.get('code') != 0:
            self.aliyun_log.logging(
                code="3000",
                message="抓取单条视频失败,请求失败"
            )
            return
        data = (body.get('data') or {}).get('data')
        if not data:
            # Empty (or missing) page: nothing left to crawl.
            return
        for index, video_obj in enumerate(data, 1):
            try:
                self.aliyun_log.logging(
                    code="1001", message="扫描到一条视频", data=video_obj
                )
                self.process_video_obj(video_obj, title_rule)
            except Exception as e:
                # One bad video must not abort the rest of the page.
                self.aliyun_log.logging(
                    code="3000",
                    message="抓取单条视频失败, 该视频位于第{}页第{}条报错原因是{}".format(
                        page, index, e
                    ),
                )
            # NOTE(review): the original nesting of the limit check and the
            # sleep was not recoverable; they are applied per video so the
            # crawl stops as soon as the limit is reached - confirm.
            if self.limit_flag:
                return
            time.sleep(random.randint(1, 5))
|
|
|
|
|
|
- def process_video_obj(self, video_obj):
|
|
|
|
|
|
+ def process_video_obj(self, video_obj, title_rule):
|
|
"""
|
|
"""
|
|
处理视频
|
|
处理视频
|
|
:param video_obj:
|
|
:param video_obj:
|
|
@@ -157,6 +105,8 @@ class DKYHYRecommend(object):
|
|
item.add_video_info("out_user_id", video_obj["uuid"])
|
|
item.add_video_info("out_user_id", video_obj["uuid"])
|
|
item.add_video_info("cover_url", video_obj["cover_url"])
|
|
item.add_video_info("cover_url", video_obj["cover_url"])
|
|
item.add_video_info("like_cnt", 0)
|
|
item.add_video_info("like_cnt", 0)
|
|
|
|
+ item.add_video_info("share_cnt", 0)
|
|
|
|
+ item.add_video_info("comment_cnt", 0)
|
|
item.add_video_info("video_url", video_obj["urls"][0])
|
|
item.add_video_info("video_url", video_obj["urls"][0])
|
|
item.add_video_info("out_video_id", video_obj["uuid"])
|
|
item.add_video_info("out_video_id", video_obj["uuid"])
|
|
item.add_video_info("platform", self.platform)
|
|
item.add_video_info("platform", self.platform)
|
|
@@ -174,6 +124,27 @@ class DKYHYRecommend(object):
|
|
trace_id=trace_id,
|
|
trace_id=trace_id,
|
|
)
|
|
)
|
|
if pipeline.process_item():
|
|
if pipeline.process_item():
|
|
|
|
+ title_list = title_rule.split(",")
|
|
|
|
+ title = video_obj["title"]
|
|
|
|
+ contains_keyword = any(keyword in title for keyword in title_list)
|
|
|
|
+ if contains_keyword:
|
|
|
|
+ new_title = GPT4oMini.get_ai_mini_title(title)
|
|
|
|
+ if new_title:
|
|
|
|
+ item.add_video_info("video_title", new_title)
|
|
|
|
+ current_time = datetime.now()
|
|
|
|
+ formatted_time = current_time.strftime("%Y-%m-%d %H:%M:%S")
|
|
|
|
+ values = [
|
|
|
|
+ [
|
|
|
|
+ video_obj["video_path"],
|
|
|
|
+ video_obj["cover_image"],
|
|
|
|
+ title,
|
|
|
|
+ new_title,
|
|
|
|
+ formatted_time,
|
|
|
|
+ ]
|
|
|
|
+ ]
|
|
|
|
+ FeishuUtils.insert_columns("U5dXsSlPOhiNNCtEfgqcm1iYnpf", "m9KG95", "ROWS", 1, 2)
|
|
|
|
+ time.sleep(0.5)
|
|
|
|
+ FeishuUtils.update_values("m9KG95", "8c7191", "A2:Z2", values)
|
|
self.download_cnt += 1
|
|
self.download_cnt += 1
|
|
self.mq.send_msg(mq_obj)
|
|
self.mq.send_msg(mq_obj)
|
|
self.aliyun_log.logging(code="1002", message="成功发送至 ETL", data=mq_obj)
|
|
self.aliyun_log.logging(code="1002", message="成功发送至 ETL", data=mq_obj)
|
|
@@ -182,9 +153,6 @@ class DKYHYRecommend(object):
|
|
):
|
|
):
|
|
self.limit_flag = True
|
|
self.limit_flag = True
|
|
|
|
|
|
-
|
|
|
|
-
|
|
|
|
-
|
|
|
|
def run(self):
|
|
def run(self):
|
|
self.get_recommend_list()
|
|
self.get_recommend_list()
|
|
|
|
|