@@ -2,17 +2,21 @@ import json
 import random
 import re
 import time
+import uuid
 import requests
 
 from common.common import Common
 from common.scheduling_db import MysqlHelper
 from common.mq import MQ
+from common.aliyun_log import AliyunLogger
+from common.pipeline import PiaoQuanPipeline
 from common.public import download_rule, get_config_from_mysql
 
 proxies = {"http": None, "https": None}
 
+
 class FqwRecommend:
-    platform = ("福气旺")
+    platform = "福气旺"
     download_cnt = 0
     element_list = []
     i = 0
@@ -32,6 +36,14 @@ class FqwRecommend:
             try:
                 Common.logger(log_type, crawler).info(f"正在抓取第{page}页")
                 Common.logging(log_type, crawler, env, f"正在抓取第{page}页")
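+                # Also report progress to the Aliyun log service; code "2000" is the routine status code in this module.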
+                AliyunLogger.logging(
+                    code="2000",
+                    platform=crawler,
+                    mode=log_type,
+                    env=env,
+                    message=f"正在抓取第{page}页"
+                )
                 url = "https://api.xinghetime.com/luckvideo/video/getRecommendVideos"
                 payload = json.dumps({
                     "baseParam": {
@@ -54,14 +66,35 @@ class FqwRecommend:
                 if "data" not in r.text or r.status_code != 200:
                     Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}\n")
                     Common.logging(log_type, crawler, env, f"get_videoList:{r.text}\n")
+                    AliyunLogger.logging(
+                        code="2000",
+                        platform=crawler,
+                        mode=log_type,
+                        env=env,
+                        message=f"get_videoList:{r.text}\n"
+                    )
                     return
                 elif "data" not in r.json():
                     Common.logger(log_type, crawler).info(f"get_videoList:{r.json()}\n")
                     Common.logging(log_type, crawler, env, f"get_videoList:{r.json()}\n")
+                    AliyunLogger.logging(
+                        code="2000",
+                        platform=crawler,
+                        mode=log_type,
+                        env=env,
+                        message=f"get_videoList:{r.json()}\n"
+                    )
                     return
                 elif len(r.json()["data"]) == 0:
                     Common.logger(log_type, crawler).warning(f"get_videoList:{r.json()['data']['list']}\n")
                     Common.logging(log_type, crawler, env, f"get_videoList:{r.json()['data']['list']}\n")
+                    AliyunLogger.logging(
+                        code="2000",
+                        platform=crawler,
+                        mode=log_type,
+                        env=env,
+                        message=f"get_videoList:{r.json()['data']['list']}\n"
+                    )
                     return
                 else:
                     # 视频列表
@@ -74,6 +107,16 @@ class FqwRecommend:
                                 cls.element_list = []
                                 return
                             cls.i += 1
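+                            # A per-video trace_id ties together all Aliyun log entries for this item.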
+                            trace_id = crawler + str(uuid.uuid1())
+                            AliyunLogger.logging(
+                                code="1001",
+                                platform=crawler,
+                                mode=log_type,
+                                env=env,
+                                message="扫描到一条视频",
+                                trace_id=trace_id
+                            )
                             video_title = feeds[i].get("title", "").strip().replace("\n", "") \
                                 .replace("/", "").replace("\\", "").replace("\r", "") \
                                 .replace(":", "").replace("*", "").replace("?", "") \
@@ -90,7 +133,6 @@ class FqwRecommend:
                                 result = number[0]
                             else:
                                 result = 0
-
                             time_str = feeds[i]["durationFormat"]
                             minutes, seconds = map(int, time_str.split(':'))
                             # 计算总秒数
@@ -100,6 +142,7 @@ class FqwRecommend:
                                 "video_id": str(feeds[i]["videoId"]),  # 视频id
                                 "publish_time_stamp": publish_time_stamp,
                                 "publish_time_str": publish_time_str,
+                                "update_time_stamp": int(time.time()),
                                 "category_id": int(feeds[i].get("category_id", 0)),  # 视频来源(精彩推荐)
                                 "cover_url": feeds[i].get("coverImagePath", ""),  # 视频封面
                                 "video_url": feeds[i]["videoPath"],  # 视频链接
@@ -119,38 +162,52 @@ class FqwRecommend:
                             for k, v in video_dict.items():
                                 Common.logger(log_type, crawler).info(f"{k}:{v}")
                             Common.logging(log_type, crawler, env, f"video_dict:{video_dict}")
-
+                            video_dict["out_user_id"] = video_dict["user_id"]
+                            video_dict["platform"] = crawler
+                            video_dict["strategy"] = log_type
+                            video_dict["out_video_id"] = video_dict["video_id"]
+                            video_dict["width"] = video_dict["video_width"]
+                            video_dict["height"] = video_dict["video_height"]
+                            video_dict["crawler_rule"] = json.dumps(rule_dict)
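+                            # user_id is overwritten with our_uid only after out_user_id has captured the original value above.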
+ video_dict["user_id"] = our_uid
|
|
|
+ video_dict["publish_time"] = video_dict["publish_time_str"]
|
|
|
if video_dict["video_id"] == "" or video_dict["video_title"] == "" or video_dict[
|
|
|
"video_url"] == "":
|
|
|
Common.logger(log_type, crawler).info("无效视频\n")
|
|
|
Common.logging(log_type, crawler, env, "无效视频\n")
|
|
|
- elif download_rule(log_type=log_type, crawler=crawler, video_dict=video_dict,
|
|
|
- rule_dict=rule_dict) is False:
|
|
|
- Common.logger(log_type, crawler).info("不满足抓取规则\n")
|
|
|
- Common.logging(log_type, crawler, env, "不满足抓取规则\n")
|
|
|
- elif any(str(word) if str(word) in video_dict["video_title"] else False
|
|
|
- for word in get_config_from_mysql(log_type=log_type,
|
|
|
- source=crawler,
|
|
|
- env=env,
|
|
|
- text="filter",
|
|
|
- action="")) is True:
|
|
|
- Common.logger(log_type, crawler).info('已中过滤词\n')
|
|
|
- Common.logging(log_type, crawler, env, '已中过滤词\n')
|
|
|
- elif cls.repeat_video(log_type, crawler, video_dict["video_id"], env) != 0:
|
|
|
- Common.logger(log_type, crawler).info('视频已下载\n')
|
|
|
- Common.logging(log_type, crawler, env, '视频已下载\n')
|
|
|
-
|
|
|
- else:
|
|
|
- video_dict["out_user_id"] = video_dict["user_id"]
|
|
|
- video_dict["platform"] = crawler
|
|
|
- video_dict["strategy"] = log_type
|
|
|
- video_dict["out_video_id"] = video_dict["video_id"]
|
|
|
- video_dict["width"] = video_dict["video_width"]
|
|
|
- video_dict["height"] = video_dict["video_height"]
|
|
|
- video_dict["crawler_rule"] = json.dumps(rule_dict)
|
|
|
- video_dict["user_id"] = our_uid
|
|
|
- video_dict["publish_time"] = video_dict["publish_time_str"]
|
|
|
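+                                # Log the invalid item to Aliyun (code "2005") and skip it.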
+                                AliyunLogger.logging(
+                                    code="2005",
+                                    platform=crawler,
+                                    mode=log_type,
+                                    env=env,
+                                    message="无效视频",
+                                    data=video_dict,
+                                    trace_id=trace_id
+                                )
+                                continue
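+                            # PiaoQuanPipeline consolidates the inline download_rule / filter-word / repeat_video checks removed above.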
+                            pipeline = PiaoQuanPipeline(
+                                platform=crawler,
+                                mode=log_type,
+                                trace_id=trace_id,
+                                item=video_dict,
+                                rule_dict=rule_dict,
+                                env=env
+                            )
+                            if pipeline.process_item():
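+                                # All pipeline checks passed: hand the item to ETL via MQ.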
                                 mq.send_msg(video_dict)
+                                AliyunLogger.logging(
+                                    code="1002",
+                                    platform=crawler,
+                                    mode=log_type,
+                                    env=env,
+                                    trace_id=trace_id,
+                                    data=video_dict,
+                                    message="成功发送至ETL"
+                                )
                                 cls.download_cnt += 1
                                 interval = random.randrange(5, 11)
                                 time.sleep(interval)
@@ -158,12 +215,25 @@ class FqwRecommend:
                         except Exception as e:
                             Common.logger(log_type, crawler).error(f"抓取单条视频异常:{e}\n")
                             Common.logging(log_type, crawler, env, f"抓取单条视频异常:{e}\n")
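+                            # code "3000" reports the per-video exception to Aliyun.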
+                            AliyunLogger.logging(
+                                code="3000",
+                                platform=crawler,
+                                mode=log_type,
+                                env=env,
+                                message=f"抓取单条视频异常:{e}\n"
+                            )
                 page += 1
             except Exception as e:
                 Common.logger(log_type, crawler).error(f"抓取第{page}页时异常:{e}\n")
                 Common.logging(log_type, crawler, env, f"抓取第{page}页时异常:{e}\n")
-
-
+                AliyunLogger.logging(
+                    code="3000",
+                    platform=crawler,
+                    mode=log_type,
+                    env=env,
+                    message=f"抓取第{page}页时异常:{e}\n"
+                )
 
 
 if __name__ == "__main__":