|
@@ -81,6 +81,8 @@ class ZhuJinShanJinMeiRecommend(object):
|
|
self.mq = MQ(topic_name="topic_crawler_etl_" + self.env)
|
|
self.mq = MQ(topic_name="topic_crawler_etl_" + self.env)
|
|
self.limit_flag = False
|
|
self.limit_flag = False
|
|
self.cryptor = AES()
|
|
self.cryptor = AES()
|
|
|
|
+ self.aliyun_log = AliyunLogger(platform=self.platform, mode=self.mode)
|
|
|
|
+ self.mysql = MysqlHelper(platform=self.platform, mode=self.mode)
|
|
|
|
|
|
def get_recommend_list(self):
|
|
def get_recommend_list(self):
|
|
url = "https://api.lidongze.cn/jeecg-boot/ugc/getVideoListsEn2"
|
|
url = "https://api.lidongze.cn/jeecg-boot/ugc/getVideoListsEn2"
|
|
@@ -117,29 +119,20 @@ class ZhuJinShanJinMeiRecommend(object):
|
|
page_index = result['list']['current'] + 1
|
|
page_index = result['list']['current'] + 1
|
|
for index, video_obj in enumerate(result['list']['records'], 1):
|
|
for index, video_obj in enumerate(result['list']['records'], 1):
|
|
try:
|
|
try:
|
|
- AliyunLogger.logging(
|
|
|
|
|
|
+ self.aliyun_log.logging(
|
|
code="1001",
|
|
code="1001",
|
|
- platform=self.platform,
|
|
|
|
- mode=self.mode,
|
|
|
|
- env=self.env,
|
|
|
|
message="扫描到一条视频",
|
|
message="扫描到一条视频",
|
|
data=video_obj
|
|
data=video_obj
|
|
)
|
|
)
|
|
self.process_video_obj(video_obj)
|
|
self.process_video_obj(video_obj)
|
|
except Exception as e:
|
|
except Exception as e:
|
|
- AliyunLogger.logging(
|
|
|
|
|
|
+ self.aliyun_log.logging(
|
|
code="3000",
|
|
code="3000",
|
|
- platform=self.platform,
|
|
|
|
- mode=self.mode,
|
|
|
|
- env=self.env,
|
|
|
|
message="抓取单条视频失败, 该视频位于第{}页第{}条报错原因是{}".format(page_index, index, e)
|
|
message="抓取单条视频失败, 该视频位于第{}页第{}条报错原因是{}".format(page_index, index, e)
|
|
)
|
|
)
|
|
except Exception as e:
|
|
except Exception as e:
|
|
- AliyunLogger.logging(
|
|
|
|
|
|
+ self.aliyun_log.logging(
|
|
code="3000",
|
|
code="3000",
|
|
- platform=self.platform,
|
|
|
|
- mode=self.mode,
|
|
|
|
- env=self.env,
|
|
|
|
message="抓取第{}页的时候失败, 报错原因是{}".format(page_index, e)
|
|
message="抓取第{}页的时候失败, 报错原因是{}".format(page_index, e)
|
|
)
|
|
)
|
|
time.sleep(random.randint(5, 10))
|
|
time.sleep(random.randint(5, 10))
|
|
@@ -198,21 +191,15 @@ class ZhuJinShanJinMeiRecommend(object):
|
|
detail_video = json.loads(self.cryptor.aes_decrypt(detail_response.text))
|
|
detail_video = json.loads(self.cryptor.aes_decrypt(detail_response.text))
|
|
if detail_video['success']:
|
|
if detail_video['success']:
|
|
try:
|
|
try:
|
|
- AliyunLogger.logging(
|
|
|
|
|
|
+ self.aliyun_log.logging(
|
|
code="1001",
|
|
code="1001",
|
|
- platform=self.platform,
|
|
|
|
- mode=self.mode,
|
|
|
|
- env=self.env,
|
|
|
|
message="扫描到一条视频",
|
|
message="扫描到一条视频",
|
|
data=detail_video['data']
|
|
data=detail_video['data']
|
|
)
|
|
)
|
|
self.process_video_obj(detail_video['data'])
|
|
self.process_video_obj(detail_video['data'])
|
|
except Exception as e:
|
|
except Exception as e:
|
|
- AliyunLogger.logging(
|
|
|
|
|
|
+ self.aliyun_log.logging(
|
|
code="3000",
|
|
code="3000",
|
|
- platform=self.platform,
|
|
|
|
- mode=self.mode,
|
|
|
|
- env=self.env,
|
|
|
|
message="抓取单条视频失败, 该视频位于第{}条报错原因是{}".format(index, e)
|
|
message="抓取单条视频失败, 该视频位于第{}条报错原因是{}".format(index, e)
|
|
)
|
|
)
|
|
|
|
|
|
@@ -254,11 +241,8 @@ class ZhuJinShanJinMeiRecommend(object):
|
|
if pipeline.process_item():
|
|
if pipeline.process_item():
|
|
self.download_cnt += 1
|
|
self.download_cnt += 1
|
|
self.mq.send_msg(mq_obj)
|
|
self.mq.send_msg(mq_obj)
|
|
- AliyunLogger.logging(
|
|
|
|
|
|
+ self.aliyun_log.logging(
|
|
code="1002",
|
|
code="1002",
|
|
- platform=self.platform,
|
|
|
|
- mode=self.mode,
|
|
|
|
- env=self.env,
|
|
|
|
message="成功发送至 ETL",
|
|
message="成功发送至 ETL",
|
|
data=mq_obj
|
|
data=mq_obj
|
|
)
|
|
)
|
|
@@ -273,44 +257,26 @@ class ZhuJinShanJinMeiRecommend(object):
|
|
如果用户 id 存在,则判断用户是否修改名字,若名字修改则更新名字
|
|
如果用户 id 存在,则判断用户是否修改名字,若名字修改则更新名字
|
|
"""
|
|
"""
|
|
select_user_sql = f"""select name, name_id from accounts where name_id = "{out_user_id}" and platform = "{self.platform}" and useful = 1 limit 1"""
|
|
select_user_sql = f"""select name, name_id from accounts where name_id = "{out_user_id}" and platform = "{self.platform}" and useful = 1 limit 1"""
|
|
- out_user_info = MysqlHelper.get_values(
|
|
|
|
- log_type=self.mode,
|
|
|
|
- crawler=self.platform,
|
|
|
|
- sql=select_user_sql,
|
|
|
|
- env=self.env,
|
|
|
|
- machine="",
|
|
|
|
- )
|
|
|
|
|
|
+ out_user_info = self.mysql.select(sql=select_user_sql)
|
|
if out_user_info:
|
|
if out_user_info:
|
|
name, name_id = out_user_info[0]
|
|
name, name_id = out_user_info[0]
|
|
if name == out_user_name:
|
|
if name == out_user_name:
|
|
return
|
|
return
|
|
else:
|
|
else:
|
|
update_sql = f"""update accounts set name = "{out_user_name}" where name_id = "{out_user_id}";"""
|
|
update_sql = f"""update accounts set name = "{out_user_name}" where name_id = "{out_user_id}";"""
|
|
- MysqlHelper.update_values(
|
|
|
|
- log_type=self.mode,
|
|
|
|
- crawler=self.platform,
|
|
|
|
- sql=update_sql,
|
|
|
|
- env=self.env,
|
|
|
|
- machine=""
|
|
|
|
- )
|
|
|
|
|
|
+ self.mysql.update(sql=update_sql)
|
|
else:
|
|
else:
|
|
insert_sql = f"""INSERT INTO accounts (name, name_id, platform, useful) values ("{out_user_name}", "{out_user_id}", "{self.platform}", 1 )"""
|
|
insert_sql = f"""INSERT INTO accounts (name, name_id, platform, useful) values ("{out_user_name}", "{out_user_id}", "{self.platform}", 1 )"""
|
|
- MysqlHelper.update_values(
|
|
|
|
- log_type=self.mode,
|
|
|
|
- crawler=self.platform,
|
|
|
|
- sql=insert_sql,
|
|
|
|
- env=self.env,
|
|
|
|
- machine="",
|
|
|
|
- )
|
|
|
|
|
|
+ self.mysql.update(sql=insert_sql)
|
|
|
|
|
|
def get_user_list(self):
|
|
def get_user_list(self):
|
|
|
|
+ """
|
|
|
|
+ 获取用户列表
|
|
|
|
+ :return:
|
|
|
|
+ """
|
|
select_user_sql = f"""select name_id from accounts where platform = "{self.platform}" and useful = 1"""
|
|
select_user_sql = f"""select name_id from accounts where platform = "{self.platform}" and useful = 1"""
|
|
- out_user_info = MysqlHelper.get_values(
|
|
|
|
- log_type=self.mode,
|
|
|
|
- crawler=self.platform,
|
|
|
|
|
|
+ out_user_info = self.mysql.select(
|
|
sql=select_user_sql,
|
|
sql=select_user_sql,
|
|
- env=self.env,
|
|
|
|
- machine="",
|
|
|
|
)
|
|
)
|
|
if out_user_info:
|
|
if out_user_info:
|
|
result = []
|
|
result = []
|
|
@@ -337,11 +303,8 @@ class ZhuJinShanJinMeiRecommend(object):
|
|
while page_index <= total_page:
|
|
while page_index <= total_page:
|
|
try:
|
|
try:
|
|
if self.limit_flag:
|
|
if self.limit_flag:
|
|
- AliyunLogger.logging(
|
|
|
|
|
|
+ self.aliyun_log.logging(
|
|
code="2000",
|
|
code="2000",
|
|
- platform=self.platform,
|
|
|
|
- mode=self.mode,
|
|
|
|
- env=self.env,
|
|
|
|
message="本轮已经抓取足够数量的视频"
|
|
message="本轮已经抓取足够数量的视频"
|
|
)
|
|
)
|
|
return
|
|
return
|
|
@@ -363,30 +326,21 @@ class ZhuJinShanJinMeiRecommend(object):
|
|
page_index = result['list']['current'] + 1
|
|
page_index = result['list']['current'] + 1
|
|
for index, video_obj in enumerate(result['list']['records'], 1):
|
|
for index, video_obj in enumerate(result['list']['records'], 1):
|
|
try:
|
|
try:
|
|
- AliyunLogger.logging(
|
|
|
|
|
|
+ self.aliyun_log.logging(
|
|
code="1001",
|
|
code="1001",
|
|
- platform=self.platform,
|
|
|
|
- mode=self.mode,
|
|
|
|
- env=self.env,
|
|
|
|
message="扫描到一条视频",
|
|
message="扫描到一条视频",
|
|
data=video_obj
|
|
data=video_obj
|
|
)
|
|
)
|
|
self.process_video_obj(video_obj)
|
|
self.process_video_obj(video_obj)
|
|
except Exception as e:
|
|
except Exception as e:
|
|
- AliyunLogger.logging(
|
|
|
|
|
|
+ self.aliyun_log.logging(
|
|
code="3000",
|
|
code="3000",
|
|
- platform=self.platform,
|
|
|
|
- mode=self.mode,
|
|
|
|
- env=self.env,
|
|
|
|
message="抓取单条视频失败, 该视频位于第{}页第{}条报错原因是{}".format(page_index, index,
|
|
message="抓取单条视频失败, 该视频位于第{}页第{}条报错原因是{}".format(page_index, index,
|
|
e)
|
|
e)
|
|
)
|
|
)
|
|
except Exception as e:
|
|
except Exception as e:
|
|
- AliyunLogger.logging(
|
|
|
|
|
|
+ self.aliyun_log.logging(
|
|
code="3000",
|
|
code="3000",
|
|
- platform=self.platform,
|
|
|
|
- mode=self.mode,
|
|
|
|
- env=self.env,
|
|
|
|
message="抓取第{}页的时候失败, 报错原因是{}".format(page_index, e)
|
|
message="抓取第{}页的时候失败, 报错原因是{}".format(page_index, e)
|
|
)
|
|
)
|
|
time.sleep(random.randint(5, 10))
|
|
time.sleep(random.randint(5, 10))
|
|
@@ -409,24 +363,15 @@ class ZhuJinShanJinMeiRecommend(object):
|
|
for index, user_id in enumerate(user_list):
|
|
for index, user_id in enumerate(user_list):
|
|
try:
|
|
try:
|
|
if self.limit_flag:
|
|
if self.limit_flag:
|
|
- AliyunLogger.logging(
|
|
|
|
|
|
+ self.aliyun_log.logging(
|
|
code="2000",
|
|
code="2000",
|
|
- platform=self.platform,
|
|
|
|
- mode=self.mode,
|
|
|
|
- env=self.env,
|
|
|
|
message="本轮已经抓取足够数量的视频"
|
|
message="本轮已经抓取足够数量的视频"
|
|
)
|
|
)
|
|
return
|
|
return
|
|
self.get_user_videos(user_id=user_id)
|
|
self.get_user_videos(user_id=user_id)
|
|
except Exception as e:
|
|
except Exception as e:
|
|
- AliyunLogger.logging(
|
|
|
|
|
|
+ self.aliyun_log.logging(
|
|
code="3000",
|
|
code="3000",
|
|
- platform=self.platform,
|
|
|
|
- mode=self.mode,
|
|
|
|
- env=self.env,
|
|
|
|
message="抓取账号视频出现异常,账号 id 是{}, 报错原因是{}".format(user_id, e)
|
|
message="抓取账号视频出现异常,账号 id 是{}, 报错原因是{}".format(user_id, e)
|
|
)
|
|
)
|
|
|
|
|
|
-
|
|
|
|
-if __name__ == '__main__':
|
|
|
|
- pass
|
|
|