|
@@ -1,11 +1,12 @@
|
|
# -*- coding: utf-8 -*-
|
|
# -*- coding: utf-8 -*-
|
|
-# @Time: 2024/05/08
|
|
|
|
|
|
+# @Time: 2023/11/17
|
|
import json
|
|
import json
|
|
import os
|
|
import os
|
|
import random
|
|
import random
|
|
import sys
|
|
import sys
|
|
import time
|
|
import time
|
|
import uuid
|
|
import uuid
|
|
|
|
+import subprocess
|
|
from datetime import datetime, timedelta
|
|
from datetime import datetime, timedelta
|
|
from hashlib import md5
|
|
from hashlib import md5
|
|
|
|
|
|
@@ -23,9 +24,7 @@ from common.common import Common
|
|
from common.mq import MQ
|
|
from common.mq import MQ
|
|
from common.scheduling_db import MysqlHelper
|
|
from common.scheduling_db import MysqlHelper
|
|
|
|
|
|
-"""
|
|
|
|
-祝福年糕圈
|
|
|
|
-"""
|
|
|
|
|
|
+
|
|
class ZFQZRecommendNew:
|
|
class ZFQZRecommendNew:
|
|
env = None
|
|
env = None
|
|
driver = None
|
|
driver = None
|
|
@@ -44,7 +43,7 @@ class ZFQZRecommendNew:
|
|
self.rule_dict = rule_dict
|
|
self.rule_dict = rule_dict
|
|
self.our_uid = our_uid
|
|
self.our_uid = our_uid
|
|
if self.env == "dev":
|
|
if self.env == "dev":
|
|
- chromedriverExecutable = "/Users/tzld/Downloads/chromedriver_mac64/chromedriver"
|
|
|
|
|
|
+ chromedriverExecutable = "/Users/piaoquan/Downloads/chromedriver"
|
|
else:
|
|
else:
|
|
chromedriverExecutable = "/Users/piaoquan/Downloads/chromedriver-mac-x64/chromedriver"
|
|
chromedriverExecutable = "/Users/piaoquan/Downloads/chromedriver-mac-x64/chromedriver"
|
|
|
|
|
|
@@ -109,7 +108,7 @@ class ZFQZRecommendNew:
|
|
)
|
|
)
|
|
size = self.driver.get_window_size()
|
|
size = self.driver.get_window_size()
|
|
self.driver.swipe(int(size['width'] * 0.5), int(size['height'] * 0.8),
|
|
self.driver.swipe(int(size['width'] * 0.5), int(size['height'] * 0.8),
|
|
- int(size['width'] * 0.5), int(size['height'] * 0.2), 200)
|
|
|
|
|
|
+ int(size['width'] * 0.5), int(size['height'] * 0.2), 100)
|
|
else:
|
|
else:
|
|
pass
|
|
pass
|
|
except Exception as e:
|
|
except Exception as e:
|
|
@@ -126,16 +125,19 @@ class ZFQZRecommendNew:
|
|
size = self.driver.get_window_size()
|
|
size = self.driver.get_window_size()
|
|
self.driver.swipe(int(size['width'] * 0.5), int(size['height'] * 0.2),
|
|
self.driver.swipe(int(size['width'] * 0.5), int(size['height'] * 0.2),
|
|
int(size['width'] * 0.5), int(size['height'] * 0.8), 200)
|
|
int(size['width'] * 0.5), int(size['height'] * 0.8), 200)
|
|
|
|
+ command = 'adb shell service call statusbar 2'
|
|
|
|
+ process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE)
|
|
|
|
+ process.communicate()
|
|
|
|
+
|
|
time.sleep(1)
|
|
time.sleep(1)
|
|
- time.sleep(1)
|
|
|
|
- Common.logger(log_type, crawler).info('打开小程序"祝福年糕圈"')
|
|
|
|
- self.driver.find_elements(By.XPATH, '//*[@text=开心年糕圈"]')[-1].click()
|
|
|
|
|
|
+ Common.logger(log_type, crawler).info('打开小程序"祝福圈子"')
|
|
|
|
+ self.driver.find_elements(By.XPATH, '//*[@text="祝福圈子"]')[-1].click()
|
|
AliyunLogger.logging(
|
|
AliyunLogger.logging(
|
|
code="1000",
|
|
code="1000",
|
|
platform=self.platform,
|
|
platform=self.platform,
|
|
mode=log_type,
|
|
mode=log_type,
|
|
env=env,
|
|
env=env,
|
|
- message='打开小程序"祝福年糕圈"成功'
|
|
|
|
|
|
+ message='打开小程序"祝福圈子"成功'
|
|
)
|
|
)
|
|
time.sleep(5)
|
|
time.sleep(5)
|
|
self.get_videoList()
|
|
self.get_videoList()
|
|
@@ -167,7 +169,7 @@ class ZFQZRecommendNew:
|
|
|
|
|
|
|
|
|
|
def swipe_up(self):
|
|
def swipe_up(self):
|
|
- self.search_elements('//*[@class="expose--adapt-parent"]')
|
|
|
|
|
|
+ self.search_elements('//*[@class="bless--list"]')
|
|
size = self.driver.get_window_size()
|
|
size = self.driver.get_window_size()
|
|
self.driver.swipe(int(size["width"] * 0.5), int(size["height"] * 0.8),
|
|
self.driver.swipe(int(size["width"] * 0.5), int(size["height"] * 0.8),
|
|
int(size["width"] * 0.5), int(size["height"] * 0.442), 200)
|
|
int(size["width"] * 0.5), int(size["height"] * 0.442), 200)
|
|
@@ -186,16 +188,16 @@ class ZFQZRecommendNew:
|
|
self.mq = MQ(topic_name="topic_crawler_etl_" + self.env)
|
|
self.mq = MQ(topic_name="topic_crawler_etl_" + self.env)
|
|
self.driver.implicitly_wait(20)
|
|
self.driver.implicitly_wait(20)
|
|
|
|
|
|
- self.check_to_applet(xpath='//*[@class="expose--adapt-parent"]')
|
|
|
|
|
|
+ self.check_to_applet(xpath='//*[@class="tags--tag tags--tag-0 tags--checked"]')
|
|
time.sleep(1)
|
|
time.sleep(1)
|
|
- # name = ["推荐", "春节"]
|
|
|
|
- # selected_text = random.choice(name)
|
|
|
|
- # try:
|
|
|
|
- # self.driver.find_element(By.XPATH, f"//wx-button[contains(., '{selected_text}')]").click()
|
|
|
|
- # time.sleep(2)
|
|
|
|
- # except NoSuchElementException:
|
|
|
|
- # Common.logger(self.log_type, self.crawler).info(f"没有该tab:{selected_text}\n")
|
|
|
|
- # pass
|
|
|
|
|
|
+ name = ["推荐"]
|
|
|
|
+ selected_text = random.choice(name)
|
|
|
|
+ try:
|
|
|
|
+ self.driver.find_element(By.XPATH, f"//wx-button[contains(., '{selected_text}')]").click()
|
|
|
|
+ time.sleep(2)
|
|
|
|
+ except NoSuchElementException:
|
|
|
|
+ Common.logger(self.log_type, self.crawler).info(f"没有该tab:{selected_text}\n")
|
|
|
|
+ pass
|
|
print("开始获取视频信息")
|
|
print("开始获取视频信息")
|
|
for i in range(20):
|
|
for i in range(20):
|
|
print("下滑{}次".format(i))
|
|
print("下滑{}次".format(i))
|
|
@@ -207,6 +209,7 @@ class ZFQZRecommendNew:
|
|
return
|
|
return
|
|
|
|
|
|
print("下滑完成")
|
|
print("下滑完成")
|
|
|
|
+ # time.sleep(100)
|
|
Common.logger(self.log_type, self.crawler).info("已抓取完一组,休眠 5 秒\n")
|
|
Common.logger(self.log_type, self.crawler).info("已抓取完一组,休眠 5 秒\n")
|
|
AliyunLogger.logging(
|
|
AliyunLogger.logging(
|
|
code="1000",
|
|
code="1000",
|
|
@@ -291,28 +294,28 @@ class ZFQZRecommendNew:
|
|
Common.logger(self.log_type, self.crawler).info(f"第{self.count}条视频")
|
|
Common.logger(self.log_type, self.crawler).info(f"第{self.count}条视频")
|
|
# 获取 trace_id, 并且把该 id 当做视频生命周期唯一索引
|
|
# 获取 trace_id, 并且把该 id 当做视频生命周期唯一索引
|
|
trace_id = self.crawler + str(uuid.uuid1())
|
|
trace_id = self.crawler + str(uuid.uuid1())
|
|
- video_title = video_element.find("wx-view", class_="dynamic--title-container").text
|
|
|
|
|
|
+ video_title = video_element.find("wx-view", class_="dynamic--title").text
|
|
play_str = video_element.find("wx-view", class_="dynamic--views").text
|
|
play_str = video_element.find("wx-view", class_="dynamic--views").text
|
|
- like_str = video_element.findAll("wx-view", class_="dynamic--commerce-btn-text")[0].text
|
|
|
|
- comment_str = video_element.findAll("wx-view", class_="dynamic--commerce-btn-text")[1].text
|
|
|
|
|
|
+ # like_str = video_element.findAll("wx-view", class_="dynamic--commerce-btn-text")[0].text
|
|
|
|
+ # comment_str = video_element.findAll("wx-view", class_="dynamic--commerce-btn-text")[1].text
|
|
duration_str = video_element.find("wx-view", class_="dynamic--duration").text
|
|
duration_str = video_element.find("wx-view", class_="dynamic--duration").text
|
|
user_name = video_element.find("wx-view", class_="dynamic--nick-top").text
|
|
user_name = video_element.find("wx-view", class_="dynamic--nick-top").text
|
|
avatar_url = video_element.find("wx-image", class_="avatar--avatar")["src"]
|
|
avatar_url = video_element.find("wx-image", class_="avatar--avatar")["src"]
|
|
cover_url = video_element.find("wx-image", class_="dynamic--bg-image")["src"]
|
|
cover_url = video_element.find("wx-image", class_="dynamic--bg-image")["src"]
|
|
play_cnt = int(play_str.replace("+", "").replace("次播放", ""))
|
|
play_cnt = int(play_str.replace("+", "").replace("次播放", ""))
|
|
duration = int(duration_str.split(":")[0].strip()) * 60 + int(duration_str.split(":")[-1].strip())
|
|
duration = int(duration_str.split(":")[0].strip()) * 60 + int(duration_str.split(":")[-1].strip())
|
|
- if "点赞" in like_str:
|
|
|
|
- like_cnt = 0
|
|
|
|
- elif "万" in like_str:
|
|
|
|
- like_cnt = int(like_str.split("万")[0]) * 10000
|
|
|
|
- else:
|
|
|
|
- like_cnt = int(like_str)
|
|
|
|
- if "评论" in comment_str:
|
|
|
|
- comment_cnt = 0
|
|
|
|
- elif "万" in comment_str:
|
|
|
|
- comment_cnt = int(comment_str.split("万")[0]) * 10000
|
|
|
|
- else:
|
|
|
|
- comment_cnt = int(comment_str)
|
|
|
|
|
|
+ # if "点赞" in like_str:
|
|
|
|
+ # like_cnt = 0
|
|
|
|
+ # elif "万" in like_str:
|
|
|
|
+ # like_cnt = int(like_str.split("万")[0]) * 10000
|
|
|
|
+ # else:
|
|
|
|
+ # like_cnt = int(like_str)
|
|
|
|
+ # if "评论" in comment_str:
|
|
|
|
+ # comment_cnt = 0
|
|
|
|
+ # elif "万" in comment_str:
|
|
|
|
+ # comment_cnt = int(comment_str.split("万")[0]) * 10000
|
|
|
|
+ # else:
|
|
|
|
+ # comment_cnt = int(comment_str)
|
|
out_video_id = md5(video_title.encode('utf8')).hexdigest()
|
|
out_video_id = md5(video_title.encode('utf8')).hexdigest()
|
|
out_user_id = md5(user_name.encode('utf8')).hexdigest()
|
|
out_user_id = md5(user_name.encode('utf8')).hexdigest()
|
|
repeat_id = self.repeat_video(out_video_id)
|
|
repeat_id = self.repeat_video(out_video_id)
|
|
@@ -328,9 +331,9 @@ class ZFQZRecommendNew:
|
|
"play_str": play_str,
|
|
"play_str": play_str,
|
|
"play_cnt": play_cnt,
|
|
"play_cnt": play_cnt,
|
|
"like_str": "",
|
|
"like_str": "",
|
|
- "like_cnt": like_cnt,
|
|
|
|
- "comment_cnt": comment_cnt,
|
|
|
|
- "share_cnt": 0,
|
|
|
|
|
|
+ "like_cnt": 50,
|
|
|
|
+ "comment_cnt": 0,
|
|
|
|
+ "share_cnt": 50,
|
|
"user_name": user_name,
|
|
"user_name": user_name,
|
|
"user_id": out_user_id,
|
|
"user_id": out_user_id,
|
|
'publish_time_stamp': int(time.time()),
|
|
'publish_time_stamp': int(time.time()),
|
|
@@ -406,6 +409,6 @@ if __name__ == "__main__":
|
|
rule_dict1 = {"period": {"min": 0, "max": 0},
|
|
rule_dict1 = {"period": {"min": 0, "max": 0},
|
|
"duration": {"min": 1, "max": 0},
|
|
"duration": {"min": 1, "max": 0},
|
|
"favorite_cnt": {"min": 0, "max": 0},
|
|
"favorite_cnt": {"min": 0, "max": 0},
|
|
- "videos_cnt": {"min": 1000, "max": 0},
|
|
|
|
|
|
+ "videos_cnt": {"min": 0, "max": 0},
|
|
"share_cnt": {"min": 0, "max": 0}}
|
|
"share_cnt": {"min": 0, "max": 0}}
|
|
ZFQZRecommendNew("recommend", "zhufuquanzi", "dev", rule_dict1, 6267141)
|
|
ZFQZRecommendNew("recommend", "zhufuquanzi", "dev", rule_dict1, 6267141)
|