|
@@ -15,6 +15,7 @@ from appium.webdriver.webdriver import WebDriver
|
|
|
from selenium.common import NoSuchElementException
|
|
|
from selenium.webdriver.common.by import By
|
|
|
sys.path.append(os.getcwd())
|
|
|
+from common.mq import MQ
|
|
|
from common.feishu import Feishu
|
|
|
from common.publish import Publish
|
|
|
from common.common import Common
|
|
@@ -260,18 +261,20 @@ class ShipinhaoSearch:
|
|
|
|
|
|
@classmethod
def repeat_out_video_id(cls, log_type, crawler, out_video_id, env):
    """Count stored videos that already carry this external video id.

    Queries the ``crawler_video`` table for rows whose ``platform`` is
    either ``crawler`` or ``cls.platform`` and whose ``out_video_id``
    equals the given id.

    :param log_type: forwarded unchanged to ``MysqlHelper.get_values``.
    :param crawler: crawler name; used as one of the accepted ``platform``
        values and also forwarded to ``MysqlHelper.get_values``.
    :param out_video_id: external video id to look up.
    :param env: forwarded unchanged to ``MysqlHelper.get_values``.
    :return: number of rows returned by the query (0 when none were found).
    """
    # NOTE(review): the values are interpolated straight into the SQL text.
    # An id containing a double quote would break (or alter) the statement;
    # switch to bound parameters if MysqlHelper offers them - TODO confirm.
    sql = f""" select * from crawler_video where platform in ("{crawler}","{cls.platform}") and out_video_id="{out_video_id}"; """
    repeat_video = MysqlHelper.get_values(log_type, crawler, sql, env)
    return len(repeat_video)
|
|
|
|
|
|
@classmethod
def repeat_video_url(cls, log_type, crawler, video_url, env):
    """Count stored videos that already carry this video URL.

    Queries the ``crawler_video`` table for rows whose ``platform`` is
    either ``crawler`` or ``cls.platform`` and whose ``video_url`` equals
    the given URL.

    :param log_type: forwarded unchanged to ``MysqlHelper.get_values``.
    :param crawler: crawler name; used as one of the accepted ``platform``
        values and also forwarded to ``MysqlHelper.get_values``.
    :param video_url: video URL to look up.
    :param env: forwarded unchanged to ``MysqlHelper.get_values``.
    :return: number of rows returned by the query (0 when none were found).
    """
    # NOTE(review): the values are interpolated straight into the SQL text.
    # A URL containing a double quote would break (or alter) the statement;
    # switch to bound parameters if MysqlHelper offers them - TODO confirm.
    sql = f""" select * from crawler_video where platform in ("{crawler}","{cls.platform}") and video_url="{video_url}"; """
    repeat_video = MysqlHelper.get_values(log_type, crawler, sql, env)
    return len(repeat_video)
|
|
|
|
|
|
@classmethod
|
|
|
- def download_publish(cls, log_type, crawler, word, video_dict, our_uid, env):
|
|
|
+ def download_publish(cls, log_type, crawler, word, video_dict, rule_dict, our_uid, env):
|
|
|
# 下载视频
|
|
|
Common.download_method(log_type=log_type, crawler=crawler, text="video", title=video_dict["video_title"], url=video_dict["video_url"])
|
|
|
|
|
@@ -326,7 +329,6 @@ class ShipinhaoSearch:
|
|
|
except FileNotFoundError:
|
|
|
return
|
|
|
|
|
|
- rule_dict = cls.rule_dict(log_type, crawler)
|
|
|
insert_sql = f""" insert into crawler_video(video_id,
|
|
|
out_user_id,
|
|
|
platform,
|
|
@@ -542,9 +544,9 @@ class ShipinhaoSearch:
|
|
|
|
|
|
return our_user_list
|
|
|
|
|
|
-
|
|
|
@classmethod
|
|
|
def search_video(cls, log_type, crawler, word, driver: WebDriver, our_uid, env):
|
|
|
+ mq = MQ(topic_name="topic_crawler_etl_" + env)
|
|
|
# 点击微信搜索框,并输入搜索词
|
|
|
driver.implicitly_wait(10)
|
|
|
Common.logger(log_type, crawler).info("点击搜索框")
|
|
@@ -641,6 +643,7 @@ class ShipinhaoSearch:
|
|
|
"video_id": out_video_id,
|
|
|
"play_cnt": 0,
|
|
|
"duration": duration,
|
|
|
+ # "duration": 60,
|
|
|
"user_name": user_name,
|
|
|
"user_id": out_user_id,
|
|
|
"avatar_url": avatar_url,
|
|
@@ -668,7 +671,7 @@ class ShipinhaoSearch:
|
|
|
video_dict["share_cnt"] = video_info_dict["share_cnt"]
|
|
|
video_dict["favorite_cnt"] = video_info_dict["favorite_cnt"]
|
|
|
video_dict["comment_cnt"] = video_info_dict["comment_cnt"]
|
|
|
- video_dict["publish_time_str"] = video_info_dict["publish_time_str"]
|
|
|
+ video_dict["publish_time_str"] = video_info_dict["publish_time_str"] + " 00:00:00"
|
|
|
video_dict["publish_time_stamp"] = video_info_dict["publish_time_stamp"]
|
|
|
Common.logger(log_type, crawler).info(f'publish_time:{video_dict["publish_time_str"]}')
|
|
|
Common.logging(log_type, crawler, env, f'publish_time:{video_dict["publish_time_str"]}')
|
|
@@ -676,12 +679,26 @@ class ShipinhaoSearch:
|
|
|
Common.logger(log_type, crawler).info("不满足抓取规则\n")
|
|
|
Common.logging(log_type, crawler, env, "不满足抓取规则\n")
|
|
|
else:
|
|
|
- cls.download_publish(log_type=log_type,
|
|
|
- crawler=crawler,
|
|
|
- word=word,
|
|
|
- video_dict=video_dict,
|
|
|
- our_uid=our_uid,
|
|
|
- env=env)
|
|
|
+ rule_dict = cls.rule_dict(log_type, crawler)
|
|
|
+ video_dict["out_user_id"] = video_dict["user_id"]
|
|
|
+ video_dict["platform"] = crawler
|
|
|
+ video_dict["strategy"] = log_type
|
|
|
+ video_dict["out_video_id"] = video_dict["video_id"]
|
|
|
+ video_dict["width"] = 0
|
|
|
+ video_dict["height"] = 0
|
|
|
+ video_dict["crawler_rule"] = json.dumps(rule_dict)
|
|
|
+ video_dict["user_id"] = our_uid
|
|
|
+ video_dict["publish_time"] = video_dict["publish_time_str"]
|
|
|
+ mq.send_msg(video_dict)
|
|
|
+ cls.download_cnt += 1
|
|
|
+ # cls.download_publish(log_type=log_type,
|
|
|
+ # crawler=crawler,
|
|
|
+ # word=word,
|
|
|
+ # video_dict=video_dict,
|
|
|
+ # rule_dict=rule_dict,
|
|
|
+ # our_uid=our_uid,
|
|
|
+ # env=env)
|
|
|
+
|
|
|
except Exception as e:
|
|
|
Common.logger(log_type, crawler).error(f"抓取单条视频异常:{e}\n")
|
|
|
Common.logging(log_type, crawler, env, f"抓取单条视频异常:{e}\n")
|