|
@@ -75,10 +75,8 @@ class WeixinVideoCrawler(object):
|
|
|
latest_crawler_timestamp = account_obj["latest_crawler_timestamp"]
|
|
|
if latest_crawler_timestamp is None:
|
|
|
latest_crawler_timestamp = const.DEFAULT_TIMESTAMP
|
|
|
- print(gh_id, account_name, latest_crawler_timestamp)
|
|
|
# 调用爬虫接口
|
|
|
response = spider.update_msg_list(gh_id, index=cursor)
|
|
|
- print(json.dumps(response, ensure_ascii=False, indent=4))
|
|
|
if response['code'] == const.REQUEST_SUCCESS:
|
|
|
# 一般返回最近10天的msg_list
|
|
|
msg_list = response.get('data', {}).get("data", [])
|
|
@@ -103,11 +101,11 @@ class WeixinVideoCrawler(object):
|
|
|
:return:
|
|
|
"""
|
|
|
select_sql = f"""
|
|
|
- SELECT count(1)
|
|
|
+ SELECT id
|
|
|
FROM publish_single_video_source
|
|
|
WHERE url_unique_md5 = '{url_unique}';
|
|
|
"""
|
|
|
- response = self.db_client.select_json(select_sql)
|
|
|
+ response = self.db_client.select(select_sql)
|
|
|
if response:
|
|
|
return True
|
|
|
else:
|
|
@@ -126,13 +124,12 @@ class WeixinVideoCrawler(object):
|
|
|
publish_type = info.get("AppMsg", {}).get("BaseInfo", {}).get("Type", None)
|
|
|
detail_article_list = info.get("AppMsg", {}).get("DetailInfo", [])
|
|
|
if detail_article_list:
|
|
|
- for article in detail_article_list:
|
|
|
+ for article in tqdm(detail_article_list, desc="crawler_in_msg_list"):
|
|
|
article_url = article.get("ContentUrl", None)
|
|
|
url_unique = functions.generateGzhId(article_url)
|
|
|
# 判断该视频链接是否下载,若已经下载则直接跳过
|
|
|
if self.is_downloaded(url_unique):
|
|
|
continue
|
|
|
-
|
|
|
try:
|
|
|
download_path = functions.download_gzh_video(article_url)
|
|
|
if download_path:
|
|
@@ -177,7 +174,6 @@ class WeixinVideoCrawler(object):
|
|
|
data={"account_name": account_name, "url": article_url}
|
|
|
)
|
|
|
except Exception as e:
|
|
|
- print(str(e))
|
|
|
try:
|
|
|
update_sql = f"""
|
|
|
UPDATE publish_single_video_source
|
|
@@ -217,7 +213,7 @@ class WeixinVideoCrawler(object):
|
|
|
:return:
|
|
|
"""
|
|
|
account_list = self.get_crawler_accounts()
|
|
|
- for account_obj in tqdm(account_list[3:4], desc="crawler_video_for_each_account"):
|
|
|
+ for account_obj in tqdm(account_list, desc="crawler_video_for_each_account"):
|
|
|
self.crawler_article_video_list(account_obj)
|
|
|
self.update_account_latest_crawler_timestamp(gh_id=account_obj["gh_id"])
|
|
|
time.sleep(const.SLEEP_SECONDS)
|
|
@@ -243,9 +239,9 @@ class WeixinVideoCrawler(object):
|
|
|
执行任务
|
|
|
:return:
|
|
|
"""
|
|
|
- # start_timestamp = int(time.time())
|
|
|
+ start_timestamp = int(time.time())
|
|
|
self.crawler_task()
|
|
|
- # self.mention(start_timestamp)
|
|
|
+ self.mention(start_timestamp)
|
|
|
|
|
|
|
|
|
|