@@ -164,6 +164,7 @@ class ShipinhaoSearch:
     @classmethod
     def start_wechat(cls, log_type, crawler, word, our_uid, env):
         Common.logger(log_type, crawler).info('启动微信')
+        Common.logging(log_type, crawler, env, '启动微信')
         if env == "dev":
             chromedriverExecutable = "/Users/wangkun/Downloads/chromedriver/chromedriver_v107/chromedriver"
         else:
@@ -207,12 +208,14 @@ class ShipinhaoSearch:
                          env=env)
         cls.close_wechat(log_type=log_type,
                          crawler=crawler,
+                         env=env,
                          driver=driver)
 
     @classmethod
-    def close_wechat(cls, log_type, crawler, driver: WebDriver):
+    def close_wechat(cls, log_type, crawler, env, driver: WebDriver):
         driver.quit()
         Common.logger(log_type, crawler).info(f"微信退出成功\n")
+        Common.logging(log_type, crawler, env, f"微信退出成功\n")
 
     @classmethod
     def is_contain_chinese(cls, strword):
@@ -274,6 +277,7 @@ class ShipinhaoSearch:
             md_title = md5(video_dict['video_title'].encode('utf8')).hexdigest()
             shutil.rmtree(f"./{crawler}/videos/{md_title}/")
             Common.logger(log_type, crawler).info("视频size=0,删除成功\n")
+            Common.logging(log_type, crawler, env, "视频size=0,删除成功\n")
             return
         video_dict["video_width"] = ffmpeg_dict["width"]
         video_dict["video_height"] = ffmpeg_dict["height"]
@@ -283,6 +287,7 @@ class ShipinhaoSearch:
             md_title = md5(video_dict['video_title'].encode('utf8')).hexdigest()
             shutil.rmtree(f"./{crawler}/videos/{md_title}/")
             Common.logger(log_type, crawler).info("宽高不满足抓取规则,删除成功\n")
+            Common.logging(log_type, crawler, env, "宽高不满足抓取规则,删除成功\n")
             return
 
         # 下载封面
@@ -292,6 +297,7 @@ class ShipinhaoSearch:
 
         # 上传视频
         Common.logger(log_type, crawler).info("开始上传视频...")
+        Common.logging(log_type, crawler, env, "开始上传视频...")
         our_video_id = Publish.upload_and_publish(log_type=log_type,
                                                   crawler=crawler,
                                                   strategy="搜索爬虫策略",
@@ -303,6 +309,7 @@ class ShipinhaoSearch:
         else:
             our_video_link = f"https://admin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
             Common.logger(log_type, crawler).info("视频上传完成")
+            Common.logging(log_type, crawler, env, "视频上传完成")
 
         if our_video_id is None:
             try:
@@ -310,6 +317,7 @@ class ShipinhaoSearch:
                 md_title = md5(video_dict['video_title'].encode('utf8')).hexdigest()
                 shutil.rmtree(f"./{crawler}/videos/{md_title}")
                 Common.logger(log_type, crawler).warning(f"our_video_id:{our_video_id}, 删除成功\n")
+                Common.logging(log_type, crawler, env, f"our_video_id:{our_video_id}, 删除成功\n")
                 return
             except FileNotFoundError:
                 return
@@ -344,8 +352,10 @@ class ShipinhaoSearch:
                                                 {int(video_dict['video_width'])},
                                                 {int(video_dict['video_height'])}) """
         Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
+        Common.logging(log_type, crawler, env, f"insert_sql:{insert_sql}")
         MysqlHelper.update_values(log_type, crawler, insert_sql, env)
         Common.logger(log_type, crawler).info('视频信息插入数据库成功!')
+        Common.logging(log_type, crawler, env, '视频信息插入数据库成功!')
 
         # 写飞书
         Feishu.insert_columns(log_type, crawler, "xYWCzf", "ROWS", 1, 2)
@@ -368,6 +378,7 @@ class ShipinhaoSearch:
                   video_dict["video_url"]]]
         Feishu.update_values(log_type, crawler, "xYWCzf", "F2:Z2", values)
         Common.logger(log_type, crawler).info("写入飞书成功\n")
+        Common.logging(log_type, crawler, env, "写入飞书成功\n")
         cls.download_cnt += 1
 
     @classmethod
@@ -483,6 +494,7 @@ class ShipinhaoSearch:
             user_sheet = Feishu.get_values_batch(log_type, crawler, sheetid)
             if user_sheet is None:
                 Common.logger(log_type, crawler).warning(f"user_sheet:{user_sheet}, 3秒钟后重试")
+                Common.logging(log_type, crawler, env, f"user_sheet:{user_sheet}, 3秒钟后重试")
                 time.sleep(3)
                 continue
             our_user_list = []
@@ -496,6 +508,7 @@ class ShipinhaoSearch:
                 tag4 = user_sheet[i][11]
                 tag5 = user_sheet[i][12]
                 Common.logger(log_type, crawler).info(f"正在更新 {search_word} 搜索词信息")
+                Common.logging(log_type, crawler, env, f"正在更新 {search_word} 搜索词信息")
                 if our_uid is None:
                     default_user = getUser.get_default_user()
                     # 用来创建our_id的信息
@@ -514,6 +527,7 @@ class ShipinhaoSearch:
                     Feishu.update_values(log_type, crawler, sheetid, f'G{i + 1}:H{i + 1}',
                                          [[our_uid, our_user_link]])
                     Common.logger(log_type, crawler).info(f'站内用户主页创建成功:{our_user_link}\n')
+                    Common.logging(log_type, crawler, env, f'站内用户主页创建成功:{our_user_link}\n')
                 our_user_dict = {
                     'out_uid': '',
                     'search_word': search_word,
@@ -530,11 +544,13 @@ class ShipinhaoSearch:
         # 点击微信搜索框,并输入搜索词
         driver.implicitly_wait(10)
         Common.logger(log_type, crawler).info("点击搜索框")
+        Common.logging(log_type, crawler, env, "点击搜索框")
         driver.find_element(By.ID, 'com.tencent.mm:id/j5t').click()
         time.sleep(0.5)
         driver.find_element(By.ID, 'com.tencent.mm:id/cd7').clear().send_keys(word)
         driver.press_keycode(AndroidKey.ENTER)
         Common.logger(log_type, crawler).info("进入搜索词页面")
+        Common.logging(log_type, crawler, env, "进入搜索词页面")
         driver.find_elements(By.ID, 'com.tencent.mm:id/br8')[0].click()
         time.sleep(5)
 
@@ -542,12 +558,14 @@ class ShipinhaoSearch:
         check_to_webview = cls.check_to_webview(log_type, crawler, driver)
         if check_to_webview is None:
             Common.logger(log_type, crawler).info("切换到视频号 webview 失败\n")
+            Common.logging(log_type, crawler, env, "切换到视频号 webview 失败\n")
             return
         time.sleep(1)
 
         # 切换到"视频号"分类
         shipinhao_tags = cls.search_elements(driver, '//div[@class="unit"]/*[2]')
         Common.logger(log_type, crawler).info('点击"视频号"分类')
+        Common.logging(log_type, crawler, env, '点击"视频号"分类')
         shipinhao_tags[0].click()
         time.sleep(5)
 
@@ -555,40 +573,49 @@ class ShipinhaoSearch:
         while True:
             if cls.search_elements(driver, '//*[@class="mixed-box__bd"]') is None:
                 Common.logger(log_type, crawler).info('窗口已销毁\n')
+                Common.logging(log_type, crawler, env, '窗口已销毁\n')
                 return
 
             Common.logger(log_type, crawler).info('获取视频列表\n')
+            Common.logging(log_type, crawler, env, '获取视频列表\n')
             video_elements = cls.search_elements(driver, '//div[@class="rich-media active__absolute"]')
             if video_elements is None:
                 Common.logger(log_type, crawler).warning(f'video_elements:{video_elements}')
+                Common.logging(log_type, crawler, env, f'video_elements:{video_elements}')
                 return
 
             video_element_temp = video_elements[index:]
             if len(video_element_temp) == 0:
                 Common.logger(log_type, crawler).info('到底啦~~~~~~~~~~~~~\n')
+                Common.logging(log_type, crawler, env, '到底啦~~~~~~~~~~~~~\n')
                 return
 
             for i, video_element in enumerate(video_element_temp):
                 try:
                     Common.logger(log_type, crawler).info(f"download_cnt:{cls.download_cnt}")
+                    Common.logging(log_type, crawler, env, f"download_cnt:{cls.download_cnt}")
                     if cls.download_cnt >= cls.videos_cnt(log_type, crawler):
                         Common.logger(log_type, crawler).info(f'搜索词:"{word}",已抓取视频数:{cls.download_cnt}')
+                        Common.logging(log_type, crawler, env, f'搜索词:"{word}",已抓取视频数:{cls.download_cnt}')
                         cls.download_cnt = 0
                         return
 
                     if video_element is None:
                         Common.logger(log_type, crawler).info('到底啦~\n')
+                        Common.logging(log_type, crawler, env, '到底啦~\n')
                         return
 
                     cls.i += 1
                     cls.search_elements(driver, '//*[@class="rich-media active__absolute"]')
 
                     Common.logger(log_type, crawler).info(f'拖动"视频"列表第{cls.i}个至屏幕中间')
+                    Common.logging(log_type, crawler, env, f'拖动"视频"列表第{cls.i}个至屏幕中间')
                     time.sleep(3)
                     driver.execute_script("arguments[0].scrollIntoView({block:'center',inline:'center'})",
                                           video_element)
                     if len(video_element.find_elements(By.XPATH, "//*[@text='没有更多的搜索结果']")) != 0:
                         Common.logger(log_type, crawler).info("没有更多的搜索结果\n")
+                        Common.logging(log_type, crawler, env, "没有更多的搜索结果\n")
                         return
                     video_title = video_element.find_elements(By.XPATH, '//div[@class="rich-media__title ellipsis_2"]/span')[index + i].text[:40]
                     video_url = video_element.find_elements(By.XPATH, '//div[@class="video-player"]')[index+i].get_attribute('src')
@@ -616,12 +643,16 @@ class ShipinhaoSearch:
                     }
                     for k, v in video_dict.items():
                         Common.logger(log_type, crawler).info(f"{k}:{v}")
+                    Common.logging(log_type, crawler, env, f"{video_dict}")
                     if video_title is None or video_url is None:
                         Common.logger(log_type, crawler).info("无效视频\n")
+                        Common.logging(log_type, crawler, env, "无效视频\n")
                     elif cls.repeat_out_video_id(log_type, crawler, out_video_id, env) != 0:
                         Common.logger(log_type, crawler).info('视频已下载\n')
+                        Common.logging(log_type, crawler, env, '视频已下载\n')
                     elif cls.repeat_video_url(log_type, crawler, video_url, env) != 0:
                         Common.logger(log_type, crawler).info('视频已下载\n')
+                        Common.logging(log_type, crawler, env, '视频已下载\n')
                     else:
                         video_element.click()
                         time.sleep(3)
@@ -633,8 +664,10 @@ class ShipinhaoSearch:
                         video_dict["publish_time_str"] = video_info_dict["publish_time_str"]
                         video_dict["publish_time_stamp"] = video_info_dict["publish_time_stamp"]
                         Common.logger(log_type, crawler).info(f'publish_time:{video_dict["publish_time_str"]}')
+                        Common.logging(log_type, crawler, env, f'publish_time:{video_dict["publish_time_str"]}')
                         if cls.download_rule(log_type=log_type, crawler=crawler, video_dict=video_dict) is False:
                             Common.logger(log_type, crawler).info("不满足抓取规则\n")
+                            Common.logging(log_type, crawler, env, "不满足抓取规则\n")
                         else:
                             cls.download_publish(log_type=log_type,
                                                  crawler=crawler,
@@ -644,8 +677,10 @@ class ShipinhaoSearch:
                                                  env=env)
                 except Exception as e:
                     Common.logger(log_type, crawler).error(f"抓取单条视频异常:{e}\n")
+                    Common.logging(log_type, crawler, env, f"抓取单条视频异常:{e}\n")
 
             Common.logger(log_type, crawler).info('已抓取完一组视频,休眠1秒\n')
+            Common.logging(log_type, crawler, env, '已抓取完一组视频,休眠1秒\n')
             time.sleep(1)
             index = index + len(video_element_temp)
 
@@ -660,6 +695,7 @@ class ShipinhaoSearch:
                 search_word = user["search_word"]
                 our_uid = user["our_uid"]
                 Common.logger(log_type, crawler).info(f"开始抓取:{search_word}")
+                Common.logging(log_type, crawler, env, f"开始抓取:{search_word}")
 
                 cls.start_wechat(log_type=log_type,
                                  crawler=crawler,
@@ -668,6 +704,7 @@ class ShipinhaoSearch:
                                  env=env)
             except Exception as e:
                 Common.logger(log_type, crawler).error(f"抓取{user['search_word']}时异常:{e}\n")
+                Common.logging(log_type, crawler, env, f"抓取{user['search_word']}时异常:{e}\n")
 
 
 if __name__ == '__main__':
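
The pattern above is mechanical: each existing Common.logger(log_type, crawler).info/.warning/.error(...) call is mirrored by a Common.logging(log_type, crawler, env, message) call, and close_wechat gains an env parameter so the value can be threaded through from start_wechat. The helper itself is not shown in this diff; what follows is a minimal sketch of what a dual-channel Common could look like, assuming Common.logging only forwards each message to an env-tagged second sink (the handler wiring and the print() stand-in below are assumptions, not the repository's actual implementation):

import logging
import sys


class Common:
    @classmethod
    def logger(cls, log_type, crawler):
        # Pre-existing channel: a local console/file logger.
        log = logging.getLogger(f"{crawler}-{log_type}")
        if not log.handlers:
            log.addHandler(logging.StreamHandler(sys.stdout))
            log.setLevel(logging.INFO)
        return log

    @classmethod
    def logging(cls, log_type, crawler, env, message):
        # Channel added by this patch: the same message, tagged with
        # env so dev/prod records can be separated downstream.
        # Assumption: a real implementation would ship this record to
        # a remote log store; print() stands in for that here.
        print(f"[{env}] {crawler}/{log_type}: {str(message).strip()}")

With a shim like this, the paired calls keep the local logs unchanged while every message also reaches the env-aware channel, which is why env now has to reach close_wechat and the other helpers.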