wangkun 1 year ago
parent
commit
e598c8b453

+ 2 - 0
shipinhao/shipinhao_main/run_shipinhao_search.py

@@ -11,9 +11,11 @@ from shipinhao.shipinhao_search.shipinhao_search import ShipinhaoSearch
 
 def main(log_type, crawler, env):
     Common.logger(log_type, crawler).info('开始抓取 视频号 搜索策略\n')
+    Common.logging(log_type, crawler, env, '开始抓取 视频号 搜索策略\n')
     ShipinhaoSearch.get_search_videos(log_type, crawler, env)
     Common.del_logs(log_type, crawler)
     Common.logger(log_type, crawler).info('抓取完一轮\n')
+    Common.logging(log_type, crawler, env, '抓取完一轮\n')
 
 
 if __name__ == "__main__":

+ 38 - 1
shipinhao/shipinhao_search/shipinhao_search.py

@@ -164,6 +164,7 @@ class ShipinhaoSearch:
     @classmethod
     def start_wechat(cls, log_type, crawler, word, our_uid, env):
         Common.logger(log_type, crawler).info('启动微信')
+        Common.logging(log_type, crawler, env, '启动微信')
         if env == "dev":
             chromedriverExecutable = "/Users/wangkun/Downloads/chromedriver/chromedriver_v107/chromedriver"
         else:
@@ -207,12 +208,14 @@ class ShipinhaoSearch:
                          env=env)
         cls.close_wechat(log_type=log_type,
                          crawler=crawler,
+                         env=env,
                          driver=driver)
 
     @classmethod
-    def close_wechat(cls, log_type, crawler, driver: WebDriver):
+    def close_wechat(cls, log_type, crawler, env, driver: WebDriver):
         driver.quit()
         Common.logger(log_type, crawler).info(f"微信退出成功\n")
+        Common.logging(log_type, crawler, env, f"微信退出成功\n")
 
     @classmethod
     def is_contain_chinese(cls, strword):
@@ -274,6 +277,7 @@ class ShipinhaoSearch:
             md_title = md5(video_dict['video_title'].encode('utf8')).hexdigest()
             shutil.rmtree(f"./{crawler}/videos/{md_title}/")
             Common.logger(log_type, crawler).info("视频size=0,删除成功\n")
+            Common.logging(log_type, crawler, env, "视频size=0,删除成功\n")
             return
         video_dict["video_width"] = ffmpeg_dict["width"]
         video_dict["video_height"] = ffmpeg_dict["height"]
@@ -283,6 +287,7 @@ class ShipinhaoSearch:
             md_title = md5(video_dict['video_title'].encode('utf8')).hexdigest()
             shutil.rmtree(f"./{crawler}/videos/{md_title}/")
             Common.logger(log_type, crawler).info("宽高不满足抓取规则,删除成功\n")
+            Common.logging(log_type, crawler, env, "宽高不满足抓取规则,删除成功\n")
             return
 
         # 下载封面
@@ -292,6 +297,7 @@ class ShipinhaoSearch:
 
         # 上传视频
         Common.logger(log_type, crawler).info("开始上传视频...")
+        Common.logging(log_type, crawler, env, "开始上传视频...")
         our_video_id = Publish.upload_and_publish(log_type=log_type,
                                                   crawler=crawler,
                                                   strategy="搜索爬虫策略",
@@ -303,6 +309,7 @@ class ShipinhaoSearch:
         else:
             our_video_link = f"https://admin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
         Common.logger(log_type, crawler).info("视频上传完成")
+        Common.logging(log_type, crawler, env, "视频上传完成")
 
         if our_video_id is None:
             try:
@@ -310,6 +317,7 @@ class ShipinhaoSearch:
                 md_title = md5(video_dict['video_title'].encode('utf8')).hexdigest()
                 shutil.rmtree(f"./{crawler}/videos/{md_title}")
                 Common.logger(log_type, crawler).warning(f"our_video_id:{our_video_id}, 删除成功\n")
+                Common.logging(log_type, crawler, env, f"our_video_id:{our_video_id}, 删除成功\n")
                 return
             except FileNotFoundError:
                 return
@@ -344,8 +352,10 @@ class ShipinhaoSearch:
                                                 {int(video_dict['video_width'])},
                                                 {int(video_dict['video_height'])}) """
         Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
+        Common.logging(log_type, crawler, env, f"insert_sql:{insert_sql}")
         MysqlHelper.update_values(log_type, crawler, insert_sql, env)
         Common.logger(log_type, crawler).info('视频信息插入数据库成功!')
+        Common.logging(log_type, crawler, env, '视频信息插入数据库成功!')
 
         # 写飞书
         Feishu.insert_columns(log_type, crawler, "xYWCzf", "ROWS", 1, 2)
@@ -368,6 +378,7 @@ class ShipinhaoSearch:
                    video_dict["video_url"]]]
         Feishu.update_values(log_type, crawler, "xYWCzf", "F2:Z2", values)
         Common.logger(log_type, crawler).info("写入飞书成功\n")
+        Common.logging(log_type, crawler, env, "写入飞书成功\n")
         cls.download_cnt += 1
 
     @classmethod
@@ -483,6 +494,7 @@ class ShipinhaoSearch:
             user_sheet = Feishu.get_values_batch(log_type, crawler, sheetid)
             if user_sheet is None:
                 Common.logger(log_type, crawler).warning(f"user_sheet:{user_sheet}, 3秒钟后重试")
+                Common.logging(log_type, crawler, env, f"user_sheet:{user_sheet}, 3秒钟后重试")
                 time.sleep(3)
                 continue
             our_user_list = []
@@ -496,6 +508,7 @@ class ShipinhaoSearch:
                 tag4 = user_sheet[i][11]
                 tag5 = user_sheet[i][12]
                 Common.logger(log_type, crawler).info(f"正在更新 {search_word} 搜索词信息")
+                Common.logging(log_type, crawler, env, f"正在更新 {search_word} 搜索词信息")
                 if our_uid is None:
                     default_user = getUser.get_default_user()
                     # 用来创建our_id的信息
@@ -514,6 +527,7 @@ class ShipinhaoSearch:
                     Feishu.update_values(log_type, crawler, sheetid, f'G{i + 1}:H{i + 1}',
                                          [[our_uid, our_user_link]])
                     Common.logger(log_type, crawler).info(f'站内用户主页创建成功:{our_user_link}\n')
+                    Common.logging(log_type, crawler, env, f'站内用户主页创建成功:{our_user_link}\n')
                 our_user_dict = {
                     'out_uid': '',
                     'search_word': search_word,
@@ -530,11 +544,13 @@ class ShipinhaoSearch:
         # 点击微信搜索框,并输入搜索词
         driver.implicitly_wait(10)
         Common.logger(log_type, crawler).info("点击搜索框")
+        Common.logging(log_type, crawler, env, "点击搜索框")
         driver.find_element(By.ID, 'com.tencent.mm:id/j5t').click()
         time.sleep(0.5)
         driver.find_element(By.ID, 'com.tencent.mm:id/cd7').clear().send_keys(word)
         driver.press_keycode(AndroidKey.ENTER)
         Common.logger(log_type, crawler).info("进入搜索词页面")
+        Common.logging(log_type, crawler, env, "进入搜索词页面")
         driver.find_elements(By.ID, 'com.tencent.mm:id/br8')[0].click()
         time.sleep(5)
 
@@ -542,12 +558,14 @@ class ShipinhaoSearch:
         check_to_webview = cls.check_to_webview(log_type, crawler, driver)
         if check_to_webview is None:
             Common.logger(log_type, crawler).info("切换到视频号 webview 失败\n")
+            Common.logging(log_type, crawler, env, "切换到视频号 webview 失败\n")
             return
         time.sleep(1)
 
         # 切换到"视频号"分类
         shipinhao_tags = cls.search_elements(driver, '//div[@class="unit"]/*[2]')
         Common.logger(log_type, crawler).info('点击"视频号"分类')
+        Common.logging(log_type, crawler, env, '点击"视频号"分类')
         shipinhao_tags[0].click()
         time.sleep(5)
 
@@ -555,40 +573,49 @@ class ShipinhaoSearch:
         while True:
             if cls.search_elements(driver, '//*[@class="mixed-box__bd"]') is None:
                 Common.logger(log_type, crawler).info('窗口已销毁\n')
+                Common.logging(log_type, crawler, env, '窗口已销毁\n')
                 return
 
             Common.logger(log_type, crawler).info('获取视频列表\n')
+            Common.logging(log_type, crawler, env, '获取视频列表\n')
             video_elements = cls.search_elements(driver, '//div[@class="rich-media active__absolute"]')
             if video_elements is None:
                 Common.logger(log_type, crawler).warning(f'video_elements:{video_elements}')
+                Common.logging(log_type, crawler, env, f'video_elements:{video_elements}')
                 return
 
             video_element_temp = video_elements[index:]
             if len(video_element_temp) == 0:
                 Common.logger(log_type, crawler).info('到底啦~~~~~~~~~~~~~\n')
+                Common.logging(log_type, crawler, env, '到底啦~~~~~~~~~~~~~\n')
                 return
 
             for i, video_element in enumerate(video_element_temp):
                 try:
                     Common.logger(log_type, crawler).info(f"download_cnt:{cls.download_cnt}")
+                    Common.logging(log_type, crawler, env, f"download_cnt:{cls.download_cnt}")
                     if cls.download_cnt >= cls.videos_cnt(log_type, crawler):
                         Common.logger(log_type, crawler).info(f'搜索词:"{word}",已抓取视频数:{cls.download_cnt}')
+                        Common.logging(log_type, crawler, env, f'搜索词:"{word}",已抓取视频数:{cls.download_cnt}')
                         cls.download_cnt = 0
                         return
 
                     if video_element is None:
                         Common.logger(log_type, crawler).info('到底啦~\n')
+                        Common.logging(log_type, crawler, env, '到底啦~\n')
                         return
 
                     cls.i += 1
                     cls.search_elements(driver, '//*[@class="rich-media active__absolute"]')
 
                     Common.logger(log_type, crawler).info(f'拖动"视频"列表第{cls.i}个至屏幕中间')
+                    Common.logging(log_type, crawler, env, f'拖动"视频"列表第{cls.i}个至屏幕中间')
                     time.sleep(3)
                     driver.execute_script("arguments[0].scrollIntoView({block:'center',inline:'center'})",
                                           video_element)
                     if len(video_element.find_elements(By.XPATH, "//*[@text='没有更多的搜索结果']")) != 0:
                         Common.logger(log_type, crawler).info("没有更多的搜索结果\n")
+                        Common.logging(log_type, crawler, env, "没有更多的搜索结果\n")
                         return
                     video_title = video_element.find_elements(By.XPATH, '//div[@class="rich-media__title ellipsis_2"]/span')[index + i].text[:40]
                     video_url = video_element.find_elements(By.XPATH, '//div[@class="video-player"]')[index+i].get_attribute('src')
@@ -616,12 +643,16 @@ class ShipinhaoSearch:
                     }
                     for k, v in video_dict.items():
                         Common.logger(log_type, crawler).info(f"{k}:{v}")
+                    Common.logging(log_type, crawler, env, f"{video_dict}")
                     if video_title is None or video_url is None:
                         Common.logger(log_type, crawler).info("无效视频\n")
+                        Common.logging(log_type, crawler, env, "无效视频\n")
                     elif cls.repeat_out_video_id(log_type, crawler, out_video_id, env) != 0:
                         Common.logger(log_type, crawler).info('视频已下载\n')
+                        Common.logging(log_type, crawler, env, '视频已下载\n')
                     elif cls.repeat_video_url(log_type, crawler, video_url, env) != 0:
                         Common.logger(log_type, crawler).info('视频已下载\n')
+                        Common.logging(log_type, crawler, env, '视频已下载\n')
                     else:
                         video_element.click()
                         time.sleep(3)
@@ -633,8 +664,10 @@ class ShipinhaoSearch:
                         video_dict["publish_time_str"] = video_info_dict["publish_time_str"]
                         video_dict["publish_time_stamp"] = video_info_dict["publish_time_stamp"]
                         Common.logger(log_type, crawler).info(f'publish_time:{video_dict["publish_time_str"]}')
+                        Common.logging(log_type, crawler, env, f'publish_time:{video_dict["publish_time_str"]}')
                         if cls.download_rule(log_type=log_type, crawler=crawler, video_dict=video_dict) is False:
                             Common.logger(log_type, crawler).info("不满足抓取规则\n")
+                            Common.logging(log_type, crawler, env, "不满足抓取规则\n")
                         else:
                             cls.download_publish(log_type=log_type,
                                                  crawler=crawler,
@@ -644,8 +677,10 @@ class ShipinhaoSearch:
                                                  env=env)
                 except Exception as e:
                     Common.logger(log_type, crawler).error(f"抓取单条视频异常:{e}\n")
+                    Common.logging(log_type, crawler, env, f"抓取单条视频异常:{e}\n")
 
             Common.logger(log_type, crawler).info('已抓取完一组视频,休眠1秒\n')
+            Common.logging(log_type, crawler, env, '已抓取完一组视频,休眠1秒\n')
             time.sleep(1)
             index = index + len(video_element_temp)
 
@@ -660,6 +695,7 @@ class ShipinhaoSearch:
                 search_word = user["search_word"]
                 our_uid = user["our_uid"]
                 Common.logger(log_type, crawler).info(f"开始抓取:{search_word}")
+                Common.logging(log_type, crawler, env, f"开始抓取:{search_word}")
 
                 cls.start_wechat(log_type=log_type,
                                  crawler=crawler,
@@ -668,6 +704,7 @@ class ShipinhaoSearch:
                                  env=env)
             except Exception as e:
                 Common.logger(log_type, crawler).error(f"抓取{user['search_word']}时异常:{e}\n")
+                Common.logging(log_type, crawler, env, f"抓取{user['search_word']}时异常:{e}\n")
 
 
 if __name__ == '__main__':