wangkun 2 years ago
parent
commit
a028cf1a4a

+ 4 - 2
main/click_share_video.py

@@ -3,10 +3,12 @@
 # @Time: 2022/9/1
 import os
 import sys
+import time
+
 from appium import webdriver
 from selenium.webdriver.common.by import By
 sys.path.append(os.getcwd())
-from main.common import Common
+from crawler_shipinhao.main.common import Common
 
 
 class Click:
@@ -24,7 +26,7 @@ class Click:
 
         Common.logger(log_type).info('Clicking video: {}', video_title)
         driver.find_element(By.NAME, video_title).click()
-
+        time.sleep(5)
         driver.quit()
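
The fixed time.sleep(5) after the tap gives the share flow time to land before the driver quits, but the tap itself can still race the page render. A minimal sketch of an explicit wait before clicking, assuming selenium's support helpers are importable alongside the Appium driver (the 10s timeout is an assumption):

    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.webdriver.support import expected_conditions as EC

    # Sketch: wait until the title element is clickable instead of failing fast
    WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.NAME, video_title))).click()
    time.sleep(5)  # still give the share flow a moment before driver.quit()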
 
 

+ 4 - 4
main/common.py

@@ -31,7 +31,7 @@ class Common:
         Generate logs with the logger module
         """
         # log path
-        log_dir = "./logs/"
+        log_dir = r"./crawler_shipinhao/logs/"
         log_path = os.getcwd() + os.sep + log_dir
         if not os.path.isdir(log_path):
             os.makedirs(log_path)
@@ -64,7 +64,7 @@ class Common:
         :d_dir: log directory to clean
         :return: keep only the 6 most recent logs
         """
-        logs_dir = "./logs/"
+        logs_dir = r"./crawler_shipinhao/logs/"
         if not os.path.exists(logs_dir):
             os.mkdir(logs_dir)
 
@@ -91,11 +91,11 @@ class Common:
         video cover or playback URL: d_url
         download save path: "./files/{d_title}/"
         """
-        videos_dir = "./videos/"
+        videos_dir = r"./crawler_shipinhao/videos/"
         if not os.path.exists(videos_dir):
             os.mkdir(videos_dir)
         # first create a folder for this video's related files
-        video_dir = "./videos/" + d_name + "/"
+        video_dir = r"./crawler_shipinhao/videos/" + d_name + r"/"
         if not os.path.exists(video_dir):
             os.mkdir(video_dir)
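
The check-then-mkdir pattern above can race when two runs start together, and os.mkdir fails if a parent directory is missing. A sketch that folds both steps into one idempotent call, over the same crawler_shipinhao paths (ensure_dir is a hypothetical helper, not part of this module):

    import os

    def ensure_dir(path):
        # creates the whole tree; no error if it already exists
        os.makedirs(path, exist_ok=True)

    ensure_dir(os.path.join(".", "crawler_shipinhao", "logs"))
    ensure_dir(os.path.join(".", "crawler_shipinhao", "videos", "demo"))  # "demo" stands in for d_name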
 

+ 1 - 1
main/feishu_lib.py

@@ -3,7 +3,7 @@
 import json
 import requests
 import urllib3
-from main.common import Common
+from crawler_shipinhao.main.common import Common
 proxies = {"http": None, "https": None}
 
 

+ 54 - 3
main/get_url.py

@@ -1,13 +1,64 @@
 # -*- coding: utf-8 -*-
 # @Author: wangkun
 # @Time: 2022/9/1
+import json
+import os
+import sys
+import time
+sys.path.append(os.getcwd())
+from crawler_shipinhao.main.common import Common
 
 
 class GetUrl:
     @classmethod
-    def get_url(cls):
-        pass
+    def get_url(cls, log_type):
+        try:
+            # directory where Charles capture files are saved
+            charles_file_dir = r"./crawler_kanyikan_recommend/chlsfiles/"
+
+            if len(os.listdir(charles_file_dir)) == 1:
+                Common.logger(log_type).info("No chlsfile found, waiting 60s")
+                time.sleep(60)
+            else:
+                # all files in the target directory
+                all_file = sorted(os.listdir(charles_file_dir))
+
+                # take the last file in sorted order as the target
+                old_file = all_file[-1]
+
+                # split the filename from its extension
+                new_file = os.path.splitext(old_file)
+
+                # rename the file with a .txt extension
+                os.rename(os.path.join(charles_file_dir, old_file),
+                          os.path.join(charles_file_dir, new_file[0] + ".txt"))
+
+                with open(charles_file_dir + new_file[0] + ".txt", encoding='utf-8-sig', errors='ignore') as f:
+                    contents = json.load(f, strict=False)
+
+                video_url_list = []
+                cover_url_list = []
+
+                if "finder.video.qq.com" in [text['host'] for text in contents]:
+                    for text in contents:
+                        if text["host"] == "finder.video.qq.com" and text["path"] == "/251/20302/stodownload":
+                            video_url_list.append(text)
+                        elif text["host"] == "finder.video.qq.com" and text["path"] == "/251/20350/stodownload":
+                            cover_url_list.append(text)
+
+                    video_url = video_url_list[0]['host']+video_url_list[0]['path']+'?'+video_url_list[0]['query']
+                    cover_url = cover_url_list[0]['host']+cover_url_list[0]['path']+'?'+cover_url_list[0]['query']
+
+                    return video_url, cover_url
+                else:
+                    Common.logger(log_type).info("URL not found, retrying in 10s")
+                    time.sleep(10)
+                    return cls.get_url(log_type)
+
+        except Exception as e:
+            Common.logger(log_type).exception("get_url exception: {}", e)
+            return None
 
 
 if __name__ == '__main__':
-    GetUrl.get_url()
+    GetUrl.get_url('recommend')
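
Each miss recurses into get_url again, so a long stretch without a captured request can exhaust Python's default recursion limit (~1000 frames). A sketch of the same retry as a bounded loop, where parse_once is a hypothetical stand-in for the chlsfile-parsing body above and the 30-try cap is an assumption:

    import time

    def get_url_iterative(log_type, max_tries=30):
        # Sketch: loop-based retry equivalent to the recursive fallback
        for _ in range(max_tries):
            urls = parse_once(log_type)  # hypothetical: returns (video_url, cover_url) or None
            if urls is not None:
                return urls
            time.sleep(10)
        return None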

+ 4 - 4
main/publish.py

@@ -8,7 +8,7 @@ import time
 import oss2
 import requests
 import urllib3
-from main.common import Common
+from crawler_shipinhao.main.common import Common
 proxies = {"http": None, "https": None}
 
 
@@ -134,7 +134,7 @@ class Publish:
         os.rmdir(local_file)
         Common.logger(log_type).info("remove local file dir = {} success".format(local_file))
 
-    local_file_path = './videos'
+    local_file_path = '.\\crawler_shipinhao\\videos'
     video_file = 'video'
     image_file = 'image'
     info_file = 'info'
@@ -183,7 +183,7 @@ class Publish:
                     # all video files under a single video folder
                     for fi in dir_files:
                         # full path of each file under the video folder
-                        fi_path = fi_d + '/' + fi
+                        fi_path = fi_d + '\\' + fi
                         Common.logger(log_type).info('dir fi_path = {}'.format(fi_path))
                         # read info.txt into data
                         if cls.info_file in fi:
@@ -210,7 +210,7 @@ class Publish:
                     # refresh the file list
                     dir_files = os.listdir(fi_d)
                     for fi in dir_files:
-                        fi_path = fi_d + '/' + fi
+                        fi_path = fi_d + '\\' + fi
                         # Common.logger(log_type).info('dir fi_path = {}'.format(fi_path))
                         # upload to OSS
                         if cls.video_file in fi:
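
Hardcoding '\\' as the separator ties publish.py to Windows; os.path.join picks the right separator for whichever host runs the crawler. A sketch over the same variables:

    # Sketch: platform-neutral path building instead of manual '\\' concatenation
    local_file_path = os.path.join('.', 'crawler_shipinhao', 'videos')
    fi_path = os.path.join(fi_d, fi)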

+ 2 - 2
main/run_shipinhao_recommend.py

@@ -6,8 +6,8 @@ import sys
 import time
 
 sys.path.append(os.getcwd())
-from main.common import Common
-from main.shipinhao_recommend import Recommend
+from crawler_shipinhao.main.common import Common
+from crawler_shipinhao.main.shipinhao_recommend import Recommend
 
 
 class Main:
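
With imports now rooted at the crawler_shipinhao package, sys.path.append(os.getcwd()) only resolves them when the process is launched from the directory that contains crawler_shipinhao. A sketch of a launch-directory-independent alternative using the standard pathlib module:

    import sys
    from pathlib import Path

    # Sketch: put the repo's parent on sys.path regardless of the working directory
    sys.path.append(str(Path(__file__).resolve().parents[2]))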

+ 33 - 18
main/shipinhao_recommend.py

@@ -7,9 +7,12 @@ from appium import webdriver
 from appium.webdriver.webdriver import WebDriver
 from selenium.common import NoSuchElementException
 from selenium.webdriver.common.by import By
+
 sys.path.append(os.getcwd())
-from main.common import Common
-from main.feishu_lib import Feishu
+from crawler_shipinhao.main.common import Common
+from crawler_shipinhao.main.feishu_lib import Feishu
+from crawler_shipinhao.main.click_share_video import Click
+from crawler_shipinhao.main.get_url import GetUrl
 
 
 class Recommend:
@@ -208,23 +211,35 @@ class Recommend:
                 driver.find_element(By.XPATH, '//*[@text="爬虫群"]').click()
                 driver.find_element(By.ID, 'com.tencent.mm:id/guw').click()
 
-                # write the video info to the Feishu feeds sheet
-                Feishu.insert_columns(log_type, 'shipinhao', 'FSDlBy', 'ROWS', 1, 2)
-                get_feeds_time = int(time.time())
-                values = [[time.strftime('%Y/%m/%d %H:%M:%S', time.localtime(get_feeds_time)),
-                           '推荐榜',
-                           str(video_title),
-                           duration,
-                           like_cnt,
-                           share_cnt,
-                           favorite_cnt,
-                           comment_cnt,
-                           str(user_name)]]
+                Click.click_video(log_type, video_title)
                 time.sleep(1)
-                Feishu.update_values(log_type, 'shipinhao', 'FSDlBy', 'A2:Z2', values)
-                Common.logger(log_type).info('Swiping the page up')
-                driver.swipe(500, 1000, 500, 300, 300)
-                Common.logger(log_type).info('Video info written to Feishu doc\n')
+
+                urls = GetUrl.get_url(log_type)
+                if urls is None:
+                    Common.logger(log_type).info('No video URL retrieved')
+                else:
+                    video_url = urls[0]
+                    cover_url = urls[1]
+
+                    # write the video info to the Feishu feeds sheet
+                    Feishu.insert_columns(log_type, 'shipinhao', 'FSDlBy', 'ROWS', 1, 2)
+                    get_feeds_time = int(time.time())
+                    values = [[time.strftime('%Y/%m/%d %H:%M:%S', time.localtime(get_feeds_time)),
+                               '推荐榜',
+                               str(video_title),
+                               duration,
+                               like_cnt,
+                               share_cnt,
+                               favorite_cnt,
+                               comment_cnt,
+                               str(user_name),
+                               cover_url,
+                               video_url]]
+                    time.sleep(1)
+                    Feishu.update_values(log_type, 'shipinhao', 'FSDlBy', 'A2:Z2', values)
+                    Common.logger(log_type).info('Swiping the page up')
+                    driver.swipe(500, 1000, 500, 300, 300)
+                    Common.logger(log_type).info('Video info written to Feishu doc\n')
 
 
 if __name__ == '__main__':