wangkun 2 years ago
parent
commit
a028cf1a4a

+ 4 - 2
main/click_share_video.py

@@ -3,10 +3,12 @@
 # @Time: 2022/9/1
 import os
 import sys
+import time
+
 from appium import webdriver
 from selenium.webdriver.common.by import By
 sys.path.append(os.getcwd())
-from main.common import Common
+from crawler_shipinhao.main.common import Common
 
 
 class Click:
@@ -24,7 +26,7 @@ class Click:
 
         Common.logger(log_type).info('Clicking video: {}', video_title)
         driver.find_element(By.NAME, video_title).click()
-
+        time.sleep(5)
         driver.quit()
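
The fixed time.sleep(5) after the tap gives the share flow time to land before the driver quits, but the tap itself can still race the page render. A minimal sketch of an explicit wait before clicking, assuming selenium's support helpers are importable alongside the Appium driver (the 10s timeout is an assumption):

    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.webdriver.support import expected_conditions as EC

    # Sketch: wait until the title element is clickable instead of failing fast
    WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.NAME, video_title))).click()
    time.sleep(5)  # still give the share flow a moment before driver.quit()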
 
 

+ 4 - 4
main/common.py

@@ -31,7 +31,7 @@ class Common:
         Generate logs with the logger module
         """
         # log path
-        log_dir = "./logs/"
+        log_dir = r"./crawler_shipinhao/logs/"
         log_path = os.getcwd() + os.sep + log_dir
         if not os.path.isdir(log_path):
             os.makedirs(log_path)
@@ -64,7 +64,7 @@ class Common:
         :d_dir: log directory to clean
         :return: keep only the 6 most recent logs
         """
-        logs_dir = "./logs/"
+        logs_dir = r"./crawler_shipinhao/logs/"
         if not os.path.exists(logs_dir):
             os.mkdir(logs_dir)
 
@@ -91,11 +91,11 @@ class Common:
         video cover or playback URL: d_url
         download save path: "./files/{d_title}/"
         """
-        videos_dir = "./videos/"
+        videos_dir = r"./crawler_shipinhao/videos/"
         if not os.path.exists(videos_dir):
             os.mkdir(videos_dir)
         # first create a folder for this video's related files
-        video_dir = "./videos/" + d_name + "/"
+        video_dir = r"./crawler_shipinhao/videos/" + d_name + r"/"
         if not os.path.exists(video_dir):
             os.mkdir(video_dir)
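
The check-then-mkdir pattern above can race when two runs start together, and os.mkdir fails if a parent directory is missing. A sketch that folds both steps into one idempotent call, over the same crawler_shipinhao paths (ensure_dir is a hypothetical helper, not part of this module):

    import os

    def ensure_dir(path):
        # creates the whole tree; no error if it already exists
        os.makedirs(path, exist_ok=True)

    ensure_dir(os.path.join(".", "crawler_shipinhao", "logs"))
    ensure_dir(os.path.join(".", "crawler_shipinhao", "videos", "demo"))  # "demo" stands in for d_name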
 

+ 1 - 1
main/feishu_lib.py

@@ -3,7 +3,7 @@
 import json
 import requests
 import urllib3
-from main.common import Common
+from crawler_shipinhao.main.common import Common
 proxies = {"http": None, "https": None}
 
 

+ 54 - 3
main/get_url.py

@@ -1,13 +1,64 @@
 # -*- coding: utf-8 -*-
 # @Author: wangkun
 # @Time: 2022/9/1
+import json
+import os
+import sys
+import time
+sys.path.append(os.getcwd())
+from crawler_shipinhao.main.common import Common
 
 
 class GetUrl:
     @classmethod
-    def get_url(cls):
-        pass
+    def get_url(cls, log_type):
+        try:
+            # directory where Charles capture files are saved
+            charles_file_dir = r"./crawler_kanyikan_recommend/chlsfiles/"
+
+            if len(os.listdir(charles_file_dir)) == 1:
+                Common.logger(log_type).info("No chlsfile found, waiting 60s")
+                time.sleep(60)
+            else:
+                # all files in the target directory
+                all_file = sorted(os.listdir(charles_file_dir))
+
+                # take the last file in sorted order as the target
+                old_file = all_file[-1]
+
+                # split the filename from its extension
+                new_file = os.path.splitext(old_file)
+
+                # rename the file with a .txt extension
+                os.rename(os.path.join(charles_file_dir, old_file),
+                          os.path.join(charles_file_dir, new_file[0] + ".txt"))
+
+                with open(charles_file_dir + new_file[0] + ".txt", encoding='utf-8-sig', errors='ignore') as f:
+                    contents = json.load(f, strict=False)
+
+                video_url_list = []
+                cover_url_list = []
+
+                if "finder.video.qq.com" in [text['host'] for text in contents]:
+                    for text in contents:
+                        if text["host"] == "finder.video.qq.com" and text["path"] == "/251/20302/stodownload":
+                            video_url_list.append(text)
+                        elif text["host"] == "finder.video.qq.com" and text["path"] == "/251/20350/stodownload":
+                            cover_url_list.append(text)
+
+                    video_url = video_url_list[0]['host']+video_url_list[0]['path']+'?'+video_url_list[0]['query']
+                    cover_url = cover_url_list[0]['host']+cover_url_list[0]['path']+'?'+cover_url_list[0]['query']
+
+                    return video_url, cover_url
+                else:
+                    Common.logger(log_type).info("URL not found, retrying in 10s")
+                    time.sleep(10)
+                    return cls.get_url(log_type)
+
+        except Exception as e:
+            Common.logger(log_type).exception("get_url exception: {}", e)
+            return None
 
 
 if __name__ == '__main__':
-    GetUrl.get_url()
+    GetUrl.get_url('recommend')
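
Each miss recurses into get_url again, so a long stretch without a captured request can exhaust Python's default recursion limit (~1000 frames). A sketch of the same retry as a bounded loop, where parse_once is a hypothetical stand-in for the chlsfile-parsing body above and the 30-try cap is an assumption:

    import time

    def get_url_iterative(log_type, max_tries=30):
        # Sketch: loop-based retry equivalent to the recursive fallback
        for _ in range(max_tries):
            urls = parse_once(log_type)  # hypothetical: returns (video_url, cover_url) or None
            if urls is not None:
                return urls
            time.sleep(10)
        return None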

+ 4 - 4
main/publish.py

@@ -8,7 +8,7 @@ import time
 import oss2
 import requests
 import urllib3
-from main.common import Common
+from crawler_shipinhao.main.common import Common
 proxies = {"http": None, "https": None}
 
 
@@ -134,7 +134,7 @@ class Publish:
         os.rmdir(local_file)
         Common.logger(log_type).info("remove local file dir = {} success".format(local_file))
 
-    local_file_path = './videos'
+    local_file_path = '.\\crawler_shipinhao\\videos'
     video_file = 'video'
     image_file = 'image'
     info_file = 'info'
@@ -183,7 +183,7 @@ class Publish:
                     # all video files under a single video folder
                     for fi in dir_files:
                         # full path of each file under the video folder
-                        fi_path = fi_d + '/' + fi
+                        fi_path = fi_d + '\\' + fi
                         Common.logger(log_type).info('dir fi_path = {}'.format(fi_path))
                         # read info.txt into data
                         if cls.info_file in fi:
@@ -210,7 +210,7 @@ class Publish:
                     # refresh the file list
                     dir_files = os.listdir(fi_d)
                     for fi in dir_files:
-                        fi_path = fi_d + '/' + fi
+                        fi_path = fi_d + '\\' + fi
                         # Common.logger(log_type).info('dir fi_path = {}'.format(fi_path))
                         # upload to OSS
                         if cls.video_file in fi:
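
Hardcoding '\\' as the separator ties publish.py to Windows; os.path.join picks the right separator for whichever host runs the crawler. A sketch over the same variables:

    # Sketch: platform-neutral path building instead of manual '\\' concatenation
    local_file_path = os.path.join('.', 'crawler_shipinhao', 'videos')
    fi_path = os.path.join(fi_d, fi)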

+ 2 - 2
main/run_shipinhao_recommend.py

@@ -6,8 +6,8 @@ import sys
 import time
 
 sys.path.append(os.getcwd())
-from main.common import Common
-from main.shipinhao_recommend import Recommend
+from crawler_shipinhao.main.common import Common
+from crawler_shipinhao.main.shipinhao_recommend import Recommend
 
 
 class Main:
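
With imports now rooted at the crawler_shipinhao package, sys.path.append(os.getcwd()) only resolves them when the process is launched from the directory that contains crawler_shipinhao. A sketch of a launch-directory-independent alternative using the standard pathlib module:

    import sys
    from pathlib import Path

    # Sketch: put the repo's parent on sys.path regardless of the working directory
    sys.path.append(str(Path(__file__).resolve().parents[2]))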

+ 33 - 18
main/shipinhao_recommend.py

@@ -7,9 +7,12 @@ from appium import webdriver
 from appium.webdriver.webdriver import WebDriver
 from selenium.common import NoSuchElementException
 from selenium.webdriver.common.by import By
+
 sys.path.append(os.getcwd())
-from main.common import Common
-from main.feishu_lib import Feishu
+from crawler_shipinhao.main.common import Common
+from crawler_shipinhao.main.feishu_lib import Feishu
+from crawler_shipinhao.main.click_share_video import Click
+from crawler_shipinhao.main.get_url import GetUrl
 
 
 class Recommend:
@@ -208,23 +211,35 @@ class Recommend:
                 driver.find_element(By.XPATH, '//*[@text="爬虫群"]').click()
                 driver.find_element(By.ID, 'com.tencent.mm:id/guw').click()
 
-                # write the video info to the Feishu feeds sheet
-                Feishu.insert_columns(log_type, 'shipinhao', 'FSDlBy', 'ROWS', 1, 2)
-                get_feeds_time = int(time.time())
-                values = [[time.strftime('%Y/%m/%d %H:%M:%S', time.localtime(get_feeds_time)),
-                           '推荐榜',
-                           str(video_title),
-                           duration,
-                           like_cnt,
-                           share_cnt,
-                           favorite_cnt,
-                           comment_cnt,
-                           str(user_name)]]
+                Click.click_video(log_type, video_title)
                 time.sleep(1)
-                Feishu.update_values(log_type, 'shipinhao', 'FSDlBy', 'A2:Z2', values)
-                Common.logger(log_type).info('Swiping the page up')
-                driver.swipe(500, 1000, 500, 300, 300)
-                Common.logger(log_type).info('Video info written to Feishu doc\n')
+
+                urls = GetUrl.get_url(log_type)
+                if urls is None:
+                    Common.logger(log_type).info('No video URL retrieved')
+                else:
+                    video_url = urls[0]
+                    cover_url = urls[1]
+
+                    # write the video info to the Feishu feeds sheet
+                    Feishu.insert_columns(log_type, 'shipinhao', 'FSDlBy', 'ROWS', 1, 2)
+                    get_feeds_time = int(time.time())
+                    values = [[time.strftime('%Y/%m/%d %H:%M:%S', time.localtime(get_feeds_time)),
+                               '推荐榜',
+                               str(video_title),
+                               duration,
+                               like_cnt,
+                               share_cnt,
+                               favorite_cnt,
+                               comment_cnt,
+                               str(user_name),
+                               cover_url,
+                               video_url]]
+                    time.sleep(1)
+                    Feishu.update_values(log_type, 'shipinhao', 'FSDlBy', 'A2:Z2', values)
+                    Common.logger(log_type).info('Swiping the page up')
+                    driver.swipe(500, 1000, 500, 300, 300)
+                    Common.logger(log_type).info('Video info written to Feishu doc\n')
 
 
 if __name__ == '__main__':