wangkun 3 лет назад
Родитель
Commit
bca5b7d779
6 измененных файлов с 52 добавлено и 51 удалено
  1. 11 11
      main/common.py
  2. 8 8
      main/download_play.py
  3. 12 12
      main/download_sendtime.py
  4. 13 13
      main/download_up.py
  5. 2 2
      main/get_feeds.py
  6. 6 5
      main/publish.py

+ 11 - 11
main/common.py

@@ -33,7 +33,7 @@ class Common:
         生成 log 日志
         """
         # 日志路径
-        log_dir = "./logs/"
+        log_dir = r"./logs/"
         log_path = os.getcwd() + os.sep + log_dir
         if not os.path.isdir(log_path):
             os.makedirs(log_path)
@@ -55,7 +55,7 @@ class Common:
         清除冗余日志文件
         :return: 保留最近 7 个日志
         """
-        log_dir = "./logs/"
+        log_dir = r"./logs/"
         all_files = sorted(os.listdir(log_dir))
         all_logs = []
         for log in all_files:
@@ -74,9 +74,9 @@ class Common:
     @classmethod
     def del_charles_files(cls):
         # 目标文件夹下所有文件
-        all_file = sorted(os.listdir("./chlsfiles/"))
+        all_file = sorted(os.listdir(r"./chlsfiles/"))
         for file in all_file[0:-2]:
-            os.remove("./chlsfiles/" + file)
+            os.remove(r"./chlsfiles/" + file)
 
     @classmethod
     def download_method(cls, text, d_name, d_url):
@@ -87,7 +87,7 @@ class Common:
         下载保存路径:"./files/{d_title}/"
         """
         # 首先创建一个保存该视频相关信息的文件夹
-        video_dir = "./videos/" + d_name + "/"
+        video_dir = r"./videos/" + d_name + r"/"
         if not os.path.exists(video_dir):
             os.mkdir(video_dir)
 
@@ -133,13 +133,13 @@ class Common:
         :param t_name: 文件名
         :return: 文件内容
         """
-        with open("./txt/" + t_name, "r", encoding="utf8") as f:
+        with open(r"./txt/" + t_name, "r", encoding="utf8") as f:
             return f.readlines()
 
     @classmethod
     def get_session(cls):
         # charles 抓包文件保存目录
-        charles_file_dir = "./chlsfiles/"
+        charles_file_dir = r"./chlsfiles/"
 
         if int(len(os.listdir(charles_file_dir))) == 1:
             Common.crawler_log().info("未找到chlsfile文件,等待60s")
@@ -218,7 +218,7 @@ class Common:
 
     @classmethod
     def kanyikan_download_count(cls):
-        videoid_path = "./txt/kanyikan_videoid.txt"
+        videoid_path = r"./txt/kanyikan_videoid.txt"
         count = 0
         for count, line in enumerate(open(videoid_path, "rb").readlines()):
             count += 1
@@ -231,9 +231,9 @@ class Common:
         :return:
         """
         # 创建空文件
-        with open("./txt/" + str(cls.today) + "_kanyikan_videoid.txt", "a") as f:
+        with open(r"./txt/" + str(cls.today) + "_kanyikan_videoid.txt", "a") as f:
             f.write("")
-        videoid_path = "./txt/" + str(cls.today) + "_kanyikan_videoid.txt"
+        videoid_path = r"./txt/" + str(cls.today) + "_kanyikan_videoid.txt"
         count = 0
         for count, line in enumerate(open(videoid_path, "rb").readlines()):
             count += 1
@@ -245,7 +245,7 @@ class Common:
         删除快手渠道昨日下载视频数的 txt 文件
         :return:
         """
-        yesterday_kanyikan_videoid_txt_dir = "./txt/"
+        yesterday_kanyikan_videoid_txt_dir = r"./txt/"
         all_files = sorted(os.listdir(yesterday_kanyikan_videoid_txt_dir))
         for file in all_files:
             name = os.path.splitext(file)[0]

+ 8 - 8
main/download_play.py

@@ -73,11 +73,11 @@ class DownloadPlay:
                     Common.download_method(text="video", d_name=download_video_title, d_url=download_video_url)
 
                     # 保存视频 ID 到 "./txt/kanyikan_videoid.txt"
-                    with open("./txt/kanyikan_videoid.txt", "a", encoding="utf8") as f_a:
+                    with open(r"./txt/kanyikan_videoid.txt", "a", encoding="utf8") as f_a:
                         f_a.write(download_video_id + "\n")
 
                     # 保存视频信息至 "./videos/{download_video_title}/info.txt"
-                    with open("./videos/" + download_video_title + "/info.txt", "a", encoding="utf8") as f_a:
+                    with open(r"./videos/" + download_video_title + r"/info.txt", "a", encoding="utf8") as f_a:
                         f_a.write(str(download_video_id) + "\n" +
                                   str(download_video_title) + "\n" +
                                   str(download_video_duration) + "\n" +
@@ -99,9 +99,9 @@ class DownloadPlay:
 
                     # 删除该视频在kanyikan_feeds.txt中的信息
                     Common.crawler_log().info("删除该视频在kanyikan_feeds.txt中的信息:{}".format(download_video_title))
-                    with open("./txt/kanyikan_feeds.txt", "r", encoding="utf8") as f_r:
+                    with open(r"./txt/kanyikan_feeds.txt", "r", encoding="utf8") as f_r:
                         lines = f_r.readlines()
-                    with open("./txt/kanyikan_feeds.txt", "w", encoding="utf-8") as f_w:
+                    with open(r"./txt/kanyikan_feeds.txt", "w", encoding="utf-8") as f_w:
                         for line in lines:
                             if download_video_id in line.split(" + ")[1]:
                                 continue
@@ -109,18 +109,18 @@ class DownloadPlay:
                 else:
                     # 删除该视频在 recommend.txt中的信息
                     Common.crawler_log().info("该视频不满足下载规则,删除在kanyikan_feeds.txt中的信息:{}".format(download_video_title))
-                    with open("./txt/kanyikan_feeds.txt", "r", encoding="utf8") as f_r:
+                    with open(r"./txt/kanyikan_feeds.txt", "r", encoding="utf8") as f_r:
                         lines = f_r.readlines()
-                    with open("./txt/kanyikan_feeds.txt", "w", encoding="utf-8") as f_w:
+                    with open(r"./txt/kanyikan_feeds.txt", "w", encoding="utf-8") as f_w:
                         for line in lines:
                             if download_video_id in line.split(" + ")[1]:
                                 continue
                             f_w.write(line)
             except Exception as e:
                 Common.crawler_log().info("视频 info 异常".format(e))
-                with open("./txt/kanyikan_feeds.txt", "r", encoding="utf8") as f_r:
+                with open(r"./txt/kanyikan_feeds.txt", "r", encoding="utf8") as f_r:
                     lines = f_r.readlines()
-                with open("./txt/kanyikan_feeds.txt", "w", encoding="utf-8") as f_w:
+                with open(r"./txt/kanyikan_feeds.txt", "w", encoding="utf-8") as f_w:
                     for line in lines:
                         if download_video_id in line.split(" + ")[1]:
                             continue

+ 12 - 12
main/download_sendtime.py

@@ -124,10 +124,10 @@ class DownloadSendtime:
                                 # 下载视频
                                 Common.download_method("video", v_title, download_url_up)
                                 # 保存视频 ID 到 "./txt/kanyikan_videoid.txt"
-                                with open("./txt/kanyikan_videoid.txt", "a", encoding="utf8") as f_a:
+                                with open(r"./txt/kanyikan_videoid.txt", "a", encoding="utf8") as f_a:
                                     f_a.write(v_id + "\n")
                                 # 保存视频信息到 "./files/{视频标题}/videoinfo.txt"
-                                with open("./videos/" + v_title + "/" + "info.txt",
+                                with open(r"./videos/" + v_title + "/" + "info.txt",
                                           "a", encoding="utf8") as f_a2:
                                     f_a2.write(str(v_id) + "\n" +
                                                str(v_title) + "\n" +
@@ -150,9 +150,9 @@ class DownloadSendtime:
 
                                 # 删除该视频在kanyikan_feeds.txt中的信息
                                 Common.crawler_log().info("删除该视频在kanyikan_feeds.txt中的信息:{}".format(v_title))
-                                with open("./txt/kanyikan_feeds.txt", "r", encoding="utf8") as f1:
+                                with open(r"./txt/kanyikan_feeds.txt", "r", encoding="utf8") as f1:
                                     lines = f1.readlines()
-                                with open("./txt/kanyikan_feeds.txt", "w", encoding="utf-8") as f_w1:
+                                with open(r"./txt/kanyikan_feeds.txt", "w", encoding="utf-8") as f_w1:
                                     for line1 in lines:
                                         if v_id in line1.split(" + ")[1]:
                                             continue
@@ -161,9 +161,9 @@ class DownloadSendtime:
                                 # 删除之前保存的该视频信息
                                 Common.crawler_log().info("该视频7天播放量:{}<10000".format(
                                     int(v_play_cnt_sendtime)) + ";" + "不满足下载规则:{}".format(v_title))
-                                with open("./txt/kanyikan_feeds.txt", "r", encoding="utf8") as f_r:
+                                with open(r"./txt/kanyikan_feeds.txt", "r", encoding="utf8") as f_r:
                                     lines = f_r.readlines()
-                                with open("./txt/kanyikan_feeds.txt", "w", encoding="utf-8") as f_w:
+                                with open(r"./txt/kanyikan_feeds.txt", "w", encoding="utf-8") as f_w:
                                     for line2 in lines:
                                         if v_id in line2.split(" + ")[1]:
                                             continue
@@ -172,18 +172,18 @@ class DownloadSendtime:
                             Common.crawler_log().info("视频发布时间大于7天:{}天".format(
                                 int((int(time.time()) - int(v_send_date)) / 86400))
                                                 + ";" + "标题:{}".format(v_title))
-                            with open("./txt/kanyikan_feeds.txt", "r", encoding="utf8") as f_r:
+                            with open(r"./txt/kanyikan_feeds.txt", "r", encoding="utf8") as f_r:
                                 lines = f_r.readlines()
-                            with open("./txt/kanyikan_feeds.txt", "w", encoding="utf-8") as f_w:
+                            with open(r"./txt/kanyikan_feeds.txt", "w", encoding="utf-8") as f_w:
                                 for line2 in lines:
                                     if v_id in line2.split(" + ")[1]:
                                         continue
                                     f_w.write(line2)
                     else:
                         Common.crawler_log().info("不满足下载规则:{}".format(v_title))
-                        with open("./txt/kanyikan_feeds.txt", "r", encoding="utf8") as f_r:
+                        with open(r"./txt/kanyikan_feeds.txt", "r", encoding="utf8") as f_r:
                             lines = f_r.readlines()
-                        with open("./txt/kanyikan_feeds.txt", "w", encoding="utf-8") as f_w:
+                        with open(r"./txt/kanyikan_feeds.txt", "w", encoding="utf-8") as f_w:
                             for line3 in lines:
                                 if v_id in line3.split(" + ")[1]:
                                     continue
@@ -191,9 +191,9 @@ class DownloadSendtime:
 
             except Exception as e:
                 Common.crawler_log().error("获取视频info异常:{},删除该视频".format(e))
-                with open("./txt/kanyikan_feeds.txt", "r", encoding="utf8") as f_r:
+                with open(r"./txt/kanyikan_feeds.txt", "r", encoding="utf8") as f_r:
                     lines = f_r.readlines()
-                with open("./txt/kanyikan_feeds.txt", "w", encoding="utf-8") as f_w:
+                with open(r"./txt/kanyikan_feeds.txt", "w", encoding="utf-8") as f_w:
                     for line4 in lines:
                         if v_id in line4.split(" + ")[1]:
                             continue

+ 13 - 13
main/download_up.py

@@ -129,10 +129,10 @@ class DownloadUp:
                                 # 下载视频
                                 Common.download_method("video", v_title, download_url_up)
                                 # 保存视频 ID 到 "./txt/kanyikan_videoid.txt"
-                                with open("./txt/kanyikan_videoid.txt", "a", encoding="utf8") as f_a:
+                                with open(r"./txt/kanyikan_videoid.txt", "a", encoding="utf8") as f_a:
                                     f_a.write(v_id + "\n")
                                 # 保存视频信息到 "./files/{视频标题}/videoinfo.txt"
-                                with open("./videos/" + v_title + "/" + "info.txt",
+                                with open(r"./videos/" + v_title + "/" + "info.txt",
                                           "a", encoding="utf8") as f_a2:
                                     f_a2.write(str(v_id) + "\n" +
                                                str(v_title) + "\n" +
@@ -155,9 +155,9 @@ class DownloadUp:
 
                                 # 删除该视频在kanyikan_feeds.txt中的信息
                                 Common.crawler_log().info("删除该视频在kanyikan_feeds.txt中的信息:{}".format(v_title))
-                                with open("./txt/kanyikan_feeds.txt", "r", encoding="utf8") as f1:
+                                with open(r"./txt/kanyikan_feeds.txt", "r", encoding="utf8") as f1:
                                     lines = f1.readlines()
-                                with open("./txt/kanyikan_feeds.txt", "w", encoding="utf-8") as f_w1:
+                                with open(r"./txt/kanyikan_feeds.txt", "w", encoding="utf-8") as f_w1:
                                     for line1 in lines:
                                         if v_id in line1.split(" + ")[1]:
                                             continue
@@ -167,14 +167,14 @@ class DownloadUp:
                                 Common.crawler_log().info("该视频1小时内的播放量:{}<1000".format(
                                     int(v_play_cnt_up) - int(v_play_ctn)) + ";"
                                                           + "更新该视频在kanyikan_feeds.txt中的信息:{}".format(v_title))
-                                with open("./txt/kanyikan_feeds.txt", "r", encoding="utf8") as f_r:
+                                with open(r"./txt/kanyikan_feeds.txt", "r", encoding="utf8") as f_r:
                                     lines = f_r.readlines()
-                                with open("./txt/kanyikan_feeds.txt", "w", encoding="utf-8") as f_w:
+                                with open(r"./txt/kanyikan_feeds.txt", "w", encoding="utf-8") as f_w:
                                     for line2 in lines:
                                         if v_id in line2.split(" + ")[1]:
                                             continue
                                         f_w.write(line2)
-                                with open("./txt/kanyikan_feeds.txt", "a", encoding="utf-8") as f_a:
+                                with open(r"./txt/kanyikan_feeds.txt", "a", encoding="utf-8") as f_a:
                                     f_a.write(str(int(time.time())) + " + "
                                               + str(v_id) + " + "
                                               + str(v_play_cnt_up) + " + "
@@ -196,9 +196,9 @@ class DownloadUp:
                                 int((int(time.time()) - int(v_time)) / 60)) + ";" + "标题:{}".format(v_title))
                             # 删除之前保存的该视频信息
                             Common.crawler_log().info("删除该视频在kanyikan_feeds.txt中的信息:{}".format(v_title))
-                            with open("./txt/kanyikan_feeds.txt", "r", encoding="utf8") as f_r:
+                            with open(r"./txt/kanyikan_feeds.txt", "r", encoding="utf8") as f_r:
                                 lines = f_r.readlines()
-                            with open("./txt/kanyikan_feeds.txt", "w", encoding="utf-8") as f_w:
+                            with open(r"./txt/kanyikan_feeds.txt", "w", encoding="utf-8") as f_w:
                                 for line2 in lines:
                                     if v_id in line2.split(" + ")[1]:
                                         continue
@@ -207,9 +207,9 @@ class DownloadUp:
                         Common.crawler_log().info("不满足下载规则:{}".format(v_title))
                         # 删除之前保存的该视频信息
                         Common.crawler_log().info("删除该视频在kanyikan_feeds.txt中的信息:{}".format(v_title))
-                        with open("./txt/kanyikan_feeds.txt", "r", encoding="utf8") as f_r:
+                        with open(r"./txt/kanyikan_feeds.txt", "r", encoding="utf8") as f_r:
                             lines = f_r.readlines()
-                        with open("./txt/kanyikan_feeds.txt", "w", encoding="utf-8") as f_w:
+                        with open(r"./txt/kanyikan_feeds.txt", "w", encoding="utf-8") as f_w:
                             for line3 in lines:
                                 if v_id in line3.split(" + ")[1]:
                                     continue
@@ -217,9 +217,9 @@ class DownloadUp:
             except Exception as e:
                 Common.crawler_log().error("获取视频info异常:{},删除该视频".format(e))
                 # 删除之前保存的该视频信息
-                with open("./txt/kanyikan_feeds.txt", "r", encoding="utf8") as f_r:
+                with open(r"./txt/kanyikan_feeds.txt", "r", encoding="utf8") as f_r:
                     lines = f_r.readlines()
-                with open("./txt/kanyikan_feeds.txt", "w", encoding="utf-8") as f_w:
+                with open(r"./txt/kanyikan_feeds.txt", "w", encoding="utf-8") as f_w:
                     for line4 in lines:
                         if v_id in line4.split(" + ")[1]:
                             continue

+ 2 - 2
main/get_feeds.py

@@ -187,7 +187,7 @@ def get_feeds():
                             if len(contents) == 0:
                                 Common.crawler_log().info("添加该视频信息至kanyikan_feeds.txt:{}".format(video_title))
                                 # 当前时间、视频 ID、播放量 存储到 kanyikan_feeds.txt
-                                with open("./txt/kanyikan_feeds.txt", "a", encoding="utf8") as f:
+                                with open(r"./txt/kanyikan_feeds.txt", "a", encoding="utf8") as f:
                                     f.write(str(basic_time) + " + "
                                             + str(video_id) + " + "
                                             + str(video_play_cnt) + " + "
@@ -210,7 +210,7 @@ def get_feeds():
                                 else:
                                     Common.crawler_log().info("添加该视频信息至kanyikan_feeds.txt:{}".format(video_title))
                                     # 当前时间、视频 ID、播放量 存储到 kanyikan_feeds.txt
-                                    with open("./txt/kanyikan_feeds.txt", "a", encoding="utf8") as f:
+                                    with open(r"./txt/kanyikan_feeds.txt", "a", encoding="utf8") as f:
                                         f.write(str(basic_time) + " + "
                                                 + str(video_id) + " + "
                                                 + str(video_play_cnt) + " + "

+ 6 - 5
main/publish.py

@@ -115,8 +115,8 @@ class Publish:
     - 读取 基本信息 调用发布接口
     """
     # env 日期20220225 文件名
-    oss_file_path_video = 'longvideo/crawler_local/video/{}/{}/{}'
-    oss_file_path_image = 'longvideo/crawler_local/image/{}/{}/{}'
+    oss_file_path_video = r'longvideo/crawler_local/video/{}/{}/{}'
+    oss_file_path_image = r'longvideo/crawler_local/image/{}/{}/{}'
 
     @classmethod
     def put_file(cls, oss_file, local_file):
@@ -138,7 +138,7 @@ class Publish:
         os.rmdir(local_file)
         Common.crawler_log().info("remove local file dir = {} success".format(local_file))
 
-    local_file_path = './videos'
+    local_file_path = r'./videos'
     video_file = 'video'
     image_file = 'image'
     info_file = 'info'
@@ -194,7 +194,7 @@ class Publish:
                     # 单个视频文件夹下的所有视频文件
                     for fi in dir_files:
                         # 视频文件夹下的所有文件路径
-                        fi_path = fi_d + '/' + fi
+                        fi_path = fi_d + r'/' + fi
                         Common.crawler_log().info('dir fi_path = {}'.format(fi_path))
                         # 读取 info.txt,赋值给 data
                         if cls.info_file in fi:
@@ -215,12 +215,13 @@ class Publish:
                                         data['crawlerSrcPublishTimestamp'] = line
                                 else:
                                     Common.crawler_log().warning("{} line is None".format(fi_path))
+                            f.close()
                             # remove info.txt
                             cls.remove_local_file(fi_path)
                     # 刷新数据
                     dir_files = os.listdir(fi_d)
                     for fi in dir_files:
-                        fi_path = fi_d + '/' + fi
+                        fi_path = fi_d + r'/' + fi
                         Common.crawler_log().info('dir fi_path = {}'.format(fi_path))
                         # 上传oss
                         if cls.video_file in fi: