zhangyong 8 mesi fa
parent
commit
220ee258ec
3 file modificati con 40 aggiunte e 56 eliminazioni
  1. 23 22
      common/feishu_form.py
  2. 13 0
      common/sql_help.py
  3. 4 34
      data_channel/sph_crawling_data.py

+ 23 - 22
common/feishu_form.py

@@ -107,28 +107,29 @@ class Material():
                             "gg_duration_total": gg_duration,
                         }
                         processed_list.append(json.dumps(number_dict, ensure_ascii=False))
-                        if ls_number and ls_number not in {'None', ''}:
-                            if channel_id == "抖音":
-                                new_channel_id = "抖音历史"
-                            if channel_id == "快手":
-                                new_channel_id = "快手历史"
-                            values1 = [new_channel_id, video_id_total, piaoquan_id, video_share, video_ending_total, crop_tool,
-                                      gg_duration, title_total]
-                            filtered_values1 = [str(value) for value in values1 if value is not None and value != "None"]
-                            task_mark1 = "_".join(map(str, filtered_values1))
-                            number_dict = {
-                                "task_mark": task_mark1,
-                                "channel_id": new_channel_id,
-                                "channel_url": user,
-                                "piaoquan_id": piaoquan_id,
-                                "number": ls_number,
-                                "title": title,
-                                "video_share": video_share,
-                                "video_ending": video_ending,
-                                "crop_total": crop_tool,
-                                "gg_duration_total": gg_duration,
-                            }
-                            processed_list.append(json.dumps(number_dict, ensure_ascii=False))
+                        if channel_id == "抖音" or channel_id == "快手":
+                            if ls_number and ls_number not in {'None', ''}:
+                                if channel_id == "抖音":
+                                    new_channel_id = "抖音历史"
+                                if channel_id == "快手":
+                                    new_channel_id = "快手历史"
+                                values1 = [new_channel_id, video_id_total, piaoquan_id, video_share, video_ending_total, crop_tool,
+                                          gg_duration, title_total]
+                                filtered_values1 = [str(value) for value in values1 if value is not None and value != "None"]
+                                task_mark1 = "_".join(map(str, filtered_values1))
+                                number_dict = {
+                                    "task_mark": task_mark1,
+                                    "channel_id": new_channel_id,
+                                    "channel_url": user,
+                                    "piaoquan_id": piaoquan_id,
+                                    "number": ls_number,
+                                    "title": title,
+                                    "video_share": video_share,
+                                    "video_ending": video_ending,
+                                    "crop_total": crop_tool,
+                                    "gg_duration_total": gg_duration,
+                                }
+                                processed_list.append(json.dumps(number_dict, ensure_ascii=False))
                 else:
                     return processed_list
 

+ 13 - 0
common/sql_help.py

@@ -105,6 +105,19 @@ class sqlCollect():
         )
         return res
 
+    """查询是否有视频号是否插入过数据库"""
+    @classmethod
+    def sph_data_info_v_id(cls, video_id, channel):  # True when sph_data_info already has a row for this (video_id, channel) pair, else False
+        sql = """
+                              SELECT video_id
+                              FROM sph_data_info
+                              WHERE video_id = %s and channel = %s
+                          """
+        data = MysqlHelper.get_values(sql, (str(video_id), channel))  # video_id is stringified; the tuple supplies the two %s placeholders (presumably bound by MysqlHelper - TODO confirm)
+        if data:  # any truthy result is treated as "already stored"
+            return True
+        return False  # falsy result (e.g. empty or None): nothing stored for this pair
+
     """查询是否有视频号数据"""
     @classmethod
     def sph_channel_user_list(cls):

+ 4 - 34
data_channel/sph_crawling_data.py

@@ -1,6 +1,4 @@
-import configparser
 import json
-import os
 import random
 import time
 
@@ -8,42 +6,15 @@ import requests
 
 from common import Material, Oss, Common
 from common.sql_help import sqlCollect
-from data_channel.piaoquan import PQ
 from data_channel.shipinhao import SPH
-config = configparser.ConfigParser()
-config.read('./config.ini')
-class SphHistory:
 
-    @classmethod
-    def remove_files(cls, video_path_url):
-        """
-        删除指定目录下的所有文件和子目录
-        """
-        if os.path.exists(video_path_url) and os.path.isdir(video_path_url):
-            for root, dirs, files in os.walk(video_path_url):
-                for file in files:
-                    file_path = os.path.join(root, file)
-                    os.remove(file_path)
-                for dir in dirs:
-                    dir_path = os.path.join(root, dir)
-                    os.rmdir(dir_path)
+class SphHistory:
 
-    @classmethod
-    def create_folders(cls):
-        """
-        根据标示和任务标示创建目录
-        """
-        video_path_url = config['PATHS']['VIDEO_PATH']+"/sph_crawling/"
-        # video_path_url = '/root/video_rewriting/path/sph_crawling/'
-        if not os.path.exists(video_path_url):
-            os.makedirs(video_path_url)
-        return video_path_url
 
     """获取视频号所有内容"""
     @classmethod
     def sph_data_info(cls):
         user_list = cls.get_sph_user()
-        video_path_url = cls.create_folders()
         if user_list == None:
             return
         for user in user_list:
@@ -82,6 +53,9 @@ class SphHistory:
                     for obj in res_json["UpMasterHomePage"]:
                         Common.logger("sph_crawling").info(f"{user}扫描到一条数据")
                         objectId = obj['objectId']
+                        object_id = sqlCollect.sph_data_info_v_id(objectId, "视频号")
+                        if object_id:
+                            continue
                         objectNonceId = obj['objectNonceId']
                         url = "http://61.48.133.26:30001/GetFinderDownloadAddress"
                         payload = json.dumps({
@@ -102,7 +76,6 @@ class SphHistory:
                         oss_video_key = Oss.channel_upload_oss(video_url, v_id)  # 视频发送OSS
                         oss_video_key = oss_video_key.get("oss_object_key")
                         Common.logger("sph_crawling").info(f"{user}视频发送oss成功,视频oss地址{oss_video_key}")
-
                         share_cnt = int(obj['forward_count'])  # 分享
                         like_cnt = int(obj['like_count'])  # 点赞
                         video_title = video_obj.get('title').split("\n")[0].split("#")[0]
@@ -118,11 +91,8 @@ class SphHistory:
                         fav_count = obj['fav_count']  # 大拇指点赞数
                         sqlCollect.sph_data_info('视频号', objectId, video_url, cover, video_title, str(share_cnt), str(like_cnt), oss_video_key, oss_cover_key, nick_name, user_name, comment_count, fav_count, create_time)
                         Common.logger("sph_crawling").info(f"{nick_name}插入数据成功")
-                        cls.remove_files(video_path_url)
-                        return "完成"
             except Exception as e:
                 Common.logger("sph_crawling").info(f"{user}异常,异常信息{e}")
-                cls.remove_files(video_path_url)
                 continue
 
     @classmethod