Parcourir la source

增加视频时长

zhangyong il y a 8 mois
Parent
commit
98d3edef6a
3 fichiers modifiés avec 37 ajouts et 7 suppressions
  1. common/sql_help.py (+6 −6)
  2. data_channel/sph_crawling_data.py (+3 −1)
  3. data_channel/sph_ls.py (+28 −0)

+ 6 - 6
common/sql_help.py

@@ -96,10 +96,10 @@ class sqlCollect():
 
     """抓取视频号数据入库"""
     @classmethod
-    def sph_data_info(cls, channel: str, objectId: str, video_url: str, cover: str, video_title: str, share_cnt: str, like_cnt: str, oss_video_key: str, oss_cover_key: str, nick_name: str, user_name: str, comment_count: str, fav_count: str, create_time: str):
-        insert_sql = f"""INSERT INTO sph_data_info (channel, video_id, video_url, video_cover, video_title, share_cnt, like_cnt, oss_url, oss_cover, nick_name, user_name, comment_count, fav_count, create_time)
+    def sph_data_info(cls, channel: str, objectId: str, video_url: str, cover: str, video_title: str, share_cnt: str, like_cnt: str, oss_video_key: str, oss_cover_key: str, nick_name: str, user_name: str, comment_count: str, fav_count: str, create_time: str, duration: str):
+        insert_sql = f"""INSERT INTO sph_data_info (channel, video_id, video_url, video_cover, video_title, share_cnt, like_cnt, oss_url, oss_cover, nick_name, user_name, comment_count, fav_count, create_time, duration)
                          values 
-                         ("{channel}", "{objectId}", "{video_url}", "{cover}", "{video_title}", "{share_cnt}", "{like_cnt}", "{oss_video_key}", "{oss_cover_key}", "{nick_name}", "{user_name}", "{comment_count}", "{fav_count}", "{create_time}")"""
+                         ("{channel}", "{objectId}", "{video_url}", "{cover}", "{video_title}", "{share_cnt}", "{like_cnt}", "{oss_video_key}", "{oss_cover_key}", "{nick_name}", "{user_name}", "{comment_count}", "{fav_count}", "{create_time}", "{duration}")"""
         res = MysqlHelper.update_values(
             sql=insert_sql
         )
@@ -126,9 +126,9 @@ class sqlCollect():
 
     """获取视频号历史数据"""
     @classmethod
-    def sph_data_info_list(cls, user, channel):
-        sql = """SELECT video_id, video_title, share_count, like_count, oss_url, oss_cover  FROM sph_data_info WHERE nick_name = %s and channel = %s """
-        data = MysqlHelper.get_values(sql, (user, channel))
+    def sph_data_info_list(cls, user):
+        sql = """SELECT video_id, video_title, share_cnt, like_cnt, oss_url, oss_cover  FROM sph_data_info WHERE nick_name = %s  ORDER BY share_cnt DESC"""
+        data = MysqlHelper.get_values(sql, (user))
         if data:
             return data
         return None

+ 3 - 1
data_channel/sph_crawling_data.py

@@ -6,6 +6,7 @@ import requests
 
 from common import Material, Oss, Common, Feishu
 from common.sql_help import sqlCollect
+from data_channel.data_help import dataHelp
 from data_channel.shipinhao import SPH
 
 class SphHistory:
@@ -71,6 +72,7 @@ class SphHistory:
                         time.sleep(random.randint(0, 1))
                         video_obj = response.json()
                         video_url = video_obj.get('DownloadAddress')
+                        duration = dataHelp.video_duration(video_url)
                         cover = video_obj.get('thumb_url')
                         if len(video_url) == 0:
                             continue
@@ -94,7 +96,7 @@ class SphHistory:
                         nick_name = obj['nickname']  # 用户名
                         comment_count = obj['comment_count']  # 评论数
                         fav_count = obj['fav_count']  # 大拇指点赞数
-                        sqlCollect.sph_data_info('视频号', objectId, video_url, cover, video_title, str(share_cnt), str(like_cnt), oss_video_key, oss_cover_key, nick_name, user_name, comment_count, fav_count, create_time)
+                        sqlCollect.sph_data_info('视频号', objectId, video_url, cover, video_title, str(share_cnt), str(like_cnt), oss_video_key, oss_cover_key, nick_name, user_name, comment_count, fav_count, create_time,duration)
                         Common.logger("sph_crawling").info(f"{nick_name}插入数据成功")
                 sqlCollect.update_sph_channel_user_status(user)
                 Common.logger("sph_crawling").info(f"{user}用户抓取完成")

+ 28 - 0
data_channel/sph_ls.py

@@ -0,0 +1,28 @@
+from common.sql_help import sqlCollect
+
+
+class SPHLS:
+
+    @classmethod
+    def get_sphls_data(cls, task_mark, url, number, mark):
+        data_list = sqlCollect.sph_data_info_list("郑蓝旗")
+        list = []
+        if data_list:
+            for data in data_list:
+                video_id = data[0]
+                old_title = data[1]
+                share_cnt = int(data[2])
+                like_cnt = int(data[3])
+                oss_url = data[4]
+                oss_cover = data[5]
+                if share_cnt < 300 or share_cnt < like_cnt or
+                all_data = {"video_id": video_id, "cover": oss_cover, "video_url": oss_url, "rule": video_percent,
+                            "old_title": old_title}
+
+
+
+
+
+
+if __name__ == '__main__':
+    SPHLS.get_sphls_data(1,2,3,4)