zhangyong 4 months ago
parent
commit
346c59328e

+ 14 - 0
common/feishu_form.py

@@ -14,6 +14,20 @@ from common.feishu_utils import Feishu
 
 class Material():
 
+
+    @classmethod
+    def feishu_list(cls,channel_id):
+        """Look up the day-count value configured for ``channel_id``.
+
+        Reads sheet "dQriSJ" of the Feishu spreadsheet and scans its rows,
+        skipping the first one (presumably a header row - confirm against
+        the sheet). Column 0 holds the channel name and column 1 the day
+        count.
+
+        Returns the column-1 value of the first row whose channel equals
+        ``channel_id``. Returns None when no row matches, or as soon as a
+        row with an empty channel cell is reached (the scan stops there).
+        """
+        rows = Feishu.get_values_batch("KsoMsyP2ghleM9tzBfmcEEXBnXg", "dQriSJ")
+        for entry in rows[1:]:
+            name, days = entry[0], entry[1]
+            if not name:
+                # An empty channel cell ends the scan; later rows are ignored.
+                return None
+            if name == channel_id:
+                return days
+        return None
+
     @classmethod
     def get_count_restrict(cls, channel):
         count_channel = Feishu.get_values_batch("KsoMsyP2ghleM9tzBfmcEEXBnXg", "187FZ7")

+ 17 - 0
common/sql_help.py

@@ -40,6 +40,23 @@ class sqlCollect():
         return True
 
     """
+    根据时间判断该任务id是否用过
+    """
+    @classmethod
+    def is_used_days(cls, task_mark, video_id, mark_name, channel, day_count):
+        """Tell whether a video was already used within the last ``day_count`` days.
+
+        Queries ``pj_video_data`` for a row matching the video id, task name,
+        mark name and channel whose ``create_time`` is no older than
+        ``day_count`` days before NOW().
+
+        Args:
+            task_mark: compared against the ``task_name`` column.
+            video_id: compared (as a string) against ``used_video_id``.
+            mark_name: compared against the ``mark_name`` column.
+            channel: compared against the ``channel`` column.
+            day_count: size of the look-back window in days; converted
+                with ``int()``.
+
+        Returns:
+            True if at least one matching row exists, otherwise False.
+
+        Raises:
+            ValueError / TypeError: if ``day_count`` cannot be converted
+                by ``int()``.
+        """
+        # The time-window predicate must be part of WHERE; placing it after
+        # LIMIT (as in "LIMIT 1 AND create_time >= ...") is not valid SQL.
+        sql = """
+                SELECT used_video_id
+                FROM pj_video_data
+                WHERE used_video_id = %s AND task_name = %s AND mark_name = %s AND channel = %s
+                  AND create_time >= DATE_SUB(NOW(), INTERVAL %s DAY)
+                ORDER BY data_time DESC
+                LIMIT 1
+            """
+        data = MysqlHelper.get_values(sql, (str(video_id), task_mark, mark_name, channel, int(day_count)))
+        # An empty result means "not used"; a None result is treated the same
+        # way instead of raising in len().
+        return bool(data)
+
+    """
    快手小程序判断该任务id是否用过
    """
     @classmethod

+ 5 - 1
data_channel/douyin.py

@@ -68,7 +68,11 @@ class DY:
                     if entity_type == 4:
                         # is_top = data[i].get('is_top')  # 是否置顶
                         video_id = data[i].get('aweme_id')  # 文章id
-                        status = sqlCollect.is_used(task_mark, video_id, mark, channel_id)
+                        day_count = Material.get_count_restrict(channel_id)
+                        if day_count:
+                            status =sqlCollect.is_used_days(task_mark, video_id, mark, channel_id,day_count)
+                        else:
+                            status = sqlCollect.is_used(task_mark, video_id, mark, channel_id)
 
                         video_uri = data[i].get('video', {}).get('play_addr', {}).get('uri')
                         ratio = f'{data[i].get("video", {}).get("height")}p'

+ 7 - 3
data_channel/dy_keyword.py

@@ -1,7 +1,7 @@
 import requests
 import json
 
-from common import AliyunLogger
+from common import AliyunLogger, Material
 from common.sql_help import sqlCollect
 
 
@@ -17,7 +17,7 @@ class DyKeyword:
         short_duration_rule = 0
         url = "http://8.217.192.46:8889/crawler/dou_yin/keyword"
         list = []
-        if not keyword or keyword == "":
+        if not keyword or not keyword.strip():
             return list
         payload = json.dumps({
             "keyword": keyword,
@@ -56,7 +56,11 @@ class DyKeyword:
             for i in range(len(data)):
 
                 video_id = data[i].get('aweme_id')  # 文章id
-                status = sqlCollect.is_used(task_mark, video_id, mark, channel_id)
+                day_count = Material.get_count_restrict(channel_id)
+                if day_count:
+                    status = sqlCollect.is_used_days(task_mark, video_id, mark, channel_id, day_count)
+                else:
+                    status = sqlCollect.is_used(task_mark, video_id, mark, channel_id)
                 video_uri = data[i].get('video', {}).get('play_addr', {}).get('uri')
                 ratio = f'{data[i].get("video", {}).get("height")}p'
                 # video_url = f'https://www.iesdouyin.com/aweme/v1/play/?video_id={video_uri}&ratio={ratio}&line=0'  # 视频链接

+ 8 - 2
data_channel/dy_ls.py

@@ -4,7 +4,7 @@ import time
 import requests
 import json
 
-from common import Feishu, AliyunLogger
+from common import Feishu, AliyunLogger, Material
 from common.sql_help import sqlCollect
 
 
@@ -14,6 +14,8 @@ class DYLS:
         url = "http://8.217.192.46:8889/crawler/dou_yin/blogger"
         list = []
         next_cursor = ''
+        if not url_id or not url_id.strip():
+            return list
         for i in range(5):
             try:
                 payload = json.dumps({
@@ -39,7 +41,11 @@ class DYLS:
                     video_id = data[i].get('aweme_id')  # 文章id
                     # status = sqlCollect.is_used(task_mark, video_id, mark, "抖音")
                     # if status:
-                    status = sqlCollect.is_used(task_mark, video_id, mark, "抖音历史")
+                    day_count = Material.get_count_restrict(channel_id)
+                    if day_count:
+                        status = sqlCollect.is_used_days(task_mark, video_id, mark, "抖音历史", day_count)
+                    else:
+                        status = sqlCollect.is_used(task_mark, video_id, mark, "抖音历史")
 
                     video_uri = data[i].get('video', {}).get('play_addr', {}).get('uri')
                     ratio = f'{data[i].get("video", {}).get("height")}p'

+ 7 - 3
data_channel/ks_keyword.py

@@ -4,7 +4,7 @@ import time
 import requests
 import json
 
-from common import AliyunLogger, Feishu
+from common import AliyunLogger, Feishu, Material
 from common.sql_help import sqlCollect
 
 
@@ -21,7 +21,7 @@ class KsKeyword:
 
         url = "http://8.217.192.46:8889/crawler/kuai_shou/keyword"
         list = []
-        if not keyword or keyword == "":
+        if not keyword or not keyword.strip():
             return list
         payload = json.dumps({
             "keyword": keyword,
@@ -71,7 +71,11 @@ class KsKeyword:
                 if photo_type != "VIDEO":
                     continue
                 photo_id =  data['photoId']
-                status = sqlCollect.is_used(task_mark, photo_id, mark, channel_id)
+                day_count = Material.get_count_restrict(channel_id)
+                if day_count:
+                    status = sqlCollect.is_used_days(task_mark, photo_id, mark, channel_id, day_count)
+                else:
+                    status = sqlCollect.is_used(task_mark, photo_id, mark, channel_id)
 
                 image_url = data['webpCoverUrls'][0]['url']
                 video_url = data['mainMvUrls'][0]['url']

+ 14 - 8
data_channel/ks_ls.py

@@ -2,7 +2,7 @@ import random
 import time
 import requests
 import json
-from common import Feishu, AliyunLogger
+from common import Feishu, AliyunLogger, Material
 from common.sql_help import sqlCollect
 
 class KSLS:
@@ -10,11 +10,12 @@ class KSLS:
     @classmethod
     def get_ksls_list(cls, task_mark, url_id, number, mark, channel_id, name):
         #  快手app
+        list = []
         url = "http://8.217.192.46:8889/crawler/kuai_shou/blogger"
         next_cursor = ""
         try:
-            if not url_id:
-                return
+            if not url_id or not url_id.strip():
+                return list
             for i in range(5):
                 payload = json.dumps({
                     "account_id": url_id,
@@ -27,7 +28,7 @@ class KSLS:
                 time.sleep(random.randint(1, 5))
                 response = requests.request("POST", url, headers=headers, data=payload, timeout=30)
                 response = response.json()
-                list = []
+
                 data_all_list = response["data"]
                 if data_all_list == None or len(data_all_list) == 0:
                     try:
@@ -43,7 +44,12 @@ class KSLS:
                 data_list = data_all_list["data"]
                 for data in data_list:
                     photo_id = data["photo_id"]
-                    status = sqlCollect.is_used(task_mark, photo_id, mark, "快手历史")
+                    day_count = Material.get_count_restrict(channel_id)
+                    if day_count:
+                        status = sqlCollect.is_used_days(task_mark, photo_id, mark, "快手历史", day_count)
+                    else:
+
+                        status = sqlCollect.is_used(task_mark, photo_id, mark, "快手历史")
 
                     view_count = data["view_count"]
                     share_count = data["share_count"]
@@ -56,9 +62,9 @@ class KSLS:
                     log_data = f"user:{url_id},,video_id:{photo_id},,video_url:'',original_title:{old_title},,share_count:{share_count},,view_count:{view_count},,duration:{duration}"
 
                     AliyunLogger.logging(channel_id, name, url_id, photo_id, "扫描到一条视频", "2001", log_data)
-                    # if status:
-                    #     AliyunLogger.logging(channel_id, name, url_id, photo_id, "该视频已改造过", "2002", log_data)
-                    #     continue
+                    if status:
+                        AliyunLogger.logging(channel_id, name, url_id, photo_id, "该视频已改造过", "2002", log_data)
+                        continue
                     if float(video_percent) < special:
                         AliyunLogger.logging(channel_id, name, url_id, photo_id, "不符合规则:分享/浏览小于0.0005", "2003", log_data)
                         continue

+ 1 - 1
data_channel/ks_xcx.py

@@ -8,7 +8,7 @@ from common.sql_help import sqlCollect
 
 class KSXCX:
     @classmethod
-    def get_xcx_date(cls):
+    def get_xcx_date(cls,channel_id):
         list = []
         try:
             url = "http://8.217.192.46:8889/crawler/kuai_shou/mp_recommend"

+ 7 - 3
data_channel/ks_xcx_keyword.py

@@ -4,7 +4,7 @@ import time
 import requests
 import json
 
-from common import AliyunLogger, Feishu
+from common import AliyunLogger, Feishu, Material
 from common.sql_help import sqlCollect
 
 
@@ -21,7 +21,7 @@ class KsXCXKeyword:
 
         url = "http://8.217.192.46:8889/crawler/kuai_shou/mp_keyword"
         list = []
-        if not keyword or keyword == "":
+        if not keyword or not keyword.strip():
             return list
         payload = json.dumps({
             "keyword": keyword,
@@ -73,7 +73,11 @@ class KsXCXKeyword:
                 if photo_type != "VIDEO":
                     continue
                 photo_id =  data['photoId']
-                status = sqlCollect.is_used(task_mark, photo_id, mark, channel_id)
+                day_count = Material.get_count_restrict(channel_id)
+                if day_count:
+                    status = sqlCollect.is_used_days(task_mark, photo_id, mark, channel_id, day_count)
+                else:
+                    status = sqlCollect.is_used(task_mark, photo_id, mark, channel_id)
 
                 image_url = data['webpCoverUrls'][0]['url']
                 video_url = data['mainMvUrls'][0]['url']

+ 8 - 4
data_channel/kuaishou.py

@@ -117,7 +117,11 @@ class KS:
                     # if status:
                     #     continue
                     video_id = feeds[i].get("photo", {}).get("id", "")
-                    status = sqlCollect.is_used(task_mark, video_id, mark, channel_id)
+                    day_count = Material.get_count_restrict(channel_id)
+                    if day_count:
+                        status = sqlCollect.is_used_days(task_mark, video_id, mark, channel_id, day_count)
+                    else:
+                        status = sqlCollect.is_used(task_mark, video_id, mark, channel_id)
 
                     old_title = feeds[i].get("photo", {}).get("caption")
                     cover_url = feeds[i].get('photo', {}).get('coverUrl', "")
@@ -131,9 +135,9 @@ class KS:
                     # log_data = f"user:{url_id},,video_id:{video_id},,video_url:{video_url},,original_title:{old_title},,view_count:{view_count},,duration:{duration}"
 
                     AliyunLogger.logging(channel_id, name, url_id, video_id, "扫描到一条视频", "2001", log_data)
-                    # if status:
-                    #     AliyunLogger.logging(channel_id, name, url_id, video_id, "该视频已改造过", "2002", log_data)
-                    #     continue
+                    if status:
+                        AliyunLogger.logging(channel_id, name, url_id, video_id, "该视频已改造过", "2002", log_data)
+                        continue
                     share_count = cls.get_share_count(video_id)
 
                     special = float(0.001)

+ 6 - 2
data_channel/shipinhao.py

@@ -4,7 +4,7 @@ import time
 
 import requests
 
-from common import AliyunLogger, Feishu
+from common import AliyunLogger, Feishu, Material
 from common.sql_help import sqlCollect
 
 
@@ -82,7 +82,11 @@ class SPH:
                         return list
                     for obj in data_lsit:
                         objectId = obj['id']
-                        status = sqlCollect.is_used(task_mark, objectId, mark, "视频号")
+                        day_count = Material.get_count_restrict(channel_id)
+                        if day_count:
+                            status = sqlCollect.is_used_days(task_mark, objectId, mark, "视频号", day_count)
+                        else:
+                            status = sqlCollect.is_used(task_mark, objectId, mark, "视频号")
 
                         old_title = obj['objectDesc']['description']
                         url_p = obj['objectDesc']['media'][0]['Url']

+ 6 - 2
data_channel/sph_keyword.py

@@ -4,7 +4,7 @@ import time
 import requests
 import json
 
-from common import Feishu, AliyunLogger
+from common import Feishu, AliyunLogger, Material
 from common.sql_help import sqlCollect
 
 
@@ -57,7 +57,11 @@ class SphKeyword:
                 video_url = items["videoUrl"]
                 log_data = f"user:{keyword},,video_id:{video_id},,video_url:{video_url},,original_title:{old_title},,digg_count:{digg_count},,duration:{duration}"
                 AliyunLogger.logging(channel_id, name, keyword, video_id, "扫描到一条视频", "2001", log_data)
-                status = sqlCollect.is_used(task_mark, video_id, mark, channel_id)
+                day_count = Material.get_count_restrict(channel_id)
+                if day_count:
+                    status = sqlCollect.is_used_days(task_mark, video_id, mark, channel_id, day_count)
+                else:
+                    status = sqlCollect.is_used(task_mark, video_id, mark, channel_id)
                 if status:
                     AliyunLogger.logging(channel_id, name, keyword, video_id, "该视频已改造过", "2002", log_data)
                     continue

+ 6 - 2
data_channel/sph_ls.py

@@ -1,4 +1,4 @@
-from common import AliyunLogger
+from common import AliyunLogger, Material
 from common.sql_help import sqlCollect
 
 
@@ -11,7 +11,11 @@ class SPHLS:
         if data_list:
             for data in data_list:
                 video_id = data[0]
-                status = sqlCollect.is_used(task_mark,video_id, mark, "视频号历史")
+                day_count = Material.get_count_restrict(channel_id)
+                if day_count:
+                    status = sqlCollect.is_used_days(task_mark, video_id, mark, "视频号历史", day_count)
+                else:
+                    status = sqlCollect.is_used(task_mark,video_id, mark, "视频号历史")
 
                 old_title = data[1]
                 share_cnt = int(data[2])

+ 1 - 1
video_rewriting/video_processor.py

@@ -504,7 +504,7 @@ class VideoProcessor:
         elif channel_id == '视频号推荐流':
             return SPHFeed.get_feed_date()
         elif channel_id == '快手小程序':
-            return KSXCX.get_xcx_date()
+            return KSXCX.get_xcx_date(channel_id)
 
 
     @classmethod