zhangyong 3 months ago
parent
commit
9ba2175667
3 changed files with 25 additions and 13 deletions
  1. Dockerfile +1 −6
  2. common/sql_help.py +18 −7
  3. top_automatic/top_data_processing.py +6 −0

+ 1 - 6
Dockerfile

@@ -6,14 +6,9 @@ COPY . .
 
 ENV TZ=Asia/Shanghai
 
-RUN apt update && apt --no-install-recommends install -y wget xz-utils nscd libgl-dev libglib2.0-dev fonts-wqy-zenhei \
+RUN apt update && apt --no-install-recommends install -y wget xz-utils nscd \
     && apt-get clean && rm -rf /var/lib/apt/lists/* \
     && pip install -r requirements.txt --no-cache-dir \
-    && wget -O /tmp/ffmpeg-7.0.2-amd64-static.tar.xz https://johnvansickle.com/ffmpeg/releases/ffmpeg-release-amd64-static.tar.xz \
-    && tar -xJvf /tmp/ffmpeg-7.0.2-amd64-static.tar.xz -C /usr/local/ \
-    && rm /tmp/ffmpeg-7.0.2-amd64-static.tar.xz \
-    && ln -s /usr/local/ffmpeg-7.0.2-amd64-static/ffprobe /usr/local/bin/ffprobe \
-    && ln -s /usr/local/ffmpeg-7.0.2-amd64-static/ffmpeg /usr/local/bin/ffmpeg \
     && mkdir -p /app/cache
 
 ENTRYPOINT ["python", "/app/job_data.py"]

+ 18 - 7
common/sql_help.py

@@ -7,22 +7,33 @@ from common.mysql_db import MysqlHelper
 class sqlCollect():
 
 
+    @classmethod
+    def get_channle_id(cls, pq_id):
+        """
+        Read the id from the database table
+        """
+        sql = f"""select v_id,channel from machine_making_data where pq_vid = %s limit 1"""
+        data = MysqlHelper.get_values(sql, (pq_id,))
+        if data:
+            return data[0][0],data[0][1]
+        else:
+            return None, None
 
     @classmethod
-    def insert_machine_making_data(cls, name: str, task_mark: str, channel_id: str, url: str, v_id: str, piaoquan_id: str, new_title: str, code: str, formatted_time, old_title: str, oss_object_key: str):
-        insert_sql = f"""INSERT INTO machine_making_data (name, task_mark, channel, user, v_id, pq_uid, title, pq_vid, data_time, old_title, oss_object_key) values ("{name}", "{task_mark}", "{channel_id}", "{url}", "{v_id}" , "{piaoquan_id}", "{new_title}", "{code}", "{formatted_time}", "{old_title}", "{oss_object_key}")"""
+    def insert_pj_video_data(cls, user_video_id: str, channel: str):
+        insert_sql = f"""INSERT INTO pj_video_data (user_video_id,channel) values ("{user_video_id}", "{channel}")"""
         MysqlHelper.update_values(
             sql=insert_sql
         )
 
     @classmethod
-    def get_channle_id(cls, pq_id):
+    def select_pj_video_data(cls, user_video_id):
         """
         Read the id from the database table
         """
-        sql = f"""select v_id,channel from machine_making_data where pq_vid = %s limit 1"""
-        data = MysqlHelper.get_values(sql, (pq_id,))
+        sql = f"""select user_video_id from pj_video_data where user_video_id = %s limit 1"""
+        data = MysqlHelper.get_values(sql, (user_video_id,))
         if data:
-            return data[0][0],data[0][1]
+            return True
         else:
-            return None, None
+            return False
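
Note on the two new helpers: select_pj_video_data binds user_video_id through a %s placeholder, while insert_pj_video_data interpolates both values straight into the SQL string with an f-string. If MysqlHelper.update_values can forward bound parameters the way get_values does (an assumption; this diff only shows it called with a single sql= argument), the insert could use placeholders as well. A sketch:

    @classmethod
    def insert_pj_video_data(cls, user_video_id: str, channel: str):
        # Sketch only: assumes MysqlHelper.update_values accepts bound
        # parameters like get_values does; not confirmed by this commit.
        insert_sql = "INSERT INTO pj_video_data (user_video_id, channel) VALUES (%s, %s)"
        MysqlHelper.update_values(sql=insert_sql, params=(user_video_id, channel))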

+ 6 - 0
top_automatic/top_data_processing.py

@@ -147,6 +147,11 @@ class Top:
             in_job_video_data("task:top_all_data", json.dumps(data, ensure_ascii=False, indent=4))
             AliyunLogger.logging(data['channel'], data, "没有获取到视频用户ID,等待重新获取","fail")
             return
+        status = sqlCollect.select_pj_video_data(channel_account_id)
+        if status:
+            logger.info(f"[+] 任务{data},该用户站外ID已添加过")
+            AliyunLogger.logging(data['channel'], data, "该用户站外ID已添加过", channel_account_id)
+            return
         data["channel_account_id"] = channel_account_id
         if channel_id in ["抖音关键词抓取", "快手关键词抓取"]:
             data["tag_transport_channel"] = tag_transport_channel
@@ -156,6 +161,7 @@ class Top:
             redis_data = f"task:top_data_{'ks' if tag_transport_channel == '快手' else 'dy'}_gz"
         AliyunLogger.logging(data['channel'], data, "获取成功等待写入改造任务", channel_account_id)
         in_job_video_data(redis_data, json.dumps(data, ensure_ascii=False, indent=4))
+        sqlCollect.insert_pj_video_data(channel_account_id, channel_id)
         logger.info(f"[+] 开始写入飞书表格")
         current_time = datetime.now()
         formatted_time = current_time.strftime("%Y-%m-%d %H:%M:%S")
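
Note: insert_pj_video_data runs only after the task has been pushed to Redis, and it is a separate statement from the select guard in the previous hunk, so two workers handling the same channel_account_id at the same time could both pass the check before either records the id. If pj_video_data.user_video_id carried a UNIQUE index (an assumption; the table schema is not part of this commit), the record step could be made idempotent, e.g.:

    @classmethod
    def insert_pj_video_data(cls, user_video_id: str, channel: str):
        # Sketch only: INSERT IGNORE relies on a UNIQUE index on
        # user_video_id, which this commit does not show. Keeps the
        # existing f-string style from the diff above.
        insert_sql = f"""INSERT IGNORE INTO pj_video_data (user_video_id,channel) values ("{user_video_id}", "{channel}")"""
        MysqlHelper.update_values(sql=insert_sql)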