kevin.yang 1 month ago
parent
commit
083e8d1a64

+ 0 - 1
application/config/topic_group_queue.py

@@ -42,7 +42,6 @@ class TopicGroup(object):
             ('fqhyd', 'recommend', 'fuqihaoyundao'),
             ('cjpq', 'recommend', 'chaojipiaoquan'),
             ('xlzf', 'recommend', 'xuanlanzhufu'),
-            ('xczf', 'recommend', 'xinchunzhufu'),
             ('zzhxzfy', 'recommend', 'zhaozhaohuanxizhufuyu'),
         ]
 

+ 1 - 1
spider/crawler_online/chaojipiaoquan.py

@@ -102,7 +102,7 @@ class CJPQRecommend(object):
         item.add_video_info("play_cnt", 0)
         item.add_video_info("publish_time_stamp", int(time.time()))
         item.add_video_info("out_user_id", video_obj["id"])
-        item.add_video_info("cover_url", video_obj["cover"])
+        item.add_video_info("cover_url", video_obj["mainImg"])
         item.add_video_info("like_cnt", 0)
         item.add_video_info("share_cnt", 0)
         item.add_video_info("comment_cnt", 0)

+ 0 - 166
spider/crawler_online/xinchunzhufu.py

@@ -1,166 +0,0 @@
-import os
-import random
-import sys
-import time
-import uuid
-import json
-from datetime import datetime
-
-import requests
-
-from application.common.feishu import FsData
-from application.common.feishu.feishu_utils import FeishuUtils
-from application.common.gpt import GPT4oMini
-
-sys.path.append(os.getcwd())
-
-from application.items import VideoItem
-from application.pipeline import PiaoQuanPipeline
-from application.common.messageQueue import MQ
-from application.common.log import AliyunLogger
-from application.common.mysql import MysqlHelper
-
-
-
-class XCZFRecommend(object):
-
-    """
-    新春祝福
-    """
-
-    def __init__(self, platform, mode, rule_dict, user_list, env="prod"):
-        self.limit_flag = False
-        self.platform = platform
-        self.mode = mode
-        self.rule_dict = rule_dict
-        self.user_list = user_list
-        self.env = env
-        self.download_cnt = 0
-        self.mq = MQ(topic_name="topic_crawler_etl_" + self.env)
-        self.expire_flag = False
-        self.aliyun_log = AliyunLogger(mode=self.mode, platform=self.platform)
-        self.mysql = MysqlHelper(mode=self.mode, platform=self)
-
-
-    def get_recommend_list(self):
-        print("新春祝福开始")
-
-        """
-        获取推荐页视频
-        """
-        headers = {
-            'Content-Type': 'application/json'
-        }
-        url = "http://8.217.192.46:8889/crawler/xin_chun_zhu_fu/recommend"
-        data_rule = FsData()
-        title_rule = data_rule.get_title_rule()
-        while True:
-            payload = json.dumps({
-                "cursor": ""
-            })
-            response = requests.request("POST", url, headers=headers, data=payload)
-            response = response.json()
-            if response['code'] != 0:
-                self.aliyun_log.logging(
-                    code="3000",
-                    message="抓取单条视频失败,请求失败"
-                ),
-                return
-            data = response['data']['data']
-            if len(data) == 0:
-                return
-            for index, video_obj in enumerate(data, 1):
-                try:
-                    self.aliyun_log.logging(
-                        code="1001", message="扫描到一条视频", data=video_obj
-                    )
-                    self.process_video_obj(video_obj, title_rule)
-                except Exception as e:
-                    self.aliyun_log.logging(
-                        code="3000",
-                        message="抓取单条视频失败, 该视频位于第{}页第{}条报错原因是{}".format(
-                            1, index, e
-                        ),
-                    )
-                if self.limit_flag:
-                    return
-                time.sleep(random.randint(1, 5))
-
-    def process_video_obj(self, video_obj, title_rule):
-        """
-        处理视频
-        :param video_obj:
-        """
-        time.sleep(random.randint(3, 8))
-        trace_id = self.platform + str(uuid.uuid1())
-        our_user = random.choice(self.user_list)
-        item = VideoItem()
-        item.add_video_info("video_id", video_obj["uuid"])
-        item.add_video_info("video_title", video_obj["title"])
-        item.add_video_info("play_cnt", 0)
-        item.add_video_info("publish_time_stamp", int(time.time()))
-        item.add_video_info("out_user_id", video_obj["uuid"])
-        item.add_video_info("cover_url", video_obj["cover_url"])
-        item.add_video_info("like_cnt", 0)
-        item.add_video_info("share_cnt", 0)
-        item.add_video_info("comment_cnt", 0)
-        item.add_video_info("video_url", video_obj["urls"][0])
-        item.add_video_info("out_video_id", video_obj["uuid"])
-        item.add_video_info("platform", self.platform)
-        item.add_video_info("strategy", self.mode)
-        item.add_video_info("session", "{}-{}".format(self.platform, int(time.time())))
-        item.add_video_info("user_id", our_user["uid"])
-        item.add_video_info("user_name", our_user["nick_name"])
-        mq_obj = item.produce_item()
-        pipeline = PiaoQuanPipeline(
-            platform=self.platform,
-            mode=self.mode,
-            rule_dict=self.rule_dict,
-            env=self.env,
-            item=mq_obj,
-            trace_id=trace_id,
-        )
-        if pipeline.process_item():
-            title_list = title_rule.split(",")
-            title = video_obj["title"]
-            contains_keyword = any(keyword in title for keyword in title_list)
-            if contains_keyword:
-                new_title = GPT4oMini.get_ai_mini_title(title)
-                if new_title:
-                    item.add_video_info("video_title", new_title)
-                    current_time = datetime.now()
-                    formatted_time = current_time.strftime("%Y-%m-%d %H:%M:%S")
-                    values = [
-                        [
-                            video_obj["urls"][0],
-                            video_obj["cover_url"],
-                            title,
-                            new_title,
-                            formatted_time,
-                        ]
-                    ]
-                    FeishuUtils.insert_columns("U5dXsSlPOhiNNCtEfgqcm1iYnpf", "rcQv7r", "ROWS", 1, 2)
-                    time.sleep(0.5)
-                    FeishuUtils.update_values("U5dXsSlPOhiNNCtEfgqcm1iYnpf", "rcQv7r", "A2:Z2", values)
-            self.download_cnt += 1
-            self.mq.send_msg(mq_obj)
-            self.aliyun_log.logging(code="1002", message="成功发送至 ETL", data=mq_obj)
-            if self.download_cnt >= int(
-                    self.rule_dict.get("videos_cnt", {}).get("min", 200)
-            ):
-                self.limit_flag = True
-
-    def run(self):
-        self.get_recommend_list()
-
-
-if __name__ == '__main__':
-    J = XCZFRecommend(
-        platform="xinchunzhufu",
-        mode="recommend",
-        rule_dict={},
-        user_list=[{'uid': "123456", 'nick_name': "xiaoxiao"}],
-
-    )
-    J.get_recommend_list()
-    # J.logic()

+ 1 - 1
spider/crawler_online/xuanlanzhufu.py

@@ -102,7 +102,7 @@ class XLZFRecommend(object):
         item.add_video_info("play_cnt", 0)
         item.add_video_info("publish_time_stamp", int(time.time()))
         item.add_video_info("out_user_id", video_obj["id"])
-        item.add_video_info("cover_url", video_obj["cover"])
+        item.add_video_info("cover_url", video_obj["img"])
         item.add_video_info("like_cnt", 0)
         item.add_video_info("share_cnt", 0)
         item.add_video_info("comment_cnt", 0)

+ 2 - 2
spider/crawler_online/zhaozhaohuanxizhufuyu.py

@@ -90,7 +90,7 @@ class ZZHXZFYRecommend(object):
         处理视频
         :param video_obj:
         """
-        video_url = self.get_video_url(video_obj["id"])
+        video_url = self.get_video_url(video_obj["vid"])
         if not video_url:
             return
         time.sleep(random.randint(3, 8))
@@ -98,7 +98,7 @@ class ZZHXZFYRecommend(object):
         our_user = random.choice(self.user_list)
         item = VideoItem()
         item.add_video_info("video_id", video_obj["id"])
-        item.add_video_info("video_title", video_obj["title"])
+        item.add_video_info("video_title", video_obj["vtitle"])
         item.add_video_info("play_cnt", 0)
         item.add_video_info("publish_time_stamp", int(time.time()))
         item.add_video_info("out_user_id", video_obj["id"])

+ 0 - 4
spider/spider_map.py

@@ -185,10 +185,6 @@ spider_map = {
     "xuanlanzhufu": {
         "recommend": XLZFRecommend
     },
-    # 新春祝福
-    "xinchunzhufu": {
-        "recommend": XCZFRecommend
-    },
     # 朝朝欢喜祝福语
     "zhaozhaohuanxizhufuyu": {
         "recommend": ZZHXZFYRecommend