ソースを参照

rank---微信体系内只要有返回则选择微信体系内视频

罗俊辉 11 ヶ月 前
コミット
9203301b37

+ 2 - 2
app.py

@@ -8,7 +8,7 @@ from applications.routes import my_blueprint
 # 初始化 App
 app = Quart(__name__, static_folder='applications/static')
 logging(
-    code="1000",
+    code="0000",
     info="APP Initialization Complete",
     function="app"
 )
@@ -16,7 +16,7 @@ logging(
 # 注册蓝图
 app.register_blueprint(my_blueprint)
 logging(
-    code="1000",
+    code="0000",
     info="Blue Print Initialization Complete",
     function="app"
 )

+ 302 - 0
applications/config.py

@@ -0,0 +1,302 @@
+"""
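+@author: luojunhui
+
+gh_id_dict maps a WeChat official-account id (the "gh_..." key) to the in-site
+account paired with it, stored as {"uid": <int>, "nick_name": <str>}.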
+"""
+
+gh_id_dict = {
+    "gh_01f8afd03366": {
+        "uid": 69637520,
+        "nick_name": "非亲非故"
+    },
+    "gh_058e41145a0c": {
+        "uid": 69637476,
+        "nick_name": "甜腻梦话"
+    },
+    "gh_084a485e859a": {
+        "uid": 69637472,
+        "nick_name": "梦星月"
+    },
+    "gh_0921c03402cd": {
+        "uid": 69637531,
+        "nick_name": "你的女友"
+    },
+    "gh_0c89e11f8bf3": {
+        "uid": 69637508,
+        "nick_name": "粟米"
+    },
+    "gh_171cec079b2a": {
+        "uid": 69637501,
+        "nick_name": "海上"
+    },
+    "gh_183d80deffb8": {
+        "uid": 69637491,
+        "nick_name": "论趣"
+    },
+    "gh_1ee2e1b39ccf": {
+        "uid": 69637473,
+        "nick_name": "纵有疾风起"
+    },
+    "gh_234ef02cdee5": {
+        "uid": 69637513,
+        "nick_name": "夹逼"
+    },
+    "gh_26a307578776": {
+        "uid": 69637490,
+        "nick_name": "最宝贝的宝贝"
+    },
+    "gh_29074b51f2b7": {
+        "uid": 69637530,
+        "nick_name": "沉舸"
+    },
+    "gh_2b8c6aa035ae": {
+        "uid": 69637470,
+        "nick_name": "懶得取名"
+    },
+    "gh_34318194fd0e": {
+        "uid": 69637517,
+        "nick_name": "徒四壁"
+    },
+    "gh_3845af6945d0": {
+        "uid": 69637545,
+        "nick_name": "秋水娉婷"
+    },
+    "gh_3ac6d7208961": {
+        "uid": 69637497,
+        "nick_name": "小熊的少女梦"
+    },
+    "gh_3c7d38636846": {
+        "uid": 69637519,
+        "nick_name": "油腻腻"
+    },
+    "gh_3df10391639c": {
+        "uid": 69637541,
+        "nick_name": "六郎娇面"
+    },
+    "gh_40a0ad154478": {
+        "uid": 69637516,
+        "nick_name": "禁止"
+    },
+    "gh_424c8eeabced": {
+        "uid": 69637522,
+        "nick_name": "认命"
+    },
+    "gh_4568b5a7e2fe": {
+        "uid": 69637482,
+        "nick_name": "香腮"
+    },
+    "gh_45beb952dc74": {
+        "uid": 69637488,
+        "nick_name": "毋庸"
+    },
+    "gh_484de412b0ef": {
+        "uid": 69637481,
+        "nick_name": "婪"
+    },
+    "gh_4c058673c07e": {
+        "uid": 69637474,
+        "nick_name": "影帝"
+    },
+    "gh_538f78f9d3aa": {
+        "uid": 69637478,
+        "nick_name": "伤痕"
+    },
+    "gh_56a6765df869": {
+        "uid": 69637514,
+        "nick_name": "风月"
+    },
+    "gh_56ca3dae948c": {
+        "uid": 69637538,
+        "nick_name": "留下太多回忆"
+    },
+    "gh_5e543853d8f0": {
+        "uid": 69637543,
+        "nick_name": "不知春秋"
+    },
+    "gh_5ff48e9fb9ef": {
+        "uid": 69637494,
+        "nick_name": "寻她找他"
+    },
+    "gh_671f460c856c": {
+        "uid": 69637523,
+        "nick_name": "绝不改悔"
+    },
+    "gh_6b7c2a257263": {
+        "uid": 69637528,
+        "nick_name": "奶牙"
+    },
+    "gh_6d205db62f04": {
+        "uid": 69637509,
+        "nick_name": "怕羞"
+    },
+    "gh_6d9f36e3a7be": {
+        "uid": 69637498,
+        "nick_name": "望长安"
+    },
+    "gh_73be0287bb94": {
+        "uid": 69637537,
+        "nick_name": "戏剧"
+    },
+    "gh_744cb16f6e16": {
+        "uid": 69637505,
+        "nick_name": "反駁"
+    },
+    "gh_7b4a5f86d68c": {
+        "uid": 69637477,
+        "nick_name": "我很想你"
+    },
+    "gh_7bca1c99aea0": {
+        "uid": 69637511,
+        "nick_name": "从小就很傲"
+    },
+    "gh_7e5818b2dd83": {
+        "uid": 69637532,
+        "nick_name": "二八佳人"
+    },
+    "gh_89ef4798d3ea": {
+        "uid": 69637533,
+        "nick_name": "彼岸花"
+    },
+    "gh_901b0d722749": {
+        "uid": 69637518,
+        "nick_name": "深情不为我"
+    },
+    "gh_9161517e5676": {
+        "uid": 69637495,
+        "nick_name": "折磨"
+    },
+    "gh_93e00e187787": {
+        "uid": 69637504,
+        "nick_name": "理会"
+    },
+    "gh_9877c8541764": {
+        "uid": 69637506,
+        "nick_name": "我沿着悲伤"
+    },
+    "gh_9cf3b7ff486b": {
+        "uid": 69637492,
+        "nick_name": "hoit"
+    },
+    "gh_9e559b3b94ca": {
+        "uid": 69637471,
+        "nick_name": "我与你相遇"
+    },
+    "gh_9f8dc5b0c74e": {
+        "uid": 69637496,
+        "nick_name": "港口"
+    },
+    "gh_a182cfc94dad": {
+        "uid": 69637539,
+        "nick_name": "四海八荒"
+    },
+    "gh_a2901d34f75b": {
+        "uid": 69637535,
+        "nick_name": "听腻了谎话"
+    },
+    "gh_a307072c04b9": {
+        "uid": 69637521,
+        "nick_name": "踏步"
+    },
+    "gh_a6351b447819": {
+        "uid": 69637540,
+        "nick_name": "七猫酒馆"
+    },
+    "gh_ac43e43b253b": {
+        "uid": 69637499,
+        "nick_name": "一厢情愿"
+    },
+    "gh_adca24a8f429": {
+        "uid": 69637483,
+        "nick_name": "对你何止一句喜欢"
+    },
+    "gh_b15de7c99912": {
+        "uid": 69637536,
+        "nick_name": "糖炒板栗"
+    },
+    "gh_b32125c73861": {
+        "uid": 69637493,
+        "nick_name": "发尾"
+    },
+    "gh_b3ffc1ca3a04": {
+        "uid": 69637546,
+        "nick_name": "主宰你心"
+    },
+    "gh_b8baac4296cb": {
+        "uid": 69637489,
+        "nick_name": "生性"
+    },
+    "gh_b9b99173ff8a": {
+        "uid": 69637524,
+        "nick_name": "养一只月亮"
+    },
+    "gh_bd57b6978e06": {
+        "uid": 69637527,
+        "nick_name": "厌遇"
+    },
+    "gh_be8c29139989": {
+        "uid": 69637502,
+        "nick_name": "不负"
+    },
+    "gh_bfe5b705324a": {
+        "uid": 69637529,
+        "nick_name": "乐极"
+    },
+    "gh_bff0bcb0694a": {
+        "uid": 69637534,
+        "nick_name": "简迷离"
+    },
+    "gh_c69776baf2cd": {
+        "uid": 69637512,
+        "nick_name": "骄纵"
+    },
+    "gh_c91b42649690": {
+        "uid": 69637503,
+        "nick_name": "荟萃"
+    },
+    "gh_d2cc901deca7": {
+        "uid": 69637487,
+        "nick_name": "恶意调笑"
+    },
+    "gh_d5f935d0d1f2": {
+        "uid": 69637500,
+        "nick_name": "青少年哪吒"
+    },
+    "gh_da76772d8d15": {
+        "uid": 69637526,
+        "nick_name": "独揽风月"
+    },
+    "gh_de9f9ebc976b": {
+        "uid": 69637475,
+        "nick_name": "剑出鞘恩怨了"
+    },
+    "gh_e0eb490115f5": {
+        "uid": 69637486,
+        "nick_name": "赋别"
+    },
+    "gh_e24da99dc899": {
+        "uid": 69637484,
+        "nick_name": "恋雨夏季"
+    },
+    "gh_e2576b7181c6": {
+        "uid": 69637515,
+        "nick_name": "满天星"
+    },
+    "gh_e75dbdc73d80": {
+        "uid": 69637542,
+        "nick_name": "情战"
+    },
+    "gh_e9d819f9e147": {
+        "uid": 69637525,
+        "nick_name": "与卿"
+    },
+    "gh_efaf7da157f5": {
+        "uid": 69637547,
+        "nick_name": "心野性子浪"
+    },
+    "gh_f4594783f5b8": {
+        "uid": 69637544,
+        "nick_name": "自缚"
+    },
+    "gh_fe6ef3a65a48": {
+        "uid": 69637480,
+        "nick_name": "风间"
+    }
+}

+ 0 - 3
applications/functions/ask_kimi.py

@@ -1,9 +1,6 @@
 """
 @author: luojunhui
 """
-"""
-@author: luojunhui
-"""
 import json
 from openai import OpenAI
 

+ 55 - 9
applications/functions/common.py

@@ -3,18 +3,30 @@
 """
 import os
 import json
-import time
 import uuid
 import requests
 import urllib.parse
-from concurrent.futures import ThreadPoolExecutor
 
 from applications.functions.auto_white import auto_white
-from applications.functions.mysql import select
+from applications.functions.mysql import select, select_sensitive_words
 from applications.functions.ask_kimi import ask_kimi
 from applications.log import logging
 
 
+def sensitive_flag(title):
+    """
+    Check a title against the sensitive-word list.
+
+    The word list is fetched through select_sensitive_words() on every call.
+    Empty or NULL keywords are ignored: an empty string is a substring of
+    every title and would reject everything, and ``None in title`` raises
+    TypeError.
+
+    :param title: title text to check
+    :return: True when no sensitive word occurs in the title, False when at
+        least one does
+    """
+    sensitive_words = select_sensitive_words()
+    return not any(word and word in title for word in sensitive_words)
+
+
 def ask_kimi_and_save_to_local(info_tuple):
     """
     save file to local
@@ -23,7 +35,7 @@ def ask_kimi_and_save_to_local(info_tuple):
     title, trace_id, save_path = info_tuple[0], info_tuple[1], info_tuple[2]
     if os.path.exists(save_path):
         logging(
-            code="1002",
+            code="2001",
             info="该 video 信息已经挖掘完成---{}".format(title),
             function="ask_kimi_and_save_to_local",
             trace_id=trace_id,
@@ -35,7 +47,7 @@ def ask_kimi_and_save_to_local(info_tuple):
         else:
             result = ask_kimi(title)
         logging(
-            code="1002",
+            code="2001",
             info="kimi-result",
             data=result,
             trace_id=trace_id,
@@ -96,7 +108,7 @@ def find_videos_in_mysql(trace_id):
     out_video_list = select(sql=sql)
     if len(out_video_list) > 0:
         vid_list = [i[0] for i in out_video_list if i[0] != 0]
-        vid_list = vid_list[:2]
+        vid_list = [vid_list[-1]]
         dir_path = os.path.join(os.getcwd(), 'applications', 'static', "out_videos")
         os.makedirs(os.path.dirname(dir_path), exist_ok=True)
         done_list = os.listdir(dir_path)
@@ -107,9 +119,15 @@ def find_videos_in_mysql(trace_id):
                 os.path.join(dir_path, "{}.json".format(i[0]))
             ) for i in out_video_list if not "{}.json".format(i[0]) in done_list
         ]
-        with ThreadPoolExecutor(max_workers=2) as pool:
-            pool.map(ask_kimi_and_save_to_local, process_list)
-        # time.sleep(5)
+        if process_list:
+            ask_kimi_and_save_to_local(process_list[0])
+        logging(
+            code="2003",
+            trace_id=trace_id,
+            info="recall_search_list",
+            function="find_videos_in_mysql",
+            data=vid_list
+        )
         return {
             "search_videos": "success",
             "trace_id": trace_id,
@@ -121,3 +139,31 @@ def find_videos_in_mysql(trace_id):
             "trace_id": trace_id,
             "video_list": []
         }
+
+
+def clean_title(strings):
+    """
+    Strip surrounding whitespace from a title and remove unwanted characters.
+
+    Line breaks, slashes, "#", "&NBSP", ":", "*", "?", quotes, "<", ">", "|"
+    and spaces are removed; an ASCII "." is replaced by the full-width "。".
+
+    :param strings: raw title text
+    :return: cleaned title
+    """
+    # Applied one after another, in exactly this order.
+    # NOTE(review): the original chain listed "?" and '"' twice (a no-op the
+    # second time); possibly the full-width forms were intended -- confirm.
+    replacements = (
+        ("\n", ""),
+        ("/", ""),
+        ("\r", ""),
+        ("#", ""),
+        (".", "。"),
+        ("\\", ""),
+        ("&NBSP", ""),
+        (":", ""),
+        ("*", ""),
+        ("?", ""),
+        ('"', ""),
+        ("<", ""),
+        (">", ""),
+        ("|", ""),
+        (" ", ""),
+        ("'", ""),
+    )
+    cleaned = strings.strip()
+    for old, new in replacements:
+        cleaned = cleaned.replace(old, new)
+    return cleaned

+ 98 - 0
applications/functions/item.py

@@ -0,0 +1,98 @@
+"""
+@author: luojunhui
+"""
+import time
+
+from applications.functions.common import clean_title
+
+
+class VideoItem(object):
+    """
+    Collects and validates the fields of one scanned video so that the dict
+    handed on to the pipeline / ETL is well-formed.
+
+    __init__: start with an empty dict that will hold the video fields
+    add_video_info: store one field on the item
+    check_item: normalise the stored fields and validate them
+    produce_item: return the validated dict, or False when validation fails
+    """
+
+    def __init__(self):
+        self.item = {}
+
+    def add_video_info(self, key, value):
+        """Store ``value`` under ``key`` on the item."""
+        self.item[key] = value
+
+    def check_item(self):
+        """
+        Normalise the item in place and report whether it is usable.
+
+        Fields fall into three groups:
+        1. required (must be truthy): video_id, user_id, user_name,
+           out_video_id, session, video_url, cover_url, platform, strategy
+        2. defaulted to 0 when missing or falsy: duration, play_cnt, like_cnt,
+           comment_cnt, share_cnt, width, height
+        3. post-processed: video_title, publish time
+
+        The normalisation steps are applied even when a later required-field
+        check makes the method return False.
+
+        :return: True when the item is valid, otherwise False
+        """
+        raw_title = self.item.get("video_title")
+        if not raw_title:
+            return False
+        self.item["video_title"] = clean_title(raw_title)
+
+        # Fall back to "now" when no publish timestamp was supplied.
+        if not self.item.get("publish_time_stamp"):
+            self.add_video_info("publish_time_stamp", int(time.time()))
+        publish_time_str = time.strftime(
+            "%Y-%m-%d %H:%M:%S", time.localtime(self.item["publish_time_stamp"])
+        )
+        self.add_video_info("publish_time_str", publish_time_str)
+        self.add_video_info("publish_time", publish_time_str)
+
+        if not self.item.get("update_time_stamp"):
+            self.add_video_info("update_time_stamp", int(time.time()))
+
+        # Numeric fields default to 0 when absent or falsy.
+        zero_default_keys = (
+            "duration",
+            "play_cnt",
+            "like_cnt",
+            "comment_cnt",
+            "share_cnt",
+            "width",
+            "height",
+        )
+        for zero_key in zero_default_keys:
+            if not self.item.get(zero_key):
+                self.add_video_info(zero_key, 0)
+
+        # Required fields; the item is rejected if any of them is falsy.
+        # video_id / out_video_id: off-site video id
+        # user_id: in-site user id
+        # user_name: off-site user name
+        required_keys = (
+            "video_id",
+            "user_id",
+            "user_name",
+            "out_video_id",
+            "session",
+            "video_url",
+            "cover_url",
+            "platform",
+            "strategy",
+        )
+        return all(self.item.get(required_key) for required_key in required_keys)
+
+    def produce_item(self):
+        """Return the item dict when check_item() passes, otherwise False."""
+        return self.item if self.check_item() else False

+ 21 - 0
applications/functions/mysql.py

@@ -54,3 +54,24 @@ def select_pq_videos():
         for line in data
     ]
     return result
+
+
+def select_sensitive_words():
+    """
+    Fetch the sensitive keywords from MySQL.
+
+    Opens a new connection per call, reads the `keyword` column of the
+    wx_sensitive_word rows whose `data_status` is 0, and always closes the
+    cursor and the connection afterwards (the previous version leaked both).
+
+    :return: list with the first column of every returned row
+    """
+    # NOTE(review): host and credentials are hard-coded in source; they should
+    # move to configuration / environment and the password be rotated.
+    connection = pymysql.connect(
+        host="rm-bp1k5853td1r25g3n690.mysql.rds.aliyuncs.com",  # database host (intranet address)
+        port=3306,  # port
+        user="wx2016_longvideo",  # mysql user name
+        passwd="wx2016_longvideoP@assword1234",  # mysql password
+        db="longvideo",  # database name
+        charset="utf8mb4"  # connection character set
+    )
+    sql = "select `keyword` from wx_sensitive_word where `data_status` = 0"
+    try:
+        with connection.cursor() as cursor:
+            cursor.execute(sql)
+            data = cursor.fetchall()
+    finally:
+        connection.close()
+    return [line[0] for line in data]

+ 0 - 35
applications/functions/odps.py

@@ -1,35 +0,0 @@
-"""
-@author: luojunhui
-"""
-
-from odps import ODPS
-
-
-class PyODPS(object):
-    """
-    PyODPS class, get data from odps server
-    """
-
-    def __init__(self):
-        self.endpoint = "http://service.cn.maxcompute.aliyun.com/api"
-        self.access_id = "LTAIWYUujJAm7CbH"
-        self.access_key = "RfSjdiWwED1sGFlsjXv0DlfTnZTG1P"
-        self.project = "loghubods"
-
-        self.od = ODPS(
-            access_id=self.access_id,
-            secret_access_key=self.access_key,
-            endpoint=self.endpoint,
-            project=self.project,
-        )
-
-    def select(self, sql):
-        """
-        :param sql: 查询语句
-        :return: odps_obj{}
-        """
-        result = []
-        with self.od.execute_sql(sql).open_reader() as reader:
-            for record in reader:
-                result.append(record)
-        return result

+ 12 - 1
applications/log.py

@@ -6,8 +6,18 @@ import json
 from aliyun.log import LogClient, PutLogsRequest, LogItem
 
 
-def logging(code, trace_id=None, info=None, port=None, alg=None, function=None, data=None):
+def logging(
+        code,
+        mode="prod",
+        trace_id=None,
+        info=None,
+        port=None,
+        alg=None,
+        function=None,
+        data=None
+):
     """
+    :param mode: 生产模式 or  测试模式
     :param trace_id: 请求唯一 id
     :param data: 信息
     :param code: 日志状态码
@@ -30,6 +40,7 @@ def logging(code, trace_id=None, info=None, port=None, alg=None, function=None,
     log_group = []
     log_item = LogItem()
     contents = [
+        (f"mode", str(mode)),
         (f"code", str(code)),
         (f"alg", str(alg)),
         (f"function", str(function)),

+ 2 - 4
applications/mq.py

@@ -22,13 +22,11 @@ class MQ(object):
 
     def send_msg(self, params):
         """
-        发送 mq,并且记录 redis
+        send msg to mq client
         """
-        account = params["ghId"]
-
         try:
             msg = TopicMessage(json.dumps(params))
-            message_key = account + str(uuid4())
+            message_key = str(uuid4())
             msg.set_message_key(message_key)
             re_msg = self.producer.publish_message(msg)
             print(re_msg)

+ 19 - 11
applications/routes.py

@@ -9,7 +9,7 @@ from quart import Blueprint, jsonify, request
 
 from applications.log import logging
 from applications.process import ProcessParams
-from applications.mq import MQ
+from applications.search import search_videos
 from applications.functions.common import find_videos_in_mysql, ask_kimi_and_save_to_local
 
 my_blueprint = Blueprint('kimi', __name__)
@@ -35,28 +35,35 @@ async def search_videos_from_the_web():
     从web 搜索视频并且存储到票圈的视频库中
     :return:
     """
-    mq = MQ(topic_name="search_spider_prod")
+    params = await request.get_json()
+    title = params['title']
+    gh_id = params['ghId']
     trace_id = "search-{}-{}".format(str(uuid.uuid4()), str(int(time.time())))
+    params['trace_id'] = trace_id
     logging(
-        code="1001",
+        code="2000",
         info="搜索视频内容接口请求成功",
         port="title_to_search",
+        function="search_videos_from_the_web",
         trace_id=trace_id
     )
-    params = await request.get_json()
-    params['trace_id'] = trace_id
-    title = params['title']
     title_p = os.path.join(os.getcwd(), 'applications', 'static', "titles", "{}.json".format(title))
     if os.path.exists(title_p):
         logging(
-            code="1002",
+            code="2001",
             info="该标题已经被 kimi 处理过,跳过请求 kimi 操作--- {}".format(title),
-            function="process",
+            function="search_videos_from_the_web",
             trace_id=trace_id
         )
     else:
         ask_kimi_and_save_to_local((title, trace_id, title_p))
-    mq.send_msg(params=params)
+    await asyncio.sleep(2)
+    search_videos(
+        title=title,
+        video_path=title_p,
+        trace_id=trace_id,
+        gh_id=gh_id,
+    )
     res = {
         "trace_id": trace_id,
         "code": 0
@@ -73,10 +80,11 @@ async def find_in_mysql():
     data = await request.get_json()
     trace_id = data['traceId']
     logging(
-        code="1001",
+        code="2000",
         info="请求接口成功",
         port="title_to_video",
-        trace_id=data['traceId']
+        trace_id=trace_id,
+        function="find_in_mysql"
     )
     res = find_videos_in_mysql(trace_id=trace_id)
     return jsonify(res)

+ 177 - 0
applications/search.py

@@ -0,0 +1,177 @@
+"""
+@author: luojunhui
+调用接口在微信内搜索视频
+"""
+import json
+import time
+import requests
+
+from applications.mq import MQ
+from applications.log import logging
+from applications.config import gh_id_dict
+from applications.functions.item import VideoItem
+from applications.functions.common import sensitive_flag
+
+
+def wx_search(keys):
+    """
+    POST a video keyword search to the WeChat crawler endpoint.
+
+    :param keys: text sent as the "keyword" field of the request body
+    :return: the response body decoded from JSON
+    """
+    body = {
+        "keyword": keys,
+        "cursor": "0",
+        "content_type": "video"
+    }
+    resp = requests.post(
+        "http://8.217.190.241:8888/crawler/wei_xin/keyword",
+        headers={'Content-Type': 'application/json'},
+        data=json.dumps(body),
+    )
+    return resp.json()
+
+
+def process_weixin_video_obj(video_obj, user, trace_id, title):
+    """
+    Build a VideoItem from one WeChat search result and publish it to the ETL
+    topic. Each official account is paired with one in-site account.
+
+    If the item fails VideoItem validation nothing is published; previously
+    the ``False`` returned by produce_item() was sent to the queue as the
+    message body.
+
+    :param video_obj: search-result entry; 'pubTime', 'hashDocID', 'videoUrl'
+        and 'image' are read from it
+    :param user: in-site account dict providing "uid" and "nick_name"
+    :param trace_id: request trace id; also stored as out_user_id
+    :param title: cleaned title stored as video_title
+    :return: None
+    """
+    platform = "weixin_search"
+    item = VideoItem()
+    item.add_video_info("user_id", user["uid"])
+    item.add_video_info("user_name", user["nick_name"])
+    item.add_video_info("video_id", video_obj['hashDocID'])
+    item.add_video_info("video_title", title)
+    item.add_video_info("publish_time_stamp", int(video_obj['pubTime']))
+    item.add_video_info("video_url", video_obj["videoUrl"])
+    item.add_video_info("cover_url", video_obj["image"])
+    item.add_video_info("out_video_id", video_obj['hashDocID'])
+    item.add_video_info("out_user_id", trace_id)
+    item.add_video_info("platform", platform)
+    item.add_video_info("strategy", "search")
+    item.add_video_info("session", "{}-{}".format(platform, int(time.time())))
+    mq_obj = item.produce_item()
+    if not mq_obj:
+        # NOTE(review): "6003" is a new status code introduced by this fix;
+        # align it with the project's log-code table.
+        logging(
+            code="6003",
+            info="video item failed validation, not sent to ETL",
+            function="process_weixin_video_obj",
+            trace_id=trace_id,
+            data=item.item
+        )
+        return
+    # The producer is only created once there is a valid message to send.
+    etl_mq = MQ(topic_name="topic_crawler_etl_prod")
+    etl_mq.send_msg(params=mq_obj)
+    logging(
+        code="6002",
+        info="发送消息至 ETL",
+        function="process_weixin_video_obj",
+        trace_id=trace_id,
+        data=mq_obj
+    )
+
+
+def _strip_highlight(raw_title):
+    """Remove the search-highlight <em> markup and '#' from a result title."""
+    return raw_title.replace('<em class=\"highlight\">', '').replace('</em>', '').replace("#", "")
+
+
+def _first_allowed_video(keys, fail_info, trace_id):
+    """Search WeChat for ``keys`` and return the first result whose title passes sensitive_flag, else None."""
+    search_result = wx_search(keys=keys)
+    if search_result['msg'] == '未知错误':
+        logging(
+            code="7001",
+            info=fail_info,
+            trace_id=trace_id
+        )
+        return None
+    for obj in search_result['data']['data']:
+        if sensitive_flag(_strip_highlight(obj['items'][0]['title'])):
+            return obj
+    return None
+
+
+def return_video(video_path, title, trace_id):
+    """
+    Find one WeChat video for a title.
+
+    Reads the JSON stored at ``video_path`` and then searches, in order, by
+    the title itself, by the first entry of its 'search_keys', and by its
+    'theme'. A later search only runs when the earlier ones produced no
+    acceptable result. The first result whose title passes sensitive_flag is
+    returned.
+
+    Failure logs always report the requested ``title``; previously the loop
+    variable shadowed it, so later logs showed a candidate's title instead.
+
+    :param video_path: path of the JSON file holding the mined title data
+    :param title: video title
+    :param trace_id: request trace id used in log records
+    :return: the matching search-result object, or None when nothing matched
+        or the JSON file held no data
+    """
+    with open(video_path, encoding='utf-8') as f:
+        my_obj = json.loads(f.read())
+    if not my_obj:
+        logging(
+            code="7000",
+            info="标题--{}--kimi 挖掘数据失败".format(title),
+            trace_id=trace_id
+        )
+        return None
+
+    # 1) the title itself
+    video = _first_allowed_video(title, "通过标题搜索失败---{}".format(title), trace_id)
+    if video is not None:
+        return video
+
+    # 2) first mined search key
+    video = _first_allowed_video(
+        my_obj['search_keys'][0],
+        "通过搜索词搜索失败---{}".format(title),
+        trace_id
+    )
+    if video is not None:
+        return video
+
+    # 3) mined theme
+    return _first_allowed_video(
+        my_obj['theme'],
+        "通过主题搜索失败---{}".format(title),
+        trace_id
+    )
+
+
+def search_videos(video_path, title, trace_id, gh_id):
+    """
+    Search WeChat for a video matching the title and forward it to ETL.
+
+    :param video_path: path of the JSON file holding the mined title data
+    :param title: title to search for
+    :param trace_id: request trace id used in log records
+    :param gh_id: official-account id, looked up in gh_id_dict
+    :return: None
+    """
+    video_obj = return_video(video_path, title, trace_id)
+    if not video_obj:
+        logging(
+            code="7003",
+            info="视频搜索失败",
+            trace_id=trace_id
+        )
+        return
+
+    logging(
+        code="7002",
+        info="视频搜索成功",
+        trace_id=trace_id,
+        data=video_obj
+    )
+    first_item = video_obj['items'][0]
+    # Drop the search-highlight markup and '#' from the result title.
+    plain_title = first_item['title']
+    for fragment in ('<em class=\"highlight\">', '</em>', "#"):
+        plain_title = plain_title.replace(fragment, '')
+    # NOTE(review): gh_id_dict.get() yields None for an unknown gh_id and the
+    # call below then subscripts it -- confirm callers only pass known ids.
+    process_weixin_video_obj(
+        video_obj=first_item,
+        user=gh_id_dict.get(gh_id),
+        trace_id=trace_id,
+        title=plain_title
+    )