1 jaar geleden · 9203301b37
--- a/app.py
+++ b/app.py
@@ -8,7 +8,7 @@ from applications.routes import my_blueprint
 
				 # 初始化 App
			
 
				 app = Quart(__name__, static_folder='applications/static')
			
 
				 logging(
			
 
				-    code="1000",
			
 
				+    code="0000",
			
 
				     info="APP Initialization Complete",
			
 
				     function="app"
			
 
				 )
			
@@ -16,7 +16,7 @@ logging(
 
				 # 注册蓝图
			
 
				 app.register_blueprint(my_blueprint)
			
 
				 logging(
			
 
				-    code="1000",
			
 
				+    code="0000",
			
 
				     info="Blue Print Initialization Complete",
			
 
				     function="app"
			
 
				 )
			
--- a/applications/config.py
+++ b/applications/config.py
@@ -0,0 +1,302 @@
 
				+"""
			
 
				+@author: luojunhui
			
 
				+"""
			
 
				+
			
 
				+gh_id_dict = {
			
 
				+    "gh_01f8afd03366": {
			
 
				+        "uid": 69637520,
			
 
				+        "nick_name": "非亲非故"
			
 
				+    },
			
 
				+    "gh_058e41145a0c": {
			
 
				+        "uid": 69637476,
			
 
				+        "nick_name": "甜腻梦话"
			
 
				+    },
			
 
				+    "gh_084a485e859a": {
			
 
				+        "uid": 69637472,
			
 
				+        "nick_name": "梦星月"
			
 
				+    },
			
 
				+    "gh_0921c03402cd": {
			
 
				+        "uid": 69637531,
			
 
				+        "nick_name": "你的女友"
			
 
				+    },
			
 
				+    "gh_0c89e11f8bf3": {
			
 
				+        "uid": 69637508,
			
 
				+        "nick_name": "粟米"
			
 
				+    },
			
 
				+    "gh_171cec079b2a": {
			
 
				+        "uid": 69637501,
			
 
				+        "nick_name": "海上"
			
 
				+    },
			
 
				+    "gh_183d80deffb8": {
			
 
				+        "uid": 69637491,
			
 
				+        "nick_name": "论趣"
			
 
				+    },
			
 
				+    "gh_1ee2e1b39ccf": {
			
 
				+        "uid": 69637473,
			
 
				+        "nick_name": "纵有疾风起"
			
 
				+    },
			
 
				+    "gh_234ef02cdee5": {
			
 
				+        "uid": 69637513,
			
 
				+        "nick_name": "夹逼"
			
 
				+    },
			
 
				+    "gh_26a307578776": {
			
 
				+        "uid": 69637490,
			
 
				+        "nick_name": "最宝贝的宝贝"
			
 
				+    },
			
 
				+    "gh_29074b51f2b7": {
			
 
				+        "uid": 69637530,
			
 
				+        "nick_name": "沉舸"
			
 
				+    },
			
 
				+    "gh_2b8c6aa035ae": {
			
 
				+        "uid": 69637470,
			
 
				+        "nick_name": "懶得取名"
			
 
				+    },
			
 
				+    "gh_34318194fd0e": {
			
 
				+        "uid": 69637517,
			
 
				+        "nick_name": "徒四壁"
			
 
				+    },
			
 
				+    "gh_3845af6945d0": {
			
 
				+        "uid": 69637545,
			
 
				+        "nick_name": "秋水娉婷"
			
 
				+    },
			
 
				+    "gh_3ac6d7208961": {
			
 
				+        "uid": 69637497,
			
 
				+        "nick_name": "小熊的少女梦"
			
 
				+    },
			
 
				+    "gh_3c7d38636846": {
			
 
				+        "uid": 69637519,
			
 
				+        "nick_name": "油腻腻"
			
 
				+    },
			
 
				+    "gh_3df10391639c": {
			
 
				+        "uid": 69637541,
			
 
				+        "nick_name": "六郎娇面"
			
 
				+    },
			
 
				+    "gh_40a0ad154478": {
			
 
				+        "uid": 69637516,
			
 
				+        "nick_name": "禁止"
			
 
				+    },
			
 
				+    "gh_424c8eeabced": {
			
 
				+        "uid": 69637522,
			
 
				+        "nick_name": "认命"
			
 
				+    },
			
 
				+    "gh_4568b5a7e2fe": {
			
 
				+        "uid": 69637482,
			
 
				+        "nick_name": "香腮"
			
 
				+    },
			
 
				+    "gh_45beb952dc74": {
			
 
				+        "uid": 69637488,
			
 
				+        "nick_name": "毋庸"
			
 
				+    },
			
 
				+    "gh_484de412b0ef": {
			
 
				+        "uid": 69637481,
			
 
				+        "nick_name": "婪"
			
 
				+    },
			
 
				+    "gh_4c058673c07e": {
			
 
				+        "uid": 69637474,
			
 
				+        "nick_name": "影帝"
			
 
				+    },
			
 
				+    "gh_538f78f9d3aa": {
			
 
				+        "uid": 69637478,
			
 
				+        "nick_name": "伤痕"
			
 
				+    },
			
 
				+    "gh_56a6765df869": {
			
 
				+        "uid": 69637514,
			
 
				+        "nick_name": "风月"
			
 
				+    },
			
 
				+    "gh_56ca3dae948c": {
			
 
				+        "uid": 69637538,
			
 
				+        "nick_name": "留下太多回忆"
			
 
				+    },
			
 
				+    "gh_5e543853d8f0": {
			
 
				+        "uid": 69637543,
			
 
				+        "nick_name": "不知春秋"
			
 
				+    },
			
 
				+    "gh_5ff48e9fb9ef": {
			
 
				+        "uid": 69637494,
			
 
				+        "nick_name": "寻她找他"
			
 
				+    },
			
 
				+    "gh_671f460c856c": {
			
 
				+        "uid": 69637523,
			
 
				+        "nick_name": "绝不改悔"
			
 
				+    },
			
 
				+    "gh_6b7c2a257263": {
			
 
				+        "uid": 69637528,
			
 
				+        "nick_name": "奶牙"
			
 
				+    },
			
 
				+    "gh_6d205db62f04": {
			
 
				+        "uid": 69637509,
			
 
				+        "nick_name": "怕羞"
			
 
				+    },
			
 
				+    "gh_6d9f36e3a7be": {
			
 
				+        "uid": 69637498,
			
 
				+        "nick_name": "望长安"
			
 
				+    },
			
 
				+    "gh_73be0287bb94": {
			
 
				+        "uid": 69637537,
			
 
				+        "nick_name": "戏剧"
			
 
				+    },
			
 
				+    "gh_744cb16f6e16": {
			
 
				+        "uid": 69637505,
			
 
				+        "nick_name": "反駁"
			
 
				+    },
			
 
				+    "gh_7b4a5f86d68c": {
			
 
				+        "uid": 69637477,
			
 
				+        "nick_name": "我很想你"
			
 
				+    },
			
 
				+    "gh_7bca1c99aea0": {
			
 
				+        "uid": 69637511,
			
 
				+        "nick_name": "从小就很傲"
			
 
				+    },
			
 
				+    "gh_7e5818b2dd83": {
			
 
				+        "uid": 69637532,
			
 
				+        "nick_name": "二八佳人"
			
 
				+    },
			
 
				+    "gh_89ef4798d3ea": {
			
 
				+        "uid": 69637533,
			
 
				+        "nick_name": "彼岸花"
			
 
				+    },
			
 
				+    "gh_901b0d722749": {
			
 
				+        "uid": 69637518,
			
 
				+        "nick_name": "深情不为我"
			
 
				+    },
			
 
				+    "gh_9161517e5676": {
			
 
				+        "uid": 69637495,
			
 
				+        "nick_name": "折磨"
			
 
				+    },
			
 
				+    "gh_93e00e187787": {
			
 
				+        "uid": 69637504,
			
 
				+        "nick_name": "理会"
			
 
				+    },
			
 
				+    "gh_9877c8541764": {
			
 
				+        "uid": 69637506,
			
 
				+        "nick_name": "我沿着悲伤"
			
 
				+    },
			
 
				+    "gh_9cf3b7ff486b": {
			
 
				+        "uid": 69637492,
			
 
				+        "nick_name": "hoit"
			
 
				+    },
			
 
				+    "gh_9e559b3b94ca": {
			
 
				+        "uid": 69637471,
			
 
				+        "nick_name": "我与你相遇"
			
 
				+    },
			
 
				+    "gh_9f8dc5b0c74e": {
			
 
				+        "uid": 69637496,
			
 
				+        "nick_name": "港口"
			
 
				+    },
			
 
				+    "gh_a182cfc94dad": {
			
 
				+        "uid": 69637539,
			
 
				+        "nick_name": "四海八荒"
			
 
				+    },
			
 
				+    "gh_a2901d34f75b": {
			
 
				+        "uid": 69637535,
			
 
				+        "nick_name": "听腻了谎话"
			
 
				+    },
			
 
				+    "gh_a307072c04b9": {
			
 
				+        "uid": 69637521,
			
 
				+        "nick_name": "踏步"
			
 
				+    },
			
 
				+    "gh_a6351b447819": {
			
 
				+        "uid": 69637540,
			
 
				+        "nick_name": "七猫酒馆"
			
 
				+    },
			
 
				+    "gh_ac43e43b253b": {
			
 
				+        "uid": 69637499,
			
 
				+        "nick_name": "一厢情愿"
			
 
				+    },
			
 
				+    "gh_adca24a8f429": {
			
 
				+        "uid": 69637483,
			
 
				+        "nick_name": "对你何止一句喜欢"
			
 
				+    },
			
 
				+    "gh_b15de7c99912": {
			
 
				+        "uid": 69637536,
			
 
				+        "nick_name": "糖炒板栗"
			
 
				+    },
			
 
				+    "gh_b32125c73861": {
			
 
				+        "uid": 69637493,
			
 
				+        "nick_name": "发尾"
			
 
				+    },
			
 
				+    "gh_b3ffc1ca3a04": {
			
 
				+        "uid": 69637546,
			
 
				+        "nick_name": "主宰你心"
			
 
				+    },
			
 
				+    "gh_b8baac4296cb": {
			
 
				+        "uid": 69637489,
			
 
				+        "nick_name": "生性"
			
 
				+    },
			
 
				+    "gh_b9b99173ff8a": {
			
 
				+        "uid": 69637524,
			
 
				+        "nick_name": "养一只月亮"
			
 
				+    },
			
 
				+    "gh_bd57b6978e06": {
			
 
				+        "uid": 69637527,
			
 
				+        "nick_name": "厌遇"
			
 
				+    },
			
 
				+    "gh_be8c29139989": {
			
 
				+        "uid": 69637502,
			
 
				+        "nick_name": "不负"
			
 
				+    },
			
 
				+    "gh_bfe5b705324a": {
			
 
				+        "uid": 69637529,
			
 
				+        "nick_name": "乐极"
			
 
				+    },
			
 
				+    "gh_bff0bcb0694a": {
			
 
				+        "uid": 69637534,
			
 
				+        "nick_name": "简迷离"
			
 
				+    },
			
 
				+    "gh_c69776baf2cd": {
			
 
				+        "uid": 69637512,
			
 
				+        "nick_name": "骄纵"
			
 
				+    },
			
 
				+    "gh_c91b42649690": {
			
 
				+        "uid": 69637503,
			
 
				+        "nick_name": "荟萃"
			
 
				+    },
			
 
				+    "gh_d2cc901deca7": {
			
 
				+        "uid": 69637487,
			
 
				+        "nick_name": "恶意调笑"
			
 
				+    },
			
 
				+    "gh_d5f935d0d1f2": {
			
 
				+        "uid": 69637500,
			
 
				+        "nick_name": "青少年哪吒"
			
 
				+    },
			
 
				+    "gh_da76772d8d15": {
			
 
				+        "uid": 69637526,
			
 
				+        "nick_name": "独揽风月"
			
 
				+    },
			
 
				+    "gh_de9f9ebc976b": {
			
 
				+        "uid": 69637475,
			
 
				+        "nick_name": "剑出鞘恩怨了"
			
 
				+    },
			
 
				+    "gh_e0eb490115f5": {
			
 
				+        "uid": 69637486,
			
 
				+        "nick_name": "赋别"
			
 
				+    },
			
 
				+    "gh_e24da99dc899": {
			
 
				+        "uid": 69637484,
			
 
				+        "nick_name": "恋雨夏季"
			
 
				+    },
			
 
				+    "gh_e2576b7181c6": {
			
 
				+        "uid": 69637515,
			
 
				+        "nick_name": "满天星"
			
 
				+    },
			
 
				+    "gh_e75dbdc73d80": {
			
 
				+        "uid": 69637542,
			
 
				+        "nick_name": "情战"
			
 
				+    },
			
 
				+    "gh_e9d819f9e147": {
			
 
				+        "uid": 69637525,
			
 
				+        "nick_name": "与卿"
			
 
				+    },
			
 
				+    "gh_efaf7da157f5": {
			
 
				+        "uid": 69637547,
			
 
				+        "nick_name": "心野性子浪"
			
 
				+    },
			
 
				+    "gh_f4594783f5b8": {
			
 
				+        "uid": 69637544,
			
 
				+        "nick_name": "自缚"
			
 
				+    },
			
 
				+    "gh_fe6ef3a65a48": {
			
 
				+        "uid": 69637480,
			
 
				+        "nick_name": "风间"
			
 
				+    }
			
 
				+}
			
--- a/applications/functions/ask_kimi.py
+++ b/applications/functions/ask_kimi.py
@@ -1,9 +1,6 @@
 
				 """
			
 
				 @author: luojunhui
			
 
				 """
			
 
				-"""
			
 
				-@author: luojunhui
			
 
				-"""
			
 
				 import json
			
 
				 from openai import OpenAI
			
 
				 
			
--- a/applications/functions/common.py
+++ b/applications/functions/common.py
@@ -3,18 +3,30 @@
 
				 """
			
 
				 import os
			
 
				 import json
			
 
				-import time
			
 
				 import uuid
			
 
				 import requests
			
 
				 import urllib.parse
			
 
				-from concurrent.futures import ThreadPoolExecutor
			
 
				 
			
 
				 from applications.functions.auto_white import auto_white
			
 
				-from applications.functions.mysql import select
			
 
				+from applications.functions.mysql import select, select_sensitive_words
			
 
				 from applications.functions.ask_kimi import ask_kimi
			
 
				 from applications.log import logging
			
 
				 
			
 
				 
			
 
				def sensitive_flag(title):
				    """
				    Check whether a title is free of sensitive words.

				    NOTE: despite the name, the result is True when the title is clean and
				    False when it contains a sensitive word.

				    :param title: title text to check
				    :return: False if any keyword returned by select_sensitive_words()
				        occurs in the title, otherwise True
				    """
				    # The keyword list is fetched through select_sensitive_words() on
				    # every call.
				    sensitive_words = select_sensitive_words()
				    for word in sensitive_words:
				        # Skip NULL / empty keywords: "" is a substring of every string and
				        # would reject every title, and None would raise TypeError.
				        if not word:
				            continue
				        if word in title:
				            return False
				    return True
			
 
				+
			
 
				+
			
 
				 def ask_kimi_and_save_to_local(info_tuple):
			
 
				     """
			
 
				     save file to local
			
@@ -23,7 +35,7 @@ def ask_kimi_and_save_to_local(info_tuple):
 
				     title, trace_id, save_path = info_tuple[0], info_tuple[1], info_tuple[2]
			
 
				     if os.path.exists(save_path):
			
 
				         logging(
			
 
				-            code="1002",
			
 
				+            code="2001",
			
 
				             info="该 video 信息已经挖掘完成---{}".format(title),
			
 
				             function="ask_kimi_and_save_to_local",
			
 
				             trace_id=trace_id,
			
@@ -35,7 +47,7 @@ def ask_kimi_and_save_to_local(info_tuple):
 
				         else:
			
 
				             result = ask_kimi(title)
			
 
				         logging(
			
 
				-            code="1002",
			
 
				+            code="2001",
			
 
				             info="kimi-result",
			
 
				             data=result,
			
 
				             trace_id=trace_id,
			
@@ -96,7 +108,7 @@ def find_videos_in_mysql(trace_id):
 
				     out_video_list = select(sql=sql)
			
 
				     if len(out_video_list) > 0:
			
 
				         vid_list = [i[0] for i in out_video_list if i[0] != 0]
			
 
				-        vid_list = vid_list[:2]
			
 
				+        vid_list = [vid_list[-1]]
			
 
				         dir_path = os.path.join(os.getcwd(), 'applications', 'static', "out_videos")
			
 
				         os.makedirs(os.path.dirname(dir_path), exist_ok=True)
			
 
				         done_list = os.listdir(dir_path)
			
@@ -107,9 +119,15 @@ def find_videos_in_mysql(trace_id):
 
				                 os.path.join(dir_path, "{}.json".format(i[0]))
			
 
				             ) for i in out_video_list if not "{}.json".format(i[0]) in done_list
			
 
				         ]
			
 
				-        with ThreadPoolExecutor(max_workers=2) as pool:
			
 
				-            pool.map(ask_kimi_and_save_to_local, process_list)
			
 
				-        # time.sleep(5)
			
 
				+        if process_list:
			
 
				+            ask_kimi_and_save_to_local(process_list[0])
			
 
				+        logging(
			
 
				+            code="2003",
			
 
				+            trace_id=trace_id,
			
 
				+            info="recall_search_list",
			
 
				+            function="find_videos_in_mysql",
			
 
				+            data=vid_list
			
 
				+        )
			
 
				         return {
			
 
				             "search_videos": "success",
			
 
				             "trace_id": trace_id,
			
@@ -121,3 +139,31 @@ def find_videos_in_mysql(trace_id):
 
				             "trace_id": trace_id,
			
 
				             "video_list": []
			
 
				         }
			
 
				+
			
 
				+
			
 
				def clean_title(strings):
				    """
				    Normalise a title: strip surrounding whitespace, then drop unwanted
				    characters.

				    The work happens in three ordered steps, which together give the same
				    result as replacing each character one after another:

				    1. remove newline, "/", carriage return, "#" and backslash, and turn
				       the ASCII "." into the full-width "。";
				    2. remove the literal text "&NBSP";
				    3. remove ":", "*", "？", "?", double quote, "<", ">", "|", space and
				       single quote.

				    Step 2 must stay between the two character passes: a character from
				    step 1 sitting inside "&NBSP" is removed first (so the marker is then
				    matched), while a character from step 3 is removed afterwards (so the
				    marker is left in place).

				    :param strings: raw title text
				    :return: cleaned title
				    """
				    first_pass = str.maketrans(".", "。", "\n/\r#\\")
				    second_pass = str.maketrans("", "", ":*？?\"<>| '")
				    return (
				        strings.strip()
				        .translate(first_pass)
				        .replace("&NBSP", "")
				        .translate(second_pass)
				    )
			
--- a/applications/functions/item.py
+++ b/applications/functions/item.py
@@ -0,0 +1,98 @@
 
				+"""
			
 
				+@author: luojunhui
			
 
				+"""
			
 
				+import time
			
 
				+
			
 
				+from applications.functions.common import clean_title
			
 
				+
			
 
				+
			
 
				class VideoItem(object):
				    """
				    Holds the fields of one scanned video and normalises / validates them
				    before the dict is handed on to the pipeline and ETL.

				    __init__: start with an empty dict for the video fields
				    add_video_info: store one field on the item
				    check_item: normalise the fields and verify the required ones
				    produce_item: return the item dict when it is valid, otherwise False
				    """

				    # Fields that are set to 0 when missing or falsy.
				    _ZERO_DEFAULT_KEYS = (
				        "duration",
				        "play_cnt",
				        "like_cnt",
				        "comment_cnt",
				        "share_cnt",
				        "width",
				        "height",
				    )

				    # Fields that must hold a truthy value for the item to be valid.
				    # video_id and out_video_id are both the off-site video id, user_id is
				    # the on-site user id and user_name is the off-site user name.
				    # out_user_id (the off-site user id) is NOT part of this set, so its
				    # absence does not invalidate an item.
				    _REQUIRED_KEYS = (
				        "video_id",
				        "user_id",
				        "user_name",
				        "out_video_id",
				        "session",
				        "video_url",
				        "cover_url",
				        "platform",
				        "strategy",
				    )

				    def __init__(self):
				        self.item = {}

				    def add_video_info(self, key, value):
				        """
				        Store one field on the item, overwriting any previous value.

				        :param key: field name
				        :param value: field value
				        """
				        self.item[key] = value

				    def check_item(self):
				        """
				        Normalise the item in place and report whether it is valid.

				        Fields fall into three groups:
				        1. required fields (see _REQUIRED_KEYS): any missing or falsy one
				           makes the item invalid;
				        2. numeric fields (see _ZERO_DEFAULT_KEYS): default to 0;
				        3. post-processed fields: video_title is passed through
				           clean_title, and publish_time_stamp is formatted into
				           publish_time_str / publish_time (local time), defaulting to
				           the current time when absent. update_time_stamp also defaults
				           to the current time.

				        :return: True when the item is valid, otherwise False
				        """
				        raw_title = self.item.get("video_title")
				        if not raw_title:
				            return False
				        cleaned_title = clean_title(raw_title)
				        # A title made up only of stripped characters cleans down to "",
				        # which is no more usable than a missing title.
				        if not cleaned_title:
				            return False
				        self.item["video_title"] = cleaned_title

				        if not self.item.get("publish_time_stamp"):
				            self.add_video_info("publish_time_stamp", int(time.time()))
				        publish_time_str = time.strftime(
				            "%Y-%m-%d %H:%M:%S", time.localtime(self.item["publish_time_stamp"])
				        )
				        self.add_video_info("publish_time_str", publish_time_str)
				        self.add_video_info("publish_time", publish_time_str)
				        if not self.item.get("update_time_stamp"):
				            self.add_video_info("update_time_stamp", int(time.time()))

				        for config_key in self._ZERO_DEFAULT_KEYS:
				            if not self.item.get(config_key):
				                self.add_video_info(config_key, 0)

				        return all(self.item.get(m_key) for m_key in self._REQUIRED_KEYS)

				    def produce_item(self):
				        """
				        Validate the item and return it.

				        :return: the item dict when check_item() succeeds, otherwise False
				        """
				        if self.check_item():
				            return self.item
				        return False
			
--- a/applications/functions/mysql.py
+++ b/applications/functions/mysql.py
@@ -54,3 +54,24 @@ def select_pq_videos():
 
				         for line in data
			
 
				     ]
			
 
				     return result
			
 
				+
			
 
				+
			
 
				def select_sensitive_words():
				    """
				    Read the sensitive keywords from MySQL.

				    Opens a new connection, selects the `keyword` column of every
				    wx_sensitive_word row whose `data_status` is 0, and closes the cursor
				    and connection again, including when the query raises.

				    :return: list with the keyword of every selected row
				    """
				    # NOTE(review): the host and credentials below are hard-coded in source.
				    # They are left unchanged here, but should be moved to configuration /
				    # environment and the password rotated.
				    connection = pymysql.connect(
				        host="rm-bp1k5853td1r25g3n690.mysql.rds.aliyuncs.com",  # database host, internal network address
				        port=3306,  # port
				        user="wx2016_longvideo",  # mysql user name
				        passwd="wx2016_longvideoP@assword1234",  # mysql login password
				        db="longvideo",  # database name
				        charset="utf8mb4"  # character set used for the connection
				    )
				    sql = "select `keyword` from wx_sensitive_word where `data_status` = 0"
				    try:
				        with connection.cursor() as cursor:
				            cursor.execute(sql)
				            data = cursor.fetchall()
				    finally:
				        # Without this every call left an open connection behind.
				        connection.close()
				    return [line[0] for line in data]
			
--- a/applications/functions/odps.py
+++ b/applications/functions/odps.py
@@ -1,35 +0,0 @@
 
				-"""
			
 
				-@author: luojunhui
			
 
				-"""
			
 
				-
			
 
				-from odps import ODPS
			
 
				-
			
 
				-
			
 
				-class PyODPS(object):
			
 
				-    """
			
 
				-    PyODPS class, get data from odps server
			
 
				-    """
			
 
				-
			
 
				-    def __init__(self):
			
 
				-        self.endpoint = "http://service.cn.maxcompute.aliyun.com/api"
			
 
				-        self.access_id = "LTAIWYUujJAm7CbH"
			
 
				-        self.access_key = "RfSjdiWwED1sGFlsjXv0DlfTnZTG1P"
			
 
				-        self.project = "loghubods"
			
 
				-
			
 
				-        self.od = ODPS(
			
 
				-            access_id=self.access_id,
			
 
				-            secret_access_key=self.access_key,
			
 
				-            endpoint=self.endpoint,
			
 
				-            project=self.project,
			
 
				-        )
			
 
				-
			
 
				-    def select(self, sql):
			
 
				-        """
			
 
				-        :param sql: 查询语句
			
 
				-        :return: odps_obj{}
			
 
				-        """
			
 
				-        result = []
			
 
				-        with self.od.execute_sql(sql).open_reader() as reader:
			
 
				-            for record in reader:
			
 
				-                result.append(record)
			
 
				-        return result
			
--- a/applications/log.py
+++ b/applications/log.py
@@ -6,8 +6,18 @@ import json
 
				 from aliyun.log import LogClient, PutLogsRequest, LogItem
			
 
				 
			
 
				 
			
 
				-def logging(code, trace_id=None, info=None, port=None, alg=None, function=None, data=None):
			
 
				+def logging(
			
 
				+        code,
			
 
				+        mode="prod",
			
 
				+        trace_id=None,
			
 
				+        info=None,
			
 
				+        port=None,
			
 
				+        alg=None,
			
 
				+        function=None,
			
 
				+        data=None
			
 
				+):
			
 
				     """
			
 
				+    :param mode: 生产模式 or  测试模式
			
 
				     :param trace_id: 请求唯一 id
			
 
				     :param data: 信息
			
 
				     :param code: 日志状态码
			
@@ -30,6 +40,7 @@ def logging(code, trace_id=None, info=None, port=None, alg=None, function=None,
 
				     log_group = []
			
 
				     log_item = LogItem()
			
 
				     contents = [
			
 
				+        (f"mode", str(mode)),
			
 
				         (f"code", str(code)),
			
 
				         (f"alg", str(alg)),
			
 
				         (f"function", str(function)),
			
--- a/applications/mq.py
+++ b/applications/mq.py
@@ -22,13 +22,11 @@ class MQ(object):
 
				 
			
 
				     def send_msg(self, params):
			
 
				         """
			
 
				-        发送 mq，并且记录 redis
			
 
				+        send msg to mq client
			
 
				         """
			
 
				-        account = params["ghId"]
			
 
				-
			
 
				         try:
			
 
				             msg = TopicMessage(json.dumps(params))
			
 
				-            message_key = account + str(uuid4())
			
 
				+            message_key = str(uuid4())
			
 
				             msg.set_message_key(message_key)
			
 
				             re_msg = self.producer.publish_message(msg)
			
 
				             print(re_msg)
			
--- a/applications/routes.py
+++ b/applications/routes.py
@@ -9,7 +9,7 @@ from quart import Blueprint, jsonify, request
 
				 
			
 
				 from applications.log import logging
			
 
				 from applications.process import ProcessParams
			
 
				-from applications.mq import MQ
			
 
				+from applications.search import search_videos
			
 
				 from applications.functions.common import find_videos_in_mysql, ask_kimi_and_save_to_local
			
 
				 
			
 
				 my_blueprint = Blueprint('kimi', __name__)
			
@@ -35,28 +35,35 @@ async def search_videos_from_the_web():
 
				     从web 搜索视频并且存储到票圈的视频库中
			
 
				     :return:
			
 
				     """
			
 
				-    mq = MQ(topic_name="search_spider_prod")
			
 
				+    params = await request.get_json()
			
 
				+    title = params['title']
			
 
				+    gh_id = params['ghId']
			
 
				     trace_id = "search-{}-{}".format(str(uuid.uuid4()), str(int(time.time())))
			
 
				+    params['trace_id'] = trace_id
			
 
				     logging(
			
 
				-        code="1001",
			
 
				+        code="2000",
			
 
				         info="搜索视频内容接口请求成功",
			
 
				         port="title_to_search",
			
 
				+        function="search_videos_from_the_web",
			
 
				         trace_id=trace_id
			
 
				     )
			
 
				-    params = await request.get_json()
			
 
				-    params['trace_id'] = trace_id
			
 
				-    title = params['title']
			
 
				     title_p = os.path.join(os.getcwd(), 'applications', 'static', "titles", "{}.json".format(title))
			
 
				     if os.path.exists(title_p):
			
 
				         logging(
			
 
				-            code="1002",
			
 
				+            code="2001",
			
 
				             info="该标题已经被 kimi 处理过，跳过请求 kimi 操作--- {}".format(title),
			
 
				-            function="process",
			
 
				+            function="search_videos_from_the_web",
			
 
				             trace_id=trace_id
			
 
				         )
			
 
				     else:
			
 
				         ask_kimi_and_save_to_local((title, trace_id, title_p))
			
 
				-    mq.send_msg(params=params)
			
 
				+    await asyncio.sleep(2)
			
 
				+    search_videos(
			
 
				+        title=title,
			
 
				+        video_path=title_p,
			
 
				+        trace_id=trace_id,
			
 
				+        gh_id=gh_id,
			
 
				+    )
			
 
				     res = {
			
 
				         "trace_id": trace_id,
			
 
				         "code": 0
			
@@ -73,10 +80,11 @@ async def find_in_mysql():
 
				     data = await request.get_json()
			
 
				     trace_id = data['traceId']
			
 
				     logging(
			
 
				-        code="1001",
			
 
				+        code="2000",
			
 
				         info="请求接口成功",
			
 
				         port="title_to_video",
			
 
				-        trace_id=data['traceId']
			
 
				+        trace_id=trace_id,
			
 
				+        function="find_in_mysql"
			
 
				     )
			
 
				     res = find_videos_in_mysql(trace_id=trace_id)
			
 
				     return jsonify(res)
			
--- a/applications/search.py
+++ b/applications/search.py
@@ -0,0 +1,177 @@
 
				+"""
			
 
				+@author: luojunhui
			
 
				+调用接口在微信内搜索视频
			
 
				+"""
			
 
				+import json
			
 
				+import time
			
 
				+import requests
			
 
				+
			
 
				+from applications.mq import MQ
			
 
				+from applications.log import logging
			
 
				+from applications.config import gh_id_dict
			
 
				+from applications.functions.item import VideoItem
			
 
				+from applications.functions.common import sensitive_flag
			
 
				+
			
 
				+
			
 
				def wx_search(keys, timeout=30):
				    """
				    Search WeChat videos by keyword through the crawler HTTP service.

				    POSTs a JSON body with the keyword, cursor "0" and content type
				    "video" to the crawler endpoint and returns the decoded JSON reply.

				    :param keys: keyword to search for
				    :param timeout: seconds to wait for the service before requests raises
				        a timeout error; pass None to wait indefinitely, which is what
				        happened before this parameter existed
				    :return: the JSON-decoded response body
				    """
				    url = "http://8.217.190.241:8888/crawler/wei_xin/keyword"
				    payload = json.dumps({
				        "keyword": keys,
				        "cursor": "0",
				        "content_type": "video"
				    })
				    headers = {
				        'Content-Type': 'application/json'
				    }
				    # Without a timeout an unresponsive crawler service blocks the caller
				    # forever.
				    response = requests.post(url, headers=headers, data=payload, timeout=timeout)
				    return response.json()
			
 
				+
			
 
				+
			
 
				def process_weixin_video_obj(video_obj, user, trace_id, title):
				    """
				    Build a VideoItem from one WeChat search result and publish it to the
				    "topic_crawler_etl_prod" MQ topic.

				    If the item fails validation (produce_item() returns False), nothing is
				    published and the failure is logged instead.

				    :param video_obj: one search-result item; the keys read here are
				        pubTime, hashDocID, videoUrl and image
				    :param user: dict with "uid" and "nick_name", stored as user_id and
				        user_name
				    :param trace_id: request trace id; also stored as out_user_id
				    :param title: video title stored as video_title
				    :return: None
				    """
				    platform = "weixin_search"
				    publish_time_stamp = int(video_obj['pubTime'])
				    item = VideoItem()
				    item.add_video_info("user_id", user["uid"])
				    item.add_video_info("user_name", user["nick_name"])
				    item.add_video_info("video_id", video_obj['hashDocID'])
				    item.add_video_info("video_title", title)
				    item.add_video_info("publish_time_stamp", publish_time_stamp)
				    item.add_video_info("video_url", video_obj["videoUrl"])
				    item.add_video_info("cover_url", video_obj["image"])
				    item.add_video_info("out_video_id", video_obj['hashDocID'])
				    item.add_video_info("out_user_id", trace_id)
				    item.add_video_info("platform", platform)
				    item.add_video_info("strategy", "search")
				    item.add_video_info("session", "{}-{}".format(platform, int(time.time())))
				    mq_obj = item.produce_item()
				    if not mq_obj:
				        # produce_item() returns False for an invalid item; publishing it
				        # would put the JSON literal `false` on the ETL topic.
				        # NOTE(review): "6003" is a new log code -- confirm it against the
				        # project's log-code table.
				        logging(
				            code="6003",
				            info="video item validation failed, not sent to ETL",
				            trace_id=trace_id,
				            data=video_obj
				        )
				        return
				    # The producer is only created once there is something to send.
				    etl_mq = MQ(topic_name="topic_crawler_etl_prod")
				    etl_mq.send_msg(params=mq_obj)
				    logging(
				        code="6002",
				        info="发送消息至 ETL",
				        trace_id=trace_id,
				        data=mq_obj
				    )
			
 
				+
			
 
				+
			
 
				def _strip_highlight(raw_title):
				    """Remove the search-highlight <em> markup and '#' from a result title."""
				    return raw_title.replace('<em class=\"highlight\">', '').replace('</em>', '').replace("#", "")


				def _search_first_allowed(keys, fail_info, title, trace_id):
				    """Run one WeChat search and return the first result whose title passes sensitive_flag, else None."""
				    result = wx_search(keys=keys)
				    if result['msg'] == '未知错误':
				        logging(
				            code="7001",
				            info=fail_info.format(title),
				            trace_id=trace_id
				        )
				        return None
				    for obj in result['data']['data']:
				        if sensitive_flag(_strip_highlight(obj['items'][0]['title'])):
				            return obj
				    return None


				def return_video(video_path, title, trace_id):
				    """
				    Find one WeChat video for a title.

				    Loads the JSON file at video_path and, when it holds data, searches
				    WeChat with up to three queries in priority order: the title itself,
				    the first entry of the file's "search_keys", and the file's "theme".
				    The first result whose title passes sensitive_flag is returned. A
				    query whose reply has msg '未知错误' is logged (code 7001) and skipped,
				    and so is a query whose key is missing or empty in the file.

				    :param video_path: path of the JSON file holding the data for the title
				    :param title: video title; used as the first query and in log messages
				    :param trace_id: request trace id used for logging
				    :return: the matching search-result object, or None when nothing
				        matches or the file holds no data
				    """
				    with open(video_path, encoding='utf-8') as f:
				        my_obj = json.loads(f.read())
				    if not my_obj:
				        logging(
				            code="7000",
				            info="标题--{}--kimi 挖掘数据失败".format(title),
				            trace_id=trace_id
				        )
				        return None

				    # Queries in priority order; the title is always searched first.
				    queries = [(title, "通过标题搜索失败---{}")]
				    search_keys = my_obj.get('search_keys') or []
				    if search_keys:
				        queries.append((search_keys[0], "通过搜索词搜索失败---{}"))
				    theme = my_obj.get('theme')
				    if theme:
				        queries.append((theme, "通过主题搜索失败---{}"))

				    for keys, fail_info in queries:
				        # `title` is passed through unchanged so that every failure log
				        # names the requested title rather than a candidate's title.
				        video = _search_first_allowed(keys, fail_info, title, trace_id)
				        if video:
				            return video
				    return None
			
 
				+
			
 
				+
			
 
				def search_videos(video_path, title, trace_id, gh_id):
				    """
				    Search WeChat for a video matching the title and forward it to ETL.

				    The account for gh_id is looked up in gh_id_dict first. An unknown
				    gh_id is logged as a failed search (code 7003) and nothing else is
				    done. Otherwise return_video() picks a result: a hit is logged (code
				    7002) and handed to process_weixin_video_obj with its cleaned title, a
				    miss is logged (code 7003).

				    :param video_path: path of the JSON file holding the data for the title
				    :param title: title to search for
				    :param trace_id: request trace id used for logging
				    :param gh_id: account id, a key of gh_id_dict
				    :return: None
				    """
				    user = gh_id_dict.get(gh_id)
				    if user is None:
				        # An unknown gh_id used to surface as a TypeError inside
				        # process_weixin_video_obj, after the search had already run.
				        logging(
				            code="7003",
				            info="视频搜索失败",
				            trace_id=trace_id,
				            data={"error": "unknown gh_id", "gh_id": gh_id}
				        )
				        return

				    video_obj = return_video(video_path, title, trace_id)
				    if not video_obj:
				        logging(
				            code="7003",
				            info="视频搜索失败",
				            trace_id=trace_id
				        )
				        return

				    logging(
				        code="7002",
				        info="视频搜索成功",
				        trace_id=trace_id,
				        data=video_obj
				    )
				    first_item = video_obj['items'][0]
				    # Drop the search-highlight markup and '#' from the result's title.
				    found_title = first_item['title'].replace('<em class=\"highlight\">', '').replace('</em>', '').replace("#", "")
				    process_weixin_video_obj(
				        video_obj=first_item,
				        user=user,
				        trace_id=trace_id,
				        title=found_title
				    )