hai 2 anos · fd890704f8
--- a/README.md
+++ b/README.md
@@ -1,3 +1,3 @@
 
															 # title_with_video
														
 
															-通过长文本标题+kimi匹配票圈内相关视频，在公众号文章中插入小程序分享页面，目的是为了给小程序倒流
														
 
															+通过长文本标题+kimi匹配票圈内相关视频，在公众号文章中插入小程序分享页面，目的是为了给小程序导流
														
--- a/app.py
+++ b/app.py
@@ -0,0 +1,15 @@
 
															+"""
														
 
															+@author: luojunhui
														
 
															+"""
														
 
															+from quart import Quart
														
 
															+from applications.routes.title_routes import my_blueprint
														
 
															+
														
 
															+# 初始化 App
														
 
															+app = Quart(__name__)
														
 
															+
														
 
															+# 注册蓝图
														
 
															+app.register_blueprint(my_blueprint)
														
 
															+
														
 
															+
														
 
															+if __name__ == '__main__':
														
 
															+    app.run(debug=True)
														
--- a/applications/functions/ask_kimi.py
+++ b/applications/functions/ask_kimi.py
@@ -0,0 +1,55 @@
 
															+"""
														
 
															+@author: luojunhui
														
 
															+"""
														
 
															+"""
														
 
															+@author: luojunhui
														
 
															+"""
														
 
															+import json
														
 
															+from openai import OpenAI
														
 
															+
														
 
															+
														
 
															+def ask_kimi(question):
														
 
															+    """
														
 
															+    Ask Kimi for information
														
 
															+    :param question: tiny text
														
 
															+    :return: "{}"
														
 
															+    """
														
 
															+    single_title_prompt = """
														
 
															+        我会给你一个视频标题，需要你帮我用你所学的知识来帮我分析出以下信息，信息我都写到 json 里面了
														
 
															+        {
														
 
															+            "key_words": [],  # 返回三个关键词
														
 
															+            "search_keys": [], # 标题可能的搜索关键词，返回 3 个
														
 
															+            "extra_keys": [], # 关心这个视频的用户还会关心哪些关键词， 返回 3 个
														
 
															+            "tone": 标题的语气，用一个词概括,
														
 
															+            "target_audience": 标题的受众群体，用一个词概括,
														
 
															+            "target_age": 标题的受众年龄段，从 老年， 中年，青年，小孩， 不限， 这五个里面选择，
														
 
															+            "target_gender": 受众性别,
														
 
															+            "address": 受众可能属于哪个城市,
														
 
															+            "theme": 标题的主题， 用一个词概括
														
 
															+        }
														
 
															+        只需要返回一个 json，key 和上面的一样，
														
 
															+        我给你的标题是: 
														
 
															+        """
														
 
															+    client = OpenAI(
														
 
															+        api_key='sk-tz1VaKqksTzk0F8HxlU4YVGwj7oa1g0c0puGNUZrdn9MDtzm',
														
 
															+        base_url="https://api.moonshot.cn/v1"
														
 
															+    )
														
 
															+    chat_completion = client.chat.completions.create(
														
 
															+        messages=[
														
 
															+            {
														
 
															+                "role": "user",
														
 
															+                "content": single_title_prompt + question,
														
 
															+            }
														
 
															+        ],
														
 
															+        model="moonshot-v1-8k",
														
 
															+    )
														
 
															+    response = chat_completion.choices[0].message.content
														
 
															+    try:
														
 
															+        response = json.loads(response)
														
 
															+        return response
														
 
															+    except:
														
 
															+        return {}
														
 
															+
														
 
															+
														
 
															+
														
 
															+
														
--- a/applications/functions/calculate.py
+++ b/applications/functions/calculate.py
@@ -0,0 +1,127 @@
 
															+"""
														
 
															+@author: luojunhui
														
 
															+"""
														
 
															+import json
														
 
															+import os
														
 
															+
														
 
															+
														
 
															+def read_single_file(filename):
														
 
															+    """
														
 
															+    :param filename:
														
 
															+    """
														
 
															+    with open(filename, encoding="utf-8") as f:
														
 
															+        data = json.loads(f.read())
														
 
															+    if data:
														
 
															+        return data
														
 
															+    else:
														
 
															+        return {}
														
 
															+
														
 
															+
														
 
															+def compute_similarity(file_1, file_2):
														
 
															+    """
														
 
															+    计算
														
 
															+    :param file_1:
														
 
															+    :param file_2:
														
 
															+    :return:
														
 
															+    """
														
 
															+    data_1 = read_single_file(file_1)
														
 
															+    data_2 = read_single_file(file_2)
														
 
															+
														
 
															+    def calculate_v1(d1, d2):
														
 
															+        """
														
 
															+        通过交并集来判断
														
 
															+        :param d1:
														
 
															+        :param d2:
														
 
															+        :return:
														
 
															+        """
														
 
															+        f1_keys = set(d1["key_words"])
														
 
															+        f2_keys = set(d2["key_words"])
														
 
															+        keys_union = f1_keys | f2_keys
														
 
															+        keys_intersection = f1_keys & f2_keys
														
 
															+        f1_search_keys = set(d1["search_keys"])
														
 
															+        f2_search_keys = set(d2["search_keys"])
														
 
															+        search_keys_union = f1_search_keys | f2_search_keys
														
 
															+        search_keys_intersection = f1_search_keys & f2_search_keys
														
 
															+        f1_extra_keys = set(d1["extra_keys"])
														
 
															+        f2_extra_keys = set(d2["extra_keys"])
														
 
															+        extra_keys_union = f1_extra_keys | f2_extra_keys
														
 
															+        extra_keys_intersection = f1_extra_keys & f2_extra_keys
														
 
															+        score_1 = len(keys_intersection) / len(keys_union)
														
 
															+        score_2 = len(search_keys_intersection) / len(search_keys_union)
														
 
															+        score_3 = len(extra_keys_intersection) / len(extra_keys_union)
														
 
															+        return score_1 * 0.4 + score_2 * 0.4 + score_3 * 0.2
														
 
															+
														
 
															+    def calculate_v2(d1, d2):
														
 
															+        """
														
 
															+        计算方法 v2
														
 
															+        :param d1:
														
 
															+        :param d2:
														
 
															+        :return:
														
 
															+        """
														
 
															+        score = 0
														
 
															+        tone_1 = d1["tone"]
														
 
															+        tone_2 = d2["tone"]
														
 
															+        if tone_1 == tone_2:
														
 
															+            score += 0.1
														
 
															+        target_audience_1 = d1["target_audience"]
														
 
															+        target_audience_2 = d2["target_audience"]
														
 
															+        if target_audience_1 == target_audience_2:
														
 
															+            score += 0.2
														
 
															+        target_age_1 = d1["target_age"]
														
 
															+        target_age_2 = d2["target_age"]
														
 
															+        if target_age_1 == target_age_2:
														
 
															+            score += 0.2
														
 
															+        address_1 = d1["address"]
														
 
															+        address_2 = d2["address"]
														
 
															+        if address_1 == address_2:
														
 
															+            score += 0.2
														
 
															+        gender_1 = d1["theme"]
														
 
															+        gender_2 = d2["theme"]
														
 
															+        if gender_1 == gender_2:
														
 
															+            score += 0.5
														
 
															+        return score
														
 
															+
														
 
															+    if data_1 and data_2:
														
 
															+        try:
														
 
															+            score_1 = calculate_v1(data_1, data_2)
														
 
															+            score_2 = calculate_v2(data_1, data_2)
														
 
															+            return score_1, score_2
														
 
															+        except Exception as e:
														
 
															+            return 0, 0
														
 
															+    else:
														
 
															+        return 0, 0
														
 
															+
														
 
															+
														
 
															+def title_mix(title_p, dt):
														
 
															+    """
														
 
															+    执行代码
														
 
															+    :param title_p:
														
 
															+    :param dt: dt
														
 
															+    """
														
 
															+    json_path = os.path.join(os.getcwd(), 'applications', 'static', dt)
														
 
															+    # 处理标题信息
														
 
															+    files = os.listdir(json_path)
														
 
															+    pq_files = [os.path.join(json_path, file) for file in files]
														
 
															+    score_list_1 = []
														
 
															+    score_list_2 = []
														
 
															+    for file in pq_files:
														
 
															+        file_name = file.split('/')[-1].replace(".json", "")
														
 
															+        v_id = file_name.split('_')[1]
														
 
															+        uid = file_name.split('_')[0]
														
 
															+        score1, score2 = compute_similarity(title_p, file)
														
 
															+        score_list_1.append([score1, v_id, uid])
														
 
															+        score_list_2.append([score2, v_id, uid])
														
 
															+
														
 
															+    s1_list = sorted(score_list_1, key=lambda x: x[0], reverse=True)
														
 
															+    s2_list = sorted(score_list_2, key=lambda x: x[0], reverse=True)
														
 
															+    title = title_p.split("/")[-1].replace(".json", "")
														
 
															+    obj = {
														
 
															+        "title": title,
														
 
															+        "s1_vid": s1_list[0][1],
														
 
															+        "s1_score": s1_list[0][0],
														
 
															+        "s1_uid": s1_list[0][2],
														
 
															+        "s2_vid": s2_list[0][1],
														
 
															+        "s2_score": s2_list[0][0],
														
 
															+        "s2_uid": s2_list[0][2]
														
 
															+    }
														
 
															+    return obj
														
--- a/applications/functions/odps.py
+++ b/applications/functions/odps.py
@@ -0,0 +1,35 @@
 
															+"""
														
 
															+@author: luojunhui
														
 
															+"""
														
 
															+
														
 
															+from odps import ODPS
														
 
															+
														
 
															+
														
 
															+class PyODPS(object):
														
 
															+    """
														
 
															+    PyODPS class, get data from odps server
														
 
															+    """
														
 
															+
														
 
															+    def __init__(self):
														
 
															+        self.endpoint = "http://service.cn.maxcompute.aliyun.com/api"
														
 
															+        self.access_id = "LTAIWYUujJAm7CbH"
														
 
															+        self.access_key = "RfSjdiWwED1sGFlsjXv0DlfTnZTG1P"
														
 
															+        self.project = "loghubods"
														
 
															+
														
 
															+        self.od = ODPS(
														
 
															+            access_id=self.access_id,
														
 
															+            secret_access_key=self.access_key,
														
 
															+            endpoint=self.endpoint,
														
 
															+            project=self.project,
														
 
															+        )
														
 
															+
														
 
															+    def select(self, sql):
														
 
															+        """
														
 
															+        :param sql: 查询语句
														
 
															+        :return: odps_obj{}
														
 
															+        """
														
 
															+        result = []
														
 
															+        with self.od.execute_sql(sql).open_reader() as reader:
														
 
															+            for record in reader:
														
 
															+                result.append(record)
														
 
															+        return result
														
--- a/applications/process.py
+++ b/applications/process.py
@@ -0,0 +1,108 @@
 
															+"""
														
 
															+@author: luojunhui
														
 
															+对请求进行操作
														
 
															+"""
														
 
															+
														
 
															+import os
														
 
															+import json
														
 
															+import uuid
														
 
															+import requests
														
 
															+import urllib.parse
														
 
															+
														
 
															+from applications.functions.ask_kimi import ask_kimi
														
 
															+from applications.functions.calculate import title_mix
														
 
															+
														
 
															+
														
 
															+class ProcessParams(object):
														
 
															+    """
														
 
															+    Params Analysis
														
 
															+    """
														
 
															+
														
 
															+    @classmethod
														
 
															+    def get_params(cls, data):
														
 
															+        """
														
 
															+        "accountName": "公众号名称",
														
 
															+        "content": "文章正文",
														
 
															+        "title": "文章标题",
														
 
															+        "cover": "封面链接"
														
 
															+        :param data:
														
 
															+        :return: title
														
 
															+        """
														
 
															+        return data['title']
														
 
															+
														
 
															+    @classmethod
														
 
															+    def ask_kimi_and_save_to_local(cls, title):
														
 
															+        """
														
 
															+        save file to local
														
 
															+        :param title:
														
 
															+        :return:
														
 
															+        """
														
 
															+        save_path = os.path.join(os.getcwd(), 'applications', 'static', "titles", "{}.json".format(title))
														
 
															+        if os.path.exists(save_path):
														
 
															+            return
														
 
															+        else:
														
 
															+            os.makedirs(os.path.dirname(save_path), exist_ok=True)
														
 
															+            if not title:
														
 
															+                result = "{}"
														
 
															+            else:
														
 
															+                result = ask_kimi(title)
														
 
															+            with open(save_path, "w", encoding="utf-8") as f:
														
 
															+                f.write(json.dumps(result, ensure_ascii=False))
														
 
															+
														
 
															+    @classmethod
														
 
															+    def create_gzh_path(cls, video_id, shared_uid):
														
 
															+        """
														
 
															+        :param video_id: 视频 id
														
 
															+        :param shared_uid: 分享 id
														
 
															+        """
														
 
															+        root_share_id = str(uuid.uuid4())
														
 
															+        url = f"pages/user-videos?id={video_id}&su={shared_uid}&fromGzh=1&rootShareId={root_share_id}&shareId={root_share_id}"
														
 
															+        return root_share_id, f"pages/category?jumpPage={urllib.parse.quote(url)}"
														
 
															+
														
 
															+    @classmethod
														
 
															+    def request_for_info(cls, video_id):
														
 
															+        """
														
 
															+        请求数据
														
 
															+        :param video_id:
														
 
															+        :return:
														
 
															+        """
														
 
															+        url = "https://longvideoapi.piaoquantv.com/longvideoapi/openapi/video/batchSelectVideoInfo"
														
 
															+        data = {
														
 
															+            "videoIdList": [video_id]
														
 
															+        }
														
 
															+        header = {
														
 
															+            "Content-Type": "application/json",
														
 
															+        }
														
 
															+        response = requests.post(url, headers=header, data=json.dumps(data))
														
 
															+        return response.json()
														
 
															+
														
 
															+    @classmethod
														
 
															+    def process(cls, data):
														
 
															+        """执行代码"""
														
 
															+        title = cls.get_params(data)
														
 
															+        title_p = os.path.join(os.getcwd(), 'applications', 'static', "titles", "{}.json".format(title))
														
 
															+        if os.path.exists(title_p):
														
 
															+            result = title_mix(title_p=title_p, dt="20240417")
														
 
															+        else:
														
 
															+            cls.ask_kimi_and_save_to_local(title)
														
 
															+            result = title_mix(title_p=title_p, dt="20240417")
														
 
															+        uid, video_id = result['s1_uid'], result['s1_vid']
														
 
															+        root_share_id, productionPath = cls.create_gzh_path(uid, video_id)
														
 
															+        response = cls.request_for_info(video_id)
														
 
															+        productionCover = response["data"][0]['coverImg']['coverImgPath']
														
 
															+        productionName = response["data"][0]['title']
														
 
															+        programAvatar = "Avatar"
														
 
															+        programId = "wx89e7eb06478361d7"
														
 
															+        programName = "票圈vlog"
														
 
															+        source = "Web"
														
 
															+        result = {
														
 
															+            "productionCover": productionCover,
														
 
															+            "productionName": productionName,
														
 
															+            "programAvatar": programAvatar,
														
 
															+            "programId": programId,
														
 
															+            "programName": programName,
														
 
															+            "source": source,
														
 
															+            "rootShareId": root_share_id,
														
 
															+            "productionPath": productionPath
														
 
															+        }
														
 
															+        return result
														
--- a/applications/routes/title_routes.py
+++ b/applications/routes/title_routes.py
@@ -0,0 +1,30 @@
 
															+"""
														
 
															+@author: luojunhui
														
 
															+"""
														
 
															+from quart import Blueprint, jsonify, request
														
 
															+
														
 
															+from applications.process import ProcessParams
														
 
															+
														
 
															+
														
 
															+my_blueprint = Blueprint('kimi', __name__)
														
 
															+
														
 
															+
														
 
															+@my_blueprint.route('/healthcheck')
														
 
															+async def hello():
														
 
															+    """
														
 
															+    Hello World Test
														
 
															+    :return:
														
 
															+    """
														
 
															+    return jsonify({'message': 'Hello, World!'})
														
 
															+
														
 
															+
														
 
															+@my_blueprint.route('/title_to_video', methods=['POST'])
														
 
															+async def post_data():
														
 
															+    """
														
 
															+    请求接口代码
														
 
															+    :return:
														
 
															+    """
														
 
															+    p = ProcessParams()
														
 
															+    data = await request.get_json()
														
 
															+    processed_data = p.process(data)
														
 
															+    return jsonify(processed_data)
														
--- a/applications/static/logo.png
+++ b/applications/static/logo.png
--- a/hypercorn_config.toml
+++ b/hypercorn_config.toml
@@ -0,0 +1,3 @@
 
															+reload = true
														
 
															+bind = "0.0.0.0:8000"
														
 
															+workers = 2
														
--- a/read_data_from_odps_daily.py
+++ b/read_data_from_odps_daily.py
@@ -0,0 +1,81 @@
 
															+"""
														
 
															+@author: luojunhui
														
 
															+Read data from ODPS daily and save file to static folder
														
 
															+"""
														
 
															+import json
														
 
															+import os
														
 
															+import time
														
 
															+import schedule
														
 
															+from datetime import datetime, timedelta
														
 
															+from concurrent.futures.thread import ThreadPoolExecutor
														
 
															+
														
 
															+from applications.functions.odps import PyODPS
														
 
															+from applications.functions.ask_kimi import ask_kimi
														
 
															+
														
 
															+
														
 
															+def read_data_from_odps_daily(dt):
														
 
															+    """
														
 
															+    Read data from ODPS daily and save file to static folder
														
 
															+    :return:
														
 
															+    """
														
 
															+    sql = f"""select * from loghubods.lastday_return where dt = '{dt}' limit 20;"""
														
 
															+    data_list = PyODPS().select(sql)
														
 
															+    obj_list = [
														
 
															+        {
														
 
															+            "video_id": obj['videoid'],
														
 
															+            "title": obj['title'],
														
 
															+            "uid": obj['uid'],
														
 
															+            "dt": dt
														
 
															+        } for obj in data_list
														
 
															+    ]
														
 
															+    return obj_list
														
 
															+
														
 
															+
														
 
															+def save_file_to_local(obj):
														
 
															+    """
														
 
															+    use kimi to mine data info and save to local file
														
 
															+    :param obj:
														
 
															+    :return:
														
 
															+    """
														
 
															+    video_id = obj['video_id']
														
 
															+    title = obj['title']
														
 
															+    uid = obj['uid']
														
 
															+    dt = obj["dt"]
														
 
															+    save_path = os.path.join(os.getcwd(), 'applications', 'static', dt, "{}_{}.json".format(uid, video_id))
														
 
															+    print(save_path)
														
 
															+    if os.path.exists(save_path):
														
 
															+        return
														
 
															+    else:
														
 
															+        os.makedirs(os.path.dirname(save_path), exist_ok=True)
														
 
															+        if not title:
														
 
															+            result = {}
														
 
															+        else:
														
 
															+            result = ask_kimi(title)
														
 
															+        print(result)
														
 
															+        with open(save_path, "w", encoding="utf-8") as f:
														
 
															+            f.write(json.dumps(result, ensure_ascii=False))
														
 
															+
														
 
															+
														
 
															+def run():
														
 
															+    """
														
 
															+    Read data from ODPS daily and save file to static folder with thread pool
														
 
															+    :return:
														
 
															+    """
														
 
															+    today = datetime.today()
														
 
															+    yesterday = today - timedelta(days=1)
														
 
															+    yesterday_str = yesterday.strftime("%Y%m%d")
														
 
															+    data_list = read_data_from_odps_daily(yesterday_str)
														
 
															+    # print(data_list)
														
 
															+    for obj in data_list:
														
 
															+        save_file_to_local(obj)
														
 
															+    # with ThreadPoolExecutor(max_workers=10) as Pool:
														
 
															+    #     Pool.map(save_file_to_local, data_list)
														
 
															+
														
 
															+
														
 
															+if __name__ == '__main__':
														
 
															+    run()
														
 
															+    # # 设置任务每天的 9:00 执行
														
 
															+    # schedule.every().day.at("09:00").do(run)
														
 
															+    # while True:
														
 
															+    #     schedule.run_pending()
														
 
															+    #     time.sleep(1)
														
--- a/test.py
+++ b/test.py
@@ -0,0 +1,36 @@
 
															+"""
														
 
															+@author: luojunhui
														
 
															+"""
														
 
															+import time
														
 
															+import requests
														
 
															+import argparse
														
 
															+from concurrent.futures import ThreadPoolExecutor
														
 
															+
														
 
															+
														
 
															+def request_data(url):
														
 
															+    # index = _url.split("#")[0]
														
 
															+    # url = _url.split("#")[1]
														
 
															+    body = {
														
 
															+        "title": "发布不幸消息"
														
 
															+    }
														
 
															+    t = time.time()
														
 
															+    res = requests.post(url, json=body)
														
 
															+    e = time.time()
														
 
															+    # print(index)
														
 
															+    print(e - t)
														
 
															+    print(res.text)
														
 
															+    # print(res.json())
														
 
															+
														
 
															+
														
 
															+if __name__ == "__main__":
														
 
															+    # parser = argparse.ArgumentParser()  # 新建参数解释器对象
														
 
															+    # parser.add_argument("--thread")
														
 
															+    # args = parser.parse_args()
														
 
															+    # thread = int(args.thread)
														
 
															+    dt = ["http://127.0.0.1:8000/title_to_video"]
														
 
															+    # total_s = time.time()
														
 
															+    request_data(dt[0])
														
 
															+    # with ThreadPoolExecutor(max_workers=thread) as pool:
														
 
															+    #     pool.map(request_data, dt)
														
 
															+    # total_e = time.time()
														
 
															+    # print(total_e - total_s)