пре 1 година · fd890704f8
--- a/README.md
+++ b/README.md
@@ -1,3 +1,3 @@
 
				 # title_with_video
			
 
				 
			
 
				-通过长文本标题+kimi匹配票圈内相关视频，在公众号文章中插入小程序分享页面，目的是为了给小程序倒流
			
 
				+通过长文本标题+kimi匹配票圈内相关视频，在公众号文章中插入小程序分享页面，目的是为了给小程序导流
			
--- a/app.py
+++ b/app.py
@@ -0,0 +1,15 @@
 
				+"""
			
 
				+@author: luojunhui
			
 
				+"""
			
 
				+from quart import Quart
			
 
				+from applications.routes.title_routes import my_blueprint
			
 
				+
			
 
				+# 初始化 App
			
 
				+app = Quart(__name__)
			
 
				+
			
 
				+# 注册蓝图
			
 
				+app.register_blueprint(my_blueprint)
			
 
				+
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+    app.run(debug=True)
			
--- a/applications/functions/ask_kimi.py
+++ b/applications/functions/ask_kimi.py
@@ -0,0 +1,55 @@
 
				+"""
			
 
				+@author: luojunhui
			
 
				+"""
			
 
				+"""
			
 
				+@author: luojunhui
			
 
				+"""
			
 
				+import json
			
 
				+from openai import OpenAI
			
 
				+
			
 
				+
			
 
				+def ask_kimi(question):
			
 
				+    """
			
 
				+    Ask Kimi for information
			
 
				+    :param question: tiny text
			
 
				+    :return: "{}"
			
 
				+    """
			
 
				+    single_title_prompt = """
			
 
				+        我会给你一个视频标题，需要你帮我用你所学的知识来帮我分析出以下信息，信息我都写到 json 里面了
			
 
				+        {
			
 
				+            "key_words": [],  # 返回三个关键词
			
 
				+            "search_keys": [], # 标题可能的搜索关键词，返回 3 个
			
 
				+            "extra_keys": [], # 关心这个视频的用户还会关心哪些关键词， 返回 3 个
			
 
				+            "tone": 标题的语气，用一个词概括,
			
 
				+            "target_audience": 标题的受众群体，用一个词概括,
			
 
				+            "target_age": 标题的受众年龄段，从 老年， 中年，青年，小孩， 不限， 这五个里面选择，
			
 
				+            "target_gender": 受众性别,
			
 
				+            "address": 受众可能属于哪个城市,
			
 
				+            "theme": 标题的主题， 用一个词概括
			
 
				+        }
			
 
				+        只需要返回一个 json，key 和上面的一样，
			
 
				+        我给你的标题是: 
			
 
				+        """
			
 
				+    client = OpenAI(
			
 
				+        api_key='sk-tz1VaKqksTzk0F8HxlU4YVGwj7oa1g0c0puGNUZrdn9MDtzm',
			
 
				+        base_url="https://api.moonshot.cn/v1"
			
 
				+    )
			
 
				+    chat_completion = client.chat.completions.create(
			
 
				+        messages=[
			
 
				+            {
			
 
				+                "role": "user",
			
 
				+                "content": single_title_prompt + question,
			
 
				+            }
			
 
				+        ],
			
 
				+        model="moonshot-v1-8k",
			
 
				+    )
			
 
				+    response = chat_completion.choices[0].message.content
			
 
				+    try:
			
 
				+        response = json.loads(response)
			
 
				+        return response
			
 
				+    except:
			
 
				+        return {}
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
--- a/applications/functions/calculate.py
+++ b/applications/functions/calculate.py
@@ -0,0 +1,127 @@
 
				+"""
			
 
				+@author: luojunhui
			
 
				+"""
			
 
				+import json
			
 
				+import os
			
 
				+
			
 
				+
			
 
				+def read_single_file(filename):
			
 
				+    """
			
 
				+    :param filename:
			
 
				+    """
			
 
				+    with open(filename, encoding="utf-8") as f:
			
 
				+        data = json.loads(f.read())
			
 
				+    if data:
			
 
				+        return data
			
 
				+    else:
			
 
				+        return {}
			
 
				+
			
 
				+
			
 
				+def compute_similarity(file_1, file_2):
			
 
				+    """
			
 
				+    计算
			
 
				+    :param file_1:
			
 
				+    :param file_2:
			
 
				+    :return:
			
 
				+    """
			
 
				+    data_1 = read_single_file(file_1)
			
 
				+    data_2 = read_single_file(file_2)
			
 
				+
			
 
				+    def calculate_v1(d1, d2):
			
 
				+        """
			
 
				+        通过交并集来判断
			
 
				+        :param d1:
			
 
				+        :param d2:
			
 
				+        :return:
			
 
				+        """
			
 
				+        f1_keys = set(d1["key_words"])
			
 
				+        f2_keys = set(d2["key_words"])
			
 
				+        keys_union = f1_keys | f2_keys
			
 
				+        keys_intersection = f1_keys & f2_keys
			
 
				+        f1_search_keys = set(d1["search_keys"])
			
 
				+        f2_search_keys = set(d2["search_keys"])
			
 
				+        search_keys_union = f1_search_keys | f2_search_keys
			
 
				+        search_keys_intersection = f1_search_keys & f2_search_keys
			
 
				+        f1_extra_keys = set(d1["extra_keys"])
			
 
				+        f2_extra_keys = set(d2["extra_keys"])
			
 
				+        extra_keys_union = f1_extra_keys | f2_extra_keys
			
 
				+        extra_keys_intersection = f1_extra_keys & f2_extra_keys
			
 
				+        score_1 = len(keys_intersection) / len(keys_union)
			
 
				+        score_2 = len(search_keys_intersection) / len(search_keys_union)
			
 
				+        score_3 = len(extra_keys_intersection) / len(extra_keys_union)
			
 
				+        return score_1 * 0.4 + score_2 * 0.4 + score_3 * 0.2
			
 
				+
			
 
				+    def calculate_v2(d1, d2):
			
 
				+        """
			
 
				+        计算方法 v2
			
 
				+        :param d1:
			
 
				+        :param d2:
			
 
				+        :return:
			
 
				+        """
			
 
				+        score = 0
			
 
				+        tone_1 = d1["tone"]
			
 
				+        tone_2 = d2["tone"]
			
 
				+        if tone_1 == tone_2:
			
 
				+            score += 0.1
			
 
				+        target_audience_1 = d1["target_audience"]
			
 
				+        target_audience_2 = d2["target_audience"]
			
 
				+        if target_audience_1 == target_audience_2:
			
 
				+            score += 0.2
			
 
				+        target_age_1 = d1["target_age"]
			
 
				+        target_age_2 = d2["target_age"]
			
 
				+        if target_age_1 == target_age_2:
			
 
				+            score += 0.2
			
 
				+        address_1 = d1["address"]
			
 
				+        address_2 = d2["address"]
			
 
				+        if address_1 == address_2:
			
 
				+            score += 0.2
			
 
				+        gender_1 = d1["theme"]
			
 
				+        gender_2 = d2["theme"]
			
 
				+        if gender_1 == gender_2:
			
 
				+            score += 0.5
			
 
				+        return score
			
 
				+
			
 
				+    if data_1 and data_2:
			
 
				+        try:
			
 
				+            score_1 = calculate_v1(data_1, data_2)
			
 
				+            score_2 = calculate_v2(data_1, data_2)
			
 
				+            return score_1, score_2
			
 
				+        except Exception as e:
			
 
				+            return 0, 0
			
 
				+    else:
			
 
				+        return 0, 0
			
 
				+
			
 
				+
			
 
				+def title_mix(title_p, dt):
			
 
				+    """
			
 
				+    执行代码
			
 
				+    :param title_p:
			
 
				+    :param dt: dt
			
 
				+    """
			
 
				+    json_path = os.path.join(os.getcwd(), 'applications', 'static', dt)
			
 
				+    # 处理标题信息
			
 
				+    files = os.listdir(json_path)
			
 
				+    pq_files = [os.path.join(json_path, file) for file in files]
			
 
				+    score_list_1 = []
			
 
				+    score_list_2 = []
			
 
				+    for file in pq_files:
			
 
				+        file_name = file.split('/')[-1].replace(".json", "")
			
 
				+        v_id = file_name.split('_')[1]
			
 
				+        uid = file_name.split('_')[0]
			
 
				+        score1, score2 = compute_similarity(title_p, file)
			
 
				+        score_list_1.append([score1, v_id, uid])
			
 
				+        score_list_2.append([score2, v_id, uid])
			
 
				+
			
 
				+    s1_list = sorted(score_list_1, key=lambda x: x[0], reverse=True)
			
 
				+    s2_list = sorted(score_list_2, key=lambda x: x[0], reverse=True)
			
 
				+    title = title_p.split("/")[-1].replace(".json", "")
			
 
				+    obj = {
			
 
				+        "title": title,
			
 
				+        "s1_vid": s1_list[0][1],
			
 
				+        "s1_score": s1_list[0][0],
			
 
				+        "s1_uid": s1_list[0][2],
			
 
				+        "s2_vid": s2_list[0][1],
			
 
				+        "s2_score": s2_list[0][0],
			
 
				+        "s2_uid": s2_list[0][2]
			
 
				+    }
			
 
				+    return obj
			
--- a/applications/functions/odps.py
+++ b/applications/functions/odps.py
@@ -0,0 +1,35 @@
 
				+"""
			
 
				+@author: luojunhui
			
 
				+"""
			
 
				+
			
 
				+from odps import ODPS
			
 
				+
			
 
				+
			
 
				+class PyODPS(object):
			
 
				+    """
			
 
				+    PyODPS class, get data from odps server
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self):
			
 
				+        self.endpoint = "http://service.cn.maxcompute.aliyun.com/api"
			
 
				+        self.access_id = "LTAIWYUujJAm7CbH"
			
 
				+        self.access_key = "RfSjdiWwED1sGFlsjXv0DlfTnZTG1P"
			
 
				+        self.project = "loghubods"
			
 
				+
			
 
				+        self.od = ODPS(
			
 
				+            access_id=self.access_id,
			
 
				+            secret_access_key=self.access_key,
			
 
				+            endpoint=self.endpoint,
			
 
				+            project=self.project,
			
 
				+        )
			
 
				+
			
 
				+    def select(self, sql):
			
 
				+        """
			
 
				+        :param sql: 查询语句
			
 
				+        :return: odps_obj{}
			
 
				+        """
			
 
				+        result = []
			
 
				+        with self.od.execute_sql(sql).open_reader() as reader:
			
 
				+            for record in reader:
			
 
				+                result.append(record)
			
 
				+        return result
			
--- a/applications/process.py
+++ b/applications/process.py
@@ -0,0 +1,108 @@
 
				+"""
			
 
				+@author: luojunhui
			
 
				+对请求进行操作
			
 
				+"""
			
 
				+
			
 
				+import os
			
 
				+import json
			
 
				+import uuid
			
 
				+import requests
			
 
				+import urllib.parse
			
 
				+
			
 
				+from applications.functions.ask_kimi import ask_kimi
			
 
				+from applications.functions.calculate import title_mix
			
 
				+
			
 
				+
			
 
				+class ProcessParams(object):
			
 
				+    """
			
 
				+    Params Analysis
			
 
				+    """
			
 
				+
			
 
				+    @classmethod
			
 
				+    def get_params(cls, data):
			
 
				+        """
			
 
				+        "accountName": "公众号名称",
			
 
				+        "content": "文章正文",
			
 
				+        "title": "文章标题",
			
 
				+        "cover": "封面链接"
			
 
				+        :param data:
			
 
				+        :return: title
			
 
				+        """
			
 
				+        return data['title']
			
 
				+
			
 
				+    @classmethod
			
 
				+    def ask_kimi_and_save_to_local(cls, title):
			
 
				+        """
			
 
				+        save file to local
			
 
				+        :param title:
			
 
				+        :return:
			
 
				+        """
			
 
				+        save_path = os.path.join(os.getcwd(), 'applications', 'static', "titles", "{}.json".format(title))
			
 
				+        if os.path.exists(save_path):
			
 
				+            return
			
 
				+        else:
			
 
				+            os.makedirs(os.path.dirname(save_path), exist_ok=True)
			
 
				+            if not title:
			
 
				+                result = "{}"
			
 
				+            else:
			
 
				+                result = ask_kimi(title)
			
 
				+            with open(save_path, "w", encoding="utf-8") as f:
			
 
				+                f.write(json.dumps(result, ensure_ascii=False))
			
 
				+
			
 
				+    @classmethod
			
 
				+    def create_gzh_path(cls, video_id, shared_uid):
			
 
				+        """
			
 
				+        :param video_id: 视频 id
			
 
				+        :param shared_uid: 分享 id
			
 
				+        """
			
 
				+        root_share_id = str(uuid.uuid4())
			
 
				+        url = f"pages/user-videos?id={video_id}&su={shared_uid}&fromGzh=1&rootShareId={root_share_id}&shareId={root_share_id}"
			
 
				+        return root_share_id, f"pages/category?jumpPage={urllib.parse.quote(url)}"
			
 
				+
			
 
				+    @classmethod
			
 
				+    def request_for_info(cls, video_id):
			
 
				+        """
			
 
				+        请求数据
			
 
				+        :param video_id:
			
 
				+        :return:
			
 
				+        """
			
 
				+        url = "https://longvideoapi.piaoquantv.com/longvideoapi/openapi/video/batchSelectVideoInfo"
			
 
				+        data = {
			
 
				+            "videoIdList": [video_id]
			
 
				+        }
			
 
				+        header = {
			
 
				+            "Content-Type": "application/json",
			
 
				+        }
			
 
				+        response = requests.post(url, headers=header, data=json.dumps(data))
			
 
				+        return response.json()
			
 
				+
			
 
				+    @classmethod
			
 
				+    def process(cls, data):
			
 
				+        """执行代码"""
			
 
				+        title = cls.get_params(data)
			
 
				+        title_p = os.path.join(os.getcwd(), 'applications', 'static', "titles", "{}.json".format(title))
			
 
				+        if os.path.exists(title_p):
			
 
				+            result = title_mix(title_p=title_p, dt="20240417")
			
 
				+        else:
			
 
				+            cls.ask_kimi_and_save_to_local(title)
			
 
				+            result = title_mix(title_p=title_p, dt="20240417")
			
 
				+        uid, video_id = result['s1_uid'], result['s1_vid']
			
 
				+        root_share_id, productionPath = cls.create_gzh_path(uid, video_id)
			
 
				+        response = cls.request_for_info(video_id)
			
 
				+        productionCover = response["data"][0]['coverImg']['coverImgPath']
			
 
				+        productionName = response["data"][0]['title']
			
 
				+        programAvatar = "Avatar"
			
 
				+        programId = "wx89e7eb06478361d7"
			
 
				+        programName = "票圈vlog"
			
 
				+        source = "Web"
			
 
				+        result = {
			
 
				+            "productionCover": productionCover,
			
 
				+            "productionName": productionName,
			
 
				+            "programAvatar": programAvatar,
			
 
				+            "programId": programId,
			
 
				+            "programName": programName,
			
 
				+            "source": source,
			
 
				+            "rootShareId": root_share_id,
			
 
				+            "productionPath": productionPath
			
 
				+        }
			
 
				+        return result
			
--- a/applications/routes/title_routes.py
+++ b/applications/routes/title_routes.py
@@ -0,0 +1,30 @@
 
				+"""
			
 
				+@author: luojunhui
			
 
				+"""
			
 
				+from quart import Blueprint, jsonify, request
			
 
				+
			
 
				+from applications.process import ProcessParams
			
 
				+
			
 
				+
			
 
				+my_blueprint = Blueprint('kimi', __name__)
			
 
				+
			
 
				+
			
 
				+@my_blueprint.route('/healthcheck')
			
 
				+async def hello():
			
 
				+    """
			
 
				+    Hello World Test
			
 
				+    :return:
			
 
				+    """
			
 
				+    return jsonify({'message': 'Hello, World!'})
			
 
				+
			
 
				+
			
 
				+@my_blueprint.route('/title_to_video', methods=['POST'])
			
 
				+async def post_data():
			
 
				+    """
			
 
				+    请求接口代码
			
 
				+    :return:
			
 
				+    """
			
 
				+    p = ProcessParams()
			
 
				+    data = await request.get_json()
			
 
				+    processed_data = p.process(data)
			
 
				+    return jsonify(processed_data)
			
--- a/applications/static/logo.png
+++ b/applications/static/logo.png
--- a/hypercorn_config.toml
+++ b/hypercorn_config.toml
@@ -0,0 +1,3 @@
 
				+reload = true
			
 
				+bind = "0.0.0.0:8000"
			
 
				+workers = 2
			
--- a/read_data_from_odps_daily.py
+++ b/read_data_from_odps_daily.py
@@ -0,0 +1,81 @@
 
				+"""
			
 
				+@author: luojunhui
			
 
				+Read data from ODPS daily and save file to static folder
			
 
				+"""
			
 
				+import json
			
 
				+import os
			
 
				+import time
			
 
				+import schedule
			
 
				+from datetime import datetime, timedelta
			
 
				+from concurrent.futures.thread import ThreadPoolExecutor
			
 
				+
			
 
				+from applications.functions.odps import PyODPS
			
 
				+from applications.functions.ask_kimi import ask_kimi
			
 
				+
			
 
				+
			
 
				+def read_data_from_odps_daily(dt):
			
 
				+    """
			
 
				+    Read data from ODPS daily and save file to static folder
			
 
				+    :return:
			
 
				+    """
			
 
				+    sql = f"""select * from loghubods.lastday_return where dt = '{dt}' limit 20;"""
			
 
				+    data_list = PyODPS().select(sql)
			
 
				+    obj_list = [
			
 
				+        {
			
 
				+            "video_id": obj['videoid'],
			
 
				+            "title": obj['title'],
			
 
				+            "uid": obj['uid'],
			
 
				+            "dt": dt
			
 
				+        } for obj in data_list
			
 
				+    ]
			
 
				+    return obj_list
			
 
				+
			
 
				+
			
 
				+def save_file_to_local(obj):
			
 
				+    """
			
 
				+    use kimi to mine data info and save to local file
			
 
				+    :param obj:
			
 
				+    :return:
			
 
				+    """
			
 
				+    video_id = obj['video_id']
			
 
				+    title = obj['title']
			
 
				+    uid = obj['uid']
			
 
				+    dt = obj["dt"]
			
 
				+    save_path = os.path.join(os.getcwd(), 'applications', 'static', dt, "{}_{}.json".format(uid, video_id))
			
 
				+    print(save_path)
			
 
				+    if os.path.exists(save_path):
			
 
				+        return
			
 
				+    else:
			
 
				+        os.makedirs(os.path.dirname(save_path), exist_ok=True)
			
 
				+        if not title:
			
 
				+            result = {}
			
 
				+        else:
			
 
				+            result = ask_kimi(title)
			
 
				+        print(result)
			
 
				+        with open(save_path, "w", encoding="utf-8") as f:
			
 
				+            f.write(json.dumps(result, ensure_ascii=False))
			
 
				+
			
 
				+
			
 
				+def run():
			
 
				+    """
			
 
				+    Read data from ODPS daily and save file to static folder with thread pool
			
 
				+    :return:
			
 
				+    """
			
 
				+    today = datetime.today()
			
 
				+    yesterday = today - timedelta(days=1)
			
 
				+    yesterday_str = yesterday.strftime("%Y%m%d")
			
 
				+    data_list = read_data_from_odps_daily(yesterday_str)
			
 
				+    # print(data_list)
			
 
				+    for obj in data_list:
			
 
				+        save_file_to_local(obj)
			
 
				+    # with ThreadPoolExecutor(max_workers=10) as Pool:
			
 
				+    #     Pool.map(save_file_to_local, data_list)
			
 
				+
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+    run()
			
 
				+    # # 设置任务每天的 9:00 执行
			
 
				+    # schedule.every().day.at("09:00").do(run)
			
 
				+    # while True:
			
 
				+    #     schedule.run_pending()
			
 
				+    #     time.sleep(1)
			
--- a/test.py
+++ b/test.py
@@ -0,0 +1,36 @@
 
				+"""
			
 
				+@author: luojunhui
			
 
				+"""
			
 
				+import time
			
 
				+import requests
			
 
				+import argparse
			
 
				+from concurrent.futures import ThreadPoolExecutor
			
 
				+
			
 
				+
			
 
				+def request_data(url):
			
 
				+    # index = _url.split("#")[0]
			
 
				+    # url = _url.split("#")[1]
			
 
				+    body = {
			
 
				+        "title": "发布不幸消息"
			
 
				+    }
			
 
				+    t = time.time()
			
 
				+    res = requests.post(url, json=body)
			
 
				+    e = time.time()
			
 
				+    # print(index)
			
 
				+    print(e - t)
			
 
				+    print(res.text)
			
 
				+    # print(res.json())
			
 
				+
			
 
				+
			
 
				+if __name__ == "__main__":
			
 
				+    # parser = argparse.ArgumentParser()  # 新建参数解释器对象
			
 
				+    # parser.add_argument("--thread")
			
 
				+    # args = parser.parse_args()
			
 
				+    # thread = int(args.thread)
			
 
				+    dt = ["http://127.0.0.1:8000/title_to_video"]
			
 
				+    # total_s = time.time()
			
 
				+    request_data(dt[0])
			
 
				+    # with ThreadPoolExecutor(max_workers=thread) as pool:
			
 
				+    #     pool.map(request_data, dt)
			
 
				+    # total_e = time.time()
			
 
				+    # print(total_e - total_s)