Browse Source

初始化————

罗俊辉 1 year ago
parent
commit
fd890704f8

+ 1 - 1
README.md

@@ -1,3 +1,3 @@
 # title_with_video
 
-通过长文本标题+kimi匹配票圈内相关视频,在公众号文章中插入小程序分享页面,目的是为了给小程序
+通过长文本标题+kimi匹配票圈内相关视频,在公众号文章中插入小程序分享页面,目的是为了给小程序

+ 15 - 0
app.py

@@ -0,0 +1,15 @@
+"""
+@author: luojunhui
+"""
+from quart import Quart
+from applications.routes.title_routes import my_blueprint
+
+# Initialize the Quart application
+app = Quart(__name__)
+
+# Register the blueprint imported from applications.routes.title_routes
+app.register_blueprint(my_blueprint)
+
+
+if __name__ == '__main__':
+    # NOTE(review): debug=True — confirm this entry point is only used for local development
+    app.run(debug=True)

+ 55 - 0
applications/functions/ask_kimi.py

@@ -0,0 +1,55 @@
+"""
+@author: luojunhui
+"""
+"""
+@author: luojunhui
+"""
+import json
+from openai import OpenAI
+
+
+def ask_kimi(question):
+    """
+    Ask the Kimi (Moonshot) chat model to analyse a video title.
+
+    The title is appended to a fixed Chinese prompt that asks the model to
+    answer with one JSON object (key_words, search_keys, extra_keys, tone,
+    target_audience, target_age, target_gender, address, theme).
+
+    :param question: the title text to analyse
+    :return: the decoded JSON reply, or ``{}`` when the reply is missing or
+        is not valid JSON
+    """
+    import os  # local import: this module does not import os at file level
+
+    single_title_prompt = """
+        我会给你一个视频标题,需要你帮我用你所学的知识来帮我分析出以下信息,信息我都写到 json 里面了
+        {
+            "key_words": [],  # 返回三个关键词
+            "search_keys": [], # 标题可能的搜索关键词,返回 3 个
+            "extra_keys": [], # 关心这个视频的用户还会关心哪些关键词, 返回 3 个
+            "tone": 标题的语气,用一个词概括,
+            "target_audience": 标题的受众群体,用一个词概括,
+            "target_age": 标题的受众年龄段,从 老年, 中年,青年,小孩, 不限, 这五个里面选择,
+            "target_gender": 受众性别,
+            "address": 受众可能属于哪个城市,
+            "theme": 标题的主题, 用一个词概括
+        }
+        只需要返回一个 json,key 和上面的一样,
+        我给你的标题是: 
+        """
+    # NOTE(review): the fallback key below is committed to source control and
+    # should be rotated; set MOONSHOT_API_KEY in the environment to override it.
+    client = OpenAI(
+        api_key=os.environ.get(
+            "MOONSHOT_API_KEY",
+            'sk-tz1VaKqksTzk0F8HxlU4YVGwj7oa1g0c0puGNUZrdn9MDtzm',
+        ),
+        base_url="https://api.moonshot.cn/v1"
+    )
+    chat_completion = client.chat.completions.create(
+        messages=[
+            {
+                "role": "user",
+                "content": single_title_prompt + question,
+            }
+        ],
+        model="moonshot-v1-8k",
+    )
+    response = chat_completion.choices[0].message.content
+    try:
+        return json.loads(response)
+    except (TypeError, ValueError):
+        # TypeError: the reply content is None; ValueError covers
+        # json.JSONDecodeError when the model did not answer with pure JSON.
+        return {}
+
+
+
+

+ 127 - 0
applications/functions/calculate.py

@@ -0,0 +1,127 @@
+"""
+@author: luojunhui
+"""
+import json
+import os
+
+
+def read_single_file(filename):
+    """
+    Load a JSON document from disk.
+
+    :param filename: path of a UTF-8 encoded JSON file
+    :return: the decoded value, or ``{}`` when the decoded value is falsy
+    """
+    with open(filename, encoding="utf-8") as handle:
+        content = json.load(handle)
+    return content or {}
+
+
+def compute_similarity(file_1, file_2):
+    """
+    Score how similar two title-analysis JSON files are.
+
+    Two independent scores are produced:
+
+    * score_1 — weighted intersection-over-union of the ``key_words`` (0.4),
+      ``search_keys`` (0.4) and ``extra_keys`` (0.2) lists;
+    * score_2 — sum of fixed weights for every scalar field whose value is
+      equal in both files.
+
+    :param file_1: path of the first analysis file
+    :param file_2: path of the second analysis file
+    :return: tuple ``(score_1, score_2)``; ``(0, 0)`` when either file holds
+        no data or lacks an expected field
+    """
+    data_1 = read_single_file(file_1)
+    data_2 = read_single_file(file_2)
+
+    def jaccard(d1, d2, key):
+        """
+        Intersection-over-union of the two lists stored under ``key``.
+        """
+        left, right = set(d1[key]), set(d2[key])
+        union = left | right
+        # Two empty lists share nothing: score 0 instead of dividing by zero,
+        # so one empty field no longer wipes out every other score.
+        return len(left & right) / len(union) if union else 0.0
+
+    def calculate_v1(d1, d2):
+        """
+        Keyword-overlap score (weighted Jaccard over the three keyword lists).
+        """
+        return (
+            jaccard(d1, d2, "key_words") * 0.4
+            + jaccard(d1, d2, "search_keys") * 0.4
+            + jaccard(d1, d2, "extra_keys") * 0.2
+        )
+
+    def calculate_v2(d1, d2):
+        """
+        Attribute-match score: add a field's weight when both values are equal.
+        """
+        # NOTE(review): "target_gender" is not compared and the weights sum to
+        # 1.2 — confirm both are intended.
+        weights = (
+            ("tone", 0.1),
+            ("target_audience", 0.2),
+            ("target_age", 0.2),
+            ("address", 0.2),
+            ("theme", 0.5),
+        )
+        score = 0
+        for field, weight in weights:
+            if d1[field] == d2[field]:
+                score += weight
+        return score
+
+    if not (data_1 and data_2):
+        return 0, 0
+    try:
+        return calculate_v1(data_1, data_2), calculate_v2(data_1, data_2)
+    except (KeyError, TypeError):
+        # KeyError: an expected field is missing; TypeError: the document (or
+        # a keyword field) does not have the expected container type.
+        return 0, 0
+
+
+def title_mix(title_p, dt):
+    """
+    Find the best-matching candidate video for one analysed title.
+
+    Every ``<uid>_<video_id>.json`` file in ``applications/static/<dt>``
+    (relative to the current working directory) is compared with ``title_p``
+    via ``compute_similarity``; the top candidate of each score is reported.
+
+    :param title_p: path of the analysis file of the title to match
+    :param dt: name of the sub-directory of applications/static holding the
+        candidate files
+    :return: dict with ``title`` and the vid / score / uid of the best
+        candidate for score 1 (``s1_*``) and score 2 (``s2_*``)
+    :raises IndexError: when the directory holds no candidate file
+    """
+    suffix = ".json"
+
+    def strip_suffix(name):
+        """Drop a trailing ".json" only (not occurrences inside the name)."""
+        return name[:-len(suffix)] if name.endswith(suffix) else name
+
+    json_path = os.path.join(os.getcwd(), 'applications', 'static', dt)
+    score_list_1 = []
+    score_list_2 = []
+    for entry in os.listdir(json_path):
+        parts = strip_suffix(entry).split('_')
+        # Ignore anything that is not a "<uid>_<video_id>.json" candidate file.
+        if not entry.endswith(suffix) or len(parts) < 2:
+            continue
+        uid, v_id = parts[0], parts[1]
+        score1, score2 = compute_similarity(title_p, os.path.join(json_path, entry))
+        score_list_1.append([score1, v_id, uid])
+        score_list_2.append([score2, v_id, uid])
+
+    if not score_list_1:
+        raise IndexError("no candidate video files found in {}".format(json_path))
+
+    # max() keeps the first of equally scored candidates, like the stable
+    # descending sort it replaces.
+    best_1 = max(score_list_1, key=lambda x: x[0])
+    best_2 = max(score_list_2, key=lambda x: x[0])
+    title = strip_suffix(os.path.basename(title_p))
+    obj = {
+        "title": title,
+        "s1_vid": best_1[1],
+        "s1_score": best_1[0],
+        "s1_uid": best_1[2],
+        "s2_vid": best_2[1],
+        "s2_score": best_2[0],
+        "s2_uid": best_2[2]
+    }
+    return obj

+ 35 - 0
applications/functions/odps.py

@@ -0,0 +1,35 @@
+"""
+@author: luojunhui
+"""
+
+from odps import ODPS
+
+
+class PyODPS(object):
+    """
+    Thin wrapper around an ODPS client used to run SQL queries.
+    """
+
+    def __init__(self):
+        import os  # local import: this module does not import os at file level
+
+        # NOTE(review): the fallback credentials below are committed to source
+        # control and should be rotated; the ODPS_* environment variables
+        # override them.
+        self.endpoint = os.environ.get("ODPS_ENDPOINT", "http://service.cn.maxcompute.aliyun.com/api")
+        self.access_id = os.environ.get("ODPS_ACCESS_ID", "LTAIWYUujJAm7CbH")
+        self.access_key = os.environ.get("ODPS_ACCESS_KEY", "RfSjdiWwED1sGFlsjXv0DlfTnZTG1P")
+        self.project = os.environ.get("ODPS_PROJECT", "loghubods")
+
+        self.od = ODPS(
+            access_id=self.access_id,
+            secret_access_key=self.access_key,
+            endpoint=self.endpoint,
+            project=self.project,
+        )
+
+    def select(self, sql):
+        """
+        Execute a query and collect every record it yields.
+
+        :param sql: the query statement
+        :return: list of the records read from the query result
+        """
+        with self.od.execute_sql(sql).open_reader() as reader:
+            return list(reader)

+ 108 - 0
applications/process.py

@@ -0,0 +1,108 @@
+"""
+@author: luojunhui
+对请求进行操作
+"""
+
+import os
+import json
+import uuid
+import requests
+import urllib.parse
+
+from applications.functions.ask_kimi import ask_kimi
+from applications.functions.calculate import title_mix
+
+
+class ProcessParams(object):
+    """
+    Turn a title-matching request into the mini-program share-card payload.
+    """
+
+    @classmethod
+    def get_params(cls, data):
+        """
+        Extract the article title from the request body.
+
+        The body is documented to carry:
+        "accountName": official-account name,
+        "content": article body,
+        "title": article title,
+        "cover": cover image link
+        :param data: decoded request body
+        :return: title
+        :raises KeyError: when ``title`` is missing
+        """
+        return data['title']
+
+    @classmethod
+    def _title_path(cls, title):
+        """
+        Return the path of the cached analysis file for ``title``.
+        """
+        # The title comes from the request body: neutralise path separators so
+        # the file can never be created outside the titles directory.
+        safe_title = "{}".format(title).replace("/", "_").replace("\\", "_")
+        return os.path.join(os.getcwd(), 'applications', 'static', "titles", "{}.json".format(safe_title))
+
+    @classmethod
+    def ask_kimi_and_save_to_local(cls, title):
+        """
+        Analyse ``title`` with Kimi and cache the result as a JSON file.
+
+        Nothing is done when the cache file already exists.
+        :param title: article title
+        :return: None
+        """
+        save_path = cls._title_path(title)
+        if os.path.exists(save_path):
+            return
+        os.makedirs(os.path.dirname(save_path), exist_ok=True)
+        # An empty title is cached as an empty object (not the string "{}"),
+        # the same value ask_kimi returns when it cannot parse a reply.
+        result = ask_kimi(title) if title else {}
+        with open(save_path, "w", encoding="utf-8") as f:
+            f.write(json.dumps(result, ensure_ascii=False))
+
+    @classmethod
+    def create_gzh_path(cls, video_id, shared_uid):
+        """
+        Build the mini-program page path embedded in the article.
+
+        :param video_id: video id
+        :param shared_uid: id of the sharing user
+        :return: tuple ``(root_share_id, path)``; the freshly generated UUID is
+            used as both rootShareId and shareId
+        """
+        root_share_id = str(uuid.uuid4())
+        url = f"pages/user-videos?id={video_id}&su={shared_uid}&fromGzh=1&rootShareId={root_share_id}&shareId={root_share_id}"
+        return root_share_id, f"pages/category?jumpPage={urllib.parse.quote(url)}"
+
+    @classmethod
+    def request_for_info(cls, video_id):
+        """
+        Request the video information for one video.
+
+        :param video_id: video id
+        :return: decoded JSON response of the batchSelectVideoInfo endpoint
+        """
+        url = "https://longvideoapi.piaoquantv.com/longvideoapi/openapi/video/batchSelectVideoInfo"
+        data = {
+            "videoIdList": [video_id]
+        }
+        header = {
+            "Content-Type": "application/json",
+        }
+        # A timeout keeps a stalled upstream from hanging the request forever.
+        response = requests.post(url, headers=header, data=json.dumps(data), timeout=10)
+        return response.json()
+
+    @classmethod
+    def process(cls, data, dt="20240417"):
+        """
+        Match the request title to a video and build the response payload.
+
+        :param data: decoded request body, must contain ``title``
+        :param dt: candidate directory name under applications/static
+        :return: dict describing the mini-program card to insert
+        """
+        title = cls.get_params(data)
+        title_p = cls._title_path(title)
+        # No-op when the analysis of this title is already cached.
+        cls.ask_kimi_and_save_to_local(title)
+        result = title_mix(title_p=title_p, dt=dt)
+        uid, video_id = result['s1_uid'], result['s1_vid']
+        # Keyword arguments: the two ids were previously passed in swapped order.
+        root_share_id, production_path = cls.create_gzh_path(video_id=video_id, shared_uid=uid)
+        response = cls.request_for_info(video_id)
+        video_info = response["data"][0]
+        return {
+            "productionCover": video_info['coverImg']['coverImgPath'],
+            "productionName": video_info['title'],
+            "programAvatar": "Avatar",
+            "programId": "wx89e7eb06478361d7",
+            "programName": "票圈vlog",
+            "source": "Web",
+            "rootShareId": root_share_id,
+            "productionPath": production_path
+        }

+ 30 - 0
applications/routes/title_routes.py

@@ -0,0 +1,30 @@
+"""
+@author: luojunhui
+"""
+from quart import Blueprint, jsonify, request
+
+from applications.process import ProcessParams
+
+
+my_blueprint = Blueprint('kimi', __name__)
+
+
+@my_blueprint.route('/healthcheck')
+async def hello():
+    """
+    Health-check endpoint.
+    :return: JSON response carrying a fixed greeting message
+    """
+    payload = {'message': 'Hello, World!'}
+    return jsonify(payload)
+
+
+@my_blueprint.route('/title_to_video', methods=['POST'])
+async def post_data():
+    """
+    Match the posted article title to a video.
+
+    :return: JSON payload built by ProcessParams.process, or a 400 response
+        when the body is not a JSON object containing ``title``
+    """
+    import asyncio  # local import: this module does not import asyncio at file level
+
+    data = await request.get_json()
+    if not isinstance(data, dict) or 'title' not in data:
+        return jsonify({'error': "request body must be a JSON object with a 'title' field"}), 400
+    # process() performs blocking HTTP and file I/O; run it in the default
+    # executor so the event loop keeps serving other requests meanwhile.
+    loop = asyncio.get_running_loop()
+    processed_data = await loop.run_in_executor(None, ProcessParams.process, data)
+    return jsonify(processed_data)

BIN
applications/static/logo.png


+ 3 - 0
hypercorn_config.toml

@@ -0,0 +1,3 @@
+reload = true
+bind = "0.0.0.0:8000"
+workers = 2

+ 81 - 0
read_data_from_odps_daily.py

@@ -0,0 +1,81 @@
+"""
+@author: luojunhui
+Read data from ODPS daily and save file to static folder
+"""
+import json
+import os
+import time
+import schedule
+from datetime import datetime, timedelta
+from concurrent.futures.thread import ThreadPoolExecutor
+
+from applications.functions.odps import PyODPS
+from applications.functions.ask_kimi import ask_kimi
+
+
+def read_data_from_odps_daily(dt):
+    """
+    Query loghubods.lastday_return for one partition and reshape the rows.
+
+    :param dt: partition value used in the ``dt = '...'`` filter
+    :return: list of dicts with keys video_id, title, uid and dt
+    """
+    sql = f"""select * from loghubods.lastday_return where dt = '{dt}' limit 20;"""
+    obj_list = []
+    for record in PyODPS().select(sql):
+        obj_list.append(
+            {
+                "video_id": record['videoid'],
+                "title": record['title'],
+                "uid": record['uid'],
+                "dt": dt
+            }
+        )
+    return obj_list
+
+
+def save_file_to_local(obj):
+    """
+    Analyse one video title with Kimi and store the result as a JSON file.
+
+    The file is applications/static/<dt>/<uid>_<video_id>.json under the
+    current working directory; an existing file is left untouched.
+    :param obj: dict with keys video_id, title, uid and dt
+    :return: None
+    """
+    video_id, title, uid, dt = obj['video_id'], obj['title'], obj['uid'], obj["dt"]
+    target = os.path.join(os.getcwd(), 'applications', 'static', dt, "{}_{}.json".format(uid, video_id))
+    print(target)
+    if os.path.exists(target):
+        return
+    os.makedirs(os.path.dirname(target), exist_ok=True)
+    # An empty title is stored as an empty object without calling Kimi.
+    analysis = ask_kimi(title) if title else {}
+    print(analysis)
+    with open(target, "w", encoding="utf-8") as out:
+        out.write(json.dumps(analysis, ensure_ascii=False))
+
+
+def run():
+    """
+    Fetch yesterday's rows from ODPS and save one analysis file per row.
+
+    Rows are handled one after another (no thread pool is used).
+    :return: None
+    """
+    yesterday_str = (datetime.today() - timedelta(days=1)).strftime("%Y%m%d")
+    for obj in read_data_from_odps_daily(yesterday_str):
+        save_file_to_local(obj)
+
+
+if __name__ == '__main__':
+    # Run the job once, immediately.
+    run()
+    # # Schedule the job to run every day at 09:00 (currently disabled)
+    # schedule.every().day.at("09:00").do(run)
+    # while True:
+    #     schedule.run_pending()
+    #     time.sleep(1)

+ 36 - 0
test.py

@@ -0,0 +1,36 @@
+"""
+@author: luojunhui
+"""
+import time
+import requests
+import argparse
+from concurrent.futures import ThreadPoolExecutor
+
+
+def request_data(url):
+    """
+    POST a fixed sample title to ``url`` and print the elapsed seconds
+    followed by the raw response body.
+
+    :param url: endpoint to call
+    :return: None
+    """
+    payload = {"title": "发布不幸消息"}
+    started = time.time()
+    reply = requests.post(url, json=payload)
+    elapsed = time.time() - started
+    print(elapsed)
+    print(reply.text)
+
+
+if __name__ == "__main__":
+    # parser = argparse.ArgumentParser()  # create the argument parser object
+    # parser.add_argument("--thread")
+    # args = parser.parse_args()
+    # thread = int(args.thread)
+    dt = ["http://127.0.0.1:8000/title_to_video"]
+    # total_s = time.time()
+    # Send a single request to the first (only) URL in the list.
+    request_data(dt[0])
+    # with ThreadPoolExecutor(max_workers=thread) as pool:
+    #     pool.map(request_data, dt)
+    # total_e = time.time()
+    # print(total_e - total_s)
+    # print(total_e - total_s)