123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372 |
- """
- @author: luojunhui
- """
- import os
- import json
- import time
- import uuid
- import requests
- import pymysql
- import urllib.parse
- from openai import OpenAI
- from applications.functions.log import logging
class Functions(object):
    """
    General-purpose helper utilities.
    """

    # Upper bound (seconds) for each outbound HTTP call, so an unresponsive
    # server cannot block the caller forever (requests has no default timeout).
    _HTTP_TIMEOUT = 30

    _USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36'

    # clean_title works in three ordered stages; the order matters because
    # removing a character can create (or fail to create) an "&NBSP" run:
    #   1. single-character edits that happen BEFORE the "&NBSP" removal
    #   2. the "&NBSP" removal itself
    #   3. single-character removals that happen AFTER it
    _TITLE_TABLE_BEFORE_NBSP = str.maketrans({
        "\n": None,
        "/": None,
        "\r": None,
        "#": None,
        ".": "。",
        "\\": None,
    })
    _TITLE_TABLE_AFTER_NBSP = str.maketrans(dict.fromkeys(':*?"<>| \''))

    # Auto-whitelist logic
    @classmethod
    def auto_white(cls, root_share_id):
        """
        Add a root share id to the admin ad whitelist.

        Logs in to the admin backend to obtain a session cookie, then POSTs
        ``root_share_id`` to the whitelist "save" endpoint. Per the original
        author's note, the goal is that official-account traffic always gets
        ads served.

        :param root_share_id: root share id to whitelist
        :return: the ``content`` field of the endpoint's JSON response
        :raises IndexError: if the login response carries no cookie
        """

        def get_cookie():
            """
            Log in to the admin backend and return the first cookie value.

            :return: value of the first cookie set by the login response
            """
            # NOTE(review): account and password hash are embedded in the URL;
            # kept unchanged for compatibility, but they belong in secret
            # configuration rather than in source.
            url = "https://admin.piaoquantv.com/manager/login?account=luojunhui&passWd=e10adc3949ba59abbe56e057f20f883e&muid=7"
            headers = {
                'accept': 'application/json, text/plain, */*',
                'accept-language': 'en',
                'priority': 'u=1, i',
                'sec-ch-ua': '"Chromium";v="124", "Google Chrome";v="124", "Not-A.Brand";v="99"',
                'sec-ch-ua-mobile': '?0',
                'sec-ch-ua-platform': '"macOS"',
                'sec-fetch-dest': 'empty',
                'sec-fetch-mode': 'cors',
                'sec-fetch-site': 'same-origin',
                'user-agent': cls._USER_AGENT
            }
            response = requests.request(
                "GET", url, headers=headers, timeout=cls._HTTP_TIMEOUT
            )
            cookie_values = response.cookies.values()
            if not cookie_values:
                # Same exception type as the bare [0] lookup used to raise,
                # but with a message that points at the real cause.
                raise IndexError("admin login returned no session cookie")
            return cookie_values[0]

        url = "https://admin.piaoquantv.com/manager/ad/own/white/rootShare/save"
        payload = json.dumps({
            "rootShareId": root_share_id,
            "commit": "算法自动加入白名单--"
        })
        headers = {
            'accept': 'application/json',
            'accept-language': 'en',
            'content-type': 'application/json;',
            'cookie': "SESSION=" + get_cookie(),
            'origin': 'https://admin.piaoquantv.com',
            'priority': 'u=1, i',
            'user-agent': cls._USER_AGENT
        }
        response = requests.request(
            "POST", url, headers=headers, data=payload, timeout=cls._HTTP_TIMEOUT
        )
        return response.json()['content']

    # Create the official-account share card
    @classmethod
    def create_gzh_path(cls, video_id, shared_uid):
        """
        Build a share-card page path for a video and whitelist its share id.

        A fresh UUID4 is used as both ``rootShareId`` and ``shareId``; it is
        registered through :meth:`auto_white` before the path is returned.

        :param video_id: video id
        :param shared_uid: share (user) id
        :return: tuple ``(root_share_id, page_path)`` where ``page_path`` is
            ``pages/category?jumpPage=<fully URL-quoted inner page url>``
        """
        root_share_id = str(uuid.uuid4())
        url = f"pages/user-videos?id={video_id}&su={shared_uid}&fromGzh=1&rootShareId={root_share_id}&shareId={root_share_id}"
        # Register root_share_id in the whitelist automatically
        cls.auto_white(root_share_id)
        return root_share_id, f"pages/category?jumpPage={urllib.parse.quote(url, safe='')}"

    # Request video info from Piaoquan
    @classmethod
    def request_for_info(cls, video_id):
        """
        Fetch video information for one video id.

        :param video_id: id placed in the request's ``videoIdList``
        :return: decoded JSON body of the response
        """
        url = "https://longvideoapi.piaoquantv.com/longvideoapi/openapi/video/batchSelectVideoInfo"
        data = {
            "videoIdList": [video_id]
        }
        header = {
            "Content-Type": "application/json",
        }
        response = requests.post(
            url, headers=header, data=json.dumps(data), timeout=cls._HTTP_TIMEOUT
        )
        return response.json()

    # Clean title
    @classmethod
    def clean_title(cls, strings):
        """
        Strip a title and remove characters that are not wanted in it.

        Surrounding whitespace is stripped, ``.`` becomes ``。``, the literal
        text ``&NBSP`` is dropped, and newlines, slashes, backslashes, ``#``,
        ``:``, ``*``, ``?``, quotes, ``<``, ``>``, ``|`` and spaces are removed.

        :param strings: raw title text
        :return: cleaned title
        """
        cleaned = strings.strip().translate(cls._TITLE_TABLE_BEFORE_NBSP)
        cleaned = cleaned.replace("&NBSP", "")
        return cleaned.translate(cls._TITLE_TABLE_AFTER_NBSP)
class MySQLServer(object):
    """
    MySQL query helpers.
    """

    # NOTE(review): connection credentials are embedded in source. They are
    # kept unchanged for compatibility, but should live in secret configuration.
    _CRAWLER_DB = {
        "host": "rm-bp1159bu17li9hi94.mysql.rds.aliyuncs.com",  # database host (intranet address)
        "port": 3306,  # port
        "user": "crawler",  # MySQL user name
        "passwd": "crawler123456@",  # MySQL login password
        "db": "piaoquan-crawler",  # database name
        "charset": "utf8mb4",
    }
    _LONGVIDEO_SERVER = {
        "host": "rm-bp1k5853td1r25g3n690.mysql.rds.aliyuncs.com",  # database host (intranet address)
        "port": 3306,  # port
        "user": "wx2016_longvideo",  # MySQL user name
        "passwd": "wx2016_longvideoP@assword1234",  # MySQL login password
        "charset": "utf8mb4",
    }

    # Placeholders are filled in by the driver, so trace_id is always escaped.
    _VIDEO_ID_SQL = "select video_id from crawler_video where out_user_id = %s and video_title = %s;"

    @classmethod
    def _fetch_all(cls, config, sql, args=None):
        """
        Run one query on a fresh connection and return every row.

        The cursor and the connection are always closed, even when the query
        raises.

        :param config: keyword arguments for ``pymysql.connect``
        :param sql: statement text, using ``%s`` placeholders when ``args`` is given
        :param args: optional sequence of values bound to the placeholders
        :return: rows as returned by ``cursor.fetchall()``
        """
        connection = pymysql.connect(**config)
        try:
            with connection.cursor() as cursor:
                cursor.execute(sql, args)
                return cursor.fetchall()
        finally:
            connection.close()

    @classmethod
    def _fetch_video_id_rows(cls, trace_id):
        """
        Return the ``crawler_video`` rows whose ``out_user_id`` and
        ``video_title`` both equal ``trace_id``.
        """
        # str() keeps the comparison a string comparison, exactly as the
        # previous quoted-literal query did, whatever type trace_id has.
        key = str(trace_id)
        return cls._fetch_all(cls._CRAWLER_DB, cls._VIDEO_ID_SQL, (key, key))

    @classmethod
    def select_download_videos(cls, trace_id):
        """
        Look up the video recorded for a trace id.

        When the stored ``video_id`` is still ``0``, the value is resolved by
        polling through :meth:`search_id_to_video`. A successful lookup is
        logged with code ``2003``.

        :param trace_id: trace id matched against ``out_user_id`` and ``video_title``
        :return: dict with ``search_videos`` (``"success"`` or ``"failed"``),
            ``trace_id`` and ``video_list`` (one id, or empty when no row matched)
        """
        rows = cls._fetch_video_id_rows(trace_id)
        if not rows:
            return {
                "search_videos": "failed",
                "trace_id": trace_id,
                "video_list": []
            }

        video_id = rows[0][0]
        if video_id == 0:
            video_id = cls.search_id_to_video(trace_id)
        vid_list = [video_id]
        logging(
            code="2003",
            trace_id=trace_id,
            info="recall_search_list",
            function="find_videos_in_mysql",
            data=vid_list
        )
        return {
            "search_videos": "success",
            "trace_id": trace_id,
            "video_list": vid_list
        }

    @classmethod
    def select_pq_videos(cls):
        """
        Load every row of ``video_content`` from the ``incentive`` database.

        :return: list of dicts with ``video_id`` plus the JSON-decoded
            ``key_words``, ``search_keys`` and ``extra_keys`` columns
        """
        sql = "select video_id, key_words, search_keys, extra_keys from video_content"
        rows = cls._fetch_all(dict(cls._LONGVIDEO_SERVER, db="incentive"), sql)
        return [
            {
                "video_id": video_id,
                "key_words": json.loads(key_words),
                "search_keys": json.loads(search_keys),
                "extra_keys": json.loads(extra_keys),
            }
            for video_id, key_words, search_keys, extra_keys in rows
        ]

    # Sensitive words
    @classmethod
    def select_sensitive_words(cls):
        """
        Load the sensitive keywords whose ``data_status`` is 0.

        :return: list of keyword values
        """
        sql = "select `keyword` from wx_sensitive_word where `data_status` = 0"
        rows = cls._fetch_all(dict(cls._LONGVIDEO_SERVER, db="longvideo"), sql)
        return [row[0] for row in rows]

    @classmethod
    def search_id_to_video(cls, trace_id, max_attempts=None):
        """
        Poll once per second until the row for ``trace_id`` has a non-zero
        ``video_id``, then return it.

        :param trace_id: trace id matched against ``out_user_id`` and ``video_title``
        :param max_attempts: optional cap on the number of polls; ``None``
            (the default) keeps polling until a non-zero id appears
        :return: the non-zero ``video_id``
        :raises IndexError: if no row matches ``trace_id``
        :raises TimeoutError: if ``max_attempts`` polls all returned 0
        """
        attempts = 0
        while True:
            # A fresh connection per poll: a long-lived transaction could keep
            # reading the same snapshot and never observe the update.
            rows = cls._fetch_video_id_rows(trace_id)
            if not rows:
                raise IndexError(
                    "no crawler_video row for trace_id {!r}".format(trace_id)
                )
            video_id = rows[0][0]
            if int(video_id) != 0:
                return video_id

            attempts += 1
            if max_attempts is not None and attempts >= max_attempts:
                raise TimeoutError(
                    "video_id for trace_id {!r} still 0 after {} attempts".format(
                        trace_id, attempts
                    )
                )
            time.sleep(1)
class KimiServer(object):
    """
    Kimi (Moonshot) chat-completion helpers.
    """

    # NOTE(review): the API key is embedded in source. It is kept unchanged for
    # compatibility, but should be loaded from secret configuration.
    _API_KEY = 'sk-tz1VaKqksTzk0F8HxlU4YVGwj7oa1g0c0puGNUZrdn9MDtzm'
    _BASE_URL = "https://api.moonshot.cn/v1"
    _MODEL = "moonshot-v1-8k"

    # NOTE(review): both prompt literals were multi-line strings whose original
    # leading whitespace per line is not recoverable from the visible source;
    # the text and line breaks are reproduced, indentation should be confirmed.
    _KEYWORDS_PROMPT = (
        '\n'
        '我会给你一个视频标题,需要你帮我用你所学的知识来帮我分析出以下信息,信息我都写到 json 里面了\n'
        '{\n'
        '"key_words": [], # 返回三个关键词\n'
        '"search_keys": [], # 标题可能的搜索关键词,返回 3 个\n'
        '"extra_keys": [], # 关心这个视频的用户还会关心哪些关键词, 返回 3 个\n'
        '"theme": 标题的主题, 用一个词概括\n'
        '}\n'
        '只需要返回一个 json,key 和上面的一样,\n'
        '我给你的标题是:\n'
    )
    _REWRITE_TITLE_PROMPT = (
        "\n"
        "请将以上标题改写成适合小程序点击和传播的小程序标题,小程序标题的写作规范如下,请学习后进行小程序标题的编写。直接输出最终的小程序标题\n"
        "小程序标题写作规范:\n"
        "1.要点前置:将最重要的信息放在标题的最前面,以快速吸引读者的注意力。例如,“5月一辈子同学,三辈子亲,送给我的老同学,听哭无数人!”中的“5月”和“一辈子同学,三辈子亲”都是重要的信息点。\n"
        "2.激发情绪:使用能够触动人心的语言,激发读者的情感共鸣。如“只剩两人同学聚会,看后感动落泪。”使用“感动落泪”激发读者的同情和怀旧情绪。\n"
        "3.使用数字和特殊符号:数字可以提供具体性,而特殊符号如“🔴”、“😄”、“🔥”等可以吸引视觉注意力,增加点击率。\n"
        "4.悬念和好奇心:创建悬念或提出问题,激发读者的好奇心。例如,“太神奇了!长江水位下降,重庆出现惊奇一幕!”中的“惊奇一幕”就是一个悬念。\n"
        "5.名人效应:如果内容与知名人士相关,提及他们的名字可以增加标题的吸引力。\n"
        "6.社会价值观:触及读者的文化和社会价值观,如家庭、友情、国家荣誉等。\n"
        "7.标点符号的运用:使用感叹号、问号等标点来增强语气和情感表达。\n"
        "8.直接的语言:使用直白、口语化的语言,易于理解,如“狗屁股,笑死我了!”。\n"
        "9.热点人物或事件:提及当前的热点人物或事件,利用热点效应吸引读者。\n"
        "10.字数适中:保持标题在10-20个字之间,既不过长也不过短,确保信息的完整性和吸引力。\n"
        "11.适当的紧迫感:使用“最新”、“首次”、“紧急”等词汇,创造一种紧迫感,促使读者立即行动。\n"
        "12.情感或价值诉求:使用如“感动”、“泪目”、“经典”等词汇,直接与读者的情感或价值观产生共鸣。\n"
        "避免误导:确保标题准确反映内容,避免夸大或误导读者。\n"
    )

    @classmethod
    def _chat(cls, content):
        """
        Send ``content`` as a single user message and return the reply text.

        :param content: full text of the user message
        :return: ``message.content`` of the first returned choice
        """
        client = OpenAI(
            api_key=cls._API_KEY,
            base_url=cls._BASE_URL
        )
        chat_completion = client.chat.completions.create(
            messages=[
                {
                    "role": "user",
                    "content": content,
                }
            ],
            model=cls._MODEL,
        )
        return chat_completion.choices[0].message.content

    @classmethod
    def ask_kimi(cls, question):
        """
        Ask Kimi to extract keyword information for a video title.

        The prompt requests a JSON object with ``key_words``, ``search_keys``,
        ``extra_keys`` and ``theme``. Markdown code fences are removed from the
        reply before it is parsed.

        :param question: the video title (short text)
        :return: the parsed JSON reply, or ``{}`` when the reply is empty or
            is not valid JSON
        """
        reply = cls._chat(cls._KEYWORDS_PROMPT + question)
        # An absent reply is treated like an unparsable one instead of
        # crashing on ``None.replace``.
        cleaned = (reply or "").replace('```json', '').replace('```', '')
        try:
            return json.loads(cleaned)
        except ValueError:
            # json.JSONDecodeError is a ValueError; unlike a bare ``except``
            # this lets KeyboardInterrupt / SystemExit propagate.
            return {}

    @classmethod
    def ask_kimi_and_save_to_local(cls, info_tuple):
        """
        Mine keyword info for one title and write it to a local JSON file.

        When ``save_path`` already exists, nothing is regenerated and only a
        log entry is written. Otherwise the parent directory is created if
        needed, Kimi is asked (an empty title yields ``{}`` without a request),
        the result is logged and then written as UTF-8 JSON.

        :param info_tuple: sequence whose first three items are
            ``(title, trace_id, save_path)``
        :return: None
        """
        title, trace_id, save_path = info_tuple[0], info_tuple[1], info_tuple[2]
        if os.path.exists(save_path):
            logging(
                code="2001",
                info="该 video 信息已经挖掘完成---{}".format(title),
                function="ask_kimi_and_save_to_local",
                trace_id=trace_id,
            )
            return

        parent_dir = os.path.dirname(save_path)
        if parent_dir:
            # os.makedirs("") raises FileNotFoundError, so a bare file name
            # (no directory part) must skip directory creation.
            os.makedirs(parent_dir, exist_ok=True)

        result = cls.ask_kimi(title) if title else {}
        logging(
            code="2001",
            info="kimi-result",
            data=result,
            trace_id=trace_id,
            function="ask_kimi_and_save_to_local"
        )
        with open(save_path, "w", encoding="utf-8") as f:
            f.write(json.dumps(result, ensure_ascii=False))

    @classmethod
    def kimi_title(cls, ori_title):
        """
        Ask Kimi to rewrite a title as a mini-program title.

        The message sent is the original title, a newline, then the
        title-writing guidelines prompt.

        :param ori_title: original title
        :return: the reply text, unmodified
        """
        return cls._chat(ori_title + "\n" + cls._REWRITE_TITLE_PROMPT)
|