فهرست منبع

增加企业微信相关内容

罗俊辉 9 ماه پیش
والد
کامیت
defac8b0e4

+ 7 - 0
applications/deal/recall_deal.py

@@ -66,6 +66,8 @@ class RecallDeal(object):
             source_id = "touliu_tencentGzhArticle_{}_".format(gh_id) + generate_source_id()
         elif self.mini_program_type == 1:
             source_id = "longArticles_" + generate_source_id()
+        elif self.mini_program_type == 3:
+            source_id = "WeCom_" + generate_source_id()
         else:
             source_id = "Error mini_program_type {}".format(self.mini_program_type)
         url = f"pages/user-videos?id={video_id}&su={shared_uid}&fromGzh=1&rootShareId={root_share_id}&shareId={root_share_id}&rootSourceId={source_id}"
@@ -88,6 +90,11 @@ class RecallDeal(object):
             programAvatar = "https://rescdn.yishihui.com/0temp/zfyfyc.jpeg"
             programId = "wxcddf231abd0dabdc"
             programName = "祝福有福有财"
+        elif self.mini_program_type == 3:
+            # 企业微信
+            programAvatar = "https://rescdn.yishihui.com/0temp/xymhfqdd.png"
+            programId = "wx7187c217efef24a7"
+            programName = "幸运美好福气多多"
         else:
             programAvatar = "https://rescdn.yishihui.com/0temp/ssyqsh.png"
             programId = "wx59d9e2c05f00f880"

+ 0 - 237
applications/functions/async_etl.py

@@ -1,237 +0,0 @@
-"""
-@author: luojunhui
-"""
-import os
-
-import oss2
-import aiohttp
-import aiofiles
-from hashlib import md5
-from uuid import uuid4
-
-import requests
-from fake_useragent import FakeUserAgent
-
-
-async def upload_to_oss(local_video_path, type_):
-    """
-    把视频上传到 oss
-    :return:
-    """
-    oss_video_key = "long_articles/{}/".format(type_) + str(uuid4())
-    access_key_id = "LTAIP6x1l3DXfSxm"
-    access_key_secret = "KbTaM9ars4OX3PMS6Xm7rtxGr1FLon"
-    endpoint = "oss-cn-hangzhou.aliyuncs.com"
-    bucket_name = "art-pubbucket"
-    bucket = oss2.Bucket(
-        oss2.Auth(access_key_id, access_key_secret), endpoint, bucket_name
-    )
-    bucket.put_object_from_file(key=oss_video_key, filename=local_video_path)
-    return oss_video_key
-
-
-class AsyncETL(object):
-    """
-    视频下载功能
-    """
-
-    def __init__(self, video_obj):
-        self.platform = video_obj["platform"]
-        self.video_id = video_obj["video_id"]
-        self.video_url = video_obj["video_url"]
-        self.uid = video_obj["user_id"]
-        self.title = video_obj["video_title"]
-        self.cover_url = video_obj["cover_url"]
-        # self.proxy = {
-        #     "http://": "http://t17772369458618:5zqcjkmy@q796.kdltps.com:15818/",
-        #     "https://": "http://t17772369458618:5zqcjkmy@q796.kdltps.com:15818/",
-        # }
-        self.max_retry = 5
-
-    def request_header(self, type_="video"):
-        """
-        请求头
-        :return:
-        """
-        if self.platform == "xg_search":
-            if "v9-xg-web-pc.ixigua.com" in self.video_url:
-                headers = {
-                    "Accept": "*/*",
-                    "Accept-Language": "zh-CN,zh;q=0.9",
-                    "Host": "v9-xg-web-pc.ixigua.com",
-                    "User-Agent": FakeUserAgent().chrome,
-                    "Origin": "https://www.ixigua.com/",
-                    "Referer": "https://www.ixigua.com/"
-                }
-            elif "v3-xg-web-pc.ixigua.com" in self.video_url:
-                headers = {
-                    "Accept": "*/*",
-                    "Accept-Language": "zh-CN,zh;q=0.9",
-                    "Host": "v3-xg-web-pc.ixigua.com",
-                    "User-Agent": FakeUserAgent().chrome,
-                    "Origin": "https://www.ixigua.com/",
-                    "Referer": "https://www.ixigua.com/"
-                }
-            elif type_ == "cover":
-                headers = {
-                    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
-                    'Accept-Language': 'en,zh;q=0.9,zh-CN;q=0.8',
-                    'Cache-Control': 'max-age=0',
-                    'Proxy-Connection': 'keep-alive',
-                    'Upgrade-Insecure-Requests': '1',
-                    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36'
-                }
-            else:
-                headers = {
-                    "Accept": "*/*",
-                    "Accept-Language": "zh-CN,zh;q=0.9",
-                    "Host": "v3-xg-web-pc.ixigua.com",
-                    "User-Agent": FakeUserAgent().chrome,
-                    "Origin": "https://www.ixigua.com/",
-                    "Referer": "https://www.ixigua.com/"
-                }
-        elif self.platform == "baidu_search":
-            headers = {
-                "Accept": "*/*",
-                "Accept-Language": "zh-CN,zh;q=0.9",
-                "User-Agent": FakeUserAgent().chrome,
-            }
-        elif self.platform == "wx_search":
-            headers = {
-                "Accept": "*/*",
-                "Accept-Language": "zh-CN,zh;q=0.9",
-                "User-Agent": FakeUserAgent().chrome,
-                "Origin": "https://mp.weixin.qq.com",
-                "Referer": "https://mp.weixin.qq.com"
-            }
-        elif self.platform == "dy_search":
-            headers = {
-                'accept': '*/*',
-                'accept-language': 'en,zh;q=0.9,zh-CN;q=0.8',
-                'priority': 'i',
-                'range': 'bytes=0-',
-                'referer': 'https://v11-coldf.douyinvod.com/',
-                'user-agent': FakeUserAgent().chrome
-            }
-        else:
-            headers = {}
-        return headers
-
-    def generate_video_path(self):
-        """
-        通过视频信息生成唯一视频地址
-        :return:
-        """
-        index = "{}-{}".format(self.platform, self.video_id)
-        index = md5(index.encode()).hexdigest()
-        file_name = "{}.mp4".format(index)
-        cover_name = "{}.png".format(index)
-        file_path = os.path.join(os.getcwd(), "videos", file_name)
-        cover_path = os.path.join(os.getcwd(), "videos", cover_name)
-        return file_path, cover_path
-
-    async def publish_by__request(self, video_path, cover):
-        """
-        发布
-        :return:
-        """
-        url = "https://vlogapi.piaoquantv.com/longvideoapi/crawler/video/send"
-        headers = {
-            "User-Agent": "PQSpeed/486 CFNetwork/1410.1 Darwin/22.6.0",
-            "cookie": "JSESSIONID=4DEA2B5173BB9A9E82DB772C0ACDBC9F; JSESSIONID=D02C334150025222A0B824A98B539B78",
-            "referer": "http://appspeed.piaoquantv.com",
-            "token": "524a8bc871dbb0f4d4717895083172ab37c02d2f",
-            "accept-language": "zh-CN,zh-Hans;q=0.9",
-            "Content-Type": "application/x-www-form-urlencoded",
-        }
-        payload = {
-            "coverImgPath": cover,
-            "deviceToken": "9ef064f2f7869b3fd67d6141f8a899175dddc91240971172f1f2a662ef891408",
-            "fileExtensions": "MP4",
-            "loginUid": self.uid,
-            "networkType": "Wi-Fi",
-            "platform": "iOS",
-            "requestId": "fb972cbd4f390afcfd3da1869cd7d001",
-            "sessionId": "362290597725ce1fa870d7be4f46dcc2",
-            "subSessionId": "362290597725ce1fa870d7be4f46dcc2",
-            "title": self.title,
-            "token": "524a8bc871dbb0f4d4717895083172ab37c02d2f",
-            "uid": self.uid,
-            "versionCode": "486",
-            "versionName": "3.4.12",
-            "videoFromScene": "1",
-            "videoPath": video_path,
-            "viewStatus": "1",
-        }
-        response = requests.post(
-            url=url,
-            headers=headers,
-            data=payload,
-        )
-        return response.json()
-
-    async def download(self, file_path):
-        """
-        :param file_path:
-        :return:
-        """
-        headers = self.request_header()
-        if os.path.exists(file_path):
-            file_size = os.path.getsize(file_path)
-            headers["Range"] = f"bytes={file_size}-"
-        else:
-            file_size = 0
-        async with aiohttp.ClientSession() as session:
-            async with session.get(self.video_url, headers=headers) as response:
-                if response.status in [200, 206]:
-                    mode = "ab+" if file_size > 0 else "wb"
-                    f = await aiofiles.open(file_path, mode)
-                    await f.write(await response.read())
-                    await f.close()
-                else:
-                    print(response.status)
-        return file_path
-
-    async def download_cover(self, file_path):
-        """
-        下载视频封面
-        :param file_path:
-        :return:
-        """
-        headers = self.request_header(type_="cover")
-        response = requests.get(url=self.cover_url, headers=headers)
-        if b"<html>" in response.content:
-            return None
-        else:
-            with open(file_path, "wb") as f:
-                f.write(response.content)
-            return file_path
-
-    async def etl_deal(self):
-        """
-        ETL Deal Task
-        :return:
-        """
-        local_video_path, local_cover_path = self.generate_video_path()
-        # download videos
-        file_path = await self.download(local_video_path)
-        # download cover
-        cover_path = await self.download_cover(local_cover_path)
-        # upload to oss
-        oss_video = await upload_to_oss(
-            local_video_path=file_path,
-            type_="video"
-        )
-        if cover_path:
-            oss_cover = await upload_to_oss(
-                local_video_path=cover_path,
-                type_="image"
-                )
-        else:
-            oss_cover = None
-        # publish to pq
-        result = await self.publish_by__request(
-            video_path=oss_video,
-            cover=oss_cover
-        )
-        return result["data"]["id"]

+ 0 - 27
applications/functions/browser_extract.py

@@ -1,27 +0,0 @@
-# """
-# @author: luojunhui
-# """
-# import time
-# from selenium import webdriver
-# from selenium.webdriver.chrome.service import Service
-# from selenium.webdriver.chrome.options import Options
-# from webdriver_manager.chrome import ChromeDriverManager
-#
-#
-# def get_source_code(url):
-#     """
-#     :param url:
-#     :return:
-#     """
-#     # 配置 Chrome 选项
-#     chrome_options = Options()
-#     chrome_options.add_argument('--headless')  # 无头模式
-#     chrome_options.add_argument('--disable-gpu')
-#     chrome_options.add_argument('--incognito')
-#     service = Service(ChromeDriverManager().install())
-#     driver = webdriver.Chrome(service=service, options=chrome_options)
-#     driver.get(url)
-#     time.sleep(3)
-#     page_text = driver.page_source
-#     driver.quit()
-#     return page_text

+ 0 - 34
applications/functions/mq.py

@@ -1,34 +0,0 @@
-"""
-@author: luojunhui
-"""
-import json
-from uuid import uuid4
-from mq_http_sdk.mq_exception import MQExceptionBase
-from mq_http_sdk.mq_producer import TopicMessage
-from mq_http_sdk.mq_client import MQClient
-
-
-class MQ(object):
-    """
-    MQ Class
-    """
-    instance_id = "MQ_INST_1894469520484605_BXhXuzkZ"
-
-    def __init__(self, topic_name) -> None:
-        self.mq_client = MQClient("http://1894469520484605.mqrest.cn-qingdao-public.aliyuncs.com",
-                                  "LTAI4G7puhXtLyHzHQpD6H7A",
-                                  "nEbq3xWNQd1qLpdy2u71qFweHkZjSG")
-        self.producer = self.mq_client.get_producer(self.instance_id, topic_name)
-
-    def send_msg(self, params):
-        """
-        send msg to mq client
-        """
-        try:
-            msg = TopicMessage(json.dumps(params))
-            message_key = str(uuid4())
-            msg.set_message_key(message_key)
-            re_msg = self.producer.publish_message(msg)
-            print(re_msg)
-        except MQExceptionBase as e:
-            print(e)

+ 0 - 131
applications/functions/mysql.py

@@ -1,131 +0,0 @@
-"""
-@author: luojunhui
-"""
-import json
-import time
-import pymysql
-
-from applications.functions.log import logging
-
-
-def select_download_videos(trace_id):
-    """
-    查询
-    :param trace_id:
-    :return:
-    """
-    sql = "select video_id from crawler_video where out_user_id = '{}' and video_title = '{}';".format(trace_id,
-                                                                                                       trace_id)
-    connection = pymysql.connect(
-        host="rm-bp1159bu17li9hi94.mysql.rds.aliyuncs.com",  # 数据库IP地址,内网地址
-        port=3306,  # 端口号
-        user="crawler",  # mysql用户名
-        passwd="crawler123456@",  # mysql用户登录密码
-        db="piaoquan-crawler",  # 数据库名
-        charset="utf8mb4"  # 如果数据库里面的文本是utf8编码的,charset指定是utf8
-    )
-    cursor = connection.cursor()
-    cursor.execute(sql)
-    out_video_list = cursor.fetchall()
-    if len(out_video_list) > 0:
-        if out_video_list[0][0] == 0:
-            video_id = search_id_to_video(trace_id)
-        else:
-            video_id = out_video_list[0][0]
-
-        vid_list = [video_id]
-        logging(
-            code="2003",
-            trace_id=trace_id,
-            info="recall_search_list",
-            function="find_videos_in_mysql",
-            data=vid_list
-        )
-        return {
-            "search_videos": "success",
-            "trace_id": trace_id,
-            "video_list": vid_list
-        }
-    else:
-        return {
-            "search_videos": "failed",
-            "trace_id": trace_id,
-            "video_list": []
-        }
-
-
-def select_pq_videos():
-    """
-    查询
-    :return: info_list
-    """
-    connection = pymysql.connect(
-        host="rm-bp1k5853td1r25g3n690.mysql.rds.aliyuncs.com",  # 数据库IP地址,内网地址
-        port=3306,  # 端口号
-        user="wx2016_longvideo",  # mysql用户名
-        passwd="wx2016_longvideoP@assword1234",  # mysql用户登录密码
-        db="incentive",  # 数据库名
-        charset="utf8mb4"  # 如果数据库里面的文本是utf8编码的,charset指定是utf8
-    )
-    sql = "select video_id, key_words, search_keys, extra_keys from video_content"
-    cursor = connection.cursor()
-    cursor.execute(sql)
-    data = cursor.fetchall()
-    result = [
-        {
-            "video_id": line[0],
-            "key_words": json.loads(line[1]),
-            "search_keys": json.loads(line[2]),
-            "extra_keys": json.loads(line[3]),
-        }
-        for line in data
-    ]
-    return result
-
-
-# 敏感词
-def select_sensitive_words():
-    """
-    sensitive words
-    :return:
-    """
-    connection = pymysql.connect(
-        host="rm-bp1k5853td1r25g3n690.mysql.rds.aliyuncs.com",  # 数据库IP地址,内网地址
-        port=3306,  # 端口号
-        user="wx2016_longvideo",  # mysql用户名
-        passwd="wx2016_longvideoP@assword1234",  # mysql用户登录密码
-        db="longvideo",  # 数据库名
-        charset="utf8mb4"  # 如果数据库里面的文本是utf8编码的,charset指定是utf8
-    )
-    sql = "select `keyword` from wx_sensitive_word where `data_status` = 0"
-    cursor = connection.cursor()
-    cursor.execute(sql)
-    data = cursor.fetchall()
-    result = [line[0] for line in data]
-    return result
-
-
-def search_id_to_video(trace_id):
-    """
-    通过 search_id 返回 video_id
-    :param trace_id:
-    :return:
-    """
-    sql = "select video_id from crawler_video where out_user_id = '{}' and video_title = '{}';".format(trace_id,
-                                                                                                       trace_id)
-    connection = pymysql.connect(
-        host="rm-bp1159bu17li9hi94.mysql.rds.aliyuncs.com",  # 数据库IP地址,内网地址
-        port=3306,  # 端口号
-        user="crawler",  # mysql用户名
-        passwd="crawler123456@",  # mysql用户登录密码
-        db="piaoquan-crawler",  # 数据库名
-        charset="utf8mb4"  # 如果数据库里面的文本是utf8编码的,charset指定是utf8
-    )
-    cursor = connection.cursor()
-    cursor.execute(sql)
-    out_video_list = cursor.fetchall()
-    if int(out_video_list[0][0]) == 0:
-        time.sleep(1)
-        return search_id_to_video(trace_id)
-    else:
-        return out_video_list[0][0]

+ 1 - 39
applications/static/config.py

@@ -490,50 +490,12 @@ gh_id_dict = {
 }
 
 
-# 实验配置文件
-buy_accounts = [
-    "gh_084a485e859a",
-    "gh_e24da99dc899",
-    "gh_e0eb490115f5",
-    "gh_183d80deffb8",
-    "gh_5ff48e9fb9ef",
-    "gh_9f8dc5b0c74e",
-    "gh_6d9f36e3a7be"
-]
-
-dyy = [
-    "gh_9877c8541764",
-    "gh_6d205db62f04",
-    "gh_c69776baf2cd",
-    "gh_7e5818b2dd83",
-    "gh_89ef4798d3ea",
-    "gh_a2901d34f75b",
-    "gh_b15de7c99912"
-]
-
-ab_test_config = {
-    "gh_084a485e859a": 1,
-    "gh_e24da99dc899": 1,
-    "gh_e0eb490115f5": 1,
-    "gh_183d80deffb8": 1,
-    "gh_5ff48e9fb9ef": 1,
-    "gh_9f8dc5b0c74e": 1,
-    "gh_6d9f36e3a7be": 1,
-    "gh_9877c8541764": 1,
-    "gh_6d205db62f04": 1,
-    "gh_c69776baf2cd": 1,
-    "gh_7e5818b2dd83": 1,
-    "gh_89ef4798d3ea": 1,
-    "gh_a2901d34f75b": 1,
-    "gh_b15de7c99912": 1
-}
-
 # prod
 db_article = "long_articles_video"
+db_video = "article_match_videos"
 
 # dev
 # db_article = "long_articles_video_dev"
-article_queue = "long_articles_queue"
 
 # spider coroutines
 spider_coroutines = 6