Переглянути джерело

更新 share_url
更新 logo.png

罗俊辉 1 рік тому
батько
коміт
bb633455b8

+ 69 - 0
applications/functions/search_video.py

@@ -0,0 +1,69 @@
+"""
+@author: luojunhui
+"""
+import requests
+import json
+import asyncio
+
+from tqdm import tqdm
+
+from applications.functions.upload import upload_to_pq, upload_to_oss
+
+
+async def process_video_obj(video_obj):
+    """
+    Upload one crawled video to OSS and then register it with PQ.
+
+    :param video_obj: dict describing a crawled video; the keys read here are
+        'videoUrl', 'title', 'pubTime', 'likeNum', 'hashDocID', 'duration'
+        and 'image' (a missing key raises KeyError before any upload starts)
+    :return: the collected video metadata extended with 'oss_object_key'
+        (key the file was stored under) and 'pq_code' (value returned by
+        upload_to_pq)
+    """
+    video_dict = {
+        "video_url": video_obj['videoUrl'],
+        "video_title": video_obj['title'],
+        "publish_time": video_obj['pubTime'],
+        "like_num": video_obj['likeNum'],
+        "video_id": video_obj['hashDocID'],
+        "video_duration": video_obj['duration'],
+        "video_cover": video_obj['image']
+    }
+    oss_result = await upload_to_oss(
+        video_id=video_dict['video_id'],
+        video_url=video_dict['video_url'])
+    print(oss_result)
+    video_dict['oss_object_key'] = oss_result['oss_object_key']
+    # The metadata and the PQ result used to be built and then dropped;
+    # return them so the caller can report what was uploaded.
+    video_dict['pq_code'] = upload_to_pq(
+        oss_object_key=oss_result['oss_object_key'],
+        title=video_dict['video_title']
+    )
+    return video_dict
+
+
+async def search_spider(params, limit=2):
+    """
+    Query the keyword crawler with params['search_keys'] and upload the hits.
+
+    NOTE(review): the original docstring said the results are sent to ETL
+    through MQ; the code here hands each hit to process_video_obj directly.
+
+    :param params: dict holding a 'search_keys' list of keyword strings
+    :param limit: maximum number of search hits to process (default 2, the
+        value that used to be hard-coded)
+    :return: list with one process_video_obj result per processed hit
+    :raises requests.HTTPError: when the crawler answers with an error status
+    """
+    search_keys = params['search_keys']
+    url = "http://8.217.190.241:8888/crawler/wei_xin/keyword"
+
+    payload = json.dumps({
+        "keyword": ",".join(search_keys),
+        "cursor": "0",
+        "content_type": "video"
+    })
+    headers = {
+        'Content-Type': 'application/json'
+    }
+    # requests is blocking: run it in the default executor so the event loop
+    # is not stalled, and bound the wait with a timeout.
+    loop = asyncio.get_running_loop()
+    response = await loop.run_in_executor(
+        None,
+        lambda: requests.post(url, headers=headers, data=payload, timeout=30)
+    )
+    response.raise_for_status()
+    data_list = response.json()['data']['data']
+    results = []
+    for item in tqdm(data_list[:limit]):
+        videos = item.get('items') or []
+        if not videos:
+            # A hit without any video entry has nothing to upload.
+            continue
+        results.append(await process_video_obj(videos[0]))
+    return results
+
+
+async def main():
+    """
+    Run search_spider once with a fixed set of sample keywords.
+    """
+    p = {
+        "search_keys": ["同学聚会", "演讲", "点赞"]
+    }
+    await search_spider(p)
+
+
+# Run the sample search only when this file is executed as a script.
+# Calling asyncio.run() unconditionally started a crawl on every import of
+# this module, and asyncio.run() raises RuntimeError when an event loop is
+# already running in the importing thread.
+if __name__ == "__main__":
+    asyncio.run(main())

+ 111 - 0
applications/functions/upload.py

@@ -0,0 +1,111 @@
+"""
+@author: luojunhui
+"""
+import os
+import tempfile
+import urllib.parse
+
+import oss2
+import requests
+
+# NOTE(review): the literal fallbacks below are credentials committed to the
+# repository. They are kept only so existing deployments keep working;
+# rotate them and supply the values through the environment instead.
+OSS_ACCESS_KEY_ID = os.environ.get("OSS_ACCESS_KEY_ID", "LTAIP6x1l3DXfSxm")
+OSS_ACCESS_KEY_SECRET = os.environ.get(
+    "OSS_ACCESS_KEY_SECRET", "KbTaM9ars4OX3PMS6Xm7rtxGr1FLon"
+)
+
+# Public endpoint by default. Set OSS_BUCKET_ENDPOINT to
+# "oss-cn-hangzhou-internal.aliyuncs.com" to use the internal address.
+OSS_BUCKET_ENDPOINT = os.environ.get(
+    "OSS_BUCKET_ENDPOINT", "oss-cn-hangzhou.aliyuncs.com"
+)
+
+import aiohttp
+import aiofiles
+
+
+async def download_video(video_url, output_filename):
+    """
+    Asynchronously download a video from a URL and save it to a file.
+
+    :param video_url: The URL of the video to download
+    :param output_filename: The file path where the video should be saved
+    :return: True when the server answered 200 and the body was written,
+        False otherwise (the output file is not opened in that case)
+    """
+    async with aiohttp.ClientSession() as session:
+        async with session.get(video_url) as response:
+            if response.status != 200:
+                print(f"Failed to download video. HTTP status: {response.status}")
+                # Report the failure instead of only printing it, so the
+                # caller can tell that nothing was written.
+                return False
+            async with aiofiles.open(output_filename, mode='wb') as file:
+                # 64 KiB chunks need far fewer read/write round trips than
+                # 1 KiB reads while still bounding memory use.
+                async for chunk in response.content.iter_chunked(64 * 1024):
+                    await file.write(chunk)
+            print("Download completed successfully.")
+            return True
+
+
+async def upload_to_oss(video_id, video_url):
+    """
+    Download a video and upload it to the "art-pubbucket" OSS bucket.
+
+    The download step used to be commented out, so video_url was ignored and
+    every call uploaded whatever "temp.mp4" happened to be in the working
+    directory. The video is now fetched into a unique temporary file, which
+    also keeps concurrent calls from overwriting each other's data.
+
+    :param video_id: identifier used to build the object key
+        'single_video/<video_id>'
+    :param video_url: URL the video is downloaded from
+    :return: dict with 'status' (HTTP status of the upload) and
+        'oss_object_key'
+    :raises AssertionError: when the download produced no data or the upload
+        response carries no 'Content-Length' header
+    """
+    fd, local_path = tempfile.mkstemp(suffix=".mp4")
+    os.close(fd)
+    try:
+        await download_video(video_url, local_path)
+        with open(local_path, "rb") as file:
+            file_content = file.read()
+    finally:
+        # The bytes are in memory now; never leave the temp file behind.
+        os.remove(local_path)
+    if not file_content:
+        raise AssertionError(f'video download returned no data: {video_url}')
+    print("读取完成")
+    content_type = 'application/octet-stream'
+    oss_object_key = f'single_video/{video_id}'
+    auth = oss2.Auth(OSS_ACCESS_KEY_ID, OSS_ACCESS_KEY_SECRET)
+    bucket = oss2.Bucket(auth, OSS_BUCKET_ENDPOINT, "art-pubbucket")
+    response = bucket.put_object(oss_object_key, file_content, headers={'Content-Type': content_type})
+    print(response.status)
+    if 'Content-Length' in response.headers:
+        return {
+            'status': response.status,
+            'oss_object_key': oss_object_key}
+    raise AssertionError(f'OSS上传失败,请求ID: \n{response.headers["x-oss-request-id"]}')
+
+
+def upload_to_pq(oss_object_key, title, user_id="69611689"):
+    """
+    Register an uploaded video with the PQ crawler "video/send" endpoint.
+
+    :param oss_object_key: OSS object key of the video, sent as 'videoPath'
+    :param title: video title, sent as 'title'
+    :param user_id: account id, sent as 'loginUid'
+    :return: the 'code' field of the JSON response
+    :raises requests.Timeout: when the endpoint does not answer in time
+    """
+    url = "https://vlogapi.piaoquantv.com/longvideoapi/crawler/video/send"
+    # NOTE(review): token, machineCode, the session ids and the Cookie header
+    # are fixed literal values - confirm they are meant to be shared/static.
+    payload = dict(
+        pageSource='vlog-pages/post/post-video-post',
+        videoPath=oss_object_key,
+        width='720',
+        height='1280',
+        fileExtensions='mp4',
+        viewStatus='1',
+        title=title,
+        careModelStatus='1',
+        token='f04f58d6e664cbc9902660a1e8d20ce6cd7fdb0f',
+        loginUid=user_id,
+        versionCode='719',
+        machineCode='weixin_openid_o0w175aZ4FJtqVsA1tcozJDJHdDU',
+        appId='wx89e7eb06478361d7',
+        clientTimestamp='1703337579331',
+        machineInfo='{"sdkVersion":"3.2.5","brand":"iPhone","language":"zh_CN","model":"iPhone 12 Pro<iPhone13,3>","platform":"ios","system":"iOS 15.6.1","weChatVersion":"8.0.44","screenHeight":844,"screenWidth":390,"pixelRatio":3,"windowHeight":762,"windowWidth":390,"softVersion":"4.1.719"}',
+        sessionId='1703337560040-27bfe208-a389-f476-db1d-840681e04b32',
+        subSessionId='1703337569952-8f56d53c-b36d-760e-8abe-0b4a027cd5bd',
+        senceType='1089',
+        hotSenceType='1089',
+        id='1050',
+        channel='pq'
+    )
+
+    data = urllib.parse.urlencode(payload)
+    headers = {
+        'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 15_6_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 MicroMessenger/8.0.44(0x18002c2d) NetType/WIFI Language/zh_CN',
+        'Accept-Encoding': 'gzip,compress,br,deflate',
+        'Referer': 'https://servicewechat.com/wx89e7eb06478361d7/726/page-frame.html',
+        'Content-Type': 'application/x-www-form-urlencoded',
+        'Cookie': 'JSESSIONID=A60D96E7A300A25EA05425B069C8B459'
+    }
+    # Without a timeout a stalled server would block the caller forever.
+    response = requests.post(url, data=data, headers=headers, timeout=30)
+    return response.json()["code"]

+ 18 - 0
applications/routes.py

@@ -42,3 +42,21 @@ async def post_data():
     data = await request.get_json()
     processed_data = p.process(data)
     return jsonify(processed_data)
+
+
+@my_blueprint.route('/search_videos', methods=['POST'])
+async def search_data():
+    """
+    Search for videos using the search keywords in the JSON request body.
+    :return: JSON response built from the search_spider result
+    """
+    request_id = uuid.uuid4()
+    timestamp = int(time.time())
+    trace_id = f"search-{request_id}-{timestamp}"
+    logging(
+        trace_id=trace_id,
+        port="search_videos",
+        info="请求接口成功",
+        code="1001"
+    )
+    body = await request.get_json()
+    return jsonify(await search_spider(body))

+ 2 - 2
test.py

@@ -20,8 +20,8 @@ def request_data(url):
     e = time.time()
     # print(index)
     print(e - t)
-    # print(res.text)
-    print(res.json())
+    print(res.text)
+    # print(res.json())
 
 
 if __name__ == "__main__":