Browse Source

process.py
1. 返回结果中新增 videoUrl 字段
2. 增加日志:记录 root_share_id 和 video_id

routes.py
1. 搜索接口开发中(已引入 search_spider)
search_video.py
1. 搜索接口正在开发

罗俊辉 1 year ago
parent
commit
35e66271b3
5 changed files with 43 additions and 146 deletions
  1. 26 37
      applications/functions/search_video.py
  2. 0 106
      applications/functions/upload.py
  3. 15 2
      applications/process.py
  4. 1 0
      applications/routes.py
  5. 1 1
      test.py

+ 26 - 37
applications/functions/search_video.py

@@ -3,39 +3,12 @@
 """
 import requests
 import json
-import asyncio
+import time
 
 from tqdm import tqdm
 
-from upload import upload_to_pq, upload_to_oss
 
-
-async def process_video_obj(video_obj):
-    """
-    Process video object
-    :param video_obj:
-    :return:
-   """
-    video_dict = {
-        "video_url": video_obj['videoUrl'],
-        "video_title": video_obj['title'],
-        "publish_time": video_obj['pubTime'],
-        "like_num": video_obj['likeNum'],
-        "video_id": video_obj['hashDocID'],
-        "video_duration": video_obj['duration'],
-        "video_cover": video_obj['image']
-    }
-    res = await upload_to_oss(
-        video_id=video_obj['hashDocID'],
-        video_url=video_obj['videoUrl'])
-    print(res)
-    res2 = upload_to_pq(
-        oss_object_key=res['oss_object_key'],
-        title=video_obj['title']
-    )
-
-
-async def search_spider(params):
+def search_spider(params):
     """
     通过搜索爬虫 + search_keys 来获取视频信息,并且以 MQ 的方式发送给 ETL, 正常上传发布
     :param params: []
@@ -54,16 +27,32 @@ async def search_spider(params):
     }
     response = requests.request("POST", url, headers=headers, data=payload)
     data_list = response.json()['data']['data']
-    for item in tqdm(data_list[:2]):
+    for item in tqdm(data_list[:1]):
         video_obj = item['items'][0]
-        await process_video_obj(video_obj)
+        print(json.dumps(video_obj, ensure_ascii=False, indent=4))
+        video_id = video_obj['hashDocID']
+        video_url = video_obj['videoUrl']
+        video_title = video_obj['title']
+        etl(vid=video_id, video_url=video_url, title=video_title)
 
 
-async def main():
-    p = {
-        "search_keys": ["同学聚会", "演讲", "点赞"]
+def etl(vid, title, video_url):
+    print(vid)
+    url = "http://lightgbm-internal-test.piaoquantv.com/etl"
+    payload = {
+        "video_title": title,
+        "video_url": video_url,
+        "video_id": vid,
+
     }
-    await search_spider(p)
+    t = time.time()
+    res = requests.post(url, json=payload)
+    e = time.time()
+    print(e - t)
+    print(res.json())
+
 
-# 执行异步函数
-asyncio.run(main())
+# p = {
+#     "search_keys": ["王者荣耀", "李白", "五杀"]
+# }
+# search_spider(p)

+ 0 - 106
applications/functions/upload.py

@@ -1,106 +0,0 @@
-"""
-@author: luojunhui
-"""
-import oss2
-import requests
-import urllib.parse
-
-OSS_ACCESS_KEY_ID = "LTAIP6x1l3DXfSxm"
-OSS_ACCESS_KEY_SECRET = "KbTaM9ars4OX3PMS6Xm7rtxGr1FLon"
-# OSS_BUCKET_ENDPOINT = "oss-cn-hangzhou-internal.aliyuncs.com"  # 内网地址
-
-
-OSS_BUCKET_ENDPOINT = "oss-cn-hangzhou.aliyuncs.com"  # 外网地址
-
-import aiohttp
-import aiofiles
-
-
-async def download_video(video_url, output_filename):
-    """
-    Asynchronously download a video from a given URL and save it to a specified file.
-    :param video_url: The URL of the video to download
-    :param output_filename: The file path where the video should be saved
-    """
-    async with aiohttp.ClientSession() as session:
-        async with session.get(video_url) as response:
-            if response.status == 200:
-                async with aiofiles.open(output_filename, mode='wb') as file:
-                    while True:
-                        chunk = await response.content.read(1024)
-                        if not chunk:
-                            break
-                        await file.write(chunk)
-                print("Download completed successfully.")
-            else:
-                print(f"Failed to download video. HTTP status: {response.status}")
-
-
-async def upload_to_oss(video_id, video_url):
-    """
-    Uploads video file to OSS
-    :param video_id:
-    :param video_url:
-    :return:
-    """
-    print("start download video...")
-    await download_video(video_url, "temp.mp4")
-    print("video download successfully done")
-    oss_object_key = f'single_video/{video_id}'
-    auth = oss2.Auth(OSS_ACCESS_KEY_ID, OSS_ACCESS_KEY_SECRET)
-    bucket = oss2.Bucket(auth, OSS_BUCKET_ENDPOINT, "art-pubbucket")
-    response = bucket.put_object_from_file(oss_object_key, "temp.mp4")
-    if 'Content-Length' in response.headers:
-        return {
-            'status': response.status,
-            'oss_object_key': oss_object_key}
-    raise AssertionError(f'OSS上传失败,请求ID: \n{response.headers["x-oss-request-id"]}')
-
-
-def upload_to_pq(oss_object_key, title, user_id="69611689"):
-    """
-    Uploads video files to PQ
-    :param oss_object_key:
-    :param title:
-    :param user_id:
-    :return:
-    """
-    url = "https://vlogapi.piaoquantv.com/longvideoapi/crawler/video/send"
-    payload = dict(
-        pageSource='vlog-pages/post/post-video-post',
-        videoPath=oss_object_key,
-        width='720',
-        height='1280',
-        fileExtensions='mp4',
-        viewStatus='1',
-        title=title,
-        careModelStatus='1',
-        token='f04f58d6e664cbc9902660a1e8d20ce6cd7fdb0f',
-        loginUid=user_id,
-        versionCode='719',
-        machineCode='weixin_openid_o0w175aZ4FJtqVsA1tcozJDJHdDU',
-        appId='wx89e7eb06478361d7',
-        clientTimestamp='1703337579331',
-        machineInfo='{"sdkVersion":"3.2.5","brand":"iPhone","language":"zh_CN","model":"iPhone 12 Pro<iPhone13,3>","platform":"ios","system":"iOS 15.6.1","weChatVersion":"8.0.44","screenHeight":844,"screenWidth":390,"pixelRatio":3,"windowHeight":762,"windowWidth":390,"softVersion":"4.1.719"}',
-        sessionId='1703337560040-27bfe208-a389-f476-db1d-840681e04b32',
-        subSessionId='1703337569952-8f56d53c-b36d-760e-8abe-0b4a027cd5bd',
-        senceType='1089',
-        hotSenceType='1089',
-        id='1050',
-        channel='pq'
-    )
-
-    payload['videoPath'] = oss_object_key
-    payload['title'] = title
-    data = urllib.parse.urlencode(payload)
-    headers = {
-        'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 15_6_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 MicroMessenger/8.0.44(0x18002c2d) NetType/WIFI Language/zh_CN',
-        'Accept-Encoding': 'gzip,compress,br,deflate',
-        'Referer': 'https://servicewechat.com/wx89e7eb06478361d7/726/page-frame.html',
-        'Content-Type': 'application/x-www-form-urlencoded',
-        'Cookie': 'JSESSIONID=A60D96E7A300A25EA05425B069C8B459'
-    }
-    response = requests.post(url, data=data, headers=headers)
-    data = response.json()
-    code = data["code"]
-    return code

+ 15 - 2
applications/process.py

@@ -162,6 +162,7 @@ class ProcessParams(object):
             response = request_for_info(video_id)
             productionCover = response['data'][0]['shareImgPath']
             productionName = response["data"][0]['title']
+            videoUrl = response['data'][0]['videoPath']
             programAvatar = "/static/logo.png"
             programId = "wx89e7eb06478361d7"
             programName = "票圈vlog"
@@ -174,8 +175,19 @@ class ProcessParams(object):
                 "programName": programName,
                 "source": source,
                 "rootShareId": root_share_id,
-                "productionPath": productionPath
+                "productionPath": productionPath,
+                "videoUrl": videoUrl
             }
+            logging(
+                code="2000",
+                info="统计 root_share_id && video_id",
+                function="process",
+                trace_id=self.trace_id,
+                data={
+                    "rootShareId": root_share_id,
+                    "videoId": video_id
+                }
+            )
         else:
             result = {
                 "productionCover": None,
@@ -185,7 +197,8 @@ class ProcessParams(object):
                 "programName": None,
                 "source": None,
                 "rootShareId": None,
-                "productionPath": None
+                "productionPath": None,
+                "videoUrl": None
             }
         logging(
             code="1002",

+ 1 - 0
applications/routes.py

@@ -7,6 +7,7 @@ from quart import Blueprint, jsonify, request
 
 from applications.log import logging
 from applications.process import ProcessParams
+from applications.functions.search_video import search_spider
 
 my_blueprint = Blueprint('kimi', __name__)
 

+ 1 - 1
test.py

@@ -13,7 +13,7 @@ def request_data(url):
     body = {
         "accountName": "魔法美学馆",
         "content": "8月20日,最高人民法院举行新闻发布会,发布新修订的《最高人民法院关于审理民间借贷案件适用法律若干问题的规定》(以下简称《规定》)并回答记者提问。",
-        "title": "最高法发布新修订的《关于审理民间借贷案件适用法律若干问题的规定》(附全文)"
+        "title": "邯郸杀人案件"
     }
     t = time.time()
     res = requests.post(url, json=body)