Browse Source

Comment out the reuse-of-historical-videos logic in task1

罗俊辉 7 months ago
parent
commit
0430245a92
4 changed files with 82 additions and 116 deletions
  1. +1 -1 applications/deal/response.py
  2. +1 -1 tasks/task1.py
  3. +80 -0 test_code/hurry.py
  4. +0 -114 test_code/publish_by_async.py

+ 1 - 1
applications/deal/response.py

@@ -22,7 +22,7 @@ class Response(object):
     def __init__(self, trace_id, mysql_client, mini_program_type):
         """
         Long articles: 25, 29, 31
-        Paid traffic: 36
+        Paid traffic: 33
         WeCom: 27
         :param trace_id:
         :param mysql_client:

+ 1 - 1
tasks/task1.py

@@ -305,7 +305,7 @@ class MatchTask1(object):
                 info="存在历史文章",
                 trace_id=trace_id
             )
-            await self.use_exists_contents_videos(video_id_list=video_id_list, params=params)
+            # await self.use_exists_contents_videos(video_id_list=video_id_list, params=params)
         else:
             flag = await self.judge_content_processing(content_id)
             if flag:

+ 80 - 0
test_code/hurry.py

@@ -0,0 +1,80 @@
+"""
+@author: luojunhui
+"""
+import json
+
+import pymysql
+from tqdm import tqdm
+
+spider_connection = pymysql.connect(
+    host="rm-bp1159bu17li9hi94.mysql.rds.aliyuncs.com",
+    port=3306,
+    user="crawler",
+    passwd="crawler123456@",
+    db="piaoquan-crawler",
+    charset="utf8mb4"
+)
+
+with open("id.txt", encoding="utf-8") as f:
+    data = f.readlines()
+
+# Map new video id -> old video id from the comma-separated dump
+# (only referenced by the commented-out filter further down).
+id_map = {}
+for line in data:
+    new_id = line.split(",")[1].strip()
+    old_id = line.split(",")[0]
+    id_map[new_id] = old_id
+print(len(id_map))
+
+with open("update_video_trace_id.json", encoding="utf-8") as f:
+    publish_data = json.loads(f.read())
+
+print(len(publish_data))
+
+
+def update_into_long_videos(trace_id, vid1, vid2, vid3):
+    """
+    Update the three recall video ids for one trace_id.
+    :param trace_id:
+    :param vid1:
+    :param vid2:
+    :param vid3:
+    :return:
+    """
+    update_sql = """
+        UPDATE long_articles_video
+        SET recall_video_id1 = %s, recall_video_id2 = %s, recall_video_id3 = %s
+        WHERE trace_id = %s;
+    """
+    cursor = spider_connection.cursor()
+    cursor.execute(
+        update_sql,
+        (vid1, vid2, vid3, trace_id)
+    )
+    spider_connection.commit()
+
+
+ff = 0  # counts items that fail to parse or carry fewer than three videos
+for item in tqdm(publish_data):
+    trace_id = item['trace_id']
+    data_info = item['result_data']
+    try:
+        vid_list = []
+        for video_obj in json.loads(data_info):
+            # productionPath embeds the video id as a percent-encoded
+            # query parameter: ...id%3D<video_id>%26su...
+            path = video_obj['productionPath']
+            video_id = path.split("id%3D")[1].split("%26su")[0]
+            # if id_map.get(video_id):
+            vid_list.append(video_id)
+        update_into_long_videos(
+            trace_id=trace_id,
+            vid1=vid_list[0],
+            vid2=vid_list[1],
+            vid3=vid_list[2]
+        )
+
+    except Exception as e:
+        ff += 1
+        print("No videos: {}".format(e))
+
+print(ff)  # number of items that could not be updated
+
+

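The split("id%3D")/split("%26su") slicing in hurry.py is brittle: it assumes the id parameter always precedes su and that the encoding never changes. A sketch of a sturdier extraction using the standard library's urllib.parse (the extract_video_id name and the sample path are illustrative, not from the repository):

from urllib.parse import parse_qs, unquote, urlsplit

def extract_video_id(production_path):
    """Decode the percent-encoded path and return its id query parameter, or None."""
    query = urlsplit(unquote(production_path)).query
    ids = parse_qs(query).get("id")
    return ids[0] if ids else None

# Illustrative percent-encoded path of the shape hurry.py slices apart:
assert extract_video_id("pages/video?id%3D4567%26su%3Dabc") == "4567"
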
+ 0 - 114
test_code/publish_by_async.py

@@ -1,114 +0,0 @@
-"""
-@author: luojunhui
-"""
-import asyncio
-import json
-import os.path
-
-import aiohttp
-import aiofiles
-
-
-async def write_json_file(old_video_id, new_obj):
-    """
-    Write the API response JSON to vv/<old_video_id>.json asynchronously
-    :param old_video_id:
-    :param new_obj:
-    """
-    filename = "vv/{}.json".format(old_video_id)
-    async with aiofiles.open(filename, mode='w', encoding='utf-8') as f:
-        await f.write(json.dumps(new_obj, ensure_ascii=False, indent=4))
-
-
-async def fetch_data(url, headers, payload):
-    """
-    POST the form payload to url and return the parsed JSON response.
-    :param url:
-    :param headers:
-    :param payload:
-    :return:
-    """
-    async with aiohttp.ClientSession() as session:
-        async with session.post(url, headers=headers, data=payload) as response:
-            return await response.json()
-
-
-async def publish(obj):
-    """
-    Publish one video through the crawler send API, skipping ids that
-    already have a result file under vv/.
-    :param obj:
-    :return:
-    """
-    cover = obj['cover']
-    uid = obj['uid']
-    title = obj['title']
-    video_path = obj['oss_path']
-    old_video_id = obj['vid']
-    path = "vv/{}.json".format(old_video_id)
-    if not os.path.exists(path):
-        try:
-            url = "https://vlogapi.piaoquantv.com/longvideoapi/crawler/video/send"
-            headers = {
-                "User-Agent": "PQSpeed/486 CFNetwork/1410.1 Darwin/22.6.0",
-                "cookie": "JSESSIONID=4DEA2B5173BB9A9E82DB772C0ACDBC9F; JSESSIONID=D02C334150025222A0B824A98B539B78",
-                "referer": "http://appspeed.piaoquantv.com",
-                "token": "524a8bc871dbb0f4d4717895083172ab37c02d2f",
-                "accept-language": "zh-CN,zh-Hans;q=0.9",
-                "Content-Type": "application/x-www-form-urlencoded",
-            }
-            payload = {
-                "coverImgPath": cover,
-                "deviceToken": "9ef064f2f7869b3fd67d6141f8a899175dddc91240971172f1f2a662ef891408",
-                "fileExtensions": "MP4",
-                "loginUid": uid,
-                "networkType": "Wi-Fi",
-                "platform": "iOS",
-                "requestId": "fb972cbd4f390afcfd3da1869cd7d001",
-                "sessionId": "362290597725ce1fa870d7be4f46dcc2",
-                "subSessionId": "362290597725ce1fa870d7be4f46dcc2",
-                "title": title,
-                "token": "524a8bc871dbb0f4d4717895083172ab37c02d2f",
-                "uid": uid,
-                "versionCode": "486",
-                "versionName": "3.4.12",
-                "videoFromScene": "1",
-                "videoPath": video_path,
-                "viewStatus": "1",
-            }
-            new_obj = await fetch_data(url, headers, payload)
-            await write_json_file(old_video_id, new_obj)
-        except Exception as e:
-            # keep going when one publish fails; its result file is simply not written
-            print("publish failed:", e)
-        # return new_obj['data']['id']
-
-
-# Batch-processing helper
-async def process_in_batches(task_list, batch_size):
-    """
-    Run publish() over task_list in sequential batches of batch_size.
-    :param task_list:
-    :param batch_size:
-    :return:
-    """
-    for i in range(0, len(task_list), batch_size):
-        batch = task_list[i:i + batch_size]  # slice off the next batch
-        tasks = [publish(params) for params in batch]
-        results = await asyncio.gather(*tasks)
-        print("Batch {} results:".format(i), results)
-
-
-async def main():
-    with open("id_dict.json", encoding="utf-8") as f:
-        data = json.loads(f.read())
-    task_list = []
-    for key in data:
-        detail = data[key]
-        detail['vid'] = key
-        task_list.append(detail)
-    # Publish once, in batches; gathering a second full set of publish()
-    # coroutines here would send every video twice.
-    await process_in_batches(task_list, batch_size=30)
-
-asyncio.run(main())
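
The deleted script throttles by gathering fixed batches, so every batch waits for its slowest request. A common alternative is to bound concurrency with an asyncio.Semaphore; a minimal sketch reusing the publish() coroutine from the script above (the run_all name and the limit of 30 are illustrative, not from the repository):

import asyncio

async def run_all(task_list, limit=30):
    # One semaphore slot per in-flight request; a slow request
    # blocks only its own slot, not a whole batch.
    semaphore = asyncio.Semaphore(limit)

    async def guarded(params):
        async with semaphore:
            return await publish(params)

    return await asyncio.gather(*(guarded(p) for p in task_list))

# usage: asyncio.run(run_all(task_list))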