Преглед на файлове

视频号搜索功能开发

luojunhui преди 4 месеца
родител
ревизия
e62937a9ec
променени са 3 файла, в които са добавени 106 реда и са изтрити 0 реда
  1. 5 0
      coldStartTasks/crawler/channels/__init__.py
  2. 54 0
      coldStartTasks/crawler/channels/blogger.py
  3. 47 0
      coldStartTasks/crawler/channels/search.py

+ 5 - 0
coldStartTasks/crawler/channels/__init__.py

@@ -0,0 +1,5 @@
+"""
+@author: luojunhui
+"""
+from .blogger import get_channel_account_video_list
+from .search import search_in_wechat_channel

+ 54 - 0
coldStartTasks/crawler/channels/blogger.py

@@ -0,0 +1,54 @@
+"""
+@author: luojunhui
+"""
+
+import json
+
+from typing import List, Dict
+
+import requests
+
+# Hard-coded value for an HTTP "cookie" header: "; "-separated key=value pairs.
+# Nothing in the code shown for this module reads this constant;
+# get_channel_account_video_list takes its cookie from its `cookie` argument.
+# NOTE(review): this looks like a real logged-in session credential committed
+# to source control - confirm, and if so rotate it and load it from
+# configuration or a secret store instead of keeping it in the repository.
+cookie_ = "RK=kreEdgt2YJ; ptcz=988b2dee721fc7f396a696a31bcfaca33cdb372f1b881ee5affbce5e5d978e8c; _qimei_uuid42=186031009051009d7cd1945011a64a99cb68d2482e; _qimei_q36=; _qimei_h38=428c111f7cd1945011a64a990300000ca18603; pgv_pvid=2616476048; pgv_pvi=2160320512; pac_uid=0_ddQwmCn3ZjrMh; ua_id=KprpYPwJOYehnDVGAAAAAN8sApDAL6kVurDXPvDD1LE=; wxuin=19406922801022; mm_lang=zh_CN; qq_domain_video_guid_verify=3dffe9bc35c6dbe6; eas_sid=51Q7O323M2M9L6L6u9o3V6C6O1; _qimei_fingerprint=f9c116e7a475fe667b054d9b03458b75; rewardsn=; wxtokenkey=777; poc_sid=HB1oo2ejseSHevB6BGoDCoB-GU-iNU8OPY2xtDQ-; pgv_info=ssid=s3327168304; uuid=de51161116b3868d07b845a0e78be181; rand_info=CAESIB81zx0VcstyAKgHM0wgP6hOjqZBcxfhzXI6WXcKLyr6; slave_bizuin=3524986952; data_bizuin=3524986952; bizuin=3524986952; data_ticket=H2f/yAT7QnqDUp3owWqHfV+clyYa5e7HoxyFe2dId3RLeHVcdlyNTQHLPSSJa/e1; slave_sid=c0dMelVKMWJ4WnZWc0ppOUcxS0lmSkpmbEV5ZFVfTUVIU2N2NmF3RXp6dDNDWEJma3FCYmdaTXVqdEpfbk9JQXBWUFUxYjRNbDhxWFdsejhuR05iZ3JLVGd4Y3ppZG92Y3EySGNCbFZNVldPU2V2bzE5WkVmMHNMVjhmZ0hXYThzdmFaMFJoNkw3Uk44Rlk0; slave_user=gh_0d8cf8319a3b; xid=73cc085ade4f756adfb9c3b36c406132; _clck=3524986952|1|ftl|0; _clsk=rvxmb1|1740039679010|2|1|mp.weixin.qq.com/weheat-agent/payload/record"
+
+
+def get_channel_account_video_list(
+    account_id: str,
+    token: str,
+    buffer: str = "",
+    count: int = 15,
+    cookie: str = "",
+) -> List[Dict]:
+    """
+    :param account_id: 视频号id
+    :param token: 视频号token
+    :param buffer: 类似于cursor
+    :param count: 一次抓取的视频数量
+    :param cookie: 登录后的cookie
+    :return: result_list
+    """
+    url = "https://mp.weixin.qq.com/cgi-bin/videosnap?"
+    params = {
+        "action": "get_feed_list",
+        "username": account_id,
+        "buffer": buffer,
+        "count": count,
+        "scene": "0",
+        "token": token,
+        "lang": "zh_CN",
+        "f": "json",
+        "ajax": "1",
+    }
+    headers = {
+        "accept": "*/*",
+        "accept-language": "zh,zh-CN;q=0.9",
+        "priority": "u=1, i",
+        "referer": "https://mp.weixin.qq.com/cgi-bin/appmsg?t=media/appmsg_edit_v2&action=edit&isNew=1&type=77&createType=0&token={}&lang=zh_CN".format(
+            token
+        ),
+        "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36",
+        "x-requested-with": "XMLHttpRequest",
+        "cookie": cookie,
+    }
+    response = requests.request("GET", url, headers=headers, params=params)
+    response_json = response.json()
+    return response_json

+ 47 - 0
coldStartTasks/crawler/channels/search.py

@@ -0,0 +1,47 @@
+"""
+@author: luojunhui
+"""
+
+import json
+from typing import List, Dict
+
+import requests
+
+
+def search_in_wechat_channel(
+    search_key: str,
+    search_type: int,
+    page: int = 0,
+    cookie: str = "",
+    search_id: str = "",
+    offset: int = 0,
+) -> Dict:
+    """
+    :param search_key: 搜索关键字
+    :param search_type: 搜索类型,1: 搜索所有视频, 2: 搜索视频号账号
+    :param page: 页码
+    :param cookie: 登录后的cookie
+    :param search_id: 搜索id
+    :param offset: 偏移量
+    :return: result_list
+    """
+    token = "d3fb918f-0f36-4769-b095-410181614231"
+    app_id = "wx_GKpVW8xfEhcaxMIK9sSm6"
+    url = "http://api.geweapi.com/gewe/v2/api/finder/search"
+    payload = json.dumps(
+        {
+            "appId": app_id,
+            "proxyIp": "",
+            "content": search_key,
+            "category": search_type,
+            "filter": 0,
+            "page": page,
+            "cookie": cookie,
+            "searchId": search_id,
+            "offset": offset,
+        }
+    )
+    headers = {"X-GEWE-TOKEN": token, "Content-Type": "application/json"}
+    response = requests.request("POST", url, headers=headers, data=payload, timeout=60)
+    response_json = response.json()
+    return response_json