9 месяцев назад · 73d6c2fb3f
--- a/applications/api/__init__.py
+++ b/applications/api/__init__.py
@@ -2,4 +2,5 @@
 
				 @author: luojunhui
			
 
				 """
			
 
				 from .moon_shot_api import fetch_moon_shot_response
			
 
				-from .nlp_api import similarity_between_title_list
			
 
				+from .nlp_api import similarity_between_title_list
			
 
				+from .gewe_api import WechatChannelAPI
			
--- a/applications/api/gewe_api.py
+++ b/applications/api/gewe_api.py
@@ -0,0 +1,108 @@
 
				+"""
			
 
				+@author: luojunhui
			
 
				+"""
			
 
				+
			
 
				+from tenacity import (
			
 
				+    retry,
			
 
				+    stop_after_attempt,
			
 
				+    wait_exponential,
			
 
				+    retry_if_exception_type,
			
 
				+)
			
 
				+from requests.exceptions import RequestException
			
 
				+import requests
			
 
				+import json
			
 
				+from typing import Optional, Dict
			
 
				+
			
 
				# Keyword arguments for tenacity's ``retry`` decorator; unpack them with
				# ``@retry(**COMMON_RETRY)``.
				COMMON_RETRY = {
				    # Give up after three attempts in total.
				    "stop": stop_after_attempt(3),
				    # Exponential back-off between attempts, bounded to 2..30.
				    "wait": wait_exponential(min=2, max=30),
				    # Only these exception types trigger another attempt.
				    "retry": retry_if_exception_type((RequestException, TimeoutError)),
				    # Once the attempts are exhausted, re-raise the last exception.
				    "reraise": True,
				}
			
 
				+
			
 
				+
			
 
				class WechatChannelAPI:
				    """
				    Client for the WeChat channel endpoints exposed by the GeWe gateway.

				    Every request is a JSON POST authenticated with the ``X-GEWE-TOKEN``
				    header. The HTTP call is retried according to ``COMMON_RETRY``; the
				    public methods keep a "return ``None`` on request failure" contract.
				    """

				    def __init__(self, base_url: str, token: str, app_id: str):
				        """
				        :param base_url: scheme and host of the gateway, without a trailing slash
				        :param token: value sent in the ``X-GEWE-TOKEN`` header
				        :param app_id: value sent as ``appId`` in every payload
				        """
				        self.base_url = base_url
				        self.token = token
				        self.app_id = app_id

				    @retry(**COMMON_RETRY)
				    def _post_json(self, path: str, payload: Dict) -> Dict:
				        """
				        POST ``payload`` as JSON to ``base_url + path`` and decode the reply.

				        Exceptions are deliberately NOT caught here: the retry decorator can
				        only react to exceptions that leave this function. Catching them
				        inside the decorated function would disable the retry entirely.

				        :param path: endpoint path, starting with ``/``
				        :param payload: JSON-serialisable request body
				        :return: decoded JSON response body
				        :raises RequestException: transport error, non-2xx status or an
				            undecodable body, once the retry attempts are exhausted
				        """
				        headers = {"X-GEWE-TOKEN": self.token, "Content-Type": "application/json"}
				        response = requests.post(
				            f"{self.base_url}{path}", headers=headers, json=payload, timeout=60
				        )
				        response.raise_for_status()
				        return response.json()

				    def search(
				            self,
				            search_key: str,
				            search_type: int,
				            page: int = 0,
				            cookie: str = "",
				            search_id: str = "",
				            offset: int = 0,
				    ) -> Optional[Dict]:
				        """
				        Search WeChat channel content (with retry).

				        :param search_key: search keyword
				        :param search_type: search type, 1: search all videos, 2: search channel accounts
				        :param page: page number
				        :param cookie: cookie obtained after login
				        :param search_id: search id
				        :param offset: offset
				        :return: the search result dict, or None when the request or the
				            response decoding still fails after the retries. A builtin
				            ``TimeoutError`` is not caught and propagates to the caller.
				        """
				        payload = {
				            "appId": self.app_id,
				            "proxyIp": "",
				            "content": search_key,
				            "category": search_type,
				            "filter": 0,
				            "page": page,
				            "cookie": cookie,
				            "searchId": search_id,
				            "offset": offset,
				        }

				        try:
				            return self._post_json("/gewe/v2/api/finder/search", payload)
				        except RequestException as e:
				            print(f"API请求失败: {e}")
				        except json.JSONDecodeError as e:
				            print(f"响应解析失败: {e}")
				        return None

				    def get_channel_video_list(
				            self, user_id: str, last_buffer: str = ""
				    ) -> Optional[Dict]:
				        """
				        Fetch the video list of a channel account (with retry).

				        :param user_id: channel account id
				        :param last_buffer: pagination marker used to fetch the next page
				        :return: the video list dict, or None when the request or the
				            response decoding still fails after the retries. A builtin
				            ``TimeoutError`` is not caught and propagates to the caller.
				        """
				        payload = {
				            "appId": self.app_id,
				            "proxyIp": "",
				            "lastBuffer": last_buffer,
				            "toUserName": user_id,
				            "maxId": 0,
				        }

				        try:
				            return self._post_json("/gewe/v2/api/finder/userPage", payload)
				        except RequestException as e:
				            print(f"获取视频列表请求失败: {e}")
				        except json.JSONDecodeError as e:
				            print(f"响应解析失败: {e}")
				        return None
			
--- a/coldStartTasks/crawler/channels/__init__.py
+++ b/coldStartTasks/crawler/channels/__init__.py
@@ -1,5 +1,6 @@
 
				 """
			
 
				 @author: luojunhui
			
 
				+@tool: pycharm && deepseek
			
 
				 """
			
 
				-from .blogger import get_channel_account_video_list
			
 
				+from .blogger import get_channel_account_videos
			
 
				 from .search import search_in_wechat_channel
			
--- a/coldStartTasks/crawler/channels/blogger.py
+++ b/coldStartTasks/crawler/channels/blogger.py
@@ -2,53 +2,21 @@
 
				 @author: luojunhui
			
 
				 """
			
 
				 
			
 
				-import json
			
 
				-
			
 
				-from typing import List, Dict
			
 
				-
			
 
				 import requests
			
 
				+import json
			
 
				 
			
 
				-cookie_ = "RK=kreEdgt2YJ; ptcz=988b2dee721fc7f396a696a31bcfaca33cdb372f1b881ee5affbce5e5d978e8c; _qimei_uuid42=186031009051009d7cd1945011a64a99cb68d2482e; _qimei_q36=; _qimei_h38=428c111f7cd1945011a64a990300000ca18603; pgv_pvid=2616476048; pgv_pvi=2160320512; pac_uid=0_ddQwmCn3ZjrMh; ua_id=KprpYPwJOYehnDVGAAAAAN8sApDAL6kVurDXPvDD1LE=; wxuin=19406922801022; mm_lang=zh_CN; qq_domain_video_guid_verify=3dffe9bc35c6dbe6; eas_sid=51Q7O323M2M9L6L6u9o3V6C6O1; _qimei_fingerprint=f9c116e7a475fe667b054d9b03458b75; rewardsn=; wxtokenkey=777; poc_sid=HB1oo2ejseSHevB6BGoDCoB-GU-iNU8OPY2xtDQ-; pgv_info=ssid=s3327168304; uuid=de51161116b3868d07b845a0e78be181; rand_info=CAESIB81zx0VcstyAKgHM0wgP6hOjqZBcxfhzXI6WXcKLyr6; slave_bizuin=3524986952; data_bizuin=3524986952; bizuin=3524986952; data_ticket=H2f/yAT7QnqDUp3owWqHfV+clyYa5e7HoxyFe2dId3RLeHVcdlyNTQHLPSSJa/e1; slave_sid=c0dMelVKMWJ4WnZWc0ppOUcxS0lmSkpmbEV5ZFVfTUVIU2N2NmF3RXp6dDNDWEJma3FCYmdaTXVqdEpfbk9JQXBWUFUxYjRNbDhxWFdsejhuR05iZ3JLVGd4Y3ppZG92Y3EySGNCbFZNVldPU2V2bzE5WkVmMHNMVjhmZ0hXYThzdmFaMFJoNkw3Uk44Rlk0; slave_user=gh_0d8cf8319a3b; xid=73cc085ade4f756adfb9c3b36c406132; _clck=3524986952|1|ftl|0; _clsk=rvxmb1|1740039679010|2|1|mp.weixin.qq.com/weheat-agent/payload/record"
			
 
				+from applications.api import WechatChannelAPI
			
 
				+from config import gewe_token, gewe_app_id
			
 
				 
			
 
				 
			
 
				-def get_channel_account_video_list(
			
 
				-    account_id: str,
			
 
				-    token: str,
			
 
				-    buffer: str = "",
			
 
				-    count: int = 15,
			
 
				-    cookie: str = "",
			
 
				-) -> List[Dict]:
			
 
				+def get_channel_account_videos(user_id, last_buffer=""):
			
 
				     """
			
 
				-    :param account_id: 视频号id
			
 
				-    :param token: 视频号token
			
 
				-    :param buffer: 类似于cursor
			
 
				-    :param count: 一次抓取的视频数量
			
 
				-    :param cookie: 登录后的cookie
			
 
				-    :return: result_list
			
 
				+    get channel account videos
			
 
				     """
			
 
				-    url = "https://mp.weixin.qq.com/cgi-bin/videosnap?"
			
 
				-    params = {
			
 
				-        "action": "get_feed_list",
			
 
				-        "username": account_id,
			
 
				-        "buffer": buffer,
			
 
				-        "count": count,
			
 
				-        "scene": "0",
			
 
				-        "token": token,
			
 
				-        "lang": "zh_CN",
			
 
				-        "f": "json",
			
 
				-        "ajax": "1",
			
 
				-    }
			
 
				-    headers = {
			
 
				-        "accept": "*/*",
			
 
				-        "accept-language": "zh,zh-CN;q=0.9",
			
 
				-        "priority": "u=1, i",
			
 
				-        "referer": "https://mp.weixin.qq.com/cgi-bin/appmsg?t=media/appmsg_edit_v2&action=edit&isNew=1&type=77&createType=0&token={}&lang=zh_CN".format(
			
 
				-            token
			
 
				-        ),
			
 
				-        "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36",
			
 
				-        "x-requested-with": "XMLHttpRequest",
			
 
				-        "cookie": cookie,
			
 
				-    }
			
 
				-    response = requests.request("GET", url, headers=headers, params=params)
			
 
				-    response_json = response.json()
			
 
				-    return response_json
			
 
				+    channel_api = WechatChannelAPI(
			
 
				+        base_url='http://api.geweapi.com',
			
 
				+        token=gewe_token,
			
 
				+        app_id=gewe_app_id
			
 
				+    )
			
 
				+    result = channel_api.get_channel_video_list(user_id, last_buffer)
			
 
				+    return result
			
--- a/coldStartTasks/crawler/channels/search.py
+++ b/coldStartTasks/crawler/channels/search.py
@@ -2,10 +2,10 @@
 
				 @author: luojunhui
			
 
				 """
			
 
				 
			
 
				-import json
			
 
				-from typing import List, Dict
			
 
				+from typing import Dict
			
 
				 
			
 
				-import requests
			
 
				+from applications.api import WechatChannelAPI
			
 
				+from config import gewe_token, gewe_app_id
			
 
				 
			
 
				 
			
 
				 def search_in_wechat_channel(
			
@@ -25,23 +25,17 @@ def search_in_wechat_channel(
 
				     :param offset: 偏移量
			
 
				     :return: result_list
			
 
				     """
			
 
				-    token = "d3fb918f-0f36-4769-b095-410181614231"
			
 
				-    app_id = "wx_GKpVW8xfEhcaxMIK9sSm6"
			
 
				-    url = "http://api.geweapi.com/gewe/v2/api/finder/search"
			
 
				-    payload = json.dumps(
			
 
				-        {
			
 
				-            "appId": app_id,
			
 
				-            "proxyIp": "",
			
 
				-            "content": search_key,
			
 
				-            "category": search_type,
			
 
				-            "filter": 0,
			
 
				-            "page": page,
			
 
				-            "cookie": cookie,
			
 
				-            "searchId": search_id,
			
 
				-            "offset": offset,
			
 
				-        }
			
 
				+    channel_api = WechatChannelAPI(
			
 
				+        base_url='http://api.geweapi.com',
			
 
				+        token=gewe_token,
			
 
				+        app_id=gewe_app_id
			
 
				     )
			
 
				-    headers = {"X-GEWE-TOKEN": token, "Content-Type": "application/json"}
			
 
				-    response = requests.request("POST", url, headers=headers, data=payload, timeout=60)
			
 
				-    response_json = response.json()
			
 
				-    return response_json
			
 
				+    result = channel_api.search(
			
 
				+        search_key=search_key,
			
 
				+        search_type=search_type,
			
 
				+        page=page,
			
 
				+        cookie=cookie,
			
 
				+        search_id=search_id,
			
 
				+        offset=offset
			
 
				+    )
			
 
				+    return result
			
--- a/config/__init__.py
+++ b/config/__init__.py
@@ -88,3 +88,8 @@ moon_shot = {
 
				     "model": "moonshot-v1-32k",
			
 
				     "base_url": "https://api.moonshot.cn/v1"
			
 
				 }
			
 
				+
			
 
				+
			
 
				# GeWe API credentials.
				# Prefer the GEWE_TOKEN / GEWE_APP_ID environment variables so that secrets
				# do not have to live in source control. The literals are kept only as a
				# backward-compatible fallback.
				# NOTE(review): these values are committed to the repository; consider
				# rotating them and dropping the fallback once deployments set the
				# environment variables.
				import os

				gewe_token = os.environ.get("GEWE_TOKEN", "d3fb918f-0f36-4769-b095-410181614231")
				gewe_app_id = os.environ.get("GEWE_APP_ID", "wx_GKpVW8xfEhcaxMIK9sSm6")
			
--- a/requirements.txt
+++ b/requirements.txt
@@ -20,4 +20,5 @@ protobuf~=3.20.3
 
				 openai~=1.17.0
			
 
				 oss2~=2.19.1
			
 
				 fake-useragent~=1.5.1
			
 
				-playwright~=1.49.1
			
 
				+playwright~=1.49.1
			
 
				+tenacity~=9.0.0
			
--- a/tasks/crawler_channel_account_videos.py
+++ b/tasks/crawler_channel_account_videos.py
@@ -0,0 +1,18 @@
 
				+"""
			
 
				+@author: luojunhui
			
 
				+@tool: pycharm && deepseek
			
 
				+"""
			
 
				+
			
 
				+from applications.db import DatabaseConnector
			
 
				+from config import long_articles_config
			
 
				+from coldStartTasks.crawler.channels import get_channel_account_videos
			
 
				+
			
 
				+
			
 
				class CrawlerChannelAccountVideos:
				    """
				    Crawl the videos of channel accounts.

				    Constructing an instance creates a ``DatabaseConnector`` configured with
				    ``long_articles_config`` and connects it immediately.
				    """

				    def __init__(self):
				        # Store the connector on the instance before connecting.
				        connector = DatabaseConnector(db_config=long_articles_config)
				        self.db_client = connector
				        connector.connect()
			
 
				+