123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132 |
- """
- @author: luojunhui
- """
- import json
- import time
- import requests
- from applications.aliyunLogApi import log
- from applications.decoratorApi import retryOnNone
class WeixinSpider:
    """
    Client for the WeChat ("wei_xin") crawler HTTP service.

    Every public method is a classmethod that POSTs a JSON payload to a single
    endpoint and returns the decoded JSON body of the response.
    """

    base_url = "http://crawler-cn.aiddit.com/crawler/wei_xin"
    headers = {"Content-Type": "application/json"}
    # Seconds passed to requests as the timeout of every call.
    request_timeout = 120
    # Seconds slept around recommend-article calls
    # (presumably to throttle the upstream API -- TODO confirm).
    recommend_pause = 3

    @classmethod
    def _post_json(cls, url, payload) -> dict:
        """
        POST ``payload`` as a JSON body to ``url`` and decode the reply.

        :param url: absolute URL of the endpoint
        :param payload: JSON-serialisable dict sent as the request body
        :return: the response body as parsed by ``response.json()``
        """
        response = requests.post(
            url,
            headers=cls.headers,
            data=json.dumps(payload),
            timeout=cls.request_timeout,
        )
        return response.json()

    @classmethod
    @retryOnNone()
    def search_articles(cls, title, page="1") -> dict:
        """
        Search articles by keyword.

        :param title: keyword to search for
        :param page: cursor sent to the service as ``cursor``
        :return: decoded JSON response of the ``/keyword`` endpoint
        """
        url = "{}/keyword".format(cls.base_url)
        return cls._post_json(url, {"keyword": title, "cursor": page})

    # NOTE(review): a ``@retryOnNone()`` decorator was present but commented
    # out on this method; confirm whether retries are wanted here.
    @classmethod
    def get_article_text(cls, content_link, is_count=False, is_cache=True) -> dict:
        """
        Fetch the detail of a single article.

        :param content_link: link of the article to fetch
        :param is_count: forwarded to the service as ``is_count``
        :param is_cache: forwarded to the service as ``is_cache``
        :return: decoded JSON response of the ``/detail`` endpoint
        """
        url = "{}/detail".format(cls.base_url)
        payload = {
            "content_link": content_link,
            "is_count": is_count,
            "is_ad": False,  # always sent as False
            "is_cache": is_cache,
        }
        return cls._post_json(url, payload)

    @classmethod
    @retryOnNone()
    def update_msg_list(cls, ghId, index) -> dict:
        """
        Fetch one page of an account's message list.

        :param ghId: account id, sent to the service as ``account_id``
        :param index: cursor sent to the service as ``cursor``
        :return: decoded JSON response of the ``/blogger`` endpoint
        """
        url = "{}/blogger".format(cls.base_url)
        return cls._post_json(url, {"account_id": ghId, "cursor": index})

    @classmethod
    @retryOnNone()
    def get_account_by_url(cls, content_url) -> dict:
        """
        Get account information from an article link.

        :param content_url: link of an article published by the account
        :return: decoded JSON response of the ``/account_info`` endpoint
        """
        url = "{}/account_info".format(cls.base_url)
        return cls._post_json(url, {"content_link": content_url})

    @classmethod
    def get_recommend_articles(cls, content_link, max_retries=5) -> dict:
        """
        Use a content link to get recommended articles.

        The request is repeated while the response ``code`` is not ``0``.
        Retries are bounded and spaced out instead of recursing without limit,
        so a persistently failing endpoint can no longer exhaust the
        interpreter's recursion depth or be called in a tight loop.

        :param content_link: link of the article to get recommendations for
        :param max_retries: extra attempts allowed after the first failure;
            negative values are treated as ``0``
        :return: decoded JSON response whose ``code`` is ``0``
        :raises RuntimeError: when every attempt returned a non-zero ``code``
            (``RuntimeError`` is the base class of the ``RecursionError`` that
            unbounded recursion would eventually have raised)
        """
        url = "{}/recommend".format(cls.base_url)
        payload = {"content_link": content_link}
        attempts = max(max_retries, 0) + 1
        last_code = None
        for attempt in range(attempts):
            if attempt:
                # Give the service a moment before asking again.
                time.sleep(cls.recommend_pause)
            response_json = cls._post_json(url, payload)
            # A reply without a "code" key is treated as a failed attempt.
            last_code = response_json.get("code")
            if last_code == 0:
                time.sleep(cls.recommend_pause)
                return response_json
        raise RuntimeError(
            "get_recommend_articles failed after {} attempts, last code: {}".format(
                attempts, last_code
            )
        )

    @classmethod
    def get_recommend_articles_v2(cls, content_link) -> dict:
        """
        Use a content link to get recommended articles via datapi.top.

        The call and its decoded response are written to the log before the
        response is returned.

        :param content_link: link of the article to get recommendations for
        :return: decoded JSON response of the ``relatedarticle`` endpoint
        """
        url = "http://datapi.top/wxapi/relatedarticle"
        # NOTE(review): the API token is hard-coded in source; consider moving
        # it to configuration or a secret store.
        payload = {"content_link": content_link, "token": "401e4d3c85068bb5"}
        # Decode the body once and reuse it for both the log and the return.
        response_json = cls._post_json(url, payload)
        # The log message labels this endpoint as a paid interface.
        log(
            task="article_association_crawler",
            function="get_recommend_articles_v2",
            message="获取推荐链接,付费接口",
            data={"content_link": content_link, "response": response_json},
        )
        time.sleep(cls.recommend_pause)
        return response_json
|