|
@@ -11,80 +11,83 @@ class WeixinSpider(object):
|
|
|
"""
|
|
|
Update account articles
|
|
|
"""
|
|
|
- ip = "8.217.190.241"
|
|
|
+ # ip = "8.217.190.241"
|
|
|
# ip = "47.98.154.124"
|
|
|
- port = "8888"
|
|
|
+ # port = "8888"
|
|
|
+ base_url = "http://crawler-cn.aiddit.com/crawler/wei_xin"
|
|
|
+ headers = {
|
|
|
+ "Content-Type": "application/json"
|
|
|
+ }
|
|
|
|
|
|
@classmethod
|
|
|
@retryOnNone()
|
|
|
- def search_articles(cls, title):
|
|
|
+ def search_articles(cls, title) -> dict:
|
|
|
"""
|
|
|
search articles in wx
|
|
|
:return:
|
|
|
"""
|
|
|
- url = "http://{}:{}/crawler/wei_xin/keyword".format(cls.ip, cls.port)
|
|
|
+ url = "{}/keyword".format(cls.base_url)
|
|
|
payload = json.dumps({
|
|
|
"keyword": title,
|
|
|
"cursor": "1"
|
|
|
})
|
|
|
- headers = {
|
|
|
- 'Content-Type': 'application/json'
|
|
|
- }
|
|
|
-
|
|
|
- response = requests.request("POST", url, headers=headers, data=payload)
|
|
|
+ response = requests.request("POST", url, headers=cls.headers, data=payload)
|
|
|
return response.json()
|
|
|
|
|
|
@classmethod
|
|
|
- # @retryOnNone()
|
|
|
- def get_article_text(cls, content_link):
|
|
|
+ def get_article_text(cls, content_link, is_count=False) -> dict:
|
|
|
"""
|
|
|
获取文章
|
|
|
+ :param is_count:
|
|
|
:param content_link:
|
|
|
:return:
|
|
|
"""
|
|
|
- url = "http://{}:{}/crawler/wei_xin/detail".format(cls.ip, cls.port)
|
|
|
+ url = "{}/detail".format(cls.base_url)
|
|
|
payload = json.dumps({
|
|
|
"content_link": content_link,
|
|
|
- "is_count": False,
|
|
|
+ "is_count": is_count,
|
|
|
"is_ad": False
|
|
|
})
|
|
|
- headers = {
|
|
|
- 'Content-Type': 'application/json'
|
|
|
- }
|
|
|
- response = requests.request("POST", url, headers=headers, data=payload)
|
|
|
+ response = requests.request("POST", url, headers=cls.headers, data=payload)
|
|
|
return response.json()
|
|
|
|
|
|
@classmethod
|
|
|
@retryOnNone()
|
|
|
- def update_msg_list(cls, ghId, index):
|
|
|
+ def update_msg_list(cls, ghId, index) -> dict:
|
|
|
"""
|
|
|
:return:
|
|
|
"""
|
|
|
- url = 'http://{}:{}/crawler/wei_xin/blogger'.format(cls.ip, cls.port)
|
|
|
+ url = '{}/blogger'.format(cls.base_url)
|
|
|
payload = {
|
|
|
'account_id': ghId,
|
|
|
'cursor': index,
|
|
|
}
|
|
|
- headers = {
|
|
|
- 'Content-Type': 'application/json'
|
|
|
- }
|
|
|
- response = requests.post(url, headers=headers, data=json.dumps(payload), timeout=120)
|
|
|
- # print("response", response.text)
|
|
|
+ response = requests.post(url=url, headers=cls.headers, data=json.dumps(payload), timeout=120)
|
|
|
return response.json()
|
|
|
|
|
|
@classmethod
|
|
|
@retryOnNone()
|
|
|
- def get_account_by_url(cls, content_url):
|
|
|
+ def get_account_by_url(cls, content_url) -> dict:
|
|
|
"""
|
|
|
通过文章获取账号信息
|
|
|
:param content_url:
|
|
|
:return:
|
|
|
"""
|
|
|
- response = requests.request(
|
|
|
- "POST",
|
|
|
- url='http://{}:{}/crawler/wei_xin/account_info'.format(cls.ip, cls.port),
|
|
|
- headers={'Content-Type': 'application/json'},
|
|
|
- json={"content_link": content_url}
|
|
|
- )
|
|
|
+ url = '{}/account_info'.format(cls.base_url)
|
|
|
+ data = {"content_link": content_url}
|
|
|
+ response = requests.request("POST", url=url, headers=cls.headers, json=data, timeout=120)
|
|
|
return response.json()
|
|
|
|
|
|
+ @classmethod
|
|
|
+ def get_recommend_articles(cls, content_link) -> dict:
|
|
|
+ """
|
|
|
+ use content link to get recommend articles
|
|
|
+ :param content_link:
|
|
|
+ :return:
|
|
|
+ """
|
|
|
+ url = "{}/recommend".format(cls.base_url)
|
|
|
+ payload = json.dumps(
|
|
|
+ {"content_link": content_link}
|
|
|
+ )
|
|
|
+ response = requests.request("POST", url=url, headers=cls.headers, data=payload, timeout=120)
|
|
|
+ return response.json()
|