1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980 |
- """
- @author: luojunhui
- """
- import requests
- from applications.functions import title_sim_v2_by_list
- from applications.functions import get_article_title_url_list
class LongArticlesPipeline(object):
    """
    Long articles Pipeline.

    Decides whether a candidate article should be filtered out, either
    because a similar title was found among the account's existing titles
    or because the title is flagged by the sensitive-text service.
    """

    @classmethod
    def history_exists(cls, title, account_nickname, plan_name):
        """
        Check whether a similar title already exists for the account.

        :param title: title of the candidate article
        :param account_nickname: account whose existing titles are compared
        :param plan_name: produce plan name; plans containing "【1】" or
            "【2】" only compare against index 1-2, all others against 1-8
        :return: True if the similarity check reports a match, else False
        """
        # NOTE(review): index_list presumably selects publish positions
        # within a push -- confirm against get_article_title_url_list.
        if "【1】" in plan_name or "【2】" in plan_name:
            index_list = [1, 2]
        else:
            index_list = [1, 2, 3, 4, 5, 6, 7, 8]
        account_title_list = get_article_title_url_list(
            account_nickname,
            index_list=index_list
        )
        return bool(title_sim_v2_by_list(title, account_title_list))

    @classmethod
    def article_safe(cls, title, timeout=10):
        """
        Ask the sensitive-text service whether the title is sensitive.

        Despite the method name, the returned value is the service's
        ``is_sensitive`` field: a truthy result means the title is NOT safe.

        :param title: text sent to the service
        :param timeout: seconds to wait for the service before requests
            raises a timeout error; without it a stalled service would
            block the caller indefinitely
        :return: the ``is_sensitive`` value from the JSON response
        """
        url = "http://192.168.100.31:8177/sensitive/is_sensitive"
        body = {
            "text": title
        }
        # Trace of the outgoing payload (kept from the original behavior).
        print(body)
        response = requests.post(
            url=url,
            json=body,
            headers={"Content-Type": "application/json"},
            timeout=timeout
        )
        return response.json()['is_sensitive']

    @classmethod
    def deal(cls, article_obj):
        """
        Run the filter checks for a single article.

        :param article_obj: mapping with the keys ``title``,
            ``crawlerAccountName`` and ``producePlanName``
        :return: a dict with ``fileterReason`` and ``status`` when the
            article must be filtered, otherwise False
        """
        title = article_obj['title']
        history_exists_flag = cls.history_exists(
            title=title,
            account_nickname=article_obj['crawlerAccountName'],
            plan_name=article_obj['producePlanName']
        )
        if history_exists_flag:
            # "fileterReason" is misspelled but is a runtime key that
            # callers may read, so it is kept as is.
            return {
                "fileterReason": "历史已发布文章",
                "status": True
            }
        # The sensitive-text service is only queried for titles that
        # passed the history check.
        if cls.article_safe(title=title):
            return {
                "fileterReason": "安全违规",
                "status": True
            }
        return False
|