123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162 |
- """
- @author: luojunhui
- """
- import time
- import requests
- from applications.functions import title_sim_v2_by_list, is_bad
- from applications.config import port
class LongArticlesPipeline(object):
    """Filtering pipeline for long-article candidates.

    Every method is a classmethod; the class holds no instance state.
    ``deal`` is the entry point: it runs a low-quality check, a
    "title already published" check and a sensitive-content check, in that
    order, and reports the first check that rejects the article.
    """

    # Endpoint of the sensitive-text detection service used by ``article_safe``.
    SENSITIVE_CHECK_URL = "http://61.48.133.26:8177/sensitive/is_sensitive"

    # Seconds to wait for an HTTP response. Without a timeout ``requests``
    # waits indefinitely, so a stalled service would hang the whole pipeline.
    # Override on a subclass if a service legitimately needs longer.
    REQUEST_TIMEOUT = 60

    @staticmethod
    def _filtered(reason):
        """Build the response dict returned when an article is rejected."""
        return {
            "filterReason": reason,
            "status": True
        }

    @classmethod
    def get_titles(cls, account_name, index_list):
        """Fetch the title list of an account from the local title service.

        :param account_name: account name forwarded to the service.
        :param index_list: list of indexes forwarded to the service
            (presumably article positions within a push -- confirm with the
            service owner).
        :return: the ``title_list`` field of the JSON response.
        :raises requests.exceptions.RequestException: on connection failure
            or when the service does not answer within ``REQUEST_TIMEOUT``.
        :raises KeyError: if the response has no ``title_list`` field.
        """
        print("开始请求")
        print(account_name)
        print(index_list)
        url = "http://localhost:{}/title_list".format(port)
        response = requests.post(
            url,
            json={
                "account_name": account_name,
                "index_list": index_list,
                "min_time": None,
                "max_time": None,
                # NOTE(review): meaning of msg_type "9" is defined by the
                # title service, not visible here.
                "msg_type": "9"
            },
            timeout=cls.REQUEST_TIMEOUT
        )
        print(response.status_code)
        print(response.text)
        # Parse the body once and reuse it for both logging and the result.
        payload = response.json()
        print(payload)
        return payload['title_list']

    @classmethod
    def history_title(cls, account_nickname):
        """Fetch the two historical title lists used for de-duplication.

        :param account_nickname: account whose titles are requested.
        :return: dict with ``rule_1`` (titles for indexes 1-2) and
            ``rule_2`` (titles for indexes 1-8); ``deal`` picks one of them
            based on the article's plan name.
        """
        index_list_1 = [1, 2]
        index_list_2 = [1, 2, 3, 4, 5, 6, 7, 8]
        print("开始请求")
        account_title_list_1 = cls.get_titles(
            account_nickname,
            index_list=index_list_1
        )
        print(account_title_list_1)
        account_title_list_2 = cls.get_titles(
            account_nickname,
            index_list=index_list_2
        )
        return {
            "rule_1": account_title_list_1,
            "rule_2": account_title_list_2
        }

    @classmethod
    def history_exists(cls, title, account_title_list):
        """Tell whether the title matches one the account already published.

        :param title: candidate article title.
        :param account_title_list: historical titles to compare against.
        :return: ``True`` when ``title_sim_v2_by_list`` returns a truthy
            result, ``False`` otherwise.
        """
        return bool(title_sim_v2_by_list(title, account_title_list))

    @classmethod
    def article_safe(cls, title):
        """Ask the sensitive-text service about the title.

        Despite the method name, the return value is the service's
        ``is_sensitive`` field: a truthy result means the title was flagged,
        i.e. the article is NOT safe. ``deal`` relies on that meaning.

        :param title: text sent to the service.
        :return: the ``is_sensitive`` field of the JSON response.
        :raises requests.exceptions.RequestException: on connection failure
            or when the service does not answer within ``REQUEST_TIMEOUT``.
        """
        response = requests.post(
            url=cls.SENSITIVE_CHECK_URL,
            json={"text": title},
            headers={"Content-Type": "application/json"},
            timeout=cls.REQUEST_TIMEOUT
        )
        return response.json()['is_sensitive']

    @classmethod
    def article_bad(cls, title, account_nickname):
        """Tell whether the title belongs to a low-quality article.

        :param title: candidate article title.
        :param account_nickname: currently unused; kept so existing callers
            keep working.
        :return: whatever ``is_bad(title)`` returns.
        """
        return is_bad(title)

    @classmethod
    def deal(cls, article_obj, account_name, history_title_dict):
        """Run all filters on one article and report the first rejection.

        Checks run in order: low quality, already published, sensitive.
        The elapsed time of each executed check is printed.

        :param article_obj: mapping with at least ``title``; ``producePlanName``
            is read only when the low-quality check passes.
        :param account_name: account the article is planned for.
        :param history_title_dict: mapping with ``rule_1`` and ``rule_2``
            title lists, as produced by ``history_title``.
        :return: ``{"filterReason": <reason>, "status": True}`` when the
            article must be filtered out, otherwise ``False``.
        """
        title = article_obj['title']

        started = time.time()
        is_low_quality = cls.article_bad(
            title=title,
            account_nickname=account_name
        )
        print("历史低质量文章:", time.time() - started)
        if is_low_quality:
            return cls._filtered("历史表现差的文章")

        started = time.time()
        plan_name = article_obj['producePlanName']
        # Plans tagged 【1】 or 【2】 are compared against the shorter
        # history (rule_1); every other plan uses the full history (rule_2).
        if "【1】" in plan_name or "【2】" in plan_name:
            history_title_list = history_title_dict['rule_1']
        else:
            history_title_list = history_title_dict['rule_2']
        already_published = cls.history_exists(
            title=title,
            account_title_list=history_title_list
        )
        print("历史已经发布文章:", time.time() - started)
        if already_published:
            return cls._filtered("历史已发布文章")

        started = time.time()
        is_sensitive = cls.article_safe(title=title)
        print("安全:", time.time() - started)
        if is_sensitive:
            return cls._filtered("安全违规")

        return False
|