""" @author: luojunhui """ import requests from applications.functions import title_sim_v2_by_list from applications.functions import get_article_title_url_list class LongArticlesPipeline(object): """ Long articles Pipeline """ @classmethod def history_exists(cls, title, account_nickname, plan_name): """ 判断是否存储 :param plan_name: :param title: :param account_nickname: :return: """ if "【1】" in plan_name or "【2】" in plan_name: index_list = [1, 2] else: index_list = [1, 2, 3, 4, 5, 6, 7, 8] account_title_list = get_article_title_url_list( account_nickname, index_list=index_list ) sim_res = title_sim_v2_by_list(title, account_title_list) if sim_res: return True return False @classmethod def article_safe(cls, title): """ 判断文章是否安全 """ url = "http://192.168.100.31:8177/sensitive/is_sensitive" body = { "text": title } print(body) response = requests.post( url=url, json=body, headers={"Content-Type": "application/json"} ) return response.json()['is_sensitive'] @classmethod def deal(cls, article_obj): """ :param article_obj: :return: """ history_exists_flag = cls.history_exists( title=article_obj['title'], account_nickname=article_obj['crawlerAccountName'], plan_name=article_obj['producePlanName'] ) if history_exists_flag: response = { "fileterReason": "历史已发布文章", "status": True } return response else: safe_flag = cls.article_safe(title=article_obj['title']) if safe_flag: response = { "fileterReason": "安全违规", "status": True } return response else: return False