|  | @@ -1,10 +1,12 @@
 | 
	
		
			
				|  |  |  """
 | 
	
		
			
				|  |  |  @author: luojunhui
 | 
	
		
			
				|  |  |  """
 | 
	
		
			
				|  |  | +import time
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  |  import requests
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  from applications.functions import title_sim_v2_by_list
 | 
	
		
			
				|  |  | -from applications.functions import get_article_title_url_list, get_article_titles
 | 
	
		
			
				|  |  | +from applications.functions import get_article_titles
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  class LongArticlesPipeline(object):
 | 
	
	
		
			
				|  | @@ -13,23 +15,38 @@ class LongArticlesPipeline(object):
 | 
	
		
			
				|  |  |      """
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |      @classmethod
 | 
	
		
			
				|  |  | -    def history_exists(cls, title, account_nickname, plan_name):
 | 
	
		
			
				|  |  | +    def history_title(cls, account_nickname):
 | 
	
		
			
				|  |  |          """
 | 
	
		
			
				|  |  |          判断是否存储
 | 
	
		
			
				|  |  | -        :param plan_name:
 | 
	
		
			
				|  |  | -        :param title:
 | 
	
		
			
				|  |  |          :param account_nickname:
 | 
	
		
			
				|  |  |          :return:
 | 
	
		
			
				|  |  |          """
 | 
	
		
			
				|  |  | -        if "【1】" in plan_name or "【2】" in plan_name:
 | 
	
		
			
				|  |  | -            index_list = [1, 2]
 | 
	
		
			
				|  |  | -        else:
 | 
	
		
			
				|  |  | -            index_list = [1, 2, 3, 4, 5, 6, 7, 8]
 | 
	
		
			
				|  |  | -        account_title_list = get_article_titles(
 | 
	
		
			
				|  |  | +        # if "【1】" in plan_name or "【2】" in plan_name:
 | 
	
		
			
				|  |  | +        index_list_1 = [1, 2]
 | 
	
		
			
				|  |  | +        index_list_2 = [1, 2, 3, 4, 5, 6, 7, 8]
 | 
	
		
			
				|  |  | +        account_title_list_1 = get_article_titles(
 | 
	
		
			
				|  |  |              account_nickname,
 | 
	
		
			
				|  |  | -            index_list=index_list
 | 
	
		
			
				|  |  | +            index_list=index_list_1
 | 
	
		
			
				|  |  |          )
 | 
	
		
			
				|  |  | -        print(account_title_list)
 | 
	
		
			
				|  |  | +        account_title_list_2 = get_article_titles(
 | 
	
		
			
				|  |  | +            account_nickname,
 | 
	
		
			
				|  |  | +            index_list=index_list_2
 | 
	
		
			
				|  |  | +        )
 | 
	
		
			
				|  |  | +        res = {
 | 
	
		
			
				|  |  | +            "rule_1": account_title_list_1,
 | 
	
		
			
				|  |  | +            "rule_2": account_title_list_2
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +        }
 | 
	
		
			
				|  |  | +        return res
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    @classmethod
 | 
	
		
			
				|  |  | +    def history_exists(cls, title, account_title_list):
 | 
	
		
			
				|  |  | +        """
 | 
	
		
			
				|  |  | +        判断文章是否历史已发布
 | 
	
		
			
				|  |  | +        :param title:
 | 
	
		
			
				|  |  | +        :param account_title_list:
 | 
	
		
			
				|  |  | +        :return:
 | 
	
		
			
				|  |  | +        """
 | 
	
		
			
				|  |  |          sim_res = title_sim_v2_by_list(title, account_title_list)
 | 
	
		
			
				|  |  |          if sim_res:
 | 
	
		
			
				|  |  |              return True
 | 
	
	
		
			
				|  | @@ -77,16 +94,20 @@ class LongArticlesPipeline(object):
 | 
	
		
			
				|  |  |          return response.json()['is_bad']
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |      @classmethod
 | 
	
		
			
				|  |  | -    def deal(cls, article_obj, account_name):
 | 
	
		
			
				|  |  | +    def deal(cls, article_obj, account_name, history_title_dict):
 | 
	
		
			
				|  |  |          """
 | 
	
		
			
				|  |  | +        :param history_title_dict:
 | 
	
		
			
				|  |  |          :param account_name:
 | 
	
		
			
				|  |  |          :param article_obj:
 | 
	
		
			
				|  |  |          :return:
 | 
	
		
			
				|  |  |          """
 | 
	
		
			
				|  |  | +        a = time.time()
 | 
	
		
			
				|  |  |          article_bad_flag = cls.article_bad(
 | 
	
		
			
				|  |  |              title=article_obj['title'],
 | 
	
		
			
				|  |  |              account_nickname=account_name
 | 
	
		
			
				|  |  |          )
 | 
	
		
			
				|  |  | +        b = time.time()
 | 
	
		
			
				|  |  | +        print("历史低质量文章:", b - a)
 | 
	
		
			
				|  |  |          if article_bad_flag:
 | 
	
		
			
				|  |  |              response = {
 | 
	
		
			
				|  |  |                  "filterReason": "历史表现差的文章",
 | 
	
	
		
			
				|  | @@ -94,11 +115,18 @@ class LongArticlesPipeline(object):
 | 
	
		
			
				|  |  |              }
 | 
	
		
			
				|  |  |              return response
 | 
	
		
			
				|  |  |          else:
 | 
	
		
			
				|  |  | +            c = time.time()
 | 
	
		
			
				|  |  | +            plan_name = article_obj['producePlanName']
 | 
	
		
			
				|  |  | +            if "【1】" in plan_name or "【2】" in plan_name:
 | 
	
		
			
				|  |  | +                history_title_list = history_title_dict['rule_1']
 | 
	
		
			
				|  |  | +            else:
 | 
	
		
			
				|  |  | +                history_title_list = history_title_dict['rule_2']
 | 
	
		
			
				|  |  |              history_exists_flag = cls.history_exists(
 | 
	
		
			
				|  |  |                  title=article_obj['title'],
 | 
	
		
			
				|  |  | -                account_nickname=account_name,
 | 
	
		
			
				|  |  | -                plan_name=article_obj['producePlanName']
 | 
	
		
			
				|  |  | +                account_title_list=history_title_list
 | 
	
		
			
				|  |  |              )
 | 
	
		
			
				|  |  | +            d = time.time()
 | 
	
		
			
				|  |  | +            print("历史已经发布文章:", d - c)
 | 
	
		
			
				|  |  |              if history_exists_flag:
 | 
	
		
			
				|  |  |                  response = {
 | 
	
		
			
				|  |  |                      "filterReason": "历史已发布文章",
 | 
	
	
		
			
				|  | @@ -106,7 +134,10 @@ class LongArticlesPipeline(object):
 | 
	
		
			
				|  |  |                  }
 | 
	
		
			
				|  |  |                  return response
 | 
	
		
			
				|  |  |              else:
 | 
	
		
			
				|  |  | +                e = time.time()
 | 
	
		
			
				|  |  |                  safe_flag = cls.article_safe(title=article_obj['title'])
 | 
	
		
			
				|  |  | +                f = time.time()
 | 
	
		
			
				|  |  | +                print("安全:", f - e)
 | 
	
		
			
				|  |  |                  if safe_flag:
 | 
	
		
			
				|  |  |                      response = {
 | 
	
		
			
				|  |  |                          "filterReason": "安全违规",
 |