|
@@ -1,10 +1,12 @@
|
|
|
"""
|
|
|
@author: luojunhui
|
|
|
"""
|
|
|
+import time
|
|
|
+
|
|
|
import requests
|
|
|
|
|
|
from applications.functions import title_sim_v2_by_list
|
|
|
-from applications.functions import get_article_title_url_list, get_article_titles
|
|
|
+from applications.functions import get_article_titles
|
|
|
|
|
|
|
|
|
class LongArticlesPipeline(object):
|
|
@@ -13,23 +15,38 @@ class LongArticlesPipeline(object):
|
|
|
"""
|
|
|
|
|
|
@classmethod
|
|
|
- def history_exists(cls, title, account_nickname, plan_name):
|
|
|
+ def history_title(cls, account_nickname):
|
|
|
"""
|
|
|
判断是否存储
|
|
|
- :param plan_name:
|
|
|
- :param title:
|
|
|
:param account_nickname:
|
|
|
:return:
|
|
|
"""
|
|
|
- if "【1】" in plan_name or "【2】" in plan_name:
|
|
|
- index_list = [1, 2]
|
|
|
- else:
|
|
|
- index_list = [1, 2, 3, 4, 5, 6, 7, 8]
|
|
|
- account_title_list = get_article_titles(
|
|
|
+ # if "【1】" in plan_name or "【2】" in plan_name:
|
|
|
+ index_list_1 = [1, 2]
|
|
|
+ index_list_2 = [1, 2, 3, 4, 5, 6, 7, 8]
|
|
|
+ account_title_list_1 = get_article_titles(
|
|
|
account_nickname,
|
|
|
- index_list=index_list
|
|
|
+ index_list=index_list_1
|
|
|
)
|
|
|
- print(account_title_list)
|
|
|
+ account_title_list_2 = get_article_titles(
|
|
|
+ account_nickname,
|
|
|
+ index_list=index_list_2
|
|
|
+ )
|
|
|
+ res = {
|
|
|
+ "rule_1": account_title_list_1,
|
|
|
+ "rule_2": account_title_list_2
|
|
|
+
|
|
|
+ }
|
|
|
+ return res
|
|
|
+
|
|
|
+ @classmethod
|
|
|
+ def history_exists(cls, title, account_title_list):
|
|
|
+ """
|
|
|
+ 判断文章是否历史已发布
|
|
|
+ :param title:
|
|
|
+ :param account_title_list:
|
|
|
+ :return:
|
|
|
+ """
|
|
|
sim_res = title_sim_v2_by_list(title, account_title_list)
|
|
|
if sim_res:
|
|
|
return True
|
|
@@ -77,16 +94,20 @@ class LongArticlesPipeline(object):
|
|
|
return response.json()['is_bad']
|
|
|
|
|
|
@classmethod
|
|
|
- def deal(cls, article_obj, account_name):
|
|
|
+ def deal(cls, article_obj, account_name, history_title_dict):
|
|
|
"""
|
|
|
+ :param history_title_dict:
|
|
|
:param account_name:
|
|
|
:param article_obj:
|
|
|
:return:
|
|
|
"""
|
|
|
+ a = time.time()
|
|
|
article_bad_flag = cls.article_bad(
|
|
|
title=article_obj['title'],
|
|
|
account_nickname=account_name
|
|
|
)
|
|
|
+ b = time.time()
|
|
|
+ print("历史低质量文章:", b - a)
|
|
|
if article_bad_flag:
|
|
|
response = {
|
|
|
"filterReason": "历史表现差的文章",
|
|
@@ -94,11 +115,18 @@ class LongArticlesPipeline(object):
|
|
|
}
|
|
|
return response
|
|
|
else:
|
|
|
+ c = time.time()
|
|
|
+ plan_name = article_obj['producePlanName']
|
|
|
+ if "【1】" in plan_name or "【2】" in plan_name:
|
|
|
+ history_title_list = history_title_dict['rule_1']
|
|
|
+ else:
|
|
|
+ history_title_list = history_title_dict['rule_2']
|
|
|
history_exists_flag = cls.history_exists(
|
|
|
title=article_obj['title'],
|
|
|
- account_nickname=account_name,
|
|
|
- plan_name=article_obj['producePlanName']
|
|
|
+ account_title_list=history_title_list
|
|
|
)
|
|
|
+ d = time.time()
|
|
|
+ print("历史已经发布文章:", d - c)
|
|
|
if history_exists_flag:
|
|
|
response = {
|
|
|
"filterReason": "历史已发布文章",
|
|
@@ -106,7 +134,10 @@ class LongArticlesPipeline(object):
|
|
|
}
|
|
|
return response
|
|
|
else:
|
|
|
+ e = time.time()
|
|
|
safe_flag = cls.article_safe(title=article_obj['title'])
|
|
|
+ f = time.time()
|
|
|
+ print("安全:", f - e)
|
|
|
if safe_flag:
|
|
|
response = {
|
|
|
"filterReason": "安全违规",
|