|
@@ -1,6 +1,7 @@
|
|
|
"""
|
|
|
@author: luojunhui
|
|
|
"""
|
|
|
+
|
|
|
import random
|
|
|
import time
|
|
|
|
|
@@ -23,7 +24,7 @@ def deduplication(rank1, rank2, rank3):
|
|
|
result = []
|
|
|
if item_list:
|
|
|
for item in item_list:
|
|
|
- title = item['title']
|
|
|
+ title = item["title"]
|
|
|
if title_sim_v2_by_list(title, dup_list):
|
|
|
# print("标题重复,已经过滤\t", title)
|
|
|
continue
|
|
@@ -63,11 +64,13 @@ class AccountArticleRank(object):
|
|
|
"""
|
|
|
self.publishArticleList = []
|
|
|
self.filter_list = []
|
|
|
- history_title_dict = self.pipeline.history_title(account_nickname=self.accountName)
|
|
|
- for item in tqdm(self.params['publishArticleList']):
|
|
|
+ history_title_dict = self.pipeline.history_title(
|
|
|
+ account_nickname=self.accountName
|
|
|
+ )
|
|
|
+ for item in tqdm(self.params["publishArticleList"]):
|
|
|
flag = self.pipeline.deal(item, self.accountName, history_title_dict)
|
|
|
if flag:
|
|
|
- item['filterReason'] = flag['filterReason']
|
|
|
+ item["filterReason"] = flag["filterReason"]
|
|
|
self.filter_list.append(item)
|
|
|
else:
|
|
|
self.publishArticleList.append(item)
|
|
@@ -86,11 +89,7 @@ class AccountArticleRank(object):
|
|
|
self.publishNum = self.params["publishNum"]
|
|
|
print("开始校验参数")
|
|
|
self.filter()
|
|
|
- self.logger.log(
|
|
|
- code="1001",
|
|
|
- msg="参数校验成功",
|
|
|
- data=self.params
|
|
|
- )
|
|
|
+ self.logger.log(code="1001", msg="参数校验成功", data=self.params)
|
|
|
return None
|
|
|
except Exception as e:
|
|
|
response = {
|
|
@@ -99,9 +98,7 @@ class AccountArticleRank(object):
|
|
|
"code": 0,
|
|
|
}
|
|
|
self.logger.log(
|
|
|
- code="1002",
|
|
|
- msg="参数校验失败--{}".format(e),
|
|
|
- data=self.params
|
|
|
+ code="1002", msg="参数校验失败--{}".format(e), data=self.params
|
|
|
)
|
|
|
return response
|
|
|
|
|
@@ -111,10 +108,17 @@ class AccountArticleRank(object):
|
|
|
:return:
|
|
|
"""
|
|
|
# 第一步把所有文章标题分为3组
|
|
|
- article_list1_ori = [i for i in self.publishArticleList if "【1】" in i['producePlanName']]
|
|
|
- article_list2_ori = [i for i in self.publishArticleList if "【2】" in i['producePlanName']]
|
|
|
- article_list3_ori = [i for i in self.publishArticleList if
|
|
|
- not i in article_list1_ori and not i in article_list2_ori]
|
|
|
+ article_list1_ori = [
|
|
|
+ i for i in self.publishArticleList if "【1】" in i["producePlanName"]
|
|
|
+ ]
|
|
|
+ article_list2_ori = [
|
|
|
+ i for i in self.publishArticleList if "【2】" in i["producePlanName"]
|
|
|
+ ]
|
|
|
+ article_list3_ori = [
|
|
|
+ i
|
|
|
+ for i in self.publishArticleList
|
|
|
+ if not i in article_list1_ori and not i in article_list2_ori
|
|
|
+ ]
|
|
|
|
|
|
# # 全局去重,保留优先级由 L1 --> L2 --> L3
|
|
|
# hash_map = {}
|
|
@@ -150,22 +154,24 @@ class AccountArticleRank(object):
|
|
|
if article_list1_ori:
|
|
|
rank1 = ArticleRank().rank(
|
|
|
account_list=[self.accountName],
|
|
|
- text_list=[i['title'] for i in article_list1_ori]
|
|
|
+ text_list=[i["title"] for i in article_list1_ori],
|
|
|
)
|
|
|
- score_list1 = rank1[self.accountName]['score_list']
|
|
|
+ score_list1 = rank1[self.accountName]["score_list"]
|
|
|
ranked_1 = []
|
|
|
for index, value in enumerate(score_list1):
|
|
|
obj = article_list1_ori[index]
|
|
|
- obj['score'] = value + 1000
|
|
|
+ obj["score"] = value + 1000
|
|
|
ranked_1.append(obj)
|
|
|
- ranked_1 = sorted(ranked_1, key=lambda x: x['score'], reverse=True)
|
|
|
+ ranked_1 = sorted(ranked_1, key=lambda x: x["score"], reverse=True)
|
|
|
else:
|
|
|
ranked_1 = []
|
|
|
# rank2
|
|
|
if article_list2_ori:
|
|
|
for item in article_list2_ori:
|
|
|
- item['score'] = 100
|
|
|
- ranked_2 = sorted(article_list2_ori, key=lambda x: x['crawlerViewCount'], reverse=True)
|
|
|
+ item["score"] = 100
|
|
|
+ ranked_2 = sorted(
|
|
|
+ article_list2_ori, key=lambda x: x["crawlerViewCount"], reverse=True
|
|
|
+ )
|
|
|
else:
|
|
|
ranked_2 = []
|
|
|
|
|
@@ -173,25 +179,21 @@ class AccountArticleRank(object):
|
|
|
if article_list3_ori:
|
|
|
rank3 = ArticleRank().rank(
|
|
|
account_list=[self.accountName],
|
|
|
- text_list=[i['title'] for i in article_list3_ori]
|
|
|
+ text_list=[i["title"] for i in article_list3_ori],
|
|
|
)
|
|
|
- score_list3 = rank3[self.accountName]['score_list']
|
|
|
+ score_list3 = rank3[self.accountName]["score_list"]
|
|
|
ranked_3 = []
|
|
|
for index, value in enumerate(score_list3):
|
|
|
obj = article_list3_ori[index]
|
|
|
- obj['score'] = value
|
|
|
+ obj["score"] = value
|
|
|
ranked_3.append(obj)
|
|
|
- ranked_3 = sorted(ranked_3, key=lambda x: x['score'], reverse=True)
|
|
|
+ ranked_3 = sorted(ranked_3, key=lambda x: x["score"], reverse=True)
|
|
|
else:
|
|
|
ranked_3 = []
|
|
|
self.logger.log(
|
|
|
code="1004",
|
|
|
msg="排序完成",
|
|
|
- data={
|
|
|
- "rank1": ranked_1,
|
|
|
- "rank2": ranked_2,
|
|
|
- "rank3": ranked_3
|
|
|
- }
|
|
|
+ data={"rank1": ranked_1, "rank2": ranked_2, "rank3": ranked_3},
|
|
|
)
|
|
|
return ranked_1, ranked_2, ranked_3
|
|
|
|
|
@@ -203,7 +205,9 @@ class AccountArticleRank(object):
|
|
|
print("开始排序")
|
|
|
try:
|
|
|
ranked_1_d, ranked_2_d, ranked_3_d = await self.basic_rank()
|
|
|
- ranked_1, ranked_2, ranked_3 = deduplication(ranked_1_d, ranked_2_d, ranked_3_d)
|
|
|
+ ranked_1, ranked_2, ranked_3 = deduplication(
|
|
|
+ ranked_1_d, ranked_2_d, ranked_3_d
|
|
|
+ )
|
|
|
print("去重成功")
|
|
|
try:
|
|
|
L = []
|
|
@@ -229,14 +233,10 @@ class AccountArticleRank(object):
|
|
|
"ghId": self.ghId,
|
|
|
"strategy": self.strategy,
|
|
|
"publishNum": self.publishNum,
|
|
|
- "rank_list": L[:self.publishNum],
|
|
|
- "filter_list": self.filter_list
|
|
|
+ "rank_list": L[: self.publishNum],
|
|
|
+ "filter_list": self.filter_list,
|
|
|
}
|
|
|
- self.logger.log(
|
|
|
- code=1006,
|
|
|
- msg="rank successfully",
|
|
|
- data=result
|
|
|
- )
|
|
|
+ self.logger.log(code=1006, msg="rank successfully", data=result)
|
|
|
response = {"status": "Rank Success", "data": result, "code": 1}
|
|
|
except Exception as e:
|
|
|
result = {
|
|
@@ -246,12 +246,10 @@ class AccountArticleRank(object):
|
|
|
"strategy": self.strategy,
|
|
|
"publishNum": self.publishNum,
|
|
|
"rank_list": self.publishArticleList[: self.publishNum],
|
|
|
- "filter_list": self.filter_list
|
|
|
+ "filter_list": self.filter_list,
|
|
|
}
|
|
|
self.logger.log(
|
|
|
- code=1007,
|
|
|
- msg="rank failed because of {}".format(e),
|
|
|
- data=result
|
|
|
+ code=1007, msg="rank failed because of {}".format(e), data=result
|
|
|
)
|
|
|
print("排序成功")
|
|
|
response = {"status": "Rank Fail", "data": result, "code": 1}
|
|
@@ -295,34 +293,19 @@ class AccountArticleRank(object):
|
|
|
"""
|
|
|
match self.strategy:
|
|
|
case "ArticleRankV1":
|
|
|
- self.logger.log(
|
|
|
- code="1003",
|
|
|
- msg="命中排序策略1"
|
|
|
- )
|
|
|
+ self.logger.log(code="1003", msg="命中排序策略1")
|
|
|
return await self.rank_v1()
|
|
|
case "ArticleRankV2":
|
|
|
- self.logger.log(
|
|
|
- code="1003",
|
|
|
- msg="命中排序策略2"
|
|
|
- )
|
|
|
+ self.logger.log(code="1003", msg="命中排序策略2")
|
|
|
return await self.rank_v2()
|
|
|
case "ArticleRankV3":
|
|
|
- self.logger.log(
|
|
|
- code="1003",
|
|
|
- msg="命中排序策略3"
|
|
|
- )
|
|
|
+ self.logger.log(code="1003", msg="命中排序策略3")
|
|
|
return await self.rank_v3()
|
|
|
case "ArticleRankV4":
|
|
|
- self.logger.log(
|
|
|
- code="1003",
|
|
|
- msg="命中排序策略4"
|
|
|
- )
|
|
|
+ self.logger.log(code="1003", msg="命中排序策略4")
|
|
|
return await self.rank_v4()
|
|
|
case "ArticleRankV5":
|
|
|
- self.logger.log(
|
|
|
- code="1003",
|
|
|
- msg="命中排序策略5"
|
|
|
- )
|
|
|
+ self.logger.log(code="1003", msg="命中排序策略5")
|
|
|
return await self.rank_v5()
|
|
|
|
|
|
async def deal(self):
|