|
@@ -5,6 +5,7 @@ import json
|
|
|
|
|
|
import aiohttp
|
|
|
from applications.articleTools import ArticleDBTools
|
|
|
+from applications.config import port
|
|
|
|
|
|
|
|
|
class AccountServer(object):
|
|
@@ -14,26 +15,28 @@ class AccountServer(object):
|
|
|
|
|
|
def __init__(self, mysql_client, params):
    """
    Create a handler with every request setting unset.

    :param mysql_client: client object handed to ArticleDBTools
    :param params: raw request parameters, read later by check_params
    """
    # Request settings stay None until check_params() copies them
    # out of ``params``.
    self.account_name_list = self.gh_id_list = None
    self.sim_type = self.interest_type = None
    self.min_time = self.max_time = None
    self.rate = self.title_list = None
    self.view_count_filter = None

    # Raw request payload and the article helper built on the client.
    self.params = params
    self.AT = ArticleDBTools(mysql_client)
|
|
|
|
|
|
async def request_for_nlp(self, title_list, account_interest, interest_weight):
    """
    POST two text lists to the local /nlp endpoint and return its JSON reply.

    :param title_list: texts sent as "text_list_a"
    :param account_interest: texts sent as "text_list_b"
    :param interest_weight: values sent as "score_list_b"
    :return: the decoded JSON reply, or {} when the response body is empty
    """
    endpoint = "http://localhost:{}/nlp".format(port)

    # Any sim_type other than "mean" selects the "avg" function.
    if self.sim_type == "mean":
        function_name = "similarities_cross_mean"
    else:
        function_name = "similarities_cross_avg"

    # Single quotes are removed from every text before it is sent.
    payload = {
        "data": {
            "text_list_a": [text.replace("'", "") for text in title_list],
            "text_list_b": [text.replace("'", "") for text in account_interest],
            "score_list_b": interest_weight,
            "symbol": 1,
        },
        "function": function_name,
    }

    async with aiohttp.ClientSession() as session:
        async with session.post(
            endpoint,
            headers={"Content-Type": "application/json"},
            json=payload,
        ) as response:
            raw_text = await response.text()
            if not raw_text:
                print("Received empty response")
                return {}
            return await response.json()
|
|
|
|
|
|
def check_params(self):
    """
    Validate the request parameters and copy them onto the instance.

    "text_list" is the only required key; every other key is optional
    and falls back to a default when absent.

    :return: None when the parameters were read, otherwise a dict with
        "error" and "detail" keys describing the failure
    """
    try:
        request = self.params
        # Required key: a missing "text_list" raises and is reported below.
        self.title_list = request["text_list"]

        read = request.get
        self.account_name_list = read("account_nickname_list", [])
        self.gh_id_list = read("gh_id_list", [])
        self.rate = read("rate", 0.1)
        self.max_time = read("max_time")
        self.min_time = read("min_time")
        self.interest_type = read("interest_type", "top")
        self.sim_type = read("sim_type", "mean")
        self.view_count_filter = read("view_count_filter", None)
    except Exception as err:
        return {"error": "Params error", "detail": str(err)}
    return None
|
|
|
|
|
|
- async def getAccountInterest(
|
|
|
+ async def get_account_interest(
|
|
|
self,
|
|
|
- account_name,
|
|
|
- method,
|
|
|
+ gh_id,
|
|
|
+ interest_type,
|
|
|
+ view_count_filter,
|
|
|
rate=None,
|
|
|
msg_type=None,
|
|
|
index_list=None,
|
|
@@ -78,71 +84,81 @@ class AccountServer(object):
|
|
|
):
|
|
|
"""
|
|
|
获取账号的兴趣类型
|
|
|
- :param account_name:
|
|
|
+ :param gh_id:
|
|
|
:param max_time:
|
|
|
:param min_time:
|
|
|
:param index_list:
|
|
|
:param msg_type:
|
|
|
- :param keys_dict:
|
|
|
:param rate:
|
|
|
- :param gh_id:
|
|
|
- :param method:
|
|
|
+ :param interest_type:
|
|
|
+ :param view_count_filter:
|
|
|
:return:
|
|
|
"""
|
|
|
good_df, bad_df = await self.AT.get_good_bad_articles(
|
|
|
- account_name=account_name,
|
|
|
- method=method,
|
|
|
+ gh_id=gh_id,
|
|
|
+ interest_type=interest_type,
|
|
|
msg_type=msg_type,
|
|
|
index_list=index_list,
|
|
|
min_time=min_time,
|
|
|
max_time=max_time,
|
|
|
rate=rate,
|
|
|
+ view_count_filter=view_count_filter,
|
|
|
)
|
|
|
- view_count_list = good_df["show_view_count"].values.tolist()
|
|
|
- title_list = good_df["title"].values.tolist()
|
|
|
- print(view_count_list)
|
|
|
- print(title_list)
|
|
|
- return title_list, view_count_list
|
|
|
+ extend_dicts = {
|
|
|
+ 'view_count': good_df["show_view_count"].values.tolist(),
|
|
|
+ }
|
|
|
+ if 'view_count_avg' in good_df.columns:
|
|
|
+ extend_dicts['view_count_rate'] = \
|
|
|
+ (good_df["show_view_count"] / good_df["view_count_avg"]).values.tolist()
|
|
|
|
|
|
- async def getEachAccountScoreList(self, account_name):
|
|
|
+ account_interest = good_df["title"].values.tolist()
|
|
|
+ return account_interest, extend_dicts
|
|
|
+
|
|
|
async def get_each_account_score_list(self, gh_id):
    """
    Score the request titles against a single account.

    Loads the account's reference titles and weights through
    get_account_interest, sends them with the request titles to
    request_for_nlp, and picks the score list matching ``sim_type``.

    :param gh_id: account identifier forwarded to get_account_interest
    :return: dict with "score_list" and "text_list_max"; when any step
        fails, "score_list" is a list of zeros (one per request title)
        and "text_list_max" is the request title list itself
    """
    try:
        account_interest, extend_dicts = await self.get_account_interest(
            gh_id=gh_id,
            interest_type=self.interest_type,
            rate=self.rate,
            view_count_filter=self.view_count_filter,
            min_time=self.min_time,
            max_time=self.max_time,
        )
        # Weight by the view-count rate only when explicitly requested;
        # every other sim_type weights by the raw view count.
        if self.sim_type == "weighted_by_view_count_rate":
            weight_key = "view_count_rate"
        else:
            weight_key = "view_count"
        interest_weight = extend_dicts[weight_key]

        response = await self.request_for_nlp(
            title_list=self.title_list,
            account_interest=account_interest,
            interest_weight=interest_weight,
        )
        score_list_key = "score_list_mean" if self.sim_type == "mean" else "score_list_avg"
        return {
            "score_list": response[score_list_key],
            "text_list_max": response["text_list_max"],
        }
    except Exception as e:
        # Deliberate best-effort: one failing account must not abort the
        # whole request. Report which account failed and the exception
        # type, because str() of a KeyError is only the missing key name.
        print(
            "get_each_account_score_list failed for gh_id={}: {!r}".format(gh_id, e)
        )
        return {
            "score_list": [0] * len(self.title_list),
            "text_list_max": self.title_list,
        }
|
|
|
|
|
|
async def get_account_list_score_list(self):
    """
    Get the relevance result for every account in ``gh_id_list``.

    Accounts are processed one after another, in list order.

    :return: dict mapping each gh_id to the value returned by
        get_each_account_score_list for it
    """
    scores_by_account = {}
    for account_id in self.gh_id_list:
        # A gh_id that already holds a (truthy) result is not queried again.
        if not scores_by_account.get(account_id):
            scores_by_account[account_id] = await self.get_each_account_score_list(
                gh_id=account_id
            )
    return scores_by_account
|
|
|
|
|
|
async def deal(self):
    """
    Validate the request and, when it is valid, compute the account scores.

    :return: the error dict produced by check_params when validation
        fails, otherwise the result of get_account_list_score_list
    """
    # check_params() is called exactly once: it both parses the parameters
    # onto the instance and reports the error, so its result is kept
    # rather than recomputed.
    params_error = self.check_params()
    if params_error:
        return params_error
    return await self.get_account_list_score_list()
|