123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171 |
- """
- @author: luojunhui
- """
- import json
- import aiohttp
- from applications.articleTools import ArticleDBTools
- from applications.config import port
class AccountServer(object):
    """
    Score how related a list of titles is to the articles of official accounts.

    The request parameters are read by ``check_params``; ``deal`` is the entry
    point that validates them and then scores every requested account.
    """

    def __init__(self, mysql_client, params):
        """
        :param mysql_client: client object handed to ``ArticleDBTools``.
        :param params: mapping with the request parameters (see ``check_params``).
        """
        # Request-derived settings; all of them are populated by check_params().
        self.account_name_list = None
        self.gh_id_list = None
        self.sim_type = None
        self.interest_type = None
        self.min_time = None
        self.max_time = None
        self.rate = None
        self.title_list = None
        self.view_count_filter = None
        self.params = params
        self.AT = ArticleDBTools(mysql_client)

    async def request_for_nlp(self, title_list, account_interest, interest_weight):
        """
        POST the titles and the account's interest titles to the local NLP service.

        :param title_list: titles to score (sent as ``text_list_a``).
        :param account_interest: the account's reference titles (``text_list_b``).
        :param interest_weight: one weight per reference title (``score_list_b``).
        :return: the decoded JSON response, or ``{}`` when the body is empty.
        """
        headers = {"Content-Type": "application/json"}
        url = "http://localhost:{}/nlp".format(port)
        # Only the "mean" similarity type maps to the *_mean function; every
        # other sim_type value is served by the *_avg function.
        if self.sim_type == "mean":
            function_name = "similarities_cross_mean"
        else:
            function_name = "similarities_cross_avg"
        body = {
            "data": {
                # Single quotes are stripped from every text before sending.
                "text_list_a": [i.replace("'", "") for i in title_list],
                "text_list_b": [i.replace("'", "") for i in account_interest],
                "score_list_b": interest_weight,
                "symbol": 1,
            },
            "function": function_name,
        }
        async with aiohttp.ClientSession() as session:
            async with session.post(url, headers=headers, json=body) as response:
                response_text = await response.text()
                if not response_text:
                    print("Received empty response")
                    return {}
                return await response.json()

    def check_params(self):
        """
        Validate the request parameters and copy them onto the instance.

        ``text_list`` is the only required key; every other key falls back to a
        default (``rate`` 0.1, ``interest_type`` "top", ``sim_type`` "mean",
        empty lists for the account lists, ``None`` for the rest).

        :return: ``None`` on success, otherwise an error dict with the keys
            ``error`` and ``detail``.
        """
        try:
            self.title_list = self.params["text_list"]
            self.account_name_list = self.params.get("account_nickname_list", [])
            self.gh_id_list = self.params.get("gh_id_list", [])
            self.rate = self.params.get("rate", 0.1)
            self.max_time = self.params.get("max_time")
            self.min_time = self.params.get("min_time")
            self.interest_type = self.params.get("interest_type", "top")
            self.sim_type = self.params.get("sim_type", "mean")
            self.view_count_filter = self.params.get("view_count_filter", None)
            return None
        except Exception as e:
            # Boundary handler: any malformed ``params`` becomes an error payload.
            response = {"error": "Params error", "detail": str(e)}
            return response

    async def get_account_interest(
        self,
        gh_id,
        interest_type,
        view_count_filter,
        rate=None,
        msg_type=None,
        index_list=None,
        min_time=None,
        max_time=None,
    ):
        """
        Fetch the titles that represent an account's interest, plus their weights.

        All arguments are forwarded unchanged to
        ``ArticleDBTools.get_good_bad_articles``; only the "good" frame of its
        result is used here.

        :param gh_id: account id to look up.
        :param interest_type: forwarded selector for the kind of interest.
        :param view_count_filter: forwarded view-count filter.
        :param rate: forwarded rate.
        :param msg_type: forwarded message type.
        :param index_list: forwarded index list.
        :param min_time: forwarded lower time bound.
        :param max_time: forwarded upper time bound.
        :return: ``(account_interest, extend_dicts)`` where ``account_interest``
            is the list of titles and ``extend_dicts`` always holds
            ``view_count`` and, when a ``view_count_avg`` column is present,
            also ``view_count_rate``.
        """
        good_df, bad_df = await self.AT.get_good_bad_articles(
            gh_id=gh_id,
            interest_type=interest_type,
            msg_type=msg_type,
            index_list=index_list,
            min_time=min_time,
            max_time=max_time,
            rate=rate,
            view_count_filter=view_count_filter,
        )
        extend_dicts = {
            "view_count": good_df["show_view_count"].values.tolist(),
        }
        if "view_count_avg" in good_df.columns:
            # Ratio of each article's view count to the ``view_count_avg`` column.
            extend_dicts["view_count_rate"] = (
                good_df["show_view_count"] / good_df["view_count_avg"]
            ).values.tolist()
        account_interest = good_df["title"].values.tolist()
        return account_interest, extend_dicts

    async def get_each_account_score_list(self, gh_id):
        """
        Compute the relevance scores of ``self.title_list`` against one account.

        This is best-effort: any failure (database lookup, missing
        ``view_count_rate`` weights, NLP call, unexpected response shape) is
        printed and replaced by an all-zero score list.

        :param gh_id: account id to score against.
        :return: dict with ``score_list`` and ``text_list_max``.
        """
        try:
            account_interest, extend_dicts = await self.get_account_interest(
                gh_id=gh_id,
                interest_type=self.interest_type,
                rate=self.rate,
                view_count_filter=self.view_count_filter,
                min_time=self.min_time,
                max_time=self.max_time,
            )
            # Raw view counts are the default weights; the rate-based weights
            # are used only when explicitly requested.
            if self.sim_type == "weighted_by_view_count_rate":
                interest_weight = extend_dicts["view_count_rate"]
            else:
                interest_weight = extend_dicts["view_count"]
            response = await self.request_for_nlp(
                title_list=self.title_list,
                account_interest=account_interest,
                interest_weight=interest_weight,
            )
            # The response key mirrors the function chosen in request_for_nlp.
            score_list_key = "score_list_mean" if self.sim_type == "mean" else "score_list_avg"
            return {
                "score_list": response[score_list_key],
                "text_list_max": response["text_list_max"],
            }
        except Exception as e:
            print(e)
            return {
                "score_list": [0] * len(self.title_list),
                "text_list_max": self.title_list,
            }

    async def get_account_list_score_list(self):
        """
        Compute the relevance scores for every account in ``self.gh_id_list``.

        Accounts are processed one after another; an id that appears more than
        once is scored only the first time.

        :return: dict mapping each gh_id to its score dict.
        """
        response = {}
        for gh_id in self.gh_id_list:
            if gh_id in response:
                continue
            response[gh_id] = await self.get_each_account_score_list(gh_id=gh_id)
        return response

    async def deal(self):
        """
        Entry point: validate the parameters, then score all requested accounts.

        :return: the error dict from ``check_params`` when validation fails,
            otherwise the mapping produced by ``get_account_list_score_list``.
        """
        # Validate exactly once and reuse the result, so the error that was
        # tested is the same object that gets returned.
        error = self.check_params()
        if error:
            return error
        return await self.get_account_list_score_list()
|