|
@@ -1,4 +1,4 @@
|
|
|
-from typing import Dict, Set, List
|
|
|
|
|
|
|
+from typing import Dict, Set, List, Tuple
|
|
|
|
|
|
|
|
from app.core.config import GlobalConfigSettings
|
|
from app.core.config import GlobalConfigSettings
|
|
|
from app.core.database import DatabaseManager
|
|
from app.core.database import DatabaseManager
|
|
@@ -6,11 +6,12 @@ from app.core.observability import LogService
|
|
|
|
|
|
|
|
from app.infra.mapper import LongArticleDatabaseMapper
|
|
from app.infra.mapper import LongArticleDatabaseMapper
|
|
|
from app.infra.mapper import PiaoquanCrawlerDatabaseMapper
|
|
from app.infra.mapper import PiaoquanCrawlerDatabaseMapper
|
|
|
-from app.infra.shared.tools import init_odps_client
|
|
|
|
|
|
|
+from app.infra.external import OdpsService
|
|
|
|
|
|
|
|
from app.recommend.offline_recommend.strategy import I2I
|
|
from app.recommend.offline_recommend.strategy import I2I
|
|
|
from app.recommend.offline_recommend.strategy import GetTopArticleStrategy
|
|
from app.recommend.offline_recommend.strategy import GetTopArticleStrategy
|
|
|
from app.recommend.offline_recommend.utils import RecommendApolloClient
|
|
from app.recommend.offline_recommend.utils import RecommendApolloClient
|
|
|
|
|
+from app.recommend.offline_recommend.utils import ProduceBaseData
|
|
|
|
|
|
|
|
|
|
|
|
|
class BaseOffRecommendUtils:
|
|
class BaseOffRecommendUtils:
|
|
@@ -21,15 +22,12 @@ class BaseOffRecommendUtils:
|
|
|
):
|
|
):
|
|
|
self.pool = pool
|
|
self.pool = pool
|
|
|
self.recommend_apollo_client = RecommendApolloClient(config=config)
|
|
self.recommend_apollo_client = RecommendApolloClient(config=config)
|
|
|
- self.odps_client = init_odps_client()
|
|
|
|
|
-
|
|
|
|
|
- # read from odps
|
|
|
|
|
- async def read_from_odps(self, query: str) -> List:
|
|
|
|
|
- with self.odps_client.execute_sql(query).open_reader() as reader:
|
|
|
|
|
- if reader:
|
|
|
|
|
- return [item for item in reader]
|
|
|
|
|
- else:
|
|
|
|
|
- return []
|
|
|
|
|
|
|
+ self.odps_client = OdpsService(
|
|
|
|
|
+ access_id="LTAIWYUujJAm7CbH",
|
|
|
|
|
+ secret_access_key="RfSjdiWwED1sGFlsjXv0DlfTnZTG1P",
|
|
|
|
|
+ endpoint="http://service.cn.maxcompute.aliyun.com/api",
|
|
|
|
|
+ project="loghubods",
|
|
|
|
|
+ )
|
|
|
|
|
|
|
|
# 获取全局过滤标题
|
|
# 获取全局过滤标题
|
|
|
async def get_global_filter_title(self) -> Set[str]:
|
|
async def get_global_filter_title(self) -> Set[str]:
|
|
@@ -57,10 +55,39 @@ class BaseOffRecommendUtils:
|
|
|
case _:
|
|
case _:
|
|
|
query = I2I.batch_base(title_list)
|
|
query = I2I.batch_base(title_list)
|
|
|
|
|
|
|
|
- recommend_articles = await self.read_from_odps(query)
|
|
|
|
|
|
|
+ recommend_articles = await self.odps_client.read_from_odps(query)
|
|
|
return recommend_articles
|
|
return recommend_articles
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class BaseOfflineDataProduce(BaseOffRecommendUtils):
    """Build the offline data used by recommendation experiments.

    Every ``produce_*`` coroutine obtains a query from ``ProduceBaseData``
    and runs it through ``self.odps_client.execute_odps_query``. The result
    of that call is not returned to the caller.
    """

    def __init__(
        self,
        pool: DatabaseManager,
        config: GlobalConfigSettings,
        log_service: LogService,
    ) -> None:
        """Set up the shared utilities and remember the log service.

        Args:
            pool: Database manager, forwarded to the base class.
            config: Global settings, forwarded to the base class.
            log_service: Log service stored on the instance.
        """
        # The base initialiser already stores ``pool`` on ``self.pool`` and
        # creates ``self.odps_client``, so neither is repeated here.
        super().__init__(pool, config)
        self.log_service = log_service

    async def produce_article_data(self, account_tuple: Tuple[str, ...]) -> None:
        """Run the article/unionid mapping query for the given accounts.

        Args:
            account_tuple: Accounts handed unchanged to
                ``ProduceBaseData.article_unionid_mapper``.
        """
        query = ProduceBaseData.article_unionid_mapper(account_tuple)
        await self.odps_client.execute_odps_query(query)

    async def produce_title_data(self) -> None:
        """Run the query built by ``ProduceBaseData.title_unionid_mapper``."""
        query = ProduceBaseData.title_unionid_mapper()
        await self.odps_client.execute_odps_query(query)

    async def produce_i2i_table(self, dt: str) -> None:
        """Run the query built by ``ProduceBaseData.i2i_mapper`` for ``dt``.

        Args:
            dt: Passed as the ``dt`` keyword to ``ProduceBaseData.i2i_mapper``;
                presumably a date partition string -- confirm against the
                mapper.
        """
        query = ProduceBaseData.i2i_mapper(dt=dt)
        await self.odps_client.execute_odps_query(query)
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
class BaseOfflineRecommend(BaseOffRecommendUtils):
|
|
class BaseOfflineRecommend(BaseOffRecommendUtils):
|
|
|
def __init__(
|
|
def __init__(
|
|
|
self,
|
|
self,
|
|
@@ -134,13 +161,15 @@ class BaseOfflineRecommend(BaseOffRecommendUtils):
|
|
|
match strategy:
|
|
match strategy:
|
|
|
case "v1":
|
|
case "v1":
|
|
|
odps_query = GetTopArticleStrategy.strategy_v1(account_name)
|
|
odps_query = GetTopArticleStrategy.strategy_v1(account_name)
|
|
|
- top_articles = await self.read_from_odps(odps_query)
|
|
|
|
|
|
|
+ top_articles = await self.odps_client.read_from_odps(odps_query)
|
|
|
top_titles = [i.title for i in top_articles]
|
|
top_titles = [i.title for i in top_articles]
|
|
|
|
|
|
|
|
case "base":
|
|
case "base":
|
|
|
mysql_query = GetTopArticleStrategy.base()
|
|
mysql_query = GetTopArticleStrategy.base()
|
|
|
- top_articles = await LongArticleDatabaseMapper.Recommend.get_top_articles(
|
|
|
|
|
- pool=self.pool, query=mysql_query, gh_id=gh_id
|
|
|
|
|
|
|
+ top_articles = (
|
|
|
|
|
+ await LongArticleDatabaseMapper.Recommend.get_top_articles(
|
|
|
|
|
+ pool=self.pool, query=mysql_query, gh_id=gh_id
|
|
|
|
|
+ )
|
|
|
)
|
|
)
|
|
|
top_titles = [i["title"] for i in top_articles]
|
|
top_titles = [i["title"] for i in top_articles]
|
|
|
|
|
|