1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586 |
- """
- @author: luojunhui
- """
- from applications import AIDTApi, DeNetMysql, PQMySQL, Functions
- from config import poolLevelConfig, cateMap, coldPoolArticlesNum
class ColdStartTask(object):
    """
    Cold-start allocation task.

    For each configured category this class reads the most-viewed rows of the
    ``cold_start_article_pool`` table, asks ``Functions.getTitleScore`` for a
    score per title, keeps the articles whose score reaches the threshold and
    hands their URLs to ``AIDTApi.updateArticleIntoCrawlerPlan``.

    Every method is a classmethod. The service clients below are created once,
    when the class body is executed.
    """

    AidApi = AIDTApi()
    DeMysql = DeNetMysql()
    PqMysql = PQMySQL()
    Fun = Functions()
    pool4 = poolLevelConfig['1']

    # Categories that computeScore iterates over.
    _CATEGORIES = ("军事政法", "健康养生", "宗教历史")
    # Account name passed to getTitleScore and used as the key of its result.
    _SCORE_ACCOUNT = "指尖奇文"
    # Articles scoring below this value are dropped (comparison is inclusive).
    _MIN_SCORE = 0.35
    # Share of coldPoolArticlesNum used for a category missing from cateMap.
    _DEFAULT_CATEGORY_RATIO = 0.1

    @classmethod
    def getTopArticles(cls, category, limit_count):
        """
        Fetch the top rows of one category from ``cold_start_article_pool``.

        Rows are ordered by ``view_count`` and then ``publish_time_stamp``,
        both descending.

        :param category: value compared against the ``category`` column.
        :param limit_count: maximum number of rows; coerced with ``int()`` so
            the LIMIT clause always receives an integer literal.
        :return: the result of ``PqMysql.select``; other methods of this class
            index each row as (content_id, content_link, title).
        """
        # NOTE(review): ``category`` is interpolated straight into the SQL
        # text. That is only safe for trusted, internally defined category
        # names; switch to bound parameters if PQMySQL.select supports them.
        sql = f"""
            select content_id, content_link, title
            from cold_start_article_pool
            where category = '{category}'
            order by view_count DESC, publish_time_stamp DESC
            limit {int(limit_count)};
        """
        return cls.PqMysql.select(sql)

    @classmethod
    def computeScore(cls):
        """
        Score the top articles of every configured category.

        The number of articles requested per category is
        ``coldPoolArticlesNum`` multiplied by the category's ratio in
        ``cateMap`` (falling back to the default ratio), truncated to an int.

        :return: list of dicts with the keys ``id``, ``url``, ``title``,
            ``cate`` and ``score``, limited to articles whose score is at
            least the minimum score, in category order then query order.
        """
        scored_articles = []
        for category in cls._CATEGORIES:
            ratio = cateMap.get(category, cls._DEFAULT_CATEGORY_RATIO)
            articles = cls.getTopArticles(category, int(coldPoolArticlesNum * ratio))
            if not articles:
                # Nothing to score for this category; skip the scoring call.
                continue
            titles = [row[2] for row in articles]
            score_response = cls.Fun.getTitleScore(titles, cls._SCORE_ACCOUNT)
            scores = score_response[cls._SCORE_ACCOUNT]['score_list']
            # Assumes one score per title, in input order - TODO confirm
            # against getTitleScore. zip stops at the shorter sequence, so a
            # length mismatch cannot raise IndexError.
            for row, score in zip(articles, scores):
                if score >= cls._MIN_SCORE:
                    scored_articles.append(
                        {
                            "id": row[0],
                            "url": row[1],
                            "title": row[2],
                            "cate": category,
                            "score": score,
                        }
                    )
        return scored_articles

    @classmethod
    def sendToColdPool(cls, plan_id, plan_name, plan_tag):
        """
        Push the URLs of all articles returned by ``computeScore`` into a
        crawler plan (described by the original author as sending the articles
        to the "fourth layer").

        :param plan_id: forwarded unchanged to ``updateArticleIntoCrawlerPlan``.
        :param plan_name: forwarded unchanged to ``updateArticleIntoCrawlerPlan``.
        :param plan_tag: forwarded unchanged to ``updateArticleIntoCrawlerPlan``.
        :return: None
        """
        result = cls.computeScore()
        # TODO: apply a second per-category quota (share of each category in
        # the final list) before sending; not implemented yet.
        cls.AidApi.updateArticleIntoCrawlerPlan(
            plan_id=plan_id,
            plan_name=plan_name,
            plan_tag=plan_tag,
            url_list=[article['url'] for article in result],
        )
if __name__ == '__main__':
    # Script entry point: run one cold-start push with a fixed plan name and
    # tag; plan_id is passed as None.
    plan_arguments = {
        "plan_id": None,
        "plan_name": "冷启池子--0729--Monday--分品类抓取--6个品类",
        "plan_tag": "autoArticlePoolLevel1",
    }
    cold_start_task = ColdStartTask()
    cold_start_task.sendToColdPool(**plan_arguments)
|