task2.py 2.7 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586
  1. """
  2. @author: luojunhui
  3. """
  4. from applications import AIDTApi, DeNetMysql, PQMySQL, Functions
  5. from config import poolLevelConfig, cateMap, coldPoolArticlesNum
  6. class ColdStartTask(object):
  7. """
  8. 冷启分配任务
  9. """
  10. AidApi = AIDTApi()
  11. DeMysql = DeNetMysql()
  12. PqMysql = PQMySQL()
  13. Fun = Functions()
  14. pool4 = poolLevelConfig['1']
  15. @classmethod
  16. def getTopArticles(cls, category, limit_count):
  17. """
  18. 获取高分享的文章list
  19. :return:
  20. """
  21. sql = f"""
  22. select content_id, content_link, title
  23. from cold_start_article_pool
  24. where category = '{category}'
  25. order by view_count DESC, publish_time_stamp DESC
  26. limit {limit_count};
  27. """
  28. result = cls.PqMysql.select(sql)
  29. return result
  30. @classmethod
  31. def computeScore(cls):
  32. """
  33. 和每个账号计算相关性分数
  34. :return:
  35. """
  36. category_list = ["军事政法", "健康养生", "宗教历史"]
  37. L = []
  38. for category in category_list:
  39. limit_count = coldPoolArticlesNum * cateMap.get(category, 0.1)
  40. article_tuple = cls.getTopArticles(category, int(limit_count))
  41. title_list = [article[2] for article in article_tuple]
  42. score_list = cls.Fun.getTitleScore(title_list, "指尖奇文")['指尖奇文']['score_list']
  43. for index, score in enumerate(score_list):
  44. obj = {
  45. "id": article_tuple[index][0],
  46. "url": article_tuple[index][1],
  47. "title": article_tuple[index][2],
  48. "cate": category,
  49. "score": score
  50. }
  51. L.append(obj)
  52. result = [i for i in L if i['score'] >= 0.35]
  53. return result
  54. @classmethod
  55. def sendToColdPool(cls, plan_id, plan_name, plan_tag):
  56. """
  57. 把文章send至第四层
  58. :return:
  59. """
  60. result = cls.computeScore()
  61. army = [i for i in result if i['cate'] == '军事政法']
  62. healthy = [i for i in result if i['cate'] == '健康养生']
  63. history = [i for i in result if i['cate'] == '宗教历史']
  64. url_list = [i['url'] for i in result]
  65. # 再加一次配比,每个品类的数量占比
  66. cls.AidApi.updateArticleIntoCrawlerPlan(
  67. plan_id=plan_id,
  68. plan_name=plan_name,
  69. plan_tag=plan_tag,
  70. url_list=[i['url'] for i in result]
  71. )
  72. if __name__ == '__main__':
  73. CST = ColdStartTask()
  74. CST.sendToColdPool(
  75. plan_id=None,
  76. plan_name="冷启池子--0729--Monday--分品类抓取--6个品类",
  77. plan_tag="autoArticlePoolLevel1",
  78. )