task3.py 3.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104
  1. """
  2. @author: luojunhui
  3. """
  4. import datetime
  5. from tqdm import tqdm
  6. from applications import AIDTApi, DeNetMysql, PQMySQL, Functions, ODPSApi
  7. from config import poolTagMap
  8. from stratrgy import ArticlePoolStrategy
  9. class SendToMultiLevels(object):
  10. """
  11. 冷启分配任务
  12. """
  13. AidApi = AIDTApi()
  14. DeMysql = DeNetMysql()
  15. PqMysql = PQMySQL()
  16. Fun = Functions()
  17. OA = ODPSApi()
  18. @classmethod
  19. def getYesterdayData(cls):
  20. """
  21. 获取前一天数据表现
  22. :return:
  23. """
  24. odps_sql = "select * from loghubods.changwen_article_datastat where dt = '20240724';"
  25. result = cls.OA.select(sql=odps_sql)
  26. response_list = [
  27. {
  28. "article_id": record["article_id"],
  29. "increase_read_count": record["increase_read_count"],
  30. "read_count": record["read_count"],
  31. "increase_income": record["increase_income"],
  32. "income": record["income"],
  33. "increase_share_count": record["increase_share_count"],
  34. "share_count": record["share_count"],
  35. "update_timestamp": record["update_timestamp"]
  36. } for record in result if record['increase_read_count'] >= 1000
  37. ]
  38. return response_list
  39. @classmethod
  40. def splitToDifferentPools(cls, yesterday_data):
  41. """
  42. 分类至Pools
  43. :return:
  44. """
  45. pool_level_1 = [cls.Fun.matchLinkById(i['article_id']) for i in tqdm(yesterday_data) if
  46. i['increase_read_count'] >= 9000]
  47. pool_level_2 = [cls.Fun.matchLinkById(i['article_id']) for i in tqdm(yesterday_data) if
  48. 3500 <= i['increase_read_count'] < 9000]
  49. pool_level_3 = [cls.Fun.matchLinkById(i['article_id']) for i in tqdm(yesterday_data) if
  50. 1000 <= i['increase_read_count'] < 3500]
  51. L = {
  52. "Level1": pool_level_1,
  53. "Level2": pool_level_2,
  54. "Level3": pool_level_3
  55. }
  56. return L
  57. @classmethod
  58. def sendToEachCrawlerPlan(cls, key, url_list):
  59. """
  60. :param key:
  61. :param url_list:
  62. :return:
  63. """
  64. # daily自动创建新抓取计划
  65. # cls.AidApi.updateArticleIntoCrawlerPlan(
  66. # plan_id=None,
  67. # plan_name="{}--{}".format(datetime.datetime.today().__str__().split(" ")[0], key),
  68. # plan_tag=poolTagMap[key],
  69. # url_list=url_list
  70. # )
  71. @classmethod
  72. def sendToDifferentPools(cls, pool_info):
  73. """
  74. 获取文章url
  75. :return:
  76. """
  77. for key in pool_info:
  78. cls.sendToEachCrawlerPlan(key, pool_info[key])
  79. @classmethod
  80. def deal(cls):
  81. """
  82. Dealing function
  83. :return:
  84. """
  85. yesterday_data = cls.getYesterdayData()
  86. level_url_list_map = cls.splitToDifferentPools(yesterday_data)
  87. cls.sendToDifferentPools(pool_info=level_url_list_map)
  88. STML = SendToMultiLevels()
  89. yesterday_data = STML.getYesterdayData()
  90. S = ArticlePoolStrategy()
  91. detail_list = S.getData(article_list=yesterday_data)
  92. S.splitByStrategy(detail_list=detail_list)