# task3.py
  1. """
  2. @author: luojunhui
  3. """
  4. import datetime
  5. from tqdm import tqdm
  6. from applications import AIDTApi, DeNetMysql, PQMySQL, Functions, longArticlesMySQL
  7. from config import poolTagMap
  8. from stratrgy import ArticlePoolStrategy
  9. class SendToMultiLevels(object):
  10. """
  11. 流量池任务
  12. """
  13. AidApi = AIDTApi()
  14. DeMysql = DeNetMysql()
  15. PqMysql = PQMySQL()
  16. Fun = Functions()
  17. # Ad =
  18. @classmethod
  19. def getYesterdayData(cls):
  20. """
  21. 获取前一天数据表现
  22. :return:
  23. """
  24. sql = f"""
  25. select article_id, read_count from changwen_article_datastat
  26. where article_id in (
  27. select id from changwen_article
  28. where publish_timestamp >= 1722268800000
  29. ) and read_count > 100;
  30. """
  31. result = cls.Ad.select(sql=sql)
  32. response_list = [
  33. {
  34. "id": line[0],
  35. "read_count": line[1]
  36. } for line in result
  37. ]
  38. return response_list
  39. @classmethod
  40. def splitToDifferentPools(cls, yesterday_data):
  41. """
  42. 分类至Pools
  43. :return:
  44. """
  45. S = ArticlePoolStrategy()
  46. detail_list = S.getData(article_list=yesterday_data)
  47. result = S.splitByStrategy(detail_list=detail_list)
  48. return result
  49. @classmethod
  50. def sendToEachCrawlerPlan(cls, key, result_list):
  51. """
  52. :param result_list:
  53. :param key:
  54. :return:
  55. """
  56. # print(key)
  57. # print(len(result_list))
  58. # for index, i in enumerate(result_list):
  59. # print(index, "\t", i['level_rate'], "\t", i['title'], "\t", i['avg_read'], "\t", i['article_read'], "\t", i['key'])
  60. # print(url_list)
  61. # daily自动创建新抓取计划
  62. cls.AidApi.updateArticleIntoCrawlerPlan(
  63. plan_id=None,
  64. plan_name="流量池晋级--{}--{}".format(datetime.datetime.today().__str__().split(" ")[0], key),
  65. plan_tag=poolTagMap[key],
  66. url_list=[i['url'] for i in result_list]
  67. )
  68. @classmethod
  69. def sendToDifferentPools(cls, pool_info):
  70. """
  71. 获取文章url
  72. :return:
  73. """
  74. for key in pool_info:
  75. cls.sendToEachCrawlerPlan(key, pool_info[key])
  76. @classmethod
  77. def deal(cls):
  78. """
  79. Dealing function
  80. :return:
  81. """
  82. yesterday_data = cls.getYesterdayData()
  83. level_url_list_map = cls.splitToDifferentPools(yesterday_data)
  84. cls.sendToDifferentPools(pool_info=level_url_list_map)
  85. if __name__ == '__main__':
  86. S = SendToMultiLevels()
  87. S.deal()
  88. # yesterday_data = S.getYesterdayData()
  89. # for line in tqdm(yesterday_data):
  90. # print(line)