# task6.py
"""
@author: luojunhui
Compute, for each account's articles, the multiple of the account's average
read count that the article reached.
"""
  5. import json
  6. from pandas import DataFrame
  7. from applications import DeNetMysql
  8. from applications import AIDTApi
  9. D = DeNetMysql()
  10. def get_accounts():
  11. """
  12. 获取账号
  13. :return:
  14. """
  15. sql = f"""select account_id from long_article_accounts_outside where category = '军事政法';"""
  16. account_list = D.select(sql)
  17. account_list_ = []
  18. for account in account_list:
  19. account_list_.append(account[0])
  20. return account_list_
  21. def get_account_avg():
  22. """
  23. 获取账号
  24. :return:
  25. """
  26. with open("avg.json", encoding="utf-8") as f:
  27. avg_dict = json.loads(f.read())
  28. account_list = get_accounts()
  29. L = []
  30. for account in account_list:
  31. select_sql = f"""
  32. select title, read_cnt, link from crawler_meta_article
  33. where out_account_id = '{account}';
  34. """
  35. result_list = D.select(select_sql)
  36. try:
  37. avg_read = avg_dict[account]
  38. for i in result_list:
  39. title, read_cnt, link = i
  40. avg_score = read_cnt / avg_read
  41. temp = [title, link, read_cnt, avg_score]
  42. L.append(temp)
  43. except:
  44. continue
  45. sl = sorted(L, reverse=True, key=lambda x: x[3])
  46. a = 0
  47. b = 0
  48. LL = []
  49. for line in sl:
  50. title = line[0]
  51. read_cnt = line[2]
  52. if "农历" in title or "节" in title or line[3] < 1.3 or len(title) < 15 or read_cnt < 1000:
  53. a += 1
  54. continue
  55. else:
  56. b += 1
  57. LL.append(line)
  58. # print(a)
  59. # print(b)
  60. # df = DataFrame(LL, columns=["title", "link", "read", "read_avg"])
  61. # df.to_excel("test.xlsx", index=False)
  62. # url_list = [i[1] for i in LL]
  63. # try:
  64. # AIDTApi().updateArticleIntoCrawlerPlan(
  65. # plan_id=None,
  66. # plan_name="军事政法类冷启-0805-new",
  67. # plan_tag="autoArticlePoolLevel1",
  68. # url_list=url_list
  69. # )
  70. # except Exception as e:
  71. # print("error--{}".format(e))
  72. get_account_avg()