task6.py 2.0 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273
"""
@author: luojunhui
Compute, for each account, the multiple of its average read count that each article reached.
"""
  5. import json
  6. from pandas import DataFrame
  7. from tqdm import tqdm
  8. from applications import DeNetMysql
  9. from applications import AIDTApi
  10. D = DeNetMysql()
  11. def get_account_avg():
  12. """
  13. 获取账号
  14. :return:
  15. """
  16. with open("/Users/luojunhui/cyber/LongArticlesJob/dev/avg_new_health.json", encoding="utf-8") as f:
  17. avg_dict = json.loads(f.read())
  18. account_position_list = list(avg_dict.keys())
  19. L = []
  20. for account in tqdm(account_position_list):
  21. gh_id = account[:-2]
  22. index = int(account[-1:])
  23. select_sql = f"""
  24. select title, read_cnt, link from crawler_meta_article
  25. where out_account_id = '{gh_id}' and article_index = {index} and status = 1;
  26. """
  27. result_list = D.select(select_sql)
  28. try:
  29. avg_read = avg_dict[account]
  30. for i in result_list:
  31. title, read_cnt, link = i
  32. avg_score = read_cnt / avg_read
  33. temp = [title, link, read_cnt, avg_score]
  34. L.append(temp)
  35. except:
  36. continue
  37. sl = sorted(L, reverse=True, key=lambda x: x[3])
  38. a = 0
  39. b = 0
  40. LL = []
  41. for line in sl:
  42. title = line[0]
  43. read_cnt = line[2]
  44. if "农历" in title or '太极' in title or "节" in title or line[3] < 1.3 or len(title) < 15 or read_cnt < 5000:
  45. a += 1
  46. continue
  47. else:
  48. b += 1
  49. print(line)
  50. LL.append(line)
  51. print(a)
  52. print(b)
  53. df = DataFrame(LL, columns=["title", "link", "read", "read_avg"])
  54. df.to_excel("health_2.xlsx", index=False)
  55. # url_list = [i[1] for i in LL[3:]]
  56. # try:
  57. # AIDTApi().updateArticleIntoCrawlerPlan(
  58. # plan_id=None,
  59. # plan_name="历史冷启-0816-new",
  60. # plan_tag="autoArticlePoolLevel1",
  61. # url_list=url_list
  62. # )
  63. # except Exception as e:
  64. # print("error--{}".format(e))
  65. get_account_avg()