task6.py 2.4 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980
  1. """
  2. @author: luojunhui
  3. 计算账号的阅读均值倍数
  4. """
  5. import json
  6. import pandas as pd
  7. from pandas import DataFrame
  8. from tqdm import tqdm
  9. from applications import DeNetMysql
  10. from applications import AIDTApi
# Shared instance of the project's DeNetMysql helper, created when this module
# is imported.
# NOTE(review): no active (uncommented) statement in the visible code uses D;
# confirm whether it is still needed before touching it.
D = DeNetMysql()
  12. def get_account_avg():
  13. """
  14. 获取账号
  15. :return:
  16. """
  17. # with open("/Users/luojunhui/cyber/LongArticlesJob/dev/军事历史.json", encoding="utf-8") as f:
  18. # avg_dict = json.loads(f.read())
  19. #
  20. # account_position_list = list(avg_dict.keys())
  21. # L = []
  22. # for account in tqdm(account_position_list):
  23. # gh_id = account[:-2]
  24. # index = int(account[-1:])
  25. # select_sql = f"""
  26. # select title, read_cnt, link from crawler_meta_article
  27. # where out_account_id = '{gh_id}' and article_index = {index} and status = 1;
  28. # """
  29. # result_list = D.select(select_sql)
  30. # try:
  31. # avg_read = avg_dict[account]['readAvg']
  32. # for i in result_list:
  33. # title, read_cnt, link = i
  34. # avg_score = read_cnt / avg_read
  35. # temp = [title, link, read_cnt, avg_score, avg_read, avg_dict[account]['category']]
  36. # L.append(temp)
  37. # except:
  38. # continue
  39. #
  40. # sl = sorted(L, reverse=True, key=lambda x: x[3])
  41. # a = 0
  42. # b = 0
  43. # LL = []
  44. # for line in sl:
  45. # title = line[0]
  46. # read_cnt = line[2]
  47. # if "农历" in title or '太极' in title or "节" in title or line[3] < 1.3 or len(title) < 15 or read_cnt < 5000:
  48. # a += 1
  49. # continue
  50. # else:
  51. # b += 1
  52. # print(line)
  53. # LL.append(line)
  54. # print(a)
  55. # print(b)
  56. # df = DataFrame(LL, columns=["title", "link", "read", "read_avg_times", "read_avg", "category"])
  57. # df.to_excel("historyArmy.xlsx", index=False)
  58. # url_list = [i[1] for i in LL[3:]]
  59. dataFrame = pd.read_excel("historyArmy.xlsx")
  60. print(dataFrame.columns.values.tolist())
  61. url_list = []
  62. for line in dataFrame.values.tolist():
  63. if line[-1] == '历史':
  64. url_list.append(line[1])
  65. print(len(url_list))
  66. try:
  67. AIDTApi().updateArticleIntoCrawlerPlan(
  68. plan_id=None,
  69. plan_name="历史冷启-0905-new",
  70. plan_tag="autoArticlePoolLevel1",
  71. url_list=url_list
  72. )
  73. except Exception as e:
  74. print("error--{}".format(e))
# Run the job right away.
# NOTE(review): indentation was lost in this copy, but this call appears to sit
# at module top level, which means importing the module also triggers it.
# Consider an `if __name__ == "__main__":` guard if that is not intended.
get_account_avg()