"""
hksp_test_0515.py

Compare several ranking strategies over hksp_search results and export the
picks to an Excel sheet.

@author: luojunhui
"""
import json

import pandas as pd

from applications.search import hksp_search
  7. def ab_test(title):
  8. """
  9. :param title:
  10. :return:
  11. """
  12. recall_list = hksp_search(title)
  13. if recall_list:
  14. best_0 = recall_list[0]
  15. print(best_0['title'], best_0['playcnt'], best_0['like'] if best_0.get('like') else 0, best_0['comment'] if best_0.get("comment") else 0)
  16. ab_1 = sorted(recall_list, reverse=True, key=lambda x: int(x['playcnt']))[0]
  17. ab_2 = sorted(recall_list, reverse=True, key=lambda x: int(x['like'] if x.get('like') else 0))[0]
  18. ab_3 = sorted(recall_list, reverse=True, key=lambda x: int(x['comment'] if x.get("comment") else 0))[0]
  19. ab_4 = sorted(recall_list, reverse=True, key=lambda x: (int(x['like'] if x.get('like') else 0) / int(x['playcnt'])))[0]
  20. # print(ab_1['title'], ab_1['playcnt'], ab_1['like'] if ab_1.get('like') else 0, ab_1['comment'] if ab_1.get("comment") else 0)
  21. # print(ab_2['title'], ab_2['playcnt'], ab_2['like'] if ab_2.get('like') else 0, ab_2['comment'] if ab_2.get("comment") else 0)
  22. # print(ab_3['title'], ab_3['playcnt'], ab_3['like'] if ab_3.get('like') else 0, ab_3['comment'] if ab_3.get("comment") else 0)
  23. # print(ab_4['title'], ab_4['playcnt'], ab_4['like'] if ab_4.get('like') else 0, ab_4['comment'] if ab_4.get("comment") else 0)
  24. return [best_0, ab_1, ab_2, ab_3, ab_4]
  25. else:
  26. return []
  27. title_list = [
  28. "菲再闯仁爱礁,中国海警船掀了炮衣,好话说尽,1号令立即实施",
  29. "乌克兰遭受严重损失!俄罗斯在西方国家大使馆展示摧毁的设备",
  30. "中方出重拳了,一句话把以色列挂在火上烤,就看它能否接住",
  31. "终于合作!中东国家大团结,以色列要完蛋",
  32. "【2】毛远新从监狱刑满释放后,对女儿十分愧疚,回忆往事时他偷偷落泪",
  33. "【2】韩国政局要变天,中国一年前的警示应验了,尹锡悦终究自食其果",
  34. "金正恩乘专列过江,为何故意绕开中国?原因有两个,值得我们警惕",
  35. "中方出手!给巴勒斯坦打去电话,“战狼”已至中东!",
  36. "中国不欠犹太人的,华春莹用双语发文,西方该感恩没资历道德绑架"
  37. ]
  38. with open("result.json", encoding="utf-8") as f:
  39. title_dict = json.loads(f.read())
  40. ooo = []
  41. for line in title_list:
  42. print(line)
  43. c_title = title_dict[line]['c_title']
  44. c_keys = "# ".join(title_dict[line]["keys"])
  45. result = ab_test(line)
  46. if result:
  47. for index, item in enumerate(result):
  48. temp = [line, c_title, c_keys, "ab_{}".format(index), item['title'], item.get('playcnt', None), item.get('like', None), item.get('comment', None), item['playurl']]
  49. ooo.append(temp)
  50. else:
  51. sub_result = ab_test(title_dict[line]['c_title'])
  52. if sub_result:
  53. for index, item in enumerate(sub_result):
  54. temp = [line, c_title, c_keys, "ab_{}".format(index), item['title'], item.get('playcnt', None), item.get('like', None), item.get('comment', None), item['playurl']]
  55. print(temp)
  56. ooo.append(temp)
  57. else:
  58. ss_result = ab_test(title_dict[line]['keys'][0])
  59. for index, item in enumerate(ss_result):
  60. temp = [line, c_title, c_keys, "ab_{}".format(index), item['title'], item.get('playcnt', None), item.get('like', None), item.get('comment', None), item['playurl']]
  61. print(temp)
  62. ooo.append(temp)
  63. df = pd.DataFrame(ooo, columns=['article_title', 'kimi_content_summary', 'kimi_content_keys', 'ab_test', 'out_title', 'views', 'like', 'comment', 'videoUrl'])
  64. df.to_excel("baidu_test.xlsx", index=False)