1 year ago · 15294c3643
--- a/dev/hksp_test_0515.py
+++ b/dev/hksp_test_0515.py
@@ -0,0 +1,71 @@
 
				+"""
			
 
				+@author: luojunhui
			
 
				+"""
			
 
				+import json
			
 
				+import pandas as pd
			
 
				+from applications.search import hksp_search
			
 
				+
			
 
				+
			
 
				+def ab_test(title):
			
 
				+    """
			
 
				+    :param title:
			
 
				+    :return:
			
 
				+    """
			
 
				+    recall_list = hksp_search(title)
			
 
				+    if recall_list:
			
 
				+        best_0 = recall_list[0]
			
 
				+        print(best_0['title'], best_0['playcnt'], best_0['like'] if best_0.get('like') else 0, best_0['comment'] if best_0.get("comment") else 0)
			
 
				+        ab_1 = sorted(recall_list, reverse=True, key=lambda x: int(x['playcnt']))[0]
			
 
				+        ab_2 = sorted(recall_list, reverse=True, key=lambda x: int(x['like'] if x.get('like') else 0))[0]
			
 
				+        ab_3 = sorted(recall_list, reverse=True, key=lambda x: int(x['comment'] if x.get("comment") else 0))[0]
			
 
				+        ab_4 = sorted(recall_list, reverse=True, key=lambda x: (int(x['like'] if x.get('like') else 0) / int(x['playcnt'])))[0]
			
 
				+        # print(ab_1['title'], ab_1['playcnt'], ab_1['like'] if ab_1.get('like') else 0, ab_1['comment'] if ab_1.get("comment") else 0)
			
 
				+        # print(ab_2['title'], ab_2['playcnt'], ab_2['like'] if ab_2.get('like') else 0, ab_2['comment'] if ab_2.get("comment") else 0)
			
 
				+        # print(ab_3['title'], ab_3['playcnt'], ab_3['like'] if ab_3.get('like') else 0, ab_3['comment'] if ab_3.get("comment") else 0)
			
 
				+        # print(ab_4['title'], ab_4['playcnt'], ab_4['like'] if ab_4.get('like') else 0, ab_4['comment'] if ab_4.get("comment") else 0)
			
 
				+        return [best_0, ab_1, ab_2, ab_3, ab_4]
			
 
				+    else:
			
 
				+        return []
			
 
				+
			
 
				+
			
 
				+title_list = [
			
 
				+    "菲再闯仁爱礁，中国海警船掀了炮衣，好话说尽，1号令立即实施",
			
 
				+    "乌克兰遭受严重损失！俄罗斯在西方国家大使馆展示摧毁的设备",
			
 
				+    "中方出重拳了，一句话把以色列挂在火上烤，就看它能否接住",
			
 
				+    "终于合作！中东国家大团结，以色列要完蛋",
			
 
				+    "【2】毛远新从监狱刑满释放后，对女儿十分愧疚，回忆往事时他偷偷落泪",
			
 
				+    "【2】韩国政局要变天，中国一年前的警示应验了，尹锡悦终究自食其果",
			
 
				+    "金正恩乘专列过江，为何故意绕开中国？原因有两个，值得我们警惕",
			
 
				+    "中方出手！给巴勒斯坦打去电话，“战狼”已至中东！",
			
 
				+    "中国不欠犹太人的，华春莹用双语发文，西方该感恩没资历道德绑架"
			
 
				+]
			
 
				+
			
 
				+with open("result.json", encoding="utf-8") as f:
			
 
				+    title_dict = json.loads(f.read())
			
 
				+
			
 
				+ooo = []
			
 
				+for line in title_list:
			
 
				+    print(line)
			
 
				+    c_title = title_dict[line]['c_title']
			
 
				+    c_keys = "# ".join(title_dict[line]["keys"])
			
 
				+    result = ab_test(line)
			
 
				+    if result:
			
 
				+        for index, item in enumerate(result):
			
 
				+            temp = [line, c_title, c_keys, "ab_{}".format(index), item['title'], item.get('playcnt', None), item.get('like', None), item.get('comment', None), item['playurl']]
			
 
				+            ooo.append(temp)
			
 
				+    else:
			
 
				+        sub_result = ab_test(title_dict[line]['c_title'])
			
 
				+        if sub_result:
			
 
				+            for index, item in enumerate(sub_result):
			
 
				+                temp = [line, c_title, c_keys, "ab_{}".format(index), item['title'], item.get('playcnt', None), item.get('like', None), item.get('comment', None), item['playurl']]
			
 
				+                print(temp)
			
 
				+                ooo.append(temp)
			
 
				+        else:
			
 
				+            ss_result = ab_test(title_dict[line]['keys'][0])
			
 
				+            for index, item in enumerate(ss_result):
			
 
				+                temp = [line, c_title, c_keys,  "ab_{}".format(index), item['title'], item.get('playcnt', None), item.get('like', None), item.get('comment', None), item['playurl']]
			
 
				+                print(temp)
			
 
				+                ooo.append(temp)
			
 
				+
			
 
				+df = pd.DataFrame(ooo, columns=['article_title', 'kimi_content_summary', 'kimi_content_keys', 'ab_test', 'out_title', 'views', 'like', 'comment', 'videoUrl'])
			
 
				+df.to_excel("baidu_test.xlsx", index=False)
			
--- a/dev/read_in.py
+++ b/dev/read_in.py
@@ -0,0 +1,17 @@
 
				+"""
			
 
				+@author: luojunhui
			
 
				+"""
			
 
				+import json
			
 
				+
			
 
				+file_path = 'ttt.txt'
			
 
				+
			
 
				+with open(file_path, encoding="utf-8") as f:
			
 
				+    data_lines = f.readlines()
			
 
				+
			
 
				+dy_c = 0
			
 
				+buy_c = 0
			
 
				+for line in data_lines:
			
 
				+    data = json.loads(json.loads(line[:-1])['data'])
			
 
				+    if data['productionPath']:
			
 
				+        if "20764105" in data['productionPath']:
			
 
				+            print(1)