|
@@ -191,16 +191,13 @@ def add_func2(initial_df, pre_h_df):
|
|
|
min_score = 0
|
|
|
initial_video_id_list = initial_df['videoid'].to_list()
|
|
|
pre_h_df = pre_h_df[pre_h_df['score'] > min_score]
|
|
|
- print(f"pre1: {len(pre_h_df)}\n{pre_h_df}")
|
|
|
pre_h_df = pre_h_df[~pre_h_df['videoid'].isin(initial_video_id_list)]
|
|
|
- print(f"pre2: {len(pre_h_df)}\n{pre_h_df}")
|
|
|
|
|
|
df = pd.concat([initial_df, pre_h_df], ignore_index=True)
|
|
|
# Deduplicate by videoid, keeping the row with the highest score
|
|
|
df['videoid'] = df['videoid'].astype(int)
|
|
|
df = df.sort_values(by=['score'], ascending=False)
|
|
|
df = df.drop_duplicates(subset=['videoid'], keep="first")
|
|
|
- print(f"initial = {len(initial_df)}, df = {len(df)}")
|
|
|
return df
|
|
|
|
|
|
|