"""Embed video titles queried from ODPS and insert them into Milvus."""
from datetime import datetime

import BertEmbedding
import MilvusComponent
import ODPSQueryUtil

# Integer counter attributes read from every record. Their order here is the
# order of the corresponding columns handed to MilvusComponent.insert_vector
# (they follow the videoid, title-vector and title columns).
_COUNTER_FIELDS = (
    "preview_times",
    "preview_users",
    "view_times",
    "view_users",
    "play_times",
    "play_users",
    "share_times",
    "share_users",
    "return_times",
    "return_users",
)


def convert_to_timestamp(date_string: str) -> float:
    """Convert a ``'%Y-%m-%d %H:%M:%S'`` date-time string to a POSIX timestamp.

    The parsed datetime carries no timezone, so ``datetime.timestamp()``
    interprets it in the local timezone of the machine running the script.

    Raises:
        ValueError: if ``date_string`` does not match the expected format.
    """
    # Parse the date-time string, then convert it to a timestamp.
    return datetime.strptime(date_string, '%Y-%m-%d %H:%M:%S').timestamp()


def _record_to_row(record):
    """Build one complete row (a tuple in column order) from ``record``.

    Any exception raised while reading an attribute, embedding the title or
    converting a value propagates, so the caller can drop the record as a
    whole instead of keeping a half-built row.
    """
    return (
        record.videoid,
        BertEmbedding.text_to_vector(record.title),
        record.title,
        *(int(getattr(record, name)) for name in _COUNTER_FIELDS),
        int(convert_to_timestamp(record.gmt_create)),
    )


def insertEmbeddings(start_idx: int, limit: int) -> None:
    """Query one batch of records, embed the titles and insert into Milvus.

    Records that fail to convert are printed and skipped. A failed insert is
    printed as well and does not raise, so a caller looping over batches
    keeps going.

    Args:
        start_idx: passed through to
            ``ODPSQueryUtil.query_video_title_perfermance``.
        limit: passed through to
            ``ODPSQueryUtil.query_video_title_perfermance``.
    """
    # 1. Get data from ODPS
    records = ODPSQueryUtil.query_video_title_perfermance(start_idx, limit)
    print(f"Got {len(records)} records")

    # 2. Convert every record into a full row. A row is only kept when all
    # of its fields converted; appending field by field would leave the
    # columns with different lengths after a mid-record failure.
    rows = []
    for record in records:
        try:
            row = _record_to_row(record)
        except Exception as e:
            print(e)
        else:
            rows.append(row)
    print(f"Got {len(rows)} videoids embedding success")

    if not rows:
        # Nothing survived conversion, so there is nothing to insert.
        # NOTE(review): assumes insert_vector has no useful effect for an
        # empty batch - confirm against MilvusComponent.
        return

    # 3. Insert embedding into Milvus. Transpose the rows into columns:
    # videoids, title vectors, titles, the counters, creation timestamps.
    data = [list(column) for column in zip(*rows)]
    try:
        MilvusComponent.insert_vector(data)
    except Exception as e:
        print(e)
    else:
        # Report success only when the insert call did not raise.
        print(f"Inserted {len(rows)} videoids embedding success")


if __name__ == '__main__':
    # Batch driver, currently disabled:
    # for i in range(0, 2200000, 100):
    #     y = i + 100
    #     print(f"Inserting {i} to {y}")
    #     insertEmbeddings(i, 100)
    #     print(f"Inserted Over {i} to {y}")
    vector = BertEmbedding.text_to_vector("老人终会成为儿女的手下败将")
    print(vector)