12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273 |
- import BertEmbedding
- import MilvusComponent
- import ODPSQueryUtil
- from datetime import datetime
def convert_to_timestamp(date_string):
    """Convert a 'YYYY-MM-DD HH:MM:SS' string into a POSIX timestamp.

    Args:
        date_string: Date-time text in the exact format '%Y-%m-%d %H:%M:%S'.

    Returns:
        The timestamp as a float number of seconds. The parsed datetime is
        naive, so ``datetime.timestamp()`` interprets it as local time.

    Raises:
        ValueError: If ``date_string`` does not match the expected format.
    """
    expected_format = '%Y-%m-%d %H:%M:%S'
    # Parse the text and convert it to epoch seconds in a single expression.
    return datetime.strptime(date_string, expected_format).timestamp()
# Integer metric attributes read from every record, in the order their
# columns are handed to MilvusComponent.insert_vector.
_METRIC_FIELDS = (
    "preview_times",
    "preview_users",
    "view_times",
    "view_users",
    "play_times",
    "play_users",
    "share_times",
    "share_users",
    "return_times",
    "return_users",
)


def _record_to_row(record):
    """Convert one queried record into a row ordered like the insert columns.

    The row layout is: videoid, title vector, title, the ten integer metrics
    from ``_METRIC_FIELDS``, then the creation time as integer epoch seconds.
    Any exception raised while reading or converting a field propagates to
    the caller, so a record is either converted completely or not at all.
    """
    row = [
        record.videoid,
        BertEmbedding.text_to_vector(record.title),
        record.title,
    ]
    row.extend(int(getattr(record, field)) for field in _METRIC_FIELDS)
    row.append(int(convert_to_timestamp(record.gmt_create)))
    return row


def insertEmbeddings(start_idx, limit):
    """Query a page of video records, embed their titles, and insert them.

    Args:
        start_idx: Passed through to
            ``ODPSQueryUtil.query_video_title_perfermance`` as its first
            argument (presumably the starting offset; confirm in that module).
        limit: Passed through as the second argument (presumably the page
            size; confirm in that module).

    Returns:
        None. Progress and errors are reported with ``print``.

    Records that fail to convert are reported and skipped. Exceptions raised
    by the insert are printed and not re-raised, keeping the original
    best-effort behavior.
    """
    # 1. Get data from ODPS.
    records = ODPSQueryUtil.query_video_title_perfermance(start_idx, limit)
    print(f"Got {len(records)} records")

    # One list per column: videoid, title vector, title, metrics, create time.
    columns = [[] for _ in range(len(_METRIC_FIELDS) + 4)]

    # 2. Convert each record. The row is built completely before anything is
    # appended, so a failure in the middle of a record can no longer leave
    # the columns with different lengths (which would misalign every later
    # row with its videoid and embedding).
    for record in records:
        try:
            row = _record_to_row(record)
        except Exception as e:
            print(e)
            continue
        for column, value in zip(columns, row):
            column.append(value)

    videoids = columns[0]
    print(f"Got {len(videoids)} videoids embedding success")

    # 3. Insert embedding into Milvus.
    try:
        MilvusComponent.insert_vector(columns)
    except Exception as e:
        print(e)
    else:
        # Report success only when the insert did not raise.
        print(f"Inserted {len(videoids)} videoids embedding success")
if __name__ == '__main__':
    # Script entry point: embeds one sample title and prints the vector as a
    # quick smoke test of BertEmbedding.
    #
    # The batch import driver below is intentionally left disabled:
    # for i in range(0, 2200000, 100):
    #     y = i + 100
    #     print(f"Inserting {i} to {y}")
    #     insertEmbeddings(i, 100)
    #     print(f"Inserted Over {i} to {y}")
    print(BertEmbedding.text_to_vector("老人终会成为儿女的手下败将"))
|