# encoding: utf-8
"""Encode per-video text with a BERT service and dump the embeddings to CSV.

Reads ``./video_words20201115.csv``, skips its first (header) row, and for
every remaining row sends column 1 to ``BertClient.encode`` as a one-element
batch.  Each result is written to
``./embedding_semantic_videoTzld1116-alldata.csv`` as a single-field row of
the form ``<video_id>:<embedding values>``.

NOTE(review): the output header declares two columns ('videoId',
'semantic_embedding') while every data row holds one combined field; this
layout is kept as-is because downstream readers may rely on it -- confirm.
"""
import csv
import time

import numpy as np
from bert_serving.client import BertClient

begin_time = time.time()

bc = BertClient()
# bc = BertClient(ip='192.168.204.120', port=5555)

fileName = "./video_words20201115.csv"
idx = 0

# Both files are managed by `with` so they are flushed and closed even when
# a row fails to parse or the BERT service raises mid-run.
with open("./embedding_semantic_videoTzld1116-alldata.csv", 'w',
          newline='', encoding='utf-8') as csvFile2:
    writer = csv.writer(csvFile2)
    writer.writerow(['videoId', 'semantic_embedding'])

    # newline='' lets the csv module handle line endings itself, as its
    # documentation requires for files passed to csv.reader.
    # NOTE(review): the input is read with the platform default encoding
    # while the output is UTF-8 -- confirm the input file's real encoding.
    with open(fileName, "r", newline='') as csvFile1:
        reader = csv.reader(csvFile1)

        # Discard the title row; the default keeps an empty file from raising.
        next(reader, None)

        for line in reader:
            # csv.reader yields [] for blank lines; there is nothing to encode.
            if not line:
                continue

            print("idx is: " + str(idx))
            video_id = line[0]
            print("video is is: " , video_id)
            vector_str = line[1]
            print("vector is: ")
            print(vector_str)

            # The text is sent as a one-element batch.
            vectors = bc.encode([vector_str])
            print("vectors.size is: ", vectors.size)
            print("vectors.shape is: ", vectors.shape)
            vectors_list = vectors.tolist()

            # Earlier tokenising experiments, kept for reference:
            # str_list = str1.split(" ")
            # str_list = str1.strip('\n').split()  # first approach
            # str_list = list(filter(None, str1.split(" ")))  # second approach

            # [2:-2] drops the first and last two characters of the list's
            # string form -- presumably the "[[" / "]]" of a (1, dim) result;
            # verify against the BertClient.encode return shape.
            res_str = video_id + ':' + str(vectors_list)[2: -2]
            writer.writerow([res_str])
            idx += 1

print("update csv file cost time is: " + str(time.time() - begin_time))