|
@@ -5,8 +5,24 @@ from gensim.models import word2vec
|
|
|
|
|
|
def read_corpus(path, verbose=False):
    """Read a tab-separated file and return its second column as token lists.

    The first line of the file is skipped. Every following line is stripped
    and split on tabs; lines that yield fewer than two fields are ignored.
    The second field is split on single spaces to form one token list.

    Args:
        path: Path of the text file to read.
        verbose: When true, print the split fields of every line after the
            first one (including lines that are later ignored).

    Returns:
        A list of token lists, one per accepted line, in file order.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    sentences = []
    # The context manager closes the file even if parsing raises.
    with open(path) as handle:
        # Skip the first line; the default keeps an empty file from raising.
        next(handle, None)
        for line in handle:
            items = line.strip().split("\t")
            if verbose:
                print(items)
            if len(items) < 2:
                continue
            sentences.append(items[1].split(" "))
    return sentences


if __name__ == "__main__":
    if len(sys.argv) < 2:
        sys.exit("usage: %s <input.tsv>" % sys.argv[0])

    arr = read_corpus(sys.argv[1], verbose=True)
    # NOTE(review): indentation was lost in the reviewed view of this script;
    # this assumes the collected list is printed once after the whole file has
    # been read - confirm against the original.
    print(arr)

    # Training/export step, currently disabled:
    # model = word2vec.Word2Vec(arr, vector_size=100, min_count=1, sg=1)
    # model.wv.save_word2vec_format('word2vec.txt', binary=False)
    # model.save('word2vec.model')
|
|
|
+
|
|
|
+
|