12345678910111213141516171819202122232425262728 |
"""Train a skip-gram Word2Vec model from a tab-separated corpus file.

Usage: python <script> <corpus.tsv>

The first line of the input is treated as a header and skipped. For every
other line, the second tab-separated field is split on single spaces and
used as one training sentence. The trained vectors are written to
``word2vec.txt`` in the textual word2vec format.
"""
import sys

from gensim.models import word2vec


def _read_sentences(path):
    """Return the tokenised sentences stored in the file at *path*.

    Args:
        path: Path of a text file whose first line is a header and whose
            remaining lines hold tab-separated fields.

    Returns:
        A list of token lists, one per usable line, built from the second
        tab-separated field of each line.
    """
    sentences = []
    # The context manager guarantees the handle is closed even if a line
    # fails to decode or parsing raises part-way through the file.
    with open(path) as corpus:
        next(corpus, None)  # Skip the header line (no-op on an empty file).
        for line in corpus:
            # strip() also removes leading/trailing tabs, so a line whose
            # first field is empty collapses to one field and is skipped.
            fields = line.strip().split("\t")
            if len(fields) < 2:
                continue
            # Split on a single space (not arbitrary whitespace), so
            # consecutive spaces yield empty-string tokens.
            sentences.append(fields[1].split(" "))
    return sentences


def main():
    """Read the corpus named on the command line, train, and save vectors."""
    if len(sys.argv) < 2:
        # Exit with a readable message instead of an IndexError traceback.
        sys.exit(f"usage: {sys.argv[0]} <corpus.tsv>")

    sentences = _read_sentences(sys.argv[1])
    # sg=1 selects skip-gram training; words seen fewer than min_count
    # times are dropped from the vocabulary.
    model = word2vec.Word2Vec(
        sentences, vector_size=64, min_count=2, sg=1, workers=10
    )
    model.wv.save_word2vec_format('word2vec.txt', binary=False)


if __name__ == "__main__":
    main()
|