12345678910111213141516171819202122232425262728 |
- # coding: utf-8
- import sys
- from gensim.models import word2vec
def load_sentences(path, encoding="utf-8"):
    """Read a tab-separated corpus file and return its tokenized sentences.

    The first line of the file is treated as a header and skipped. For every
    other line, surrounding whitespace is stripped, the line is split on
    tabs, and the second column is split on single spaces into tokens.

    Args:
        path: Path of the corpus file to read.
        encoding: Text encoding used to decode the file. Defaults to UTF-8
            so that results do not depend on the platform locale.

    Returns:
        A list of sentences, each a non-empty list of non-empty token
        strings. Lines with fewer than two tab-separated columns, and lines
        whose second column holds no tokens, are left out.
    """
    sentences = []
    # The context manager closes the file even if a line fails to decode.
    with open(path, encoding=encoding) as corpus:
        next(corpus, None)  # skip the header line (no-op on an empty file)
        for line in corpus:
            fields = line.strip().split("\t")
            if len(fields) < 2:
                continue
            # Consecutive spaces would otherwise produce "" tokens, which
            # would be counted as a vocabulary word and written to the
            # saved vector file.
            tokens = [token for token in fields[1].split(" ") if token]
            if tokens:
                sentences.append(tokens)
    return sentences


def main(argv=None):
    """Train skip-gram word vectors on a corpus and save them as text.

    Args:
        argv: Command-line argument list; ``argv[1]`` is the corpus path.
            Defaults to ``sys.argv``.

    Exits with a message on stderr when the corpus path is missing or the
    corpus yields no usable sentences. Otherwise writes the vectors to
    ``word2vec.txt`` in the current working directory.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        sys.exit("usage: python <script> <corpus.tsv>")

    sentences = load_sentences(argv[1])
    if not sentences:
        # Fail here with a clear message rather than inside the trainer.
        sys.exit("no usable sentences found in " + argv[1])

    # sg=1 selects skip-gram; words seen fewer than 2 times are dropped.
    model = word2vec.Word2Vec(
        sentences, vector_size=64, min_count=2, sg=1, workers=10
    )
    # Plain-text word2vec format; use model.save('word2vec.model') instead
    # if the full model needs to be reloaded later.
    model.wv.save_word2vec_format('word2vec.txt', binary=False)


if __name__ == "__main__":
    main()
-
|