# word2vec.py
# coding: utf-8
import sys

from gensim.models import word2vec
  4. if __name__=="__main__":
  5. f = open(sys.argv[1])
  6. arr = []
  7. num = 0
  8. while True:
  9. line = f.readline()
  10. if not line:
  11. break
  12. num = num+1
  13. if num == 1:
  14. continue
  15. items = line.strip().split("\t")
  16. #print(items)
  17. if len(items)<2:
  18. continue
  19. arr.append(items[1].split(" "))
  20. #print(arr)
  21. f.close()
  22. model = word2vec.Word2Vec(arr, vector_size=64, min_count=2,sg=1, workers=10)
  23. model.wv.save_word2vec_format('word2vec.txt',binary=False)
  24. #model.save('word2vec.model')