# extract.py (747 B)

  1. import pandas as pd
  2. import json
  3. path = "download.xlsx"
  4. name_set = set()
  5. id_set = set()
  6. name_id_dict = {}
  7. df_list = pd.read_excel(path, usecols=[17, 18]).values.tolist()
  8. for item in df_list:
  9. name_set.add(item[0])
  10. id_set.add(item[1])
  11. key = item[1]
  12. value = str(item[0])
  13. if name_id_dict.get(key):
  14. name_id_dict[key].add(value)
  15. else:
  16. name_id_dict[key] = set()
  17. name_id_dict[key].add(value)
  18. print(len(name_set))
  19. print(len(id_set))
  20. count = 0
  21. for key in name_id_dict:
  22. if len(name_id_dict[key]) > 1:
  23. print(key, "\t", name_id_dict[key])
  24. count += 1
  25. print(name_id_dict['MzUzMjMxNTE3Ng=='])
  26. # with open("fake_id.json", "w", encoding="utf-8") as f:
  27. # f.write(json.dumps(list(id_set)))