# migrate_file_to_db.py
  1. """
  2. @author: luojunhui
  3. """
  4. import os
  5. import json
  6. import pymysql
  7. from tqdm import tqdm
  8. def insert_into_mysql(path):
  9. """
  10. :param path: 文件路径
  11. :return:
  12. {
  13. "channel": 5,
  14. "channel_account_id": "72ed4e3ca6c846cba40e5b736387c760",
  15. "xhs_id": null,
  16. "dy_id": null,
  17. "account_link": "https://mp.weixin.qq.com/mp/profile_ext?action=home&__biz=MzkwMTQ0NDYwNg==&scene=124#wechat_redirect",
  18. "account_name": "龙虾探长",
  19. "avatar_url": null,
  20. "background_url": null,
  21. "gender": "其他",
  22. "description": "分享趣味故事",
  23. "ip_location": null,
  24. "tags": null,
  25. "follower_count": null,
  26. "publish_count": null,
  27. "like_count": null,
  28. "collect_count": null,
  29. "comment_count": null,
  30. "looking_count": null,
  31. "biz_info": "MzkwMTQ0NDYwNg==",
  32. "wx_gh": "gh_5cc284077cda",
  33. "update_timestamp": 1720577540593
  34. }
  35. """
  36. with open(path, encoding="utf-8") as f:
  37. info = json.loads(f.read())
  38. accountName = info.get("account_name", None)
  39. ghId = info.get("wx_gh", None)
  40. bizInfo = info.get("biz_info", None)
  41. accountLink = info.get("account_link", None)
  42. avatarUrl = info.get("avatar_url", None)
  43. description = info.get("description", None)
  44. updateTimestamp = info.get("update_timestamp", None)
  45. print(updateTimestamp)
  46. connection = pymysql.connect(
  47. host='rm-bp1159bu17li9hi94.mysql.rds.aliyuncs.com',
  48. port=3306,
  49. user='crawler',
  50. password='crawler123456@',
  51. db='piaoquan-crawler',
  52. charset='utf8mb4'
  53. )
  54. # insert_sql = f"""
  55. # INSERT INTO official_accounts
  56. # (accountName, ghId, bizInfo, accountLink, avatarUrl, description, updateTimestamp)
  57. # values
  58. # (%s, %s, %s, %s, %s, %s, %s);
  59. # """
  60. update_sql = f"""
  61. UPDATE official_accounts
  62. SET updateTimestamp = %s
  63. WHERE ghId = %s
  64. """
  65. cursor = connection.cursor()
  66. cursor.execute(
  67. update_sql,
  68. (
  69. # accountName,
  70. # bizInfo,
  71. # accountLink,
  72. # avatarUrl,
  73. # description,
  74. updateTimestamp,
  75. ghId
  76. )
  77. )
  78. connection.commit()
  79. def read_account_info():
  80. """
  81. 获取公众号账号信息
  82. :return:
  83. """
  84. path = 'account'
  85. file_list = []
  86. for parent, dirs, files in os.walk(path):
  87. for file in files:
  88. if file == "account.json":
  89. target_path = os.path.join(parent, file)
  90. file_list.append(target_path)
  91. return file_list
  92. if __name__ == '__main__':
  93. p_list = read_account_info()
  94. for fp in tqdm(p_list):
  95. insert_into_mysql(fp)