123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103 |
- """
- @author: luojunhui
- """
- import os
- import json
- import pymysql
- from tqdm import tqdm
def insert_into_mysql(path):
    """
    Refresh the ``updateTimestamp`` of one official account from its JSON file.

    Despite its name, this function does not INSERT a row: it runs an UPDATE
    on the ``official_accounts`` table, matching the row by ``ghId``. (An
    earlier INSERT of the full record existed only as commented-out code.)

    The file must contain a single JSON object. Only two of its keys are used:

        "wx_gh"             compared against the ``ghId`` column
        "update_timestamp"  written to the ``updateTimestamp`` column

    Example (other keys omitted)::

        {"wx_gh": "gh_5cc284077cda", "update_timestamp": 1720577540593}

    :param path: path of the account JSON file
    :return: None
    """
    with open(path, encoding="utf-8") as f:
        info = json.load(f)

    gh_id = info.get("wx_gh")
    update_timestamp = info.get("update_timestamp")
    print(update_timestamp)

    if gh_id is None:
        # ``WHERE ghId = NULL`` never matches a row in MySQL, so there is
        # nothing to update; skip opening a connection at all.
        return

    update_sql = """
        UPDATE official_accounts
        SET updateTimestamp = %s
        WHERE ghId = %s
    """

    # NOTE(review): credentials are hard-coded in source; consider moving
    # them to configuration or environment variables.
    connection = pymysql.connect(
        host='rm-bp1159bu17li9hi94.mysql.rds.aliyuncs.com',
        port=3306,
        user='crawler',
        password='crawler123456@',
        db='piaoquan-crawler',
        charset='utf8mb4'
    )
    try:
        # The cursor context manager closes the cursor on exit.
        with connection.cursor() as cursor:
            cursor.execute(update_sql, (update_timestamp, gh_id))
        connection.commit()
    finally:
        # Always release the connection, even if execute/commit raised;
        # the function is called once per file, so a leak would pile up.
        connection.close()
def read_account_info(path='account'):
    """
    Collect the paths of all official-account info files under a directory.

    Walks ``path`` recursively with ``os.walk`` and returns the path of every
    file named exactly ``account.json``.

    :param path: root directory to search; defaults to ``'account'``
        (relative to the current working directory)
    :return: list of paths, each built as ``os.path.join(parent, "account.json")``,
        in ``os.walk`` traversal order; empty when nothing matches (``os.walk``
        yields nothing for a missing directory)
    """
    return [
        os.path.join(parent, file)
        for parent, _dirs, files in os.walk(path)
        for file in files
        if file == "account.json"
    ]
if __name__ == '__main__':
    # Script entry point: process every discovered account.json file,
    # showing a tqdm progress bar over the list.
    for account_file in tqdm(read_account_info()):
        insert_into_mysql(account_file)
|