"""Export the root source ids of recent long-article videos to an Excel sheet.

The script queries ``long_articles_video`` for a fixed set of accounts,
replaces three JSON columns with the ``rootSourceId`` embedded in their
``productionPath`` value, and writes the rows to ``root_source_id.xlsx``.
"""
import json

import pandas as pd

# NOTE(review): these credentials are committed in source. They are kept
# unchanged so the script still runs, but they should be moved to environment
# variables or a secret store, and the password rotated.
DB_CONFIG = {
    "host": 'rm-bp1159bu17li9hi94.mysql.rds.aliyuncs.com',
    "port": 3306,
    "user": 'crawler',
    "password": 'crawler123456@',
    "db": 'piaoquan-crawler',
    "charset": 'utf8mb4',
}

# CSV whose header row supplies the column names for the exported sheet.
COLUMNS_CSV = "piaoquan_crawler_long_articles_video.csv"
OUTPUT_XLSX = "root_source_id.xlsx"

# URL-encoded "rootSourceId=" marker that precedes the id in productionPath.
ROOT_SOURCE_MARKER = "rootSourceId%3D"

# Positions (counted from the end of each row) of the JSON columns to rewrite.
ROOT_SOURCE_COLUMNS = (-5, -6, -7)

# Plain string: the query has no interpolated values, so no f-string is needed.
SQL = """
    select * from long_articles_video
    where account_name in ("小阳看天下", "小惠爱厨房")
    and update_time > "2024-07-04";
"""


def extract_desired_key(json_str):
    """Return the root source id embedded in a JSON payload's productionPath.

    :param json_str: JSON text expected to decode to an object with a string
        ``productionPath`` entry containing ``rootSourceId%3D``.
    :return: the text between the first marker and the next one (or the end
        of the path), or ``None`` when the input is empty, is not valid JSON,
        has no string ``productionPath``, or the path lacks the marker.
    """
    if not json_str:
        return None
    try:
        data = json.loads(json_str)
    except (TypeError, json.JSONDecodeError):
        # TypeError covers inputs json.loads cannot accept at all.
        return None
    path = data.get("productionPath") if isinstance(data, dict) else None
    if not isinstance(path, str):
        return None
    parts = path.split(ROOT_SOURCE_MARKER)
    return parts[1] if len(parts) > 1 else None


def replace_root_source_ids(row):
    """Return ``row`` as a list with its JSON columns replaced by root source ids.

    :param row: one result row; must have at least seven columns.
    :return: a new list; the input sequence is not modified.
    """
    fields = list(row)
    for index in ROOT_SOURCE_COLUMNS:
        fields[index] = extract_desired_key(fields[index])
    return fields


def fetch_rows():
    """Run the export query and return every matching row.

    The connection is closed even when the query fails.
    """
    # Imported here so the parsing helpers above can be imported (and tested)
    # on machines that do not have the database driver installed.
    import pymysql

    connection = pymysql.connect(**DB_CONFIG)
    try:
        with connection.cursor() as cursor:
            cursor.execute(SQL)
            return cursor.fetchall()
    finally:
        connection.close()


def main():
    """Query the database, rewrite the JSON columns, and write the Excel file."""
    columns = pd.read_csv(COLUMNS_CSV).columns.tolist()
    rows = []
    for record in fetch_rows():
        fields = replace_root_source_ids(record)
        print(fields)
        rows.append(fields)
    df = pd.DataFrame(rows, columns=columns)
    df.to_excel(OUTPUT_XLSX, index=False)


if __name__ == "__main__":
    main()