1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283 |
- import mysql.connector
- import json
- # 配置数据库连接参数
- db_config = {
- 'host': 'rm-bp1k5853td1r25g3n690.mysql.rds.aliyuncs.com',
- 'database': 'incentive',
- 'port': 3306,
- 'user': 'wx2016_longvideo',
- 'password': 'wx2016_longvideoP@assword1234',
- }
- # 连接到MySQL数据库
- cnx = mysql.connector.connect(**db_config)
- cursor = cnx.cursor()
- # 定义JSON字段名称
- all_field_names = ['key_words', 'search_keys', 'extra_keys', 'category_list', 'tone', 'target_audience',
- 'target_age', 'target_gender', 'address', 'theme']
- json_field_names = ['key_words', 'search_keys', 'extra_keys', 'category_list']
- normal_field_names = ['tone', 'target_audience',
- 'target_age', 'target_gender', 'address', 'theme']
- # 批量插入的参数列表
- insert_batch = []
- # 读取video_content表中的JSON数据并解析
- select_sql = "SELECT * FROM video_content;"
- cursor.execute(select_sql)
- rows = cursor.fetchall() # 使用fetchall()确保读取所有行
- print("Reading data from video_content table...")
- print("row count: ", len(rows))
- for row in rows:
- video_id = row[1]
- # 遍历所有的JSON字段
- for field_name in json_field_names:
- # 获取对应的JSON字符串
- json_data = row[all_field_names.index(field_name) + 2]
- # 判断是否是json字符串
- if not json_data:
- continue
- if json_data[0] != '[':
- continue
- # 解析JSON字符串
- tags = json.loads(json_data) if json_data else []
- # 构建批量插入的参数
- for tag in tags:
- insert_batch.append((video_id, tag, field_name))
- for field_name in normal_field_names:
- # 获取对应的字段值
- value = row[all_field_names.index(field_name) + 2]
- # 构建批量插入的参数
- insert_batch.append((video_id, value, field_name))
- # 每1000个记录执行一次批量插入
- if len(insert_batch) >= 1000:
- cursor.executemany("""
- INSERT INTO video_content_mapping (video_id, tag, tag_type)
- VALUES (%s, %s, %s)
- """, insert_batch)
- # 清空列表以便下一次批量插入
- print(f"Inserting records {len(insert_batch)} rows...")
- insert_batch.clear()
- # 插入剩余的记录(如果有)
- if insert_batch:
- cursor.executemany("""
- INSERT INTO video_content_mapping (video_id, tag, tag_type)
- VALUES (%s, %s, %s)
- """, insert_batch)
- print(f"Inserting records {len(insert_batch)} rows...")
- # 提交事务
- cnx.commit()
- # 关闭游标和连接
- cursor.close()
- cnx.close()
|