|
@@ -1,83 +0,0 @@
|
|
"""One-off migration: flatten video_content tag columns into video_content_mapping.

Every row of ``video_content`` is read, and each tag it carries is written to
``video_content_mapping`` as one ``(video_id, tag, tag_type)`` record, where
``tag_type`` is the name of the column the tag came from.

Connection settings come from the environment so that the database password
is never stored in source control:

    INCENTIVE_DB_PASSWORD  (required)
    INCENTIVE_DB_HOST, INCENTIVE_DB_NAME, INCENTIVE_DB_PORT, INCENTIVE_DB_USER
                           (optional overrides of the defaults below)
"""
import json
import os

# Tag columns handled by this script. The code assumes they appear in this
# order in a `SELECT *` row, starting at index 2 -- verify against the schema.
all_field_names = ['key_words', 'search_keys', 'extra_keys', 'category_list', 'tone', 'target_audience',
                   'target_age', 'target_gender', 'address', 'theme']

# Columns whose value is a JSON array of tags (one mapping record per element).
json_field_names = ['key_words', 'search_keys', 'extra_keys', 'category_list']

# Columns whose value is stored as-is (one mapping record per column).
normal_field_names = ['tone', 'target_audience',
                      'target_age', 'target_gender', 'address', 'theme']

# Position of the video id in a `SELECT *` row, and of the first tag column.
_VIDEO_ID_COLUMN = 1
_TAG_COLUMN_OFFSET = 2

# Row index of every tag column, computed once instead of calling
# list.index() for every field of every row.
_COLUMN_INDEX = {
    name: position + _TAG_COLUMN_OFFSET
    for position, name in enumerate(all_field_names)
}

# Number of pending records that triggers a bulk insert.
BATCH_SIZE = 1000

SELECT_SQL = "SELECT * FROM video_content;"

INSERT_SQL = """
    INSERT INTO video_content_mapping (video_id, tag, tag_type)
    VALUES (%s, %s, %s)
"""


def load_db_config():
    """Build the MySQL connection parameters from environment variables.

    Returns:
        A dict suitable for ``mysql.connector.connect(**config)``.

    Raises:
        RuntimeError: if ``INCENTIVE_DB_PASSWORD`` is unset or empty.
    """
    password = os.environ.get('INCENTIVE_DB_PASSWORD')
    if not password:
        raise RuntimeError(
            "INCENTIVE_DB_PASSWORD is not set; refusing to fall back to a "
            "password stored in source code"
        )
    return {
        'host': os.environ.get('INCENTIVE_DB_HOST',
                               'rm-bp1k5853td1r25g3n690.mysql.rds.aliyuncs.com'),
        'database': os.environ.get('INCENTIVE_DB_NAME', 'incentive'),
        'port': int(os.environ.get('INCENTIVE_DB_PORT', '3306')),
        'user': os.environ.get('INCENTIVE_DB_USER', 'wx2016_longvideo'),
        'password': password,
    }


def parse_tag_list(raw):
    """Decode one JSON-array column value into a list of tags.

    Args:
        raw: the column value; ``str``, ``bytes``/``bytearray`` or ``None``.

    Returns:
        The decoded list, or ``[]`` when the value is empty or does not look
        like a JSON array.

    Raises:
        json.JSONDecodeError: if the value starts with ``[`` but is malformed.
    """
    if not raw:
        return []
    # Indexing bytes yields an int, so a bytes value would never compare
    # equal to '[' and would always be skipped; decode it to text first.
    if isinstance(raw, (bytes, bytearray)):
        raw = bytes(raw).decode('utf-8')
    if not isinstance(raw, str):
        return []
    text = raw.strip()
    if not text.startswith('['):
        return []
    return json.loads(text)


def build_mapping_rows(row):
    """Turn one video_content row into its video_content_mapping records.

    Args:
        row: a sequence as returned by ``SELECT * FROM video_content``.

    Returns:
        A list of ``(video_id, tag, tag_type)`` tuples: first the elements of
        every JSON-array column, then one tuple per plain column (emitted even
        when the value is empty, as the original script did).

    Raises:
        ValueError: if a JSON-array column holds malformed JSON; the message
            names the video and the column.
    """
    video_id = row[_VIDEO_ID_COLUMN]
    records = []

    for field_name in json_field_names:
        try:
            tags = parse_tag_list(row[_COLUMN_INDEX[field_name]])
        except json.JSONDecodeError as exc:
            raise ValueError(
                f"video {video_id!r}: column {field_name!r} holds malformed JSON"
            ) from exc
        records.extend((video_id, tag, field_name) for tag in tags)

    records.extend(
        (video_id, row[_COLUMN_INDEX[field_name]], field_name)
        for field_name in normal_field_names
    )
    return records


def flush_batch(cursor, batch):
    """Bulk-insert the pending records and empty the batch in place."""
    if not batch:
        return
    cursor.executemany(INSERT_SQL, batch)
    print(f"Inserting records {len(batch)} rows...")
    batch.clear()


def main():
    """Run the migration inside a single transaction."""
    # Imported here so the pure helpers above stay importable on machines
    # that do not have the MySQL driver installed.
    import mysql.connector

    cnx = mysql.connector.connect(**load_db_config())
    try:
        cursor = cnx.cursor()
        try:
            # fetchall() drains the result set so the same cursor can be
            # reused for the inserts below.
            cursor.execute(SELECT_SQL)
            rows = cursor.fetchall()
            print("Reading data from video_content table...")
            print("row count: ", len(rows))

            insert_batch = []
            for row in rows:
                insert_batch.extend(build_mapping_rows(row))
                # Flush once enough records have accumulated.
                if len(insert_batch) >= BATCH_SIZE:
                    flush_batch(cursor, insert_batch)

            # Insert whatever is left over, then make everything permanent.
            flush_batch(cursor, insert_batch)
            cnx.commit()
        except Exception:
            # Never leave a half-written mapping table behind.
            cnx.rollback()
            raise
        finally:
            cursor.close()
    finally:
        cnx.close()


if __name__ == "__main__":
    main()
|
|
|