result_convertor.py 2.6 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283
  1. import mysql.connector
  2. import json
  3. # 配置数据库连接参数
  4. db_config = {
  5. 'host': 'rm-bp1k5853td1r25g3n690.mysql.rds.aliyuncs.com',
  6. 'database': 'incentive',
  7. 'port': 3306,
  8. 'user': 'wx2016_longvideo',
  9. 'password': 'wx2016_longvideoP@assword1234',
  10. }
  11. # 连接到MySQL数据库
  12. cnx = mysql.connector.connect(**db_config)
  13. cursor = cnx.cursor()
  14. # 定义JSON字段名称
  15. all_field_names = ['key_words', 'search_keys', 'extra_keys', 'category_list', 'tone', 'target_audience',
  16. 'target_age', 'target_gender', 'address', 'theme']
  17. json_field_names = ['key_words', 'search_keys', 'extra_keys', 'category_list']
  18. normal_field_names = ['tone', 'target_audience',
  19. 'target_age', 'target_gender', 'address', 'theme']
  20. # 批量插入的参数列表
  21. insert_batch = []
  22. # 读取video_content表中的JSON数据并解析
  23. select_sql = "SELECT * FROM video_content;"
  24. cursor.execute(select_sql)
  25. rows = cursor.fetchall() # 使用fetchall()确保读取所有行
  26. print("Reading data from video_content table...")
  27. print("row count: ", len(rows))
  28. for row in rows:
  29. video_id = row[1]
  30. # 遍历所有的JSON字段
  31. for field_name in json_field_names:
  32. # 获取对应的JSON字符串
  33. json_data = row[all_field_names.index(field_name) + 2]
  34. # 判断是否是json字符串
  35. if not json_data:
  36. continue
  37. if json_data[0] != '[':
  38. continue
  39. # 解析JSON字符串
  40. tags = json.loads(json_data) if json_data else []
  41. # 构建批量插入的参数
  42. for tag in tags:
  43. insert_batch.append((video_id, tag, field_name))
  44. for field_name in normal_field_names:
  45. # 获取对应的字段值
  46. value = row[all_field_names.index(field_name) + 2]
  47. # 构建批量插入的参数
  48. insert_batch.append((video_id, value, field_name))
  49. # 每1000个记录执行一次批量插入
  50. if len(insert_batch) >= 1000:
  51. cursor.executemany("""
  52. INSERT INTO video_content_mapping (video_id, tag, tag_type)
  53. VALUES (%s, %s, %s)
  54. """, insert_batch)
  55. # 清空列表以便下一次批量插入
  56. print(f"Inserting records {len(insert_batch)} rows...")
  57. insert_batch.clear()
  58. # 插入剩余的记录(如果有)
  59. if insert_batch:
  60. cursor.executemany("""
  61. INSERT INTO video_content_mapping (video_id, tag, tag_type)
  62. VALUES (%s, %s, %s)
  63. """, insert_batch)
  64. print(f"Inserting records {len(insert_batch)} rows...")
  65. # 提交事务
  66. cnx.commit()
  67. # 关闭游标和连接
  68. cursor.close()
  69. cnx.close()