sunxy 1 год назад
Родитель
Commit
5f4c69a5aa
4 изменённых файла: 113 добавлений и 11 удалений
  1. 3 1
      ai_tag_task.py
  2. 3 0
      gpt_tag.py
  3. 24 10
      mysql_connect.py
  4. 83 0
      result_convertor.py

+ 3 - 1
ai_tag_task.py

@@ -75,12 +75,14 @@ def get_video_ai_tags(video_id, asr_file, video_info):
                     log_message['gptRes4'] = gpt_res4
                     # 5. 解析gpt产出结果
                     parseRes = praseGptRes(gpt_res2, gpt_res3, gpt_res4)
+                    parseRes['video_id'] = video_id
                     log_message.update(parseRes)
 
                     # 6. 保存结果
-                    mysql_connect.insert_content()
+                    mysql_connect.insert_content(parseRes)
 
                 except:
+                    log_.error(traceback.format_exc())
                     pass
         else:
             pass

+ 3 - 0
gpt_tag.py

@@ -71,6 +71,9 @@ def request_gpt(prompt):
             if "```json" in result_content:
                 result_content = result_content.split(
                     "```json")[1].split("```")[0]
+            if "```" in result_content:
+                result_content = result_content.split(
+                    "```")[1].split("```")[0]
             return result_content
         except Exception:
             log_.error(traceback.format_exc())

+ 24 - 10
mysql_connect.py

@@ -1,5 +1,6 @@
 import mysql.connector
 from mysql.connector import Error
+import json
 
 MYSQL_CONFIG = {
     'host': 'rm-bp1k5853td1r25g3n690.mysql.rds.aliyuncs.com',
@@ -27,23 +28,36 @@ def insert_content(gpt_res):
 
             # 插入数据的SQL语句
             insert_query = """
-            INSERT INTO video_content(video_id,key_words,search_keys,extra_keys,tone,target_audience,target_age,target_gender,address,theme)
-            VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
+            INSERT INTO video_content(video_id,key_words,search_keys,extra_keys,category_list,tone,target_audience,target_age,target_gender,address,theme)
+            VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
             """
             # 准备要插入的数据,转换字典列表为元组列表
-            data_to_insert = [(gpt_res['video_id'], gpt_res['key_words'], gpt_res['search_keys'], gpt_res['extra_keys'],
-                               gpt_res['tone'], gpt_res['target_audience'], gpt_res['target_age'], gpt_res['target_gender'],
-                               gpt_res['address'], gpt_res['theme'])]
+            data_to_insert = [
+                (
+                    gpt_res.get('video_id', ''),
+                    json.dumps(gpt_res.get('key_words', ''),
+                               sort_keys=True, ensure_ascii=False),
+                    json.dumps(gpt_res.get('search_keys', ''),
+                               sort_keys=True, ensure_ascii=False),
+                    json.dumps(gpt_res.get('extra_keys', ''),
+                               sort_keys=True, ensure_ascii=False),
+                    json.dumps(gpt_res.get('category', ''),
+                               sort_keys=True, ensure_ascii=False),
+                    str(gpt_res.get('tone', '')),
+                    str(gpt_res.get('target_audience', '')),
+                    str(gpt_res.get('target_age', '')),
+                    str(gpt_res.get('target_gender', '')),
+                    str(gpt_res.get('address', '')),
+                    str(gpt_res.get('theme', ''))
+                )
+            ]
 
             # 执行批量插入操作
             cursor.executemany(insert_query, data_to_insert)
             print('数据插入成功')
 
-            # 检查插入结果
-            # cursor.execute('SELECT * FROM employees')
-            # records = cursor.fetchall()
-            # print('插入的数据:', records)
-
+            # 提交事务
+            conn.commit()
     except Error as e:
         print('数据库连接或操作出错:', e)
     finally:

+ 83 - 0
result_convertor.py

@@ -0,0 +1,83 @@
+import mysql.connector
+import json
+
+# Database connection parameters. NOTE(review): credentials are hard-coded here; consider loading them from the environment.
+db_config = {
+    'host': 'rm-bp1k5853td1r25g3n690.mysql.rds.aliyuncs.com',
+    'database': 'incentive',
+    'port': 3306,
+    'user': 'wx2016_longvideo',
+    'password': 'wx2016_longvideoP@assword1234',
+}
+
+# Connect to the MySQL database
+cnx = mysql.connector.connect(**db_config)
+cursor = cnx.cursor()
+
+# Field names; the order of all_field_names is used below to compute each field's position in a fetched row
+all_field_names = ['key_words', 'search_keys', 'extra_keys', 'category_list', 'tone', 'target_audience',
+                   'target_age', 'target_gender', 'address', 'theme']
+
+json_field_names = ['key_words', 'search_keys', 'extra_keys', 'category_list']
+
+normal_field_names = ['tone', 'target_audience',
+                      'target_age', 'target_gender', 'address', 'theme']
+
+# Parameter tuples accumulated for the batched inserts
+insert_batch = []
+
+
+# Read every row of the video_content table; its JSON columns are parsed in the loop below
+select_sql = "SELECT * FROM video_content;"
+cursor.execute(select_sql)
+rows = cursor.fetchall()  # use fetchall() to make sure all rows are read
+print("Reading data from video_content table...")
+print("row count: ", len(rows))
+
+for row in rows:
+    video_id = row[1]
+    # Iterate over all JSON fields
+    for field_name in json_field_names:
+        # Get the corresponding JSON string; the +2 offset assumes the fields start at row[2] (row[1] is video_id) - TODO confirm against the table schema
+        json_data = row[all_field_names.index(field_name) + 2]
+        # Decide whether the value is a JSON list string: empty values and values not starting with '[' are skipped
+        if not json_data:
+            continue
+        if json_data[0] != '[':
+            continue
+        # Parse the JSON string
+        tags = json.loads(json_data) if json_data else []
+        # Build the batch-insert parameters: one (video_id, tag, field_name) tuple per tag
+        for tag in tags:
+            insert_batch.append((video_id, tag, field_name))
+
+    for field_name in normal_field_names:
+        # Get the corresponding field value (same +2 column offset as above)
+        value = row[all_field_names.index(field_name) + 2]
+        # Build the batch-insert parameters: the raw value is stored as a single tag
+        insert_batch.append((video_id, value, field_name))
+
+    # Run a batch insert once at least 1000 records have accumulated (checked after each source row)
+    if len(insert_batch) >= 1000:
+        cursor.executemany("""
+            INSERT INTO video_content_mapping (video_id, tag, tag_type)
+            VALUES (%s, %s, %s)
+        """, insert_batch)
+        # Clear the list so the next batch starts empty
+        print(f"Inserting records {len(insert_batch)} rows...")
+        insert_batch.clear()
+
+# Insert the remaining records, if any
+if insert_batch:
+    cursor.executemany("""
+        INSERT INTO video_content_mapping (video_id, tag, tag_type)
+        VALUES (%s, %s, %s)
+    """, insert_batch)
+    print(f"Inserting records {len(insert_batch)} rows...")
+
+# Commit the transaction
+cnx.commit()
+
+# Close the cursor and the connection
+cursor.close()
+cnx.close()