sunxy hace 11 meses
padre
commit
491c0d4309
Se han modificado 3 ficheros con 76 adiciones y 87 borrados
  1. 4 4
      ai_tag_task.py
  2. 0 83
      result_convertor.py
  3. 72 0
      result_save.py

+ 4 - 4
ai_tag_task.py

@@ -14,7 +14,7 @@ from gpt_tag import request_gpt
 from config import set_config
 from log import Log
 from ReadXlsxFile import getVideoInfoInXlxs
-import mysql_connect
+from result_save import insert_content
 
 config_ = set_config()
 log_ = Log()
@@ -81,7 +81,7 @@ def get_video_ai_tags(video_id, asr_file, video_info):
                     log_message.update(parseRes)
 
                     # 6. 保存结果
-                    mysql_connect.insert_content(parseRes)
+                    insert_content(parseRes)
 
                 except:
                     log_.error(traceback.format_exc())
@@ -270,7 +270,7 @@ def timer_check():
 
 if __name__ == '__main__':
     # timer_check()
-    feature_df = getVideoInfoInXlxs('aigc-test/past_videos.xlsx')
+    feature_df = getVideoInfoInXlxs('past_videos.xlsx')
     video_id_list = feature_df['videoid'].to_list()
     video_info = {}
     for video_id in video_id_list:
@@ -287,7 +287,7 @@ if __name__ == '__main__':
             # print(video_id, title)
     print(len(video_info))
     # 获取已asr识别的视频
-    asr_folder = 'aigc-test/asr_res'
+    asr_folder = 'asr_res'
     retry = 0
     while retry < 30:
         asr_file_list = os.listdir(asr_folder)

+ 0 - 83
result_convertor.py

@@ -1,83 +0,0 @@
-import mysql.connector
-import json
-
-# One-off migration script: read every row of `video_content` and fan its
-# tagged fields out into `video_content_mapping` (video_id, tag, tag_type).
-# NOTE(review): credentials are hard-coded in plaintext — should come from
-# env/config, not source control.
-# Database connection parameters
-db_config = {
-    'host': 'rm-bp1k5853td1r25g3n690.mysql.rds.aliyuncs.com',
-    'database': 'incentive',
-    'port': 3306,
-    'user': 'wx2016_longvideo',
-    'password': 'wx2016_longvideoP@assword1234',
-}
-
-# Connect to the MySQL database
-cnx = mysql.connector.connect(**db_config)
-cursor = cnx.cursor()
-
-# Field names in table column order.
-# Assumes row[0] is the PK, row[1] is video_id, and these fields start at
-# column index 2 (see the `+ 2` offsets below) — TODO confirm against schema.
-all_field_names = ['key_words', 'search_keys', 'extra_keys', 'category_list', 'tone', 'target_audience',
-                   'target_age', 'target_gender', 'address', 'theme']
-
-json_field_names = ['key_words', 'search_keys', 'extra_keys', 'category_list']
-
-normal_field_names = ['tone', 'target_audience',
-                      'target_age', 'target_gender', 'address', 'theme']
-
-# Accumulator of (video_id, tag, tag_type) tuples for executemany()
-insert_batch = []
-
-
-# Read all rows from video_content and parse the JSON-typed fields
-select_sql = "SELECT * FROM video_content;"
-cursor.execute(select_sql)
-rows = cursor.fetchall()  # fetchall() so every row is consumed up front
-print("Reading data from video_content table...")
-print("row count: ", len(rows))
-
-for row in rows:
-    video_id = row[1]
-    # Expand each JSON-array field into one mapping row per element
-    for field_name in json_field_names:
-        # Raw JSON string for this field
-        json_data = row[all_field_names.index(field_name) + 2]
-        # Skip empty values and anything that does not look like a JSON array
-        if not json_data:
-            continue
-        if json_data[0] != '[':
-            continue
-        # Parse the JSON string (the `if json_data` guard is redundant here —
-        # the empty case was already skipped above)
-        tags = json.loads(json_data) if json_data else []
-        # Queue one mapping row per tag
-        for tag in tags:
-            insert_batch.append((video_id, tag, field_name))
-
-    for field_name in normal_field_names:
-        # Scalar field value
-        value = row[all_field_names.index(field_name) + 2]
-        # Queued as-is — NOTE(review): empty/None values are also inserted;
-        # confirm that is intended
-        insert_batch.append((video_id, value, field_name))
-
-    # Flush in batches of >= 1000 rows to bound memory
-    if len(insert_batch) >= 1000:
-        cursor.executemany("""
-            INSERT INTO video_content_mapping (video_id, tag, tag_type)
-            VALUES (%s, %s, %s)
-        """, insert_batch)
-        # Clear the list for the next batch
-        print(f"Inserting records {len(insert_batch)} rows...")
-        insert_batch.clear()
-
-# Insert any remaining records (if present)
-if insert_batch:
-    cursor.executemany("""
-        INSERT INTO video_content_mapping (video_id, tag, tag_type)
-        VALUES (%s, %s, %s)
-    """, insert_batch)
-    print(f"Inserting records {len(insert_batch)} rows...")
-
-# Commit everything as a single transaction
-cnx.commit()
-
-# Close cursor and connection
-cursor.close()
-cnx.close()

+ 72 - 0
result_save.py

@@ -0,0 +1,72 @@
+import mysql.connector
+import json
+
+# Database connection parameters.
+# NOTE(review): plaintext credentials in source — move to env/config.
+# NOTE(review): `json` is imported but unused in this file — presumably left
+# over from result_convertor.py; confirm before removing.
+db_config = {
+    'host': 'rm-bp19uc56sud25ag4o.mysql.rds.aliyuncs.com',
+    'database': 'longvideo',
+    'port': 3306,
+    'user': 'wx2016_longvideo',
+    'password': 'wx2016_longvideoP@assword1234',
+}
+
+# GPT-result fields whose values are lists: expanded one row per element
+json_field_names = ['key_words', 'search_keys', 'extra_keys', 'category_list']
+
+# GPT-result fields whose values are scalars: stored as a single row each
+normal_field_names = ['tone', 'target_audience',
+                      'target_age', 'target_gender', 'address', 'theme']
+
+
+def insert_content(gpt_res):
+    """Persist one parsed GPT tagging result into video_content_mapping.
+
+    gpt_res: dict expected to hold 'video_id' plus the keys listed in
+    json_field_names (list values, one row per element) and
+    normal_field_names (scalar values, one row each) — presumably the
+    parseRes produced in ai_tag_task.py; verify against that caller.
+
+    Best-effort: database errors are caught and printed, never raised.
+    """
+    try:
+        # Connect to MySQL.
+        # NOTE(review): db_config['port'] is defined but not passed here —
+        # this works only because 3306 is the connector's default; confirm
+        # it is intentional.
+        conn = mysql.connector.connect(
+            host=db_config['host'],
+            database=db_config['database'],
+            user=db_config['user'],
+            password=db_config['password'],
+        )
+
+        if conn.is_connected():
+            print('成功连接到数据库')
+            cursor = conn.cursor()
+
+            # Accumulator of (video_id, tag, tag_type) tuples
+            insert_batch = []
+
+            # Parameterized SQL template for the batched insert
+            sql = """
+            INSERT INTO video_content_mapping (video_id, tag, tag_type)
+            VALUES (%s, %s, %s)
+            """
+
+            video_id = gpt_res.get('video_id', '')
+            for field_name in json_field_names:
+                # List-valued field (key words, categories, ...)
+                tags = gpt_res.get(field_name, '')
+                # Skip missing values and anything that is not a list
+                if not tags or not isinstance(tags, list):
+                    continue
+                # Queue one mapping row per tag
+                for tag in tags:
+                    insert_batch.append((video_id, tag, field_name))
+
+            for field_name in normal_field_names:
+                # Scalar field value; NOTE(review): missing keys are stored
+                # as '' rows — confirm that is intended
+                value = gpt_res.get(field_name, '')
+                # Queue the mapping row
+                insert_batch.append((video_id, value, field_name))
+
+            # Flush everything in a single executemany call
+            cursor.executemany(sql, insert_batch)
+            print(f"Inserting records {len(insert_batch)} rows...")
+            insert_batch.clear()
+
+            # Commit the transaction
+            conn.commit()
+
+            # Close cursor and connection.
+            # NOTE(review): not reached if an error fires above (no finally),
+            # so the connection leaks on failure — consider try/finally or a
+            # context manager.
+            cursor.close()
+            conn.close()
+            print('数据库连接已关闭')
+    except mysql.connector.Error as e:
+        # Best-effort save: log and swallow; callers see no exception
+        print('数据库连接或操作出错:', e)