sunxy hace 11 meses
padre
commit
491c0d4309
Se han modificado 3 ficheros con 76 adiciones y 87 borrados
  1. 4 4
      ai_tag_task.py
  2. 0 83
      result_convertor.py
  3. 72 0
      result_save.py

+ 4 - 4
ai_tag_task.py

@@ -14,7 +14,7 @@ from gpt_tag import request_gpt
 from config import set_config
 from log import Log
 from ReadXlsxFile import getVideoInfoInXlxs
-import mysql_connect
+from result_save import insert_content
 
 config_ = set_config()
 log_ = Log()
@@ -81,7 +81,7 @@ def get_video_ai_tags(video_id, asr_file, video_info):
                     log_message.update(parseRes)
 
                     # 6. 保存结果
-                    mysql_connect.insert_content(parseRes)
+                    insert_content(parseRes)
 
                 except:
                     log_.error(traceback.format_exc())
@@ -270,7 +270,7 @@ def timer_check():
 
 if __name__ == '__main__':
     # timer_check()
-    feature_df = getVideoInfoInXlxs('aigc-test/past_videos.xlsx')
+    feature_df = getVideoInfoInXlxs('past_videos.xlsx')
     video_id_list = feature_df['videoid'].to_list()
     video_info = {}
     for video_id in video_id_list:
@@ -287,7 +287,7 @@ if __name__ == '__main__':
             # print(video_id, title)
     print(len(video_info))
     # 获取已asr识别的视频
-    asr_folder = 'aigc-test/asr_res'
+    asr_folder = 'asr_res'
     retry = 0
     while retry < 30:
         asr_file_list = os.listdir(asr_folder)

+ 0 - 83
result_convertor.py

@@ -1,83 +0,0 @@
-import mysql.connector
-import json
-
-# One-off migration script: read every row of `video_content` and fan its
-# tagged fields out into `video_content_mapping` (video_id, tag, tag_type).
-# NOTE(review): credentials are hard-coded in plaintext — should come from
-# env/config, not source control.
-# Database connection parameters
-db_config = {
-    'host': 'rm-bp1k5853td1r25g3n690.mysql.rds.aliyuncs.com',
-    'database': 'incentive',
-    'port': 3306,
-    'user': 'wx2016_longvideo',
-    'password': 'wx2016_longvideoP@assword1234',
-}
-
-# Connect to the MySQL database
-cnx = mysql.connector.connect(**db_config)
-cursor = cnx.cursor()
-
-# Field names in table column order.
-# Assumes row[0] is the PK, row[1] is video_id, and these fields start at
-# column index 2 (see the `+ 2` offsets below) — TODO confirm against schema.
-all_field_names = ['key_words', 'search_keys', 'extra_keys', 'category_list', 'tone', 'target_audience',
-                   'target_age', 'target_gender', 'address', 'theme']
-
-json_field_names = ['key_words', 'search_keys', 'extra_keys', 'category_list']
-
-normal_field_names = ['tone', 'target_audience',
-                      'target_age', 'target_gender', 'address', 'theme']
-
-# Accumulator of (video_id, tag, tag_type) tuples for executemany()
-insert_batch = []
-
-
-# Read all rows from video_content and parse the JSON-typed fields
-select_sql = "SELECT * FROM video_content;"
-cursor.execute(select_sql)
-rows = cursor.fetchall()  # fetchall() so every row is consumed up front
-print("Reading data from video_content table...")
-print("row count: ", len(rows))
-
-for row in rows:
-    video_id = row[1]
-    # Expand each JSON-array field into one mapping row per element
-    for field_name in json_field_names:
-        # Raw JSON string for this field
-        json_data = row[all_field_names.index(field_name) + 2]
-        # Skip empty values and anything that does not look like a JSON array
-        if not json_data:
-            continue
-        if json_data[0] != '[':
-            continue
-        # Parse the JSON string (the `if json_data` guard is redundant here —
-        # the empty case was already skipped above)
-        tags = json.loads(json_data) if json_data else []
-        # Queue one mapping row per tag
-        for tag in tags:
-            insert_batch.append((video_id, tag, field_name))
-
-    for field_name in normal_field_names:
-        # Scalar field value
-        value = row[all_field_names.index(field_name) + 2]
-        # Queued as-is — NOTE(review): empty/None values are also inserted;
-        # confirm that is intended
-        insert_batch.append((video_id, value, field_name))
-
-    # Flush in batches of >= 1000 rows to bound memory
-    if len(insert_batch) >= 1000:
-        cursor.executemany("""
-            INSERT INTO video_content_mapping (video_id, tag, tag_type)
-            VALUES (%s, %s, %s)
-        """, insert_batch)
-        # Clear the list for the next batch
-        print(f"Inserting records {len(insert_batch)} rows...")
-        insert_batch.clear()
-
-# Insert any remaining records (if present)
-if insert_batch:
-    cursor.executemany("""
-        INSERT INTO video_content_mapping (video_id, tag, tag_type)
-        VALUES (%s, %s, %s)
-    """, insert_batch)
-    print(f"Inserting records {len(insert_batch)} rows...")
-
-# Commit everything as a single transaction
-cnx.commit()
-
-# Close cursor and connection
-cursor.close()
-cnx.close()

+ 72 - 0
result_save.py

@@ -0,0 +1,72 @@
+import mysql.connector
+import json
+
+# Database connection parameters.
+# NOTE(review): plaintext credentials in source — move to env/config.
+# NOTE(review): `json` is imported but unused in this file — presumably left
+# over from result_convertor.py; confirm before removing.
+db_config = {
+    'host': 'rm-bp19uc56sud25ag4o.mysql.rds.aliyuncs.com',
+    'database': 'longvideo',
+    'port': 3306,
+    'user': 'wx2016_longvideo',
+    'password': 'wx2016_longvideoP@assword1234',
+}
+
+# GPT-result fields whose values are lists: expanded one row per element
+json_field_names = ['key_words', 'search_keys', 'extra_keys', 'category_list']
+
+# GPT-result fields whose values are scalars: stored as a single row each
+normal_field_names = ['tone', 'target_audience',
+                      'target_age', 'target_gender', 'address', 'theme']
+
+
+def insert_content(gpt_res):
+    """Persist one parsed GPT tagging result into video_content_mapping.
+
+    gpt_res: dict expected to hold 'video_id' plus the keys listed in
+    json_field_names (list values, one row per element) and
+    normal_field_names (scalar values, one row each) — presumably the
+    parseRes produced in ai_tag_task.py; verify against that caller.
+
+    Best-effort: database errors are caught and printed, never raised.
+    """
+    try:
+        # Connect to MySQL.
+        # NOTE(review): db_config['port'] is defined but not passed here —
+        # this works only because 3306 is the connector's default; confirm
+        # it is intentional.
+        conn = mysql.connector.connect(
+            host=db_config['host'],
+            database=db_config['database'],
+            user=db_config['user'],
+            password=db_config['password'],
+        )
+
+        if conn.is_connected():
+            print('成功连接到数据库')
+            cursor = conn.cursor()
+
+            # Accumulator of (video_id, tag, tag_type) tuples
+            insert_batch = []
+
+            # Parameterized SQL template for the batched insert
+            sql = """
+            INSERT INTO video_content_mapping (video_id, tag, tag_type)
+            VALUES (%s, %s, %s)
+            """
+
+            video_id = gpt_res.get('video_id', '')
+            for field_name in json_field_names:
+                # List-valued field (key words, categories, ...)
+                tags = gpt_res.get(field_name, '')
+                # Skip missing values and anything that is not a list
+                if not tags or not isinstance(tags, list):
+                    continue
+                # Queue one mapping row per tag
+                for tag in tags:
+                    insert_batch.append((video_id, tag, field_name))
+
+            for field_name in normal_field_names:
+                # Scalar field value; NOTE(review): missing keys are stored
+                # as '' rows — confirm that is intended
+                value = gpt_res.get(field_name, '')
+                # Queue the mapping row
+                insert_batch.append((video_id, value, field_name))
+
+            # Flush everything in a single executemany call
+            cursor.executemany(sql, insert_batch)
+            print(f"Inserting records {len(insert_batch)} rows...")
+            insert_batch.clear()
+
+            # Commit the transaction
+            conn.commit()
+
+            # Close cursor and connection.
+            # NOTE(review): not reached if an error fires above (no finally),
+            # so the connection leaks on failure — consider try/finally or a
+            # context manager.
+            cursor.close()
+            conn.close()
+            print('数据库连接已关闭')
+    except mysql.connector.Error as e:
+        # Best-effort save: log and swallow; callers see no exception
+        print('数据库连接或操作出错:', e)