|
@@ -24,49 +24,6 @@ class QueryDataTool:
|
|
|
# 返回默认数据
|
|
|
default_data = {
|
|
|
"crawl_data": {
|
|
|
- "channel": 1,
|
|
|
- "channel_content_id": "684a789b000000002202a61b",
|
|
|
- "content_link": "https://www.xiaohongshu.com/explore/684a789b000000002202a61b",
|
|
|
- "wx_sn": None,
|
|
|
- "title": "一个视频学会,5个剪辑工具,超详细教程",
|
|
|
- "content_type": "video",
|
|
|
- "body_text": "#剪辑教程[话题]# #剪辑[话题]# #手机剪辑[话题]# #视频制作[话题]# #视频剪辑[话题]# #自学剪辑[话题]# #原创视频[话题]# #新手小白学剪辑[话题]#",
|
|
|
- "location": "未知",
|
|
|
- "source_url": None,
|
|
|
- "mini_program": None,
|
|
|
- "topic_list": [],
|
|
|
- "image_url_list": [
|
|
|
- {
|
|
|
- "image_type": 2,
|
|
|
- "image_url": "http://rescdn.yishihui.com/pipeline/image/5be8f08a-4691-41b6-8dda-0b63cc2c1056.jpg"
|
|
|
- }
|
|
|
- ],
|
|
|
- "video_url_list": [
|
|
|
- {
|
|
|
- "video_url": "http://rescdn.yishihui.com/pipeline/video/6c2330e3-0674-4f01-b5b2-fc8c240158f8.mp4",
|
|
|
- "video_duration": 615
|
|
|
- }
|
|
|
- ],
|
|
|
- "bgm_data": None,
|
|
|
- "ad_info": None,
|
|
|
- "is_original": False,
|
|
|
- "voice_data": None,
|
|
|
- "channel_account_id": "670a10ac000000001d0216ec",
|
|
|
- "channel_account_name": "小伍剪辑视频",
|
|
|
- "channel_account_avatar": "https://sns-avatar-qc.xhscdn.com/avatar/1040g2jo31e469dkq0e005poa22m7c5ncbtuk1g0?imageView2/2/w/80/format/jpg",
|
|
|
- "item_index": None,
|
|
|
- "view_count": None,
|
|
|
- "play_count": None,
|
|
|
- "like_count": 692,
|
|
|
- "collect_count": 996,
|
|
|
- "comment_count": 37,
|
|
|
- "share_count": None,
|
|
|
- "looking_count": None,
|
|
|
- "publish_timestamp": 1749711589000,
|
|
|
- "modify_timestamp": 1749711589000,
|
|
|
- "update_timestamp": 1755239186502
|
|
|
- },
|
|
|
- "raw": {
|
|
|
"channel": 1,
|
|
|
"channel_content_id": "684a789b000000002202a61b",
|
|
|
"content_link": "https://www.xiaohongshu.com/explore/684a789b000000002202a61b",
|
|
@@ -205,14 +162,49 @@ class StructureTool:
|
|
|
|
|
|
@staticmethod
def store_parsing_result(request_id: str, crawl_raw: Dict[str, Any], identify_result: Dict[str, Any]) -> Optional[int]:
    """Store a parsing result in the ``knowledge_parsing_content`` table.

    The raw crawl data and the identification result are serialized together
    as one JSON document and written to the ``parsing_data`` column.

    Args:
        request_id: ID of the request this result belongs to.
        crawl_raw: Raw crawled data. Its ``channel_content_id`` entry is used
            as the row's ``content_id`` (empty string when missing or falsy).
        identify_result: Identification result stored next to the raw data.

    Returns:
        The ID of the inserted row, or None when the insert fails or yields
        no ID.
    """
    # Status 2 marks the row as "processing finished".
    status = 2
    # Default task ID; adjust if rows need to be tied to a real task.
    task_id = 0

    sql = (
        "INSERT INTO knowledge_parsing_content "
        "(content_id, request_id, task_id, parsing_data, create_time, status) "
        "VALUES (%s, %s, %s, %s, NOW(), %s)"
    )

    try:
        # Pull the required field out of the raw data.
        content_id = crawl_raw.get('channel_content_id') or ''

        parsing_data = {
            'crawl_raw': crawl_raw,
            'identify_result': identify_result,
        }
        params = (
            content_id,
            request_id,
            task_id,
            json.dumps(parsing_data, ensure_ascii=False),
            status,
        )

        insert_id = MysqlHelper.insert_and_get_id(sql, params)
    except Exception as e:
        # Best-effort storage: callers expect None on failure, not an
        # exception. logger.exception keeps the traceback for diagnosis.
        logger.exception(f"存储解析结果失败: request_id={request_id}, error={e}")
        return None

    if not insert_id:
        # The helper reported no inserted row; make the failure visible
        # instead of returning silently.
        logger.warning(f"存储解析结果未返回插入ID: request_id={request_id}, content_id={content_id}")
        return None

    logger.info(f"存储解析结果成功: request_id={request_id}, content_id={content_id}, insert_id={insert_id}")
    return insert_id
|