Browse Source

feat: 完成分段式prompt

huangzhichao 1 week ago
parent
commit
83ca42635e

+ 3 - 2
src/app.module.ts

@@ -1,8 +1,9 @@
 import { Module } from '@nestjs/common'
 
-import { TencentCloudModule } from '@/proxy/tencent-cloud/tencent-cloud.module'
+import { SqlAgentModule } from '@/module/sql-agent/sql-agent.module'
+import { TencentCloudModule } from '@/module/tencent-cloud/tencent-cloud.module'
 
 @Module({
-  imports: [TencentCloudModule]
+  imports: [SqlAgentModule, TencentCloudModule]
 })
 export class AppModule {}

+ 45 - 0
src/module/service/lls.service.ts

@@ -0,0 +1,45 @@
+import * as dotenv from 'dotenv'
+import * as tencentcloud from 'tencentcloud-sdk-nodejs'
+
+// Credentials are looked up in the env files first. `parsed` only contains
+// values read from those files, so variables that are already present in the
+// process environment (for example injected by the deployment platform) are
+// used as a fallback. Defaulting `parsed` to an empty object keeps a missing
+// or unreadable env file from throwing at import time.
+const fileEnv =
+  dotenv.config({
+    path: ['.env.local', '.env']
+  }).parsed ?? {}
+const secretId = fileEnv.secretId ?? process.env.secretId
+const secretKey = fileEnv.secretKey ?? process.env.secretKey
+
+// Versioned LKEAP client class exposed by the Tencent Cloud SDK.
+const LkeapClient = tencentcloud.lkeap.v20240522.Client
+
+// Module-level client instance shared by every request made from this file.
+// NOTE(review): the SDK README documents `reqTimeout` in seconds; 40000 looks
+// like it was written as milliseconds — confirm the intended timeout.
+const client = new LkeapClient({
+  credential: {
+    secretId,
+    secretKey
+  },
+  region: 'ap-guangzhou',
+  profile: {
+    httpProfile: {
+      endpoint: 'lkeap.tencentcloudapi.com',
+      reqTimeout: 40000
+    }
+  }
+})
+
+/**
+ * Sends one non-streaming, single-message chat completion request to the
+ * `deepseek-r1` model and resolves with the text of the first returned choice.
+ *
+ * @param Content - text sent as the only `user` message
+ * @returns the `Message.Content` of the first choice
+ * @throws rejects with the SDK error when the call fails, or with an `Error`
+ *   when the response carries no message content
+ */
+export function chatReason(Content: string): Promise<string> {
+  const params = {
+    Model: 'deepseek-r1',
+    Messages: [{ Role: 'user', Content }],
+    Stream: false
+  }
+  console.log('开始查询...')
+  return new Promise((resolve, reject) => {
+    client.ChatCompletions(params, function (err, resp) {
+      if (err) {
+        reject(err)
+        return
+      }
+
+      // An exception thrown inside this callback would not reject the promise
+      // (it would stay pending and surface as an uncaught exception), so an
+      // empty or malformed response is turned into an explicit rejection.
+      const answer = resp?.Choices?.[0]?.Message?.Content
+      if (typeof answer !== 'string') {
+        reject(new Error('ChatCompletions response contained no message content'))
+        return
+      }
+
+      console.log('回答:', answer)
+      resolve(answer)
+    })
+  })
+}

+ 29 - 0
src/module/sql-agent/sql-agent.controller.ts

@@ -0,0 +1,29 @@
+import { Controller, Post, Body, Header, HttpCode } from '@nestjs/common'
+
+import { SqlAgentService } from './sql-agent.service'
+
+/**
+ * HTTP entry point of the SQL agent, mounted under the `sqlAgent` route prefix.
+ */
+@Controller('sqlAgent')
+export class SqlAgentController {
+  constructor(private readonly sqlAgentService: SqlAgentService) {}
+
+  /**
+   * POST `sqlAgent/chat`: passes the `chat` text of the JSON body to the
+   * SQL agent service and wraps the answer in a `{ code, msg, data }` envelope.
+   *
+   * @param body - request body; only its `chat` field is read
+   * @returns `code: 0` with the service result in `data`, or `code: -1` when
+   *   the request carried no `chat` text
+   */
+  @Post('chat')
+  @Header('Access-Control-Allow-Origin', '*')
+  @Header('Content-type', 'application/json')
+  @HttpCode(200)
+  async chat(@Body() body: { chat?: string }) {
+    // Read the field defensively: a request without a parsed body must yield
+    // the "missing chat" envelope instead of a destructuring TypeError.
+    const chat = body?.chat
+    if (!chat) {
+      return {
+        code: -1,
+        // The failure branch previously reported "request succeeded".
+        msg: '请求失败',
+        data: '无当前对话'
+      }
+    }
+
+    const data = await this.sqlAgentService.chat(chat)
+    return {
+      code: 0,
+      msg: '请求成功',
+      data
+    }
+  }
+}

+ 10 - 0
src/module/sql-agent/sql-agent.module.ts

@@ -0,0 +1,10 @@
+import { Module } from '@nestjs/common'
+
+import { SqlAgentController } from './sql-agent.controller'
+import { SqlAgentService } from './sql-agent.service'
+/**
+ * Nest module for the SQL agent feature: registers SqlAgentController as its
+ * controller and SqlAgentService as its provider.
+ */
+@Module({
+  controllers: [SqlAgentController],
+  providers: [SqlAgentService]
+})
+export class SqlAgentModule {}

+ 12 - 0
src/module/sql-agent/sql-agent.service.ts

@@ -0,0 +1,12 @@
+import { Injectable } from '@nestjs/common'
+
+import { chatReason } from '@/module/service/lls.service'
+import { buildSqlPrompt } from '@/prompt/sql-prompt/sql'
+
+/**
+ * Service behind the SQL agent endpoint: builds the staged SQL prompt for a
+ * question and asks the reasoning model to answer it.
+ */
+@Injectable()
+export class SqlAgentService {
+  /**
+   * Wraps the question in the SQL prompt, serialises the prompt to JSON and
+   * sends it to the model.
+   *
+   * @param chat - the user's question
+   * @returns the text returned by `chatReason`
+   */
+  async chat(chat: string) {
+    const serialisedPrompt = JSON.stringify(buildSqlPrompt(chat))
+    const answer = await chatReason(serialisedPrompt)
+    return answer
+  }
+}

+ 0 - 0
src/proxy/tencent-cloud/tencent-cloud.controller.ts → src/module/tencent-cloud/tencent-cloud.controller.ts


+ 0 - 0
src/proxy/tencent-cloud/tencent-cloud.module.ts → src/module/tencent-cloud/tencent-cloud.module.ts


+ 2 - 6
src/proxy/tencent-cloud/tencent-cloud.service.ts → src/module/tencent-cloud/tencent-cloud.service.ts

@@ -1,11 +1,7 @@
 import { Injectable } from '@nestjs/common'
 
-import {
-  longTerm,
-  shortTerm,
-  conversation,
-  chatReason
-} from '@/utils/prompt.util'
+import { chatReason } from '@/module/service/lls.service'
+import { longTerm, shortTerm, conversation } from '@/utils/prompt.util'
 
 @Injectable()
 export class TencentCloudService {

+ 100 - 0
src/prompt/sql-prompt/query-examples.ts

@@ -0,0 +1,100 @@
+export default [
+  `
+  SELECT  a.advertiserid
+        ,sum(曝光次数) as 曝光次数
+        ,sum(点击次数) as 点击次数
+        ,SUM(目标转化量) AS 转化量
+        ,SUM(包装消耗次数) AS 收入
+FROM    (
+            SELECT  a.advertiserid
+                    ,d.advertiser_code
+                    ,d.name 
+                    ,COUNT(CASE    WHEN businesstype = 'adView' THEN a.pqtid END) AS 曝光次数
+                    ,COUNT(CASE    WHEN businesstype = 'adClick' THEN a.pqtid END) AS 点击次数
+                    ,COUNT(DISTINCT c.pqtid) AS 目标转化量
+                    ,SUM(t.packagecost) as 包装消耗次数
+            FROM    loghubods.ad_action_log_own a
+            LEFT JOIN loghubods.advertiser d
+            ON      a.advertiserid = d.id 
+            LEFT JOIN   (
+                            SELECT  DISTINCT pqtid
+                                    ,eventtype
+                            FROM    loghubods.ad_action_log_conv_flow
+                            WHERE   year || month || day = '\${bizdate}'
+                        ) c
+            ON      a.pqtid = c.pqtid
+
+            LEFT JOIN   (
+                            SELECT  ad_id
+                                    ,ad_code
+                                    ,targeting_conversion
+                            FROM    loghubods.ad_config
+                            WHERE   is_delete = 0
+                        ) g
+            ON      a.adid = g.ad_id
+            LEFT JOIN   (
+                            SELECT  pqtid
+                                    ,packageclick
+                                    ,packagecost
+                                    ,coefficient
+                            FROM    loghubods.ad_platform_package_cost_per5min
+                            WHERE   dt REGEXP '\${bizdate}'
+                        ) t
+            ON      a.pqtid = t.pqtid
+            and a.businesstype = 'adClick'
+            WHERE   a.dt = '\${bizdate}'
+            AND     a.ownadsystemtype = 'ownPlatform'
+            GROUP BY a.advertiserid
+                     ,d.advertiser_code
+                     ,d.name
+        ) a
+GROUP BY a.advertiserid
+  `,
+
+  `
+  SELECT  a.advertiserid
+                    ,d.advertiser_code
+                    ,d.name 
+                    ,COUNT(CASE    WHEN businesstype = 'adView' THEN a.pqtid END) AS 曝光次数 
+                    ,COUNT(CASE    WHEN businesstype = 'adClick' THEN a.pqtid END) AS 点击次数 
+                    ,sum(CASE    WHEN businesstype = 'adClick'
+                                        AND eventtype IS NOT NULL
+                                        AND eventtype = g.targeting_conversion THEN 1
+                                    WHEN eventtype IS NULL THEN 0
+                            END )AS 转化量
+                    ,sum(t.packagecost) as 收入
+            FROM    loghubods.ad_action_log_own a
+            LEFT JOIN loghubods.advertiser d
+            ON      a.advertiserid = d.id 
+            LEFT JOIN   (
+                            SELECT  DISTINCT pqtid
+                                    ,eventtype
+                            FROM    loghubods.ad_action_log_conv_flow
+                            WHERE   year || month || day = '\${bizdate}'
+                        ) c
+            ON      a.pqtid = c.pqtid
+            LEFT JOIN   (
+                            SELECT  ad_id
+                                    ,ad_code
+                                    ,targeting_conversion
+                            FROM    loghubods.ad_config
+                            WHERE   is_delete = 0
+                        ) g
+            ON      a.adid = g.ad_id
+            LEFT JOIN   (
+                            SELECT  pqtid
+                                    ,packageclick
+                                    ,packagecost
+                                    ,coefficient
+                            FROM    loghubods.ad_platform_package_cost_per5min
+                            WHERE   dt REGEXP '\${bizdate}'
+                        ) t
+            ON      a.pqtid = t.pqtid
+            and a.businesstype = 'adClick'
+            WHERE   a.dt = '\${bizdate}'
+            AND     a.ownadsystemtype = 'ownPlatform'
+            GROUP BY a.advertiserid
+                     ,d.advertiser_code
+                     ,d.name
+  `
+]

+ 158 - 0
src/prompt/sql-prompt/sql.ts

@@ -0,0 +1,158 @@
+import QUERY_EXAMPLES from './query-examples'
+import SCHEMA_DEFINITIONS from './table-base.json'
+
+/**
+ * Assembles the complete three-phase natural-language-to-SQL prompt object.
+ *
+ * The table definitions and the example queries are serialised to JSON text
+ * and handed to the phase builders; the user's text is passed to phase 1.
+ *
+ * @param input - the user's natural-language request
+ * @returns the prompt object (metadata, role, the three phases, output note)
+ */
+export function buildSqlPrompt(input: string) {
+  const schemaJson = JSON.stringify(SCHEMA_DEFINITIONS)
+  const examplesJson = JSON.stringify(QUERY_EXAMPLES)
+
+  // Persona the model is asked to adopt.
+  const role = {
+    title: '阿里云 BI SQL Copilot 专家',
+    description:
+      '你是一名熟悉自然语言解析与阿里云 Hive SQL 构造的专家,精通表结构、字段注释、广告业务指标,擅长将用户输入转为可执行 SQL 查询语句。你在执行任务时保持结构清晰、逻辑准确、语义对齐。'
+  }
+
+  // Phases in execution order: intent parsing, rule enhancement, SQL output.
+  const phases = [
+    structuredIntentParsing(input, schemaJson),
+    businessRuleEnhancement(examplesJson),
+    hiveSqlConstruction()
+  ]
+
+  return {
+    prompt_id: 'bi_sql_phase_prompt_v2',
+    language: 'zh',
+    description:
+      '多阶段结构化自然语言转 SQL Copilot,包含用户意图解析 → 业务规则增强 → Hive SQL 构造。适配工程部署、支持大表结构与规则注入。',
+    role,
+    phases,
+    output_description: '输出内容为 JSON 结构,包含 phase 1、2、3 的输出结果'
+  }
+}
+
+/**
+ * Phase 1 of the prompt: structured intent parsing.
+ *
+ * Builds the phase descriptor object. It carries a Chinese instruction and
+ * workflow asking the model to identify the query goal, fields, time range and
+ * filters in the user's text, embeds both arguments under `input`, and gives an
+ * `output_format` template whose values are placeholder strings (for example
+ * 'string' or 'LEFT JOIN | INNER JOIN') describing the expected JSON shape.
+ *
+ * @param input - the user's text, embedded as `input.user_input`
+ * @param schemaDefinitions - schema text, embedded unchanged as
+ *   `input.schema_definitions`
+ * @returns the phase 1 descriptor; this function only builds the object
+ *
+ * NOTE(review): in `instruction`, "此阶" looks like a typo for "此阶段" —
+ * confirm before changing the prompt text.
+ */
+function structuredIntentParsing(input: string, schemaDefinitions: string) {
+  return {
+    phase: 'Phase 1',
+    name: '结构化意图解析',
+    instruction:
+      '你需要从用户输入中识别查询目标、字段、时间、过滤等信息,形成结构化 JSON 结构。此阶必须参照 schema_definitions 产出结构',
+    workflow: [
+      '提取用户核心查询目标',
+      '识别涉及的字段和表结构',
+      '明确时间范围',
+      '建立字段与表之间的关系',
+      '构建字段、JOIN、聚合等意图结构'
+    ],
+    input: {
+      user_input: input,
+      schema_definitions: schemaDefinitions
+    },
+    output_format: {
+      query_goal: 'string',
+      time_range: 'string',
+      fields: [
+        {
+          name: 'string',
+          desc: 'string'
+        }
+      ],
+      joins: [
+        {
+          left: 'string',
+          right: 'string',
+          type: 'LEFT JOIN | INNER JOIN',
+          reason: 'string'
+        }
+      ],
+      filters: [
+        {
+          field: 'string',
+          op: '= | > | < | IN',
+          value: 'string'
+        }
+      ],
+      aggregations: [
+        {
+          func: 'SUM | COUNT | AVG',
+          field: 'string'
+        }
+      ],
+      group_by: ['string'],
+      biz_focus: ['string']
+    }
+  }
+}
+
+/**
+ * Phase 2 of the prompt: business-rule enhancement and example matching.
+ *
+ * Builds the phase descriptor object. It carries a Chinese instruction and
+ * workflow asking the model to refine the phase 1 intent structure using
+ * historical SQL examples, two hard-coded business rules (one named "转化",
+ * conversion, and one named "收入计算", revenue calculation), and an
+ * `output_format` template of placeholder values.
+ *
+ * `input.intent_structure` is the literal text '{{phase1_output}}'; this
+ * function does not substitute it.
+ *
+ * @param query_examples - example-query text, embedded unchanged as
+ *   `input.query_examples`
+ * @returns the phase 2 descriptor; this function only builds the object
+ */
+function businessRuleEnhancement(query_examples: string) {
+  return {
+    phase: 'Phase 2',
+    name: '业务规则增强与案例匹配',
+    instruction:
+      '你现在可以参考业务知识库与历史 SQL 案例,对意图结构进行增强与校正,仅在需要时激活使用。',
+    workflow: [
+      '检查 biz_focus 是否包含转化、ROI、实验组等关键词',
+      '匹配历史查询样例中的结构逻辑',
+      '补充字段表达、筛选、JOIN 逻辑',
+      '增强聚合与 group_by,避免缺失字段',
+      '记录字段增强来源(推理 or 规则)'
+    ],
+    input: {
+      intent_structure: '{{phase1_output}}',
+      query_examples
+    },
+    business_rules: [
+      {
+        name: '转化',
+        rule: '转化通常来讲是目标转化,ad_config 表中使用 targeting_conversion 字段表示广告主的目标转化类型,与ad_action_log_conv_flow 中的 eventtype字段对应。使用 COUNT(DISTINCT pqtid) 统计每个广告主的 唯一用户转化次数,避免同一用户多次行为重复计数。关联广告配置表**ad_config** 中的targeting_conversion字段,确保转化与广告目标匹配'
+      },
+      {
+        name: '收入计算',
+        rule: '收入数据来自ad_platform_package_cost_per5min表的packagecost字段,仅关联广告点击事件(businesstype = adClick),使用 SUM(packagecost) 累加每个广告主的总消耗金额,不结合包装系数计算'
+      }
+    ],
+    output_format: {
+      enhanced_fields: ['string'],
+      final_filters: ['string'],
+      final_joins: ['string'],
+      aggregations: ['string'],
+      group_by: ['string'],
+      notes: ['字段 A 推理自规则 X', 'JOIN B 来源于历史查询案例 Y']
+    }
+  }
+}
+
+/**
+ * Phase 3 of the prompt: SQL construction and output.
+ *
+ * Builds the phase descriptor object. It carries a Chinese instruction, a role
+ * hint, a list of rules, a workflow and a list of constraints asking the model
+ * to turn the enhanced structure into a single Hive SQL SELECT statement, and
+ * an `output_format` template with one `sql` field.
+ *
+ * `input.enhanced_structure` is the literal text '{{phase2_output}}'; this
+ * function does not substitute it.
+ *
+ * @returns the phase 3 descriptor; the function takes no arguments and only
+ *   builds the object
+ *
+ * NOTE(review): in the last entry of `rules`, "显示表明" looks like a typo for
+ * "显式标明" — confirm before changing the prompt text.
+ */
+function hiveSqlConstruction() {
+  return {
+    phase: 'Phase 3',
+    name: 'SQL 构造与输出',
+    instruction:
+      '你需要将结构化意图转化为可执行 SQL 查询语句,遵循在阿里云大数据平台运行的 Hive 语法规范,字段必须明确,结构清晰。',
+    role_hint: '你是结构意图驱动 SQL 构造器,输出需标准化、准确并包含注释。',
+    rules: [
+      '必须显式指定字段,禁止使用 SELECT *',
+      '若用户未指定时间范围,默认查询当天数据,并以注释注明',
+      'JOIN 查询必须显式声明表名、关联字段和 ON 条件',
+      '字段选择和 JOIN 逻辑可参考历史查询 case,如结构类似请合理继承',
+      '所有字段、表名必须基于输入提供的结构,不得杜撰',
+      '通过 GET_JSON_OBJECT 方法解析 JSON 字段',
+      '所有分区表的查询语句 必须显示表明时间范围限制'
+    ],
+    workflow: [
+      '提取 SELECT 字段(加别名与注释)',
+      '组合 FROM 与 JOIN 语句(含 ON 与 JOIN 类型)',
+      '添加时间与业务 WHERE 过滤条件',
+      '加入聚合函数与 group_by 逻辑(如适用)',
+      '最终组装为完整 Hive SQL 语句'
+    ],
+    input: {
+      enhanced_structure: '{{phase2_output}}'
+    },
+    output_format: {
+      sql: 'string'
+    },
+    constraints: [
+      '仅生成 Hive SQL 子集语法,确保兼容阿里云大数据查询引擎',
+      '禁止 无 WHERE 条件的全表扫描',
+      '禁止 无关联条件的 JOIN 查询',
+      '禁止生成任何 UPDATE、INSERT、DELETE,仅限 SELECT',
+      '不引入未提供的字段或表',
+      '不处理数据权限、治理、ETL 或调度相关逻辑',
+      '只返回一条最优 SQL 查询语句,不提供多个备选方案'
+    ]
+  }
+}

+ 168 - 0
src/prompt/sql-prompt/table-base.json

@@ -0,0 +1,168 @@
+{
+  "loghubods.creative": {
+    "description": "创意表",
+    "fields": {
+      "id": "主键id",
+      "ad_id": "所属广告Id",
+      "creative_code": "创意code/策略编码",
+      "creative_pattern": "创意样式(0上下分区 1单区域 2沉浸式)",
+      "position_id": "版位id",
+      "creative_title": "创意标题",
+      "material_type": "",
+      "material_address": "素材地址",
+      "creative_logo_address": "创意logo",
+      "click_button_text": "按钮文案",
+      "click_button_color": "按钮颜色(十六进制颜色)",
+      "click_button_effects": "按钮特效(0无特效,1呼吸特效)",
+      "landing_page_type": "落地页类型(1半屏小程序 2h5)",
+      "landing_page_appid": "落地页-小程序-appid",
+      "landing_page_address": "落地页地址",
+      "status": "状态(审核中,审核不通过,审核通过,可投放)",
+      "check_reason": "审核原因",
+      "is_delete": "是否删除(0正常 1删除)",
+      "create_user": "创建人",
+      "update_user": "更新人",
+      "create_time": "创建时间",
+      "update_time": "更新时间",
+      "creative_name": "创意名称",
+      "weight": "宽度",
+      "material_md5": ""
+    }
+  },
+  "loghubods.ad_ownplatform": {
+    "description": "广告表",
+    "fields": {
+      "id": "主键id",
+      "campaign_id": "所属投放计划Id",
+      "ad_code": "广告code/策略编码",
+      "ad_name": "广告名称",
+      "ad_pattern": "版位()",
+      "issue_date_from": "投放日期(开始)(空不限制)",
+      "issue_date_to": "投放日期(结束)(空不限制)",
+      "issue_time_from": "起始时间(空不限制)",
+      "issue_time_to": "结束时间(空不限制)",
+      "bid_type": "出价方式(0CPC 1CPM)",
+      "unit_price": "出价",
+      "day_amount_limit": "单日金额限制(空 不限制)",
+      "network_type": "网络类型(逗号分割)(空不限制)",
+      "phone_model": "机型(逗号分割)(空不限制)",
+      "territory": "地域(逗号分割)(空不限制)",
+      "status": "状态(未投放,启用中,删除,已结束)",
+      "is_delete": "是否删除(0正常 1删除)",
+      "create_user": "创建人",
+      "update_user": "更新人",
+      "create_time": "创建时间",
+      "update_time": "更新时间"
+    }
+  },
+  "loghubods.ad_action_log_own": {
+    "description": "广告相关日志",
+    "fields": {
+      "adcode": "广告code",
+      "adposition": "广告位置",
+      "adtype": "广告类型",
+      "apptype": "产品类型",
+      "appversioncode": "小程序版本",
+      "businesstype": "事件类型。adView:曝光,adClick:点击,adSelfLandingLoad:自建站加载,adSelfLandingView:自建站曝光,adSelfBlockToastView:自建站拦截弹窗展现,adSelfLandingUnload:自建站返回",
+      "clienttimestamp": "客户端时间戳",
+      "extparams": "扩展信息,存储实验组信息等,例如{eventInfos:{ab_test008:\"ab100\"}}。",
+      "headvideoid": "头部视频",
+      "hotsencetype": "热启动场景值",
+      "loginuid": "用户uid(同一个人多产品id相同)",
+      "machinecode": "用户id (同一个人不同产品id不同)",
+      "networktype": "网络类型,4G,5G,wifi等",
+      "pagesource": "页面路径",
+      "platform": "操作系统,ios,android等",
+      "requestid": "请求id",
+      "sencetype": "场景值",
+      "subsessionid": "用户会话id",
+      "videoid": "视频id",
+      "clientip": "用户ip",
+      "pqtid": "广告链路id",
+      "advertiserid": "广告主id",
+      "campaigncode": "计划code",
+      "campaignid": "计划id",
+      "creativecode": "创意code",
+      "id": "创意id",
+      "positionid": "位置id",
+      "adid": "广告id",
+      "advertisercode": "广告主code",
+      "dt": "分区字段,格式:yyyyMMdd"
+    }
+  },
+  "loghubods.ad_advertising_mapping": {
+    "description": "广告主 计划 广告 创意关系表",
+    "fields": {
+      "id": "主键id",
+      "agent_id": "代理商对应id",
+      "advertiser_id": "广告主id",
+      "campaign_id": "计划id",
+      "ad_id": "广告id",
+      "creative_id": "创意id",
+      "ad_status": "广告状态",
+      "is_delete": "是否删除(0正常 1删除)",
+      "create_user": "创建人",
+      "update_user": "更新人",
+      "create_time": "创建时间",
+      "update_time": "更新时间"
+    }
+  },
+  "loghubods.advertiser": {
+    "description": "广告主表",
+    "fields": {
+      "id": "主键id",
+      "name": "广告主名称",
+      "advertiser_code": "广告主code/策略编码",
+      "unit_price": "出价",
+      "is_delete": "是否删除(0正常 1删除)",
+      "create_user": "创建人",
+      "update_user": "更新人",
+      "create_time": "创建时间",
+      "update_time": "更新时间",
+      "profession": ""
+    }
+  },
+  "loghubods.ad_platform_package_cost_per5min": {
+    "description": "用户消耗表",
+    "fields": {
+      "apptype": "产品类型",
+      "clientip": "客户端ip",
+      "coefficient": "包装系数",
+      "creativecode": "创意code",
+      "extparams": "拓展字段",
+      "machinecode": "用户id,单产品唯一",
+      "packageclick": "包装点击",
+      "packagecost": "包装消耗",
+      "pqtid": "广告链路id",
+      "dt": "yyyyMMddhhmmss,例如:20250402163500"
+    }
+  },
+  "loghubods.ad_config": {
+    "description": "用户转化目标表",
+    "fields": {
+      "ad_id": "广告id",
+      "ad_code": "广告code",
+      "targeting_conversion": "转化目标值",
+      "is_delete": "是否删除(0正常 1删除)",
+      "create_user": "创建人",
+      "update_user": "更新人",
+      "create_time": "创建时间",
+      "update_time": "更新时间"
+    }
+  },
+  "loghubods.ad_action_log_conv_flow": {
+    "description": "用户转化行为日志",
+    "fields": {
+      "appid": "应用id",
+      "clientip": "客户端ip",
+      "eventtime": "事件时间",
+      "eventtype": "事件类型",
+      "extparams": "扩展字段",
+      "pqtid": "广告链路id",
+      "year": "modify year",
+      "month": "modify month",
+      "day": "modify day",
+      "hour": "modify hour"
+    }
+  }
+}

+ 0 - 50
src/utils/prompt.util.ts

@@ -1,30 +1,3 @@
-import * as dotenv from 'dotenv'
-import * as tencentcloud from 'tencentcloud-sdk-nodejs'
-
-const { secretId, secretKey } = dotenv.config({
-  path: ['.env.local', '.env']
-}).parsed
-
-console.log(process.env)
-
-// 引入对应产品版本的 Client
-const LkeapClient = tencentcloud.lkeap.v20240522.Client
-
-// 实例化客户端,传入认证信息、区域和网络配置
-const client = new LkeapClient({
-  credential: {
-    secretId,
-    secretKey
-  },
-  region: 'ap-guangzhou',
-  profile: {
-    httpProfile: {
-      endpoint: 'lkeap.tencentcloudapi.com',
-      reqTimeout: 40000
-    }
-  }
-})
-
 export function longTerm(historyProfile = '', chat, historyChat = '') {
   return `
     # 角色
@@ -193,26 +166,3 @@ export function conversation(profile, intente, chat, historyChat = '') {
       对话回复
   `
 }
-
-// question
-
-export function chatReason(Content): Promise<string> {
-  const params = {
-    Model: 'deepseek-r1',
-    Messages: [{ Role: 'user', Content }],
-    Stream: false
-  }
-
-  return new Promise((resolve, reject) => {
-    client.ChatCompletions(params, function (err, resp) {
-      if (err) {
-        reject(err)
-        return
-      }
-
-      const { Choices } = resp
-      const { Message } = Choices[0]
-      resolve(Message.Content)
-    })
-  })
-}

+ 2 - 0
tsconfig.json

@@ -17,6 +17,8 @@
     "strictBindCallApply": false,
     "forceConsistentCasingInFileNames": false,
     "noFallthroughCasesInSwitch": false,
+    "resolveJsonModule": true,
+    "esModuleInterop": true,
     "paths": {
       "@/*": ["src/*"]
     }