"""Image-understanding provider built on DashScope's OpenAI-compatible API.

Exposes two operations on :class:`UnderstandImageProvider`:

* ``understand_image`` - produce one line of ad copy from an advertisement image.
* ``extract_business_license`` - extract key fields from a business-license image.

Both force the model to answer through a function/tool call and then parse
the JSON arguments of that call.
"""

import json

from openai import OpenAI
from openai.types.chat import ChatCompletionToolParam

from ..schemas.base import DataResponse, BusinessLicensePayload
from ..core.config import get_settings
from ..core.logger import get_logger

settings = get_settings()
logger = get_logger("understand_image_provider")

# Field key -> Chinese display label. The key set doubles as the whitelist of
# values accepted in ``inaccurate_fields``; insertion order is the reporting order.
FIELD_LABELS_ZH = {
    "company_name": "公司名称",
    "unified_social_credit_code": "统一社会信用代码",
    "legal_representative": "法定代表人",
    "business_address": "住所/经营场所",
}

# NOTE: the two prompts below are runtime data sent verbatim to the model.
# Their whitespace is kept exactly as found in this file; edit deliberately.
SYSTEM_PROMPT = """ <角色> 你是一名资深广告文案专家。你的任务是根据输入的一张广告图片中的文字内容，生成一句简洁有力的广告文案。 <受众> 目标用户：50岁以上中老年人。语言需亲切、直白、易理解，避免专业术语与复杂长句。 <生成逻辑> 1. 首先对用户提供的广告图片进行OCR识别，提取出所有文字信息。 2. 文案生成必须严格基于识别到的文本内容，不得编造图片中不存在的优惠、动作指令或承诺。 3. 若图片中没有明确的转化行为，则默认使用“长按二维码”作为行动指令；若无法区分是加群还是加微信，默认使用“加群”，但若文案中明确表示或可推断出“加微”则准确体现。 <结构公式> [行动指令]，[低门槛/优惠承诺]，[核心价值/具体收益]；[紧迫感/稀缺性提醒] <转化策略> <条目>文案必须以「[行动指令] + [低门槛/优惠承诺]」连续开头，但允许等义表述，以提升多样性。 <条目>可接受的等义表述示例：行动指令（任选其一，按素材出现）：“长按二维码”“扫码二维码”“识别二维码”“长按识别”；低门槛/优惠承诺（任选其一，按素材出现）：“0元入群”“免费进群”“0元加入”“限时免费加入”“0元加微”。 <条目>若仅满足其中之一（真实存在且与素材一致），则仅前置该项；严禁捏造另一项。 <条目>若两项均不在素材中出现，则改用图片中存在的真实动作入口与优惠描述（如“点击查看”“立即预约”“限时立减”等）。 <条目>上述两要素需置于句首，越靠前越好；其后再写核心收益与稀缺提醒。 <约束> 1. 文案必须准确传达广告图片中的产品/服务信息，不得杜撰不存在的内容。 2. 加入紧迫感或稀缺性（如“限时”“名额有限”“马上领取”等），但不得虚构或夸大事实。 3. 避免医疗或功效的绝对化/保证性用语（如“治愈”“根治”“无副作用”“永久有效”）。 4. 不得包含违法、虚假、低俗、敏感、歧视性内容，不引导危险行为，不传播迷信。 5. 涉及健康/养生场景时，表述应为辅助/改善/建议性质，不承诺疗效。 6. 仅输出一句中文广告文案，简短醒目，适合作为宣传主标题。 7. 
标点与短句分隔：动作、优惠承诺、核心收益之间用逗号分隔；紧迫感/稀缺性提醒用分号与前半部分隔开；全句≤50字（含标点）。 <示例 few-shot="true"> 长按二维码，0元入群，领取中医调理养生建议；名额有限，赶快行动吧 <示例 few-shot="true"> 扫码二维码，免费进群，获取控糖日常饮食要点；数量有限，立即参加！ <示例 few-shot="true"> 识别二维码，0元加入，领取适合中老年人的养生课程；限时开放，先到先得！ <示例 few-shot="true"> 长按二维码，0元加微，获取声乐老师在线辅导；名额有限，尽快添加！ <自检> 在输出前自检：是否基于图片文字生成；是否以「行动指令 + 低门槛/优惠承诺」连续开头（或在素材不支持时作真实替换）；是否遵循标点与长度；是否包含真实的稀缺/紧迫提醒；是否合规且未承诺疗效。 <输出要求> 仅输出生成的一句广告文案，不要附加解释或其他信息。 """

BUSINESS_LICENSE_SYSTEM_PROMPT = """ <角色> 你是一名企业证照信息提取助手，负责从中国大陆营业执照图片中准确提取关键字段信息，并判断是否需要人工复核。 <字段定义> <字段> <名称>company_name <中文名称>公司名称 <描述>公司名称（位于营业执照上“名称”字段） <字段> <名称>unified_social_credit_code <中文名称>统一社会信用代码 <描述> 统一社会信用代码（位于营业执照左上角），包括了18位的主体内容，如有后缀内容（会以括号形式展示），须全部提取。 <字段> <名称>legal_representative <中文名称>法定代表人 <描述>法定代表人（营业执照上“法定代表人”字段） <字段> <名称>business_address <中文名称>住所/经营场所 <描述>住所（营业执照上“住所”字段，若无则使用“经营场所”） <字段> <名称>need_manual_review <描述> 是否需要人工复核（布尔值）。当识别结果不符合规则，存在异常时，设为 true，否则为 false。 <字段> <名称>inaccurate_fields <描述> 可能识别不准确的字段key数组。仅允许以下值： "company_name"、"unified_social_credit_code"、"legal_representative"、"business_address"。当 need_manual_review 为 false 时必须返回 []。 <字段> <名称>inaccurate_fields_zh <中文名称>可能不准确字段（中文） <描述> 可能识别不准确的字段中文名称数组。仅允许以下值： "公司名称"、"统一社会信用代码"、"法定代表人"、"住所/经营场所"。当 need_manual_review 为 false 时必须返回 []。 <约束> <规则>1. 所有字段必须仅根据图像中可见内容提取，禁止补全、猜测或逻辑推断。 <规则>2. unified_social_credit_code： a) 如有后缀，须完整保留括号后缀（如“(1-1)”）； b) 主体必须为18位字符，若不足18位或含有明显识别错误，应设 need_manual_review 为 true； <规则>3. 若无法识别某字段内容，应输出空字符串 ""，不要用 null 或其他占位符。 <规则>4. 所有字段输出必须为 JSON 格式结构，字段命名需与定义一致，不含解释性文字或多余内容。 <规则>5. 
当 need_manual_review=true 时，inaccurate_fields 必须给出至少一个可能不准确字段。 <输出格式> { "company_name": "", "unified_social_credit_code": "", "legal_representative": "", "business_address": "", "need_manual_review": false|true, "inaccurate_fields": [], "inaccurate_fields_zh": [] } <输入说明> 输入是一张中国大陆营业执照图片，请依据图像内容提取字段并输出结构化结果。如识别不全，标记为需人工复核。 """

# Tool schema for the ad-copy call: the model must return a single
# ``ocr_text`` string through the ``generate_ocr_text`` function.
copywriting_tools: list[ChatCompletionToolParam] = [
    {
        "type": "function",
        "function": {
            "name": "generate_ocr_text",
            "description": "生成一句适合中老年用户的广告文案（遵循结构公式与约束）",
            "parameters": {
                "type": "object",
                "properties": {
                    "ocr_text": {
                        "type": "string",
                        "description": "最终的一句广告文案（中文，简短醒目，合规）",
                    },
                },
                "required": ["ocr_text"],
                "additionalProperties": False,
            },
        },
    },
]

# Tool schema for the business-license call. ``inaccurate_fields_zh`` is not
# requested from the model; it is derived locally from ``FIELD_LABELS_ZH``.
business_license_tools: list[ChatCompletionToolParam] = [
    {
        "type": "function",
        "function": {
            "name": "extract_business_license_fields",
            "description": "从营业执照提取公司名称、统一社会信用代码、法定代表人、住所/经营场所，并标记是否需要人工复核",
            "parameters": {
                "type": "object",
                "properties": {
                    "company_name": {
                        "type": "string",
                        "description": "公司名称",
                    },
                    "unified_social_credit_code": {
                        "type": "string",
                        "description": "统一社会信用代码，包含括号后缀",
                    },
                    "legal_representative": {
                        "type": "string",
                        "description": "法定代表人",
                    },
                    "business_address": {
                        "type": "string",
                        "description": "住所/经营场所（优先使用“住所”字段）",
                    },
                    "need_manual_review": {
                        "type": "boolean",
                        "description": "是否需要人工复核。当统一社会信用代码主体不足18位或识别异常时应为 true",
                    },
                    "inaccurate_fields": {
                        "type": "array",
                        "description": "可能识别不准确的字段名列表；当 need_manual_review 为 false 时返回空数组",
                        "items": {
                            "type": "string",
                            "enum": [
                                "company_name",
                                "unified_social_credit_code",
                                "legal_representative",
                                "business_address",
                            ],
                        },
                    },
                },
                "required": [
                    "company_name",
                    "unified_social_credit_code",
                    "legal_representative",
                    "business_address",
                    "need_manual_review",
                    "inaccurate_fields",
                ],
                "additionalProperties": False,
            },
        },
    },
]


def _clean_text(value: object) -> str:
    """Return ``value`` as a stripped string, mapping ``None`` to ``""``.

    A JSON ``null`` from the model arrives as ``None``; calling ``str()`` on
    it directly would leak the literal text ``"None"`` into the result.
    """
    if value is None:
        return ""
    return str(value).strip()


def _parse_tool_arguments(message: object) -> dict:
    """Decode the JSON arguments of the first tool call on ``message``.

    Returns an empty dict when there is no tool call, the arguments are
    missing or blank, or the decoded JSON is not an object.

    Raises:
        json.JSONDecodeError: if the arguments string is not valid JSON.
    """
    tool_calls = getattr(message, "tool_calls", None) or []
    if not tool_calls:
        return {}
    function = getattr(tool_calls[0], "function", None)
    raw_arguments = getattr(function, "arguments", None)
    if not isinstance(raw_arguments, str) or not raw_arguments.strip():
        return {}
    decoded = json.loads(raw_arguments)
    return decoded if isinstance(decoded, dict) else {}


def _credit_code_body(code: str) -> str:
    """Return the credit code without its bracketed suffix (e.g. ``(1-1)``).

    Both ASCII and full-width opening brackets are treated as the start of
    the suffix, so the length check applies to the main body only.
    """
    return code.replace("（", "(").partition("(")[0].strip()


def _build_license_payload(args: dict) -> BusinessLicensePayload:
    """Turn decoded tool arguments into a ``BusinessLicensePayload``.

    An empty ``args`` yields an all-empty payload with
    ``need_manual_review=False``.
    """
    values = {key: _clean_text(args.get(key)) for key in FIELD_LABELS_ZH}
    need_manual_review = bool(args.get("need_manual_review", False))

    inaccurate_fields: list[str] = []
    if need_manual_review:
        reported = args.get("inaccurate_fields", [])
        if isinstance(reported, list):
            names = (str(item).strip() for item in reported)
            # dict.fromkeys de-duplicates while keeping the model's order.
            inaccurate_fields = list(
                dict.fromkeys(name for name in names if name in FIELD_LABELS_ZH)
            )
        if not inaccurate_fields:
            # The model flagged a review but named no field: infer the
            # likely culprits from empty values and a short credit-code body.
            inaccurate_fields = [
                key
                for key, value in values.items()
                if not value
                or (
                    key == "unified_social_credit_code"
                    and len(_credit_code_body(value)) < 18
                )
            ]
        if not inaccurate_fields:
            # Nothing looks wrong locally; fall back to the credit code so the
            # "at least one field when review is needed" rule still holds.
            inaccurate_fields = ["unified_social_credit_code"]

    return BusinessLicensePayload(
        company_name=values["company_name"],
        unified_social_credit_code=values["unified_social_credit_code"],
        legal_representative=values["legal_representative"],
        business_address=values["business_address"],
        need_manual_review=need_manual_review,
        inaccurate_fields=inaccurate_fields,
        inaccurate_fields_zh=[FIELD_LABELS_ZH[key] for key in inaccurate_fields],
    )


class UnderstandImageProvider:
    """Vision-model provider for ad-copy generation and license extraction."""

    # NOTE(review): this runs once, when the class body is executed at import
    # time - not on each call. Kept as-is because it is an import-time side
    # effect; consider removing it or routing it through ``logger``.
    print("UnderstandImageProvider called")

    def _create_client(self) -> OpenAI:
        """Build an OpenAI SDK client pointed at DashScope's compatible endpoint."""
        return OpenAI(
            api_key=settings.dashscope_api_key or "",
            base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
        )

    def _request_message(
        self,
        image_url: str,
        *,
        model: str,
        system_prompt: str,
        tools: list[ChatCompletionToolParam],
        tool_name: str,
        temperature: float,
    ):
        """Send one image to the model and return the first choice's message.

        The model is forced to call ``tool_name``. Returns ``None`` when the
        response carries no choices. Exceptions raised by the SDK call itself
        are not caught here and propagate to the caller.
        """
        client = self._create_client()
        completion = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": system_prompt},
                {
                    "role": "user",
                    "content": [
                        {"type": "image_url", "image_url": {"url": image_url}},
                    ],
                },
            ],
            tools=tools,
            tool_choice={"type": "function", "function": {"name": tool_name}},
            temperature=temperature,
        )
        choices = getattr(completion, "choices", None) or []
        return choices[0].message if choices else None

    def understand_image(self, image_url: str, *, model: str) -> DataResponse:
        """Generate one line of ad copy from the advertisement at ``image_url``.

        Args:
            image_url: URL of the image, passed to the model as an
                ``image_url`` content part.
            model: Model name forwarded to the chat-completions call.

        Returns:
            ``DataResponse`` with ``code=0`` and the copy text in ``data`` on
            success, or ``code=1`` when the API key is not configured, the
            model returns no choices, or the tool-call arguments cannot be
            parsed.
        """
        # A constructed client object is always truthy, so the configuration
        # problem that can actually be detected up front is a missing key.
        if not settings.dashscope_api_key:
            logger.error("OpenAI client is not initialized.")
            return DataResponse(code=1, data=None, msg="OpenAI client is not initialized")

        message = self._request_message(
            image_url,
            model=model,
            system_prompt=SYSTEM_PROMPT,
            tools=copywriting_tools,
            tool_name="generate_ocr_text",
            temperature=0.5,
        )
        if message is None:
            logger.error("model returned no choices")
            return DataResponse(code=1, data=None, msg="model returned no choices")

        try:
            args = _parse_tool_arguments(message)
        except Exception as e:
            logger.error("parse tool call failed: %s", e, exc_info=True)
            return DataResponse(code=1, data=None, msg=f"parse tool call failed: {e}")

        ocr_text = _clean_text(args.get("ocr_text"))

        # Fallback: if the tool call produced no text, use plain message content.
        content = getattr(message, "content", None)
        if not ocr_text and isinstance(content, str):
            ocr_text = content.strip()

        logger.info("OCR_TEXT: %s", ocr_text)
        return DataResponse(code=0, data=ocr_text, msg="success")

    def extract_business_license(self, image_url: str, *, model: str) -> DataResponse:
        """Extract key fields from the business-license image at ``image_url``.

        Args:
            image_url: URL of the license image, passed to the model as an
                ``image_url`` content part.
            model: Model name forwarded to the chat-completions call.

        Returns:
            ``DataResponse`` with ``code=0`` and a ``BusinessLicensePayload``
            in ``data`` on success, or ``code=1`` when the API key is not
            configured, the model returns no choices, or the tool-call
            arguments cannot be parsed into a payload.
        """
        if not settings.dashscope_api_key:
            logger.error("OpenAI client is not initialized.")
            return DataResponse(code=1, data=None, msg="OpenAI client is not initialized")

        message = self._request_message(
            image_url,
            model=model,
            system_prompt=BUSINESS_LICENSE_SYSTEM_PROMPT,
            tools=business_license_tools,
            tool_name="extract_business_license_fields",
            temperature=0.2,
        )
        if message is None:
            logger.error("model returned no choices")
            return DataResponse(code=1, data=None, msg="model returned no choices")

        # NOTE(review): when the model returns no usable tool call, this still
        # reports success with an all-empty payload and
        # need_manual_review=False. Confirm whether callers expect that.
        try:
            payload = _build_license_payload(_parse_tool_arguments(message))
        except Exception as e:
            logger.error("parse business license tool call failed: %s", e, exc_info=True)
            return DataResponse(code=1, data=None, msg=f"parse tool call failed: {e}")

        return DataResponse(code=0, data=payload, msg="success")