Browse Source

feat: add hailuo i2v fl2v r2v

feitianbubu 3 months ago
parent
commit
d8dc8029c0

+ 9 - 64
relay/channel/task/hailuo/adaptor.go

@@ -22,6 +22,7 @@ import (
 	"github.com/QuantumNous/new-api/service"
 )
 
+// https://platform.minimaxi.com/docs/api-reference/video-generation-intro
 type TaskAdaptor struct {
 	ChannelType int
 	apiKey      string
@@ -84,7 +85,7 @@ func (a *TaskAdaptor) DoResponse(c *gin.Context, resp *http.Response, info *rela
 	}
 	_ = resp.Body.Close()
 
-	var hResp TextToVideoResponse
+	var hResp VideoResponse
 	if err := json.Unmarshal(responseBody, &hResp); err != nil {
 		taskErr = service.TaskErrorWrapper(errors.Wrapf(err, "body: %s", responseBody), "unmarshal_response_body_failed", http.StatusInternalServerError)
 		return
@@ -136,86 +137,28 @@ func (a *TaskAdaptor) GetChannelName() string {
 	return ChannelName
 }
 
-func (a *TaskAdaptor) convertToRequestPayload(req *relaycommon.TaskSubmitReq) (*TextToVideoRequest, error) {
+func (a *TaskAdaptor) convertToRequestPayload(req *relaycommon.TaskSubmitReq) (*VideoRequest, error) {
 	modelConfig := GetModelConfig(req.Model)
-	if !contains(ModelList, req.Model) {
-		return nil, fmt.Errorf("unsupported model: %s", req.Model)
-	}
-
 	duration := DefaultDuration
 	if req.Duration > 0 {
 		duration = req.Duration
 	}
-
-	if !containsInt(modelConfig.SupportedDurations, duration) {
-		return nil, fmt.Errorf("duration %d is not supported by model %s, supported durations: %v",
-			duration, req.Model, modelConfig.SupportedDurations)
-	}
-
 	resolution := modelConfig.DefaultResolution
 	if req.Size != "" {
 		resolution = a.parseResolutionFromSize(req.Size, modelConfig)
 	}
 
-	if !contains(modelConfig.SupportedResolutions, resolution) {
-		return nil, fmt.Errorf("resolution %s is not supported by model %s, supported resolutions: %v",
-			resolution, req.Model, modelConfig.SupportedResolutions)
-	}
-
-	hailuoReq := &TextToVideoRequest{
+	videoRequest := &VideoRequest{
 		Model:      req.Model,
 		Prompt:     req.Prompt,
 		Duration:   &duration,
 		Resolution: resolution,
 	}
-
-	promptOptimizer := DefaultPromptOptimizer
-	hailuoReq.PromptOptimizer = &promptOptimizer
-
-	metadata := req.Metadata
-	if metadata != nil {
-		metadataBytes, err := json.Marshal(metadata)
-		if err != nil {
-			return nil, errors.Wrap(err, "marshal metadata failed")
-		}
-
-		var metadataMap map[string]interface{}
-		if err := json.Unmarshal(metadataBytes, &metadataMap); err != nil {
-			return nil, errors.Wrap(err, "unmarshal metadata failed")
-		}
-
-		if val, exists := metadataMap["prompt_optimizer"]; exists {
-			if boolVal, ok := val.(bool); ok {
-				hailuoReq.PromptOptimizer = &boolVal
-			}
-		}
-
-		if modelConfig.HasFastPretreatment {
-			if val, exists := metadataMap["fast_pretreatment"]; exists {
-				if boolVal, ok := val.(bool); ok {
-					hailuoReq.FastPretreatment = &boolVal
-				}
-			}
-		}
-
-		if val, exists := metadataMap["callback_url"]; exists {
-			if strVal, ok := val.(string); ok {
-				hailuoReq.CallbackURL = strVal
-			}
-		}
-
-		if val, exists := metadataMap["aigc_watermark"]; exists {
-			if boolVal, ok := val.(bool); ok {
-				hailuoReq.AigcWatermark = &boolVal
-			}
-		}
-	}
-
-	if req.HasImage() {
-		return nil, fmt.Errorf("image input is not supported by hailuo video generation")
+	if err := req.UnmarshalMetadata(&videoRequest); err != nil {
+		return nil, errors.Wrap(err, "unmarshal metadata to video request failed")
 	}
 
-	return hailuoReq, nil
+	return videoRequest, nil
 }
 
 func (a *TaskAdaptor) parseResolutionFromSize(size string, modelConfig ModelConfig) string {
@@ -226,6 +169,8 @@ func (a *TaskAdaptor) parseResolutionFromSize(size string, modelConfig ModelConf
 		return Resolution768P
 	case strings.Contains(size, "720"):
 		return Resolution720P
+	case strings.Contains(size, "512"):
+		return Resolution512P
 	default:
 		return modelConfig.DefaultResolution
 	}

+ 8 - 3
relay/channel/task/hailuo/constants.go

@@ -6,9 +6,14 @@ const (
 
 var ModelList = []string{
 	"MiniMax-Hailuo-2.3",
+	"MiniMax-Hailuo-2.3-Fast",
 	"MiniMax-Hailuo-02",
 	"T2V-01-Director",
 	"T2V-01",
+	"I2V-01-Director",
+	"I2V-01-live",
+	"I2V-01",
+	"S2V-01",
 }
 
 const (
@@ -35,13 +40,13 @@ const (
 )
 
 const (
+	Resolution512P  = "512P"
 	Resolution720P  = "720P"
 	Resolution768P  = "768P"
 	Resolution1080P = "1080P"
 )
 
 const (
-	DefaultDuration        = 6
-	DefaultResolution      = Resolution768P
-	DefaultPromptOptimizer = true
+	DefaultDuration   = 6
+	DefaultResolution = Resolution720P
 )

+ 61 - 13
relay/channel/task/hailuo/models.go

@@ -1,17 +1,25 @@
 package hailuo
 
-type TextToVideoRequest struct {
-	Model            string `json:"model"`
-	Prompt           string `json:"prompt"`
-	PromptOptimizer  *bool  `json:"prompt_optimizer,omitempty"`
-	FastPretreatment *bool  `json:"fast_pretreatment,omitempty"`
-	Duration         *int   `json:"duration,omitempty"`
-	Resolution       string `json:"resolution,omitempty"`
-	CallbackURL      string `json:"callback_url,omitempty"`
-	AigcWatermark    *bool  `json:"aigc_watermark,omitempty"`
+type SubjectReference struct {
+	Type  string   `json:"type"`  // Subject type, currently only supports "character"
+	Image []string `json:"image"` // Array of subject reference images (currently only a single image is supported)
 }
 
-type TextToVideoResponse struct {
+type VideoRequest struct {
+	Model            string             `json:"model"`
+	Prompt           string             `json:"prompt,omitempty"`
+	PromptOptimizer  *bool              `json:"prompt_optimizer,omitempty"`
+	FastPretreatment *bool              `json:"fast_pretreatment,omitempty"`
+	Duration         *int               `json:"duration,omitempty"`
+	Resolution       string             `json:"resolution,omitempty"`
+	CallbackURL      string             `json:"callback_url,omitempty"`
+	AigcWatermark    *bool              `json:"aigc_watermark,omitempty"`
+	FirstFrameImage  string             `json:"first_frame_image,omitempty"` // For image-to-video and start-end-to-video
+	LastFrameImage   string             `json:"last_frame_image,omitempty"`  // For start-end-to-video
+	SubjectReference []SubjectReference `json:"subject_reference,omitempty"` // For subject-reference-to-video
+}
+
+type VideoResponse struct {
 	TaskID   string   `json:"task_id"`
 	BaseResp BaseResp `json:"base_resp"`
 }
@@ -81,11 +89,19 @@ func GetModelConfig(model string) ModelConfig {
 			HasPromptOptimizer:   true,
 			HasFastPretreatment:  true,
 		},
+		"MiniMax-Hailuo-2.3-Fast": {
+			Name:                 "MiniMax-Hailuo-2.3-Fast",
+			DefaultResolution:    Resolution768P,
+			SupportedDurations:   []int{6, 10},
+			SupportedResolutions: []string{Resolution768P, Resolution1080P},
+			HasPromptOptimizer:   true,
+			HasFastPretreatment:  true,
+		},
 		"MiniMax-Hailuo-02": {
 			Name:                 "MiniMax-Hailuo-02",
 			DefaultResolution:    Resolution768P,
 			SupportedDurations:   []int{6, 10},
-			SupportedResolutions: []string{Resolution768P, Resolution1080P},
+			SupportedResolutions: []string{Resolution512P, Resolution768P, Resolution1080P},
 			HasPromptOptimizer:   true,
 			HasFastPretreatment:  true,
 		},
@@ -105,6 +121,38 @@ func GetModelConfig(model string) ModelConfig {
 			HasPromptOptimizer:   true,
 			HasFastPretreatment:  false,
 		},
+		"I2V-01-Director": {
+			Name:                 "I2V-01-Director",
+			DefaultResolution:    Resolution720P,
+			SupportedDurations:   []int{6},
+			SupportedResolutions: []string{Resolution720P, Resolution1080P},
+			HasPromptOptimizer:   true,
+			HasFastPretreatment:  false,
+		},
+		"I2V-01-live": {
+			Name:                 "I2V-01-live",
+			DefaultResolution:    Resolution720P,
+			SupportedDurations:   []int{6},
+			SupportedResolutions: []string{Resolution720P, Resolution1080P},
+			HasPromptOptimizer:   true,
+			HasFastPretreatment:  false,
+		},
+		"I2V-01": {
+			Name:                 "I2V-01",
+			DefaultResolution:    Resolution720P,
+			SupportedDurations:   []int{6},
+			SupportedResolutions: []string{Resolution720P, Resolution1080P},
+			HasPromptOptimizer:   true,
+			HasFastPretreatment:  false,
+		},
+		"S2V-01": {
+			Name:                 "S2V-01",
+			DefaultResolution:    Resolution720P,
+			SupportedDurations:   []int{6},
+			SupportedResolutions: []string{Resolution720P},
+			HasPromptOptimizer:   true,
+			HasFastPretreatment:  false,
+		},
 	}
 
 	if config, exists := configs[model]; exists {
@@ -113,9 +161,9 @@ func GetModelConfig(model string) ModelConfig {
 
 	return ModelConfig{
 		Name:                 model,
-		DefaultResolution:    Resolution720P,
+		DefaultResolution:    DefaultResolution,
 		SupportedDurations:   []int{6},
-		SupportedResolutions: []string{Resolution720P},
+		SupportedResolutions: []string{DefaultResolution},
 		HasPromptOptimizer:   true,
 		HasFastPretreatment:  false,
 	}

+ 16 - 2
relay/common/relay_info.go

@@ -498,11 +498,11 @@ type TaskSubmitReq struct {
 	Metadata       map[string]interface{} `json:"metadata,omitempty"`
 }
 
-func (t TaskSubmitReq) GetPrompt() string {
+func (t *TaskSubmitReq) GetPrompt() string {
 	return t.Prompt
 }
 
-func (t TaskSubmitReq) HasImage() bool {
+func (t *TaskSubmitReq) HasImage() bool {
 	return len(t.Images) > 0
 }
 
@@ -537,6 +537,20 @@ func (t *TaskSubmitReq) UnmarshalJSON(data []byte) error {
 
 	return nil
 }
+func (t *TaskSubmitReq) UnmarshalMetadata(v any) error {
+	metadata := t.Metadata
+	if metadata != nil {
+		metadataBytes, err := json.Marshal(metadata)
+		if err != nil {
+			return fmt.Errorf("marshal metadata failed: %w", err)
+		}
+		err = json.Unmarshal(metadataBytes, v)
+		if err != nil {
+			return fmt.Errorf("unmarshal metadata to target failed: %w", err)
+		}
+	}
+	return nil
+}
 
 type TaskInfo struct {
 	Code             int    `json:"code"`