cache_ratio.go 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126
  1. package ratio_setting
  2. import (
  3. "encoding/json"
  4. "one-api/common"
  5. "sync"
  6. )
  // defaultCacheRatio is the built-in table of cache ratios, keyed by model
  // name. Every gpt-*/o1*/o3* entry is 0.5, every deepseek-* entry is 0.25 and
  // every claude-* entry is 0.1.
  var defaultCacheRatio = map[string]float64{
  	// o1 / o3 family.
  	"o1": 0.5,
  	"o1-2024-12-17": 0.5,
  	"o1-preview": 0.5,
  	"o1-preview-2024-09-12": 0.5,
  	"o1-mini": 0.5,
  	"o1-mini-2024-09-12": 0.5,
  	"o3-mini": 0.5,
  	"o3-mini-2025-01-31": 0.5,

  	// gpt-4 family.
  	"gpt-4": 0.5,
  	"gpt-4o": 0.5,
  	"gpt-4o-2024-08-06": 0.5,
  	"gpt-4o-2024-11-20": 0.5,
  	"gpt-4o-mini": 0.5,
  	"gpt-4o-mini-2024-07-18": 0.5,
  	"gpt-4o-realtime-preview": 0.5,
  	"gpt-4o-mini-realtime-preview": 0.5,
  	"gpt-4.5-preview": 0.5,
  	"gpt-4.5-preview-2025-02-27": 0.5,

  	// deepseek family.
  	"deepseek-chat": 0.25,
  	"deepseek-reasoner": 0.25,
  	"deepseek-coder": 0.25,

  	// claude family.
  	"claude-3-sonnet-20240229": 0.1,
  	"claude-3-opus-20240229": 0.1,
  	"claude-3-haiku-20240307": 0.1,
  	"claude-3-5-haiku-20241022": 0.1,
  	"claude-3-5-sonnet-20240620": 0.1,
  	"claude-3-5-sonnet-20241022": 0.1,
  	"claude-3-7-sonnet-20250219": 0.1,
  	"claude-3-7-sonnet-20250219-thinking": 0.1,
  	"claude-sonnet-4-20250514": 0.1,
  	"claude-sonnet-4-20250514-thinking": 0.1,
  	"claude-opus-4-20250514": 0.1,
  	"claude-opus-4-20250514-thinking": 0.1,
  	"claude-opus-4-1-20250805": 0.1,
  	"claude-opus-4-1-20250805-thinking": 0.1,
  }
  // defaultCreateCacheRatio is the table GetCreateCacheRatio looks models up
  // in. It is keyed by model name; every listed claude-* model maps to 1.25.
  var defaultCreateCacheRatio = map[string]float64{
  	// claude-3 generation.
  	"claude-3-haiku-20240307": 1.25,
  	"claude-3-opus-20240229": 1.25,
  	"claude-3-sonnet-20240229": 1.25,

  	// claude-3-5 / claude-3-7 generation.
  	"claude-3-5-haiku-20241022": 1.25,
  	"claude-3-5-sonnet-20240620": 1.25,
  	"claude-3-5-sonnet-20241022": 1.25,
  	"claude-3-7-sonnet-20250219": 1.25,
  	"claude-3-7-sonnet-20250219-thinking": 1.25,

  	// claude-4 generation.
  	"claude-sonnet-4-20250514": 1.25,
  	"claude-sonnet-4-20250514-thinking": 1.25,
  	"claude-opus-4-20250514": 1.25,
  	"claude-opus-4-20250514-thinking": 1.25,
  	"claude-opus-4-1-20250805": 1.25,
  	"claude-opus-4-1-20250805-thinking": 1.25,
  }
  60. //var defaultCreateCacheRatio = map[string]float64{}
  // Runtime cache-ratio state shared by the accessors in this file.
  var (
  	// cacheRatioMap is the active model-name to cache-ratio table. It has no
  	// initializer here, so it is nil until something assigns it.
  	cacheRatioMap map[string]float64

  	// cacheRatioMapMutex is the lock the functions in this file take around
  	// every read and write of cacheRatioMap.
  	cacheRatioMapMutex sync.RWMutex
  )
  63. // GetCacheRatioMap returns the cache ratio map
  64. func GetCacheRatioMap() map[string]float64 {
  65. cacheRatioMapMutex.RLock()
  66. defer cacheRatioMapMutex.RUnlock()
  67. return cacheRatioMap
  68. }
  69. // CacheRatio2JSONString converts the cache ratio map to a JSON string
  70. func CacheRatio2JSONString() string {
  71. cacheRatioMapMutex.RLock()
  72. defer cacheRatioMapMutex.RUnlock()
  73. jsonBytes, err := json.Marshal(cacheRatioMap)
  74. if err != nil {
  75. common.SysError("error marshalling cache ratio: " + err.Error())
  76. }
  77. return string(jsonBytes)
  78. }
  79. // UpdateCacheRatioByJSONString updates the cache ratio map from a JSON string
  80. func UpdateCacheRatioByJSONString(jsonStr string) error {
  81. cacheRatioMapMutex.Lock()
  82. defer cacheRatioMapMutex.Unlock()
  83. cacheRatioMap = make(map[string]float64)
  84. err := json.Unmarshal([]byte(jsonStr), &cacheRatioMap)
  85. if err == nil {
  86. InvalidateExposedDataCache()
  87. }
  88. return err
  89. }
  90. // GetCacheRatio returns the cache ratio for a model
  91. func GetCacheRatio(name string) (float64, bool) {
  92. cacheRatioMapMutex.RLock()
  93. defer cacheRatioMapMutex.RUnlock()
  94. ratio, ok := cacheRatioMap[name]
  95. if !ok {
  96. return 1, false // Default to 1 if not found
  97. }
  98. return ratio, true
  99. }
  100. func GetCreateCacheRatio(name string) (float64, bool) {
  101. ratio, ok := defaultCreateCacheRatio[name]
  102. if !ok {
  103. return 1.25, false // Default to 1.25 if not found
  104. }
  105. return ratio, true
  106. }
  107. func GetCacheRatioCopy() map[string]float64 {
  108. cacheRatioMapMutex.RLock()
  109. defer cacheRatioMapMutex.RUnlock()
  110. copyMap := make(map[string]float64, len(cacheRatioMap))
  111. for k, v := range cacheRatioMap {
  112. copyMap[k] = v
  113. }
  114. return copyMap
  115. }