cache_ratio.go 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199
  1. package ratio_setting
  2. import (
  3. "encoding/json"
  4. "sync"
  5. "github.com/QuantumNous/new-api/common"
  6. )
  // defaultCacheRatio is the built-in table of cache ratios, keyed by exact
  // model name.
  //
  // NOTE(review): presumably the multiplier applied to cache-read (cache hit)
  // tokens and the seed for cacheRatioMap; neither use is visible in this file,
  // so confirm against the callers.
  var defaultCacheRatio = map[string]float64{
  	// Google Gemini
  	"gemini-3-flash-preview": 0.25,
  	"gemini-3-pro-preview": 0.25,

  	// OpenAI GPT-4, o1 and o3 families
  	"gpt-4": 0.5,
  	"o1": 0.5,
  	"o1-2024-12-17": 0.5,
  	"o1-preview-2024-09-12": 0.5,
  	"o1-preview": 0.5,
  	"o1-mini-2024-09-12": 0.5,
  	"o1-mini": 0.5,
  	"o3-mini": 0.5,
  	"o3-mini-2025-01-31": 0.5,
  	"gpt-4o-2024-11-20": 0.5,
  	"gpt-4o-2024-08-06": 0.5,
  	"gpt-4o": 0.5,
  	"gpt-4o-mini-2024-07-18": 0.5,
  	"gpt-4o-mini": 0.5,
  	"gpt-4o-realtime-preview": 0.5,
  	"gpt-4o-mini-realtime-preview": 0.5,
  	"gpt-4.5-preview": 0.5,
  	"gpt-4.5-preview-2025-02-27": 0.5,

  	// OpenAI GPT-4.1 family
  	"gpt-4.1": 0.25,
  	"gpt-4.1-mini": 0.25,
  	"gpt-4.1-nano": 0.25,

  	// OpenAI GPT-5 family
  	"gpt-5": 0.1,
  	"gpt-5-2025-08-07": 0.1,
  	"gpt-5-chat-latest": 0.1,
  	"gpt-5-mini": 0.1,
  	"gpt-5-mini-2025-08-07": 0.1,
  	"gpt-5-nano": 0.1,
  	"gpt-5-nano-2025-08-07": 0.1,

  	// DeepSeek
  	"deepseek-chat": 0.25,
  	"deepseek-reasoner": 0.25,
  	"deepseek-coder": 0.25,

  	// Anthropic Claude 3 / 3.5 / 3.7
  	"claude-3-sonnet-20240229": 0.1,
  	"claude-3-opus-20240229": 0.1,
  	"claude-3-haiku-20240307": 0.1,
  	"claude-3-5-haiku-20241022": 0.1,
  	"claude-haiku-4-5-20251001": 0.1,
  	"claude-3-5-sonnet-20240620": 0.1,
  	"claude-3-5-sonnet-20241022": 0.1,
  	"claude-3-7-sonnet-20250219": 0.1,
  	"claude-3-7-sonnet-20250219-thinking": 0.1,

  	// Anthropic Claude 4.x (including "-thinking" and effort-suffixed aliases)
  	"claude-sonnet-4-20250514": 0.1,
  	"claude-sonnet-4-20250514-thinking": 0.1,
  	"claude-opus-4-20250514": 0.1,
  	"claude-opus-4-20250514-thinking": 0.1,
  	"claude-opus-4-1-20250805": 0.1,
  	"claude-opus-4-1-20250805-thinking": 0.1,
  	"claude-sonnet-4-5-20250929": 0.1,
  	"claude-sonnet-4-5-20250929-thinking": 0.1,
  	"claude-opus-4-5-20251101": 0.1,
  	"claude-opus-4-5-20251101-thinking": 0.1,
  	"claude-opus-4-6": 0.1,
  	"claude-opus-4-6-thinking": 0.1,
  	"claude-opus-4-6-max": 0.1,
  	"claude-opus-4-6-high": 0.1,
  	"claude-opus-4-6-medium": 0.1,
  	"claude-opus-4-6-low": 0.1,
  }
  // defaultCreateCacheRatio is the built-in table of cache-creation ratios,
  // keyed by exact model name. Only Claude models are listed, all at 1.25.
  //
  // NOTE(review): presumably the multiplier applied to cache-write tokens and
  // the seed for createCacheRatioMap; neither use is visible in this file, so
  // confirm against the callers.
  var defaultCreateCacheRatio = map[string]float64{
  	// Claude 3 / 3.5 / 3.7
  	"claude-3-sonnet-20240229": 1.25,
  	"claude-3-opus-20240229": 1.25,
  	"claude-3-haiku-20240307": 1.25,
  	"claude-3-5-haiku-20241022": 1.25,
  	"claude-haiku-4-5-20251001": 1.25,
  	"claude-3-5-sonnet-20240620": 1.25,
  	"claude-3-5-sonnet-20241022": 1.25,
  	"claude-3-7-sonnet-20250219": 1.25,
  	"claude-3-7-sonnet-20250219-thinking": 1.25,

  	// Claude 4.x (including "-thinking" and effort-suffixed aliases)
  	"claude-sonnet-4-20250514": 1.25,
  	"claude-sonnet-4-20250514-thinking": 1.25,
  	"claude-opus-4-20250514": 1.25,
  	"claude-opus-4-20250514-thinking": 1.25,
  	"claude-opus-4-1-20250805": 1.25,
  	"claude-opus-4-1-20250805-thinking": 1.25,
  	"claude-sonnet-4-5-20250929": 1.25,
  	"claude-sonnet-4-5-20250929-thinking": 1.25,
  	"claude-opus-4-5-20251101": 1.25,
  	"claude-opus-4-5-20251101-thinking": 1.25,
  	"claude-opus-4-6": 1.25,
  	"claude-opus-4-6-thinking": 1.25,
  	"claude-opus-4-6-max": 1.25,
  	"claude-opus-4-6-high": 1.25,
  	"claude-opus-4-6-medium": 1.25,
  	"claude-opus-4-6-low": 1.25,
  }
  94. //var defaultCreateCacheRatio = map[string]float64{}
  // Package-level state for the active ratio tables. Each map is read and
  // replaced only while holding the mutex declared next to it. No initializer
  // is visible in this file, so both maps start out nil here.
  var (
  	// cacheRatioMap holds the active cache ratios, keyed by model name.
  	cacheRatioMap map[string]float64
  	// cacheRatioMapMutex guards cacheRatioMap.
  	cacheRatioMapMutex sync.RWMutex

  	// createCacheRatioMap holds the active cache-creation ratios, keyed by
  	// model name.
  	createCacheRatioMap map[string]float64
  	// createCacheRatioMapMutex guards createCacheRatioMap.
  	createCacheRatioMapMutex sync.RWMutex
  )
  99. // GetCacheRatioMap returns the cache ratio map
  100. func GetCacheRatioMap() map[string]float64 {
  101. cacheRatioMapMutex.RLock()
  102. defer cacheRatioMapMutex.RUnlock()
  103. return cacheRatioMap
  104. }
  105. // CacheRatio2JSONString converts the cache ratio map to a JSON string
  106. func CacheRatio2JSONString() string {
  107. cacheRatioMapMutex.RLock()
  108. defer cacheRatioMapMutex.RUnlock()
  109. jsonBytes, err := json.Marshal(cacheRatioMap)
  110. if err != nil {
  111. common.SysLog("error marshalling cache ratio: " + err.Error())
  112. }
  113. return string(jsonBytes)
  114. }
  115. // CreateCacheRatio2JSONString converts the create cache ratio map to a JSON string
  116. func CreateCacheRatio2JSONString() string {
  117. createCacheRatioMapMutex.RLock()
  118. defer createCacheRatioMapMutex.RUnlock()
  119. jsonBytes, err := json.Marshal(createCacheRatioMap)
  120. if err != nil {
  121. common.SysLog("error marshalling create cache ratio: " + err.Error())
  122. }
  123. return string(jsonBytes)
  124. }
  125. // UpdateCacheRatioByJSONString updates the cache ratio map from a JSON string
  126. func UpdateCacheRatioByJSONString(jsonStr string) error {
  127. cacheRatioMapMutex.Lock()
  128. defer cacheRatioMapMutex.Unlock()
  129. cacheRatioMap = make(map[string]float64)
  130. err := json.Unmarshal([]byte(jsonStr), &cacheRatioMap)
  131. if err == nil {
  132. InvalidateExposedDataCache()
  133. }
  134. return err
  135. }
  136. // UpdateCreateCacheRatioByJSONString updates the create cache ratio map from a JSON string
  137. func UpdateCreateCacheRatioByJSONString(jsonStr string) error {
  138. createCacheRatioMapMutex.Lock()
  139. defer createCacheRatioMapMutex.Unlock()
  140. createCacheRatioMap = make(map[string]float64)
  141. err := json.Unmarshal([]byte(jsonStr), &createCacheRatioMap)
  142. if err == nil {
  143. InvalidateExposedDataCache()
  144. }
  145. return err
  146. }
  147. // GetCacheRatio returns the cache ratio for a model
  148. func GetCacheRatio(name string) (float64, bool) {
  149. cacheRatioMapMutex.RLock()
  150. defer cacheRatioMapMutex.RUnlock()
  151. ratio, ok := cacheRatioMap[name]
  152. if !ok {
  153. return 1, false // Default to 1 if not found
  154. }
  155. return ratio, true
  156. }
  157. func GetCreateCacheRatio(name string) (float64, bool) {
  158. createCacheRatioMapMutex.RLock()
  159. defer createCacheRatioMapMutex.RUnlock()
  160. ratio, ok := createCacheRatioMap[name]
  161. if !ok {
  162. return 1.25, false // Default to 1.25 if not found
  163. }
  164. return ratio, true
  165. }
  166. func GetCacheRatioCopy() map[string]float64 {
  167. cacheRatioMapMutex.RLock()
  168. defer cacheRatioMapMutex.RUnlock()
  169. copyMap := make(map[string]float64, len(cacheRatioMap))
  170. for k, v := range cacheRatioMap {
  171. copyMap[k] = v
  172. }
  173. return copyMap
  174. }
  175. func GetCreateCacheRatioCopy() map[string]float64 {
  176. createCacheRatioMapMutex.RLock()
  177. defer createCacheRatioMapMutex.RUnlock()
  178. copyMap := make(map[string]float64, len(createCacheRatioMap))
  179. for k, v := range createCacheRatioMap {
  180. copyMap[k] = v
  181. }
  182. return copyMap
  183. }