models.py 7.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240
  1. from typing import List, Dict, Optional, Set
  2. import json
  3. from dataclasses import dataclass, field
  4. import hashlib
  5. class FNV:
  6. INIT64 = int("cbf29ce484222325", 16)
  7. PRIME64 = int("100000001b3", 16)
  8. MOD64 = 2**64
  9. @staticmethod
  10. def fnv1_64(data: bytes) -> int:
  11. hash_value = FNV.INIT64
  12. for byte in data:
  13. hash_value = (hash_value * FNV.PRIME64) % FNV.MOD64
  14. hash_value = hash_value ^ byte
  15. return hash_value
  16. class DiversionBucket:
  17. def match(self, experiment_context):
  18. raise NotImplementedError("Subclasses must implement this method")
  19. class UidDiversionBucket(DiversionBucket):
  20. def __init__(self, total_buckets: int, buckets: str):
  21. self.total_buckets = total_buckets
  22. if buckets:
  23. self.buckets = set(map(int, buckets.split(",")))
  24. else:
  25. self.buckets = set()
  26. def match(self, experiment_context):
  27. uid_hash = int(experiment_context.uid)
  28. bucket = uid_hash % self.total_buckets
  29. # print(f"Matching UID {experiment_context.uid} with hash {uid_hash} to bucket {bucket} in {self.buckets}")
  30. return bucket in self.buckets
  31. class FilterDiversionBucket(DiversionBucket):
  32. def __init__(self, filter_condition: str):
  33. self.filter_condition = filter_condition
  34. def match(self, experiment_context):
  35. raise NotImplementedError("not implemented")
  36. class Feature:
  37. def __init__(self, params=None):
  38. self.params = params
  39. def init(self):
  40. # Initialize feature-specific logic
  41. pass
  42. class ExperimentContext:
  43. def __init__(self, uid=None, filter_params=None):
  44. self.uid = uid
  45. self.filter_params = filter_params or {}
  46. def __str__(self):
  47. return f"ExperimentContext(uid={self.uid}, filter_params={self.filter_params})"
  48. class Domain:
  49. def __init__(self, domain_id, name, flow: int, buckets: str, bucket_type: str, debug_crowd_ids=None, is_default_domain=False, exp_layer_id=None,
  50. debug_users=""):
  51. self.id = int(domain_id)
  52. self.name = name
  53. self.debug_crowd_ids = debug_crowd_ids
  54. self.is_default_domain = is_default_domain
  55. self.exp_layer_id = int(exp_layer_id) if exp_layer_id is not None else None
  56. self.features = []
  57. self.layers = []
  58. self.debug_users = debug_users
  59. self.flow = flow
  60. self.buckets = buckets
  61. self.diversion_bucket = None
  62. self.bucket_type = bucket_type
  63. self.debug_user_set = set()
  64. def add_debug_users(self, users: List[str]):
  65. self.debug_user_set.update(users)
  66. def match_debug_users(self, experiment_context):
  67. return experiment_context.uid in self.debug_user_set
  68. def add_feature(self, feature: Feature):
  69. self.features.append(feature)
  70. def add_layer(self, layer):
  71. self.layers.append(layer)
  72. def init(self):
  73. self.debug_user_set.update(self.debug_users.split(","))
  74. self.diversion_bucket = UidDiversionBucket(100, self.buckets)
  75. def match(self, experiment_context):
  76. if self.flow == 0:
  77. return False
  78. elif self.flow == 100:
  79. return True
  80. if self.diversion_bucket:
  81. return self.diversion_bucket.match(experiment_context)
  82. return False
  83. @dataclass
  84. class Layer:
  85. id: int
  86. name: str
  87. experiments: List['Experiment'] = field(default_factory=list)
  88. domains: List[Domain] = field(default_factory=list)
  89. def add_experiment(self, experiment):
  90. self.experiments.append(experiment)
  91. def add_domain(self, domain):
  92. self.domains.append(domain)
  93. @dataclass
  94. class Experiment:
  95. id: int
  96. flow: int
  97. crowd_ids: List[str]
  98. debug_users: str
  99. buckets: str
  100. filter_condition: str
  101. bucket_type: str = "Random"
  102. debug_user_set: Set[str] = field(default_factory=set)
  103. diversion_bucket: Optional[DiversionBucket] = None
  104. experiment_versions: List['ExperimentVersion'] = field(default_factory=list)
  105. def add_debug_users(self, users: List[str]):
  106. self.debug_user_set.update(users)
  107. def match_debug_users(self, experiment_context):
  108. return experiment_context.uid in self.debug_user_set
  109. def add_experiment_version(self, version):
  110. self.experiment_versions.append(version)
  111. def match(self, experiment_context: ExperimentContext) -> bool:
  112. if self.bucket_type == "Random":
  113. if self.flow == 0:
  114. return False
  115. elif self.flow == 100:
  116. return True
  117. if self.diversion_bucket:
  118. return self.diversion_bucket.match(experiment_context)
  119. return False
  120. def init(self):
  121. # 初始化 debug_user_map
  122. if self.debug_users:
  123. self.debug_user_set.update(self.debug_users.split(","))
  124. # 初始化 diversion_bucket
  125. if self.bucket_type == "Random": # ExpBucketTypeRand
  126. self.diversion_bucket = UidDiversionBucket(100, self.buckets)
  127. elif self.bucket_type == "Condition" and self.filter_condition: # ExpBucketTypeCond
  128. self.diversion_bucket = FilterDiversionBucket(self.filter_condition)
  129. class ExperimentVersion:
  130. def __init__(self, exp_version_id, flow, buckets: str, exp_version_name=None, debug_users: str = '',
  131. config=None, debug_crowd_ids=None):
  132. self.id = int(exp_version_id)
  133. self.exp_version_name = exp_version_name
  134. self.config = config
  135. self.debug_crowd_ids = debug_crowd_ids
  136. self.debug_users = debug_users
  137. self.params = {}
  138. self.flow = flow
  139. self.buckets = buckets
  140. self.debug_user_set = set()
  141. self.diversion_bucket = None
  142. def add_debug_users(self, users: List[str]):
  143. self.debug_user_set.update(users)
  144. def match_debug_users(self, experiment_context):
  145. return experiment_context.uid in self.debug_user_set
  146. def match(self, experiment_context: ExperimentContext):
  147. if self.flow == 0:
  148. return False
  149. elif self.flow == 100:
  150. return True
  151. if self.diversion_bucket:
  152. return self.diversion_bucket.match(experiment_context)
  153. return False
  154. def init(self):
  155. self.debug_user_set.update(self.debug_users.split(","))
  156. self.diversion_bucket = UidDiversionBucket(100, self.buckets)
  157. params = json.loads(self.config)
  158. for kv in params:
  159. self.params[kv['key']] = kv['value']
  160. class Project:
  161. def __init__(self, project_name=None, project_id=None):
  162. self.project_name = project_name
  163. self.id = int(project_id)
  164. self.domains = []
  165. self.layers = []
  166. self.default_domain : Optional[Domain] = None
  167. self.layer_map = {}
  168. self.domain_map = {}
  169. def add_domain(self, domain):
  170. self.domains.append(domain)
  171. self.domain_map[domain.id] = domain
  172. def add_layer(self, layer):
  173. self.layers.append(layer)
  174. self.layer_map[layer.id] = layer
  175. def set_default_domain(self, domain: Domain):
  176. self.default_domain = domain
  177. class ExperimentResult:
  178. def __init__(self, project=None, experiment_context=None, project_name=None):
  179. self.project = project
  180. self.experiment_context = experiment_context
  181. self.project_name = project_name
  182. self.params = {}
  183. self.experiment_versions = []
  184. def add_params(self, params: Dict[str, str]):
  185. self.params.update(params)
  186. def add_experiment_version(self, version):
  187. self.experiment_versions.append(version)
  188. def init(self):
  189. # Initialize result-specific logic
  190. pass
  191. def __str__(self):
  192. return f"ExperimentResult(project={self.project_name}, params={self.params}, experiment_context={self.experiment_context}, experiment_versions={self.experiment_versions})"