  1. """
  2. @author: luojunhui
  3. 通过抓包 aigc 平台,自动化一些操作
  4. """
  5. import requests
  6. import json
  7. from applications.decoratorApi import retryOnTimeout
  8. from applications.denetMysql import DeNetMysql

HEADERS = {
    'Accept': 'application/json',
    'Accept-Language': 'zh,zh-CN;q=0.9',
    'Content-Type': 'application/json',
    'Origin': 'http://admin.cybertogether.net',
    'Proxy-Connection': 'keep-alive',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36'
}

# Session identity captured from the admin console; the token is the real
# credential, the remaining fields appear to be fixed placeholder values.
PERSON_COOKIE = {
    "token": "af54cdc404c3464d896745df389b2dce",
    "appType": 9,
    "platform": "pc",
    "appVersionCode": 1000,
    "clientTimestamp": 1,
    "fid": 1,
    "loginUid": 1,
    "pageSource": 1,
    "requestId": 1,
    "rid": 1,
    "uid": 1
}


def get_generated_article_title(generate_task_id):
    """
    Fetch the titles of articles already produced by a generate plan.
    :param generate_task_id: generate plan id
    :return: title_set
    """
    db = DeNetMysql()
    # Note: generate_task_id is interpolated directly into the SQL string,
    # so callers must only pass trusted ids.
    sql = f"""
        SELECT DISTINCT output.output
        FROM produce_plan_exe_record planExeRecord
        JOIN produce_plan_module_output output
            ON output.plan_exe_id = planExeRecord.plan_exe_id
            AND output.produce_module_type = 3
        WHERE planExeRecord.plan_id = '{generate_task_id}';
    """
    title_tuple = db.select(sql)
    title_set = {i[0] for i in title_tuple}
    return title_set
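

# Usage sketch: the title set is handy for de-duplicating candidates before
# crawling. `candidate_articles` and its "title" key are hypothetical inputs.
def _example_filter_new_articles(candidate_articles, generate_task_id):
    """
    Illustrative only (not called anywhere): drop candidates whose titles the
    generate plan has already produced.
    """
    existing_titles = get_generated_article_title(generate_task_id)
    return [a for a in candidate_articles if a["title"] not in existing_titles]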


def get_publish_account_from_aigc():
    """
    Fetch the accounts that currently have an active publish plan in the AIGC system.
    :return: list of dicts with
        name: official-account name
        gh_id: official-account gh_id
        follower_count: follower count
        service_type_info: account type: 0 - subscription account, 1 - subscription account upgraded from a legacy account, 2 - service account
        verify_type_info: verification type: -1 - unverified, 0 - WeChat verified, 1 - Sina Weibo verified, 3 - qualification verified but name not yet verified, 4 - qualification verified and name not yet verified, but Sina Weibo verified
    """
    db = DeNetMysql()
    sql = """
        SELECT DISTINCT
            t3.`name`,
            t3.gh_id,
            t3.follower_count,
            t3.create_timestamp,
            t4.service_type_info,
            t4.verify_type_info
        FROM publish_plan t1
        JOIN publish_plan_account t2 ON t1.id = t2.plan_id
        JOIN publish_account t3 ON t2.account_id = t3.id
        LEFT JOIN publish_account_wx_type t4 ON t3.id = t4.account_id
        WHERE t1.plan_status = 1
          AND t3.channel = 5
        GROUP BY t3.id
        ORDER BY t3.create_timestamp DESC
    """
    info_tuple = db.select(sql)
    info_list = [
        {
            "name": line[0],
            "ghId": line[1],
            "follower_count": line[2],
            # create_timestamp is stored in milliseconds; convert to seconds
            "account_init_timestamp": int(line[3] / 1000),
            "account_type": line[4],
            "account_auth": line[5]
        } for line in info_tuple
    ]
    return info_list
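

# Usage sketch: filter publishing accounts by the decoded type field. Per the
# docstring above, account_type (service_type_info) == 2 marks a service account.
def _example_list_service_accounts():
    """
    Illustrative only: keep the service accounts among the publishing accounts.
    """
    accounts = get_publish_account_from_aigc()
    return [acc for acc in accounts if acc["account_type"] == 2]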


def auto_create_crawler_task(plan_id, plan_name, plan_tag, url_list):
    """
    Create or extend a crawler plan from a list of urls.
    :param plan_id: plan id; pass an existing id to append articles to that plan, otherwise a new id is generated
    :param plan_name: plan name
    :param plan_tag: plan tag
    :param url_list: urls to crawl
    :return: response json
    """
    url = "http://aigc-api.cybertogether.net/aigc/crawler/plan/save"
    payload = json.dumps({
        "params": {
            "contentFilters": [],
            "accountFilters": [],
            "filterAccountMatchMode": 1,
            "filterContentMatchMode": 1,
            "selectModeValues": [],
            "searchModeValues": [],
            "contentModal": 3,
            "analyze": {},
            "crawlerComment": 0,
            "inputGroup": None,
            "inputSourceGroups": [],
            "modePublishTime": [],
            "planType": 2,
            "frequencyType": 2,
            "planTag": plan_tag,
            "tagPenetrateFlag": 0,
            "id": plan_id,
            "name": plan_name,
            "channel": 5,
            "crawlerMode": 5,
            "inputModeValues": url_list,
            "modePublishTimeStart": None,
            "modePublishTimeEnd": None,
            "executeRate": None,
            "executeDate": None,
            "executeWindowStart": None,
            "executeWindowEnd": None,
            "executeTimeInterval": None,
            "executeNum": None,
            "addModal": None,
            "addChannel": None,
            "fileUpload": None,
            "prompt": None,
            "acelFlag": None,
            "tasks": []
        },
        "baseInfo": PERSON_COOKIE
    })
    response = requests.post(url, headers=HEADERS, data=payload)
    return response.json()
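

# Usage sketch: create a brand-new crawler plan. plan_id=None lets the backend
# allocate an id; the "data" -> "id" path into the response is an assumption to
# verify against a captured /crawler/plan/save response.
def _example_create_crawler_plan(url_list):
    """
    Illustrative only: save a new crawler plan and return its (assumed) id.
    """
    res = auto_create_crawler_task(
        plan_id=None,
        plan_name="test-crawler-plan",
        plan_tag="test",
        url_list=url_list
    )
    return res.get("data", {}).get("id")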


def bind_crawler_task_to_generate_task(crawler_task_list, generate_task_id):
    """
    Bind crawler plans to a generate plan that already exists.
    :param crawler_task_list: crawler plan entries to add as input sources
    :param generate_task_id: target generate plan id
    :return: response
    """
    url = "http://aigc-api.cybertogether.net/aigc/produce/plan/save"
    plan_info = get_generate_task_detail(generate_task_id)
    input_source_groups = plan_info.get("inputSourceGroups")
    # Append the new crawler plans to the first group's existing input sources
    existed_crawler_task = input_source_groups[0].get("inputSources")
    new_task_list = existed_crawler_task + crawler_task_list
    input_source_group_0 = input_source_groups[0]
    input_source_group_0['inputSources'] = new_task_list
    payload = json.dumps({
        "params": {
            "contentFilters": [],
            "produceModal": plan_info.get("produceModal"),
            "inputModal": plan_info.get("inputModal"),
            "tasks": plan_info.get("tasks", []),
            "modules": [],
            "moduleGroups": plan_info.get("moduleGroups"),
            "inputSourceGroups": [input_source_group_0],
            "layoutType": plan_info.get("layoutType"),
            "activeManualReview": plan_info.get("activeManualReview"),
            "totalProduceNum": plan_info.get("totalProduceNum"),
            "dailyProduceNum": plan_info.get("dailyProduceNum"),
            "maxConcurrentNum": plan_info.get("maxConcurrentNum"),
            "id": generate_task_id,
            "name": plan_info.get("name"),
            "planTag": plan_info.get("planTag"),
            "tagPenetrateFlag": plan_info.get("tagPenetrateFlag"),
            "inputType": plan_info.get("inputType"),
            "inputChannel": plan_info.get("inputChannel"),
            "activeManualReviewCount": plan_info.get("activeManualReviewCount"),
            "autoComposite": plan_info.get("autoComposite")
        },
        "baseInfo": PERSON_COOKIE
    })
    response = requests.post(url, headers=HEADERS, data=payload)
    return response
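

# Usage sketch: copy the input sources of one generate plan onto another. The
# entry dicts come verbatim from get_generate_task_detail, so no field names
# have to be assumed; both plan ids are placeholders supplied by the caller.
def _example_rebind_sources(source_generate_task_id, target_generate_task_id):
    """
    Illustrative only: bind one plan's crawler sources to another plan.
    """
    detail = get_generate_task_detail(source_generate_task_id)
    groups = detail.get("inputSourceGroups") or []
    sources = groups[0].get("inputSources", []) if groups else []
    return bind_crawler_task_to_generate_task(sources, target_generate_task_id)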


@retryOnTimeout()
def get_generate_task_detail(generate_task_id):
    """
    Fetch a generate plan's detail, including the crawler plans already bound to it.
    :param generate_task_id: generate plan id
    :return: plan detail dict, or {} on failure
    """
    url = "http://aigc-api.cybertogether.net/aigc/produce/plan/detail"
    payload = json.dumps({
        "params": {
            "id": generate_task_id
        },
        "baseInfo": PERSON_COOKIE
    })
    response = requests.post(url, headers=HEADERS, data=payload, timeout=10)
    result = response.json()
    if result.get('msg') == 'success':
        return result['data']
    return {}
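

if __name__ == "__main__":
    # Smoke test (a sketch; the plan id below is a placeholder, not a real id):
    # list the accounts currently publishing, then dump one plan's detail.
    for account in get_publish_account_from_aigc():
        print(account["name"], account["ghId"], account["follower_count"])

    detail = get_generate_task_detail("REPLACE_WITH_REAL_PLAN_ID")
    print(json.dumps(detail, ensure_ascii=False, indent=2))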