# aiditApi.py
"""
@author: luojunhui
Automates operations against the AIGC platform via its captured (reverse-engineered) HTTP API.
"""
import requests
import json
from applications.decoratorApi import retryOnTimeout
from applications.denetMysql import DeNetMysql
# Common HTTP headers sent with every request to the AIGC admin backend.
HEADERS = {
    'Accept': 'application/json',
    'Accept-Language': 'zh,zh-CN;q=0.9',
    'Content-Type': 'application/json',
    'Origin': 'http://admin.cybertogether.net',
    'Proxy-Connection': 'keep-alive',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36'
}
# Fixed identity/session fields sent as "baseInfo" in every API payload.
# NOTE(review): the auth token is hard-coded here — presumably a long-lived
# service token; consider loading it from config/env instead of source.
PERSON_COOKIE = {
    "token": "af54cdc404c3464d896745df389b2dce",
    "appType": 9,
    "platform": "pc",
    "appVersionCode": 1000,
    "clientTimestamp": 1,
    "fid": 1,
    "loginUid": 1,
    "pageSource": 1,
    "requestId": 1,
    "rid": 1,
    "uid": 1
}
def get_generated_article_list(plan_id):
    """
    Fetch the crawled articles that feed a given produce plan.

    Resolves the crawler plan ids referenced by the produce plan's input
    sources, then joins crawler results with their content and account rows.

    :param plan_id: produce plan id whose input crawler plans are looked up
    :return: rows of (wx_gh, title, content_link, view_count, like_count,
             crawl time, publish time) as returned by ``db.select``
    """
    db = DeNetMysql()
    # NOTE(review): plan_id is interpolated directly into the SQL string —
    # acceptable only for trusted internal ids; parameterize if this value
    # can ever originate from user input.
    sql = f"""
        SELECT
            account.wx_gh,
            content.title,
            content.content_link,
            content.view_count,
            content.like_count,
            from_unixtime(cprr.create_timestamp / 1000) AS 抓取时间,
            from_unixtime(content.publish_timestamp / 1000) AS 发布时间
        FROM crawler_plan_result_rel cprr
        JOIN crawler_plan plan ON cprr.plan_id = plan.id
        JOIN crawler_content content ON cprr.channel_source_id = content.channel_content_id
        JOIN crawler_account account ON content.channel_account_id = account.channel_account_id
        WHERE plan_id IN (
            SELECT
                input_source_value
            FROM
                produce_plan_input_source
            WHERE plan_id = '{plan_id}'
        );
    """
    article_list = db.select(sql)
    return article_list
  55. def get_generated_article_title(generate_task_id):
  56. """
  57. 生成计划 id 获取该生成计划已经生成过的文章标题
  58. :param generate_task_id:
  59. :return: title_set
  60. """
  61. db = DeNetMysql()
  62. sql = f"""
  63. SELECT DISTINCT output.output
  64. FROM produce_plan_exe_record planExeRecord
  65. JOIN produce_plan_module_output output ON output.plan_exe_id = planExeRecord.plan_exe_id AND output.produce_module_type = 3
  66. WHERE planExeRecord.plan_id = '{generate_task_id}';
  67. """
  68. title_tuple = db.select(sql)
  69. title_set = set([i[0] for i in title_tuple])
  70. return title_set
  71. def get_publish_account_from_aigc():
  72. """
  73. 从 aigc 系统中获取正在发布的账号
  74. :return:
  75. name: 公众号名称
  76. gh_id: 公众号 gh_id
  77. follower_count: 粉丝数量
  78. service_type_info: '公众号类型:0-订阅号,1-由历史老账号升级后的订阅号,2-服务号',
  79. verify_type_info:'公众号认证类型:-1-未认证,0-微信认证,1-新浪微博认证,3-已资质认证通过但还未通过名称认证,4-已资质认证通过、还未通过名称认证,但通过了新浪微博认证'
  80. """
  81. db = DeNetMysql()
  82. sql = """
  83. SELECT DISTINCT
  84. t3.`name`,
  85. t3.gh_id,
  86. t3.follower_count,
  87. t3.create_timestamp,
  88. t4.service_type_info,
  89. t4.verify_type_info
  90. FROM
  91. publish_plan t1
  92. JOIN publish_plan_account t2 ON t1.id = t2.plan_id
  93. JOIN publish_account t3 ON t2.account_id = t3.id
  94. LEFT JOIN publish_account_wx_type t4 on t3.id = t4.account_id
  95. WHERE
  96. t1.plan_status = 1
  97. AND t3.channel = 5
  98. GROUP BY t3.id
  99. ORDER BY t3.create_timestamp DESC
  100. """
  101. info_tuple = db.select(sql)
  102. info_list = [
  103. {
  104. "name": line[0],
  105. "ghId": line[1],
  106. "follower_count": line[2],
  107. "account_init_timestamp": int(line[3] / 1000),
  108. "account_type": line[4],
  109. "account_auth": line[5]
  110. } for line in info_tuple
  111. ]
  112. return info_list
  113. def auto_create_crawler_task(plan_id, plan_name, plan_tag, url_list):
  114. """
  115. 通过 url 自动创建抓取计划
  116. :param plan_id: 计划 id, 若往已经存在的 plan_id 中加文章则需要传,否则会新生成一个 id
  117. :param plan_name: 计划名称
  118. :param plan_tag: 计划标签
  119. :param url_list: 输入的 url_list
  120. :return:
  121. """
  122. url = "http://aigc-api.cybertogether.net/aigc/crawler/plan/save"
  123. payload = json.dumps({
  124. "params": {
  125. "contentFilters": [],
  126. "accountFilters": [],
  127. "filterAccountMatchMode": 1,
  128. "filterContentMatchMode": 1,
  129. "selectModeValues": [],
  130. "searchModeValues": [],
  131. "contentModal": 3,
  132. "analyze": {},
  133. "crawlerComment": 0,
  134. "inputGroup": None,
  135. "inputSourceGroups": [],
  136. "modePublishTime": [],
  137. "planType": 2,
  138. "frequencyType": 2,
  139. "planTag": plan_tag,
  140. "tagPenetrateFlag": 0,
  141. "id": plan_id,
  142. "name": plan_name,
  143. "channel": 5,
  144. "crawlerMode": 5,
  145. "inputModeValues": url_list,
  146. "modePublishTimeStart": None,
  147. "modePublishTimeEnd": None,
  148. "executeRate": None,
  149. "executeDate": None,
  150. "executeWindowStart": None,
  151. "executeWindowEnd": None,
  152. "executeTimeInterval": None,
  153. "executeNum": None,
  154. "addModal": None,
  155. "addChannel": None,
  156. "fileUpload": None,
  157. "prompt": None,
  158. "acelFlag": None,
  159. "tasks": []
  160. },
  161. "baseInfo": PERSON_COOKIE
  162. })
  163. response = requests.request("POST", url, headers=HEADERS, data=payload)
  164. return response.json()
  165. def bind_crawler_task_to_generate_task(crawler_task_list, generate_task_id):
  166. """
  167. 将抓取计划绑定至生成计划
  168. 生成计划已经存在
  169. :crawler_task_list: 要输入的抓取计划List
  170. :generate_task_id: 目标生成计划 id
  171. :return: response
  172. """
  173. url = "http://aigc-api.cybertogether.net/aigc/produce/plan/save"
  174. plan_info = get_generate_task_detail(generate_task_id)
  175. input_source_groups = plan_info.get("inputSourceGroups")
  176. existed_crawler_task = input_source_groups[0].get("inputSources")
  177. new_task_list = existed_crawler_task + crawler_task_list
  178. input_source_group_0 = input_source_groups[0]
  179. input_source_group_0['inputSources'] = new_task_list
  180. payload = json.dumps({
  181. "params": {
  182. "contentFilters": [],
  183. "produceModal": plan_info.get("produceModal"),
  184. "inputModal": plan_info.get("inputModal"),
  185. "tasks": plan_info.get("tasks", []),
  186. "modules": [],
  187. "moduleGroups": plan_info.get("moduleGroups"),
  188. "inputSourceGroups": [input_source_group_0],
  189. "layoutType": plan_info.get("layoutType"),
  190. "activeManualReview": plan_info.get("activeManualReview"),
  191. "totalProduceNum": plan_info.get("totalProduceNum"),
  192. "dailyProduceNum": plan_info.get("dailyProduceNum"),
  193. "maxConcurrentNum": plan_info.get("maxConcurrentNum"),
  194. "id": generate_task_id,
  195. "name": plan_info.get("name"),
  196. "planTag": plan_info.get("planTag"),
  197. "tagPenetrateFlag": plan_info.get("tagPenetrateFlag"),
  198. "inputType": plan_info.get("inputType"),
  199. "inputChannel": plan_info.get("inputChannel"),
  200. "activeManualReviewCount": plan_info.get("activeManualReviewCount"),
  201. "autoComposite": plan_info.get("autoComposite")
  202. },
  203. "baseInfo": PERSON_COOKIE
  204. })
  205. response = requests.request("POST", url, headers=HEADERS, data=payload)
  206. return response.json()
  207. @retryOnTimeout()
  208. def get_generate_task_detail(generate_task_id):
  209. """
  210. 通过生成计划的 id,获取该生成计划已有的抓取计划 list
  211. :param generate_task_id:
  212. :return:
  213. """
  214. url = "http://aigc-api.cybertogether.net/aigc/produce/plan/detail"
  215. payload = json.dumps({
  216. "params": {
  217. "id": generate_task_id
  218. },
  219. "baseInfo": PERSON_COOKIE
  220. })
  221. response = requests.request("POST", url, headers=HEADERS, data=payload, timeout=10)
  222. result = response.json()
  223. if result['msg'] == 'success':
  224. return result['data']
  225. else:
  226. return {}