  1. """
  2. @author: luojunhui
  3. 通过抓包 aigc 平台,自动化一些操作
  4. """

import json

import requests

from applications.decoratorApi import retryOnTimeout
from applications.denetMysql import DeNetMysql
from config import apolloConfig

config = apolloConfig()
backup_gzh_account_name = json.loads(config.getConfigValue("backup_gzh_account_name"))

HEADERS = {
    'Accept': 'application/json',
    'Accept-Language': 'zh,zh-CN;q=0.9',
    'Content-Type': 'application/json',
    'Origin': 'http://admin.cybertogether.net',
    'Proxy-Connection': 'keep-alive',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36'
}

PERSON_COOKIE = {
    "token": "af54cdc404c3464d896745df389b2dce",
    "appType": 9,
    "platform": "pc",
    "appVersionCode": 1000,
    "clientTimestamp": 1,
    "fid": 1,
    "loginUid": 1,
    "pageSource": 1,
    "requestId": 1,
    "rid": 1,
    "uid": 1
}


def get_generated_article_list(plan_id):
    """
    Given a produce plan id, fetch the articles that plan has already generated.
    :param plan_id: produce plan id
    :return: list of article rows
    """
    db = DeNetMysql()
    sql = f"""
        SELECT
            account.wx_gh,
            content.title,
            content.content_link,
            content.view_count,
            content.like_count,
            from_unixtime(cprr.create_timestamp / 1000) AS crawl_time,
            from_unixtime(content.publish_timestamp / 1000) AS publish_time
        FROM crawler_plan_result_rel cprr
        JOIN crawler_plan plan ON cprr.plan_id = plan.id
        JOIN crawler_content content ON cprr.channel_source_id = content.channel_content_id
        JOIN crawler_account account ON content.channel_account_id = account.channel_account_id
        WHERE cprr.plan_id IN (
            SELECT input_source_value
            FROM produce_plan_input_source
            WHERE plan_id = '{plan_id}'
        );
    """
    article_list = db.select(sql)
    return article_list
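

# Minimal usage sketch (the plan id is hypothetical); each row carries the
# seven columns selected above:
#
#     articles = get_generated_article_list("20240813xxxx")
#     for wx_gh, title, link, views, likes, crawl_time, publish_time in articles:
#         print(title, link, views)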


def get_generated_article_title(generate_task_id):
    """
    Given a produce plan id, fetch the titles of the articles it has already generated.
    :param generate_task_id: produce plan id
    :return: title_set
    """
    db = DeNetMysql()
    sql = f"""
        SELECT DISTINCT output.output
        FROM produce_plan_exe_record planExeRecord
        JOIN produce_plan_module_output output
            ON output.plan_exe_id = planExeRecord.plan_exe_id
            AND output.produce_module_type = 3
        WHERE planExeRecord.plan_id = '{generate_task_id}';
    """
    title_tuple = db.select(sql)
    title_set = {i[0] for i in title_tuple}
    return title_set
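

# Dedup sketch built on the title set (the task id and titles are hypothetical):
#
#     existing_titles = get_generated_article_title("20240601xxxx")
#     candidates = ["a new title", "an already generated title"]
#     fresh = [t for t in candidates if t not in existing_titles]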


def get_publish_account_from_aigc():
    """
    Fetch the accounts that are currently publishing in the AIGC system.
    :return: list of dicts with
        name: official account (gzh) name
        gh_id: official account gh_id
        follower_count: follower count
        service_type_info: account type: 0 - subscription account, 1 - subscription account
            upgraded from a legacy account, 2 - service account
        verify_type_info: verification type: -1 - unverified, 0 - WeChat verified, 1 - Sina Weibo
            verified, 3 - qualification verified but name verification pending, 4 - qualification
            verified, name verification pending, but Sina Weibo verified
    """
    db = DeNetMysql()
    sql = """
        SELECT DISTINCT
            t3.`name`,
            t3.gh_id,
            t3.follower_count,
            t3.create_timestamp,
            t4.service_type_info,
            t4.verify_type_info,
            t3.id,
            group_concat(distinct t5.remark) as account_remark
        FROM
            publish_plan t1
            JOIN publish_plan_account t2 ON t1.id = t2.plan_id
            JOIN publish_account t3 ON t2.account_id = t3.id
            LEFT JOIN publish_account_wx_type t4 on t3.id = t4.account_id
            LEFT JOIN publish_account_remark t5 on t3.id = t5.publish_account_id
        WHERE
            t1.plan_status = 1
            AND t3.channel = 5
        GROUP BY t3.id;
    """
    info_tuple = db.select(sql)
    info_list = [
        {
            # fall back to the backup name map (presumably keyed by gh_id) when the name is empty
            "name": line[0] if line[0] else backup_gzh_account_name.get(line[1], line[1]),
            "ghId": line[1],
            "follower_count": line[2],
            "account_init_timestamp": int(line[3]),
            "account_type": line[4],
            "account_auth": line[5],
            "account_id": line[6]
        }
        # skip accounts whose remark contains '自动回复' (auto-reply)
        for line in info_tuple if '自动回复' not in str(line[7])
    ]
    return info_list
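

# Minimal usage sketch:
#
#     for account in get_publish_account_from_aigc():
#         print(account["name"], account["ghId"], account["follower_count"])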


def auto_create_crawler_task(plan_id, plan_name, plan_tag, url_list, article_source):
    """
    Create a crawler plan from a list of urls.
    :param plan_id: plan id; pass one to append articles to an existing plan, otherwise a new id is generated
    :param plan_name: plan name
    :param plan_tag: plan tag
    :param url_list: input url list
    :param article_source: article source ("toutiao" or "weixin")
    :return: response json, or None for an unknown source
    """
    match article_source:
        case "toutiao":
            channel = 6
        case "weixin":
            channel = 5
        case _:
            return None
    url = "http://aigc-api.cybertogether.net/aigc/crawler/plan/save"
    payload = json.dumps({
        "params": {
            "contentFilters": [],
            "accountFilters": [],
            "filterAccountMatchMode": 1,
            "filterContentMatchMode": 1,
            "selectModeValues": [],
            "searchModeValues": [],
            "contentModal": 3,
            "analyze": {},
            "crawlerComment": 0,
            "inputGroup": None,
            "inputSourceGroups": [],
            "modePublishTime": [],
            "planType": 2,
            "frequencyType": 2,
            "planTag": plan_tag,
            "tagPenetrateFlag": 0,
            "id": plan_id,
            "name": plan_name,
            "channel": channel,
            "crawlerMode": 5,
            "inputModeValues": url_list,
            "modePublishTimeStart": None,
            "modePublishTimeEnd": None,
            "executeRate": None,
            "executeDate": None,
            "executeWindowStart": None,
            "executeWindowEnd": None,
            "executeTimeInterval": None,
            "executeNum": None,
            "addModal": None,
            "addChannel": None,
            "fileUpload": None,
            "prompt": None,
            "acelFlag": None,
            "tasks": []
        },
        "baseInfo": PERSON_COOKIE
    })
    response = requests.request("POST", url, headers=HEADERS, data=payload)
    return response.json()
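

# Minimal usage sketch (urls are hypothetical; pass plan_id=None to create a new plan):
#
#     result = auto_create_crawler_task(
#         plan_id=None,
#         plan_name="demo crawler plan",
#         plan_tag="demo",
#         url_list=["https://mp.weixin.qq.com/s/xxxx"],
#         article_source="weixin",
#     )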


def bind_crawler_task_to_generate_task(crawler_task_list, generate_task_id):
    """
    Bind crawler plans to an existing produce (generate) plan.
    :param crawler_task_list: crawler plans to bind
    :param generate_task_id: target produce plan id
    :return: response
    """
    url = "http://aigc-api.cybertogether.net/aigc/produce/plan/save"
    plan_info = get_generate_task_detail(generate_task_id)
    input_source_groups = plan_info.get("inputSourceGroups")
    existed_crawler_task = input_source_groups[0].get("inputSources")
    new_task_list = existed_crawler_task + crawler_task_list
    input_source_group_0 = input_source_groups[0]
    input_source_group_0['inputSources'] = new_task_list
    payload = json.dumps({
        "params": {
            "contentFilters": [],
            "produceModal": plan_info.get("produceModal"),
            "inputModal": plan_info.get("inputModal"),
            "tasks": plan_info.get("tasks", []),
            "modules": [],
            "moduleGroups": plan_info.get("moduleGroups"),
            "inputSourceGroups": [input_source_group_0],
            "layoutType": plan_info.get("layoutType"),
            "activeManualReview": plan_info.get("activeManualReview"),
            "totalProduceNum": plan_info.get("totalProduceNum"),
            "dailyProduceNum": plan_info.get("dailyProduceNum"),
            "maxConcurrentNum": plan_info.get("maxConcurrentNum"),
            "id": generate_task_id,
            "name": plan_info.get("name"),
            "planTag": plan_info.get("planTag"),
            "tagPenetrateFlag": plan_info.get("tagPenetrateFlag"),
            "inputType": plan_info.get("inputType"),
            "inputChannel": plan_info.get("inputChannel"),
            "activeManualReviewCount": plan_info.get("activeManualReviewCount"),
            "autoComposite": plan_info.get("autoComposite")
        },
        "baseInfo": PERSON_COOKIE
    })
    response = requests.request("POST", url, headers=HEADERS, data=payload)
    return response.json()
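

# Create-then-bind workflow sketch (ids are hypothetical, and it assumes the
# crawler save response carries the new plan id under data["id"]; the
# inputSource shape mirrors the one used in bind_crawler_task_to_publish_task):
#
#     crawler_plan = auto_create_crawler_task(None, "demo plan", "demo", urls, "weixin")
#     crawler_plan_id = crawler_plan["data"]["id"]
#     bind_crawler_task_to_generate_task(
#         crawler_task_list=[{
#             "sourceCategory": 1,
#             "inputSourceValueType": 1,
#             "inputSourceValue": crawler_plan_id,
#             "inputSourceLabel": "demo plan",
#         }],
#         generate_task_id="20240601xxxx",
#     )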


@retryOnTimeout()
def get_generate_task_detail(generate_task_id):
    """
    Given a produce (generate) plan id, fetch the plan detail, including its bound crawler plans.
    :param generate_task_id: produce plan id
    :return: plan detail dict, or {} on failure
    """
    url = "http://aigc-api.cybertogether.net/aigc/produce/plan/detail"
    payload = json.dumps({
        "params": {
            "id": generate_task_id
        },
        "baseInfo": PERSON_COOKIE
    })
    response = requests.request("POST", url, headers=HEADERS, data=payload, timeout=10)
    result = response.json()
    if result['msg'] == 'success':
        return result['data']
    else:
        return {}


@retryOnTimeout()
def get_publish_task_detail(publish_task_id):
    """
    Given a publish plan id, fetch the plan detail, including its bound crawler plans.
    :param publish_task_id: publish plan id
    :return: response json
    """
    url = "http://aigc-api.cybertogether.net/aigc/publish/plan/detail"
    payload = json.dumps({
        "params": {
            "id": publish_task_id
        },
        "baseInfo": PERSON_COOKIE
    })
    # timeout matches get_generate_task_detail so retryOnTimeout can actually fire
    response = requests.request("POST", url, headers=HEADERS, data=payload, timeout=10)
    return response.json()


def bind_crawler_task_to_publish_task(target_publish_task_id, crawler_task_name, crawler_task_id):
    """
    Bind a crawler plan to an existing publish plan.
    :param target_publish_task_id: target publish plan id
    :param crawler_task_name: crawler plan name
    :param crawler_task_id: crawler plan id
    :return: response json, or None if the publish plan detail could not be fetched
    """
    publish_task_detail = get_publish_task_detail(target_publish_task_id)
    publish_task_detail_data = publish_task_detail.get("data")
    # guard before touching the detail, otherwise a missing plan raises below
    if not publish_task_detail_data:
        return None
    already_exist_crawler_task_list = publish_task_detail_data.get("inputGroups")[0].get("inputSources")
    new_crawler_task_list = [
        {
            "sourceCategory": 1,
            "inputSourceValueType": 1,
            "inputSourceValue": crawler_task_id,
            "inputSourceLabel": crawler_task_name
        }
    ]
    new_input_source_group = already_exist_crawler_task_list + new_crawler_task_list
    url = "http://aigc-api.cybertogether.net/aigc/publish/plan/save"
    payload = json.dumps({
        "params": {
            "accountIds": [i['id'] for i in publish_task_detail_data.get("accountIds")],
            "inputGroups": [
                {
                    "groupId": "e40cd06daeb5345ed26256c8744f7a33",
                    "groupName": None,
                    "channel": None,
                    "contentModal": None,
                    "groupIndex": 1,
                    "filterMatchMode": 2,
                    "inputSources": new_input_source_group,
                    "inputFilters": [],
                    "inputOrders": [],
                    "label": "input1"
                }
            ],
            "inputSources": [],
            "inputFilters": [],
            "activeManualReview": publish_task_detail_data.get("activeManualReview"),
            "channel": publish_task_detail_data.get("channel"),
            "contentAllocationRules": publish_task_detail_data.get("contentAllocationRules"),
            "contentModal": publish_task_detail_data.get("contentModal"),
            "contentSortingRules": publish_task_detail_data.get("contentSortingRules"),
            "douyinPublishAccoutSetting": publish_task_detail_data.get("douyinPublishAccoutSetting"),
            "filterMatchMode": 1,
            "name": publish_task_detail_data.get("name"),
            "publishAccoutJson": "",
            "publishBgmType": publish_task_detail_data.get("publishBgmType"),
            "publishDate": publish_task_detail_data.get("publishDate"),
            "publishLocation": publish_task_detail_data.get("publishLocation"),
            "publishNum": publish_task_detail_data.get("publishNum"),
            "publishPushTime": publish_task_detail_data.get("publishPushTime"),
            "publishRate": publish_task_detail_data.get("publishRate"),
            "publishTimeInterval": publish_task_detail_data.get("publishTimeInterval"),
            "publishWindowEnd": publish_task_detail_data.get("publishWindowEnd"),
            "publishWindowStart": publish_task_detail_data.get("publishWindowStart"),
            "wxContentInsert": publish_task_detail_data.get("wxContentInsert"),
            "wxVideoPublishAccountSetting": publish_task_detail_data.get("wxVideoPublishAccountSetting"),
            "scoreJudgeFlag": publish_task_detail_data.get("scoreJudgeFlag"),
            "scoreJudgeTasks": publish_task_detail_data.get("scoreJudgeTasks"),
            "machineReviewMatchMode": publish_task_detail_data.get("machineReviewMatchMode"),
            "id": publish_task_detail_data.get("id"),
            "planType": publish_task_detail_data.get("planType"),
            "planTag": publish_task_detail_data.get("planTag"),
            "tagPenetrateFlag": publish_task_detail_data.get("tagPenetrateFlag"),
            "actionObjects": publish_task_detail_data.get("actionObjects"),
            "actionContents": publish_task_detail_data.get("actionContents"),
            "accountFrom": publish_task_detail_data.get("accountFrom"),
            "actionContentAllocationRule": publish_task_detail_data.get("actionContentAllocationRule"),
            "publishPerNum": publish_task_detail_data.get("publishPerNum"),
            "publishPerMinNum": publish_task_detail_data.get("publishPerMinNum"),
            "pushType": publish_task_detail_data.get("pushType"),
            "triggerEvent": publish_task_detail_data.get("triggerEvent"),
            "pushContentSortingRules": publish_task_detail_data.get("pushContentSortingRules"),
            "biliDistrict": publish_task_detail_data.get("biliDistrict"),
            "firstItemScoreJudgeTaskId": publish_task_detail_data.get("firstItemScoreJudgeTaskId"),
            "secondItemScoreJudgeTaskId": publish_task_detail_data.get("secondItemScoreJudgeTaskId"),
            "otherItemScoreJudgeTaskId": publish_task_detail_data.get("otherItemScoreJudgeTaskId"),
            "gzhArticleSortFlag": publish_task_detail_data.get("gzhArticleSortFlag"),
            "gzhArticleSortTask": publish_task_detail_data.get("gzhArticleSortTask"),
            "miniprogramInsertFlag": publish_task_detail_data.get("miniprogramInsertFlag"),
            "miniprogramInsertTasks": publish_task_detail_data.get("miniprogramInsertTasks"),
            "machineReviewConditions": publish_task_detail_data.get("machineReviewConditions"),
            "gzhTriggerSyncFrequency": publish_task_detail_data.get("gzhTriggerSyncFrequency"),
            "gzhTriggerSendContentType": publish_task_detail_data.get("gzhTriggerSendContentType"),
            "longArticleSystemHost": publish_task_detail_data.get("longArticleSystemHost"),
        },
        "baseInfo": PERSON_COOKIE
    })
    response = requests.request("POST", url, headers=HEADERS, data=payload)
    return response.json()
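

# Minimal usage sketch (ids are hypothetical):
#
#     bind_crawler_task_to_publish_task(
#         target_publish_task_id="20240601xxxx",
#         crawler_task_name="demo crawler plan",
#         crawler_task_id="20240601yyyy",
#     )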


def delete_articles(gh_id, title):
    """
    Delete an official-account (gzh) article by title.
    :param gh_id: official account gh_id
    :param title: article title
    :return: response
    """
    url = "http://101.37.174.139:80/articleAudit/titleDangerFindDelete"
    payload = {
        "ghId": gh_id,
        "title": title
    }
    headers = {
        'Content-Type': 'application/json;charset=UTF-8'
    }
    response = requests.request("POST", url, headers=headers, json=payload, timeout=600)
    return response


def get_only_auto_reply_accounts():
    """
    Fetch the ids of accounts whose remark contains '即转' (repost-only).
    :return: set of publish account ids
    """
    sql = "select publish_account_id from publish_account_remark where remark like '%即转%';"
    denet = DeNetMysql()
    result = denet.select(sql)
    account_id_list = [i[0] for i in result]
    return set(account_id_list)
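

# Sketch: drop repost-only accounts from the publish account list, combining
# the two helpers above (field names are those returned by get_publish_account_from_aigc):
#
#     repost_only = get_only_auto_reply_accounts()
#     accounts = [
#         a for a in get_publish_account_from_aigc()
#         if a["account_id"] not in repost_only
#     ]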


def auto_create_single_video_crawler_task(plan_name, plan_tag, video_id_list):
    """
    Create a crawler plan (channel 10) that crawls single videos by id.
    :param plan_name: plan name
    :param plan_tag: plan tag
    :param video_id_list: input video id list
    :return: response json
    """
    url = "http://aigc-api.cybertogether.net/aigc/crawler/plan/save"
    payload = json.dumps({
        "params": {
            "contentFilters": [],
            "accountFilters": [],
            "filterAccountMatchMode": 1,
            "filterContentMatchMode": 1,
            "selectModeValues": [],
            "searchModeValues": [],
            "contentModal": 4,
            "analyze": {},
            "crawlerComment": 0,
            "inputGroup": [],
            "inputSourceGroups": [],
            "modePublishTime": [],
            "name": plan_name,
            "frequencyType": 2,
            "channel": 10,
            "crawlerMode": 5,
            "planTag": plan_tag,
            "voiceExtractFlag": 1,
            "srtExtractFlag": 1,
            "videoKeyFrameType": 1,
            "inputModeValues": video_id_list,
            "planType": 2
        },
        "baseInfo": PERSON_COOKIE
    })
    response = requests.request("POST", url, headers=HEADERS, data=payload)
    return response.json()
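

# Minimal usage sketch (video ids are hypothetical):
#
#     auto_create_single_video_crawler_task(
#         plan_name="demo video plan",
#         plan_tag="demo",
#         video_id_list=["7412xxxx", "7413xxxx"],
#     )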