import json
from typing import Optional, Dict, List, TypedDict

from applications.utils import AsyncHttpClient

HEADERS = {
    "Accept": "application/json",
    "Accept-Language": "zh,zh-CN;q=0.9",
    "Content-Type": "application/json",
    "Origin": "http://admin.cybertogether.net",
    "Proxy-Connection": "keep-alive",
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
}

PERSON_COOKIE = {
    "token": "af54cdc404c3464d896745df389b2dce",
    "appType": 9,
    "platform": "pc",
    "appVersionCode": 1000,
    "clientTimestamp": 1,
    "fid": 1,
    "loginUid": 1,
    "pageSource": 1,
    "requestId": 1,
    "rid": 1,
    "uid": 1,
}


class RelationDict(TypedDict):
    videoPoolTraceId: str
    channelContentId: str
    platform: str


async def delete_illegal_gzh_articles(gh_id: str, title: str):
    """
    Delete illegal gzh (WeChat official account) articles by title.

    :param gh_id: official account (gzh) id
    :param title: article title
    """
    url = "http://101.37.174.139:80/articleAudit/titleDangerFindDelete"
    payload = {
        "title": title,
        "ghId": gh_id,
    }
    headers = {"Content-Type": "application/json;charset=UTF-8"}
    async with AsyncHttpClient(timeout=600) as client:
        res = await client.post(url=url, headers=headers, json=payload)
    return res
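

# Illustrative usage sketch only (not called anywhere in this module); the
# gh_id and title below are placeholder values, not real data.
async def _example_delete_illegal_article():
    res = await delete_illegal_gzh_articles(
        gh_id="gh_xxxxxxxxxxxx",  # hypothetical official-account id
        title="some flagged article title",  # hypothetical title
    )
    return res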


async def auto_create_crawler_task(plan_id, plan_name, plan_tag, url_list, platform):
    """
    Create a crawler plan in the AIGC system.

    :param plan_id: crawler plan id
    :param plan_name: crawler plan name
    :param plan_tag: crawler plan tag
    :param url_list: list of urls to crawl
    :param platform: "weixin" or "toutiao"
    """
    match platform:
        case "weixin":
            channel = 5
        case "toutiao":
            channel = 6
        case _:
            raise RuntimeError(f"Unsupported platform: {platform}")

    url = "http://aigc-api.cybertogether.net/aigc/crawler/plan/save"
    payload = {
        "params": {
            "contentFilters": [],
            "accountFilters": [],
            "filterAccountMatchMode": 1,
            "filterContentMatchMode": 1,
            "selectModeValues": [],
            "searchModeValues": [],
            "contentModal": 3,
            "analyze": {},
            "crawlerComment": 0,
            "inputGroup": None,
            "inputSourceGroups": [],
            "modePublishTime": [],
            "planType": 2,
            "frequencyType": 2,
            "planTag": plan_tag,
            "tagPenetrateFlag": 0,
            "id": plan_id,
            "name": plan_name,
            "channel": channel,
            "crawlerMode": 5,
            "inputModeValues": url_list,
            "modePublishTimeStart": None,
            "modePublishTimeEnd": None,
            "executeRate": None,
            "executeDate": None,
            "executeWindowStart": None,
            "executeWindowEnd": None,
            "executeTimeInterval": None,
            "executeNum": None,
            "addModal": None,
            "addChannel": None,
            "fileUpload": None,
            "prompt": None,
            "acelFlag": None,
            "tasks": [],
        },
        "baseInfo": PERSON_COOKIE,
    }
    async with AsyncHttpClient(timeout=600) as client:
        res = await client.post(url=url, headers=HEADERS, json=payload)
    return res
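

# Illustrative usage sketch only (not called in this module); the plan name,
# tag and url are placeholders, and passing plan_id=None is an assumption that
# the save endpoint creates a new plan when no id is supplied.
async def _example_create_weixin_crawler_plan():
    res = await auto_create_crawler_task(
        plan_id=None,  # assumption: no id means "create new plan"
        plan_name="demo crawler plan",
        plan_tag="demo",
        url_list=["https://mp.weixin.qq.com/s/xxxxxxxx"],  # hypothetical article url
        platform="weixin",  # maps to channel 5; "toutiao" maps to channel 6
    )
    return res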


async def add_to_crawler_task():
    """Placeholder, not implemented yet."""
    pass


async def get_crawler_task_detail():
    """Placeholder, not implemented yet."""
    pass


async def auto_bind_crawler_task_to_generate_task(crawler_task_list, generate_task_id):
    """
    Bind crawler tasks to an existing generate (produce) plan: append them to
    the plan's first input source group and save the plan back.

    :param crawler_task_list: crawler tasks (input sources) to append
    :param generate_task_id: id of the generate plan
    """
    url = "http://aigc-api.cybertogether.net/aigc/produce/plan/save"
    plan_info = await get_generate_task_detail(generate_task_id)
    input_source_groups = plan_info.get("inputSourceGroups")
    existed_crawler_task = input_source_groups[0].get("inputSources")
    new_task_list = existed_crawler_task + crawler_task_list
    input_source_group_0 = input_source_groups[0]
    input_source_group_0["inputSources"] = new_task_list
    payload = json.dumps(
        {
            "params": {
                "contentFilters": [],
                "produceModal": plan_info.get("produceModal"),
                "inputModal": plan_info.get("inputModal"),
                "tasks": plan_info.get("tasks", []),
                "modules": [],
                "moduleGroups": plan_info.get("moduleGroups"),
                "inputSourceGroups": [input_source_group_0],
                "layoutType": plan_info.get("layoutType"),
                "activeManualReview": plan_info.get("activeManualReview"),
                "totalProduceNum": plan_info.get("totalProduceNum"),
                "dailyProduceNum": plan_info.get("dailyProduceNum"),
                "maxConcurrentNum": plan_info.get("maxConcurrentNum"),
                "id": generate_task_id,
                "name": plan_info.get("name"),
                "planTag": plan_info.get("planTag"),
                "tagPenetrateFlag": plan_info.get("tagPenetrateFlag"),
                "inputType": plan_info.get("inputType"),
                "inputChannel": plan_info.get("inputChannel"),
                "activeManualReviewCount": plan_info.get("activeManualReviewCount"),
                "autoComposite": plan_info.get("autoComposite"),
            },
            "baseInfo": PERSON_COOKIE,
        }
    )
    async with AsyncHttpClient(timeout=600) as client:
        response = await client.post(url=url, headers=HEADERS, data=payload)
    return response
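

# Illustrative usage sketch only (not called in this module). The shape of each
# crawler task entry is an assumption: it should mirror the items found under
# inputSourceGroups[0]["inputSources"] in the detail returned by
# get_generate_task_detail; the field names and ids below are hypothetical.
async def _example_bind_crawler_tasks():
    crawler_task_list = [
        {
            "inputSourceType": 2,  # assumed field name, for illustration only
            "inputSourceValue": 123456,  # hypothetical crawler plan id
        }
    ]
    return await auto_bind_crawler_task_to_generate_task(
        crawler_task_list=crawler_task_list,
        generate_task_id="20250101xxxx",  # hypothetical generate plan id
    )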


async def get_generate_task_detail(generate_task_id):
    """
    Fetch the detail of a generate (produce) plan by its id, including the
    list of crawler plans already bound to it.

    :param generate_task_id: id of the generate plan
    :return: plan detail dict on success, otherwise an empty dict
    """
    url = "http://aigc-api.cybertogether.net/aigc/produce/plan/detail"
    payload = json.dumps(
        {"params": {"id": generate_task_id}, "baseInfo": PERSON_COOKIE}
    )
    async with AsyncHttpClient(timeout=600) as client:
        res = await client.post(url=url, headers=HEADERS, data=payload)
    if res["msg"] == "success":
        return res["data"]
    else:
        return {}


async def insert_crawler_relation_to_aigc_system(
    relation_list: List[RelationDict],
) -> Optional[Dict]:
    """
    Report video-pool crawler relations to the AIGC system.

    :param relation_list: list of RelationDict entries
    :return: response of the AIGC system
    """
    url = "http://aigc-api.cybertogether.net/aigc/crawler/content/videoPoolCrawlerRelation"
    payload = json.dumps({"params": {"relations": relation_list}})
    async with AsyncHttpClient(timeout=60) as client:
        res = await client.post(url=url, headers=HEADERS, data=payload)
    return res
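

# Illustrative usage sketch only (not called in this module); the trace id and
# content id below are placeholders. A script could drive it with
# asyncio.run(_example_insert_crawler_relation()).
async def _example_insert_crawler_relation():
    relations: List[RelationDict] = [
        {
            "videoPoolTraceId": "trace-xxxxxxxx",  # hypothetical video-pool trace id
            "channelContentId": "content-xxxxxxxx",  # hypothetical channel content id
            "platform": "toutiao",
        }
    ]
    return await insert_crawler_relation_to_aigc_system(relations)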