|
@@ -4,7 +4,7 @@ import time
|
|
|
|
|
|
import requests
|
|
|
|
|
|
-from common import Common, AliyunLogger
|
|
|
+from common import Common, AliyunLogger, Feishu
|
|
|
from common.sql_help import sqlCollect
|
|
|
from data_channel.data_help import dataHelp
|
|
|
|
|
@@ -30,83 +30,73 @@ class SPH:
|
|
|
if history_id:
|
|
|
return history_id
|
|
|
else:
|
|
|
- url = "http://61.48.133.26:30001/Find_Video_Content"
|
|
|
+ url = "http://8.217.190.241:8888/crawler/wei_xin/shi_pin_hao/account_info"
|
|
|
payload = json.dumps({
|
|
|
- "content": account_name,
|
|
|
- "type": "19"
|
|
|
+ "account_name": account_name
|
|
|
})
|
|
|
headers = {
|
|
|
'Content-Type': 'application/json'
|
|
|
}
|
|
|
+
|
|
|
response = requests.request("POST", url, headers=headers, data=payload)
|
|
|
- info_list = response.json()['info_list']
|
|
|
- if len(info_list) == 0:
|
|
|
- return False
|
|
|
- target_user = cls.find_target_user(name=account_name, user_list=info_list)
|
|
|
- # 写入 MySql 数据库
|
|
|
- if target_user:
|
|
|
- target = target_user['contact']['username']
|
|
|
- sqlCollect.insert_history_id(account_name, target, channel)
|
|
|
+ response = response.json()
|
|
|
+ if response['code'] == 0:
|
|
|
+ data = response['data']['data']
|
|
|
+
|
|
|
+ channel_account_id = data['channel_account_id']
|
|
|
+ if channel_account_id:
|
|
|
+ sqlCollect.insert_history_id(account_name, channel_account_id, channel)
|
|
|
|
|
|
- return target_user['contact']["username"]
|
|
|
+
|
|
|
+ return channel_account_id
|
|
|
+ else:
|
|
|
+ return False
|
|
|
else:
|
|
|
+ Feishu.finish_bot("shi_pin_hao/account_info接口获取失败",
|
|
|
+ "https://open.feishu.cn/open-apis/bot/v2/hook/575ca6a1-84b4-4a2f-983b-1d178e7b16eb",
|
|
|
+ "【视频号接口异常提示 】")
|
|
|
return False
|
|
|
|
|
|
@classmethod
|
|
|
def get_sph_url(cls, task_mark, url_id, number, mark, channel_id, name):
|
|
|
account_id = cls.get_account_id(url_id)
|
|
|
if account_id:
|
|
|
- url = "http://61.48.133.26:30001/FinderGetUpMasterNextPage"
|
|
|
- last_buffer = ""
|
|
|
+ url = "http://8.217.190.241:8888/crawler/wei_xin/shi_pin_hao/blogger"
|
|
|
+ next_cursor = ""
|
|
|
list = []
|
|
|
for i in range(10):
|
|
|
headers = {
|
|
|
'Content-Type': 'application/json'
|
|
|
}
|
|
|
payload = json.dumps({
|
|
|
- "username": account_id,
|
|
|
- "last_buffer": last_buffer
|
|
|
+ "account_id": account_id,
|
|
|
+ "cursor": next_cursor
|
|
|
})
|
|
|
|
|
|
response = requests.request("POST", url, headers=headers, data=payload)
|
|
|
time.sleep(random.randint(1, 5))
|
|
|
res_json = response.json()
|
|
|
- try:
|
|
|
- if len(res_json["DownloadAddress"]) == 0 or res_json["DownloadAddress"] == "" or res_json["DownloadAddress"] == None:
|
|
|
- return list
|
|
|
- except:
|
|
|
- pass
|
|
|
- if "objectId" not in response.text or response.status_code != 200:
|
|
|
- continue
|
|
|
- if len(res_json["UpMasterHomePage"]) == 0:
|
|
|
- continue
|
|
|
- if not res_json["UpMasterHomePage"]:
|
|
|
- continue
|
|
|
- else:
|
|
|
- last_buffer = res_json.get('last_buffer')
|
|
|
- for obj in res_json["UpMasterHomePage"]:
|
|
|
- objectId = obj['objectId']
|
|
|
+ if res_json['code'] == 0:
|
|
|
+ next_cursor = res_json['data']['next_cursor']
|
|
|
+ data_lsit = res_json['data']['data']
|
|
|
+ for obj in data_lsit:
|
|
|
+ objectId = obj['id']
|
|
|
status = sqlCollect.is_used(task_mark, objectId, mark, "视频号")
|
|
|
- objectNonceId = obj['objectNonceId']
|
|
|
- url1 = "http://61.48.133.26:30001/GetFinderDownloadAddress"
|
|
|
- payload = json.dumps({
|
|
|
- "objectId": objectId,
|
|
|
- "objectNonceId": objectNonceId
|
|
|
- })
|
|
|
- headers = {
|
|
|
- 'Content-Type': 'text/plain'
|
|
|
- }
|
|
|
- response = requests.request("POST", url1, headers=headers, data=payload)
|
|
|
- time.sleep(random.randint(0, 1))
|
|
|
- video_obj = response.json()
|
|
|
- video_url = video_obj.get('DownloadAddress')
|
|
|
+
|
|
|
+ old_title = obj['object_desc']['description']
|
|
|
+ url_p = obj['object_desc']['media'][0]['url']
|
|
|
+ url_token = obj['object_desc']['media'][0]['url_token']
|
|
|
+ video_url = f"{url_p}{url_token}"
|
|
|
+ decode_key = obj['object_desc']['media'][0]['decode_key']
|
|
|
+ cover = obj['object_desc']['media'][0]['cover_url']
|
|
|
+
|
|
|
share_cnt = int(obj['forward_count']) # 分享
|
|
|
like_cnt = int(obj['like_count']) # 点赞
|
|
|
- old_title = video_obj.get('title').split("\n")[0].split("#")[0]
|
|
|
- duration = dataHelp.video_duration(video_url)
|
|
|
+ duration_ms = obj['object_desc']['media'][0]['spec'][0]["duration_ms"]
|
|
|
+ duration = int(duration_ms) / 1000
|
|
|
+
|
|
|
log_data = f"user:{url_id},,video_id:{objectId},,video_url:{video_url},,original_title:{old_title},,share_count:{share_cnt},,like_count:{like_cnt},,duration:{duration}"
|
|
|
AliyunLogger.logging(channel_id, name, url_id, objectId, "扫描到一条视频", "2001", log_data)
|
|
|
-
|
|
|
Common.logger("sph").info(
|
|
|
f"扫描:{task_mark},用户主页id:{url_id},视频id{objectId} ,分享:{share_cnt},点赞:{like_cnt}")
|
|
|
if status:
|
|
@@ -122,9 +112,8 @@ class SPH:
|
|
|
"2003", log_data)
|
|
|
|
|
|
continue
|
|
|
- cover = video_obj.get('thumb_url')
|
|
|
AliyunLogger.logging(channel_id, name, url_id, objectId, "符合规则等待改造", "2004", log_data)
|
|
|
- all_data = {"video_id": objectId, "cover": cover, "video_url": video_url, "rule": video_percent, "old_title": old_title}
|
|
|
+ all_data = {"video_id": objectId, "cover": cover, "video_url": video_url, "rule": video_percent, "old_title": old_title, "decode_key":decode_key}
|
|
|
list.append(all_data)
|
|
|
if len(list) == int(number):
|
|
|
Common.logger(mark).info(f"获取视频号视频总数:{len(list)}\n")
|
|
@@ -142,4 +131,4 @@ class SPH:
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
|
|
|
- SPH.get_sph_url('1',"霖霖觅影",'10','2',"视频号",'视频号品类账号')
|
|
|
+ SPH.get_sph_url('1',"人民日报",'10','2',"视频号",'视频号品类账号')
|