|
@@ -0,0 +1,106 @@
|
|
|
+import configparser
|
|
|
+import json
|
|
|
+import os
|
|
|
+import random
|
|
|
+import time
|
|
|
+
|
|
|
+import requests
|
|
|
+
|
|
|
+from common import Material, Oss, Common
|
|
|
+from common.sql_help import sqlCollect
|
|
|
+from data_channel.shipinhao import SPH
|
|
|
class SphHistory:
    """Fetch all content published by 视频号 (WeChat Channels) accounts.

    For every configured account the crawler pages through the account's
    posts, resolves a download address for each post, uploads the video and
    its cover through ``Oss.channel_upload_oss`` and records the result with
    ``sqlCollect.sph_data_info``.
    """

    # The listing and download endpoints are kept as separate constants so
    # that resolving a video can never clobber the URL used for paging.
    _PAGE_URL = "http://61.48.133.26:30001/FinderGetUpMasterNextPage"
    _DOWNLOAD_URL = "http://61.48.133.26:30001/GetFinderDownloadAddress"
    # Seconds to wait for the crawling service before a request is abandoned.
    _REQUEST_TIMEOUT = 30

    @classmethod
    def sph_data_info(cls):
        """Crawl every known account and store its videos.

        A user whose account id cannot be resolved is skipped. Any error
        raised while crawling one user is logged and the crawl moves on to
        the next user instead of stopping.

        Returns:
            ``"完成"`` once all users were visited and at least one of them
            was crawled without error; ``None`` when there are no users or
            every user failed.
        """
        user_list = cls.get_sph_user()
        if user_list is None:
            return None

        finished_any = False
        for user in user_list:
            Common.logger("sph_crawling").info(f"{user}开始获取数据")
            account_id = SPH.get_account_id(user)
            if not account_id:
                # Without an account id there is nothing to query; move on
                # rather than sending a bogus username to the service.
                Common.logger("sph_crawling").info(
                    f"{user}:没有获取到视频account_id,无法抓取数据"
                )
                continue
            try:
                cls._crawl_account(user, account_id)
            except Exception as e:
                # Best effort per user: record the failure, keep crawling.
                Common.logger("sph_crawling").error(
                    f"{user}抓取数据异常,跳过该用户:{e}"
                )
                continue
            finished_any = True
        return "完成" if finished_any else None

    @classmethod
    def _crawl_account(cls, user, account_id):
        """Page through all posts of one account and store each video."""
        last_buffer = ""  # paging cursor; the empty string asks for page one
        while True:
            payload = json.dumps({
                "username": account_id,
                "last_buffer": last_buffer,
            })
            response = requests.request(
                "POST",
                cls._PAGE_URL,
                headers={'Content-Type': 'application/json'},
                data=payload,
                timeout=cls._REQUEST_TIMEOUT,
            )
            time.sleep(random.randint(1, 5))  # throttle between page requests

            # Check the transport result before trying to parse the body.
            if response.status_code != 200 or "objectId" not in response.text:
                break
            res_json = response.json()
            # Kept from the original check: a reply carrying an empty
            # "DownloadAddress" field is treated as the end of the data.
            if "DownloadAddress" in res_json and not res_json["DownloadAddress"]:
                break
            posts = res_json.get("UpMasterHomePage")
            if not posts:
                break

            for obj in posts:
                cls._save_video(user, obj)

            last_buffer = res_json.get('last_buffer')
            if not last_buffer:
                # An empty cursor would request the first page again and
                # loop forever, so stop here.
                break

    @classmethod
    def _save_video(cls, user, obj):
        """Resolve, upload and record a single post; skip it if no URL."""
        object_id = obj['objectId']
        payload = json.dumps({
            "objectId": object_id,
            "objectNonceId": obj['objectNonceId'],
        })
        response = requests.request(
            "POST",
            cls._DOWNLOAD_URL,
            headers={'Content-Type': 'text/plain'},
            data=payload,
            timeout=cls._REQUEST_TIMEOUT,
        )
        time.sleep(random.randint(0, 1))
        video_obj = response.json()

        video_url = video_obj.get('DownloadAddress')
        if not video_url:
            # Covers both a missing field (None) and an empty address.
            return

        v_id = f"sph_{object_id}.mp4"
        c_id = f"sph_{object_id}.jpg"
        oss_video_path = Oss.channel_upload_oss(video_url, v_id)
        oss_video_key = oss_video_path.get("oss_object_key")
        share_cnt = int(obj['forward_count'])  # shares
        like_cnt = int(obj['like_count'])  # likes
        # Keep only the first line of the title, cut at the first hashtag;
        # a missing title becomes an empty string instead of raising.
        video_title = (video_obj.get('title') or "").split("\n")[0].split("#")[0]
        cover = video_obj.get('thumb_url')
        oss_cover_path = Oss.channel_upload_oss(cover, c_id)
        oss_cover_key = oss_cover_path.get("oss_object_key")
        Common.logger("sph_crawling").info(
            f"{user}oss地址:视频{oss_video_key},封面{oss_cover_key}"
        )
        create_time = obj['createtime']  # publish time
        user_name = obj['username']  # user name identifier
        nick_name = obj['nickname']  # display name
        comment_count = obj['comment_count']  # number of comments
        fav_count = obj['fav_count']  # number of thumbs-up likes
        sqlCollect.sph_data_info(
            '视频号', object_id, video_url, cover, video_title,
            str(share_cnt), str(like_cnt), oss_video_key, oss_cover_key,
            nick_name, user_name, comment_count, fav_count, create_time,
        )
        Common.logger("sph_crawling").info(f"{nick_name}插入数据成功")

    @classmethod
    def get_sph_user(cls):
        """Return the flat list of account users to crawl.

        Users are read with ``sqlCollect.sph_channel_user_list()``. When that
        returns ``None`` the table is seeded from ``Material.get_sph_user()``
        and read again, so the result has the same shape on both paths.

        Returns:
            A list with every item of every returned row, or ``None`` when
            no users are available.
        """
        data = sqlCollect.sph_channel_user_list()
        if data is None:
            user_list = Material.get_sph_user()
            if not user_list:
                return None
            for user in user_list:
                sqlCollect.insert_sph_channel_user("视频号", user)
            # Re-read after seeding; flattening the earlier None would raise.
            data = sqlCollect.sph_channel_user_list()
            if data is None:
                return None
        return [item for sublist in data for item in sublist]
|
|
|
+
|
|
|
+
|
|
|
# Script entry point: run the history crawl only when executed directly.
if __name__ == "__main__":
    SphHistory.sph_data_info()
|