|
@@ -8,13 +8,40 @@ import requests
|
|
|
|
|
|
from common import Material, Oss, Common
|
|
from common import Material, Oss, Common
|
|
from common.sql_help import sqlCollect
|
|
from common.sql_help import sqlCollect
|
|
|
|
+from data_channel.piaoquan import PQ
|
|
from data_channel.shipinhao import SPH
|
|
from data_channel.shipinhao import SPH
|
|
class SphHistory:
|
|
class SphHistory:
|
|
|
|
|
|
|
|
+ @classmethod
|
|
|
|
+ def remove_files(cls, video_path_url):
|
|
|
|
+ """
|
|
|
|
+ 删除指定目录下的所有文件和子目录
|
|
|
|
+ """
|
|
|
|
+ if os.path.exists(video_path_url) and os.path.isdir(video_path_url):
|
|
|
|
+ for root, dirs, files in os.walk(video_path_url):
|
|
|
|
+ for file in files:
|
|
|
|
+ file_path = os.path.join(root, file)
|
|
|
|
+ os.remove(file_path)
|
|
|
|
+ for dir in dirs:
|
|
|
|
+ dir_path = os.path.join(root, dir)
|
|
|
|
+ os.rmdir(dir_path)
|
|
|
|
+
|
|
|
|
+ @classmethod
|
|
|
|
+ def create_folders(cls):
|
|
|
|
+ """
|
|
|
|
+ 根据标示和任务标示创建目录
|
|
|
|
+ """
|
|
|
|
+ # video_path_url = "/Users/tzld/Desktop/video_rewriting/path/sph_crawling/"
|
|
|
|
+ video_path_url = '/root/video_rewriting/path/sph_crawling/'
|
|
|
|
+ if not os.path.exists(video_path_url):
|
|
|
|
+ os.makedirs(video_path_url)
|
|
|
|
+ return video_path_url
|
|
|
|
+
|
|
"""获取视频号所有内容"""
|
|
"""获取视频号所有内容"""
|
|
@classmethod
|
|
@classmethod
|
|
def sph_data_info(cls):
|
|
def sph_data_info(cls):
|
|
user_list = cls.get_sph_user()
|
|
user_list = cls.get_sph_user()
|
|
|
|
+ video_path_url = cls.create_folders()
|
|
if user_list == None:
|
|
if user_list == None:
|
|
return
|
|
return
|
|
for user in user_list:
|
|
for user in user_list:
|
|
@@ -22,6 +49,7 @@ class SphHistory:
|
|
account_id = SPH.get_account_id(user)
|
|
account_id = SPH.get_account_id(user)
|
|
if account_id == False:
|
|
if account_id == False:
|
|
print(f"{account_id}:没有获取到视频account_id,无法抓取数据")
|
|
print(f"{account_id}:没有获取到视频account_id,无法抓取数据")
|
|
|
|
+ continue
|
|
url = "http://61.48.133.26:30001/FinderGetUpMasterNextPage"
|
|
url = "http://61.48.133.26:30001/FinderGetUpMasterNextPage"
|
|
last_buffer = ""
|
|
last_buffer = ""
|
|
try:
|
|
try:
|
|
@@ -50,6 +78,7 @@ class SphHistory:
|
|
break
|
|
break
|
|
last_buffer = res_json.get('last_buffer')
|
|
last_buffer = res_json.get('last_buffer')
|
|
for obj in res_json["UpMasterHomePage"]:
|
|
for obj in res_json["UpMasterHomePage"]:
|
|
|
|
+ Common.logger("sph_crawling").info(f"{user}扫描到一条数据")
|
|
objectId = obj['objectId']
|
|
objectId = obj['objectId']
|
|
objectNonceId = obj['objectNonceId']
|
|
objectNonceId = obj['objectNonceId']
|
|
url = "http://61.48.133.26:30001/GetFinderDownloadAddress"
|
|
url = "http://61.48.133.26:30001/GetFinderDownloadAddress"
|
|
@@ -66,16 +95,19 @@ class SphHistory:
|
|
video_url = video_obj.get('DownloadAddress')
|
|
video_url = video_obj.get('DownloadAddress')
|
|
if len(video_url) == 0:
|
|
if len(video_url) == 0:
|
|
continue
|
|
continue
|
|
- v_id = f"sph_{objectId}.mp4"
|
|
|
|
- c_id = f"sph_{objectId}.jpg"
|
|
|
|
- oss_video_path = Oss.channel_upload_oss(video_url, v_id)
|
|
|
|
- oss_video_key = oss_video_path.get("oss_object_key")
|
|
|
|
|
|
+ v_id = f"sph/{objectId}"
|
|
|
|
+ oss_video_key = Oss.channel_upload_oss(video_url, v_id) # 视频发送OSS
|
|
|
|
+ oss_video_key = oss_video_key.get("oss_object_key")
|
|
share_cnt = int(obj['forward_count']) # 分享
|
|
share_cnt = int(obj['forward_count']) # 分享
|
|
like_cnt = int(obj['like_count']) # 点赞
|
|
like_cnt = int(obj['like_count']) # 点赞
|
|
video_title = video_obj.get('title').split("\n")[0].split("#")[0]
|
|
video_title = video_obj.get('title').split("\n")[0].split("#")[0]
|
|
cover = video_obj.get('thumb_url')
|
|
cover = video_obj.get('thumb_url')
|
|
- oss_cover_path = Oss.channel_upload_oss(cover, c_id)
|
|
|
|
- oss_cover_key = oss_cover_path.get("oss_object_key")
|
|
|
|
|
|
+ jpg_path = PQ.download_video_jpg(cover, video_path_url, v_id) # 下载视频封面
|
|
|
|
+ if os.path.isfile(jpg_path):
|
|
|
|
+ oss_jpg_key = Oss.stitching_fm_upload_oss(jpg_path, v_id) # 封面发送OSS
|
|
|
|
+ oss_cover_key = oss_jpg_key.get("oss_object_key")
|
|
|
|
+ else:
|
|
|
|
+ oss_cover_key = ''
|
|
Common.logger("sph_crawling").info(f"{user}oss地址:视频{oss_video_key},封面{oss_cover_key}")
|
|
Common.logger("sph_crawling").info(f"{user}oss地址:视频{oss_video_key},封面{oss_cover_key}")
|
|
create_time = obj['createtime'] # 发布时间
|
|
create_time = obj['createtime'] # 发布时间
|
|
user_name = obj['username'] # 用户名标示
|
|
user_name = obj['username'] # 用户名标示
|
|
@@ -84,8 +116,11 @@ class SphHistory:
|
|
fav_count = obj['fav_count'] # 大拇指点赞数
|
|
fav_count = obj['fav_count'] # 大拇指点赞数
|
|
sqlCollect.sph_data_info('视频号', objectId, video_url, cover, video_title, str(share_cnt), str(like_cnt), oss_video_key, oss_cover_key, nick_name, user_name, comment_count, fav_count, create_time)
|
|
sqlCollect.sph_data_info('视频号', objectId, video_url, cover, video_title, str(share_cnt), str(like_cnt), oss_video_key, oss_cover_key, nick_name, user_name, comment_count, fav_count, create_time)
|
|
Common.logger("sph_crawling").info(f"{nick_name}插入数据成功")
|
|
Common.logger("sph_crawling").info(f"{nick_name}插入数据成功")
|
|
|
|
+ cls.remove_files(video_path_url)
|
|
return "完成"
|
|
return "完成"
|
|
- except:
|
|
|
|
|
|
+ except Exception as e:
|
|
|
|
+ Common.logger("sph_crawling").info(f"{user}异常,异常信息{e}")
|
|
|
|
+ cls.remove_files(video_path_url)
|
|
continue
|
|
continue
|
|
|
|
|
|
@classmethod
|
|
@classmethod
|