# -*- coding: utf-8 -*-
# @Author: wangkun
# @Time: 2022/8/23
"""Crawl videos from followed Xigua (ixigua.com) user home pages.

Video metadata is fetched from the ixigua HTTP endpoints, staged in Feishu
sheets, then downloaded, merged with ffmpeg and handed to ``Publish``.
"""
import base64
import os
import random
import subprocess
import sys
import time

import requests
import urllib3

sys.path.append(os.getcwd())
from main.common import Common
from main.feishu import Feishu
from main.publish import Publish

proxies = {"http": None, "https": None}

# Seconds to wait for an HTTP response; without a timeout requests can block forever.
_REQUEST_TIMEOUT = 30
# Timestamp layout used for every human-readable time written to logs / sheets.
_TIME_FORMAT = '%Y/%m/%d %H:%M:%S'
# Videos published longer ago than this (10 days) end the crawl of a user.
_MAX_VIDEO_AGE_SECONDS = 3600 * 24 * 10


class Follow:
    """Crawl, stage, download and publish videos of followed Xigua users."""

    # Paging offset for a user's home-page video list (class-level, shared state).
    offset = 0

    # ------------------------------------------------------------------ helpers

    @staticmethod
    def _dig(mapping, *keys, default=0):
        """Follow ``keys`` through nested dicts; return ``default`` if any is missing."""
        current = mapping
        for key in keys:
            if not isinstance(current, dict) or key not in current:
                return default
            current = current[key]
        return current

    @classmethod
    def _parse_feed_basics(cls, feed):
        """Extract the fields both feed endpoints share; missing fields become ``0``."""
        if 'title' in feed:
            video_title = feed['title'].strip().replace('手游', '')
        else:
            video_title = 0
        return {
            'video_title': video_title,
            'video_id': cls._dig(feed, 'video_id'),
            'gid': cls._dig(feed, 'gid'),
            'play_cnt': cls._dig(feed, 'video_detail_info', 'video_watch_count'),
            'comment_cnt': cls._dig(feed, 'comment_count'),
            'like_cnt': cls._dig(feed, 'digg_count'),
            'video_duration': cls._dig(feed, 'video_duration'),
            'send_time': cls._dig(feed, 'publish_time'),
            'user_name': cls._dig(feed, 'user_info', 'name'),
            'user_id': cls._dig(feed, 'user_info', 'user_id'),
            'head_url': cls._dig(feed, 'user_info', 'avatar_url'),
        }

    @staticmethod
    def _title_hits_filter(video_title, words):
        """Return True when any non-empty filter word occurs in the title.

        Both sides are converted with ``str`` so the ``0`` placeholder title or a
        numeric sheet cell cannot raise ``TypeError``.  ``words`` being ``None``
        (word list could not be loaded) still raises, so the caller aborts
        instead of silently skipping the filter.
        """
        title = str(video_title)
        return any(str(word) in title for word in words if word != '')

    @staticmethod
    def _sheet_cells(log_type, sheet_id):
        """Return every cell of the given 'xigua' Feishu sheet as one flat list."""
        return [cell for row in Feishu.get_values_batch(log_type, 'xigua', sheet_id) for cell in row]

    # --------------------------------------------------------------- Feishu data

    @classmethod
    def get_user_info_from_feishu(cls, log_type):
        """Read the followed-user sheet and return it as a dict.

        Note: some ``user_id`` cells are ``int`` and some ``str``, so both ids are
        converted with ``str``.

        :param log_type: logger name passed to ``Common.logger`` / ``Feishu``.
        :return: ``{user_name: "<user_id>,<our_id>"}``, or ``None`` on failure.
        """
        try:
            user_sheet = Feishu.get_values_batch(log_type, 'xigua', '5tlTYB')
            user_dict = {}
            # Row 0 is skipped (the loop has always started at index 1).
            for row in user_sheet[1:]:
                user_name, user_id, our_id = row[0], row[1], row[3]
                if user_name is None or user_id is None or our_id is None:
                    continue
                user_dict[user_name] = str(user_id) + ',' + str(our_id)
            return user_dict
        except Exception as e:
            Common.logger(log_type).error('get_user_info_from_feishu异常:{}', e)
            return None

    @staticmethod
    def download_rule(duration, width, height):
        """Download rule: at least 60 long and width or height at least 720.

        :return: ``True`` when the video qualifies, else ``False``.
        """
        return int(duration) >= 60 and (int(width) >= 720 or int(height) >= 720)

    @classmethod
    def filter_words(cls, log_type):
        """Return all non-empty cells of the filter-word sheet as a flat list.

        :return: list of filter words, or ``None`` on failure.
        """
        try:
            filter_words_sheet = Feishu.get_values_batch(log_type, 'xigua', 'KGB4Hc')
            return [cell for row in filter_words_sheet for cell in row if cell is not None]
        except Exception as e:
            Common.logger(log_type).error('filter_words异常:{}', e)
            return None

    # ------------------------------------------------------------- PC endpoint

    @classmethod
    def get_follow_feeds_by_pc(cls, log_type, userid):
        """Fetch one page of a user's home-page videos via the PC web endpoint.

        Qualifying videos are written to the staging sheet ``wjhpDs``.  Each call
        requests the page at ``cls.offset`` and advances the offset by 30; the
        offset is reset to 0 when a video older than 10 days is met.
        Note: the ``_signature`` parameter is only valid for about one hour and is
        read from a Feishu sheet.

        :param log_type: logger name passed to ``Common.logger`` / ``Feishu``.
        :param userid: Xigua id of the user whose videos are listed.
        """
        try:
            url = "https://www.ixigua.com/api/videov2/author/new_video_list?"
            headers = {
                'sec-ch-ua': '".Not/A)Brand";v="99", "Google Chrome";v="103", "Chromium";v="103"',
                'accept': 'application/json, text/plain, */*',
                'sec-ch-ua-mobile': '?0',
                'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko)'
                              ' Chrome/103.0.0.0 Safari/537.36',
                'sec-ch-ua-platform': '"macOS"',
                'sec-fetch-site': 'same-origin',
                'sec-fetch-mode': 'cors',
                'sec-fetch-dest': 'document',
                'referer': 'https://www.ixigua.com/home/' + str(userid),
                'accept-encoding': 'gzip, deflate, br',
                'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8',
            }
            params = {
                'to_user_id': str(userid),
                'offset': str(cls.offset),
                'limit': '30',
                'maxBehotTime': '0',
                'order': 'new',
                'isHome': '0',
                'msToken': '2ZHINOMBPK-qlCKApv37xVCBKkXyPli8mTYNlTSXvr17eZ0Ea8B__Otimkx6q_enDc9m8Kgzi3Re7wpLIMSSE9dofTYdqQgvB7mHQbx_AMnVnf5lsByU',
                'X-Bogus': 'DFSzswVuVvTANe2BSBBMCR/F6qyc',
                '_signature': Feishu.get_values_batch(log_type, 'xigua', '6tZHhs')[1][1],
            }
            cookies = {
                '__ac_signature': '_02B4Z6wo00f017vzS8QAAIDCwz2gwwDpX9-7009AAI4Bc4',
                'MONITOR_WEB_ID': 'fd4244aa-2003-4e19-a2a4-715c19310a56',
                'ixigua-a-s': '1',
                'support_webp': 'true',
                'support_avif': 'true',
                '_tea_utm_cache_1300': 'undefined',
                'ttcid': '16a3b6b9b80b4a87ae258f5f3f101e6310',
                'msToken': 'G8pL2oH-9Zl1hrLZPyOMSceMaII3ejKda2o-tgO1heYrj7b_fgm9vGlvwyLOA2H8oUShZgAYfxEvIuktT7OuxBuy85N-ousFfqxuAIrfruMEFZUTYp2z',
                'tt_scid': 'a0zhISPImN-dVMMdbeb1Kzhl1x4oJS5Yr81FzH6qYk3jDtj1d2E5gsywN4rwna8ib398',
                'ttwid': '1%7CvorN1HQjbSgBViRkEoZYEbqP_sQVoQqaUqGcFA-bzpA%7C1661324763%7Ce040213e1107973ebb0db64f0e77cfb027375f1fb5854bb40588d692d025af1f',
            }
            urllib3.disable_warnings()
            response = requests.get(url=url, headers=headers, params=params, cookies=cookies,
                                    proxies=proxies, verify=False, timeout=_REQUEST_TIMEOUT)
            cls.offset += 30

            # Parse the body once instead of re-parsing it for every check.
            payload = response.json() if 'data' in response.text else None
            if payload is None or payload.get('data', '') == '' or payload.get('code') != 200:
                Common.logger(log_type).info('get_follow_feeds: response:{}', response.text)
                return

            for feed in payload['data']['videoList']:
                basics = cls._parse_feed_basics(feed)
                video_title = basics['video_title']
                video_id = basics['video_id']
                gid = basics['gid']
                play_cnt = basics['play_cnt']
                comment_cnt = basics['comment_cnt']
                like_cnt = basics['like_cnt']
                video_duration = basics['video_duration']
                send_time = basics['send_time']
                user_name = basics['user_name']
                user_id = basics['user_id']
                head_url = basics['head_url']
                # The PC feed code never read a share count; it is always recorded as 0.
                share_cnt = 0

                # Cover: prefer the direct url, otherwise fall back to the first url_list entry.
                large_image = cls._dig(feed, 'video_detail_info', 'detail_video_large_image', default=None)
                if large_image is None:
                    cover_url = 0
                elif 'url' in large_image:
                    cover_url = large_image['url']
                else:
                    url_list = large_image.get('url_list') or []
                    cover_url = url_list[0].get('url', 0) if url_list else 0

                video_url, audio_url, video_width, video_height = cls.get_video_info(log_type, gid)

                publish_time_text = time.strftime(_TIME_FORMAT, time.localtime(send_time))
                Common.logger(log_type).info('video_title:{}', video_title)
                Common.logger(log_type).info('video_id:{}', video_id)
                Common.logger(log_type).info('play_cnt:{}', play_cnt)
                Common.logger(log_type).info('send_time:{}', publish_time_text)

                if gid == 0 or video_id == 0:
                    Common.logger(log_type).info('无效视频\n')
                elif int(time.time()) - int(send_time) > _MAX_VIDEO_AGE_SECONDS:
                    # The list is ordered newest first, so everything after this is older: stop.
                    Common.logger(log_type).info('发布时间超过10天:{}\n', publish_time_text)
                    cls.offset = 0
                    return
                elif cls.download_rule(video_duration, video_width, video_height) is False:
                    Common.logger(log_type).info('不满足抓取规则\n')
                elif cls._title_hits_filter(video_title, cls.filter_words(log_type)):
                    Common.logger(log_type).info('标题已中过滤词:{}\n', video_title)
                elif str(video_id) in cls._sheet_cells(log_type, 'e075e9'):
                    Common.logger(log_type).info('视频已下载\n')
                elif str(video_id) in cls._sheet_cells(log_type, 'wjhpDs'):
                    Common.logger(log_type).info('视频已存在\n')
                else:
                    # Insert an empty row 2 in the staging sheet and fill it with this video.
                    Feishu.insert_columns(log_type, 'xigua', 'wjhpDs', 'ROWS', 1, 2)
                    get_feeds_time = time.time()
                    values = [[time.strftime(_TIME_FORMAT, time.localtime(get_feeds_time)),
                               '关注榜',
                               video_title,
                               str(video_id),
                               gid,
                               play_cnt,
                               comment_cnt,
                               like_cnt,
                               share_cnt,
                               video_duration,
                               str(video_width) + '*' + str(video_height),
                               publish_time_text,
                               user_name,
                               user_id,
                               head_url,
                               cover_url,
                               video_url,
                               audio_url]]
                    time.sleep(1)
                    Feishu.update_values(log_type, 'xigua', 'wjhpDs', 'A2:Z2', values)
                    Common.logger(log_type).info('视频信息写入飞书成功\n')
                    time.sleep(random.randint(1, 3))
        except Exception as e:
            Common.logger(log_type).error('get_follow_feeds_by_pc异常:{}\n', e)

    # ------------------------------------------------------------ video details

    @classmethod
    def get_video_info(cls, log_type, gid):
        """Fetch the playable URLs and dimensions of one video.

        :param log_type: logger name passed to ``Common.logger``.
        :param gid: video group id sent as ``mixId``.
        :return: ``(video_url, audio_url, video_width, video_height)``.  All four
            are ``0`` when the information is unavailable or the request fails, so
            callers can always unpack / index the result.
        """
        unavailable = (0, 0, 0, 0)
        try:
            url = 'https://www.ixigua.com/api/mixVideo/information?'
            headers = {
                "accept-encoding": "gzip, deflate, br",
                "accept-language": "zh-CN,zh-Hans;q=0.9",
                "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
                              "AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.5 Safari/605.1.15",
                "referer": "https://www.ixigua.com/7102614741050196520?logTag=0531c88ac04f38ab2c62",
            }
            params = {
                'mixId': gid,
                'msToken': 'IlG0wd0Pylyw9ghcYiB2YseUmTwrsrqqhXrbIcsSaTcLTJyVlbYJzk20zw3UO-CfrfC'
                           'NVVIOBNjIl7vfBoxnVUwO9ZyzAI3umSKsT5-pef_RRfQCJwmA',
                'X-Bogus': 'DFSzswVupYTANCJOSBk0P53WxM-r',
                '_signature': '_02B4Z6wo0000119LvEwAAIDCuktNZ0y5wkdfS7jAALThuOR8D9yWNZ.EmWHKV0WSn6Px'
                              'fPsH9-BldyxVje0f49ryXgmn7Tzk-swEHNb15TiGqa6YF.cX0jW8Eds1TtJOIZyfc9s5emH7gdWN94',
            }
            cookies = {
                'ixigua-a-s': '1',
                'msToken': 'IlG0wd0Pylyw9ghcYiB2YseUmTwrsrqqhXrbIcsSaTcLTJyVlbYJzk20zw3UO-CfrfCNVVIOB'
                           'NjIl7vfBoxnVUwO9ZyzAI3umSKsT5-pef_RRfQCJwmA',
                'ttwid': '1%7C_yXQeHWwLZgCsgHClOwTCdYSOt_MjdOkgnPIkpi-Sr8%7C1661241238%7Cf57d0c5ef3f1d7'
                         '6e049fccdca1ac54887c34d1f8731c8e51a49780ff0ceab9f8',
                'tt_scid': 'QZ4l8KXDG0YAEaMCSbADdcybdKbUfG4BC6S4OBv9lpRS5VyqYLX2bIR8CTeZeGHR9ee3',
                'MONITOR_WEB_ID': '0a49204a-7af5-4e96-95f0-f4bafb7450ad',
                '__ac_nonce': '06304878000964fdad287',
                '__ac_signature': '_02B4Z6wo00f017Rcr3AAAIDCUVxeW1tOKEu0fKvAAI4cvoYzV-wBhq7B6D8k0no7lb'
                                  'FlvYoinmtK6UXjRIYPXnahUlFTvmWVtb77jsMkKAXzAEsLE56m36RlvL7ky.M3Xn52r9t1IEb7IR3ke8',
                'ttcid': 'e56fabf6e85d4adf9e4d91902496a0e882',
                '_tea_utm_cache_1300': 'undefined',
                'support_avif': 'false',
                'support_webp': 'false',
                'xiguavideopcwebid': '7134967546256016900',
                'xiguavideopcwebid.sig': 'xxRww5R1VEMJN_dQepHorEu_eAc',
            }
            urllib3.disable_warnings()
            response = requests.get(url=url, headers=headers, params=params, cookies=cookies,
                                    proxies=proxies, verify=False, timeout=_REQUEST_TIMEOUT)
            payload = response.json()
            if 'data' not in payload or payload['data'] == '':
                Common.logger(log_type).warning('get_video_info: response: {}', response.text)
                return unavailable

            video_info = payload['data']['gidInformation']['packerData']['video']
            if 'videoResource' not in video_info:
                return unavailable
            resource = video_info['videoResource']

            if 'dash' in resource:
                # DASH: separate video / audio streams; the last list entry is used.
                dynamic = resource['dash']['dynamic_video']
                picked_video = dynamic['dynamic_video_list'][-1]
                picked_audio = dynamic['dynamic_audio_list'][-1]
                # main_url values are base64-encoded.
                return (base64.b64decode(picked_video['main_url']).decode('utf8'),
                        base64.b64decode(picked_audio['main_url']).decode('utf8'),
                        picked_video['vwidth'],
                        picked_video['vheight'])

            if 'normal' in resource:
                video_list = resource['normal']['video_list']
                # Try video_4 first, then video_3, video_2, video_1.
                for key in ('video_4', 'video_3', 'video_2', 'video_1'):
                    if key in video_list:
                        picked = video_list[key]
                        # One muxed stream: the same URL serves as video and audio source.
                        media_url = base64.b64decode(picked['main_url']).decode('utf8')
                        return media_url, media_url, picked['vwidth'], picked['vheight']

            return unavailable
        except Exception as e:
            Common.logger(log_type).error('get_video_info异常:{}', e)
            return unavailable

    # ------------------------------------------------------------ APP endpoint

    @classmethod
    def get_follow_feeds_by_app(cls, log_type, userid):
        """Page through a user's home-page videos via the mobile-app endpoint.

        Qualifying videos are written to the staging sheet ``wjhpDs``.  Paging
        stops (and ``cls.offset`` is reset to 0) when a video older than 10 days
        is met, when the endpoint returns an invalid or empty page, or when an
        error occurs - the last two used to loop forever.

        :param log_type: logger name passed to ``Common.logger`` / ``Feishu``.
        :param userid: Xigua id of the user whose videos are listed.
        """
        url = "https://api5-normal-quic-lq.ixigua.com/video/app/user/videolist_tab/v3/?"
        # NOTE(review): session cookies / tokens are hard-coded below; consider moving them to config.
        headers = {
            'Host': 'api5-normal-quic-lq.ixigua.com',
            'Cookie': 'passport_csrf_token=9dc29668504aefd8f810d194c1591b27; passport_csrf_token_default=9dc29668504aefd8f810d194c1591b27; d_ticket=8cc008f231ad00a57481e490f82f4bedebe99; n_mh=Zi1ukqZaOfwMQ8RKEEaBFHPd94g9LJFrf_5jskG0uhY; odin_tt=79986f6d46fe14e0f0cf5c6d831005ef2d2ba797151d32eb7678d9ec14a770349dcc7f5cce1746a00dc493838a94db296ef2135712d40b5de1b4ebb170e7e3bf; sessionid=cd61dd6003146ce5b8d19b1eeb29d5b6; sessionid_ss=cd61dd6003146ce5b8d19b1eeb29d5b6; sid_guard=cd61dd6003146ce5b8d19b1eeb29d5b6%7C1661320113%7C5184000%7CSun%2C+23-Oct-2022+05%3A48%3A33+GMT; sid_tt=cd61dd6003146ce5b8d19b1eeb29d5b6; uid_tt=6544aadbdc13b980ab4906f550c70af5; uid_tt_ss=6544aadbdc13b980ab4906f550c70af5; install_id=541373572069224; ttreq=1$27a2ec895a960525ef828e684768bef579920543; msToken=6hA48Lf7RVYOl0Okgng_KQzBwfUpN2M5tB6opL8N6YB3EX0VsNQNhGH4kT-vRxO3Yjac8E4w7Zk4rkFF5JCRTilK',
            'x-tt-token': '00cd61dd6003146ce5b8d19b1eeb29d5b603e056899dfc41b69bf336d3ce3bfc61b2822bbd85f84cfdfb3bf876b7bb71ea85363bff7cb21186b571d3418b30838538c78e169a0db8500261060669094c3ed23032496d65f19a0fa66fc54cc4eed2c55-1.0.1',
            'request-startime': '683091411.831285',
            'x-vc-bdturing-sdk-version': '2.2.8',
            'x-ss-cookie': 'install_id=541373572069224; msToken=6hA48Lf7RVYOl0Okgng_KQzBwfUpN2M5tB6opL8N6YB3EX0VsNQNhGH4kT-vRxO3Yjac8E4w7Zk4rkFF5JCRTilK; ttreq=1$27a2ec895a960525ef828e684768bef579920543; d_ticket=8cc008f231ad00a57481e490f82f4bedebe99; n_mh=Zi1ukqZaOfwMQ8RKEEaBFHPd94g9LJFrf_5jskG0uhY; odin_tt=79986f6d46fe14e0f0cf5c6d831005ef2d2ba797151d32eb7678d9ec14a770349dcc7f5cce1746a00dc493838a94db296ef2135712d40b5de1b4ebb170e7e3bf; sessionid=cd61dd6003146ce5b8d19b1eeb29d5b6; sessionid_ss=cd61dd6003146ce5b8d19b1eeb29d5b6; sid_guard=cd61dd6003146ce5b8d19b1eeb29d5b6%7C1661320113%7C5184000%7CSun%2C+23-Oct-2022+05%3A48%3A33+GMT; sid_tt=cd61dd6003146ce5b8d19b1eeb29d5b6; uid_tt=6544aadbdc13b980ab4906f550c70af5; uid_tt_ss=6544aadbdc13b980ab4906f550c70af5; passport_csrf_token=9dc29668504aefd8f810d194c1591b27; passport_csrf_token_default=9dc29668504aefd8f810d194c1591b27',
            'tt-request-time': '1661398611831',
            'user-agent': 'Video 6.8.8 rv:6.8.8.12 (iPhone; iOS 14.7.1; zh_CN) Cronet',
            'sdk-version': '2',
            'x-tt-dt': 'AAARLMRFIGV63HLKR2OFYMAN4ECX3S3FF7T6VF3ZUGZVJHJRTAR6TZ6TXKNYXU5US4L72542CDEO4CJAORJUPSELHB52LINBZAWN7DIMVSPRKPKSIJYA2S2ZS7PIYZQBQ3OFWJETR35OAD55FXYP6OY',
            'passport-sdk-version': '5.14.3',
            'x-bd-kmsv': '1',
            'x-ss-dp': '32',
            'x-tt-trace-id': '00-d312f8fb0dae06939d00507998be0020-d312f8fb0dae0693-01',
            'x-argus': 'OoPWDUi7xa1FAheuXaB4U+12sViNA+0vZEq7RpA1HvKF5CreKftmWWAtl1ndNdJNbk4zPogps8WNxsRJWdgZOzLg5CUTwVWrMQ/ptLgYrFTXbKf4P4CpqSRoJEHca/LVYRXUrTxTsi+AS7u/S3BTCrzm6nwvZB43GyiLGyN1W38poinJoMkPltgUNoSkAilVXCTu3iSWFLUYayOF7MwFRnYFxU4vBu+XmYCtl74XVCCARZD6uYf/cjkIH9wRD+uv0HBNlI70mqjaQOTYtlINi2i61yctngEjgwpV6s+4GLWQQYY6KXq+eu9mEppFDLSI9WY=',
            'x-gorgon': '8404e06000002dfc1ace57427120b4f72a226ce677bde6d67b92',
            'x-khronos': '1661398611',
            'x-ladon': '7bRfCQvXSDeU17k7XA6Y7TSO0rsUmxbxtqt+apKfuSx/juZZ'
        }
        while True:
            try:
                params = {
                    'anti_addiction_model': '0',
                    'version_code': '10.8.8',
                    'app_name': 'video_article',
                    'device_id': '3061492313228551',
                    'channel': 'App%20Store',
                    'resolution': '828*1792',
                    'aid': '32',
                    'ab_feature': 'z1',
                    'ab_version': '668851,4601580,668854,4594840,4601552,4622288,4641673,668858,4601444,668859,4601563,668856,4601562,668855,4601507,668853,4601558,668852,4601533',
                    'update_version_code': '108812',
                    'cdid': '7425DF80-0324-4CEF-AAEC-6596F45F2C7A',
                    'ac': 'WIFI',
                    'os_version': '14.7.1',
                    'user_version': '6.8.8',
                    'ssmix': 'a',
                    'ipad_adapter_enable': '0',
                    'device_platform': 'iphone',
                    'iid': '541373572069224',
                    'device_type': 'iPhone%2011',
                    'ab_client': 'a1,f2,f7,e1',
                    'cdid_ts': '1661312788',
                    'offset': str(cls.offset),
                    'orderby': 'publishtime',
                    'to_user_id': userid,
                    'count': '20',
                    'language': 'zh-Hans-CN',
                    'loc_mode': '0',
                    'ab_version_vid_list': '4413540%2C2190089',
                    'enable_publish_status': '0',
                    'play_param': 'codec_type%3A7%2Cenable_dash%3A1%2Cresolution%3A828%2A1792%2Cis_order_flow%3A-1%2Cis_hdr%3A1',
                    'client_extra': '%7B%22last_ad_position%22%3A-1%7D',
                }
                urllib3.disable_warnings()
                response = requests.get(url=url, headers=headers, params=params,
                                        proxies=proxies, verify=False, timeout=_REQUEST_TIMEOUT)
                # NOTE(review): 'count' asks for 20 items but the offset advances by 30 -
                # this looks like it skips 10 videos per page; confirm against the API.
                cls.offset += 30

                payload = response.json() if 'data' in response.text else None
                if payload is None or payload.get('code') != 0 or len(payload.get('data') or []) == 0:
                    Common.logger(log_type).warning('get_follow_feeds_by_app: response: {}', response.text)
                    # Nothing more to page through: stop instead of requesting forever.
                    cls.offset = 0
                    return

                for feed in payload['data']:
                    basics = cls._parse_feed_basics(feed)
                    video_title = basics['video_title']
                    video_id = basics['video_id']
                    gid = basics['gid']
                    play_cnt = basics['play_cnt']
                    comment_count = basics['comment_cnt']
                    like_cnt = basics['like_cnt']
                    video_duration = basics['video_duration']
                    send_time = basics['send_time']
                    user_name = basics['user_name']
                    user_id = basics['user_id']
                    head_url = basics['head_url']
                    share_cnt = cls._dig(feed, 'share_count')
                    cover_url = cls._dig(feed, 'video_detail_info', 'detail_video_large_image', 'url')

                    video_url, audio_url, video_width, video_height = cls.get_video_info(log_type, gid)

                    publish_time_text = time.strftime(_TIME_FORMAT, time.localtime(send_time))
                    Common.logger(log_type).info('video_title:{}', video_title)
                    Common.logger(log_type).info('video_id:{}', video_id)
                    Common.logger(log_type).info('play_cnt:{}', play_cnt)
                    Common.logger(log_type).info('video_duration:{}', video_duration)
                    Common.logger(log_type).info('video_width_height:{}',
                                                 str(video_width) + '*' + str(video_height))
                    Common.logger(log_type).info('send_time:{}', publish_time_text)

                    if gid == 0 or video_url == 0 or audio_url == 0:
                        Common.logger(log_type).info('无效视频:{}\n', video_title)
                    elif int(time.time()) - int(send_time) > _MAX_VIDEO_AGE_SECONDS:
                        # Log the video's publish time (the old code formatted "now" by mistake).
                        Common.logger(log_type).info('发布时间超过10天:{}\n', publish_time_text)
                        cls.offset = 0
                        return
                    elif cls.download_rule(video_duration, video_width, video_height) is False:
                        Common.logger(log_type).info('不满足抓取规则\n')
                    elif cls._title_hits_filter(video_title, cls.filter_words(log_type)):
                        Common.logger(log_type).info('标题已中过滤词:{}\n', video_title)
                    elif str(video_id) in cls._sheet_cells(log_type, 'e075e9'):
                        Common.logger(log_type).info('视频已下载:{}\n', video_title)
                    elif str(video_id) in cls._sheet_cells(log_type, 'wjhpDs'):
                        Common.logger(log_type).info('视频已存在:{}\n', video_title)
                    else:
                        # Insert an empty row 2 in the staging sheet and fill it with this video.
                        Feishu.insert_columns(log_type, 'xigua', 'wjhpDs', 'ROWS', 1, 2)
                        get_feeds_time = int(time.time())
                        values = [[time.strftime(_TIME_FORMAT, time.localtime(get_feeds_time)),
                                   '关注榜',
                                   video_title,
                                   str(video_id),
                                   gid,
                                   int(play_cnt),
                                   int(comment_count),
                                   int(like_cnt),
                                   int(share_cnt),
                                   video_duration,
                                   str(video_width) + '*' + str(video_height),
                                   publish_time_text,
                                   user_name,
                                   str(user_id),
                                   head_url,
                                   cover_url,
                                   video_url,
                                   audio_url]]
                        time.sleep(1)
                        Feishu.update_values(log_type, 'xigua', 'wjhpDs', 'A2:Z2', values)
                        Common.logger(log_type).info('当前视频信息写入飞书成功\n')
                        time.sleep(random.randint(1, 3))
            except Exception as e:
                Common.logger(log_type).error('get_follow_feeds_by_app异常:{}\n', e)
                # A failing request / page would otherwise be retried forever without delay.
                cls.offset = 0
                return

    # ------------------------------------------------------------- orchestration

    @classmethod
    def get_all_person_videos(cls, log_type, env):
        """Crawl, download and publish the home-page videos of every followed user.

        :param log_type: logger name passed to ``Common.logger`` / ``Feishu``.
        :param env: environment flag forwarded to ``run_download_publish``.
        """
        try:
            user_list = cls.get_user_info_from_feishu(log_type)
            # Covers both an empty sheet and a failed load (None).
            if not user_list:
                Common.logger(log_type).warning('用户ID列表为空\n')
                return
            for user_name, ids in user_list.items():
                Common.logger(log_type).info('正在获取 {} 主页视频\n', user_name)
                id_parts = ids.split(',')
                # Start every user at the first page; the offset is class-level state and
                # would otherwise carry over from the previous user.
                cls.offset = 0
                # get_follow_feeds_by_app is the app-endpoint alternative to this call.
                cls.get_follow_feeds_by_pc(log_type, id_parts[0])
                time.sleep(1)
                cls.run_download_publish(log_type, env, id_parts[-1])
                time.sleep(random.randint(5, 10))
        except Exception as e:
            Common.logger(log_type).error('get_all_person_videos异常:{}\n', e)

    @classmethod
    def video_compose(cls, log_type, video_title):
        """Merge ``video1.mp4`` and ``audio1.mp4`` into ``video.mp4`` with ffmpeg.

        The two source files under ``./videos/<video_title>/`` are removed
        afterwards, as before.

        :param log_type: logger name passed to ``Common.logger``.
        :param video_title: name of the directory under ``./videos/``.
        """
        video_dir = './videos/' + str(video_title)
        video_path = video_dir + '/video1.mp4'
        audio_path = video_dir + '/audio1.mp4'
        out_path = video_dir + '/video.mp4'
        # Argument list instead of a shell string: titles are scraped text and may
        # contain spaces or shell metacharacters.
        cmd = ['ffmpeg', '-i', video_path, '-i', audio_path,
               '-c:v', 'copy', '-c:a', 'aac', '-strict', 'experimental',
               '-map', '0:v:0', '-map', '1:a:0', out_path]
        return_code = subprocess.call(cmd)
        for file in os.listdir(video_dir):
            stem = file.split('.mp4')[0]
            if stem == 'video1' or stem == 'audio1':
                os.remove(video_dir + '/' + file)
        if return_code == 0:
            Common.logger(log_type).info('合成成功')
        else:
            Common.logger(log_type).error('合成失败, ffmpeg 返回码:{}', return_code)

    @classmethod
    def download_publish(cls, log_type, env, uid):
        """Download, merge, upload and record one video from the staging sheet.

        Handles the first data row of sheet ``wjhpDs`` and returns; the row is
        removed from the sheet when it is empty, already downloaded, or
        successfully published.

        :param log_type: logger name passed to ``Common.logger`` / ``Feishu``.
        :param env: ``'dev'`` selects the test admin link, anything else the production one.
        :param uid: our user id forwarded to ``Publish.upload_and_publish``.
        """
        try:
            feeds_sheet = Feishu.get_values_batch(log_type, 'xigua', 'wjhpDs')
            for i in range(1, len(feeds_sheet)):
                row = feeds_sheet[i]
                download_video_title = row[2]
                download_video_id = row[3]
                download_video_gid = row[4]
                download_play_cnt = row[5]
                download_comment_cnt = row[6]
                download_like_cnt = row[7]
                download_share_cnt = row[8]
                download_video_duration = row[9]
                download_video_width_height = row[10]
                download_send_time = row[11]
                download_user_name = row[12]
                download_user_id = row[13]
                download_head_url = row[14]
                download_cover_url = row[15]
                download_video_url = row[16]
                download_audio_url = row[17]

                Common.logger(log_type).info('正在判断第{}行:{}', i + 1, download_video_title)
                Common.logger(log_type).info('download_video_id:{}', download_video_id)
                Common.logger(log_type).info('download_video_duration:{}', download_video_duration)
                Common.logger(log_type).info('download_send_time:{}', download_send_time)

                # Skip (and delete) empty rows.
                if download_video_title is None or download_video_id is None:
                    Feishu.dimension_range(log_type, 'xigua', 'wjhpDs', 'ROWS', i + 1, i + 1)
                    Common.logger(log_type).info('空行,删除成功\n')
                    return

                # Skip (and delete) rows whose video id is already in the downloaded sheet.
                if str(download_video_id) in cls._sheet_cells(log_type, 'e075e9'):
                    Feishu.dimension_range(log_type, 'xigua', 'wjhpDs', 'ROWS', i + 1, i + 1)
                    Common.logger(log_type).info('视频已下载,删除成功\n')
                    return

                # Download cover, video and audio.
                Common.download_method(log_type=log_type, text='cover',
                                       d_name=download_video_title, d_url=download_cover_url)
                Common.download_method(log_type=log_type, text='video',
                                       d_name=download_video_title, d_url=download_video_url)
                Common.download_method(log_type=log_type, text='audio',
                                       d_name=download_video_title, d_url=download_audio_url)

                # Save the video info to "./videos/{download_video_title}/info.txt".
                send_timestamp = int(time.mktime(time.strptime(download_send_time, "%Y/%m/%d %H:%M:%S")))
                info_lines = [
                    str(download_video_id),
                    str(download_video_title),
                    str(download_video_duration),
                    str(download_play_cnt),
                    str(download_comment_cnt),
                    str(download_like_cnt),
                    str(download_share_cnt),
                    str(download_video_width_height),
                    str(send_timestamp),
                    str(download_user_name),
                    str(download_head_url),
                    str(download_video_url),
                    str(download_cover_url),
                    "xigua" + str(int(time.time())),
                ]
                with open("./videos/" + download_video_title + "/" + "info.txt", "a", encoding="UTF-8") as f_a:
                    f_a.write("\n".join(info_lines))
                # Use the caller's log_type (was hard-coded to "follow").
                Common.logger(log_type).info("==========视频信息已保存至info.txt==========")

                # Merge the audio and video streams.
                cls.video_compose(log_type, download_video_title)

                # Upload the video.
                Common.logger(log_type).info("开始上传视频:{}", download_video_title)
                our_video_id = Publish.upload_and_publish(log_type, env, uid)
                if env == 'dev':
                    admin_host = "https://testadmin.piaoquantv.com"
                else:
                    admin_host = "https://admin.piaoquantv.com"
                our_video_link = admin_host + "/cms/post-detail/" + str(our_video_id) + "/info"
                Common.logger(log_type).info("视频上传完成:{}\n", download_video_title)

                # Downloaded-video sheet: insert a new first data row ...
                Feishu.insert_columns(log_type, 'xigua', "e075e9", "ROWS", 1, 2)
                # ... and write this video's record into it.
                upload_time = int(time.time())
                values = [[time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(upload_time)),
                           "关注榜",
                           download_video_title,
                           str(download_video_id),
                           our_video_link,
                           download_video_gid,
                           download_play_cnt,
                           download_comment_cnt,
                           download_like_cnt,
                           download_share_cnt,
                           download_video_duration,
                           download_video_width_height,
                           download_send_time,
                           download_user_name,
                           download_user_id,
                           download_head_url,
                           download_cover_url,
                           download_video_url,
                           download_audio_url]]
                Common.logger(log_type).info('values:{}\n', values)
                time.sleep(1)
                Feishu.update_values(log_type, 'xigua', "e075e9", "F2:Z2", values)
                Common.logger(log_type).info("视频已保存至云文档:{}", download_video_title)

                # Remove the processed row from the staging sheet (ROWS or COLUMNS are accepted).
                Feishu.dimension_range(log_type, 'xigua', "wjhpDs", "ROWS", i + 1, i + 1)
                Common.logger(log_type).info("视频:{},下载/上传成功\n", download_video_title)
                return
        except Exception as e:
            Common.logger(log_type).error('download_publish异常:{}\n', e)

    @classmethod
    def run_download_publish(cls, log_type, env, uid):
        """Call ``download_publish`` until the staging sheet has a single row left.

        :param log_type: logger name passed to ``Common.logger`` / ``Feishu``.
        :param env: environment flag forwarded to ``download_publish``.
        :param uid: our user id forwarded to ``download_publish``.
        """
        try:
            # NOTE(review): download_publish swallows its own errors without removing the
            # row, so a row that always fails is retried endlessly here - consider a cap.
            while True:
                if len(Feishu.get_values_batch(log_type, 'xigua', 'wjhpDs')) == 1:
                    Common.logger(log_type).info('下载 / 上传 完成\n')
                    break
                cls.download_publish(log_type, env, uid)
                time.sleep(random.randint(1, 3))
        except Exception as e:
            Common.logger(log_type).error('run_download_publish异常:{}\n', e)


if __name__ == '__main__':
    Follow.download_publish('follow', 'dev', '6267141')