"""
@Author  : luojunhui
Xiaoniangao account crawler
"""
import os
import sys
import json
import time
import uuid
import random
import asyncio
import aiohttp
import datetime

# Make the project root importable before pulling in the `application` package.
sys.path.append(os.getcwd())

from application.items import VideoItem
from application.pipeline import PiaoQuanPipeline
from application.common.messageQueue import MQ
from application.common.log import AliyunLogger


class XiaoNianGaoAuthor(object):
    """
    Xiaoniangao account crawler.

    Holds the crawl configuration (platform, mode, rules, account list,
    environment) together with the message-queue client and logger that are
    created in ``__init__``.
    """

    def __init__(self, platform, mode, rule_dict, user_list, env="prod"):
        """
        Store the crawl configuration and create the MQ client and logger.

        :param platform: platform identifier, forwarded to AliyunLogger
        :param mode: crawl mode identifier, forwarded to AliyunLogger
        :param rule_dict: crawl rules (stored as-is; not read in this class yet)
        :param user_list: accounts to crawl; each entry is later passed to
            ``get_user_videos`` as ``user_dict``
        :param env: environment name, appended to the MQ topic name
        """
        self.platform = platform
        self.mode = mode
        self.rule_dict = rule_dict
        self.user_list = user_list
        self.env = env
        self.download_cnt = 0
        self.mq = MQ(topic_name="topic_crawler_etl_" + self.env)
        self.expire_flag = False
        self.aliyun_log = AliyunLogger(platform=self.platform, mode=self.mode)

    def split_accounts(self):
        """
        Process user_list and pick out the important accounts.

        No filtering is applied yet: the full ``user_list`` is returned.

        :return: the account list given to the constructor
        """
        return self.user_list

    async def get_user_videos(self, user_dict):
        """
        Request the public album listing of one account and print each response.

        :param user_dict: account record; ``user_dict["link"]`` must be
            convertible to ``int`` and is sent as ``visited_mid``
        """
        url = "https://kapi-xng-app.xiaoniangao.cn/v1/album/user_public"
        headers = {
            'Host': 'kapi-xng-app.xiaoniangao.cn',
            'content-type': 'application/json; charset=utf-8',
            'accept': '*/*',
            'authorization': 'hSNQ2s9pvPxvFn4LaQJxKQ6/7Is=',
            'verb': 'POST',
            'content-md5': 'c7b7f8663984e8800e3bcd9b44465083',
            'x-b3-traceid': '2f9da41f960ae077',
            'accept-language': 'zh-cn',
            'date': 'Mon, 19 Jun 2023 06:41:17 GMT',
            'x-token-id': '',
            'x-signaturemethod': 'hmac-sha1',
            'user-agent': 'xngapp/157 CFNetwork/1335.0.3.1 Darwin/21.6.0'
        }
        async with aiohttp.ClientSession() as session:
            next_index = -1
            # Only crawl newly updated videos; exit as soon as an
            # already-crawled one is reached.
            # NOTE(review): as written, nothing in this loop breaks or advances
            # `next_index`, so the same request repeats indefinitely. The
            # pagination/exit logic described above still needs implementing.
            while True:
                payload = {
                    "token": "",
                    "limit": 20,
                    "start_t": next_index,
                    "visited_mid": int(user_dict["link"]),
                    "share_width": 300,
                    "share_height": 240,
                }
                # The body is serialized by hand and sent via `data=` so the
                # explicit content-type header above is used unchanged.
                async with session.post(url, headers=headers, data=json.dumps(payload)) as response:
                    data = await response.json()
                    print(data)

    async def scan_important_accounts(self, accounts):
        """
        Batch-scan the important accounts.

        Creates one ``get_user_videos`` coroutine per account and awaits them
        together with ``asyncio.gather``.

        :param accounts: important accounts, each a ``user_dict`` record
        """
        tasks = [self.get_user_videos(account) for account in accounts]
        await asyncio.gather(*tasks)

    async def run(self):
        """
        Control function (entry point).

        Currently only invokes ``split_accounts``; its result is discarded.
        NOTE(review): the selected accounts are not passed on to
        ``scan_important_accounts`` yet -- confirm the intended wiring.

        :return: None
        """
        # Must match the method defined above; a misspelled name here would
        # raise AttributeError on every run.
        self.split_accounts()