xiaoniangao.py 2.3 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677
  1. """
  2. @Author : luojunhui
  3. 小年糕账号爬虫
  4. """
  5. import os
  6. import sys
  7. import json
  8. import time
  9. import uuid
  10. import random
  11. import asyncio
  12. import aiohttp
  13. import datetime
  14. sys.path.append(os.getcwd())
  15. from application.items import VideoItem
  16. from application.pipeline import PiaoQuanPipeline
  17. from application.common.messageQueue import MQ
  18. from application.common.log import AliyunLogger
  19. class XiaoNianGaoAuthor(object):
  20. """
  21. 小年糕账号爬虫
  22. """
  23. def __init__(self, platform, mode, rule_dict, user_list, env="prod"):
  24. self.platform = platform
  25. self.mode = mode
  26. self.rule_dict = rule_dict
  27. self.user_list = user_list
  28. self.env = env
  29. self.download_cnt = 0
  30. self.mq = MQ(topic_name="topic_crawler_etl_" + self.env)
  31. self.expire_flag = False
  32. self.aliyun_log = AliyunLogger(platform=self.platform, mode=self.mode)
  33. async def get_user_videos(self, user_dict):
  34. """
  35. 小年糕执行代码
  36. """
  37. url = "https://kapi-xng-app.xiaoniangao.cn/v1/album/user_public"
  38. headers = {
  39. 'Host': 'kapi-xng-app.xiaoniangao.cn',
  40. 'content-type': 'application/json; charset=utf-8',
  41. 'accept': '*/*',
  42. 'authorization': 'hSNQ2s9pvPxvFn4LaQJxKQ6/7Is=',
  43. 'verb': 'POST',
  44. 'content-md5': 'c7b7f8663984e8800e3bcd9b44465083',
  45. 'x-b3-traceid': '2f9da41f960ae077',
  46. 'accept-language': 'zh-cn',
  47. 'date': 'Mon, 19 Jun 2023 06:41:17 GMT',
  48. 'x-token-id': '',
  49. 'x-signaturemethod': 'hmac-sha1',
  50. 'user-agent': 'xngapp/157 CFNetwork/1335.0.3.1 Darwin/21.6.0'
  51. }
  52. async with aiohttp.ClientSession() as session:
  53. next_index = -1
  54. # 只抓取更新的视频,如果刷到已经更新的立即退出
  55. while True:
  56. payload = {
  57. "token": "",
  58. "limit": 20,
  59. "start_t": next_index,
  60. "visited_mid": int(user_dict["link"]),
  61. "share_width": 300,
  62. "share_height": 240,
  63. }
  64. async with session.post(
  65. url,
  66. headers=headers,
  67. data=json.dumps(payload)
  68. ) as response:
  69. data = await response.json()