# -*- coding: utf-8 -*-
import json
import os
import random
import subprocess
import sys
import time
import uuid
from datetime import datetime

import requests
from appium import webdriver
from appium.webdriver.extensions.android.nativekey import AndroidKey
from appium.webdriver.common.touch_action import TouchAction
from bs4 import BeautifulSoup
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By

sys.path.append(os.getcwd())
from application.common import MysqlHelper, Feishu

# Seconds to wait for an admin-backend HTTP call before giving up, so that a
# stalled connection cannot block the crawl indefinitely.
_REQUEST_TIMEOUT = 30

# Comma-separated uid list sent with every crawler-task save request; the
# newly created uid is appended to it in ``save_pq_uid``.
_BASE_TASK_UIDS = (
    "58527261,58527262,58527263,58527264,58527265,58527266,58527267,58527268,58527269,58527270,58527271,58527272,58527274,58527275,58527277,58527278,58527279,58527280,58527281,58527282,58527284,58527285,58527287,58527289,58527291,58527292,58527293,58527294,58527297,58527302,58527303,58527304,58527307,58527310,58527313,58527314,58527316,58527317,58527318,58527319,58527320,58527323,58527324,58527326,58527328,58527329,58527331,58527332,58527333,58527334,58527335,58527336,58527337,58527338,58527339,58527341,58527342,58527343,58527344,58527345,58527347,58527348,58527349,58527350,58527351,58527352,58527354,58527355,58527356,58527357,58527358,58527359,58527360,58527361,58527362,58527363,58527364,58527365,58527366,58527367,58527368,58527369,58527370,58527372,58527373,58527374,58527375,58527376,58527377,58527378,58527380,58527381,58527382,58527384,58527386,58527388,58527389,58527390,58527391,58527392,58527393,58527394,58527395,58527396,58527397,58527398,58527399,58527401,58527402,58527403,58527404,58527405,58527406,58527407,58527408,58527555,58527556,58527557,58527558,58527559,58527560,58527561,58527562,58527563,58527564,58527565,58527566,58527567,58527569,58527570,58527572,58527573,58527574,58527575,58527577,58527578,58527579,58527580,58527582,58527583,58527584,58527585,58527586,58527587,"
    "58527588,58527590,58527591,58527592,58527594,58527595,58527596,58527597,58527598,58527599,58527600,58527601,58527602,58527603,58527604,58527605,58527606,58527607,58527608,58527610,58527611,58527612,58527613,58527614,58527615,58527616,58527617,58527618,58527619,58527620,58527622,58527623,58527624,58527625,58527626,58527627,58527628,58527630,58527631,58527632,58527633,58527634,58527635,58527636,58527637,58527638,58527639,58527640,58527641,58527642,58527643,58527644,58527646,58527647,58527648,58527649,58527650,58527651,58527653,58527654,58527655,58527656,58527657,58527658,58527659,58527661,58527662,58527663,58527665,58527666,58527668,58527669,58527670,58527671,58527672,58527674,58527675,58527676,58527677,58527678,58527679,58527680,58527681,58527682,58527683,58527684,58527686,58527687,58527689,58527691,58527693,58527694,58527695,58527696,58527699,58527700,58527701,58527703,58527774,58527775,58527776,58527777,58527778,58527779,58527780,58527781,58527782,58527783,58527784,58527785,58527786,58527787,58527788,58527789,58527790,58527791,58528067,58528069,58528070,58528071,58528072,58528075,58528076,58528077,58528083,58528084,58528085,58528087,58528089,58528090,58528095,58528101,58528102,58528104,58528108,58528110,58528111,58528112,58528113,58528114,58528116,58528117,58528118,58528119,58528120,58528121,58528122,58528126,58528127,58528128,58528129,58528130,58528131,58528133,58528134,58528136,58528137,58528138,58528139,58528140,58528141,58528142,58528143,58528144,58528145,58528146,58528147,58528148,58528149,58528150,58528151,58528153,58528154,58528155,58528157,58528158,58528159,58528160,58528161,58528162,58528164,58528165,58528166,58528167,58528168,58528169,58528170,58528172,58528173,58528174,58528175,58528176,58528178,58528180,58528182,58528183,58528184,58528185,58528186,58528187,58528191,58528192,58528193,58528194,58528195,58528196,58528243,58528244,58528245,58528246,58528248,58528249,58528251,58528252,58528254,58528256,58528257,58528258,58528259,58528261,58528262,58528263,58528264,"
    "58528265,58528266,58528267,58528268,58528269,58528270,58528271,58528273,58528275,58528276,58528277,58528278,58528279,58528281,58528282,58528285,58528286,58528287,58528288,58528289,58528290,58528291,58528292,58528293,58528294,58528295,58528296,58528297,58528298,58528300,58528301,58528302,58528304,58528305,58528306,58528307,58528308,58528309,58528311,58528313,58528314,58528315,58528316,58528317,58528318,58528319,58528322,58528323,58528324,58528325,58528326,58528327,58528328,58528329,58528330,58528331,58528332,58528333,58528334,58528335,58528336,58528337,58528338,58528339,58528340,58528341,58528342,58528343,58528344,58528345,58528346,58528347,58528349,58528350,58528351,58528352,58528353,58528354,58528355,58528356,58528357,58528358,58528359,58528360,58528361,58528362,58528363,58528364,58528365,58528366,58528368,58528369,58528370,58528371,58528372,58528373,58528374,58528375,58528376,58528377,58528378,58528379,58528380,58528381,58528382,58528383,58528385,58528386,58528387,58528389,59441154,59441155,59441156,59441157,59441159,59441161,59441162,59441163,59441164,59441165,59441167,59441168,59441169,59441171,59441172,59441173,59441174,59441175,59441176,59441177,59441178,59441179,59441180,59441181,59441184,59441185,59441187,59441189,59441190,59441191,59441192,59441193,59441195,59441196,59441198,59441199,59441200,59441201,59441203,59441204,59441205,59441206,59441207,59441208,59441209,59441542,59441543,59441544,59441545,59441546,59441548,59441549,59441550,59441551,59441552,59441553,59441554,59441555,59441556,59441557,59441558,59441559,59441561,59441562,59441563,59441564,59441567,59441568,59441570,59441571,59441572,59441573,59441574,59441575,59441576,59441578,59441579,59441580,59441581,59441582,59441583,59441585,59441586,59441587,59441588,59441589,59441590,59441591,59441592,59441593,59758578,60450738,60450739,60450741,60450742,60450743,60450745,60450746,60450804,60450805,60450806,60450807,60450808,60450809,60450810,60450864,60450865,60450867,60450868,60450869,60450871,61423955,"
    "61423957,61423959,61423960,61423961,61423962,61423963,61424502,63618094,63618095,63618096,63618097,63618098,63618100,63618101,63618102,63618103,63618104,63618105,63642197,63642198,63642199,63642200,63642201,63642202,63642203,63642204,63642205,63642206,63642207,63642208,63642209,63642210,63642211,63642212,63642213,64174802,64174948,64552369,64552371,64552372,64552373,64552375,64553460,64989051,64989053,64989054,64989055,64989056,64989057,64989058,64989059,64989060,64989062,64989063,64989065,64989066,64989067,64989069,64989071,64989073,64989074,64989075,64989076,64989077,65084914,65084915,65084916,65084917,65452079,65452080,65452082,65452083,65487732,65487734,65487735,65487736,65533417,65533418,65676858,65676859,65676860,65676862,65886766,65886769,65886770,65886771,65886772,65926643,66145108,66145109,66145110,66145111,66145112,66145114,66145117,66807288,66807289,66807290,66807291,66807292,66807293,66807294,66807296,66807297,66807298,66807300,66807301,66807302,66807303,66807304,66807305,66807306,66807307,66807308,66807310,68346900,68346902,68346904,68346905,68346906,68346907,68346908,68346909,68346910,68346911,68346912,68346914,68346915,68346916,68346917,68346920,68346921,68346923,68892013,68892178,68892325,68892424,69017089,69017581,69017582,69084282,69084363,69091905,69091954,69091983,69092023,69161560,69161602,69161850,69163633,69163634,69179667,69180147,69180493,69180556,69180599,69180683,69180710,69180734,69180776,69181050,69297987,69298154,69298238,69304074,69304179,69304253,69304319,69304462,69304493,69304813,69304841,69324493,69324596,69324714,69325233,69325509,69325784,69328364,69343378,69344922,69345210,69345361,69347833,69347873,69386357,69480915,69480934,69480960,69481068,69481088,69483032,69483054,69483187,69493233,69493272,69493437,69493438,69493538,69493779,69493801,69493817,69493984,69493986,69493987,69493988,69514013,69580705,69580740,69580783,69580858,69580977,69591985,69592122,69592123,69610885,69610951,70915819,70915891,70915922,70915979,70916046,"
    "70916056,70916058,70916059,70916078,70916092,70916137,70916250,70921732,70921733,70921735,70921736,70921737,70921738,70921739,70921740,70921741,70921742,70921743,70921744,70921746,70921747,70921749,70921750,70921752,70921754,70937312,70937313,70937365,70943283,70943312,70943503,71075813,71080124,71105342,71105754,71105836,71106177,71106292,71106719,71106782,71106857,71107010,71107213,71107656,71107962,71108256,71108389,71108562,71108583,71108983,71109030,71109204,71109271,71110512,71110659,71110728,71110909,71111012,71111286,71111691,71112132,71112174,71112329,71112791,71132231,71132393,71132506,71132673,71132730,71132886,71133848,71134279,71134404,71134791,71134830,71134988,71135278,71136315,71136398,71136587,71136909,71137395,71137425,71137703,71138142,71138353,71138689,71139404,71159271,71159510,71159644,71159884,71160166,71160210,71160616,71161162,71161764,71161857,71162146,71162563,71162939,71163345,71164417,71164818,71165300,71165365,71165824,71166074,71166378,71166531,71191366,71191499,71191936,71192522,71192553,71193793,71194093,71194296,71194818,71195076,71195822,71196276,71196724,71196804,71197109,71197175,71198217,71198711,71199541,71199580,71200008,71201487,71201612,71201706,71241510,71242012,71242327,71242695,71243018,71243887,71244006,71244951,71245411,71245457,71249762,71250870,71251336,71251833,71280753,71281457,71281917,71282352,71282742,71283423,71283505,71284478,71285890,71286177,71286894,71288832,71312885,71313427,71313854,71314180,71316095,71316151,71316526,71317777,71318449,71318504,71318539,71318698,71320268,71345584,71346008,71346178,71347856,71348630,71349539,71349621,71350830,71350981,71351770,71375796,71375860,71376356,71376472,71377289,71379151,71379639,71379699,71379845,71379907,71380924,71381502,71382022,71383383,71406670,71406744,71406767"
)

# Request headers shared by the account-creation and tag-lookup calls.
# NOTE(review): the session cookie is hard-coded; it presumably expires and
# should be moved to configuration.
_ADMIN_HEADERS = {
    'content-length': '0',
    'cookie': 'SESSION=MWM4YzVlMTctNzdkNC00NjE3LWIxZTctOGQwYzgzYmVmN2Qw',
    'origin': 'https://admin.piaoquantv.com',
    'priority': 'u=1, i',
    'sec-ch-ua': '"Not/A)Brand";v="8", "Chromium";v="126", "Google Chrome";v="126"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"macOS"'
}

# Characters that must be backslash-escaped inside a MySQL string literal.
_SQL_ESCAPES = str.maketrans({
    "\0": "\\0",
    "\n": "\\n",
    "\r": "\\r",
    "\x1a": "\\Z",
    '"': '\\"',
    "'": "\\'",
    "\\": "\\\\",
})


def _sql_escape(value):
    """Return ``value`` as text that is safe inside a quoted MySQL literal.

    Values without special characters are returned unchanged, so the
    generated SQL is identical to plain interpolation for ordinary input.
    NOTE(review): assumes the server does not run with
    NO_BACKSLASH_ESCAPES; switch to parameterized queries if
    ``MysqlHelper`` supports them.
    """
    return str(value).translate(_SQL_ESCAPES)


def _xpath_literal(text):
    """Return ``text`` as an XPath string literal, whatever quotes it holds."""
    if '"' not in text:
        return f'"{text}"'
    if "'" not in text:
        return f"'{text}'"
    # Both quote kinds present: stitch the pieces together with concat().
    pieces = text.split('"')
    return "concat(" + ", '\"', ".join(f'"{piece}"' for piece in pieces) + ")"


class XiaoNianGaoZH(object):
    """Offline crawler for the "小年糕+" WeChat mini program.

    Instantiating the class runs the whole crawl: it opens WeChat through
    Appium, enters the mini program, walks the video feed and registers
    newly seen authors in the admin backend, MySQL and a Feishu sheet.
    """

    def __init__(self):
        """Start the Appium session, run the crawl and always quit the driver."""
        self.count = 0
        self.swipe_count = 0
        chromedriverExecutable = "/Users/a123456/Downloads/chromedriver-mac-x64/chromedriver"
        print("启动微信")
        # Appium capabilities for the WeChat session.
        caps = {
            "platformName": "Android",
            "devicesName": "Android",
            "appPackage": "com.tencent.mm",
            "appActivity": ".ui.LauncherUI",
            "autoGrantPermissions": True,
            "noReset": True,
            "resetkeyboard": True,
            "unicodekeyboard": True,
            "showChromedriverLog": True,
            "printPageSourceOnFailure": True,
            "recreateChromeDriverSessions": True,
            "enableWebviewDetailsCollection": True,
            "setWebContentsDebuggingEnabled": True,
            "newCommandTimeout": 6000,
            "automationName": "UiAutomator2",
            "chromedriverExecutable": chromedriverExecutable,
            "chromeOptions": {"androidProcess": "com.tencent.mm:appbrand0"},
        }
        try:
            self.driver = webdriver.Remote("http://localhost:4723/wd/hub", caps)
        except Exception as e:
            print(e)
            return
        try:
            self._wait_for_wechat()
            self._open_mini_program()
            time.sleep(5)
            self.get_videoList()
            time.sleep(1)
        finally:
            # Release the Appium session even when the crawl raises.
            self.driver.quit()

    def _wait_for_wechat(self):
        """Poll up to 10 times until the WeChat main screen element is present."""
        self.driver.implicitly_wait(30)
        for _ in range(10):
            try:
                if self.driver.find_elements(By.ID, "com.tencent.mm:id/f2s"):
                    print("启动微信成功")
                    break
                elif self.driver.find_element(
                    By.ID, "com.android.systemui:id/dismiss_view"
                ):
                    print("发现并关闭系统下拉菜单")
                    size = self.driver.get_window_size()
                    # Swipe upwards to close the system pull-down menu.
                    self.driver.swipe(
                        int(size["width"] * 0.5),
                        int(size["height"] * 0.8),
                        int(size["width"] * 0.5),
                        int(size["height"] * 0.2),
                        200,
                    )
            except Exception as e:
                # find_element raises when the element is absent; retry.
                print(f"打开微信异常:{e}")
                time.sleep(1)

    def _open_mini_program(self):
        """Swipe down on the WeChat screen and tap the "小年糕+" entry."""
        size = self.driver.get_window_size()
        self.driver.swipe(
            int(size["width"] * 0.5),
            int(size["height"] * 0.2),
            int(size["width"] * 0.5),
            int(size["height"] * 0.8),
            200,
        )
        time.sleep(1)
        # Fixed command string with no external input, so shell=True is safe.
        # NOTE(review): presumably collapses the status bar panel that the
        # swipe may have pulled down — confirm on the target device.
        command = 'adb shell service call statusbar 2'
        subprocess.run(command, shell=True, stdout=subprocess.PIPE)
        self.driver.find_elements(By.XPATH, '//*[@text="小年糕+"]')[-1].click()
        print("打开小程序小年糕+成功")

    def save_pq_uid(self, uid):
        """POST the crawler task with ``uid`` appended to the base uid list.

        :param uid: uid to append to the task's comma-separated uid list.
        The response is not inspected; a request that exceeds the timeout
        raises a ``requests`` exception.
        """
        url = "https://admin.piaoquantv.com/manager/crawler/v3/task/save"
        payload = {
            "taskName": "小年糕账号",
            "source": "xiaoniangao",
            "mode": "author",
            "modeValue": "0",
            "modeBoard": "0",
            "spiderName": "run_xng_author",
            "startTime": 1720589960000,
            "interval": 4800,
            "uid": f"{_BASE_TASK_UIDS},{uid}",
            "machine": "aliyun",
            "rule": [
                {"period": {"min": 15, "max": 3}},
                {"duration": {"min": 50, "max": 0}},
                {"share_cnt": {"min": 2, "max": 0}},
                {"videos_cnt": {"min": 300, "max": 0}},
            ],
            "id": 21
        }
        headers = {
            'accept': 'application/json',
            'content-type': 'application/json;',
            'cookie': 'SESSION=ZmYwMzBmOWItM2M5YS00ZGMyLTk3MjctMzE0YzE4MmUxNThh',
            'origin': 'https://admin.piaoquantv.com',
            'pragma': 'no-cache',
            'priority': 'u=1, i',
            'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36'
        }
        requests.request(
            "POST", url, headers=headers, json=payload, timeout=_REQUEST_TIMEOUT
        )

    def search_elements(self, xpath):
        """Return the first non-empty XPath match across all window handles.

        Switches the driver to each window handle in turn and leaves it on
        the handle where the match was found.

        :param xpath: XPath expression to look up.
        :return: list of matching elements, or ``None`` when no window
            contains a match.
        """
        time.sleep(1)
        for handle in self.driver.window_handles:
            self.driver.switch_to.window(handle)
            time.sleep(1)
            try:
                elements = self.driver.find_elements(By.XPATH, xpath)
            except NoSuchElementException:
                continue
            if elements:
                return elements
        return None

    def check_to_applet(self, xpath):
        """Switch to the last driver context and the window containing ``xpath``.

        :param xpath: XPath that identifies the wanted window.
        Returns silently, leaving the driver on the last window tried, when
        no window contains the element.
        """
        time.sleep(1)
        contexts = self.driver.contexts
        self.driver.switch_to.context(contexts[-1])
        for handle in self.driver.window_handles:
            self.driver.switch_to.window(handle)
            time.sleep(1)
            try:
                self.driver.find_element(By.XPATH, xpath)
                print("切换到WebView成功\n")
                return
            except NoSuchElementException:
                time.sleep(1)

    def swipe_up(self):
        """Scroll the feed with one slow upward drag and count the swipe."""
        # Called for its side effect of switching to the window with the list.
        self.search_elements('//*[@class="list-list--list"]')
        size = self.driver.get_window_size()
        action = TouchAction(self.driver)
        action.press(x=int(size["width"] * 0.5), y=int(size["height"] * 0.85))
        action.wait(ms=1300)  # the wait time can be tuned
        action.move_to(x=int(size["width"] * 0.5), y=int(size["height"] * 0.2))
        action.release()
        action.perform()
        self.swipe_count += 1

    def get_video_url(self, video_title_element):
        """Open the video behind the title element and return its ``src``.

        :param video_title_element: element list; the first entry is
            scrolled into view and clicked.
        :return: ``src`` attribute of the first video element found.
        :raises NoSuchElementException: when no video element is found
            after the click.
        """
        video_xpath = '//wx-video[@class="dynamic-index--video-item dynamic-index--video"]'
        self.search_elements('//*[@class="list-list--list"]')
        time.sleep(1)
        self.driver.execute_script(
            "arguments[0].scrollIntoView({block:'center',inline:'center'});",
            video_title_element[0],
        )
        time.sleep(3)
        video_title_element[0].click()
        self.check_to_applet(xpath=video_xpath)
        time.sleep(10)
        video_url_elements = self.search_elements(video_xpath)
        if not video_url_elements:
            raise NoSuchElementException("video element not found on the detail page")
        return video_url_elements[0].get_attribute("src")

    def parse_detail(self, index):
        """Return the feed card at position ``index + 1`` of the current page.

        :param index: zero-based loop index; the card one position further
            is returned.
        :return: the parsed ``wx-view`` card.
        :raises IndexError: when the page holds no card at that position.
        """
        soup = BeautifulSoup(self.driver.page_source, "html.parser")
        video_list = soup.find_all(
            name="wx-view", attrs={"class": "expose--adapt-parent"}
        )
        candidates = video_list[index + 1:]
        return candidates[0]

    def get_video_info_2(self, video_element):
        """Process one feed card and register its author when it is new.

        :param video_element: parsed card as returned by ``parse_detail``.
        Stops early when the name/avatar pair is already stored, when the
        title cannot be located, or when the author was registered before.
        """
        self.count += 1
        video_title = video_element.find("wx-view", class_="dynamic--title").text
        # Avatar URL
        avatar_url = video_element.find("wx-image", class_="avatar--avatar")["src"]
        # User name
        user_name = video_element.find("wx-view", class_="dynamic--nick-top").text

        # select_name_url returns True when the pair is NOT stored yet.
        if not self.select_name_url(avatar_url, user_name):
            return
        video_title_element = self.search_elements(
            f"//*[contains(text(), {_xpath_literal(video_title)})]"
        )
        if video_title_element is None:
            return

        # Open the video page to read the author's mid, then go back.
        self.get_video_url(video_title_element)
        video_mid_elements = self.search_elements(
            "//wx-view[@class='bar--navBar-content-capsule-wrap']"
        )
        mid = int(video_mid_elements[0].get_attribute("data-mid"))
        self.driver.press_keycode(AndroidKey.BACK)
        time.sleep(5)

        if self.select_id(mid):
            # Known author: only refresh the stored name and avatar.
            self.update_name_url(mid, avatar_url, user_name)
            return
        time.sleep(1)
        # select_id_status returns True when the mid has no crawler_user_v3 row.
        if not self.select_id_status(mid):
            return

        # Look up (or create) the tag named after today's date.
        date_int = int(datetime.now().strftime("%Y%m%d"))
        tag_id = self.get_tag_id(date_int)
        time.sleep(5)
        print(tag_id)
        # Create the account in the admin backend.
        pq_uid = self.insert_number(mid, tag_id)
        time.sleep(5)
        if not pq_uid:
            return

        self.insert_name_url(mid, avatar_url, user_name)
        time.sleep(2)
        self.save_pq_uid(pq_uid)
        formatted_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        values = [[
            str(mid),
            user_name,
            avatar_url,
            str(pq_uid),
            formatted_time,
        ]]
        Feishu.insert_columns('xiaoniangao', 'xiaoniangao', "8zlceR", "ROWS", 1, 2)
        time.sleep(0.5)
        Feishu.update_values('xiaoniangao', 'xiaoniangao', "8zlceR", "A2:Z2", values)
        print("写入飞书表格成功")

    def get_video_info(self, video_element):
        """Run ``get_video_info_2`` and print, rather than raise, any error."""
        try:
            self.get_video_info_2(video_element)
        except Exception as e:
            print(f"抓取单条视频异常:{e}\n")

    def get_videoList(self):
        """Walk the video feed: process one card, swipe, repeat 50 times.

        Returns early, resetting the counters, when the feed list cannot be
        found in any window.
        """
        self.driver.implicitly_wait(20)
        # Switch to the web view.
        self.check_to_applet(xpath='//*[@class="tab-bar--tab tab-bar--tab-selected"]')
        print("切换到 webview 成功")
        time.sleep(1)
        if self.search_elements('//*[@class="list-list--list"]') is None:
            print("窗口已销毁")
            self.count = 0
            self.download_cnt = 0
            self.element_list = []
            return

        print("开始获取视频信息")
        for i in range(50):
            print("下滑{}次".format(i))
            element = self.parse_detail(i)
            self.get_video_info(element)
            self.swipe_up()
            time.sleep(random.randint(1, 5))

    def insert_number(self, mid, tag_id):
        """Create the account in the admin backend and return its uid.

        Tries up to three times. After a save that reports code 0, the
        first entry of the user list is checked against ``mid``.

        :param mid: author id, stored as the account's ``link``.
        :param tag_id: tag id appended to the fixed tag list.
        :return: the ``uid`` of the matching account, or ``None`` when no
            attempt produced a match.
        """
        save_url = "https://admin.piaoquantv.com/manager/crawler/v3/user/save"
        list_url = "https://admin.piaoquantv.com/manager/crawler/v3/user/list"
        for _ in range(3):
            payload = {
                "source": "xiaoniangao",
                "mode": "author",
                "modeValue": "",
                "modeBoard": "",
                "recomStatus": -6,
                "appRecomStatus": -6,
                "autoAuditStatus": 0,
                "tag": f"459,454,106,8240,{int(tag_id)}",
                "contentCategory": 0,
                "link": str(mid)
            }
            saved = requests.request(
                "POST", save_url, headers=_ADMIN_HEADERS, json=payload,
                timeout=_REQUEST_TIMEOUT,
            ).json()
            if saved["code"] != 0:
                continue
            print("添加账号成功")
            time.sleep(1)
            page = {
                "pageNum": 1,
                "pageSize": 20
            }
            listing = requests.request(
                "POST", list_url, headers=_ADMIN_HEADERS, json=page,
                timeout=_REQUEST_TIMEOUT,
            ).json()
            users = listing["content"]['list']
            # An empty list counts as "no match" instead of raising.
            if users and users[0]["link"] == str(mid):
                print("获取站内账号ID成功")
                return users[0]["uid"]
        return None

    def select_name_url(self, avatar_url, user_name):
        """Check whether the user name + avatar pair is already stored.

        :return: ``False`` when a matching ``xng_uid`` row exists, ``True``
            when the pair is new.
        """
        avatar = _sql_escape(avatar_url)
        name = _sql_escape(user_name)
        sql = f""" select uid from xng_uid where avatar_url = "{avatar}" and user_name="{name}"; """
        db = MysqlHelper()
        return not db.select(sql=sql)

    def get_tag_id(self, date_int):
        """Return the id of the tag named ``date_int``, creating it if missing.

        Tries up to three times.

        :param date_int: tag keyword/name, e.g. today's date as ``YYYYMMDD``.
        :return: the tag id, or ``None`` when every attempt came back empty.
        """
        search_url = f"https://admin.piaoquantv.com/manager/user/up/searchUserTypeTag?keyword={date_int}&muid=7"
        create_url = f"https://admin.piaoquantv.com/manager/user/up/createUserTypeTag?tagName={date_int}&muid=7"
        payload = {}
        for _ in range(3):
            found = requests.request(
                "POST", search_url, headers=_ADMIN_HEADERS, data=payload,
                timeout=_REQUEST_TIMEOUT,
            ).json()["content"]
            if found:
                return found[0]['tagId']
            created = requests.request(
                "POST", create_url, headers=_ADMIN_HEADERS, data=payload,
                timeout=_REQUEST_TIMEOUT,
            ).json()["content"]
            if created:
                return created['tagId']
        return None

    def update_name_url(self, mid, avatar_url, user_name):
        """Update the stored user name and avatar of ``mid``.

        :return: ``True`` when ``MysqlHelper.update`` reports a truthy result.
        """
        avatar = _sql_escape(avatar_url)
        name = _sql_escape(user_name)
        key = _sql_escape(mid)
        sql = f""" update xng_uid set avatar_url = "{avatar}", user_name="{name}" where uid = "{key}"; """
        db = MysqlHelper()
        return bool(db.update(sql=sql))

    def insert_name_url(self, uid, avatar_url, user_name):
        """Insert user id, avatar and user name with the current timestamp.

        :return: ``True`` when ``MysqlHelper.update`` reports a truthy result.
        """
        formatted_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        key = _sql_escape(uid)
        avatar = _sql_escape(avatar_url)
        name = _sql_escape(user_name)
        insert_sql = f"""INSERT INTO xng_uid (uid, avatar_url, user_name, data_time) values ('{key}' ,'{avatar}','{name}', '{formatted_time}')"""
        db = MysqlHelper()
        return bool(db.update(sql=insert_sql))

    def select_id(self, uid):
        """Return ``True`` when ``uid`` already has a row in ``xng_uid``."""
        key = _sql_escape(uid)
        sql = f""" select uid from xng_uid where uid = "{key}"; """
        db = MysqlHelper()
        return bool(db.select(sql=sql))

    def select_id_status(self, uid):
        """Check whether the user id was added before.

        :return: ``False`` when ``crawler_user_v3`` already has a row whose
            ``link`` equals ``uid``, ``True`` otherwise.
        """
        key = _sql_escape(uid)
        sql = f""" select uid from crawler_user_v3 where link = "{key}"; """
        db = MysqlHelper()
        return not db.select(sql=sql)


if __name__ == "__main__":
    XiaoNianGaoZH()