Browse Source

增加视频号相似溯源

zhangyong 8 months ago
parent
commit
820be4a773

+ 4 - 2
common/feishu_utils.py

@@ -316,12 +316,14 @@ class Feishu:
     @classmethod
     def bot(cls, log_type, crawler, text, mark_name):
         try:
-            url = "https://open.feishu.cn/open-apis/bot/v2/hook/e7697dc6-5254-4411-8b59-3cd0742bf703"
+
             headers = {'Content-Type': 'application/json'}
             if crawler == "机器自动改造消息通知":
+                url = "https://open.feishu.cn/open-apis/bot/v2/hook/e7697dc6-5254-4411-8b59-3cd0742bf703"
                 sheet_url = "https://w42nne6hzg.feishu.cn/sheets/KsoMsyP2ghleM9tzBfmcEEXBnXg?sheet=bc154d"
                 users = f"<at id=" + str(cls.get_userid(log_type)) + f">{mark_name}</at>"
             else:
+                url = "https://open.feishu.cn/open-apis/bot/v2/hook/7928f182-08c1-4c4d-b2f7-82e10c93ca80"
                 sheet_url = "https://w42nne6hzg.feishu.cn/sheets/KsoMsyP2ghleM9tzBfmcEEXBnXg?sheet=bc154d"
                 users = f"<at id=" + str(cls.get_userid(log_type)) + f">{mark_name}</at>"
             data = json.dumps({
@@ -352,7 +354,7 @@ class Feishu:
                     }],
                     "header": {
                         "title": {
-                            "content": "📣Cookie 过期提醒",
+                            "content": "📣消息提醒",
                             "tag": "plain_text"
                         }
                     }

+ 17 - 0
common/sql_help.py

@@ -268,3 +268,20 @@ class sqlCollect():
             sql=insert_sql
         )
 
+
+    """相似溯源-视频号数据插入"""
+    @classmethod
+    def insert_xssy_sph_info(cls, account_user: str, traceable_user: str, traceable_user_v2: str,  has_used: str, appid:Optional[str] = None, pq_id:Optional[str] = None):
+        insert_sql = f"""INSERT INTO xssy_sph (account_user, traceable_user, traceable_user_v2, pq_id, has_used, appid) values ("{account_user}", "{traceable_user}","{traceable_user_v2}","{pq_id}", {has_used},"{appid}")"""
+        MysqlHelper.update_values(
+            sql=insert_sql
+        )
+
+    """查询该账号是否存在"""
+    @classmethod
+    def select_crawler_uesr_v3(cls, link: str):
+        sql = """SELECT link FROM crawler_uesr_v3 WHERE link = %s  and source = 'jiqizidonggaizao'"""
+        data = MysqlHelper.get_values(sql, (link))
+        if data:
+            return data
+        return None

+ 8 - 1
data_channel/data_help.py

@@ -13,4 +13,11 @@ class dataHelp():
             frame_num = cap.get(7)
             duration = frame_num / rate
             return duration
-        return 0
+        return 0
+
+
+
if __name__ == '__main__':
    # Ad-hoc manual check: print the duration of one local sample video.
    # NOTE(review): hard-coded developer-machine path — this only runs on
    # that machine; consider taking the path from sys.argv instead.
    a = dataHelp.video_duration("/Users/tzld/Desktop/video_rewriting/path/aiyuepw_video.mp4")
    print(a)

+ 2 - 2
data_channel/douyin.py

@@ -55,9 +55,9 @@ class DY:
             data = obj.get('aweme_list', [])
             if data == [] and len(data) == 0:
                 if name == '抖音品类账号' or name == '抖音品类账号-1':
-                    Feishu.bot("wangxueke", '机器自动改造消息通知', f'{name}cookie过期,请及时更换', 'wangxueke')
+                    Feishu.bot("wangxueke", '机器自动改造消息通知', f'抖音-{name}cookie过期,请及时更换', 'wangxueke')
                 else:
-                    Feishu.bot(mark, '机器自动改造消息通知', f'{name}cookie过期,请及时更换', name)
+                    Feishu.bot(mark, '机器自动改造消息通知', f'抖音-{name}cookie过期,请及时更换', name)
                 return list
             response.close()
             for i in range(len(data)):

+ 2 - 2
data_channel/kuaishou.py

@@ -76,9 +76,9 @@ class KS:
                 return list
             elif "visionProfilePhotoList" not in response.json()["data"]:
                 if name == '快手品类账号':
-                    Feishu.bot("wangxueke", '机器自动改造消息通知', f'{name}cookie过期,请及时更换', 'wangxueke')
+                    Feishu.bot("wangxueke", '机器自动改造消息通知', f'快手-{name}cookie过期,请及时更换', 'wangxueke')
                 else:
-                    Feishu.bot(mark, '机器自动改造消息通知', f'{name}cookie过期,请及时更换', name)
+                    Feishu.bot(mark, '机器自动改造消息通知', f'快手-{name}cookie过期,请及时更换', name)
                 time.sleep(900)
                 continue
             elif "feeds" not in response.json()["data"]["visionProfilePhotoList"]:

+ 22 - 0
sph_sy_main.py

@@ -0,0 +1,22 @@
+import schedule
+import time
+
+from xssy_channel.sph_nrxs import SphNrxs
+
+def video_start_sph():
+    print("视频号任务开始...")
+
+    try:
+        SphNrxs.sph_nrxs_data()
+        print("视频号任务成功完成")
+    except Exception as e:
+        print(f"视频号任务执行过程中发生错误: {e}")
+    print("视频号任务结束")
+
+
+# 每天早上 6 点执行
+schedule.every().day.at("01:00").do(video_start_sph)
+SphNrxs.sph_nrxs_data()
+while True:
+    schedule.run_pending()
+    time.sleep(1)

+ 1 - 0
video_rewriting/video_processor.py

@@ -479,6 +479,7 @@ class VideoProcessor:
         try:
             data = get_data(mark, task_data)
             if not data:
+                Common.logger("redis").error(f"{mark}任务开始新的一轮\n")
                 return
             task = json.loads(data)
             VideoProcessor.process_task(task, mark, name, feishu_id, cookie_sheet)

+ 206 - 28
xssy_channel/sph_nrxs.py

@@ -10,11 +10,152 @@ from common.sql_help import sqlCollect
 
 class SphNrxs:
 
+    """创建票圈账号"""
+    @classmethod
+    def insert_number(cls, mid):
+        for i in range(3):
+            url = "https://admin.piaoquantv.com/manager/crawler/v3/user/save"
+            payload = {
+                "source": "jiqizidonggaizao",
+                "mode": "author",
+                "modeValue": "",
+                "modeBoard": "",
+                "recomStatus": -7,
+                "appRecomStatus": -7,
+                "autoAuditStatus": 0,
+                "tag": f"7592,452,8776,467",
+                "contentCategory": 0,
+                "link": str(mid)
+            }
+            cookie = Material.get_cookie_data("KsoMsyP2ghleM9tzBfmcEEXBnXg", "U1gySe", "票圈后台-cookie")
+
+            headers = {
+                'content-length': '0',
+                'cookie': cookie,
+                'origin': 'https://admin.piaoquantv.com',
+                'priority': 'u=1, i',
+                'sec-ch-ua': '"Not/A)Brand";v="8", "Chromium";v="126", "Google Chrome";v="126"',
+                'sec-ch-ua-mobile': '?0',
+                'sec-ch-ua-platform': '"macOS"'
+            }
+
+            response = requests.request("POST", url, headers=headers, json=payload)
+            response = response.json()
+            code = response["code"]
+            if code == 0:
+                print("添加账号成功")
+                time.sleep(1)
+                url = "https://admin.piaoquantv.com/manager/crawler/v3/user/list"
+                payload = {
+                    "pageNum": 1,
+                    "pageSize": 20
+                }
+                response = requests.request("POST", url, headers=headers, json=payload)
+                response = response.json()
+                list = response["content"]['list']
+                link = list[0]["link"]
+                if link == str(mid):
+                    print("获取站内账号ID成功")
+                    return list[0]["uid"]
+            else:
+                if code == '10010':
+                    return None
+                Feishu.bot("xinxin", '票圈后台提醒', f'票圈后台cookie 失效了,请即使更换', 'xinxin')
+
+    """腾讯互选平台通过appid获取观众画像"""
+    @classmethod
+    def get_hx(cls, aid):
+        url = "http://8.217.190.241:8888/crawler/wei_xin/shi_pin_hao/hu_xuan_detail"
+        cookie = Material.get_cookie_data("KsoMsyP2ghleM9tzBfmcEEXBnXg", "U1gySe", "腾讯互选平台-cookie")
+        account_id = Material.get_cookie_data("KsoMsyP2ghleM9tzBfmcEEXBnXg", "U1gySe", "腾讯互选平台-account_id")
+        payload = json.dumps({
+            "account_id": aid,
+            "uid": str(account_id),
+            "cookie": cookie
+        })
+        headers = {
+            'Content-Type': 'application/json'
+        }
+
+        response = requests.request("POST", url, headers=headers, data=payload)
+        response = response.json()
+        ret = response['code']
+        if ret == 0:
+            data = response['data']['data']
+            if data:
+                age_ranges = ['<18 岁', '18 ~ 24 岁', '25 ~ 29 岁', '30 ~ 39 岁', '40 ~ 49 岁', '>50 岁']
+                viewer_portrait = data['viewer_portrait']  # 观众画像
+                # 找到占比最大的项
+                viewer_max_age_range = max(
+                    (item for item in viewer_portrait if item['name'] in age_ranges),
+                    key=lambda x: float(x['percentage'].strip('%'))
+                )
+                if viewer_max_age_range['name'] != '>50 岁':
+                    return "2"
+                fans_portrait = data['fans_portrait']  # 粉丝画像
+                # 找到占比最大的项
+                fans_max_age_range = max(
+                    (item for item in fans_portrait if item['name'] in age_ranges),
+                    key=lambda x: float(x['percentage'].strip('%'))
+                )
+                if fans_max_age_range['name'] != '>50 岁':
+                    return "3"
+            return "0"
+        else:
+            Feishu.bot("xinxin", '腾讯互选平台提醒', f'腾讯互选平台cookie 失效了,请即使更换', 'xinxin')
+            return None
 
+    """腾讯互选平台通过搜索获取appid"""
     @classmethod
-    def get_sph_data(cls, user, nick_name, uid):
+    def get_hxpt_appid(cls, user):
+        url = "https://huxuan.qq.com/cgi-bin/advertiser/finder_publisher/search"
+        cookie = Material.get_cookie_data("KsoMsyP2ghleM9tzBfmcEEXBnXg", "U1gySe", "腾讯互选平台-cookie")
+        account_id = Material.get_cookie_data("KsoMsyP2ghleM9tzBfmcEEXBnXg", "U1gySe", "腾讯互选平台-account_id")
+
+        payload = json.dumps({
+            "keyword": user,
+            "page": {
+                "no": 1,
+                "size": 50
+            }
+        })
+        headers = {
+            'Accept': 'application/json, text/plain, */*',
+            'Accept-Language': 'zh-CN,zh;q=0.9',
+            'Cache-Control': 'no-cache',
+            'Connection': 'keep-alive',
+            'Content-Type': 'application/json',
+            'Cookie': cookie,
+            'Origin': 'https://huxuan.qq.com',
+            'Pragma': 'no-cache',
+            'Referer': 'https://huxuan.qq.com/trade/selection/46251713/selection_list?type=finder-trade',
+            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36',
+            'account_id': str(account_id),
+            'sec-ch-ua': '"Not)A;Brand";v="99", "Google Chrome";v="127", "Chromium";v="127"',
+            'sec-ch-ua-mobile': '?0',
+            'sec-ch-ua-platform': '"macOS"'
+        }
+        response = requests.request("POST", url, headers=headers, data=payload)
+        response = response.json()
+        ret = response['ret']
+        if ret == 0:
+            try:
+                appid = response['data']['item'][0]['appid']
+                if appid:
+                    return appid
+            except Exception as e:
+                Common.logger("sph_nrxs").info(f"{user}获取腾讯互选平台appid异常,异常信息{e}")
+                return None
+
+        else:
+            Feishu.bot("xinxin", '腾讯互选平台提醒', f'腾讯互选平台cookie 失效了,请即使更换', 'xinxin')
+
+        print(response.text)
+
+    """获取用户主页是否符合规则"""
+    @classmethod
+    def get_sph_data(cls, user, uid):
         url = "http://61.48.133.26:30001/FinderGetUpMasterNextPage"
-        count = 1
         headers = {
             'Content-Type': 'application/json'
         }
@@ -25,8 +166,6 @@ class SphNrxs:
 
         response = requests.request("POST", url, headers=headers, data=payload)
         time.sleep(random.randint(1, 5))
-        Common.logger("sph_nrxs").info(f"{user}获取第{count}页视频")
-        count += 1
         if response.text == "" or response.text == None:
             return
         res_json = response.json()
@@ -44,7 +183,7 @@ class SphNrxs:
             return
         try:
             for obj in res_json["UpMasterHomePage"]:
-                Common.logger("sph_crawling").info(f"{user}扫描到一条数据")
+                Common.logger("sph_nrxs").info(f"{user}扫描到一条数据")
                 objectId = obj['objectId']
                 object_id = sqlCollect.sph_data_info_v_id(objectId, "视频号")
                 if object_id:
@@ -72,27 +211,22 @@ class SphNrxs:
                 nick_name = obj['nickname']  # 用户名
                 # comment_count = obj['comment_count']  # 评论数
                 # fav_count = obj['fav_count']  # 大拇指点赞数
-                values = [
-                    [
-                        uid,
-                        nick_name,
-                        like_cnt,
-                        share_cnt,
-                        duration,
-                        video_url
-                    ]
-                ]
-                Feishu.insert_columns("UBvisMdE7hkI6rtIfzycCtdsnWM", '3476ab', "ROWS", 1, 2)
-                time.sleep(0.5)
-                Feishu.update_values("UBvisMdE7hkI6rtIfzycCtdsnWM", '3476ab', "A2:Z2", values)
-                Common.logger("sph_nrxs").info(f"{nick_name}符合规则")
+                video_percent = '%.2f' % (share_cnt / like_cnt)
+                special = float(0.25)
+                Common.logger("sph_nrxs").info(
+                    f"扫描:原用户主页名:{uid},溯源用户主页id:{url},视频id{objectId} ,分享:{share_cnt},点赞:{like_cnt} ,时长:{duration},视频链接:{video_url}")
+                if share_cnt >= 300 and float(video_percent) >= special and int(duration) >= 30:
+                    Common.logger("sph_nrxs").info(f"{nick_name}符合规则")
+                    return nick_name
+            return None
         except Exception as e:
             Common.logger("sph_nrxs").info(f"{user}异常,异常信息{e}")
-            return
+            return None
+
 
+    """视频号加热平台相似溯源"""
     @classmethod
     def get_nrxs_list(cls, uid):
-        list = []
         cookie = Material.get_cookie_data("KsoMsyP2ghleM9tzBfmcEEXBnXg", "U1gySe", "视频号加热")
         url = "http://8.217.190.241:8888/crawler/wei_xin/shi_pin_hao/jia_re"
 
@@ -110,33 +244,77 @@ class SphNrxs:
             response = response.json()
             code = response['code']
             if code == 0:
+                sqlCollect.update_machine_making_reflux(uid)
                 data_list = response['data']['data']
                 if data_list:
                     for data in data_list:
                         nick_name = data['nickName']  # 用户名
                         user_name = data['username']  # 用户v2
-                        data_dict = {"nick_name": nick_name, "user_name": user_name}
-                        cls.get_sph_data(user_name, nick_name, uid)
-                        list.append(data_dict)
-                    return list
+                        status = sqlCollect.select_crawler_uesr_v3(nick_name)
+                        if status:
+                            sqlCollect.insert_xssy_sph_info(uid, nick_name, user_name, "4")
+                            continue
+                        user = cls.get_sph_data(user_name, uid)
+                        if user:
+                            time.sleep(180)
+                            appid = cls.get_hxpt_appid(user)
+                            if appid:
+                                time.sleep(180)
+                                has_used = cls.get_hx(appid)
+                                if has_used:
+                                    if has_used == '0':
+                                        pq_id = cls.insert_number(nick_name)
+                                        if pq_id:
+                                            sqlCollect.insert_xssy_sph_info(uid, nick_name, user_name, has_used, appid, pq_id)
+                                            values = [
+                                                [
+                                                    "视频号",
+                                                    user_name,
+                                                    pq_id,
+                                                    "5",
+                                                    "通用-安全分享",
+                                                    "AI片尾引导",
+                                                    "",
+                                                    "",
+                                                    "AI标题",
+                                                    "",
+                                                    f"溯源账号:{uid}"
+                                                ]
+                                            ]
+                                            Feishu.insert_columns("WGIYsSDdxhItBwtJ0xgc0yE7nEg", '0701bd', "ROWS", 1, 2)
+                                            time.sleep(0.5)
+                                            Feishu.update_values("WGIYsSDdxhItBwtJ0xgc0yE7nEg", '0701bd', "B2:Z2",
+                                                                 values)
+                                    else:
+                                        sqlCollect.insert_xssy_sph_info(uid, nick_name, user_name, has_used, appid)
+                        else:
+                            sqlCollect.insert_xssy_sph_info(uid, nick_name, user_name, "1")
+                            continue
+
+
                 else:
-                    Feishu.bot("xinxin", '视频号加热提醒', f'cookie 失效了,请即使更换', 'xinxin')
                     return None
+            else:
+                Feishu.bot("xinxin", '视频号加热提醒', f'视频号加热平台 cookie 失效了,请即使更换', 'xinxin')
+                return None
         except Exception as e:
-            Feishu.bot("xinxin", '视频号加热提醒', f'cookie 失效了,请即使更换', 'xinxin')
-            Common.logger("feishu").error(f"视频号加热bot异常:{e}\n")
+            Feishu.bot("xinxin", '视频号加热提醒', f'视频号加热平台 cookie 失效了,请即使更换', 'xinxin')
+            Common.logger("sph_nrxs").error(f"用户名:{uid}视频号加热bot异常:{e}\n")
             return None
 
+    """获取需溯源账号"""
     @classmethod
     def sph_nrxs_data(cls):
         user = sqlCollect.get_machine_making_reflux("视频号", "单点视频")
         if user == None:
             return
         user = [item[0] for item in user]
+        Feishu.bot("xinxin", '视频号溯源提醒', f'今日需溯源账号共{len(user)}条', 'xinxin')
         for uid in user:
             if re.match(r'^[A-Za-z0-9]+$', uid):
                 # 匹配成功,进行下一次循环
                 continue
+            Feishu.bot("xinxin", '视频号溯源提醒', f'开始溯源账号名称{uid}', 'xinxin')
             list = cls.get_nrxs_list(uid)
             print(list)