""" @author: luojunhui 微信 search """ import os import sys import json import time import requests sys.path.append(os.getcwd()) from application.items import VideoItem from application.common.messageQueue import MQ from application.common.log import AliyunLogger ETL_MQ = MQ(topic_name="topic_crawler_etl_prod") aliyun_logger = AliyunLogger(platform="weixin_search", mode="search") async def weixin_search(params): """ 通过搜索爬虫 + search_keys 来获取视频信息,并且以 MQ 的方式发送给 ETL, 正常上传发布 只抓一页,不做去重 :param params: [] :return: """ gh_id_dict = { "gh_01f8afd03366": { "uid": 69637520, "nick_name": "非亲非故" }, "gh_058e41145a0c": { "uid": 69637476, "nick_name": "甜腻梦话" }, "gh_084a485e859a": { "uid": 69637472, "nick_name": "梦星月" }, "gh_0921c03402cd": { "uid": 69637531, "nick_name": "你的女友" }, "gh_0c89e11f8bf3": { "uid": 69637508, "nick_name": "粟米" }, "gh_171cec079b2a": { "uid": 69637501, "nick_name": "海上" }, "gh_183d80deffb8": { "uid": 69637491, "nick_name": "论趣" }, "gh_1ee2e1b39ccf": { "uid": 69637473, "nick_name": "纵有疾风起" }, "gh_234ef02cdee5": { "uid": 69637513, "nick_name": "夹逼" }, "gh_26a307578776": { "uid": 69637490, "nick_name": "最宝贝的宝贝" }, "gh_29074b51f2b7": { "uid": 69637530, "nick_name": "沉舸" }, "gh_2b8c6aa035ae": { "uid": 69637470, "nick_name": "懶得取名" }, "gh_34318194fd0e": { "uid": 69637517, "nick_name": "徒四壁" }, "gh_3845af6945d0": { "uid": 69637545, "nick_name": "秋水娉婷" }, "gh_3ac6d7208961": { "uid": 69637497, "nick_name": "小熊的少女梦" }, "gh_3c7d38636846": { "uid": 69637519, "nick_name": "油腻腻" }, "gh_3df10391639c": { "uid": 69637541, "nick_name": "六郎娇面" }, "gh_40a0ad154478": { "uid": 69637516, "nick_name": "禁止" }, "gh_424c8eeabced": { "uid": 69637522, "nick_name": "认命" }, "gh_4568b5a7e2fe": { "uid": 69637482, "nick_name": "香腮" }, "gh_45beb952dc74": { "uid": 69637488, "nick_name": "毋庸" }, "gh_484de412b0ef": { "uid": 69637481, "nick_name": "婪" }, "gh_4c058673c07e": { "uid": 69637474, "nick_name": "影帝" }, "gh_538f78f9d3aa": { "uid": 69637478, "nick_name": "伤痕" }, "gh_56a6765df869": { "uid": 69637514, "nick_name": "风月" }, "gh_56ca3dae948c": { "uid": 69637538, "nick_name": "留下太多回忆" }, "gh_5e543853d8f0": { "uid": 69637543, "nick_name": "不知春秋" }, "gh_5ff48e9fb9ef": { "uid": 69637494, "nick_name": "寻她找他" }, "gh_671f460c856c": { "uid": 69637523, "nick_name": "绝不改悔" }, "gh_6b7c2a257263": { "uid": 69637528, "nick_name": "奶牙" }, "gh_6d205db62f04": { "uid": 69637509, "nick_name": "怕羞" }, "gh_6d9f36e3a7be": { "uid": 69637498, "nick_name": "望长安" }, "gh_73be0287bb94": { "uid": 69637537, "nick_name": "戏剧" }, "gh_744cb16f6e16": { "uid": 69637505, "nick_name": "反駁" }, "gh_7b4a5f86d68c": { "uid": 69637477, "nick_name": "我很想你" }, "gh_7bca1c99aea0": { "uid": 69637511, "nick_name": "从小就很傲" }, "gh_7e5818b2dd83": { "uid": 69637532, "nick_name": "二八佳人" }, "gh_89ef4798d3ea": { "uid": 69637533, "nick_name": "彼岸花" }, "gh_901b0d722749": { "uid": 69637518, "nick_name": "深情不为我" }, "gh_9161517e5676": { "uid": 69637495, "nick_name": "折磨" }, "gh_93e00e187787": { "uid": 69637504, "nick_name": "理会" }, "gh_9877c8541764": { "uid": 69637506, "nick_name": "我沿着悲伤" }, "gh_9cf3b7ff486b": { "uid": 69637492, "nick_name": "hoit" }, "gh_9e559b3b94ca": { "uid": 69637471, "nick_name": "我与你相遇" }, "gh_9f8dc5b0c74e": { "uid": 69637496, "nick_name": "港口" }, "gh_a182cfc94dad": { "uid": 69637539, "nick_name": "四海八荒" }, "gh_a2901d34f75b": { "uid": 69637535, "nick_name": "听腻了谎话" }, "gh_a307072c04b9": { "uid": 69637521, "nick_name": "踏步" }, "gh_a6351b447819": { "uid": 69637540, "nick_name": "七猫酒馆" }, "gh_ac43e43b253b": { "uid": 69637499, "nick_name": "一厢情愿" }, "gh_adca24a8f429": { "uid": 69637483, "nick_name": "对你何止一句喜欢" }, "gh_b15de7c99912": { "uid": 69637536, "nick_name": "糖炒板栗" }, "gh_b32125c73861": { "uid": 69637493, "nick_name": "发尾" }, "gh_b3ffc1ca3a04": { "uid": 69637546, "nick_name": "主宰你心" }, "gh_b8baac4296cb": { "uid": 69637489, "nick_name": "生性" }, "gh_b9b99173ff8a": { "uid": 69637524, "nick_name": "养一只月亮" }, "gh_bd57b6978e06": { "uid": 69637527, "nick_name": "厌遇" }, "gh_be8c29139989": { "uid": 69637502, "nick_name": "不负" }, "gh_bfe5b705324a": { "uid": 69637529, "nick_name": "乐极" }, "gh_bff0bcb0694a": { "uid": 69637534, "nick_name": "简迷离" }, "gh_c69776baf2cd": { "uid": 69637512, "nick_name": "骄纵" }, "gh_c91b42649690": { "uid": 69637503, "nick_name": "荟萃" }, "gh_d2cc901deca7": { "uid": 69637487, "nick_name": "恶意调笑" }, "gh_d5f935d0d1f2": { "uid": 69637500, "nick_name": "青少年哪吒" }, "gh_da76772d8d15": { "uid": 69637526, "nick_name": "独揽风月" }, "gh_de9f9ebc976b": { "uid": 69637475, "nick_name": "剑出鞘恩怨了" }, "gh_e0eb490115f5": { "uid": 69637486, "nick_name": "赋别" }, "gh_e24da99dc899": { "uid": 69637484, "nick_name": "恋雨夏季" }, "gh_e2576b7181c6": { "uid": 69637515, "nick_name": "满天星" }, "gh_e75dbdc73d80": { "uid": 69637542, "nick_name": "情战" }, "gh_e9d819f9e147": { "uid": 69637525, "nick_name": "与卿" }, "gh_efaf7da157f5": { "uid": 69637547, "nick_name": "心野性子浪" }, "gh_f4594783f5b8": { "uid": 69637544, "nick_name": "自缚" }, "gh_fe6ef3a65a48": { "uid": 69637480, "nick_name": "风间" } } aliyun_logger.logging( code="2000", message="请求参数", data=params ) search_keys = params['title'] user = gh_id_dict.get(params['ghId']) trace_id = params['trace_id'] url = "http://8.217.190.241:8888/crawler/wei_xin/keyword" payload = json.dumps({ "keyword": search_keys, "cursor": "0", "content_type": "video" }) headers = { 'Content-Type': 'application/json' } response = requests.request("POST", url, headers=headers, data=payload) aliyun_logger.logging( code="2000", message="微信抓取成功", data=response.json() ) try: data_list = response.json()['data']['data'] for item in data_list[:10]: video_obj = item['items'][0] # await process_weixin_video_obj(video_obj, user) try: aliyun_logger.logging( code="1001", message="扫描到一条视频", account=user['uid'], data=video_obj ) await process_weixin_video_obj(video_obj, user, trace_id) except Exception as e: aliyun_logger.logging( code="3000", message="有报错信息---{}".format(e), account=user['uid'] ) except Exception as e: aliyun_logger.logging( code="3000", message="有报错信息---{}---微信搜索视频失败".format(e), account=user['uid'] ) async def process_weixin_video_obj(video_obj, user, trace_id): """ 异步处理微信 video_obj 公众号和站内账号一一对应 :param trace_id: :param user: :param video_obj: :return: """ platform = "weixin_search" publish_time_stamp = int(video_obj['pubTime']) title = video_obj['title'].replace('', '').replace('', '').replace("#", "") item = VideoItem() item.add_video_info("user_id", user["uid"]) item.add_video_info("user_name", user["nick_name"]) item.add_video_info("video_id", video_obj['hashDocID']) item.add_video_info("video_title", title) item.add_video_info("publish_time_stamp", int(publish_time_stamp)) item.add_video_info("video_url", video_obj["videoUrl"]) item.add_video_info("cover_url", video_obj["image"]) item.add_video_info("out_video_id", video_obj['hashDocID']) item.add_video_info("out_user_id", trace_id) item.add_video_info("platform", platform) item.add_video_info("strategy", "search") item.add_video_info("session", "{}-{}".format(platform, int(time.time()))) mq_obj = item.produce_item() ETL_MQ.send_msg(video_dict=mq_obj) aliyun_logger.logging( code="1002", message="成功发送到 ETL", account=user["uid"], data=mq_obj )