""" @author: luojunhui 微信 search """ import os import sys import json import time import requests sys.path.append(os.getcwd()) from application.items import VideoItem from application.common.messageQueue import MQ from application.common.log import AliyunLogger ETL_MQ = MQ(topic_name="topic_crawler_etl_prod") aliyun_logger = AliyunLogger(platform="weixin_search", mode="search") async def weixin_search(params): """ 通过搜索爬虫 + search_keys 来获取视频信息,并且以 MQ 的方式发送给 ETL, 正常上传发布 只抓一页,不做去重 :param params: [] :return: """ gh_id_dict = { "gh_01f8afd03366": { "uid": 69629493, "nick_name": "非亲非故" }, "gh_058e41145a0c": { "uid": 69629452, "nick_name": "甜腻梦话" }, "gh_084a485e859a": { "uid": 69629447, "nick_name": "梦星月" }, "gh_0921c03402cd": { "uid": 69629504, "nick_name": "你的女友" }, "gh_0c89e11f8bf3": { "uid": 69629482, "nick_name": "粟米" }, "gh_171cec079b2a": { "uid": 69629475, "nick_name": "海上" }, "gh_183d80deffb8": { "uid": 69629465, "nick_name": "论趣" }, "gh_1ee2e1b39ccf": { "uid": 69629448, "nick_name": "纵有疾风起" }, "gh_234ef02cdee5": { "uid": 69629486, "nick_name": "夹逼" }, "gh_26a307578776": { "uid": 69629464, "nick_name": "最宝贝的宝贝" }, "gh_29074b51f2b7": { "uid": 69629503, "nick_name": "沉舸" }, "gh_2b8c6aa035ae": { "uid": 69629443, "nick_name": "懶得取名" }, "gh_34318194fd0e": { "uid": 69629490, "nick_name": "徒四壁" }, "gh_3845af6945d0": { "uid": 69629518, "nick_name": "秋水娉婷" }, "gh_3ac6d7208961": { "uid": 69629471, "nick_name": "小熊的少女梦" }, "gh_3c7d38636846": { "uid": 69629492, "nick_name": "油腻腻" }, "gh_3df10391639c": { "uid": 69629514, "nick_name": "六郎娇面" }, "gh_40a0ad154478": { "uid": 69629489, "nick_name": "禁止" }, "gh_424c8eeabced": { "uid": 69629495, "nick_name": "认命" }, "gh_4568b5a7e2fe": { "uid": 69629457, "nick_name": "香腮" }, "gh_45beb952dc74": { "uid": 69629462, "nick_name": "毋庸" }, "gh_484de412b0ef": { "uid": 69629456, "nick_name": "婪" }, "gh_4c058673c07e": { "uid": 69629449, "nick_name": "影帝" }, "gh_538f78f9d3aa": { "uid": 69629454, "nick_name": "伤痕" }, "gh_56a6765df869": { "uid": 69629487, "nick_name": "风月" }, "gh_56ca3dae948c": { "uid": 69629511, "nick_name": "留下太多回忆" }, "gh_5e543853d8f0": { "uid": 69629516, "nick_name": "不知春秋" }, "gh_5ff48e9fb9ef": { "uid": 69629468, "nick_name": "寻她找他" }, "gh_671f460c856c": { "uid": 69629496, "nick_name": "绝不改悔" }, "gh_6b7c2a257263": { "uid": 69629501, "nick_name": "奶牙" }, "gh_6d205db62f04": { "uid": 69629483, "nick_name": "怕羞" }, "gh_6d9f36e3a7be": { "uid": 69629472, "nick_name": "望长安" }, "gh_73be0287bb94": { "uid": 69629510, "nick_name": "戏剧" }, "gh_744cb16f6e16": { "uid": 69629479, "nick_name": "反駁" }, "gh_7b4a5f86d68c": { "uid": 69629453, "nick_name": "我很想你" }, "gh_7bca1c99aea0": { "uid": 69629484, "nick_name": "从小就很傲" }, "gh_7e5818b2dd83": { "uid": 69629505, "nick_name": "二八佳人" }, "gh_89ef4798d3ea": { "uid": 69629506, "nick_name": "彼岸花" }, "gh_901b0d722749": { "uid": 69629491, "nick_name": "深情不为我" }, "gh_9161517e5676": { "uid": 69629469, "nick_name": "折磨" }, "gh_93e00e187787": { "uid": 69629478, "nick_name": "理会" }, "gh_9877c8541764": { "uid": 69629481, "nick_name": "我沿着悲伤" }, "gh_9cf3b7ff486b": { "uid": 69629466, "nick_name": "hoit" }, "gh_9e559b3b94ca": { "uid": 69629444, "nick_name": "我与你相遇" }, "gh_9f8dc5b0c74e": { "uid": 69629470, "nick_name": "港口" }, "gh_a182cfc94dad": { "uid": 69629512, "nick_name": "四海八荒" }, "gh_a2901d34f75b": { "uid": 69629508, "nick_name": "听腻了谎话" }, "gh_a307072c04b9": { "uid": 69629494, "nick_name": "踏步" }, "gh_a6351b447819": { "uid": 69629513, "nick_name": "七猫酒馆" }, "gh_ac43e43b253b": { "uid": 69629473, "nick_name": "一厢情愿" }, "gh_adca24a8f429": { "uid": 69629458, "nick_name": "对你何止一句喜欢" }, "gh_b15de7c99912": { "uid": 69629509, "nick_name": "糖炒板栗" }, "gh_b32125c73861": { "uid": 69629467, "nick_name": "发尾" }, "gh_b3ffc1ca3a04": { "uid": 69629519, "nick_name": "主宰你心" }, "gh_b8baac4296cb": { "uid": 69629463, "nick_name": "生性" }, "gh_b9b99173ff8a": { "uid": 69629497, "nick_name": "养一只月亮" }, "gh_bd57b6978e06": { "uid": 69629500, "nick_name": "厌遇" }, "gh_be8c29139989": { "uid": 69629476, "nick_name": "不负" }, "gh_bfe5b705324a": { "uid": 69629502, "nick_name": "乐极" }, "gh_bff0bcb0694a": { "uid": 69629507, "nick_name": "简迷离" }, "gh_c69776baf2cd": { "uid": 69629485, "nick_name": "骄纵" }, "gh_c91b42649690": { "uid": 69629477, "nick_name": "荟萃" }, "gh_d2cc901deca7": { "uid": 69629461, "nick_name": "恶意调笑" }, "gh_d5f935d0d1f2": { "uid": 69629474, "nick_name": "青少年哪吒" }, "gh_da76772d8d15": { "uid": 69629499, "nick_name": "独揽风月" }, "gh_de9f9ebc976b": { "uid": 69629450, "nick_name": "剑出鞘恩怨了" }, "gh_e0eb490115f5": { "uid": 69629460, "nick_name": "赋别" }, "gh_e24da99dc899": { "uid": 69629459, "nick_name": "恋雨夏季" }, "gh_e2576b7181c6": { "uid": 69629488, "nick_name": "满天星" }, "gh_e75dbdc73d80": { "uid": 69629515, "nick_name": "情战" }, "gh_e9d819f9e147": { "uid": 69629498, "nick_name": "与卿" }, "gh_efaf7da157f5": { "uid": 69629520, "nick_name": "心野性子浪" }, "gh_f4594783f5b8": { "uid": 69629517, "nick_name": "自缚" }, "gh_fe6ef3a65a48": { "uid": 69629455, "nick_name": "风间" } } search_keys = params['search_keys'] user = gh_id_dict.get(params['ghId']) url = "http://8.217.190.241:8888/crawler/wei_xin/keyword" payload = json.dumps({ "keyword": ",".join(search_keys), "cursor": "0", "content_type": "video" }) headers = { 'Content-Type': 'application/json' } response = requests.request("POST", url, headers=headers, data=payload) data_list = response.json()['data']['data'] for item in data_list: video_obj = item['items'][0] # await process_weixin_video_obj(video_obj, user) try: aliyun_logger.logging( code="1001", message="扫描到一条视频", account=user['uid'], data=video_obj ) await process_weixin_video_obj(video_obj, user) except Exception as e: aliyun_logger.logging( code="3000", message="有报错信息---{}".format(e), account=user['uid'] ) async def process_weixin_video_obj(video_obj, user): """ 异步处理微信 video_obj 公众号和站内账号一一对应 :param user: :param video_obj: :return: """ platform = "weixin_search" publish_time_stamp = int(video_obj['pubTime']) title = video_obj['title'].replace('', '').replace('', '').replace("#", "") item = VideoItem() item.add_video_info("user_id", user["uid"]) item.add_video_info("user_name", user["nick_name"]) item.add_video_info("video_id", video_obj['hashDocID']) item.add_video_info("video_title", title) item.add_video_info("publish_time_stamp", int(publish_time_stamp)) item.add_video_info("video_url", video_obj["videoUrl"]) item.add_video_info("cover_url", video_obj["image"]) item.add_video_info("out_video_id", video_obj['hashDocID']) item.add_video_info("platform", platform) item.add_video_info("strategy", "search") item.add_video_info("session", "{}-{}".format(platform, int(time.time()))) mq_obj = item.produce_item() ETL_MQ.send_msg(video_dict=mq_obj) aliyun_logger.logging( code="1002", message="成功发送到 ETL", account=user["uid"], data=mq_obj )