"""
@author: luojunhui
WeChat search crawler: look up videos by keyword and hand them to the ETL queue.
"""
import os
import sys
import json
import time
import requests

sys.path.append(os.getcwd())

from application.items import VideoItem
from application.common.messageQueue import MQ
from application.common.log import AliyunLogger

ETL_MQ = MQ(topic_name="topic_crawler_etl_prod")
aliyun_logger = AliyunLogger(platform="weixin_search", mode="search")

# Upper bound (seconds) for the keyword-search HTTP call. Without a timeout
# `requests` waits indefinitely, which would stall the coroutine (and the event
# loop it runs on) forever if the crawler service stops answering.
REQUEST_TIMEOUT = 60

# Official-account id (ghId) -> in-site account that the videos are published under.
# Built once at import time instead of on every call to `weixin_search`.
GH_ID_DICT = {
    "gh_01f8afd03366": {"uid": 69637520, "nick_name": "非亲非故"},
    "gh_058e41145a0c": {"uid": 69637476, "nick_name": "甜腻梦话"},
    "gh_084a485e859a": {"uid": 69637472, "nick_name": "梦星月"},
    "gh_0921c03402cd": {"uid": 69637531, "nick_name": "你的女友"},
    "gh_0c89e11f8bf3": {"uid": 69637508, "nick_name": "粟米"},
    "gh_171cec079b2a": {"uid": 69637501, "nick_name": "海上"},
    "gh_183d80deffb8": {"uid": 69637491, "nick_name": "论趣"},
    "gh_1ee2e1b39ccf": {"uid": 69637473, "nick_name": "纵有疾风起"},
    "gh_234ef02cdee5": {"uid": 69637513, "nick_name": "夹逼"},
    "gh_26a307578776": {"uid": 69637490, "nick_name": "最宝贝的宝贝"},
    "gh_29074b51f2b7": {"uid": 69637530, "nick_name": "沉舸"},
    "gh_2b8c6aa035ae": {"uid": 69637470, "nick_name": "懶得取名"},
    "gh_34318194fd0e": {"uid": 69637517, "nick_name": "徒四壁"},
    "gh_3845af6945d0": {"uid": 69637545, "nick_name": "秋水娉婷"},
    "gh_3ac6d7208961": {"uid": 69637497, "nick_name": "小熊的少女梦"},
    "gh_3c7d38636846": {"uid": 69637519, "nick_name": "油腻腻"},
    "gh_3df10391639c": {"uid": 69637541, "nick_name": "六郎娇面"},
    "gh_40a0ad154478": {"uid": 69637516, "nick_name": "禁止"},
    "gh_424c8eeabced": {"uid": 69637522, "nick_name": "认命"},
    "gh_4568b5a7e2fe": {"uid": 69637482, "nick_name": "香腮"},
    "gh_45beb952dc74": {"uid": 69637488, "nick_name": "毋庸"},
    "gh_484de412b0ef": {"uid": 69637481, "nick_name": "婪"},
    "gh_4c058673c07e": {"uid": 69637474, "nick_name": "影帝"},
    "gh_538f78f9d3aa": {"uid": 69637478, "nick_name": "伤痕"},
    "gh_56a6765df869": {"uid": 69637514, "nick_name": "风月"},
    "gh_56ca3dae948c": {"uid": 69637538, "nick_name": "留下太多回忆"},
    "gh_5e543853d8f0": {"uid": 69637543, "nick_name": "不知春秋"},
    "gh_5ff48e9fb9ef": {"uid": 69637494, "nick_name": "寻她找他"},
    "gh_671f460c856c": {"uid": 69637523, "nick_name": "绝不改悔"},
    "gh_6b7c2a257263": {"uid": 69637528, "nick_name": "奶牙"},
    "gh_6d205db62f04": {"uid": 69637509, "nick_name": "怕羞"},
    "gh_6d9f36e3a7be": {"uid": 69637498, "nick_name": "望长安"},
    "gh_73be0287bb94": {"uid": 69637537, "nick_name": "戏剧"},
    "gh_744cb16f6e16": {"uid": 69637505, "nick_name": "反駁"},
    "gh_7b4a5f86d68c": {"uid": 69637477, "nick_name": "我很想你"},
    "gh_7bca1c99aea0": {"uid": 69637511, "nick_name": "从小就很傲"},
    "gh_7e5818b2dd83": {"uid": 69637532, "nick_name": "二八佳人"},
    "gh_89ef4798d3ea": {"uid": 69637533, "nick_name": "彼岸花"},
    "gh_901b0d722749": {"uid": 69637518, "nick_name": "深情不为我"},
    "gh_9161517e5676": {"uid": 69637495, "nick_name": "折磨"},
    "gh_93e00e187787": {"uid": 69637504, "nick_name": "理会"},
    "gh_9877c8541764": {"uid": 69637506, "nick_name": "我沿着悲伤"},
    "gh_9cf3b7ff486b": {"uid": 69637492, "nick_name": "hoit"},
    "gh_9e559b3b94ca": {"uid": 69637471, "nick_name": "我与你相遇"},
    "gh_9f8dc5b0c74e": {"uid": 69637496, "nick_name": "港口"},
    "gh_a182cfc94dad": {"uid": 69637539, "nick_name": "四海八荒"},
    "gh_a2901d34f75b": {"uid": 69637535, "nick_name": "听腻了谎话"},
    "gh_a307072c04b9": {"uid": 69637521, "nick_name": "踏步"},
    "gh_a6351b447819": {"uid": 69637540, "nick_name": "七猫酒馆"},
    "gh_ac43e43b253b": {"uid": 69637499, "nick_name": "一厢情愿"},
    "gh_adca24a8f429": {"uid": 69637483, "nick_name": "对你何止一句喜欢"},
    "gh_b15de7c99912": {"uid": 69637536, "nick_name": "糖炒板栗"},
    "gh_b32125c73861": {"uid": 69637493, "nick_name": "发尾"},
    "gh_b3ffc1ca3a04": {"uid": 69637546, "nick_name": "主宰你心"},
    "gh_b8baac4296cb": {"uid": 69637489, "nick_name": "生性"},
    "gh_b9b99173ff8a": {"uid": 69637524, "nick_name": "养一只月亮"},
    "gh_bd57b6978e06": {"uid": 69637527, "nick_name": "厌遇"},
    "gh_be8c29139989": {"uid": 69637502, "nick_name": "不负"},
    "gh_bfe5b705324a": {"uid": 69637529, "nick_name": "乐极"},
    "gh_bff0bcb0694a": {"uid": 69637534, "nick_name": "简迷离"},
    "gh_c69776baf2cd": {"uid": 69637512, "nick_name": "骄纵"},
    "gh_c91b42649690": {"uid": 69637503, "nick_name": "荟萃"},
    "gh_d2cc901deca7": {"uid": 69637487, "nick_name": "恶意调笑"},
    "gh_d5f935d0d1f2": {"uid": 69637500, "nick_name": "青少年哪吒"},
    "gh_da76772d8d15": {"uid": 69637526, "nick_name": "独揽风月"},
    "gh_de9f9ebc976b": {"uid": 69637475, "nick_name": "剑出鞘恩怨了"},
    "gh_e0eb490115f5": {"uid": 69637486, "nick_name": "赋别"},
    "gh_e24da99dc899": {"uid": 69637484, "nick_name": "恋雨夏季"},
    "gh_e2576b7181c6": {"uid": 69637515, "nick_name": "满天星"},
    "gh_e75dbdc73d80": {"uid": 69637542, "nick_name": "情战"},
    "gh_e9d819f9e147": {"uid": 69637525, "nick_name": "与卿"},
    "gh_efaf7da157f5": {"uid": 69637547, "nick_name": "心野性子浪"},
    "gh_f4594783f5b8": {"uid": 69637544, "nick_name": "自缚"},
    "gh_fe6ef3a65a48": {"uid": 69637480, "nick_name": "风间"},
}


async def weixin_search(params):
    """
    Search WeChat videos by keyword and forward the hits to ETL through MQ.

    Only the first result page is requested (cursor "0"), at most the first 10
    results are processed, and no de-duplication is performed.

    :param params: mapping that must contain the keys ``title`` (search keyword),
        ``ghId`` (official-account id, looked up in ``GH_ID_DICT``) and
        ``trace_id`` (passed through to ``process_weixin_video_obj``).
    :return: None. Failures of the search request or of individual results are
        logged with code "3000" instead of being raised.
    :raises KeyError: if ``params`` lacks ``title``, ``ghId`` or ``trace_id``.
    """
    aliyun_logger.logging(
        code="2000",
        message="请求参数",
        data=params
    )
    search_keys = params['title']
    user = GH_ID_DICT.get(params['ghId'])
    trace_id = params['trace_id']

    if user is None:
        # An unknown ghId used to surface as a TypeError raised from *inside* the
        # error handlers (they all read user['uid']); report it once and stop.
        aliyun_logger.logging(
            code="3000",
            message="有报错信息---{}---微信搜索视频失败".format(
                "unknown ghId: {}".format(params['ghId'])
            ),
            data=params
        )
        return

    url = "http://8.217.190.241:8888/crawler/wei_xin/keyword"
    payload = json.dumps({
        "keyword": search_keys,
        "cursor": "0",
        "content_type": "video"
    })
    headers = {
        'Content-Type': 'application/json'
    }
    try:
        # The request and the JSON decoding live inside the try block so that a
        # connection error, a timeout or a non-JSON body is logged as a failed
        # search rather than escaping unreported.
        # NOTE(review): `requests` is blocking; the timeout bounds how long the
        # event loop can be held up by this call.
        response = requests.request(
            "POST", url, headers=headers, data=payload, timeout=REQUEST_TIMEOUT
        )
        result = response.json()  # decode once and reuse for logging and parsing
        aliyun_logger.logging(
            code="2000",
            message="微信抓取成功",
            data=result
        )
        data_list = result['data']['data']
        for item in data_list[:10]:
            try:
                # Extracting the video happens inside the per-item try so that
                # one malformed result does not abort the remaining ones.
                video_obj = item['items'][0]
                aliyun_logger.logging(
                    code="1001",
                    message="扫描到一条视频",
                    account=user['uid'],
                    data=video_obj
                )
                await process_weixin_video_obj(video_obj, user, trace_id)
            except Exception as e:
                aliyun_logger.logging(
                    code="3000",
                    message="有报错信息---{}".format(e),
                    account=user['uid']
                )
    except Exception as e:
        aliyun_logger.logging(
            code="3000",
            message="有报错信息---{}---微信搜索视频失败".format(e),
            account=user['uid']
        )


async def process_weixin_video_obj(video_obj, user, trace_id):
    """
    Convert one WeChat search result into a VideoItem and send it to the ETL queue.

    Each official account maps to exactly one in-site account (``user``).

    :param video_obj: mapping for a single search result; the keys ``pubTime``,
        ``title``, ``hashDocID``, ``videoUrl`` and ``image`` are read.
    :param user: mapping with ``uid`` and ``nick_name`` of the in-site account.
    :param trace_id: caller-supplied id, stored in the item's ``out_user_id`` field.
    :return: None.
    :raises KeyError: if ``video_obj`` or ``user`` lacks one of the keys above.
    :raises ValueError: if ``pubTime`` cannot be converted to ``int``.
    """
    platform = "weixin_search"
    publish_time_stamp = int(video_obj['pubTime'])
    # Strip the search-highlight markup and '#' characters from the title.
    title = video_obj['title'].replace('<em class=\"highlight\">', '').replace('</em>', '').replace("#", "")
    item = VideoItem()
    item.add_video_info("user_id", user["uid"])
    item.add_video_info("user_name", user["nick_name"])
    item.add_video_info("video_id", video_obj['hashDocID'])
    item.add_video_info("video_title", title)
    item.add_video_info("publish_time_stamp", publish_time_stamp)
    item.add_video_info("video_url", video_obj["videoUrl"])
    item.add_video_info("cover_url", video_obj["image"])
    item.add_video_info("out_video_id", video_obj['hashDocID'])
    item.add_video_info("out_user_id", trace_id)
    item.add_video_info("platform", platform)
    item.add_video_info("strategy", "search")
    item.add_video_info("session", "{}-{}".format(platform, int(time.time())))
    mq_obj = item.produce_item()
    ETL_MQ.send_msg(video_dict=mq_obj)
    aliyun_logger.logging(
        code="1002",
        message="成功发送到 ETL",
        account=user["uid"],
        data=mq_obj
    )