123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409 |
- """
- @author: luojunhui
- WeChat video search: crawl videos by keyword and send them to the ETL message queue.
- """
- import os
- import sys
- import json
- import time
- import requests
- sys.path.append(os.getcwd())
- from application.items import VideoItem
- from application.common.messageQueue import MQ
- from application.common.log import AliyunLogger
# Message-queue client for the "topic_crawler_etl_prod" topic; the functions
# below call ETL_MQ.send_msg(...) to hand crawled videos over to ETL.
- ETL_MQ = MQ(topic_name="topic_crawler_etl_prod")
# Logger created with platform "weixin_search" and mode "search"; every log
# record emitted by the functions below goes through aliyun_logger.logging(...).
- aliyun_logger = AliyunLogger(platform="weixin_search", mode="search")
# Official-account id (ghId) -> in-site account that publishes on its behalf.
# Built once at import time instead of on every weixin_search() call.
_GH_ID_TO_USER = {
    "gh_01f8afd03366": {"uid": 69637520, "nick_name": "非亲非故"},
    "gh_058e41145a0c": {"uid": 69637476, "nick_name": "甜腻梦话"},
    "gh_084a485e859a": {"uid": 69637472, "nick_name": "梦星月"},
    "gh_0921c03402cd": {"uid": 69637531, "nick_name": "你的女友"},
    "gh_0c89e11f8bf3": {"uid": 69637508, "nick_name": "粟米"},
    "gh_171cec079b2a": {"uid": 69637501, "nick_name": "海上"},
    "gh_183d80deffb8": {"uid": 69637491, "nick_name": "论趣"},
    "gh_1ee2e1b39ccf": {"uid": 69637473, "nick_name": "纵有疾风起"},
    "gh_234ef02cdee5": {"uid": 69637513, "nick_name": "夹逼"},
    "gh_26a307578776": {"uid": 69637490, "nick_name": "最宝贝的宝贝"},
    "gh_29074b51f2b7": {"uid": 69637530, "nick_name": "沉舸"},
    "gh_2b8c6aa035ae": {"uid": 69637470, "nick_name": "懶得取名"},
    "gh_34318194fd0e": {"uid": 69637517, "nick_name": "徒四壁"},
    "gh_3845af6945d0": {"uid": 69637545, "nick_name": "秋水娉婷"},
    "gh_3ac6d7208961": {"uid": 69637497, "nick_name": "小熊的少女梦"},
    "gh_3c7d38636846": {"uid": 69637519, "nick_name": "油腻腻"},
    "gh_3df10391639c": {"uid": 69637541, "nick_name": "六郎娇面"},
    "gh_40a0ad154478": {"uid": 69637516, "nick_name": "禁止"},
    "gh_424c8eeabced": {"uid": 69637522, "nick_name": "认命"},
    "gh_4568b5a7e2fe": {"uid": 69637482, "nick_name": "香腮"},
    "gh_45beb952dc74": {"uid": 69637488, "nick_name": "毋庸"},
    "gh_484de412b0ef": {"uid": 69637481, "nick_name": "婪"},
    "gh_4c058673c07e": {"uid": 69637474, "nick_name": "影帝"},
    "gh_538f78f9d3aa": {"uid": 69637478, "nick_name": "伤痕"},
    "gh_56a6765df869": {"uid": 69637514, "nick_name": "风月"},
    "gh_56ca3dae948c": {"uid": 69637538, "nick_name": "留下太多回忆"},
    "gh_5e543853d8f0": {"uid": 69637543, "nick_name": "不知春秋"},
    "gh_5ff48e9fb9ef": {"uid": 69637494, "nick_name": "寻她找他"},
    "gh_671f460c856c": {"uid": 69637523, "nick_name": "绝不改悔"},
    "gh_6b7c2a257263": {"uid": 69637528, "nick_name": "奶牙"},
    "gh_6d205db62f04": {"uid": 69637509, "nick_name": "怕羞"},
    "gh_6d9f36e3a7be": {"uid": 69637498, "nick_name": "望长安"},
    "gh_73be0287bb94": {"uid": 69637537, "nick_name": "戏剧"},
    "gh_744cb16f6e16": {"uid": 69637505, "nick_name": "反駁"},
    "gh_7b4a5f86d68c": {"uid": 69637477, "nick_name": "我很想你"},
    "gh_7bca1c99aea0": {"uid": 69637511, "nick_name": "从小就很傲"},
    "gh_7e5818b2dd83": {"uid": 69637532, "nick_name": "二八佳人"},
    "gh_89ef4798d3ea": {"uid": 69637533, "nick_name": "彼岸花"},
    "gh_901b0d722749": {"uid": 69637518, "nick_name": "深情不为我"},
    "gh_9161517e5676": {"uid": 69637495, "nick_name": "折磨"},
    "gh_93e00e187787": {"uid": 69637504, "nick_name": "理会"},
    "gh_9877c8541764": {"uid": 69637506, "nick_name": "我沿着悲伤"},
    "gh_9cf3b7ff486b": {"uid": 69637492, "nick_name": "hoit"},
    "gh_9e559b3b94ca": {"uid": 69637471, "nick_name": "我与你相遇"},
    "gh_9f8dc5b0c74e": {"uid": 69637496, "nick_name": "港口"},
    "gh_a182cfc94dad": {"uid": 69637539, "nick_name": "四海八荒"},
    "gh_a2901d34f75b": {"uid": 69637535, "nick_name": "听腻了谎话"},
    "gh_a307072c04b9": {"uid": 69637521, "nick_name": "踏步"},
    "gh_a6351b447819": {"uid": 69637540, "nick_name": "七猫酒馆"},
    "gh_ac43e43b253b": {"uid": 69637499, "nick_name": "一厢情愿"},
    "gh_adca24a8f429": {"uid": 69637483, "nick_name": "对你何止一句喜欢"},
    "gh_b15de7c99912": {"uid": 69637536, "nick_name": "糖炒板栗"},
    "gh_b32125c73861": {"uid": 69637493, "nick_name": "发尾"},
    "gh_b3ffc1ca3a04": {"uid": 69637546, "nick_name": "主宰你心"},
    "gh_b8baac4296cb": {"uid": 69637489, "nick_name": "生性"},
    "gh_b9b99173ff8a": {"uid": 69637524, "nick_name": "养一只月亮"},
    "gh_bd57b6978e06": {"uid": 69637527, "nick_name": "厌遇"},
    "gh_be8c29139989": {"uid": 69637502, "nick_name": "不负"},
    "gh_bfe5b705324a": {"uid": 69637529, "nick_name": "乐极"},
    "gh_bff0bcb0694a": {"uid": 69637534, "nick_name": "简迷离"},
    "gh_c69776baf2cd": {"uid": 69637512, "nick_name": "骄纵"},
    "gh_c91b42649690": {"uid": 69637503, "nick_name": "荟萃"},
    "gh_d2cc901deca7": {"uid": 69637487, "nick_name": "恶意调笑"},
    "gh_d5f935d0d1f2": {"uid": 69637500, "nick_name": "青少年哪吒"},
    "gh_da76772d8d15": {"uid": 69637526, "nick_name": "独揽风月"},
    "gh_de9f9ebc976b": {"uid": 69637475, "nick_name": "剑出鞘恩怨了"},
    "gh_e0eb490115f5": {"uid": 69637486, "nick_name": "赋别"},
    "gh_e24da99dc899": {"uid": 69637484, "nick_name": "恋雨夏季"},
    "gh_e2576b7181c6": {"uid": 69637515, "nick_name": "满天星"},
    "gh_e75dbdc73d80": {"uid": 69637542, "nick_name": "情战"},
    "gh_e9d819f9e147": {"uid": 69637525, "nick_name": "与卿"},
    "gh_efaf7da157f5": {"uid": 69637547, "nick_name": "心野性子浪"},
    "gh_f4594783f5b8": {"uid": 69637544, "nick_name": "自缚"},
    "gh_fe6ef3a65a48": {"uid": 69637480, "nick_name": "风间"},
}

# Upper bound (seconds) for the crawler HTTP call so a stalled service cannot
# hang the coroutine forever.
_SEARCH_TIMEOUT_SECONDS = 60


async def weixin_search(params):
    """
    Search WeChat videos by keyword and forward the results to ETL via MQ.

    One POST request is sent to the keyword crawler service (first page only,
    ``cursor`` "0"); at most the first ten returned entries are handed to
    ``process_weixin_video_obj``. No de-duplication is performed.

    Failures of the HTTP call, of JSON decoding, or of an individual entry are
    logged with code "3000" and swallowed; a failing entry does not prevent the
    remaining entries from being processed.

    :param params: mapping that must contain ``title`` (the search keyword),
        ``ghId`` (official-account id, resolved through ``_GH_ID_TO_USER``)
        and ``trace_id`` (passed through to ``process_weixin_video_obj``).
    :return: None
    :raises KeyError: if ``params`` lacks ``title``, ``ghId`` or ``trace_id``.
    """
    aliyun_logger.logging(
        code="2000",
        message="请求参数",
        data=params
    )
    search_keys = params['title']
    gh_id = params['ghId']
    trace_id = params['trace_id']

    user = _GH_ID_TO_USER.get(gh_id)
    if user is None:
        # Without a mapped account there is nothing to publish under; bail out
        # here instead of failing later on user['uid'] inside the error handlers.
        aliyun_logger.logging(
            code="3000",
            message="有报错信息---{}---微信搜索视频失败".format(
                "no account configured for ghId {}".format(gh_id)
            )
        )
        return

    url = "http://8.217.190.241:8888/crawler/wei_xin/keyword"
    payload = json.dumps({
        "keyword": search_keys,
        "cursor": "0",
        "content_type": "video"
    })
    headers = {
        'Content-Type': 'application/json'
    }
    try:
        # NOTE(review): requests is a blocking client, so the event loop is
        # stalled for the duration of this call; the timeout bounds that stall.
        response = requests.request(
            "POST",
            url,
            headers=headers,
            data=payload,
            timeout=_SEARCH_TIMEOUT_SECONDS
        )
        # Decode the body once and reuse it for logging and processing.
        result = response.json()
        aliyun_logger.logging(
            code="2000",
            message="微信抓取成功",
            data=result
        )
        for entry in result['data']['data'][:10]:
            try:
                # Extracted inside the per-entry try so that one malformed
                # entry is logged and skipped rather than aborting the batch.
                video_obj = entry['items'][0]
                aliyun_logger.logging(
                    code="1001",
                    message="扫描到一条视频",
                    account=user['uid'],
                    data=video_obj
                )
                await process_weixin_video_obj(video_obj, user, trace_id)
            except Exception as e:
                aliyun_logger.logging(
                    code="3000",
                    message="有报错信息---{}".format(e),
                    account=user['uid']
                )
    except Exception as e:
        # Covers network errors, timeouts, non-JSON bodies and unexpected
        # response shapes.
        aliyun_logger.logging(
            code="3000",
            message="有报错信息---{}---微信搜索视频失败".format(e),
            account=user['uid']
        )
async def process_weixin_video_obj(video_obj, user, trace_id):
    """
    Turn one WeChat search result into a VideoItem and send it to ETL.

    Official accounts and in-site accounts correspond one-to-one, so the item
    is attributed to the given ``user``.

    :param video_obj: search-result mapping; the keys ``pubTime``, ``title``,
        ``hashDocID``, ``videoUrl`` and ``image`` are read.
    :param user: account mapping providing ``uid`` and ``nick_name``.
    :param trace_id: stored on the item under ``out_user_id``.
    :return: None
    """
    source_platform = "weixin_search"
    published_at = int(video_obj['pubTime'])

    # Drop the search-highlight markup and any '#' characters from the title.
    clean_title = video_obj['title']
    for fragment in ('<em class=\"highlight\">', '</em>', "#"):
        clean_title = clean_title.replace(fragment, '')

    video_item = VideoItem()
    put = video_item.add_video_info
    put("user_id", user["uid"])
    put("user_name", user["nick_name"])
    put("video_id", video_obj['hashDocID'])
    put("video_title", clean_title)
    put("publish_time_stamp", published_at)
    put("video_url", video_obj["videoUrl"])
    put("cover_url", video_obj["image"])
    put("out_video_id", video_obj['hashDocID'])
    put("out_user_id", trace_id)
    put("platform", source_platform)
    put("strategy", "search")
    # Session label: "<platform>-<current unix time in whole seconds>".
    put("session", "-".join((source_platform, str(int(time.time())))))

    etl_message = video_item.produce_item()
    ETL_MQ.send_msg(video_dict=etl_message)
    aliyun_logger.logging(
        code="1002",
        message="成功发送到 ETL",
        account=user["uid"],
        data=etl_message
    )
|