|
@@ -0,0 +1,390 @@
|
|
|
+"""
|
|
|
+@author: luojunhui
|
|
|
+微信 search
|
|
|
+"""
|
|
|
+import os
|
|
|
+import sys
|
|
|
+import json
|
|
|
+import time
|
|
|
+import requests
|
|
|
+
|
|
|
+sys.path.append(os.getcwd())
|
|
|
+
|
|
|
+from application.items import VideoItem
|
|
|
+from application.common.messageQueue import MQ
|
|
|
+from application.common.log import AliyunLogger
|
|
|
+
|
|
|
# Message-queue client bound to the "topic_crawler_etl_prod" topic;
# process_weixin_video_obj sends every produced video item through it.
ETL_MQ = MQ(
    topic_name="topic_crawler_etl_prod",
)

# Logger shared by this module; every record is tagged with
# platform "weixin_search" and mode "search".
aliyun_logger = AliyunLogger(
    platform="weixin_search",
    mode="search",
)
|
|
|
+
|
|
|
+
|
|
|
# Maps a WeChat official-account id (ghId) to the in-site account that
# publishes on its behalf.  Built once at import time instead of on every
# call to weixin_search.
GH_ID_DICT = {
    "gh_01f8afd03366": {"uid": 69629493, "nick_name": "非亲非故"},
    "gh_058e41145a0c": {"uid": 69629452, "nick_name": "甜腻梦话"},
    "gh_084a485e859a": {"uid": 69629447, "nick_name": "梦星月"},
    "gh_0921c03402cd": {"uid": 69629504, "nick_name": "你的女友"},
    "gh_0c89e11f8bf3": {"uid": 69629482, "nick_name": "粟米"},
    "gh_171cec079b2a": {"uid": 69629475, "nick_name": "海上"},
    "gh_183d80deffb8": {"uid": 69629465, "nick_name": "论趣"},
    "gh_1ee2e1b39ccf": {"uid": 69629448, "nick_name": "纵有疾风起"},
    "gh_234ef02cdee5": {"uid": 69629486, "nick_name": "夹逼"},
    "gh_26a307578776": {"uid": 69629464, "nick_name": "最宝贝的宝贝"},
    "gh_29074b51f2b7": {"uid": 69629503, "nick_name": "沉舸"},
    "gh_2b8c6aa035ae": {"uid": 69629443, "nick_name": "懶得取名"},
    "gh_34318194fd0e": {"uid": 69629490, "nick_name": "徒四壁"},
    "gh_3845af6945d0": {"uid": 69629518, "nick_name": "秋水娉婷"},
    "gh_3ac6d7208961": {"uid": 69629471, "nick_name": "小熊的少女梦"},
    "gh_3c7d38636846": {"uid": 69629492, "nick_name": "油腻腻"},
    "gh_3df10391639c": {"uid": 69629514, "nick_name": "六郎娇面"},
    "gh_40a0ad154478": {"uid": 69629489, "nick_name": "禁止"},
    "gh_424c8eeabced": {"uid": 69629495, "nick_name": "认命"},
    "gh_4568b5a7e2fe": {"uid": 69629457, "nick_name": "香腮"},
    "gh_45beb952dc74": {"uid": 69629462, "nick_name": "毋庸"},
    "gh_484de412b0ef": {"uid": 69629456, "nick_name": "婪"},
    "gh_4c058673c07e": {"uid": 69629449, "nick_name": "影帝"},
    "gh_538f78f9d3aa": {"uid": 69629454, "nick_name": "伤痕"},
    "gh_56a6765df869": {"uid": 69629487, "nick_name": "风月"},
    "gh_56ca3dae948c": {"uid": 69629511, "nick_name": "留下太多回忆"},
    "gh_5e543853d8f0": {"uid": 69629516, "nick_name": "不知春秋"},
    "gh_5ff48e9fb9ef": {"uid": 69629468, "nick_name": "寻她找他"},
    "gh_671f460c856c": {"uid": 69629496, "nick_name": "绝不改悔"},
    "gh_6b7c2a257263": {"uid": 69629501, "nick_name": "奶牙"},
    "gh_6d205db62f04": {"uid": 69629483, "nick_name": "怕羞"},
    "gh_6d9f36e3a7be": {"uid": 69629472, "nick_name": "望长安"},
    "gh_73be0287bb94": {"uid": 69629510, "nick_name": "戏剧"},
    "gh_744cb16f6e16": {"uid": 69629479, "nick_name": "反駁"},
    "gh_7b4a5f86d68c": {"uid": 69629453, "nick_name": "我很想你"},
    "gh_7bca1c99aea0": {"uid": 69629484, "nick_name": "从小就很傲"},
    "gh_7e5818b2dd83": {"uid": 69629505, "nick_name": "二八佳人"},
    "gh_89ef4798d3ea": {"uid": 69629506, "nick_name": "彼岸花"},
    "gh_901b0d722749": {"uid": 69629491, "nick_name": "深情不为我"},
    "gh_9161517e5676": {"uid": 69629469, "nick_name": "折磨"},
    "gh_93e00e187787": {"uid": 69629478, "nick_name": "理会"},
    "gh_9877c8541764": {"uid": 69629481, "nick_name": "我沿着悲伤"},
    "gh_9cf3b7ff486b": {"uid": 69629466, "nick_name": "hoit"},
    "gh_9e559b3b94ca": {"uid": 69629444, "nick_name": "我与你相遇"},
    "gh_9f8dc5b0c74e": {"uid": 69629470, "nick_name": "港口"},
    "gh_a182cfc94dad": {"uid": 69629512, "nick_name": "四海八荒"},
    "gh_a2901d34f75b": {"uid": 69629508, "nick_name": "听腻了谎话"},
    "gh_a307072c04b9": {"uid": 69629494, "nick_name": "踏步"},
    "gh_a6351b447819": {"uid": 69629513, "nick_name": "七猫酒馆"},
    "gh_ac43e43b253b": {"uid": 69629473, "nick_name": "一厢情愿"},
    "gh_adca24a8f429": {"uid": 69629458, "nick_name": "对你何止一句喜欢"},
    "gh_b15de7c99912": {"uid": 69629509, "nick_name": "糖炒板栗"},
    "gh_b32125c73861": {"uid": 69629467, "nick_name": "发尾"},
    "gh_b3ffc1ca3a04": {"uid": 69629519, "nick_name": "主宰你心"},
    "gh_b8baac4296cb": {"uid": 69629463, "nick_name": "生性"},
    "gh_b9b99173ff8a": {"uid": 69629497, "nick_name": "养一只月亮"},
    "gh_bd57b6978e06": {"uid": 69629500, "nick_name": "厌遇"},
    "gh_be8c29139989": {"uid": 69629476, "nick_name": "不负"},
    "gh_bfe5b705324a": {"uid": 69629502, "nick_name": "乐极"},
    "gh_bff0bcb0694a": {"uid": 69629507, "nick_name": "简迷离"},
    "gh_c69776baf2cd": {"uid": 69629485, "nick_name": "骄纵"},
    "gh_c91b42649690": {"uid": 69629477, "nick_name": "荟萃"},
    "gh_d2cc901deca7": {"uid": 69629461, "nick_name": "恶意调笑"},
    "gh_d5f935d0d1f2": {"uid": 69629474, "nick_name": "青少年哪吒"},
    "gh_da76772d8d15": {"uid": 69629499, "nick_name": "独揽风月"},
    "gh_de9f9ebc976b": {"uid": 69629450, "nick_name": "剑出鞘恩怨了"},
    "gh_e0eb490115f5": {"uid": 69629460, "nick_name": "赋别"},
    "gh_e24da99dc899": {"uid": 69629459, "nick_name": "恋雨夏季"},
    "gh_e2576b7181c6": {"uid": 69629488, "nick_name": "满天星"},
    "gh_e75dbdc73d80": {"uid": 69629515, "nick_name": "情战"},
    "gh_e9d819f9e147": {"uid": 69629498, "nick_name": "与卿"},
    "gh_efaf7da157f5": {"uid": 69629520, "nick_name": "心野性子浪"},
    "gh_f4594783f5b8": {"uid": 69629517, "nick_name": "自缚"},
    "gh_fe6ef3a65a48": {"uid": 69629455, "nick_name": "风间"},
}

# Upper bound, in seconds, for the crawler HTTP call so an unresponsive
# service cannot hang the coroutine forever.
# NOTE(review): 60s is a conservative guess -- tune to the crawler's latency.
REQUEST_TIMEOUT_SECONDS = 60


async def weixin_search(params):
    """
    Search WeChat videos by keyword and forward every hit to ETL through MQ.

    Only the first result page is fetched (cursor "0") and no de-duplication
    is performed.  Each result is handled independently: a failure while
    processing one result is logged with code "3000" and the remaining
    results are still processed.

    NOTE: the HTTP call uses the synchronous ``requests`` library, so it
    blocks the event loop until the crawler answers or the timeout expires.

    :param params: dict with
        'search_keys' -- iterable of keyword strings, joined with "," for the query
        'ghId'        -- official-account id; must be a key of GH_ID_DICT
    :return: None
    :raises KeyError: if 'search_keys' or 'ghId' is missing from params
    :raises ValueError: if params['ghId'] is not a known official account
    """
    search_keys = params['search_keys']
    gh_id = params['ghId']
    user = GH_ID_DICT.get(gh_id)
    if user is None:
        # Fail fast, before any network I/O, instead of crashing later with
        # an opaque "'NoneType' object is not subscriptable".
        raise ValueError("unknown ghId: {!r}".format(gh_id))

    url = "http://8.217.190.241:8888/crawler/wei_xin/keyword"
    payload = json.dumps({
        "keyword": ",".join(search_keys),
        "cursor": "0",
        "content_type": "video"
    })
    headers = {
        'Content-Type': 'application/json'
    }
    response = requests.request(
        "POST",
        url,
        headers=headers,
        data=payload,
        timeout=REQUEST_TIMEOUT_SECONDS,
    )
    data_list = response.json()['data']['data']
    for item in data_list:
        try:
            # Extracted inside the try so one malformed result cannot abort
            # the processing of the remaining results.
            video_obj = item['items'][0]
            aliyun_logger.logging(
                code="1001",
                message="扫描到一条视频",
                account=user['uid'],
                data=video_obj
            )
            await process_weixin_video_obj(video_obj, user)
        except Exception as e:
            # Per-item boundary: record the failure and move on.
            aliyun_logger.logging(
                code="3000",
                message="有报错信息---{}".format(e),
                account=user['uid']
            )
|
|
|
+
|
|
|
+
|
|
|
async def process_weixin_video_obj(video_obj, user):
    """
    Build a VideoItem from one WeChat search result and send it to ETL.

    Official accounts and in-site accounts correspond one-to-one; ``user``
    is the in-site account the video is attributed to.

    :param video_obj: search-result dict; the keys 'pubTime', 'title',
        'hashDocID', 'videoUrl' and 'image' are read
    :param user: account dict; the keys 'uid' and 'nick_name' are read
    :return: None
    """
    source = "weixin_search"
    published_at = int(video_obj['pubTime'])

    # Strip the search-highlight markup and any '#' from the title.
    clean_title = video_obj['title']
    for fragment in ('<em class=\"highlight\">', '</em>', "#"):
        clean_title = clean_title.replace(fragment, '')

    video_item = VideoItem()
    put = video_item.add_video_info
    put("user_id", user["uid"])
    put("user_name", user["nick_name"])
    put("video_id", video_obj['hashDocID'])
    put("video_title", clean_title)
    put("publish_time_stamp", published_at)
    put("video_url", video_obj["videoUrl"])
    put("cover_url", video_obj["image"])
    put("out_video_id", video_obj['hashDocID'])
    put("platform", source)
    put("strategy", "search")
    # Session label: platform name plus the current Unix time in seconds.
    put("session", "-".join((source, str(int(time.time())))))

    etl_message = video_item.produce_item()
    ETL_MQ.send_msg(video_dict=etl_message)
    aliyun_logger.logging(
        code="1002",
        message="成功发送到 ETL",
        account=user["uid"],
        data=etl_message,
    )
|