@@ -122,7 +122,8 @@ def getMediaInfo():
     for task_info in result:
         media_id = task_info['media_id']

-        media_info = requests.get(url=conf['select_media_url'], params={'uid': media_id},verify=False).json()['content']
+        media_info = requests.get(url=conf['select_media_url'], params={'uid': media_id}, verify=False).json()[
+            'content']
         media_name = media_info['longvideoNickName'] if media_info['longvideoNickName'] else media_info['nickName']
         nick_name = task_info['nick_name']
         spider_link = task_info['spider_link']
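Beyond the re-wrap, this request is unchanged: it still disables certificate verification and sets no timeout, so one slow endpoint call can stall the whole loop. A minimal hardened sketch of the same fetch (the helper name and the 10-second timeout are illustrative assumptions, not part of this change):

```python
import requests

def fetch_media_info(conf: dict, media_id) -> dict:
    # Same endpoint and params as the diff; timeout and raise_for_status are
    # hypothetical additions so failures surface here rather than as a
    # KeyError on 'content' downstream.
    resp = requests.get(
        url=conf['select_media_url'],
        params={'uid': media_id},
        verify=False,  # kept from the original; verify=True is safer if certs allow
        timeout=10,    # assumption: the original call sets no timeout at all
    )
    resp.raise_for_status()
    return resp.json()['content']
```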
@@ -222,7 +223,7 @@ def get_repeat_list():
         if result:
             repeat_list.append(spider_link)
     if repeat_list:
-        return jsonify({'code': 400, 'message': '名单重复', 'repeat_list': repeat_list})
+        return jsonify({'code': 200, 'message': '抓取名单校验通过', 'repeat_list': repeat_list})
     else:

         return jsonify({'code': 200, 'message': '抓取名单校验通过', 'repeat_list': repeat_list})
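With this change both branches of get_repeat_list return an identical 200 payload: the endpoint now only reports repeat_list, and hard rejection of duplicates moves into create_uid (next hunk). If that is the intent, the if/else is redundant and the tail of the function can collapse to one line, as in this sketch:

```python
from flask import jsonify

def repeat_list_response(repeat_list):
    # Sketch of the now-equivalent tail of get_repeat_list: both branches
    # return the same payload, so a single return suffices. Callers can
    # still inspect repeat_list to warn about duplicates client-side.
    return jsonify({'code': 200, 'message': '抓取名单校验通过', 'repeat_list': repeat_list})
```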
@@ -487,85 +488,115 @@ def create_uid(task, task_id, spider_link):
         content_tag_list.append(tag['tagName'])
     user_tags = ','.join(str(i) for i in tag_name_list)
     user_content_tags = ','.join(str(i) for i in content_tag_list)
-
+    repeat_list = []
+    create_user_list = []
     for author_url in spider_link:
-        now_time = int(time.time())
-        time_array = time.localtime(now_time)
-        str_time = time.strftime("%Y%m%d", time_array)
-        # Build the tag string for the user being created
-
-        tags = ''
-        if task['task_type'] == 'author':
-            spider_task = '账号'
-            tags_list = ['spider', spider_task, spider_platform, user_tags, task['content_category_str'], str_time]
-
-        elif task['task_type'] == 'search':
-            spider_task = '搜索'
-            tags_list = ['spider', spider_task, spider_platform, user_tags, author_url, task['content_category_str'], str_time]
-        elif task['task_type'] == 'board':
-            spider_task = '榜单'
-            mode_tags = task['mode_board_str']
-
-            tags_list = ['spider', spider_task, spider_platform, user_tags, mode_tags, task['content_category_str'], str_time]
-        elif task['task_type'] == 'recommend':
-            spider_task = '推荐'
-            mode_tags = task['mode_name_str'] + task['mode_board_str']
-            tags_list = ['spider', spider_task, spider_platform, user_tags, mode_tags, task['content_category_str'], str_time]
+        repeat_sql = f'select * from crawler_author_map where spider_link="{author_url}"'
+        result = mysql_con.get_values(repeat_sql)
+        if result:
+            old_task_id = result[0]['task_id']
+            is_del = result[0]['is_del']
+            if task_id == old_task_id:
+                if is_del:
+                    continue
+                else:
+                    up_sql = f'update crawler_author_map set is_del=1 where spider_link="{author_url}"'
+                    mysql_con.update_values(up_sql)
+            else:
+                if is_del:
+                    repeat_list.append(author_url)
+                else:
+                    up_sql = f'update crawler_author_map set task_id={task_id},is_del=1 where spider_link="{author_url}"'
+                    mysql_con.update_values(up_sql)
         else:
-            tags_list = ['spider', spider_platform, user_tags, task['content_category_str'], str_time]
-        for v in tags_list:
-            if v:
-                tags += str(v) + ','
-        post_data = {
-            # 'count': 1,     # (required) number of accounts: pass 1
-            # 'accountType': 4,   # (required) account type: pass 4 for an app virtual account
-            'pwd': '',  # password, defaults to 12346
-            'nickName': '',  # nickname, defaults to vuser......
-            'avatarUrl': '',
-            # avatar URL, defaults to http://weapppiccdn.yishihui.com/resources/images/pic_normal.png
-            'tagName': tags[:-1],  # separate multiple values with ASCII commas
-        }
-        try:
-            response = requests.post(url=conf['media_url'], params=post_data)
-            media_id = response.json()['data']
-            media_info = requests.get(url=conf['select_media_url'], params={'uid': media_id},verify=False).json()['content']
-        except Exception as e:
-            logging.warning(f'创建账户:{spider_link},失败,原因:{e}')
-            fail_list.append(author_url)
-            continue
+            create_user_list.append(author_url)
+    if repeat_list:
+        message = f'该任务和其他任务抓取名单重复:{repeat_list}'
+        return jsonify({'code': 400, 'message': message})
+    else:
+        for author_url in create_user_list:
+
+            now_time = int(time.time())
+            time_array = time.localtime(now_time)
+            str_time = time.strftime("%Y%m%d", time_array)
+            # Build the tag string for the user being created
+            tags = ''
+            if task['task_type'] == 'author':
+                spider_task = '账号'
+                tags_list = ['spider', spider_task, spider_platform, user_tags, task['content_category_str'], str_time]
+
+            elif task['task_type'] == 'search':
+                spider_task = '搜索'
+                tags_list = ['spider', spider_task, spider_platform, user_tags, author_url,
+                             task['content_category_str'], str_time]
+            elif task['task_type'] == 'board':
+                spider_task = '榜单'
+                mode_tags = task['mode_board_str']

+                tags_list = ['spider', spider_task, spider_platform, user_tags, mode_tags, task['content_category_str'],
+                             str_time]
+            elif task['task_type'] == 'recommend':
+                spider_task = '推荐'
+                mode_tags = task['mode_name_str'] + task['mode_board_str']
+                tags_list = ['spider', spider_task, spider_platform, user_tags, mode_tags, task['content_category_str'],
+                             str_time]
+            else:
+                tags_list = ['spider', spider_platform, user_tags, task['content_category_str'], str_time]
+            for v in tags_list:
+                if v:
+                    tags += str(v) + ','
+            post_data = {
+                # 'count': 1,     # (required) number of accounts: pass 1
+                # 'accountType': 4,   # (required) account type: pass 4 for an app virtual account
+                'pwd': '',  # password, defaults to 12346
+                'nickName': '',  # nickname, defaults to vuser......
+                'avatarUrl': '',
+                # avatar URL, defaults to http://weapppiccdn.yishihui.com/resources/images/pic_normal.png
+                'tagName': tags[:-1],  # separate multiple values with ASCII commas
+            }
+            try:
+                response = requests.post(url=conf['media_url'], params=post_data)
+                media_id = response.json()['data']
+                media_info = requests.get(url=conf['select_media_url'], params={'uid': media_id}, verify=False).json()[
+                    'content']
+            except Exception as e:
+                logging.warning(f'创建账户:{author_url},失败,原因:{e}')
+                fail_list.append(author_url)
+                continue

-        data = dict(
-            spider_link=author_url,
-            media_id=media_id,
-            media_name=media_info['longvideoNickName'] if media_info['longvideoNickName'] else media_info['nickName'],
-            source=source,
-            task_type=task_type,
-            applets_status=applets_status,
-            app_status=app_status,
-            user_tag=user_tags,
-            user_content_tag=user_content_tags,
-            insert_time=int(time.time()),
-            update_time=int(time.time()),
-            create_user_time=now_time,
-            mode_name_str=task['mode_name_str'],
-            mode_board_str=task['mode_board_str'],
-            content_category_str=task['content_category_str'],
-            # mode_value_str=mode_value_str,
-            task_id=task_id,
-            media_main_url=conf['media_main_url'].format(media_id)
-        )
-        keys = ','.join(data.keys())
-        values = ','.join(['%s'] * len(data))
-        table = 'crawler_author_map'
-        sql = f"""insert into {table}({keys}) VALUES({values})"""
-        mysql_con.insert_values(sql, tuple(data.values()))
-        uer_info = dict(
-            outer_id=author_url,
-            uid=media_id
-        )
-        success_list.append(uer_info)
+            data = dict(
+                spider_link=author_url,
+                media_id=media_id,
+                media_name=media_info['longvideoNickName'] if media_info['longvideoNickName'] else media_info[
+                    'nickName'],
+                source=source,
+                task_type=task_type,
+                applets_status=applets_status,
+                app_status=app_status,
+                user_tag=user_tags,
+                user_content_tag=user_content_tags,
+                insert_time=int(time.time()),
+                update_time=int(time.time()),
+                create_user_time=now_time,
+                mode_name_str=task['mode_name_str'],
+                mode_board_str=task['mode_board_str'],
+                content_category_str=task['content_category_str'],
+                # mode_value_str=mode_value_str,
+                task_id=task_id,
+                media_main_url=conf['media_main_url'].format(media_id)
+            )
+            keys = ','.join(data.keys())
+            values = ','.join(['%s'] * len(data))
+            table = 'crawler_author_map'
+            sql = f"""insert into {table}({keys}) VALUES({values})"""
+            mysql_con.insert_values(sql, tuple(data.values()))
+            user_info = dict(
+                outer_id=author_url,
+                uid=media_id
+            )
+            success_list.append(user_info)

-    return success_list, fail_list
+        return success_list, fail_list


 @app.route("/v1/crawler/author/create", methods=["POST"])
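Two review notes on the create_uid hunk. First, the function now returns either a (success_list, fail_list) tuple or a Flask jsonify response depending on whether cross-task duplicates were found, so the /v1/crawler/author/create handler has to distinguish the two shapes. Second, the new repeat_sql and up_sql statements interpolate author_url and task_id directly into SQL text; if mysql_con wraps a PyMySQL-style driver, the same lookups can use placeholders instead. A sketch under that assumption (execute_query is a hypothetical helper, not part of this codebase):

```python
import pymysql  # assumption: mysql_con wraps a PyMySQL-style connection

def execute_query(connection, sql, params):
    # Hypothetical helper: parameterized execution keeps author_url out of
    # the SQL text, so quotes in a crawled URL cannot break or inject into
    # the query the way an f-string interpolation can.
    with connection.cursor(pymysql.cursors.DictCursor) as cursor:
        cursor.execute(sql, params)
        return cursor.fetchall()

# Equivalent of the dedup lookup in the new code, with a placeholder:
#   rows = execute_query(conn,
#       'select * from crawler_author_map where spider_link=%s', (author_url,))
# ...and of the reassignment update:
#   execute_query(conn,
#       'update crawler_author_map set task_id=%s, is_del=1 where spider_link=%s',
#       (task_id, author_url))
```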