|
@@ -30,6 +30,7 @@ def addSpiderLink():
|
|
|
|
|
|
sql = f'select * from crawler_author_map where spider_link={spider_link}'
|
|
|
result = mysql_con.get_values(sql)
|
|
|
+ now_time = int(time.time())
|
|
|
if result:
|
|
|
old_task_id = result[0]['task_id']
|
|
|
if task_id == old_task_id:
|
|
@@ -42,7 +43,7 @@ def addSpiderLink():
|
|
|
task = mysql_con.get_values(task_sql)
|
|
|
spider_links = eval(task[0]['spider_link'])
|
|
|
spider_links.append(spider_link)
|
|
|
- u_sql = f'update crawler_task set spider_link="{spider_links}" where task_id={task_id}'
|
|
|
+ u_sql = f'update crawler_task set spider_link="{spider_links}",update_time={now_time} where task_id={task_id}'
|
|
|
mysql_con.update_values(u_sql)
|
|
|
return jsonify({'code': 200, 'message': '抓取名单增加成功', 'del_link': spider_link})
|
|
|
else:
|
|
@@ -52,7 +53,7 @@ def addSpiderLink():
|
|
|
|
|
|
spider_links = eval(result[0]['spider_link'])
|
|
|
spider_links.append(spider_link)
|
|
|
- u_sql = f'update crawler_task set spider_link="{spider_links}" where task_id={task_id}'
|
|
|
+ u_sql = f'update crawler_task set spider_link="{spider_links}",update_time={now_time} where task_id={task_id}'
|
|
|
mysql_con.update_values(u_sql)
|
|
|
|
|
|
return jsonify({'code': 200, 'message': '抓取名单增加成功', 'add_link': success_list})
|
|
@@ -73,8 +74,8 @@ def delSpiderLink():
|
|
|
task = mysql_con.get_values(sql)
|
|
|
spider_links = eval(task[0]['spider_link'])
|
|
|
spider_links.remove(spider_link)
|
|
|
-
|
|
|
- u_sql = f'update crawler_task set spider_link="{spider_links}" where task_id={task_id}'
|
|
|
+ now_time = int(time.time())
|
|
|
+ u_sql = f'update crawler_task set spider_link="{spider_links}",update_time={now_time} where task_id={task_id}'
|
|
|
mysql_con.update_values(u_sql)
|
|
|
if spider_link:
|
|
|
return jsonify({'code': 200, 'message': '抓取名单删除成功', 'del_link': spider_link})
|
|
@@ -285,10 +286,11 @@ def getAllTask():
|
|
|
values += f'{k}="{v}" and '
|
|
|
sql = f"select task_id from crawler_author_map where {values[:-4]} and is_del=1" # [:-1]是为了去掉末尾的逗号
|
|
|
res = mysql_con.get_values(sql)
|
|
|
-
|
|
|
+ task_id_set = set()
|
|
|
+ for task in res:
|
|
|
+ task_id_set.add(task['task_id'])
|
|
|
task_list = list()
|
|
|
- for author_info in res:
|
|
|
- task_id = author_info['task_id']
|
|
|
+ for task_id in task_id_set:
|
|
|
sql = f'select * from crawler_task where task_id={task_id} order by update_time desc limit {start_count}, {end_count}'
|
|
|
task_info = mysql_con.get_values(sql)[0]
|
|
|
task_data = dict(
|
|
@@ -434,6 +436,11 @@ def create_uid(task, task_id, spider_link):
|
|
|
mode_board_list = mysql_con.get_values(mb_sql)
|
|
|
cc_sql = f'select * from crawler_content_category where id={content_category_id}'
|
|
|
content_category_list = mysql_con.get_values(cc_sql)
|
|
|
+
|
|
|
+ source_sql = f'select * from crawler_source where source="{source}"'
|
|
|
+ source_res = mysql_con.get_values(source_sql)[0]
|
|
|
+ spider_platform = source_res['source_desc']
|
|
|
+
|
|
|
if mode_name_list:
|
|
|
task['mode_name_str'] = mode_name_list[0]['mode_name']
|
|
|
else:
|
|
@@ -465,17 +472,29 @@ def create_uid(task, task_id, spider_link):
|
|
|
for author_url in spider_link:
|
|
|
now_time = int(time.time())
|
|
|
time_array = time.localtime(now_time)
|
|
|
- str_time = time.strftime("%Y-%m-%d", time_array)
|
|
|
+ str_time = time.strftime("%Y%m%d", time_array)
|
|
|
# 生成创建用户的tag
|
|
|
|
|
|
-
|
|
|
- tags = ""
|
|
|
- tags_list = ['spider', user_tags, task['mode_name_str'], task['mode_board_str'],
|
|
|
- task['content_category_str'], str_time]
|
|
|
+ mode_tags = task['mode_name_str']+task['mode_board_str']
|
|
|
+ tags = ''
|
|
|
+ if task['task_type'] == 'author':
|
|
|
+ spider_task = '账户'
|
|
|
+ tags_list = ['spider', spider_task, spider_platform, user_tags, author_url, task['content_category_str'], str_time]
|
|
|
+
|
|
|
+ elif task['task_type'] == 'search':
|
|
|
+ spider_task = '搜索'
|
|
|
+ tags_list = ['spider', spider_task, spider_platform, user_tags, author_url, task['content_category_str'], str_time]
|
|
|
+ elif task['task_type'] == 'board':
|
|
|
+ spider_task = '榜单'
|
|
|
+ tags_list = ['spider', spider_task, spider_platform, user_tags, mode_tags, task['content_category_str'], str_time]
|
|
|
+ elif task['task_type'] == 'recommend':
|
|
|
+ spider_task = '推荐'
|
|
|
+ tags_list = ['spider', spider_task, spider_platform, user_tags, mode_tags, task['content_category_str'], str_time]
|
|
|
+ else:
|
|
|
+ tags_list = ['spider', spider_platform, user_tags, mode_tags, task['content_category_str'], str_time]
|
|
|
for v in tags_list:
|
|
|
if v:
|
|
|
tags += str(v) + ','
|
|
|
-
|
|
|
post_data = {
|
|
|
# 'count': 1, # (必须)账号个数:传1
|
|
|
# 'accountType': 4, # (必须)账号类型 :传 4 app虚拟账号
|