wangkun 2 years ago
Parent
Commit
f39cf87612

+ 3 - 0
douyin/douyin_author/__init__.py

@@ -0,0 +1,3 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/5/26

+ 2 - 2
douyin/douyin_follow/dy_author_scheduling.py → douyin/douyin_author/douyin_author_scheduling.py

@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
-# @Author: lierqiang
-# @Time: 2023/4/12
+# @Author: wangkun
+# @Time: 2023/5/26
 import json
 import os
 import random

+ 3 - 0
douyin/douyin_main/run_douyin_author_scheduling.py

@@ -0,0 +1,3 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/5/26

+ 50 - 0
douyin/douyin_main/run_douyin_recommend_scheduling.py

@@ -0,0 +1,50 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/5/26
+import argparse
+import os
+import random
+import sys
+sys.path.append(os.getcwd())
+from common.common import Common
+from common.public import task_fun
+from common.scheduling_db import MysqlHelper
+from douyin.douyin_recommend.douyin_recommend_scheduling import DouyinrecommendScheduling
+
+
+def main(log_type, crawler, task, env):
+    task_dict = task_fun(task)['task_dict']
+    rule_dict = task_fun(task)['rule_dict']
+    task_id = task_dict['task_id']
+    select_user_sql = f"""select * from crawler_user_v3 where task_id={task_id}"""
+    user_list = MysqlHelper.get_values(log_type, crawler, select_user_sql, env, action="")
+    our_uid_list = []
+    for user in user_list:
+        our_uid_list.append(user["uid"])
+    our_uid = random.choice(our_uid_list)
+    Common.logger(log_type, crawler).info(f"Scheduling task: {task_dict}")
+    Common.logger(log_type, crawler).info(f"Crawl rule: {rule_dict}")
+    # Common.logger(log_type, crawler).info(f"User list: {user_list}\n")
+    Common.logger(log_type, crawler).info(f'Start crawling {task_dict["task_name"]}\n')
+    DouyinrecommendScheduling.get_videoList(log_type=log_type,
+                                            crawler=crawler,
+                                            rule_dict=rule_dict,
+                                            our_uid=our_uid,
+                                            env=env)
+    Common.del_logs(log_type, crawler)
+    Common.logger(log_type, crawler).info('Crawl task finished\n')
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()  # create the argument parser
+    parser.add_argument('--log_type', default='recommend')  # add argument, noting its type
+    parser.add_argument('--crawler', default='douyin')  # add argument
+    parser.add_argument('--task')  # add argument
+    parser.add_argument('--env', default='prod')  # add argument
+    args = parser.parse_args()  # parse arguments; values can also be passed on the command line
+    main(log_type=args.log_type,
+         crawler=args.crawler,
+         task=args.task,
+         env=args.env)
+
+
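
A minimal sketch of what common.public.task_fun presumably returns here — an assumption inferred from the call sites above and from the hard-coded task example in the deleted run_dy_author_scheduling.py below, not the repo's actual implementation:

import json

def task_fun(task_str):
    # Hypothetical: parse the --task JSON string into the two dicts
    # the runner reads; the key names match the call sites above.
    task = json.loads(task_str)
    return {
        "task_dict": task["task_dict"],  # task metadata: task_id, task_name, ...
        "rule_dict": task["rule_dict"],  # per-field {"min": ..., "max": ...} bounds
    }

With this shape, task_fun(task)['task_dict']['task_id'] drives the crawler_user_v3 lookup above, and the runner would be launched from the repo root (sys.path.append(os.getcwd()) implies that) with --task set to the JSON string.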

+ 0 - 57
douyin/douyin_main/run_dy_author_scheduling.py

@@ -1,57 +0,0 @@
-# -*- coding: utf-8 -*-
-# @Author: lierqiang
-# @Time: 2023/4/21
-import argparse
-import os
-import sys
-
-sys.path.append(os.getcwd())
-from common.common import Common
-from douyin.douyin_follow.dy_author_scheduling import DyAuthorScheduling
-from common.public import task_fun
-
-
-def main(log_type, crawler, task, oss_endpoint, env):
-    task = task_fun(task)
-    try:
-        Common.logger(log_type, crawler).info(f'Start crawling {crawler} videos, targeted board\n')
-        DyAuthorScheduling.get_follow_videos(log_type=log_type,
-                                             crawler=crawler,
-                                             task=task,
-                                             oss_endpoint=oss_endpoint,
-                                             env=env)
-        Common.del_logs(log_type, crawler)
-        Common.logger(log_type, crawler).info('Crawl task finished\n')
-    except Exception as e:
-        Common.logger(log_type, crawler).info(f"{crawler} video exception, alert triggered: {e}\n")
-        # Feishu.bot(log_type, crawler, f"{e}")
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser()  # create the argument parser
-    parser.add_argument('--log_type', default='author')  # add argument, noting its type
-    parser.add_argument('--crawler', default='douyin')  # add argument
-    parser.add_argument('--strategy', default='targeted crawl')  # add argument
-    parser.add_argument('--task')  # add argument
-    parser.add_argument('--oss_endpoint', default='outer')  # add argument
-    parser.add_argument('--env', default='dev')  # add argument
-    # parser.add_argument('--machine')  # add argument
-    args = parser.parse_args()  # parse arguments; values can also be passed on the command line
-    task = {
-        'task_dict': {'task_id': '17', 'task_name': 'Xigua test 4.21', 'source': 'douyin', 'start_time': '1682010720000',
-                      'interval': '24', 'mode': 'author',
-                      'rule': {'duration': {'min': 40, 'max': 0}, 'play_cnt': {'min': 4000, 'max': 0},
-                               'period': {'min': 10, 'max': 0}, 'fans_cnt': {'min': 0, 'max': 0},
-                               'videos_cnt': {'min': 0, 'max': 0}, 'like_cnt': {'min': 0, 'max': 0},
-                               'width': {'min': 0, 'max': 0}, 'height': {'min': 0, 'max': 0}},
-                      'spider_name': 'run_dy_author_scheduling', 'machine': 'aliyun', 'status': '0',
-                      'create_time': '1682048632396', 'update_time': '1682048632396', 'operator': ''},
-        'rule_dict': {'duration': {'min': 0, 'max': 0}, 'play_cnt': {'min': 0, 'max': 0},
-                      'period': {'min': 0, 'max': 0}, 'fans_cnt': {'min': 0, 'max': 0}, 'videos_cnt': {'min': 0, 'max': 0},
-                      'like_cnt': {'min': 0, 'max': 0}, 'width': {'min': 0, 'max': 0},
-                      'height': {'min': 0, 'max': 0}}}
-    main(log_type=args.log_type,
-         crawler=args.crawler,
-         task=task,
-         oss_endpoint=args.oss_endpoint,
-         env=args.env)
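
The min/max structure above suggests a simple bounds filter. A hedged sketch (match_rule is not code from this repo, and "max == 0 means unbounded" is an assumption) of how such a rule_dict might be applied to a crawled item:

def match_rule(video_item, rule_dict):
    # Check each numeric field against its {"min", "max"} bounds;
    # assumption: max == 0 is treated as "no upper bound".
    for field, bounds in rule_dict.items():
        value = video_item.get(field, 0)
        if value < bounds.get("min", 0):
            return False
        if bounds.get("max", 0) and value > bounds["max"]:
            return False
    return True

# e.g. match_rule({"duration": 65, "play_cnt": 5200}, rule_dict) -> True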

+ 0 - 58
douyin/douyin_main/run_dy_recommend_scheduling.py

@@ -1,58 +0,0 @@
-# -*- coding: utf-8 -*-
-# @Author: lierqiang
-# @Time: 2023/4/21
-import argparse
-import os
-import sys
-
-sys.path.append(os.getcwd())
-from common.common import Common
-from douyin.douyin_recommend.dy_recommend_scheduling import DyRecommendScheduling
-from common.public import task_fun
-
-
-def main(log_type, crawler, task, oss_endpoint, env):
-    task = task_fun(task)
-    try:
-        Common.logger(log_type, crawler).info(f'Start crawling {crawler} videos, targeted board\n')
-        DyRecommendScheduling.get_recommend_videos(log_type=log_type,
-                                                   crawler=crawler,
-                                                   task=task,
-                                                   oss_endpoint=oss_endpoint,
-                                                   env=env)
-        Common.del_logs(log_type, crawler)
-        Common.logger(log_type, crawler).info('Crawl task finished\n')
-    except Exception as e:
-        Common.logger(log_type, crawler).info(f"{crawler} video exception, alert triggered: {e}\n")
-        # Feishu.bot(log_type, crawler, f"{e}")
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser()  # create the argument parser
-    parser.add_argument('--log_type', default='recommend')  # add argument, noting its type
-    parser.add_argument('--crawler', default='douyin')  # add argument
-    parser.add_argument('--strategy', default='recommend')  # add argument
-    parser.add_argument('--task')  # add argument
-    parser.add_argument('--oss_endpoint', default='outer')  # add argument
-    parser.add_argument('--env', default='dev')  # add argument
-    # parser.add_argument('--machine')  # add argument
-    args = parser.parse_args()  # parse arguments; values can also be passed on the command line
-    task = {
-        'task_dict': {'task_id': '17', 'task_name': 'Xigua test 4.21', 'source': 'douyin', 'start_time': '1682010720000',
-                      'interval': '24', 'mode': 'author',
-                      'rule': {'duration': {'min': 40, 'max': 0}, 'play_cnt': {'min': 4000, 'max': 0},
-                               'period': {'min': 10, 'max': 0}, 'fans_cnt': {'min': 0, 'max': 0},
-                               'videos_cnt': {'min': 0, 'max': 0}, 'like_cnt': {'min': 0, 'max': 0},
-                               'width': {'min': 0, 'max': 0}, 'height': {'min': 0, 'max': 0}},
-                      'spider_name': 'run_dy_author_scheduling', 'machine': 'aliyun', 'status': '0',
-                      'create_time': '1682048632396', 'update_time': '1682048632396', 'operator': ''},
-        'rule_dict': {'duration': {'min': 0, 'max': 0}, 'play_cnt': {'min': 0, 'max': 0},
-                      'period': {'min': 0, 'max': 0}, 'fans_cnt': {'min': 0, 'max': 0},
-                      'videos_cnt': {'min': 0, 'max': 0},
-                      'like_cnt': {'min': 0, 'max': 0}, 'width': {'min': 0, 'max': 0},
-                      'height': {'min': 0, 'max': 0}}}
-    main(log_type=args.log_type,
-         crawler=args.crawler,
-         task=task,
-         oss_endpoint=args.oss_endpoint,
-         env=args.env)

+ 2 - 1
douyin/douyin_recommend/__init__.py

@@ -4,7 +4,8 @@ import os
 
 def get_xb(f_url, ua):
     js_path = os.path.abspath(os.path.dirname(os.getcwd()))
-    with open(f'{js_path}/xb.js', 'r', encoding='utf-8') as f:
+    # with open(f'{js_path}/xb.js', 'r', encoding='utf-8') as f:
+    with open(f'{js_path}/piaoquan_crawler/douyin/xb.js', 'r', encoding='utf-8') as f:
         douyin_js = f.read()
 
     params = f_url.split('/?')[1]
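
To make the path fix above concrete, a sketch assuming the process starts from the repository root (the /opt prefix is illustrative; the piaoquan_crawler directory name is taken from the new path):

import os

cwd = os.getcwd()                                # /opt/piaoquan_crawler
js_path = os.path.abspath(os.path.dirname(cwd))  # /opt  (parent of cwd)
old_path = f"{js_path}/xb.js"                    # /opt/xb.js
new_path = f"{js_path}/piaoquan_crawler/douyin/xb.js"
                                                 # /opt/piaoquan_crawler/douyin/xb.js

So js_path resolves one level above the checkout; the old lookup expected xb.js there, while the new one re-enters the checkout and reads douyin/xb.js.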

File diff suppressed because it is too large
+ 62 - 0
douyin/douyin_recommend/douyin_recommend_scheduling.py


File diff suppressed because it is too large
+ 0 - 81
douyin/douyin_recommend/dy_recommend_scheduling.py


+ 3 - 0
douyin/logs/__init__.py

@@ -0,0 +1,3 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/5/26

Some files were not shown because too many files changed in this diff