فهرست منبع

update douyin

lierqiang 2 سال پیش
والد
کامیت
6436c7c9cb
4 فایلهای تغییر یافته به همراه 46 افزوده شده و 17 حذف شده
  1. 7 0
      README.MD
  2. 12 0
      douyin/douyin_recommend/__init__.py
  3. BIN
      douyin/douyin_recommend/douyin/.DS_Store
  4. 27 17
      douyin/douyin_recommend/recommend_dy.py

+ 7 - 0
README.MD

@@ -151,6 +151,13 @@ cd ~ && source ./base_profile && ps aux | grep weixinzhishu | grep -v grep | awk
 ```
 
 
+#### 抖音
+```commandline
+阿里云 102 服务器
+sh ./main/main.sh ./douyin/douyin_main/run_douyin_recommend.py --log_type="recommend" --crawler="douyin" --strategy="推荐爬虫策略" --oss_endpoint="inner" --env="prod" --machine="aliyun" douyin/recommend.log
+# sh ./main/main.sh ./kuaishou/douyin_main/run_douyin_recommend.py --log_type="recommend" --crawler="douyin" --strategy="定向爬虫策略" --env="prod" --machine="aliyun" kuaishou/nohup.log
+本机
+```
+
 #### 爬虫进程监测
 ```commandline
 阿里云 102 服务器:/usr/bin/sh /data5/piaoquan_crawler/main/process.sh "prod"

+ 12 - 0
douyin/douyin_recommend/__init__.py

@@ -1,3 +1,15 @@
+import execjs
+import os
 
 
+def get_xb(f_url, ua):
+    """Run the ``_0x11bbd8`` function from ``xb.js`` on a URL's query part.
+
+    ``xb.js`` is read from the parent of the current working directory, so
+    the lookup depends on where the process was started.
+
+    Args:
+        f_url: Request URL containing ``'/?'``; the segment following the
+            first ``'/?'`` is handed to the script.
+        ua: Passed through as the script's second argument (presumably the
+            User-Agent string -- confirm against callers).
+
+    Returns:
+        Whatever the JS function returns (presumably the X-Bogus request
+        signature -- TODO confirm).
+
+    Raises:
+        IndexError: If ``f_url`` contains no ``'/?'``.
+    """
+    parent_dir = os.path.abspath(os.path.dirname(os.getcwd()))
+    with open(f'{parent_dir}/xb.js', 'r', encoding='utf-8') as js_file:
+        js_source = js_file.read()
 
+    query_string = f_url.split('/?')[1]
+    return execjs.compile(js_source).call('_0x11bbd8', query_string, ua)

BIN
douyin/douyin_recommend/douyin/.DS_Store


+ 27 - 17
douyin/douyin_recommend/recommend_dy.py

@@ -3,34 +3,21 @@
 # @Time: 2023/4/06
 import json
 import os
+import random
 import shutil
 import sys
 import time
 import requests
-import execjs
 from hashlib import md5
-
+from douyin.douyin_recommend import get_xb
 sys.path.append(os.getcwd())
 from common.db import MysqlHelper
 from common.feishu import Feishu
 from common.publish import Publish
-from common.public import get_user_from_mysql
 from common.userAgent import get_random_user_agent
 from common.common import Common
 
 
-def get_xb(f_url, ua):
-    with open('../xb.js', 'r', encoding='utf-8') as f:
-        douyin_js = f.read()
-
-    params = f_url.split('/?')[1]
-    # params = urllib.parse.unquote(params)
-    # params = urllib.parse.unquote(urllib.parse.urlencode(params, safe='='))
-    ctx = execjs.compile(douyin_js)
-    xb = ctx.call('_0x11bbd8', params, ua)
-    return xb
-
-
 class DyRecommend(object):
     # 个人主页视频翻页参数
     # offset = 0
@@ -103,6 +90,25 @@ class DyRecommend(object):
         except Exception as e:
             Common.logger(log_type, crawler).error(f'filter_words异常:{e}\n')
 
+    @classmethod
+    def random_title(cls, log_type, crawler):
+        """Return a random title taken from Feishu sheet ``'sPK2oY'``.
+
+        ``get_videolist`` uses this as the title for videos whose ``desc``
+        is empty.
+
+        Args:
+            log_type: Forwarded to ``Common.logger`` and
+                ``Feishu.get_values_batch``.
+            crawler: Forwarded to ``Common.logger`` and
+                ``Feishu.get_values_batch``.
+
+        Returns:
+            One randomly chosen non-``None`` cell of the sheet, or ``None``
+            when the sheet holds no usable cell or an error occurs.
+        """
+        try:
+            while True:
+                random_title_sheet = Feishu.get_values_batch(log_type, crawler, 'sPK2oY')
+                if random_title_sheet is None:
+                    Common.logger(log_type, crawler).warning(
+                        f"random_title_sheet:{random_title_sheet} 10秒钟后重试")
+                    # Actually wait the announced 10 seconds; retrying
+                    # immediately would busy-loop against the Feishu API.
+                    time.sleep(10)
+                    continue
+                # Flatten the rows and drop empty cells.
+                random_title_list = [
+                    cell
+                    for row in random_title_sheet
+                    for cell in row
+                    if cell is not None
+                ]
+                if not random_title_list:
+                    # random.choice() raises IndexError on an empty list;
+                    # report the real cause instead of relying on that.
+                    Common.logger(log_type, crawler).warning('random_title: sheet has no titles\n')
+                    return None
+                return random.choice(random_title_list)
+        except Exception as e:
+            # Best-effort helper: log and fall back to None, as before.
+            Common.logger(log_type, crawler).error(f'random_title:{e}\n')
+            return None
+
+
     @classmethod
     def get_videolist(cls, log_type, crawler, strategy, our_id, oss_endpoint, env, machine):
         rule_dict = cls.get_rule(log_type, crawler)
@@ -130,8 +136,12 @@ class DyRecommend(object):
                     publish_time = info['create_time']
                     publish_time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(publish_time))
                     publish_day = int((int(time.time()) - publish_time) / (3600 * 24))
+                    if not info['desc']:
+                        video_title = cls.random_title(log_type, crawler)
+                    else:
+                        video_title = info['desc']
 
-                    video_dict = {'video_title': info['desc'],
+                    video_dict = {'video_title': video_title,
                                   'video_id': info['aweme_id'],
                                   'play_cnt': info['statistics']['play_count'],
                                   'comment_cnt': info['statistics']['comment_count'],
@@ -225,7 +235,7 @@ class DyRecommend(object):
                     return
 
                 # 视频写入飞书
-                # Feishu.insert_columns(log_type, 'douyin', "82c8d9", "ROWS", 1, 2)
+                Feishu.insert_columns(log_type, 'douyin', "82c8d9", "ROWS", 1, 2)
                 upload_time = int(time.time())
                 values = [[
                     our_video_id,