Browse Source

Merge remote-tracking branch 'origin/master'

crawler 1 year ago
parent
commit
08c0f3a226
54 changed files with 4378 additions and 238 deletions
  1. 1 1
      analysis/analysis.py
  2. 3 1
      common/mq.py
  3. 0 6
      haitunzhufu/haitunzhufu_recommend/haitunzhufu_recommend3.py
  4. 0 0
      ip_change/__init__.py
  5. 1044 0
      ip_change/ip_change.py
  6. 0 0
      jingdianfuqiwang/__init__.py
  7. 0 0
      jingdianfuqiwang/jingdianfuqiwang_main/__init__.py
  8. 153 0
      jingdianfuqiwang/jingdianfuqiwang_main/run_jdfqw_recommend.py
  9. 1 0
      jingdianfuqiwang/jingdianfuqiwang_recommend/__init__.py
  10. 26 0
      jingdianfuqiwang/jingdianfuqiwang_recommend/jingdianfuqiwang_dev.py
  11. 159 0
      jingdianfuqiwang/jingdianfuqiwang_recommend/jingdianfuqiwang_recommend_scheduling.py
  12. 25 0
      jingdianfuqiwang/jingdianfuqiwang_recommend/test3.py
  13. 2 0
      kanyikan/kanyikan_main/run_kykoffline_recommend.py
  14. 0 0
      kanyikan/kanyikan_recommend/kanyikan/chlsfiles/charles202311231411.txt
  15. 1 1
      kanyikan/kanyikan_recommend/kanyikan_recommend_feed.py
  16. 1 1
      kanyikan/kanyikan_recommend/kanyikan_recommend_offline.py
  17. 1 1
      kanyikan/kanyikan_recommend/kanyikan_recommend_plus.py
  18. 1 1
      kanyikan/kanyikan_recommend/kanyikan_recommend_video_id.py
  19. 6 0
      main/process_mq.sh
  20. 93 34
      main/process_offline(old).sh
  21. 106 88
      main/process_offline.sh
  22. 0 0
      meitiansongzhufu/__init__.py
  23. 0 0
      meitiansongzhufu/meitiansongzhufu_main/__init__.py
  24. 153 0
      meitiansongzhufu/meitiansongzhufu_main/run_mtszf_recommend.py
  25. 1 0
      meitiansongzhufu/meitiansongzhufu_recommend/__init__.py
  26. 70 0
      meitiansongzhufu/meitiansongzhufu_recommend/main_request.py
  27. 158 0
      meitiansongzhufu/meitiansongzhufu_recommend/meitiansongzhufu_dev.py
  28. 190 0
      meitiansongzhufu/meitiansongzhufu_recommend/meitiansongzhufu_recommend.py
  29. 0 0
      piaopiaoquan/__init__.py
  30. 0 0
      piaopiaoquan/logs/__init__.py
  31. 0 0
      piaopiaoquan/piaopiaoquan/__init__.py
  32. 393 0
      piaopiaoquan/piaopiaoquan/piaopiaoquan_recommend.py
  33. 380 0
      piaopiaoquan/piaopiaoquan/piaopiaoquan_sift.py
  34. 0 0
      piaopiaoquan/piaopiaoquan_main/__init__.py
  35. 188 0
      piaopiaoquan/piaopiaoquan_main/run_ppq_recommend.py
  36. 188 0
      piaopiaoquan/piaopiaoquan_main/run_ppqsift_recommend.py
  37. 0 0
      shanhuzhufu/__init__.py
  38. 27 0
      shanhuzhufu/crypt/decrypt.py
  39. 153 0
      shanhuzhufu/shanhuzhufu_main/run_shzf_recommend.py
  40. 1 0
      shanhuzhufu/shanhuzhufu_recommend/__init__.py
  41. 103 0
      shanhuzhufu/shanhuzhufu_recommend/shanhuzhufu_recommend_dev.py
  42. 158 0
      shanhuzhufu/shanhuzhufu_recommend/shanhuzhufu_recommend_scheduling.py
  43. 7 7
      xiaoniangaoplus/xiaoniangaoplus/xiaoniangao_plus_get_userid.py
  44. 20 16
      xiaoniangaoplus/xiaoniangaoplus/xiaoniangao_plus_scheduling2.py
  45. 20 0
      xiaoniangaoplus/xiaoniangaoplus/xng_scheduling.py
  46. 74 29
      xiaoniangaoplus/xiaoniangaoplus_main/run_xngplus_recommend.py
  47. 2 0
      xiaoniangaoplus/xiaoniangaoplus_main/run_xngrule_recommend.py
  48. 1 1
      zhongmiaoyinxin/zhongmiaoyinxin_recommend/zhongmiaoyinxin_recommend_new.py
  49. 50 18
      zhufuquanzi/zhufuquanzi_main/run_zfqz_recommend.py
  50. 9 7
      zhufuquanzi/zhufuquanzi_recommend/zhufuquanzi_recommend_new.py
  51. 155 0
      zhuwanwufusu/zhuwanwufusu_main/run_zwwfs_recommend.py
  52. 1 0
      zhuwanwufusu/zhuwanwufusu_recommend/__init__.py
  53. 249 19
      zhuwanwufusu/zhuwanwufusu_recommend/zwwfs_recommend.py
  54. 4 7
      zhuwanwufusu/zhuwanwufusu_recommend/zwwfs_recommend_test.py

+ 1 - 1
analysis/analysis.py

@@ -41,7 +41,7 @@ class Analysis(object):
 class Drawer(object):
     def __init__(self, json_obj):
         self.ori_data = json_obj
-        self.x_list = ["2023-11-{:02}".format(i) for i in range(1, 24)]
+        self.x_list = ["2023-11-{:02}".format(i) for i in range(1, 28)]
 
     def draw_line(self):
         line = Line()

+ 3 - 1
common/mq.py

@@ -18,7 +18,9 @@ class MQ:
         platform = video_dict["platform"]
         try:
             msg = TopicMessage(json.dumps(video_dict))
-            msg.set_message_key(platform + "-" + strategy + "-" + video_dict["out_video_id"])
+            message_key = "{}-{}-{}".format(platform, strategy, video_dict['out_video_id'])
+            # msg.set_message_key(platform + "-" + strategy + "-" + video_dict["out_video_id"])
+            msg.set_message_key(message_key)
             re_msg = self.producer.publish_message(msg)
             Common.logger(strategy, platform).info("Publish Message Succeed. MessageID:%s, BodyMD5:%s\n" %
                                                    (re_msg.message_id, re_msg.message_body_md5))

+ 0 - 6
haitunzhufu/haitunzhufu_recommend/haitunzhufu_recommend3.py

@@ -112,12 +112,6 @@ class HTZFScheduling:
             Common.logger(self.log_type, self.crawler).info(
                 f"get_videoList:{response.text}\n"
             )
-            # Common.logging(
-            #     self.log_type,
-            #     self.crawler,
-            #     self.env,
-            #     f"get_videoList:{response.text}\n",
-            # )
             AliyunLogger.logging(
                 code="2000",
                 platform=self.crawler,

+ 0 - 0
ip_change/__init__.py


+ 1044 - 0
ip_change/ip_change.py

@@ -0,0 +1,1044 @@
+import ctypes
+from copy import deepcopy
+from datetime import datetime
+from random import choice, randint
+
+import pytz
+import requests
+from apscheduler.schedulers.blocking import BlockingScheduler
+
+URL = 'http://192.168.88.1/jsproxy'
+HEADERS = {
+    'Accept-Language': '',
+    'Content-Type': 'msg',
+    'Cookie': 'username=root',
+}
+
+
+def int_overflow(val) -> int:
+    maxint = 2147483647
+    if not -maxint - 1 <= val <= maxint:
+        val = (val + (maxint + 1)) % (2 * (maxint + 1)) - maxint - 1
+    return val & 0xFFFFFFFF
+
+
+def unsigned_right_shift(n, i) -> int:
+    """实现无符号右移"""
+    if n < 0:
+        n = ctypes.c_uint32(n).value
+    if i < 0:
+        return -int_overflow(n << abs(i))
+    return int_overflow(n >> i)
+
+
+class Buffer(object):
+
+    def __init__(self) -> None:
+        self.MASK_FTYPE = 0xf8000000
+        self.FT_BOOL = int_overflow(0 << 27)
+        self.FT_U32 = int_overflow(1 << 27)
+        self.FT_U64 = int_overflow(2 << 27)
+        self.FT_ADDR6 = int_overflow(3 << 27)
+        self.FT_STRING = int_overflow(4 << 27)
+        self.FT_MESSAGE = int_overflow(5 << 27)
+        self.FT_RAW = int_overflow(6 << 27)
+        self.FT_BOOL_ARRAY = int_overflow(16 << 27)
+        self.FT_U32_ARRAY = int_overflow(17 << 27)
+        self.FT_U64_ARRAY = int_overflow(18 << 27)
+        self.FT_ADDR6_ARRAY = int_overflow(19 << 27)
+        self.FT_STRING_ARRAY = int_overflow(20 << 27)
+        self.FT_MESSAGE_ARRAY = int_overflow(21 << 27)
+        self.FT_RAW_ARRAY = int_overflow(22 << 27)
+        self.FS_SHORT = int_overflow(1 << 24)
+
+        self.arr = [0] * 64 * 1024
+        self.pos = 0
+
+    def msg2buffer(self, msg):
+        self.arr[self.pos] = 0x4d
+        self.pos += 1
+        self.arr[self.pos] = 0x32
+        self.pos += 1
+        for r in msg:
+            pfx = r[0]
+            if pfx == '_':
+                continue
+            val = msg[r]
+            match pfx:
+                case 'b':
+                    self.write_id(self.FT_BOOL | (
+                        self.FS_SHORT if val else 0), r)
+                case 'u':
+                    val = val if val is not None else -1
+                    if 0 <= val < 256:
+                        self.write_id(self.FT_U32 | self.FS_SHORT, r)
+                        self.arr[self.pos] = val
+                        self.pos += 1
+                    else:
+                        self.write_id(self.FT_U32, r)
+                        self.write_32(val)
+                case 'q':
+                    self.write_id(self.FT_U64, r)
+                    self.write_64(val)
+                case 'a':
+                    self.write_id(self.FT_ADDR6, r)
+                    for i in range(16):
+                        self.arr[self.pos] = val[i]
+                        self.pos += 1
+                case 's':
+                    if len(val) < 256:
+                        self.write_id(self.FT_STRING | self.FS_SHORT, r)
+                        self.arr[self.pos] = len(val)
+                        self.pos += 1
+                    else:
+                        self.write_id(self.FT_STRING, r)
+                        self.write_16(len(val))
+                    for i in range(len(val)):
+                        self.arr[self.pos] = ord(val[i])
+                        self.pos += 1
+                case 'r':
+                    if len(val) < 256:
+                        self.write_id(self.FT_RAW | self.FS_SHORT, r)
+                        self.arr[self.pos] = len(val)
+                        self.pos += 1
+                    else:
+                        self.write_id(self.FT_RAW, r)
+                        self.write_16(len(val))
+                        for i in range(len(val)):
+                            self.arr[self.pos] = val[i]
+                            self.pos += 1
+                case 'm':
+                    x = self.msg2buffer(val)
+                    if len(x) < 256:
+                        self.write_id(self.FT_MESSAGE | self.FS_SHORT, r)
+                        self.arr[self.pos] = len(x)
+                        self.pos += 1
+                    else:
+                        self.write_id(self.FT_MESSAGE, r)
+                        self.write_16(len(x))
+                    for item in x[::-1]:
+                        self.arr[self.pos] = item
+                    self.pos += len(x)
+                case 'B':
+                    self.write_id(self.FT_BOOL_ARRAY, r)
+                    self.write_16(len(val))
+                    for i in range(len(val)):
+                        self.arr[self.pos] = val[i]
+                        self.pos += 1
+                case 'U':
+                    self.write_id(self.FT_U32_ARRAY, r)
+                    self.write_16(len(val))
+                    for i in range(len(val)):
+                        self.write_32(val[i])
+                case 'Q':
+                    self.write_id(self.FT_U64_ARRAY, r)
+                    self.write_16(len(val))
+                    for i in range(len(val)):
+                        self.write_64(val[i])
+                case 'A':
+                    self.write_id(self.FT_ADDR6_ARRAY, r)
+                    self.write_16(len(val))
+                    for i in range(len(val)):
+                        for k in range(16):
+                            self.arr[self.pos] = val[i][k]
+                            self.pos += 1
+                case 'S':
+                    self.write_id(self.FT_STRING_ARRAY, r)
+                    self.write_16(len(val))
+                    for i in range(len(val)):
+                        self.write_16(len(val[i]))
+                        for k in range(len(val[i])):
+                            self.arr[self.pos] = ord(val[i][k])
+                            self.pos += 1
+                case 'R':
+                    self.write_id(self.FT_RAW_ARRAY, r)
+                    self.write_16(len(val))
+                    for i in range(len(val)):
+                        self.write_16(len(val[i]))
+                        for k in range(len(val[i])):
+                            self.arr[self.pos] = val[i][k]
+                            self.pos += 1
+                case 'M':
+                    self.write_id(self.FT_MESSAGE_ARRAY, r)
+                    self.write_16(len(val))
+                    for i in range(len(val)):
+                        x = self.msg2buffer(val[i])
+                        self.write_16(len(x))
+                        for item in x[::-1]:
+                            self.arr[self.pos] = item
+                        self.pos += len(x)
+                case _:
+                    return None
+        return self.arr[:self.pos]
+
+    def buffer2msg(self, arr, offset: int = 0):
+        self.arr, self.pos, ret = arr, 2, dict()
+        if self.arr[0] != 0x4d or self.arr[1] != 0x32:
+            return ret
+        while self.pos < len(self.arr):
+            _id = self.read_32()
+            match _id & self.MASK_FTYPE:
+                case self.FT_BOOL:
+                    ret['b' + self.idnum2hex(_id)] = 1 if (_id & self.FS_SHORT) else 0
+                case self.FT_U32:
+                    if _id & self.FS_SHORT:
+                        ret['u' + self.idnum2hex(_id)] = self.arr[self.pos]
+                        self.pos += 1
+                    else:
+                        ret['u' + self.idnum2hex(_id)
+                            ] = self.int2num(self.read_32())
+                case self.FT_U64:
+                    ret['q' + self.idnum2hex(_id)] = self.read_64()
+                case self.FT_ADDR6:
+                    a = []
+                    for i in range(16):
+                        a[i] = self.arr[self.pos]
+                        self.pos += 1
+                    ret['a' + self.idnum2hex(_id)] = a
+                case self.FT_STRING:
+                    length = self.arr[self.pos]
+                    self.pos += 1
+                    if not (_id & self.FS_SHORT):
+                        length |= self.arr[self.pos] << 8
+                        self.pos += 1
+                    s = ''
+                    for i in range(length):
+                        s = s + chr(self.arr[self.pos])
+                        self.pos += 1
+                    ret['s' + self.idnum2hex(_id)] = s
+                case self.FT_RAW:
+                    length = self.arr[self.pos]
+                    self.pos += 1
+                    if not (_id & self.FS_SHORT):
+                        length |= self.arr[self.pos] << 8
+                        self.pos += 1
+                    a = [0] * length
+                    for i in range(length):
+                        a[i] = self.arr[self.pos]
+                        self.pos += 1
+                    ret['r' + self.idnum2hex(_id)] = a
+                case self.FT_MESSAGE:
+                    length = self.arr[self.pos]
+                    self.pos += 1
+                    if not (_id & self.FS_SHORT):
+                        length |= self.arr[self.pos] << 8
+                        self.pos += 1
+                    ret['m' + self.idnum2hex(_id)] = self.buffer2msg(
+                        self.arr[offset + self.pos:offset + self.pos + length])
+                    self.pos += length
+                    offset += self.pos
+                case self.FT_BOOL_ARRAY:
+                    length = self.read_16()
+                    a = [0] * length
+                    for i in range(length):
+                        a[i] = not (not self.arr[self.pos])
+                        self.pos += 1
+                    ret['B' + self.idnum2hex(_id)] = a
+                case self.FT_U32_ARRAY:
+                    length = self.read_16()
+                    a = [0] * length
+                    for i in range(length):
+                        a[i] = self.int2num(self.read_32())
+                    ret['U' + self.idnum2hex(_id)] = a
+                case self.FT_U64_ARRAY:
+                    length = self.read_16()
+                    a = [0] * length
+                    for i in range(length):
+                        a[i] = self.read_64()
+                    ret['Q' + self.idnum2hex(_id)] = a
+                case self.FT_ADDR6_ARRAY:
+                    length = self.read_16()
+                    a = [0] * length
+                    for i in range(length):
+                        x = [0] * 16
+                        for k in range(16):
+                            x[k] = self.arr[self.pos]
+                            self.pos += 1
+                        a[i] = x
+                    ret['A' + self.idnum2hex(_id)] = a
+                case self.FT_STRING_ARRAY:
+                    length = self.read_16()
+                    a = [0] * length
+                    for i in range(length):
+                        x = ''
+                        x_len = self.read_16()
+                        for k in range(x_len):
+                            x = x + chr(self.arr[self.pos])
+                            self.pos += 1
+                        a[i] = x
+                    ret['S' + self.idnum2hex(_id)] = a
+                case self.FT_RAW_ARRAY:
+                    length = self.read_16()
+                    a = [0] * length
+                    for i in range(length):
+                        x_len = self.read_16()
+                        x = [0] * x_len
+                        for k in range(x_len):
+                            x[k] = self.arr[self.pos]
+                            self.pos += 1
+                        a[i] = x
+                    ret['R' + self.idnum2hex(_id)] = a
+                case self.FT_MESSAGE_ARRAY:
+                    length = self.read_16()
+                    a = [0] * length
+                    for i in range(length):
+                        x_len = self.read_16()
+                        a[i] = self.buffer2msg(self.arr[offset + self.pos:offset + self.pos + x_len], offset + self.pos)
+                        self.pos += x_len
+                        offset += self.pos
+                    ret['M' + self.idnum2hex(_id)] = a
+        return ret
+
+    def buffer2msgs(self, arr, offset: int = 0):
+        ret, pos = [], 0
+        while pos + 2 <= len(arr):
+            length = (arr[pos] << 8) | arr[pos + 1]
+            arr[pos] = 0x4d
+            arr[pos + 1] = 0x32
+            msg = self.buffer2msg(arr[:offset + pos + length], offset + pos)
+            pos += length
+            ret.append(msg)
+        return ret
+
+    def write_id(self, id_type, id_str):
+        x = int(id_str[1:], 16)
+        self.arr[self.pos] = x & 0xff
+        self.pos += 1
+        self.arr[self.pos] = (x >> 8) & 0xff
+        self.pos += 1
+        self.arr[self.pos] = (x >> 16) & 0xff
+        self.pos += 1
+        self.arr[self.pos] = (id_type >> 24) & 0xff
+        self.pos += 1
+
+    def write_16(self, val):
+        self.arr[self.pos] = val & 0xff
+        self.pos += 1
+        self.arr[self.pos] = (val >> 8) & 0xff
+        self.pos += 1
+
+    def write_32(self, val):
+        for i in range(4):
+            self.arr[self.pos] = (val >> (i * 8)) & 0xff
+            self.pos += 1
+
+    def write_64(self, val):
+        for i in range(4):
+            self.arr[self.pos] = (val >> (i * 8)) & 0xff
+            self.pos += 1
+        temp = int(val / 4294967296)
+        for i in range(4):
+            self.arr[self.pos] = (temp >> (i * 8)) & 0xff
+            self.pos += 1
+
+    def num2hex(self, ccc):
+        if ccc < 10:
+            return chr(ccc + 48)
+        return chr(ccc + 87)
+
+    def idnum2hex(self, _id):
+        ret = ''
+        for i in range(6):
+            x = (_id >> (20 - (i * 4))) & 0xf
+            if len(ret) == 0 and not x:
+                continue
+            ret = ret + self.num2hex(x)
+        if len(ret) == 0:
+            ret = '0'
+        return ret
+
+    def read_16(self):
+        ret = 0
+        for i in range(2):
+            ret |= int_overflow(self.arr[self.pos] << (i * 8))
+            self.pos += 1
+        return ret
+
+    def read_32(self):
+        ret = 0
+        for i in range(4):
+            ret |= int_overflow(self.arr[self.pos] << (i * 8))
+            self.pos += 1
+        return ret
+
+    def read_64(self):
+        ret = 0
+        for i in range(4):
+            ret |= int_overflow(self.arr[self.pos] << (i * 8))
+            self.pos += 1
+        temp = 0
+        for i in range(4):
+            temp |= int_overflow(self.arr[self.pos] << (i * 8))
+            self.pos += 1
+        return self.int2num(ret) + temp * 4294967296
+
+    def int2num(self, v):
+        return 0x100000000 + v if v < 0 else v
+
+
+class Curve(object):
+
+    @classmethod
+    def curve_a2u(cls, a):
+        r = [0] * 32
+        for i in range(32):
+            r[i >> 1] |= a[31 - i] << (i & 1) * 8
+        return r
+
+    @classmethod
+    def curve_u2a(cls, a):
+        r = [0] * 32
+        for i in range(32):
+            r[31 - i] = (a[i >> 1] >> ((i & 1) * 8)) & 0xff
+        return r
+
+    @classmethod
+    def byte2str(cls, b):
+        b &= 0xff
+        return chr(b if b else 256)
+
+    @classmethod
+    def word2str(cls, w):
+        return cls.byte2str(w >> 24) + cls.byte2str(w >> 16) + cls.byte2str(w >> 8) + cls.byte2str(w)
+
+    @classmethod
+    def str2byte(cls, s, off):
+        return s[off] & 0xff
+
+    @classmethod
+    def str2word(cls, s, off):
+        return int_overflow(cls.str2byte(s, off) << 24) | int_overflow(cls.str2byte(s, off + 1) << 16) | int_overflow(
+            cls.str2byte(s, off + 2) << 8) | int_overflow(cls.str2byte(s, off + 3))
+
+    @classmethod
+    def str2a(cls, s):
+        res = []
+        for i in range(len(s)):
+            res.append(s[i] & 0xff)
+        return res
+
+    @classmethod
+    def a2str(cls, a):
+        x = []
+        for i in range(len(a)):
+            x.append(cls.byte2str(a[i]))
+        return ''.join(x)
+
+    @classmethod
+    def c255lgetbit(cls, n, c):
+        return (n[c >> 4] >> (c & 0xf)) & 1
+
+    @classmethod
+    def c255lzero(cls):
+        return [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
+
+    @classmethod
+    def c255lone(cls):
+        return [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
+
+    @classmethod
+    def c255lbase(cls):
+        return [9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
+
+    @classmethod
+    def c255lsqr8h(cls, a7, a6, a5, a4, a3, a2, a1, a0):
+        r = [0] * 16
+        v = a0 * a0
+        r[0] = v & 0xffff
+        v = (0 | int(v / 0x10000)) + 2 * a0 * a1
+        r[1] = v & 0xffff
+        v = (0 | int(v / 0x10000)) + 2 * a0 * a2 + a1 * a1
+        r[2] = v & 0xffff
+        v = (0 | int(v / 0x10000)) + 2 * a0 * a3 + 2 * a1 * a2
+        r[3] = v & 0xffff
+        v = (0 | int(v / 0x10000)) + 2 * a0 * a4 + 2 * a1 * a3 + a2 * a2
+        r[4] = v & 0xffff
+        v = (0 | int(v / 0x10000)) + 2 * a0 * a5 + 2 * a1 * a4 + 2 * a2 * a3
+        r[5] = v & 0xffff
+        v = (0 | int(v / 0x10000)) + 2 * a0 * a6 + \
+            2 * a1 * a5 + 2 * a2 * a4 + a3 * a3
+        r[6] = v & 0xffff
+        v = (0 | int(v / 0x10000)) + 2 * a0 * a7 + \
+            2 * a1 * a6 + 2 * a2 * a5 + 2 * a3 * a4
+        r[7] = v & 0xffff
+        v = (0 | int(v / 0x10000)) + 2 * a1 * a7 + \
+            2 * a2 * a6 + 2 * a3 * a5 + a4 * a4
+        r[8] = v & 0xffff
+        v = (0 | int(v / 0x10000)) + 2 * a2 * a7 + 2 * a3 * a6 + 2 * a4 * a5
+        r[9] = v & 0xffff
+        v = (0 | int(v / 0x10000)) + 2 * a3 * a7 + 2 * a4 * a6 + a5 * a5
+        r[10] = v & 0xffff
+        v = (0 | int(v / 0x10000)) + 2 * a4 * a7 + 2 * a5 * a6
+        r[11] = v & 0xffff
+        v = (0 | int(v / 0x10000)) + 2 * a5 * a7 + a6 * a6
+        r[12] = v & 0xffff
+        v = (0 | int(v / 0x10000)) + 2 * a6 * a7
+        r[13] = v & 0xffff
+        v = (0 | int(v / 0x10000)) + a7 * a7
+        r[14] = v & 0xffff
+        r[15] = 0 | int(v / 0x10000)
+        return r
+
+    @classmethod
+    def c255lsqrmodp(cls, a):
+        x = cls.c255lsqr8h(a[15], a[14], a[13], a[12],
+                           a[11], a[10], a[9], a[8])
+        z = cls.c255lsqr8h(a[7], a[6], a[5], a[4], a[3], a[2], a[1], a[0])
+        y = cls.c255lsqr8h(a[15] + a[7], a[14] + a[6], a[13] + a[5], a[12] +
+                           a[4], a[11] + a[3], a[10] + a[2], a[9] + a[1], a[8] + a[0])
+        r = [0] * 16
+        v = 0x800000 + z[0] + (y[8] - x[8] - z[8] + x[0] - 0x80) * 38
+        r[0] = v & 0xffff
+        for i in range(1, 8):
+            v = 0x7fff80 + \
+                unsigned_right_shift(
+                    v, 16) + z[i] + (y[i + 8] - x[i + 8] - z[i + 8] + x[i]) * 38
+            r[i] = v & 0xffff
+        for i in range(8, 15):
+            v = 0x7fff80 + \
+                unsigned_right_shift(
+                    v, 16) + z[i] + y[i - 8] - x[i - 8] - z[i - 8] + x[i] * 38
+            r[i] = v & 0xffff
+        r[15] = 0x7fff80 + \
+                unsigned_right_shift(v, 16) + \
+                z[15] + y[7] - x[7] - z[7] + x[15] * 38
+        cls.c255lreduce(r)
+        return r
+
+    @classmethod
+    def c255lmul8h(cls, a7, a6, a5, a4, a3, a2, a1, a0, b7, b6, b5, b4, b3, b2, b1, b0):
+        r = [0] * 16
+        v = a0 * b0
+        r[0] = v & 0xffff
+        v = (0 | int(v / 0x10000)) + a0 * b1 + a1 * b0
+        r[1] = v & 0xffff
+        v = (0 | int(v / 0x10000)) + a0 * b2 + a1 * b1 + a2 * b0
+        r[2] = v & 0xffff
+        v = (0 | int(v / 0x10000)) + a0 * b3 + a1 * b2 + a2 * b1 + a3 * b0
+        r[3] = v & 0xffff
+        v = (0 | int(v / 0x10000)) + a0 * b4 + \
+            a1 * b3 + a2 * b2 + a3 * b1 + a4 * b0
+        r[4] = v & 0xffff
+        v = (0 | int(v / 0x10000)) + a0 * b5 + a1 * \
+            b4 + a2 * b3 + a3 * b2 + a4 * b1 + a5 * b0
+        r[5] = v & 0xffff
+        v = (0 | int(v / 0x10000)) + a0 * b6 + a1 * b5 + \
+            a2 * b4 + a3 * b3 + a4 * b2 + a5 * b1 + a6 * b0
+        r[6] = v & 0xffff
+        v = (0 | int(v / 0x10000)) + a0 * b7 + a1 * b6 + a2 * \
+            b5 + a3 * b4 + a4 * b3 + a5 * b2 + a6 * b1 + a7 * b0
+        r[7] = v & 0xffff
+        v = (0 | int(v / 0x10000)) + a1 * b7 + a2 * b6 + \
+            a3 * b5 + a4 * b4 + a5 * b3 + a6 * b2 + a7 * b1
+        r[8] = v & 0xffff
+        v = (0 | int(v / 0x10000)) + a2 * b7 + a3 * \
+            b6 + a4 * b5 + a5 * b4 + a6 * b3 + a7 * b2
+        r[9] = v & 0xffff
+        v = (0 | int(v / 0x10000)) + a3 * b7 + \
+            a4 * b6 + a5 * b5 + a6 * b4 + a7 * b3
+        r[10] = v & 0xffff
+        v = (0 | int(v / 0x10000)) + a4 * b7 + a5 * b6 + a6 * b5 + a7 * b4
+        r[11] = v & 0xffff
+        v = (0 | int(v / 0x10000)) + a5 * b7 + a6 * b6 + a7 * b5
+        r[12] = v & 0xffff
+        v = (0 | int(v / 0x10000)) + a6 * b7 + a7 * b6
+        r[13] = v & 0xffff
+        v = (0 | int(v / 0x10000)) + a7 * b7
+        r[14] = v & 0xffff
+        r[15] = 0 | int(v / 0x10000)
+        return r
+
+    @classmethod
+    def c255lmulmodp(cls, a, b):
+        x = cls.c255lmul8h(a[15], a[14], a[13], a[12], a[11], a[10], a[9],
+                           a[8], b[15], b[14], b[13], b[12], b[11], b[10], b[9], b[8])
+        z = cls.c255lmul8h(a[7], a[6], a[5], a[4], a[3], a[2], a[1],
+                           a[0], b[7], b[6], b[5], b[4], b[3], b[2], b[1], b[0])
+        y = cls.c255lmul8h(a[15] + a[7], a[14] + a[6], a[13] + a[5], a[12] + a[4], a[11] + a[3], a[10] + a[2],
+                           a[9] + a[1], a[8] +
+                           a[0], b[15] + b[7], b[14] + b[6], b[13] + b[5], b[12] + b[4], b[11] + b[3], b[10] + b[2],
+                           b[9] + b[1], b[8] + b[0])
+        r = [0] * 16
+        v = 0x800000 + z[0] + (y[8] - x[8] - z[8] + x[0] - 0x80) * 38
+        r[0] = v & 0xffff
+        for i in range(1, 8):
+            v = 0x7fff80 + \
+                unsigned_right_shift(
+                    v, 16) + z[i] + (y[i + 8] - x[i + 8] - z[i + 8] + x[i]) * 38
+            r[i] = v & 0xffff
+        for i in range(8, 15):
+            v = 0x7fff80 + \
+                unsigned_right_shift(
+                    v, 16) + z[i] + y[i - 8] - x[i - 8] - z[i - 8] + x[i] * 38
+            r[i] = v & 0xffff
+        r[15] = 0x7fff80 + \
+                unsigned_right_shift(v, 16) + \
+                z[15] + y[7] - x[7] - z[7] + x[15] * 38
+        cls.c255lreduce(r)
+        return r
+
+    @classmethod
+    def c255lreduce(cls, a):
+        v = a[15]
+        a[15] = v & 0x7fff
+        v = (0 | int(v / 0x8000)) * 19
+        for i in range(15):
+            v += a[i]
+            a[i] = v & 0xffff
+            v = unsigned_right_shift(v, 16)
+        a[15] += v
+
+    @classmethod
+    def c255laddmodp(cls, a, b):
+        r = [0] * 16
+        v = ((0 | unsigned_right_shift(
+            a[15], 15)) + (0 | unsigned_right_shift(b[15], 15))) * 19 + a[0] + b[0]
+        r[0] = v & 0xffff
+        for i in range(1, 15):
+            v = unsigned_right_shift(v, 16) + a[i] + b[i]
+            r[i] = v & 0xffff
+        r[15] = unsigned_right_shift(
+            v, 16) + (a[15] & 0x7fff) + (b[15] & 0x7fff)
+        return r
+
+    @classmethod
+    def c255lsubmodp(cls, a, b):
+        r = [0] * 16
+        v = 0x80000 + ((0 | unsigned_right_shift(a[15], 15)) - (
+                0 | unsigned_right_shift(b[15], 15)) - 1) * 19 + a[0] - b[0]
+        r[0] = v & 0xffff
+        for i in range(1, 15):
+            v = unsigned_right_shift(v, 16) + 0x7fff8 + a[i] - b[i]
+            r[i] = v & 0xffff
+        r[15] = unsigned_right_shift(
+            v, 16) + 0x7ff8 + (a[15] & 0x7fff) - (b[15] & 0x7fff)
+        return r
+
+    @classmethod
+    def c255linvmodp(cls, a):
+        c, i = a, 249
+        while i > 0:
+            i -= 1
+            a = cls.c255lsqrmodp(a)
+            a = cls.c255lmulmodp(a, c)
+        a = cls.c255lsqrmodp(a)
+        a = cls.c255lsqrmodp(a)
+        a = cls.c255lmulmodp(a, c)
+        a = cls.c255lsqrmodp(a)
+        a = cls.c255lsqrmodp(a)
+        a = cls.c255lmulmodp(a, c)
+        a = cls.c255lsqrmodp(a)
+        a = cls.c255lmulmodp(a, c)
+        return a
+
+    @classmethod
+    def c255lmulasmall(cls, a):
+        m, r = 121665, [0] * 16
+        v = a[0] * m
+        r[0] = v & 0xffff
+        for i in range(1, 15):
+            v = (0 | int(v / 0x10000)) + a[i] * m
+            r[i] = v & 0xffff
+        r[15] = (0 | int(v / 0x10000)) + a[15] * m
+        cls.c255lreduce(r)
+        return r
+
+    @classmethod
+    def c255ldbl(cls, x, z):
+        m = cls.c255lsqrmodp(cls.c255laddmodp(x, z))
+        n = cls.c255lsqrmodp(cls.c255lsubmodp(x, z))
+        o = cls.c255lsubmodp(m, n)
+        x_2 = cls.c255lmulmodp(n, m)
+        z_2 = cls.c255lmulmodp(cls.c255laddmodp(cls.c255lmulasmall(o), m), o)
+        return [x_2, z_2]
+
+    @classmethod
+    def c255lsum(cls, x, z, x_p, z_p, x_1):
+        p = cls.c255lmulmodp(cls.c255lsubmodp(
+            x, z), cls.c255laddmodp(x_p, z_p))
+        q = cls.c255lmulmodp(cls.c255laddmodp(
+            x, z), cls.c255lsubmodp(x_p, z_p))
+        x_3 = cls.c255lsqrmodp(cls.c255laddmodp(p, q))
+        z_3 = cls.c255lmulmodp(cls.c255lsqrmodp(cls.c255lsubmodp(p, q)), x_1)
+        return [x_3, z_3]
+
+    @classmethod
+    def curve25519_raw(cls, f, c):
+        x_1 = c
+        a = cls.c255ldbl(x_1, cls.c255lone())
+        q = [deepcopy(x_1), cls.c255lone()]
+        n = 255
+        while cls.c255lgetbit(f, n) == 0:
+            n -= 1
+            if n < 0:
+                return cls.c255lzero()
+        n -= 1
+        while n >= 0:
+            b = cls.c255lgetbit(f, n)
+            a_or_q = [[0] * 16, [0] * 16]
+            cls.cond_copy(a_or_q[0], q[0], a[0], b)
+            cls.cond_copy(a_or_q[1], q[1], a[1], b)
+            r = cls.c255lsum(a[0], a[1], q[0], q[1], x_1)
+            s = cls.c255ldbl(a_or_q[0], a_or_q[1])
+            cls.cond_copy(q[0], s[0], r[0], b)
+            cls.cond_copy(q[1], s[1], r[1], b)
+            cls.cond_copy(a[0], r[0], s[0], b)
+            cls.cond_copy(a[1], r[1], s[1], b)
+            n -= 1
+        q[1] = cls.c255linvmodp(q[1])
+        q[0] = cls.c255lmulmodp(q[0], q[1])
+        cls.c255lreduce(q[0])
+        return q[0]
+
+    @classmethod
+    def cond_copy(cls, r, a, b, c):
+        m2 = (-c) & 0xffff
+        m1 = (~m2) & 0xffff
+        n = 0
+        while n < 16:
+            r[n] = (a[n] & m1) | (b[n] & m2)
+            n += 1
+
+    @classmethod
+    def curve25519(cls, f, c: list = None):
+        if not c:
+            c = cls.c255lbase()
+        f[0] &= 0xFFF8
+        f[15] = (f[15] & 0x7FFF) | 0x4000
+        c[15] &= 0x7FFF
+        return cls.curve25519_raw(f, c)
+
+    @classmethod
+    def sha1(cls, msg):
+        length = len(msg)
+        total_length = length + 9
+        total_length = (total_length + 63) & -64
+        padding = [0x80]
+        padding.extend([0 for _ in range(length + 1, total_length)])
+        msg.extend(padding)
+        cls.packbe(msg, total_length - 4, length * 8)
+        h0, h1, h2, h3, h4, w = 0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476, 0xC3D2E1F0, [
+            0] * 80
+        for j in range(0, len(msg), 64):
+            for i in range(16):
+                w[i] = int_overflow(cls.unpackbe(msg, j + i * 4))
+            for i in range(16, 80):
+                w[i] = int_overflow(cls.rrotate(
+                    w[i - 3] ^ w[i - 8] ^ w[i - 14] ^ w[i - 16], 31))
+            a, b, c, d, e = h0, h1, h2, h3, h4
+            for i in range(80):
+                if i < 20:
+                    f = int_overflow((b & c) | (~b & d))
+                    k = 0x5A827999
+                elif i < 40:
+                    f = int_overflow(b ^ c ^ d)
+                    k = 0x6ED9EBA1
+                elif i < 60:
+                    f = int_overflow((b & c) | (b & d) | (c & d))
+                    k = 0x8F1BBCDC
+                else:
+                    f = int_overflow(b ^ c ^ d)
+                    k = 0xCA62C1D6
+                t = int_overflow(Curve.rrotate(a, 27) + f + e + k + w[i])
+                e = d
+                d = c
+                c = int_overflow(Curve.rrotate(b, 2))
+                b = a
+                a = int_overflow(t << 0)
+            h0 = int_overflow((h0 + a) << 0)
+            h1 = int_overflow((h1 + b) << 0)
+            h2 = int_overflow((h2 + c) << 0)
+            h3 = int_overflow((h3 + d) << 0)
+            h4 = int_overflow((h4 + e) << 0)
+        res = [0] * 20
+        Curve.packbe(res, 0, h0)
+        Curve.packbe(res, 4, h1)
+        Curve.packbe(res, 8, h2)
+        Curve.packbe(res, 12, h3)
+        Curve.packbe(res, 16, h4)
+        return res
+
+    @classmethod
+    def rrotate(cls, v, r):
+        return unsigned_right_shift(v, r) | (v << (32 - r))
+
+    @classmethod
+    def unpackbe(cls, a, off):
+        v = 0
+        for i in range(4):
+            v |= a[off + i] << (24 - (i * 8))
+        return v
+
+    @classmethod
+    def packbe(cls, a, off, v):
+        for i in range(4):
+            a[off + i] = (v >> (24 - i * 8)) & 0xff
+
+
+class RC4(object):
+
+    def __init__(self):
+        self.S = []
+        self.i = 0
+        self.j = 0
+
+    def set_key(self, key):
+        self.S = [i for i in range(256)]
+        S, j = self.S, 0
+        for i in range(256):
+            j = (j + key[i % len(key)] + S[i]) & 255
+            S[i], S[j] = S[j], S[i]
+        for _ in range(768):
+            self.gen()
+
+    def gen(self):
+        S = self.S
+        i = self.i = (self.i + 1) & 255
+        j = self.j = (self.j + S[i]) & 255
+        S[i], S[j] = S[j], S[i]
+        return S[(S[i] + S[j]) & 255]
+
+    def crypt_uint8array(self, dst, src, start):
+        for i in range(len(src)):
+            dst[start + i] = src[i] ^ self.gen()
+
+    def encrypt(self, s):
+        a = ''
+        for i in range(len(s)):
+            c = s[i] ^ self.gen()
+            if c == 0:
+                c = 256
+            a += chr(c)
+        return a
+
+
class RouterSession(object):
    """Encrypted control session with the router: Curve25519 key exchange
    followed by RC4-drop768-encrypted request/response frames.

    Frame layout (see encrypt/decrypt_uint8array): 4-byte session id,
    4-byte sequence number, RC4(payload), RC4(8 space bytes).

    BUG FIX in encrypt_uint8array: the id/seq header bytes were written
    unmasked and narr[0] was never set, so any session id >= 2**16 or
    tx_seq >= 256 put values > 255 into the list and bytes(narr) raised
    ValueError. All header bytes are now masked to 0..255 and all four id
    bytes are emitted, mirroring decrypt_uint8array.
    """

    def __init__(self):
        self.id = None              # session id assigned by the router
        self.pri_key = None         # 32-byte Curve25519 private key
        self.pub_key = None         # serialized handshake payload
        self.padding = [32] * 8     # 8 trailing space bytes per frame
        self.rx_seq = 1             # receive sequence counter
        self.rx_enc = RC4()
        self.tx_seq = 1             # transmit sequence counter
        self.tx_enc = RC4()

    def make_initial_request(self):
        """Generate a key pair and build the initial (plaintext) handshake body."""
        # NOTE(review): randint is not a CSPRNG; `secrets` would be preferable
        # for key material.
        self.pri_key = bytes([randint(0, 255) for _ in range(32)])
        pub_key = Curve.curve_u2a(
            Curve.curve25519(Curve.curve_a2u(self.pri_key)))
        self.pub_key = Curve.word2str(
            0) + Curve.word2str(0) + Curve.a2str(pub_key)
        self.pub_key = self.pub_key.encode()

    def key_exchange(self, body):
        """Parse the router's handshake reply, derive the shared secret and
        set up the directional RC4 keys."""
        self.id = Curve.str2word(body, 0)
        r_pub_key = Curve.str2a(body[8:])
        master_key = Curve.curve_u2a(Curve.curve25519(
            Curve.curve_a2u(self.pri_key), Curve.curve_a2u(r_pub_key)))
        self.rx_enc.set_key(self.make_key(master_key, False, False))
        self.tx_enc.set_key(self.make_key(master_key, True, False))

    def make_key(self, master_key, is_send, is_server):
        """Derive a 16-byte directional RC4 key:
        SHA1(master || 40*0x00 || magic || 40*0xf2)[:16]."""
        magic_2 = 'On the client side, this is the send key; on the server side, it is the receive key.'
        magic_3 = 'On the client side, this is the receive key; on the server side, it is the send key.'
        v = deepcopy(master_key)
        v.extend([0 for _ in range(40)])
        if is_send == is_server:
            v.extend(Curve.str2a(magic_3.encode()))
        else:
            v.extend(Curve.str2a(magic_2.encode()))
        v.extend([0xf2 for _ in range(40)])
        return Curve.sha1(v)[:16]

    def encrypt_uint8array(self, arr):
        """Frame and encrypt a byte list; returns the wire bytes."""
        narr = [0] * (len(arr) + 16)
        # Header bytes masked to 0..255 (see class docstring for the bug this fixes).
        narr[0] = (self.id >> 24) & 0xff
        narr[1] = (self.id >> 16) & 0xff
        narr[2] = (self.id >> 8) & 0xff
        narr[3] = self.id & 0xff
        narr[4] = (self.tx_seq >> 24) & 0xff
        narr[5] = (self.tx_seq >> 16) & 0xff
        narr[6] = (self.tx_seq >> 8) & 0xff
        narr[7] = self.tx_seq & 0xff
        self.tx_enc.crypt_uint8array(narr, arr, 8)
        # Trailing 8-space pad, encrypted with the same keystream.
        for i in range(len(arr) + 8, len(narr)):
            narr[i] = 32
        xarr = narr[len(arr) + 8:len(arr) + 16]
        self.tx_enc.crypt_uint8array(narr, xarr, len(arr) + 8)
        self.tx_seq += len(arr) + 8
        return bytes(narr)

    def decrypt_uint8array(self, arr):
        """Validate id/seq, decrypt a frame in place and print decoded messages.

        Returns False on malformed frames, True otherwise.
        """
        if len(arr) < 16:
            return False
        _id = int_overflow(arr[0] << 24) | int_overflow(arr[1] << 16) | int_overflow(arr[2] << 8) | arr[3]
        seq = int_overflow(arr[4] << 24) | int_overflow(arr[5] << 16) | int_overflow(arr[6] << 8) | arr[7]
        if _id != self.id:
            return False
        # NOTE(review): an out-of-sequence frame is skipped but reported as
        # success — confirm this is the intended behavior.
        if seq != self.rx_seq:
            return True
        self.rx_seq += len(arr) - 8
        self.rx_enc.crypt_uint8array(arr, arr[8:], 8)
        # The last 8 decrypted bytes must be the space padding.
        for i in range(len(arr) - 8, len(arr)):
            if arr[i] != 32:
                return False
        msgs = Buffer().buffer2msgs(arr[8:len(arr) - 8], 8)
        if msgs:
            for i in range(len(msgs)):
                print(msgs[i])
        return True

    def encrypt(self, s):
        """String-based variant of frame encryption (id + seq + RC4 payload + pad)."""
        seq = self.tx_seq
        self.tx_seq += len(s) + 8
        return (Curve.word2str(self.id) + Curve.word2str(seq)) + self.tx_enc.encrypt(s.encode()) + self.tx_enc.encrypt(
            self.padding)

    def encrypt_uri(self, uri):
        """Encrypt a URI and clip every char to 8 bits for URL transport."""
        s = self.encrypt(uri)
        r = ''
        for i in range(len(s)):
            r += chr(ord(s[i]) & 0xff)
        return r

    def fetch(self, url, headers, data):
        """Serialize + encrypt `data`, POST it, and decrypt the response frame."""
        data = self.encrypt_uint8array(Buffer().msg2buffer(data))
        response = requests.post(url=url, headers=headers, data=data)
        body = list(response.content)
        self.decrypt_uint8array(body)

    def login(self):
        """Handshake (plaintext POST of our public key), then authenticate as root."""
        response = requests.post(url=URL, data=self.pub_key)
        body = [ord(item) for item in response.content.decode()]
        self.key_exchange(body)

        data = {'s1': 'root', 's3': ''}
        self.fetch(url=URL, headers=HEADERS, data=data)

    def change_vpn(self, vpn_idx, vpn_server):
        """Reconfigure L2TP VPN slot `vpn_idx` to dial `vpn_server`."""
        # NOTE(review): VPN credentials (sd6/sd7) are hard-coded; consider
        # moving them to configuration.
        data = {
            "U1003c": [9, 0, 0, 0, 0, 0, 0, 0],
            "bdd": 0,
            "be1": 0,
            "be3": 0,
            "bfe000a": 0,
            "b1000e": 0,
            "ufe0001": vpn_idx + 48,
            "u10001": 34,
            "u10003": 0,
            "u10002": 16384,
            "uca": 1450,
            "ucb": 1450,
            "ud9": 4294967294,
            "udb": 30,
            "udc": 0,
            "ude": 60,
            "udf": 1,
            "sb0004": "disabled",
            "s10006": f"Vpn{vpn_idx}",
            "s1001e": "l2tp-out",
            "s10066": "",
            "se0": vpn_server,
            "se2": "",
            "sfe0009": "",
            "sd6": "123qqq",
            "sd7": "hnszs3ds",
            "Uff0014": [134217944],
            "Uff0001": [20, 0],
            "uff0007": 16646147
        }
        self.fetch(url=URL, headers=HEADERS, data=data)
+
+
class AdminSession(object):
    """Authenticated session for the hwq.yycyk.com proxy-line console."""

    def __init__(self):
        # NOTE(review): credentials are hard-coded in source; consider
        # environment variables or a config file.
        self.username = '17600025055'
        self.password = 'zhangyong0712'
        self.headers = None  # populated by login()

    def cookie2str(self, cookies):
        """Join a cookie mapping into a single 'k=v; k=v' Cookie header value.

        BUG FIX: the original called cookies.iteritems(), a Python 2 method
        that raises AttributeError under Python 3; items() works for dicts
        and requests cookie jars alike.
        """
        ret = []
        for key, value in cookies.items():
            ret.append(f'{key}={value}')
        return '; '.join(ret)

    def login(self):
        """Fetch the landing page to collect cookies, then POST the login form."""
        url = 'https://hwq.yycyk.com/'
        self.headers = {
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
            'Accept-Encoding': 'gzip, deflate, br',
            'Accept-Language': 'zh-CN,zh;q=0.9',
            'Cache-Control': 'no-cache',
            'Origin': 'https://hwq.yycyk.com',
            'Pragma': 'no-cache',
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
        }
        response = requests.get(url=url, headers=self.headers)
        self.headers.update({'Cookie': self.cookie2str(response.cookies)})

        url = 'https://hwq.yycyk.com/passport/loginact'
        self.headers.update({
            'Accept': '*/*',
            'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
            'Referer': 'https://hwq.yycyk.com/passport/login',
            'X-Requested-With': 'XMLHttpRequest',
        })
        data = {
            'phone': self.username,
            'password': self.password,
            'captcha': '',
        }
        response = requests.post(url=url, headers=self.headers, data=data)
        self.headers.update({'Cookie': self.cookie2str(response.cookies)})

    def get_proxy_list(self):
        """Return candidate proxy domains: each region's province domain plus
        every line that is both in service and reasonably fast."""
        url = 'https://hwq.yycyk.com/welcome/dynamicLines'
        self.headers.update({'Referer': 'https://hwq.yycyk.com/welcome'})
        data = {
            'search_str': '',
            'type': '3',
        }
        response = requests.post(url=url, headers=self.headers, data=data)
        obj = response.json()
        proxy_list = []
        for item in obj.get('res', {}).get('data', {}).values():
            proxy_list.append(item.get('info', {}).get('province_domain'))
            for sub_item in item.get('line', []):
                if int(sub_item.get('status', 0)) == 0:  # status 0 = under maintenance, skip
                    continue
                if int(sub_item.get('ping', 1000)) >= 200:  # skip lines with >= 200 ms latency
                    continue
                proxy_list.append(sub_item.get('domain'))
        return proxy_list
+
+
def job():
    """Pick a random healthy proxy line from the admin console and switch
    the router's VPN slot 1 over to it."""
    admin = AdminSession()
    admin.login()
    target_server = choice(admin.get_proxy_list())

    router = RouterSession()
    router.make_initial_request()
    router.login()
    router.change_vpn(vpn_idx=1, vpn_server=target_server)

    stamp = datetime.now(tz=pytz.timezone('Asia/Shanghai')).strftime('%Y-%m-%d %H:%M:%S')
    print(f'[+] {stamp} 切换代理地址为: {target_server}')
+
+
def main():
    """Run the VPN-switch job every day at 12:00 Asia/Shanghai until interrupted."""
    scheduler = BlockingScheduler({'apscheduler.timezone': 'Asia/Shanghai'})
    scheduler.add_job(job, 'cron', hour=12, minute=0, second=0)
    try:
        print('[+] 定时任务已启动')
        scheduler.start()  # blocks until interrupted
    except KeyboardInterrupt:
        print('[+] 定时任务已停止')


if __name__ == '__main__':
    main()

+ 0 - 0
jingdianfuqiwang/__init__.py


+ 0 - 0
jingdianfuqiwang/jingdianfuqiwang_main/__init__.py


+ 153 - 0
jingdianfuqiwang/jingdianfuqiwang_main/run_jdfqw_recommend.py

@@ -0,0 +1,153 @@
+import argparse
+from mq_http_sdk.mq_client import *
+from mq_http_sdk.mq_consumer import *
+from mq_http_sdk.mq_exception import MQExceptionBase
+
+sys.path.append(os.getcwd())
+from common.public import task_fun_mq, get_consumer, ack_message
+from common.scheduling_db import MysqlHelper
+from common import AliyunLogger
+from jingdianfuqiwang.jingdianfuqiwang_recommend import TFuQiWangRecommend
+
+
def main(log_type, crawler, topic_name, group_id, env):
    """Consume scheduling messages from MQ and run the jingdianfuqiwang
    recommend crawler once per message.

    Args:
        log_type: crawler mode, used as the logging mode tag.
        crawler: platform name.
        topic_name: MQ topic to consume from.
        group_id: MQ consumer group id.
        env: runtime environment ("dev" / "prod").

    BUG FIX: the scheduling-task log message was written as "f..." (a plain
    string beginning with a literal 'f') instead of an f-string, so the task
    dict was never interpolated into the log line.
    """
    consumer = get_consumer(topic_name, group_id)
    # Long polling: if the topic has no message, the server holds the request
    # open for up to wait_seconds before returning empty (server max is 30s).
    wait_seconds = 30
    # Messages fetched per poll (server-side maximum is 16).
    batch = 1
    AliyunLogger.logging(
        code="1000",
        platform=crawler,
        mode=log_type,
        env=env,
        message=f'{10 * "="}Consume And Ack Message From Topic{10 * "="}\n'
        f"WaitSeconds:{wait_seconds}\n"
        f"TopicName:{topic_name}\n"
        f"MQConsumer:{group_id}",
    )
    while True:
        try:
            # Long-polling consume.
            recv_msgs = consumer.consume_message(batch, wait_seconds)
            for msg in recv_msgs:
                AliyunLogger.logging(
                    code="1000",
                    platform=crawler,
                    mode=log_type,
                    env=env,
                    message=f"Receive\n"
                    f"MessageId:{msg.message_id}\n"
                    f"MessageBodyMD5:{msg.message_body_md5}\n"
                    f"MessageTag:{msg.message_tag}\n"
                    f"ConsumedTimes:{msg.consumed_times}\n"
                    f"PublishTime:{msg.publish_time}\n"
                    f"Body:{msg.message_body}\n"
                    f"NextConsumeTime:{msg.next_consume_time}\n"
                    f"ReceiptHandle:{msg.receipt_handle}\n"
                    f"Properties:{msg.properties}",
                )
                # Ack immediately so the message is not redelivered while the
                # (long-running) crawl executes.
                ack_message(
                    log_type=log_type,
                    crawler=crawler,
                    recv_msgs=recv_msgs,
                    consumer=consumer,
                )
                # Parse the task definition.
                task_dict = task_fun_mq(msg.message_body)["task_dict"]
                AliyunLogger.logging(
                    code="1000",
                    platform=crawler,
                    mode=log_type,
                    env=env,
                    message=f"调度任务:{task_dict}",
                )
                # Parse the crawl rules.
                rule_dict = task_fun_mq(msg.message_body)["rule_dict"]
                AliyunLogger.logging(
                    code="1000",
                    platform=crawler,
                    mode=log_type,
                    env=env,
                    message=f"抓取规则:{rule_dict}\n",
                )
                # Load the user list bound to this task.
                # NOTE(review): task_id comes from the internal MQ payload;
                # still, a parameterized query would be safer than f-string SQL.
                task_id = task_dict["id"]
                select_user_sql = (
                    f"""select * from crawler_user_v3 where task_id={task_id}"""
                )
                user_list = MysqlHelper.get_values(
                    log_type, crawler, select_user_sql, env, action=""
                )
                AliyunLogger.logging(
                    code="1003",
                    platform=crawler,
                    mode=log_type,
                    env=env,
                    message="开始抓取"
                )
                AliyunLogger.logging(
                    code="1000",
                    platform=crawler,
                    mode=log_type,
                    env=env,
                    message="开始抓取经典福气旺——推荐",
                )
                main_process = TFuQiWangRecommend(
                    platform=crawler,
                    mode=log_type,
                    rule_dict=rule_dict,
                    user_list=user_list,
                    env=env
                )
                main_process.get_video_list()
                AliyunLogger.logging(
                    code="1000",
                    platform=crawler,
                    mode=log_type,
                    env=env,
                    message="完成抓取——经典福气旺",
                )
                AliyunLogger.logging(
                    code="1004", platform=crawler, mode=log_type, env=env, message="结束一轮抓取"
                )

        except MQExceptionBase as err:
            # Topic had no consumable message.
            if err.type == "MessageNotExist":
                AliyunLogger.logging(
                    code="2000",
                    platform=crawler,
                    mode=log_type,
                    env=env,
                    message=f"No new message! RequestId:{err.req_id}\n",
                )
                continue
            AliyunLogger.logging(
                code="2000",
                platform=crawler,
                mode=log_type,
                env=env,
                message=f"Consume Message Fail! Exception:{err}\n",
            )
            time.sleep(2)
            continue
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()  ## 新建参数解释器对象
+    parser.add_argument("--log_type", type=str)  ## 添加参数,注明参数类型
+    parser.add_argument("--crawler")  ## 添加参数
+    parser.add_argument("--topic_name")  ## 添加参数
+    parser.add_argument("--group_id")  ## 添加参数
+    parser.add_argument("--env")  ## 添加参数
+    args = parser.parse_args()  ### 参数赋值,也可以通过终端赋值
+    main(
+        log_type=args.log_type,
+        crawler=args.crawler,
+        topic_name=args.topic_name,
+        group_id=args.group_id,
+        env=args.env,
+    )

+ 1 - 0
jingdianfuqiwang/jingdianfuqiwang_recommend/__init__.py

@@ -0,0 +1 @@
+from .jingdianfuqiwang_recommend_scheduling import TFuQiWangRecommend

+ 26 - 0
jingdianfuqiwang/jingdianfuqiwang_recommend/jingdianfuqiwang_dev.py

@@ -0,0 +1,26 @@
import json
import requests
from common import tunnel_proxies

# Dev probe: hit the jingdianfuqiwang recommend-feed endpoint once through the
# tunnel proxy and dump the JSON response.
API_URL = "https://jdfqw.wentingyou.cn/index.php/v111/index/index"
QUERY = {"parameter": json.dumps({"cid": "", "page": 1, "is_ads": 1})}
REQUEST_HEADERS = {
    'Host': 'jdfqw.wentingyou.cn',
    'chatkey': 'wx79ef316d416e5da1',
    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 MicroMessenger/6.8.0(0x16080000) NetType/WIFI MiniProgramEnv/Mac MacWechat/WMPF MacWechat/3.8.5(0x13080510)XWEB/1100',
    'content-type': 'application/x-www-form-urlencoded',
    'xweb_xhr': '1',
    'vision': '1.1.0',
    'token': '',
    'accept': '*/*',
    'referer': 'https://servicewechat.com/wx79ef316d416e5da1/1/page-frame.html',
    'accept-language': 'en-US,en;q=0.9'
}

response = requests.request("GET", API_URL, headers=REQUEST_HEADERS,
                            params=QUERY, proxies=tunnel_proxies())
print(response.url)
print(json.dumps(response.json(), ensure_ascii=False, indent=4))

+ 159 - 0
jingdianfuqiwang/jingdianfuqiwang_recommend/jingdianfuqiwang_recommend_scheduling.py

@@ -0,0 +1,159 @@
+import os
+import json
+import random
+import sys
+import time
+import uuid
+
+import requests
+
+sys.path.append(os.getcwd())
+from common.video_item import VideoItem
+from common import PiaoQuanPipeline, AliyunLogger, tunnel_proxies
+from common.mq import MQ
+
+
class TFuQiWangRecommend(object):
    """Crawler for the 经典福气旺 mini-program recommend feed.

    Pages through the feed, validates every video with PiaoQuanPipeline and
    forwards accepted items to the ETL topic via MQ.

    BUG FIXES:
    - the page-level except handler referenced an undefined name `i`
      (NameError inside the handler masked the original error);
    - the request always sent "page": 1 while `page_index` was incremented
      once per *video*; the counter now advances once per page and is sent
      in the request, so pagination actually progresses.
    """

    def __init__(self, platform, mode, rule_dict, user_list, env):
        self.platform = platform
        self.mode = mode
        self.rule_dict = rule_dict      # quota/filter rules from scheduling
        self.user_list = user_list      # candidate publishing accounts
        self.env = env
        self.download_cnt = 0           # videos accepted so far this round
        self.mq = MQ(topic_name="topic_crawler_etl_" + self.env)
        self.limit_flag = False         # set once the quota is reached

    def get_video_list(self):
        """Page through the recommend feed until it is exhausted or the quota hits."""
        base_url = "https://jdfqw.wentingyou.cn/index.php/v111/index/index"
        headers = {
            'chatkey': 'wx79ef316d416e5da1',
            'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 MicroMessenger/6.8.0(0x16080000) NetType/WIFI MiniProgramEnv/Mac MacWechat/WMPF MacWechat/3.8.5(0x13080510)XWEB/1100',
            'content-type': 'application/x-www-form-urlencoded',
            'xweb_xhr': '1',
            'vision': '1.1.0',
            'token': '',
            'accept': '*/*',
            'referer': 'https://servicewechat.com/wx79ef316d416e5da1/1/page-frame.html',
            'accept-language': 'en-US,en;q=0.9'
        }
        page_index = 1
        while True:
            time.sleep(random.randint(1, 10))  # pace requests politely
            try:
                if self.limit_flag:
                    AliyunLogger.logging(
                        code="2000",
                        platform=self.platform,
                        mode=self.mode,
                        env=self.env,
                        message="本轮已经抓取到足够的数据,自动退出\t{}".format(self.download_cnt),
                    )
                    return
                else:
                    obj = {
                        "cid": "",
                        "page": page_index,  # was hard-coded to 1
                        "is_ads": 1
                    }
                    params = {
                        "parameter": json.dumps(obj)
                    }
                    response = requests.get(
                        url=base_url,
                        headers=headers,
                        params=params,
                        proxies=tunnel_proxies(),
                    )
                    video_list = response.json()['data']['list']
                    if video_list:
                        for index, video_obj in enumerate(video_list, 1):
                            try:
                                if video_obj.get("title"):
                                    AliyunLogger.logging(
                                        code="1001",
                                        platform=self.platform,
                                        mode=self.mode,
                                        env=self.env,
                                        message="扫描到一条视频",
                                        data=video_obj,
                                    )
                                    self.process_video_obj(video_obj)
                            except Exception as e:
                                AliyunLogger.logging(
                                    code="3000",
                                    platform=self.platform,
                                    mode=self.mode,
                                    env=self.env,
                                    data=video_obj,
                                    message="抓取第{}条的时候出现问题, 报错信息是{}".format(index, e),
                                )
                        page_index += 1  # advance once per page, not per video
                    else:
                        AliyunLogger.logging(
                            code="2000",
                            platform=self.platform,
                            mode=self.mode,
                            env=self.env,
                            message="已经抓完了,自动退出"
                        )
                        return
            except Exception as e:
                AliyunLogger.logging(
                    code="3000",
                    platform=self.platform,
                    mode=self.mode,
                    env=self.env,
                    message="抓取第{}页时候出现错误, 报错信息是{}".format(page_index, e),
                )

    def process_video_obj(self, video_obj):
        """Map a feed entry into a VideoItem, run it through the pipeline and,
        if accepted, publish it to the ETL topic."""
        trace_id = self.platform + str(uuid.uuid1())
        our_user = random.choice(self.user_list)
        item = VideoItem()
        item.add_video_info("user_id", our_user["uid"])
        item.add_video_info("user_name", our_user["nick_name"])
        item.add_video_info("video_id", video_obj["nid"])
        item.add_video_info("video_title", video_obj["title"])
        item.add_video_info("publish_time_stamp", int(video_obj['update_time']))
        item.add_video_info("video_url", video_obj["video_url"])
        item.add_video_info("cover_url", video_obj["video_cover"])
        item.add_video_info("out_video_id", video_obj["nid"])
        item.add_video_info("platform", self.platform)
        item.add_video_info("strategy", self.mode)
        item.add_video_info("session", "{}-{}".format(self.platform, int(time.time())))
        mq_obj = item.produce_item()
        pipeline = PiaoQuanPipeline(
            platform=self.platform,
            mode=self.mode,
            rule_dict=self.rule_dict,
            env=self.env,
            item=mq_obj,
            trace_id=trace_id,
        )
        if pipeline.process_item():
            self.download_cnt += 1
            self.mq.send_msg(mq_obj)
            AliyunLogger.logging(
                code="1002",
                platform=self.platform,
                mode=self.mode,
                env=self.env,
                message="成功发送至 ETL",
                data=mq_obj,
            )
            # Stop the crawl once the configured quota (default 200) is reached.
            if self.download_cnt >= int(
                self.rule_dict.get("videos_cnt", {}).get("min", 200)
            ):
                self.limit_flag = True
+
+
if __name__ == '__main__':
    # Local smoke run against the dev environment with two dummy users.
    crawler = TFuQiWangRecommend(
        platform="jingdianfuqiwang",
        mode="recommend",
        env="dev",
        rule_dict={},
        user_list=[{'nick_name': "Ivring", 'uid': "1997"},
                   {'nick_name': "paul", 'uid': "1998"}],
    )
    crawler.get_video_list()

+ 25 - 0
jingdianfuqiwang/jingdianfuqiwang_recommend/test3.py

@@ -0,0 +1,25 @@
import requests

# Dev probe: fetch a Baidu music-video CDN URL directly with browser-like
# headers and print the response body.
VIDEO_URL = "https://music-video-bos.cdn.bcebos.com/4d78865df946c655b74dfa71bb09f265/65683a1a/video/20230816/a8b22de9c0d2332fde0dae480780f426_1.mp4"
REQUEST_HEADERS = {
    'Host': 'music-video-bos.cdn.bcebos.com',
    'sec-ch-ua': '"Google Chrome";v="119", "Chromium";v="119", "Not?A_Brand";v="24"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"macOS"',
    'upgrade-insecure-requests': '1',
    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
    'sec-fetch-site': 'none',
    'sec-fetch-mode': 'navigate',
    'sec-fetch-user': '?1',
    'sec-fetch-dest': 'document',
    'referer': 'https://servicewechat.com/',
    'accept-language': 'en,zh-CN;q=0.9,zh;q=0.8',
}

response = requests.get(VIDEO_URL, headers=REQUEST_HEADERS)
print(response.text)

+ 2 - 0
kanyikan/kanyikan_main/run_kykoffline_recommend.py

@@ -27,6 +27,8 @@ def run(args1, args2, args3, args4, args5):
 class ZFQZMain:
     @classmethod
     def zhufuquanzi_main(cls, log_type, crawler, topic_name, group_id, env):
+        topic_name="kykoffline_recommend_prod"
+        group_id="kykoffline_recommend_prod"
         consumer = get_consumer(topic_name, group_id)
         # 长轮询表示如果Topic没有消息,则客户端请求会在服务端挂起3秒,3秒内如果有消息可以消费则立即返回响应。
         # 长轮询时间3秒(最多可设置为30秒)。

File diff suppressed because it is too large
+ 0 - 0
kanyikan/kanyikan_recommend/kanyikan/chlsfiles/charles202311231411.txt


+ 1 - 1
kanyikan/kanyikan_recommend/kanyikan_recommend_feed.py

@@ -22,7 +22,7 @@ proxies = {"http": None, "https": None}
 
 
 class KanyikanRecommend:
-    platform = "看一看"
+    platform = "看一看-feed流"
     strategy = "feed流"
 
     @classmethod

+ 1 - 1
kanyikan/kanyikan_recommend/kanyikan_recommend_offline.py

@@ -30,7 +30,7 @@ class KanyikanRecommend:
         if env == "dev":
             chromedriverExecutable = "/Users/tzld/Downloads/chromedriver_v111/chromedriver"
         else:
-            chromedriverExecutable = "/Users/crawler/Downloads/chromedriver_v111/chromedriver"
+            chromedriverExecutable = "/Users/piaoquan/Downloads/chromedriver"
 
         Common.logger(log_type, crawler).info("启动微信")
         Common.logging(log_type, crawler, env, '启动微信')

+ 1 - 1
kanyikan/kanyikan_recommend/kanyikan_recommend_plus.py

@@ -19,7 +19,7 @@ proxies = {"http": None, "https": None}
 
 
 class KanyikanRecommend:
-    platform = "看一看"
+    platform = "看一看-plus"
     strategy = "随机数据抓取"
 
     @classmethod

+ 1 - 1
kanyikan/kanyikan_recommend/kanyikan_recommend_video_id.py

@@ -22,7 +22,7 @@ proxies = {"http": None, "https": None}
 
 
 class KanyikanViodeRecommend:
-    platform = "看一看"
+    platform = "看一看-feed流"
     strategy = "video_id-feed流"
 
 

+ 6 - 0
main/process_mq.sh

@@ -74,6 +74,12 @@ elif [ ${crawler} = "xngrule" ] && [ ${log_type} = "recommend" ];then
   python=python3
   log_path=${piaoquan_crawler_dir}main/main_logs/process-mq-$(date +%Y-%m-%d).log
 
+elif [ ${crawler} = "zfqz" ] && [ ${log_type} = "recommend" ];then
+  piaoquan_crawler_dir=/Users/tzld/Desktop/piaoquan_crawler/
+  profile_path=/.base_profile
+  python=python3
+  log_path=${piaoquan_crawler_dir}main/main_logs/process-mq-$(date +%Y-%m-%d).log
+
 elif [ ${crawler} = "xnguser" ] && [ ${log_type} = "recommend" ];then
   piaoquan_crawler_dir=/Users/tzld/Desktop/piaoquan_crawler/
   profile_path=/.base_profile

+ 93 - 34
main/process_offline_new.sh → main/process_offline(old).sh

@@ -8,7 +8,7 @@ if [ ${env} = "dev" ];then
   node_path=/opt/homebrew/bin/node
   log_path=${piaoquan_crawler_dir}main/main_logs/process-$(date +%Y-%m-%d).log
 else
-  piaoquan_crawler_dir=//Users/tzld/Desktop/piaoquan_crawler/
+  piaoquan_crawler_dir=/Users/piaoquan/Desktop/piaoquan_crawler/
   profile_path=./base_profile
   node_path=/usr/local/bin/node
   log_path=${piaoquan_crawler_dir}main/main_logs/process-$(date +%Y-%m-%d).log
@@ -29,8 +29,48 @@ else
   echo "$(date "+%Y-%m-%d %H:%M:%S") Appium 进程状态正常" >> ${log_path}
 fi
 
+# 海豚祝福
+#if [[ "$time" > "00:00:00"  &&  "$time" < "00:59:59" || "$time" > "12:00:00"  &&  "$time" < "12:59:59" ]];then
+#  echo "$(date "+%Y-%m-%d %H:%M:%S") 开始启动 海豚祝福 爬虫脚本任务" >> ${log_path}
+#  ps aux | grep run_zhongmiaoyinxin | grep -v grep | awk '{print $2}' | xargs kill -9
+#  ps aux | grep run_ganggangdouchuan | grep -v grep | awk '{print $2}' | xargs kill -9
+#  ps aux | grep run_jixiangxingfu | grep -v grep | awk '{print $2}' | xargs kill -9
+#  ps -ef | grep "run_htzf_recommend.py" | grep -v "grep"
+#  if [ "$?" -eq 1 ];then
+#    echo "$(date "+%Y-%m-%d %H:%M:%S") 海豚祝福小程序爬虫, 异常停止, 正在重启!" >> ${log_path}
+#    adb forward --remove-all
+#    cd ${piaoquan_crawler_dir}
+#    nohup python3 -u haitunzhufu/haitunzhufu_main/run_htzf_recommend.py --log_type="recommend" --crawler="haitunzhufu" --env=${env} >> haitunzhufu/logs/nohup-recommend.log 2>&1 &
+#    echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!" >> ${log_path}
+#  else
+#    echo "$(date "+%Y-%m-%d %H:%M:%S") 海豚祝福小程序爬虫, 进程状态正常" >> ${log_path}
+#  fi
+#else
+#  echo "$(date "+%Y-%m-%d %H:%M:%S") 海豚祝福 爬虫脚本任务结束" >> ${log_path}
+#fi
+#
+## 刚刚都传
+#if [[ "$time" > "01:00:00"  &&  "$time" < "01:59:59" || "$time" > "13:00:00"  &&  "$time" < "13:59:59" ]];then
+#  echo "$(date "+%Y-%m-%d %H:%M:%S") 开始启动 刚刚都传 爬虫脚本任务" >> ${log_path}
+#  ps aux | grep run_htzf | grep -v grep | awk '{print $2}' | xargs kill -9
+#  ps aux | grep run_zhongmiaoyinxin | grep -v grep | awk '{print $2}' | xargs kill -9
+#  ps aux | grep run_jixiangxingfu | grep -v grep | awk '{print $2}' | xargs kill -9
+#  ps -ef | grep "run_ganggangdouchuan_recommend.py" | grep -v "grep"
+#  if [ "$?" -eq 1 ];then
+#    echo "$(date "+%Y-%m-%d %H:%M:%S") 刚刚都传小程序爬虫, 异常停止, 正在重启!" >> ${log_path}
+#    adb forward --remove-all
+#    cd ${piaoquan_crawler_dir}
+#    nohup python3 -u ganggangdouchuan/ganggangdouchuan_main/run_ganggangdouchuan_recommend.py --log_type="recommend" --crawler="ganggangdouchuan" --env=${env} >>ganggangdouchuan/logs/nohup-recommend.log 2>&1 &
+#    echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!" >> ${log_path}
+#  else
+#    echo "$(date "+%Y-%m-%d %H:%M:%S") 刚刚都传小程序爬虫, 进程状态正常" >> ${log_path}
+#  fi
+#else
+#  echo "$(date "+%Y-%m-%d %H:%M:%S") 刚刚都传小程序爬虫, 任务结束" >> ${log_path}
+#fi
+
 # 吉祥幸福
-if [[ "$time" > "00:00:00"  &&  "$time" < "02:59:59" || "$time" > "05:00:00"  &&  "$time" < "06:59:59" ]];then
+if [[ "$time" > "00:00:00"  &&  "$time" < "02:59:59" || "$time" > "12:00:00"  &&  "$time" < "14:59:59" ]];then
   echo "$(date "+%Y-%m-%d %H:%M:%S") 开始启动 吉祥幸福 爬虫脚本任务" >> ${log_path}
 #  ps aux | grep run_htzf | grep -v grep | awk '{print $2}' | xargs kill -9
 #  ps aux | grep run_zhongmiaoyinxin | grep -v grep | awk '{print $2}' | xargs kill -9
@@ -50,12 +90,33 @@ else
 fi
 
 
+# Zhongmiaoyinxin (众妙音信): run only inside the 03:00-05:00 and 15:00-17:00 windows
+if [[ "$time" > "03:00:00"  &&  "$time" < "04:59:59" || "$time" > "15:00:00"  &&  "$time" < "16:59:59" ]];then
+  echo "$(date "+%Y-%m-%d %H:%M:%S") 开始启动 众妙音信 爬虫脚本任务" >> ${log_path}
+#  ps aux | grep run_htzf | grep -v grep | awk '{print $2}' | xargs kill -9
+#  ps aux | grep run_ganggangdouchuan | grep -v grep | awk '{print $2}' | xargs kill -9
+  # Kill the jixiangxingfu crawler first so this one gets exclusive use of the device
+  ps aux | grep run_jixiangxingfu | grep -v grep | awk '{print $2}' | xargs kill -9
+  # Probe for a live crawler process; grep exits 1 when nothing matches ($? checked below)
+  ps -ef | grep "run_zhongmiaoyinxin_recommend.py" | grep -v "grep"
+  if [ "$?" -eq 1 ];then
+    echo "$(date "+%Y-%m-%d %H:%M:%S") 众妙音信小程序爬虫, 异常停止, 正在重启!" >> ${log_path}
+    # Reset adb port forwards before relaunching the device-driven crawler
+    adb forward --remove-all
+    cd ${piaoquan_crawler_dir}
+    nohup python3 -u zhongmiaoyinxin/zhongmiaoyinxin_main/run_zhongmiaoyinxin_recommend.py --log_type="recommend" --crawler="zhongmiaoyinxin" --env=${env} >>zhongmiaoyinxin/logs/nohup-recommend.log 2>&1 &
+    echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!" >> ${log_path}
+  else
+    echo "$(date "+%Y-%m-%d %H:%M:%S") 众妙音信小程序爬虫, 进程状态正常" >> ${log_path}
+  fi
+
+else
+  echo "$(date "+%Y-%m-%d %H:%M:%S") 众妙音信 爬虫脚本任务结束" >> ${log_path}
+fi
+
 # 众妙音信-new
-if [[ "$time" > "03:00:00"  &&  "$time" < "04:59:59" || "$time" > "07:00:00"  &&  "$time" < "08:59:59" ]];then
+if [[ "$time" > "05:00:00"  &&  "$time" < "06:59:59" || "$time" > "17:00:00"  &&  "$time" < "18:59:59" ]];then
   echo "$(date "+%Y-%m-%d %H:%M:%S") 开始启动 众妙音信-new 爬虫脚本任务" >> ${log_path}
 #  ps aux | grep run_htzf | grep -v grep | awk '{print $2}' | xargs kill -9
 #  ps aux | grep run_ganggangdouchuan | grep -v grep | awk '{print $2}' | xargs kill -9
-  ps aux | grep run_jixiangxingfu | grep -v grep | awk '{print $2}' | xargs kill -9
+  ps aux | grep run_zhongmiaoyinxin | grep -v grep | awk '{print $2}' | xargs kill -9
   ps -ef | grep "run_zmyx_recommend.py" | grep -v "grep"
   if [ "$?" -eq 1 ];then
     echo "$(date "+%Y-%m-%d %H:%M:%S") 众妙音信-new小程序爬虫, 异常停止, 正在重启!" >> ${log_path}
@@ -71,56 +132,54 @@ else
   echo "$(date "+%Y-%m-%d %H:%M:%S") 众妙音信 爬虫脚本任务结束" >> ${log_path}
 fi
 
-## 小年糕-rule
-#if [[ "$time" > "09:00:00"  &&  "$time" < "10:59:59" || "$time" > "19:00:00"  &&  "$time" < "21:59:59" ]];then
-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 开始启动 小年糕-rule 爬虫脚本任务" >> ${log_path}
-##  ps aux | grep run_htzf | grep -v grep | awk '{print $2}' | xargs kill -9
-##  ps aux | grep run_ganggangdouchuan | grep -v grep | awk '{print $2}' | xargs kill -9
-#  ps aux | grep run_zmyx | grep -v grep | awk '{print $2}' | xargs kill -9
-#  ps -ef | grep "run_xngrule_recommend.py" | grep -v "grep"
-#  if [ "$?" -eq 1 ];then
-#    echo "$(date "+%Y-%m-%d %H:%M:%S") 小年糕-rule 小程序爬虫, 异常停止, 正在重启!" >> ${log_path}
-#    adb forward --remove-all
-#    cd ${piaoquan_crawler_dir}
-#    nohup python3 -u xiaoniangaoplus/xiaoniangaoplus_main/run_xngrule_recommend.py --log_type="recommend" --crawler="xiaoniangaoplus" --env=${env} >>xiaoniangaoplus/logs/nohup-recommend.log 2>&1 &
-#    echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!" >> ${log_path}
-#  else
-#    echo "$(date "+%Y-%m-%d %H:%M:%S") 小年糕-rule 程序爬虫, 进程状态正常" >> ${log_path}
-#  fi
-#
-#else
-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 小年糕-rule 爬虫脚本任务结束" >> ${log_path}
-#fi
+# Xiaoniangao-rule (小年糕-rule): run only inside the 07:00-09:00 and 19:00-22:00 windows
+if [[ "$time" > "07:00:00"  &&  "$time" < "08:59:59" || "$time" > "19:00:00"  &&  "$time" < "21:59:59" ]];then
+  echo "$(date "+%Y-%m-%d %H:%M:%S") 开始启动 小年糕-rule 爬虫脚本任务" >> ${log_path}
+#  ps aux | grep run_htzf | grep -v grep | awk '{print $2}' | xargs kill -9
+#  ps aux | grep run_ganggangdouchuan | grep -v grep | awk '{print $2}' | xargs kill -9
+  # Kill the zmyx crawler first so the device is free for this task
+  ps aux | grep run_zmyx | grep -v grep | awk '{print $2}' | xargs kill -9
+  # grep exits 1 when the crawler process is not running ($? checked below)
+  ps -ef | grep "run_xngrule_recommend.py" | grep -v "grep"
+  if [ "$?" -eq 1 ];then
+    echo "$(date "+%Y-%m-%d %H:%M:%S") 小年糕-rule 小程序爬虫, 异常停止, 正在重启!" >> ${log_path}
+    # Reset adb port forwards before relaunching
+    adb forward --remove-all
+    cd ${piaoquan_crawler_dir}
+    nohup python3 -u xiaoniangaoplus/xiaoniangaoplus_main/run_xngrule_recommend.py --log_type="recommend" --crawler="xiaoniangaoplus" --env=${env} >>xiaoniangaoplus/logs/nohup-recommend.log 2>&1 &
+    echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!" >> ${log_path}
+  else
+    echo "$(date "+%Y-%m-%d %H:%M:%S") 小年糕-rule 程序爬虫, 进程状态正常" >> ${log_path}
+  fi
 
+else
+  echo "$(date "+%Y-%m-%d %H:%M:%S") 小年糕-rule 爬虫脚本任务结束" >> ${log_path}
+fi
 
-## 小年糕+
-#if [[ "$time" > "07:00:00"  &&  "$time" < "08:59:59" || "$time" > "19:00:00"  &&  "$time" < "21:59:59" ]];then
-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 开始启动 小年糕+ 爬虫脚本任务" >> ${log_path}
+## 看一看-线下offline
+#if [[ "$time" > "10:00:00"  &&  "$time" < "11:59:59" || "$time" > "22:00:00"  &&  "$time" < "23:59:59" ]];then
+#  echo "$(date "+%Y-%m-%d %H:%M:%S") 开始启动 看一看-线下offline 爬虫脚本任务" >> ${log_path}
 ##  ps aux | grep run_htzf | grep -v grep | awk '{print $2}' | xargs kill -9
 ##  ps aux | grep run_ganggangdouchuan | grep -v grep | awk '{print $2}' | xargs kill -9
 #  ps aux | grep run_xngrule | grep -v grep | awk '{print $2}' | xargs kill -9
-#  ps -ef | grep "run_xngplus_recommend.py" | grep -v "grep"
+#  ps -ef | grep "run_kykoffline_recommend.py" | grep -v "grep"
 #  if [ "$?" -eq 1 ];then
-#    echo "$(date "+%Y-%m-%d %H:%M:%S") 小年糕+ 小程序爬虫, 异常停止, 正在重启!" >> ${log_path}
+#    echo "$(date "+%Y-%m-%d %H:%M:%S") 看一看-线下offline 小程序爬虫, 异常停止, 正在重启!" >> ${log_path}
 #    adb forward --remove-all
 #    cd ${piaoquan_crawler_dir}
-#    nohup python3 -u xiaoniangaoplus/xiaoniangaoplus_main/run_xngplus_recommend.py --log_type="recommend" --crawler="xiaoniangaoplus" --env=${env} >>xiaoniangaoplus/logs/nohup-recommend.log 2>&1 &
+#    nohup python3 -u kanyikan/kanyikan_main/run_kykoffline_recommend.py --log_type="recommend" --crawler="kanyikan" --env=${env} >>kanyikan/logs/nohup-recommend.log 2>&1 &
 #    echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!" >> ${log_path}
 #  else
-#    echo "$(date "+%Y-%m-%d %H:%M:%S") 小年糕+ 程序爬虫, 进程状态正常" >> ${log_path}
+#    echo "$(date "+%Y-%m-%d %H:%M:%S") 看一看-线下offline 程序爬虫, 进程状态正常" >> ${log_path}
 #  fi
 #
 #else
-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 小年糕+ 爬虫脚本任务结束" >> ${log_path}
+#  echo "$(date "+%Y-%m-%d %H:%M:%S") 看一看-线下offline 爬虫脚本任务结束" >> ${log_path}
 #fi
 
-
 # 祝福圈子
 if [[ "$time" > "10:00:00"  &&  "$time" < "11:59:59" || "$time" > "22:00:00"  &&  "$time" < "23:59:59" ]];then
   echo "$(date "+%Y-%m-%d %H:%M:%S") 开始启动 祝福圈子 爬虫脚本任务" >> ${log_path}
 #  ps aux | grep run_htzf | grep -v grep | awk '{print $2}' | xargs kill -9
 #  ps aux | grep run_ganggangdouchuan | grep -v grep | awk '{print $2}' | xargs kill -9
-  ps aux | grep run_xngplus | grep -v grep | awk '{print $2}' | xargs kill -9
+  ps aux | grep run_xngrule | grep -v grep | awk '{print $2}' | xargs kill -9
   ps -ef | grep "run_zfqz_recommend.py" | grep -v "grep"
   if [ "$?" -eq 1 ];then
     echo "$(date "+%Y-%m-%d %H:%M:%S") 祝福圈子 小程序爬虫, 异常停止, 正在重启!" >> ${log_path}

+ 106 - 88
main/process_offline.sh

@@ -29,52 +29,16 @@ else
   echo "$(date "+%Y-%m-%d %H:%M:%S") Appium 进程状态正常" >> ${log_path}
 fi
 
-# 海豚祝福
-#if [[ "$time" > "00:00:00"  &&  "$time" < "00:59:59" || "$time" > "12:00:00"  &&  "$time" < "12:59:59" ]];then
-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 开始启动 海豚祝福 爬虫脚本任务" >> ${log_path}
-#  ps aux | grep run_zhongmiaoyinxin | grep -v grep | awk '{print $2}' | xargs kill -9
-#  ps aux | grep run_ganggangdouchuan | grep -v grep | awk '{print $2}' | xargs kill -9
-#  ps aux | grep run_jixiangxingfu | grep -v grep | awk '{print $2}' | xargs kill -9
-#  ps -ef | grep "run_htzf_recommend.py" | grep -v "grep"
-#  if [ "$?" -eq 1 ];then
-#    echo "$(date "+%Y-%m-%d %H:%M:%S") 海豚祝福小程序爬虫, 异常停止, 正在重启!" >> ${log_path}
-#    adb forward --remove-all
-#    cd ${piaoquan_crawler_dir}
-#    nohup python3 -u haitunzhufu/haitunzhufu_main/run_htzf_recommend.py --log_type="recommend" --crawler="haitunzhufu" --env=${env} >> haitunzhufu/logs/nohup-recommend.log 2>&1 &
-#    echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!" >> ${log_path}
-#  else
-#    echo "$(date "+%Y-%m-%d %H:%M:%S") 海豚祝福小程序爬虫, 进程状态正常" >> ${log_path}
-#  fi
-#else
-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 海豚祝福 爬虫脚本任务结束" >> ${log_path}
-#fi
-#
-## 刚刚都传
-#if [[ "$time" > "01:00:00"  &&  "$time" < "01:59:59" || "$time" > "13:00:00"  &&  "$time" < "13:59:59" ]];then
-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 开始启动 刚刚都传 爬虫脚本任务" >> ${log_path}
-#  ps aux | grep run_htzf | grep -v grep | awk '{print $2}' | xargs kill -9
-#  ps aux | grep run_zhongmiaoyinxin | grep -v grep | awk '{print $2}' | xargs kill -9
-#  ps aux | grep run_jixiangxingfu | grep -v grep | awk '{print $2}' | xargs kill -9
-#  ps -ef | grep "run_ganggangdouchuan_recommend.py" | grep -v "grep"
-#  if [ "$?" -eq 1 ];then
-#    echo "$(date "+%Y-%m-%d %H:%M:%S") 刚刚都传小程序爬虫, 异常停止, 正在重启!" >> ${log_path}
-#    adb forward --remove-all
-#    cd ${piaoquan_crawler_dir}
-#    nohup python3 -u ganggangdouchuan/ganggangdouchuan_main/run_ganggangdouchuan_recommend.py --log_type="recommend" --crawler="ganggangdouchuan" --env=${env} >>ganggangdouchuan/logs/nohup-recommend.log 2>&1 &
-#    echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!" >> ${log_path}
-#  else
-#    echo "$(date "+%Y-%m-%d %H:%M:%S") 刚刚都传小程序爬虫, 进程状态正常" >> ${log_path}
-#  fi
-#else
-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 刚刚都传小程序爬虫, 任务结束" >> ${log_path}
-#fi
-
 # 吉祥幸福
-if [[ "$time" > "00:00:00"  &&  "$time" < "02:59:59" || "$time" > "12:00:00"  &&  "$time" < "14:59:59" ]];then
+if [[ "$time" > "00:00:59"  &&  "$time" < "01:59:59" || "$time" > "05:00:00"  &&  "$time" < "05:59:59" ]];then
   echo "$(date "+%Y-%m-%d %H:%M:%S") 开始启动 吉祥幸福 爬虫脚本任务" >> ${log_path}
 #  ps aux | grep run_htzf | grep -v grep | awk '{print $2}' | xargs kill -9
 #  ps aux | grep run_zhongmiaoyinxin | grep -v grep | awk '{print $2}' | xargs kill -9
-  ps aux | grep run_kykoffline | grep -v grep | awk '{print $2}' | xargs kill -9
+  ps aux | grep run_ppqsift | grep -v grep | awk '{print $2}' | xargs kill -9
+  ps aux | grep run_zfqz | grep -v grep | awk '{print $2}' | xargs kill -9
+  ps aux | grep run_zmyx | grep -v grep | awk '{print $2}' | xargs kill -9
+  ps aux | grep run_xngplus | grep -v grep | awk '{print $2}' | xargs kill -9
+  ps aux | grep run_xngrule | grep -v grep | awk '{print $2}' | xargs kill -9
   ps -ef | grep "run_jixiangxingfu_recommend.py" | grep -v "grep"
   if [ "$?" -eq 1 ];then
     echo "$(date "+%Y-%m-%d %H:%M:%S") 吉祥幸福爬虫, 异常停止, 正在重启!" >> ${log_path}
@@ -90,33 +54,15 @@ else
 fi
 
 
-# 众妙音信
-if [[ "$time" > "03:00:00"  &&  "$time" < "04:59:59" || "$time" > "15:00:00"  &&  "$time" < "16:59:59" ]];then
-  echo "$(date "+%Y-%m-%d %H:%M:%S") 开始启动 众妙音信 爬虫脚本任务" >> ${log_path}
-#  ps aux | grep run_htzf | grep -v grep | awk '{print $2}' | xargs kill -9
-#  ps aux | grep run_ganggangdouchuan | grep -v grep | awk '{print $2}' | xargs kill -9
-  ps aux | grep run_jixiangxingfu | grep -v grep | awk '{print $2}' | xargs kill -9
-  ps -ef | grep "run_zhongmiaoyinxin_recommend.py" | grep -v "grep"
-  if [ "$?" -eq 1 ];then
-    echo "$(date "+%Y-%m-%d %H:%M:%S") 众妙音信小程序爬虫, 异常停止, 正在重启!" >> ${log_path}
-    adb forward --remove-all
-    cd ${piaoquan_crawler_dir}
-    nohup python3 -u zhongmiaoyinxin/zhongmiaoyinxin_main/run_zhongmiaoyinxin_recommend.py --log_type="recommend" --crawler="zhongmiaoyinxin" --env=${env} >>zhongmiaoyinxin/logs/nohup-recommend.log 2>&1 &
-    echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!" >> ${log_path}
-  else
-    echo "$(date "+%Y-%m-%d %H:%M:%S") 众妙音信小程序爬虫, 进程状态正常" >> ${log_path}
-  fi
-
-else
-  echo "$(date "+%Y-%m-%d %H:%M:%S") 众妙音信 爬虫脚本任务结束" >> ${log_path}
-fi
-
 # 众妙音信-new
-if [[ "$time" > "05:00:00"  &&  "$time" < "06:59:59" || "$time" > "17:00:00"  &&  "$time" < "18:59:59" ]];then
+if [[ "$time" > "02:00:00"  &&  "$time" < "03:59:59" || "$time" > "06:00:00"  &&  "$time" < "06:59:59" ]];then
   echo "$(date "+%Y-%m-%d %H:%M:%S") 开始启动 众妙音信-new 爬虫脚本任务" >> ${log_path}
 #  ps aux | grep run_htzf | grep -v grep | awk '{print $2}' | xargs kill -9
 #  ps aux | grep run_ganggangdouchuan | grep -v grep | awk '{print $2}' | xargs kill -9
-  ps aux | grep run_zhongmiaoyinxin | grep -v grep | awk '{print $2}' | xargs kill -9
+  ps aux | grep run_jixiangxingfu | grep -v grep | awk '{print $2}' | xargs kill -9
+  ps aux | grep run_zfqz | grep -v grep | awk '{print $2}' | xargs kill -9
+  ps aux | grep run_xngplus | grep -v grep | awk '{print $2}' | xargs kill -9
+  ps aux | grep run_xngrule | grep -v grep | awk '{print $2}' | xargs kill -9
   ps -ef | grep "run_zmyx_recommend.py" | grep -v "grep"
   if [ "$?" -eq 1 ];then
     echo "$(date "+%Y-%m-%d %H:%M:%S") 众妙音信-new小程序爬虫, 异常停止, 正在重启!" >> ${log_path}
@@ -133,11 +79,12 @@ else
 fi
 
 # 小年糕-rule
-if [[ "$time" > "07:00:00"  &&  "$time" < "08:59:59" || "$time" > "19:00:00"  &&  "$time" < "21:59:59" ]];then
+if [[ "$time" > "21:00:00"  &&  "$time" < "21:59:59" ]];then
   echo "$(date "+%Y-%m-%d %H:%M:%S") 开始启动 小年糕-rule 爬虫脚本任务" >> ${log_path}
 #  ps aux | grep run_htzf | grep -v grep | awk '{print $2}' | xargs kill -9
 #  ps aux | grep run_ganggangdouchuan | grep -v grep | awk '{print $2}' | xargs kill -9
   ps aux | grep run_zmyx | grep -v grep | awk '{print $2}' | xargs kill -9
+  ps aux | grep run_xngplus | grep -v grep | awk '{print $2}' | xargs kill -9
   ps -ef | grep "run_xngrule_recommend.py" | grep -v "grep"
   if [ "$?" -eq 1 ];then
     echo "$(date "+%Y-%m-%d %H:%M:%S") 小年糕-rule 小程序爬虫, 异常停止, 正在重启!" >> ${log_path}
@@ -153,39 +100,48 @@ else
   echo "$(date "+%Y-%m-%d %H:%M:%S") 小年糕-rule 爬虫脚本任务结束" >> ${log_path}
 fi
 
-## 看一看-线下offline
-#if [[ "$time" > "10:00:00"  &&  "$time" < "11:59:59" || "$time" > "22:00:00"  &&  "$time" < "23:59:59" ]];then
-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 开始启动 看一看-线下offline 爬虫脚本任务" >> ${log_path}
-##  ps aux | grep run_htzf | grep -v grep | awk '{print $2}' | xargs kill -9
-##  ps aux | grep run_ganggangdouchuan | grep -v grep | awk '{print $2}' | xargs kill -9
-#  ps aux | grep run_xngrule | grep -v grep | awk '{print $2}' | xargs kill -9
-#  ps -ef | grep "run_kykoffline_recommend.py" | grep -v "grep"
-#  if [ "$?" -eq 1 ];then
-#    echo "$(date "+%Y-%m-%d %H:%M:%S") 看一看-线下offline 小程序爬虫, 异常停止, 正在重启!" >> ${log_path}
-#    adb forward --remove-all
-#    cd ${piaoquan_crawler_dir}
-#    nohup python3 -u kanyikan/kanyikan_main/run_kykoffline_recommend.py --log_type="recommend" --crawler="kanyikan" --env=${env} >>kanyikan/logs/nohup-recommend.log 2>&1 &
-#    echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!" >> ${log_path}
-#  else
-#    echo "$(date "+%Y-%m-%d %H:%M:%S") 看一看-线下offline 程序爬虫, 进程状态正常" >> ${log_path}
-#  fi
-#
-#else
-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 看一看-线下offline 爬虫脚本任务结束" >> ${log_path}
-#fi
+
+# Xiaoniangao+ (小年糕+): runs in four daily windows (10-12, 13-15, 17-18, 19-21 o'clock)
+if [[ "$time" > "10:00:00"  &&  "$time" < "11:59:59" || "$time" > "19:00:00"  &&  "$time" < "20:59:59" || "$time" > "13:00:00"  &&  "$time" < "14:59:59" || "$time" > "17:00:00"  &&  "$time" < "17:59:59" ]];then
+  echo "$(date "+%Y-%m-%d %H:%M:%S") 开始启动 小年糕+ 爬虫脚本任务" >> ${log_path}
+#  ps aux | grep run_htzf | grep -v grep | awk '{print $2}' | xargs kill -9
+#  ps aux | grep run_ganggangdouchuan | grep -v grep | awk '{print $2}' | xargs kill -9
+  # Kill every other crawler that could be holding the shared device
+  ps aux | grep run_xngrule | grep -v grep | awk '{print $2}' | xargs kill -9
+  ps aux | grep run_zfqz | grep -v grep | awk '{print $2}' | xargs kill -9
+  ps aux | grep run_zmyx | grep -v grep | awk '{print $2}' | xargs kill -9
+  ps aux | grep run_jixiangxingfu | grep -v grep | awk '{print $2}' | xargs kill -9
+  ps aux | grep run_ppq | grep -v grep | awk '{print $2}' | xargs kill -9
+  # grep exits 1 when the crawler is not running ($? checked below)
+  ps -ef | grep "run_xngplus_recommend.py" | grep -v "grep"
+  if [ "$?" -eq 1 ];then
+    echo "$(date "+%Y-%m-%d %H:%M:%S") 小年糕+ 小程序爬虫, 异常停止, 正在重启!" >> ${log_path}
+    adb forward --remove-all
+    cd ${piaoquan_crawler_dir}
+    # NOTE(review): this entry launches python3.10 while sibling blocks use python3 — confirm intentional
+    nohup python3.10 -u xiaoniangaoplus/xiaoniangaoplus_main/run_xngplus_recommend.py --log_type="recommend" --crawler="xiaoniangaoplus" --env=${env} >>xiaoniangaoplus/logs/nohup-recommend.log 2>&1 &
+    echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!" >> ${log_path}
+  else
+    echo "$(date "+%Y-%m-%d %H:%M:%S") 小年糕+ 程序爬虫, 进程状态正常" >> ${log_path}
+  fi
+
+else
+  echo "$(date "+%Y-%m-%d %H:%M:%S") 小年糕+ 爬虫脚本任务结束" >> ${log_path}
+fi
+
 
 # 祝福圈子
-if [[ "$time" > "10:00:00"  &&  "$time" < "11:59:59" || "$time" > "22:00:00"  &&  "$time" < "23:59:59" ]];then
+if [[ "$time" > "07:00:00"  &&  "$time" < "08:59:59" || "$time" > "15:00:00"  &&  "$time" < "15:59:59" ]];then
   echo "$(date "+%Y-%m-%d %H:%M:%S") 开始启动 祝福圈子 爬虫脚本任务" >> ${log_path}
 #  ps aux | grep run_htzf | grep -v grep | awk '{print $2}' | xargs kill -9
 #  ps aux | grep run_ganggangdouchuan | grep -v grep | awk '{print $2}' | xargs kill -9
+  ps aux | grep run_xngplus | grep -v grep | awk '{print $2}' | xargs kill -9
   ps aux | grep run_xngrule | grep -v grep | awk '{print $2}' | xargs kill -9
+  ps aux | grep run_jixiangxingfu | grep -v grep | awk '{print $2}' | xargs kill -9
+  ps aux | grep run_zmyx | grep -v grep | awk '{print $2}' | xargs kill -9
   ps -ef | grep "run_zfqz_recommend.py" | grep -v "grep"
   if [ "$?" -eq 1 ];then
     echo "$(date "+%Y-%m-%d %H:%M:%S") 祝福圈子 小程序爬虫, 异常停止, 正在重启!" >> ${log_path}
     adb forward --remove-all
     cd ${piaoquan_crawler_dir}
-    nohup python3 -u zhufuquanzi/zhufuquanzi_main/run_zfqz_recommend.py --log_type="recommend" --crawler="zfqz" --env=${env} >>zhufuquanzi/logs/nohup-recommend.log 2>&1 &
+    nohup python3 -u zhufuquanzi/zhufuquanzi_main/run_zfqz_recommend.py --log_type="recommend" --crawler="zhufuquanzi" --env=${env} >>zhufuquanzi/logs/nohup-recommend.log 2>&1 &
     echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!" >> ${log_path}
   else
     echo "$(date "+%Y-%m-%d %H:%M:%S") 祝福圈子 程序爬虫, 进程状态正常" >> ${log_path}
@@ -195,6 +151,68 @@ else
   echo "$(date "+%Y-%m-%d %H:%M:%S") 祝福圈子 爬虫脚本任务结束" >> ${log_path}
 fi
 
+
+# 漂漂圈
+if [[ "$time" > "16:00:00"  &&  "$time" < "16:59:59" ]];then
+  echo "$(date "+%Y-%m-%d %H:%M:%S") 开始启动 漂漂圈 爬虫脚本任务" >> ${log_path}
+#  ps aux | grep run_htzf | grep -v grep | awk '{print $2}' | xargs kill -9
+#  ps aux | grep run_ganggangdouchuan | grep -v grep | awk '{print $2}' | xargs kill -9
+  ps aux | grep run_xngplus | grep -v grep | awk '{print $2}' | xargs kill -9
+  ps aux | grep run_xngrule | grep -v grep | awk '{print $2}' | xargs kill -9
+  ps aux | grep run_jixiangxingfu | grep -v grep | awk '{print $2}' | xargs kill -9
+  ps aux | grep run_zmyx | grep -v grep | awk '{print $2}' | xargs kill -9
+  ps aux | grep run_zfqz | grep -v grep | awk '{print $2}' | xargs kill -9
+  ps -ef | grep "run_ppq_recommend.py" | grep -v "grep"
+  if [ "$?" -eq 1 ];then
+    echo "$(date "+%Y-%m-%d %H:%M:%S") 漂漂圈 小程序爬虫, 异常停止, 正在重启!" >> ${log_path}
+    adb forward --remove-all
+    cd ${piaoquan_crawler_dir}
+    nohup python3 -u piaopiaoquan/piaopiaoquan_main/run_ppq_recommend.py --log_type="recommend" --crawler="piaopiaoquan" --env=${env} >>piaopiaoquan/logs/nohup-recommend.log 2>&1 &
+    echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!" >> ${log_path}
+  else
+    echo "$(date "+%Y-%m-%d %H:%M:%S") 漂漂圈 程序爬虫, 进程状态正常" >> ${log_path}
+  fi
+
+# 漂漂圈-精选
+if [[ "$time" > "22:00:00"  &&  "$time" < "23:59:59" ]];then
+  echo "$(date "+%Y-%m-%d %H:%M:%S") 开始启动 漂漂圈精选 爬虫脚本任务" >> ${log_path}
+#  ps aux | grep run_htzf | grep -v grep | awk '{print $2}' | xargs kill -9
+#  ps aux | grep run_ganggangdouchuan | grep -v grep | awk '{print $2}' | xargs kill -9
+  ps aux | grep run_xngplus | grep -v grep | awk '{print $2}' | xargs kill -9
+  ps aux | grep run_xngrule | grep -v grep | awk '{print $2}' | xargs kill -9
+  ps aux | grep run_jixiangxingfu | grep -v grep | awk '{print $2}' | xargs kill -9
+  ps aux | grep run_zmyx | grep -v grep | awk '{print $2}' | xargs kill -9
+  ps aux | grep run_zfqz | grep -v grep | awk '{print $2}' | xargs kill -9
+  ps aux | grep run_ppq | grep -v grep | awk '{print $2}' | xargs kill -9
+  ps -ef | grep "run_ppqsift_recommend.py" | grep -v "grep"
+  if [ "$?" -eq 1 ];then
+    echo "$(date "+%Y-%m-%d %H:%M:%S") 漂漂圈精选 小程序爬虫, 异常停止, 正在重启!" >> ${log_path}
+    adb forward --remove-all
+    cd ${piaoquan_crawler_dir}
+    nohup python3 -u piaopiaoquan/piaopiaoquan_main/run_ppqsift_recommend.py --log_type="recommend" --crawler="piaopiaoquan" --env=${env} >>piaopiaoquan/logs/nohup-recommend.log 2>&1 &
+    echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!" >> ${log_path}
+  else
+    echo "$(date "+%Y-%m-%d %H:%M:%S") 漂漂圈精选 程序爬虫, 进程状态正常" >> ${log_path}
+  fi
+
+else
+  echo "$(date "+%Y-%m-%d %H:%M:%S") 漂漂圈精选 爬虫脚本任务结束" >> ${log_path}
+fi
+
+
+
+
+#if [[ "$time" > "13:00:00"  &&  "$time" < "13:59:59" || "$time" > "16:00:00"  &&  "$time" < "18:59:59" || "$time" > "22:00:00"  &&  "$time" < "23:59:59" ]];then
+#  echo "$(date "+%Y-%m-%d %H:%M:%S") 爬虫脚本任务结束" >> ${log_path}
+##  ps aux | grep run_htzf | grep -v grep | awk '{print $2}' | xargs kill -9
+##  ps aux | grep run_ganggangdouchuan | grep -v grep | awk '{print $2}' | xargs kill -9
+#  ps aux | grep run_xngplus | grep -v grep | awk '{print $2}' | xargs kill -9
+#  ps aux | grep run_zfqz | grep -v grep | awk '{print $2}' | xargs kill -9
+#  ps aux | grep run_xngrule | grep -v grep | awk '{print $2}' | xargs kill -9
+#  ps aux | grep run_jixiangxingfu | grep -v grep | awk '{print $2}' | xargs kill -9
+#  ps aux | grep run_zmyx | grep -v grep | awk '{print $2}' | xargs kill -9
+#fi
+
 # 删除日志
 echo "$(date "+%Y-%m-%d %H:%M:%S") 开始清理 10 天前的日志文件" >> ${log_path}
 find ${piaoquan_crawler_dir}main/main_logs/ -mtime +10 -name "*.log" -exec rm -rf {} \;

+ 0 - 0
meitiansongzhufu/__init__.py


+ 0 - 0
meitiansongzhufu/meitiansongzhufu_main/__init__.py


+ 153 - 0
meitiansongzhufu/meitiansongzhufu_main/run_mtszf_recommend.py

@@ -0,0 +1,153 @@
+import argparse
+from mq_http_sdk.mq_client import *
+from mq_http_sdk.mq_consumer import *
+from mq_http_sdk.mq_exception import MQExceptionBase
+
+sys.path.append(os.getcwd())
+from common.public import task_fun_mq, get_consumer, ack_message
+from common.scheduling_db import MysqlHelper
+from common import AliyunLogger
+from meitiansongzhufu.meitiansongzhufu_recommend import SongZhuFuRecommend
+
+
def main(log_type, crawler, topic_name, group_id, env):
    """Long-poll the MQ topic for crawl tasks and run the meitiansongzhufu
    (每天送祝福) recommend crawler once per received message.

    :param log_type:   log category / run mode (e.g. "recommend")
    :param crawler:    crawler platform name, used for logging and the user lookup
    :param topic_name: MQ topic to consume from
    :param group_id:   MQ consumer group id
    :param env:        runtime environment ("dev" / "prod")

    Runs forever; each message is acked immediately after receipt, then its
    task/rule payload is parsed and a full crawl round is executed.
    """
    consumer = get_consumer(topic_name, group_id)
    # 长轮询表示如果Topic没有消息,则客户端请求会在服务端挂起3秒,3秒内如果有消息可以消费则立即返回响应。
    # Long-poll wait in seconds (server-side max is 30).
    wait_seconds = 30
    # Messages fetched per request (server-side max is 16).
    batch = 1
    AliyunLogger.logging(
        code="1000",
        platform=crawler,
        mode=log_type,
        env=env,
        message=f'{10 * "="}Consume And Ack Message From Topic{10 * "="}\n'
        f"WaitSeconds:{wait_seconds}\n"
        f"TopicName:{topic_name}\n"
        f"MQConsumer:{group_id}",
    )
    while True:
        try:
            # 长轮询消费消息。
            recv_msgs = consumer.consume_message(batch, wait_seconds)
            for msg in recv_msgs:
                AliyunLogger.logging(
                    code="1000",
                    platform=crawler,
                    mode=log_type,
                    env=env,
                    message=f"Receive\n"
                    f"MessageId:{msg.message_id}\n"
                    f"MessageBodyMD5:{msg.message_body_md5}\n"
                    f"MessageTag:{msg.message_tag}\n"
                    f"ConsumedTimes:{msg.consumed_times}\n"
                    f"PublishTime:{msg.publish_time}\n"
                    f"Body:{msg.message_body}\n"
                    f"NextConsumeTime:{msg.next_consume_time}\n"
                    f"ReceiptHandle:{msg.receipt_handle}\n"
                    f"Properties:{msg.properties}",
                )
                # ack_mq_message — ack before processing so a crash mid-crawl
                # does not cause the same task to be redelivered.
                ack_message(
                    log_type=log_type,
                    crawler=crawler,
                    recv_msgs=recv_msgs,
                    consumer=consumer,
                )
                # Parse the message body once and reuse it for both dicts
                # (the original parsed it twice).
                body_dict = task_fun_mq(msg.message_body)
                task_dict = body_dict["task_dict"]
                AliyunLogger.logging(
                    code="1000",
                    platform=crawler,
                    mode=log_type,
                    env=env,
                    # Fixed: the "f" prefix was inside the quotes
                    # ("f调度任务:{task_dict}"), so the placeholder was logged
                    # literally instead of interpolated.
                    message=f"调度任务:{task_dict}",
                )
                rule_dict = body_dict["rule_dict"]
                AliyunLogger.logging(
                    code="1000",
                    platform=crawler,
                    mode=log_type,
                    env=env,
                    message=f"抓取规则:{rule_dict}\n",
                )
                # Look up the user list bound to this task.
                task_id = task_dict["id"]
                select_user_sql = (
                    f"""select * from crawler_user_v3 where task_id={task_id}"""
                )
                user_list = MysqlHelper.get_values(
                    log_type, crawler, select_user_sql, env, action=""
                )
                AliyunLogger.logging(
                    code="1003",
                    platform=crawler,
                    mode=log_type,
                    env=env,
                    message="开始抓取"
                )
                AliyunLogger.logging(
                    code="1000",
                    platform=crawler,
                    mode=log_type,
                    env=env,
                    message="开始抓取每天送祝福——推荐",
                )
                main_process = SongZhuFuRecommend(
                    platform=crawler,
                    mode=log_type,
                    rule_dict=rule_dict,
                    user_list=user_list,
                    env=env
                )
                main_process.get_video_list()
                AliyunLogger.logging(
                    code="1000",
                    platform=crawler,
                    mode=log_type,
                    env=env,
                    message="完成抓取——每天送祝福",
                )
                AliyunLogger.logging(
                    code="1004", platform=crawler, mode=log_type, env=env, message="结束一轮抓取"
                )

        except MQExceptionBase as err:
            # Topic中没有消息可消费。
            if err.type == "MessageNotExist":
                AliyunLogger.logging(
                    code="2000",
                    platform=crawler,
                    mode=log_type,
                    env=env,
                    message=f"No new message! RequestId:{err.req_id}\n",
                )
                continue
            AliyunLogger.logging(
                code="2000",
                platform=crawler,
                mode=log_type,
                env=env,
                message=f"Consume Message Fail! Exception:{err}\n",
            )
            # Brief back-off before retrying after an unexpected MQ error.
            time.sleep(2)
            continue
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()  ## 新建参数解释器对象
+    parser.add_argument("--log_type", type=str)  ## 添加参数,注明参数类型
+    parser.add_argument("--crawler")  ## 添加参数
+    parser.add_argument("--topic_name")  ## 添加参数
+    parser.add_argument("--group_id")  ## 添加参数
+    parser.add_argument("--env")  ## 添加参数
+    args = parser.parse_args()  ### 参数赋值,也可以通过终端赋值
+    main(
+        log_type=args.log_type,
+        crawler=args.crawler,
+        topic_name=args.topic_name,
+        group_id=args.group_id,
+        env=args.env,
+    )

+ 1 - 0
meitiansongzhufu/meitiansongzhufu_recommend/__init__.py

@@ -0,0 +1 @@
+from .meitiansongzhufu_recommend import SongZhuFuRecommend

+ 70 - 0
meitiansongzhufu/meitiansongzhufu_recommend/main_request.py

@@ -0,0 +1,70 @@
+import json
+
+import requests
+
+
def get_video_detail(vid, userad, cate):
    """Fetch the detail record for one video and pretty-print the JSON.

    Debug helper against the mini-program backend ("jyt_txvideo" module).

    :param vid:    video id taken from the recommend feed
    :param userad: user/ad identifier (empty string is accepted by the API)
    :param cate:   category id the video belongs to
    :return: parsed JSON response as a dict (also printed, as before)
    """
    # URL and payload
    url = "https://gkvxwq2023.we-media.cn/app/index.php"
    payload = {
        "i": "1",
        "t": "0",
        "m": "jyt_txvideo",
        "v": "1.0.0",
        "from": "wxapp",
        "c": "entry",
        "a": "wxapp",
        "do": "videodetail",
        "vid": vid,
        "userad": userad,
        "cate": cate,
    }
    headers = {
        "Host": "gkvxwq2023.we-media.cn",
        "xweb_xhr": "1",
        "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 MicroMessenger/6.8.0(0x16080000) NetType/WIFI MiniProgramEnv/Mac MacWechat/WMPF MacWechat/3.8.5(0x13080510)XWEB/1100",
        "content-type": "application/x-www-form-urlencoded",
        "accept": "*/*",
        "referer": "https://servicewechat.com/wx49f06df06becc7fa/2/page-frame.html",
        "accept-language": "en-US,en;q=0.9",
    }
    # Fixed: added a timeout so a stalled connection cannot hang the process
    # forever (the original request had none).
    response = requests.post(url, headers=headers, data=payload, timeout=30)
    body = response.json()
    print(json.dumps(body, ensure_ascii=False, indent=4))
    return body
+
+
def get_video_list():
    """Fetch one page of the recommend feed (category 494).

    The "videotui" endpoint is unpaginated: every call returns a fresh batch.

    :return: parsed JSON response as a dict; videos are under the "data" key
             (per the __main__ consumer below)
    """
    url = "https://gkvxwq2023.we-media.cn/app/index.php"
    payload = {
        "i": "1",
        "t": "0",
        "m": "jyt_txvideo",
        "v": "1.0.0",
        "from": "wxapp",
        "c": "entry",
        "a": "wxapp",
        "do": "videotui",
        "city": "",
        "category": "494",
    }
    headers = {
        "Host": "gkvxwq2023.we-media.cn",
        "xweb_xhr": "1",
        "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 MicroMessenger/6.8.0(0x16080000) NetType/WIFI MiniProgramEnv/Mac MacWechat/WMPF MacWechat/3.8.5(0x13080510)XWEB/1100",
        "content-type": "application/x-www-form-urlencoded",
        "accept": "*/*",
        "referer": "https://servicewechat.com/wx49f06df06becc7fa/2/page-frame.html",
        "accept-language": "en-US,en;q=0.9",
    }
    # Fixed: added a timeout so a dead connection cannot hang the crawler
    # forever (the original request had none).
    response = requests.post(url, data=payload, headers=headers, timeout=30)
    return response.json()
+
+
+
+if __name__ == "__main__":
+    result = get_video_list()
+    # print(json.dumps(result, ensure_ascii=False, indent=4))
+    for video_obj in result["data"]:
+        vid = video_obj['vid']
+        cate = video_obj['category']
+        userad = ''
+        get_video_detail(vid=vid, userad=userad, cate=cate)

+ 158 - 0
meitiansongzhufu/meitiansongzhufu_recommend/meitiansongzhufu_dev.py

@@ -0,0 +1,158 @@
+import os
+import json
+import random
+import sys
+import time
+import uuid
+
+import requests
+
+sys.path.append(os.getcwd())
+from common.video_item import VideoItem
+from common import tunnel_proxies
+from common.pipeline import PiaoQuanPipelineTest
+
+
def get_video_detail(vid, userad, cate):
    """Fetch the detail record for a single video from the backend.

    :param vid: the ``vid`` field of a feed item
    :param userad: opaque token forwarded to the API (callers pass "")
    :param cate: the item's category id
    :return: decoded JSON response (callers read the ``data`` key)
    :raises requests.RequestException: on network failure or timeout
    """
    # Random pause between detail requests to avoid hammering the backend.
    time.sleep(random.randint(5, 25))
    url = "https://gkvxwq2023.we-media.cn/app/index.php"
    payload = {
        "i": "1",
        "t": "0",
        "m": "jyt_txvideo",
        "v": "1.0.0",
        "from": "wxapp",
        "c": "entry",
        "a": "wxapp",
        "do": "videodetail",
        "vid": vid,
        "userad": userad,
        "cate": cate,
    }
    # Headers mimic the WeChat mini-program webview on macOS.
    headers = {
        "Host": "gkvxwq2023.we-media.cn",
        "xweb_xhr": "1",
        "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 MicroMessenger/6.8.0(0x16080000) NetType/WIFI MiniProgramEnv/Mac MacWechat/WMPF MacWechat/3.8.5(0x13080510)XWEB/1100",
        "content-type": "application/x-www-form-urlencoded",
        "accept": "*/*",
        "referer": "https://servicewechat.com/wx49f06df06becc7fa/2/page-frame.html",
        "accept-language": "en-US,en;q=0.9",
    }
    # timeout added: without it a stalled connection hangs the crawler forever.
    response = requests.request("POST", url, headers=headers, data=payload, timeout=30)
    return response.json()
+
+
class SongZhuFuRecommend(object):
    """Dev/test version of the "meitiansongzhufu" recommend-feed crawler.

    Mirrors meitiansongzhufu_recommend.py but prints to stdout instead of
    logging to Aliyun, and pushes items through PiaoQuanPipelineTest rather
    than the production pipeline/MQ.
    """

    def __init__(self, platform, mode, rule_dict, user_list, env):
        self.platform = platform      # crawler platform name, e.g. "meitiansongzhufu"
        self.mode = mode              # crawl strategy, e.g. "recommend"
        self.rule_dict = rule_dict    # filter rules; videos_cnt.min caps this run
        self.user_list = user_list    # pool of our uploader accounts (uid/nick_name)
        self.env = env                # "dev" or "prod"
        self.download_cnt = 0         # videos accepted by the pipeline so far
        self.limit_flag = False       # set True once download_cnt reaches the cap

    def get_video_list(self):
        """Poll the recommend feed until the daily quota is hit.

        The recommend feed has no paging: every request returns a fresh batch,
        so the loop simply re-requests until enough videos were captured
        (target is roughly 100-200 per day).
        """
        base_url = "https://gkvxwq2023.we-media.cn/app/index.php"
        # Headers mimic the WeChat mini-program webview on macOS.
        headers = {
            "Host": "gkvxwq2023.we-media.cn",
            "xweb_xhr": "1",
            "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 MicroMessenger/6.8.0(0x16080000) NetType/WIFI MiniProgramEnv/Mac MacWechat/WMPF MacWechat/3.8.5(0x13080510)XWEB/1100",
            "content-type": "application/x-www-form-urlencoded",
            "accept": "*/*",
            "referer": "https://servicewechat.com/wx49f06df06becc7fa/2/page-frame.html",
            "accept-language": "en-US,en;q=0.9",
        }
        # do=videotui selects the recommend feed; category 494 is fixed.
        params = {
            "i": "1",
            "t": "0",
            "m": "jyt_txvideo",
            "v": "1.0.0",
            "from": "wxapp",
            "c": "entry",
            "a": "wxapp",
            "do": "videotui",
            "city": "",
            "category": "494",
        }
        # NOTE(review): a persistent request failure is caught below and the
        # loop retries forever with no backoff cap — confirm this is intended.
        while True:
            time.sleep(random.randint(1, 10))
            try:
                if self.limit_flag:
                    message="本轮已经抓取到足够的数据,自动退出\t{}".format(self.download_cnt)
                    print(message)
                    return
                else:
                    # Requests go out through the tunnel proxy pool.
                    response = requests.get(
                        url=base_url,
                        headers=headers,
                        params=params,
                        proxies=tunnel_proxies(),
                    )
                    video_list = response.json()['data']
                    if video_list:
                        for index, video_obj in enumerate(video_list, 1):
                            try:
                                print("扫描到一条视频")
                                self.process_video_obj(video_obj)
                            except Exception as e:
                                # One bad item must not abort the whole batch.
                                print("抓取第{}条的时候出现问题, 报错信息是{}".format(index, e))
                    else:
                        print("已经抓完了,自动退出")
                        return
            except Exception as e:
                print("抓取推荐页的时候出现错误, 报错信息是{}".format(e))

    def process_video_obj(self, video_obj):
        """Enrich one feed item with its detail record and run it through the
        test pipeline; counts it toward the quota when the pipeline accepts it."""
        # trace_id ties every log line of this video's lifecycle together.
        trace_id = self.platform + str(uuid.uuid1())
        # Each captured video is attributed to a random account of ours.
        our_user = random.choice(self.user_list)
        item = VideoItem()
        item.add_video_info("user_id", our_user["uid"])
        item.add_video_info("user_name", our_user["nick_name"])
        item.add_video_info("video_id", video_obj["id"])
        item.add_video_info("video_title", video_obj["vtitle"])
        item.add_video_info("publish_time_stamp", int(video_obj['create_time']))
        vid = video_obj['vid']
        userad = ""
        cate = video_obj['category']
        # Second request: the feed item lacks the playable URL/cover/duration.
        detail_obj = get_video_detail(vid=vid, userad=userad, cate=cate)['data']
        # NOTE(review): video_url is taken from the detail "vid" field —
        # presumably that holds the playable URL here; confirm against the API.
        item.add_video_info("video_url", detail_obj['vid'])
        item.add_video_info("cover_url", detail_obj["poster"])
        item.add_video_info("duration", detail_obj['vtime'])
        item.add_video_info("play_cnt", detail_obj['realview'])
        item.add_video_info("out_video_id", video_obj["id"])
        item.add_video_info("platform", self.platform)
        item.add_video_info("strategy", self.mode)
        item.add_video_info("session", "{}-{}".format(self.platform, int(time.time())))
        mq_obj = item.produce_item()
        pipeline = PiaoQuanPipelineTest(
            platform=self.platform,
            mode=self.mode,
            rule_dict=self.rule_dict,
            env=self.env,
            item=mq_obj,
            trace_id=trace_id,
        )
        if pipeline.process_item():
            self.download_cnt += 1
            # self.mq.send_msg(mq_obj)
            print(mq_obj)
            print("成功发送至 ETL")
            # Stop the feed loop once the per-run quota (default 200) is met.
            if self.download_cnt >= int(
                self.rule_dict.get("videos_cnt", {}).get("min", 200)
            ):
                self.limit_flag = True
+
if __name__ == '__main__':
    # Manual smoke run of the dev crawler with two test accounts.
    test_users = [
        {'nick_name': "Ivring", 'uid': "1997"},
        {'nick_name': "paul", 'uid': "1998"},
    ]
    crawler = SongZhuFuRecommend(
        platform="meitiansongzhufu",
        mode="recommend",
        rule_dict={},
        user_list=test_users,
        env="dev",
    )
    crawler.get_video_list()

+ 190 - 0
meitiansongzhufu/meitiansongzhufu_recommend/meitiansongzhufu_recommend.py

@@ -0,0 +1,190 @@
+import os
+import json
+import random
+import sys
+import time
+import uuid
+
+import requests
+
+sys.path.append(os.getcwd())
+from common.video_item import VideoItem
+from common import PiaoQuanPipeline, AliyunLogger, tunnel_proxies
+from common.mq import MQ
+
+
def get_video_detail(vid, userad, cate):
    """Fetch the detail record for a single video from the backend.

    :param vid: the ``vid`` field of a feed item
    :param userad: opaque token forwarded to the API (callers pass "")
    :param cate: the item's category id
    :return: decoded JSON response (callers read the ``data`` key)
    :raises requests.RequestException: on network failure or timeout
    """
    # Random pause between detail requests to avoid hammering the backend.
    time.sleep(random.randint(5, 25))
    url = "https://gkvxwq2023.we-media.cn/app/index.php"
    payload = {
        "i": "1",
        "t": "0",
        "m": "jyt_txvideo",
        "v": "1.0.0",
        "from": "wxapp",
        "c": "entry",
        "a": "wxapp",
        "do": "videodetail",
        "vid": vid,
        "userad": userad,
        "cate": cate,
    }
    # Headers mimic the WeChat mini-program webview on macOS.
    headers = {
        "Host": "gkvxwq2023.we-media.cn",
        "xweb_xhr": "1",
        "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 MicroMessenger/6.8.0(0x16080000) NetType/WIFI MiniProgramEnv/Mac MacWechat/WMPF MacWechat/3.8.5(0x13080510)XWEB/1100",
        "content-type": "application/x-www-form-urlencoded",
        "accept": "*/*",
        "referer": "https://servicewechat.com/wx49f06df06becc7fa/2/page-frame.html",
        "accept-language": "en-US,en;q=0.9",
    }
    # timeout added: without it a stalled connection hangs the crawler forever.
    response = requests.request("POST", url, headers=headers, data=payload, timeout=30)
    return response.json()
+
+
class SongZhuFuRecommend(object):
    """Production recommend-feed crawler for the "meitiansongzhufu" platform.

    Polls the mini-program recommend feed, enriches each item with its detail
    record, validates it through PiaoQuanPipeline, and publishes accepted
    items to the ETL MQ topic. Progress/errors go to AliyunLogger.
    """

    def __init__(self, platform, mode, rule_dict, user_list, env):
        self.platform = platform      # crawler platform name
        self.mode = mode              # crawl strategy, e.g. "recommend"
        self.rule_dict = rule_dict    # filter rules; videos_cnt.min caps this run
        self.user_list = user_list    # pool of our uploader accounts (uid/nick_name)
        self.env = env                # "dev" or "prod"; also selects the MQ topic
        self.download_cnt = 0         # videos accepted by the pipeline so far
        self.mq = MQ(topic_name="topic_crawler_etl_" + self.env)
        self.limit_flag = False       # set True once download_cnt reaches the cap

    def get_video_list(self):
        """Poll the recommend feed until the daily quota is hit.

        The recommend feed has no paging: every request returns a fresh batch,
        so the loop simply re-requests until enough videos were captured
        (target is roughly 100-200 per day).
        """
        base_url = "https://gkvxwq2023.we-media.cn/app/index.php"
        # Headers mimic the WeChat mini-program webview on macOS.
        headers = {
            "Host": "gkvxwq2023.we-media.cn",
            "xweb_xhr": "1",
            "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 MicroMessenger/6.8.0(0x16080000) NetType/WIFI MiniProgramEnv/Mac MacWechat/WMPF MacWechat/3.8.5(0x13080510)XWEB/1100",
            "content-type": "application/x-www-form-urlencoded",
            "accept": "*/*",
            "referer": "https://servicewechat.com/wx49f06df06becc7fa/2/page-frame.html",
            "accept-language": "en-US,en;q=0.9",
        }
        # do=videotui selects the recommend feed; category 494 is fixed.
        params = {
            "i": "1",
            "t": "0",
            "m": "jyt_txvideo",
            "v": "1.0.0",
            "from": "wxapp",
            "c": "entry",
            "a": "wxapp",
            "do": "videotui",
            "city": "",
            "category": "494",
        }
        # NOTE(review): a persistent request failure is caught below and the
        # loop retries forever with no backoff cap — confirm this is intended.
        while True:
            time.sleep(random.randint(1, 10))
            try:
                if self.limit_flag:
                    AliyunLogger.logging(
                        code="2000",
                        platform=self.platform,
                        mode=self.mode,
                        env=self.env,
                        message="本轮已经抓取到足够的数据,自动退出\t{}".format(self.download_cnt),
                    )
                    return
                else:
                    # Requests go out through the tunnel proxy pool.
                    response = requests.get(
                        url=base_url,
                        headers=headers,
                        params=params,
                        proxies=tunnel_proxies(),
                    )
                    video_list = response.json()['data']
                    if video_list:
                        for index, video_obj in enumerate(video_list, 1):
                            try:
                                AliyunLogger.logging(
                                    code="1001",
                                    platform=self.platform,
                                    mode=self.mode,
                                    env=self.env,
                                    message="扫描到一条视频",
                                    data=video_obj,
                                )
                                self.process_video_obj(video_obj)
                            except Exception as e:
                                # One bad item must not abort the whole batch.
                                AliyunLogger.logging(
                                    code="3000",
                                    platform=self.platform,
                                    mode=self.mode,
                                    env=self.env,
                                    data=video_obj,
                                    message="抓取第{}条的时候出现问题, 报错信息是{}".format(index, e),
                                )
                    else:
                        # Empty batch: treat as feed exhausted for this run.
                        AliyunLogger.logging(
                            code="2000",
                            platform=self.platform,
                            mode=self.mode,
                            env=self.env,
                            message="已经抓完了,自动退出"
                        )
                        return
            except Exception as e:
                AliyunLogger.logging(
                    code="3000",
                    platform=self.platform,
                    mode=self.mode,
                    env=self.env,
                    message="抓取推荐页的时候出现错误, 报错信息是{}".format(e),
                )

    def process_video_obj(self, video_obj):
        """Enrich one feed item with its detail record, validate it through
        the pipeline, and publish to MQ; counts accepted items toward the quota."""
        # trace_id ties every log line of this video's lifecycle together.
        trace_id = self.platform + str(uuid.uuid1())
        # Each captured video is attributed to a random account of ours.
        our_user = random.choice(self.user_list)
        item = VideoItem()
        item.add_video_info("user_id", our_user["uid"])
        item.add_video_info("user_name", our_user["nick_name"])
        item.add_video_info("video_id", video_obj["id"])
        item.add_video_info("video_title", video_obj["vtitle"])
        item.add_video_info("publish_time_stamp", int(video_obj['create_time']))
        vid = video_obj['vid']
        # Only items whose vid carries the "new" prefix are processed; all
        # others are skipped. (The dev script has no such filter.)
        if vid.startswith("new"):
            userad = ""
            cate = video_obj['category']
        else:
            return
        # Second request: the feed item lacks the playable URL/cover/duration.
        detail_obj = get_video_detail(vid=vid, userad=userad, cate=cate)['data']
        # NOTE(review): video_url is taken from the detail "vid" field —
        # presumably that holds the playable URL here; confirm against the API.
        item.add_video_info("video_url", detail_obj['vid'])
        item.add_video_info("cover_url", detail_obj["poster"])
        item.add_video_info("duration", detail_obj['vtime'])
        item.add_video_info("play_cnt", detail_obj['realview'])
        item.add_video_info("out_video_id", video_obj["id"])
        item.add_video_info("platform", self.platform)
        item.add_video_info("strategy", self.mode)
        item.add_video_info("session", "{}-{}".format(self.platform, int(time.time())))
        mq_obj = item.produce_item()
        pipeline = PiaoQuanPipeline(
            platform=self.platform,
            mode=self.mode,
            rule_dict=self.rule_dict,
            env=self.env,
            item=mq_obj,
            trace_id=trace_id,
        )
        if pipeline.process_item():
            self.download_cnt += 1
            self.mq.send_msg(mq_obj)
            # print(mq_obj)
            AliyunLogger.logging(
                code="1002",
                platform=self.platform,
                mode=self.mode,
                env=self.env,
                message="成功发送至 ETL",
                data=mq_obj,
            )
            # Stop the feed loop once the per-run quota (default 200) is met.
            if self.download_cnt >= int(
                self.rule_dict.get("videos_cnt", {}).get("min", 200)
            ):
                self.limit_flag = True
+

+ 0 - 0
piaopiaoquan/__init__.py


+ 0 - 0
piaopiaoquan/logs/__init__.py


+ 0 - 0
piaopiaoquan/piaopiaoquan/__init__.py


+ 393 - 0
piaopiaoquan/piaopiaoquan/piaopiaoquan_recommend.py

@@ -0,0 +1,393 @@
+# -*- coding: utf-8 -*-
+# @Author: zhangyong
+# @Time: 2023/11/24
+import json
+import os
+import random
+import sys
+import time
+import uuid
+from hashlib import md5
+
+import requests
+from appium import webdriver
+from appium.webdriver.extensions.android.nativekey import AndroidKey
+from appium.webdriver.webdriver import WebDriver
+from bs4 import BeautifulSoup
+from selenium.common.exceptions import NoSuchElementException
+from selenium.webdriver.common.by import By
+import multiprocessing
+
+
+sys.path.append(os.getcwd())
+from common import AliyunLogger, PiaoQuanPipeline, get_redirect_url
+from common.common import Common
+from common.mq import MQ
+
+
class PPQRecommend:
    """Appium-driven crawler for the "漂漂圈丨福年" WeChat mini-program.

    Constructing an instance drives the whole crawl: it launches WeChat on an
    attached Android device, opens the mini-program, scrolls the recommend
    feed, extracts metadata from the rendered webview with BeautifulSoup, and
    publishes accepted videos to the ETL MQ topic.
    """

    env = None
    driver = None
    log_type = None

    def __init__(self, log_type, crawler, env, rule_dict, our_uid):
        self.mq = None
        self.platform = "piaopiaoquan"
        self.download_cnt = 0       # videos published this round
        self.element_list = []
        self.count = 0              # videos inspected this round
        self.swipe_count = 0        # feed swipes performed (hard-capped later)
        self.log_type = log_type
        self.crawler = crawler
        self.env = env
        self.rule_dict = rule_dict
        self.our_uid = our_uid
        # NOTE(review): both branches use the same chromedriver path, so the
        # env switch is currently a no-op — confirm the prod path.
        if self.env == "dev":
            chromedriverExecutable = "/Users/piaoquan/Downloads/chromedriver"
        else:
            chromedriverExecutable = "/Users/piaoquan/Downloads/chromedriver"

        Common.logger(self.log_type, self.crawler).info("启动微信")
        # Common.logging(self.log_type, self.crawler, self.env, '启动微信')
        # Appium capabilities for driving the WeChat app.
        caps = {
            "platformName": "Android",
            "devicesName": "Android",
            # "platformVersion": "11",
            # "udid": "emulator-5554",
            "appPackage": "com.tencent.mm",
            "appActivity": ".ui.LauncherUI",
            "autoGrantPermissions": "true",
            "noReset": True,
            "resetkeyboard": True,
            "unicodekeyboard": True,
            "showChromedriverLog": True,
            "printPageSourceOnFailure": True,
            "recreateChromeDriverSessions": True,
            "enableWebviewDetailsCollection": True,
            "setWebContentsDebuggingEnabled": True,
            "newCommandTimeout": 6000,
            "automationName": "UiAutomator2",
            "chromedriverExecutable": chromedriverExecutable,
            # Attach chromedriver to the mini-program's webview process.
            "chromeOptions": {"androidProcess": "com.tencent.mm:appbrand0"},
        }
        try:
            self.driver = webdriver.Remote("http://localhost:4723/wd/hub", caps)
        except Exception as e:
            print(e)
            AliyunLogger.logging(
                code="3002",
                platform=self.platform,
                mode=self.log_type,
                env=self.env,
                message=f'appium 启动异常: {e}'
            )
            # Without a driver there is nothing to crawl; abort construction.
            return
        self.driver.implicitly_wait(30)

        # Poll up to 120 times for WeChat's home screen to appear.
        for i in range(120):
            try:
                if self.driver.find_elements(By.ID, "com.tencent.mm:id/f2s"):
                    Common.logger(self.log_type, self.crawler).info("微信启动成功")
                    # Common.logging(self.log_type, self.crawler, self.env, '微信启动成功')
                    AliyunLogger.logging(
                        code="1000",
                        platform=self.platform,
                        mode=self.log_type,
                        env=self.env,
                        message="启动微信成功"
                    )
                    break
                elif self.driver.find_element(By.ID, "com.android.systemui:id/dismiss_view"):
                    Common.logger(self.log_type, self.crawler).info("发现并关闭系统下拉菜单")
                    # Common.logging(self.log_type, self.crawler, self.env, '发现并关闭系统下拉菜单')
                    AliyunLogger.logging(
                        code="1000",
                        platform=self.platform,
                        mode=self.log_type,
                        env=self.env,
                        message="发现并关闭系统下拉菜单"
                    )
                    # NOTE(review): this click targets "com.android.system:id/..."
                    # while the check above uses "com.android.systemui:id/..." —
                    # the missing "ui" looks like a typo, so the click likely
                    # raises NoSuchElementException; confirm on-device.
                    self.driver.find_element(By.ID, "com.android.system:id/dismiss_view").click()
                else:
                    pass
            except NoSuchElementException:
                AliyunLogger.logging(
                    code="3001",
                    platform=self.platform,
                    mode=self.log_type,
                    env=self.env,
                    message="打开微信异常"
                )
                time.sleep(1)

        # Swipe down to reveal WeChat's recently-used mini-program panel.
        Common.logger(self.log_type, self.crawler).info("下滑,展示小程序选择面板")
        size = self.driver.get_window_size()
        self.driver.swipe(int(size['width'] * 0.5), int(size['height'] * 0.2),
                          int(size['width'] * 0.5), int(size['height'] * 0.8), 200)
        time.sleep(1)
        Common.logger(self.log_type, self.crawler).info('打开小程序"漂漂圈丨福年"')
        self.driver.find_elements(By.XPATH, '//*[@text="漂漂圈丨福年"]')[-1].click()
        AliyunLogger.logging(
            code="1000",
            platform=self.platform,
            env=self.env,
            mode=self.log_type,
            message="打开小程序漂漂圈丨福年成功"

        )
        time.sleep(5)
        # Run the whole crawl, then tear down the Appium session.
        self.get_videoList()
        time.sleep(1)
        self.driver.quit()

    def search_elements(self, xpath):
        """Search every window handle for `xpath`; return the first non-empty
        element list found, or None if no handle matches."""
        time.sleep(1)
        windowHandles = self.driver.window_handles
        for handle in windowHandles:
            self.driver.switch_to.window(handle)
            time.sleep(1)
            try:
                elements = self.driver.find_elements(By.XPATH, xpath)
                if elements:
                    return elements
            except NoSuchElementException:
                pass

    def check_to_applet(self, xpath):
        """Switch the driver into the mini-program webview context and onto the
        window handle where `xpath` resolves; returns once found."""
        time.sleep(1)
        webViews = self.driver.contexts
        # The last context is the mini-program webview.
        self.driver.switch_to.context(webViews[-1])
        windowHandles = self.driver.window_handles
        for handle in windowHandles:
            self.driver.switch_to.window(handle)
            time.sleep(1)
            try:
                self.driver.find_element(By.XPATH, xpath)
                Common.logger(self.log_type, self.crawler).info("切换到WebView成功\n")
                # Common.logging(self.log_type, self.crawler, self.env, '切换到WebView成功\n')
                AliyunLogger.logging(
                    code="1000",
                    platform=self.platform,
                    mode=self.log_type,
                    env=self.env,
                    message="成功切换到 webview"
                )
                return
            except NoSuchElementException:
                time.sleep(1)

    def swipe_up(self):
        """Swipe the feed up by ~0.36 screen heights and count the swipe."""
        self.search_elements('//*[@class="dynamic--title-container"]')
        size = self.driver.get_window_size()
        self.driver.swipe(int(size["width"] * 0.5), int(size["height"] * 0.8),
                          int(size["width"] * 0.5), int(size["height"] * 0.442), 200)
        self.swipe_count += 1

    def get_video_url(self, video_title_element):
        """Open a video's detail page by clicking its title element and return
        the playable src URL, retrying up to 3 times; None if never found."""
        for i in range(3):
            self.search_elements('//*[@class="dynamic--title-container"]')
            Common.logger(self.log_type, self.crawler).info(f"video_title_element:{video_title_element[0]}")
            time.sleep(1)
            Common.logger(self.log_type, self.crawler).info("滑动标题至可见状态")
            # Scroll the title into view so the click lands on it.
            self.driver.execute_script("arguments[0].scrollIntoView({block:'center',inline:'center'});",
                                       video_title_element[0])
            time.sleep(3)
            Common.logger(self.log_type, self.crawler).info("点击标题")
            video_title_element[0].click()
            self.check_to_applet(xpath=r'//wx-video[@class="infos--title infos--ellipsis"]')
            Common.logger(self.log_type, self.crawler).info("点击标题完成")
            time.sleep(10)
            video_url_elements = self.search_elements(
                '//wx-video[@class="dynamic-index--video-item dynamic-index--video"]')
            if video_url_elements:
                return video_url_elements[0].get_attribute("src")

    def parse_detail(self, index):
        """Parse the current page source and return the feed card at position
        `index` (a BeautifulSoup wx-view element)."""
        page_source = self.driver.page_source
        soup = BeautifulSoup(page_source, 'html.parser')
        soup.prettify()
        video_list = soup.findAll(name="wx-view", attrs={"class": "expose--adapt-parent"})
        element_list = [i for i in video_list][index:]
        return element_list[0]

    def get_video_info_2(self, video_element):
        """Extract one feed card's metadata, validate it through the pipeline,
        and if accepted, open its detail page and publish it to MQ."""
        Common.logger(self.log_type, self.crawler).info(f"本轮已抓取{self.download_cnt}条视频\n")
        # Common.logging(self.log_type, self.crawler, self.env, f"本轮已抓取{self.download_cnt}条视频\n")
        # Quota reached (default 10): reset the round counters and stop.
        if self.download_cnt >= int(self.rule_dict.get("videos_cnt", {}).get("min", 10)):
            self.count = 0
            self.download_cnt = 0
            self.element_list = []
            return
        self.count += 1
        Common.logger(self.log_type, self.crawler).info(f"第{self.count}条视频")
        # trace_id ties every log line of this video's lifecycle together.
        trace_id = self.crawler + str(uuid.uuid1())
        AliyunLogger.logging(
            code="1001",
            platform=self.platform,
            mode=self.log_type,
            env=self.env,
            trace_id=trace_id,
            message="扫描到一条视频",
        )
        # Title
        video_title = video_element.find("wx-view", class_="dynamic--title").text
        # Raw play-count string, e.g. "1000+次播放"
        play_str = video_element.find("wx-view", class_="dynamic--views").text
        # Duration string, "mm:ss"
        duration_str = video_element.find("wx-view", class_="dynamic--duration").text
        user_name = video_element.find("wx-view", class_="dynamic--nick-top").text
        # Avatar URL
        avatar_url = video_element.find("wx-image", class_="avatar--avatar")["src"]
        # Cover URL
        cover_url = video_element.find("wx-image", class_="dynamic--bg-image")["src"]
        play_cnt = int(play_str.replace("+", "").replace("次播放", ""))
        duration = int(duration_str.split(":")[0].strip()) * 60 + int(duration_str.split(":")[-1].strip())
        # No stable IDs in the DOM: derive them from title/nickname hashes.
        out_video_id = md5(video_title.encode('utf8')).hexdigest()
        out_user_id = md5(user_name.encode('utf8')).hexdigest()

        video_dict = {
            "video_title": video_title,
            "video_id": out_video_id,
            'out_video_id': out_video_id,
            "duration_str": duration_str,
            "duration": duration,
            "play_str": play_str,
            "play_cnt": play_cnt,
            "like_str": "",
            "like_cnt": 0,
            "comment_cnt": 0,
            "share_cnt": 0,
            "user_name": user_name,
            "user_id": out_user_id,
            'publish_time_stamp': int(time.time()),
            'publish_time_str': time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(time.time()))),
            'update_time_stamp': int(time.time()),
            "avatar_url": avatar_url,
            "cover_url": cover_url,
            # NOTE(review): "piaopiiaoquan" looks like a typo for
            # "piaopiaoquan" — confirm downstream before changing.
            "session": f"piaopiiaoquan-{int(time.time())}"
        }
        pipeline = PiaoQuanPipeline(
            platform=self.crawler,
            mode=self.log_type,
            item=video_dict,
            rule_dict=self.rule_dict,
            env=self.env,
            trace_id=trace_id
        )
        flag = pipeline.process_item()
        if flag:
            # Pipeline accepted the item: fetch the real video URL on-device.
            video_title_element = self.search_elements(f'//*[contains(text(), "{video_title}")]')
            if video_title_element is None:
                return
            Common.logger(self.log_type, self.crawler).info("点击标题,进入视频详情页")
            AliyunLogger.logging(
                code="1000",
                platform=self.platform,
                mode=self.log_type,
                env=self.env,
                message="点击标题,进入视频详情页",
            )
            video_url = self.get_video_url(video_title_element)
            video_url = get_redirect_url(video_url)
            if video_url is None:
                # Couldn't resolve a URL: back out of the detail page and skip.
                self.driver.press_keycode(AndroidKey.BACK)
                time.sleep(5)
                return
            video_dict['video_url'] = video_url
            video_dict["platform"] = self.crawler
            video_dict["strategy"] = self.log_type
            video_dict["out_video_id"] = video_dict["video_id"]
            video_dict["crawler_rule"] = json.dumps(self.rule_dict)
            video_dict["user_id"] = self.our_uid
            video_dict["publish_time"] = video_dict["publish_time_str"]
            self.mq.send_msg(video_dict)
            self.download_cnt += 1
            # Back to the feed for the next card.
            self.driver.press_keycode(AndroidKey.BACK)
            time.sleep(5)

    def get_video_info(self, video_element):
        """Wrapper around get_video_info_2 that logs failures and backs out of
        any detail page so the feed loop can continue."""
        try:
            self.get_video_info_2(video_element)
        except Exception as e:
            self.driver.press_keycode(AndroidKey.BACK)
            Common.logger(self.log_type, self.crawler).error(f"抓取单条视频异常:{e}\n")
            AliyunLogger.logging(
                code="3001",
                platform=self.platform,
                mode=self.log_type,
                env=self.env,
                message=f"抓取单条视频异常:{e}\n"
            )

    def get_videoList(self):
        """Main feed loop: switch into the webview, then process up to 50
        cards, swiping between each (hard-capped at 100 swipes)."""
        self.mq = MQ(topic_name="topic_crawler_etl_" + self.env)
        self.driver.implicitly_wait(20)
        # Switch into the mini-program webview.
        self.check_to_applet(xpath='//*[@class="expose--adapt-parent"]')
        print("切换到 webview 成功")
        time.sleep(1)
        page = 0
        # Feed container gone means the webview was destroyed: reset and bail.
        if self.search_elements('//*[@class="expose--adapt-parent"]') is None:
            Common.logger(self.log_type, self.crawler).info("窗口已销毁\n")
            # Common.logging(self.log_type, self.crawler, self.env, '窗口已销毁\n')
            AliyunLogger.logging(
                code="3000",
                platform=self.platform,
                mode=self.log_type,
                env=self.env,
                message="窗口已销毁"
            )
            self.count = 0
            self.download_cnt = 0
            self.element_list = []
            return

        print("开始获取视频信息")
        for i in range(50):
            print("下滑{}次".format(i))
            element = self.parse_detail(i)
            self.get_video_info(element)
            self.swipe_up()
            time.sleep(1)
            if self.swipe_count > 100:
                return

        print("下滑完成")
        # time.sleep(100)
        Common.logger(self.log_type, self.crawler).info("已抓取完一组,休眠 5 秒\n")
        # Common.logging(self.log_type, self.crawler, self.env, "已抓取完一组,休眠 5 秒\n")
        AliyunLogger.logging(
            code="1000",
            platform=self.platform,
            mode=self.log_type,
            env=self.env,
            message="已抓取完一组,休眠 5 秒\n",
        )
        time.sleep(5)
+
+
def run():
    """Run one full crawl round. PPQRecommend's constructor performs the
    entire crawl, so instantiating it is the whole job."""
    # videos_cnt.min is effectively "unlimited" at 5000 for this run.
    rule_dict1 = {"period": {"min": 365, "max": 365},
                  "duration": {"min": 30, "max": 1800},
                  "favorite_cnt": {"min": 0, "max": 0},
                  "videos_cnt": {"min": 5000, "max": 0},
                  "share_cnt": {"min": 0, "max": 0}}
    # NOTE(review): our_uid is passed as a list of uids but PPQRecommend
    # assigns self.our_uid directly to each item's user_id — confirm whether
    # a single uid (or a random choice) was intended.
    PPQRecommend("recommend", "piaopiaoquan", "dev", rule_dict1, [64120158, 64120157, 63676778])
+
+
if __name__ == "__main__":
    # Watchdog: run the crawler in a child process and restart it whenever it
    # dies (device/Appium sessions crash routinely).
    process = multiprocessing.Process(
        target=run
    )
    process.start()
    while True:
        if not process.is_alive():
            print("正在重启")
            # terminate() on an already-dead process is a no-op; called here
            # to release the handle before replacing it.
            process.terminate()
            time.sleep(60)
            # Clear stale adb port forwards left by the crashed session.
            os.system("adb forward --remove-all")
            process = multiprocessing.Process(target=run)
            process.start()
        # Health-check interval.
        time.sleep(60)

+ 380 - 0
piaopiaoquan/piaopiaoquan/piaopiaoquan_sift.py

@@ -0,0 +1,380 @@
+# -*- coding: utf-8 -*-
+# @Author: zhangyong
+# @Time: 2023/11/30
+import json
+import os
+import sys
+import time
+import uuid
+from hashlib import md5
+
+from appium import webdriver
+from appium.webdriver.extensions.android.nativekey import AndroidKey
+from bs4 import BeautifulSoup
+from selenium.common.exceptions import NoSuchElementException
+from selenium.webdriver.common.by import By
+import multiprocessing
+
+
+sys.path.append(os.getcwd())
+from common import AliyunLogger, PiaoQuanPipeline, get_redirect_url
+from common.common import Common
+from common.mq import MQ
+
+
class PPQSiftRecommend:
    """Appium-driven crawler for the "漂漂圈丨福年" WeChat mini-program feed.

    The constructor launches WeChat on an attached Android device, enters the
    mini-program through a group-chat card, scrolls the recommend feed, parses
    each video card out of the WebView DOM with BeautifulSoup, and pushes
    qualifying videos onto the ETL message queue.
    """

    # Class-level placeholders; every instance overwrites them in __init__.
    env = None
    driver = None
    log_type = None

    def __init__(self, log_type, crawler, env, rule_dict, our_uid):
        """Start WeChat via Appium and run one full crawl session.

        NOTE(review): the entire crawl happens as a constructor side effect;
        on Appium startup failure it logs and returns silently.
        """
        self.mq = None  # MQ producer; created lazily in get_videoList()
        self.platform = "piaopiaoquan-sift"
        self.download_cnt = 0  # videos pushed to MQ in the current round
        self.element_list = []
        self.count = 0  # videos scanned in the current round
        self.swipe_count = 0  # upward swipes so far; crawl aborts past 100
        self.log_type = log_type
        self.crawler = crawler
        self.env = env
        self.rule_dict = rule_dict
        self.our_uid = our_uid
        # Both branches currently point at the same chromedriver binary; the
        # split is kept so dev/prod paths can diverge later.
        if self.env == "dev":
            chromedriverExecutable = "/Users/tzld/Downloads/chromedriver_v111/chromedriver"
        else:
            chromedriverExecutable = "/Users/tzld/Downloads/chromedriver_v111/chromedriver"

        Common.logger(self.log_type, self.crawler).info("启动微信")
        # Appium capabilities for driving the WeChat app.
        caps = {
            "platformName": "Android",
            "devicesName": "Android",
            # "platformVersion": "11",
            # "udid": "emulator-5554",
            "appPackage": "com.tencent.mm",
            "appActivity": ".ui.LauncherUI",
            "autoGrantPermissions": "true",
            "noReset": True,
            "resetkeyboard": True,
            "unicodekeyboard": True,
            "showChromedriverLog": True,
            "printPageSourceOnFailure": True,
            "recreateChromeDriverSessions": True,
            "enableWebviewDetailsCollection": True,
            "setWebContentsDebuggingEnabled": True,
            "newCommandTimeout": 6000,
            "automationName": "UiAutomator2",
            "chromedriverExecutable": chromedriverExecutable,
            "chromeOptions": {"androidProcess": "com.tencent.mm:appbrand0"},
        }
        try:
            self.driver = webdriver.Remote("http://localhost:4723/wd/hub", caps)
        except Exception as e:
            print(e)
            AliyunLogger.logging(
                code="3002",
                platform=self.platform,
                mode=self.log_type,
                env=self.env,
                message=f'appium 启动异常: {e}'
            )
            return
        self.driver.implicitly_wait(30)
        # Poll up to ~120 s for the WeChat main-UI element to confirm startup.
        for i in range(120):
            try:
                if self.driver.find_elements(By.ID, "com.tencent.mm:id/f2s"):
                    Common.logger(self.log_type, self.crawler).info("微信启动成功")
                    AliyunLogger.logging(
                        code="1000",
                        platform=self.platform,
                        mode=self.log_type,
                        env=self.env,
                        message="启动微信成功"
                    )
                    break
                else:
                    pass
            except NoSuchElementException:
                AliyunLogger.logging(
                    code="3001",
                    platform=self.platform,
                    mode=self.log_type,
                    env=self.env,
                    message="打开微信异常"
                )
                time.sleep(1)

        # Navigate: group chat -> mini-program card -> back into the feed.
        Common.logger(self.log_type, self.crawler).info("点击漂漂圈精选视频群聊")
        self.driver.find_elements(By.XPATH, '//*[@text="漂漂圈精选视频"]')[-1].click()

        time.sleep(5)
        Common.logger(self.log_type, self.crawler).info('点击"漂漂圈丨福年"卡片')
        self.driver.find_elements(By.XPATH, '//*[@text="漂漂圈丨福年"]')[-1].click()
        AliyunLogger.logging(
            code="1000",
            platform=self.platform,
            env=self.env,
            mode=self.log_type,
            message="打开漂漂圈丨福年卡片成功进入小程序"

        )

        time.sleep(5)
        self.driver.press_keycode(AndroidKey.BACK)
        AliyunLogger.logging(
            code="1000",
            platform=self.platform,
            env=self.env,
            mode=self.log_type,
            message="进入漂漂圈丨福年 更多热门成功"

        )
        time.sleep(5)

        self.get_videoList()
        time.sleep(1)
        self.driver.quit()

    def search_elements(self, xpath):
        """Search *xpath* across every window handle.

        Returns the first non-empty element list found, or None (implicitly)
        when no handle matches. Leaves the driver switched to the last
        inspected window.
        """
        time.sleep(1)
        windowHandles = self.driver.window_handles
        for handle in windowHandles:
            self.driver.switch_to.window(handle)
            time.sleep(1)
            try:
                elements = self.driver.find_elements(By.XPATH, xpath)
                if elements:
                    return elements
            except NoSuchElementException:
                pass

    def check_to_applet(self, xpath):
        """Switch into the newest WebView context and the window containing *xpath*.

        Retries each window handle until the element is found; returns None
        either way.
        """
        time.sleep(1)
        webViews = self.driver.contexts
        self.driver.switch_to.context(webViews[-1])
        windowHandles = self.driver.window_handles
        for handle in windowHandles:
            self.driver.switch_to.window(handle)
            time.sleep(1)
            try:
                self.driver.find_element(By.XPATH, xpath)
                Common.logger(self.log_type, self.crawler).info("切换到WebView成功\n")
                AliyunLogger.logging(
                    code="1000",
                    platform=self.platform,
                    mode=self.log_type,
                    env=self.env,
                    message="成功切换到 webview"
                )
                return
            except NoSuchElementException:
                time.sleep(1)

    def swipe_up(self):
        """Swipe the feed up by roughly a third of the screen and count it."""
        self.search_elements('//*[@class="single--title"]')
        size = self.driver.get_window_size()
        self.driver.swipe(int(size["width"] * 0.5), int(size["height"] * 0.8),
                          int(size["width"] * 0.5), int(size["height"] * 0.442), 200)
        self.swipe_count += 1

    def get_video_info_2(self, video_element):
        """Parse one feed card, validate it through the pipeline, and publish it.

        Raises on any scraping error; get_video_info() wraps this with the
        error handler.
        """
        Common.logger(self.log_type, self.crawler).info(f"本轮已抓取{self.download_cnt}条视频\n")
        # Stop the round once the configured quota is reached.
        if self.download_cnt >= int(self.rule_dict.get("videos_cnt", {}).get("min", 10)):
            self.count = 0
            self.download_cnt = 0
            self.element_list = []
            return
        self.count += 1
        Common.logger(self.log_type, self.crawler).info(f"第{self.count}条视频")
        # trace_id is the unique index for this video's whole lifecycle.
        trace_id = self.crawler + str(uuid.uuid1())
        AliyunLogger.logging(
            code="1001",
            platform=self.platform,
            mode=self.log_type,
            env=self.env,
            trace_id=trace_id,
            message="扫描到一条视频",
        )
        # Title
        video_title = video_element.find("wx-view", class_="single--title").text
        # Play-count display string (e.g. "1.2万+次播放")
        play_str = video_element.find("wx-view", class_="single--favor-text").text
        user_name = video_element.find("wx-view", class_="single--nick").text
        # Avatar URL
        avatar_url = video_element.find("wx-image", class_="single--avatar-image")["src"]
        # Cover URL
        cover_url = video_element.find("wx-image", class_="single--image")["src"]
        play_cnt = play_str.replace("+", "").replace("次播放", "")
        if "万" in play_cnt:
            play_cnt = int(play_cnt.split("万")[0]) * 10000
        # No stable IDs in the DOM, so hash the title / nickname instead.
        out_video_id = md5(video_title.encode('utf8')).hexdigest()
        out_user_id = md5(user_name.encode('utf8')).hexdigest()

        video_dict = {
            "video_title": video_title,
            "video_id": out_video_id,
            'out_video_id': out_video_id,
            "duration_str": '',
            "duration": 0,
            "play_str": play_str,
            "play_cnt": play_cnt,
            "like_str": "",
            "like_cnt": 0,
            "comment_cnt": 0,
            "share_cnt": 0,
            "user_name": user_name,
            "user_id": out_user_id,
            'publish_time_stamp': int(time.time()),
            'publish_time_str': time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(time.time()))),
            'update_time_stamp': int(time.time()),
            "avatar_url": avatar_url,
            "cover_url": cover_url,
            "session": f"piaopiaoquan_sift-{int(time.time())}"
        }
        pipeline = PiaoQuanPipeline(
            platform=self.crawler,
            mode=self.log_type,
            item=video_dict,
            rule_dict=self.rule_dict,
            env=self.env,
            trace_id=trace_id
        )
        flag = pipeline.process_item()
        if flag:
            video_title_element = self.search_elements(f'//*[contains(text(), "{video_title}")]')
            if video_title_element is None:
                return
            Common.logger(self.log_type, self.crawler).info("点击标题,进入视频详情页")
            AliyunLogger.logging(
                code="1000",
                platform=self.platform,
                mode=self.log_type,
                env=self.env,
                message="点击标题,进入视频详情页",
            )
            video_url = self.get_video_url(video_title_element)
            video_url = get_redirect_url(video_url)
            if video_url is None:
                # Could not resolve the stream URL; back out of the detail page.
                self.driver.press_keycode(AndroidKey.BACK)
                time.sleep(5)
                return
            video_dict['video_url'] = video_url
            video_dict["platform"] = self.crawler
            video_dict["strategy"] = self.log_type
            video_dict["out_video_id"] = video_dict["video_id"]
            video_dict["crawler_rule"] = json.dumps(self.rule_dict)
            video_dict["user_id"] = self.our_uid
            video_dict["publish_time"] = video_dict["publish_time_str"]
            print(video_dict)
            self.driver.press_keycode(AndroidKey.BACK)
            self.mq.send_msg(video_dict)
            self.download_cnt += 1
            time.sleep(5)

    def get_video_url(self, video_title_element):
        """Open a video's detail page and return its stream URL (or None).

        Retries up to 3 times: scrolls the title into view, clicks it,
        switches into the applet WebView and reads the <wx-video> src.
        """
        for i in range(3):
            self.search_elements('//*[@class="single--title"]')
            Common.logger(self.log_type, self.crawler).info(f"video_title_element:{video_title_element[0]}")
            time.sleep(1)
            Common.logger(self.log_type, self.crawler).info("滑动标题至可见状态")
            self.driver.execute_script("arguments[0].scrollIntoView({block:'center',inline:'center'});",
                                       video_title_element[0])
            time.sleep(3)
            Common.logger(self.log_type, self.crawler).info("点击标题")
            video_title_element[0].click()
            self.check_to_applet(xpath=r'//wx-video[@class="infos--flex-box infos--title-wrap"]')
            Common.logger(self.log_type, self.crawler).info("点击标题完成")
            time.sleep(10)
            video_url_elements = self.search_elements(
                '//wx-video[@class="dynamic-index--video-item dynamic-index--video"]')
            if video_url_elements:
                return video_url_elements[0].get_attribute("src")

    def parse_detail(self, index):
        """Return the feed card at position *index* from the current page DOM."""
        page_source = self.driver.page_source
        soup = BeautifulSoup(page_source, 'html.parser')
        soup.prettify()
        video_list = soup.findAll(name="wx-view", attrs={"class": "single--dynamic-item"})
        element_list = [i for i in video_list][index:]
        # NOTE(review): raises IndexError when fewer than index+1 cards are
        # rendered; the caller relies on get_video_info() to swallow it.
        return element_list[0]

    def get_video_info(self, video_element):
        """Wrap get_video_info_2() so one bad card never kills the crawl loop."""
        try:
            self.get_video_info_2(video_element)
        except Exception as e:
            Common.logger(self.log_type, self.crawler).error(f"抓取单条视频异常:{e}\n")
            AliyunLogger.logging(
                code="3001",
                platform=self.platform,
                mode=self.log_type,
                env=self.env,
                message=f"抓取单条视频异常:{e}\n"
            )

    def get_videoList(self):
        """Main crawl loop: switch into the WebView, then scan/swipe up to 50 cards."""
        self.mq = MQ(topic_name="topic_crawler_etl_" + self.env)
        self.driver.implicitly_wait(20)
        # Switch into the web_view context of the mini-program.
        self.check_to_applet(xpath='//*[@class="single--item-content"]')
        print("切换到 webview 成功")
        time.sleep(1)
        if self.search_elements('//*[@class="single--item-content"]') is None:
            # Feed window disappeared; reset round counters and bail out.
            Common.logger(self.log_type, self.crawler).info("窗口已销毁\n")
            AliyunLogger.logging(
                code="3000",
                platform=self.platform,
                mode=self.log_type,
                env=self.env,
                message="窗口已销毁"
            )
            self.count = 0
            self.download_cnt = 0
            self.element_list = []
            return

        print("开始获取视频信息")
        for i in range(50):
            print("下滑{}次".format(i))
            element = self.parse_detail(i)
            self.get_video_info(element)
            self.swipe_up()
            time.sleep(1)
            # Hard safety cap on total swipes for the session.
            if self.swipe_count > 100:
                return

        print("下滑完成")
        Common.logger(self.log_type, self.crawler).info("已抓取完一组,休眠 5 秒\n")
        AliyunLogger.logging(
            code="1000",
            platform=self.platform,
            mode=self.log_type,
            env=self.env,
            message="已抓取完一组,休眠 5 秒\n",
        )
        time.sleep(5)
+
+
def run():
    """Launch a single PPQSiftRecommend crawl session with a fixed dev rule set."""
    crawl_rules = {
        "period": {"min": 365, "max": 365},
        "duration": {"min": 0, "max": 0},
        "favorite_cnt": {"min": 0, "max": 0},
        "videos_cnt": {"min": 5000, "max": 0},
        "share_cnt": {"min": 0, "max": 0},
    }
    # The constructor runs the whole crawl as a side effect.
    PPQSiftRecommend("recommend", "piaopiaoquan", "dev", crawl_rules, [64120158])
+
+
if __name__ == "__main__":
    # Supervisor: keep the crawler alive in a child process, restarting it
    # (and clearing stale adb port forwards) whenever it dies.
    worker = multiprocessing.Process(target=run)
    worker.start()
    while True:
        if not worker.is_alive():
            print("正在重启")
            worker.terminate()  # no-op on an already-dead process; kept for safety
            time.sleep(60)
            os.system("adb forward --remove-all")
            worker = multiprocessing.Process(target=run)
            worker.start()
        time.sleep(60)

+ 0 - 0
piaopiaoquan/piaopiaoquan_main/__init__.py


+ 188 - 0
piaopiaoquan/piaopiaoquan_main/run_ppq_recommend.py

@@ -0,0 +1,188 @@
+# -*- coding: utf-8 -*-
+# @Time: 2023/11/27
+import argparse
+import random
+from mq_http_sdk.mq_client import *
+from mq_http_sdk.mq_consumer import *
+from mq_http_sdk.mq_exception import MQExceptionBase
+import multiprocessing
+
+
+sys.path.append(os.getcwd())
+from common.public import get_consumer, ack_message, task_fun_mq, get_rule_from_mysql
+from common.common import Common
+from common.scheduling_db import MysqlHelper
+from common import AliyunLogger
+from piaopiaoquan.piaopiaoquan.piaopiaoquan_recommend import PPQRecommend
+
+
+
def run(args1, args2, args3, args4, args5):
    """Child-process entry point: construct PPQRecommend, which performs the
    entire crawl inside its constructor."""
    PPQRecommend(log_type=args1, crawler=args2, env=args3, rule_dict=args4, our_uid=args5)
+
+
def main(log_type, crawler, topic_name, group_id, env):
    """Consume PPQ crawl tasks from Aliyun MQ and run each in a supervised child process.

    NOTE(review): the topic_name/group_id parameters are immediately overridden
    by the hard-coded prod values below — confirm whether the CLI values are
    meant to be ignored.
    NOTE(review): platform=log_type / mode=crawler in the logging calls below is
    swapped relative to the other runners (e.g. run_shzf_recommend.py) — verify.
    """
    topic_name = "ppq_recommend_prod"
    group_id = "ppq_recommend_prod"
    consumer = get_consumer(topic_name, group_id)
    # Long polling: if the topic has no message the request is held server-side
    # for up to wait_seconds (max 30 s) and returns as soon as one arrives.
    wait_seconds = 30
    # Consume at most this many messages per poll (service max is 16).
    batch = 1
    Common.logger(log_type, crawler).info(f'{10 * "="}Consume And Ack Message From Topic{10 * "="}\n'
                                          f'WaitSeconds:{wait_seconds}\n'
                                          f'TopicName:{topic_name}\n'
                                          f'MQConsumer:{group_id}')
    AliyunLogger.logging(
        code="1000",
        platform=log_type,
        mode=crawler,
        env=env,
        message=f'{10 * "="}Consume And Ack Message From Topic{10 * "="}\n'
                f"WaitSeconds:{wait_seconds}\n"
                f"TopicName:{topic_name}\n"
                f"MQConsumer:{group_id}",
    )
    while True:
        try:
            # Long-poll for the next batch of messages.
            recv_msgs = consumer.consume_message(batch, wait_seconds)
            for msg in recv_msgs:
                Common.logger(log_type, crawler).info(f"Receive\n"
                                                      f"MessageId:{msg.message_id}\n"
                                                      f"MessageBodyMD5:{msg.message_body_md5}\n"
                                                      f"MessageTag:{msg.message_tag}\n"
                                                      f"ConsumedTimes:{msg.consumed_times}\n"
                                                      f"PublishTime:{msg.publish_time}\n"
                                                      f"Body:{msg.message_body}\n"
                                                      f"NextConsumeTime:{msg.next_consume_time}\n"
                                                      f"ReceiptHandle:{msg.receipt_handle}\n"
                                                      f"Properties:{msg.properties}")
                AliyunLogger.logging(
                    code="1000",
                    platform=log_type,
                    mode=crawler,
                    env=env,
                    message=f"Receive\n"
                            f"MessageId:{msg.message_id}\n"
                            f"MessageBodyMD5:{msg.message_body_md5}\n"
                            f"MessageTag:{msg.message_tag}\n"
                            f"ConsumedTimes:{msg.consumed_times}\n"
                            f"PublishTime:{msg.publish_time}\n"
                            f"Body:{msg.message_body}\n"
                            f"NextConsumeTime:{msg.next_consume_time}\n"
                            f"ReceiptHandle:{msg.receipt_handle}\n"
                            f"Properties:{msg.properties}",
                )
                # Ack up front: the supervisor loop below never returns, so the
                # message must be acknowledged before processing starts.
                ack_message(log_type=log_type, crawler=crawler, recv_msgs=recv_msgs, consumer=consumer)

                # Crawler business logic: resolve task, users and rules.
                task_dict = task_fun_mq(msg.message_body)['task_dict']
                rule_dict = task_fun_mq(msg.message_body)['rule_dict']
                task_id = task_dict['id']
                select_user_sql = f"""select * from crawler_user_v3 where task_id={task_id}"""
                user_list = MysqlHelper.get_values(log_type, crawler, select_user_sql, env, action="")
                our_uid_list = []
                for user in user_list:
                    our_uid_list.append(user["uid"])
                # Pick one of the task's publishing accounts at random.
                our_uid = random.choice(our_uid_list)
                AliyunLogger.logging(
                    "1000", log_type, crawler, env, f"调度任务:{task_dict}"
                )
                # Re-parse rule_dict (duplicate of the assignment above).
                rule_dict = task_fun_mq(msg.message_body)["rule_dict"]
                AliyunLogger.logging(
                    "1000", log_type, crawler, env, f"抓取规则:{rule_dict}\n"
                )
                Common.logger(log_type, crawler).info(f"用户列表:{user_list}\n")
                # Flatten the per-field rule rows fetched from MySQL into one dict.
                new_r = get_rule_from_mysql(task_id=task_id, log_type=log_type, crawler=crawler, env=env)
                r_d = {}
                for item in new_r:
                    for k, val in item.items():
                        r_d[k] = val

                process = multiprocessing.Process(
                    target=run,
                    args=(log_type, crawler, env, r_d, our_uid)
                )
                process.start()
                AliyunLogger.logging(
                    code="1003",
                    platform=log_type,
                    mode=crawler,
                    env=env,
                    message="成功获取信息,启动爬虫,开始一轮抓取",
                )
                print("进程开始")
                # NOTE(review): this supervisor loop never breaks, so only the
                # first consumed message is ever processed — confirm intended.
                while True:
                    if not process.is_alive():
                        print("正在重启")
                        process.terminate()
                        os.system("adb forward --remove-all")
                        time.sleep(60)
                        # Refresh the rules before each restart.
                        new_r = get_rule_from_mysql(task_id=task_id, log_type=log_type, crawler=crawler, env=env)
                        r_d = {}
                        for item in new_r:
                            for k, val in item.items():
                                r_d[k] = val
                        Common.logger(log_type, crawler).info(f'抓取规则:{r_d}')
                        Common.logging(log_type, crawler, env, f"抓取规则:{r_d}")
                        process = multiprocessing.Process(target=run, args=(log_type, crawler, env, r_d, our_uid))
                        process.start()
                        AliyunLogger.logging(
                            code="1004",
                            platform=log_type,
                            mode=crawler,
                            env=env,
                            message="成功抓取完一轮",
                        )
                    time.sleep(60)
        except MQExceptionBase as err:
            # The topic has no consumable message.
            if err.type == "MessageNotExist":
                AliyunLogger.logging(
                    code="1000",
                    platform=log_type,
                    mode=crawler,
                    env=env,
                    message=f"No new message! RequestId:{err.req_id}\n",
                )
                Common.logging(
                    log_type=log_type,
                    crawler=crawler,
                    env=env,
                    message=f"No new message! RequestId:{err.req_id}\n",
                )
                continue
            AliyunLogger.logging(
                code="1000",
                platform=log_type,
                mode=crawler,
                env=env,
                message=f"Consume Message Fail! Exception:{err}\n",
            )
            time.sleep(2)
            continue
+
+
if __name__ == "__main__":
    # Parse CLI options and hand them straight to main().
    parser = argparse.ArgumentParser()
    for flag in ("--log_type", "--crawler", "--topic_name", "--group_id", "--env"):
        parser.add_argument(flag, type=str)
    args = parser.parse_args()
    main(
        log_type=args.log_type,
        crawler=args.crawler,
        topic_name=args.topic_name,
        group_id=args.group_id,
        env=args.env,
    )

+ 188 - 0
piaopiaoquan/piaopiaoquan_main/run_ppqsift_recommend.py

@@ -0,0 +1,188 @@
+# -*- coding: utf-8 -*-
+# @Time: 2023/12/01
+import argparse
+import random
+from mq_http_sdk.mq_client import *
+from mq_http_sdk.mq_consumer import *
+from mq_http_sdk.mq_exception import MQExceptionBase
+import multiprocessing
+
+
+sys.path.append(os.getcwd())
+from common.public import get_consumer, ack_message, task_fun_mq, get_rule_from_mysql
+from common.common import Common
+from common.scheduling_db import MysqlHelper
+from common import AliyunLogger
+from piaopiaoquan.piaopiaoquan.piaopiaoquan_sift import PPQSiftRecommend
+
+
+
def run(args1, args2, args3, args4, args5):
    """Child-process entry point: construct PPQSiftRecommend, which performs
    the entire crawl inside its constructor."""
    PPQSiftRecommend(log_type=args1, crawler=args2, env=args3, rule_dict=args4, our_uid=args5)
+
+
def main(log_type, crawler, topic_name, group_id, env):
    """Consume PPQ-sift crawl tasks from Aliyun MQ and run each in a supervised child process.

    NOTE(review): the topic_name/group_id parameters are immediately overridden
    by the hard-coded prod values below — confirm whether the CLI values are
    meant to be ignored.
    NOTE(review): platform=log_type / mode=crawler in the logging calls below is
    swapped relative to the other runners (e.g. run_shzf_recommend.py) — verify.
    """
    topic_name = "ppqsift_recommend_prod"
    group_id = "ppqsift_recommend_prod"
    consumer = get_consumer(topic_name, group_id)
    # Long polling: if the topic has no message the request is held server-side
    # for up to wait_seconds (max 30 s) and returns as soon as one arrives.
    wait_seconds = 30
    # Consume at most this many messages per poll (service max is 16).
    batch = 1
    Common.logger(log_type, crawler).info(f'{10 * "="}Consume And Ack Message From Topic{10 * "="}\n'
                                          f'WaitSeconds:{wait_seconds}\n'
                                          f'TopicName:{topic_name}\n'
                                          f'MQConsumer:{group_id}')
    AliyunLogger.logging(
        code="1000",
        platform=log_type,
        mode=crawler,
        env=env,
        message=f'{10 * "="}Consume And Ack Message From Topic{10 * "="}\n'
                f"WaitSeconds:{wait_seconds}\n"
                f"TopicName:{topic_name}\n"
                f"MQConsumer:{group_id}",
    )
    while True:
        try:
            # Long-poll for the next batch of messages.
            recv_msgs = consumer.consume_message(batch, wait_seconds)
            for msg in recv_msgs:
                Common.logger(log_type, crawler).info(f"Receive\n"
                                                      f"MessageId:{msg.message_id}\n"
                                                      f"MessageBodyMD5:{msg.message_body_md5}\n"
                                                      f"MessageTag:{msg.message_tag}\n"
                                                      f"ConsumedTimes:{msg.consumed_times}\n"
                                                      f"PublishTime:{msg.publish_time}\n"
                                                      f"Body:{msg.message_body}\n"
                                                      f"NextConsumeTime:{msg.next_consume_time}\n"
                                                      f"ReceiptHandle:{msg.receipt_handle}\n"
                                                      f"Properties:{msg.properties}")
                AliyunLogger.logging(
                    code="1000",
                    platform=log_type,
                    mode=crawler,
                    env=env,
                    message=f"Receive\n"
                            f"MessageId:{msg.message_id}\n"
                            f"MessageBodyMD5:{msg.message_body_md5}\n"
                            f"MessageTag:{msg.message_tag}\n"
                            f"ConsumedTimes:{msg.consumed_times}\n"
                            f"PublishTime:{msg.publish_time}\n"
                            f"Body:{msg.message_body}\n"
                            f"NextConsumeTime:{msg.next_consume_time}\n"
                            f"ReceiptHandle:{msg.receipt_handle}\n"
                            f"Properties:{msg.properties}",
                )
                # Ack up front: the supervisor loop below never returns, so the
                # message must be acknowledged before processing starts.
                ack_message(log_type=log_type, crawler=crawler, recv_msgs=recv_msgs, consumer=consumer)

                # Crawler business logic: resolve task, users and rules.
                task_dict = task_fun_mq(msg.message_body)['task_dict']
                rule_dict = task_fun_mq(msg.message_body)['rule_dict']
                task_id = task_dict['id']
                select_user_sql = f"""select * from crawler_user_v3 where task_id={task_id}"""
                user_list = MysqlHelper.get_values(log_type, crawler, select_user_sql, env, action="")
                our_uid_list = []
                for user in user_list:
                    our_uid_list.append(user["uid"])
                # Pick one of the task's publishing accounts at random.
                our_uid = random.choice(our_uid_list)
                AliyunLogger.logging(
                    "1000", log_type, crawler, env, f"调度任务:{task_dict}"
                )
                # Re-parse rule_dict (duplicate of the assignment above).
                rule_dict = task_fun_mq(msg.message_body)["rule_dict"]
                AliyunLogger.logging(
                    "1000", log_type, crawler, env, f"抓取规则:{rule_dict}\n"
                )
                Common.logger(log_type, crawler).info(f"用户列表:{user_list}\n")
                # Flatten the per-field rule rows fetched from MySQL into one dict.
                new_r = get_rule_from_mysql(task_id=task_id, log_type=log_type, crawler=crawler, env=env)
                r_d = {}
                for item in new_r:
                    for k, val in item.items():
                        r_d[k] = val

                process = multiprocessing.Process(
                    target=run,
                    args=(log_type, crawler, env, r_d, our_uid)
                )
                process.start()
                AliyunLogger.logging(
                    code="1003",
                    platform=log_type,
                    mode=crawler,
                    env=env,
                    message="成功获取信息,启动爬虫,开始一轮抓取",
                )
                print("进程开始")
                # NOTE(review): this supervisor loop never breaks, so only the
                # first consumed message is ever processed — confirm intended.
                while True:
                    if not process.is_alive():
                        print("正在重启")
                        process.terminate()
                        os.system("adb forward --remove-all")
                        time.sleep(60)
                        # Refresh the rules before each restart.
                        new_r = get_rule_from_mysql(task_id=task_id, log_type=log_type, crawler=crawler, env=env)
                        r_d = {}
                        for item in new_r:
                            for k, val in item.items():
                                r_d[k] = val
                        Common.logger(log_type, crawler).info(f'抓取规则:{r_d}')
                        Common.logging(log_type, crawler, env, f"抓取规则:{r_d}")
                        process = multiprocessing.Process(target=run, args=(log_type, crawler, env, r_d, our_uid))
                        process.start()
                        AliyunLogger.logging(
                            code="1004",
                            platform=log_type,
                            mode=crawler,
                            env=env,
                            message="成功抓取完一轮",
                        )
                    time.sleep(60)
        except MQExceptionBase as err:
            # The topic has no consumable message.
            if err.type == "MessageNotExist":
                AliyunLogger.logging(
                    code="1000",
                    platform=log_type,
                    mode=crawler,
                    env=env,
                    message=f"No new message! RequestId:{err.req_id}\n",
                )
                Common.logging(
                    log_type=log_type,
                    crawler=crawler,
                    env=env,
                    message=f"No new message! RequestId:{err.req_id}\n",
                )
                continue
            AliyunLogger.logging(
                code="1000",
                platform=log_type,
                mode=crawler,
                env=env,
                message=f"Consume Message Fail! Exception:{err}\n",
            )
            time.sleep(2)
            continue
+
+
if __name__ == "__main__":
    # Parse CLI options and hand them straight to main().
    parser = argparse.ArgumentParser()
    for flag in ("--log_type", "--crawler", "--topic_name", "--group_id", "--env"):
        parser.add_argument(flag, type=str)
    args = parser.parse_args()
    main(
        log_type=args.log_type,
        crawler=args.crawler,
        topic_name=args.topic_name,
        group_id=args.group_id,
        env=args.env,
    )

+ 0 - 0
shanhuzhufu/__init__.py


+ 27 - 0
shanhuzhufu/crypt/decrypt.py

@@ -0,0 +1,27 @@
+from base64 import b64encode, b64decode
+from Crypto.Cipher import AES
+from Crypto.Util.Padding import pad, unpad
+
+
class ShanHuZhuFuAes:
    """AES-128-CBC helper for shanhuzhufu API payloads.

    Encrypts UTF-8 text to base64 ciphertext and back, with a fixed key and
    the IV equal to the key. NOTE(review): hard-coding the key and reusing it
    as IV is weak crypto — acceptable only because it must match the remote
    service's scheme.
    """

    def __init__(self):
        # The cipher requires a bytes key; this service uses key == IV.
        self.key = 'xlc2ze7qnqg8xi1d'.encode('utf-8')
        self.iv = self.key

    def encrypt(self, data):
        """PKCS7-pad *data*, AES-CBC encrypt it, and return base64 text."""
        aes = AES.new(self.key, AES.MODE_CBC, self.iv)
        padded = pad(data.encode('utf-8'), AES.block_size)
        return b64encode(aes.encrypt(padded)).decode('utf-8')

    def decrypt(self, data):
        """Reverse encrypt(); return the plaintext, or None on any failure."""
        try:
            blob = b64decode(data.encode('utf-8'))
            aes = AES.new(self.key, AES.MODE_CBC, self.iv)
            return unpad(aes.decrypt(blob), AES.block_size).decode('utf-8')
        except Exception:
            print("Incorrect decryption")
            return None
+
+

+ 153 - 0
shanhuzhufu/shanhuzhufu_main/run_shzf_recommend.py

@@ -0,0 +1,153 @@
+import argparse
+from mq_http_sdk.mq_client import *
+from mq_http_sdk.mq_consumer import *
+from mq_http_sdk.mq_exception import MQExceptionBase
+
+sys.path.append(os.getcwd())
+from common.public import task_fun_mq, get_consumer, ack_message
+from common.scheduling_db import MysqlHelper
+from common import AliyunLogger
+from shanhuzhufu.shanhuzhufu_recommend import ShanHuZhuFuRecommend
+
+
+def main(log_type, crawler, topic_name, group_id, env):
+    consumer = get_consumer(topic_name, group_id)
+    # 长轮询表示如果Topic没有消息,则客户端请求会在服务端挂起3秒,3秒内如果有消息可以消费则立即返回响应。
+    # 长轮询时间3秒(最多可设置为30秒)。
+    wait_seconds = 30
+    # 一次最多消费3条(最多可设置为16条)。
+    batch = 1
+    AliyunLogger.logging(
+        code="1000",
+        platform=crawler,
+        mode=log_type,
+        env=env,
+        message=f'{10 * "="}Consume And Ack Message From Topic{10 * "="}\n'
+        f"WaitSeconds:{wait_seconds}\n"
+        f"TopicName:{topic_name}\n"
+        f"MQConsumer:{group_id}",
+    )
+    while True:
+        try:
+            # 长轮询消费消息。
+            recv_msgs = consumer.consume_message(batch, wait_seconds)
+            for msg in recv_msgs:
+                AliyunLogger.logging(
+                    code="1000",
+                    platform=crawler,
+                    mode=log_type,
+                    env=env,
+                    message=f"Receive\n"
+                    f"MessageId:{msg.message_id}\n"
+                    f"MessageBodyMD5:{msg.message_body_md5}\n"
+                    f"MessageTag:{msg.message_tag}\n"
+                    f"ConsumedTimes:{msg.consumed_times}\n"
+                    f"PublishTime:{msg.publish_time}\n"
+                    f"Body:{msg.message_body}\n"
+                    f"NextConsumeTime:{msg.next_consume_time}\n"
+                    f"ReceiptHandle:{msg.receipt_handle}\n"
+                    f"Properties:{msg.properties}",
+                )
+                # ack_mq_message
+                ack_message(
+                    log_type=log_type,
+                    crawler=crawler,
+                    recv_msgs=recv_msgs,
+                    consumer=consumer,
+                )
+                # 解析 task_dict
+                task_dict = task_fun_mq(msg.message_body)["task_dict"]
+                AliyunLogger.logging(
+                    code="1000",
+                    platform=crawler,
+                    mode=log_type,
+                    env=env,
+                    message="f调度任务:{task_dict}",
+                )
+                # 解析 rule_dict
+                rule_dict = task_fun_mq(msg.message_body)["rule_dict"]
+                AliyunLogger.logging(
+                    code="1000",
+                    platform=crawler,
+                    mode=log_type,
+                    env=env,
+                    message=f"抓取规则:{rule_dict}\n",
+                )
+                # 解析 user_list
+                task_id = task_dict["id"]
+                select_user_sql = (
+                    f"""select * from crawler_user_v3 where task_id={task_id}"""
+                )
+                user_list = MysqlHelper.get_values(
+                    log_type, crawler, select_user_sql, env, action=""
+                )
+                AliyunLogger.logging(
+                    code="1003",
+                    platform=crawler,
+                    mode=log_type,
+                    env=env,
+                    message="开始抓取"
+                )
+                AliyunLogger.logging(
+                    code="1000",
+                    platform=crawler,
+                    mode=log_type,
+                    env=env,
+                    message="开始抓取祝珊瑚祝福——推荐",
+                )
+                main_process = ShanHuZhuFuRecommend(
+                    platform=crawler,
+                    mode=log_type,
+                    rule_dict=rule_dict,
+                    user_list=user_list,
+                    env=env
+                )
+                main_process.get_video_list()
+                AliyunLogger.logging(
+                    code="1000",
+                    platform=crawler,
+                    mode=log_type,
+                    env=env,
+                    message="完成抓取——珊瑚祝福",
+                )
+                AliyunLogger.logging(
+                    code="1004", platform=crawler, mode=log_type, env=env,message="结束一轮抓取"
+                )
+
+        except MQExceptionBase as err:
+            # Topic中没有消息可消费。
+            if err.type == "MessageNotExist":
+                AliyunLogger.logging(
+                    code="2000",
+                    platform=crawler,
+                    mode=log_type,
+                    env=env,
+                    message=f"No new message! RequestId:{err.req_id}\n",
+                )
+                continue
+            AliyunLogger.logging(
+                code="2000",
+                platform=crawler,
+                mode=log_type,
+                env=env,
+                message=f"Consume Message Fail! Exception:{err}\n",
+            )
+            time.sleep(2)
+            continue
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()  # build the CLI argument parser
+    parser.add_argument("--log_type", type=str)  # crawl mode, with explicit type
+    parser.add_argument("--crawler")  # platform identifier
+    parser.add_argument("--topic_name")  # MQ topic to consume
+    parser.add_argument("--group_id")  # MQ consumer group id
+    parser.add_argument("--env")  # runtime environment (dev / prod)
+    args = parser.parse_args()  # values may also be supplied from the terminal
+    main(
+        log_type=args.log_type,
+        crawler=args.crawler,
+        topic_name=args.topic_name,
+        group_id=args.group_id,
+        env=args.env,
+    )

+ 1 - 0
shanhuzhufu/shanhuzhufu_recommend/__init__.py

@@ -0,0 +1 @@
+from .shanhuzhufu_recommend_scheduling import ShanHuZhuFuRecommend

+ 103 - 0
shanhuzhufu/shanhuzhufu_recommend/shanhuzhufu_recommend_dev.py

@@ -0,0 +1,103 @@
+import os
+import json
+import random
+import sys
+import time
+import uuid
+
+import requests
+import datetime
+
+sys.path.append(os.getcwd())
+from common.video_item import VideoItem
+from common import AliyunLogger, tunnel_proxies
+from common.pipeline import PiaoQuanPipelineTest
+from common.mq import MQ
+from shanhuzhufu.crypt.decrypt import ShanHuZhuFuAes as AES
+
+
+class ShanHuZhuFuRecommend(object):
+    """Dev/test crawler for the ShanHuZhuFu mini-program recommend feed.
+
+    Fetches the encrypted feed page by page, decrypts it with the shared AES
+    helper and prints items that pass the test pipeline (no MQ send here).
+    """
+
+    def __init__(self, platform, mode, rule_dict, user_list, env):
+        self.platform = platform
+        self.mode = mode
+        self.rule_dict = rule_dict
+        self.user_list = user_list
+        self.env = env
+        self.download_cnt = 0  # items accepted by the pipeline so far
+        self.mq = MQ(topic_name="topic_crawler_etl_" + self.env)
+        self.limit_flag = False  # set once download_cnt reaches the rule quota
+        self.cryptor = AES()
+
+    def get_video_list(self):
+        """Fetch up to 100 feed pages and process every video in each page.
+
+        NOTE(review): the Cookie carries a fixed PHPSESSID — presumably a
+        captured session; confirm it does not expire.
+        """
+        base_url = "https://shanhu.nnapi.cn/videos/api.videos/getItem"
+        headers = {
+            'Host': 'shanhu.nnapi.cn',
+            'xweb_xhr': '1',
+            'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 MicroMessenger/6.8.0(0x16080000) NetType/WIFI MiniProgramEnv/Mac MacWechat/WMPF MacWechat/3.8.5(0x13080510)XWEB/1100',
+            'content-type': 'application/json',
+            'accept': '*/*',
+            'sec-fetch-site': 'cross-site',
+            'sec-fetch-mode': 'cors',
+            'sec-fetch-dest': 'empty',
+            'referer': 'https://servicewechat.com/wxc2088c70f666b45e/2/page-frame.html',
+            'accept-language': 'en-US,en;q=0.9',
+            'Cookie': 'PHPSESSID=562dc39e8e68ad3e76c237f687bd049b; lang=zh-cn'
+        }
+        for i in range(100):
+            params = {
+                "mark": "",
+                "page": i + 1
+            }
+            response = requests.get(url=base_url, headers=headers, params=params, proxies=tunnel_proxies())
+            # The API wraps the real payload in an AES-encrypted "data" field.
+            encrypted_info = response.json()['data']
+            decrypted_info = json.loads(self.cryptor.decrypt(data=encrypted_info))
+            # print(json.dumps(decrypted_info, ensure_ascii=False, indent=4))
+            video_list = decrypted_info['list']
+            for index, video_obj in enumerate(video_list):
+                self.process_video_obj(video_obj)
+
+    def process_video_obj(self, video_obj):
+        """Map one raw feed item onto a VideoItem and run it through the test pipeline.
+
+        A random publishing account is picked from user_list for each item.
+        """
+        trace_id = self.platform + str(uuid.uuid1())
+        our_user = random.choice(self.user_list)
+        publish_time_stamp = datetime.datetime.strptime(video_obj['create_at'], "%Y-%m-%d %H:%M:%S").timestamp()
+        item = VideoItem()
+        item.add_video_info("user_id", our_user['uid'])
+        item.add_video_info("user_name", our_user['nick_name'])
+        item.add_video_info("video_id", video_obj['id'])
+        item.add_video_info("video_title", video_obj['name'])
+        item.add_video_info("publish_time_str", video_obj['create_at'])
+        item.add_video_info("publish_time_stamp", int(publish_time_stamp))
+        # NOTE(review): video_url is taken from the 'cover' field — looks like
+        # the API serves the media URL there, but confirm against a response.
+        item.add_video_info("video_url", video_obj['cover'])
+        # Cover is derived from the same URL via a frame-grab query suffix.
+        item.add_video_info("cover_url", video_obj['cover'] + '&vframe/png/offset/1/w/200')
+        item.add_video_info("like_cnt", video_obj['num_like'])
+        item.add_video_info("play_cnt", video_obj['num_read'])
+        item.add_video_info("comment_cnt", video_obj['num_comment'])
+        item.add_video_info("out_video_id", video_obj['id'])
+        item.add_video_info("platform", self.platform)
+        item.add_video_info("strategy", self.mode)
+        item.add_video_info("session", "{}-{}".format(self.platform, int(time.time())))
+        mq_obj = item.produce_item()
+        pipeline = PiaoQuanPipelineTest(
+            platform=self.platform,
+            mode=self.mode,
+            rule_dict=self.rule_dict,
+            env=self.env,
+            item=mq_obj,
+            trace_id=trace_id,
+        )
+        if pipeline.process_item():
+            self.download_cnt += 1
+            print(json.dumps(mq_obj, ensure_ascii=False, indent=4))
+            # Quota defaults to 200 when the rule does not set videos_cnt.min.
+            if self.download_cnt >= int(self.rule_dict.get("videos_cnt", {}).get("min", 200)):
+                self.limit_flag = True
+
+
+if __name__ == '__main__':
+    # Ad-hoc dev run with dummy publishing accounts and no rule limits.
+    S = ShanHuZhuFuRecommend(
+        platform="shanhuzhufu",
+        mode="recommend",
+        env="dev",
+        rule_dict={},
+        user_list=[{'nick_name': "Ivring", 'uid': "1997"}, {'nick_name': "paul", 'uid': "1998"}]
+    )
+    S.get_video_list()

+ 158 - 0
shanhuzhufu/shanhuzhufu_recommend/shanhuzhufu_recommend_scheduling.py

@@ -0,0 +1,158 @@
+import os
+import json
+import random
+import sys
+import time
+import uuid
+
+import requests
+import datetime
+
+sys.path.append(os.getcwd())
+from common.video_item import VideoItem
+from common import PiaoQuanPipeline, AliyunLogger, tunnel_proxies
+from common.mq import MQ
+from shanhuzhufu.crypt.decrypt import ShanHuZhuFuAes as AES
+
+
+class ShanHuZhuFuRecommend(object):
+    """Production crawler for the ShanHuZhuFu mini-program recommend feed.
+
+    Fetches the encrypted feed page by page, decrypts it, validates every
+    item through PiaoQuanPipeline and forwards accepted items to the ETL MQ
+    topic. Stops once the per-round quota (rule videos_cnt.min) is reached.
+    """
+
+    def __init__(self, platform, mode, rule_dict, user_list, env):
+        self.platform = platform
+        self.mode = mode
+        self.rule_dict = rule_dict
+        self.user_list = user_list
+        self.env = env
+        self.download_cnt = 0  # items sent to ETL so far this round
+        self.mq = MQ(topic_name="topic_crawler_etl_" + self.env)
+        self.limit_flag = False  # set once download_cnt reaches the rule quota
+        self.cryptor = AES()
+
+    def get_video_list(self):
+        """Fetch up to 100 feed pages, processing every video in each page.
+
+        The quota flag is only checked once per page, so the last page may
+        overshoot the quota by up to one page of items.
+        NOTE(review): the Cookie carries a fixed PHPSESSID — presumably a
+        captured session; confirm it does not expire.
+        """
+        base_url = "https://shanhu.nnapi.cn/videos/api.videos/getItem"
+        headers = {
+            "Host": "shanhu.nnapi.cn",
+            "xweb_xhr": "1",
+            "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 MicroMessenger/6.8.0(0x16080000) NetType/WIFI MiniProgramEnv/Mac MacWechat/WMPF MacWechat/3.8.5(0x13080510)XWEB/1100",
+            "content-type": "application/json",
+            "accept": "*/*",
+            "sec-fetch-site": "cross-site",
+            "sec-fetch-mode": "cors",
+            "sec-fetch-dest": "empty",
+            "referer": "https://servicewechat.com/wxc2088c70f666b45e/2/page-frame.html",
+            "accept-language": "en-US,en;q=0.9",
+            "Cookie": "PHPSESSID=562dc39e8e68ad3e76c237f687bd049b; lang=zh-cn",
+        }
+        for i in range(100):
+            # Random pacing between page requests to avoid hammering the API.
+            time.sleep(random.randint(1, 10))
+            try:
+                if self.limit_flag:
+                    AliyunLogger.logging(
+                        code="2000",
+                        platform=self.platform,
+                        mode=self.mode,
+                        env=self.env,
+                        message="本轮已经抓取到足够的数据,自动退出\t{}".format(self.download_cnt),
+                    )
+                    return
+                else:
+                    params = {"mark": "", "page": i + 1}
+                    response = requests.get(
+                        url=base_url,
+                        headers=headers,
+                        params=params,
+                        proxies=tunnel_proxies(),
+                    )
+                    # The API wraps the real payload in an AES-encrypted "data" field.
+                    encrypted_info = response.json()["data"]
+                    decrypted_info = json.loads(
+                        self.cryptor.decrypt(data=encrypted_info)
+                    )
+                    video_list = decrypted_info["list"]
+                    for index, video_obj in enumerate(video_list, 1):
+                        try:
+                            AliyunLogger.logging(
+                                code="1001",
+                                platform=self.platform,
+                                mode=self.mode,
+                                env=self.env,
+                                message="扫描到一条视频",
+                                data=video_obj,
+                            )
+                            self.process_video_obj(video_obj)
+                        except Exception as e:
+                            # One bad item must not abort the whole page.
+                            AliyunLogger.logging(
+                                code="3000",
+                                platform=self.platform,
+                                mode=self.mode,
+                                env=self.env,
+                                data=video_obj,
+                                message="抓取第{}条的时候出现问题, 报错信息是{}".format(index, e),
+                            )
+            except Exception as e:
+                # Page-level failures (network, decrypt, JSON) are logged and
+                # the loop moves on to the next page.
+                AliyunLogger.logging(
+                    code="3000",
+                    platform=self.platform,
+                    mode=self.mode,
+                    env=self.env,
+                    message="抓取第{}页时候出现错误, 报错信息是{}".format(i + 1, e),
+                )
+
+    def process_video_obj(self, video_obj):
+        """Map one raw feed item onto a VideoItem, validate it, and send to ETL.
+
+        A random publishing account is picked from user_list for each item.
+        """
+        trace_id = self.platform + str(uuid.uuid1())
+        our_user = random.choice(self.user_list)
+        publish_time_stamp = datetime.datetime.strptime(
+            video_obj["create_at"], "%Y-%m-%d %H:%M:%S"
+        ).timestamp()
+        item = VideoItem()
+        item.add_video_info("user_id", our_user["uid"])
+        item.add_video_info("user_name", our_user["nick_name"])
+        item.add_video_info("video_id", video_obj["id"])
+        item.add_video_info("video_title", video_obj["name"])
+        item.add_video_info("publish_time_str", video_obj["create_at"])
+        item.add_video_info("publish_time_stamp", int(publish_time_stamp))
+        # NOTE(review): video_url is taken from the 'cover' field — looks like
+        # the API serves the media URL there, but confirm against a response.
+        item.add_video_info("video_url", video_obj["cover"])
+        # Cover is derived from the same URL via a frame-grab query suffix.
+        item.add_video_info(
+            "cover_url", video_obj["cover"] + "&vframe/png/offset/1/w/200"
+        )
+        item.add_video_info("like_cnt", video_obj["num_like"])
+        item.add_video_info("play_cnt", video_obj["num_read"])
+        item.add_video_info("comment_cnt", video_obj["num_comment"])
+        item.add_video_info("out_video_id", video_obj["id"])
+        item.add_video_info("platform", self.platform)
+        item.add_video_info("strategy", self.mode)
+        item.add_video_info("session", "{}-{}".format(self.platform, int(time.time())))
+        mq_obj = item.produce_item()
+        pipeline = PiaoQuanPipeline(
+            platform=self.platform,
+            mode=self.mode,
+            rule_dict=self.rule_dict,
+            env=self.env,
+            item=mq_obj,
+            trace_id=trace_id,
+        )
+        if pipeline.process_item():
+            self.download_cnt += 1
+            # print(mq_obj)
+            self.mq.send_msg(mq_obj)
+            AliyunLogger.logging(
+                code="1002",
+                platform=self.platform,
+                mode=self.mode,
+                env=self.env,
+                message="成功发送至 ETL",
+                data=mq_obj,
+            )
+            # Quota defaults to 200 when the rule does not set videos_cnt.min.
+            if self.download_cnt >= int(
+                self.rule_dict.get("videos_cnt", {}).get("min", 200)
+            ):
+                self.limit_flag = True
+
+
+if __name__ == '__main__':
+    # Ad-hoc manual run with dummy publishing accounts and no rule limits.
+    S = ShanHuZhuFuRecommend(
+        platform="shanhuzhufu",
+        mode="recommend",
+        env="dev",
+        rule_dict={},
+        user_list=[{'nick_name': "Ivring", 'uid': "1997"}, {'nick_name': "paul", 'uid': "1998"}]
+    )
+    S.get_video_list()

+ 7 - 7
xiaoniangaoplus/xiaoniangaoplus/xiaoniangao_plus_get_userid.py

@@ -38,7 +38,7 @@ class XiaoNianGaoPlusRecommend:
 
     def __init__(self, log_type, crawler, env, rule_dict, our_uid):
         self.mq = None
-        self.platform = "小年糕"
+        self.platform = "小年糕+主页账号ID"
         self.download_cnt = 0
         self.element_list = []
         self.count = 0
@@ -49,9 +49,9 @@ class XiaoNianGaoPlusRecommend:
         self.rule_dict = rule_dict
         self.our_uid = our_uid
         if self.env == "dev":
-            chromedriverExecutable = "/Users/tzld/Downloads/chromedriver_V111/chromedriver"
+            chromedriverExecutable = "/Users/piaoquan/Downloads/chromedriver"
         else:
-            chromedriverExecutable = "/Users/tzld/Downloads/chromedriver_v111/chromedriver"
+            chromedriverExecutable = "/Users/piaoquan/Downloads/chromedriver"
 
         Common.logger(self.log_type, self.crawler).info("启动微信")
         # 微信的配置文件
@@ -187,14 +187,14 @@ class XiaoNianGaoPlusRecommend:
         Common.logger(self.log_type, self.crawler).info("点击标题,进入视频详情页")
         self.get_video_url(video_title_element)
 
-        video_mid_elements = self.search_elements("//wx-view[@class='bar--navBar-content-capsule']")
+        video_mid_elements = self.search_elements("//wx-view[@class='bar--navBar-content-capsule-wrap']")
         mid = int(video_mid_elements[0].get_attribute("data-mid"))
         repeat_video_id= self.repeat_video_id(mid)
         data_list = []
         if repeat_video_id != 0:
             Common.logger(self.log_type, self.crawler).info(f"该用户已经存在")
-            status = 1
-            self.insert_user(mid, user_name, data_list, status)
+            # status = 0
+            # self.insert_user(mid, user_name, data_list, status)
 
             self.driver.press_keycode(AndroidKey.BACK)
             return
@@ -282,7 +282,7 @@ class XiaoNianGaoPlusRecommend:
 
 
     def repeat_video_id(self,mid):
-        sql = f"SELECT `uid`  FROM `crawler_user_v3` WHERE  `source` = 'xiaoniangao'  and `uid` = {mid}"
+        sql = f"SELECT `link`  FROM `crawler_user_v3` WHERE  `source` = 'xiaoniangao'  and `link` = {mid}"
         repeat_video_id = MysqlHelper.get_values(self.log_type, self.crawler, sql, self.env)
         return len(repeat_video_id)
 

+ 20 - 16
xiaoniangaoplus/xiaoniangaoplus/xiaoniangao_plus_scheduling2.py

@@ -3,6 +3,7 @@
 # @Time: 2023/9/27
 import json
 import os
+import random
 import sys
 import time
 import uuid
@@ -39,7 +40,7 @@ class XiaoNianGaoPlusRecommend:
 
     def __init__(self, log_type, crawler, env, rule_dict, our_uid):
         self.mq = None
-        self.platform = "小年糕"
+        self.platform = "xiaoniangaoplus"
         self.download_cnt = 0
         self.element_list = []
         self.count = 0
@@ -50,17 +51,17 @@ class XiaoNianGaoPlusRecommend:
         self.rule_dict = rule_dict
         self.our_uid = our_uid
         if self.env == "dev":
-            chromedriverExecutable = "/Users/tzld/Downloads/chromedriver_V111/chromedriver"
+            chromedriverExecutable = "/Users/piaoquan/Downloads/chromedriver"
         else:
-            chromedriverExecutable = "/Users/a123456/Downloads/chromedriver_v111/chromedriver"
+            chromedriverExecutable = "/Users/piaoquan/Downloads/chromedriver"
 
         Common.logger(self.log_type, self.crawler).info("启动微信")
-        Common.logging(self.log_type, self.crawler, self.env, '启动微信')
+        # Common.logging(self.log_type, self.crawler, self.env, '启动微信')
         # 微信的配置文件
         caps = {
             "platformName": "Android",
             "devicesName": "Android",
-            # "platformVersion": "13",
+            # "platformVersion": "11",
             # "udid": "emulator-5554",
             "appPackage": "com.tencent.mm",
             "appActivity": ".ui.LauncherUI",
@@ -80,13 +81,14 @@ class XiaoNianGaoPlusRecommend:
         }
         try:
             self.driver = webdriver.Remote("http://localhost:4723/wd/hub", caps)
-        except:
+        except Exception as e:
+            print(e)
             AliyunLogger.logging(
                 code="3002",
                 platform=self.platform,
                 mode=self.log_type,
                 env=self.env,
-                message="appium 启动异常"
+                message=f'appium 启动异常: {e}'
             )
             return
         self.driver.implicitly_wait(30)
@@ -95,7 +97,7 @@ class XiaoNianGaoPlusRecommend:
             try:
                 if self.driver.find_elements(By.ID, "com.tencent.mm:id/f2s"):
                     Common.logger(self.log_type, self.crawler).info("微信启动成功")
-                    Common.logging(self.log_type, self.crawler, self.env, '微信启动成功')
+                    # Common.logging(self.log_type, self.crawler, self.env, '微信启动成功')
                     AliyunLogger.logging(
                         code="1000",
                         platform=self.platform,
@@ -106,7 +108,7 @@ class XiaoNianGaoPlusRecommend:
                     break
                 elif self.driver.find_element(By.ID, "com.android.systemui:id/dismiss_view"):
                     Common.logger(self.log_type, self.crawler).info("发现并关闭系统下拉菜单")
-                    Common.logging(self.log_type, self.crawler, self.env, '发现并关闭系统下拉菜单')
+                    # Common.logging(self.log_type, self.crawler, self.env, '发现并关闭系统下拉菜单')
                     AliyunLogger.logging(
                         code="1000",
                         platform=self.platform,
@@ -171,7 +173,7 @@ class XiaoNianGaoPlusRecommend:
             try:
                 self.driver.find_element(By.XPATH, xpath)
                 Common.logger(self.log_type, self.crawler).info("切换到WebView成功\n")
-                Common.logging(self.log_type, self.crawler, self.env, '切换到WebView成功\n')
+                # Common.logging(self.log_type, self.crawler, self.env, '切换到WebView成功\n')
                 AliyunLogger.logging(
                     code="1000",
                     platform=self.platform,
@@ -211,8 +213,8 @@ class XiaoNianGaoPlusRecommend:
             time.sleep(10)
             video_url_elements = self.search_elements(
                 '//wx-video[@class="dynamic-index--video-item dynamic-index--video"]')
-            if video_url_elements:
-                return video_url_elements[0].get_attribute("src")
+            Common.logger(self.log_type, self.crawler).info(f"{video_url_elements[0].get_attribute('src')}")
+            return video_url_elements[0].get_attribute('src')
 
     def parse_detail(self, index):
         page_source = self.driver.page_source
@@ -310,7 +312,7 @@ class XiaoNianGaoPlusRecommend:
             if video_title_element is None:
                 return
             Common.logger(self.log_type, self.crawler).info("点击标题,进入视频详情页")
-            Common.logging(self.log_type, self.crawler, self.env, "点击标题,进入视频详情页")
+            # Common.logging(self.log_type, self.crawler, self.env, "点击标题,进入视频详情页")
             AliyunLogger.logging(
                 code="1000",
                 platform=self.platform,
@@ -335,12 +337,14 @@ class XiaoNianGaoPlusRecommend:
             # print(video_dict)
             self.download_cnt += 1
             self.driver.press_keycode(AndroidKey.BACK)
+            # self.driver.back()
             time.sleep(5)
 
     def get_video_info(self, video_element):
         try:
             self.get_video_info_2(video_element)
         except Exception as e:
+            self.driver.press_keycode(AndroidKey.BACK)
             Common.logger(self.log_type, self.crawler).error(f"抓取单条视频异常:{e}\n")
             AliyunLogger.logging(
                 code="3001",
@@ -360,7 +364,7 @@ class XiaoNianGaoPlusRecommend:
         page = 0
         if self.search_elements('//*[@class="list-list--list"]') is None:
             Common.logger(self.log_type, self.crawler).info("窗口已销毁\n")
-            Common.logging(self.log_type, self.crawler, self.env, '窗口已销毁\n')
+            # Common.logging(self.log_type, self.crawler, self.env, '窗口已销毁\n')
             AliyunLogger.logging(
                 code="3000",
                 platform=self.platform,
@@ -386,7 +390,7 @@ class XiaoNianGaoPlusRecommend:
         print("下滑完成")
         # time.sleep(100)
         Common.logger(self.log_type, self.crawler).info("已抓取完一组,休眠 5 秒\n")
-        Common.logging(self.log_type, self.crawler, self.env, "已抓取完一组,休眠 5 秒\n")
+        # Common.logging(self.log_type, self.crawler, self.env, "已抓取完一组,休眠 5 秒\n")
         AliyunLogger.logging(
             code="1000",
             platform=self.platform,
@@ -403,7 +407,7 @@ def run():
                   "favorite_cnt": {"min": 0, "max": 0},
                   "videos_cnt": {"min": 5000, "max": 0},
                   "share_cnt": {"min": 0, "max": 0}}
-    XiaoNianGaoPlusRecommend("recommend", "xiaoniangao", "dev", rule_dict1, 6267141)
+    XiaoNianGaoPlusRecommend("recommend", "xiaoniangaoplus", "prod", rule_dict1, [64120158, 64120157, 63676778])
 
 
 if __name__ == "__main__":

+ 20 - 0
xiaoniangaoplus/xiaoniangaoplus/xng_scheduling.py

@@ -0,0 +1,20 @@
+import os
+import time
+import schedule
+
+
+def run_xng_plus():
+    """Launch the XiaoNianGao+ recommend crawler as a child process (blocking)."""
+    # os.system("ps aux | grep xiaoniangao | grep -v grep | awk '{ print $2}' | xargs kill -9")
+    os.system("python3 xiaoniangao_plus_scheduling2.py")
+
+
+def run_xng_rule():
+    """Kill any running xiaoniangao crawler processes, then launch the user-id job."""
+    os.system("ps aux | grep xiaoniangao | grep -v grep | awk '{ print $2}' | xargs kill -9")
+    os.system("python3 xiaoniangao_plus_get_userid.py")
+
+
+if __name__ == "__main__":
+    # Only run_xng_plus is scheduled; run_xng_rule is defined above but never
+    # registered — presumably intentional, TODO confirm.
+    schedule.every().day.at("19:34").do(run_xng_plus)
+    while True:
+        schedule.run_pending()
+        time.sleep(1)

+ 74 - 29
xiaoniangaoplus/xiaoniangaoplus_main/run_xngplus_recommend.py

@@ -8,12 +8,14 @@ from mq_http_sdk.mq_consumer import *
 from mq_http_sdk.mq_exception import MQExceptionBase
 import multiprocessing
 
+
 sys.path.append(os.getcwd())
 from common.public import get_consumer, ack_message, task_fun_mq, get_rule_from_mysql
 from common.common import Common
 from common.scheduling_db import MysqlHelper
 # from xiaoniangaoplus.xiaoniangaoplus.xiaoniangao_plus_scheduling import XiaoNianGaoPlusRecommend
 from xiaoniangaoplus.xiaoniangaoplus.xiaoniangao_plus_scheduling2 import XiaoNianGaoPlusRecommend
+from common import AliyunLogger
 
 
 def run(args1, args2, args3, args4, args5):
@@ -27,6 +29,8 @@ def run(args1, args2, args3, args4, args5):
 
 
 def main(log_type, crawler, topic_name, group_id, env):
+    topic_name = "xngplus_recommend_prod"
+    group_id = "xngplus_recommend_prod"
     consumer = get_consumer(topic_name, group_id)
     # 长轮询表示如果Topic没有消息,则客户端请求会在服务端挂起3秒,3秒内如果有消息可以消费则立即返回响应。
     # 长轮询时间3秒(最多可设置为30秒)。
@@ -37,10 +41,16 @@ def main(log_type, crawler, topic_name, group_id, env):
                                           f'WaitSeconds:{wait_seconds}\n'
                                           f'TopicName:{topic_name}\n'
                                           f'MQConsumer:{group_id}')
-    Common.logging(log_type, crawler, env, f'{10 * "="}Consume And Ack Message From Topic{10 * "="}\n'
-                                           f'WaitSeconds:{wait_seconds}\n'
-                                           f'TopicName:{topic_name}\n'
-                                           f'MQConsumer:{group_id}')
+    AliyunLogger.logging(
+        code="1000",
+        platform=log_type,
+        mode=crawler,
+        env=env,
+        message=f'{10 * "="}Consume And Ack Message From Topic{10 * "="}\n'
+                f"WaitSeconds:{wait_seconds}\n"
+                f"TopicName:{topic_name}\n"
+                f"MQConsumer:{group_id}",
+    )
     while True:
         try:
             # 长轮询消费消息。
@@ -57,16 +67,22 @@ def main(log_type, crawler, topic_name, group_id, env):
                                                       f"NextConsumeTime:{msg.next_consume_time}\n"
                                                       f"ReceiptHandle:{msg.receipt_handle}\n"
                                                       f"Properties:{msg.properties}")
-                Common.logging(log_type, crawler, env, f"Receive\n"
-                                                       f"MessageId:{msg.message_id}\n"
-                                                       f"MessageBodyMD5:{msg.message_body_md5}\n"
-                                                       f"MessageTag:{msg.message_tag}\n"
-                                                       f"ConsumedTimes:{msg.consumed_times}\n"
-                                                       f"PublishTime:{msg.publish_time}\n"
-                                                       f"Body:{msg.message_body}\n"
-                                                       f"NextConsumeTime:{msg.next_consume_time}\n"
-                                                       f"ReceiptHandle:{msg.receipt_handle}\n"
-                                                       f"Properties:{msg.properties}")
+                AliyunLogger.logging(
+                    code="1000",
+                    platform=log_type,
+                    mode=crawler,
+                    env=env,
+                    message=f"Receive\n"
+                            f"MessageId:{msg.message_id}\n"
+                            f"MessageBodyMD5:{msg.message_body_md5}\n"
+                            f"MessageTag:{msg.message_tag}\n"
+                            f"ConsumedTimes:{msg.consumed_times}\n"
+                            f"PublishTime:{msg.publish_time}\n"
+                            f"Body:{msg.message_body}\n"
+                            f"NextConsumeTime:{msg.next_consume_time}\n"
+                            f"ReceiptHandle:{msg.receipt_handle}\n"
+                            f"Properties:{msg.properties}",
+                )
                 # ack_mq_message
                 ack_message(log_type=log_type, crawler=crawler, recv_msgs=recv_msgs, consumer=consumer)
 
@@ -80,28 +96,34 @@ def main(log_type, crawler, topic_name, group_id, env):
                 for user in user_list:
                     our_uid_list.append(user["uid"])
                 our_uid = random.choice(our_uid_list)
-                Common.logger(log_type, crawler).info(f"调度任务:{task_dict}")
-                Common.logging(log_type, crawler, env, f"调度任务:{task_dict}")
-                # Common.logger(log_type, crawler).info(f"抓取规则:{rule_dict}")
-                # Common.logging(log_type, crawler, env, f"抓取规则:{rule_dict}")
+                AliyunLogger.logging(
+                    "1000", log_type, crawler, env, f"调度任务:{task_dict}"
+                )
+                # 解析 rule_dict
+                rule_dict = task_fun_mq(msg.message_body)["rule_dict"]
+                AliyunLogger.logging(
+                    "1000", log_type, crawler, env, f"抓取规则:{rule_dict}\n"
+                )
                 Common.logger(log_type, crawler).info(f"用户列表:{user_list}\n")
-                Common.logger(log_type, crawler).info(f'开始抓取:{task_dict["taskName"]}\n')
-                Common.logging(log_type, crawler, env, f'开始抓取:{task_dict["taskName"]}\n')
                 new_r = get_rule_from_mysql(task_id=task_id, log_type=log_type, crawler=crawler, env=env)
-                # Common.logger(log_type, crawler).info(f'rule_dict:{new_r}\n')
                 r_d = {}
                 for item in new_r:
                     for k, val in item.items():
                         r_d[k] = val
-                Common.logger(log_type, crawler).info(f"抓取规则:{r_d}")
-                Common.logging(log_type, crawler, env, f"抓取规则:{r_d}")
+
                 process = multiprocessing.Process(
                     target=run,
                     args=(log_type, crawler, env, r_d, our_uid)
                 )
                 process.start()
+                AliyunLogger.logging(
+                    code="1003",
+                    platform=log_type,
+                    mode=crawler,
+                    env=env,
+                    message="成功获取信息,启动爬虫,开始一轮抓取",
+                )
                 print("进程开始")
-
                 while True:
                     if not process.is_alive():
                         print("正在重启")
@@ -117,7 +139,15 @@ def main(log_type, crawler, topic_name, group_id, env):
                         Common.logging(log_type, crawler, env, f"抓取规则:{r_d}")
                         process = multiprocessing.Process(target=run, args=(log_type, crawler, env, r_d, our_uid))
                         process.start()
+                        AliyunLogger.logging(
+                            code="1004",
+                            platform=log_type,
+                            mode=crawler,
+                            env=env,
+                            message="成功抓取完一轮",
+                        )
                     time.sleep(60)
+
                 # XiaoNianGaoPlusRecommend.start_wechat(log_type=log_type,
                 #                                       crawler=crawler,
                 #                                       rule_dict=rule_dict,
@@ -133,12 +163,27 @@ def main(log_type, crawler, topic_name, group_id, env):
         except MQExceptionBase as err:
             # Topic中没有消息可消费。
             if err.type == "MessageNotExist":
-                Common.logger(log_type, crawler).info(f"No new message! RequestId:{err.req_id}\n")
-                Common.logging(log_type, crawler, env, f"No new message! RequestId:{err.req_id}\n")
+                AliyunLogger.logging(
+                    code="1000",
+                    platform=log_type,
+                    mode=crawler,
+                    env=env,
+                    message=f"No new message! RequestId:{err.req_id}\n",
+                )
+                Common.logging(
+                    log_type=log_type,
+                    crawler=crawler,
+                    env=env,
+                    message=f"No new message! RequestId:{err.req_id}\n",
+                )
                 continue
-
-            Common.logger(log_type, crawler).info(f"Consume Message Fail! Exception:{err}\n")
-            Common.logging(log_type, crawler, env, f"Consume Message Fail! Exception:{err}\n")
+            AliyunLogger.logging(
+                code="1000",
+                platform=log_type,
+                mode=crawler,
+                env=env,
+                message=f"Consume Message Fail! Exception:{err}\n",
+            )
             time.sleep(2)
             continue
 

+ 2 - 0
xiaoniangaoplus/xiaoniangaoplus_main/run_xngrule_recommend.py

@@ -29,6 +29,8 @@ def run(args1, args2, args3, args4, args5):
 class Main:
     @classmethod
     def main(cls, log_type, crawler, topic_name, group_id, env):
+        topic_name = "xngrule_recommend_prod"
+        group_id = "xngrule_recommend_prod"
         consumer = get_consumer(topic_name, group_id)
         # 长轮询表示如果Topic没有消息,则客户端请求会在服务端挂起3秒,3秒内如果有消息可以消费则立即返回响应。
         # 长轮询时间3秒(最多可设置为30秒)。

+ 1 - 1
zhongmiaoyinxin/zhongmiaoyinxin_recommend/zhongmiaoyinxin_recommend_new.py

@@ -37,7 +37,7 @@ class ZMYXRecommend:
         if self.env == "dev":
             chromedriverExecutable = "/Users/luojunhui/Downloads/chromedriver_V111/chromedriver"
         else:
-            chromedriverExecutable = '/Users/luojunhui/Downloads/chromedriver_V111/chromedriver'  # Mac 爬虫机器
+            chromedriverExecutable = "/Users/piaoquan/Downloads/chromedriver"
         # 微信的配置文件
         caps = {
             "platformName": "Android",  # 手机操作系统 Android / iOS

+ 50 - 18
zhufuquanzi/zhufuquanzi_main/run_zfqz_recommend.py

@@ -10,11 +10,13 @@ from mq_http_sdk.mq_client import *
 from mq_http_sdk.mq_consumer import *
 from mq_http_sdk.mq_exception import MQExceptionBase
 
+
 sys.path.append(os.getcwd())
 from common.common import Common
 from common.public import get_consumer, ack_message, task_fun_mq
 from common.scheduling_db import MysqlHelper
 from zhufuquanzi.zhufuquanzi_recommend.zhufuquanzi_recommend_new import ZFQZRecommend
+from common import AliyunLogger
 
 
 def run(args1, args2, args3, args4, args5):
@@ -28,6 +30,8 @@ def run(args1, args2, args3, args4, args5):
 class ZFQZMain:
     @classmethod
     def zhufuquanzi_main(cls, log_type, crawler, topic_name, group_id, env):
+        group_id = "zfqz_recommend_prod"
+        topic_name = "zfqz_recommend_prod"
         consumer = get_consumer(topic_name, group_id)
         # 长轮询表示如果Topic没有消息,则客户端请求会在服务端挂起3秒,3秒内如果有消息可以消费则立即返回响应。
         # 长轮询时间3秒(最多可设置为30秒)。
@@ -38,10 +42,16 @@ class ZFQZMain:
                                               f'WaitSeconds:{wait_seconds}\n'
                                               f'TopicName:{topic_name}\n'
                                               f'MQConsumer:{group_id}')
-        Common.logging(log_type, crawler, env, f'{10 * "="}Consume And Ack Message From Topic{10 * "="}\n'
-                                               f'WaitSeconds:{wait_seconds}\n'
-                                               f'TopicName:{topic_name}\n'
-                                               f'MQConsumer:{group_id}')
+        AliyunLogger.logging(
+            code="1000",
+            platform=log_type,
+            mode=crawler,
+            env=env,
+            message=f'{10 * "="}Consume And Ack Message From Topic{10 * "="}\n'
+                    f"WaitSeconds:{wait_seconds}\n"
+                    f"TopicName:{topic_name}\n"
+                    f"MQConsumer:{group_id}",
+        )
         while True:
             try:
                 # 长轮询消费消息。
@@ -57,16 +67,22 @@ class ZFQZMain:
                                                           f"NextConsumeTime:{msg.next_consume_time}\n"
                                                           f"ReceiptHandle:{msg.receipt_handle}\n"
                                                           f"Properties:{msg.properties}")
-                    Common.logging(log_type, crawler, env, f"Receive\n"
-                                                           f"MessageId:{msg.message_id}\n"
-                                                           f"MessageBodyMD5:{msg.message_body_md5}\n"
-                                                           f"MessageTag:{msg.message_tag}\n"
-                                                           f"ConsumedTimes:{msg.consumed_times}\n"
-                                                           f"PublishTime:{msg.publish_time}\n"
-                                                           f"Body:{msg.message_body}\n"
-                                                           f"NextConsumeTime:{msg.next_consume_time}\n"
-                                                           f"ReceiptHandle:{msg.receipt_handle}\n"
-                                                           f"Properties:{msg.properties}")
+                    AliyunLogger.logging(
+                        code="1000",
+                        platform=log_type,
+                        mode=crawler,
+                        env=env,
+                        message=f"Receive\n"
+                                f"MessageId:{msg.message_id}\n"
+                                f"MessageBodyMD5:{msg.message_body_md5}\n"
+                                f"MessageTag:{msg.message_tag}\n"
+                                f"ConsumedTimes:{msg.consumed_times}\n"
+                                f"PublishTime:{msg.publish_time}\n"
+                                f"Body:{msg.message_body}\n"
+                                f"NextConsumeTime:{msg.next_consume_time}\n"
+                                f"ReceiptHandle:{msg.receipt_handle}\n"
+                                f"Properties:{msg.properties}",
+                    )
                     # ack_mq_message
                     ack_message(log_type=log_type, crawler=crawler, recv_msgs=recv_msgs, consumer=consumer)
 
@@ -81,19 +97,28 @@ class ZFQZMain:
                         our_uid_list.append(user["uid"])
                     our_uid = random.choice(our_uid_list)
                     Common.logger(log_type, crawler).info(f"调度任务:{task_dict}")
-                    Common.logging(log_type, crawler, env, f"调度任务:{task_dict}")
+                    AliyunLogger.logging(
+                        "1000", log_type, crawler, env, f"调度任务:{task_dict}"
+                    )
                     Common.logger(log_type, crawler).info(f"抓取规则:{rule_dict}")
-                    Common.logging(log_type, crawler, env, f"抓取规则:{rule_dict}")
+                    AliyunLogger.logging(
+                        "1000", log_type, crawler, env, f"抓取规则:{rule_dict}\n"
+                    )
                     Common.logger(log_type, crawler).info(f"用户列表:{user_list}\n")
-                    Common.logging(log_type, crawler, env, f"用户列表:{user_list}\n")
                     Common.logger(log_type, crawler).info(f'开始抓取:{task_dict["taskName"]}\n')
-                    Common.logging(log_type, crawler, env, f'开始抓取:{task_dict["taskName"]}\n')
 
                     process = multiprocessing.Process(
                         target=run,
                         args=(log_type, crawler, rule_dict, our_uid, env)
                     )
                     process.start()
+                    AliyunLogger.logging(
+                        code="1003",
+                        platform=log_type,
+                        mode=crawler,
+                        env=env,
+                        message="成功获取信息,启动爬虫,开始一轮抓取",
+                    )
                     print("进程开始")
 
                     for i in range(10):
@@ -108,6 +133,13 @@ class ZFQZMain:
                                 args=(log_type, crawler, rule_dict, our_uid, env)
                             )
                             process.start()
+                            AliyunLogger.logging(
+                                code="1004",
+                                platform=log_type,
+                                mode=crawler,
+                                env=env,
+                                message="成功抓取完一轮",
+                            )
                         time.sleep(60)
 
                     # # 抓取符合规则的视频列表

+ 9 - 7
zhufuquanzi/zhufuquanzi_recommend/zhufuquanzi_recommend_new.py

@@ -16,10 +16,9 @@ from selenium.webdriver.common.by import By
 
 
 sys.path.append(os.getcwd())
-from common import AliyunLogger, PiaoQuanPipeline
+from common import AliyunLogger, PiaoQuanPipeline, get_redirect_url
 from common.common import Common
 from common.mq import MQ
-from common.public import download_rule, get_config_from_mysql
 from common.scheduling_db import MysqlHelper
 
 
@@ -32,12 +31,12 @@ class ZFQZRecommend:
     @classmethod
     def start_wechat(cls, log_type, crawler, env, rule_dict, our_uid):
         if env == "dev":
-            chromedriverExecutable = "/Users/tzld/Downloads/chromedriver_v111/chromedriver"
+            chromedriverExecutable = "/Users/piaoquan/Downloads/chromedriver"
         else:
-            chromedriverExecutable = "/Users/crawler/Downloads/chromedriver_v111/chromedriver"
+            chromedriverExecutable = "/Users/piaoquan/Downloads/chromedriver"
 
         Common.logger(log_type, crawler).info("启动微信")
-        Common.logging(log_type, crawler, env, '启动微信')
+        # Common.logging(log_type, crawler, env, '启动微信')
         caps = {
             "platformName": "Android",
             "devicesName": "Android",
@@ -61,13 +60,13 @@ class ZFQZRecommend:
         }
         try:
             driver = webdriver.Remote("http://localhost:4723/wd/hub", caps)
-        except:
+        except Exception as e:
             AliyunLogger.logging(
                 code="3002",
                 platform=ZFQZRecommend.platform,
                 mode=log_type,
                 env=env,
-                message="appium 启动异常"
+                message="appium 启动异常, 报错原因是{}".format(e)
             )
             return
         driver.implicitly_wait(30)
@@ -302,6 +301,7 @@ class ZFQZRecommend:
                         comment_cnt = int(comment_str)
                     out_video_id = md5(video_title.encode('utf8')).hexdigest()
                     out_user_id = md5(user_name.encode('utf8')).hexdigest()
+                    Common.logger(log_type, crawler).warning(f"视频标题:{video_title},点赞:{like_str},播放:{play_cnt},用户名称:{user_name},")
 
                     video_dict = {
                         "video_title": video_title,
@@ -347,6 +347,7 @@ class ZFQZRecommend:
                             message=f"点击标题,进入视频详情页\n"
                         )
                         video_url = cls.get_video_url(log_type, crawler, driver, video_title_element)
+                        video_url = get_redirect_url(video_url)
                         if video_url is None:
                             driver.press_keycode(AndroidKey.BACK)
                             time.sleep(5)
@@ -367,6 +368,7 @@ class ZFQZRecommend:
                         cls.swipe_up(driver)
                 except Exception as e:
                     Common.logger(log_type, crawler).error(f"抓取单条视频异常:{e}\n")
+                    driver.press_keycode(AndroidKey.BACK)
                     AliyunLogger.logging(
                         code="3001",
                         platform=ZFQZRecommend.platform,

+ 155 - 0
zhuwanwufusu/zhuwanwufusu_main/run_zwwfs_recommend.py

@@ -0,0 +1,155 @@
+import argparse
+import time
+import random
+from mq_http_sdk.mq_client import *
+from mq_http_sdk.mq_consumer import *
+from mq_http_sdk.mq_exception import MQExceptionBase
+
+sys.path.append(os.getcwd())
+from common.public import task_fun_mq, get_consumer, ack_message
+from common.scheduling_db import MysqlHelper
+from common import AliyunLogger
+from zhuwanwufusu.zhuwanwufusu_recommend import ZhuWanWuFuSuRecommend
+
+
+def main(log_type, crawler, topic_name, group_id, env):
+    consumer = get_consumer(topic_name, group_id)
+    # 长轮询表示如果Topic没有消息,则客户端请求会在服务端挂起3秒,3秒内如果有消息可以消费则立即返回响应。
+    # 长轮询时间3秒(最多可设置为30秒)。
+    wait_seconds = 30
+    # 一次最多消费3条(最多可设置为16条)。
+    batch = 1
+    AliyunLogger.logging(
+        code="1000",
+        platform=crawler,
+        mode=log_type,
+        env=env,
+        message=f'{10 * "="}Consume And Ack Message From Topic{10 * "="}\n'
+        f"WaitSeconds:{wait_seconds}\n"
+        f"TopicName:{topic_name}\n"
+        f"MQConsumer:{group_id}",
+    )
+    while True:
+        try:
+            # 长轮询消费消息。
+            recv_msgs = consumer.consume_message(batch, wait_seconds)
+            for msg in recv_msgs:
+                AliyunLogger.logging(
+                    code="1000",
+                    platform=crawler,
+                    mode=log_type,
+                    env=env,
+                    message=f"Receive\n"
+                    f"MessageId:{msg.message_id}\n"
+                    f"MessageBodyMD5:{msg.message_body_md5}\n"
+                    f"MessageTag:{msg.message_tag}\n"
+                    f"ConsumedTimes:{msg.consumed_times}\n"
+                    f"PublishTime:{msg.publish_time}\n"
+                    f"Body:{msg.message_body}\n"
+                    f"NextConsumeTime:{msg.next_consume_time}\n"
+                    f"ReceiptHandle:{msg.receipt_handle}\n"
+                    f"Properties:{msg.properties}",
+                )
+                # ack_mq_message
+                ack_message(
+                    log_type=log_type,
+                    crawler=crawler,
+                    recv_msgs=recv_msgs,
+                    consumer=consumer,
+                )
+                # 解析 task_dict
+                task_dict = task_fun_mq(msg.message_body)["task_dict"]
+                AliyunLogger.logging(
+                    code="1000",
+                    platform=crawler,
+                    mode=log_type,
+                    env=env,
+                    message=f"调度任务:{task_dict}",
+                )
+                # 解析 rule_dict
+                rule_dict = task_fun_mq(msg.message_body)["rule_dict"]
+                AliyunLogger.logging(
+                    code="1000",
+                    platform=crawler,
+                    mode=log_type,
+                    env=env,
+                    message=f"抓取规则:{rule_dict}\n",
+                )
+                # 解析 user_list
+                task_id = task_dict["id"]
+                select_user_sql = (
+                    f"""select * from crawler_user_v3 where task_id={task_id}"""
+                )
+                user_list = MysqlHelper.get_values(
+                    log_type, crawler, select_user_sql, env, action=""
+                )
+                AliyunLogger.logging(
+                    code="1003",
+                    platform=crawler,
+                    mode=log_type,
+                    env=env,
+                    message="开始抓取"
+                )
+                AliyunLogger.logging(
+                    code="1000",
+                    platform=crawler,
+                    mode=log_type,
+                    env=env,
+                    message="开始抓取祝万物复苏——推荐",
+                )
+                main_process = ZhuWanWuFuSuRecommend(
+                    platform=crawler,
+                    mode=log_type,
+                    rule_dict=rule_dict,
+                    user_list=user_list,
+                    env=env
+                )
+                main_process.schedule()
+                AliyunLogger.logging(
+                    code="1000",
+                    platform=crawler,
+                    mode=log_type,
+                    env=env,
+                    message="完成抓取——祝万物复苏",
+                )
+                AliyunLogger.logging(
+                    code="1004", platform=crawler, mode=log_type, env=env, message="结束一轮抓取"
+                )
+
+        except MQExceptionBase as err:
+            # Topic中没有消息可消费。
+            if err.type == "MessageNotExist":
+                AliyunLogger.logging(
+                    code="2000",
+                    platform=crawler,
+                    mode=log_type,
+                    env=env,
+                    message=f"No new message! RequestId:{err.req_id}\n",
+                )
+                continue
+            AliyunLogger.logging(
+                code="2000",
+                platform=crawler,
+                mode=log_type,
+                env=env,
+                message=f"Consume Message Fail! Exception:{err}\n",
+            )
+            time.sleep(2)
+            continue
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()  ## 新建参数解释器对象
+    parser.add_argument("--log_type", type=str)  ## 添加参数,注明参数类型
+    parser.add_argument("--crawler")  ## 添加参数
+    parser.add_argument("--topic_name")  ## 添加参数
+    parser.add_argument("--group_id")  ## 添加参数
+    parser.add_argument("--env")  ## 添加参数
+    args = parser.parse_args()  ### 参数赋值,也可以通过终端赋值
+    main(
+        log_type=args.log_type,
+        crawler=args.crawler,
+        topic_name=args.topic_name,
+        group_id=args.group_id,
+        env=args.env,
+    )

+ 1 - 0
zhuwanwufusu/zhuwanwufusu_recommend/__init__.py

@@ -0,0 +1 @@
+from .zwwfs_recommend import ZhuWanWuFuSuRecommend

+ 249 - 19
zhuwanwufusu/zhuwanwufusu_recommend/zwwfs_recommend.py

@@ -1,6 +1,4 @@
 import os
-import re
-import base64
 import json
 import random
 import sys
@@ -13,20 +11,20 @@ sys.path.append(os.getcwd())
 from common.video_item import VideoItem
 from common import PiaoQuanPipeline, AliyunLogger, tunnel_proxies
 from common.mq import MQ
-from common.scheduling_db import MysqlHelper
+from common.db import MysqlHelper
 from zhuwanwufusu.crypt import AESCipher as AES
 
 
 class ZhuWanWuFuSuRecommend(object):
-    def __init__(self, platform, mode, rule_dict, user_dict, env):
+    def __init__(self, platform, mode, rule_dict, user_list, env):
         self.platform = platform
         self.mode = mode
         self.rule_dict = rule_dict
-        self.user_dict = user_dict
+        self.user_list = user_list
         self.env = env
         self.download_cnt = 0
         self.mq = MQ(topic_name="topic_crawler_etl_" + self.env)
-        self.expire_flag = False
+        self.limit_flag = False
         self.cryptor = AES()
 
     def get_recommend_list(self):
@@ -88,11 +86,88 @@ class ZhuWanWuFuSuRecommend(object):
                     env=self.env,
                     message="抓取第{}页的时候失败, 报错原因是{}".format(page_index, e)
                 )
+            time.sleep(random.randint(5, 10))
+
+    def get_user_videos(self, user_id):
+        """
+        在抓取完推荐页之后,去抓每一个用户的主页视频
+        """
+        url = "https://api.lidongze.cn/jeecg-boot/ugc/getAuthVideoList"
+        headers = {
+            'Host': 'api.lidongze.cn',
+            'xweb_xhr': '1',
+            'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 MicroMessenger/6.8.0(0x16080000) NetType/WIFI MiniProgramEnv/Mac MacWechat/WMPF MacWechat/3.8.4(0x13080410)XWEB/31009',
+            'token': '',
+            'content-type': 'application/json',
+            'accept': '*/*',
+            'referer': 'https://servicewechat.com/wx0afdc2669ed8df2f/3/page-frame.html',
+            'accept-language': 'en-US,en;q=0.9'
+        }
+        page_index = 1
+        total_page = 1
+        while page_index <= total_page:
+            query = {
+                "pageNo": page_index,
+                "pageSize": 10,
+                "authid": user_id
+            }
+            params = {
+                "v": self.cryptor.aes_encrypt(data=json.dumps(query))
+            }
+            response = requests.request("GET", url, headers=headers, params=params, proxies=tunnel_proxies())
+            result = json.loads(self.cryptor.aes_decrypt(response.text))
+            total_page = result['list']['pages']
+            page_index = result['list']['current'] + 1
+            for index, video_temp in enumerate(result['list']['records']):
+                video_id = video_temp['id']
+                detail_query = {
+                    "videoId": video_id
+                }
+                detail_params = {
+                    "v": self.cryptor.aes_encrypt(data=json.dumps(detail_query))
+                }
+                url = "https://api.lidongze.cn/jeecg-boot/ugc/getVideosDataEn"
+                headers = {
+                    'Host': 'api.lidongze.cn',
+                    'xweb_xhr': '1',
+                    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 MicroMessenger/6.8.0(0x16080000) NetType/WIFI MiniProgramEnv/Mac MacWechat/WMPF MacWechat/3.8.4(0x13080410)XWEB/31009',
+                    'token': '',
+                    'content-type': 'application/json',
+                    'accept': '*/*',
+                    'referer': 'https://servicewechat.com/wx0afdc2669ed8df2f/3/page-frame.html',
+                    'accept-language': 'en-US,en;q=0.9'
+                }
+                detail_response = requests.request("GET", url, headers=headers, params=detail_params,
+                                                   proxies=tunnel_proxies())
+                detail_video = json.loads(self.cryptor.aes_decrypt(detail_response.text))
+                if detail_video['success']:
+                    try:
+                        AliyunLogger.logging(
+                            code="1001",
+                            platform=self.platform,
+                            mode=self.mode,
+                            env=self.env,
+                            message="扫描到一条视频",
+                            data=detail_video['data']
+                        )
+                        self.process_video_obj(detail_video['data'])
+                    except Exception as e:
+                        AliyunLogger.logging(
+                            code="3000",
+                            platform=self.platform,
+                            mode=self.mode,
+                            env=self.env,
+                            message="抓取单条视频失败, 该视频位于第{}条报错原因是{}".format(index, e)
+                        )
 
     def process_video_obj(self, video_obj):
         trace_id = self.platform + str(uuid.uuid1())
-        play_cnt = int(video_obj['playnum'].replace("万+", "0000")) if "万+" in video_obj['playnum'] else int(
-            video_obj['playnum'])
+        if video_obj.get("playnum"):
+            play_cnt = int(video_obj['playnum'].replace("万+", "0000")) if "万+" in video_obj['playnum'] else int(
+                video_obj['playnum'])
+        else:
+            play_cnt = 0
+        our_user = random.choice(self.user_list)
         item = VideoItem()
         item.add_video_info("video_id", video_obj['id'])
         item.add_video_info("video_title", video_obj['vname'])
@@ -106,9 +181,10 @@ class ZhuWanWuFuSuRecommend(object):
         item.add_video_info("platform", self.platform)
         item.add_video_info("strategy", self.mode)
         item.add_video_info("session", "{}-{}".format(self.platform, int(time.time())))
-        item.add_video_info("user_id", self.user_dict['uid'])
-        item.add_video_info("user_name", self.user_dict['nick_name'])
-
+        item.add_video_info("user_id", our_user['uid'])
+        item.add_video_info("user_name", our_user['nick_name'])
+        # 把扫描到的账号存到 accounts 表中
+        self.manage_auth_id(out_user_id=video_obj['authid'], out_user_name=video_obj['authname'])
         mq_obj = item.produce_item()
         pipeline = PiaoQuanPipeline(
             platform=self.platform,
@@ -129,14 +205,168 @@ class ZhuWanWuFuSuRecommend(object):
                 message="成功发送至 ETL",
                 data=mq_obj
             )
+            if self.download_cnt >= int(self.rule_dict.get("videos_cnt", {}).get("min", 200)):
+                self.limit_flag = True
+
+    def manage_auth_id(self, out_user_id, out_user_name):
+        """
+        out_user_id: 外站视频的用户 id
+        out_user_name: 外站视频用户名字
+        逻辑: 对新扫描到的视频的用户 id 进行判断,若用户 id 不存在,则把视频 id 存到表中,
+              如果用户 id 存在,则判断用户是否修改名字,若名字修改则更新名字
+        """
+        select_user_sql = f"""select name, name_id from accounts where name_id = "{out_user_id}" and platform = "{self.platform}" and useful = 1 limit 1"""
+        out_user_info = MysqlHelper.get_values(
+            log_type=self.mode,
+            crawler=self.platform,
+            sql=select_user_sql,
+            env=self.env,
+            machine="",
+        )
+        if out_user_info:
+            name, name_id = out_user_info[0]
+            if name == out_user_name:
+                return
+            else:
+                update_sql = f"""update accounts set name = "{out_user_name}" where name_id = "{out_user_id}";"""
+                MysqlHelper.update_values(
+                    log_type=self.mode,
+                    crawler=self.platform,
+                    sql=update_sql,
+                    env=self.env,
+                    machine=""
+                )
+        else:
+            insert_sql = f"""INSERT INTO accounts (name, name_id, platform, useful) values ("{out_user_name}", "{out_user_id}", "{self.platform}", 1 )"""
+            MysqlHelper.update_values(
+                log_type=self.mode,
+                crawler=self.platform,
+                sql=insert_sql,
+                env=self.env,
+                machine="",
+            )
+
+    def get_user_list(self):
+        select_user_sql = f"""select name_id from accounts where platform = "{self.platform}" and useful = 1"""
+        out_user_info = MysqlHelper.get_values(
+            log_type=self.mode,
+            crawler=self.platform,
+            sql=select_user_sql,
+            env=self.env,
+            machine="",
+        )
+        if out_user_info:
+            result = []
+            for i in out_user_info:
+                result.append(i[0])
+            return result
+        else:
+            return []
+
+    def get_detail_video_list(self):
+        url = "https://api.lidongze.cn/jeecg-boot/ugc/getDetailVideoListsEn2"
+        headers = {
+            'Host': 'api.lidongze.cn',
+            'xweb_xhr': '1',
+            'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 MicroMessenger/6.8.0(0x16080000) NetType/WIFI MiniProgramEnv/Mac MacWechat/WMPF MacWechat/3.8.5(0x13080510)XWEB/1100',
+            'token': '',
+            'referer': 'https://servicewechat.com/wx0afdc2669ed8df2f/3/page-frame.html',
+            'accept-language': 'en-US,en;q=0.9'
+        }
+        page_index = 1
+        total_page = 2
+        while page_index <= total_page:
+            try:
+                if self.limit_flag:
+                    AliyunLogger.logging(
+                        code="2000",
+                        platform=self.platform,
+                        mode=self.mode,
+                        env=self.env,
+                        message="本轮已经抓取足够数量的视频"
+                    )
+                    return
+                else:
+                    query = {
+                        "groupId": "1650323161797439489",
+                        "pageNo": page_index,
+                        "pageSize": 10,
+                        "appid": "wx0afdc2669ed8df2f",
+                        "type": 3,
+                        "hxid": "1556555457243828666"
+                    }
+                    params = {
+                        "v": self.cryptor.aes_encrypt(data=json.dumps(query))
+                    }
+                    response = requests.request("GET", url, headers=headers, params=params)
+                    result = json.loads(self.cryptor.aes_decrypt(response.text))
+                    total_page = result['list']['pages']
+                    page_index = result['list']['current'] + 1
+                    for index, video_obj in enumerate(result['list']['records'], 1):
+                        try:
+                            AliyunLogger.logging(
+                                code="1001",
+                                platform=self.platform,
+                                mode=self.mode,
+                                env=self.env,
+                                message="扫描到一条视频",
+                                data=video_obj
+                            )
+                            self.process_video_obj(video_obj)
+                        except Exception as e:
+                            AliyunLogger.logging(
+                                code="3000",
+                                platform=self.platform,
+                                mode=self.mode,
+                                env=self.env,
+                                message="抓取单条视频失败, 该视频位于第{}页第{}条报错原因是{}".format(page_index, index, e)
+                            )
+            except Exception as e:
+                AliyunLogger.logging(
+                    code="3000",
+                    platform=self.platform,
+                    mode=self.mode,
+                    env=self.env,
+                    message="抓取第{}页的时候失败, 报错原因是{}".format(page_index, e)
+                )
+            time.sleep(random.randint(5, 10))
+
+    def schedule(self):
+        """
+        先抓取推荐列表的视频, 等待 2 分钟后抓取 detail 页面,等待 5 分钟后,抓取账号视频
+        """
+        self.get_recommend_list()
+        if self.limit_flag:
+            return
+        time.sleep(2 * 60)
+        self.get_detail_video_list()
+        if self.limit_flag:
+            return
+        time.sleep(5 * 60)
+        self.mode = "author"
+        user_list = self.get_user_list()
+        if user_list:
+            for index, user_id in enumerate(user_list):
+                try:
+                    if self.limit_flag:
+                        AliyunLogger.logging(
+                            code="2000",
+                            platform=self.platform,
+                            mode=self.mode,
+                            env=self.env,
+                            message="本轮已经抓取足够数量的视频"
+                        )
+                        return
+                    self.get_user_videos(user_id=user_id)
+                except Exception as e:
+                    AliyunLogger.logging(
+                        code="3000",
+                        platform=self.platform,
+                        mode=self.mode,
+                        env=self.env,
+                        message="抓取账号视频出现异常,账号 id 是{}, 报错原因是{}".format(user_id, e)
+                    )
 
 
 if __name__ == '__main__':
-    Z = ZhuWanWuFuSuRecommend(
-        platform="zhuwanwufusu",
-        mode="recommend",
-        rule_dict={},
-        user_dict={"uid": 123456, "nick_name": "luojunhuishuaige"},
-        env="prod"
-    )
-    Z.get_recommend_list()
+    pass

+ 4 - 7
zhuwanwufusu/zhuwanwufusu_recommend/zwwfs_recommend_test.py

@@ -1,8 +1,5 @@
 import os
-import re
-import base64
 import json
-import random
 import sys
 import time
 import uuid
@@ -11,9 +8,9 @@ import requests
 
 sys.path.append(os.getcwd())
 from common.video_item import VideoItem
-from common import PiaoQuanPipeline, AliyunLogger, tunnel_proxies
+from common import tunnel_proxies
+from common.pipeline import PiaoQuanPipelineTest
 from common.mq import MQ
-from common.scheduling_db import MysqlHelper
 from zhuwanwufusu.crypt import AESCipher as AES
 
 
@@ -84,7 +81,7 @@ class ZhuWanWuFuSuRecommend(object):
         item.add_video_info("user_name", self.user_dict['nick_name'])
 
         mq_obj = item.produce_item()
-        pipeline = PiaoQuanPipeline(
+        pipeline = PiaoQuanPipelineTest(
             platform=self.platform,
             mode=self.mode,
             rule_dict=self.rule_dict,
@@ -104,6 +101,6 @@ if __name__ == '__main__':
         mode="recommend",
         rule_dict={},
         user_dict={"uid": 123456, "nick_name": "luojunhuishuaige"},
-        env="dev"
+        env="prod"
     )
     Z.get_recommend_list()

Some files were not shown because too many files changed in this diff