Prechádzať zdrojové kódy

Merge remote-tracking branch 'origin/master'

piaoquan 1 rok pred
rodič
commit
e1d348c4c3

+ 1 - 0
analysis/__init__.py

@@ -0,0 +1 @@
+from .analysis import Drawer

+ 42 - 3
analysis/analysis.py

@@ -1,11 +1,13 @@
-import pandas as pd
 import json
 import pymysql
+import pyecharts.options as opts
+from pyecharts.charts import Line
 
 
 class Analysis(object):
     def __init__(self):
-        self.platform_list = ["xiaoniangao", "gongzhonghao", "shipinhao", "douyin", "kuaishou", "fuqiwang", "haitunzhufu"]
+        self.platform_list = ["xiaoniangao", "gongzhonghao", "shipinhao", "douyin", "kuaishou", "fuqiwang",
+                              "haitunzhufu", "haokanshipin", "benshanzhufu", "zhongmiaoyinxin"]
         self.date_last = "2023-11-01"
         self.out_put = {}
 
@@ -36,7 +38,44 @@ class Analysis(object):
         connection.close()
 
 
+class Drawer(object):
+    def __init__(self, json_obj):
+        self.ori_data = json_obj
+        self.x_list = ["2023-11-{:02}".format(i) for i in range(1, 24)]
+
+    def draw_line(self):
+        line = Line()
+        line.add_xaxis(xaxis_data=self.x_list)
+        for key in self.ori_data:
+            # print(key, self.ori_data[key])
+            each_obj = self.ori_data[key]
+            line.add_yaxis(
+                series_name=key,
+                stack="Total",
+                y_axis=[each_obj.get(i, 0) for i in self.x_list],
+                label_opts=opts.LabelOpts(is_show=False),
+                is_smooth=True
+            )
+        line.set_global_opts(
+            title_opts=opts.TitleOpts(title="入库量折线图", item_gap=10, padding=10, pos_top="10"),
+            legend_opts=opts.LegendOpts(pos_right=True),
+            tooltip_opts=opts.TooltipOpts(trigger="axis", padding=100),
+            yaxis_opts=opts.AxisOpts(
+                type_="value",
+                axistick_opts=opts.AxisTickOpts(is_show=True),
+                splitline_opts=opts.SplitLineOpts(is_show=True),
+                offset=10
+            ),
+            xaxis_opts=opts.AxisOpts(type_="category", boundary_gap=True, offset=10),
+
+        )
+        line.render("test.html")
+
+
 if __name__ == '__main__':
     A = Analysis()
     A.analysis_videos()
-    print(json.dumps(A.out_put, ensure_ascii=False, indent=4))
+    result_data = A.out_put
+    print(json.dumps(result_data['gongzhonghao'], ensure_ascii=False, indent=4))
+    D = Drawer(result_data)
+    D.draw_line()

+ 2 - 1
common/__init__.py

@@ -1,3 +1,4 @@
 from .aliyun_log import AliyunLogger
 from .redirect_url import get_redirect_url
-from .pipeline import PiaoQuanPipeline
+from .pipeline import PiaoQuanPipeline
+from .proxy import tunnel_proxies

+ 8 - 9
common/pipeline.py

@@ -18,13 +18,16 @@ class PiaoQuanPipeline:
         # 判断发布时间
         publish_time_stamp = self.item["publish_time_stamp"]
         update_time_stamp = self.item["update_time_stamp"]
+        max_d = self.rule_dict.get("period", {}).get("max", 1000)
+        min_d = self.rule_dict.get("period", {}).get("min", 1000)
+        days = max_d if max_d > min_d else min_d
         if self.platform == "gongzhonghao":
             if (
                 int(time.time()) - publish_time_stamp
-                > 3600 * 24 * int(self.rule_dict.get("period", {}).get("max", 1000))
+                > 3600 * 24 * days
             ) and (
                 int(time.time()) - update_time_stamp
-                > 3600 * 24 * int(self.rule_dict.get("period", {}).get("max", 1000))
+                > 3600 * 24 * days
             ):
                 AliyunLogger.logging(
                     code="2004",
@@ -33,15 +36,13 @@ class PiaoQuanPipeline:
                     mode=self.mode,
                     env=self.env,
                     data=self.item,
-                    message="发布时间超过{}天".format(
-                        int(self.rule_dict.get("period", {}).get("max", 1000))
-                    ),
+                    message="发布时间超过{}天".format(days),
                 )
                 return False
         else:
             if (
                 int(time.time()) - publish_time_stamp
-                > 3600 * 24 * int(self.rule_dict.get("period", {}).get("max", 1000))
+                > 3600 * 24 * days
             ):
                 AliyunLogger.logging(
                     code="2004",
@@ -50,9 +51,7 @@ class PiaoQuanPipeline:
                     mode=self.mode,
                     env=self.env,
                     data=self.item,
-                    message="发布时间超过{}天".format(
-                        int(self.rule_dict.get("period", {}).get("max", 1000))
-                    ),
+                    message="发布时间超过{}天".format(days),
                 )
                 return False
         return True

+ 11 - 0
common/proxy.py

@@ -0,0 +1,11 @@
+def tunnel_proxies():
+    # 隧道域名:端口号
+    tunnel = "q796.kdltps.com:15818"
+    # 用户名密码方式
+    username = "t17772369458618"
+    password = "5zqcjkmy"
+    proxies = {
+        "http": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": tunnel},
+        "https": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": tunnel}
+    }
+    return proxies

+ 24 - 11
haokanshipin/haokanshipin_author/hksp_author.py

@@ -212,17 +212,30 @@ class HaoKanVideoAccount(object):
             self.expire_flag = True
             return
         if pipeline.process_item():
-            self.mq.send_msg(mq_obj)
-            # print(mq_obj)
-            AliyunLogger.logging(
-                code="1002",
-                platform=self.platform,
-                mode=self.mode,
-                env=self.env,
-                message="成功发送至 ETL",
-                data=mq_obj,
-                trace_id=trace_id,
-            )
+            # 再增加一条特殊规则
+            if int(mq_obj['play_cnt']) > 300 and float(mq_obj['like_cnt']) / float(mq_obj['play_cnt']) >= 0.02:
+                self.mq.send_msg(mq_obj)
+                # print(mq_obj)
+                AliyunLogger.logging(
+                    code="1002",
+                    platform=self.platform,
+                    mode=self.mode,
+                    env=self.env,
+                    message="成功发送至 ETL",
+                    data=mq_obj,
+                    trace_id=trace_id,
+                )
+            else:
+                AliyunLogger.logging(
+                    code="2008",
+                    platform=self.platform,
+                    mode=self.mode,
+                    env=self.env,
+                    message="不满足特殊规则",
+                    data=mq_obj
+                )
+
+
 
     def schedule(self):
         """

+ 0 - 0
ip_change/__init__.py


+ 1044 - 0
ip_change/ip_change.py

@@ -0,0 +1,1044 @@
+import ctypes
+from copy import deepcopy
+from datetime import datetime
+from random import choice, randint
+
+import pytz
+import requests
+from apscheduler.schedulers.blocking import BlockingScheduler
+
+URL = 'http://192.168.88.1/jsproxy'
+HEADERS = {
+    'Accept-Language': '',
+    'Content-Type': 'msg',
+    'Cookie': 'username=root',
+}
+
+
+def int_overflow(val) -> int:
+    maxint = 2147483647
+    if not -maxint - 1 <= val <= maxint:
+        val = (val + (maxint + 1)) % (2 * (maxint + 1)) - maxint - 1
+    return val & 0xFFFFFFFF
+
+
+def unsigned_right_shift(n, i) -> int:
+    """实现无符号右移"""
+    if n < 0:
+        n = ctypes.c_uint32(n).value
+    if i < 0:
+        return -int_overflow(n << abs(i))
+    return int_overflow(n >> i)
+
+
+class Buffer(object):
+
+    def __init__(self) -> None:
+        self.MASK_FTYPE = 0xf8000000
+        self.FT_BOOL = int_overflow(0 << 27)
+        self.FT_U32 = int_overflow(1 << 27)
+        self.FT_U64 = int_overflow(2 << 27)
+        self.FT_ADDR6 = int_overflow(3 << 27)
+        self.FT_STRING = int_overflow(4 << 27)
+        self.FT_MESSAGE = int_overflow(5 << 27)
+        self.FT_RAW = int_overflow(6 << 27)
+        self.FT_BOOL_ARRAY = int_overflow(16 << 27)
+        self.FT_U32_ARRAY = int_overflow(17 << 27)
+        self.FT_U64_ARRAY = int_overflow(18 << 27)
+        self.FT_ADDR6_ARRAY = int_overflow(19 << 27)
+        self.FT_STRING_ARRAY = int_overflow(20 << 27)
+        self.FT_MESSAGE_ARRAY = int_overflow(21 << 27)
+        self.FT_RAW_ARRAY = int_overflow(22 << 27)
+        self.FS_SHORT = int_overflow(1 << 24)
+
+        self.arr = [0] * 64 * 1024
+        self.pos = 0
+
+    def msg2buffer(self, msg):
+        self.arr[self.pos] = 0x4d
+        self.pos += 1
+        self.arr[self.pos] = 0x32
+        self.pos += 1
+        for r in msg:
+            pfx = r[0]
+            if pfx == '_':
+                continue
+            val = msg[r]
+            match pfx:
+                case 'b':
+                    self.write_id(self.FT_BOOL | (
+                        self.FS_SHORT if val else 0), r)
+                case 'u':
+                    val = val if val is not None else -1
+                    if 0 <= val < 256:
+                        self.write_id(self.FT_U32 | self.FS_SHORT, r)
+                        self.arr[self.pos] = val
+                        self.pos += 1
+                    else:
+                        self.write_id(self.FT_U32, r)
+                        self.write_32(val)
+                case 'q':
+                    self.write_id(self.FT_U64, r)
+                    self.write_64(val)
+                case 'a':
+                    self.write_id(self.FT_ADDR6, r)
+                    for i in range(16):
+                        self.arr[self.pos] = val[i]
+                        self.pos += 1
+                case 's':
+                    if len(val) < 256:
+                        self.write_id(self.FT_STRING | self.FS_SHORT, r)
+                        self.arr[self.pos] = len(val)
+                        self.pos += 1
+                    else:
+                        self.write_id(self.FT_STRING, r)
+                        self.write_16(len(val))
+                    for i in range(len(val)):
+                        self.arr[self.pos] = ord(val[i])
+                        self.pos += 1
+                case 'r':
+                    if len(val) < 256:
+                        self.write_id(self.FT_RAW | self.FS_SHORT, r)
+                        self.arr[self.pos] = len(val)
+                        self.pos += 1
+                    else:
+                        self.write_id(self.FT_RAW, r)
+                        self.write_16(len(val))
+                        for i in range(len(val)):
+                            self.arr[self.pos] = val[i]
+                            self.pos += 1
+                case 'm':
+                    x = self.msg2buffer(val)
+                    if len(x) < 256:
+                        self.write_id(self.FT_MESSAGE | self.FS_SHORT, r)
+                        self.arr[self.pos] = len(x)
+                        self.pos += 1
+                    else:
+                        self.write_id(self.FT_MESSAGE, r)
+                        self.write_16(len(x))
+                    for item in x[::-1]:
+                        self.arr[self.pos] = item
+                    self.pos += len(x)
+                case 'B':
+                    self.write_id(self.FT_BOOL_ARRAY, r)
+                    self.write_16(len(val))
+                    for i in range(len(val)):
+                        self.arr[self.pos] = val[i]
+                        self.pos += 1
+                case 'U':
+                    self.write_id(self.FT_U32_ARRAY, r)
+                    self.write_16(len(val))
+                    for i in range(len(val)):
+                        self.write_32(val[i])
+                case 'Q':
+                    self.write_id(self.FT_U64_ARRAY, r)
+                    self.write_16(len(val))
+                    for i in range(len(val)):
+                        self.write_64(val[i])
+                case 'A':
+                    self.write_id(self.FT_ADDR6_ARRAY, r)
+                    self.write_16(len(val))
+                    for i in range(len(val)):
+                        for k in range(16):
+                            self.arr[self.pos] = val[i][k]
+                            self.pos += 1
+                case 'S':
+                    self.write_id(self.FT_STRING_ARRAY, r)
+                    self.write_16(len(val))
+                    for i in range(len(val)):
+                        self.write_16(len(val[i]))
+                        for k in range(len(val[i])):
+                            self.arr[self.pos] = ord(val[i][k])
+                            self.pos += 1
+                case 'R':
+                    self.write_id(self.FT_RAW_ARRAY, r)
+                    self.write_16(len(val))
+                    for i in range(len(val)):
+                        self.write_16(len(val[i]))
+                        for k in range(len(val[i])):
+                            self.arr[self.pos] = val[i][k]
+                            self.pos += 1
+                case 'M':
+                    self.write_id(self.FT_MESSAGE_ARRAY, r)
+                    self.write_16(len(val))
+                    for i in range(len(val)):
+                        x = self.msg2buffer(val[i])
+                        self.write_16(len(x))
+                        for item in x[::-1]:
+                            self.arr[self.pos] = item
+                        self.pos += len(x)
+                case _:
+                    return None
+        return self.arr[:self.pos]
+
+    def buffer2msg(self, arr, offset: int = 0):
+        self.arr, self.pos, ret = arr, 2, dict()
+        if self.arr[0] != 0x4d or self.arr[1] != 0x32:
+            return ret
+        while self.pos < len(self.arr):
+            _id = self.read_32()
+            match _id & self.MASK_FTYPE:
+                case self.FT_BOOL:
+                    ret['b' + self.idnum2hex(_id)] = 1 if (_id & self.FS_SHORT) else 0
+                case self.FT_U32:
+                    if _id & self.FS_SHORT:
+                        ret['u' + self.idnum2hex(_id)] = self.arr[self.pos]
+                        self.pos += 1
+                    else:
+                        ret['u' + self.idnum2hex(_id)
+                            ] = self.int2num(self.read_32())
+                case self.FT_U64:
+                    ret['q' + self.idnum2hex(_id)] = self.read_64()
+                case self.FT_ADDR6:
+                    a = []
+                    for i in range(16):
+                        a[i] = self.arr[self.pos]
+                        self.pos += 1
+                    ret['a' + self.idnum2hex(_id)] = a
+                case self.FT_STRING:
+                    length = self.arr[self.pos]
+                    self.pos += 1
+                    if not (_id & self.FS_SHORT):
+                        length |= self.arr[self.pos] << 8
+                        self.pos += 1
+                    s = ''
+                    for i in range(length):
+                        s = s + chr(self.arr[self.pos])
+                        self.pos += 1
+                    ret['s' + self.idnum2hex(_id)] = s
+                case self.FT_RAW:
+                    length = self.arr[self.pos]
+                    self.pos += 1
+                    if not (_id & self.FS_SHORT):
+                        length |= self.arr[self.pos] << 8
+                        self.pos += 1
+                    a = [0] * length
+                    for i in range(length):
+                        a[i] = self.arr[self.pos]
+                        self.pos += 1
+                    ret['r' + self.idnum2hex(_id)] = a
+                case self.FT_MESSAGE:
+                    length = self.arr[self.pos]
+                    self.pos += 1
+                    if not (_id & self.FS_SHORT):
+                        length |= self.arr[self.pos] << 8
+                        self.pos += 1
+                    ret['m' + self.idnum2hex(_id)] = self.buffer2msg(
+                        self.arr[offset + self.pos:offset + self.pos + length])
+                    self.pos += length
+                    offset += self.pos
+                case self.FT_BOOL_ARRAY:
+                    length = self.read_16()
+                    a = [0] * length
+                    for i in range(length):
+                        a[i] = not (not self.arr[self.pos])
+                        self.pos += 1
+                    ret['B' + self.idnum2hex(_id)] = a
+                case self.FT_U32_ARRAY:
+                    length = self.read_16()
+                    a = [0] * length
+                    for i in range(length):
+                        a[i] = self.int2num(self.read_32())
+                    ret['U' + self.idnum2hex(_id)] = a
+                case self.FT_U64_ARRAY:
+                    length = self.read_16()
+                    a = [0] * length
+                    for i in range(length):
+                        a[i] = self.read_64()
+                    ret['Q' + self.idnum2hex(_id)] = a
+                case self.FT_ADDR6_ARRAY:
+                    length = self.read_16()
+                    a = [0] * length
+                    for i in range(length):
+                        x = [0] * 16
+                        for k in range(16):
+                            x[k] = self.arr[self.pos]
+                            self.pos += 1
+                        a[i] = x
+                    ret['A' + self.idnum2hex(_id)] = a
+                case self.FT_STRING_ARRAY:
+                    length = self.read_16()
+                    a = [0] * length
+                    for i in range(length):
+                        x = ''
+                        x_len = self.read_16()
+                        for k in range(x_len):
+                            x = x + chr(self.arr[self.pos])
+                            self.pos += 1
+                        a[i] = x
+                    ret['S' + self.idnum2hex(_id)] = a
+                case self.FT_RAW_ARRAY:
+                    length = self.read_16()
+                    a = [0] * length
+                    for i in range(length):
+                        x_len = self.read_16()
+                        x = [0] * x_len
+                        for k in range(x_len):
+                            x[k] = self.arr[self.pos]
+                            self.pos += 1
+                        a[i] = x
+                    ret['R' + self.idnum2hex(_id)] = a
+                case self.FT_MESSAGE_ARRAY:
+                    length = self.read_16()
+                    a = [0] * length
+                    for i in range(length):
+                        x_len = self.read_16()
+                        a[i] = self.buffer2msg(self.arr[offset + self.pos:offset + self.pos + x_len], offset + self.pos)
+                        self.pos += x_len
+                        offset += self.pos
+                    ret['M' + self.idnum2hex(_id)] = a
+        return ret
+
+    def buffer2msgs(self, arr, offset: int = 0):
+        ret, pos = [], 0
+        while pos + 2 <= len(arr):
+            length = (arr[pos] << 8) | arr[pos + 1]
+            arr[pos] = 0x4d
+            arr[pos + 1] = 0x32
+            msg = self.buffer2msg(arr[:offset + pos + length], offset + pos)
+            pos += length
+            ret.append(msg)
+        return ret
+
+    def write_id(self, id_type, id_str):
+        x = int(id_str[1:], 16)
+        self.arr[self.pos] = x & 0xff
+        self.pos += 1
+        self.arr[self.pos] = (x >> 8) & 0xff
+        self.pos += 1
+        self.arr[self.pos] = (x >> 16) & 0xff
+        self.pos += 1
+        self.arr[self.pos] = (id_type >> 24) & 0xff
+        self.pos += 1
+
+    def write_16(self, val):
+        self.arr[self.pos] = val & 0xff
+        self.pos += 1
+        self.arr[self.pos] = (val >> 8) & 0xff
+        self.pos += 1
+
+    def write_32(self, val):
+        for i in range(4):
+            self.arr[self.pos] = (val >> (i * 8)) & 0xff
+            self.pos += 1
+
+    def write_64(self, val):
+        for i in range(4):
+            self.arr[self.pos] = (val >> (i * 8)) & 0xff
+            self.pos += 1
+        temp = int(val / 4294967296)
+        for i in range(4):
+            self.arr[self.pos] = (temp >> (i * 8)) & 0xff
+            self.pos += 1
+
+    def num2hex(self, ccc):
+        if ccc < 10:
+            return chr(ccc + 48)
+        return chr(ccc + 87)
+
+    def idnum2hex(self, _id):
+        ret = ''
+        for i in range(6):
+            x = (_id >> (20 - (i * 4))) & 0xf
+            if len(ret) == 0 and not x:
+                continue
+            ret = ret + self.num2hex(x)
+        if len(ret) == 0:
+            ret = '0'
+        return ret
+
+    def read_16(self):
+        ret = 0
+        for i in range(2):
+            ret |= int_overflow(self.arr[self.pos] << (i * 8))
+            self.pos += 1
+        return ret
+
+    def read_32(self):
+        ret = 0
+        for i in range(4):
+            ret |= int_overflow(self.arr[self.pos] << (i * 8))
+            self.pos += 1
+        return ret
+
+    def read_64(self):
+        ret = 0
+        for i in range(4):
+            ret |= int_overflow(self.arr[self.pos] << (i * 8))
+            self.pos += 1
+        temp = 0
+        for i in range(4):
+            temp |= int_overflow(self.arr[self.pos] << (i * 8))
+            self.pos += 1
+        return self.int2num(ret) + temp * 4294967296
+
+    def int2num(self, v):
+        return 0x100000000 + v if v < 0 else v
+
+
+class Curve(object):
+
+    @classmethod
+    def curve_a2u(cls, a):
+        r = [0] * 32
+        for i in range(32):
+            r[i >> 1] |= a[31 - i] << (i & 1) * 8
+        return r
+
+    @classmethod
+    def curve_u2a(cls, a):
+        r = [0] * 32
+        for i in range(32):
+            r[31 - i] = (a[i >> 1] >> ((i & 1) * 8)) & 0xff
+        return r
+
+    @classmethod
+    def byte2str(cls, b):
+        b &= 0xff
+        return chr(b if b else 256)
+
+    @classmethod
+    def word2str(cls, w):
+        return cls.byte2str(w >> 24) + cls.byte2str(w >> 16) + cls.byte2str(w >> 8) + cls.byte2str(w)
+
+    @classmethod
+    def str2byte(cls, s, off):
+        return s[off] & 0xff
+
+    @classmethod
+    def str2word(cls, s, off):
+        return int_overflow(cls.str2byte(s, off) << 24) | int_overflow(cls.str2byte(s, off + 1) << 16) | int_overflow(
+            cls.str2byte(s, off + 2) << 8) | int_overflow(cls.str2byte(s, off + 3))
+
+    @classmethod
+    def str2a(cls, s):
+        res = []
+        for i in range(len(s)):
+            res.append(s[i] & 0xff)
+        return res
+
+    @classmethod
+    def a2str(cls, a):
+        x = []
+        for i in range(len(a)):
+            x.append(cls.byte2str(a[i]))
+        return ''.join(x)
+
+    @classmethod
+    def c255lgetbit(cls, n, c):
+        return (n[c >> 4] >> (c & 0xf)) & 1
+
+    @classmethod
+    def c255lzero(cls):
+        return [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
+
+    @classmethod
+    def c255lone(cls):
+        return [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
+
+    @classmethod
+    def c255lbase(cls):
+        return [9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
+
+    @classmethod
+    def c255lsqr8h(cls, a7, a6, a5, a4, a3, a2, a1, a0):
+        r = [0] * 16
+        v = a0 * a0
+        r[0] = v & 0xffff
+        v = (0 | int(v / 0x10000)) + 2 * a0 * a1
+        r[1] = v & 0xffff
+        v = (0 | int(v / 0x10000)) + 2 * a0 * a2 + a1 * a1
+        r[2] = v & 0xffff
+        v = (0 | int(v / 0x10000)) + 2 * a0 * a3 + 2 * a1 * a2
+        r[3] = v & 0xffff
+        v = (0 | int(v / 0x10000)) + 2 * a0 * a4 + 2 * a1 * a3 + a2 * a2
+        r[4] = v & 0xffff
+        v = (0 | int(v / 0x10000)) + 2 * a0 * a5 + 2 * a1 * a4 + 2 * a2 * a3
+        r[5] = v & 0xffff
+        v = (0 | int(v / 0x10000)) + 2 * a0 * a6 + \
+            2 * a1 * a5 + 2 * a2 * a4 + a3 * a3
+        r[6] = v & 0xffff
+        v = (0 | int(v / 0x10000)) + 2 * a0 * a7 + \
+            2 * a1 * a6 + 2 * a2 * a5 + 2 * a3 * a4
+        r[7] = v & 0xffff
+        v = (0 | int(v / 0x10000)) + 2 * a1 * a7 + \
+            2 * a2 * a6 + 2 * a3 * a5 + a4 * a4
+        r[8] = v & 0xffff
+        v = (0 | int(v / 0x10000)) + 2 * a2 * a7 + 2 * a3 * a6 + 2 * a4 * a5
+        r[9] = v & 0xffff
+        v = (0 | int(v / 0x10000)) + 2 * a3 * a7 + 2 * a4 * a6 + a5 * a5
+        r[10] = v & 0xffff
+        v = (0 | int(v / 0x10000)) + 2 * a4 * a7 + 2 * a5 * a6
+        r[11] = v & 0xffff
+        v = (0 | int(v / 0x10000)) + 2 * a5 * a7 + a6 * a6
+        r[12] = v & 0xffff
+        v = (0 | int(v / 0x10000)) + 2 * a6 * a7
+        r[13] = v & 0xffff
+        v = (0 | int(v / 0x10000)) + a7 * a7
+        r[14] = v & 0xffff
+        r[15] = 0 | int(v / 0x10000)
+        return r
+
+    @classmethod
+    def c255lsqrmodp(cls, a):
+        x = cls.c255lsqr8h(a[15], a[14], a[13], a[12],
+                           a[11], a[10], a[9], a[8])
+        z = cls.c255lsqr8h(a[7], a[6], a[5], a[4], a[3], a[2], a[1], a[0])
+        y = cls.c255lsqr8h(a[15] + a[7], a[14] + a[6], a[13] + a[5], a[12] +
+                           a[4], a[11] + a[3], a[10] + a[2], a[9] + a[1], a[8] + a[0])
+        r = [0] * 16
+        v = 0x800000 + z[0] + (y[8] - x[8] - z[8] + x[0] - 0x80) * 38
+        r[0] = v & 0xffff
+        for i in range(1, 8):
+            v = 0x7fff80 + \
+                unsigned_right_shift(
+                    v, 16) + z[i] + (y[i + 8] - x[i + 8] - z[i + 8] + x[i]) * 38
+            r[i] = v & 0xffff
+        for i in range(8, 15):
+            v = 0x7fff80 + \
+                unsigned_right_shift(
+                    v, 16) + z[i] + y[i - 8] - x[i - 8] - z[i - 8] + x[i] * 38
+            r[i] = v & 0xffff
+        r[15] = 0x7fff80 + \
+                unsigned_right_shift(v, 16) + \
+                z[15] + y[7] - x[7] - z[7] + x[15] * 38
+        cls.c255lreduce(r)
+        return r
+
+    @classmethod
+    def c255lmul8h(cls, a7, a6, a5, a4, a3, a2, a1, a0, b7, b6, b5, b4, b3, b2, b1, b0):
+        r = [0] * 16
+        v = a0 * b0
+        r[0] = v & 0xffff
+        v = (0 | int(v / 0x10000)) + a0 * b1 + a1 * b0
+        r[1] = v & 0xffff
+        v = (0 | int(v / 0x10000)) + a0 * b2 + a1 * b1 + a2 * b0
+        r[2] = v & 0xffff
+        v = (0 | int(v / 0x10000)) + a0 * b3 + a1 * b2 + a2 * b1 + a3 * b0
+        r[3] = v & 0xffff
+        v = (0 | int(v / 0x10000)) + a0 * b4 + \
+            a1 * b3 + a2 * b2 + a3 * b1 + a4 * b0
+        r[4] = v & 0xffff
+        v = (0 | int(v / 0x10000)) + a0 * b5 + a1 * \
+            b4 + a2 * b3 + a3 * b2 + a4 * b1 + a5 * b0
+        r[5] = v & 0xffff
+        v = (0 | int(v / 0x10000)) + a0 * b6 + a1 * b5 + \
+            a2 * b4 + a3 * b3 + a4 * b2 + a5 * b1 + a6 * b0
+        r[6] = v & 0xffff
+        v = (0 | int(v / 0x10000)) + a0 * b7 + a1 * b6 + a2 * \
+            b5 + a3 * b4 + a4 * b3 + a5 * b2 + a6 * b1 + a7 * b0
+        r[7] = v & 0xffff
+        v = (0 | int(v / 0x10000)) + a1 * b7 + a2 * b6 + \
+            a3 * b5 + a4 * b4 + a5 * b3 + a6 * b2 + a7 * b1
+        r[8] = v & 0xffff
+        v = (0 | int(v / 0x10000)) + a2 * b7 + a3 * \
+            b6 + a4 * b5 + a5 * b4 + a6 * b3 + a7 * b2
+        r[9] = v & 0xffff
+        v = (0 | int(v / 0x10000)) + a3 * b7 + \
+            a4 * b6 + a5 * b5 + a6 * b4 + a7 * b3
+        r[10] = v & 0xffff
+        v = (0 | int(v / 0x10000)) + a4 * b7 + a5 * b6 + a6 * b5 + a7 * b4
+        r[11] = v & 0xffff
+        v = (0 | int(v / 0x10000)) + a5 * b7 + a6 * b6 + a7 * b5
+        r[12] = v & 0xffff
+        v = (0 | int(v / 0x10000)) + a6 * b7 + a7 * b6
+        r[13] = v & 0xffff
+        v = (0 | int(v / 0x10000)) + a7 * b7
+        r[14] = v & 0xffff
+        r[15] = 0 | int(v / 0x10000)
+        return r
+
+    @classmethod
+    def c255lmulmodp(cls, a, b):
+        x = cls.c255lmul8h(a[15], a[14], a[13], a[12], a[11], a[10], a[9],
+                           a[8], b[15], b[14], b[13], b[12], b[11], b[10], b[9], b[8])
+        z = cls.c255lmul8h(a[7], a[6], a[5], a[4], a[3], a[2], a[1],
+                           a[0], b[7], b[6], b[5], b[4], b[3], b[2], b[1], b[0])
+        y = cls.c255lmul8h(a[15] + a[7], a[14] + a[6], a[13] + a[5], a[12] + a[4], a[11] + a[3], a[10] + a[2],
+                           a[9] + a[1], a[8] +
+                           a[0], b[15] + b[7], b[14] + b[6], b[13] + b[5], b[12] + b[4], b[11] + b[3], b[10] + b[2],
+                           b[9] + b[1], b[8] + b[0])
+        r = [0] * 16
+        v = 0x800000 + z[0] + (y[8] - x[8] - z[8] + x[0] - 0x80) * 38
+        r[0] = v & 0xffff
+        for i in range(1, 8):
+            v = 0x7fff80 + \
+                unsigned_right_shift(
+                    v, 16) + z[i] + (y[i + 8] - x[i + 8] - z[i + 8] + x[i]) * 38
+            r[i] = v & 0xffff
+        for i in range(8, 15):
+            v = 0x7fff80 + \
+                unsigned_right_shift(
+                    v, 16) + z[i] + y[i - 8] - x[i - 8] - z[i - 8] + x[i] * 38
+            r[i] = v & 0xffff
+        r[15] = 0x7fff80 + \
+                unsigned_right_shift(v, 16) + \
+                z[15] + y[7] - x[7] - z[7] + x[15] * 38
+        cls.c255lreduce(r)
+        return r
+
+    @classmethod
+    def c255lreduce(cls, a):
+        v = a[15]
+        a[15] = v & 0x7fff
+        v = (0 | int(v / 0x8000)) * 19
+        for i in range(15):
+            v += a[i]
+            a[i] = v & 0xffff
+            v = unsigned_right_shift(v, 16)
+        a[15] += v
+
+    @classmethod
+    def c255laddmodp(cls, a, b):
+        r = [0] * 16
+        v = ((0 | unsigned_right_shift(
+            a[15], 15)) + (0 | unsigned_right_shift(b[15], 15))) * 19 + a[0] + b[0]
+        r[0] = v & 0xffff
+        for i in range(1, 15):
+            v = unsigned_right_shift(v, 16) + a[i] + b[i]
+            r[i] = v & 0xffff
+        r[15] = unsigned_right_shift(
+            v, 16) + (a[15] & 0x7fff) + (b[15] & 0x7fff)
+        return r
+
+    @classmethod
+    def c255lsubmodp(cls, a, b):
+        r = [0] * 16
+        v = 0x80000 + ((0 | unsigned_right_shift(a[15], 15)) - (
+                0 | unsigned_right_shift(b[15], 15)) - 1) * 19 + a[0] - b[0]
+        r[0] = v & 0xffff
+        for i in range(1, 15):
+            v = unsigned_right_shift(v, 16) + 0x7fff8 + a[i] - b[i]
+            r[i] = v & 0xffff
+        r[15] = unsigned_right_shift(
+            v, 16) + 0x7ff8 + (a[15] & 0x7fff) - (b[15] & 0x7fff)
+        return r
+
+    @classmethod
+    def c255linvmodp(cls, a):
+        c, i = a, 249
+        while i > 0:
+            i -= 1
+            a = cls.c255lsqrmodp(a)
+            a = cls.c255lmulmodp(a, c)
+        a = cls.c255lsqrmodp(a)
+        a = cls.c255lsqrmodp(a)
+        a = cls.c255lmulmodp(a, c)
+        a = cls.c255lsqrmodp(a)
+        a = cls.c255lsqrmodp(a)
+        a = cls.c255lmulmodp(a, c)
+        a = cls.c255lsqrmodp(a)
+        a = cls.c255lmulmodp(a, c)
+        return a
+
+    @classmethod
+    def c255lmulasmall(cls, a):
+        m, r = 121665, [0] * 16
+        v = a[0] * m
+        r[0] = v & 0xffff
+        for i in range(1, 15):
+            v = (0 | int(v / 0x10000)) + a[i] * m
+            r[i] = v & 0xffff
+        r[15] = (0 | int(v / 0x10000)) + a[15] * m
+        cls.c255lreduce(r)
+        return r
+
+    @classmethod
+    def c255ldbl(cls, x, z):
+        m = cls.c255lsqrmodp(cls.c255laddmodp(x, z))
+        n = cls.c255lsqrmodp(cls.c255lsubmodp(x, z))
+        o = cls.c255lsubmodp(m, n)
+        x_2 = cls.c255lmulmodp(n, m)
+        z_2 = cls.c255lmulmodp(cls.c255laddmodp(cls.c255lmulasmall(o), m), o)
+        return [x_2, z_2]
+
+    @classmethod
+    def c255lsum(cls, x, z, x_p, z_p, x_1):
+        p = cls.c255lmulmodp(cls.c255lsubmodp(
+            x, z), cls.c255laddmodp(x_p, z_p))
+        q = cls.c255lmulmodp(cls.c255laddmodp(
+            x, z), cls.c255lsubmodp(x_p, z_p))
+        x_3 = cls.c255lsqrmodp(cls.c255laddmodp(p, q))
+        z_3 = cls.c255lmulmodp(cls.c255lsqrmodp(cls.c255lsubmodp(p, q)), x_1)
+        return [x_3, z_3]
+
+    @classmethod
+    def curve25519_raw(cls, f, c):
+        x_1 = c
+        a = cls.c255ldbl(x_1, cls.c255lone())
+        q = [deepcopy(x_1), cls.c255lone()]
+        n = 255
+        while cls.c255lgetbit(f, n) == 0:
+            n -= 1
+            if n < 0:
+                return cls.c255lzero()
+        n -= 1
+        while n >= 0:
+            b = cls.c255lgetbit(f, n)
+            a_or_q = [[0] * 16, [0] * 16]
+            cls.cond_copy(a_or_q[0], q[0], a[0], b)
+            cls.cond_copy(a_or_q[1], q[1], a[1], b)
+            r = cls.c255lsum(a[0], a[1], q[0], q[1], x_1)
+            s = cls.c255ldbl(a_or_q[0], a_or_q[1])
+            cls.cond_copy(q[0], s[0], r[0], b)
+            cls.cond_copy(q[1], s[1], r[1], b)
+            cls.cond_copy(a[0], r[0], s[0], b)
+            cls.cond_copy(a[1], r[1], s[1], b)
+            n -= 1
+        q[1] = cls.c255linvmodp(q[1])
+        q[0] = cls.c255lmulmodp(q[0], q[1])
+        cls.c255lreduce(q[0])
+        return q[0]
+
+    @classmethod
+    def cond_copy(cls, r, a, b, c):
+        m2 = (-c) & 0xffff
+        m1 = (~m2) & 0xffff
+        n = 0
+        while n < 16:
+            r[n] = (a[n] & m1) | (b[n] & m2)
+            n += 1
+
+    @classmethod
+    def curve25519(cls, f, c: list = None):
+        if not c:
+            c = cls.c255lbase()
+        f[0] &= 0xFFF8
+        f[15] = (f[15] & 0x7FFF) | 0x4000
+        c[15] &= 0x7FFF
+        return cls.curve25519_raw(f, c)
+
+    @classmethod
+    def sha1(cls, msg):
+        length = len(msg)
+        total_length = length + 9
+        total_length = (total_length + 63) & -64
+        padding = [0x80]
+        padding.extend([0 for _ in range(length + 1, total_length)])
+        msg.extend(padding)
+        cls.packbe(msg, total_length - 4, length * 8)
+        h0, h1, h2, h3, h4, w = 0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476, 0xC3D2E1F0, [
+            0] * 80
+        for j in range(0, len(msg), 64):
+            for i in range(16):
+                w[i] = int_overflow(cls.unpackbe(msg, j + i * 4))
+            for i in range(16, 80):
+                w[i] = int_overflow(cls.rrotate(
+                    w[i - 3] ^ w[i - 8] ^ w[i - 14] ^ w[i - 16], 31))
+            a, b, c, d, e = h0, h1, h2, h3, h4
+            for i in range(80):
+                if i < 20:
+                    f = int_overflow((b & c) | (~b & d))
+                    k = 0x5A827999
+                elif i < 40:
+                    f = int_overflow(b ^ c ^ d)
+                    k = 0x6ED9EBA1
+                elif i < 60:
+                    f = int_overflow((b & c) | (b & d) | (c & d))
+                    k = 0x8F1BBCDC
+                else:
+                    f = int_overflow(b ^ c ^ d)
+                    k = 0xCA62C1D6
+                t = int_overflow(Curve.rrotate(a, 27) + f + e + k + w[i])
+                e = d
+                d = c
+                c = int_overflow(Curve.rrotate(b, 2))
+                b = a
+                a = int_overflow(t << 0)
+            h0 = int_overflow((h0 + a) << 0)
+            h1 = int_overflow((h1 + b) << 0)
+            h2 = int_overflow((h2 + c) << 0)
+            h3 = int_overflow((h3 + d) << 0)
+            h4 = int_overflow((h4 + e) << 0)
+        res = [0] * 20
+        Curve.packbe(res, 0, h0)
+        Curve.packbe(res, 4, h1)
+        Curve.packbe(res, 8, h2)
+        Curve.packbe(res, 12, h3)
+        Curve.packbe(res, 16, h4)
+        return res
+
+    @classmethod
+    def rrotate(cls, v, r):
+        return unsigned_right_shift(v, r) | (v << (32 - r))
+
+    @classmethod
+    def unpackbe(cls, a, off):
+        v = 0
+        for i in range(4):
+            v |= a[off + i] << (24 - (i * 8))
+        return v
+
+    @classmethod
+    def packbe(cls, a, off, v):
+        for i in range(4):
+            a[off + i] = (v >> (24 - i * 8)) & 0xff
+
+
+class RC4(object):
+
+    def __init__(self):
+        self.S = []
+        self.i = 0
+        self.j = 0
+
+    def set_key(self, key):
+        self.S = [i for i in range(256)]
+        S, j = self.S, 0
+        for i in range(256):
+            j = (j + key[i % len(key)] + S[i]) & 255
+            S[i], S[j] = S[j], S[i]
+        for _ in range(768):
+            self.gen()
+
+    def gen(self):
+        S = self.S
+        i = self.i = (self.i + 1) & 255
+        j = self.j = (self.j + S[i]) & 255
+        S[i], S[j] = S[j], S[i]
+        return S[(S[i] + S[j]) & 255]
+
+    def crypt_uint8array(self, dst, src, start):
+        for i in range(len(src)):
+            dst[start + i] = src[i] ^ self.gen()
+
+    def encrypt(self, s):
+        a = ''
+        for i in range(len(s)):
+            c = s[i] ^ self.gen()
+            if c == 0:
+                c = 256
+            a += chr(c)
+        return a
+
+
+class RouterSession(object):
+
+    def __init__(self):
+        self.id = None
+        self.pri_key = None
+        self.pub_key = None
+        self.padding = [32] * 8
+        self.rx_seq = 1
+        self.rx_enc = RC4()
+        self.tx_seq = 1
+        self.tx_enc = RC4()
+
+    def make_initial_request(self):
+        self.pri_key = bytes([randint(0, 255) for _ in range(32)])
+        pub_key = Curve.curve_u2a(
+            Curve.curve25519(Curve.curve_a2u(self.pri_key)))
+        self.pub_key = Curve.word2str(
+            0) + Curve.word2str(0) + Curve.a2str(pub_key)
+        self.pub_key = self.pub_key.encode()
+
+    def key_exchange(self, body):
+        self.id = Curve.str2word(body, 0)
+        r_pub_key = Curve.str2a(body[8:])
+        master_key = Curve.curve_u2a(Curve.curve25519(
+            Curve.curve_a2u(self.pri_key), Curve.curve_a2u(r_pub_key)))
+        self.rx_enc.set_key(self.make_key(master_key, False, False))
+        self.tx_enc.set_key(self.make_key(master_key, True, False))
+
+    def make_key(self, master_key, is_send, is_server):
+        magic_2 = 'On the client side, this is the send key; on the server side, it is the receive key.'
+        magic_3 = 'On the client side, this is the receive key; on the server side, it is the send key.'
+        v = deepcopy(master_key)
+        v.extend([0 for _ in range(40)])
+        if is_send == is_server:
+            v.extend(Curve.str2a(magic_3.encode()))
+        else:
+            v.extend(Curve.str2a(magic_2.encode()))
+        v.extend([0xf2 for _ in range(40)])
+        return Curve.sha1(v)[:16]
+
+    def encrypt_uint8array(self, arr):
+        narr = [0] * (len(arr) + 16)
+        narr[1] = self.id >> 16
+        narr[2] = self.id >> 8
+        narr[3] = self.id & 0xff
+        narr[4] = self.tx_seq >> 24
+        narr[5] = self.tx_seq >> 16
+        narr[6] = self.tx_seq >> 8
+        narr[7] = self.tx_seq
+        self.tx_enc.crypt_uint8array(narr, arr, 8)
+        for i in range(len(arr) + 8, len(narr)):
+            narr[i] = 32
+        xarr = narr[len(arr) + 8:len(arr) + 16]
+        self.tx_enc.crypt_uint8array(narr, xarr, len(arr) + 8)
+        self.tx_seq += len(arr) + 8
+        return bytes(narr)
+
+    def decrypt_uint8array(self, arr):
+        if len(arr) < 16:
+            return False
+        _id = int_overflow(arr[0] << 24) | int_overflow(arr[1] << 16) | int_overflow(arr[2] << 8) | arr[3]
+        seq = int_overflow(arr[4] << 24) | int_overflow(arr[5] << 16) | int_overflow(arr[6] << 8) | arr[7]
+        if _id != self.id:
+            return False
+        if seq != self.rx_seq:
+            return True
+        self.rx_seq += len(arr) - 8
+        self.rx_enc.crypt_uint8array(arr, arr[8:], 8)
+        for i in range(len(arr) - 8, len(arr)):
+            if arr[i] != 32:
+                return False
+        msgs = Buffer().buffer2msgs(arr[8:len(arr) - 8], 8)
+        if msgs:
+            for i in range(len(msgs)):
+                print(msgs[i])
+        return True
+
+    def encrypt(self, s):
+        seq = self.tx_seq
+        self.tx_seq += len(s) + 8
+        return (Curve.word2str(self.id) + Curve.word2str(seq)) + self.tx_enc.encrypt(s.encode()) + self.tx_enc.encrypt(
+            self.padding)
+
+    def encrypt_uri(self, uri):
+        s = self.encrypt(uri)
+        r = ''
+        for i in range(len(s)):
+            r += chr(ord(s[i]) & 0xff)
+        return r
+
+    def fetch(self, url, headers, data):
+        data = self.encrypt_uint8array(Buffer().msg2buffer(data))
+        response = requests.post(url=url, headers=headers, data=data)
+        body = list(response.content)
+        self.decrypt_uint8array(body)
+
+    def login(self):
+        response = requests.post(url=URL, data=self.pub_key)
+        body = [ord(item) for item in response.content.decode()]
+        self.key_exchange(body)
+
+        data = {'s1': 'root', 's3': ''}
+        self.fetch(url=URL, headers=HEADERS, data=data)
+
+    def change_vpn(self, vpn_idx, vpn_server):
+        data = {
+            "U1003c": [9, 0, 0, 0, 0, 0, 0, 0],
+            "bdd": 0,
+            "be1": 0,
+            "be3": 0,
+            "bfe000a": 0,
+            "b1000e": 0,
+            "ufe0001": vpn_idx + 48,
+            "u10001": 34,
+            "u10003": 0,
+            "u10002": 16384,
+            "uca": 1450,
+            "ucb": 1450,
+            "ud9": 4294967294,
+            "udb": 30,
+            "udc": 0,
+            "ude": 60,
+            "udf": 1,
+            "sb0004": "disabled",
+            "s10006": f"Vpn{vpn_idx}",
+            "s1001e": "l2tp-out",
+            "s10066": "",
+            "se0": vpn_server,
+            "se2": "",
+            "sfe0009": "",
+            "sd6": "123qqq",
+            "sd7": "hnszs3ds",
+            "Uff0014": [134217944],
+            "Uff0001": [20, 0],
+            "uff0007": 16646147
+        }
+        self.fetch(url=URL, headers=HEADERS, data=data)
+
+
+class AdminSession(object):
+
+    def __init__(self):
+        self.username = '17600025055'
+        self.password = 'zhangyong0712'
+        self.headers = None
+
+    def cookie2str(self, cookies):
+        ret = []
+        for key, value in cookies.iteritems():
+            ret.append(f'{key}={value}')
+        return '; '.join(ret)
+
+    def login(self):
+        url = 'https://hwq.yycyk.com/'
+        self.headers = {
+            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
+            'Accept-Encoding': 'gzip, deflate, br',
+            'Accept-Language': 'zh-CN,zh;q=0.9',
+            'Cache-Control': 'no-cache',
+            'Origin': 'https://hwq.yycyk.com',
+            'Pragma': 'no-cache',
+            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
+        }
+        response = requests.get(url=url, headers=self.headers)
+        self.headers.update({'Cookie': self.cookie2str(response.cookies)})
+
+        url = 'https://hwq.yycyk.com/passport/loginact'
+        self.headers.update({
+            'Accept': '*/*',
+            'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
+            'Referer': 'https://hwq.yycyk.com/passport/login',
+            'X-Requested-With': 'XMLHttpRequest',
+        })
+        data = {
+            'phone': self.username,
+            'password': self.password,
+            'captcha': '',
+        }
+        response = requests.post(url=url, headers=self.headers, data=data)
+        self.headers.update({'Cookie': self.cookie2str(response.cookies)})
+
+    def get_proxy_list(self):
+        url = 'https://hwq.yycyk.com/welcome/dynamicLines'
+        self.headers.update({'Referer': 'https://hwq.yycyk.com/welcome'})
+        data = {
+            'search_str': '',
+            'type': '3',
+        }
+        response = requests.post(url=url, headers=self.headers, data=data)
+        obj = response.json()
+        proxy_list = []
+        for item in obj.get('res', {}).get('data', {}).values():
+            proxy_list.append(item.get('info', {}).get('province_domain'))
+            for sub_item in item.get('line', []):
+                if int(sub_item.get('status', 0)) == 0:  # 维护中的跳过
+                    continue
+                if int(sub_item.get('ping', 1000)) >= 200:  # 延迟超过200ms的跳过
+                    continue
+                proxy_list.append(sub_item.get('domain'))
+        return proxy_list
+
+
+def job():
+    admin_session = AdminSession()
+    admin_session.login()
+    vpn_server = choice(admin_session.get_proxy_list())
+
+    router_session = RouterSession()
+    router_session.make_initial_request()
+    router_session.login()
+    router_session.change_vpn(vpn_idx=1, vpn_server=vpn_server)
+
+    now = datetime.now(tz=pytz.timezone('Asia/Shanghai')).strftime('%Y-%m-%d %H:%M:%S')
+    print(f'[+] {now} 切换代理地址为: {vpn_server}')
+
+
+def main():
+    scheduler = BlockingScheduler({
+        'apscheduler.timezone': 'Asia/Shanghai',
+    })
+    scheduler.add_job(job, 'cron', hour=12, minute=0, second=0)
+    try:
+        print('[+] 定时任务已启动')
+        scheduler.start()
+    except KeyboardInterrupt:
+        print('[+] 定时任务已停止')
+
+
+if __name__ == '__main__':
+    main()

+ 7 - 1
kanyikan/kanyikan_main/run_kykfeed_recommend.py

@@ -10,6 +10,8 @@ from common.common import Common
 from common.public import get_consumer, ack_message, task_fun_mq
 from common.scheduling_db import MysqlHelper
 from kanyikan.kanyikan_recommend.kanyikan_recommend_feed import KanyikanRecommend
+from kanyikan.kanyikan_recommend.kanyikan_recommend_video_id import KanyikanViodeRecommend
+
 
 
 def main(log_type, crawler, topic_name, group_id, env):
@@ -72,12 +74,16 @@ def main(log_type, crawler, topic_name, group_id, env):
                 Common.logger(log_type, crawler).info(f"用户列表:{user_list}\n")
                 Common.logger(log_type, crawler).info(f'开始抓取:{task_dict["taskName"]}\n')
                 Common.logging(log_type, crawler, env, f'开始抓取:{task_dict["taskName"]}\n')
-
                 KanyikanRecommend.get_videoList(log_type=log_type,
                                                 crawler=crawler,
                                                 rule_dict=rule_dict,
                                                 our_uid=our_uid,
                                                 env=env)
+                KanyikanViodeRecommend.get_videoList_video_id(log_type=log_type,
+                                                crawler=crawler,
+                                                rule_dict=rule_dict,
+                                                our_uid=our_uid,
+                                                env=env)
                 Common.del_charles_files(log_type, crawler)
                 Common.logger(log_type, crawler).info('抓取一轮结束\n')
                 Common.logging(log_type, crawler, env, '抓取一轮结束\n')

+ 15 - 2
kanyikan/kanyikan_recommend/kanyikan_recommend_plus.py

@@ -28,6 +28,11 @@ class KanyikanRecommend:
         repeat_video = MysqlHelper.get_values(log_type, crawler, sql, env)
         return len(repeat_video)
 
+    @classmethod
+    def insert_video_id(cls, log_type, crawler, video_id, env):
+        insert_sql = f"""insert into crawler_kyk_video_id( kyk_video_id , status) values ("{video_id}",0)"""
+        MysqlHelper.update_values(log_type, crawler, insert_sql, env, action='')
+
     @classmethod
     def get_videoList(cls, log_type, crawler, our_uid, rule_dict, env):
         mq = MQ(topic_name="topic_crawler_etl_" + env)
@@ -160,8 +165,8 @@ class KanyikanRecommend:
                             video_url = feeds[i]["videoInfo"]["videoCdnInfo"]["ctnInfo"]["urlInfo"][0]["url"]
                         else:
                             video_url = feeds[i]["videoInfo"]["videoCdnInfo"]["urlInfo"][0]["url"]
-                        videoId = feeds[i].get("videoId", "")
-                        videoId = "{}kyk_plus".format(videoId)
+                        video_id = feeds[i].get("videoId", "")
+                        videoId = "{}kyk_plus".format(video_id)
                         playCount = int(feeds[i].get("playCount", 0))
                         shared_cnt = int(feeds[i].get("shared_cnt", 0))
                         video_dict = {
@@ -273,6 +278,14 @@ class KanyikanRecommend:
                             video_dict["crawler_rule"] = json.dumps(rule_dict)
                             video_dict["user_id"] = our_uid
                             video_dict["publish_time"] = video_dict["publish_time_str"]
+                            cls.insert_video_id(log_type, crawler, video_id, env)
+                            AliyunLogger.logging(
+                                code="1010",
+                                platform=crawler,
+                                mode=log_type,
+                                env=env,
+                                message=f"看一看video_id:{video_id}入库",
+                            )
                             mq.send_msg(video_dict)
                         time.sleep(random.randint(10, 15))
                     except Exception as e:

+ 287 - 0
kanyikan/kanyikan_recommend/kanyikan_recommend_video_id.py

@@ -0,0 +1,287 @@
+# -*- coding: utf-8 -*-
+# @Time: 2023/11/23
+import json
+import os
+import random
+import sys
+import time
+import requests
+import urllib3
+
+
+sys.path.append(os.getcwd())
+from common import AliyunLogger
+from common.mq import MQ
+from common.common import Common
+from common.scheduling_db import MysqlHelper
+from common.public import get_config_from_mysql, download_rule
+proxies = {"http": None, "https": None}
+
+
+
+
+
+class KanyikanViodeRecommend:
+    platform = "看一看"
+    strategy = "video_id-feed流"
+
+
+    @classmethod
+    def repeat_video(cls, log_type, crawler, video_id, env):
+        sql = f""" select * from crawler_video where platform in ("{crawler}","{cls.platform}") and create_time>='2023-10-09' and out_video_id="{video_id}"; """
+        repeat_video = MysqlHelper.get_values(log_type, crawler, sql, env)
+        return len(repeat_video)
+
+    @classmethod
+    def get_video_id(cls, log_type, crawler, env):
+        sql = f""" select kyk_video_id from crawler_kyk_video_id where status = 0 """
+        repeat_video = MysqlHelper.get_values(log_type, crawler, sql, env)
+        return repeat_video
+
+    @classmethod
+    def update_video_id(cls, log_type, crawler, env, vid):
+        sql = f""" update crawler_kyk_video_id set status={1}  WHERE kyk_video_id ="{vid}" """
+        MysqlHelper.update_values(log_type, crawler, sql, env, action="")
+
+
+    @classmethod
+    def get_videoList_video_id(cls, log_type, crawler, our_uid, rule_dict, env):
+        mq = MQ(topic_name="topic_crawler_etl_" + env)
+        try:
+            session = Common.get_session(log_type, crawler, env)
+            if session is None:
+                time.sleep(1)
+                cls.get_videoList_video_id(log_type, crawler, our_uid, rule_dict, env)
+            vid_list = cls.get_video_id(log_type, crawler, env,)
+            if len(vid_list) == 0:
+                Common.logger(log_type, crawler).info(f"vid_list:{vid_list}")
+                Common.logging(log_type, crawler, env, f"vid_list:{vid_list}")
+                return
+            for video_id in vid_list:
+                vid = video_id["kyk_video_id"]
+                url1 = "https://search.weixin.qq.com/cgi-bin/recwxa/recwxavideolist?"
+                header = {
+                    'Host': 'search.weixin.qq.com',
+                    'Content-Type': 'application/json',
+                    'X-WX-ClientVersion': '0x33050520',
+                    'X-WECHAT-UIN': 'b2hfbTQ1WGNjSzQxemdfanpMSml1TEtfbEtsVQ==',
+                    'Accept': '*/*',
+                    'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E217 MicroMessenger/6.8.0(0x16080000) NetType/WIFI Language/en Branch/Br_trunk MiniProgramEnv/Mac',
+                    'Referer': 'https://servicewechat.com/wxbb9a805eb4f9533c/269/page-frame.html',
+                    'Accept-Language': 'zh-cn'
+                }
+                params = {
+                    "session": session,
+                    "offset": 0,
+                    "count": "30",
+                    "channelid": "200201",
+                    "vid": vid,
+                    "scene": "310",
+                    "subscene": '1098'
+                }
+                urllib3.disable_warnings()
+                response = requests.get(url=url1, headers=header, params=params, proxies=proxies, verify=False)
+                if "data" not in response.text:
+                    Common.logger(log_type, crawler).info("获取视频list时,session过期,随机睡眠 31-50 秒")
+                    Common.logging(log_type, crawler, env, "获取视频list时,session过期,随机睡眠 31-50 秒")
+                    AliyunLogger.logging(
+                        code="2000",
+                        platform=crawler,
+                        mode=log_type,
+                        env=env,
+                        message=f"获取视频list时,session过期,随机睡眠 31-50 秒"
+                    )
+                    # 如果返回空信息,则随机睡眠 31-40 秒
+                    time.sleep(random.randint(31, 40))
+                    cls.get_videoList_video_id(log_type, crawler, our_uid, rule_dict, env)
+                elif "items" not in response.json()["data"]:
+                    Common.logger(log_type, crawler).info(f"get_feeds:{response.json()},随机睡眠 1-3 分钟")
+                    Common.logging(log_type, crawler, env, f"get_feeds:{response.json()},随机睡眠 1-3 分钟")
+                    AliyunLogger.logging(
+                        code="2000",
+                        platform=crawler,
+                        mode=log_type,
+                        env=env,
+                        message=f"get_feeds:{response.json()},随机睡眠 1-3 分钟"
+                    )
+                    # 如果返回空信息,则随机睡眠 1-3 分钟
+                    time.sleep(random.randint(60, 180))
+                    cls.get_videoList_video_id(log_type, crawler, our_uid, rule_dict, env)
+                feeds = response.json().get("data", {}).get("items", "")
+                if feeds == "":
+                    Common.logger(log_type, crawler).info(f"feeds:{feeds}")
+                    Common.logging(log_type, crawler, env, f"feeds:{feeds}")
+                    AliyunLogger.logging(
+                        code="2001",
+                        platform=crawler,
+                        mode=log_type,
+                        env=env,
+                        message=f"没有更多视频啦 ~\n"
+                    )
+                    return
+                for j in range(len(feeds)):
+                    try:
+                        AliyunLogger.logging(
+                            code="1001",
+                            platform=crawler,
+                            mode=log_type,
+                            env=env,
+                            message='扫描到一条视频\n'
+                        )
+                        cls.update_video_id(log_type, crawler, env, vid)
+                        AliyunLogger.logging(
+                            code="1011",
+                            platform=crawler,
+                            mode=log_type,
+                            env=env,
+                            message=f"看一看video_id:{vid},状态修改为已抓取",
+                        )
+                        video_title = feeds[j].get("title", "").strip().replace("\n", "") \
+                            .replace("/", "").replace("\\", "").replace("\r", "") \
+                            .replace(":", "").replace("*", "").replace("?", "") \
+                            .replace("?", "").replace('"', "").replace("<", "") \
+                            .replace(">", "").replace("|", "").replace(" ", "") \
+                            .replace("&NBSP", "").replace(".", "。").replace(" ", "") \
+                            .replace("'", "").replace("#", "").replace("Merge", "")
+                        publish_time_stamp = feeds[j].get("date", 0)
+                        publish_time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(publish_time_stamp))
+                        # 获取播放地址
+                        if "videoInfo" not in feeds[j]:
+                            video_url = ""
+                        elif "mpInfo" in feeds[j]["videoInfo"]["videoCdnInfo"]:
+                            if len(feeds[j]["videoInfo"]["videoCdnInfo"]["mpInfo"]["urlInfo"]) > 2:
+                                video_url = feeds[j]["videoInfo"]["videoCdnInfo"]["mpInfo"]["urlInfo"][2]["url"]
+                            else:
+                                video_url = feeds[j]["videoInfo"]["videoCdnInfo"]["mpInfo"]["urlInfo"][0]["url"]
+                        elif "ctnInfo" in feeds[j]["videoInfo"]["videoCdnInfo"]:
+                            video_url = feeds[j]["videoInfo"]["videoCdnInfo"]["ctnInfo"]["urlInfo"][0]["url"]
+                        else:
+                            video_url = feeds[j]["videoInfo"]["videoCdnInfo"]["urlInfo"][0]["url"]
+                        video_id = feeds[j].get("videoId", "")+"video_id"
+                        s_cnt = int(feeds[j].get("shared_cnt", 0))
+                        p_count = int(feeds[j].get("playCount", 0))
+                        l_cnt = int(feeds[j].get("liked_cnt", 0))
+                        if p_count < 100000:
+                            if s_cnt < 200 and p_count < 15000:
+                                Common.logger(log_type, crawler).info(f"分享:{s_cnt},播放量:{p_count}\n")
+                                Common.logging(log_type, crawler, env, f"分享:{s_cnt},播放量:{p_count}\n")
+                                continue
+                        video_dict = {
+                            "video_title": video_title,
+                            "video_id": video_id,
+                            "play_cnt": feeds[j].get("playCount", 0),
+                            "like_cnt": feeds[j].get("liked_cnt", 0),
+                            "comment_cnt": feeds[j].get("comment_cnt", 0),
+                            "share_cnt": feeds[j].get("shared_cnt", 0),
+                            "duration": feeds[j].get("mediaDuration", 0),
+                            "video_width": feeds[j].get("short_video_info", {}).get("width", 0),
+                            "video_height": feeds[j].get("short_video_info", {}).get("height", 0),
+                            "publish_time_stamp": publish_time_stamp,
+                            "publish_time_str": publish_time_str,
+                            "user_name": feeds[j].get("source", "").strip().replace("\n", ""),
+                            "user_id": feeds[j].get("openid", ""),
+                            "avatar_url": feeds[j].get("bizIcon", ""),
+                            "cover_url": feeds[j].get("thumbUrl", ""),
+                            "video_url": video_url,
+                            "session": session,
+                        }
+                        for k, v in video_dict.items():
+                            Common.logger(log_type, crawler).info(f"{k}:{v}")
+                        Common.logging(log_type, crawler, env, f"video_dict:{video_dict}")
+                        AliyunLogger.logging(
+                            code="1000",
+                            platform=crawler,
+                            mode=log_type,
+                            env=env,
+                            message=f"{video_dict}\n"
+                        )
+
+                        if video_dict["video_id"] == "" or video_dict["video_title"] == "" or video_dict[
+                            "video_url"] == "":
+                            Common.logger(log_type, crawler).info("无效视频\n")
+                            Common.logging(log_type, crawler, env, "无效视频\n")
+                            AliyunLogger.logging(
+                                code="2004",
+                                platform=crawler,
+                                mode=log_type,
+                                env=env,
+                                message='无效视频\n'
+                            )
+                        elif download_rule(log_type=log_type, crawler=crawler, video_dict=video_dict,
+                                           rule_dict=rule_dict) is False:
+                            Common.logger(log_type, crawler).info("不满足抓取规则\n")
+                            Common.logging(log_type, crawler, env, "不满足抓取规则\n")
+                            AliyunLogger.logging(
+                                code="2004",
+                                platform=crawler,
+                                mode=log_type,
+                                env=env,
+                                message='不满足抓取规则\n'
+                            )
+                        elif any(str(word) if str(word) in video_dict["video_title"] else False
+                                 for word in get_config_from_mysql(log_type=log_type,
+                                                                   source=crawler,
+                                                                   env=env,
+                                                                   text="filter",
+                                                                   action="")) is True:
+                            Common.logger(log_type, crawler).info('已中过滤词\n')
+                            Common.logging(log_type, crawler, env, '已中过滤词\n')
+                            AliyunLogger.logging(
+                                code="2004",
+                                platform=crawler,
+                                mode=log_type,
+                                env=env,
+                                message='已中过滤词\n'
+                            )
+                        elif cls.repeat_video(log_type, crawler, video_dict["video_id"], env) != 0:
+                            Common.logger(log_type, crawler).info('视频已下载\n')
+                            Common.logging(log_type, crawler, env, '视频已下载\n')
+                            AliyunLogger.logging(
+                                code="2002",
+                                platform=crawler,
+                                mode=log_type,
+                                env=env,
+                                message='视频已下载\n'
+                            )
+                        else:
+                            video_dict["out_user_id"] = video_dict["user_id"]
+                            video_dict["platform"] = crawler
+                            video_dict["strategy"] = log_type
+                            video_dict["strategy_type"] = "hcm"
+                            video_dict["out_video_id"] = video_dict["video_id"]
+                            video_dict["width"] = video_dict["video_width"]
+                            video_dict["height"] = video_dict["video_height"]
+                            video_dict["crawler_rule"] = json.dumps(rule_dict)
+                            video_dict["user_id"] = our_uid
+                            video_dict["publish_time"] = video_dict["publish_time_str"]
+                            mq.send_msg(video_dict)
+                    except Exception as e:
+                        Common.logger(log_type, crawler).error(f"抓取单条视频异常:{e}\n")
+                        Common.logging(log_type, crawler, env, f"抓取单条视频异常:{e}\n")
+                        AliyunLogger.logging(
+                            code="3000",
+                            platform=crawler,
+                            mode=log_type,
+                            env=env,
+                            message=f"抓取单条视频异常:{e}\n"
+                        )
+        except Exception as e:
+            Common.logger(log_type, crawler).error(f"抓取列表页时异常:{e}\n")
+            Common.logging(log_type, crawler, env, f"抓取列表页时异常:{e}\n")
+            AliyunLogger.logging(
+                code="3000",
+                platform=crawler,
+                mode=log_type,
+                env=env,
+                message=f"抓取列表页时异常:{e}\n"
+            )
+
+
+if __name__ == "__main__":
+    KanyikanViodeRecommend.get_videoList_video_id(
+        log_type="recommend",
+        crawler="kanyikan",
+        env="prod",
+        rule_dict={'share_cnt': {'min': 300, 'max': 0}},
+        our_uid=64080779
+    )

+ 2 - 1
xiaoniangao/xiaoniangao_author/xiaoniangao_author_v2.py

@@ -47,7 +47,8 @@ class XiaoNianGaoAuthor:
         for user_dict in self.user_list:
             if self.download_count <= max_count:
                 self.get_video_list(user_dict)
-                time.sleep(random.randint(1, 15))
+                # time.sleep(random.randint(1, 10))
+                time.sleep(1)
             else:
                 AliyunLogger.logging(
                     code="2000",

+ 0 - 0
zhuwanwufusu/__init__.py


+ 1 - 0
zhuwanwufusu/crypt/__init__.py

@@ -0,0 +1 @@
+from .crypt import AESCipher

+ 26 - 0
zhuwanwufusu/crypt/crypt.py

@@ -0,0 +1,26 @@
+from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
+from cryptography.hazmat.backends import default_backend
+
+
+class AESCipher:
+    def __init__(self):
+        self.key = b'50102fa64073ad76'  # 用适当的方式转换或直接定义为字节串
+        self.iv = b'173d023138824bb0'  # 同上
+
+    def aes_encrypt(self, data):
+        cipher = Cipher(algorithms.AES(self.key), modes.CBC(self.iv), backend=default_backend())
+        encryptor = cipher.encryptor()
+        ct = encryptor.update(self._pad(data).encode()) + encryptor.finalize()
+        return ct.hex().upper()
+
+    def aes_decrypt(self, data):
+        cipher = Cipher(algorithms.AES(self.key), modes.CBC(self.iv), backend=default_backend())
+        decryptor = cipher.decryptor()
+        decrypted_data = decryptor.update(bytes.fromhex(data)) + decryptor.finalize()
+        return self._unpad(decrypted_data).decode()
+
+    def _pad(self, s):
+        return s + (16 - len(s) % 16) * chr(16 - len(s) % 16)
+
+    def _unpad(self, s):
+        return s[:-ord(s[len(s) - 1:])]

+ 0 - 0
zhuwanwufusu/zhuwanwufusu_author/__init__.py


+ 0 - 0
zhuwanwufusu/zhuwanwufusu_author/zwwfs_author.py


+ 0 - 0
zhuwanwufusu/zhuwanwufusu_main/__init__.py


+ 156 - 0
zhuwanwufusu/zhuwanwufusu_main/run_zwwfs_recommend.py

@@ -0,0 +1,156 @@
+import argparse
+import time
+import random
+from mq_http_sdk.mq_client import *
+from mq_http_sdk.mq_consumer import *
+from mq_http_sdk.mq_exception import MQExceptionBase
+
+sys.path.append(os.getcwd())
+from common.public import task_fun_mq, get_consumer, ack_message
+from common.scheduling_db import MysqlHelper
+from common import AliyunLogger
+from zhuwanwufusu.zhuwanwufusu_recommend import ZhuWanWuFuSuRecommend
+
+
+def main(log_type, crawler, topic_name, group_id, env):
+    consumer = get_consumer(topic_name, group_id)
+    # 长轮询表示如果Topic没有消息,则客户端请求会在服务端挂起3秒,3秒内如果有消息可以消费则立即返回响应。
+    # 长轮询时间3秒(最多可设置为30秒)。
+    wait_seconds = 30
+    # 一次最多消费3条(最多可设置为16条)。
+    batch = 1
+    AliyunLogger.logging(
+        code="1000",
+        platform=crawler,
+        mode=log_type,
+        env=env,
+        message=f'{10 * "="}Consume And Ack Message From Topic{10 * "="}\n'
+        f"WaitSeconds:{wait_seconds}\n"
+        f"TopicName:{topic_name}\n"
+        f"MQConsumer:{group_id}",
+    )
+    while True:
+        try:
+            # 长轮询消费消息。
+            recv_msgs = consumer.consume_message(batch, wait_seconds)
+            for msg in recv_msgs:
+                AliyunLogger.logging(
+                    code="1000",
+                    platform=crawler,
+                    mode=log_type,
+                    env=env,
+                    message=f"Receive\n"
+                    f"MessageId:{msg.message_id}\n"
+                    f"MessageBodyMD5:{msg.message_body_md5}\n"
+                    f"MessageTag:{msg.message_tag}\n"
+                    f"ConsumedTimes:{msg.consumed_times}\n"
+                    f"PublishTime:{msg.publish_time}\n"
+                    f"Body:{msg.message_body}\n"
+                    f"NextConsumeTime:{msg.next_consume_time}\n"
+                    f"ReceiptHandle:{msg.receipt_handle}\n"
+                    f"Properties:{msg.properties}",
+                )
+                # ack_mq_message
+                ack_message(
+                    log_type=log_type,
+                    crawler=crawler,
+                    recv_msgs=recv_msgs,
+                    consumer=consumer,
+                )
+                # 解析 task_dict
+                task_dict = task_fun_mq(msg.message_body)["task_dict"]
+                AliyunLogger.logging(
+                    code="1000",
+                    platform=crawler,
+                    mode=log_type,
+                    env=env,
+                    message="f调度任务:{task_dict}",
+                )
+                # 解析 rule_dict
+                rule_dict = task_fun_mq(msg.message_body)["rule_dict"]
+                AliyunLogger.logging(
+                    code="1000",
+                    platform=crawler,
+                    mode=log_type,
+                    env=env,
+                    message=f"抓取规则:{rule_dict}\n",
+                )
+                # 解析 user_list
+                task_id = task_dict["id"]
+                select_user_sql = (
+                    f"""select * from crawler_user_v3 where task_id={task_id}"""
+                )
+                user_list = MysqlHelper.get_values(
+                    log_type, crawler, select_user_sql, env, action=""
+                )
+                user_dict = random.choice(user_list)
+                AliyunLogger.logging(
+                    code="1003",
+                    platform=crawler,
+                    mode=log_type,
+                    env=env,
+                    message="开始抓取"
+                )
+                AliyunLogger.logging(
+                    code="1000",
+                    platform=crawler,
+                    mode=log_type,
+                    env=env,
+                    message="开始抓取祝万物复苏——推荐",
+                )
+                main_process = ZhuWanWuFuSuRecommend(
+                    platform=crawler,
+                    mode=log_type,
+                    rule_dict=rule_dict,
+                    user_dict=user_dict,
+                    env=env
+                )
+                main_process.schedule()
+                AliyunLogger.logging(
+                    code="1000",
+                    platform=crawler,
+                    mode=log_type,
+                    env=env,
+                    message="完成抓取——祝万物复苏",
+                )
+                AliyunLogger.logging(
+                    code="1004", platform=crawler, mode=log_type, env=env,message="结束一轮抓取"
+                )
+
+        except MQExceptionBase as err:
+            # Topic中没有消息可消费。
+            if err.type == "MessageNotExist":
+                AliyunLogger.logging(
+                    code="2000",
+                    platform=crawler,
+                    mode=log_type,
+                    env=env,
+                    message=f"No new message! RequestId:{err.req_id}\n",
+                )
+                continue
+            AliyunLogger.logging(
+                code="2000",
+                platform=crawler,
+                mode=log_type,
+                env=env,
+                message=f"Consume Message Fail! Exception:{err}\n",
+            )
+            time.sleep(2)
+            continue
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()  ## 新建参数解释器对象
+    parser.add_argument("--log_type", type=str)  ## 添加参数,注明参数类型
+    parser.add_argument("--crawler")  ## 添加参数
+    parser.add_argument("--topic_name")  ## 添加参数
+    parser.add_argument("--group_id")  ## 添加参数
+    parser.add_argument("--env")  ## 添加参数
+    args = parser.parse_args()  ### 参数赋值,也可以通过终端赋值
+    main(
+        log_type=args.log_type,
+        crawler=args.crawler,
+        topic_name=args.topic_name,
+        group_id=args.group_id,
+        env=args.env,
+    )

+ 1 - 0
zhuwanwufusu/zhuwanwufusu_recommend/__init__.py

@@ -0,0 +1 @@
+from .zwwfs_recommend import ZhuWanWuFuSuRecommend

+ 288 - 0
zhuwanwufusu/zhuwanwufusu_recommend/zwwfs_recommend.py

@@ -0,0 +1,288 @@
+import os
+import json
+import random
+import sys
+import time
+import uuid
+
+import requests
+
+sys.path.append(os.getcwd())
+from common.video_item import VideoItem
+from common import PiaoQuanPipeline, AliyunLogger, tunnel_proxies
+from common.mq import MQ
+from common.db import MysqlHelper
+from zhuwanwufusu.crypt import AESCipher as AES
+
+
+class ZhuWanWuFuSuRecommend(object):
+    def __init__(self, platform, mode, rule_dict, user_dict, env):
+        self.platform = platform
+        self.mode = mode
+        self.rule_dict = rule_dict
+        self.user_dict = user_dict
+        self.env = env
+        self.download_cnt = 0
+        self.mq = MQ(topic_name="topic_crawler_etl_" + self.env)
+        self.expire_flag = False
+        self.cryptor = AES()
+
+    def get_recommend_list(self):
+        url = "https://api.lidongze.cn/jeecg-boot/ugc/getVideoListsEn2"
+        headers = {
+            'Host': 'api.lidongze.cn',
+            'xweb_xhr': '1',
+            'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 MicroMessenger/6.8.0(0x16080000) NetType/WIFI MiniProgramEnv/Mac MacWechat/WMPF MacWechat/3.8.4(0x13080410)XWEB/31009',
+            'token': '',
+            'content-type': 'application/json',
+            'accept': '*/*',
+            'referer': 'https://servicewechat.com/wx0afdc2669ed8df2f/3/page-frame.html',
+            'accept-language': 'en-US,en;q=0.9'
+        }
+        page_index = 1
+        total_page = 2
+        while page_index <= total_page:
+            try:
+                query = {
+                    "pageNo": page_index,
+                    "pageSize": 10,
+                    "groupId": "1650323161797439489",  # 推荐流的 ID
+                    "vn": 1,
+                    "gx": 1,
+                    "appid": "wx0afdc2669ed8df2f",
+                    "type": 0
+                }
+                params = {
+                    "v": self.cryptor.aes_encrypt(data=json.dumps(query))
+                }
+                response = requests.request("GET", url, headers=headers, params=params, proxies=tunnel_proxies())
+                result = json.loads(self.cryptor.aes_decrypt(response.text))
+                total_page = result['list']['pages']
+                page_index = result['list']['current'] + 1
+                for index, video_obj in enumerate(result['list']['records'], 1):
+                    try:
+                        AliyunLogger.logging(
+                            code="1001",
+                            platform=self.platform,
+                            mode=self.mode,
+                            env=self.env,
+                            message="扫描到一条视频",
+                            data=video_obj
+                        )
+                        self.process_video_obj(video_obj)
+                    except Exception as e:
+                        AliyunLogger.logging(
+                            code="3000",
+                            platform=self.platform,
+                            mode=self.mode,
+                            env=self.env,
+                            message="抓取单条视频失败, 该视频位于第{}页第{}条报错原因是{}".format(page_index, index, e)
+                        )
+            except Exception as e:
+                AliyunLogger.logging(
+                    code="3000",
+                    platform=self.platform,
+                    mode=self.mode,
+                    env=self.env,
+                    message="抓取第{}页的时候失败, 报错原因是{}".format(page_index, e)
+                )
+            time.sleep(random.randint(5, 10))
+
+    def get_user_videos(self, user_id):
+        """
+        在抓取完推荐页之后,去抓每一个用户的主页视频
+        """
+        url = "https://api.lidongze.cn/jeecg-boot/ugc/getAuthVideoList"
+        headers = {
+            'Host': 'api.lidongze.cn',
+            'xweb_xhr': '1',
+            'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 MicroMessenger/6.8.0(0x16080000) NetType/WIFI MiniProgramEnv/Mac MacWechat/WMPF MacWechat/3.8.4(0x13080410)XWEB/31009',
+            'token': '',
+            'content-type': 'application/json',
+            'accept': '*/*',
+            'referer': 'https://servicewechat.com/wx0afdc2669ed8df2f/3/page-frame.html',
+            'accept-language': 'en-US,en;q=0.9'
+        }
+        page_index = 1
+        total_page = 1
+        while page_index <= total_page:
+            query = {
+                "pageNo": page_index,
+                "pageSize": 10,
+                "authid": user_id
+            }
+            params = {
+                "v": self.cryptor.aes_encrypt(data=json.dumps(query))
+            }
+            response = requests.request("GET", url, headers=headers, params=params, proxies=tunnel_proxies())
+            result = json.loads(self.cryptor.aes_decrypt(response.text))
+            total_page = result['list']['pages']
+            page_index = result['list']['current'] + 1
+            for index, video_temp in enumerate(result['list']['records']):
+                video_id = video_temp['id']
+                detail_query = {
+                    "videoId": video_id
+                }
+                detail_params = {
+                    "v": self.cryptor.aes_encrypt(data=json.dumps(detail_query))
+                }
+                url = "https://api.lidongze.cn/jeecg-boot/ugc/getVideosDataEn"
+                headers = {
+                    'Host': 'api.lidongze.cn',
+                    'xweb_xhr': '1',
+                    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 MicroMessenger/6.8.0(0x16080000) NetType/WIFI MiniProgramEnv/Mac MacWechat/WMPF MacWechat/3.8.4(0x13080410)XWEB/31009',
+                    'token': '',
+                    'content-type': 'application/json',
+                    'accept': '*/*',
+                    'referer': 'https://servicewechat.com/wx0afdc2669ed8df2f/3/page-frame.html',
+                    'accept-language': 'en-US,en;q=0.9'
+                }
+                detail_response = requests.request("GET", url, headers=headers, params=detail_params,
+                                                   proxies=tunnel_proxies())
+                detail_video = json.loads(self.cryptor.aes_decrypt(detail_response.text))
+                if detail_video['success']:
+                    # print(json.dumps(detail_video, ensure_ascii=False, indent=4))
+                    try:
+                        AliyunLogger.logging(
+                            code="1001",
+                            platform=self.platform,
+                            mode=self.mode,
+                            env=self.env,
+                            message="扫描到一条视频",
+                            data=detail_video['data']
+                        )
+                        self.process_video_obj(detail_video['data'])
+                    except Exception as e:
+                        AliyunLogger.logging(
+                            code="3000",
+                            platform=self.platform,
+                            mode=self.mode,
+                            env=self.env,
+                            message="抓取单条视频失败, 该视频位于第{}条报错原因是{}".format(index, e)
+                        )
+
+    def process_video_obj(self, video_obj):
+        trace_id = self.platform + str(uuid.uuid1())
+        if video_obj.get("playnum"):
+            play_cnt = int(video_obj['playnum'].replace("万+", "0000")) if "万+" in video_obj['playnum'] else int(
+                video_obj['playnum'])
+        else:
+            play_cnt = 0
+        item = VideoItem()
+        item.add_video_info("video_id", video_obj['id'])
+        item.add_video_info("video_title", video_obj['vname'])
+        item.add_video_info("play_cnt", play_cnt)
+        item.add_video_info("publish_time_stamp", int(time.time()))
+        item.add_video_info("out_user_id", video_obj['authid'])
+        item.add_video_info("cover_url", video_obj['shareimg'])
+        item.add_video_info("like_cnt", int(video_obj['likenum']))
+        item.add_video_info("video_url", video_obj['videoaddr'])
+        item.add_video_info("out_video_id", video_obj['id'])
+        item.add_video_info("platform", self.platform)
+        item.add_video_info("strategy", self.mode)
+        item.add_video_info("session", "{}-{}".format(self.platform, int(time.time())))
+        item.add_video_info("user_id", self.user_dict['uid'])
+        item.add_video_info("user_name", self.user_dict['nick_name'])
+        # 把扫描到的账号存到 accounts 表中
+        self.manage_auth_id(out_user_id=video_obj['authid'], out_user_name=video_obj['authname'])
+        mq_obj = item.produce_item()
+        pipeline = PiaoQuanPipeline(
+            platform=self.platform,
+            mode=self.mode,
+            rule_dict=self.rule_dict,
+            env=self.env,
+            item=mq_obj,
+            trace_id=trace_id,
+        )
+        if pipeline.process_item():
+            self.download_cnt += 1
+            print(mq_obj)
+            # self.mq.send_msg(mq_obj)
+            AliyunLogger.logging(
+                code="1002",
+                platform=self.platform,
+                mode=self.mode,
+                env=self.env,
+                message="成功发送至 ETL",
+                data=mq_obj
+            )
+
+    def manage_auth_id(self, out_user_id, out_user_name):
+        """
+        out_user_id: 外站视频的用户 id
+        out_user_name: 外站视频用户名字
+        逻辑: 对新扫描到的视频的用户 id 进行判断,若用户 id 不存在,则把视频 id 存到表中,
+              如果用户 id 存在,则判断用户是否修改名字,若名字修改则更新名字
+        """
+        select_user_sql = f"""select name, name_id from accounts where name_id = "{out_user_id}" and platform = "{self.platform}" and useful = 1 limit 1"""
+        out_user_info = MysqlHelper.get_values(
+            log_type=self.mode,
+            crawler=self.platform,
+            sql=select_user_sql,
+            env=self.env,
+            machine="",
+        )
+        if out_user_info:
+            name, name_id = out_user_info[0]
+            if name == out_user_name:
+                return
+            else:
+                update_sql = f"""update accounts set name = "{out_user_name}" where name_id = "{out_user_id}";"""
+                MysqlHelper.update_values(
+                    log_type=self.mode,
+                    crawler=self.platform,
+                    sql=update_sql,
+                    env=self.env,
+                    machine=""
+                )
+        else:
+            insert_sql = f"""INSERT INTO accounts (name, name_id, platform, useful) values ("{out_user_name}", "{out_user_id}", "{self.platform}", 1 )"""
+            MysqlHelper.update_values(
+                log_type=self.mode,
+                crawler=self.platform,
+                sql=insert_sql,
+                env=self.env,
+                machine="",
+            )
+
+    def get_user_list(self):
+        select_user_sql = f"""select name_id from accounts where platform = "{self.platform}" and useful = 1"""
+        out_user_info = MysqlHelper.get_values(
+            log_type=self.mode,
+            crawler=self.platform,
+            sql=select_user_sql,
+            env=self.env,
+            machine="",
+        )
+        if out_user_info:
+            result = []
+            for i in out_user_info:
+                result.append(i[0])
+            return result
+        else:
+            return []
+
+    def schedule(self):
+        """
+        先抓取推荐列表的视频, 等待 5 分钟后,抓取账号视频
+        """
+        self.get_recommend_list()
+        time.sleep(5 * 60)
+        self.mode = "author"
+        user_list = self.get_user_list()
+        if user_list:
+            for index, user_id in enumerate(user_list):
+                try:
+                    self.get_user_videos(user_id=user_id)
+                except Exception as e:
+                    AliyunLogger.logging(
+                        code="3000",
+                        platform=self.platform,
+                        mode=self.mode,
+                        env=self.env,
+                        message="抓取账号视频出现异常,账号 id 是{}, 报错原因是{}".format(user_id, e)
+                    )
+
+
+if __name__ == '__main__':
+    pass

+ 110 - 0
zhuwanwufusu/zhuwanwufusu_recommend/zwwfs_recommend_test.py

@@ -0,0 +1,110 @@
+import os
+import re
+import base64
+import json
+import random
+import sys
+import time
+import uuid
+
+import requests
+
+sys.path.append(os.getcwd())
+from common.video_item import VideoItem
+from common import PiaoQuanPipeline, AliyunLogger, tunnel_proxies
+from common.mq import MQ
+from common.scheduling_db import MysqlHelper
+from zhuwanwufusu.crypt import AESCipher as AES
+
+
+class ZhuWanWuFuSuRecommend(object):
+    def __init__(self, platform, mode, rule_dict, user_dict, env):
+        self.platform = platform
+        self.mode = mode
+        self.rule_dict = rule_dict
+        self.user_dict = user_dict
+        self.env = env
+        self.download_cnt = 0
+        self.mq = MQ(topic_name="topic_crawler_etl_" + self.env)
+        self.expire_flag = False
+        self.cryptor = AES()
+
+    def get_recommend_list(self):
+        url = "https://api.lidongze.cn/jeecg-boot/ugc/getVideoListsEn2"
+        headers = {
+            'Host': 'api.lidongze.cn',
+            'xweb_xhr': '1',
+            'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 MicroMessenger/6.8.0(0x16080000) NetType/WIFI MiniProgramEnv/Mac MacWechat/WMPF MacWechat/3.8.4(0x13080410)XWEB/31009',
+            'token': '',
+            'content-type': 'application/json',
+            'accept': '*/*',
+            'referer': 'https://servicewechat.com/wx0afdc2669ed8df2f/3/page-frame.html',
+            'accept-language': 'en-US,en;q=0.9'
+        }
+        page_index = 1
+        total_page = 2
+        while page_index <= total_page:
+            query = {
+                "pageNo": page_index,
+                "pageSize": 10,
+                "groupId": "1650323161797439489",  # 推荐流的 ID
+                "vn": 1,
+                "gx": 1,
+                "appid": "wx0afdc2669ed8df2f",
+                "type": 0
+            }
+            params = {
+                "v": self.cryptor.aes_encrypt(data=json.dumps(query))
+            }
+            response = requests.request("GET", url, headers=headers, params=params, proxies=tunnel_proxies())
+            result = json.loads(self.cryptor.aes_decrypt(response.text))
+            total_page = result['list']['pages']
+            page_index = result['list']['current'] + 1
+            for index, video_obj in enumerate(result['list']['records']):
+                self.process_video_obj(video_obj)
+
+    def process_video_obj(self, video_obj):
+        print(json.dumps(video_obj, ensure_ascii=False, indent=4))
+        trace_id = self.platform + str(uuid.uuid1())
+        play_cnt = int(video_obj['playnum'].replace("万+", "0000")) if "万+" in video_obj['playnum'] else int(
+            video_obj['playnum'])
+        item = VideoItem()
+        item.add_video_info("video_id", video_obj['id'])
+        item.add_video_info("video_title", video_obj['vname'])
+        item.add_video_info("play_cnt", play_cnt)
+        item.add_video_info("publish_time_stamp", int(time.time()))
+        item.add_video_info("out_user_id", video_obj['authid'])
+        item.add_video_info("cover_url", video_obj['shareimg'])
+        item.add_video_info("like_cnt", int(video_obj['likenum']))
+        item.add_video_info("video_url", video_obj['videoaddr'])
+        item.add_video_info("out_video_id", video_obj['id'])
+        item.add_video_info("platform", self.platform)
+        item.add_video_info("strategy", self.mode)
+        item.add_video_info("session", "{}-{}".format(self.platform, int(time.time())))
+        item.add_video_info("user_id", self.user_dict['uid'])
+        item.add_video_info("user_name", self.user_dict['nick_name'])
+
+        mq_obj = item.produce_item()
+        pipeline = PiaoQuanPipeline(
+            platform=self.platform,
+            mode=self.mode,
+            rule_dict=self.rule_dict,
+            env=self.env,
+            item=mq_obj,
+            trace_id=trace_id,
+        )
+        if pipeline.process_item():
+            print(json.dumps(mq_obj, ensure_ascii=False, indent=4))
+            self.download_cnt += 1
+            print(self.download_cnt)
+
+
+if __name__ == '__main__':
+    Z = ZhuWanWuFuSuRecommend(
+        platform="zhuwanwufusu",
+        mode="recommend",
+        rule_dict={},
+        user_dict={"uid": 123456, "nick_name": "luojunhuishuaige"},
+        env="dev"
+    )
+    Z.get_recommend_list()