فهرست منبع

增加了分析代码, 针对数据的入库量进行分析

罗俊辉 1 سال پیش
والد
کامیت
f3d830fabf
3 فایلهای تغییر یافته به همراه 44 افزوده شده و 1 حذف شده
  1. 0 0
      analysis/__init__.py
  2. 42 0
      analysis/analysis.py
  3. 2 1
      haokanshipin/haokanshipin_author/hksp_test.py

+ 0 - 0
analysis/__init__.py


+ 42 - 0
analysis/analysis.py

@@ -0,0 +1,42 @@
+import pandas as pd
+import json
+import pymysql
+
+
+class Analysis(object):  # Collects per-day row counts from the crawler_video table for each listed platform.
+    def __init__(self):  # Set the platforms to query, the cutoff date, and an empty result dict.
+        self.platform_list = ["xiaoniangao", "gongzhonghao", "shipinhao", "douyin", "kuaishou", "fuqiwang", "haitunzhufu"]
+        self.date_last = "2023-11-01"  # only rows with create_time greater than this value are counted
+        self.out_put = {}  # filled by analysis_videos(): {platform: {"YYYY-MM-DD": count}}
+
+    def analysis_videos(self):  # Run one grouped COUNT query per platform and store the daily totals in self.out_put.
+        connection = pymysql.connect(
+            host="rm-bp1159bu17li9hi94ro.mysql.rds.aliyuncs.com",  # database IP address (intranet address)
+            port=3306,  # port number
+            user="crawler",  # MySQL user name
+            passwd="crawler123456@",  # MySQL login password -- NOTE(review): credential is hard-coded in source
+            db="piaoquan-crawler",  # database name
+            # if the text stored in the database is utf8-encoded, set charset to utf8
+            charset="utf8")
+        for platform in self.platform_list:
+            select_sql = f"""SELECT DATE(create_time) as DATE, count(1) as Total
+                    FROM crawler_video 
+                    WHERE `platform`  = "{platform}" and create_time > "{self.date_last}"
+                    GROUP BY DATE( `create_time` )
+                    ORDER BY DATE( `create_time`)  DESC; """
+            out_dict = {}  # maps "YYYY-MM-DD" -> row count for the current platform
+            mysql = connection.cursor()  # NOTE(review): a new cursor is created per platform and never closed explicitly
+            mysql.execute(select_sql)
+            data_lines = mysql.fetchall()
+            for i in data_lines:  # each row is (DATE, Total), in the order selected above
+                date_info = i[0].strftime('%Y-%m-%d')  # assumes the driver returns a date-like object here -- TODO confirm
+                count = i[1]
+                out_dict[date_info] = count
+            self.out_put[platform] = out_dict
+        connection.close()  # NOTE(review): not reached if any query above raises; there is no try/finally
+
+
+if __name__ == '__main__':  # Script entry point: run the analysis and print the collected counts.
+    A = Analysis()
+    A.analysis_videos()  # populates A.out_put; requires the MySQL host configured in the class to be reachable
+    print(json.dumps(A.out_put, ensure_ascii=False, indent=4))  # ensure_ascii=False keeps non-ASCII text unescaped

+ 2 - 1
haokanshipin/haokanshipin_author/hksp_test.py

@@ -205,8 +205,9 @@ if __name__ == "__main__":
     T = HaoKanVideoAccount(
         platform="haokanshipin",
         mode="author",
-        rule_dict={"period": {"min": 5, "max": 5}},
+        rule_dict={"period": {"min": 10, "max": 10}},
         user_dict=user_list[1],
         env="prod",
     )
+    print(user_list[1])
     T.schedule()