瀏覽代碼

修改 updateAccountAvgDaily.py
增加账号信息

罗俊辉 7 月之前
父節點
當前提交
0f060b510c
共有 5 個文件被更改，包括 38 次插入 和 22 次删除
  1. + 1 - 1
      applications/wxSpiderApi.py
  2. + 7 - 6
      spider/weixinCategoryCrawler.py
  3. + 2 - 4
      tasks/task3.py
  4. + 3 - 3
      tasks/task6.py
  5. + 25 - 8
      updateAccountAvgDaily.py

+ 1 - 1
applications/wxSpiderApi.py

@@ -69,7 +69,7 @@ class WeixinSpider(object):
             'Content-Type': 'application/json'
         }
         response = requests.post(url, headers=headers, data=json.dumps(payload), timeout=120)
-        print("response", response.text)
+        # print("response", response.text)
         return response.json()
 
     @classmethod

+ 7 - 6
spider/weixinCategoryCrawler.py

@@ -140,12 +140,12 @@ if __name__ == "__main__":
     category_list = [
         '军事',
         '历史',
-        '娱乐八卦',
-        '情感生活',
-        '健康养生',
-        '新闻媒体'
+        # '娱乐八卦',
+        # '情感生活',
+        # '健康养生',
+        # '新闻媒体'
     ]
-    for category in category_list[4:]:
+    for category in category_list:
         account_list = wxCategory.getAccountList(category)
         for account in tqdm(account_list):
             try:
@@ -153,12 +153,13 @@ if __name__ == "__main__":
                 category = account['category']
                 try:
                     timestamp = int(account['latest_timestamp'].timestamp())
-                except:
+                except Exception as e:
                     timestamp = 1704038400
                 wxCategory.updateEachAccountArticles(
                     gh_id=gh_id,
                     category=category,
                     latest_time_stamp=timestamp
                 )
+                print("success")
             except Exception as e:
                 print("fail because of {}".format(e))

+ 2 - 4
tasks/task3.py

@@ -5,7 +5,7 @@ import datetime
 
 from tqdm import tqdm
 
-from applications import AIDTApi, DeNetMysql, PQMySQL, Functions, AdMySQL
+from applications import AIDTApi, DeNetMysql, PQMySQL, Functions, longArticlesMySQL
 from config import poolTagMap
 from stratrgy import ArticlePoolStrategy
 
@@ -18,7 +18,7 @@ class SendToMultiLevels(object):
     DeMysql = DeNetMysql()
     PqMysql = PQMySQL()
     Fun = Functions()
-    Ad = AdMySQL()
+    # Ad =
 
     @classmethod
     def getYesterdayData(cls):
@@ -91,8 +91,6 @@ class SendToMultiLevels(object):
         """
         yesterday_data = cls.getYesterdayData()
         level_url_list_map = cls.splitToDifferentPools(yesterday_data)
-        # for line in level_url_list_map:
-        #     print(line)
         cls.sendToDifferentPools(pool_info=level_url_list_map)
 
 

+ 3 - 3
tasks/task6.py

@@ -17,7 +17,7 @@ def get_account_avg():
     获取账号
     :return:
     """
-    with open("/Users/luojunhui/cyber/LongArticlesJob/dev/avg_new_health.json", encoding="utf-8") as f:
+    with open("/Users/luojunhui/cyber/LongArticlesJob/dev/军事历史.json", encoding="utf-8") as f:
         avg_dict = json.loads(f.read())
 
     account_position_list = list(avg_dict.keys())
@@ -31,7 +31,7 @@ def get_account_avg():
         """
         result_list = D.select(select_sql)
         try:
-            avg_read = avg_dict[account]
+            avg_read = avg_dict[account]['readAvg']
             for i in result_list:
                 title, read_cnt, link = i
                 avg_score = read_cnt / avg_read
@@ -57,7 +57,7 @@ def get_account_avg():
     print(a)
     print(b)
     df = DataFrame(LL, columns=["title", "link", "read", "read_avg"])
-    df.to_excel("health_2.xlsx", index=False)
+    df.to_excel("historyArmy.xlsx", index=False)
     # url_list = [i[1] for i in LL[3:]]
     # try:
     #     AIDTApi().updateArticleIntoCrawlerPlan(

+ 25 - 8
updateAccountAvgDaily.py

@@ -56,12 +56,18 @@ class UpdateAvgDaily(object):
         :return:
         """
         sql = f"""
-        SELECT t1.`name`, t1.gh_id, t1.follower_count, t3.account_type
+        SELECT  t1.`name`,
+                t1.gh_id, 
+                t1.follower_count, 
+                t3.account_type, 
+                t3.account_source_name, 
+                t3.mode_type, 
+                t3.status
         FROM `publish_account` t1
         JOIN wx_statistics_group_source_account t2
-            ON t1.id = t2.account_id
+            on t1.id = t2.account_id
         JOIN wx_statistics_group_source t3
-            ON t2.group_source_name = t3.account_source_name
+            on t2.group_source_name = t3.account_source_name;
         """
         response = cls.deNetClient.select(sql)
         log(
@@ -75,7 +81,10 @@ class UpdateAvgDaily(object):
                 "accountName": item[0],
                 "ghId": item[1],
                 "fans": item[2],
-                "accountType": item[3]
+                "accountType": item[3],
+                "accountSource": item[4],
+                "accountMode": item[5],
+                "accountStatus": item[6]
             }
             if temp["accountName"] in ['口琴', '二胡']:
                 continue
@@ -99,9 +108,9 @@ class UpdateAvgDaily(object):
         """
         sql = f"""
         INSERT INTO account_avg_info_v2
-        (gh_id, position, account_name, fans, read_avg, like_avg, update_time, status)
+        (gh_id, position, account_name, fans, read_avg, like_avg, update_time, status, account_type, account_mode, account_source, account_status)
         values 
-        (%s, %s, %s, %s, %s, %s, %s, %s);
+        (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);
         """
         try:
             cls.pqClient.update(
@@ -114,7 +123,11 @@ class UpdateAvgDaily(object):
                     data['avg_read'],
                     data['avg_like'],
                     data['update_time'],
-                    1
+                    1,
+                    data['account_type'],
+                    data['account_mode'],
+                    data['account_source'],
+                    data['account_status']
                 )
             )
             log(
@@ -159,7 +172,11 @@ class UpdateAvgDaily(object):
                         "position": index,
                         "avg_read": avg_read if str(avg_read) != "nan" else 0,
                         "avg_like": avg_like if str(avg_like) != "nan" else 0,
-                        "update_time": dt_str
+                        "update_time": dt_str,
+                        "account_type": item['accountType'],
+                        "account_mode": item['accountMode'],
+                        "account_source": item['accountSource'],
+                        "account_status": item['accountStatus']
                     }
                     cls.insertIntoMysql(obj)
                     L.append(obj)