罗俊辉 vor 1 Jahr
Ursprung
Commit
882267d844
10 geänderte Dateien mit 464 neuen und 335 gelöschten Zeilen
  1. 2 1
      applications/__init__.py
  2. 117 0
      applications/wx_spider_api.py
  3. 187 275
      config/__init__.py
  4. 36 19
      dev/test.py
  5. 41 13
      stratrgy/strategy.py
  6. 4 3
      tasks/task1.py
  7. 12 5
      tasks/task2.py
  8. 11 18
      tasks/task3.py
  9. 1 1
      tasks/task4.py
  10. 53 0
      tasks/task5.py

+ 2 - 1
applications/__init__.py

@@ -5,4 +5,5 @@ from .aidit_api import AIDTApi
 from .denet_mysql import DeNetMysql
 from .pq_mysql import PQMySQL
 from .functions import Functions
-from .data_works import ODPSApi
+from .data_works import ODPSApi
+from .wx_spider_api import WeixinSpider

+ 117 - 0
applications/wx_spider_api.py

@@ -0,0 +1,117 @@
+"""
+@author: luojunhui
+"""
+import json
+import time
+import requests
+
+
+def retryOnNone(max_retries=5, wait_seconds=1):
+    """
+    Build a decorator that retries a call until its response carries data.
+
+    The wrapped callable is expected to return a dict with a ``data`` key.
+    It is called up to ``max_retries`` times; the first response whose
+    ``data`` value is not None is returned unchanged. Exceptions raised by
+    the wrapped callable are not caught here and propagate to the caller.
+
+    :param max_retries: maximum number of attempts (default 5)
+    :param wait_seconds: pause between two attempts, in seconds (default 1)
+    :return: a decorator; the decorated function returns the first usable
+        response, or None when every attempt came back without data
+    """
+    # Imported locally so this block does not rely on a module-level import.
+    from functools import wraps
+
+    def decorator(func):
+        """
+        Wrap ``func`` in the retry loop.
+
+        :param func: callable returning the response dict
+        :return: the retrying wrapper
+        """
+
+        @wraps(func)
+        def wrapper(*args, **kwargs):
+            # All arguments are forwarded untouched on every attempt.
+            for attempt in range(max_retries):
+                response = func(*args, **kwargs)
+                # A payload without a usable "data" field (missing key or a
+                # non-dict body) counts as a failed attempt instead of
+                # raising KeyError/TypeError.
+                if isinstance(response, dict) and response.get('data') is not None:
+                    return response
+                # Sleeping after the last failed attempt would only delay
+                # the caller, so pause only when another attempt follows.
+                if attempt < max_retries - 1:
+                    time.sleep(wait_seconds)
+            return None
+
+        return wrapper
+
+    return decorator
+
+
+class WeixinSpider(object):
+    """
+    Thin client for the WeChat crawler HTTP service.
+
+    Every public method POSTs a JSON payload to one endpoint of the service
+    and returns the decoded JSON response. Each method is decorated with
+    ``retryOnNone``, so a response whose ``data`` field is None is retried
+    and None is returned when all retries fail.
+    """
+
+    # Base address shared by all crawler endpoints.
+    base_url = "http://8.217.190.241:8888/crawler/wei_xin"
+    # Headers sent with every request; the body is always JSON.
+    json_headers = {'Content-Type': 'application/json'}
+    # requests applies no timeout by default, so a stalled server would block
+    # the caller forever; bound every call instead (seconds).
+    request_timeout = 60
+
+    @classmethod
+    def _post(cls, endpoint, payload):
+        """
+        POST ``payload`` as JSON to one crawler endpoint.
+
+        :param endpoint: last path segment of the endpoint, e.g. ``"keyword"``
+        :param payload: JSON-serializable request body
+        :return: the decoded JSON response
+        :raises requests.exceptions.RequestException: on connection errors
+            or when ``request_timeout`` is exceeded
+        """
+        response = requests.post(
+            "{}/{}".format(cls.base_url, endpoint),
+            headers=cls.json_headers,
+            data=json.dumps(payload),
+            timeout=cls.request_timeout,
+        )
+        return response.json()
+
+    @classmethod
+    @retryOnNone()
+    def search_articles(cls, title):
+        """
+        Search WeChat articles by keyword.
+
+        :param title: text sent as the ``keyword`` field
+        :return: decoded JSON response of the ``keyword`` endpoint
+        """
+        # The cursor is fixed to "1", so only that one page is requested.
+        return cls._post("keyword", {"keyword": title, "cursor": "1"})
+
+    @classmethod
+    @retryOnNone()
+    def get_article_text(cls, content_link):
+        """
+        Fetch the detail of a single article.
+
+        :param content_link: article link sent as the ``content_link`` field
+        :return: decoded JSON response of the ``detail`` endpoint
+        """
+        payload = {
+            "content_link": content_link,
+            "is_count": False,
+            "is_ad": False,
+        }
+        return cls._post("detail", payload)
+
+    @classmethod
+    @retryOnNone()
+    def update_msg_list(cls, ghId, index):
+        """
+        Fetch one page of an account's message list.
+
+        :param ghId: account id sent as the ``account_id`` field
+        :param index: value sent as the ``cursor`` field
+        :return: decoded JSON response of the ``blogger`` endpoint
+        """
+        return cls._post("blogger", {'account_id': ghId, 'cursor': index})
+
+    @classmethod
+    @retryOnNone()
+    def get_account_by_url(cls, content_url):
+        """
+        Look up account information from an article link.
+
+        :param content_url: article link sent as the ``content_link`` field
+        :return: decoded JSON response of the ``account_info`` endpoint
+        """
+        return cls._post("account_info", {"content_link": content_url})

+ 187 - 275
config/__init__.py

@@ -45,281 +45,193 @@ planConfigDict = {
     "20240723131151203388473": "宗教历史-年龄56-66"
 }
 
-poolLevelConfig = {
-    "1": [
-        "指尖奇文--1",
-        "老友闲谈--1",
-        "老友欢聚地--1",
-        "趣味晚年--1",
-        "情为老友--1",
-        "退休老年圈--1"
-    ],
-    "2": [
-        "指尖奇文--2",
-        "老友闲谈--2",
-        "老友欢聚地--2",
-        "情为老友--2",
-        "多彩妙生活--1",
-        "多彩妙生活--2",
-        "家家生活指南--1",
-        "老友快乐谈--1",
-        "人生百事观--1",
-        "农耕趣时刻--1",
-        "幸福启示--1",
-        "路边闲聊社--1",
-        "快乐精选集--1",
-        "生活百态观--1",
-        "生活百态观--2",
-        "日常巧思集--1",
-        "趣史论--1",
-        "畅聊奇闻--1",
-        "生活指示录--1",
-        "奇趣百味生活--1",
-        "无忧自在生活--1",
-        "喜乐生活派--1",
-        "态度说--1",
-        "缘来养心厅--1",
-        "便捷生活好方法--1",
-        "看不够妙招--1"
-    ],
-    "3": [
-        "趣味晚年--2",
-        "退休老年圈--2",
-        "家家生活指南--2",
-        "老友快乐谈--2",
-        "农耕趣时刻--2",
-        "幸福启示--2",
-        "路边闲聊社--2",
-        "快乐精选集--2",
-        "日常巧思集--2",
-        "趣史论--2",
-        "畅聊奇闻--2",
-        "生活指示录--2",
-        "奇趣百味生活--2",
-        "无忧自在生活--2",
-        "喜乐生活派--2",
-        "态度说--2",
-        "缘来养心厅--2",
-        "便捷生活好方法--2",
-        "看不够妙招--2",
-        "日常生活小技巧集--1",
-        "日常生活小技巧集--2",
-        "都市镜头--1",
-        "都市镜头--2",
-        "退休无忧生活--1",
-        "退休无忧生活--2",
-        "心灵情感驿站--1",
-        "心灵情感驿站--2",
-        "票圈极速版--1",
-        "票圈极速版--2",
-        "生活良读--1",
-        "生活良读--2",
-        "史记有言--1",
-        "史记有言--2",
-        "幸福妙招合集--1",
-        "幸福妙招合集--2",
-        "妙招持家帮手--1",
-        "妙招持家帮手--2",
-        "乐享生活小窍门--1",
-        "乐享生活小窍门--2",
-        "天天学生活技巧--1",
-        "天天学生活技巧--2",
-        "音药金曲厅--1",
-        "日常小妙招秘籍--1",
-        "日常小妙招秘籍--2",
-        "窦都事说--1",
-        "巷尾风声--1",
-        "趣谈史记--1",
-        "趣谈史记--2",
-        "趣味生活达人--1",
-        "趣味生活达人--2",
-        "异闻趣事多--1",
-        "异闻趣事多--2",
-        "无忧生活小妙招--1",
-        "幸福晚年知音--1",
-        "麒阁史记--1",
-        "老来生活家--1",
-        "那些历史--1",
-        "祝福养心厅--1",
-        "音药养心馆--1",
-        "史记趣言--1",
-        "生活晓常识--1",
-        "生活情感叁读--1",
-        "心海情澜起--1",
-        "繁花史阁--1",
-        "小贝生活课堂--1"
-    ],
-    "4": [
-        "指尖奇文--3",
-        "指尖奇文--4",
-        "指尖奇文--5",
-        "指尖奇文--6",
-        "老友闲谈--3",
-        "老友闲谈--4",
-        "老友闲谈--5",
-        "老友闲谈--6",
-        "老友欢聚地--3",
-        "老友欢聚地--4",
-        "老友欢聚地--5",
-        "老友欢聚地--6",
-        "老友欢聚地--7",
-        "老友欢聚地--8",
-        "趣味晚年--3",
-        "趣味晚年--4",
-        "趣味晚年--5",
-        "趣味晚年--6",
-        "情为老友--3",
-        "情为老友--4",
-        "情为老友--5",
-        "情为老友--6",
-        "退休老年圈--3",
-        "退休老年圈--4",
-        "退休老年圈--5",
-        "退休老年圈--6",
-        "多彩妙生活--3",
-        "多彩妙生活--4",
-        "多彩妙生活--5",
-        "多彩妙生活--6",
-        "多彩妙生活--7",
-        "多彩妙生活--8",
-        "家家生活指南--3",
-        "家家生活指南--4",
-        "家家生活指南--5",
-        "家家生活指南--6",
-        "家家生活指南--7",
-        "家家生活指南--8",
-        "老友快乐谈--3",
-        "人生百事观--2",
-        "人生百事观--3",
-        "农耕趣时刻--3",
-        "农耕趣时刻--4",
-        "农耕趣时刻--5",
-        "农耕趣时刻--6",
-        "路边闲聊社--3",
-        "路边闲聊社--4",
-        "路边闲聊社--5",
-        "路边闲聊社--6",
-        "生活百态观--3",
-        "生活百态观--4",
-        "生活百态观--5",
-        "生活百态观--6",
-        "日常巧思集--3",
-        "日常巧思集--4",
-        "日常巧思集--5",
-        "日常巧思集--6",
-        "趣史论--3",
-        "趣史论--4",
-        "趣史论--5",
-        "趣史论--6",
-        "缘来养心厅--3",
-        "缘来养心厅--4",
-        "缘来养心厅--5",
-        "缘来养心厅--6",
-        "心灵情感驿站--3",
-        "票圈极速版--3",
-        "史记有言--3",
-        "史记有言--4",
-        "史记有言--5",
-        "史记有言--6",
-        "音药金曲厅--2",
-        "日常小妙招秘籍--3",
-        "日常小妙招秘籍--4",
-        "日常小妙招秘籍--5",
-        "日常小妙招秘籍--6",
-        "窦都事说--2",
-        "巷尾风声--2",
-        "趣谈史记--3",
-        "趣谈史记--4",
-        "趣谈史记--5",
-        "趣谈史记--6",
-        "无忧生活小妙招--2",
-        "幸福晚年知音--2",
-        "麒阁史记--2",
-        "老来生活家--2",
-        "那些历史--2",
-        "祝福养心厅--2",
-        "音药养心馆--2",
-        "史记趣言--2",
-        "生活晓常识--2",
-        "生活情感叁读--2",
-        "心海情澜起--2",
-        "繁花史阁--2",
-        "小贝生活课堂--2",
-        "无忧潮生活--1",
-        "无忧潮生活--2",
-        "实用妙招800个--1",
-        "实用妙招800个--2",
-        "票圈美文速递--1",
-        "票圈美文速递--2",
-        "票圈美文速递--3",
-        "小惠爱厨房--1",
-        "小惠爱厨房--2",
-        "小惠爱厨房--3",
-        "小惠爱厨房--4",
-        "小惠爱厨房--5",
-        "小惠爱厨房--6",
-        "小惠爱厨房--7",
-        "小惠爱厨房--8",
-        "小阳看天下--1",
-        "小阳看天下--2",
-        "票圈正能量--1",
-        "票圈正能量--2",
-        "票圈大事件--1",
-        "票圈大事件--2"
-    ],
-    "5": [
-        "老友快乐谈--4",
-        "人生百事观--4",
-        "幸福启示--3",
-        "幸福启示--4",
-        "快乐精选集--3",
-        "快乐精选集--4",
-        "畅聊奇闻--3",
-        "畅聊奇闻--4",
-        "生活指示录--3",
-        "生活指示录--4",
-        "奇趣百味生活--3",
-        "奇趣百味生活--4",
-        "无忧自在生活--3",
-        "无忧自在生活--4",
-        "喜乐生活派--3",
-        "喜乐生活派--4",
-        "态度说--3",
-        "态度说--4",
-        "便捷生活好方法--3",
-        "便捷生活好方法--4",
-        "看不够妙招--3",
-        "日常生活小技巧集--3",
-        "日常生活小技巧集--4",
-        "都市镜头--3",
-        "都市镜头--4",
-        "退休无忧生活--3",
-        "退休无忧生活--4",
-        "票圈极速版--4",
-        "幸福妙招合集--3",
-        "妙招持家帮手--3",
-        "乐享生活小窍门--3",
-        "天天学生活技巧--3",
-        "天天学生活技巧--4",
-        "音药金曲厅--3",
-        "音药金曲厅--4",
-        "巷尾风声--3",
-        "巷尾风声--4",
-        "趣味生活达人--3",
-        "异闻趣事多--3",
-        "异闻趣事多--4",
-        "无忧生活小妙招--3",
-        "麒阁史记--3",
-        "麒阁史记--4",
-        "那些历史--3",
-        "那些历史--4",
-        "祝福养心厅--3",
-        "音药养心馆--3",
-        "音药养心馆--4",
-        "史记趣言--3",
-        "心海情澜起--3",
-        "小阳看天下--3"
-    ]
+pool_level_detail = {
+    "gh_6d205db62f04_1": "1",
+    "gh_56ca3dae948c_1": "1",
+    "gh_c69776baf2cd_1": "1",
+    "gh_9877c8541764_1": "1",
+    "gh_6cfd1132df94_1": "1",
+    "gh_058e41145a0c_1": "1",
+    "gh_89ef4798d3ea_1": "1",
+    "gh_b15de7c99912_1": "1",
+    "gh_d4dffc34ac39_1": "1",
+    "gh_744cb16f6e16_1": "1",
+    "gh_c91b42649690_1": "1",
+    "gh_970460d9ccec_1": "1",
+    "gh_5ae65db96cb7_1": "1",
+    "gh_c5cdf60d9ab4_1": "1",
+    "gh_4c058673c07e_1": "1",
+    "gh_30816d8adb52_1": "1",
+    "gh_a2901d34f75b_1": "1",
+    "gh_0c89e11f8bf3_1": "1",
+    "gh_d49df5e974ca_1": "1",
+    "gh_bff0bcb0694a_1": "1",
+    "gh_e24da99dc899_1": "1",
+    "gh_bfe5b705324a_1": "1",
+    "gh_080bb43aa0dc_1": "1",
+    "gh_7f5075624a50_1": "1",
+    "gh_9eef14ad6c16_1": "1",
+    "gh_0e4fd9e88386_1": "1",
+    "gh_ac43eb24376d_1": "1",
+    "gh_dd4c857bbb36_1": "1",
+    "gh_3ed305b5817f_1": "1",
+    "gh_008ef23062ee_1": "1",
+    "gh_be8c29139989_1": "1",
+    "gh_de9f9ebc976b_1": "1",
+    "gh_7e5818b2dd83_1": "1",
+    "gh_57573f01b2ee_1": "1",
+    "gh_789a40fe7935_1": "1",
+    "gh_3e91f0624545_1": "1",
+    "gh_969f5ea5fee1_1": "1",
+    "gh_1d887d61088c_1": "1",
+    "gh_03d32e83122f_1": "1",
+    "gh_192c9cf58b13_1": "1",
+    "gh_ff487cb5dab3_1": "1",
+    "gh_d5f935d0d1f2_1": "1",
+    "gh_51e4ad40466d_1": "1",
+    "gh_b6f2c5332c72_1": "1",
+    "gh_adca24a8f429_1": "1",
+    "gh_6b7c2a257263_1": "1",
+    "gh_95ed5ecf9363_1": "1",
+    "gh_e0eb490115f5_1": "1",
+    "gh_56ca3dae948c_2": "1",
+    "gh_6d205db62f04_2": "1",
+    "gh_c69776baf2cd_2": "1",
+    "gh_9877c8541764_2": "1",
+    "gh_b15de7c99912_2": "1",
+    "gh_89ef4798d3ea_2": "1",
+    "gh_970460d9ccec_2": "1",
+    "gh_d4dffc34ac39_2": "1",
+    "gh_6cfd1132df94_2": "1",
+    "gh_5ae65db96cb7_2": "1",
+    "gh_058e41145a0c_2": "1",
+    "gh_744cb16f6e16_2": "1",
+    "gh_c91b42649690_2": "1",
+    "gh_30816d8adb52_2": "1",
+    "gh_4c058673c07e_2": "1",
+    "gh_c5cdf60d9ab4_2": "1",
+    "gh_a2901d34f75b_2": "1",
+    "gh_0c89e11f8bf3_2": "1",
+    "gh_d49df5e974ca_2": "1",
+    "gh_e24da99dc899_2": "1",
+    "gh_7f5075624a50_2": "1",
+    "gh_bff0bcb0694a_2": "1",
+    "gh_bfe5b705324a_2": "1",
+    "gh_03d32e83122f_2": "1",
+    "gh_080bb43aa0dc_2": "1",
+    "gh_0e4fd9e88386_2": "1",
+    "gh_95ed5ecf9363_2": "1",
+    "gh_9eef14ad6c16_2": "1",
+    "gh_3ed305b5817f_2": "1",
+    "gh_7e5818b2dd83_2": "1",
+    "gh_dd4c857bbb36_2": "1",
+    "gh_008ef23062ee_2": "1",
+    "gh_ac43eb24376d_2": "1",
+    "gh_be8c29139989_2": "1",
+    "gh_de9f9ebc976b_2": "1",
+    "gh_1d887d61088c_2": "1",
+    "gh_192c9cf58b13_2": "1",
+    "gh_adca24a8f429_2": "1",
+    "gh_ff487cb5dab3_2": "1",
+    "gh_789a40fe7935_2": "1",
+    "gh_3e91f0624545_2": "1",
+    "gh_57573f01b2ee_2": "1",
+    "gh_b6f2c5332c72_2": "1",
+    "gh_51e4ad40466d_2": "1",
+    "gh_e0eb490115f5_2": "1",
+    "gh_d5f935d0d1f2_2": "1",
+    "gh_6b7c2a257263_2": "1",
+    "gh_969f5ea5fee1_2": "1",
+    "gh_72bace6b3059_1": "2",
+    "gh_9f8dc5b0c74e_1": "2",
+    "gh_6d9f36e3a7be_1": "2",
+    "gh_7b4a5f86d68c_1": "2",
+    "gh_b676b7ad9b74_1": "2",
+    "gh_183d80deffb8_1": "2",
+    "gh_29074b51f2b7_1": "2",
+    "gh_f25b5fb01977_1": "2",
+    "gh_4568b5a7e2fe_1": "2",
+    "gh_5ff48e9fb9ef_1": "2",
+    "gh_f902cea89e48_1": "2",
+    "gh_084a485e859a_1": "2",
+    "gh_1b27dd1beeca_1": "2",
+    "gh_ee78360d06f5_1": "2",
+    "gh_72bace6b3059_2": "2",
+    "gh_9f8dc5b0c74e_2": "2",
+    "gh_b676b7ad9b74_2": "2",
+    "gh_7b4a5f86d68c_2": "2",
+    "gh_6d9f36e3a7be_2": "2",
+    "gh_29074b51f2b7_2": "2",
+    "gh_183d80deffb8_2": "2",
+    "gh_f25b5fb01977_2": "2",
+    "gh_5ff48e9fb9ef_2": "2",
+    "gh_084a485e859a_2": "2",
+    "gh_f902cea89e48_2": "2",
+    "gh_1b27dd1beeca_2": "2",
+    "gh_ee78360d06f5_2": "2",
+    "gh_4568b5a7e2fe_2": "2",
+    "gh_058e41145a0c_3": "3",
+    "gh_0e4fd9e88386_3": "3",
+    "gh_744cb16f6e16_3": "3",
+    "gh_ac43eb24376d_3": "3",
+    "gh_970460d9ccec_3": "3",
+    "gh_56ca3dae948c_3": "3",
+    "gh_c91b42649690_3": "3",
+    "gh_6d205db62f04_3": "3",
+    "gh_e24da99dc899_3": "3",
+    "gh_4c058673c07e_3": "3",
+    "gh_03d32e83122f_3": "3",
+    "gh_c69776baf2cd_3": "3",
+    "gh_30816d8adb52_3": "3",
+    "gh_789a40fe7935_3": "3",
+    "gh_95ed5ecf9363_3": "3",
+    "gh_3e91f0624545_3": "3",
+    "gh_57573f01b2ee_3": "3",
+    "gh_9877c8541764_3": "3",
+    "gh_6cfd1132df94_3": "3",
+    "gh_008ef23062ee_3": "3",
+    "gh_5ae65db96cb7_3": "3",
+    "gh_be8c29139989_3": "3",
+    "gh_51e4ad40466d_3": "3",
+    "gh_d4dffc34ac39_3": "3",
+    "gh_89ef4798d3ea_3": "3",
+    "gh_b15de7c99912_3": "3",
+    "gh_9f8dc5b0c74e_3": "3",
+    "gh_7b4a5f86d68c_3": "3",
+    "gh_c5cdf60d9ab4_3": "3",
+    "gh_0c89e11f8bf3_3": "3",
+    "gh_e0eb490115f5_3": "3",
+    "gh_a2901d34f75b_3": "3",
+    "gh_d5f935d0d1f2_3": "3",
+    "gh_0e4fd9e88386_4": "3",
+    "gh_058e41145a0c_4": "3",
+    "gh_ac43eb24376d_4": "3",
+    "gh_c91b42649690_4": "3",
+    "gh_970460d9ccec_4": "3",
+    "gh_56ca3dae948c_4": "3",
+    "gh_744cb16f6e16_4": "3",
+    "gh_6d205db62f04_4": "3",
+    "gh_4c058673c07e_4": "3",
+    "gh_30816d8adb52_4": "3",
+    "gh_789a40fe7935_4": "3",
+    "gh_03d32e83122f_4": "3",
+    "gh_e24da99dc899_4": "3",
+    "gh_c69776baf2cd_4": "3",
+    "gh_008ef23062ee_4": "3",
+    "gh_95ed5ecf9363_4": "3",
+    "gh_57573f01b2ee_4": "3",
+    "gh_5ae65db96cb7_4": "3",
+    "gh_3e91f0624545_4": "3",
+    "gh_be8c29139989_4": "3",
+    "gh_9f8dc5b0c74e_4": "3",
+    "gh_9877c8541764_4": "3",
+    "gh_89ef4798d3ea_4": "3",
+    "gh_d4dffc34ac39_4": "3",
+    "gh_51e4ad40466d_4": "3",
+    "gh_6cfd1132df94_4": "3",
+    "gh_d5f935d0d1f2_4": "3",
+    "gh_b15de7c99912_4": "3",
+    "gh_7b4a5f86d68c_4": "3"
 }
 
 cateMap = {

+ 36 - 19
dev/test.py

@@ -1,27 +1,44 @@
 import json
 
-with open("config.txt", encoding="utf-8") as f:
+with open("pool_.txt", encoding="utf-8") as f:
     data = f.readlines()
 
 L = {}
-for line in data:
+for line in data[1:]:
     line = line.strip().split("\t")
-    account_name = line[0]
-    gh_id = line[1]
-    fans = int(line[2])
-    level = line[3]
-    read_avg = float(line[4])
-    like_avg = float(line[5])
-    key = "{}_{}".format(gh_id, level)
-    obj = {
-        "accountName": account_name,
-        "ghId": gh_id,
-        "fans": fans,
-        "position": level,
-        "readAvg": read_avg,
-        "likeAvg": like_avg
-    }
-    L[key] = obj
+    # print(line)
+    gh_id = line[3]
+    position = line[5]
+    account_level = line[-2]
+    if account_level == "一级":
+        acc_l = "1"
+    elif account_level == "二级":
+        acc_l = "2"
+    elif account_level == "三级":
+        acc_l = "3"
+    else:
+        continue
+    key = "{}_{}".format(gh_id, position)
+    print(gh_id, position, account_level)
+    L[key] = acc_l
 
-with open("AccountInfo.json", "w", encoding="utf-8") as f:
+
+#     account_name = line[0]
+#     gh_id = line[1]
+#     fans = int(line[2])
+#     level = line[3]
+#     read_avg = float(line[4])
+#     like_avg = float(line[5])
+#     key = "{}_{}".format(gh_id, level)
+#     obj = {
+#         "accountName": account_name,
+#         "ghId": gh_id,
+#         "fans": fans,
+#         "position": level,
+#         "readAvg": read_avg,
+#         "likeAvg": like_avg
+#     }
+#     L[key] = obj
+#
+with open("pool_detail.json", "w", encoding="utf-8") as f:
     f.write(json.dumps(L, ensure_ascii=False, indent=4))

+ 41 - 13
stratrgy/strategy.py

@@ -1,10 +1,12 @@
 """
 @author: luojunhui
 """
+import json
+
 from tqdm import tqdm
 
 from applications.functions import Functions
-from config import accountBaseInfo
+from config import accountBaseInfo, pool_level_detail
 
 
 class ArticlePoolStrategy(object):
@@ -42,18 +44,44 @@ class ArticlePoolStrategy(object):
         for line in detail_list:
             key = "{}_{}".format(line['gh_id'], line['index'])
             article_read = line['increase_read_count']
-            avg_read = accountBaseInfo[key]['readAvg']
-            # 计算比率
-            level_rate = article_read / avg_read - 1
-            print(level_rate, article_read, avg_read)
-            print(line)
-            print("\n")
-            obj = {
-                "key": key,
-                "avg_read": avg_read,
-                "article_read": article_read,
-
-            }
+            if accountBaseInfo.get(key):
+                avg_read = accountBaseInfo[key]['readAvg']
+                # 计算比率
+                level_rate = article_read / avg_read - 1
+                obj = {
+                    "key": key,
+                    "avg_read": avg_read,
+                    "article_read": article_read,
+                    "level_rate": level_rate,
+                    "url": line['url']
+                }
+                L.append(obj)
+        L = sorted(L, key=lambda x: x["level_rate"], reverse=True)
+        for index, i in enumerate(L):
+            print(index,"\t", i['key'], "\t", i['level_rate'])
+        result = {
+            "Level1": [],
+            "Level2": []
+        }
+        c = 0
+        for line in L:
+            print(json.dumps(line, ensure_ascii=False, indent=4))
+            if line['level_rate'] > 0.2:
+                c += 1
+                gh_key = line['key']
+                if pool_level_detail.get(gh_key):
+                    now_level = pool_level_detail[gh_key]
+                    if now_level == "3":
+                        result['Level2'].append(line['url'])
+                    elif now_level == "2":
+                        result['Level1'].append(line['url'])
+                    else:
+                        continue
+                else:
+                    result['Level2'].append(line['url'])
+        print(c)
+        return result
+
 
 
 

+ 4 - 3
tasks/task1.py

@@ -59,9 +59,9 @@ class ColdStartPool(object):
         """
         sql = f"""
         INSERT INTO cold_start_article_pool
-            (content_id, content_link, title, cover, view_count, like_count, looking_count, publish_time_stamp, plan_id, category, content_channel_id)
+            (content_id, content_link, title, cover, view_count, like_count, looking_count, publish_time_stamp, plan_id, category, content_channel_id, status)
         VALUES
-            (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);
+            (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);
         """
         params = (
             article_obj.get("id"),
@@ -74,7 +74,8 @@ class ColdStartPool(object):
             article_obj.get("publishTimestamp"),
             article_obj.get("sourceCrawlerPlans")[0].get("id"),
             article_obj.get("sourceCrawlerPlans")[0].get("name").split("-")[1],
-            article_obj.get("channelContentId")
+            article_obj.get("channelContentId"),
+            1
         )
         cls.PqMysql.update(sql=sql, params=params)
 

+ 12 - 5
tasks/task2.py

@@ -7,7 +7,7 @@ import json
 from tqdm import tqdm
 
 from applications import AIDTApi, DeNetMysql, PQMySQL, Functions
-from config import poolLevelConfig, cateMap, coldPoolArticlesNum, accountBaseInfo
+from config import cateMap, coldPoolArticlesNum, accountBaseInfo
 
 
 class ColdStartTask(object):
@@ -18,7 +18,7 @@ class ColdStartTask(object):
     DeMysql = DeNetMysql()
     PqMysql = PQMySQL()
     Fun = Functions()
-    pool3 = poolLevelConfig['3']
+    pool3 = "autoArticlePoolLevel3"
 
     @classmethod
     def generate_account_dict(cls):
@@ -42,11 +42,18 @@ class ColdStartTask(object):
         sql = f"""
             select content_channel_id, content_link, title 
             from cold_start_article_pool
-            where category = '{category}'
+            where category = '{category}' and status = 1
             order by view_count DESC, publish_time_stamp DESC
             limit {limit_count};
         """
         result = cls.PqMysql.select(sql)
+        content_id = [i[0] for i in result]
+        sql2 = f"""
+            update cold_start_article_pool
+            set status = %s
+            where content_channel_id in %s
+        """
+        cls.PqMysql.update(sql=sql2, params=(0, tuple(content_id)))
         return result
 
     @classmethod
@@ -145,6 +152,6 @@ if __name__ == '__main__':
     CST = ColdStartTask()
     CST.sendToColdPool(
         plan_id=None,
-        plan_name="冷启池子--0729--Monday--分品类抓取--6个品类",
-        plan_tag="autoArticlePoolLevel1",
+        plan_name="冷启池子--0730--Monday--分品类抓取--6个品类",
+        plan_tag="autoArticlePoolLevel3",
     )

+ 11 - 18
tasks/task3.py

@@ -26,7 +26,7 @@ class SendToMultiLevels(object):
         获取前一天数据表现
         :return:
         """
-        odps_sql = "select * from loghubods.changwen_article_datastat where dt = '20240724';"
+        odps_sql = "select * from loghubods.changwen_article_datastat where dt = '20240729';"
         result = cls.OA.select(sql=odps_sql)
         response_list = [
             {
@@ -48,18 +48,10 @@ class SendToMultiLevels(object):
         分类至Pools
         :return:
         """
-        pool_level_1 = [cls.Fun.matchLinkById(i['article_id']) for i in tqdm(yesterday_data) if
-                        i['increase_read_count'] >= 9000]
-        pool_level_2 = [cls.Fun.matchLinkById(i['article_id']) for i in tqdm(yesterday_data) if
-                        3500 <= i['increase_read_count'] < 9000]
-        pool_level_3 = [cls.Fun.matchLinkById(i['article_id']) for i in tqdm(yesterday_data) if
-                        1000 <= i['increase_read_count'] < 3500]
-        L = {
-            "Level1": pool_level_1,
-            "Level2": pool_level_2,
-            "Level3": pool_level_3
-        }
-        return L
+        S = ArticlePoolStrategy()
+        detail_list = S.getData(article_list=yesterday_data)
+        result = S.splitByStrategy(detail_list=detail_list)
+        return result
 
     @classmethod
     def sendToEachCrawlerPlan(cls, key, url_list):
@@ -69,6 +61,9 @@ class SendToMultiLevels(object):
         :param url_list:
         :return:
         """
+        print(key)
+        print(len(url_list))
+        print(url_list)
         # daily自动创建新抓取计划
         # cls.AidApi.updateArticleIntoCrawlerPlan(
         #     plan_id=None,
@@ -97,8 +92,6 @@ class SendToMultiLevels(object):
         cls.sendToDifferentPools(pool_info=level_url_list_map)
 
 
-STML = SendToMultiLevels()
-yesterday_data = STML.getYesterdayData()
-S = ArticlePoolStrategy()
-detail_list = S.getData(article_list=yesterday_data)
-S.splitByStrategy(detail_list=detail_list)
+if __name__ == '__main__':
+    ST = SendToMultiLevels()
+    ST.deal()

+ 1 - 1
tasks/task4.py

@@ -12,7 +12,7 @@ def update_articles(gh_id):
     :param gh_id:
     :return:
     """
-    url = "http://192.168.100.31:6062/article_crawler"
+    url = "http://192.168.100.31:6060/article_crawler"
 
     headers = {"Content-Type": "application/json"}
 

+ 53 - 0
tasks/task5.py

@@ -0,0 +1,53 @@
+"""
+@author: luojunhui
+"""
+import time
+
+from applications import PQMySQL
+
+
+class AccountArticleProducer(object):
+    """
+    Planned pipeline (only the first query is implemented so far):
+
+    step1: get the list of historically well-performing articles of known accounts
+    step2: search an article list for each article and take the account
+           information that belongs to the best article
+    step3: crawl that account's history articles and update the article store
+    step4: return the account's high-quality articles based on how its
+           history articles performed
+    """
+
+    # Shared database handle, created once when the class body is executed.
+    pq_mysql = PQMySQL()
+
+    @classmethod
+    def getHistoryArticles(cls, gh_id, latest_time_stamp):
+        """
+        Select title and show_view_count from official_articles for one account.
+
+        Only rows whose updateTime is greater than ``latest_time_stamp`` and
+        smaller than "now minus 20 hours" are selected.
+
+        :param gh_id: value compared against the ghId column
+        :param latest_time_stamp: exclusive lower bound for updateTime
+        :return: whatever ``pq_mysql.select`` returns for the query
+        """
+        # Upper bound of the window: 20 hours before the current time.
+        twenty_hours_ago = int(time.time()) - 20 * 3600
+        # NOTE(review): values are interpolated straight into the SQL text;
+        # confirm callers only pass trusted gh_id / timestamp values.
+        query = f"""
+        select title, show_view_count from official_articles
+        where ghId = '{gh_id}' and updateTime > {latest_time_stamp} and updateTime < {twenty_hours_ago};
+        """
+        return cls.pq_mysql.select(query)
+
+    @classmethod
+    def findGoodArticles(cls, gh_id):
+        """
+        Placeholder: not implemented yet, always returns None.
+
+        :param gh_id: currently unused
+        :return: None
+        """
+        return None
+
+    @classmethod
+    def updateArticlesToMysql(cls):
+        """
+        Placeholder: not implemented yet, always returns None.
+
+        :return: None
+        """
+        return None
+
+
+
+